Skip to content

Commit

Permalink
added a last couple of notes for my future reference #2 #20 #19
Browse files Browse the repository at this point in the history
  • Loading branch information
dkoslicki committed Mar 18, 2020
1 parent a49980f commit f7bee92
Show file tree
Hide file tree
Showing 5 changed files with 4 additions and 4 deletions.
Binary file modified dataForShaopeng/test_issue/TrainingDatabase_k_61.h5
Binary file not shown.
2 changes: 1 addition & 1 deletion dataForShaopeng/test_issue/out_40_61_1.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
,k=40,k=41,k=42,k=43,k=44,k=45,k=46,k=47,k=48,k=49,k=50,k=51,k=52,k=53,k=54,k=55,k=56,k=57,k=58,k=59,k=60,k=61
taxid_1909294_104_genomic.fna.gz,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
taxid_1909294_109_genomic.fna.gz,0.677,0.585,0.576,0.576,0.566,0.56,0.556,0.555,0.551,0.552,0.543,0.537,0.533,0.528,0.523,0.519,0.518,0.515,0.504,0.501,0.497,0.493
taxid_1909294_109_genomic.fna.gz,0.677,0.582,0.575,0.57,0.564,0.559,0.555,0.555,0.551,0.547,0.542,0.535,0.532,0.527,0.523,0.52,0.518,0.513,0.504,0.5,0.497,0.493
2 changes: 1 addition & 1 deletion dataForShaopeng/test_issue/out_50_61_1.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
,k=50,k=51,k=52,k=53,k=54,k=55,k=56,k=57,k=58,k=59,k=60,k=61
taxid_1909294_104_genomic.fna.gz,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
taxid_1909294_109_genomic.fna.gz,0.589,0.536,0.532,0.527,0.525,0.522,0.518,0.515,0.504,0.501,0.497,0.493
taxid_1909294_109_genomic.fna.gz,0.589,0.535,0.532,0.527,0.523,0.519,0.518,0.513,0.504,0.5,0.497,0.493
2 changes: 1 addition & 1 deletion scripts/MakeStreamingDNADatabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def main():
print("Creating ternary search tree")
to_insert = set()
for i in range(len(genome_sketches)):
for kmer_index in range(len(genome_sketches[i]._kmers)):
for kmer_index in range(len(genome_sketches[i]._kmers)): # FIXME: think about which suffix/prefixes to add here
kmer = genome_sketches[i]._kmers[kmer_index]
to_insert.add(kmer + 'x' + str(i) + 'x' + str(kmer_index)) # format here is kmer+x+hash_index+x+kmer_index
kmer = khmer.reverse_complement(genome_sketches[i]._kmers[kmer_index])
Expand Down
2 changes: 1 addition & 1 deletion scripts/StreamingQueryDNADatabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def keyfunction(item):
try:
all_kmers_bf = WritingBloomFilter(len(sketches)*len(k_range)*num_hashes*10, 0.01)
for sketch in sketches:
for kmer in sketch._kmers:
for kmer in sketch._kmers: # FIXME: think about what all actually needs to be added
for ksize in k_range:
all_kmers_bf.add(kmer[0:ksize]) # put all the k-mers and the appropriate prefixes in
all_kmers_bf.add(kmer[0:ksize][::-1])
Expand Down

0 comments on commit f7bee92

Please sign in to comment.