Skip to content

Commit

Permalink
working version
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitry-Antipov authored and skoren committed Nov 20, 2023
1 parent c28584e commit 8d984cf
Showing 1 changed file with 67 additions and 5 deletions.
72 changes: 67 additions & 5 deletions src/scripts/get_layout_from_mbg.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,11 +362,69 @@ def get_exact_match_length(clusters):

read_clusters = {}
for contig in contig_contains_reads:
print (f"debugggging {contig}")
cont_seq = ""
for i in range(0, len(contig_nodeseqs[contig])):
cont_seq += (contig_nodeseqs[contig][i][0])
print (cont_seq)

sys.stdout.flush()
#lets try to ban the reads that have a suffix or prefix that contradict to the contig
contig_ends = [contig_nodeseqs[contig][0][0], contig_nodeseqs[contig][-1][0]]

contig_nodes_no_dir = set()
for i in range(0, len(contig_nodeseqs[contig])):
contig_nodes_no_dir.add(contig_nodeseqs[contig][i][0][1:])

for readname in contig_contains_reads[contig]:
if readname not in read_clusters: read_clusters[readname] = []
lines = contig_contains_reads[contig][readname]
assert len(lines) > 0
readlen = lines[0][4]

#lets not ban gap containing reads
if not readname_to_paths[readname][1]:
to_ban = False
near_contig_end = False
#if read_end not in contig, then something likely went wrong..
#can be replaced by more accurate check to exclude case where read end is somewhere else, but it should not make big sense.

to_ban_left = False
to_ban_right = False
if not (readname_to_paths[readname][0][0][1:] in contig_nodes_no_dir):
to_ban_left = True
if not (readname_to_paths[readname][0][-1][1:] in contig_nodes_no_dir):
to_ban_right = True
#lets always allow read to extend contig
contains_contig_ends = [False, False]
for node in readname_to_paths[readname][0]:
for i in range(0,2):
#nodes same without respect to the orientation
if node[1:] == contig_ends[i][1:]:
contains_contig_ends[i] = True
#read going different direction with contig, so should switch ban logic
#rc loop may confuse this logic, but it would be weird anyway.
'''if node[0] != contig_ends[i][0]:
tmp = to_ban_left
to_ban_left = to_ban_right
to_ban_right = tmp
print ("switching")
print (readname_to_paths[readname][0]) '''


if to_ban_left and not(contains_contig_ends[0]):
to_ban = True
if to_ban_right and not(contains_contig_ends[1]):
to_ban = True
if to_ban:
print(f" {to_ban_left} {to_ban_right} banning")
print (readname_to_paths[readname][0])
total_banned +=1
continue
else:
total_notbanned +=1

>>>>>>> 50e523e (working version)
lines.sort(key=lambda x: min(x[0], x[1]))
fwcluster = None
bwcluster = None
Expand All @@ -385,21 +443,25 @@ def get_exact_match_length(clusters):
elif contigstart < fwcluster[0] + 50 and contigend < fwcluster[1] + 50:
fwcluster = (contigstart, contigend, min(fwcluster[2], readstart), max(fwcluster[3], readend), fwcluster[4] + [(real_readstart, real_readend)])
else:
if fwcluster[3] - fwcluster[2] >= readlen * min_read_len_fraction: read_clusters[readname].append((contig, fwcluster[0], fwcluster[1], get_exact_match_length(fwcluster[4])))
if fwcluster[3] - fwcluster[2] >= readlen * min_read_len_fraction:
read_clusters[readname].append((contig, fwcluster[0], fwcluster[1], get_exact_match_length(fwcluster[4])))
fwcluster = (contigstart, contigend, readstart, readend, [(real_readstart, real_readend)])
else:
if bwcluster is None:
bwcluster = (contigstart, contigend, readstart, readend, [(real_readstart, real_readend)])
elif contigstart < bwcluster[0] + 50 and contigend < bwcluster[1] + 50:
bwcluster = (contigstart, contigend, min(bwcluster[2], readstart), max(bwcluster[3], readend), bwcluster[4] + [(real_readstart, real_readend)])
else:
if bwcluster[3] - bwcluster[2] >= readlen * min_read_len_fraction: read_clusters[readname].append((contig, bwcluster[1], bwcluster[0], get_exact_match_length(bwcluster[4])))
if bwcluster[3] - bwcluster[2] >= readlen * min_read_len_fraction:
read_clusters[readname].append((contig, bwcluster[1], bwcluster[0], get_exact_match_length(bwcluster[4])))
bwcluster = (contigstart, contigend, readstart, readend, [(real_readstart, real_readend)])
if fwcluster is not None:
if fwcluster[3] - fwcluster[2] >= readlen * min_read_len_fraction: read_clusters[readname].append((contig, fwcluster[0], fwcluster[1], get_exact_match_length(fwcluster[4])))
if fwcluster[3] - fwcluster[2] >= readlen * min_read_len_fraction:
read_clusters[readname].append((contig, fwcluster[0], fwcluster[1], get_exact_match_length(fwcluster[4])))
if bwcluster is not None:
if bwcluster[3] - bwcluster[2] >= readlen * min_read_len_fraction: read_clusters[readname].append((contig, bwcluster[1], bwcluster[0], get_exact_match_length(bwcluster[4])))

if bwcluster[3] - bwcluster[2] >= readlen * min_read_len_fraction:
read_clusters[readname].append((contig, bwcluster[1], bwcluster[0], get_exact_match_length(bwcluster[4])))
print (f"Banned {total_banned} allowed {total_notbanned}")
contig_actual_lines = {}
for readname in read_clusters:
longest = []
Expand Down

0 comments on commit 8d984cf

Please sign in to comment.