Skip to content

Commit

Permalink
working version
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitry-Antipov committed Oct 23, 2023
1 parent 6c1031b commit 50e523e
Showing 1 changed file with 43 additions and 14 deletions.
57 changes: 43 additions & 14 deletions src/scripts/get_layout_from_mbg.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,11 +383,15 @@ def get_exact_match_length(clusters):
total_notbanned = 0
for contig in contig_contains_reads:
print (f"debugggging {contig}")
print (contig_nodeseqs[contig][0])
print (contig_nodeseqs[contig][0][0])
cont_seq = ""
for i in range(0, len(contig_nodeseqs[contig])):
cont_seq += (contig_nodeseqs[contig][i][0])
print (cont_seq)

sys.stdout.flush()
#lets try to ban the reads that have a suffix or prefix that contradict to the contig
contig_ends = {contig_nodeseqs[contig][0][0][1:], contig_nodeseqs[contig][-1][0][1:]}
contig_ends = [contig_nodeseqs[contig][0][0], contig_nodeseqs[contig][-1][0]]

contig_nodes_no_dir = set()
for i in range(0, len(contig_nodeseqs[contig])):
contig_nodes_no_dir.add(contig_nodeseqs[contig][i][0][1:])
Expand All @@ -406,16 +410,37 @@ def get_exact_match_length(clusters):
near_contig_end = False
#if read_end not in contig, then something likely went wrong..
#can be replaced by more accurate check to exclude case where read end is somewhere else, but it should not make big sense.
for read_end in {readname_to_paths[readname][0][0][1:], readname_to_paths[readname][0][-1][1:]}:
if not (read_end in contig_nodes_no_dir):
to_ban = True


to_ban_left = False
to_ban_right = False
if not (readname_to_paths[readname][0][0][1:] in contig_nodes_no_dir):
to_ban_left = True
if not (readname_to_paths[readname][0][-1][1:] in contig_nodes_no_dir):
to_ban_right = True
#lets always allow read to extend contig
contains_contig_ends = [False, False]
for node in readname_to_paths[readname][0]:
if node[1:] in contig_ends:
to_ban = False
near_contig_end = True
for i in range(0,2):
#nodes same without respect to the orientation
if node[1:] == contig_ends[i][1:]:
contains_contig_ends[i] = True
#read going different direction with contig, so should switch ban logic
#rc loop may confuse this logic, but it would be weird anyway.
'''if node[0] != contig_ends[i][0]:
tmp = to_ban_left
to_ban_left = to_ban_right
to_ban_right = tmp
print ("switching")
print (readname_to_paths[readname][0]) '''


if to_ban_left and not(contains_contig_ends[0]):
to_ban = True
if to_ban_right and not(contains_contig_ends[1]):
to_ban = True
if to_ban:
print(f" {to_ban_left} {to_ban_right} banning")
print (readname_to_paths[readname][0])
total_banned +=1
continue
else:
Expand All @@ -440,20 +465,24 @@ def get_exact_match_length(clusters):
elif contigstart < fwcluster[0] + 50 and contigend < fwcluster[1] + 50:
fwcluster = (contigstart, contigend, min(fwcluster[2], readstart), max(fwcluster[3], readend), fwcluster[4] + [(real_readstart, real_readend)])
else:
if fwcluster[3] - fwcluster[2] >= readlen * min_read_len_fraction: read_clusters[readname].append((contig, fwcluster[0], fwcluster[1], get_exact_match_length(fwcluster[4])))
if fwcluster[3] - fwcluster[2] >= readlen * min_read_len_fraction:
read_clusters[readname].append((contig, fwcluster[0], fwcluster[1], get_exact_match_length(fwcluster[4])))
fwcluster = (contigstart, contigend, readstart, readend, [(real_readstart, real_readend)])
else:
if bwcluster is None:
bwcluster = (contigstart, contigend, readstart, readend, [(real_readstart, real_readend)])
elif contigstart < bwcluster[0] + 50 and contigend < bwcluster[1] + 50:
bwcluster = (contigstart, contigend, min(bwcluster[2], readstart), max(bwcluster[3], readend), bwcluster[4] + [(real_readstart, real_readend)])
else:
if bwcluster[3] - bwcluster[2] >= readlen * min_read_len_fraction: read_clusters[readname].append((contig, bwcluster[1], bwcluster[0], get_exact_match_length(bwcluster[4])))
if bwcluster[3] - bwcluster[2] >= readlen * min_read_len_fraction:
read_clusters[readname].append((contig, bwcluster[1], bwcluster[0], get_exact_match_length(bwcluster[4])))
bwcluster = (contigstart, contigend, readstart, readend, [(real_readstart, real_readend)])
if fwcluster is not None:
if fwcluster[3] - fwcluster[2] >= readlen * min_read_len_fraction: read_clusters[readname].append((contig, fwcluster[0], fwcluster[1], get_exact_match_length(fwcluster[4])))
if fwcluster[3] - fwcluster[2] >= readlen * min_read_len_fraction:
read_clusters[readname].append((contig, fwcluster[0], fwcluster[1], get_exact_match_length(fwcluster[4])))
if bwcluster is not None:
if bwcluster[3] - bwcluster[2] >= readlen * min_read_len_fraction: read_clusters[readname].append((contig, bwcluster[1], bwcluster[0], get_exact_match_length(bwcluster[4])))
if bwcluster[3] - bwcluster[2] >= readlen * min_read_len_fraction:
read_clusters[readname].append((contig, bwcluster[1], bwcluster[0], get_exact_match_length(bwcluster[4])))
print (f"Banned {total_banned} allowed {total_notbanned}")
contig_actual_lines = {}
#selecting best alignment to the contig, if multiple best use all of them.
Expand Down

0 comments on commit 50e523e

Please sign in to comment.