Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitry-Antipov committed Jun 5, 2024
1 parent 59594ec commit 691f381
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 16 deletions.
3 changes: 2 additions & 1 deletion src/scripts/rdna_scaff.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@
G = nx.DiGraph()
gf.load_direct_graph(gfa_file, G)
logger = logger_wrap.initLogger(os.path.join(hicrun_dir, "scaffolding.log"))
path_mashmap = os.path.join(hicrun_dir, "paths2ref.mashmap")
rukki_path_storage = path_storage.PathStorage(G)
rukki_path_storage.readFromFile(old_rukki_tsv_file)
#sf.try_to_scaff(rukki_paths, telomere_locations_file, os.path.join(hicrun_dir, "hic_mapping.byread.output"), os.path.join(hicrun_dir, "unitigs.matches"), G, indirectG, uncompressed_nodes)
sg = scaffold_graph.ScaffoldGraph(rukki_path_storage, telomere_locations_file, os.path.join(hicrun_dir, "hic_mapping.byread.output"), os.path.join(hicrun_dir, "unitigs_nonhpc50.mashmap"), G, uncompressed_nodes, logger)
sg = scaffold_graph.ScaffoldGraph(rukki_path_storage, telomere_locations_file, os.path.join(hicrun_dir, "hic_mapping.byread.output"), os.path.join(hicrun_dir, "unitigs_nonhpc50.mashmap"), G, uncompressed_nodes, path_mashmap, logger)
res = sg.generateScaffolds()

5 changes: 3 additions & 2 deletions src/scripts/scaffolding/match_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ class HomologyStorage:
#{node1: {node2: HomologyInfo(node_1, node_2)}}
def __init__(self, logger, mashmap_file, min_alignment):
self.homologies = {}
self.lens = {}

self.logger = logger_wrap.UpdatedAdapter(logger, self.__class__.__name__)

total_lines = 0
Expand All @@ -87,7 +89,6 @@ def __init__(self, logger, mashmap_file, min_alignment):
self.addHomology(arr[0], arr[5], int(arr[1]), int(arr[6]), [[int(arr[2]), int(arr[3])], [int(arr[7]), int(arr[8])]], arr[4], arr[12])
self.logger.info(f"Loaded {used_lines} out of {total_lines} mashmap lines")
self.logger.info(f"{len(self.homologies)} nodes have at least one used homology")
self.lens = {}
self.fillCoverage()

#do we want to add other direction of pair?
Expand All @@ -112,7 +113,7 @@ def isValid(self, node1, node2):
return False
#extracting length, sometimes we do not haev this info in other places
def getLength(self, node):
return self.lens(node)
return self.lens[node]

# homology_weight - large negative something, min_big_homology - minimal length of homology to be taken in account.
# Some shorter ones can still sometimes be used if they are in regular bulge_like structure
Expand Down
27 changes: 14 additions & 13 deletions src/scripts/scaffolding/scaffold_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class ScaffoldGraph:



def __init__(self, rukki_paths, telomere_locations_file, hic_alignment_file, matches_file, G, uncompressed_fasta, logger):
def __init__(self, rukki_paths, telomere_locations_file, hic_alignment_file, matches_file, G, uncompressed_fasta, path_mashmap, logger):
self.logger = logger_wrap.UpdatedAdapter(logger, self.__class__.__name__)
self.rukki_paths = rukki_paths

Expand Down Expand Up @@ -97,6 +97,7 @@ def __init__(self, rukki_paths, telomere_locations_file, hic_alignment_file, mat
#positions on refence or other haplotype
self.reference_positions = {}
self.assigned_reference = {}
self.getPathPositions(path_mashmap)
self.G = G
self.dists = dict(nx.all_pairs_dijkstra_path_length(self.upd_G, weight=lambda u, v, d: self.upd_G.edges[u, v]['mid_length']))
self.logger.info("Pairwise distances in assembly graph calculated")
Expand Down Expand Up @@ -253,29 +254,30 @@ def pathDist(self, path_from, path_to, check_homologous):
return closest

def getPathPositions(self, path_mashmap):
hom_storage = match_graph.HomologyStorage(path_mashmap, ScaffoldGraph.MIN_HOMOLOGY_REF)
hom_storage = match_graph.HomologyStorage(self.logger, path_mashmap, ScaffoldGraph.MIN_HOMOLOGY_REF)
for path_id in hom_storage.homologies:
all_refs = []
for ref_id in hom_storage.homologies[path_id]:
all_refs.append([hom_storage[path_id][ref_id].getCoveredLen(path_id, ref_id), ref_id])
all_refs.append([hom_storage.homologies[path_id][ref_id].getCoveredLen(), ref_id])
all_refs.sort(reverse=True)
if len(all_refs) == 1 or all_refs[0][0] > ScaffoldGraph.RATIO_HOMOLOGY_REF * all_refs[1][0]:
best_ref = all_refs[0][1]
self.logger.debug(f"Clearly best homology for {path_id} is {best_ref} with size{all_refs[0][0]}")
if all_refs[0][0] > ScaffoldGraph.LEN_FRAC_REF * self.hom_storage.getLength(path_id):
if all_refs[0][0] > ScaffoldGraph.LEN_FRAC_REF * hom_storage.getLength(path_id):
self.logger.debug(f"Best homology for {path_id} is {best_ref} with size{all_refs[0][0]}")
if not best_ref in self.reference_positions:
self.reference_positions[best_ref] = []
hom_info = hom_storage[path_id][best_ref]
self.assigned_reference[path_id + '+'] = best_ref + hom_info.orientaion
self.assigned_reference[path_id + '-'] = best_ref + self.rc_orientation(hom_info.orientaion)
self.reference_positions[best_ref+ "+"] = []
self.reference_positions[best_ref+ "-"] = []
hom_info = hom_storage.homologies[path_id][best_ref]
self.assigned_reference[path_id + '+'] = best_ref + hom_info.orientation
self.assigned_reference[path_id + '-'] = best_ref + self.rc_orientation(hom_info.orientation)
# self.assigned_reference[path_id] = best_ref
# self.reference_positions[best_ref].append(ReferencePosition(path_id, best_ref, hom_info.largest_interval_center[1], self.hom_storage.getLength(path_id), hom_info.orientaion))
self.reference_positions[best_ref + "+"].append(ReferencePosition(path_id + hom_info.orientaion, best_ref + "+", hom_info.largest_interval_center[1], self.hom_storage.getLength(path_id), hom_info.orientaion))
self.reference_positions[best_ref + "-"].append(ReferencePosition(path_id + self.rc_orientation(hom_info.orientation), best_ref + '-', self.hom_storage.getLength(best_ref) - hom_info.largest_interval_center[1], self.hom_storage.getLength(path_id), self.rc_orientation(hom_info.orientaion)))
# self.reference_positions[best_ref].append(ReferencePosition(path_id, best_ref, hom_info.largest_interval_center[1], hom_storage.getLength(path_id), hom_info.orientation))
self.reference_positions[best_ref + "+"].append(ReferencePosition(path_id + hom_info.orientation, best_ref + "+", hom_info.largest_interval_center[1], hom_storage.getLength(path_id), hom_info.orientation))
self.reference_positions[best_ref + "-"].append(ReferencePosition(path_id + self.rc_orientation(hom_info.orientation), best_ref + '-', hom_storage.getLength(best_ref) - hom_info.largest_interval_center[1], hom_storage.getLength(path_id), self.rc_orientation(hom_info.orientation)))

else:
self.logger.debug(f"Best homology for {path_id} is {best_ref} not covered enough frac {all_refs[0][0] / self.hom_storage.getLength(path_id)}")
self.logger.debug(f"Best homology for {path_id} is {best_ref} not covered enough frac {all_refs[0][0] / hom_storage.getLength(path_id)}")
for ref in self.reference_positions:
self.logger.debug(f"Reference positions for {ref}")
self.reference_positions[ref] = sorted(self.reference_positions[ref], key=lambda x: x.average_pos)
Expand Down Expand Up @@ -692,7 +694,6 @@ def getPathPairConnections(self, path_ids, connections, lens):
if self.isNextByRef(path_ids[0] + orientation[0], path_ids[1] + orientation[1]):
self.logger.debug (f"Reference connection found! {path_ids} {orientation}")
scores[orientation] *= self.REFERENCE_MULTIPLICATIVE_BONUS
scores[orientation] = scores[orientation] / (len(connections[(path_ids[0], path_ids[1])]) + 1)
return scores

#returns dict, {id:[present_start_relo, present_end_telo]}
Expand Down

0 comments on commit 691f381

Please sign in to comment.