From 618854d75bcec46a50283dd51beb3241b8d530e2 Mon Sep 17 00:00:00 2001
From: Kevin Wu
Date: Sat, 8 Feb 2025 01:50:43 +0000
Subject: [PATCH] Refactor

---
 chai_lab/data/parsing/templates/m8.py | 31 ++++++++++++++++-----------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/chai_lab/data/parsing/templates/m8.py b/chai_lab/data/parsing/templates/m8.py
index 61a1032..df2925e 100644
--- a/chai_lab/data/parsing/templates/m8.py
+++ b/chai_lab/data/parsing/templates/m8.py
@@ -19,19 +19,10 @@
 logger = logging.getLogger(name=__name__)
 
 
-def parse_m8_to_template_hits(
-    query_pdb_id: str,
-    query_sequence: str,
-    m8_path: Path,
-    template_cif_folder: Path | None = None,
-) -> Iterator[TemplateHit]:
-    assert m8_path.is_file() and m8_path.stat().st_size > 0
-
-    if template_cif_folder is not None:
-        template_cif_folder.mkdir(parents=True, exist_ok=True)
-
-    table = pd.read_csv(
-        m8_path,
+def parse_m8_file(fname: Path) -> pd.DataFrame:
+    """Parse the m8 alignment format describing template information."""
+    return pd.read_csv(
+        fname,
         delimiter="\t",
         header=None,
         names=[
@@ -51,6 +42,20 @@ def parse_m8_to_template_hits(
         ],
     ).sort_values(by=["query_id", "evalue"])
 
+
+def parse_m8_to_template_hits(
+    query_pdb_id: str,
+    query_sequence: str,
+    m8_path: Path,
+    template_cif_folder: Path | None = None,
+) -> Iterator[TemplateHit]:
+    assert m8_path.is_file() and m8_path.stat().st_size > 0
+
+    if template_cif_folder is not None:
+        template_cif_folder.mkdir(parents=True, exist_ok=True)
+
+    table = parse_m8_file(m8_path)
+
     # Subset to those matching the query pdb id
     table = table.loc[table.query_id.astype(str) == query_pdb_id]
     if len(table) == 0: