Skip to content

Commit

Permalink
Cleanup (#311)
Browse files Browse the repository at this point in the history
  • Loading branch information
wukevin authored Feb 18, 2025
1 parent d591369 commit 91d01cd
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 9 deletions.
13 changes: 8 additions & 5 deletions chai_lab/data/parsing/fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import logging
from io import StringIO
from pathlib import Path
from typing import NamedTuple, Sequence
from typing import NamedTuple, Sequence, TextIO

from chai_lab.data.parsing.structure.entity_type import EntityType
from chai_lab.data.residue_constants import restype_1to3_with_x
Expand All @@ -31,12 +31,15 @@ def fastas_to_str(fastas: Sequence[Fasta]) -> str:
return "".join(f">{fasta.header}\n{fasta.sequence}\n" for fasta in fastas)


def read_fasta(file_path: str | Path | StringIO) -> list[Fasta]:
def read_fasta(file_path: str | Path) -> list[Fasta]:
with open(file_path) as source:
return read_fasta_content(source)


def read_fasta_content(content: StringIO | TextIO) -> list[Fasta]:
from Bio import SeqIO

fasta_sequences = SeqIO.parse(
open(file_path) if isinstance(file_path, (str, Path)) else file_path, "fasta"
)
fasta_sequences = SeqIO.parse(content, "fasta")
return [Fasta(fasta.description, str(fasta.seq)) for fasta in fasta_sequences]


Expand Down
8 changes: 5 additions & 3 deletions chai_lab/data/parsing/msas/a3m.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import numba
import numpy as np

from chai_lab.data.parsing.fasta import Fasta, read_fasta
from chai_lab.data.parsing.fasta import Fasta, read_fasta_content
from chai_lab.data.residue_constants import residue_types_with_nucleotides_order

MAPPED_TOKEN_SKIP: Final[int] = -1
Expand Down Expand Up @@ -124,8 +124,10 @@ def read_colabfold_a3m(fname: Path) -> dict[str, list[Fasta]]:
if not block:
continue
strio = StringIO(block)
hits = read_fasta(strio)
hits = read_fasta_content(strio)
assert len(hits) > 0
assert re.match(r"^[0-9]{3}$", (query := hits[0].header))

query = hits[0].header
assert re.match(r"^[0-9]{3}$", query)
retval[query] = hits
return retval
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
# See the LICENSE file for details.
"""
Given an output directory from a ColabFold run, traverses the directory structure and stages
the same MSA and templates to run through Chai1.
the same MSA and templates to run through Chai1. This is likely not applicable unless you are
incorporating Chai1 into ad hoc analyses based on ColabFold outputs.
A minimal example:
Expand Down

0 comments on commit 91d01cd

Please sign in to comment.