Skip to content

Commit

Permalink
Fix some more code
Browse files Browse the repository at this point in the history
  • Loading branch information
Chiara Rasi committed Jan 15, 2025
1 parent 3b57562 commit 87450c8
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 13 deletions.
10 changes: 2 additions & 8 deletions src/chanjo2/crud/intervals.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,28 +180,22 @@ def get_ensembl_gene_ids_from_gene_filter(filter_value, filter_column):
genes = (
db.query(SQLGene.ensembl_ids).filter(filter_column.in_(filter_value)).all()
)
return [gene.ensembl_ids for gene in genes]
return [ensembl_id for gene in genes for ensembl_id in gene.ensembl_ids]

# Handle filtering based on ensembl_ids, ensembl_gene_ids, hgnc_ids, or hgnc_symbols
if ensembl_ids:
intervals = intervals.filter(
func.json_contains(SQLGene.ensembl_ids, func.json_array(*ensembl_ids))
)
elif ensembl_gene_ids:
intervals = intervals.filter(
interval_type.ensembl_gene_id.in_(ensembl_gene_ids)
)
elif hgnc_ids:
ensembl_gene_ids = get_ensembl_gene_ids_from_gene_filter(
hgnc_ids, SQLGene.hgnc_id
)
intervals = intervals.filter(
interval_type.ensembl_gene_id.in_(ensembl_gene_ids)
)
elif hgnc_symbols:
ensembl_gene_ids = get_ensembl_gene_ids_from_gene_filter(
hgnc_symbols, SQLGene.hgnc_symbol
)
if ensembl_gene_ids:
intervals = intervals.filter(
interval_type.ensembl_gene_id.in_(ensembl_gene_ids)
)
Expand Down
16 changes: 11 additions & 5 deletions src/chanjo2/meta/handle_load_intervals.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,13 @@ def read_resource_lines(build: Builds, interval_type: IntervalType) -> Iterator[
return response.iter_lines(decode_unicode=True)


def _replace_empty_cols(line: str) -> List[Union[str, None]]:
def _replace_empty_cols(line: str, nr_expected_columns: int) -> List[Union[str, None]]:
"""Split line into columns, replacing empty columns with None values."""
return [None if cell == "" else cell for cell in line.split("\t")]
cols = [None if cell == "" else cell for cell in line.split("\t")]

# Make sure that expected nr of cols are returned if last cols are blank
cols += [None] * (nr_expected_columns - len(cols))
return cols


async def update_genes(
Expand Down Expand Up @@ -93,7 +97,7 @@ def update_or_insert_gene(session, sql_gene):
if line == END_OF_PARSED_FILE:
break

items: List = _replace_empty_cols(line=line)
items: List = _replace_empty_cols(line=line, nr_expected_columns=len(header))

try:
sql_gene = SQLGene(
Expand Down Expand Up @@ -151,7 +155,9 @@ async def update_transcripts(
if line == END_OF_PARSED_FILE:
break

items: List = _replace_empty_cols(line=line)
items: List = _replace_empty_cols(line=line, nr_expected_columns=len(header))
LOG.warning(header)
LOG.warning(items)

try:
transcript = TranscriptBase(
Expand Down Expand Up @@ -216,7 +222,7 @@ async def update_exons(
if line == END_OF_PARSED_FILE:
break

items: List = _replace_empty_cols(line=line)
items: List = _replace_empty_cols(line=line, nr_expected_columns=len(header))

try:
# Load Exon interval into the database
Expand Down

0 comments on commit 87450c8

Please sign in to comment.