[MNT] Configure mypy in pre-commit for type checking #423

Merged: 6 commits (Jan 25, 2025)

11 changes: 11 additions & 0 deletions .pre-commit-config.yaml
@@ -41,3 +41,14 @@ repos:
       - id: codespell
         additional_dependencies:
           - tomli
+
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.14.1
+    hooks:
+      - id: mypy
+        args:
+          - --ignore-missing-imports
+          - --no-warn-no-return
+          - --warn-redundant-casts
+          - --show-error-codes
+          - --pretty
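
The hook pins mypy 1.14.1 and starts from a deliberately lenient baseline: `--ignore-missing-imports` silences errors from untyped third-party packages, `--no-warn-no-return` allows code paths that implicitly return None, `--warn-redundant-casts` flags casts that change nothing, and `--show-error-codes` plus `--pretty` make the output easier to act on. A minimal sketch (not from this repo) of what two of these flags control:

```python
from typing import Optional, cast

def parse_age(value: str) -> Optional[float]:
    if value.replace(".", "", 1).isdigit():
        return float(value)
    # Falls off the end here and implicitly returns None. By default mypy
    # reports "Missing return statement"; --no-warn-no-return suppresses it.

age = cast(str, "32.5")  # --warn-redundant-casts: "32.5" is already a str [redundant-cast]
```
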
16 changes: 7 additions & 9 deletions bagel/cli.py
@@ -111,7 +111,7 @@ def pheno(
     tool_mapping = pheno_utils.map_tools_to_columns(data_dictionary)

     # TODO: needs refactoring once we handle multiple participant IDs
-    participants = column_mapping.get("participant")[0]
+    participants = column_mapping["participant"][0]

     # Note that `session_column will be None if there is no session column in the pheno.tsv
     session_column = column_mapping.get("session")
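
The `.get()` to `[...]` swaps in this file (here and in the hunks below) are the kind of change mypy forces: `dict.get()` is typed as returning an Optional value, so subscripting its result is an `[index]` error, while `d[key]` keeps the full value type and raises `KeyError` at runtime only if the key is missing, which the CLI's earlier validation presumably rules out. A small sketch of the difference (names hypothetical):

```python
column_mapping: dict[str, list[str]] = {"participant": ["participant_id"]}

maybe_cols = column_mapping.get("participant")  # inferred: list[str] | None
first = maybe_cols[0]  # mypy error [index]: the value may be None

cols = column_mapping["participant"]  # inferred: list[str]
first = cols[0]  # type-checks cleanly
```
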
@@ -273,12 +273,14 @@ def bids(
     print("Initial checks of inputs passed.\n")

     print("Parsing and validating BIDS dataset. This may take a while...")
+    # NOTE: If there are no subjects in the BIDS dataset, the validation should fail.
+    # The rest of this workflow assumes there's at least one subject in the BIDS dataset.
     layout = BIDSLayout(bids_dir, validate=True)
     print("BIDS parsing completed.\n")

     print("Merging BIDS metadata with existing subject annotations...\n")
     for bids_sub_id in layout.get_subjects():
-        existing_subject = existing_subs_dict.get(f"sub-{bids_sub_id}")
+        existing_subject = existing_subs_dict[f"sub-{bids_sub_id}"]
         existing_sessions_dict = model_utils.get_imaging_session_instances(
             existing_subject
         )
@@ -323,9 +325,7 @@ def bids(
             # If a custom Neurobagel-created session already exists (if `bagel derivatives` was run first),
             # we add to that session when there is no session layer in the BIDS directory
             if session_label in existing_sessions_dict:
-                existing_img_session = existing_sessions_dict.get(
-                    session_label
-                )
+                existing_img_session = existing_sessions_dict[session_label]
                 existing_img_session.hasAcquisition = image_list
                 existing_img_session.hasFilePath = session_path
             else:
@@ -434,7 +434,7 @@ def derivatives(
     for subject, sub_proc_df in status_df.groupby(
         PROC_STATUS_COLS["participant"]
     ):
-        existing_subject = existing_subs_dict.get(subject)
+        existing_subject = existing_subs_dict[subject]

         # Note: Dictionary of existing imaging sessions can be empty if only bagel pheno was run
         existing_sessions_dict = model_utils.get_imaging_session_instances(
@@ -455,9 +455,7 @@
             CUSTOM_SESSION_LABEL if session_label == "" else session_label
         )
         if session_label in existing_sessions_dict:
-            existing_img_session = existing_sessions_dict.get(
-                session_label
-            )
+            existing_img_session = existing_sessions_dict[session_label]
             existing_img_session.hasCompletedPipeline = completed_pipelines
         else:
             new_img_session = models.ImagingSession(
6 changes: 3 additions & 3 deletions bagel/utilities/file_utils.py
@@ -1,6 +1,6 @@
 import json
 from pathlib import Path
-from typing import Optional
+from typing import Any

 import pandas as pd
 import typer
@@ -18,7 +18,7 @@ def file_encoding_error_message(input_p: Path) -> str:

 def load_tabular(
     input_p: Path, input_type: str = "phenotypic"
-) -> Optional[pd.DataFrame]:
+) -> pd.DataFrame:
     """Load a .tsv pheno file and do some basic validation of the file type."""
     if input_p.suffix == ".tsv":
         try:
@@ -61,7 +61,7 @@ def load_tabular(
         )


-def load_json(input_p: Path) -> dict:
+def load_json(input_p: Path) -> Any:
     """Load a user-specified json type file."""
     try:
         with open(input_p, "r", encoding="utf-8") as f:
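
Loosening `load_json` from `-> dict` to `-> Any` matches what the standard library actually promises: `json.load()` is typed in typeshed as returning `Any`, since a JSON document's top level may be an object, array, string, or number. A simplified sketch of the idea, without this module's error handling:

```python
import json
from pathlib import Path
from typing import Any

def load_json_sketch(input_p: Path) -> Any:
    # The file may legitimately hold a top-level list rather than a dict,
    # so Any is the honest return type here.
    with open(input_p, "r", encoding="utf-8") as f:
        return json.load(f)
```
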
6 changes: 4 additions & 2 deletions bagel/utilities/model_utils.py
@@ -97,7 +97,9 @@ def extract_and_validate_jsonld_dataset(file_path: Path) -> models.Dataset:
     return jsonld_dataset


-def get_subject_instances(dataset: models.Dataset) -> dict:
+def get_subject_instances(
+    dataset: models.Dataset,
+) -> dict[str, models.Subject]:
     """
     Return a dictionary of subjects for a given Neurobagel dataset from JSONLD data,
     where keys are subject labels and values are the subject objects.
@@ -109,7 +111,7 @@ def get_subject_instances(dataset: models.Dataset) -> dict:

 def get_imaging_session_instances(
     jsonld_subject: models.Subject,
-) -> dict:
+) -> dict[str, models.ImagingSession]:
     """
     Return a dictionary of imaging sessions for a given subject from JSONLD data,
     where the keys are the session labels and values are the session objects.
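
Parameterizing these return types is what makes the callers in cli.py checkable: a bare `dict` hands back `Any` values, whereas `dict[str, models.Subject]` lets mypy verify attribute access on whatever is pulled out of the mapping. A toy illustration with a stand-in class (not the real `models` module):

```python
from dataclasses import dataclass

@dataclass
class Subject:
    hasLabel: str

def get_subject_instances_sketch(subjects: list[Subject]) -> dict[str, Subject]:
    # Keys are subject labels, values are the subject objects.
    return {s.hasLabel: s for s in subjects}

subs = get_subject_instances_sketch([Subject(hasLabel="sub-01")])
for label, subject in subs.items():
    print(label.upper(), subject.hasLabel)  # mypy knows: str keys, Subject values
```
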
22 changes: 9 additions & 13 deletions bagel/utilities/pheno_utils.py
@@ -128,7 +128,7 @@ def find_deprecated_namespaces(namespaces: list) -> list:
     return [ns for ns in namespaces if ns in DEPRECATED_NAMESPACE_PREFIXES]


-def map_categories_to_columns(data_dict: dict) -> dict:
+def map_categories_to_columns(data_dict: dict) -> dict[str, list]:
     """
     Maps all pre-defined Neurobagel categories (e.g. "Sex") to a list containing all column names (if any) that
     have been linked to this category.
@@ -142,7 +142,7 @@ def map_categories_to_columns(data_dict: dict) -> dict:
     }


-def map_tools_to_columns(data_dict: dict) -> dict:
+def map_tools_to_columns(data_dict: dict) -> dict[str, list]:
     """
     Return a mapping of all assessment tools described in the data dictionary to the columns that
     are mapped to it.
@@ -184,7 +184,6 @@ def get_age_heuristic(column: str, data_dict: dict) -> str:


 def transform_age(value: str, heuristic: str) -> float:
-    is_recognized_heuristic = True
     try:
         if heuristic in [
             AGE_HEURISTICS["float"],
@@ -200,26 +199,23 @@
             value = "P" + value
             duration = isodate.parse_duration(value)
             return float(duration.years + duration.months / 12)
         else:
-            is_recognized_heuristic = False
+            raise ValueError(
+                f"The provided data dictionary contains an unrecognized age transformation: {heuristic}. "
+                f"Ensure that the transformation TermURL is one of {list(AGE_HEURISTICS.values())}."
+            )
     except (ValueError, isodate.isoerror.ISO8601Error) as e:
         raise ValueError(
             f"There was a problem with applying the age transformation: {heuristic}. Error: {str(e)}\n"
             f"Check that the transformation specified in the data dictionary ({heuristic}) is correct for the age values in your phenotypic file, "
             "and that you correctly annotated any missing values in your age column."
         ) from e
-    if not is_recognized_heuristic:
-        raise ValueError(
-            f"The provided data dictionary contains an unrecognized age transformation: {heuristic}. "
-            f"Ensure that the transformation TermURL is one of {list(AGE_HEURISTICS.values())}."
-        )


 def get_transformed_values(
     columns: list, row: pd.Series, data_dict: dict
 ) -> list:
     """Convert a list of raw phenotypic values to the corresponding controlled terms, from columns that have not been annotated as being about an assessment tool."""
-    transf_vals = []
+    transf_vals: list[float | str] = []
     for col in columns:
         value = row[col]
         if is_missing_value(value, col, data_dict):
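
Two typing patterns appear in this hunk. In `transform_age`, the unrecognized-heuristic error is now raised directly in the `else` branch rather than deferred through a flag variable, so every branch visibly returns or raises and the duplicated `raise` after the `try`/`except` disappears. In `get_transformed_values`, the empty-list accumulator gets an explicit element type, since `[]` alone tells mypy nothing about what the list will hold. A condensed sketch of both (heuristic names shortened for illustration):

```python
def transform_sketch(value: str, heuristic: str) -> float:
    # Every branch returns or raises, so no flag variable is needed
    # and mypy can see the function never falls off the end.
    if heuristic == "float":
        return float(value)
    elif heuristic == "int":
        return float(int(value))
    else:
        raise ValueError(f"unrecognized age transformation: {heuristic}")

# Annotating the empty accumulator up front lets it hold both floats and
# strings; mypy would otherwise pin the list to the first element type it sees.
transf_vals: list[float | str] = []
transf_vals.append(transform_sketch("32", "int"))
transf_vals.append("missing")
```
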
@@ -290,7 +286,7 @@ def are_inputs_compatible(data_dict: dict, pheno_df: pd.DataFrame) -> bool:

 def find_undefined_cat_col_values(
     data_dict: dict, pheno_df: pd.DataFrame
-) -> dict:
+) -> dict[str, list]:
     """
     Checks that all categorical column values have annotations. Returns a dictionary containing
     any categorical column names and specific column values not defined in the corresponding data
@@ -314,7 +310,7 @@

 def find_unused_missing_values(
     data_dict: dict, pheno_df: pd.DataFrame
-) -> dict:
+) -> dict[str, list]:
     """
     Checks if missing values annotated in the data dictionary appear at least once in the phenotypic file.
     Returns a dictionary containing any column names and annotated missing values not found in the phenotypic
14 changes: 9 additions & 5 deletions tests/unit/test_derivative_utils.py
@@ -41,7 +41,7 @@ def test_raises_exception_if_remote_and_local_pipeline_catalog_fails(
     monkeypatch, tmp_path
 ):
     """
-    If I cannot get the pipeline catalog from the remote location and the local backup, I should raise an exception.
+    If I cannot get the pipeline catalog from both the remote location and the local backup, I should raise an exception.
     """
     nonsense_url = "https://does.not.exist.url"

@@ -59,11 +59,15 @@ def mock_httpx_get(*args, **kwargs):

     monkeypatch.setattr(httpx, "get", mock_httpx_get)

-    with pytest.raises(FileNotFoundError) as e:
-        mappings.get_pipeline_catalog(
-            url=nonsense_url, path=tmp_path / "does_not_exist.json"
-        )
+    with pytest.warns(
+        UserWarning, match="Unable to download pipeline catalog"
+    ) as w:
+        with pytest.raises(FileNotFoundError) as e:
+            mappings.get_pipeline_catalog(
+                url=nonsense_url, path=tmp_path / "does_not_exist.json"
+            )
+
+    assert len(w) == 1
     assert "Have you correctly initialized the submodules" in str(e.value)
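
The reworked test now asserts the user-facing warning as well as the exception. `pytest.warns` and `pytest.raises` nest cleanly, and both capture objects remain usable after their `with` blocks exit. A self-contained version of the pattern, with a stand-in for the real `mappings.get_pipeline_catalog`:

```python
import warnings

import pytest

def fetch_catalog() -> None:
    # Stand-in downloader: warn about the failed remote fetch, then fail
    # on the missing local backup.
    warnings.warn("Unable to download pipeline catalog", UserWarning)
    raise FileNotFoundError("Have you correctly initialized the submodules?")

def test_warns_then_raises():
    with pytest.warns(UserWarning, match="Unable to download pipeline catalog") as w:
        with pytest.raises(FileNotFoundError) as e:
            fetch_catalog()

    assert len(w) == 1
    assert "submodules" in str(e.value)
```
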