Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add sex check #1516

Open
wants to merge 41 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
978bf0b
begin adding sex check
mathiasbio Dec 20, 2024
f9fdf84
update script
mathiasbio Dec 20, 2024
d7f00f2
update script
mathiasbio Jan 2, 2025
e15ac96
refactoring
mathiasbio Jan 2, 2025
ce1ef2a
refactoring black
mathiasbio Jan 2, 2025
68579a5
add sex check
mathiasbio Jan 3, 2025
366f8f9
fix format
mathiasbio Jan 3, 2025
aeaf0c6
add wgs sex check
mathiasbio Jan 3, 2025
902d45f
add sex check to qc metrics
mathiasbio Jan 3, 2025
803f099
add sex check for wgs
mathiasbio Jan 3, 2025
f57f940
add wgs tumor only
mathiasbio Jan 7, 2025
8195cf1
black
mathiasbio Jan 7, 2025
29a3c1a
fix bugs
mathiasbio Jan 7, 2025
34c9663
changelog
mathiasbio Jan 7, 2025
f257381
fix code
mathiasbio Jan 7, 2025
dbd3159
black
mathiasbio Jan 7, 2025
50af315
refactor
mathiasbio Jan 7, 2025
f08533b
black
mathiasbio Jan 7, 2025
b68fff6
add pytests to new scripts
mathiasbio Jan 7, 2025
7fb7e31
add additional pytest for final qc
mathiasbio Jan 7, 2025
eaee329
black
mathiasbio Jan 7, 2025
02ba985
fix bug
mathiasbio Jan 7, 2025
04e3959
fix
mathiasbio Jan 8, 2025
417685a
make metrics model work with strings too
mathiasbio Jan 8, 2025
e29a79b
fix pytests
mathiasbio Jan 9, 2025
a312c69
black
mathiasbio Jan 9, 2025
fb8b4cf
Merge branch 'develop' into add_sex_check
mathiasbio Jan 9, 2025
976dbec
fix issues
mathiasbio Jan 9, 2025
55ebb77
Merge branch 'add_sex_check' of github.com:Clinical-Genomics/BALSAMIC…
mathiasbio Jan 9, 2025
8b67ee7
fix issues
mathiasbio Jan 9, 2025
5f24021
fix
mathiasbio Jan 9, 2025
ceceef2
fix
mathiasbio Jan 9, 2025
4c226e7
add new pytest
mathiasbio Jan 9, 2025
e95fccd
fix
mathiasbio Jan 10, 2025
5b4938d
switch from ascat to per base coverage for wgs tn, and remove case_sex
mathiasbio Jan 10, 2025
f62f8aa
black
mathiasbio Jan 10, 2025
41f7a38
fix
mathiasbio Jan 10, 2025
b16557b
replace sex prediction json files
mathiasbio Jan 10, 2025
706e736
replace with tn female sex prediction
mathiasbio Jan 10, 2025
d50c524
fix pytests
mathiasbio Jan 10, 2025
fc59c9e
black and docstring
mathiasbio Jan 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions BALSAMIC/assets/scripts/collect_qc_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,17 @@
@click.argument("output_path", type=click.Path(exists=False), required=True)
@click.argument("multiqc_data_path", type=click.Path(exists=True), required=True)
@click.argument("counts_path", nargs=-1, type=click.Path(exists=True), required=False)
@click.option(
"--sex-prediction-path",
type=click.Path(exists=True),
required=False,
help="Path to sex prediction json (optional for balsamic-qc)",
)
def collect_qc_metrics(
config_path: Path,
output_path: Path,
multiqc_data_path: Path,
sex_prediction_path: Path,
counts_path: List[Path],
):
"""Extracts the requested metrics from a JSON multiqc file and saves them to a YAML file
Expand All @@ -37,6 +44,7 @@ def collect_qc_metrics(
config_path: Path; case config file path
output_path: Path; destination path for the extracted YAML formatted metrics
multiqc_data_path: Path; multiqc JSON path from which the metrics will be extracted
sex_prediction_path: Path; sex prediction JSON path from which sex prediction info will be extracted
counts_path: Path; list of variant caller specific files containing the number of variants
"""

Expand All @@ -50,6 +58,10 @@ def collect_qc_metrics(
for count in counts_path:
metrics += get_variant_metrics(count)

# Sex check
if sex_prediction_path:
metrics += get_sex_check_metrics(sex_prediction_path, config)

# Relatedness
analysis_type = get_analysis_type(config)
if analysis_type == "paired" and "Somalier" in multiqc_data["report_data_sources"]:
Expand Down Expand Up @@ -108,6 +120,43 @@ def get_multiqc_data_source(multiqc_data: dict, sample: str, tool: str) -> str:
)


def get_sex_check_metrics(sex_prediction_path: str, config: dict) -> list:
    """Build the sex check metrics from a sex prediction JSON file.

    One metric is created for the tumor sample and, when the prediction file
    also contains a "normal" entry, one for the normal sample. Each metric
    carries the predicted sex as its value and an "eq" condition whose
    threshold is the sex given in the case config.

    Args:
        sex_prediction_path: path to the sex prediction JSON file
        config: case config; its "analysis" section is read for "case_id" and "gender"

    Returns:
        list with the dumped Metric for tumor, followed by the one for normal if present
    """
    case_id: str = config["analysis"]["case_id"]
    predictions = read_json(sex_prediction_path)
    expected_sex: str = config["analysis"]["gender"]

    collected = []
    for sample_type in ("tumor", "normal"):
        # The tumor entry is indexed unconditionally (a missing key raises);
        # the normal entry is only processed when the file provides it.
        if sample_type == "normal" and "normal" not in predictions:
            continue

        predicted_sex: str = predictions[sample_type]["predicted_sex"]
        sample_metric = Metric(
            id=f"{case_id}_{sample_type}",
            input=os.path.basename(sex_prediction_path),
            name="compare_predicted_to_given_sex".upper(),
            step="sex_check",
            value=predicted_sex,
            condition={"norm": "eq", "threshold": expected_sex},
        )
        collected.append(sample_metric.model_dump())

    return collected


def get_relatedness_metrics(multiqc_data: dict) -> list:
"""Retrieves the relatedness metrics and returns them as a Metric list."""
source_tool = "Somalier"
Expand Down
Loading