Skip to content

Commit

Permalink
Merge pull request #345 from Clinical-Genomics/fix_summary
Browse files Browse the repository at this point in the history
d4_genes_condensed_summary to return nan & inf coverage values when error occurs
  • Loading branch information
northwestwitch authored Aug 23, 2024
2 parents 74574de + 93ff116 commit 75ca2a6
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- Replaced deprecated Pydantic `parse_obj` method with `model_validate`
- Report and genes overview endpoints accept only POST requests with form data now (application/x-www-form-urlencoded) - no json
- Sort alphabetically the list of genes that are incompletely covered on the report page
- `d4_genes_condensed_summary` coverage endpoint no longer converts `nan` or `inf` coverage values to None; it returns them as strings (`str(value)`) instead
### Fixed
- Updated dependencies including `certifi` to address dependabot alert
- Update pytest to v.7.4.4 to address a `ReDoS` vulnerability
Expand Down
7 changes: 3 additions & 4 deletions src/chanjo2/endpoints/coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
get_d4tools_intervals_mean_coverage,
get_samples_sex_metrics,
)
from chanjo2.meta.handle_report_contents import INTERVAL_TYPE_SQL_TYPE
from chanjo2.meta.handle_report_contents import INTERVAL_TYPE_SQL_TYPE, get_mean
from chanjo2.models import SQLGene
from chanjo2.models.pydantic_models import (
CoverageSummaryQuery,
Expand Down Expand Up @@ -187,10 +187,9 @@ def d4_genes_condensed_summary(
value[query.coverage_threshold] * 100
for value in genes_coverage_completeness.values()
]

condensed_stats[sample.name] = {
"mean_coverage": (
round(mean(genes_mean_coverage), 2) if genes_mean_coverage else "NA"
),
"mean_coverage": get_mean(float_list=genes_mean_coverage),
"coverage_completeness_percent": (
round(mean(genes_coverage_completeness_values), 2)
if genes_coverage_completeness_values
Expand Down
11 changes: 11 additions & 0 deletions src/chanjo2/meta/handle_report_contents.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import OrderedDict
from statistics import mean
from typing import Dict, List, Optional, Tuple, Union

from sqlalchemy.orm import Session
Expand Down Expand Up @@ -29,6 +30,16 @@
#### Functions used by all reports ####


def get_mean(float_list: List[float], round_by: int = 2) -> Union[float, str]:
    """Return the rounded mean of a list of numbers, or a string when no finite mean exists.

    Args:
        float_list: The values to average.
        round_by: Number of decimals the mean is rounded to.

    Returns:
        The rounded mean as a number when it is finite. "NA" when the list is
        empty; otherwise the string form of the mean ("nan", "inf" or "-inf")
        when the mean is not a finite number.
    """
    import math  # function-scope import: the module header does not import math

    if not float_list:
        return "NA"

    mean_value = round(mean(float_list), round_by)

    # Test finiteness directly instead of inspecting the string form: a
    # string-based digit check wrongly rejects negative means ("-2.0") and
    # means printed in exponent notation ("1e+16"), turning them into strings.
    if math.isfinite(mean_value):
        return mean_value
    return str(mean_value)


def get_ordered_levels(threshold_levels: List[int]) -> OrderedDict:
"""Returns the coverage threshold levels as an ordered dictionary."""
report_levels = OrderedDict()
Expand Down
2 changes: 1 addition & 1 deletion tests/src/chanjo2/endpoints/test_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def test_d4_genes_coverage_summary(
condensed_summary = response.json()
# And return the expected data
assert condensed_summary[DEMO_SAMPLE["name"]]["coverage_completeness_percent"] > 0
assert condensed_summary[DEMO_SAMPLE["name"]]["mean_coverage"] == "whatsthis"
assert condensed_summary[DEMO_SAMPLE["name"]]["mean_coverage"] > 0


def test_get_samples_predicted_sex(
Expand Down
21 changes: 21 additions & 0 deletions tests/src/chanjo2/meta/test_handle_report_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from chanjo2.demo import DEMO_COVERAGE_QUERY_FORM
from chanjo2.meta.handle_report_contents import (
get_mean,
get_missing_genes_from_db,
get_report_data,
)
Expand All @@ -29,6 +30,26 @@
]


def test_get_mean_floats():
    """Check that get_mean returns a float when given a list of ordinary floats."""

    # GIVEN a list containing only regular floating point numbers
    coverage_values = [12.81, 34.72, 22.53]

    # WHEN the mean of the list is computed
    result = get_mean(float_list=coverage_values)

    # THEN the returned value is a float
    assert isinstance(result, float)


def test_get_mean_inf():
    """Check that get_mean returns the string "inf" when the input list contains an infinite value."""

    # GIVEN a list mixing regular numbers with an infinite value
    values_with_inf = [float("inf"), 7, 45.22]

    # WHEN the mean of the list is computed
    result = get_mean(float_list=values_with_inf)

    # THEN the mean is returned as the string "inf"
    assert result == "inf"


def test_get_missing_genes_from_db(
demo_session: sessionmaker, demo_genes_37: List[SQLGene]
):
Expand Down

0 comments on commit 75ca2a6

Please sign in to comment.