diff --git a/CHANGELOG.md b/CHANGELOG.md index 5587cc91..92ac3b5a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - Replaced deprecated Pydantic `parse_obj` method with `model_validate` - Report and genes overview endpoints accept only POST requests with form data now (application/x-www-form-urlencoded) - no json - Sort alphabetically the list genes that are incompletely covered on report page +- `d4_genes_condensed_summary` coverage endpoint will not convert `nan` or `inf` coverage values to None, but to str(value) ### Fixed - Updated dependencies including `certifi` to address dependabot alert - Update pytest to v.7.4.4 to address a `ReDoS` vulnerability diff --git a/src/chanjo2/endpoints/coverage.py b/src/chanjo2/endpoints/coverage.py index ff21a18f..edafa41d 100644 --- a/src/chanjo2/endpoints/coverage.py +++ b/src/chanjo2/endpoints/coverage.py @@ -22,7 +22,7 @@ get_d4tools_intervals_mean_coverage, get_samples_sex_metrics, ) -from chanjo2.meta.handle_report_contents import INTERVAL_TYPE_SQL_TYPE +from chanjo2.meta.handle_report_contents import INTERVAL_TYPE_SQL_TYPE, get_mean from chanjo2.models import SQLGene from chanjo2.models.pydantic_models import ( CoverageSummaryQuery, @@ -187,10 +187,9 @@ def d4_genes_condensed_summary( value[query.coverage_threshold] * 100 for value in genes_coverage_completeness.values() ] + condensed_stats[sample.name] = { - "mean_coverage": ( - round(mean(genes_mean_coverage), 2) if genes_mean_coverage else "NA" - ), + "mean_coverage": get_mean(float_list=genes_mean_coverage), "coverage_completeness_percent": ( round(mean(genes_coverage_completeness_values), 2) if genes_coverage_completeness_values diff --git a/src/chanjo2/meta/handle_report_contents.py b/src/chanjo2/meta/handle_report_contents.py index bcdc6762..78303c9f 100644 --- a/src/chanjo2/meta/handle_report_contents.py +++ b/src/chanjo2/meta/handle_report_contents.py @@ -1,4 +1,5 @@ from collections import OrderedDict +from statistics import mean from typing 
def get_mean(float_list: List[float], round_by: int = 2) -> Union[float, str]:
    """Return the rounded mean of a list of numbers, or a string when no finite mean exists.

    Args:
        float_list: Values to average. May be empty and may contain ``nan`` or ``inf``.
        round_by: Number of decimal places the mean is rounded to.

    Returns:
        The mean rounded to ``round_by`` decimals when it is a finite number,
        ``"NA"`` when ``float_list`` is empty, otherwise ``str(mean)``
        (i.e. ``"inf"``, ``"-inf"`` or ``"nan"``).
    """
    # Local import keeps this function self-contained without touching the module's import block.
    from math import isfinite

    if not float_list:
        return "NA"

    mean_value = round(mean(float_list), round_by)

    # Decide numerically whether the mean is a usable number. Inspecting the string form
    # (digits before the ".") wrongly rejects finite values such as negative means ("-2.0")
    # or values printed in scientific notation ("1e+16").
    return mean_value if isfinite(mean_value) else str(mean_value)
def test_get_mean_inf():
    """Check that get_mean reports an infinite mean as the string "inf"."""

    # GIVEN a list of numbers, one of which is infinity
    numbers_with_inf = [float("inf"), 7, 45.22]

    # WHEN the mean of the list is computed
    result = get_mean(float_list=numbers_with_inf)

    # THEN the non-finite mean is returned in its string form
    assert result == "inf"