Add a test for measures where the interval population is dependent on…

… the specific interval Tests the case where an interval denominator is dependent on the specific interval, and the union of all denominators would not capture the full population. If we want to extract variables that don't vary with the interval (e.g. sex, ethnicity), we can get the variable definition for them, but we'd need a population to extract it for. We could define a population that covers the entire interval period, from first start date to first end date, which would work in many cases, but would not work if the interval population is dependent on the specific interval in some way. This test would catch that erroneous assumption.
opensafely-core · Jan 29, 2025 · 7d46cc3 · 7d46cc3
1 parent 5f61a34
commit 7d46cc3
Showing 1 changed file with 67 additions and 0 deletions.
diff --git a/tests/integration/measures/test_calculate.py b/tests/integration/measures/test_calculate.py
@@ -101,6 +101,73 @@ def test_get_measure_results(engine):
     assert set(results) == set(expected)
 
 
+def test_get_measures_interval_dependent_denominator(engine):
+    # Test results when an interval denominator is dependent on the specific interval
+    # (i.e. values in other intervals affect the inclusion in this interval population)
+    # i.e. the union of all measure denominators will exclude some patients
+    intervals = years(2).starting_on("2020-01-01")
+    measures = Measures()
+
+    is_female = patients.sex == "female"
+    had_event_in_interval = events.where(
+        events.date.is_during(INTERVAL)
+    ).exists_for_patient()
+    had_event_outside_interval = events.where(
+        events.date.is_before(INTERVAL.start_date)
+        | events.date.is_after(INTERVAL.end_date)
+    ).exists_for_patient()
+    measures.define_measure(
+        "female_by_events_outside_interval_only",
+        numerator=is_female,
+        denominator=had_event_outside_interval & ~(had_event_in_interval),
+        intervals=intervals,
+    )
+
+    patient_data = [
+        dict(patient_id=1, sex="male"),
+        dict(patient_id=2, sex="female"),
+        dict(patient_id=3, sex="male"),
+        dict(patient_id=4, sex="female"),
+    ]
+    event_data = [
+        # Interval 1 includes only patient 2 (female) in the population (has an event in interval 2 only)
+        # Interval 2 includes only patient 1 (male) in the population (has an event in interval 1 only)
+        dict(patient_id=1, code="abc", date=date(2020, 2, 1)),
+        dict(patient_id=2, code="abc", date=date(2021, 2, 1)),
+        # Patient 3 and 4 have events in both intervals, so aren't included in the population for
+        # either
+        dict(patient_id=3, code="abc", date=date(2020, 2, 1)),
+        dict(patient_id=4, code="abc", date=date(2020, 2, 1)),
+        dict(patient_id=3, code="abc", date=date(2021, 2, 1)),
+        dict(patient_id=4, code="abc", date=date(2021, 2, 1)),
+    ]
+    engine.populate({patients: patient_data, events: event_data})
+    results = get_measure_results(engine.query_engine(), measures)
+
+    expected = [
+        # interval 1 has 1 female patient in the population - numerator 1, denominator 1
+        (
+            "female_by_events_outside_interval_only",
+            date(2020, 1, 1),
+            date(2020, 12, 31),
+            1.0,
+            1,
+            1,
+        ),
+        # interval 2 has 1 male patient in the population - numerator 0, denominator 1
+        (
+            "female_by_events_outside_interval_only",
+            date(2021, 1, 1),
+            date(2021, 12, 31),
+            0.0,
+            0,
+            1,
+        ),
+    ]
+
+    assert set(results) == set(expected)
+
+
 @mock.patch("ehrql.measures.calculate.time")
 def test_get_measure_results_with_timeout(patched_time, in_memory_engine):
     events_in_interval = events.where(events.date.is_during(INTERVAL))