Skip to content

Commit

Permalink
Add a test for measures where the interval population is dependent on…
Browse files Browse the repository at this point in the history
… the specific interval

Tests the case where an interval denominator is dependent on the specific
interval, and the union of all denominators would not capture the full
population.

If we want to extract variables that don't vary with the interval (e.g.
sex, ethnicity), we can get the variable definition for them, but we'd
need a population to extract it for. We could define a population that
covers the entire interval period, from first start date to last end
date, which would work in many cases, but would not work if the
interval population is dependent on the specific interval in some way.
This test would catch that erroneous assumption.
  • Loading branch information
rebkwok committed Jan 29, 2025
1 parent 5f61a34 commit 7d46cc3
Showing 1 changed file with 67 additions and 0 deletions.
67 changes: 67 additions & 0 deletions tests/integration/measures/test_calculate.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,73 @@ def test_get_measure_results(engine):
assert set(results) == set(expected)


def test_get_measures_interval_dependent_denominator(engine):
    """Check measure results when the denominator depends on the specific interval.

    A patient is in an interval's population only if they have an event outside
    that interval and none inside it, so values in other intervals affect
    inclusion in this one. Patients with events in every interval are in no
    interval's population, which means the union of all measure denominators
    excludes some patients.
    """
    measure_name = "female_by_events_outside_interval_only"

    female = patients.sex == "female"
    event_inside = events.where(
        events.date.is_during(INTERVAL)
    ).exists_for_patient()
    dated_outside_interval = events.date.is_before(
        INTERVAL.start_date
    ) | events.date.is_after(INTERVAL.end_date)
    event_outside = events.where(dated_outside_interval).exists_for_patient()

    measures = Measures()
    measures.define_measure(
        measure_name,
        numerator=female,
        denominator=event_outside & ~event_inside,
        intervals=years(2).starting_on("2020-01-01"),
    )

    sexes_by_patient = [(1, "male"), (2, "female"), (3, "male"), (4, "female")]
    patient_rows = [
        {"patient_id": patient_id, "sex": sex}
        for patient_id, sex in sexes_by_patient
    ]

    in_2020 = date(2020, 2, 1)
    in_2021 = date(2021, 2, 1)
    event_dates_by_patient = [
        # Patient 1 (male) only has an event in interval 1, so is only in the
        # population for interval 2.
        (1, in_2020),
        # Patient 2 (female) only has an event in interval 2, so is only in the
        # population for interval 1.
        (2, in_2021),
        # Patients 3 and 4 have events in both intervals, so are in the
        # population for neither.
        (3, in_2020),
        (4, in_2020),
        (3, in_2021),
        (4, in_2021),
    ]
    event_rows = [
        {"patient_id": patient_id, "code": "abc", "date": event_date}
        for patient_id, event_date in event_dates_by_patient
    ]

    engine.populate({patients: patient_rows, events: event_rows})
    results = get_measure_results(engine.query_engine(), measures)

    expected = {
        # Interval 1: one female patient in the population
        # (numerator 1, denominator 1).
        (measure_name, date(2020, 1, 1), date(2020, 12, 31), 1.0, 1, 1),
        # Interval 2: one male patient in the population
        # (numerator 0, denominator 1).
        (measure_name, date(2021, 1, 1), date(2021, 12, 31), 0.0, 0, 1),
    }

    assert set(results) == expected


@mock.patch("ehrql.measures.calculate.time")
def test_get_measure_results_with_timeout(patched_time, in_memory_engine):
events_in_interval = events.where(events.date.is_during(INTERVAL))
Expand Down

0 comments on commit 7d46cc3

Please sign in to comment.