From fd57daa7e147c7ea4c6b06510f3d018c59ff2d21 Mon Sep 17 00:00:00 2001 From: Tom Ward Date: Thu, 12 Dec 2024 16:18:29 +0000 Subject: [PATCH] Reorganise examples page (#2314) Reorganise examples page * some examples use multiple tables, in this case I have used what seems to be the most relevant table (e.g. get the first medication event for a patient with a valid registration => medication) * add links in both directions between the examples and the table reference * I think this examples page should contrast with e.g. the cheatsheet page by doing less of providing composable blocks (cheatsheet) & more concrete examples that could be adopted wholesale. Currently the examples page introduces reusable concepts across multiple tables in a slightly ad-hoc way - I think it should do less of this & instead provide a specific (and ideally useful) example for each table. --- docs/how-to/examples.md | 531 ++++++++++--------- docs/includes/generated_docs/schemas/core.md | 10 +- docs/includes/generated_docs/schemas/emis.md | 8 +- docs/includes/generated_docs/schemas/tpp.md | 16 +- ehrql/tables/core.py | 9 +- ehrql/tables/emis.py | 2 + ehrql/tables/tpp.py | 14 +- 7 files changed, 334 insertions(+), 256 deletions(-) diff --git a/docs/how-to/examples.md b/docs/how-to/examples.md index 60d1a928b..f42f00aa1 100644 --- a/docs/how-to/examples.md +++ b/docs/how-to/examples.md @@ -7,6 +7,10 @@ Or you can use the navigation bar at the top-right of this page, to see a list of the examples, and then jump to a specific example of interest. +The examples are organised firstly by the table which they pull data from - +for a more complete guide to the tables, refer to the +[Table Schemas](https://docs.opensafely.org/ehrql/reference/schemas/) section of the +ehrQL documentation. 
## Understanding these examples @@ -56,9 +60,37 @@ If you include an argument for `category_column`, the codelist returned will be You can see an example of [how to access these categories within your dataset definition ](#finding-each-patients-ethnicity) below. -## Finding patient demographics +## Patients -### Finding each patient's age +Examples for the [patients table](../../reference/schemas/core/#patients). + +### Finding patient demographics + +#### Finding each patient's sex + +```ehrql +from ehrql import create_dataset +from ehrql.tables.core import patients + +dataset = create_dataset() +dataset.sex = patients.sex +dataset.define_population(patients.exists_for_patient()) +``` + +The possible values are "female", "male", "intersex", and "unknown". + +#### Finding each patient's date of birth + +```ehrql +from ehrql import create_dataset +from ehrql.tables.core import patients + +dataset = create_dataset() +dataset.date_of_birth = patients.date_of_birth +dataset.define_population(patients.exists_for_patient()) +``` + +#### Finding each patient's age ```ehrql from ehrql import create_dataset @@ -93,7 +125,7 @@ dataset.age = patients.age_on(index_date) dataset.define_population(patients.exists_for_patient()) ``` -### Assigning each patient an age band +#### Assigning each patient an age band ```ehrql from ehrql import create_dataset, case, when @@ -112,18 +144,7 @@ dataset.age_band = case( dataset.define_population(patients.exists_for_patient()) ``` -### Finding each patient's date of birth - -```ehrql -from ehrql import create_dataset -from ehrql.tables.core import patients - -dataset = create_dataset() -dataset.date_of_birth = patients.date_of_birth -dataset.define_population(patients.exists_for_patient()) -``` - -### Finding each patient's date of death in their primary care record +#### Finding each patient's date of death in their primary care record ```ehrql from ehrql import create_dataset @@ -136,8 +157,13 @@ 
dataset.define_population(patients.exists_for_patient()) :notepad_spiral: This value comes from the patient's EHR record. You can find more information about the accuracy of this value in the [reference schema](../reference/schemas/core.md#recording-of-death-in-primary-care). +## ONS Deaths + +Examples for the [ons_deaths table](../../reference/schemas/core/#ons_deaths). + +### Finding patient demographics -### Finding each patient's date, underlying_cause_of_death, and first noted additional medical condition noted on the death certificate from ONS records +#### Finding each patient's date, underlying_cause_of_death, and first noted additional medical condition noted on the death certificate from ONS records ```ehrql from ehrql import create_dataset @@ -153,7 +179,7 @@ dataset.define_population(patients.exists_for_patient()) :notepad_spiral: There are currently [multiple](https://github.com/opensafely-core/ehrql/blob/d29ff8ab2cebf3522258c408f8225b7a76f7b6f2/ehrql/tables/beta/core.py#L78-L92) cause of death fields. We aim to resolve these to a single feature in the future. -### Finding patients with a particular cause of death +#### Finding patients with a particular cause of death The `ons_deaths` table has multiple "cause of death" fields. Using the [`cause_of_death_is_in()`](../reference/schemas/core.md#ons_deaths.cause_of_death_is_in) @@ -171,53 +197,13 @@ dataset.died_with_X = ons_deaths.cause_of_death_is_in(cause_of_death_X_codelist) dataset.define_population(patients.exists_for_patient()) ``` +## Addresses -### Finding each patient's sex - -```ehrql -from ehrql import create_dataset -from ehrql.tables.core import patients - -dataset = create_dataset() -dataset.sex = patients.sex -dataset.define_population(patients.exists_for_patient()) -``` - -The possible values are "female", "male", "intersex", and "unknown". - -### Finding each patient's ethnicity - -Ethnicity can be defined using a codelist. 
There are a lot of individual codes that can used to indicate a patients' fine-grained ethnicity. To make analysis more manageable, ethnicity is therefore commonly grouped into higher level categories. Above, we described how you can [import codelists that have a category column](#some-examples-using-codelist_from_csv). You can use a codelist with a category column to map clinical event codes for ethnicity to higher level categories as in this example: - -```ehrql -from ehrql import create_dataset -from ehrql.tables.core import clinical_events, patients -from ehrql import codelist_from_csv - -dataset = create_dataset() - -ethnicity_codelist = codelist_from_csv( - "ethnicity_codelist_with_categories", - column="snomedcode", - category_column="Grouping_6", -) - -dataset.latest_ethnicity_code = ( - clinical_events.where(clinical_events.snomedct_code.is_in(ethnicity_codelist)) - .where(clinical_events.date.is_on_or_before("2023-01-01")) - .sort_by(clinical_events.date) - .last_for_patient() - .snomedct_code -) -dataset.latest_ethnicity_group = dataset.latest_ethnicity_code.to_category( - ethnicity_codelist -) -dataset.define_population(patients.exists_for_patient()) -``` +Examples for the [TPP addresses table](../../reference/schemas/tpp/#addresses). -## Finding attributes related to each patient's address as of a given date +### Finding attributes related to each patient's address as of a given date -### Finding each patient's IMD rank +#### Finding each patient's IMD rank ```ehrql from ehrql import create_dataset @@ -233,7 +219,7 @@ The rounded IMD ranking ranges from 0 to 32,800. See [this code comment](https://github.com/opensafely-core/ehrql/blob/d29ff8ab2cebf3522258c408f8225b7a76f7b6f2/ehrql/tables/beta/tpp.py#L117-L123) about how we choose one address if a patient has multiple registered addresses on the given date. 
-### Calculating each patient's IMD quintile and/or decile +#### Calculating each patient's IMD quintile and/or decile ```ehrql from ehrql import create_dataset @@ -247,7 +233,7 @@ dataset.imd_decile = patient_address.imd_decile dataset.define_population(patients.exists_for_patient()) ``` -### Finding each patient's rural/urban classification +#### Finding each patient's rural/urban classification ```ehrql from ehrql import create_dataset @@ -269,7 +255,7 @@ The meaning of this value is as follows: * 7 - Rural village and dispersed * 8 - Rural village and dispersed in a sparse setting -### Finding each patient's MSOA +#### Finding each patient's MSOA ```ehrql from ehrql import create_dataset @@ -280,7 +266,7 @@ dataset.msoa_code = addresses.for_patient_on("2023-01-01").msoa_code dataset.define_population(patients.exists_for_patient()) ``` -### Finding multiple attributes of each patient's address +#### Finding multiple attributes of each patient's address ```ehrql from ehrql import create_dataset @@ -294,9 +280,13 @@ dataset.msoa_code = address.msoa_code dataset.define_population(patients.exists_for_patient()) ``` -## Finding attributes related to each patient's GP practice as of a given date +## Practice Registrations + +Examples for the [practice_registrations table](../../reference/schemas/core/#practice_registrations). 
-### Finding each patient's practice's pseudonymised identifier +### Finding attributes related to each patient's GP practice as of a given date + +#### Finding each patient's practice's pseudonymised identifier ```ehrql from ehrql import create_dataset @@ -307,7 +297,7 @@ dataset.practice = practice_registrations.for_patient_on("2023-01-01").practice_ dataset.define_population(patients.exists_for_patient()) ``` -### Finding each patient's practice's STP +#### Finding each patient's practice's STP ```ehrql from ehrql import create_dataset @@ -318,7 +308,7 @@ dataset.stp = practice_registrations.for_patient_on("2023-01-01").practice_stp dataset.define_population(patients.exists_for_patient()) ``` -### Finding each patient's practice's region +#### Finding each patient's practice's region ```ehrql from ehrql import create_dataset @@ -329,7 +319,7 @@ dataset.region = practice_registrations.for_patient_on("2023-01-01").practice_nu dataset.define_population(patients.exists_for_patient()) ``` -### Finding multiple attributes of each patient's practice +#### Finding multiple attributes of each patient's practice ```ehrql from ehrql import create_dataset @@ -343,97 +333,111 @@ dataset.region = registration.practice_nuts1_region_name dataset.define_population(patients.exists_for_patient()) ``` -## Does each patient have an event matching some criteria? +#### Excluding patients based on study dates -### Does each patient have a clinical event matching a code in a codelist? +The following example ensures that the dataset only includes patients registered at a +single practice for the entire duration of the study, plus at least 3 months prior to the +study start. 
```ehrql -from ehrql import create_dataset, codelist_from_csv -from ehrql.tables.core import clinical_events, patients +from ehrql import create_dataset, codelist_from_csv, months +from ehrql.tables.tpp import patients, practice_registrations -asthma_codelist = codelist_from_csv("XXX", column="YYY") +study_start_date = "2022-01-01" +study_end_date = "2022-12-31" dataset = create_dataset() -dataset.has_had_asthma_diagnosis = clinical_events.where( - clinical_events.snomedct_code.is_in(asthma_codelist) -).exists_for_patient() -dataset.define_population(patients.exists_for_patient()) + +# find registrations that exist for the full study period, and at least 3 months +# prior +registrations = ( + practice_registrations.where( + practice_registrations.start_date.is_on_or_before(study_start_date - months(3)) + ) + .except_where( + practice_registrations.end_date.is_on_or_before(study_end_date) + ) +) + +dataset.define_population(registrations.exists_for_patient()) ``` -### Does each patient have a clinical event matching a code in a codelist in a time period? +## Clinical Events -```ehrql -from ehrql import create_dataset, codelist_from_csv -from ehrql.tables.core import clinical_events, patients +Examples for the [clinical_events table](../../reference/schemas/core/#clinical_events). -asthma_codelist = codelist_from_csv("XXX", column="YYY") +### Finding patient demographics -dataset = create_dataset() -dataset.has_recent_asthma_diagnosis = clinical_events.where( - clinical_events.snomedct_code.is_in(asthma_codelist) -).where( - clinical_events.date.is_on_or_between("2022-07-01", "2023-01-01") -).exists_for_patient() -dataset.define_population(patients.exists_for_patient()) -``` +#### Finding each patient's ethnicity -### Does each patient have a medication event matching some criteria? +Ethnicity can be defined using a codelist. There are a lot of individual codes that can be used to indicate a patient's fine-grained ethnicity.
To make analysis more manageable, ethnicity is therefore commonly grouped into higher level categories. Above, we described how you can [import codelists that have a category column](#some-examples-using-codelist_from_csv). You can use a codelist with a category column to map clinical event codes for ethnicity to higher level categories as in this example: ```ehrql -from ehrql import create_dataset, codelist_from_csv -from ehrql.tables.core import medications, patients - -statin_medications = codelist_from_csv("XXX", column="YYY") +from ehrql import create_dataset +from ehrql.tables.core import clinical_events, patients +from ehrql import codelist_from_csv dataset = create_dataset() -dataset.has_recent_statin_prescription = medications.where( - medications.dmd_code.is_in(statin_medications) -).where( - medications.date.is_on_or_between("2022-07-01", "2023-01-01") -).exists_for_patient() + +ethnicity_codelist = codelist_from_csv( + "ethnicity_codelist_with_categories", + column="snomedcode", + category_column="Grouping_6", +) + +dataset.latest_ethnicity_code = ( + clinical_events.where(clinical_events.snomedct_code.is_in(ethnicity_codelist)) + .where(clinical_events.date.is_on_or_before("2023-01-01")) + .sort_by(clinical_events.date) + .last_for_patient() + .snomedct_code +) +dataset.latest_ethnicity_group = dataset.latest_ethnicity_code.to_category( + ethnicity_codelist +) dataset.define_population(patients.exists_for_patient()) ``` -### Does each patient have a hospitalisation event matching some criteria? +### Does each patient have an event matching some criteria? + +#### Does each patient have a clinical event matching a code in a codelist? 
```ehrql from ehrql import create_dataset, codelist_from_csv -from ehrql.tables.tpp import apcs, patients +from ehrql.tables.core import clinical_events, patients -cardiac_diagnosis_codes = codelist_from_csv("XXX", column="YYY") +asthma_codelist = codelist_from_csv("XXX", column="YYY") dataset = create_dataset() -dataset.has_recent_cardiac_admission = apcs.where( - apcs.primary_diagnosis.is_in(cardiac_diagnosis_codes) -).where( - apcs.admission_date.is_on_or_between("2022-07-01", "2023-01-01") +dataset.has_had_asthma_diagnosis = clinical_events.where( + clinical_events.snomedct_code.is_in(asthma_codelist) ).exists_for_patient() dataset.define_population(patients.exists_for_patient()) ``` -## How many events does each patient have matching some criteria? +#### Does each patient have a clinical event matching a code in a codelist in a time period? ```ehrql from ehrql import create_dataset, codelist_from_csv -from ehrql.tables.core import medications, patients +from ehrql.tables.core import clinical_events, patients -statin_medications = codelist_from_csv("XXX", column="YYY") +asthma_codelist = codelist_from_csv("XXX", column="YYY") dataset = create_dataset() -dataset.number_of_statin_prescriptions_in_last_year = medications.where( - medications.dmd_code.is_in(statin_medications) +dataset.has_recent_asthma_diagnosis = clinical_events.where( + clinical_events.snomedct_code.is_in(asthma_codelist) ).where( - medications.date.is_on_or_between("2022-01-01", "2023-01-01") -).count_for_patient() + clinical_events.date.is_on_or_between("2022-07-01", "2023-01-01") +).exists_for_patient() dataset.define_population(patients.exists_for_patient()) ``` -## What is the first/last event matching some criteria? +### What is the first/last event matching some criteria? The `first_for_patient()` and `last_for_patient()` methods can only be used on a sorted frame. Frames can be sorted by calling the `sort_by()` method with the column to sort the frame by. 
-### What is the earliest/latest clinical event matching some criteria? +#### What is the earliest/latest clinical event matching some criteria? ```ehrql from ehrql import create_dataset, codelist_from_csv @@ -469,43 +473,7 @@ dataset.last_asthma_diagnosis_date = clinical_events.where( dataset.define_population(patients.exists_for_patient()) ``` -### What is the earliest/latest medication event matching some criteria? - -```ehrql -from ehrql import create_dataset, codelist_from_csv -from ehrql.tables.core import medications, patients - -statin_medications = codelist_from_csv("XXX", column="YYY") - -dataset = create_dataset() -dataset.first_statin_prescription_date = medications.where( - medications.dmd_code.is_in(statin_medications) -).where( - medications.date.is_on_or_after("2022-07-01") -).sort_by( - medications.date -).first_for_patient().date -dataset.define_population(patients.exists_for_patient()) -``` - -```ehrql -from ehrql import create_dataset, codelist_from_csv -from ehrql.tables.core import medications, patients - -statin_medications = codelist_from_csv("XXX", column="YYY") - -dataset = create_dataset() -dataset.last_statin_prescription_date = medications.where( - medications.dmd_code.is_in(statin_medications) -).where( - medications.date.is_on_or_after("2022-07-01") -).sort_by( - medications.date -).last_for_patient().date -dataset.define_population(patients.exists_for_patient()) -``` - -### What is the clinical event, matching some criteria, with the least/greatest value? +#### What is the clinical event, matching some criteria, with the least/greatest value? 
```ehrql from ehrql import create_dataset, codelist_from_csv @@ -552,9 +520,9 @@ dataset.value_of_last_max_hba1c_observed = latest_max_hba1c_event.numeric_value dataset.define_population(patients.exists_for_patient()) ``` -## Getting properties of an event matching some criteria +### Getting properties of an event matching some criteria -### What is the code of the first/last clinical event matching some criteria? +#### What is the code of the first/last clinical event matching some criteria? ```ehrql from ehrql import create_dataset, codelist_from_csv @@ -573,7 +541,7 @@ dataset.first_asthma_diagnosis_code = clinical_events.where( dataset.define_population(patients.exists_for_patient()) ``` -### What is the date of the first/last clinical event matching some criteria? +#### What is the date of the first/last clinical event matching some criteria? ```ehrql from ehrql import create_dataset, codelist_from_csv @@ -592,7 +560,7 @@ dataset.first_asthma_diagnosis_date = clinical_events.where( dataset.define_population(patients.exists_for_patient()) ``` -### What is the code and date of the first/last clinical event matching some criteria? +#### What is the code and date of the first/last clinical event matching some criteria? 
```ehrql from ehrql import create_dataset, codelist_from_csv @@ -613,37 +581,9 @@ dataset.first_asthma_diagnosis_date = first_asthma_diagnosis.date dataset.define_population(patients.exists_for_patient()) ``` -## Finding events occuring close in time to another event +### Performing arithmetic on numeric values of clinical events -### Finding the code of the first medication after the first clinical event matching some criteria - -```ehrql -from ehrql import create_dataset, codelist_from_csv, weeks -from ehrql.tables.core import clinical_events, medications, patients - -asthma_codelist = codelist_from_csv("XXX", column="YYY") -inhaled_corticosteroid_codelist = codelist_from_csv("XXX", column="YYY") - -dataset = create_dataset() -first_asthma_diagnosis_date = clinical_events.where( - clinical_events.snomedct_code.is_in(asthma_codelist) -).where( - clinical_events.date.is_on_or_after("2022-07-01") -).sort_by( - clinical_events.date -).first_for_patient().date -dataset.first_asthma_diagnosis_date = first_asthma_diagnosis_date -dataset.count_ics_prescriptions_2wks_post_diagnosis = medications.where( - medications.dmd_code.is_in(inhaled_corticosteroid_codelist) -).where( - medications.date.is_on_or_between(first_asthma_diagnosis_date,first_asthma_diagnosis_date + weeks(2)) -).count_for_patient() -dataset.define_population(patients.exists_for_patient()) -``` - -## Performing arithmetic on numeric values of clinical events - -### Finding the mean observed value of clinical events matching some criteria +#### Finding the mean observed value of clinical events matching some criteria ```ehrql from ehrql import create_dataset, codelist_from_csv @@ -660,9 +600,9 @@ dataset.mean_hba1c = clinical_events.where( dataset.define_population(patients.exists_for_patient()) ``` -## Finding events within a date range +### Finding events within a date range -### Finding events within a fixed date range +#### Finding events within a fixed date range ```ehrql from ehrql import 
create_dataset, codelist_from_csv @@ -679,7 +619,7 @@ dataset.has_recent_asthma_diagnosis = clinical_events.where( dataset.define_population(patients.exists_for_patient()) ``` -### Finding events within a date range plus a constant +#### Finding events within a date range plus a constant ```ehrql from ehrql import create_dataset, codelist_from_csv, weeks @@ -698,7 +638,7 @@ dataset.has_recent_asthma_diagnosis = clinical_events.where( dataset.define_population(patients.exists_for_patient()) ``` -### Finding events within a dynamic date range +#### Finding events within a dynamic date range ```ehrql from ehrql import create_dataset, codelist_from_csv, months @@ -722,7 +662,7 @@ dataset.count_of_hba1c_tests_6mo_post_first_diabetes_code = clinical_events.wher dataset.define_population(patients.exists_for_patient()) ``` -### Excluding events which have happened in the future +#### Excluding events which have happened in the future Data quality issues with many sources may result in events apparently happening in future dates (e.g. 9999-01-01), it is useful to filter these from your analysis. 
@@ -744,9 +684,9 @@ dataset.has_recent_asthma_diagnosis = clinical_events.where( dataset.define_population(patients.exists_for_patient()) ``` -## Extracting parts of dates and date differences +### Extracting parts of dates and date differences -### Finding the year an event occurred +#### Finding the year an event occurred ```ehrql from datetime import date @@ -764,7 +704,132 @@ dataset.year_of_first = clinical_events.where( dataset.define_population(patients.exists_for_patient()) ``` -### Finding prescriptions made in particular months of the year +#### Finding the number of weeks between two events + +```ehrql +from ehrql import create_dataset, codelist_from_csv +from ehrql.tables.core import clinical_events, patients + +asthma_codelist = codelist_from_csv("XXX", column="YYY") +asthma_review_codelist = codelist_from_csv("XXX", column="YYY") + +dataset = create_dataset() +first_asthma_diagnosis_date = clinical_events.where( + clinical_events.snomedct_code.is_in(asthma_codelist) +).sort_by(clinical_events.date).first_for_patient().date + +first_asthma_review_date = clinical_events.where( + clinical_events.snomedct_code.is_in(asthma_review_codelist) +).where( + clinical_events.date.is_on_or_after(first_asthma_diagnosis_date) +).sort_by(clinical_events.date).first_for_patient().date + +dataset.weeks_between_diagnosis_and_review = (first_asthma_review_date - first_asthma_diagnosis_date).weeks +dataset.define_population(patients.exists_for_patient()) +``` + +## Admitted Patient Care Spells (APCS) + +Examples for the [TPP apcs table](../../reference/schemas/tpp/#apcs). + +### Does each patient have an event matching some criteria? + +#### Does each patient have a hospitalisation event matching some criteria? 
+ +```ehrql +from ehrql import create_dataset, codelist_from_csv +from ehrql.tables.tpp import apcs, patients + +cardiac_diagnosis_codes = codelist_from_csv("XXX", column="YYY") + +dataset = create_dataset() +dataset.has_recent_cardiac_admission = apcs.where( + apcs.primary_diagnosis.is_in(cardiac_diagnosis_codes) +).where( + apcs.admission_date.is_on_or_between("2022-07-01", "2023-01-01") +).exists_for_patient() +dataset.define_population(patients.exists_for_patient()) +``` + +## Medications + +Examples for the [medications table](../../reference/schemas/core/#medications). + +### Does each patient have an event matching some criteria? + +#### Does each patient have a medication event matching some criteria? + +```ehrql +from ehrql import create_dataset, codelist_from_csv +from ehrql.tables.core import medications, patients + +statin_medications = codelist_from_csv("XXX", column="YYY") + +dataset = create_dataset() +dataset.has_recent_statin_prescription = medications.where( + medications.dmd_code.is_in(statin_medications) +).where( + medications.date.is_on_or_between("2022-07-01", "2023-01-01") +).exists_for_patient() +dataset.define_population(patients.exists_for_patient()) +``` + +#### How many events does each patient have matching some criteria? + +```ehrql +from ehrql import create_dataset, codelist_from_csv +from ehrql.tables.core import medications, patients + +statin_medications = codelist_from_csv("XXX", column="YYY") + +dataset = create_dataset() +dataset.number_of_statin_prescriptions_in_last_year = medications.where( + medications.dmd_code.is_in(statin_medications) +).where( + medications.date.is_on_or_between("2022-01-01", "2023-01-01") +).count_for_patient() +dataset.define_population(patients.exists_for_patient()) +``` + +#### What is the earliest/latest medication event matching some criteria? 
+ +```ehrql +from ehrql import create_dataset, codelist_from_csv +from ehrql.tables.core import medications, patients + +statin_medications = codelist_from_csv("XXX", column="YYY") + +dataset = create_dataset() +dataset.first_statin_prescription_date = medications.where( + medications.dmd_code.is_in(statin_medications) +).where( + medications.date.is_on_or_after("2022-07-01") +).sort_by( + medications.date +).first_for_patient().date +dataset.define_population(patients.exists_for_patient()) +``` + +```ehrql +from ehrql import create_dataset, codelist_from_csv +from ehrql.tables.core import medications, patients + +statin_medications = codelist_from_csv("XXX", column="YYY") + +dataset = create_dataset() +dataset.last_statin_prescription_date = medications.where( + medications.dmd_code.is_in(statin_medications) +).where( + medications.date.is_on_or_after("2022-07-01") +).sort_by( + medications.date +).last_for_patient().date +dataset.define_population(patients.exists_for_patient()) +``` + +### Extracting parts of dates and date differences + +#### Finding prescriptions made in particular months of the year ```ehrql from ehrql import create_dataset, codelist_from_csv @@ -783,36 +848,39 @@ dataset.winter_amoxicillin_count = medications.where( dataset.define_population(patients.exists_for_patient()) ``` -### Finding the number of weeks between two events +### Finding events occuring close in time to another event + +#### Finding the code of the first medication after the first clinical event matching some criteria ```ehrql -from ehrql import create_dataset, codelist_from_csv -from ehrql.tables.core import clinical_events, patients +from ehrql import create_dataset, codelist_from_csv, weeks +from ehrql.tables.core import clinical_events, medications, patients asthma_codelist = codelist_from_csv("XXX", column="YYY") -asthma_review_codelist = codelist_from_csv("XXX", column="YYY") +inhaled_corticosteroid_codelist = codelist_from_csv("XXX", column="YYY") dataset = 
create_dataset() first_asthma_diagnosis_date = clinical_events.where( clinical_events.snomedct_code.is_in(asthma_codelist) -).sort_by(clinical_events.date).first_for_patient().date - -first_asthma_review_date = clinical_events.where( - clinical_events.snomedct_code.is_in(asthma_review_codelist) ).where( - clinical_events.date.is_on_or_after(first_asthma_diagnosis_date) -).sort_by(clinical_events.date).first_for_patient().date - -dataset.weeks_between_diagnosis_and_review = (first_asthma_review_date - first_asthma_diagnosis_date).weeks + clinical_events.date.is_on_or_after("2022-07-01") +).sort_by( + clinical_events.date +).first_for_patient().date +dataset.first_asthma_diagnosis_date = first_asthma_diagnosis_date +dataset.count_ics_prescriptions_2wks_post_diagnosis = medications.where( + medications.dmd_code.is_in(inhaled_corticosteroid_codelist) +).where( + medications.date.is_on_or_between(first_asthma_diagnosis_date,first_asthma_diagnosis_date + weeks(2)) +).count_for_patient() dataset.define_population(patients.exists_for_patient()) ``` - -## Excluding medications for patients who have transferred between practices +### Excluding medications for patients who have transferred between practices Note that in these examples, the periods defined are illustrative only. -### Excluding patients based on prescription date +#### Excluding patients based on prescription date ```ehrql from ehrql import case, create_dataset, codelist_from_csv, when, weeks @@ -856,42 +924,3 @@ dataset.prescription_date = case( dataset.define_population(patients.exists_for_patient()) ``` -### Excluding patients based on study dates - -The following example ensures that the dataset only includes patients registered at a -single practice for the entire duration of the study, plus at least 3 months prior to the -study start. 
- -```ehrql -from ehrql import create_dataset, codelist_from_csv, months -from ehrql.tables.tpp import medications, patients, practice_registrations - -study_start_date = "2022-01-01" -study_end_date = "2022-12-31" - -medication_codelist = codelist_from_csv("XXX", column="YYY") - -dataset = create_dataset() - -# First relevant prescription per patient -first_prescription = ( - medications.where(medications.dmd_code.is_in(medication_codelist)) - .sort_by(medications.date) - .first_for_patient() -) - -dataset.prescription_date = first_prescription.date - -# find registrations that exist for the full study period, and at least 3 months -# prior -registrations = ( - practice_registrations.where( - practice_registrations.start_date.is_on_or_before(study_start_date - months(3)) - ) - .except_where( - practice_registrations.end_date.is_on_or_before(study_end_date) - ) -) - -dataset.define_population(registrations.exists_for_patient()) -``` diff --git a/docs/includes/generated_docs/schemas/core.md b/docs/includes/generated_docs/schemas/core.md index a1e3025e2..e8785cf51 100644 --- a/docs/includes/generated_docs/schemas/core.md +++ b/docs/includes/generated_docs/schemas/core.md @@ -24,6 +24,8 @@ Each record corresponds to a single clinical or consultation event for a patient Note that event codes do not change in this table. If an event code in the coding system becomes inactive, the event will still be coded to the inactive code. As such, codelists should include all relevant inactive codes. + +[Example ehrQL usage of clinical_events](../../../how-to/examples/#clinical-events)
Columns
@@ -99,7 +101,7 @@ registered at the same practice for the duration of the study period. Examples of using ehrQL to calculation such periods can be found in the documentation on how to -[use ehrQL to answer specific questions](../../how-to/examples.md#excluding-medications-for-patients-who-have-transferred-between-practices). +[use ehrQL to answer specific questions using the medications table](../../../how-to/examples/#excluding-medications-for-patients-who-have-transferred-between-practices).
Columns
@@ -161,6 +163,8 @@ The `ehrql.tables.raw.core.ons_deaths` table contains all registered deaths. like autopsies and inquests delaying reporting on cause of death. This is evident in the [OpenSAFELY historical database coverage report](https://reports.opensafely.org/reports/opensafely-tpp-database-history/#ons_deaths) + +[Example ehrQL usage of ons_deaths](../../../how-to/examples/#ons-deaths)
Columns
@@ -450,6 +454,8 @@ recording in primary care in: By contrast, cause of death is often not accurate in the primary care record so we don't make it available to query here. + +[Example ehrQL usage of patients](../../../how-to/examples/#patients)
Columns
@@ -573,6 +579,8 @@ return patients.date_of_death.is_not_null() & patients.date_of_death.is_before(d ## practice_registrations Each record corresponds to a patient's registration with a practice. + +[Example ehrQL usage of practice_registrations](../../../how-to/examples/#practice-registrations)
Columns
diff --git a/docs/includes/generated_docs/schemas/emis.md b/docs/includes/generated_docs/schemas/emis.md index d6909bbfb..ef71b3e0d 100644 --- a/docs/includes/generated_docs/schemas/emis.md +++ b/docs/includes/generated_docs/schemas/emis.md @@ -25,6 +25,8 @@ Each record corresponds to a single clinical or consultation event for a patient Note that event codes do not change in this table. If an event code in the coding system becomes inactive, the event will still be coded to the inactive code. As such, codelists should include all relevant inactive codes. + +[Example ehrQL usage of clinical_events](../../../how-to/examples/#clinical-events)
Columns
@@ -100,7 +102,7 @@ registered at the same practice for the duration of the study period. Examples of using ehrQL to calculation such periods can be found in the documentation on how to -[use ehrQL to answer specific questions](../../how-to/examples.md#excluding-medications-for-patients-who-have-transferred-between-practices). +[use ehrQL to answer specific questions using the medications table](../../../how-to/examples/#medications).
Columns
@@ -162,6 +164,8 @@ The `ehrql.tables.raw.core.ons_deaths` table contains all registered deaths. like autopsies and inquests delaying reporting on cause of death. This is evident in the [OpenSAFELY historical database coverage report](https://reports.opensafely.org/reports/opensafely-tpp-database-history/#ons_deaths) + +[Example ehrQL usage of ons_deaths](../../../how-to/examples/#ons-deaths)
Columns
@@ -610,6 +614,8 @@ return patients.registration_start_date.is_on_or_before(start_date) & ( Each record corresponds to a patient's registration with a practice. +[Example ehrQL usage of practice_registrations](../../../how-to/examples/#practice-registrations) + !!! warning At present, the EMIS database contains only the patient's current practice registration and does not include their full registration history. diff --git a/docs/includes/generated_docs/schemas/tpp.md b/docs/includes/generated_docs/schemas/tpp.md index b6b1c88d8..52303996b 100644 --- a/docs/includes/generated_docs/schemas/tpp.md +++ b/docs/includes/generated_docs/schemas/tpp.md @@ -50,6 +50,8 @@ from which other larger geographic representations can be derived (see various [ONS publications][addresses_ukgeographies] for more detail). [addresses_ukgeographies]: https://www.ons.gov.uk/methodology/geography/ukgeographies + +[Example ehrQL usage of addresses](../../../how-to/examples/#addresses)
Columns
@@ -362,6 +364,8 @@ and the [GitHub issue discussing more of the background context][apcs_context_is [apcs_data_source_docs]: https://docs.opensafely.org/data-sources/apc/ [apcs_context_issue]: https://github.com/opensafely-core/cohort-extractor/issues/186 + +[Example ehrQL usage of apcs](../../../how-to/examples/#admitted-patient-care-spells-apcs)
Columns
@@ -1758,7 +1762,7 @@ registered at the same practice for the duration of the study period. Examples of using ehrQL to calculation such periods can be found in the documentation on how to -[use ehrQL to answer specific questions](../../how-to/examples.md#excluding-medications-for-patients-who-have-transferred-between-practices). +[use ehrQL to answer specific questions using the medications table](../../../how-to/examples/#medications).
Columns
@@ -1867,6 +1871,10 @@ The `ehrql.tables.raw.core.ons_deaths` table contains all registered deaths. evident in the [OpenSAFELY historical database coverage report](https://reports.opensafely.org/reports/opensafely-tpp-database-history/#ons_deaths) +[Example ehrQL usage of ons_deaths](../../../how-to/examples/#ons-deaths) + +### TPP specific information + !!! tip Note that this version of the table, which includes a place of death field, is only available in the `tpp` schema and not the `core` schema. @@ -2715,6 +2723,8 @@ recording in primary care in: By contrast, cause of death is often not accurate in the primary care record so we don't make it available to query here. + +[Example ehrQL usage of patients](../../../how-to/examples/#patients)
Columns
@@ -2839,6 +2849,10 @@ return patients.date_of_death.is_not_null() & patients.date_of_death.is_before(d Each record corresponds to a patient's registration with a practice. +[Example ehrQL usage of practice_registrations](../../../how-to/examples/#practice-registrations) + +### TPP specific information + See the [TPP backend information](../backends.md#patients-included-in-the-tpp-backend) for details of which patients are included.
diff --git a/ehrql/tables/core.py b/ehrql/tables/core.py index 6caaabd16..64858b544 100644 --- a/ehrql/tables/core.py +++ b/ehrql/tables/core.py @@ -67,6 +67,8 @@ class patients(PatientFrame): By contrast, cause of death is often not accurate in the primary care record so we don't make it available to query here. + + [Example ehrQL usage of patients](../../../how-to/examples/#patients) """ date_of_birth = Series( @@ -126,6 +128,8 @@ def is_dead_on(self, date): class practice_registrations(EventFrame): """ Each record corresponds to a patient's registration with a practice. + + [Example ehrQL usage of practice_registrations](../../../how-to/examples/#practice-registrations) """ start_date = Series( @@ -215,6 +219,8 @@ class ons_deaths(PatientFrame): like autopsies and inquests delaying reporting on cause of death. This is evident in the [OpenSAFELY historical database coverage report](https://reports.opensafely.org/reports/opensafely-tpp-database-history/#ons_deaths) + + [Example ehrQL usage of ons_deaths](../../../how-to/examples/#ons-deaths) """ date = Series( @@ -312,6 +318,7 @@ class clinical_events(EventFrame): system becomes inactive, the event will still be coded to the inactive code. As such, codelists should include all relevant inactive codes. + [Example ehrQL usage of clinical_events](../../../how-to/examples/#clinical-events) """ date = Series(datetime.date) @@ -351,7 +358,7 @@ class medications(EventFrame): Examples of using ehrQL to calculation such periods can be found in the documentation on how to - [use ehrQL to answer specific questions](../../how-to/examples.md#excluding-medications-for-patients-who-have-transferred-between-practices). 
+ [use ehrQL to answer specific questions using the medications table](../../../how-to/examples/#medications). """ date = Series(datetime.date) diff --git a/ehrql/tables/emis.py b/ehrql/tables/emis.py index 11ba4bad6..c872fa086 100644 --- a/ehrql/tables/emis.py +++ b/ehrql/tables/emis.py @@ -136,6 +136,8 @@ class practice_registrations(ehrql.tables.core.practice_registrations.__class__) """ Each record corresponds to a patient's registration with a practice. + [Example ehrQL usage of practice_registrations](../../../how-to/examples/#practice-registrations) + !!! warning At present, the EMIS database contains only the patient's current practice registration and does not include their full registration history. diff --git a/ehrql/tables/tpp.py b/ehrql/tables/tpp.py index cd92ea9c8..f529400d6 100644 --- a/ehrql/tables/tpp.py +++ b/ehrql/tables/tpp.py @@ -62,6 +62,8 @@ class addresses(EventFrame): (see various [ONS publications][addresses_ukgeographies] for more detail). [addresses_ukgeographies]: https://www.ons.gov.uk/methodology/geography/ukgeographies + + [Example ehrQL usage of addresses](../../../how-to/examples/#addresses) """ address_id = Series( @@ -252,6 +254,8 @@ class apcs(EventFrame): [apcs_data_source_docs]: https://docs.opensafely.org/data-sources/apc/ [apcs_context_issue]: https://github.com/opensafely-core/cohort-extractor/issues/186 + + [Example ehrQL usage of apcs](../../../how-to/examples/#admitted-patient-care-spells-apcs) """ apcs_ident = Series( @@ -957,7 +961,7 @@ class medications(ehrql.tables.core.medications.__class__): Examples of using ehrQL to calculation such periods can be found in the documentation on how to - [use ehrQL to answer specific questions](../../how-to/examples.md#excluding-medications-for-patients-who-have-transferred-between-practices). 
+ [use ehrQL to answer specific questions using the medications table](../../../how-to/examples/#medications). """ consultation_id = Series( @@ -1019,6 +1023,10 @@ class ons_deaths(ehrql.tables.core.ons_deaths.__class__): evident in the [OpenSAFELY historical database coverage report](https://reports.opensafely.org/reports/opensafely-tpp-database-history/#ons_deaths) + [Example ehrQL usage of ons_deaths](../../../how-to/examples/#ons-deaths) + + ### TPP specific information + !!! tip Note that this version of the table, which includes a place of death field, is only available in the `tpp` schema and not the `core` schema. @@ -1374,6 +1382,10 @@ class practice_registrations(ehrql.tables.core.practice_registrations.__class__) """ Each record corresponds to a patient's registration with a practice. + [Example ehrQL usage of practice_registrations](../../../how-to/examples/#practice-registrations) + + ### TPP specific information + See the [TPP backend information](../backends.md#patients-included-in-the-tpp-backend) for details of which patients are included. """