Skip to content

Commit

Permalink
Merge pull request #71 from opensafely/dataset_definition_revision
Browse files Browse the repository at this point in the history
Dataset definition revision
  • Loading branch information
ZoeMZou authored Jan 21, 2025
2 parents 846cc38 + 00c62bd commit 09877e9
Show file tree
Hide file tree
Showing 10 changed files with 233 additions and 177 deletions.
6 changes: 3 additions & 3 deletions analysis/active_analyses.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,14 @@ core_covars <- c(

## Define project-specific covariates (specific to respiratory project) ----
project_covars <- c(
"cov_bin_history_pneumonia_snomed", "cov_bin_history_asthma_snomed",
"cov_bin_history_pulmonary_fibrosis_snomed", "cov_bin_all_stroke"
"cov_bin_history_pneumonia", "cov_bin_history_asthma",
"cov_bin_history_pulmonary_fibrosis", "cov_bin_stroke_isch"
)
# Combine covariates into a single vector ----
all_covars <- c(core_covars, project_covars)

## Combine covariates into a single string for analysis ----
preex_FALSE_covars <- paste0(all_covars[!all_covars %in% c("cov_bin_history_asthma_snomed", "cov_bin_history_copd")], collapse = ";")
preex_FALSE_covars <- paste0(all_covars[!all_covars %in% c("cov_bin_history_asthma", "cov_bin_history_copd")], collapse = ";")
all_covars <- paste0(c(core_covars, project_covars), collapse = ";")

# Specify cohorts ----
Expand Down
29 changes: 25 additions & 4 deletions analysis/dataset_definition/codelists.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,25 +423,46 @@

# Preexisting respiratory condition

copd_ctv3 = codelist_from_csv(
# COPD codelist; codes are taken from the `CTV3ID` column of the CSV.
copd_ctv3_clinical = codelist_from_csv("codelists/opensafely-current-copd.csv", column="CTV3ID")
asthma_snomed = codelist_from_csv(

# COPD secondary-care codelist; codes are taken from the `code` column.
copd_icd10 = codelist_from_csv("codelists/opensafely-copd-secondary-care.csv", column="code")

# Asthma diagnosis SNOMED codelist; codes are taken from the `id` column.
asthma_snomed_clinical = codelist_from_csv("codelists/opensafely-asthma-diagnosis-snomed.csv", column="id")

# Asthma secondary-care codelist (the source file is the asthma
# *exacerbation* list); codes are taken from the `code` column.
asthma_icd10 = codelist_from_csv("codelists/opensafely-asthma-exacerbation-secondary-care.csv", column="code")

# Respiratory outcome(s)

pneumonia_snomed = codelist_from_csv(
# Pneumonia SNOMED codelist; codes are taken from the `code` column.
pneumonia_snomed_clinical = codelist_from_csv("codelists/bristol-pneumonia-snomed.csv", column="code")

pulmonary_fibrosis_snomed = codelist_from_csv(
# Pneumonia secondary-care codelist; note the column header contains a
# space (`ICD code`), unlike the other codelists in this section.
pneumonia_icd10 = codelist_from_csv("codelists/opensafely-pneumonia-secondary-care.csv", column="ICD code")

# Pulmonary fibrosis SNOMED codelist (source file is the ILD list);
# codes are taken from the `code` column.
pulmonary_fibrosis_snomed_clinical = codelist_from_csv("codelists/bristol-ild-snomed.csv", column="code")

# Pulmonary fibrosis / interstitial lung disease codelist bound to the
# `_icd10` name; codes are taken from the `code` column.
pulmonary_fibrosis_icd10 = codelist_from_csv("codelists/bristol-pulmonary-fibrosis-interstitial-lung-disease.csv", column="code")

# The asthma and COPD codelists are defined above under 'Preexisting respiratory condition'; the same codelists and variable names are reused for the outcomes.
54 changes: 18 additions & 36 deletions analysis/dataset_definition/variable_helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
sgss_covid_all_tests,
apcs,
ec,
opa,
opa_diag,
clinical_events,
medications,
ons_deaths,
Expand Down Expand Up @@ -52,23 +50,15 @@ def last_matching_med_dmd_before(codelist, start_date, where=True):
.last_for_patient()
)

def last_matching_event_apc_before(codelist, start_date, where=True):
    """Return each patient's latest matching ``apcs`` row before ``start_date``.

    A row matches when it satisfies ``where``, its primary or secondary
    diagnosis is in ``codelist``, and its admission date is strictly before
    ``start_date``. Matching rows are sorted by admission date and the last
    one per patient is returned.
    """
    candidates = apcs.where(where)
    diagnosis_matches = (
        apcs.primary_diagnosis.is_in(codelist)
        | apcs.secondary_diagnosis.is_in(codelist)
    )
    coded = candidates.where(diagnosis_matches)
    prior = coded.where(apcs.admission_date.is_before(start_date))
    return prior.sort_by(apcs.admission_date).last_for_patient()

def last_matching_event_opa_before(codelist, start_date, where=True):
    """Return each patient's latest matching ``opa_diag`` row before ``start_date``.

    A row matches when it satisfies ``where``, its primary diagnosis code or
    its first secondary diagnosis code is in ``codelist``, and its
    appointment date is strictly before ``start_date``. Matching rows are
    sorted by appointment date and the last one per patient is returned.
    """
    candidates = opa_diag.where(where)
    diagnosis_matches = (
        opa_diag.primary_diagnosis_code.is_in(codelist)
        | opa_diag.secondary_diagnosis_code_1.is_in(codelist)
    )
    coded = candidates.where(diagnosis_matches)
    prior = coded.where(opa_diag.appointment_date.is_before(start_date))
    return prior.sort_by(opa_diag.appointment_date).last_for_patient()
def last_matching_event_apc_before(codelist, start_date, only_prim_diagnoses=False, where=True):
    """Return each patient's latest matching ``apcs`` row before ``start_date``.

    Rows are first restricted by ``where`` and to admission dates strictly
    before ``start_date``. The diagnosis test then depends on
    ``only_prim_diagnoses``:

    * truthy  -- the primary diagnosis must be in ``codelist``;
    * falsy   -- ``all_diagnoses`` must contain any code of ``codelist``.

    The remaining rows are sorted by admission date and the last one per
    patient is returned.
    """
    dated = apcs.where(where).where(apcs.admission_date.is_before(start_date))
    diagnosis_filter = (
        apcs.primary_diagnosis.is_in(codelist)
        if only_prim_diagnoses
        else apcs.all_diagnoses.contains_any_of(codelist)
    )
    return dated.where(diagnosis_filter).sort_by(apcs.admission_date).last_for_patient()

# helper function
def any_of(conditions):
Expand Down Expand Up @@ -139,23 +129,15 @@ def first_matching_med_dmd_between(codelist, start_date, end_date, where=True):
.first_for_patient()
)

def first_matching_event_apc_between(codelist, start_date, end_date, where=True):
    """Return each patient's earliest matching ``apcs`` row in a date window.

    A row matches when it satisfies ``where``, its primary or secondary
    diagnosis is in ``codelist``, and its admission date is on or between
    ``start_date`` and ``end_date``. Matching rows are sorted by admission
    date and the first one per patient is returned.
    """
    candidates = apcs.where(where)
    diagnosis_matches = (
        apcs.primary_diagnosis.is_in(codelist)
        | apcs.secondary_diagnosis.is_in(codelist)
    )
    coded = candidates.where(diagnosis_matches)
    in_window = coded.where(apcs.admission_date.is_on_or_between(start_date, end_date))
    return in_window.sort_by(apcs.admission_date).first_for_patient()

def first_matching_event_opa_between(codelist, start_date, end_date, where=True):
    """Return each patient's earliest matching ``opa_diag`` row in a date window.

    A row matches when it satisfies ``where``, its primary diagnosis code or
    its first secondary diagnosis code is in ``codelist``, and its
    appointment date is on or between ``start_date`` and ``end_date``.
    Matching rows are sorted by appointment date and the first one per
    patient is returned.
    """
    candidates = opa_diag.where(where)
    diagnosis_matches = (
        opa_diag.primary_diagnosis_code.is_in(codelist)
        | opa_diag.secondary_diagnosis_code_1.is_in(codelist)
    )
    coded = candidates.where(diagnosis_matches)
    in_window = coded.where(opa_diag.appointment_date.is_on_or_between(start_date, end_date))
    return in_window.sort_by(opa_diag.appointment_date).first_for_patient()
def first_matching_event_apc_between(codelist, start_date, end_date, only_prim_diagnoses=False, where=True):
    """Return each patient's earliest matching ``apcs`` row in a date window.

    Rows are first restricted by ``where`` and to admission dates on or
    between ``start_date`` and ``end_date``. The diagnosis test then depends
    on ``only_prim_diagnoses``:

    * truthy  -- the primary diagnosis must be in ``codelist``;
    * falsy   -- ``all_diagnoses`` must contain any code of ``codelist``.

    The remaining rows are sorted by admission date and the first one per
    patient is returned.
    """
    in_window = apcs.where(where).where(
        apcs.admission_date.is_on_or_between(start_date, end_date)
    )
    diagnosis_filter = (
        apcs.primary_diagnosis.is_in(codelist)
        if only_prim_diagnoses
        else apcs.all_diagnoses.contains_any_of(codelist)
    )
    return in_window.where(diagnosis_filter).sort_by(apcs.admission_date).first_for_patient()

def first_matching_event_ec_snomed_between(codelist, start_date, end_date, where=True):
conditions = [
Expand Down
Loading

0 comments on commit 09877e9

Please sign in to comment.