Skip to content

Commit

Permalink
Merge pull request #65 from opensafely/update-long-covid-definition
Browse files Browse the repository at this point in the history
Add code for therapeutics code graph
  • Loading branch information
rose-higgins authored May 9, 2024
2 parents 56b4f24 + 7021e63 commit f991220
Show file tree
Hide file tree
Showing 13 changed files with 303 additions and 35 deletions.
31 changes: 31 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Dev-container image: an RStudio base image with Python 3.10 and the
# OpenSAFELY Python virtualenv layered on top.
FROM remlapmot/r-docker:2024-04-02-rstudio

LABEL org.opencontainers.image.source="https://github.com/opensafely/research-template"

# we are going to use an apt cache on the host, so disable the default debian
# docker clean up that deletes that cache on every apt install
RUN rm -f /etc/apt/apt.conf.d/docker-clean

# Install python 3.10. This is the version used by the python-docker
# image, used for analyses using the OpenSAFELY pipeline.
# apt-get is used rather than apt because apt's CLI is not stable for scripts.
RUN --mount=type=cache,target=/var/cache/apt \
    echo "deb http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal main" > /etc/apt/sources.list.d/deadsnakes-ppa.list && \
    /usr/lib/apt/apt-helper download-file 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xf23c5a6cf475977595c89f51ba6932366a755776' /etc/apt/trusted.gpg.d/deadsnakes.asc && \
    apt-get update && \
    apt-get install -y curl python3.10 python3.10-distutils python3.10-venv && \
    # Pip for Python 3.10 isn't included in deadsnakes, so install separately.
    # Download to a file first (-f fails on HTTP errors) instead of piping into
    # the interpreter, so a failed download aborts the build.
    curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py && \
    python3.10 /tmp/get-pip.py && \
    rm -f /tmp/get-pip.py && \
    # Set default python, so that the Python virtualenv works as expected
    rm /usr/bin/python3 && ln -s /usr/bin/python3.10 /usr/bin/python3

# Copy the Python virtualenv from OpenSAFELY Python action image
COPY --from=ghcr.io/opensafely-core/python:v2 /opt/venv /opt/venv

# Add the rstudio user (assumed to be provided by the base image) to the sudo
# group and allow that group passwordless sudo (aka root) permissions
RUN usermod -aG sudo rstudio && \
    echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers

# Required for installing opensafely cli
ENV PATH="/home/rstudio/.local/bin:${PATH}"

USER rstudio
43 changes: 43 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/python
{
"name": "OpenSAFELY",
// The container is created from this prebuilt image; no local build is configured here.
"image": "ghcr.io/opensafely/research-template:latest",
// Features to add to the dev container. More info: https://containers.dev/features.
"features": {
"ghcr.io/devcontainers/features/docker-in-docker:2": {
"moby": true,
"azureDnsAutoDetection": true,
"installDockerBuildx": true,
"version": "latest",
"dockerDashComposeVersion": "v2"
}
},
// Run the repository's setup script with bash once the container has been created.
"postCreateCommand": "/bin/bash .devcontainer/postCreate.sh",
// On attach, start RStudio Server (needs sudo).
"postAttachCommand": {
"rstudio-start": "sudo rstudio-server start"
},
// Forward port 8787; it is labelled "RStudio IDE" in portsAttributes below.
"forwardPorts": [
8787
],
"portsAttributes": {
"8787": {
"label": "RStudio IDE"
}
},
// Configure tool-specific properties.
"customizations": {
"vscode": {
// VS Code extensions requested for this container: Python and Jupyter support.
"extensions": [
"ms-python.python",
"ms-toolsai.jupyter",
"ms-toolsai.jupyter-renderers"
]
}
},
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
// Environment variables for processes run in the container.
// NOTE(review): MAX_WORKERS presumably limits parallel OpenSAFELY jobs — confirm against the opensafely CLI.
"remoteEnv": {
"MAX_WORKERS": "2"
}
}
15 changes: 15 additions & 0 deletions .devcontainer/postCreate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
# Dev-container post-create setup:
#   1. install the Python requirements for the current user,
#   2. point R and RStudio at the project directory as their working directory,
#   3. download and unpack the latest ehrql source into .devcontainer/.
# Expects to be run from the project root and requires R_HOME to be set.

set -euo pipefail

pip3 install --user -r .devcontainer/requirements.in

# Resolve the project directory once; quoted everywhere so paths containing
# spaces or regex metacharacters are handled literally.
project_dir="$(pwd)"

#set R working directory
rprofile="$R_HOME/etc/Rprofile.site"
if ! grep -qF -- "$project_dir" "$rprofile"; then
    sudo tee -a "$rprofile" <<< "setwd(\"$project_dir\")"
fi

#set RStudio working directory
prefs_file=~/.config/rstudio/rstudio-prefs.json
if ! grep -qF -- "$project_dir" "$prefs_file" 2>/dev/null; then
    # Start from an empty JSON object when RStudio has not written prefs yet.
    mkdir -p "$(dirname "$prefs_file")"
    [ -s "$prefs_file" ] || echo '{}' > "$prefs_file"
    # Write to a temporary file first: redirecting jq's output straight back
    # onto its own input truncates the file before it has been read.
    prefs_tmp="$(mktemp)"
    jq --arg dir "$project_dir" '. + {initial_working_directory: $dir}' "$prefs_file" > "$prefs_tmp"
    cat "$prefs_tmp" > "$prefs_file"
    rm -f "$prefs_tmp"
fi

#download and extract latest ehrql source
# -O overwrites any stale archive instead of saving the download as main.zip.1
wget https://github.com/opensafely-core/ehrql/archive/main.zip -O .devcontainer/main.zip
unzip -o .devcontainer/main.zip -d .devcontainer/
rm .devcontainer/main.zip
1 change: 1 addition & 0 deletions .devcontainer/requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
opensafely
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ venv/
.DS_Store
.Rproj.user
.Rhistory
.devcontainer/ehrql-main
reports/variation/*
reports/coverage/table_prop_eligible_clinc_demo.csv
reports/coverage/mabs_and_antivirals_coverage_report.html
reports/coverage/figures/*
reports/coverage/tables/*
released_outputs/reports_wip/coverage/*
reports/coverage_wip/*
released_outputs/*
reports/coverage_wip/*
analysis/descriptive/mabs-and-avs-by-stp.html
6 changes: 3 additions & 3 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"python.linting.pylintEnabled": false,
"python.linting.flake8Enabled": true,
"python.linting.enabled": true,
"python.analysis.extraPaths": [".devcontainer/ehrql-main/"],
"python.defaultInterpreterPath": "/opt/venv/bin/python3.10",
"python.terminal.activateEnvironment": true,
"data.preview.create.json.schema": false,
"files.associations": {
"*.feather": "arrow",
Expand Down
41 changes: 41 additions & 0 deletions analysis/descriptive/coverage_report_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -751,3 +751,44 @@ groups2 <- groups2 %>%

write_csv(rbind(all, groups) %>% filter(!is.na(tb)), fs::path(output_dir, "table_time_to_treat_redacted.csv"))
write_csv(groups2, fs::path(output_dir, "table_time_to_treat_groups_redacted.csv"))


# Treatment recording over time
# Builds a daily count of last-treatment records per treatment type (plus an
# "All" series), rounds/redacts the counts, and writes the result to CSV.
plot_data_treatment_codes <- data_processed_clean %>%
  filter(!is.na(last_treatment_date)) %>%
  select(last_treatment_date, last_treatment_type) %>%
  # Append a copy of every record relabelled "All" so an overall series is
  # tallied alongside the per-treatment series
  rbind(data_processed_clean %>%
          filter(!is.na(last_treatment_date)) %>%
          select(last_treatment_date, last_treatment_type) %>%
          mutate(last_treatment_type = "All")) %>%
  group_by(last_treatment_date, last_treatment_type) %>%
  tally() %>%
  group_by(last_treatment_type) %>%
  arrange(last_treatment_type, last_treatment_date) %>%
  # Within each treatment type, add a row for every day between its first and
  # last date; days with no records get n = NA, converted to 0 below
  complete(last_treatment_date = seq.Date(min(last_treatment_date, na.rm = TRUE),
                                          max(last_treatment_date, na.rm = TRUE),
                                          by = "day")) %>%
  mutate(count = ifelse(is.na(n), 0, n),
         # Round to the nearest 10, then blank out counts below the threshold
         count_redacted = plyr::round_any(count, 10),
         count_redacted = ifelse(count < threshold, NA, count_redacted)) %>%
  # cum_count = cumsum(count),
  # cum_count_redacted = plyr::round_any(cum_count, 10),
  # cum_count_redacted = ifelse(cum_count < threshold, NA, cum_count_redacted)
  select(-n) %>%
  arrange(last_treatment_type, last_treatment_date)

# Order treatment types by their largest redacted daily count (descending);
# used as the factor level order below
plot_order <- plot_data_treatment_codes %>%
  group_by(last_treatment_type) %>%
  mutate(order = max(count_redacted, na.rm = TRUE)) %>%
  arrange(desc(order)) %>%
  filter(count_redacted == order) %>%
  select(last_treatment_type, order) %>%
  distinct()

treatment_codes_plot_data <- plot_data_treatment_codes %>%
  mutate(last_treatment_type = factor(last_treatment_type, levels = plot_order$last_treatment_type))

# Redacted table: only the date, redacted count and treatment type columns
write_csv(treatment_codes_plot_data %>%
            select(last_treatment_date, count_redacted, last_treatment_type),
          fs::path(output_dir, "table_last_treatment_codes_redacted.csv"))
# Full table, including raw counts. Writes the data frame built in this section:
# the previous code passed `treatment_plot_data`, which this section never defines.
# NOTE(review): output_dir2 is presumably the non-released output folder — confirm.
write_csv(treatment_codes_plot_data, fs::path(output_dir2, "table_last_treatment_codes.csv"))

print("treatment_codes_plot_data saved")
2 changes: 1 addition & 1 deletion analysis/descriptive/crude_outcomes.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,7 @@ sgtf_outcomes <- data_processed_clean %>%
# Redact values < threshold
count = ifelse(count < threshold, NA, as.numeric(count)),
covid_positive_test = ifelse(covid_positive_test < threshold, NA, covid_positive_test),
covid_hospital_admission = ifelse(covid_hospital_admission < threshold, NA, as.numeric(covid_hospital_admission)),
covid_hospital_admission = ifelse(covid_hospital_admission < threshold, NA_integer_, as.numeric(covid_hospital_admission)),
covid_hospitalisation_critical_care = ifelse(covid_hospitalisation_critical_care < threshold, NA, as.numeric(covid_hospitalisation_critical_care)),
covid_death = ifelse(covid_death < threshold, NA, as.numeric(covid_death)),
any_death = ifelse(any_death < threshold, NA, as.numeric(any_death)),
Expand Down
4 changes: 2 additions & 2 deletions analysis/descriptive/mabs-and-avs-by-stp.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -498,8 +498,8 @@ table_elig_treat_redacted <- table_elig_treat %>%
Paxlovid = plyr::round_any(as.numeric(Paxlovid), 10),
Sotrovimab = plyr::round_any(as.numeric(Sotrovimab), 10),
Remdesivir = plyr::round_any(as.numeric(Remdesivir), 10),
Molnupiravir = plyr::round_any(Molnupiravir, 10),
Casirivimab = plyr::round_any(Casirivimab, 10)) %>%
Molnupiravir = plyr::round_any(as.numeric(Molnupiravir), 10),
Casirivimab = plyr::round_any(as.numeric(Casirivimab), 10)) %>%
arrange(desc(decile)) %>%
mutate(Paxlovid_perc = paste(round(Paxlovid/Treated*100, digits = 0), " (",
round((Paxlovid/Treated - 1.96*sqrt((Paxlovid/Treated)*(1-Paxlovid/Treated)/Treated))*100, digits = 0),
Expand Down
34 changes: 31 additions & 3 deletions analysis/process/process_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ data_extract0 <- read_csv(
remdesivir_covid_therapeutics = col_date(format = "%Y-%m-%d"),
molnupiravir_covid_therapeutics = col_date(format = "%Y-%m-%d"),
casirivimab_covid_therapeutics = col_date(format = "%Y-%m-%d"),

paxlovid_covid_therapeutics_last = col_date(format = "%Y-%m-%d"),
sotrovimab_covid_therapeutics_last = col_date(format = "%Y-%m-%d"),
remdesivir_covid_therapeutics_last = col_date(format = "%Y-%m-%d"),
molnupiravir_covid_therapeutics_last = col_date(format = "%Y-%m-%d"),
casirivimab_covid_therapeutics_last = col_date(format = "%Y-%m-%d"),

# ELIGIBILITY CRITERIA VARIABLES ----
covid_test_positive = col_logical(),
Expand Down Expand Up @@ -120,7 +126,7 @@ data_extract0 <- read_csv(
serious_mental_illness_nhsd = col_logical(),
sickle_cell_disease_nhsd = col_date(format = "%Y-%m-%d"),
vaccination_status = col_character(),
first_lc_code_date = col_date(format = "%Y-%m-%d"),
first_long_covid = col_date(format = "%Y-%m-%d"),

# COVID VARIANT
sgtf = col_character(),
Expand Down Expand Up @@ -151,7 +157,13 @@ if(Sys.getenv("OPENSAFELY_BACKEND") %in% c("", "expectations")){
remdesivir_covid_therapeutics = as.Date(ifelse(!is.na(remdesivir_covid_therapeutics), date, NA), origin = "1970-01-01"),
molnupiravir_covid_therapeutics = as.Date(ifelse(!is.na(molnupiravir_covid_therapeutics), date, NA), origin = "1970-01-01"),
casirivimab_covid_therapeutics = as.Date(ifelse(!is.na(casirivimab_covid_therapeutics), date, NA), origin = "1970-01-01"),


paxlovid_covid_therapeutics_last = as.Date(ifelse(!is.na(paxlovid_covid_therapeutics_last), date, NA), origin = "1970-01-01"),
sotrovimab_covid_therapeutics_last = as.Date(ifelse(!is.na(sotrovimab_covid_therapeutics_last), date, NA), origin = "1970-01-01"),
remdesivir_covid_therapeutics_last = as.Date(ifelse(!is.na(remdesivir_covid_therapeutics_last), date, NA), origin = "1970-01-01"),
molnupiravir_covid_therapeutics_last = as.Date(ifelse(!is.na(molnupiravir_covid_therapeutics_last), date, NA), origin = "1970-01-01"),
casirivimab_covid_therapeutics_last = as.Date(ifelse(!is.na(casirivimab_covid_therapeutics_last), date, NA), origin = "1970-01-01"),

covid_positive_test_30_days_post_elig_or_treat = as.Date(ifelse(covid_positive_test_30_days_post_elig_or_treat > covid_test_positive_date + 30,
covid_positive_test_30_days_post_elig_or_treat, NA), origin = "1970-01-01"))

Expand Down Expand Up @@ -192,6 +204,19 @@ data_processed <- data_extract %>%
treatment_date == casirivimab_covid_therapeutics ~ "Casirivimab",
TRUE ~ NA_character_),

# last MAB or Antiviral recorded
last_treatment_date = as.Date(pmin(paxlovid_covid_therapeutics_last, sotrovimab_covid_therapeutics_last,
remdesivir_covid_therapeutics_last, molnupiravir_covid_therapeutics_last,
casirivimab_covid_therapeutics_last, na.rm = TRUE), origin = "1970-01-01"),
last_treatment_type = case_when(
last_treatment_date == paxlovid_covid_therapeutics_last ~ "Paxlovid",
last_treatment_date == sotrovimab_covid_therapeutics_last ~ "Sotrovimab",
last_treatment_date == remdesivir_covid_therapeutics_last ~ "Remdesivir",
last_treatment_date == molnupiravir_covid_therapeutics_last ~ "Molnupiravir",
last_treatment_date == casirivimab_covid_therapeutics_last ~ "Casirivimab",
TRUE ~ NA_character_),



# ELIGIBILITY VARIABLES ----

Expand All @@ -212,7 +237,7 @@ data_processed <- data_extract %>%
sickle_cell_disease_nhsd = ifelse(!is.na(sickle_cell_disease_nhsd), 1, 0),

## Long COVID
long_covid = ifelse(!is.na(first_lc_code_date), 1, 0),
long_covid = ifelse(!is.na(first_long_covid), 1, 0),

# Combine subgroups of rare neurological conditions cohort
rare_neurological_conditions_nhsd = pmin(multiple_sclerosis_nhsd, motor_neurone_disease_nhsd, myasthenia_gravis_nhsd,
Expand Down Expand Up @@ -561,6 +586,9 @@ data_processed_clean <- data_processed_combined %>%
# Treatment
paxlovid_covid_therapeutics, sotrovimab_covid_therapeutics, remdesivir_covid_therapeutics, molnupiravir_covid_therapeutics,
casirivimab_covid_therapeutics, treatment_date, treatment_type,

# Last treatment code recorded
last_treatment_date, last_treatment_type,

# High risk cohort
downs_syndrome, solid_cancer, haematological_disease, renal_disease, liver_disease, imid, immunosupression,
Expand Down
Loading

0 comments on commit f991220

Please sign in to comment.