Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add code for therapeutics code graph #65

Merged
merged 8 commits into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Dev-container image: starts from the r-docker RStudio image, adds Python 3.10
# and the OpenSAFELY Python virtualenv, and runs as the `rstudio` user.
FROM remlapmot/r-docker:2024-04-02-rstudio

# Use the key=value LABEL form; the space-separated form is a legacy syntax.
LABEL org.opencontainers.image.source="https://github.com/opensafely/research-template"

# we are going to use an apt cache on the host, so disable the default debian
# docker clean up that deletes that cache on every apt install
RUN rm -f /etc/apt/apt.conf.d/docker-clean

# Install python 3.10. This is the version used by the python-docker
# image, used for analyses using the OpenSAFELY pipeline.
# apt-get is used rather than apt because apt's CLI is not meant for scripts.
RUN --mount=type=cache,target=/var/cache/apt \
    echo "deb http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal main" > /etc/apt/sources.list.d/deadsnakes-ppa.list &&\
    /usr/lib/apt/apt-helper download-file 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xf23c5a6cf475977595c89f51ba6932366a755776' /etc/apt/trusted.gpg.d/deadsnakes.asc &&\
    apt-get update &&\
    apt-get install -y curl python3.10 python3.10-distutils python3.10-venv &&\
    # Pip for Python 3.10 isn't included in deadsnakes, so install separately.
    # Download to a file first: piping curl straight into python would hide a
    # failed download, because the pipe's exit status is python's, not curl's.
    curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py &&\
    python3.10 /tmp/get-pip.py &&\
    rm -f /tmp/get-pip.py &&\
    # Set default python, so that the Python virtualenv works as expected
    # (-f replaces an existing link and also works if none is present)
    ln -sf /usr/bin/python3.10 /usr/bin/python3

# Copy the Python virtualenv from OpenSAFELY Python action image
COPY --from=ghcr.io/opensafely-core/python:v2 /opt/venv /opt/venv

# Create a local user and give it sudo (aka root) permissions
RUN usermod -aG sudo rstudio
RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers

# Required for installing opensafely cli
ENV PATH="/home/rstudio/.local/bin:${PATH}"

USER rstudio
43 changes: 43 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Dev container definition for this repository.
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/python
{
"name": "OpenSAFELY",
// Container image the dev container is started from.
"image": "ghcr.io/opensafely/research-template:latest",
// Features to add to the dev container. More info: https://containers.dev/features.
"features": {
"ghcr.io/devcontainers/features/docker-in-docker:2": {
"moby": true,
"azureDnsAutoDetection": true,
"installDockerBuildx": true,
"version": "latest",
"dockerDashComposeVersion": "v2"
}
},
// Runs .devcontainer/postCreate.sh with bash once the container has been created.
"postCreateCommand": "/bin/bash .devcontainer/postCreate.sh",
// Starts RStudio Server (via sudo) each time a client attaches.
"postAttachCommand": {
"rstudio-start": "sudo rstudio-server start"
},
// Port 8787 is forwarded and labelled "RStudio IDE" below.
"forwardPorts": [
8787
],
"portsAttributes": {
"8787": {
"label": "RStudio IDE"
}
},
// Configure tool-specific properties.
"customizations": {
"vscode": {
// VS Code extensions installed in the container (Python and Jupyter tooling).
"extensions": [
"ms-python.python",
"ms-toolsai.jupyter",
"ms-toolsai.jupyter-renderers"
]
}
},
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
// Environment variables set for processes in the container.
// NOTE(review): MAX_WORKERS is presumably read by the opensafely CLI to cap
// parallel jobs - confirm against the CLI documentation.
"remoteEnv": {
"MAX_WORKERS": "2"
}
}
15 changes: 15 additions & 0 deletions .devcontainer/postCreate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
#
# Dev-container post-create hook. Run from the repository root, it:
#   1. installs the Python requirements listed in .devcontainer/requirements.in
#   2. makes the repository root the working directory for R and for RStudio
#   3. downloads and unpacks the latest ehrql source into .devcontainer/

set -euo pipefail

# Resolve the workspace path once and always quote it, so paths containing
# spaces or shell/regex metacharacters are handled correctly.
workspace_dir="$(pwd)"

pip3 install --user -r .devcontainer/requirements.in

#set R working directory
rprofile_site="${R_HOME}/etc/Rprofile.site"
# -F: match the path literally rather than as a regular expression.
if ! grep -qF -- "${workspace_dir}" "${rprofile_site}"; then
  sudo tee -a "${rprofile_site}" <<< "setwd(\"${workspace_dir}\")"
fi

#set RStudio working directory
rstudio_prefs="${HOME}/.config/rstudio/rstudio-prefs.json"
# Seed an empty JSON object if the prefs file is missing or empty, so jq always
# has valid input.
mkdir -p "$(dirname "${rstudio_prefs}")"
if [ ! -s "${rstudio_prefs}" ]; then
  echo '{}' > "${rstudio_prefs}"
fi
if ! grep -qF -- "${workspace_dir}" "${rstudio_prefs}"; then
  # Write to a temporary file first: redirecting jq's output onto the file it
  # is still reading truncates that file before it has been read.
  tmp_prefs="$(mktemp)"
  # --arg passes the path as a JSON string, so it is escaped correctly.
  jq --arg dir "${workspace_dir}" '. + {initial_working_directory: $dir}' "${rstudio_prefs}" > "${tmp_prefs}"
  # Copy the contents back (rather than mv) to keep the original file's
  # ownership and permissions.
  cat "${tmp_prefs}" > "${rstudio_prefs}"
  rm -f "${tmp_prefs}"
fi

#download and extract latest ehrql source
# -O overwrites any stale archive; with -P a leftover main.zip would make wget
# save main.zip.1 and the old archive would be the one unpacked.
wget https://github.com/opensafely-core/ehrql/archive/main.zip -O .devcontainer/main.zip
unzip -o .devcontainer/main.zip -d .devcontainer/
rm .devcontainer/main.zip
1 change: 1 addition & 0 deletions .devcontainer/requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
opensafely
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ venv/
.DS_Store
.Rproj.user
.Rhistory
.devcontainer/ehrql-main
reports/variation/*
reports/coverage/table_prop_eligible_clinc_demo.csv
reports/coverage/mabs_and_antivirals_coverage_report.html
reports/coverage/figures/*
reports/coverage/tables/*
released_outputs/reports_wip/coverage/*
reports/coverage_wip/*
released_outputs/*
reports/coverage_wip/*
analysis/descriptive/mabs-and-avs-by-stp.html
6 changes: 3 additions & 3 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"python.linting.pylintEnabled": false,
"python.linting.flake8Enabled": true,
"python.linting.enabled": true,
"python.analysis.extraPaths": [".devcontainer/ehrql-main/"],
"python.defaultInterpreterPath": "/opt/venv/bin/python3.10",
"python.terminal.activateEnvironment": true,
"data.preview.create.json.schema": false,
"files.associations": {
"*.feather": "arrow",
Expand Down
41 changes: 41 additions & 0 deletions analysis/descriptive/coverage_report_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -751,3 +751,44 @@ groups2 <- groups2 %>%

write_csv(rbind(all, groups) %>% filter(!is.na(tb)), fs::path(output_dir, "table_time_to_treat_redacted.csv"))
write_csv(groups2, fs::path(output_dir, "table_time_to_treat_groups_redacted.csv"))


# Treatment recording over time ----
# Builds daily counts of the last recorded treatment code, per treatment type
# and for an extra "All" series, then writes a redacted and an unredacted table.
# Relies on `data_processed_clean`, `threshold`, `output_dir` and `output_dir2`
# defined earlier in this script.

# Rows with a recorded last treatment date; reused for the per-type and "All" series.
last_treatment_records <- data_processed_clean %>%
  filter(!is.na(last_treatment_date)) %>%
  select(last_treatment_date, last_treatment_type)

plot_data_treatment_codes <- last_treatment_records %>%
  bind_rows(last_treatment_records %>% mutate(last_treatment_type = "All")) %>%
  group_by(last_treatment_date, last_treatment_type) %>%
  tally() %>%
  group_by(last_treatment_type) %>%
  arrange(last_treatment_type, last_treatment_date) %>%
  # Fill in days with no records so every series is continuous within its own
  # first-to-last date range.
  complete(last_treatment_date = seq.Date(min(last_treatment_date, na.rm = TRUE),
                                          max(last_treatment_date, na.rm = TRUE),
                                          by = "day")) %>%
  mutate(count = coalesce(n, 0L),
         # Round to the nearest 10, then suppress counts below the threshold
         count_redacted = plyr::round_any(count, 10),
         count_redacted = ifelse(count < threshold, NA, count_redacted)) %>%
  # cum_count = cumsum(count),
  # cum_count_redacted = plyr::round_any(cum_count, 10),
  # cum_count_redacted = ifelse(cum_count < threshold, NA, cum_count_redacted)
  select(-n) %>%
  arrange(last_treatment_type, last_treatment_date) %>%
  ungroup()

# Order treatment types by their peak redacted daily count (largest first).
# Types whose counts are all redacted are kept, with an NA peak sorted last.
# Dropping them would turn their label into NA when the factor levels are
# applied below.
plot_order <- plot_data_treatment_codes %>%
  group_by(last_treatment_type) %>%
  summarise(order = if (all(is.na(count_redacted))) NA_real_ else max(count_redacted, na.rm = TRUE)) %>%
  ungroup() %>%
  arrange(desc(order))

treatment_codes_plot_data <- plot_data_treatment_codes %>%
  mutate(last_treatment_type = factor(last_treatment_type, levels = plot_order$last_treatment_type))

# Redacted table
write_csv(treatment_codes_plot_data %>%
            select(last_treatment_date, count_redacted, last_treatment_type),
          fs::path(output_dir, "table_last_treatment_codes_redacted.csv"))
# Unredacted table: write the data built in this section. The previous code
# wrote `treatment_plot_data`, an object this section never creates.
write_csv(treatment_codes_plot_data, fs::path(output_dir2, "table_last_treatment_codes.csv"))

print("treatment_codes_plot_data saved")
2 changes: 1 addition & 1 deletion analysis/descriptive/crude_outcomes.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,7 @@ sgtf_outcomes <- data_processed_clean %>%
# Redact values < threshold
count = ifelse(count < threshold, NA, as.numeric(count)),
covid_positive_test = ifelse(covid_positive_test < threshold, NA, covid_positive_test),
covid_hospital_admission = ifelse(covid_hospital_admission < threshold, NA, as.numeric(covid_hospital_admission)),
covid_hospital_admission = ifelse(covid_hospital_admission < threshold, NA_integer_, as.numeric(covid_hospital_admission)),
covid_hospitalisation_critical_care = ifelse(covid_hospitalisation_critical_care < threshold, NA, as.numeric(covid_hospitalisation_critical_care)),
covid_death = ifelse(covid_death < threshold, NA, as.numeric(covid_death)),
any_death = ifelse(any_death < threshold, NA, as.numeric(any_death)),
Expand Down
4 changes: 2 additions & 2 deletions analysis/descriptive/mabs-and-avs-by-stp.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -498,8 +498,8 @@ table_elig_treat_redacted <- table_elig_treat %>%
Paxlovid = plyr::round_any(as.numeric(Paxlovid), 10),
Sotrovimab = plyr::round_any(as.numeric(Sotrovimab), 10),
Remdesivir = plyr::round_any(as.numeric(Remdesivir), 10),
Molnupiravir = plyr::round_any(Molnupiravir, 10),
Casirivimab = plyr::round_any(Casirivimab, 10)) %>%
Molnupiravir = plyr::round_any(as.numeric(Molnupiravir), 10),
Casirivimab = plyr::round_any(as.numeric(Casirivimab), 10)) %>%
arrange(desc(decile)) %>%
mutate(Paxlovid_perc = paste(round(Paxlovid/Treated*100, digits = 0), " (",
round((Paxlovid/Treated - 1.96*sqrt((Paxlovid/Treated)*(1-Paxlovid/Treated)/Treated))*100, digits = 0),
Expand Down
34 changes: 31 additions & 3 deletions analysis/process/process_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ data_extract0 <- read_csv(
remdesivir_covid_therapeutics = col_date(format = "%Y-%m-%d"),
molnupiravir_covid_therapeutics = col_date(format = "%Y-%m-%d"),
casirivimab_covid_therapeutics = col_date(format = "%Y-%m-%d"),

paxlovid_covid_therapeutics_last = col_date(format = "%Y-%m-%d"),
sotrovimab_covid_therapeutics_last = col_date(format = "%Y-%m-%d"),
remdesivir_covid_therapeutics_last = col_date(format = "%Y-%m-%d"),
molnupiravir_covid_therapeutics_last = col_date(format = "%Y-%m-%d"),
casirivimab_covid_therapeutics_last = col_date(format = "%Y-%m-%d"),

# ELIGIBILITY CRITERIA VARIABLES ----
covid_test_positive = col_logical(),
Expand Down Expand Up @@ -120,7 +126,7 @@ data_extract0 <- read_csv(
serious_mental_illness_nhsd = col_logical(),
sickle_cell_disease_nhsd = col_date(format = "%Y-%m-%d"),
vaccination_status = col_character(),
first_lc_code_date = col_date(format = "%Y-%m-%d"),
first_long_covid = col_date(format = "%Y-%m-%d"),

# COVID VARIENT
sgtf = col_character(),
Expand Down Expand Up @@ -151,7 +157,13 @@ if(Sys.getenv("OPENSAFELY_BACKEND") %in% c("", "expectations")){
remdesivir_covid_therapeutics = as.Date(ifelse(!is.na(remdesivir_covid_therapeutics), date, NA), origin = "1970-01-01"),
molnupiravir_covid_therapeutics = as.Date(ifelse(!is.na(molnupiravir_covid_therapeutics), date, NA), origin = "1970-01-01"),
casirivimab_covid_therapeutics = as.Date(ifelse(!is.na(casirivimab_covid_therapeutics), date, NA), origin = "1970-01-01"),


paxlovid_covid_therapeutics_last = as.Date(ifelse(!is.na(paxlovid_covid_therapeutics_last), date, NA), origin = "1970-01-01"),
sotrovimab_covid_therapeutics_last = as.Date(ifelse(!is.na(sotrovimab_covid_therapeutics_last), date, NA), origin = "1970-01-01"),
remdesivir_covid_therapeutics_last = as.Date(ifelse(!is.na(remdesivir_covid_therapeutics_last), date, NA), origin = "1970-01-01"),
molnupiravir_covid_therapeutics_last = as.Date(ifelse(!is.na(molnupiravir_covid_therapeutics_last), date, NA), origin = "1970-01-01"),
casirivimab_covid_therapeutics_last = as.Date(ifelse(!is.na(casirivimab_covid_therapeutics_last), date, NA), origin = "1970-01-01"),

covid_positive_test_30_days_post_elig_or_treat = as.Date(ifelse(covid_positive_test_30_days_post_elig_or_treat > covid_test_positive_date + 30,
covid_positive_test_30_days_post_elig_or_treat, NA), origin = "1970-01-01"))

Expand Down Expand Up @@ -192,6 +204,19 @@ data_processed <- data_extract %>%
treatment_date == casirivimab_covid_therapeutics ~ "Casirivimab",
TRUE ~ NA_character_),

# last MAB or Antiviral recorded
# Combines the five per-drug `*_covid_therapeutics_last` dates into one date,
# ignoring missing values (na.rm = TRUE).
# NOTE(review): pmin() returns the EARLIEST of the per-drug `_last` dates. If
# the intent is the most recent treatment record across drugs, pmax() would be
# needed - confirm against the study definition.
last_treatment_date = as.Date(pmin(paxlovid_covid_therapeutics_last, sotrovimab_covid_therapeutics_last,
remdesivir_covid_therapeutics_last, molnupiravir_covid_therapeutics_last,
casirivimab_covid_therapeutics_last, na.rm = TRUE), origin = "1970-01-01"),
# Label the drug whose `_last` date equals the combined date. case_when() uses
# the first matching branch, so ties resolve in the order listed here; rows
# with no match (e.g. no treatment recorded) get NA.
last_treatment_type = case_when(
last_treatment_date == paxlovid_covid_therapeutics_last ~ "Paxlovid",
last_treatment_date == sotrovimab_covid_therapeutics_last ~ "Sotrovimab",
last_treatment_date == remdesivir_covid_therapeutics_last ~ "Remdesivir",
last_treatment_date == molnupiravir_covid_therapeutics_last ~ "Molnupiravir",
last_treatment_date == casirivimab_covid_therapeutics_last ~ "Casirivimab",
TRUE ~ NA_character_),



# ELIGIBILITY VARIABLES ----

Expand All @@ -212,7 +237,7 @@ data_processed <- data_extract %>%
sickle_cell_disease_nhsd = ifelse(!is.na(sickle_cell_disease_nhsd), 1, 0),

## Long COVID
long_covid = ifelse(!is.na(first_lc_code_date), 1, 0),
long_covid = ifelse(!is.na(first_long_covid), 1, 0),

# Combine subgoups of rare neurological conditions cohort
rare_neurological_conditions_nhsd = pmin(multiple_sclerosis_nhsd, motor_neurone_disease_nhsd, myasthenia_gravis_nhsd,
Expand Down Expand Up @@ -561,6 +586,9 @@ data_processed_clean <- data_processed_combined %>%
# Treatment
paxlovid_covid_therapeutics, sotrovimab_covid_therapeutics, remdesivir_covid_therapeutics, molnupiravir_covid_therapeutics,
casirivimab_covid_therapeutics, treatment_date, treatment_type,

# Last treatment code recorded
last_treatment_date, last_treatment_type,

# High risk cohort
downs_syndrome, solid_cancer, haematological_disease, renal_disease, liver_disease, imid, immunosupression,
Expand Down
Loading
Loading