Skip to content

Commit

Permalink
resolve merge conflict with develop
Browse files Browse the repository at this point in the history
  • Loading branch information
rxu17 committed Oct 23, 2024
2 parents 682088a + d8a8888 commit 5fc96da
Show file tree
Hide file tree
Showing 9 changed files with 355 additions and 92 deletions.
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* @Sage-Bionetworks/genie_admins
36 changes: 24 additions & 12 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ params.comment = 'NSCLC public release update'
params.production = false
params.schema_ignore_params = ""
params.help = false
params.step = "update_potential_phi_fields_table"

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -44,9 +45,12 @@ NfcoreSchema.validateParameters(workflow, params, log)
if (params.cohort == null) { exit 1, 'cohort parameter not specified!' }
if (params.comment == null) { exit 1, 'comment parameter not specified!' }
if (params.production == null) { exit 1, 'production parameter not specified!' }
if (params.step == null) { exit 1, 'step parameter not specified!' }


// Print parameter summary log to screen
log.info NfcoreSchema.paramsSummaryLog(workflow, params)
log.info "Running step: ${params.step}"

// Print message for production mode vs test mode
if (params.production) {
Expand All @@ -66,6 +70,7 @@ else {
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { update_potential_phi_fields_table } from './modules/update_potential_phi_fields_table'
include { run_quac_upload_report_error } from './modules/run_quac_upload_report_error'
include { run_quac_upload_report_warning } from './modules/run_quac_upload_report_warning'
include { merge_and_uncode_rca_uploads } from './modules/merge_and_uncode_rca_uploads'
Expand All @@ -77,7 +82,6 @@ include { run_quac_comparison_report } from './modules/run_quac_comparison_repor
include { create_masking_report } from './modules/create_masking_report'
include { update_case_count_table } from './modules/update_case_count_table'
include { run_clinical_release } from './modules/run_clinical_release'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RUN WORKFLOW
Expand All @@ -87,17 +91,25 @@ include { run_clinical_release } from './modules/run_clinical_release'
workflow BPC_PIPELINE {
ch_cohort = Channel.value(params.cohort)
ch_comment = Channel.value(params.comment)

run_quac_upload_report_error(ch_cohort)
run_quac_upload_report_warning(run_quac_upload_report_error.out, ch_cohort, params.production)
merge_and_uncode_rca_uploads(run_quac_upload_report_warning.out, ch_cohort, params.production)
// remove_patients_from_merged(merge_and_uncode_rca_uploads.out, ch_cohort, params.production)
update_data_table(merge_and_uncode_rca_uploads.out, ch_comment, params.production)
update_date_tracking_table(update_data_table.out, ch_cohort, ch_comment, params.production)
run_quac_table_report(update_date_tracking_table.out, ch_cohort, params.production)
run_quac_comparison_report(run_quac_table_report.out, ch_cohort, params.production)
create_masking_report(run_quac_comparison_report.out, ch_cohort, params.production)
update_case_count_table(create_masking_report.out, ch_comment, params.production)

if (params.step == "update_potential_phi_fields_table") {
update_potential_phi_fields_table(ch_comment, params.production)
// validate_data.out.view()
} else if (params.step == "genie_bpc_pipeline"){
update_potential_phi_fields_table(ch_comment, params.production)
run_quac_upload_report_error(update_potential_phi_fields_table.out, ch_cohort)
run_quac_upload_report_warning(run_quac_upload_report_error.out, ch_cohort, params.production)
merge_and_uncode_rca_uploads(run_quac_upload_report_warning.out, ch_cohort, params.production)
// remove_patients_from_merged(merge_and_uncode_rca_uploads.out, ch_cohort, params.production)
update_data_table(merge_and_uncode_rca_uploads.out, ch_comment, params.production)
update_date_tracking_table(update_data_table.out, ch_cohort, ch_comment, params.production)
run_quac_table_report(update_date_tracking_table.out, ch_cohort, params.production)
run_quac_comparison_report(run_quac_table_report.out, ch_cohort, params.production)
create_masking_report(run_quac_comparison_report.out, ch_cohort, params.production)
update_case_count_table(create_masking_report.out, ch_comment, params.production)
} else {
exit 1, 'step not supported'
}
}

/*
Expand Down
30 changes: 30 additions & 0 deletions modules/update_potential_phi_fields_table.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
Updates the potential PHI fields table with any new variables to redact.

Inputs:
  comment    - value channel; snapshot comment forwarded to the R script as -c
  production - boolean; when true the R script is run with the --production flag
               (production project), otherwise it runs in staging mode
Output:
  stdout of the Rscript invocation (debug is enabled so it is also echoed)

Runs inside the $params.references_docker container and requires the
SYNAPSE_AUTH_TOKEN secret for Synapse authentication.
*/
process update_potential_phi_fields_table {

container "$params.references_docker"
secret 'SYNAPSE_AUTH_TOKEN'
debug true

input:
val comment
val production

output:
stdout

script:
if (production) {
"""
cd /usr/local/src/myscripts/
Rscript update_potential_phi_fields_table.R -c $comment --production
"""
}
else {
"""
cd /usr/local/src/myscripts/
Rscript update_potential_phi_fields_table.R -c $comment
"""
}
}
8 changes: 8 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ manifest {
profiles {
aws_prod {
process {
withName: update_potential_phi_fields_table {
memory = 32.GB
cpus = 8
}
withName: run_workflow_case_selection {
memory = 32.GB
cpus = 8
Expand Down Expand Up @@ -46,5 +50,9 @@ profiles {
cpus = 8
}
}
params {
// docker image parameters, see nextflow_schema.json for details
references_docker = "sagebionetworks/genie-bpc-pipeline-references"
}
}
}
13 changes: 13 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,19 @@
false
]
},
"step": {
"type": "string",
"default": "update_potential_phi_fields_table",
"description": "Available BPC steps",
"enum": [
"update_potential_phi_fields_table",
"genie_bpc_pipeline"
]
},
"references_docker":{
"type": "string",
"description": "Name of docker to use in processes in scripts/references"
},
"schema_ignore_params": {
"type": "string",
"description": "Put parameters to ignore for validation here separated by comma",
Expand Down
35 changes: 24 additions & 11 deletions scripts/references/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,21 +1,34 @@
FROM r-base:4.0.0
FROM rstudio/r-base:4.0-bullseye

# Set working directory
WORKDIR /usr/local/src/myscripts

# Set environment variable for renv version
ENV RENV_VERSION 0.14.0

RUN rm /etc/apt/apt.conf.d/default
RUN apt-get update -y
RUN apt-get install -y dpkg-dev zlib1g-dev libssl-dev libffi-dev
# procps is required for nextflow tower
RUN apt-get install -y curl libcurl4-openssl-dev procps
RUN R -e "install.packages('synapser', repos=c('http://ran.synapse.org', 'http://cran.fhcrc.org'))"
# Update apt-get and install system dependencies (only install required)
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
dpkg-dev zlib1g-dev libssl-dev libffi-dev \
libcurl4-openssl-dev curl procps && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

ENV PYTHON /usr/local/lib/R/site-library/PythonEmbedInR/bin/python3.6
# Install R packages including remotes and renv
RUN R -e "install.packages('remotes', repos = 'https://cloud.r-project.org')" && \
R -e "remotes::install_github('rstudio/renv', ref = '${RENV_VERSION}')" || true

RUN R -e "install.packages('remotes', repos = c(CRAN = 'https://cloud.r-project.org'))"
RUN R -e "remotes::install_github('rstudio/renv@${RENV_VERSION}')"
# Install synapser with specific version
RUN R -e "remotes::install_version('synapser', version = '0.11.7', repos = c('http://ran.synapse.org', 'http://cran.fhcrc.org'))"

COPY . .
# Set Python environment variable for R
ENV PYTHON /usr/local/lib/R/site-library/PythonEmbedInR/bin/python3.6

# Copy only renv.lock first to leverage docker cache for dependencies
COPY renv.lock renv.lock

# Restore R environment with renv
RUN R -e "renv::restore()"

# Copy the local project files into the container
COPY . .
57 changes: 48 additions & 9 deletions scripts/references/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,22 +45,26 @@ Usage: update_potential_phi_fields_table.R [options]
Options:
-f SYNID_FILE_SOR, --synid_file_sor=SYNID_FILE_SOR
Synapse ID of Scope of Release file (default: syn22294851)
-t SYNID_TABLE_RED, --synid_table_red=SYNID_TABLE_RED
Synapse ID of table listing variables to redact (default: syn23281483)
-a AUTH, --auth=AUTH
path to .synapseConfig or Synapse PAT (default: standard login precedence)
-d , --dry_run
Whether to dry-run or not.
--production
Whether to run in production mode (uses production project) or not (runs in staging mode and uses staging project).
-h, --help
Show this help message and exit
-c, --comment
Comment for new table snapshot version. This must be unique and is tied to the cohort run.
```

Example run:
Example run (in staging mode) that updates the potential PHI fields table,
tagging the new table snapshot with the version comment 3.0.1:
```
Rscript update_potential_phi_fields_table.R
Rscript update_potential_phi_fields_table.R -c "version3.0.1"
```

## Usage: updating the cBioPortal mapping table
Expand Down Expand Up @@ -94,7 +98,7 @@ Options:

Example run:
```
Rscript update_potential_phi_fields_table.R -v
Rscript update_cbio_mapping.R -v
```

## Usage: updating upload tracking table
Expand Down Expand Up @@ -127,3 +131,38 @@ Example run:
```
Rscript update_date_tracking_table.R -c CRC -d 2022-03-31 -s 'round x update to crc'
```

## Running tests
There are unit tests under `scripts/references/tests`.

1. Please pull and run the docker image associated with this module from [here](https://github.com/Sage-Bionetworks/genie-bpc-pipeline/pkgs/container/genie-bpc-pipeline) on your EC2 instance or local machine.

```bash
docker run -d --name <nickname_for_container> <container_name> /bin/bash -c "while true; do sleep 1; done"
```

2. Make any changes you need inside the container (e.g., copy your current local changes into it):

```bash
docker cp ./. test_container:/usr/local/src/myscripts
```

3. Execute container into a bash session

```bash
docker exec -it <nickname_for_container> /bin/bash
```

4. Install the `mockery` and `testthat` packages:

```bash
R -e "remotes::install_cran('mockery')"
R -e "remotes::install_cran('testthat')"
```

5. Run the following in a R session:

```R
library(testthat)
test_dir("/usr/local/src/myscripts/tests")
```
100 changes: 100 additions & 0 deletions scripts/references/tests/test_update_potential_phi_fields_table.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
library(mockery)
library(testthat)

source(testthat::test_path("..", "update_potential_phi_fields_table.R"))

# Setup: runs once before the tests, creating shared dummy input data.
# Mocks are deliberately NOT created here: mockery mocks accumulate call
# counts, so sharing one mock across tests makes every `expect_called`
# assertion depend on which tests ran before it. Each test builds its own
# fresh mocks instead, keeping the tests independent.
setup({
  test_df_update_non_empty <<- data.frame(col1 = 1:3, col2 = letters[1:3])
  test_df_update_empty <<- data.frame()
  synid_table <<- "syn123"
  synid_file_sor <<- "syn456"
  test_comment <<- "Test comment"
})

# Helper: build fresh mocks and stub them into update_red_table.
# `snapshot_synapse_table` is stubbed to return version 3.
make_stubbed_mocks <- function() {
  mocks <- list(
    synStore = mock(),
    snapshot = mock(3)
  )
  stub(update_red_table, "synStore", mocks$synStore)
  stub(update_red_table, "snapshot_synapse_table", mocks$snapshot)
  mocks
}

test_that("update_red_table does not update table when dry_run is TRUE and df_update has non-empty rows", {
  mocks <- make_stubbed_mocks()

  result <- update_red_table(
    synid_table, synid_file_sor, test_df_update_non_empty, test_comment,
    dry_run = TRUE
  )

  # Dry run: nothing is stored and no snapshot is taken
  expect_called(mocks$synStore, 0)
  expect_called(mocks$snapshot, 0)

  # No update was made, so no snapshot version is returned
  expect_equal(result, NA)
})

test_that("update_red_table does not update table when dry_run is TRUE and df_update has empty rows", {
  mocks <- make_stubbed_mocks()

  result <- update_red_table(
    synid_table, synid_file_sor, test_df_update_empty, test_comment,
    dry_run = TRUE
  )

  # Dry run: nothing is stored and no snapshot is taken
  expect_called(mocks$synStore, 0)
  expect_called(mocks$snapshot, 0)

  # No update was made, so no snapshot version is returned
  expect_equal(result, NA)
})

test_that("update_red_table updates table when dry_run is FALSE and df_update has non-empty rows", {
  mocks <- make_stubbed_mocks()

  result <- update_red_table(
    synid_table, synid_file_sor, test_df_update_non_empty, test_comment,
    dry_run = FALSE
  )

  # The update is stored once and one snapshot is taken
  expect_called(mocks$synStore, 1)
  expect_called(mocks$snapshot, 1)

  # Result is 3, the snapshot version returned by the stubbed
  # snapshot_synapse_table
  expect_equal(result, 3)
})

test_that("update_red_table does not update table when dry_run is FALSE and df_update has empty rows", {
  mocks <- make_stubbed_mocks()

  result <- update_red_table(
    synid_table, synid_file_sor, test_df_update_empty, test_comment,
    dry_run = FALSE
  )

  # NOTE(review): with per-test mocks an empty update should trigger no
  # store and no snapshot (the original assertions only passed because the
  # shared mocks carried the previous test's single call) — confirm against
  # update_red_table's handling of empty data frames.
  expect_called(mocks$synStore, 0)
  expect_called(mocks$snapshot, 0)

  # No update was made, so no snapshot version is returned
  expect_equal(result, NA)
})
Loading

0 comments on commit 5fc96da

Please sign in to comment.