Skip to content

Commit

Permalink
Merge pull request #94 from icgc-argo/argo-data-submission-wf@1.1.0
Browse files Browse the repository at this point in the history
[release]
  • Loading branch information
edsu7 authored Dec 1, 2023
2 parents 13c8ae4 + f1d4e81 commit 30bb5d3
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 78 deletions.
164 changes: 101 additions & 63 deletions argo-data-submission-wf/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
*/

nextflow.enable.dsl = 2
version = '1.0.3'
version = '1.1.0'

// universal params go here, change default value as needed
params.container = ""
Expand All @@ -44,10 +44,11 @@ params.download_mode="local"
params.song_container = "ghcr.io/overture-stack/song-client"
params.song_container_version = "5.0.2"
params.score_container = "ghcr.io/overture-stack/score"
params.score_container_version = "5.9.0"
params.score_container_version = "5.10.0"
params.score_mem = 20
params.score_cpus = 8
params.score_force = false
params.skip_upload = false

// sanityChecks
params.song_url=""
Expand All @@ -58,7 +59,7 @@ params.api_token=""
// payloadJsonToTsvs
params.data_directory="NO_FILE1"
params.skip_duplicate_check=false

params.skip_sanity_check=false
// payloadGenSeqExperiment
params.schema_url=""
params.experiment_info_tsv="NO_FILE2"
Expand Down Expand Up @@ -237,6 +238,19 @@ workflow ArgoDataSubmissionWf {
clinical_url
main:

if (!"${workflow.profile}".contains('docker') && !"${workflow.profile}".contains('singularity')){
exit 1, "Error Missing profile. `-profile` must be specified with the engines :`docker` or `singularity`."
}
if (!"${workflow.profile}".contains('rdpc_qa') && !"${workflow.profile}".contains('rdpc_dev') && !"${workflow.profile}".contains('rdpc')){
exit 1, "Error Missing profile. `-profile` must be specified with the rdpc environments : `rdpc_qa`,`rdpc_dev`, or `rdpc`."
}

if (!params.api_token){
if (!params.api_download_token || !params.api_upload_token) {
exit 1, "Error SONG parameters detected but missing token params. `--api_token` or `api_upload_token` and `api_download_token` must be supplied when uploading."
}
}

if (
og_experiment_info_tsv.startsWith("NO_FILE") && \
og_read_group_info_tsv.startsWith("NO_FILE") && \
Expand Down Expand Up @@ -278,29 +292,41 @@ workflow ArgoDataSubmissionWf {
file(data_directory)
)

sanityCheck(
payloadJsonToTsvs.out.experiment_tsv,
api_token,
song_url,
clinical_url,
params.skip_duplicate_check
)

experiment_info_tsv=sanityCheck.out.updated_experiment_info_tsv
read_group_info_tsv=payloadJsonToTsvs.out.read_group_tsv
file_info_tsv=payloadJsonToTsvs.out.file_tsv
if (params.skip_sanity_check){
experiment_info_tsv=file(payloadJsonToTsvs.out.experiment_tsv)
read_group_info_tsv=file(og_read_group_info_tsv)
file_info_tsv=file(og_file_info_tsv)
} else {
sanityCheck(
payloadJsonToTsvs.out.experiment_tsv,
api_token,
song_url,
clinical_url,
params.skip_duplicate_check
)

experiment_info_tsv=sanityCheck.out.updated_experiment_info_tsv
read_group_info_tsv=file(og_read_group_info_tsv)
file_info_tsv=file(og_file_info_tsv)
}
} else {
sanityCheck(
file(og_experiment_info_tsv),
api_token,
song_url,
clinical_url,
params.skip_duplicate_check
)

experiment_info_tsv=sanityCheck.out.updated_experiment_info_tsv
read_group_info_tsv=file(og_read_group_info_tsv)
file_info_tsv=file(og_file_info_tsv)
if (params.skip_sanity_check){
experiment_info_tsv=file(og_experiment_info_tsv)
read_group_info_tsv=file(og_read_group_info_tsv)
file_info_tsv=file(og_file_info_tsv)
} else {
sanityCheck(
file(og_experiment_info_tsv),
api_token,
song_url,
clinical_url,
params.skip_duplicate_check
)

experiment_info_tsv=sanityCheck.out.updated_experiment_info_tsv
read_group_info_tsv=file(og_read_group_info_tsv)
file_info_tsv=file(og_file_info_tsv)
}
}

checkCramReference(
Expand Down Expand Up @@ -362,20 +388,22 @@ workflow ArgoDataSubmissionWf {
skipping_tests
)

uploadWf(
study_id,
valSeq.out.validated_payload,
sequence_files.collect(),
''
)

submissionReceipt(
if (!params.skip_upload){
uploadWf(
study_id,
uploadWf.out.analysis_id,
song_url,
params.skip_submission_check,
sequence_files.collect()
)
valSeq.out.validated_payload,
sequence_files.collect(),
''
)

submissionReceipt(
study_id,
uploadWf.out.analysis_id,
song_url,
params.skip_submission_check,
sequence_files.collect()
)
}

} else if (checkCramReference.out.check_status && !ref_genome_fa.startsWith("NO_FILE")){
// If reference genome is provided...
Expand Down Expand Up @@ -403,52 +431,62 @@ workflow ArgoDataSubmissionWf {
sequence_files.collect().concat(cram2bam.out.output_bam.collect()).collect(),
skipping_tests
)

uploadWf(
study_id,
valSeq.out.validated_payload,
not_cram_sequence_files.concat(cram2bam.out.output_bam.collect()).collect(),
''
)

submissionReceipt(
if (!params.skip_upload){
uploadWf(
study_id,
uploadWf.out.analysis_id,
song_url,
params.skip_submission_check,
valSeq.out.validated_payload,
not_cram_sequence_files.concat(cram2bam.out.output_bam.collect()).collect(),
)
''
)

submissionReceipt(
study_id,
uploadWf.out.analysis_id,
song_url,
params.skip_submission_check,
not_cram_sequence_files.concat(cram2bam.out.output_bam.collect()).collect()
)
}
}
if (params.cleanup && params.download_mode!='local' && ref_genome_fa.startsWith("NO_FILE")) {
if (params.cleanup && params.download_mode!='local' && ref_genome_fa.startsWith("NO_FILE") && !params.skip_upload) {
// only clean up the sequence files when they were not read from local storage
cleanup(
sequence_files.collect(),
submissionReceipt.out.receipt // wait until upload is done
)
} else if (params.cleanup && params.download_mode!='local' && !ref_genome_fa.startsWith("NO_FILE")){
} else if (params.cleanup && params.download_mode!='local' && !ref_genome_fa.startsWith("NO_FILE") && !params.skip_upload){
// only cleanup the sequence files and cram2bam output when they are not from local
cleanup(
sequence_files.collect().concat(cram2bam.out.output_bam.collect()).collect(),
submissionReceipt.out.receipt // wait until upload is done
)
} else if (params.cleanup && params.download_mode=='local' && !ref_genome_fa.startsWith("NO_FILE")){
} else if (params.cleanup && params.download_mode=='local' && !ref_genome_fa.startsWith("NO_FILE") && !params.skip_upload){
// only cleanup output from cram2bam on local
cleanup(
cram2bam.out.output_bam.collect(),
submissionReceipt.out.receipt // wait until upload is done
)
}

printOut(
pGenExp.out.payload,
uploadWf.out.analysis_id,
submissionReceipt.out.receipt
)
if (!params.skip_upload){
printOut(
pGenExp.out.payload,
uploadWf.out.analysis_id,
submissionReceipt.out.receipt
)
out_payload = pGenExp.out.payload
out_analysis_id = uploadWf.out.analysis_id
out_receipt = submissionReceipt.out.receipt
} else {
out_payload = null
out_analysis_id = null
out_receipt = null
}

emit:
json_file=pGenExp.out.payload
output_analysis_id=uploadWf.out.analysis_id
receipt=submissionReceipt.out.receipt
out_json_file=out_payload
out_output_analysis_id=out_analysis_id
out_receipt=out_receipt
}

// this provides an entry point for this main script, so it can be run directly without cloning the repo
Expand All @@ -467,4 +505,4 @@ workflow {
params.song_url,
params.clinical_url
)
}
}
4 changes: 2 additions & 2 deletions argo-data-submission-wf/pkg.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "argo-data-submission-wf",
"version": "1.0.3",
"version": "1.1.0",
"description": "ARGO data submission workflow",
"main": "main.nf",
"deprecated": false,
Expand Down Expand Up @@ -36,4 +36,4 @@
"license": "GNU Affero General Public License v3",
"bugReport": "https://github.com/icgc-argo/argo-data-submission/issues",
"homepage": "https://github.com/icgc-argo/argo-data-submission#readme"
}
}
2 changes: 1 addition & 1 deletion argo-data-submission-wf/tests/checker.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
*/

nextflow.enable.dsl = 2
version = '1.0.3'
version = '1.1.0'

// universal params
params.publish_dir = ""
Expand Down
2 changes: 2 additions & 0 deletions argo-data-submission-wf/tests/input/prod_experiment-fq.v2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
type submitter_sequencing_experiment_id program_id submitter_donor_id submitter_specimen_id submitter_sample_id submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count
sequencing_experiment SubWf_exp_02_FASTQ_input TEST-CA TEST_SUBMITTER_DONOR_ID_zzespofnsa TEST_SUBMITTER_SPECIMEN_ID_zzespofnsaT1 TEST_SUBMITTER_SAMPLE_ID_zzespofnsaT1 TEST_SUBMITTER_SAMPLE_ID_zzespofnsaN1 EXT ILLUMINA HiSeq 2000 WGS 2014-12-12 3
7 changes: 7 additions & 0 deletions argo-data-submission-wf/tests/input/prod_file-fq.v2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
type name format size md5sum path
file C0HVY.2_r1.fq.gz FASTQ 2040 c4971f805930e9e31e1c45314c652d3c input/C0HVY.2_r1.fq.gz
file C0HVY.2_r2.fq.gz FASTQ 2042 f60026e682bf55f014c84d494112aa13 input/C0HVY.2_r2.fq.gz
file D0RE2.1_r1.fq.gz FASTQ 2309 835b35f6e7f8263636fce4224b1aca69 input/D0RE2.1_r1.fq.gz
file D0RE2.1_r2.fq.gz FASTQ 2365 23d030ddba2916f8430b13234e3e9a0c input/D0RE2.1_r2.fq.gz
file D0RH0.2_r1.fq.gz FASTQ 1872 b3aa75d0585a989bb25fae252a10b532 input/D0RH0.2_r1.fq.gz
file D0RH0.2_r2.fq.gz FASTQ 1894 c59ebfa4199287bf3911a4c1d6eda71e input/D0RH0.2_r2.fq.gz
4 changes: 4 additions & 0 deletions argo-data-submission-wf/tests/input/prod_read_group-fq.v2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
type submitter_read_group_id read_group_id_in_bam submitter_sequencing_experiment_id platform_unit is_paired_end file_r1 file_r2 read_length_r1 read_length_r2 insert_size sample_barcode library_name
read_group C0HVY.2 SubWf_exp_02_FASTQ_input 74_8a true C0HVY.2_r1.fq.gz C0HVY.2_r2.fq.gz 150 150 298 Pond-147580
read_group D0RE2.1 SubWf_exp_02_FASTQ_input 74_8b true D0RE2.1_r1.fq.gz D0RE2.1_r2.fq.gz 150 150 298 Pond-147580
read_group D0RH0.2 SubWf_exp_02_FASTQ_input 74_8c true D0RH0.2_r1.fq.gz D0RH0.2_r2.fq.gz 150 150 298 Pond-147580
24 changes: 12 additions & 12 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ process {
}

profiles {
collab {
params.song_url = 'https://submission-song.rdpc.cancercollaboratory.org'
params.score_url = 'https://submission-score.rdpc.cancercollaboratory.org'
rdpc {
params.song_url = 'https://submission-song.rdpc.argo.genomeinformatics.org'
params.score_url = 'https://submission-score.rdpc.argo.genomeinformatics.org'
params.clinical_url = 'https://clinical.platform.icgc-argo.org'
params.schema_url = 'https://submission-song.rdpc.cancercollaboratory.org/schemas/sequencing_experiment'
params.schema_url = 'https://submission-song.rdpc.argo.genomeinformatics.org/schemas/sequencing_experiment'
}
docker {
docker.enabled = true
Expand All @@ -73,16 +73,16 @@ profiles {
singularity.autoMounts = true
singularity.engineOptions = '-s'
}
debug_qa {
params.song_url = 'https://submission-song.rdpc-qa.cancercollaboratory.org'
params.score_url = 'https://submission-score.rdpc-qa.cancercollaboratory.org'
rdpc_qa {
params.song_url = 'https://submission-song.rdpc-qa.cumulus.genomeinformatics.org'
params.score_url = 'https://submission-score.rdpc-qa.cumulus.genomeinformatics.org'
params.clinical_url = 'https://clinical.qa.argo.cancercollaboratory.org'
params.schema_url = 'https://submission-song.rdpc-qa.cancercollaboratory.org/schemas/sequencing_experiment'
params.schema_url = 'https://submission-song.rdpc-qa.cumulus.genomeinformatics.org/schemas/sequencing_experiment'
}
debug_dev {
params.song_url = 'https://submission-song.rdpc-dev.cancercollaboratory.org'
params.score_url = 'https://submission-score.rdpc-dev.cancercollaboratory.org'
rdpc_dev {
params.song_url = 'https://submission-song.rdpc-dev.cumulus.genomeinformatics.org'
params.score_url = 'https://submission-score.rdpc-dev.cumulus.genomeinformatics.org'
params.clinical_url = 'https://clinical.dev.argo.cancercollaboratory.org'
params.schema_url = 'https://submission-song.rdpc-dev.cancercollaboratory.org/schemas/sequencing_experiment'
params.schema_url = 'https://submission-song.rdpc-dev.cumulus.genomeinformatics.org/schemas/sequencing_experiment'
}
}

0 comments on commit 30bb5d3

Please sign in to comment.