From e2fef9ff1a4a904b0c4efcefa5b8aa2d71ff275b Mon Sep 17 00:00:00 2001 From: ekiernan Date: Wed, 8 Jan 2025 13:06:35 -0500 Subject: [PATCH 01/17] testing h5adutils with new docker --- tasks/skylab/H5adUtils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index af83a9e3f8..b54a8eaa17 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -106,7 +106,7 @@ task OptimusH5adGeneration { >>> runtime { - docker: warp_tools_docker_path + docker: "us.gcr.io/broad-gotc-prod/warp-tools:lk-PD-2814-add-gtf-h5ad" cpu: cpu # note that only 1 thread is supported by pseudobam memory: "~{machine_mem_mb} MiB" disks: "local-disk ~{disk} HDD" From 12b99ac2a9ef500333a75148d6b7f681fbef6387 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Wed, 8 Jan 2025 14:21:01 -0500 Subject: [PATCH 02/17] added GTF file to adata_uns --- pipelines/skylab/atac/atac.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index c0c748c042..024f2e8fe5 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -601,6 +601,8 @@ task CreateFragmentFile { atac_data = ad.read_h5ad("temp_metrics.h5ad") # Add nhash_id to h5ad file as unstructured metadata atac_data.uns['NHashID'] = atac_nhash_id + + atac_data.uns['GTF'] = str(atac_gtf) # calculate tsse metrics snap.metrics.tsse(atac_data, atac_gtf) # Write new atac file From 87b73d544c06ab777f0f38dd4c9747995ac4c794 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Wed, 8 Jan 2025 15:24:34 -0500 Subject: [PATCH 03/17] changing name of warp_tools_docker variable so be version agnostic --- pipelines/skylab/optimus/Optimus.wdl | 12 ++++++------ tasks/skylab/H5adUtils.wdl | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 37d28381e2..f9e8bbe815 100644 --- a/pipelines/skylab/optimus/Optimus.wdl 
+++ b/pipelines/skylab/optimus/Optimus.wdl @@ -94,7 +94,7 @@ workflow Optimus { String pytools_docker = "pytools:1.0.0-1661263730" String empty_drops_docker = "empty-drops:1.0.1-4.2" String star_docker = "star:1.0.1-2.7.11a-1692706072" - String warp_tools_docker_2_2_0 = "warp-tools:2.5.0" + String warp_tools_docker = "warp-tools:lk-PD-2814-add-gtf-h5ad" String star_merge_docker = "star-merge-npz:1.3.0" String samtools_star = "samtools-star:1.0.0-1.11-2.7.11a-1731516196" @@ -171,7 +171,7 @@ workflow Optimus { chemistry = tenx_chemistry_version, sample_id = input_id, read_struct = read_struct, - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) { @@ -203,7 +203,7 @@ workflow Optimus { mt_genes = mt_genes, original_gtf = annotations_gtf, input_id = input_id, - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } call Metrics.CalculateCellMetrics as CellMetrics { @@ -212,7 +212,7 @@ workflow Optimus { mt_genes = mt_genes, original_gtf = annotations_gtf, input_id = input_id, - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } call StarAlign.MergeStarOutput as MergeStarOutputs { @@ -261,7 +261,7 @@ workflow Optimus { empty_drops_result = RunEmptyDrops.empty_drops_result, counting_mode = counting_mode, pipeline_version = "Optimus_v~{pipeline_version}", - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } } if (count_exons && counting_mode=="sn_rna") { @@ -300,7 +300,7 @@ workflow Optimus { cell_id_exon = MergeStarOutputsExons.row_index, gene_id_exon = MergeStarOutputsExons.col_index, pipeline_version = "Optimus_v~{pipeline_version}", - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + 
warp_tools_docker_path = docker_prefix + warp_tools_docker } } diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index b54a8eaa17..5304e113f7 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -106,7 +106,7 @@ task OptimusH5adGeneration { >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/warp-tools:lk-PD-2814-add-gtf-h5ad" + docker: "warp_tools_docker_path" cpu: cpu # note that only 1 thread is supported by pseudobam memory: "~{machine_mem_mb} MiB" disks: "local-disk ~{disk} HDD" From 2fec7b82a1f632fe359adcb6dbcf2f0eb2bb063f Mon Sep 17 00:00:00 2001 From: ekiernan Date: Wed, 8 Jan 2025 18:30:52 -0500 Subject: [PATCH 04/17] removed quotes --- tasks/skylab/H5adUtils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index 5304e113f7..af83a9e3f8 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -106,7 +106,7 @@ task OptimusH5adGeneration { >>> runtime { - docker: "warp_tools_docker_path" + docker: warp_tools_docker_path cpu: cpu # note that only 1 thread is supported by pseudobam memory: "~{machine_mem_mb} MiB" disks: "local-disk ~{disk} HDD" From 38d731ce72c6680b15dd32d4284278fee1fa48f3 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Thu, 9 Jan 2025 14:18:55 -0500 Subject: [PATCH 05/17] testing all wdls with new warp-tools docker --- pipelines/skylab/atac/atac.wdl | 4 ++-- pipelines/skylab/slideseq/SlideSeq.wdl | 10 +++++----- tasks/skylab/FastqProcessing.wdl | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 024f2e8fe5..a829413458 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -57,7 +57,7 @@ workflow ATAC { String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix # Docker image names - String warp_tools_2_2_0 = "warp-tools:2.5.0" + String warp_tools_docker = 
"warp-tools:lk-PD-2814-add-gtf-h5ad" String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919" String samtools_docker = "samtools-dist-bwa:3.0.0" String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311" @@ -99,7 +99,7 @@ workflow ATAC { output_base_name = input_id, num_output_files = GetNumSplits.ranks_per_node_out, whitelist = whitelist, - docker_path = docker_prefix + warp_tools_2_2_0 + docker_path = docker_prefix + warp_tools_docker } scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) { diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl index e43f07979c..408b62eeb1 100644 --- a/pipelines/skylab/slideseq/SlideSeq.wdl +++ b/pipelines/skylab/slideseq/SlideSeq.wdl @@ -48,7 +48,7 @@ workflow SlideSeq { # docker images String pytools_docker = "pytools:1.0.0-1661263730" String picard_cloud_docker = "picard-cloud:2.26.10" - String warp_tools_docker_2_2_0 = "warp-tools:2.5.0" + String warp_tools_docker = "warp-tools:lk-PD-2814-add-gtf-h5ad" String star_merge_docker = "star-merge-npz:1.3.0" String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" @@ -124,7 +124,7 @@ workflow SlideSeq { bam_input = MergeBam.output_bam, original_gtf = annotations_gtf, input_id = input_id, - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } call Metrics.CalculateUMIsMetrics as UMIsMetrics { input: @@ -138,7 +138,7 @@ workflow SlideSeq { bam_input = MergeBam.output_bam, original_gtf = annotations_gtf, input_id = input_id, - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } @@ -162,7 +162,7 @@ workflow SlideSeq { gene_id = MergeStarOutputs.col_index, add_emptydrops_data = "no", pipeline_version = "SlideSeq_v~{pipeline_version}", - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = 
docker_prefix + warp_tools_docker } } @@ -188,7 +188,7 @@ workflow SlideSeq { cell_id_exon = MergeStarOutputsExons.row_index, gene_id_exon = MergeStarOutputsExons.col_index, pipeline_version = "SlideSeq_v~{pipeline_version}", - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } } diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl index 5263f53ef2..9a6b3ed101 100644 --- a/tasks/skylab/FastqProcessing.wdl +++ b/tasks/skylab/FastqProcessing.wdl @@ -138,7 +138,7 @@ task FastqProcessingSlidSeq { # Runtime attributes - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.5.0" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:lk-PD-2814-add-gtf-h5ad" Int cpu = 16 Int machine_mb = 40000 Int disk = ceil(size(r1_fastq, "GiB")*3 + size(r2_fastq, "GiB")*3) + 50 From 13fbf2aec6a5d1f2d273f5b2d0dbb50b31adbd43 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Fri, 10 Jan 2025 15:23:37 -0500 Subject: [PATCH 06/17] updated h5ad with gs:// path for reference_gtf_file --- pipelines/skylab/atac/atac.wdl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index a829413458..e147a24a75 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -602,7 +602,19 @@ task CreateFragmentFile { # Add nhash_id to h5ad file as unstructured metadata atac_data.uns['NHashID'] = atac_nhash_id - atac_data.uns['GTF'] = str(atac_gtf) + # Add GTF to uns field + + # Original path from args.annotation_file + annotation_gtf = str(atac_gtf) # e.g., '/cromwell_root/gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf' + + # Transform the path + if annotation_gtf.startswith('/cromwell_root/'): + stripped_path = annotation_gtf[len('/cromwell_root/'):] # Remove '/cromwell_root/' + updated_path = f'gs://{stripped_path}' # Add 'gs://' prefix + else: + 
updated_path = str(atac_gtf) + + atac_data.uns["reference_gtf_file"] = updated_path # calculate tsse metrics snap.metrics.tsse(atac_data, atac_gtf) # Write new atac file From 7b5cae51ea79e01f2411bcdc4c6a8c297ef96b14 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Fri, 10 Jan 2025 15:41:01 -0500 Subject: [PATCH 07/17] testing gtf path --- pipelines/skylab/atac/atac.wdl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index e147a24a75..437a87fa5f 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -605,16 +605,16 @@ task CreateFragmentFile { # Add GTF to uns field # Original path from args.annotation_file - annotation_gtf = str(atac_gtf) # e.g., '/cromwell_root/gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf' - - # Transform the path - if annotation_gtf.startswith('/cromwell_root/'): - stripped_path = annotation_gtf[len('/cromwell_root/'):] # Remove '/cromwell_root/' - updated_path = f'gs://{stripped_path}' # Add 'gs://' prefix - else: - updated_path = str(atac_gtf) + gtf_path = str(~{annotations_gtf}) # e.g., '/cromwell_root/gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf' + + # # Transform the path + # if annotation_gtf.startswith('/cromwell_root/'): + # stripped_path = annotation_gtf[len('/cromwell_root/'):] # Remove '/cromwell_root/' + # updated_path = f'gs://{stripped_path}' # Add 'gs://' prefix + # else: + # updated_path = str(atac_gtf) - atac_data.uns["reference_gtf_file"] = updated_path + atac_data.uns["reference_gtf_file"] = gtf_path # calculate tsse metrics snap.metrics.tsse(atac_data, atac_gtf) # Write new atac file From cb69b95be3a868ef60c42cd6f657b78c2a5fff20 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Fri, 10 Jan 2025 15:51:51 -0500 Subject: [PATCH 08/17] testing new script and new logging for gtf path --- tasks/skylab/H5adUtils.wdl | 7 +++++++ 1 file 
changed, 7 insertions(+) diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index af83a9e3f8..1ac71225f2 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -57,6 +57,12 @@ task OptimusH5adGeneration { touch empty_drops_result.csv + # Save the path of the annotation_file to a variable + gtf_path="~{annotation_file}" + + # Echo the gtf_path for logging/debugging purposes + echo "GTF Path: $gtf_path" + if [ "~{counting_mode}" == "sc_rna" ]; then python3 /warptools/scripts/create_h5ad_optimus.py \ ~{if defined(empty_drops_result) then "--empty_drops_file " + empty_drops_result else "--empty_drops_file empty_drops_result.csv " } \ @@ -74,6 +80,7 @@ task OptimusH5adGeneration { --count_matrix ~{sparse_count_matrix} \ --expression_data_type "exonic" \ --pipeline_version ~{pipeline_version} + --gtf-path $gtf_path else python3 /warptools/scripts/create_snrna_optimus_full_h5ad.py \ --annotation_file ~{annotation_file} \ From 6f10319727817f46fdffe1b3d4cf9a847881649d Mon Sep 17 00:00:00 2001 From: ekiernan Date: Fri, 10 Jan 2025 16:00:36 -0500 Subject: [PATCH 09/17] updated gtf_path --- tasks/skylab/H5adUtils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index 1ac71225f2..f5bb418e91 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -80,7 +80,7 @@ task OptimusH5adGeneration { --count_matrix ~{sparse_count_matrix} \ --expression_data_type "exonic" \ --pipeline_version ~{pipeline_version} - --gtf-path $gtf_path + --gtf_path $gtf_path else python3 /warptools/scripts/create_snrna_optimus_full_h5ad.py \ --annotation_file ~{annotation_file} \ From 422ad3be3a4f30383e41aa96767ca66bdb4013b1 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Fri, 10 Jan 2025 20:01:53 -0500 Subject: [PATCH 10/17] testing fix for wdl gtf_path syntax --- pipelines/skylab/atac/atac.wdl | 3 ++- tasks/skylab/H5adUtils.wdl | 2 +- 2 files changed, 3 insertions(+), 2 
deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 437a87fa5f..92037920b8 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -521,6 +521,7 @@ task CreateFragmentFile { String atac_nhash_id = "" String input_id Int atac_expected_cells = 3000 + String gtf_path = annotations_gtf } parameter_meta { @@ -605,7 +606,7 @@ task CreateFragmentFile { # Add GTF to uns field # Original path from args.annotation_file - gtf_path = str(~{annotations_gtf}) # e.g., '/cromwell_root/gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf' + gtf_path = "~{gtf_path}" # e.g., '/cromwell_root/gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf' # # Transform the path # if annotation_gtf.startswith('/cromwell_root/'): diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index f5bb418e91..8d5b279c56 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -79,7 +79,7 @@ task OptimusH5adGeneration { ~{"--input_name_metadata_field " + input_name_metadata_field} \ --count_matrix ~{sparse_count_matrix} \ --expression_data_type "exonic" \ - --pipeline_version ~{pipeline_version} + --pipeline_version ~{pipeline_version} \ --gtf_path $gtf_path else python3 /warptools/scripts/create_snrna_optimus_full_h5ad.py \ From 29a37fc5d8bac4baad10f3df89e5ffb2d68c497d Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 13 Jan 2025 09:54:01 -0500 Subject: [PATCH 11/17] added annotation file as string to h5ad utils task --- tasks/skylab/H5adUtils.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index 8d5b279c56..e6a04977b1 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -34,6 +34,7 @@ task OptimusH5adGeneration { File? 
empty_drops_result String counting_mode = "sc_rna" String add_emptydrops_data = "yes" + String gtf_path = annotation_file String pipeline_version @@ -58,7 +59,7 @@ task OptimusH5adGeneration { touch empty_drops_result.csv # Save the path of the annotation_file to a variable - gtf_path="~{annotation_file}" + gtf_path="~{gtf_path}" # Echo the gtf_path for logging/debugging purposes echo "GTF Path: $gtf_path" From ee6c5b753e93bea3bbc590fd8d9a4e017d28a0a0 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Mon, 13 Jan 2025 14:54:28 +0000 Subject: [PATCH 12/17] Updated pipeline_versions.txt with all pipeline version information --- pipeline_versions.txt | 56 +++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/pipeline_versions.txt b/pipeline_versions.txt index b24084e5fe..45c3483d7c 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -1,40 +1,40 @@ Pipeline Name Version Date of Last Commit -CheckFingerprint 1.0.22 2024-10-28 -RNAWithUMIsPipeline 1.0.18 2024-11-04 -AnnotationFiltration 1.2.7 2024-11-04 -UltimaGenomicsWholeGenomeGermline 1.1.3 2024-12-05 -WholeGenomeGermlineSingleSample 3.3.3 2024-11-04 -ExomeGermlineSingleSample 3.2.3 2024-11-04 -JointGenotypingByChromosomePartTwo 1.5.2 2024-11-04 -JointGenotypingByChromosomePartOne 1.5.2 2024-11-04 +ExomeReprocessing 3.3.3 2024-11-04 +CramToUnmappedBams 1.1.3 2024-08-02 +WholeGenomeReprocessing 3.3.3 2024-11-04 +ExternalExomeReprocessing 3.3.3 2024-11-04 +ExternalWholeGenomeReprocessing 2.3.3 2024-11-04 +UltimaGenomicsJointGenotyping 1.2.2 2024-11-04 ReblockGVCF 2.4.0 2024-12-05 +JointGenotypingByChromosomePartOne 1.5.2 2024-11-04 +JointGenotypingByChromosomePartTwo 1.5.2 2024-11-04 JointGenotyping 1.7.2 2024-11-04 -UltimaGenomicsJointGenotyping 1.2.2 2024-11-04 +ExomeGermlineSingleSample 3.2.3 2024-11-04 +WholeGenomeGermlineSingleSample 3.3.3 2024-11-04 +UltimaGenomicsWholeGenomeGermline 1.1.3 2024-12-05 VariantCalling 2.2.4 2024-11-04 
-UltimaGenomicsWholeGenomeCramOnly 1.0.23 2024-11-04 GDCWholeGenomeSomaticSingleSample 1.3.4 2024-11-04 -BroadInternalRNAWithUMIs 1.0.36 2024-11-04 -BroadInternalUltimaGenomics 1.1.3 2024-12-05 -BroadInternalArrays 1.1.14 2024-11-04 -BroadInternalImputation 1.1.14 2024-11-04 -Arrays 2.6.30 2024-11-04 +UltimaGenomicsWholeGenomeCramOnly 1.0.23 2024-11-04 +IlluminaGenotypingArray 1.12.24 2024-11-04 +AnnotationFiltration 1.2.7 2024-11-04 +CheckFingerprint 1.0.22 2024-10-28 ValidateChip 1.16.7 2024-11-04 -MultiSampleArrays 1.6.2 2024-08-02 Imputation 1.1.15 2024-11-04 -IlluminaGenotypingArray 1.12.24 2024-11-04 -ExternalWholeGenomeReprocessing 2.3.3 2024-11-04 -ExternalExomeReprocessing 3.3.3 2024-11-04 -CramToUnmappedBams 1.1.3 2024-08-02 -WholeGenomeReprocessing 3.3.3 2024-11-04 -ExomeReprocessing 3.3.3 2024-11-04 +MultiSampleArrays 1.6.2 2024-08-02 +Arrays 2.6.30 2024-11-04 +BroadInternalUltimaGenomics 1.1.3 2024-12-05 +BroadInternalImputation 1.1.14 2024-11-04 +BroadInternalArrays 1.1.14 2024-11-04 +BroadInternalRNAWithUMIs 1.0.36 2024-11-04 +RNAWithUMIsPipeline 1.0.18 2024-11-04 +Multiome 5.9.4 2024-12-05 +MultiSampleSmartSeq2SingleNucleus 2.0.6 2024-11-15 BuildIndices 3.1.0 2024-11-26 +SlideSeq 3.4.7 2024-12-3 +PairedTag 1.9.0 2024-12-05 +atac 2.5.3 2024-11-22 scATAC 1.3.2 2023-08-03 snm3C 4.0.4 2024-08-06 -Multiome 5.9.4 2024-12-05 -PairedTag 1.9.0 2024-12-05 -MultiSampleSmartSeq2 2.2.22 2024-09-11 -MultiSampleSmartSeq2SingleNucleus 2.0.6 2024-11-15 Optimus 7.9.0 2024-12-05 -atac 2.5.3 2024-11-22 +MultiSampleSmartSeq2 2.2.22 2024-09-11 SmartSeq2SingleSample 5.1.21 2024-09-11 -SlideSeq 3.4.7 2024-12-3 From 4e74a75a0edbb38cc595a467f44a827d3ecb15b9 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 13 Jan 2025 10:32:53 -0500 Subject: [PATCH 13/17] updating gtf_path for remaining scripts --- tasks/skylab/H5adUtils.wdl | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index 
e6a04977b1..dedd01a509 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -56,13 +56,7 @@ task OptimusH5adGeneration { command <<< set -euo pipefail - touch empty_drops_result.csv - - # Save the path of the annotation_file to a variable - gtf_path="~{gtf_path}" - - # Echo the gtf_path for logging/debugging purposes - echo "GTF Path: $gtf_path" + touch empty_drops_result.csv if [ "~{counting_mode}" == "sc_rna" ]; then python3 /warptools/scripts/create_h5ad_optimus.py \ @@ -81,7 +75,7 @@ task OptimusH5adGeneration { --count_matrix ~{sparse_count_matrix} \ --expression_data_type "exonic" \ --pipeline_version ~{pipeline_version} \ - --gtf_path $gtf_path + --gtf_path ~{gtf_path} else python3 /warptools/scripts/create_snrna_optimus_full_h5ad.py \ --annotation_file ~{annotation_file} \ @@ -96,7 +90,8 @@ task OptimusH5adGeneration { ~{"--input_name_metadata_field " + input_name_metadata_field} \ --count_matrix ~{sparse_count_matrix} \ --expression_data_type "whole_transcript"\ - --pipeline_version ~{pipeline_version} + --pipeline_version ~{pipeline_version} \ + --gtf_path ~{gtf_path} fi # modify h5ad to include doublets, NHASHID, and build library metrics @@ -166,6 +161,7 @@ task SingleNucleusOptimusH5adOutput { File? library_metrics # Cell calls from starsolo in TSV format File? 
cellbarcodes + String gtf_path = annotation_file String pipeline_version @@ -202,7 +198,8 @@ task SingleNucleusOptimusH5adOutput { ~{"--input_id_metadata_field " + input_id_metadata_field} \ ~{"--input_name_metadata_field " + input_name_metadata_field} \ --expression_data_type "whole_transcript" \ - --pipeline_version ~{pipeline_version} + --pipeline_version ~{pipeline_version} \ + --gtf_path ~{gtf_path} # modify h5ad to include doublets, NHASHID, and build library metrics python3 /warptools/scripts/add_library_tso_doublets.py \ From e1ee33202067b038af1eb3c4ea3b279e13a8f541 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 13 Jan 2025 15:01:50 -0500 Subject: [PATCH 14/17] updated warp-tools docker to versioned docker 2.6.0 --- pipelines/skylab/atac/atac.wdl | 2 +- pipelines/skylab/optimus/Optimus.wdl | 2 +- pipelines/skylab/slideseq/SlideSeq.wdl | 2 +- tasks/skylab/FastqProcessing.wdl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 92037920b8..c165b091b6 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -57,7 +57,7 @@ workflow ATAC { String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix # Docker image names - String warp_tools_docker = "warp-tools:lk-PD-2814-add-gtf-h5ad" + String warp_tools_docker = "warp-tools:2.6.0" String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919" String samtools_docker = "samtools-dist-bwa:3.0.0" String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311" diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index f9e8bbe815..7031c1f676 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -94,7 +94,7 @@ workflow Optimus { String pytools_docker = "pytools:1.0.0-1661263730" String empty_drops_docker = "empty-drops:1.0.1-4.2" String star_docker = "star:1.0.1-2.7.11a-1692706072" - String 
warp_tools_docker = "warp-tools:lk-PD-2814-add-gtf-h5ad" + String warp_tools_docker = "warp-tools:2.6.0" String star_merge_docker = "star-merge-npz:1.3.0" String samtools_star = "samtools-star:1.0.0-1.11-2.7.11a-1731516196" diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl index 408b62eeb1..fdd9702393 100644 --- a/pipelines/skylab/slideseq/SlideSeq.wdl +++ b/pipelines/skylab/slideseq/SlideSeq.wdl @@ -48,7 +48,7 @@ workflow SlideSeq { # docker images String pytools_docker = "pytools:1.0.0-1661263730" String picard_cloud_docker = "picard-cloud:2.26.10" - String warp_tools_docker = "warp-tools:lk-PD-2814-add-gtf-h5ad" + String warp_tools_docker = "warp-tools:2.6.0" String star_merge_docker = "star-merge-npz:1.3.0" String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl index 9a6b3ed101..530eee652b 100644 --- a/tasks/skylab/FastqProcessing.wdl +++ b/tasks/skylab/FastqProcessing.wdl @@ -138,7 +138,7 @@ task FastqProcessingSlidSeq { # Runtime attributes - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:lk-PD-2814-add-gtf-h5ad" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.6.0" Int cpu = 16 Int machine_mb = 40000 Int disk = ceil(size(r1_fastq, "GiB")*3 + size(r2_fastq, "GiB")*3) + 50 From 9b6b1904e21b5a9a8c8074e41cd65090efce78bf Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 13 Jan 2025 15:11:01 -0500 Subject: [PATCH 15/17] changelog updates --- pipelines/skylab/atac/atac.changelog.md | 5 +++++ pipelines/skylab/atac/atac.wdl | 2 +- pipelines/skylab/multiome/Multiome.changelog.md | 3 ++- pipelines/skylab/optimus/Optimus.changelog.md | 4 +++- pipelines/skylab/paired_tag/PairedTag.changelog.md | 5 +++-- pipelines/skylab/slideseq/SlideSeq.changelog.md | 3 ++- .../MultiSampleSmartSeq2SingleNucleus.changelog.md | 3 ++- 7 files changed, 18 insertions(+), 7 deletions(-) diff --git 
a/pipelines/skylab/atac/atac.changelog.md b/pipelines/skylab/atac/atac.changelog.md index 578088a0d6..074e2e3614 100644 --- a/pipelines/skylab/atac/atac.changelog.md +++ b/pipelines/skylab/atac/atac.changelog.md @@ -1,3 +1,8 @@ +# 2.5.4 +2025-01-13 (Date of Last Commit) + +* Added reference_gtf_file to the output h5ad unstructured metadata + # 2.5.3 2024-11-22 (Date of Last Commit) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index c165b091b6..ddfc7c43a1 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -49,7 +49,7 @@ workflow ATAC { String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" } - String pipeline_version = "2.5.3" + String pipeline_version = "2.5.4" # Determine docker prefix based on cloud provider String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index f324ff841c..42b92421f5 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,7 +1,8 @@ # 5.9.5 -2024-12-12 (Date of Last Commit) +2025-01-13 (Date of Last Commit) * Added a boolean variable is_slidetags; default is false but it is set to true if the Slide-Tags pipeline is calling Optimus +* Added reference_gtf_file to the output h5ad unstructured metadata # 5.9.4 2024-12-05 (Date of Last Commit) diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md index 59f45b5005..a1df2a0575 100644 --- a/pipelines/skylab/optimus/Optimus.changelog.md +++ b/pipelines/skylab/optimus/Optimus.changelog.md @@ -1,8 +1,10 @@ # 7.9.1 -2024-12-12 (Date of Last Commit) +2025-01-13 (Date of Last Commit) * Added a boolean variable is_slidetags; set to false by default, but set to true if the Slide-Tags pipeline is calling Optimus +* Added reference_gtf_file to the output h5ad unstructured metadata + # 7.9.0 2024-12-05 (Date of 
Last Commit) diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index ac48970ae8..1b272008cd 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,7 +1,8 @@ # 1.9.1 -2024-12-12 (Date of Last Commit) +2025-01-13 (Date of Last Commit) -* Added a boolean variable is_slidetags. Set to true if Slide-Tags pipeline calling Optimus, otherwise false. +* Added a boolean variable is_slidetags; default is false, but set to true if Slide-Tags pipeline is calling Optimus +* Added reference_gtf_file to the output h5ad unstructured metadata # 1.9.0 2024-12-05 (Date of Last Commit) diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md index 1aecacb54c..cc1ba27ed1 100644 --- a/pipelines/skylab/slideseq/SlideSeq.changelog.md +++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md @@ -1,7 +1,8 @@ # 3.4.8 -2024-12-12 (Date of Last Commit) +2025-01-13 (Date of Last Commit) * Added a boolean variable is_slidetags; this does not affect the outputs of the pipeline +* Added reference_gtf_file to the output h5ad unstructured metadata # 3.4.7 2024-12-3 (Date of Last Commit) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md index 908720365d..2bfa234f11 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md @@ -1,7 +1,8 @@ # 2.0.7 -2024-12-12 (Date of Last Commit) +2025-01-13 (Date of Last Commit) * Added a boolean variable is_slidetags; this does not affect the outputs of the pipeline +* Added reference_gtf_file to the output h5ad unstructured metadata # 2.0.6 2024-11-15 (Date 
of Last Commit) From 343595ed15665dcc6fc48fa37ed4c153de54bcf1 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Mon, 13 Jan 2025 20:11:22 +0000 Subject: [PATCH 16/17] Updated pipeline_versions.txt with all pipeline version information --- pipeline_versions.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pipeline_versions.txt b/pipeline_versions.txt index eb77d3561a..0c8b90a4f5 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -27,14 +27,14 @@ BroadInternalImputation 1.1.14 2024-11-04 BroadInternalArrays 1.1.14 2024-11-04 BroadInternalRNAWithUMIs 1.0.36 2024-11-04 RNAWithUMIsPipeline 1.0.18 2024-11-04 -Multiome 5.9.5 2024-12-12 -MultiSampleSmartSeq2SingleNucleus 2.0.7 2024-12-12 +Multiome 5.9.5 2025-01-13 +MultiSampleSmartSeq2SingleNucleus 2.0.7 2025-01-13 BuildIndices 3.1.0 2024-11-26 -SlideSeq 3.4.8 2024-12-12 -PairedTag 1.9.1 2024-12-12 -atac 2.5.3 2024-11-22 +SlideSeq 3.4.8 2025-01-13 +PairedTag 1.9.1 2025-01-13 +atac 2.5.4 2025-01-13 scATAC 1.3.2 2023-08-03 snm3C 4.0.4 2024-08-06 -Optimus 7.9.1 2024-12-12 +Optimus 7.9.1 2025-01-13 MultiSampleSmartSeq2 2.2.22 2024-09-11 SmartSeq2SingleSample 5.1.21 2024-09-11 From a01cc5760e4dc32263fd0e35f59bd563f615bf7e Mon Sep 17 00:00:00 2001 From: ekiernan Date: Wed, 15 Jan 2025 10:32:19 -0500 Subject: [PATCH 17/17] removing unnecessary code from atac createfragmentfile --- pipelines/skylab/atac/atac.wdl | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index ddfc7c43a1..32a06b6951 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -604,16 +604,8 @@ task CreateFragmentFile { atac_data.uns['NHashID'] = atac_nhash_id # Add GTF to uns field - # Original path from args.annotation_file - gtf_path = "~{gtf_path}" # e.g., '/cromwell_root/gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf' - - # # Transform the path - # if 
annotation_gtf.startswith('/cromwell_root/'): - # stripped_path = annotation_gtf[len('/cromwell_root/'):] # Remove '/cromwell_root/' - # updated_path = f'gs://{stripped_path}' # Add 'gs://' prefix - # else: - # updated_path = str(atac_gtf) + gtf_path = "~{gtf_path}" # e.g., 'gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf' atac_data.uns["reference_gtf_file"] = gtf_path # calculate tsse metrics