Skip to content

Commit

Permalink
Merge branch 'rc_peakcall_2797' of https://github.com/broadinstitute/…
Browse files Browse the repository at this point in the history
…warp into rc_peakcall_2797
  • Loading branch information
aawdeh committed Jan 21, 2025
2 parents cf63ad1 + 41de09c commit 093faea
Show file tree
Hide file tree
Showing 36 changed files with 358 additions and 156 deletions.
52 changes: 52 additions & 0 deletions beta-pipelines/skylab/slidetags/SlideTags.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,42 @@ version 1.0

import "scripts/spatial-count.wdl" as SpatialCount
import "scripts/positioning.wdl" as Positioning
import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus

workflow SlideTags {

String pipeline_version = "1.0.0"

input {
# Workflow inputs fall into two groups: the inputs consumed by the Slide-tags
# tasks, and the inputs that are forwarded to the imported Optimus workflow.

# slide-tags inputs
String id
Array[String] fastq_paths
Array[String] pucks
Array[String] rna_paths
String sb_path

# Optimus Inputs
# These values are passed through to the `optimus.Optimus` call in this workflow.
String cloud_provider = "gcp"
# Forwarded to Optimus as-is; defaults to true in this workflow.
Boolean is_slidetags = true
# NOTE(review): `input_id` is declared separately from the slide-tags `id` above;
# the Optimus call uses `input_id` (with a "_gex" suffix) — confirm both are needed.
String input_id
# Passed to Optimus as `gex_expected_cells`.
# NOTE(review): per the original author's note, this default looks like it was
# copied from Multiome — confirm it is appropriate here.
Int expected_cells = 3000 ## copied from Multiome ?
String counting_mode = "sn_rna"
# Gene-expression FASTQs; passed to Optimus as r1_fastq / r2_fastq / i1_fastq.
Array[File] gex_r1_fastq
Array[File] gex_r2_fastq
Array[File]? gex_i1_fastq
File tar_star_reference
File annotations_gtf
File? mt_genes
Int tenx_chemistry_version = 3
Int emptydrops_lower = 100
Boolean force_no_check = false
Boolean ignore_r1_read_length = false
String star_strand_mode = "Reverse"
Boolean count_exons = false
# Passed to Optimus as `whitelist`.
File gex_whitelist
String? soloMultiMappers
String? gex_nhash_id

# Docker image for the Slide-tags tasks (described in parameter_meta as "Docker image to use").
String docker = "us.gcr.io/broad-gotc-prod/slide-tags:1.1.0"
}

Expand All @@ -21,6 +46,33 @@ workflow SlideTags {
pucks: "Array of paths to puck files"
docker: "Docker image to use"
}

# Call the Optimus workflow
# `optimus` is the alias of the imported Optimus.wdl; the call is exposed as `Optimus`.
# Most arguments are forwarded unchanged from this workflow's inputs; the ones that
# are renamed or transformed on the way in are annotated below.
call optimus.Optimus as Optimus {
input:
cloud_provider = cloud_provider,
# NOTE(review): disk_starsolo is not forwarded, and no `disk_starsolo` input is
# declared in this workflow's input section — confirm whether the line below
# should be removed or the input added.
#disk_starsolo = disk_starsolo,
counting_mode = counting_mode,
# The gex_* FASTQ inputs map onto Optimus' r1/r2/i1 FASTQ inputs.
r1_fastq = gex_r1_fastq,
r2_fastq = gex_r2_fastq,
i1_fastq = gex_i1_fastq,
# Both the Optimus input_id and the BAM basename are this workflow's input_id with a "_gex" suffix.
input_id = input_id + "_gex",
output_bam_basename = input_id + "_gex",
gex_nhash_id = gex_nhash_id,
tar_star_reference = tar_star_reference,
annotations_gtf = annotations_gtf,
mt_genes = mt_genes,
tenx_chemistry_version = tenx_chemistry_version,
# This workflow's gex_whitelist is passed as Optimus' `whitelist`.
whitelist = gex_whitelist,
emptydrops_lower = emptydrops_lower,
force_no_check = force_no_check,
ignore_r1_read_length = ignore_r1_read_length,
star_strand_mode = star_strand_mode,
count_exons = count_exons,
soloMultiMappers = soloMultiMappers,
# This workflow's expected_cells is passed as Optimus' `gex_expected_cells`.
gex_expected_cells = expected_cells,
# Forwards this workflow's is_slidetags input (defaults to true in the input section).
is_slidetags = is_slidetags
}

call SpatialCount.count as spatial_count {
input:
Expand Down
20 changes: 10 additions & 10 deletions pipeline_versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ WholeGenomeReprocessing 3.3.3 2024-11-04
ExternalExomeReprocessing 3.3.3 2024-11-04
ExternalWholeGenomeReprocessing 2.3.3 2024-11-04
UltimaGenomicsJointGenotyping 1.2.2 2024-11-04
ReblockGVCF 2.3.2 2024-11-04
ReblockGVCF 2.4.0 2024-12-05
JointGenotypingByChromosomePartOne 1.5.2 2024-11-04
JointGenotypingByChromosomePartTwo 1.5.2 2024-11-04
JointGenotyping 1.7.2 2024-11-04
ExomeGermlineSingleSample 3.2.3 2024-11-04
WholeGenomeGermlineSingleSample 3.3.3 2024-11-04
UltimaGenomicsWholeGenomeGermline 1.1.2 2024-11-04
UltimaGenomicsWholeGenomeGermline 1.1.3 2024-12-05
VariantCalling 2.2.4 2024-11-04
GDCWholeGenomeSomaticSingleSample 1.3.4 2024-11-04
UltimaGenomicsWholeGenomeCramOnly 1.0.23 2024-11-04
Expand All @@ -22,19 +22,19 @@ ValidateChip 1.16.7 2024-11-04
Imputation 1.1.15 2024-11-04
MultiSampleArrays 1.6.2 2024-08-02
Arrays 2.6.30 2024-11-04
BroadInternalUltimaGenomics 1.1.2 2024-11-04
BroadInternalUltimaGenomics 1.1.3 2024-12-05
BroadInternalImputation 1.1.14 2024-11-04
BroadInternalArrays 1.1.14 2024-11-04
BroadInternalRNAWithUMIs 1.0.36 2024-11-04
RNAWithUMIsPipeline 1.0.18 2024-11-04
Multiome 5.9.3 2024-12-3
MultiSampleSmartSeq2SingleNucleus 2.0.6 2024-11-15
BuildIndices 3.1.0 2024-11-26
SlideSeq 3.4.7 2024-12-3
PairedTag 1.8.4 2024-12-3
atac 2.5.3 2024-11-22
Multiome 5.9.5 2025-01-13
MultiSampleSmartSeq2SingleNucleus 2.0.7 2025-01-13
BuildIndices 4.0.0 2025-01-17
SlideSeq 3.4.8 2025-01-13
PairedTag 1.9.1 2025-01-13
atac 2.5.4 2025-01-13
scATAC 1.3.2 2023-08-03
snm3C 4.0.4 2024-08-06
Optimus 7.8.4 2024-12-3
Optimus 7.9.1 2025-01-13
MultiSampleSmartSeq2 2.2.22 2024-09-11
SmartSeq2SingleSample 5.1.21 2024-09-11
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 2.4.0
2024-12-05 (Date of Last Commit)

* Renamed the ReblockGVCF workflow outputs `output_vcf` and `output_vcf_index` to `reblocked_gvcf` and `reblocked_gvcf_index`, respectively

# 2.3.2
2024-11-04 (Date of Last Commit)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import "../../../../../../tasks/broad/Utilities.wdl" as utils

workflow ReblockGVCF {

String pipeline_version = "2.3.2"
String pipeline_version = "2.4.0"


input {
Expand Down Expand Up @@ -70,8 +70,8 @@ workflow ReblockGVCF {
}

output {
File output_vcf = Reblock.output_vcf
File output_vcf_index = Reblock.output_vcf_index
File reblocked_gvcf = Reblock.output_vcf
File reblocked_gvcf_index = Reblock.output_vcf_index
}
meta {
allowNestedInputs: true
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 1.1.3
2024-12-05 (Date of Last Commit)

* Updated references to the renamed ReblockGVCF workflow outputs (`reblocked_gvcf` and `reblocked_gvcf_index`); this pipeline's own output names are unchanged

# 1.1.2
2024-11-04 (Date of Last Commit)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ workflow UltimaGenomicsWholeGenomeGermline {
filtering_model_no_gt_name: "String describing the optional filtering model; default set to rf_model_ignore_gt_incl_hpol_runs"
}

String pipeline_version = "1.1.2"
String pipeline_version = "1.1.3"


References references = alignment_references.references
Expand Down Expand Up @@ -202,8 +202,8 @@ workflow UltimaGenomicsWholeGenomeGermline {

# Outputs that will be retained when execution is complete
output {
File output_gvcf = ReblockGVCF.output_vcf
File output_gvcf_index = ReblockGVCF.output_vcf_index
File output_gvcf = ReblockGVCF.reblocked_gvcf
File output_gvcf_index = ReblockGVCF.reblocked_gvcf_index
File output_vcf = ConvertGVCFtoVCF.output_vcf
File output_vcf_index = ConvertGVCFtoVCF.output_vcf_index

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 1.1.3
2024-12-05 (Date of Last Commit)

* Updated the names of the outputs of the ReblockGVCF workflow; this does not affect this pipeline

# 1.1.2
2024-11-04 (Date of Last Commit)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import "../../../../../../../pipelines/broad/qc/CheckFingerprint.wdl" as FP

workflow BroadInternalUltimaGenomics {

String pipeline_version = "1.1.2"
String pipeline_version = "1.1.3"

input {

Expand Down
5 changes: 5 additions & 0 deletions pipelines/skylab/atac/atac.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 2.5.4
2025-01-13 (Date of Last Commit)

* Added reference_gtf_file to the output h5ad unstructured metadata

# 2.5.3
2024-11-22 (Date of Last Commit)

Expand Down
13 changes: 10 additions & 3 deletions pipelines/skylab/atac/atac.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@ workflow ATAC {
String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
}

String pipeline_version = "2.5.3"
String pipeline_version = "2.5.4"

# Determine docker prefix based on cloud provider
String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix

# Docker image names
String warp_tools_2_2_0 = "warp-tools:2.5.0"
String warp_tools_docker = "warp-tools:2.6.0"
String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919"
String samtools_docker = "samtools-dist-bwa:3.0.0"
String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311"
Expand Down Expand Up @@ -100,7 +100,7 @@ workflow ATAC {
output_base_name = input_id,
num_output_files = GetNumSplits.ranks_per_node_out,
whitelist = whitelist,
docker_path = docker_prefix + warp_tools_2_2_0
docker_path = docker_prefix + warp_tools_docker
}

scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) {
Expand Down Expand Up @@ -533,6 +533,7 @@ task CreateFragmentFile {
String atac_nhash_id = ""
String input_id
Int atac_expected_cells = 3000
String gtf_path = annotations_gtf
}

parameter_meta {
Expand Down Expand Up @@ -616,6 +617,12 @@ task CreateFragmentFile {
atac_data = ad.read_h5ad("temp_metrics.h5ad")
# Add nhash_id to h5ad file as unstructured metadata
atac_data.uns['NHashID'] = atac_nhash_id
# Add GTF to uns field
# Original path from args.annotation_file
gtf_path = "~{gtf_path}" # e.g., 'gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf'
atac_data.uns["reference_gtf_file"] = gtf_path
# calculate tsse metrics
snap.metrics.tsse(atac_data, atac_gtf)
# Write new atac file
Expand Down
6 changes: 6 additions & 0 deletions pipelines/skylab/build_indices/BuildIndices.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# 4.0.0
2025-01-17 (Date of Last Commit)

* Updated the WDL to use version 2.1.0 of the build-indices docker image, which includes new Python scripts for handling a custom marmoset GTF input
* Updated the WDL to run new marmoset scripts if the organism input is set to marmoset

# 3.1.0
2024-11-26 (Date of Last Commit)

Expand Down
67 changes: 48 additions & 19 deletions pipelines/skylab/build_indices/BuildIndices.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ workflow BuildIndices {
}

# version of this pipeline
String pipeline_version = "3.1.0"
String pipeline_version = "4.0.0"


parameter_meta {
Expand Down Expand Up @@ -114,29 +114,58 @@ task BuildStarSingleNucleus {
String annotation_gtf_modified = "modified_v~{gtf_annotation_version}.annotation.gtf"

command <<<
# Check that input GTF files contain input genome source, genome build version, and annotation version
if head -10 ~{annotation_gtf} | grep -qi ~{genome_build}
# First check for marmoset GTF and modify header
echo "checking for marmoset"
if [[ "~{organism}" == "marmoset" || "~{organism}" == "Marmoset" ]]
then
echo Genome version found in the GTF file
echo "marmoset is detected, running header modification"
python3 /script/create_marmoset_header_mt_genes.py \
~{annotation_gtf} > "/cromwell_root/header.gtf"
else
echo Error: Input genome version does not match version in GTF file
exit 1;
echo "marmoset is not detected"

# Check that input GTF files contain input genome source, genome build version, and annotation version
if head -10 ~{annotation_gtf} | grep -qi ~{genome_build}
then
echo Genome version found in the GTF file
else
echo Error: Input genome version does not match version in GTF file
exit 1;
fi

# Check that GTF file contains correct build source info in the first 10 lines of the GTF
if head -10 ~{annotation_gtf} | grep -qi ~{genome_source}
then
echo Source of genome build identified in the GTF file
else
echo Error: Source of genome build not identified in the GTF file
exit 1;
fi
set -eo pipefail
fi
# Check that GTF file contains correct build source info in the first 10 lines of the GTF
if head -10 ~{annotation_gtf} | grep -qi ~{genome_source}

if [[ "~{organism}" == "marmoset" || "~{organism}" == "Marmoset" ]]
then
echo Source of genome build identified in the GTF file
echo "marmoset detected, running marmoset GTF modification"
echo "Listing files to check for head.gtf"
ls
python3 /script/modify_gtf_marmoset.py \
--input-gtf "/cromwell_root/header.gtf" \
--output-gtf ~{annotation_gtf_modified} \
--species ~{organism}
echo "listing files, should see modified gtf"
ls
else
echo Error: Source of genome build not identified in the GTF file
exit 1;
echo "running GTF modification for non-marmoset"
python3 /script/modify_gtf.py \
--input-gtf ~{annotation_gtf} \
--output-gtf ~{annotation_gtf_modified} \
--biotypes ~{biotypes}
fi

set -eo pipefail

python3 /script/modify_gtf.py \
--input-gtf ~{annotation_gtf} \
--output-gtf ~{annotation_gtf_modified} \
--biotypes ~{biotypes}
# python3 /script/modify_gtf.py \
# --input-gtf ~{annotation_gtf} \
# --output-gtf ~{annotation_gtf_modified} \
# --biotypes ~{biotypes}

mkdir star
STAR --runMode genomeGenerate \
Expand All @@ -156,7 +185,7 @@ task BuildStarSingleNucleus {
}

runtime {
docker: "us.gcr.io/broad-gotc-prod/build-indices:2.0.0"
docker: "us.gcr.io/broad-gotc-prod/build-indices:2.1.0"
memory: "50 GiB"
disks: "local-disk ${disk} HDD"
disk: disk + " GB" # TES
Expand Down
11 changes: 11 additions & 0 deletions pipelines/skylab/multiome/Multiome.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
# 5.9.5
2025-01-13 (Date of Last Commit)

* Added a boolean variable is_slidetags; the default is false, but it is set to true when the Slide-Tags pipeline calls Optimus
* Added reference_gtf_file to the output h5ad unstructured metadata

# 5.9.4
2024-12-05 (Date of Last Commit)

* Moved the optional CellBender task to the Optimus.wdl

# 5.9.3
2024-12-03 (Date of Last Commit)

Expand Down
Loading

0 comments on commit 093faea

Please sign in to comment.