From ce90d5380c88f31f284839b2509f6ac16683ac0a Mon Sep 17 00:00:00 2001 From: aawdeh Date: Wed, 16 Oct 2024 14:49:56 -0400 Subject: [PATCH 01/91] add peak calling --- pipelines/skylab/atac/atac.wdl | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 8918a8d8ad..2516b8088b 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -537,6 +537,7 @@ task CreateFragmentFile { atac_gtf = "~{annotations_gtf}" preindex = "~{preindex}" atac_nhash_id = "~{atac_nhash_id}" + peakcalling_bool = true # calculate chrom size dictionary based on text file chrom_size_dict={} @@ -553,10 +554,10 @@ task CreateFragmentFile { import csv # extract CB or BB (if preindex is true) tag from bam file to create fragment file - if preindex == "true": - data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", "temp_metrics.h5ad", is_paired=True, barcode_tag="BB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) - elif preindex == "false": - data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", "temp_metrics.h5ad", is_paired=True, barcode_tag="CB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) + if preindex: + data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", "temp_metrics.h5ad", is_paired=True, barcode_tag="BB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) + else: + data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", "temp_metrics.h5ad", is_paired=True, barcode_tag="CB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) # Add NHashID to metrics nhash_ID_value = "XXX" @@ -575,7 +576,6 @@ task CreateFragmentFile { with open(csv_file_path, mode='w', newline='') as file: writer = csv.writer(file) writer.writerows(flattened_data) # Write data - print(f"Dictionary successfully written to {csv_file_path}") atac_data = ad.read_h5ad("temp_metrics.h5ad") @@ -583,9 +583,23 @@ task CreateFragmentFile { atac_data.uns['NHashID'] = atac_nhash_id # calculate tsse metrics snap.metrics.tsse(atac_data, atac_gtf) - # Write new atac file atac_data.write_h5ad("~{bam_base_name}.metrics.h5ad") + # peak calling? https://kzhang.org/SnapATAC2/tutorials/diff.html#Peak-calling-at-the-cluster-level + if peakcalling_bool: + print("Peak calling using MACS3") + snap.tl.macs3(atac_data) # were not including cell type? + print("test") + print(data.uns['macs3']) + + print("Writing peak files") + for k, peaks in data.uns['macs3'].items(): + peaks.to_csv(f'{k}.NarrowPeak', sep='\t', header=False, index=False) + ls + + # Write atac file + print("Writing h5ad file") + atac_data.write_h5ad("~{bam_base_name}.peaks.h5ad") CODE >>> From 82c5c454daed51a1a8c748b61b78c54b3201cea1 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Wed, 16 Oct 2024 15:32:09 -0400 Subject: [PATCH 02/91] added output for peaks --- pipelines/skylab/atac/atac.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 2516b8088b..61f38361d6 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -598,7 +598,7 @@ task CreateFragmentFile { ls # Write atac file - print("Writing h5ad file") + print("Writing h5ad file with peaks") atac_data.write_h5ad("~{bam_base_name}.peaks.h5ad") CODE >>> @@ -614,6 +614,7 @@ task CreateFragmentFile { output { File fragment_file = "~{bam_base_name}.fragments.tsv" File Snap_metrics = "~{bam_base_name}.metrics.h5ad" + File peaks_h5 = "~{bam_base_name}.peaks.h5ad" # test File atac_library_metrics = "~{bam_base_name}_~{atac_nhash_id}.atac_metrics.csv" } } From 6e833d2f3cf02a3ad7cf865651679aee0c8f914a Mon Sep 17 00:00:00 2001 From: aawdeh Date: Wed, 16 Oct 2024 16:59:52 -0400 Subject: [PATCH 03/91] fix --- pipelines/skylab/atac/atac.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 61f38361d6..c547c0ce8e 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -537,7 +537,7 @@ task CreateFragmentFile { atac_gtf = "~{annotations_gtf}" preindex = "~{preindex}" atac_nhash_id = "~{atac_nhash_id}" - peakcalling_bool = true + peakcalling_bool = True # calculate chrom size dictionary based on text file chrom_size_dict={} @@ -554,7 +554,7 @@ task CreateFragmentFile { import csv # extract CB or BB (if preindex is true) tag from bam file to create fragment file - if preindex: + if preindex == "true": data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", "temp_metrics.h5ad", is_paired=True, barcode_tag="BB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) else: data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", "temp_metrics.h5ad", is_paired=True, barcode_tag="CB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) From 76286b482f916296c02f21e6c703b85be8da2c6b Mon Sep 17 00:00:00 2001 From: aawdeh Date: Wed, 16 Oct 2024 17:45:15 -0400 Subject: [PATCH 04/91] fix --- pipelines/skylab/atac/atac.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index c547c0ce8e..15e938c3e5 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -590,10 +590,10 @@ task CreateFragmentFile { print("Peak calling using MACS3") snap.tl.macs3(atac_data) # were not including cell type? print("test") - print(data.uns['macs3']) + print(atac_data.uns['macs3']) print("Writing peak files") - for k, peaks in data.uns['macs3'].items(): + for k, peaks in atac_data.uns['macs3'].items(): peaks.to_csv(f'{k}.NarrowPeak', sep='\t', header=False, index=False) ls From 90805ca0736cdce0c3c0e0b16c565a89ceaea06d Mon Sep 17 00:00:00 2001 From: aawdeh Date: Wed, 16 Oct 2024 18:33:48 -0400 Subject: [PATCH 05/91] test --- pipelines/skylab/atac/atac.wdl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 15e938c3e5..6073ac0cfb 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -590,11 +590,6 @@ task CreateFragmentFile { print("Peak calling using MACS3") snap.tl.macs3(atac_data) # were not including cell type? print("test") - print(atac_data.uns['macs3']) - - print("Writing peak files") - for k, peaks in atac_data.uns['macs3'].items(): - peaks.to_csv(f'{k}.NarrowPeak', sep='\t', header=False, index=False) ls # Write atac file From b27011c80bd5cea6cb3d79e03b68e2494754fb52 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Thu, 17 Oct 2024 11:27:40 -0400 Subject: [PATCH 06/91] remove ls --- pipelines/skylab/atac/atac.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 6073ac0cfb..2326d27b64 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -590,7 +590,6 @@ task CreateFragmentFile { print("Peak calling using MACS3") snap.tl.macs3(atac_data) # were not including cell type? print("test") - ls # Write atac file print("Writing h5ad file with peaks") From c1f0e5709aedf2a4460695619575fb4e45a599ba Mon Sep 17 00:00:00 2001 From: aawdeh Date: Fri, 18 Oct 2024 09:41:05 -0400 Subject: [PATCH 07/91] test --- pipelines/skylab/atac/atac.wdl | 51 ++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 2326d27b64..e354ca13a8 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -550,6 +550,7 @@ task CreateFragmentFile { import snapatac2.preprocessing as pp import snapatac2 as snap import anndata as ad + import scanpy as sc from collections import OrderedDict import csv @@ -585,10 +586,56 @@ task CreateFragmentFile { snap.metrics.tsse(atac_data, atac_gtf) atac_data.write_h5ad("~{bam_base_name}.metrics.h5ad") - # peak calling? https://kzhang.org/SnapATAC2/tutorials/diff.html#Peak-calling-at-the-cluster-level + # Peak calling if peakcalling_bool: + print("Peak calling") + # Calculate and plot the size distribution of fragments + print("1.1 Calculating fragment size distribution") + snap.pl.frag_size_distr(atac_data) + # Need to parameterize + # Filter cells + print("1.2 Filtering cells") + snap.pp.filter_cells(atac_data, min_counts=5000, min_tsse=10, max_counts=100000) + print(atac_data) + # Create a cell by bin matrix containing insertion counts across genome-wide 500-bp bins. + print("1.3 Creating cell by bin matrix") + snap.pp.add_tile_matrix(atac_data) + print(atac_data) + # Feature selection + print("1.4 Feature selection") + snap.pp.select_features(atac_data) + # Run customized scrublet algorithm to identify potential doublets + print("1.5 Run scrublet to identify potential doublets") + snap.pp.scrublet(atac_data) + # Employ spectral embedding for dimensionality reduction + print("1.6 Employ spectral embedding for dimensionality reduction") + snap.tl.spectral(atac_data) + # Filter doublets based on scrublet scores + print("1.7 Filter doublets based on scrublet scores") + snap.pp.filter_doublets(atac_data, probability_threshold=0.5) + # Perform graph-based clustering to identify cell clusters. + # Build a k-nearest neighbour graph using snap.pp.knn + print("1.8 Perform knn graph-based clustering to identify cell clusters") + snap.pp.knn(atac_data) + # Use the Leiden community detection algorithm to identify densely-connected subgraphs/clusters in the graph + print("1.9 Use the Leiden community detection algorithm to identify densely-connected subgraphs/clusters in the graph") + snap.tl.leiden(atac_data) + # Create the cell by gene activity matrix + print("1.10 Create the cell by gene activity matrix") + gene_mat = snap.pp.make_gene_matrix(atac_data, gene_anno=atac_gtf) + # Normalize the gene matrix + print("1.11 Normalize the gene matrix") + gene_mat.obs['leiden'] = atac_data.obs['leiden'] + sc.pp.normalize_total(gene_mat) + sc.pp.log1p(gene_mat) + sc.tl.rank_genes_groups(gene_mat, groupby="leiden", method="wilcoxon") + + for i in np.unique(gene_mat.obs['leiden']): + markers = sc.get.rank_genes_groups_df(gene_mat, group=i).head(7)['names'] + print(f"Cluster {i}: {', '.join(markers)}") + print("Peak calling using MACS3") - snap.tl.macs3(atac_data) # were not including cell type? + snap.tl.macs3(atac_data, groupby='leiden') print("test") # Write atac file From 779fe9bd27d4cb0cb8efad0e2fc9842f9685c765 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Fri, 18 Oct 2024 13:23:45 -0400 Subject: [PATCH 08/91] test --- pipelines/skylab/atac/atac.wdl | 71 ++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index e354ca13a8..87b2cf9e83 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -588,44 +588,63 @@ task CreateFragmentFile { # Peak calling if peakcalling_bool: - print("Peak calling") + print("Peak calling starting...") + # Calculate and plot the size distribution of fragments - print("1.1 Calculating fragment size distribution") + print("Calculating fragment size distribution") snap.pl.frag_size_distr(atac_data) - # Need to parameterize - # Filter cells - print("1.2 Filtering cells") + print(atac_data) + + # Filter cells -- Need to parameterize + print("Filtering cells") snap.pp.filter_cells(atac_data, min_counts=5000, min_tsse=10, max_counts=100000) print(atac_data) + # Create a cell by bin matrix containing insertion counts across genome-wide 500-bp bins. - print("1.3 Creating cell by bin matrix") - snap.pp.add_tile_matrix(atac_data) - print(atac_data) + print("Creating cell by bin matrix") + atac_data_mod = snap.pp.add_tile_matrix(atac_data) + atac_data_mod.obsm = atac_data.obsm + print(atac_data_mod) + # Feature selection - print("1.4 Feature selection") - snap.pp.select_features(atac_data) + print("Feature selection") + snap.pp.select_features(atac_data_mod) + print(atac_data_mod) + # Run customized scrublet algorithm to identify potential doublets - print("1.5 Run scrublet to identify potential doublets") - snap.pp.scrublet(atac_data) + print("Run scrublet to identify potential doublets") + snap.pp.scrublet(atac_data_mod) + print(atac_data_mod) + # Employ spectral embedding for dimensionality reduction - print("1.6 Employ spectral embedding for dimensionality reduction") - snap.tl.spectral(atac_data) + print("Employ spectral embedding for dimensionality reduction") + snap.tl.spectral(atac_data_mod) + print(atac_data_mod) + # Filter doublets based on scrublet scores - print("1.7 Filter doublets based on scrublet scores") - snap.pp.filter_doublets(atac_data, probability_threshold=0.5) + print("Filter doublets based on scrublet scores") + snap.pp.filter_doublets(atac_data_mod, probability_threshold=0.5) + print(atac_data_mod) + # Perform graph-based clustering to identify cell clusters. # Build a k-nearest neighbour graph using snap.pp.knn - print("1.8 Perform knn graph-based clustering to identify cell clusters") - snap.pp.knn(atac_data) + print("Perform knn graph-based clustering to identify cell clusters") + snap.pp.knn(atac_data_mod) + print(atac_data_mod) + # Use the Leiden community detection algorithm to identify densely-connected subgraphs/clusters in the graph - print("1.9 Use the Leiden community detection algorithm to identify densely-connected subgraphs/clusters in the graph") - snap.tl.leiden(atac_data) + print("Use the Leiden community detection algorithm to identify densely-connected subgraphs/clusters in the graph") + snap.tl.leiden(atac_data_mod) + print(atac_data_mod) + # Create the cell by gene activity matrix - print("1.10 Create the cell by gene activity matrix") - gene_mat = snap.pp.make_gene_matrix(atac_data, gene_anno=atac_gtf) + print("Create the cell by gene activity matrix") + gene_mat = snap.pp.make_gene_matrix(atac_data_mod, gene_anno=atac_gtf) + print(atac_data_mod) + # Normalize the gene matrix - print("1.11 Normalize the gene matrix") - gene_mat.obs['leiden'] = atac_data.obs['leiden'] + print("Normalize the gene matrix") + gene_mat.obs['leiden'] = atac_data_mod.obs['leiden'] sc.pp.normalize_total(gene_mat) sc.pp.log1p(gene_mat) sc.tl.rank_genes_groups(gene_mat, groupby="leiden", method="wilcoxon") @@ -635,7 +654,9 @@ task CreateFragmentFile { print(f"Cluster {i}: {', '.join(markers)}") print("Peak calling using MACS3") - snap.tl.macs3(atac_data, groupby='leiden') + snap.tl.macs3(atac_data_mod, groupby='leiden') + + print(atac_data_mod.uns['macs3']) print("test") # Write atac file From 5743eff9753a3ccb183730ea275c59003aa4c89c Mon Sep 17 00:00:00 2001 From: aawdeh Date: Fri, 18 Oct 2024 13:38:54 -0400 Subject: [PATCH 09/91] test --- pipelines/skylab/atac/atac.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 87b2cf9e83..1d3a03d2fe 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -527,7 +527,8 @@ task CreateFragmentFile { command <<< set -e pipefail - + pip3 install snapatac2==2.7.0 scanpy + python3 < Date: Fri, 18 Oct 2024 14:09:06 -0400 Subject: [PATCH 10/91] test --- pipelines/skylab/atac/atac.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 1d3a03d2fe..ab32f30397 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -605,6 +605,7 @@ task CreateFragmentFile { print("Creating cell by bin matrix") atac_data_mod = snap.pp.add_tile_matrix(atac_data) atac_data_mod.obsm = atac_data.obsm + new_adata.uns["reference_sequences"] = atac_data.uns["reference_sequences"] print(atac_data_mod) # Feature selection From 1de1c85103a0edb23d234bce4adc714f7d000bc2 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Fri, 18 Oct 2024 15:38:37 -0400 Subject: [PATCH 11/91] test --- pipelines/skylab/atac/atac.wdl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index ab32f30397..d7d2e5b34f 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -526,7 +526,9 @@ task CreateFragmentFile { } command <<< - set -e pipefail + set -eou pipefail + set -x + # install packages -- need to add to the docker pip3 install snapatac2==2.7.0 scanpy python3 < Date: Fri, 18 Oct 2024 15:59:36 -0400 Subject: [PATCH 12/91] test --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index d7d2e5b34f..90953769fa 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -540,7 +540,7 @@ task CreateFragmentFile { atac_gtf = "~{annotations_gtf}" preindex = "~{preindex}" atac_nhash_id = "~{atac_nhash_id}" - peakcalling_bool = False + peakcalling_bool = True # calculate chrom size dictionary based on text file chrom_size_dict={} From 13a44f9ff6eef6356d34a528f2aeb88f7a348581 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 21 Oct 2024 10:11:17 -0400 Subject: [PATCH 13/91] test --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 90953769fa..ae563eb49a 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -607,7 +607,7 @@ task CreateFragmentFile { print("Creating cell by bin matrix") atac_data_mod = snap.pp.add_tile_matrix(atac_data) print("set obsm") - atac_data_mod.obsm = atac_data.obsm + atac_data_mod.obsm["fragment_paired"] = atac_data.obsm["fragment_paired"] print("set uns") new_adata.uns["reference_sequences"] = atac_data.uns["reference_sequences"] print(atac_data_mod) From 3f12b9e3dc84031a401132792d6c7e6ff9549c96 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 21 Oct 2024 16:38:31 -0400 Subject: [PATCH 14/91] made another task --- pipelines/skylab/atac/atac.wdl | 198 +++++++++++++++++++-------------- 1 file changed, 113 insertions(+), 85 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index ae563eb49a..db4b94de64 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -155,6 +155,13 @@ workflow ATAC { } } + call PeakCalling { + input: + bam = BWAPairedEndAlignment.bam_aligned_output, + annotations_gtf = annotations_gtf, + metrics_h5ad = CreateFragmentFile.Snap_metrics + } + File bam_aligned_output_atac = select_first([BBTag.bb_bam, BWAPairedEndAlignment.bam_aligned_output]) File fragment_file_atac = select_first([BB_fragment.fragment_file, CreateFragmentFile.fragment_file]) File snap_metrics_atac = select_first([BB_fragment.Snap_metrics,CreateFragmentFile.Snap_metrics]) @@ -504,7 +511,6 @@ task CreateFragmentFile { File bam File annotations_gtf File chrom_sizes - File annotations_gtf Boolean preindex Int disk_size = 500 Int mem_size = 64 @@ -528,9 +534,7 @@ task CreateFragmentFile { command <<< set -eou pipefail set -x - # install packages -- need to add to the docker - pip3 install snapatac2==2.7.0 scanpy - + python3 <>> - # Peak calling - if peakcalling_bool: - print("Peak calling starting...") + runtime { + docker: docker_path + disks: "local-disk ${disk_size} SSD" + memory: "${mem_size} GiB" + cpu: nthreads + cpuPlatform: cpuPlatform + } + + output { + File fragment_file = "~{bam_base_name}.fragments.tsv" + File Snap_metrics = "~{bam_base_name}.metrics.h5ad" + File atac_library_metrics = "~{bam_base_name}_~{atac_nhash_id}.atac_metrics.csv" + } +} - # Calculate and plot the size distribution of fragments - print("Calculating fragment size distribution") - snap.pl.frag_size_distr(atac_data) - print(atac_data) - # Filter cells -- Need to parameterize - print("Filtering cells") - snap.pp.filter_cells(atac_data, min_counts=5000, min_tsse=10, max_counts=100000) - print(atac_data) +task PeakCalling { + input { + File bam + File annotations_gtf + File metrics_h5ad + } + String bam_base_name = basename(bam, ".bam") + + command <<< + set -euo pipefail + set -x + + # install packages -- need to add to the docker + pip3 install snapatac2==2.7.0 scanpy + + python3 <>> - - runtime { - docker: docker_path - disks: "local-disk ${disk_size} SSD" - memory: "${mem_size} GiB" - cpu: nthreads - cpuPlatform: cpuPlatform - } - + output { - File fragment_file = "~{bam_base_name}.fragments.tsv" - File Snap_metrics = "~{bam_base_name}.metrics.h5ad" - File peaks_h5 = "~{bam_base_name}.peaks.h5ad" # test - File atac_library_metrics = "~{bam_base_name}_~{atac_nhash_id}.atac_metrics.csv" + File peaks_h5ad = "~{bam_base_name}.peaks.h5ad" } -} + +} \ No newline at end of file From 08b2f2410dca33918a15456bd4963959cbbc6170 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 21 Oct 2024 17:04:27 -0400 Subject: [PATCH 15/91] test --- pipelines/skylab/atac/atac.wdl | 38 +++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index db4b94de64..1a8a5533bd 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -153,14 +153,14 @@ workflow ATAC { atac_nhash_id = atac_nhash_id } - } - - call PeakCalling { + call PeakCalling { input: bam = BWAPairedEndAlignment.bam_aligned_output, annotations_gtf = annotations_gtf, - metrics_h5ad = CreateFragmentFile.Snap_metrics + metrics_h5ad = CreateFragmentFile.Snap_metrics, + docker_path = docker_prefix + snap_atac_docker } + } File bam_aligned_output_atac = select_first([BBTag.bb_bam, BWAPairedEndAlignment.bam_aligned_output]) File fragment_file_atac = select_first([BB_fragment.fragment_file, CreateFragmentFile.fragment_file]) @@ -267,7 +267,6 @@ task GetNumSplits { } } - # trim read 1 and read 2 adapter sequeunce with cutadapt task TrimAdapters { input { @@ -615,8 +614,21 @@ task PeakCalling { File bam File annotations_gtf File metrics_h5ad + # copied from previous task + Int disk_size = 500 + Int mem_size = 64 + Int nthreads = 4 + String docker_path } String bam_base_name = basename(bam, ".bam") + + parameter_meta { + bam: "Aligned bam with CB in CB tag. This is the output of the BWAPairedEndAlignment task." + annotations_gtf: "GTF for SnapATAC2 to calculate TSS sites of fragment file." + disk_size: "Disk size used in create fragment file step." + mem_size: "The size of memory used in create fragment file." + docker_path: "The docker image path containing the runtime environment for this task" + } command <<< set -euo pipefail @@ -627,12 +639,17 @@ task PeakCalling { python3 <>> + runtime { + docker: docker_path + disks: "local-disk ${disk_size} SSD" + memory: "${mem_size} GiB" + cpu: nthreads + } + output { File peaks_h5ad = "~{bam_base_name}.peaks.h5ad" } From 80c1416b5debd07775a8fefd0b5f0d3038a2bf6c Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 21 Oct 2024 17:19:26 -0400 Subject: [PATCH 16/91] test --- pipelines/skylab/atac/atac.wdl | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 1a8a5533bd..d0ef3ee92e 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -510,6 +510,7 @@ task CreateFragmentFile { File bam File annotations_gtf File chrom_sizes + File annotations_gtf Boolean preindex Int disk_size = 500 Int mem_size = 64 @@ -531,8 +532,7 @@ task CreateFragmentFile { } command <<< - set -eou pipefail - set -x + set -e pipefail python3 <>> @@ -608,7 +609,6 @@ task CreateFragmentFile { } } - task PeakCalling { input { File bam @@ -666,8 +666,10 @@ task PeakCalling { atac_data_mod = snap.pp.add_tile_matrix(atac_data) print("set obsm") atac_data_mod.obsm["fragment_paired"] = atac_data.obsm["fragment_paired"] - print("set uns") - new_adata.uns["reference_sequences"] = atac_data.uns["reference_sequences"] + print("set all uns") + for key in atac_data.uns.keys(): + print("set ",key) + new_adata.uns[key] = atac_data.uns[key] print(atac_data_mod) # Feature selection From b5191e526d28b1b8a83dc89c7a792b62d089351e Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 21 Oct 2024 17:44:49 -0400 Subject: [PATCH 17/91] test --- pipelines/skylab/atac/atac.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index d0ef3ee92e..6a39a3472d 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -663,13 +663,13 @@ task PeakCalling { # Create a cell by bin matrix containing insertion counts across genome-wide 500-bp bins. print("Creating cell by bin matrix") - atac_data_mod = snap.pp.add_tile_matrix(atac_data) + atac_data_mod = snap.pp.add_tile_matrix(atac_data, inplace=False) print("set obsm") atac_data_mod.obsm["fragment_paired"] = atac_data.obsm["fragment_paired"] print("set all uns") for key in atac_data.uns.keys(): print("set ",key) - new_adata.uns[key] = atac_data.uns[key] + atac_data_mod.uns[key] = atac_data.uns[key] print(atac_data_mod) # Feature selection From 4725080516d8cffaa47382c659c3989bbb72775e Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 21 Oct 2024 18:49:20 -0400 Subject: [PATCH 18/91] test --- pipelines/skylab/atac/atac.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 6a39a3472d..28d746600d 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -642,6 +642,7 @@ task PeakCalling { # use snap atac2 import snapatac2 as snap import scanpy as sc + import numpy as np bam = "~{bam}" bam_base_name = "~{bam_base_name}" From f139f84743b66aefe5cab0a0c16d7e6838ed7179 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 22 Oct 2024 08:51:50 -0400 Subject: [PATCH 19/91] test --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 28d746600d..8af59d2a3f 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -721,7 +721,7 @@ task PeakCalling { print(f"Cluster {i}: {', '.join(markers)}") print("Peak calling using MACS3") - snap.tl.macs3(atac_data_mod, groupby='leiden') + snap.tl.macs3(atac_data_mod, groupby='leiden', n_jobs=1) atac_data_mod.write_h5ad("~{bam_base_name}.peaks.h5ad") print("test") From 9a0ea9b47e5de7dec7d968a001894247351d225e Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 22 Oct 2024 10:08:42 -0400 Subject: [PATCH 20/91] test --- pipelines/skylab/atac/atac.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 8af59d2a3f..ccb474acc2 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -720,8 +720,9 @@ task PeakCalling { markers = sc.get.rank_genes_groups_df(gene_mat, group=i).head(7)['names'] print(f"Cluster {i}: {', '.join(markers)}") - print("Peak calling using MACS3") - snap.tl.macs3(atac_data_mod, groupby='leiden', n_jobs=1) + if __name__ == '__main__': + print("Peak calling using MACS3") + snap.tl.macs3(atac_data_mod, groupby='leiden', n_jobs=1) atac_data_mod.write_h5ad("~{bam_base_name}.peaks.h5ad") print("test") From 04cf0d2a91df97d737a70ec562fe61cc1301fe15 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 22 Oct 2024 10:28:26 -0400 Subject: [PATCH 21/91] test --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index ccb474acc2..5227c534ff 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -722,7 +722,7 @@ task PeakCalling { if __name__ == '__main__': print("Peak calling using MACS3") - snap.tl.macs3(atac_data_mod, groupby='leiden', n_jobs=1) + snap.tl.macs3(atac_data_mod, groupby='leiden', n_jobs=8) atac_data_mod.write_h5ad("~{bam_base_name}.peaks.h5ad") print("test") From f9bc8718b488bc13d1e20028fe56a2180d81013b Mon Sep 17 00:00:00 2001 From: aawdeh Date: Thu, 24 Oct 2024 12:58:18 -0400 Subject: [PATCH 22/91] change a line --- pipelines/skylab/atac/atac.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 5227c534ff..9576c42602 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -637,6 +637,9 @@ task PeakCalling { # install packages -- need to add to the docker pip3 install snapatac2==2.7.0 scanpy + file_to_change=/usr/local/lib/python3.10/site-packages/snapatac2/tools/_call_peaks.py + sed -i '164s/.*/\tpeaks = [_call_peaks(x) for x in fragments.values()]/' $file_to_change + python3 < Date: Thu, 24 Oct 2024 14:44:52 -0400 Subject: [PATCH 23/91] test --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 9576c42602..5c57a7c6ba 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -638,7 +638,7 @@ task PeakCalling { pip3 install snapatac2==2.7.0 scanpy file_to_change=/usr/local/lib/python3.10/site-packages/snapatac2/tools/_call_peaks.py - sed -i '164s/.*/\tpeaks = [_call_peaks(x) for x in fragments.values()]/' $file_to_change + sed -i '164s/.*/peaks = [_call_peaks(x) for x in fragments.values()]/' $file_to_change python3 < Date: Thu, 24 Oct 2024 15:10:32 -0400 Subject: [PATCH 24/91] test --- pipelines/skylab/atac/atac.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 5c57a7c6ba..60bd780f22 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -638,7 +638,7 @@ task PeakCalling { pip3 install snapatac2==2.7.0 scanpy file_to_change=/usr/local/lib/python3.10/site-packages/snapatac2/tools/_call_peaks.py - sed -i '164s/.*/peaks = [_call_peaks(x) for x in fragments.values()]/' $file_to_change + sed -i '164s/.*/\t\t\t\tpeaks = [_call_peaks(x) for x in fragments.values()]/' $file_to_change python3 < Date: Thu, 24 Oct 2024 16:00:38 -0400 Subject: [PATCH 25/91] test --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 60bd780f22..563796f72f 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -638,7 +638,7 @@ task PeakCalling { pip3 install snapatac2==2.7.0 scanpy file_to_change=/usr/local/lib/python3.10/site-packages/snapatac2/tools/_call_peaks.py - sed -i '164s/.*/\t\t\t\tpeaks = [_call_peaks(x) for x in fragments.values()]/' $file_to_change + sed -i '164s/.*/ peaks = [_call_peaks(x) for x in fragments.values()]/' $file_to_change python3 < Date: Thu, 24 Oct 2024 18:50:22 -0400 Subject: [PATCH 26/91] test --- pipelines/skylab/atac/atac.wdl | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 563796f72f..e11f6853c2 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -646,6 +646,7 @@ task PeakCalling { import snapatac2 as snap import scanpy as sc import numpy as np + import polars as pl bam = "~{bam}" bam_base_name = "~{bam_base_name}" @@ -723,11 +724,29 @@ task PeakCalling { markers = sc.get.rank_genes_groups_df(gene_mat, group=i).head(7)['names'] print(f"Cluster {i}: {', '.join(markers)}") - if __name__ == '__main__': - print("Peak calling using MACS3") - snap.tl.macs3(atac_data_mod, groupby='leiden', n_jobs=8) - - atac_data_mod.write_h5ad("~{bam_base_name}.peaks.h5ad") + + print("Peak calling using MACS3") + snap.tl.macs3(atac_data_mod, groupby='leiden', n_jobs=8) + + print("NarrowPeak") + for k, peaks in atac_data_mod.uns['macs3'].items(): + peaks.to_csv(f'{k}.NarrowPeak', sep='\t', header=False, index=False) + + print("test") + snap.ex.export_coverage( + atac_data_mod, + groupby='leiden', + ) + + print("Convert pl.DataFrame to pandas DataFrame") + # Convert pl.DataFrame to pandas DataFrame + for key in new_adata.uns.keys(): + if isinstance(new_adata.uns[key], pl.DataFrame): + print(key) + new_adata.uns[key] = new_adata.uns[key].to_pandas() + + print("Write into h5ad file") + atac_data_mod.write_h5ad("~{bam_base_name}.peaks.h5ad", compression='gzip') print("test") CODE From b15b146281e944f4a809ac364870a84fc387b458 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Thu, 24 Oct 2024 18:50:52 -0400 Subject: [PATCH 27/91] test --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index e11f6853c2..f645309639 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -746,7 +746,7 @@ task PeakCalling { new_adata.uns[key] = new_adata.uns[key].to_pandas() print("Write into h5ad file") - atac_data_mod.write_h5ad("~{bam_base_name}.peaks.h5ad", compression='gzip') + atac_data_mod.write_h5ad("~{bam_base_name}.peaks.h5ad") print("test") CODE From ff36c155c5b010c117ed3d34c9574344f54e0fe2 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Thu, 24 Oct 2024 18:51:29 -0400 Subject: [PATCH 28/91] test --- pipelines/skylab/atac/atac.wdl | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index f645309639..dc41406540 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -724,20 +724,9 @@ task PeakCalling { markers = sc.get.rank_genes_groups_df(gene_mat, group=i).head(7)['names'] print(f"Cluster {i}: {', '.join(markers)}") - print("Peak calling using MACS3") snap.tl.macs3(atac_data_mod, groupby='leiden', n_jobs=8) - print("NarrowPeak") - for k, peaks in atac_data_mod.uns['macs3'].items(): - peaks.to_csv(f'{k}.NarrowPeak', sep='\t', header=False, index=False) - - print("test") - snap.ex.export_coverage( - atac_data_mod, - groupby='leiden', - ) - print("Convert pl.DataFrame to pandas DataFrame") # Convert pl.DataFrame to pandas DataFrame for key in new_adata.uns.keys(): From 27a419678613a1a624cdbcab2f7426a0476a9e34 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Thu, 24 Oct 2024 19:49:17 -0400 Subject: [PATCH 29/91] test --- pipelines/skylab/atac/atac.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index dc41406540..41abc21bcd 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -729,10 +729,10 @@ task PeakCalling { print("Convert pl.DataFrame to pandas DataFrame") # Convert pl.DataFrame to pandas DataFrame - for key in new_adata.uns.keys(): - if isinstance(new_adata.uns[key], pl.DataFrame): + for key in atac_data_mod.uns.keys(): + if isinstance(atac_data_mod.uns[key], pl.DataFrame): print(key) - new_adata.uns[key] = new_adata.uns[key].to_pandas() + atac_data_mod.uns[key] = atac_data_mod.uns[key].to_pandas() print("Write into h5ad file") atac_data_mod.write_h5ad("~{bam_base_name}.peaks.h5ad") From 987b5d2d6f0bb8201d13e7fd1936eb992dba03e0 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 28 Oct 2024 15:02:49 -0400 Subject: [PATCH 30/91] Add peakcall to task --- pipelines/skylab/atac/atac.wdl | 96 +++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 41abc21bcd..6b563244e9 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -532,7 +532,14 @@ task CreateFragmentFile { } command <<< - set -e pipefail + set -euo pipefail + set -x + + # install packages -- need to add to the docker + pip3 install snapatac2==2.7.0 scanpy + + file_to_change=/usr/local/lib/python3.10/site-packages/snapatac2/tools/_call_peaks.py + sed -i '164s/.*/ peaks = [_call_peaks(x) for x in fragments.values()]/' $file_to_change python3 <>> From cce58417212c7d576a7a049cd66579d010bf3cae Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 28 Oct 2024 15:21:15 -0400 Subject: [PATCH 31/91] test --- pipelines/skylab/atac/atac.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 6b563244e9..265bb5e90e 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -562,6 +562,9 @@ task CreateFragmentFile { # use snap atac2 import snapatac2.preprocessing as pp import snapatac2 as snap + import scanpy as sc + import numpy as np + import polars as pl import anndata as ad from collections import OrderedDict import csv From bf0e9ce33829aafd1166d96fc08c6183effda6da Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 28 Oct 2024 17:57:44 -0400 Subject: [PATCH 32/91] add parameters --- pipelines/skylab/atac/atac.wdl | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 265bb5e90e..974578dbd7 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -710,12 +710,17 @@ task PeakCalling { input { File bam File annotations_gtf - File metrics_h5ad - # copied from previous task + File metrics_h5ad + # SnapATAC2 parameters + Int min_counts = 5000 + Int min_tsse = 10 + Int max_counts = 100000 + Int doublet_probability_threshold = 0.5 + # Runtime attributes/docker + String docker_path Int disk_size = 500 Int mem_size = 64 - Int nthreads = 4 - String docker_path + Int nthreads = 4 } String bam_base_name = basename(bam, ".bam") @@ -749,6 +754,10 @@ task PeakCalling { bam_base_name = "~{bam_base_name}" atac_gtf = "~{annotations_gtf}" metrics_h5ad = "~{metrics_h5ad}" + min_counts = "~{min_counts}" + min_tsse = "~{min_tsse}" + max_counts = "~{max_counts}" + probability_threshold = "~{doublet_probability_threshold}" print("Peak calling starting...") atac_data = snap.read(metrics_h5ad) @@ -760,7 +769,7 @@ task PeakCalling { # Filter cells -- Need to parameterize print("Filtering cells") - snap.pp.filter_cells(atac_data, min_counts=5000, min_tsse=10, max_counts=100000) + snap.pp.filter_cells(atac_data, min_counts=min_counts, min_tsse=min_tsse, max_counts=max_counts) print(atac_data) # Create a cell by bin matrix containing insertion counts across genome-wide 500-bp bins. @@ -791,7 +800,7 @@ task PeakCalling { # Filter doublets based on scrublet scores print("Filter doublets based on scrublet scores") - snap.pp.filter_doublets(atac_data_mod, probability_threshold=0.5) + snap.pp.filter_doublets(atac_data_mod, probability_threshold=probability_threshold) print(atac_data_mod) # Perform graph-based clustering to identify cell clusters. From 18f77d8d0189ba30ec445b88ace4ba528be8882b Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 4 Nov 2024 09:46:07 -0500 Subject: [PATCH 33/91] test --- pipelines/skylab/atac/atac.wdl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 89646be7ea..8ac108e959 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -715,7 +715,6 @@ task PeakCalling { Int min_counts = 5000 Int min_tsse = 10 Int max_counts = 100000 - Int doublet_probability_threshold = 0.5 # Runtime attributes/docker String docker_path Int disk_size = 500 @@ -757,7 +756,6 @@ task PeakCalling { min_counts = "~{min_counts}" min_tsse = "~{min_tsse}" max_counts = "~{max_counts}" - probability_threshold = "~{doublet_probability_threshold}" print("Peak calling starting...") atac_data = snap.read(metrics_h5ad) @@ -800,7 +798,7 @@ task PeakCalling { # Filter doublets based on scrublet scores print("Filter doublets based on scrublet scores") - snap.pp.filter_doublets(atac_data_mod, probability_threshold=probability_threshold) + snap.pp.filter_doublets(atac_data_mod, probability_threshold=0.5) print(atac_data_mod) # Perform graph-based clustering to identify cell clusters. From db59231f001dfa60306e62f23cc024b1b9fdcbd4 Mon Sep 17 00:00:00 2001 From: Sid Cox Date: Fri, 13 Dec 2024 10:24:41 -0500 Subject: [PATCH 34/91] peak calling --- pipelines/skylab/atac/atac.wdl | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 7e42a65802..87df29afa0 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -763,13 +763,8 @@ task PeakCalling { command <<< set -euo pipefail set -x - - # install packages -- need to add to the docker - pip3 install snapatac2==2.7.0 scanpy file_to_change=/usr/local/lib/python3.10/site-packages/snapatac2/tools/_call_peaks.py - sed -i '164s/.*/ peaks = [_call_peaks(x) for x in fragments.values()]/' $file_to_change - python3 < Date: Fri, 13 Dec 2024 11:00:33 -0500 Subject: [PATCH 35/91] remove bam_base_name from CreateFragments task --- pipelines/skylab/atac/atac.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 3290d22498..0a55334c15 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -625,7 +625,7 @@ task CreateFragmentFile { if peakcalling_bool: print("Peak calling starting...") - atac_data = snap.read("~{bam_base_name}.metrics.h5ad") + atac_data = snap.read("~{input_id}.metrics.h5ad") # Calculate and plot the size distribution of fragments print("Calculating fragment size distribution") @@ -706,7 +706,7 @@ task CreateFragmentFile { atac_data_mod.uns[key] = atac_data_mod.uns[key].to_pandas() print("Write into h5ad file") - atac_data_mod.write_h5ad("~{bam_base_name}.peaks.h5ad") + atac_data_mod.write_h5ad("~{input_id}.peaks.h5ad") print("test") CODE From 987e1e527e5cf1d4de8436aaa4ccdaddb7c79c48 Mon Sep 17 00:00:00 2001 From: Sid Cox Date: Fri, 13 Dec 2024 11:10:40 -0500 Subject: [PATCH 36/91] separate task for macs3 consistent with create fragments --- pipelines/skylab/atac/atac.wdl | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 0a55334c15..e03edb7e2c 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -542,12 +542,6 @@ task CreateFragmentFile { set -euo pipefail set -x - # install packages -- need to add to the docker - pip3 install snapatac2==2.7.0 scanpy - - file_to_change=/usr/local/lib/python3.10/site-packages/snapatac2/tools/_call_peaks.py - sed -i '164s/.*/ peaks = [_call_peaks(x) for x in fragments.values()]/' $file_to_change - python3 < Date: Mon, 16 Dec 2024 09:40:14 -0500 Subject: [PATCH 37/91] Update atac.wdl update snap_atac_docker tag to 2.0.0 --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index e03edb7e2c..ffd4ae3f0e 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -61,7 +61,7 @@ workflow ATAC { String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919" String samtools_docker = "samtools-dist-bwa:3.0.0" String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311" - String snap_atac_docker = "snapatac2:1.1.0" + String snap_atac_docker = "snapatac2:2.0.0" # Make sure either 'gcp' or 'azure' is supplied as cloud_provider input. If not, raise an error if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { From b136ba30e94df8895f0def5592c012cd87be7628 Mon Sep 17 00:00:00 2001 From: Sid Cox Date: Mon, 16 Dec 2024 10:26:30 -0500 Subject: [PATCH 38/91] snapatac2 updates --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index ffd4ae3f0e..a2375ad802 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -551,7 +551,7 @@ task CreateFragmentFile { atac_gtf = "~{annotations_gtf}" preindex = "~{preindex}" atac_nhash_id = "~{atac_nhash_id}" - peakcalling_bool = True + peakcalling_bool = True # set peakcalling expected_cells = ~{atac_expected_cells} # calculate chrom size dictionary based on text file From 573a29fea508cc75552bcd4b76b7bfddb8d02817 Mon Sep 17 00:00:00 2001 From: Sid Cox Date: Mon, 13 Jan 2025 15:12:49 -0500 Subject: [PATCH 39/91] turn off createfragments peakcalling --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index a2375ad802..868ddbf161 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -551,7 +551,7 @@ task CreateFragmentFile { atac_gtf = "~{annotations_gtf}" preindex = "~{preindex}" atac_nhash_id = "~{atac_nhash_id}" - peakcalling_bool = True # set peakcalling + peakcalling_bool = False # set peakcalling expected_cells = ~{atac_expected_cells} # calculate chrom size dictionary based on text file From 7b8d1fcb82d503f2ed12b66d9f33511ceca7d8ee Mon Sep 17 00:00:00 2001 From: Sid Cox Date: Tue, 14 Jan 2025 08:53:19 -0500 Subject: [PATCH 40/91] remove peakcalling from createfragments and parameterize probability_threshold --- pipelines/skylab/atac/atac.wdl | 93 ++-------------------------------- 1 file changed, 3 insertions(+), 90 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 868ddbf161..f9edb0362b 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -551,7 +551,6 @@ task CreateFragmentFile { atac_gtf = "~{annotations_gtf}" preindex = "~{preindex}" atac_nhash_id = "~{atac_nhash_id}" - peakcalling_bool = False # set peakcalling expected_cells = ~{atac_expected_cells} # calculate chrom size dictionary based on text file @@ -587,8 +586,7 @@ task CreateFragmentFile { atac_percent_target = number_of_cells / expected_cells*100 print("Setting percent target in nested dictionary") data['Cells']['atac_percent_target'] = atac_percent_target - - + # Flatten the dictionary flattened_data = [] for category, metrics in data.items(): @@ -617,92 +615,6 @@ task CreateFragmentFile { # Write new atac file atac_data.write_h5ad("~{input_id}.metrics.h5ad") - if peakcalling_bool: - print("Peak calling starting...") - atac_data = snap.read("~{input_id}.metrics.h5ad") - - # Calculate and plot the size distribution of fragments - print("Calculating fragment size distribution") - snap.pl.frag_size_distr(atac_data) - print(atac_data) - - # Filter cells -- Need to parameterize - print("Filtering cells") - snap.pp.filter_cells(atac_data, min_counts=5000, min_tsse=10, max_counts=100000) - print(atac_data) - - # Create a cell by bin matrix containing insertion counts across genome-wide 500-bp bins. - print("Creating cell by bin matrix") - atac_data_mod = snap.pp.add_tile_matrix(atac_data, inplace=False) - print("set obsm") - atac_data_mod.obsm["fragment_paired"] = atac_data.obsm["fragment_paired"] - print("set all uns") - for key in atac_data.uns.keys(): - print("set ",key) - atac_data_mod.uns[key] = atac_data.uns[key] - print(atac_data_mod) - - # Feature selection - print("Feature selection") - snap.pp.select_features(atac_data_mod) - print(atac_data_mod) - - # Run customized scrublet algorithm to identify potential doublets - print("Run scrublet to identify potential doublets") - snap.pp.scrublet(atac_data_mod) - print(atac_data_mod) - - # Employ spectral embedding for dimensionality reduction - print("Employ spectral embedding for dimensionality reduction") - snap.tl.spectral(atac_data_mod) - print(atac_data_mod) - - # Filter doublets based on scrublet scores - print("Filter doublets based on scrublet scores") - snap.pp.filter_doublets(atac_data_mod, probability_threshold=0.5) - print(atac_data_mod) - - # Perform graph-based clustering to identify cell clusters. - # Build a k-nearest neighbour graph using snap.pp.knn - print("Perform knn graph-based clustering to identify cell clusters") - snap.pp.knn(atac_data_mod) - print(atac_data_mod) - - # Use the Leiden community detection algorithm to identify densely-connected subgraphs/clusters in the graph - print("Use the Leiden community detection algorithm to identify densely-connected subgraphs/clusters in the graph") - snap.tl.leiden(atac_data_mod) - print(atac_data_mod) - - # Create the cell by gene activity matrix - print("Create the cell by gene activity matrix") - gene_mat = snap.pp.make_gene_matrix(atac_data_mod, gene_anno=atac_gtf) - print(atac_data_mod) - - # Normalize the gene matrix - print("Normalize the gene matrix") - gene_mat.obs['leiden'] = atac_data_mod.obs['leiden'] - sc.pp.normalize_total(gene_mat) - sc.pp.log1p(gene_mat) - sc.tl.rank_genes_groups(gene_mat, groupby="leiden", method="wilcoxon") - - for i in np.unique(gene_mat.obs['leiden']): - markers = sc.get.rank_genes_groups_df(gene_mat, group=i).head(7)['names'] - print(f"Cluster {i}: {', '.join(markers)}") - - print("Peak calling using MACS3") - snap.tl.macs3(atac_data_mod, groupby='leiden', n_jobs=1) - - print("Convert pl.DataFrame to pandas DataFrame") - # Convert pl.DataFrame to pandas DataFrame - for key in atac_data_mod.uns.keys(): - if isinstance(atac_data_mod.uns[key], pl.DataFrame): - print(key) - atac_data_mod.uns[key] = atac_data_mod.uns[key].to_pandas() - - print("Write into h5ad file") - atac_data_mod.write_h5ad("~{input_id}.peaks.h5ad") - print("test") - CODE # sorting the file @@ -740,6 +652,7 @@ task PeakCalling { Int min_counts = 5000 Int min_tsse = 10 Int max_counts = 100000 + Float probability_threshold = 0.5 # Runtime attributes/docker String docker_path Int disk_size = 500 @@ -817,7 +730,7 @@ task PeakCalling { # Filter doublets based on scrublet scores print("Filter doublets based on scrublet scores") - snap.pp.filter_doublets(atac_data_mod, probability_threshold=0.5) + snap.pp.filter_doublets(atac_data_mod, probability_threshold=probability_threshold) print(atac_data_mod) # Perform graph-based clustering to identify cell clusters. From 0689868c2543282088c020bc575c35e594660dad Mon Sep 17 00:00:00 2001 From: Sid Cox Date: Tue, 14 Jan 2025 15:54:40 -0500 Subject: [PATCH 41/91] remove probability_threshold parameterization --- pipelines/skylab/atac/atac.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index f9edb0362b..03a5f466e5 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -652,7 +652,6 @@ task PeakCalling { Int min_counts = 5000 Int min_tsse = 10 Int max_counts = 100000 - Float probability_threshold = 0.5 # Runtime attributes/docker String docker_path Int disk_size = 500 @@ -730,7 +729,7 @@ task PeakCalling { # Filter doublets based on scrublet scores print("Filter doublets based on scrublet scores") - snap.pp.filter_doublets(atac_data_mod, probability_threshold=probability_threshold) + snap.pp.filter_doublets(atac_data_mod, probability_threshold=0.5) print(atac_data_mod) # Perform graph-based clustering to identify cell clusters. From 32f4b7fb70ffba7c436c1947b1bd846ed065caca Mon Sep 17 00:00:00 2001 From: Sid Cox Date: Tue, 14 Jan 2025 17:39:55 -0500 Subject: [PATCH 42/91] remove repeated line --- pipelines/skylab/atac/atac.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 03a5f466e5..43b2ad21ec 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -517,7 +517,6 @@ task CreateFragmentFile { File bam File annotations_gtf File chrom_sizes - File annotations_gtf Boolean preindex Int disk_size = 500 Int mem_size = 64 From fffb2564a740484931853a68127adadcc08f4ac9 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Tue, 14 Jan 2025 22:43:31 +0000 Subject: [PATCH 43/91] Updated pipeline_versions.txt with all pipeline version information --- pipeline_versions.txt | 56 +++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/pipeline_versions.txt b/pipeline_versions.txt index 5ecffd5530..729f50381e 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -1,40 +1,40 @@ Pipeline Name Version Date of Last Commit -CheckFingerprint 1.0.22 2024-10-28 -RNAWithUMIsPipeline 1.0.18 2024-11-04 -AnnotationFiltration 1.2.7 2024-11-04 -UltimaGenomicsWholeGenomeGermline 1.1.2 2024-11-04 -WholeGenomeGermlineSingleSample 3.3.3 2024-11-04 -ExomeGermlineSingleSample 3.2.3 2024-11-04 -JointGenotypingByChromosomePartTwo 1.5.2 2024-11-04 -JointGenotypingByChromosomePartOne 1.5.2 2024-11-04 +ExomeReprocessing 3.3.3 2024-11-04 +CramToUnmappedBams 1.1.3 2024-08-02 +WholeGenomeReprocessing 3.3.3 2024-11-04 +ExternalExomeReprocessing 3.3.3 2024-11-04 +ExternalWholeGenomeReprocessing 2.3.3 2024-11-04 +UltimaGenomicsJointGenotyping 1.2.2 2024-11-04 ReblockGVCF 2.3.2 2024-11-04 +JointGenotypingByChromosomePartOne 1.5.2 2024-11-04 +JointGenotypingByChromosomePartTwo 1.5.2 2024-11-04 JointGenotyping 1.7.2 2024-11-04 -UltimaGenomicsJointGenotyping 1.2.2 2024-11-04 +ExomeGermlineSingleSample 3.2.3 2024-11-04 +WholeGenomeGermlineSingleSample 3.3.3 2024-11-04 +UltimaGenomicsWholeGenomeGermline 1.1.2 2024-11-04 VariantCalling 2.2.4 2024-11-04 -UltimaGenomicsWholeGenomeCramOnly 1.0.23 2024-11-04 GDCWholeGenomeSomaticSingleSample 1.3.4 2024-11-04 -BroadInternalRNAWithUMIs 1.0.36 2024-11-04 -BroadInternalUltimaGenomics 1.1.2 2024-11-04 -BroadInternalArrays 1.1.14 2024-11-04 -BroadInternalImputation 1.1.14 2024-11-04 -Arrays 2.6.30 2024-11-04 +UltimaGenomicsWholeGenomeCramOnly 1.0.23 2024-11-04 +IlluminaGenotypingArray 1.12.24 2024-11-04 +AnnotationFiltration 1.2.7 2024-11-04 +CheckFingerprint 1.0.22 2024-10-28 ValidateChip 1.16.7 2024-11-04 -MultiSampleArrays 1.6.2 2024-08-02 Imputation 1.1.15 2024-11-04 -IlluminaGenotypingArray 1.12.24 2024-11-04 -ExternalWholeGenomeReprocessing 2.3.3 2024-11-04 -ExternalExomeReprocessing 3.3.3 2024-11-04 -CramToUnmappedBams 1.1.3 2024-08-02 -WholeGenomeReprocessing 3.3.3 2024-11-04 -ExomeReprocessing 3.3.3 2024-11-04 +MultiSampleArrays 1.6.2 2024-08-02 +Arrays 2.6.30 2024-11-04 +BroadInternalUltimaGenomics 1.1.2 2024-11-04 +BroadInternalImputation 1.1.14 2024-11-04 +BroadInternalArrays 1.1.14 2024-11-04 +BroadInternalRNAWithUMIs 1.0.36 2024-11-04 +RNAWithUMIsPipeline 1.0.18 2024-11-04 +Multiome 5.9.3 2024-12-3 +MultiSampleSmartSeq2SingleNucleus 2.0.6 2024-11-15 BuildIndices 3.1.0 2024-11-26 +SlideSeq 3.4.7 2024-12-3 +PairedTag 1.8.4 2024-12-3 +atac 2.5.3 2024-11-22 scATAC 1.3.2 2023-08-03 snm3C 4.0.4 2024-08-06 -Multiome 5.9.3 2024-12-3 -PairedTag 1.8.4 2024-12-3 -MultiSampleSmartSeq2 2.2.22 2024-09-11 -MultiSampleSmartSeq2SingleNucleus 2.0.6 2024-11-15 Optimus 7.8.4 2024-12-3 -atac 2.5.3 2024-11-22 +MultiSampleSmartSeq2 2.2.22 2024-09-11 SmartSeq2SingleSample 5.1.21 2024-09-11 -SlideSeq 3.4.7 2024-12-3 From cf2a77b841882fe7bc7dfc96d8eae86c4cdb63ab Mon Sep 17 00:00:00 2001 From: aawdeh Date: Sun, 19 Jan 2025 13:17:50 -0500 Subject: [PATCH 44/91] add optional parameter --- pipelines/skylab/atac/atac.wdl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 43b2ad21ec..43b9e920f0 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -28,6 +28,8 @@ workflow ATAC { # Option for running files with preindex Boolean preindex = false + # Option for running peak calling + Boolean peak_calling = true # BWA ref File tar_bwa_reference @@ -71,7 +73,6 @@ workflow ATAC { } } - parameter_meta { read1_fastq_gzipped: "read 1 FASTQ file as input for the pipeline, contains read 1 of paired reads" read2_fastq_gzipped: "read 2 FASTQ file as input for the pipeline, contains the cellular barcodes corresponding to the reads in the read1 FASTQ and read 3 FASTQ" @@ -160,15 +161,17 @@ workflow ATAC { input_id = input_id } - call PeakCalling { - input: - bam = BWAPairedEndAlignment.bam_aligned_output, - annotations_gtf = annotations_gtf, - metrics_h5ad = CreateFragmentFile.Snap_metrics, - docker_path = docker_prefix + snap_atac_docker + if (peak_calling) { + call PeakCalling { + input: + bam = BWAPairedEndAlignment.bam_aligned_output, + annotations_gtf = annotations_gtf, + metrics_h5ad = CreateFragmentFile.Snap_metrics, + docker_path = docker_prefix + snap_atac_docker + } } } - + File bam_aligned_output_atac = select_first([BBTag.bb_bam, BWAPairedEndAlignment.bam_aligned_output]) File fragment_file_atac = select_first([BB_fragment.fragment_file, CreateFragmentFile.fragment_file]) File snap_metrics_atac = select_first([BB_fragment.Snap_metrics,CreateFragmentFile.Snap_metrics]) @@ -642,6 +645,7 @@ task CreateFragmentFile { } } +# peak calling using SnapATAC2 task PeakCalling { input { File bam From cf63ad1f3ca83054941842906efd9c497bb66797 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 21 Jan 2025 10:29:17 -0500 Subject: [PATCH 45/91] add output --- pipelines/skylab/atac/atac.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 43b9e920f0..7941597e50 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -176,12 +176,16 @@ workflow ATAC { File fragment_file_atac = select_first([BB_fragment.fragment_file, CreateFragmentFile.fragment_file]) File snap_metrics_atac = select_first([BB_fragment.Snap_metrics,CreateFragmentFile.Snap_metrics]) File library_metrics = select_first([BB_fragment.atac_library_metrics, CreateFragmentFile.atac_library_metrics]) - + + # if peakcalling task not called set peakcall_h5ad to null + File peakcall_h5ad = if (peak_calling) PeakCalling.peaks_h5ad else snap_metrics_atac + output { File bam_aligned_output = bam_aligned_output_atac File fragment_file = fragment_file_atac File snap_metrics = snap_metrics_atac File library_metrics_file = library_metrics + File peakcall_h5ad_file = peakcall_h5ad } } @@ -639,7 +643,6 @@ task CreateFragmentFile { output { File fragment_file = "~{input_id}.fragments.sorted.tsv.gz" File fragment_file_index = "~{input_id}.fragments.sorted.tsv.gz.csi" - File Snap_metrics = "~{input_id}.metrics.h5ad" File atac_library_metrics = "~{input_id}_~{atac_nhash_id}_library_metrics.csv" } From 5e81613d7a61cc2b3283a7947e7c5ad8548d1d5c Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 21 Jan 2025 10:38:27 -0500 Subject: [PATCH 46/91] add output --- pipelines/skylab/atac/atac.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 0fa2d09aff..9563fd0896 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -178,8 +178,8 @@ workflow ATAC { File library_metrics = select_first([BB_fragment.atac_library_metrics, CreateFragmentFile.atac_library_metrics]) # if peakcalling task not called set peakcall_h5ad to null - File peakcall_h5ad = if (peak_calling) PeakCalling.peaks_h5ad else snap_metrics_atac - + File peakcall_h5ad = (if (peak_calling) PeakCalling.peaks_h5ad else snap_metrics_atac) + output { File bam_aligned_output = bam_aligned_output_atac File fragment_file = fragment_file_atac From f0216ab5f4d3d0e4418a8afc10970547f6bb8517 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 21 Jan 2025 10:47:10 -0500 Subject: [PATCH 47/91] test --- pipelines/skylab/atac/atac.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 9563fd0896..cbbcbf461f 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -178,8 +178,11 @@ workflow ATAC { File library_metrics = select_first([BB_fragment.atac_library_metrics, CreateFragmentFile.atac_library_metrics]) # if peakcalling task not called set peakcall_h5ad to null - File peakcall_h5ad = (if (peak_calling) PeakCalling.peaks_h5ad else snap_metrics_atac) - + File peakcall_h5ad = snap_metrics_atac + if (peak_calling) { + File peakcall_h5ad_file = PeakCalling.peaks_h5ad + } + output { File bam_aligned_output = bam_aligned_output_atac File fragment_file = fragment_file_atac From ef09ec90c283d99c78ad6e65678e321e25def90d Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 21 Jan 2025 10:51:46 -0500 Subject: [PATCH 48/91] test --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index cbbcbf461f..fae7e2bf8c 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -180,7 +180,7 @@ workflow ATAC { # if peakcalling task not called set peakcall_h5ad to null File peakcall_h5ad = snap_metrics_atac if (peak_calling) { - File peakcall_h5ad_file = PeakCalling.peaks_h5ad + File peakcall_h5ad = PeakCalling.peaks_h5ad } output { From d64ad4dfca29a50140bc916fba78db73258e0cb4 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 21 Jan 2025 12:39:55 -0500 Subject: [PATCH 49/91] test --- pipelines/skylab/atac/atac.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index fae7e2bf8c..c633a348b8 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -180,7 +180,7 @@ workflow ATAC { # if peakcalling task not called set peakcall_h5ad to null File peakcall_h5ad = snap_metrics_atac if (peak_calling) { - File peakcall_h5ad = PeakCalling.peaks_h5ad + peakcall_h5ad = PeakCalling.peaks_h5ad } output { @@ -188,7 +188,7 @@ workflow ATAC { File fragment_file = fragment_file_atac File snap_metrics = snap_metrics_atac File library_metrics_file = library_metrics - File peakcall_h5ad_file = peakcall_h5ad + File? peakcall_h5ad_file = peakcall_h5ad } } From e3fe25fa55c09f8d5c9a516864c4718b9594ea00 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 21 Jan 2025 12:42:32 -0500 Subject: [PATCH 50/91] test --- pipelines/skylab/atac/atac.wdl | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index c633a348b8..6a434061d5 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -176,19 +176,13 @@ workflow ATAC { File fragment_file_atac = select_first([BB_fragment.fragment_file, CreateFragmentFile.fragment_file]) File snap_metrics_atac = select_first([BB_fragment.Snap_metrics,CreateFragmentFile.Snap_metrics]) File library_metrics = select_first([BB_fragment.atac_library_metrics, CreateFragmentFile.atac_library_metrics]) - - # if peakcalling task not called set peakcall_h5ad to null - File peakcall_h5ad = snap_metrics_atac - if (peak_calling) { - peakcall_h5ad = PeakCalling.peaks_h5ad - } output { File bam_aligned_output = bam_aligned_output_atac File fragment_file = fragment_file_atac File snap_metrics = snap_metrics_atac File library_metrics_file = library_metrics - File? peakcall_h5ad_file = peakcall_h5ad + File? peakcall_h5ad_file = PeakCalling.peaks_h5ad } } From e6dad583dcff51b7470bc7b720988bedbc001f4f Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 21 Jan 2025 12:48:19 -0500 Subject: [PATCH 51/91] test --- pipelines/skylab/multiome/Multiome.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 1553513fd7..1c2e11f08b 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -149,6 +149,7 @@ workflow Multiome { File fragment_file_index = JoinBarcodes.atac_fragment_tsv_index File snap_metrics_atac = JoinBarcodes.atac_h5ad_file File atac_library_metrics = Atac.library_metrics_file + File? peak_call_atac = Atac.peakcall_h5ad_file # optimus outputs File genomic_reference_version_gex = Optimus.genomic_reference_version From 80e3fa1cdff2fa339a2ba094cd6ae0a869625eee Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 27 Jan 2025 15:07:36 -0500 Subject: [PATCH 52/91] Added matrix as output --- pipelines/skylab/atac/atac.wdl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 6a434061d5..971a98006a 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -167,6 +167,7 @@ workflow ATAC { bam = BWAPairedEndAlignment.bam_aligned_output, annotations_gtf = annotations_gtf, metrics_h5ad = CreateFragmentFile.Snap_metrics, + chrom_sizes = chrom_sizes, docker_path = docker_prefix + snap_atac_docker } } @@ -657,7 +658,8 @@ task PeakCalling { input { File bam File annotations_gtf - File metrics_h5ad + File metrics_h5ad + File chrom_sizes # SnapATAC2 parameters Int min_counts = 5000 Int min_tsse = 10 @@ -694,6 +696,7 @@ task PeakCalling { bam_base_name = "~{bam_base_name}" atac_gtf = "~{annotations_gtf}" metrics_h5ad = "~{metrics_h5ad}" + chrom_sizes = "~{chrom_sizes}" min_counts = "~{min_counts}" min_tsse = "~{min_tsse}" max_counts = "~{max_counts}" @@ -772,6 +775,9 @@ task PeakCalling { print("Peak calling using MACS3") snap.tl.macs3(atac_data_mod, groupby='leiden', n_jobs=1) + peaks = snap.tl.merge_peaks(atac_data_mod.uns['macs3'], chrom_sizes) + peak_matrix = snap.pp.make_peak_matrix(atac_data_mod, use_rep=peaks['Peaks']) + print("Convert pl.DataFrame to pandas DataFrame") # Convert pl.DataFrame to pandas DataFrame for key in atac_data_mod.uns.keys(): @@ -781,6 +787,7 @@ task PeakCalling { print("Write into h5ad file") atac_data_mod.write_h5ad("~{bam_base_name}.peaks.h5ad") + peak_matrix.write_h5ad("~{bam_base_name}.matrix.h5ad") print("test") CODE @@ -795,6 +802,6 @@ task PeakCalling { output { File peaks_h5ad = "~{bam_base_name}.peaks.h5ad" + File matrix_h5ad = "~{bam_base_name}.matrix.h5ad" } - } From b3ebae925351d439a84ed1e44675621badd0ffda Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Mon, 27 Jan 2025 20:08:33 +0000 Subject: [PATCH 53/91] Updated pipeline_versions.txt with all pipeline version information --- pipeline_versions.txt | 50 +++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/pipeline_versions.txt b/pipeline_versions.txt index 85b1dbb384..745d09057e 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -1,40 +1,40 @@ Pipeline Name Version Date of Last Commit -ExomeReprocessing 3.3.3 2024-11-04 -CramToUnmappedBams 1.1.3 2024-08-02 +IlluminaGenotypingArray 1.12.24 2024-11-04 WholeGenomeReprocessing 3.3.3 2024-11-04 -ExternalExomeReprocessing 3.3.3 2024-11-04 ExternalWholeGenomeReprocessing 2.3.3 2024-11-04 -UltimaGenomicsJointGenotyping 1.2.2 2024-11-04 -ReblockGVCF 2.4.0 2024-12-05 -JointGenotypingByChromosomePartOne 1.5.2 2024-11-04 -JointGenotypingByChromosomePartTwo 1.5.2 2024-11-04 -JointGenotyping 1.7.2 2024-11-04 -ExomeGermlineSingleSample 3.2.3 2024-11-04 +ExternalExomeReprocessing 3.3.3 2024-11-04 +CramToUnmappedBams 1.1.3 2024-08-02 +ExomeReprocessing 3.3.3 2024-11-04 +GDCWholeGenomeSomaticSingleSample 1.3.4 2024-11-04 +UltimaGenomicsWholeGenomeCramOnly 1.0.23 2024-11-04 WholeGenomeGermlineSingleSample 3.3.3 2024-11-04 UltimaGenomicsWholeGenomeGermline 1.1.3 2024-12-05 +ExomeGermlineSingleSample 3.2.3 2024-11-04 +JointGenotyping 1.7.2 2024-11-04 +ReblockGVCF 2.4.0 2024-12-05 +UltimaGenomicsJointGenotyping 1.2.2 2024-11-04 +JointGenotypingByChromosomePartTwo 1.5.2 2024-11-04 +JointGenotypingByChromosomePartOne 1.5.2 2024-11-04 VariantCalling 2.2.4 2024-11-04 -GDCWholeGenomeSomaticSingleSample 1.3.4 2024-11-04 -UltimaGenomicsWholeGenomeCramOnly 1.0.23 2024-11-04 -IlluminaGenotypingArray 1.12.24 2024-11-04 -AnnotationFiltration 1.2.7 2024-11-04 CheckFingerprint 1.0.22 2024-10-28 -ValidateChip 1.16.7 2024-11-04 -Imputation 1.1.15 2024-11-04 -MultiSampleArrays 1.6.2 2024-08-02 -Arrays 2.6.30 2024-11-04 +RNAWithUMIsPipeline 1.0.18 2024-11-04 BroadInternalUltimaGenomics 1.1.3 2024-12-05 +BroadInternalRNAWithUMIs 1.0.36 2024-11-04 BroadInternalImputation 1.1.14 2024-11-04 BroadInternalArrays 1.1.14 2024-11-04 -BroadInternalRNAWithUMIs 1.0.36 2024-11-04 -RNAWithUMIsPipeline 1.0.18 2024-11-04 +Imputation 1.1.15 2024-11-04 +MultiSampleArrays 1.6.2 2024-08-02 +ValidateChip 1.16.7 2024-11-04 +Arrays 2.6.30 2024-11-04 +AnnotationFiltration 1.2.7 2024-11-04 Multiome 5.9.6 2025-01-21 -MultiSampleSmartSeq2SingleNucleus 2.0.7 2025-01-13 -BuildIndices 4.0.0 2025-01-17 +snm3C 4.0.4 2024-08-06 SlideSeq 3.4.8 2025-01-13 -PairedTag 1.10.0 2025-01-21 -atac 2.6.0 2025-01-21 scATAC 1.3.2 2023-08-03 -snm3C 4.0.4 2024-08-06 -Optimus 7.9.1 2025-01-13 +BuildIndices 4.0.0 2025-01-17 MultiSampleSmartSeq2 2.2.22 2024-09-11 +Optimus 7.9.1 2025-01-13 +atac 2.6.0 2025-01-21 +PairedTag 1.10.0 2025-01-21 SmartSeq2SingleSample 5.1.21 2024-09-11 +MultiSampleSmartSeq2SingleNucleus 2.0.7 2025-01-13 From 9e4916aaab19ee0e45aae955efb18aa24fbaaccc Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 27 Jan 2025 15:09:32 -0500 Subject: [PATCH 54/91] Added matrix as output --- pipelines/skylab/atac/atac.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index e159a9c855..4883a27ec7 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -777,8 +777,10 @@ task PeakCalling { print("Peak calling using MACS3") snap.tl.macs3(atac_data_mod, groupby='leiden', n_jobs=1) + print("Merge peaks and create peak matrix") peaks = snap.tl.merge_peaks(atac_data_mod.uns['macs3'], chrom_sizes) peak_matrix = snap.pp.make_peak_matrix(atac_data_mod, use_rep=peaks['Peaks']) + print(type(peak_matrix)) print("Convert pl.DataFrame to pandas DataFrame") # Convert pl.DataFrame to pandas DataFrame From cd7c100633351128395c08ef27824137621be851 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 27 Jan 2025 16:23:21 -0500 Subject: [PATCH 55/91] Added matrix as output --- pipelines/skylab/atac/atac.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 4883a27ec7..bfce85f386 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -778,7 +778,11 @@ task PeakCalling { snap.tl.macs3(atac_data_mod, groupby='leiden', n_jobs=1) print("Merge peaks and create peak matrix") - peaks = snap.tl.merge_peaks(atac_data_mod.uns['macs3'], chrom_sizes) + # read chrom sizes + chromsize_dict = pd.read_csv(chrom_sizes, sep='\t', header=None) + chromsize_dict = pd.Series(mouse_chromsize[1].values, index=mouse_chromsize[0]).to_dict() + # merge peaks and create peak matrix + peaks = snap.tl.merge_peaks(atac_data_mod.uns['macs3'], chromsize_dict) peak_matrix = snap.pp.make_peak_matrix(atac_data_mod, use_rep=peaks['Peaks']) print(type(peak_matrix)) From d7486d98999f120cf41d230d5a16e3e79c54bf06 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 28 Jan 2025 07:34:42 -0500 Subject: [PATCH 56/91] test --- pipelines/skylab/atac/atac.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index bfce85f386..75a38c0010 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -693,6 +693,7 @@ task PeakCalling { import scanpy as sc import numpy as np import polars as pl + import pandas as pd bam = "~{bam}" bam_base_name = "~{bam_base_name}" From 89824b03c9606c6cef4212dcb19ea7091c062251 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 28 Jan 2025 08:23:12 -0500 Subject: [PATCH 57/91] test --- pipelines/skylab/atac/atac.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 75a38c0010..35a2676e58 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -781,11 +781,13 @@ task PeakCalling { print("Merge peaks and create peak matrix") # read chrom sizes chromsize_dict = pd.read_csv(chrom_sizes, sep='\t', header=None) - chromsize_dict = pd.Series(mouse_chromsize[1].values, index=mouse_chromsize[0]).to_dict() + chromsize_dict = pd.Series(chromsize_dict[1].values, index=chromsize_dict[0]).to_dict() # merge peaks and create peak matrix peaks = snap.tl.merge_peaks(atac_data_mod.uns['macs3'], chromsize_dict) + print(peaks) peak_matrix = snap.pp.make_peak_matrix(atac_data_mod, use_rep=peaks['Peaks']) print(type(peak_matrix)) + print(peak_matrix) print("Convert pl.DataFrame to pandas DataFrame") # Convert pl.DataFrame to pandas DataFrame From aee56ecf1b720d32e7b34144875a0c1d957ed51b Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 28 Jan 2025 14:09:37 -0500 Subject: [PATCH 58/91] remove comments --- pipelines/skylab/atac/atac.wdl | 3 --- 1 file changed, 3 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 35a2676e58..4ad785eeec 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -784,10 +784,7 @@ task PeakCalling { chromsize_dict = pd.Series(chromsize_dict[1].values, index=chromsize_dict[0]).to_dict() # merge peaks and create peak matrix peaks = snap.tl.merge_peaks(atac_data_mod.uns['macs3'], chromsize_dict) - print(peaks) peak_matrix = snap.pp.make_peak_matrix(atac_data_mod, use_rep=peaks['Peaks']) - print(type(peak_matrix)) - print(peak_matrix) print("Convert pl.DataFrame to pandas DataFrame") # Convert pl.DataFrame to pandas DataFrame From 83fbe0506f19cf58211e5bf38ed112e5ed74cd52 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Fri, 31 Jan 2025 09:46:05 -0500 Subject: [PATCH 59/91] update changelogs/versions --- pipelines/skylab/atac/atac.changelog.md | 4 ++++ pipelines/skylab/atac/atac.wdl | 2 +- pipelines/skylab/multiome/Multiome.changelog.md | 4 ++++ pipelines/skylab/multiome/Multiome.wdl | 7 +++++-- pipelines/skylab/paired_tag/PairedTag.changelog.md | 4 ++++ pipelines/skylab/paired_tag/PairedTag.wdl | 2 +- 6 files changed, 19 insertions(+), 4 deletions(-) diff --git a/pipelines/skylab/atac/atac.changelog.md b/pipelines/skylab/atac/atac.changelog.md index d47665f255..8145cb8761 100644 --- a/pipelines/skylab/atac/atac.changelog.md +++ b/pipelines/skylab/atac/atac.changelog.md @@ -1,3 +1,7 @@ +# 2.6.1 +2025-01-31 (Date of Last Commit) +* Added an optional PeakCalling task. Added a boolean variable run_peak_calling; default is false. + # 2.6.0 2025-01-21 (Date of Last Commit) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 4ad785eeec..47383df63a 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -51,7 +51,7 @@ workflow ATAC { String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" } - String pipeline_version = "2.6.0" + String pipeline_version = "2.6.1" # Determine docker prefix based on cloud provider String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index c73565dbbc..e870007734 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,3 +1,7 @@ +# 5.9.7 +2025-01-31 (Date of Last Commit) +* Added an optional PeakCalling task to the ATAC workflow. Added a boolean variable run_peak_calling to the Multiome pipeline; default is false. + # 5.9.6 2025-01-21 (Date of Last Commit) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 6cf6b92e4e..d6d97a0f84 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -7,7 +7,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow Multiome { - String pipeline_version = "5.9.6" + String pipeline_version = "5.9.7" input { String cloud_provider @@ -50,6 +50,8 @@ workflow Multiome { # CellBender Boolean run_cellbender = false + # Peak Calling + Boolean run_peak_calling = false } # Determine docker prefix based on cloud provider @@ -121,7 +123,8 @@ workflow Multiome { annotations_gtf = annotations_gtf, atac_nhash_id = atac_nhash_id, adapter_seq_read3 = adapter_seq_read3, - atac_expected_cells = expected_cells + atac_expected_cells = expected_cells, + peak_calling = run_peak_calling } call H5adUtils.JoinMultiomeBarcodes as JoinBarcodes { input: diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index d26db399ff..1bc923a3cd 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,3 +1,7 @@ +# 1.10.1 +2025-01-31 (Date of Last Commit) +* Added an optional PeakCalling task to the ATAC workflow. Added a boolean variable run_peak_calling to the Multiome pipeline; default is false. This does not affect the outputs of the pipeline + # 1.10.0 2025-01-21 (Date of Last Commit) diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl index fdf942bf90..05d1b74b96 100644 --- a/pipelines/skylab/paired_tag/PairedTag.wdl +++ b/pipelines/skylab/paired_tag/PairedTag.wdl @@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow PairedTag { - String pipeline_version = "1.10.0" + String pipeline_version = "1.10.1" input { From a0dd316c26238a85928eaa8f7164c2e881d8bf73 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Fri, 31 Jan 2025 14:46:42 +0000 Subject: [PATCH 60/91] Updated pipeline_versions.txt with all pipeline version information --- pipeline_versions.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipeline_versions.txt b/pipeline_versions.txt index 745d09057e..f2cfe20fed 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -27,14 +27,14 @@ MultiSampleArrays 1.6.2 2024-08-02 ValidateChip 1.16.7 2024-11-04 Arrays 2.6.30 2024-11-04 AnnotationFiltration 1.2.7 2024-11-04 -Multiome 5.9.6 2025-01-21 +Multiome 5.9.7 2025-01-31 snm3C 4.0.4 2024-08-06 SlideSeq 3.4.8 2025-01-13 scATAC 1.3.2 2023-08-03 BuildIndices 4.0.0 2025-01-17 MultiSampleSmartSeq2 2.2.22 2024-09-11 Optimus 7.9.1 2025-01-13 -atac 2.6.0 2025-01-21 -PairedTag 1.10.0 2025-01-21 +atac 2.6.1 2025-01-31 +PairedTag 1.10.1 2025-01-31 SmartSeq2SingleSample 5.1.21 2024-09-11 MultiSampleSmartSeq2SingleNucleus 2.0.7 2025-01-13 From 56806372f6c297f2a1664399226eb1a22763f9d4 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Fri, 31 Jan 2025 14:35:16 -0500 Subject: [PATCH 61/91] documenation --- website/docs/Pipelines/ATAC/README.md | 8 ++++++-- website/docs/Pipelines/Multiome_Pipeline/README.md | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md index 86d3eaab3e..2651b080f4 100644 --- a/website/docs/Pipelines/ATAC/README.md +++ b/website/docs/Pipelines/ATAC/README.md @@ -65,6 +65,7 @@ The following describes the inputs of the ATAC workflow. For more details on how | adapter_seq_read3 | TrimAdapters input: Sequence adapter for read 3 fastq. | | vm_size | String defining the Azure virtual machine family for the workflow (default: "Standard_M128s"). | atac_nhash_id | String that represents an optional library aliquot identifier. When used, it is echoed in the h5ad unstructured data. | +| peak_calling | Optional boolean used to determine if the ATAC pipeline should run Peak Calling; default is "false". | Boolean | ## ATAC tasks and tools @@ -86,7 +87,7 @@ To see specific tool parameters, select the task WDL link in the table; then vie | [TrimAdapters](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/atac/atac.wdl) | Cutadapt v4.4 | cutadapt | Trims adaptor sequences. | | [BWAPairedEndAlignment](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/atac/atac.wdl) | bwa-mem2 | mem | Aligns reads from each set of partitioned FASTQ files to the genome and outputs a BAM with ATAC barcodes in the CB:Z tag. | | [CreateFragmentFile](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/atac/atac.wdl) | make_fragment_file, import_data | SnapATAC2 | Generates a fragment file from the final aligned BAM and outputs per barcode quality metrics in h5ad. A detailed list of these metrics is found in the [ATAC Count Matrix Overview](./count-matrix-overview.md). | - +| [PeakCalling](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/atac/atac.wdl) | macs3 | SnapATAC2 | Generates two h5ad files (`peaks.h5ad` and `matrix.h5ad`) from the CreateFragmentFile h5ad output file (`metrics.h5ad`). The `peaks.h5ad` contains the peak called per cluster in the macs3 unstructured metadata and `matrix.h5ad` contains the merged peaks and the count matrix of peaks per fragment. A detailed list of these metrics is found in the [ATAC Count Matrix Overview](./count-matrix-overview.md). | ## Output variables @@ -95,7 +96,10 @@ To see specific tool parameters, select the task WDL link in the table; then vie | bam_aligned_output | ``.bam | BAM containing aligned reads from ATAC workflow. | | fragment_file | ``.fragments.sorted.tsv.gz | Bgzipped TSV containing fragment start and stop coordinates per barcode. In order, the columns are "Chromosome", "Start", "Stop", "ATAC Barcode", and "Number Reads". | | snap_metrics | ``_`_library_metrics.csv | CSV file containing library-level metrics. Read more in the [Library Metrics Overview](library-metrics.md) + library_metrics | ``_`_library_metrics.csv | CSV file containing library-level metrics. Read more in the [Library Metrics Overview](library-metrics.md) | +| snap_peaks | ` Date: Fri, 31 Jan 2025 14:46:47 -0500 Subject: [PATCH 62/91] rename --- pipelines/skylab/atac/atac.wdl | 9 ++++----- website/docs/Pipelines/ATAC/README.md | 7 +++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 47383df63a..ff34a7239d 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -794,9 +794,8 @@ task PeakCalling { atac_data_mod.uns[key] = atac_data_mod.uns[key].to_pandas() print("Write into h5ad file") - atac_data_mod.write_h5ad("~{bam_base_name}.peaks.h5ad") - peak_matrix.write_h5ad("~{bam_base_name}.matrix.h5ad") - print("test") + atac_data_mod.write_h5ad("~{bam_base_name}.cellbybin.h5ad") + peak_matrix.write_h5ad("~{bam_base_name}.cellbypeak.h5ad") CODE >>> @@ -809,7 +808,7 @@ task PeakCalling { } output { - File peaks_h5ad = "~{bam_base_name}.peaks.h5ad" - File matrix_h5ad = "~{bam_base_name}.matrix.h5ad" + File peaks_h5ad = "~{bam_base_name}.cellbybin.h5ad" + File matrix_h5ad = "~{bam_base_name}.cellbypeak.h5ad" } } diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md index 2651b080f4..f3d7bc6767 100644 --- a/website/docs/Pipelines/ATAC/README.md +++ b/website/docs/Pipelines/ATAC/README.md @@ -87,7 +87,7 @@ To see specific tool parameters, select the task WDL link in the table; then vie | [TrimAdapters](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/atac/atac.wdl) | Cutadapt v4.4 | cutadapt | Trims adaptor sequences. | | [BWAPairedEndAlignment](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/atac/atac.wdl) | bwa-mem2 | mem | Aligns reads from each set of partitioned FASTQ files to the genome and outputs a BAM with ATAC barcodes in the CB:Z tag. | | [CreateFragmentFile](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/atac/atac.wdl) | make_fragment_file, import_data | SnapATAC2 | Generates a fragment file from the final aligned BAM and outputs per barcode quality metrics in h5ad. A detailed list of these metrics is found in the [ATAC Count Matrix Overview](./count-matrix-overview.md). | -| [PeakCalling](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/atac/atac.wdl) | macs3 | SnapATAC2 | Generates two h5ad files (`peaks.h5ad` and `matrix.h5ad`) from the CreateFragmentFile h5ad output file (`metrics.h5ad`). The `peaks.h5ad` contains the peak called per cluster in the macs3 unstructured metadata and `matrix.h5ad` contains the merged peaks and the count matrix of peaks per fragment. A detailed list of these metrics is found in the [ATAC Count Matrix Overview](./count-matrix-overview.md). | +| [PeakCalling](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/atac/atac.wdl) | macs3 | SnapATAC2 | Generates two h5ad files (`cellbybin.h5ad` and `cellbypeak.h5ad`) from the CreateFragmentFile h5ad output file (`metrics.h5ad`). The `peaks.h5ad` contains the peak called per cluster in the macs3 unstructured metadata and `matrix.h5ad` contains the merged peaks and the count matrix of peaks per fragment. A detailed list of these metrics is found in the [ATAC Count Matrix Overview](./count-matrix-overview.md). | ## Output variables @@ -97,9 +97,8 @@ To see specific tool parameters, select the task WDL link in the table; then vie | fragment_file | ``.fragments.sorted.tsv.gz | Bgzipped TSV containing fragment start and stop coordinates per barcode. In order, the columns are "Chromosome", "Start", "Stop", "ATAC Barcode", and "Number Reads". | | snap_metrics | ``_`_library_metrics.csv | CSV file containing library-level metrics. Read more in the [Library Metrics Overview](library-metrics.md) | -| snap_peaks | ` Date: Fri, 31 Jan 2025 14:59:44 -0500 Subject: [PATCH 63/91] rename --- pipelines/skylab/atac/atac.wdl | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index ff34a7239d..3ed3228f25 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -658,7 +658,6 @@ task CreateFragmentFile { # peak calling using SnapATAC2 task PeakCalling { input { - File bam File annotations_gtf File metrics_h5ad File chrom_sizes @@ -672,7 +671,7 @@ task PeakCalling { Int mem_size = 64 Int nthreads = 4 } - String bam_base_name = basename(bam, ".bam") + String base_name = basename(metrics_h5ad, ".h5ad") parameter_meta { bam: "Aligned bam with CB in CB tag. This is the output of the BWAPairedEndAlignment task." @@ -695,8 +694,7 @@ task PeakCalling { import polars as pl import pandas as pd - bam = "~{bam}" - bam_base_name = "~{bam_base_name}" + base_name = "~{base_name}" atac_gtf = "~{annotations_gtf}" metrics_h5ad = "~{metrics_h5ad}" chrom_sizes = "~{chrom_sizes}" @@ -794,8 +792,8 @@ task PeakCalling { atac_data_mod.uns[key] = atac_data_mod.uns[key].to_pandas() print("Write into h5ad file") - atac_data_mod.write_h5ad("~{bam_base_name}.cellbybin.h5ad") - peak_matrix.write_h5ad("~{bam_base_name}.cellbypeak.h5ad") + atac_data_mod.write_h5ad("~{base_name}.cellbybin.h5ad") + peak_matrix.write_h5ad("~{base_name}.cellbypeak.h5ad") CODE >>> @@ -808,7 +806,7 @@ task PeakCalling { } output { - File peaks_h5ad = "~{bam_base_name}.cellbybin.h5ad" - File matrix_h5ad = "~{bam_base_name}.cellbypeak.h5ad" + File peaks_h5ad = "~{base_name}.cellbybin.h5ad" + File matrix_h5ad = "~{base_name}.cellbypeak.h5ad" } } From c664618367be41e10d321cb859a8115c7a5cfbc0 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Fri, 31 Jan 2025 15:05:35 -0500 Subject: [PATCH 64/91] rename --- pipelines/skylab/atac/atac.wdl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 3ed3228f25..98c1a5b990 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -661,6 +661,8 @@ task PeakCalling { File annotations_gtf File metrics_h5ad File chrom_sizes + String output_base_name + # SnapATAC2 parameters Int min_counts = 5000 Int min_tsse = 10 @@ -671,7 +673,6 @@ task PeakCalling { Int mem_size = 64 Int nthreads = 4 } - String base_name = basename(metrics_h5ad, ".h5ad") parameter_meta { bam: "Aligned bam with CB in CB tag. This is the output of the BWAPairedEndAlignment task." @@ -694,7 +695,7 @@ task PeakCalling { import polars as pl import pandas as pd - base_name = "~{base_name}" + base_name = "~{output_base_name}" atac_gtf = "~{annotations_gtf}" metrics_h5ad = "~{metrics_h5ad}" chrom_sizes = "~{chrom_sizes}" @@ -710,7 +711,7 @@ task PeakCalling { snap.pl.frag_size_distr(atac_data) print(atac_data) - # Filter cells -- Need to parameterize + # Filter cells print("Filtering cells") snap.pp.filter_cells(atac_data, min_counts=min_counts, min_tsse=min_tsse, max_counts=max_counts) print(atac_data) @@ -792,8 +793,8 @@ task PeakCalling { atac_data_mod.uns[key] = atac_data_mod.uns[key].to_pandas() print("Write into h5ad file") - atac_data_mod.write_h5ad("~{base_name}.cellbybin.h5ad") - peak_matrix.write_h5ad("~{base_name}.cellbypeak.h5ad") + atac_data_mod.write_h5ad("~{output_base_name}.cellbybin.h5ad") + peak_matrix.write_h5ad("~{output_base_name}.cellbypeak.h5ad") CODE >>> @@ -806,7 +807,7 @@ task PeakCalling { } output { - File peaks_h5ad = "~{base_name}.cellbybin.h5ad" - File matrix_h5ad = "~{base_name}.cellbypeak.h5ad" + File peaks_h5ad = "~{output_base_name}.cellbybin.h5ad" + File matrix_h5ad = "~{output_base_name}.cellbypeak.h5ad" } } From 328ec9a0362f819317ece5d111d2941dd6f33359 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Fri, 31 Jan 2025 15:24:02 -0500 Subject: [PATCH 65/91] rename --- pipelines/skylab/atac/atac.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 98c1a5b990..10cfb5af66 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -164,7 +164,6 @@ workflow ATAC { if (peak_calling) { call PeakCalling { input: - bam = BWAPairedEndAlignment.bam_aligned_output, annotations_gtf = annotations_gtf, metrics_h5ad = CreateFragmentFile.Snap_metrics, chrom_sizes = chrom_sizes, @@ -793,8 +792,8 @@ task PeakCalling { atac_data_mod.uns[key] = atac_data_mod.uns[key].to_pandas() print("Write into h5ad file") - atac_data_mod.write_h5ad("~{output_base_name}.cellbybin.h5ad") - peak_matrix.write_h5ad("~{output_base_name}.cellbypeak.h5ad") + atac_data_mod.write_h5ad("~{base_name}.cellbybin.h5ad") + peak_matrix.write_h5ad("~{base_name}.cellbypeak.h5ad") CODE >>> From 3ee54aa9024aa09624c9486ac86f9c4f1b2c02d9 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 3 Feb 2025 09:18:47 -0500 Subject: [PATCH 66/91] fix --- pipelines/skylab/atac/atac.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 10cfb5af66..d6c3d29579 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -694,7 +694,7 @@ task PeakCalling { import polars as pl import pandas as pd - base_name = "~{output_base_name}" + output_base_name = "~{output_base_name}" atac_gtf = "~{annotations_gtf}" metrics_h5ad = "~{metrics_h5ad}" chrom_sizes = "~{chrom_sizes}" @@ -792,8 +792,8 @@ task PeakCalling { atac_data_mod.uns[key] = atac_data_mod.uns[key].to_pandas() print("Write into h5ad file") - atac_data_mod.write_h5ad("~{base_name}.cellbybin.h5ad") - peak_matrix.write_h5ad("~{base_name}.cellbypeak.h5ad") + atac_data_mod.write_h5ad("~{output_base_name}.cellbybin.h5ad") + peak_matrix.write_h5ad("~{output_base_name}.cellbypeak.h5ad") CODE >>> @@ -806,7 +806,7 @@ task PeakCalling { } output { - File peaks_h5ad = "~{output_base_name}.cellbybin.h5ad" - File matrix_h5ad = "~{output_base_name}.cellbypeak.h5ad" + File cellbybin_h5ad = "~{output_base_name}.cellbybin.h5ad" + File cellbypeak_h5ad = "~{output_base_name}.cellbypeak.h5ad" } } From a3e99ceeb37a10547f9eb9c54fc4a7a936b75501 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 3 Feb 2025 09:20:28 -0500 Subject: [PATCH 67/91] fix --- pipelines/skylab/atac/atac.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index d6c3d29579..9b62bf376c 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -184,7 +184,8 @@ workflow ATAC { File fragment_file_index = fragment_file_index_atac File snap_metrics = snap_metrics_atac File library_metrics_file = library_metrics - File? peakcall_h5ad_file = PeakCalling.peaks_h5ad + File? cellbybin_h5ad_file = PeakCalling.cellbybin_h5ad + File? cellbypeak_h5ad_file = PeakCalling.cellbypeak_h5ad } } From 581d03923eed61a02edbeb6d86850fef8ab6e346 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 3 Feb 2025 09:32:14 -0500 Subject: [PATCH 68/91] fix --- pipelines/skylab/atac/atac.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 9b62bf376c..d87c770279 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -164,6 +164,7 @@ workflow ATAC { if (peak_calling) { call PeakCalling { input: + output_base_name = input_id, annotations_gtf = annotations_gtf, metrics_h5ad = CreateFragmentFile.Snap_metrics, chrom_sizes = chrom_sizes, From b5f0d2fa17eef716fe7a501bfc2051557c64e1bc Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 3 Feb 2025 09:45:51 -0500 Subject: [PATCH 69/91] fix --- pipelines/skylab/multiome/Multiome.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index d6d97a0f84..46056dae5d 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -152,7 +152,8 @@ workflow Multiome { File fragment_file_index = JoinBarcodes.atac_fragment_tsv_index File snap_metrics_atac = JoinBarcodes.atac_h5ad_file File atac_library_metrics = Atac.library_metrics_file - File? peak_call_atac = Atac.peakcall_h5ad_file + File? cellbybin_h5ad_file = Atac.cellbybin_h5ad_file + File? cellbypeak_h5ad_file = Atac.cellbypeak_h5ad_file # optimus outputs File genomic_reference_version_gex = Optimus.genomic_reference_version From 9ca0202f39d0243ecf3d13bb2b79b671cb9523a4 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 3 Feb 2025 09:46:37 -0500 Subject: [PATCH 70/91] fix --- pipelines/skylab/atac/atac.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index d87c770279..f3c00516f8 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -676,7 +676,6 @@ task PeakCalling { } parameter_meta { - bam: "Aligned bam with CB in CB tag. This is the output of the BWAPairedEndAlignment task." annotations_gtf: "GTF for SnapATAC2 to calculate TSS sites of fragment file." disk_size: "Disk size used in create fragment file step." mem_size: "The size of memory used in create fragment file." From d7b84de2c8ac52fbf5baf2047abff49f9c08f964 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 3 Feb 2025 10:56:27 -0500 Subject: [PATCH 71/91] fix --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index f3c00516f8..2001fc875b 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -29,7 +29,7 @@ workflow ATAC { # Option for running files with preindex Boolean preindex = false # Option for running peak calling - Boolean peak_calling = true + Boolean peak_calling = false # BWA ref File tar_bwa_reference From e699564c217ccd99d5be0ec311317af99c07c590 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 3 Feb 2025 11:05:21 -0500 Subject: [PATCH 72/91] add peak call json --- .../10k_pbmc_downsampled_peakcall.json | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled_peakcall.json diff --git a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled_peakcall.json b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled_peakcall.json new file mode 100644 index 0000000000..a567f330b8 --- /dev/null +++ b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled_peakcall.json @@ -0,0 +1,31 @@ +{ + "Multiome.annotations_gtf":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf", + "Multiome.input_id":"10k_PBMC_downsampled", + "Multiome.cloud_provider":"gcp", + "Multiome.gex_r1_fastq":[ + "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R1_gex.fastq.gz" + ], + "Multiome.gex_r2_fastq":[ + "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R2_gex.fastq.gz" + ], + "Multiome.atac_r1_fastq":[ + "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R1_atac.fastq.gz" + ], + "Multiome.atac_r2_fastq":[ + "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R2_atac.fastq.gz" + ], + "Multiome.atac_r3_fastq":[ + "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R3_atac.fastq.gz" + ], + "Multiome.tar_bwa_reference":"gs://gcp-public-data--broad-references/hg38/v0/bwa/v2_2_1/bwa-mem2-2.2.1-Human-GENCODE-build-GRCh38.tar", + "Multiome.tar_star_reference":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_star2.7.10a-Human-GENCODE-build-GRCh38-43.tar", + "Multiome.chrom_sizes":"gs://broad-gotc-test-storage/Multiome/input/hg38.chrom.sizes", + "Multiome.run_cellbender":"false", + "Multiome.Atac.cpu_platform_bwa":"Intel Cascade Lake", + "Multiome.Atac.num_threads_bwa":"16", + "Multiome.Atac.mem_size_bwa":"64", + "Multiome.soloMultiMappers":"Uniform", + "Multiome.gex_nhash_id":"example_1234", + "Multiome.atac_nhash_id":"example_1234", + "Multiome.run_peak_calling":"true" +} From 0159899a6335b64d6b98ea79080d2da3f49e7e13 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 3 Feb 2025 11:11:17 -0500 Subject: [PATCH 73/91] fix --- pipelines/skylab/multiome/Multiome.wdl | 3 ++- website/docs/Pipelines/ATAC/README.md | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 46056dae5d..7822f754e5 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -152,8 +152,9 @@ workflow Multiome { File fragment_file_index = JoinBarcodes.atac_fragment_tsv_index File snap_metrics_atac = JoinBarcodes.atac_h5ad_file File atac_library_metrics = Atac.library_metrics_file + File? peak_call_atac = Atac.peakcall_h5ad_file File? cellbybin_h5ad_file = Atac.cellbybin_h5ad_file - File? cellbypeak_h5ad_file = Atac.cellbypeak_h5ad_file + File? cellbypeak_h5ad_file = Atac.cellbypeak_h5ad # optimus outputs File genomic_reference_version_gex = Optimus.genomic_reference_version diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md index f3d7bc6767..aedaf51ed5 100644 --- a/website/docs/Pipelines/ATAC/README.md +++ b/website/docs/Pipelines/ATAC/README.md @@ -87,7 +87,7 @@ To see specific tool parameters, select the task WDL link in the table; then vie | [TrimAdapters](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/atac/atac.wdl) | Cutadapt v4.4 | cutadapt | Trims adaptor sequences. | | [BWAPairedEndAlignment](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/atac/atac.wdl) | bwa-mem2 | mem | Aligns reads from each set of partitioned FASTQ files to the genome and outputs a BAM with ATAC barcodes in the CB:Z tag. | | [CreateFragmentFile](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/atac/atac.wdl) | make_fragment_file, import_data | SnapATAC2 | Generates a fragment file from the final aligned BAM and outputs per barcode quality metrics in h5ad. A detailed list of these metrics is found in the [ATAC Count Matrix Overview](./count-matrix-overview.md). | -| [PeakCalling](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/atac/atac.wdl) | macs3 | SnapATAC2 | Generates two h5ad files (`cellbybin.h5ad` and `cellbypeak.h5ad`) from the CreateFragmentFile h5ad output file (`metrics.h5ad`). The `peaks.h5ad` contains the peak called per cluster in the macs3 unstructured metadata and `matrix.h5ad` contains the merged peaks and the count matrix of peaks per fragment. A detailed list of these metrics is found in the [ATAC Count Matrix Overview](./count-matrix-overview.md). | +| [PeakCalling](https://github.com/broadinstitute/warp/blob/master/pipelines/skylab/atac/atac.wdl) | macs3 | SnapATAC2 | Generates two h5ad files (`cellbybin.h5ad` and `cellbypeak.h5ad`) from the CreateFragmentFile h5ad output file (`metrics.h5ad`). The `cellbybin.h5ad` contains the peak called per cluster in the macs3 unstructured metadata and `cellbypeak.h5ad` contains the merged peaks and the count matrix of peaks per fragment. A detailed list of these metrics is found in the [ATAC Count Matrix Overview](./count-matrix-overview.md). | ## Output variables @@ -97,8 +97,8 @@ To see specific tool parameters, select the task WDL link in the table; then vie | fragment_file | ``.fragments.sorted.tsv.gz | Bgzipped TSV containing fragment start and stop coordinates per barcode. In order, the columns are "Chromosome", "Start", "Stop", "ATAC Barcode", and "Number Reads". | | snap_metrics | ``_`_library_metrics.csv | CSV file containing library-level metrics. Read more in the [Library Metrics Overview](library-metrics.md) | -| snap_peaks | ` Date: Mon, 3 Feb 2025 11:14:57 -0500 Subject: [PATCH 74/91] fix --- pipelines/skylab/atac/atac.changelog.md | 8 +++++--- pipelines/skylab/atac/atac.wdl | 2 +- pipelines/skylab/multiome/Multiome.changelog.md | 8 +++++--- pipelines/skylab/multiome/Multiome.wdl | 2 +- pipelines/skylab/paired_tag/PairedTag.changelog.md | 8 +++++--- pipelines/skylab/paired_tag/PairedTag.wdl | 2 +- 6 files changed, 18 insertions(+), 12 deletions(-) diff --git a/pipelines/skylab/atac/atac.changelog.md b/pipelines/skylab/atac/atac.changelog.md index 8145cb8761..636f57ac71 100644 --- a/pipelines/skylab/atac/atac.changelog.md +++ b/pipelines/skylab/atac/atac.changelog.md @@ -1,6 +1,8 @@ -# 2.6.1 -2025-01-31 (Date of Last Commit) -* Added an optional PeakCalling task. Added a boolean variable run_peak_calling; default is false. +# 2.7.0 +2025-02-03 (Date of Last Commit) + +* Added an optional PeakCalling task +* Added a boolean variable peak_calling; default is false # 2.6.0 2025-01-21 (Date of Last Commit) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 2001fc875b..d9931517cb 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -51,7 +51,7 @@ workflow ATAC { String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" } - String pipeline_version = "2.6.1" + String pipeline_version = "2.7.0" # Determine docker prefix based on cloud provider String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index e870007734..2dd910c46a 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,6 +1,8 @@ -# 5.9.7 -2025-01-31 (Date of Last Commit) -* Added an optional PeakCalling task to the ATAC workflow. Added a boolean variable run_peak_calling to the Multiome pipeline; default is false. +# 5.10.0 +2025-02-03 (Date of Last Commit) + +* Added an optional PeakCalling task to the ATAC workflow +* Added a boolean variable run_peak_calling to the Multiome pipeline; default is false # 5.9.6 2025-01-21 (Date of Last Commit) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 7822f754e5..4309a1476c 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -7,7 +7,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow Multiome { - String pipeline_version = "5.9.7" + String pipeline_version = "5.10.0" input { String cloud_provider diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index 3c303dd3e9..a1bf71667b 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,6 +1,8 @@ -# 1.10.1 -2025-01-31 (Date of Last Commit) -* Added an optional PeakCalling task to the ATAC workflow. Added a boolean variable run_peak_calling to the Multiome pipeline; default is false. This does not affect the outputs of the pipeline +# 1.11.0 +2025-02-03 (Date of Last Commit) + +* Added an optional PeakCalling task to the ATAC workflow; this does not affect the outputs of the pipeline +* Added a boolean variable run_peak_calling to the Multiome pipeline; default is false and this does not affect the outputs of the pipeline # 1.10.0 2025-01-21 (Date of Last Commit) diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl index 05d1b74b96..99a67dc365 100644 --- a/pipelines/skylab/paired_tag/PairedTag.wdl +++ b/pipelines/skylab/paired_tag/PairedTag.wdl @@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow PairedTag { - String pipeline_version = "1.10.1" + String pipeline_version = "1.11.0" input { From c2e4649a1ab377cbbb7b2003f8a2f10eb1ae3851 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Mon, 3 Feb 2025 16:15:23 +0000 Subject: [PATCH 75/91] Updated pipeline_versions.txt with all pipeline version information --- pipeline_versions.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipeline_versions.txt b/pipeline_versions.txt index f2cfe20fed..74b548c199 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -27,14 +27,14 @@ MultiSampleArrays 1.6.2 2024-08-02 ValidateChip 1.16.7 2024-11-04 Arrays 2.6.30 2024-11-04 AnnotationFiltration 1.2.7 2024-11-04 -Multiome 5.9.7 2025-01-31 +Multiome 5.10.0 2025-02-03 snm3C 4.0.4 2024-08-06 SlideSeq 3.4.8 2025-01-13 scATAC 1.3.2 2023-08-03 BuildIndices 4.0.0 2025-01-17 MultiSampleSmartSeq2 2.2.22 2024-09-11 Optimus 7.9.1 2025-01-13 -atac 2.6.1 2025-01-31 -PairedTag 1.10.1 2025-01-31 +atac 2.7.0 2025-02-03 +PairedTag 1.11.0 2025-02-03 SmartSeq2SingleSample 5.1.21 2024-09-11 MultiSampleSmartSeq2SingleNucleus 2.0.7 2025-01-13 From 9dd8e2adfa98dfa61ba5308d97de7b96cfcf7132 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 3 Feb 2025 11:19:50 -0500 Subject: [PATCH 76/91] fix --- website/docs/Pipelines/ATAC/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md index aedaf51ed5..6989fead44 100644 --- a/website/docs/Pipelines/ATAC/README.md +++ b/website/docs/Pipelines/ATAC/README.md @@ -97,8 +97,8 @@ To see specific tool parameters, select the task WDL link in the table; then vie | fragment_file | ``.fragments.sorted.tsv.gz | Bgzipped TSV containing fragment start and stop coordinates per barcode. In order, the columns are "Chromosome", "Start", "Stop", "ATAC Barcode", and "Number Reads". | | snap_metrics | ``_`_library_metrics.csv | CSV file containing library-level metrics. Read more in the [Library Metrics Overview](library-metrics.md) | -| snap_cellbybin | ``.cellbybin.h5ad | h5ad (Anndata) containing peaks (called by MACS3) per cluster [SnapATAC2](https://github.com/kaizhang/SnapATAC2). | +| snap_cellbypeak | ``.cellbypeak.h5ad | h5ad (Anndata) containing merged peaks (called by MACS3) per cluster and count matrix of insertion sites per peak and cell [SnapATAC2](https://github.com/kaizhang/SnapATAC2). ## Versioning and testing From 89189c0b34e3b344426e8e41c8708acb15ab5678 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 3 Feb 2025 11:22:01 -0500 Subject: [PATCH 77/91] fix --- pipelines/skylab/paired_tag/PairedTag.changelog.md | 2 +- pipelines/skylab/paired_tag/PairedTag.wdl | 2 +- website/docs/Pipelines/ATAC/README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index a1bf71667b..403d6ce5b5 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,4 +1,4 @@ -# 1.11.0 +# 1.10.1 2025-02-03 (Date of Last Commit) * Added an optional PeakCalling task to the ATAC workflow; this does not affect the outputs of the pipeline diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl index 99a67dc365..05d1b74b96 100644 --- a/pipelines/skylab/paired_tag/PairedTag.wdl +++ b/pipelines/skylab/paired_tag/PairedTag.wdl @@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow PairedTag { - String pipeline_version = "1.11.0" + String pipeline_version = "1.10.1" input { diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md index 6989fead44..cd19ec4ac1 100644 --- a/website/docs/Pipelines/ATAC/README.md +++ b/website/docs/Pipelines/ATAC/README.md @@ -98,7 +98,7 @@ To see specific tool parameters, select the task WDL link in the table; then vie | snap_metrics | ``_`_library_metrics.csv | CSV file containing library-level metrics. Read more in the [Library Metrics Overview](library-metrics.md) | | snap_cellbybin | ``.cellbybin.h5ad | h5ad (Anndata) containing peaks (called by MACS3) per cluster [SnapATAC2](https://github.com/kaizhang/SnapATAC2). | -| snap_cellbypeak | ``.cellbypeak.h5ad | h5ad (Anndata) containing merged peaks (called by MACS3) per cluster and count matrix of insertion sites per peak and cell [SnapATAC2](https://github.com/kaizhang/SnapATAC2). +| snap_cellbypeak | ``.cellbypeak.h5ad | h5ad (Anndata) containing merged peaks (called by MACS3) per cluster and count matrix of insertion sites per peak and cell [SnapATAC2](https://github.com/kaizhang/SnapATAC2).| ## Versioning and testing From 86bee87f6ca63291f26da5341e25a7a2a72fc14f Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 3 Feb 2025 11:22:52 -0500 Subject: [PATCH 78/91] fix --- website/docs/Pipelines/ATAC/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md index cd19ec4ac1..7cfde275da 100644 --- a/website/docs/Pipelines/ATAC/README.md +++ b/website/docs/Pipelines/ATAC/README.md @@ -97,8 +97,8 @@ To see specific tool parameters, select the task WDL link in the table; then vie | fragment_file | ``.fragments.sorted.tsv.gz | Bgzipped TSV containing fragment start and stop coordinates per barcode. In order, the columns are "Chromosome", "Start", "Stop", "ATAC Barcode", and "Number Reads". | | snap_metrics | ``_`_library_metrics.csv | CSV file containing library-level metrics. Read more in the [Library Metrics Overview](library-metrics.md) | -| snap_cellbybin | ``.cellbybin.h5ad | h5ad (Anndata) containing peaks (called by MACS3) per cluster [SnapATAC2](https://github.com/kaizhang/SnapATAC2). | -| snap_cellbypeak | ``.cellbypeak.h5ad | h5ad (Anndata) containing merged peaks (called by MACS3) per cluster and count matrix of insertion sites per peak and cell [SnapATAC2](https://github.com/kaizhang/SnapATAC2).| +| snap_cellbybin | ``.cellbybin.h5ad | h5ad (Anndata) containing peaks (called by MACS3) per cluster. [SnapATAC2](https://github.com/kaizhang/SnapATAC2). | +| snap_cellbypeak | ``.cellbypeak.h5ad | h5ad (Anndata) containing merged peaks (called by MACS3) per cluster and count matrix of insertion sites per peak and cell. [SnapATAC2](https://github.com/kaizhang/SnapATAC2).| ## Versioning and testing From d2a77e712f9f4819d94b6d6c18505ee9457e190b Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 3 Feb 2025 11:24:08 -0500 Subject: [PATCH 79/91] fix --- website/docs/Pipelines/ATAC/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/Pipelines/ATAC/README.md b/website/docs/Pipelines/ATAC/README.md index 7cfde275da..0d5ee42458 100644 --- a/website/docs/Pipelines/ATAC/README.md +++ b/website/docs/Pipelines/ATAC/README.md @@ -96,7 +96,7 @@ To see specific tool parameters, select the task WDL link in the table; then vie | bam_aligned_output | ``.bam | BAM containing aligned reads from ATAC workflow. | | fragment_file | ``.fragments.sorted.tsv.gz | Bgzipped TSV containing fragment start and stop coordinates per barcode. In order, the columns are "Chromosome", "Start", "Stop", "ATAC Barcode", and "Number Reads". | | snap_metrics | ``_`_library_metrics.csv | CSV file containing library-level metrics. Read more in the [Library Metrics Overview](library-metrics.md) | + library_metrics | ``_``_library_metrics.csv | CSV file containing library-level metrics. Read more in the [Library Metrics Overview](library-metrics.md) | | snap_cellbybin | ``.cellbybin.h5ad | h5ad (Anndata) containing peaks (called by MACS3) per cluster. [SnapATAC2](https://github.com/kaizhang/SnapATAC2). | | snap_cellbypeak | ``.cellbypeak.h5ad | h5ad (Anndata) containing merged peaks (called by MACS3) per cluster and count matrix of insertion sites per peak and cell. [SnapATAC2](https://github.com/kaizhang/SnapATAC2).| From 15950eca4a8769b9067b391ffeaf363305b1fb82 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Mon, 3 Feb 2025 16:26:23 +0000 Subject: [PATCH 80/91] Updated pipeline_versions.txt with all pipeline version information --- pipeline_versions.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline_versions.txt b/pipeline_versions.txt index 74b548c199..0aafa27cf9 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -35,6 +35,6 @@ BuildIndices 4.0.0 2025-01-17 MultiSampleSmartSeq2 2.2.22 2024-09-11 Optimus 7.9.1 2025-01-13 atac 2.7.0 2025-02-03 -PairedTag 1.11.0 2025-02-03 +PairedTag 1.10.1 2025-02-03 SmartSeq2SingleSample 5.1.21 2024-09-11 MultiSampleSmartSeq2SingleNucleus 2.0.7 2025-01-13 From 049af9d93e5bbf11e26933fae751921dbf527b04 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 3 Feb 2025 11:39:22 -0500 Subject: [PATCH 81/91] fix --- pipelines/skylab/multiome/Multiome.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 4309a1476c..3b73b9b31a 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -154,7 +154,7 @@ workflow Multiome { File atac_library_metrics = Atac.library_metrics_file File? peak_call_atac = Atac.peakcall_h5ad_file File? cellbybin_h5ad_file = Atac.cellbybin_h5ad_file - File? cellbypeak_h5ad_file = Atac.cellbypeak_h5ad + File? cellbypeak_h5ad_file = Atac.cellbypeak_h5ad_file # optimus outputs File genomic_reference_version_gex = Optimus.genomic_reference_version From 1a02ba72c6143568546405e9ec34a4f44b6ae94a Mon Sep 17 00:00:00 2001 From: aawdeh Date: Mon, 3 Feb 2025 12:03:00 -0500 Subject: [PATCH 82/91] fix --- pipelines/skylab/multiome/Multiome.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 3b73b9b31a..d079e5c58a 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -152,7 +152,6 @@ workflow Multiome { File fragment_file_index = JoinBarcodes.atac_fragment_tsv_index File snap_metrics_atac = JoinBarcodes.atac_h5ad_file File atac_library_metrics = Atac.library_metrics_file - File? peak_call_atac = Atac.peakcall_h5ad_file File? cellbybin_h5ad_file = Atac.cellbybin_h5ad_file File? cellbypeak_h5ad_file = Atac.cellbypeak_h5ad_file From ba0982a433a46ec307e7cb734cecea9480539764 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Mon, 3 Feb 2025 14:08:41 -0500 Subject: [PATCH 83/91] updating TestMultiome to run peak calling --- verification/test-wdls/TestMultiome.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/verification/test-wdls/TestMultiome.wdl b/verification/test-wdls/TestMultiome.wdl index d2fcd0eb9a..e710bb1942 100644 --- a/verification/test-wdls/TestMultiome.wdl +++ b/verification/test-wdls/TestMultiome.wdl @@ -52,6 +52,7 @@ workflow TestMultiome { String vault_token_path String google_account_vault_path Boolean run_cellbender = false + Boolean run_peak_calling = false } @@ -86,7 +87,8 @@ workflow TestMultiome { soloMultiMappers = soloMultiMappers, cloud_provider = cloud_provider, gex_nhash_id = gex_nhash_id, - atac_nhash_id = atac_nhash_id + atac_nhash_id = atac_nhash_id, + run_peak_calling = run_peak_calling } From 1023faa2cf0cafd1ca8cea3c9026cf6f7f7203da Mon Sep 17 00:00:00 2001 From: Sid Cox Date: Mon, 3 Feb 2025 14:42:15 -0500 Subject: [PATCH 84/91] update expected_cells and parameterize probability thresholds --- pipelines/skylab/atac/atac.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index d9931517cb..11ab27103e 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -559,7 +559,7 @@ task CreateFragmentFile { atac_gtf = "~{annotations_gtf}" preindex = "~{preindex}" atac_nhash_id = "~{atac_nhash_id}" - expected_cells = ~{atac_expected_cells} + expected_cells = "~{atac_expected_cells}" # calculate chrom size dictionary based on text file chrom_size_dict={} @@ -668,6 +668,7 @@ task PeakCalling { Int min_counts = 5000 Int min_tsse = 10 Int max_counts = 100000 + Float probability_threshold = 0.5 # Runtime attributes/docker String docker_path Int disk_size = 500 @@ -702,6 +703,7 @@ task PeakCalling { min_counts = "~{min_counts}" min_tsse = "~{min_tsse}" max_counts = "~{max_counts}" + probability_threshold = "~{probability_threshold}" print("Peak calling starting...") atac_data = snap.read(metrics_h5ad) From 09adf51c0944ee38ab13928b096b2818b6530a11 Mon Sep 17 00:00:00 2001 From: Sid Cox Date: Mon, 3 Feb 2025 14:47:49 -0500 Subject: [PATCH 85/91] expected cells as integer --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 11ab27103e..24b3f103e0 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -559,7 +559,7 @@ task CreateFragmentFile { atac_gtf = "~{annotations_gtf}" preindex = "~{preindex}" atac_nhash_id = "~{atac_nhash_id}" - expected_cells = "~{atac_expected_cells}" + expected_cells = ~{atac_expected_cells} # calculate chrom size dictionary based on text file chrom_size_dict={} From 2681b75c29c46de271e3836b8e8d20c78245686b Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 4 Feb 2025 09:24:04 -0500 Subject: [PATCH 86/91] fix --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 24b3f103e0..f39c8a77c8 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -746,7 +746,7 @@ task PeakCalling { # Filter doublets based on scrublet scores print("Filter doublets based on scrublet scores") - snap.pp.filter_doublets(atac_data_mod, probability_threshold=0.5) + snap.pp.filter_doublets(atac_data_mod, probability_threshold=0.01) print(atac_data_mod) # Perform graph-based clustering to identify cell clusters. From 6a5f83f7c88cc82e6637dd8d657fb620f6e54b79 Mon Sep 17 00:00:00 2001 From: Sid Cox Date: Tue, 4 Feb 2025 11:26:03 -0500 Subject: [PATCH 87/91] update probability_threshold --- pipelines/skylab/atac/atac.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index f39c8a77c8..0f9826dde8 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -668,7 +668,8 @@ task PeakCalling { Int min_counts = 5000 Int min_tsse = 10 Int max_counts = 100000 - Float probability_threshold = 0.5 + Float probability_threshold = 0.01 + # Runtime attributes/docker String docker_path Int disk_size = 500 @@ -746,7 +747,7 @@ task PeakCalling { # Filter doublets based on scrublet scores print("Filter doublets based on scrublet scores") - snap.pp.filter_doublets(atac_data_mod, probability_threshold=0.01) + snap.pp.filter_doublets(atac_data_mod, probability_threshold=probability_threshold) print(atac_data_mod) # Perform graph-based clustering to identify cell clusters. From e1cdc1453c24776e3c5d92add2122b5856dce3e2 Mon Sep 17 00:00:00 2001 From: Sid Cox Date: Tue, 4 Feb 2025 13:14:19 -0500 Subject: [PATCH 88/91] remove quotes around probability_threshold --- pipelines/skylab/atac/atac.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 0f9826dde8..bb41cb0688 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -675,7 +675,7 @@ task PeakCalling { Int disk_size = 500 Int mem_size = 64 Int nthreads = 4 - } + }gi parameter_meta { annotations_gtf: "GTF for SnapATAC2 to calculate TSS sites of fragment file." @@ -704,7 +704,7 @@ task PeakCalling { min_counts = "~{min_counts}" min_tsse = "~{min_tsse}" max_counts = "~{max_counts}" - probability_threshold = "~{probability_threshold}" + probability_threshold = ~{probability_threshold} print("Peak calling starting...") atac_data = snap.read(metrics_h5ad) From 92468efb74c7fce832f16da7c01940c1faf6f743 Mon Sep 17 00:00:00 2001 From: Sid Cox Date: Tue, 4 Feb 2025 13:37:57 -0500 Subject: [PATCH 89/91] remove typo --- pipelines/skylab/atac/atac.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index bb41cb0688..72c6687a7c 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -675,7 +675,7 @@ task PeakCalling { Int disk_size = 500 Int mem_size = 64 Int nthreads = 4 - }gi + } parameter_meta { annotations_gtf: "GTF for SnapATAC2 to calculate TSS sites of fragment file." From 6bca8f53f45c5c3a89a734b18db8596774f15ecb Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 4 Feb 2025 14:14:59 -0500 Subject: [PATCH 90/91] fix --- pipelines/skylab/atac/atac.wdl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 72c6687a7c..890d9001eb 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -704,11 +704,15 @@ task PeakCalling { min_counts = "~{min_counts}" min_tsse = "~{min_tsse}" max_counts = "~{max_counts}" - probability_threshold = ~{probability_threshold} + probability_threshold = "~{probability_threshold}" + + print(type(probability_threshold)) + probability_threshold = float(probability_threshold) + print(type(probability_threshold)) print("Peak calling starting...") atac_data = snap.read(metrics_h5ad) - + # Calculate and plot the size distribution of fragments print("Calculating fragment size distribution") snap.pl.frag_size_distr(atac_data) From 01249c26e59c56ddbc6dca8e1486555085de1774 Mon Sep 17 00:00:00 2001 From: aawdeh Date: Tue, 4 Feb 2025 14:44:30 -0500 Subject: [PATCH 91/91] fix --- pipelines/skylab/atac/atac.wdl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 890d9001eb..3c74822c93 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -668,7 +668,7 @@ task PeakCalling { Int min_counts = 5000 Int min_tsse = 10 Int max_counts = 100000 - Float probability_threshold = 0.01 + Float probability_threshold = 1 # Runtime attributes/docker String docker_path @@ -706,9 +706,7 @@ task PeakCalling { max_counts = "~{max_counts}" probability_threshold = "~{probability_threshold}" - print(type(probability_threshold)) probability_threshold = float(probability_threshold) - print(type(probability_threshold)) print("Peak calling starting...") atac_data = snap.read(metrics_h5ad)