Skip to content

Commit

Permalink
Add summary stats
Browse files Browse the repository at this point in the history
  • Loading branch information
abbyevewilliams committed Feb 21, 2025
1 parent 604ed04 commit 1833cdc
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 21 deletions.
12 changes: 1 addition & 11 deletions profiles/slurm/config.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,9 @@
# Snakemake SLURM executor profile.
# NOTE(review): this is a diff view whose +/- markers were lost in extraction,
# so pre-change and post-change lines appear interleaved below.
executor: slurm
jobs: 20
latency-wait: 60
# NOTE(review): duplicate 'retries' key — 'retries: 3' is the pre-change value
# and 'retries: 1' the replacement. Duplicate keys are invalid YAML 1.2; most
# parsers silently keep the last value. Only one should survive in the file.
retries: 3
retries: 1

# Defaults applied to every rule unless overridden in set-resources.
default-resources:
slurm_partition: "short"
slurm_account: "biol-silvereye"
runtime: 720 # 12 hours

# Per-rule resource overrides (pre-change content; the commit removed these).
set-resources:
bwa_index:
mem: "32G"
cpus_per_task: 4
bwa_map:
slurm_partition: "long"
runtime: 1440 # 24 hours
mem: "32G"
cpus_per_task: 8
11 changes: 7 additions & 4 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,13 @@ READS = ['1', '2']

# Top-level target rule: requesting these files drives the whole workflow.
rule all:
    input:
        expand("results/sorted/{sample}.bam.bai", sample=SAMPLES),  # indexed bams
        expand("results/dedup/{sample}.bam", sample=SAMPLES),  # deduplicated bams
        expand("results/dedup/{sample}.stats", sample=SAMPLES),  # mapping stats
        expand("results/dedup/{sample}.depth", sample=SAMPLES),  # depth stats
        # Trailing commas are required: without them Python concatenates the
        # adjacent string literals, and a string followed by expand(...) is a
        # syntax error.
        "results/mapping_summary.txt",  # summary of stats
        "results/avg_depth.txt",  # average depth
        expand("results/mapdamage/{sample}/Runtime_log.txt", sample=SAMPLES)  # mapdamage results

localrules:
all
Expand Down
3 changes: 3 additions & 0 deletions workflow/rules/damage.smk
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,8 @@ rule mapdamage2:
rescaled_bam="results/mapdamage/{sample}.rescaled.bam" # uncomment if you want the rescaled BAM file
log:
"results/logs/mapdamage/{sample}.log"
threads: 4
resources:
mem="64GB"
wrapper:
"v5.8.0/bio/mapdamage2"
61 changes: 55 additions & 6 deletions workflow/rules/map.smk
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ rule samtools_sort:
"results/sorted/{sample}.bam"
log:
"results/logs/samtools_sort/{sample}.log"
threads: 4
resources:
mem="100GB"
wrapper:
"v5.8.0/bio/samtools/sort"

Expand All @@ -46,42 +49,88 @@ rule samtools_index:
"results/sorted/{sample}.bam.bai"
log:
"results/logs/samtools_index/{sample}.log"
resources:
mem="8GB"
wrapper:
"v5.8.0/bio/samtools/index"

# Mark (and remove, per --REMOVE_DUPLICATES) PCR/optical duplicates with
# Picard MarkDuplicates. Named input/output keys match the v5.8.0 wrapper's
# expected interface (bams / bam + metrics).
rule markduplicates_bam:
    input:
        bams="results/sorted/{sample}.bam"
    output:
        bam="results/dedup/{sample}.bam",
        metrics="results/dedup/{sample}.metrics.txt"
    log:
        "results/logs/markduplicates/{sample}.log"
    params:
        # Duplicates are dropped from the output BAM, not just flagged.
        extra="--REMOVE_DUPLICATES true"
    threads: 4
    resources:
        mem="64GB"
    wrapper:
        "v5.8.0/bio/picard/markduplicates"

# Per-position read depth of each deduplicated BAM (samtools depth wrapper).
rule samtools_depth:
    input:
        bams="results/dedup/{sample}.bam"
    output:
        "results/dedup/{sample}.depth"
    log:
        "results/logs/samtools_depth/{sample}.log"
    threads: 4
    resources:
        mem="32GB"
    wrapper:
        "v5.8.0/bio/samtools/depth"

# Mapping statistics for each deduplicated BAM (samtools stats wrapper).
rule samtools_stats:
    input:
        bam="results/dedup/{sample}.bam"
    output:
        "results/dedup/{sample}.stats"
    log:
        "results/logs/samtools_stats/{sample}.log"
    threads: 4
    resources:
        mem="32GB"
    wrapper:
        "v5.8.0/bio/samtools/stats"

# Summarise key stats across all samples into one TSV.
rule summarise_samtools_stats:
    input:
        # This rule aggregates over every sample (the shell loop iterates the
        # whole list), so expand() is required here: a bare {sample} wildcard
        # in input with no matching wildcard in output is a Snakemake error.
        stats_files=expand("results/dedup/{sample}.stats", sample=SAMPLES)
    output:
        "results/mapping_summary.txt"
    shell:
        """
        echo -e "Sample\tTotal_Reads\tMapped_Reads\tPercent_Mapped\tError_Rate\tAvg_Quality" > {output}
        for stats_file in {input.stats_files}; do
        sample=$(basename "$stats_file" .stats)
        total_reads=$(grep "raw total sequences:" "$stats_file" | cut -f 3)
        mapped_reads=$(grep "reads mapped:" "$stats_file" | head -n 1 | cut -f 3)
        error_rate=$(grep "error rate:" "$stats_file" | cut -f 3)
        avg_quality=$(grep "average quality:" "$stats_file" | cut -f 3)
        percent_mapped=$(awk -v mapped="$mapped_reads" -v total="$total_reads" 'BEGIN {{ if (total>0) print (mapped/total)*100; else print 0 }}')
        echo -e "$sample\t$total_reads\t$mapped_reads\t$percent_mapped\t$error_rate\t$avg_quality" >> {output}
        done
        """

# Calculate average depth for each sample, written to a single summary TSV.
rule compute_average_depth:
    input:
        # Aggregates over all samples, so expand() is required: a {sample}
        # wildcard in input with no wildcard in output is a Snakemake error.
        depth_files=expand("results/dedup/{sample}.depth", sample=SAMPLES)
    output:
        "results/avg_depth.txt"
    shell:
        """
        echo -e "Sample\tAverage_Depth" > {output}
        for depth_file in {input.depth_files}; do
        sample=$(basename "$depth_file" .depth)
        avg_depth=$(awk '{{sum+=$3}} END {{if (NR>0) print sum/NR; else print 0}}' "$depth_file")
        echo -e "$sample\t$avg_depth" >> {output}
        done
        """

0 comments on commit 1833cdc

Please sign in to comment.