Skip to content

Commit

Permalink
Update paths
Browse files Browse the repository at this point in the history
  • Loading branch information
abbyevewilliams committed Feb 19, 2025
1 parent e9a8e72 commit 4da7533
Show file tree
Hide file tree
Showing 8 changed files with 121 additions and 68 deletions.
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
results/**
resources/**
logs/**
slurm-logs/
.snakemake
.snakemake/**

*.dot
*.png
*.csv
*.tsv
*.txt
*.pdf

snakemake-env/
11 changes: 5 additions & 6 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
data:
reads_dir: "/data/biol-silvereye/norfolk_wgs/arbor"
samples: "/data/biol-silvereye/ball6625/norfolk-pipeline/samples.txt"
reference-genome: "/data/biol-silvereye/sjoh4959/ref_genome/Zlat_2_Tgutt_ZW.fasta"
adapter1: "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA"
adapter2: "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"
reads_dir: "/data/biol-silvereye/norfolk_wgs/arbor"
samples: "/data/biol-silvereye/ball6625/norfolk-pipeline/samples.txt"
reference-genome: "/data/biol-silvereye/sjoh4959/ref_genome/Zlat_2_Tgutt_ZW.fasta"
adapter1: "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA"
adapter2: "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"
2 changes: 1 addition & 1 deletion profiles/slurm/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ retries: 3
default-resources:
slurm_partition: "short"
slurm_account: "biol-silvereye"
runtime: 1220 # 12 hours
runtime: 720 # 12 hours

set-resources:
bwa-map:
Expand Down
8 changes: 4 additions & 4 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
#SBATCH --cpus-per-task=1
#SBATCH --mem=8G
#SBATCH --time=12:00:00
#SBATCH --output=logs/sbatch_%j.log
#SBATCH --error=logs/sbatch_%j.error
#SBATCH --output=slurm-logs/%j.log
#SBATCH --error=slurm-logs/%j.error

# Load necessary modules
ml Mamba/23.11.0-0
Expand All @@ -15,7 +15,7 @@ source activate /data/biol-silvereye/ball6625/norfolk-pipeline/snakemake-env
conda config --set channel_priority strict

# Run the pipeline
#snakemake --unlock
snakemake --unlock
snakemake --workflow-profile profiles/slurm \
--use-conda --keep-going \
--rerun-incomplete -j 10
--rerun-incomplete -j 20
48 changes: 44 additions & 4 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,44 @@
# Main entrypoint of the workflow.
# Please follow the best practices:
# https://snakemake.readthedocs.io/en/stable/snakefiles/best_practices.html,
# in particular regarding the standardized folder structure mentioned there.
# =================================================================================================
# Setup
# =================================================================================================

# Packages
import pandas as pd
import os

# Point to config file
configfile: "config/config.yaml"

# Read in sample names (one per line). Surrounding whitespace is stripped and
# blank lines are skipped, so an empty string never becomes a {sample}
# wildcard value in the expand() calls of the target rule.
with open(config["samples"]) as f:
    SAMPLES = [name for name in map(str.strip, f) if name]

# State reads
READS = ['1', '2']

# =================================================================================================
# Default "All" Target Rule
# =================================================================================================

# Default target rule: it has no outputs of its own and only lists the final
# files wanted for every sample in SAMPLES (and every read in READS where the
# path contains a {read} wildcard). Requesting them makes Snakemake run the
# rules that produce these files.

rule all:
input:
expand("results/fastqc_initial/{sample}_R{read}_fastqc.html", sample=SAMPLES, read=READS),
expand("results/trimmed/{sample}.collapsed.trimmed.fastq.gz", sample=SAMPLES),
expand("results/fastqc_post_trim/{sample}_collapsed_fastqc.html", sample=SAMPLES),
expand("results/dedup/{sample}.bam", sample=SAMPLES),
expand("results/mapdamage/{sample}/Runtime_log.txt", sample=SAMPLES),
expand("results/dedup/{sample}.stats", sample=SAMPLES),
expand("results/dedup/{sample}.depth", sample=SAMPLES)

# Declare `all` as a local rule (per the Snakemake docs, local rules are run
# on the submitting host instead of being submitted as cluster jobs).
localrules:
all

# =================================================================================================
# Rule Modules
# =================================================================================================

include: "rules/clean.smk"
include: "rules/map.smk"
include: "rules/damage.smk"
42 changes: 24 additions & 18 deletions workflow/rules/clean.smk
Original file line number Diff line number Diff line change
Expand Up @@ -2,43 +2,49 @@

rule fastqc_initial:
input:
fq=lambda wildcards: os.path.join(config["data"]["reads_dir"], f"{wildcards.sample}_R{wildcards.read}.fastq.gz")
fq=lambda wildcards: os.path.join(config["reads_dir"], f"{wildcards.sample}_R{wildcards.read}.fastq.gz")
output:
html="fastqc_initial/{sample}_R{read}_fastqc.html",
zip="fastqc_initial/{sample}_R{read}_fastqc.zip"
html="results/fastqc_initial/{sample}_R{read}_fastqc.html",
zip="results/fastqc_initial/{sample}_R{read}_fastqc.zip"
log:
"logs/fastqc_initial/{sample}_R{read}.log"
"results/logs/fastqc_initial/{sample}_R{read}.log"
wrapper:
"v5.8.0/bio/fastqc"

# Remove adapters and merge

rule adapterremoval:
input:
R1=lambda wildcards: os.path.join(config["data"]["reads_dir"], f"{wildcards.sample}_R1.fastq.gz"),
R2=lambda wildcards: os.path.join(config["data"]["reads_dir"], f"{wildcards.sample}_R2.fastq.gz")
sample=lambda wildcards: [
os.path.join(config["reads_dir"], f"{wildcards.sample}_R1.fastq.gz"),
os.path.join(config["reads_dir"], f"{wildcards.sample}_R2.fastq.gz"),
]
output:
fq1="trimmed/{sample}_R1.fastq.gz", # trimmed mate1 reads
fq2="trimmed/{sample}_R2.fastq.gz", # trimmed mate2 reads
collapsed="trimmed/{sample}.collapsed.fastq.gz", # overlapping mate-pairs which have been merged into a single read
collapsed_trunc="trimmed/{sample}.collapsed.trimmed.fastq.gz", # collapsed reads that were quality trimmed
settings="trimmed/{sample}.settings" # parameters as well as overall statistics
fq1="results/trimmed/{sample}_R1.fastq.gz", # trimmed mate1 reads
fq2="results/trimmed/{sample}_R2.fastq.gz", # trimmed mate2 reads
collapsed="results/trimmed/{sample}.collapsed.fastq.gz", # overlapping mate-pairs which have been merged into a single read
collapsed_trunc="results/trimmed/{sample}.collapsed.trimmed.fastq.gz", # collapsed reads that were quality trimmed
singleton="results/trimmed/{sample}.singleton.fastq.gz", # mate-pairs for which the mate has been discarded
discarded="results/trimmed/{sample}.discarded.fastq.gz", # reads that did not pass filters
settings="results/trimmed/{sample}.settings" # parameters as well as overall statistics
log:
"logs/adapterremoval/{sample}.log"
"results/logs/adapterremoval/{sample}.log"
params:
adapter1=config["data"]["adapter1"],
adapter2=config["data"]["adapter2"],
adapter1=config["adapter1"],
adapter2=config["adapter2"],
extra="--collapse --collapse-deterministic --trimns --trimqualities"
wrapper:
"v5.8.0/bio/adapterremoval"

# Run fastqc again

rule fastqc_post_trim:
input:
"trimmed/{sample}.collapsed.trimmed.fastq.gz"
"results/trimmed/{sample}.collapsed.trimmed.fastq.gz"
output:
html="fastqc_post_trim/{sample}_collapsed_fastqc.html",
zip="fastqc_post_trim/{sample}_collapsed_fastqc.zip"
html="results/fastqc_post_trim/{sample}_collapsed_fastqc.html",
zip="results/fastqc_post_trim/{sample}_collapsed_fastqc.zip"
log:
"logs/fastqc_post_trim/{sample}.log"
"results/logs/fastqc_post_trim/{sample}.log"
wrapper:
"v5.8.0/bio/fastqc"
26 changes: 12 additions & 14 deletions workflow/rules/damage.smk
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,19 @@

rule mapdamage2:
input:
ref=config["data"]["reference-genome"],
bam="dedup/{sample}.bam",
ref=config["reference-genome"],
bam="results/dedup/{sample}.bam",
output:
log="mapdamage/{sample}/Runtime_log.txt", # output folder is inferred from this file, so it needs to be the same folder for all output files
GtoA3p="mapdamage/{sample}/3pGtoA_freq.txt",
CtoT5p="mapdamage/{sample}/5pCtoT_freq.txt",
dnacomp="mapdamage/{sample}/dnacomp.txt",
frag_misincorp="mapdamage/{sample}/Fragmisincorporation_plot.pdf",
len="mapdamage/{sample}/Length_plot.pdf",
lg_dist="mapdamage/{sample}/lgdistribution.txt",
misincorp="mapdamage/{sample}/misincorporation.txt",
rescaled_bam="mapdamage/{sample}.rescaled.bam" # rescaled BAM file (optional output; comment this line out if it is not wanted)
params:
extra="--no-stats", # optional parameters for mapdamage2 (except -i, -r, -d, --rescale)
log="results/mapdamage/{sample}/Runtime_log.txt", # output folder is inferred from this file, so it needs to be the same folder for all output files
GtoA3p="results/mapdamage/{sample}/3pGtoA_freq.txt",
CtoT5p="results/mapdamage/{sample}/5pCtoT_freq.txt",
dnacomp="results/mapdamage/{sample}/dnacomp.txt",
frag_misincorp="results/mapdamage/{sample}/Fragmisincorporation_plot.pdf",
len="results/mapdamage/{sample}/Length_plot.pdf",
lg_dist="results/mapdamage/{sample}/lgdistribution.txt",
misincorp="results/mapdamage/{sample}/misincorporation.txt",
rescaled_bam="results/mapdamage/{sample}.rescaled.bam" # rescaled BAM file (optional output; comment this line out if it is not wanted)
log:
"logs/mapdamage/{sample}.log"
"results/logs/mapdamage/{sample}.log"
wrapper:
"v5.8.0/bio/mapdamage2"
42 changes: 21 additions & 21 deletions workflow/rules/map.smk
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# Map reads to the reference
rule bwa_map:
input:
ref=config["data"]["reference-genome"],
fq1="trimmed/{sample}.collapsed.trimmed.fastq.gz", # Adjust to the desired output from adapterremoval
fq2="trimmed/{sample}.collapsed.trimmed.fastq.gz" # Same file used for both paired-end (collapsed reads)
ref=config["reference-genome"],
fq1="results/trimmed/{sample}.collapsed.trimmed.fastq.gz", # Adjust to the desired output from adapterremoval
fq2="results/trimmed/{sample}.collapsed.trimmed.fastq.gz" # Same file used for both paired-end (collapsed reads)
output:
"mapped/{sample}.bam"
"results/mapped/{sample}.bam"
log:
"logs/bwa-map/{sample}.log"
"results/logs/bwa-map/{sample}.log"
threads: 8
resources:
mem="32GB"
Expand All @@ -17,34 +17,34 @@ rule bwa_map:
# Sort reads
rule samtools_sort:
input:
"mapped/{sample}.bam"
"results/mapped/{sample}.bam"
output:
"sorted/{sample}.bam"
"results/sorted/{sample}.bam"
log:
"logs/samtools_sort/{sample}.log"
"results/logs/samtools_sort/{sample}.log"
wrapper:
"v5.8.0/bio/samtools/sort"

# Index the sorted bam file
rule samtools_index:
input:
"sorted/{sample}.bam"
"results/sorted/{sample}.bam"
output:
"sorted/{sample}.bam.bai"
"results/sorted/{sample}.bam.bai"
log:
"logs/samtools_index/{sample}.log"
"results/logs/samtools_index/{sample}.log"
wrapper:
"v5.8.0/bio/samtools/index"

# Mark duplicates
rule markduplicates_bam:
input:
"sorted/{sample}.bam"
"results/sorted/{sample}.bam"
output:
"dedup/{sample}.bam",
"dedup/{sample}.metrics.txt"
"results/dedup/{sample}.bam",
"results/dedup/{sample}.metrics.txt"
log:
"logs/markduplicates/{sample}.log"
"results/logs/markduplicates/{sample}.log"
params:
extra="--REMOVE_DUPLICATES true"
threads: 4
Expand All @@ -54,21 +54,21 @@ rule markduplicates_bam:
# Calculate depth
rule samtools_depth:
input:
"dedup/{sample}.bam"
"results/dedup/{sample}.bam"
output:
"dedup/{sample}.depth"
"results/dedup/{sample}.depth"
log:
"logs/samtools_depth/{sample}.log"
"results/logs/samtools_depth/{sample}.log"
wrapper:
"v5.8.0/bio/samtools/depth"

# Calculate stats
rule samtools_stats:
input:
"dedup/{sample}.bam"
"results/dedup/{sample}.bam"
output:
"dedup/{sample}.stats"
"results/dedup/{sample}.stats"
log:
"logs/samtools_stats/{sample}.log"
"results/logs/samtools_stats/{sample}.log"
wrapper:
"v5.8.0/bio/samtools/stats"

0 comments on commit 4da7533

Please sign in to comment.