Skip to content

Commit

Permalink
Updated environment and formatted .smk files (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
CarlosClassen authored Jun 3, 2024
1 parent 9ebbae5 commit 2f123cc
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 73 deletions.
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ dependencies:
- pytest==8.2.1
- pillow==10.3.0
- pdf2image==1.17.0
- samtools==1.20
- samtools==1.20
- snakefmt==0.10.2
162 changes: 99 additions & 63 deletions workflow/rules/cnv.smk
Original file line number Diff line number Diff line change
@@ -1,185 +1,221 @@
rule cnvkit_target_coverage:
input:
bam=os.path.join(config['outdir'], 'preprocessing', '{sample}.sorted.bam'),
bai=os.path.join(config['outdir'], 'preprocessing', '{sample}.sorted.bam.bai'),
target_bed=config['ref']['target_bed']
bam=os.path.join(config["outdir"], "preprocessing", "{sample}.sorted.bam"),
bai=os.path.join(config["outdir"], "preprocessing", "{sample}.sorted.bam.bai"),
target_bed=config["ref"]["target_bed"],
output:
os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.targetcoverage.cnn')
os.path.join(config["outdir"], "cnv", "{sample}", "{sample}.targetcoverage.cnn"),
params:
extra=config['params']['cnvkit']['target_coverage']['extra']
extra=config["params"]["cnvkit"]["target_coverage"]["extra"],
threads: 8
log:
os.path.join(config['outdir'], 'logs', 'cnvkit_target_coverage', '{sample}.log')
os.path.join(config["outdir"], "logs", "cnvkit_target_coverage", "{sample}.log"),
conda:
"../envs/cnvkit.yaml"
shell:
"cnvkit.py coverage {input.bam} {input.target_bed} -o {output} -p {threads} {params.extra} > {log} 2>&1"


rule cnvkit_antitarget_coverage:
input:
bam=os.path.join(config['outdir'], 'preprocessing', '{sample}.sorted.bam'),
bai=os.path.join(config['outdir'], 'preprocessing', '{sample}.sorted.bam.bai'),
antitarget_bed=config['ref']['antitarget_bed']
bam=os.path.join(config["outdir"], "preprocessing", "{sample}.sorted.bam"),
bai=os.path.join(config["outdir"], "preprocessing", "{sample}.sorted.bam.bai"),
antitarget_bed=config["ref"]["antitarget_bed"],
output:
os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.antitargetcoverage.cnn')
os.path.join(
config["outdir"], "cnv", "{sample}", "{sample}.antitargetcoverage.cnn"
),
params:
extra=config['params']['cnvkit']['antitarget_coverage']['extra']
extra=config["params"]["cnvkit"]["antitarget_coverage"]["extra"],
threads: 8
log:
os.path.join(config['outdir'], 'logs', 'cnvkit_antitarget_coverage', '{sample}.log')
os.path.join(
config["outdir"], "logs", "cnvkit_antitarget_coverage", "{sample}.log"
),
conda:
"../envs/cnvkit.yaml"
shell:
"cnvkit.py coverage {input.bam} {input.antitarget_bed} -o {output} -p {threads} {params.extra} > {log} 2>&1"


rule cnvkit_reference:
input:
target=collect(os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.targetcoverage.cnn'), sample=SAMPLES.index),
antitarget=collect(os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.antitargetcoverage.cnn'), sample=SAMPLES.index),
ref=config['ref']['genome']
target=collect(
os.path.join(
config["outdir"], "cnv", "{sample}", "{sample}.targetcoverage.cnn"
),
sample=SAMPLES.index,
),
antitarget=collect(
os.path.join(
config["outdir"], "cnv", "{sample}", "{sample}.antitargetcoverage.cnn"
),
sample=SAMPLES.index,
),
ref=config["ref"]["genome"],
output:
os.path.join(config['outdir'], 'cnv', 'reference.cnn')
os.path.join(config["outdir"], "cnv", "reference.cnn"),
params:
extra=config['params']['cnvkit']['reference']['extra']
extra=config["params"]["cnvkit"]["reference"]["extra"],
log:
os.path.join(config['outdir'], 'logs', 'cnvkit_reference.log')
os.path.join(config["outdir"], "logs", "cnvkit_reference.log"),
conda:
"../envs/cnvkit.yaml"
shell:
"cnvkit.py reference {input.target} {input.antitarget} --fasta {input.ref} -o {output} > {log} 2>&1"


rule cnvkit_fix:
input:
target=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.targetcoverage.cnn'),
antitarget=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.antitargetcoverage.cnn'),
reference=os.path.join(config['outdir'], 'cnv', 'reference.cnn')
target=os.path.join(
config["outdir"], "cnv", "{sample}", "{sample}.targetcoverage.cnn"
),
antitarget=os.path.join(
config["outdir"], "cnv", "{sample}", "{sample}.antitargetcoverage.cnn"
),
reference=os.path.join(config["outdir"], "cnv", "reference.cnn"),
output:
os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.cnr')
os.path.join(config["outdir"], "cnv", "{sample}", "{sample}.cnr"),
log:
os.path.join(config['outdir'], 'logs', 'cnvkit_fix', '{sample}.log')
os.path.join(config["outdir"], "logs", "cnvkit_fix", "{sample}.log"),
conda:
"../envs/cnvkit.yaml"
shell:
"cnvkit.py fix {input.target} {input.antitarget} {input.reference} -o {output} > {log} 2>&1"


rule cnvkit_segment:
input:
cnr=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.cnr'),
vcf=get_vcf
cnr=os.path.join(config["outdir"], "cnv", "{sample}", "{sample}.cnr"),
vcf=get_vcf,
output:
temp(os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.cns'))
temp(os.path.join(config["outdir"], "cnv", "{sample}", "{sample}.cns")),
params:
extra=config['params']['cnvkit']['segment']['extra']
threads: 1 # Limited by HMM method
extra=config["params"]["cnvkit"]["segment"]["extra"],
threads: 1 # Limited by HMM method
log:
os.path.join(config['outdir'], 'logs', 'cnvkit_segment', '{sample}.log')
os.path.join(config["outdir"], "logs", "cnvkit_segment", "{sample}.log"),
conda:
"../envs/cnvkit.yaml"
shell:
"cnvkit.py segment -m hmm {input.cnr} -o {output} --vcf {input.vcf} -p {threads} > {log} 2>&1"


rule cnvkit_call:
input:
cns=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.cns'),
vcf=get_vcf
cns=os.path.join(config["outdir"], "cnv", "{sample}", "{sample}.cns"),
vcf=get_vcf,
output:
os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}_call.cns')
os.path.join(config["outdir"], "cnv", "{sample}", "{sample}_call.cns"),
params:
ploidy=config['params']['cnvkit']['call']['ploidy']
ploidy=config["params"]["cnvkit"]["call"]["ploidy"],
log:
os.path.join(config['outdir'], 'logs', 'cnvkit_call', '{sample}.log')
os.path.join(config["outdir"], "logs", "cnvkit_call", "{sample}.log"),
conda:
"../envs/cnvkit.yaml"
shell:
"cnvkit.py call {input.cns} -o {output} -v {input.vcf} --ploidy {params.ploidy} > {log} 2>&1"


rule cnvkit_bintest:
input:
cnr=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.cnr'),
call_cns=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}_call.cns')
cnr=os.path.join(config["outdir"], "cnv", "{sample}", "{sample}.cnr"),
call_cns=os.path.join(config["outdir"], "cnv", "{sample}", "{sample}_call.cns"),
output:
os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}_bintest.tsv')
os.path.join(config["outdir"], "cnv", "{sample}", "{sample}_bintest.tsv"),
log:
os.path.join(config['outdir'], 'logs', 'cnvkit_bintest', '{sample}.log')
os.path.join(config["outdir"], "logs", "cnvkit_bintest", "{sample}.log"),
conda:
"../envs/cnvkit.yaml"
shell:
"cnvkit.py bintest {input.cnr} -s {input.call_cns} --target -o {output} > {log} 2>&1"


rule cnvkit_breaks:
input:
cnr=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.cnr'),
cns=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.cns')
cnr=os.path.join(config["outdir"], "cnv", "{sample}", "{sample}.cnr"),
cns=os.path.join(config["outdir"], "cnv", "{sample}", "{sample}.cns"),
output:
os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}_breaks.tsv')
os.path.join(config["outdir"], "cnv", "{sample}", "{sample}_breaks.tsv"),
log:
os.path.join(config['outdir'], 'logs', 'cnvkit_breaks', '{sample}.log')
os.path.join(config["outdir"], "logs", "cnvkit_breaks", "{sample}.log"),
conda:
"../envs/cnvkit.yaml"
shell:
"cnvkit.py breaks {input.cnr} {input.cns} > {output} 2>&1"


rule cnvkit_genemetrics:
input:
cnr=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.cnr'),
cns=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.cns')
cnr=os.path.join(config["outdir"], "cnv", "{sample}", "{sample}.cnr"),
cns=os.path.join(config["outdir"], "cnv", "{sample}", "{sample}.cns"),
output:
os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}_genemetrics.tsv')
os.path.join(config["outdir"], "cnv", "{sample}", "{sample}_genemetrics.tsv"),
log:
os.path.join(config['outdir'], 'logs', 'cnvkit_genemetrics', '{sample}.log')
os.path.join(config["outdir"], "logs", "cnvkit_genemetrics", "{sample}.log"),
conda:
"../envs/cnvkit.yaml"
shell:
"cnvkit.py genemetrics {input.cnr} -s {input.cns} > {output} 2>&1"


rule cnvkit_scatter_pdf:
input:
cnr=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.cnr'),
call_cns=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}_call.cns'),
vcf=get_vcf
cnr=os.path.join(config["outdir"], "cnv", "{sample}", "{sample}.cnr"),
call_cns=os.path.join(config["outdir"], "cnv", "{sample}", "{sample}_call.cns"),
vcf=get_vcf,
output:
os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}_scatter.pdf')
os.path.join(config["outdir"], "cnv", "{sample}", "{sample}_scatter.pdf"),
log:
os.path.join(config['outdir'], 'logs', 'cnvkit_scatter_pdf', '{sample}.log')
os.path.join(config["outdir"], "logs", "cnvkit_scatter_pdf", "{sample}.log"),
conda:
"../envs/cnvkit.yaml"
shell:
"cnvkit.py scatter {input.cnr} -s {input.call_cns} --y-min -2.5 -v {input.vcf} -o {output} > {log} 2>&1"


rule cnvkit_scatter_png:
input:
cnr=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.cnr'),
call_cns=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}_call.cns'),
vcf=get_vcf
cnr=os.path.join(config["outdir"], "cnv", "{sample}", "{sample}.cnr"),
call_cns=os.path.join(config["outdir"], "cnv", "{sample}", "{sample}_call.cns"),
vcf=get_vcf,
output:
os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}_scatter.png')
os.path.join(config["outdir"], "cnv", "{sample}", "{sample}_scatter.png"),
log:
os.path.join(config['outdir'], 'logs', 'cnvkit_scatter_png', '{sample}.log')
os.path.join(config["outdir"], "logs", "cnvkit_scatter_png", "{sample}.log"),
conda:
"../envs/cnvkit.yaml"
shell:
"cnvkit.py scatter {input.cnr} -s {input.call_cns} --y-min -2.5 -v {input.vcf} -o {output} > {log} 2>&1"


rule cnvkit_export_vcf:
input:
call_cns=os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}_call.cns')
call_cns=os.path.join(config["outdir"], "cnv", "{sample}", "{sample}_call.cns"),
output:
os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}_cnv.vcf')
os.path.join(config["outdir"], "cnv", "{sample}", "{sample}_cnv.vcf"),
log:
os.path.join(config['outdir'], 'logs', 'cnvkit_export_vcf', '{sample}.log')
os.path.join(config["outdir"], "logs", "cnvkit_export_vcf", "{sample}.log"),
conda:
"../envs/cnvkit.yaml"
shell:
"cnvkit.py export vcf {input.call_cns} -i {wildcards.sample} -o {output} > {log} 2>&1"


rule cnvkit_metrics:
input:
expand(os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}.cnr'), sample=SAMPLES.index),
expand(os.path.join(config['outdir'], 'cnv', '{sample}', '{sample}_call.cns'), sample=SAMPLES.index)
expand(
os.path.join(config["outdir"], "cnv", "{sample}", "{sample}.cnr"),
sample=SAMPLES.index,
),
expand(
os.path.join(config["outdir"], "cnv", "{sample}", "{sample}_call.cns"),
sample=SAMPLES.index,
),
output:
os.path.join(config['outdir'], 'cnv', 'run_metrics.tsv')
os.path.join(config["outdir"], "cnv", "run_metrics.tsv"),
log:
os.path.join(config['outdir'], 'logs', 'cnvkit_metrics.log')
os.path.join(config["outdir"], "logs", "cnvkit_metrics.log"),
conda:
"../envs/cnvkit.yaml"
shell:
Expand Down
9 changes: 7 additions & 2 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@ from snakemake.utils import min_version

min_version("8.12.0")


report: "../report/workflow.rst"


###### Config file and sample sheets #####
configfile: "config/config.yaml"


SAMPLES = pd.read_table(config["samples"]).set_index("sample", drop=False)

snakedir = workflow.basedir
Expand All @@ -20,10 +23,12 @@ validate(SAMPLES, schema="../schemas/samplesheet.schema.yaml")

##### Helper functions #####


def get_bam(wildcards):
"""Retrieve BAM file for a given sample."""
return SAMPLES.loc[wildcards.sample, 'bam_path']
return SAMPLES.loc[wildcards.sample, "bam_path"]


def get_vcf(wildcards):
"""Retrieve VCF file for a given sample."""
return SAMPLES.loc[wildcards.sample, 'vcf_path']
return SAMPLES.loc[wildcards.sample, "vcf_path"]
15 changes: 8 additions & 7 deletions workflow/rules/preprocessing.smk
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
rule samtools_sort:
input:
get_bam
get_bam,
output:
os.path.join(config['outdir'], 'preprocessing', '{sample}.sorted.bam')
os.path.join(config["outdir"], "preprocessing", "{sample}.sorted.bam"),
log:
os.path.join(config['outdir'], 'logs', 'samtools_sort', '{sample}.log')
os.path.join(config["outdir"], "logs", "samtools_sort", "{sample}.log"),
threads: 8
conda:
"../envs/samtools.yaml"
shell:
"samtools sort -@ {threads} {input} -o {output} > {log} 2>&1"


rule samtools_index:
input:
os.path.join(config['outdir'], 'preprocessing', '{sample}.sorted.bam')
os.path.join(config["outdir"], "preprocessing", "{sample}.sorted.bam"),
output:
os.path.join(config['outdir'], 'preprocessing', '{sample}.sorted.bam.bai')
os.path.join(config["outdir"], "preprocessing", "{sample}.sorted.bam.bai"),
log:
os.path.join(config['outdir'], 'logs', 'samtools_index', '{sample}.log')
os.path.join(config["outdir"], "logs", "samtools_index", "{sample}.log"),
conda:
"../envs/samtools.yaml"
shell:
"samtools index {input} > {log} 2>&1"
"samtools index {input} > {log} 2>&1"

0 comments on commit 2f123cc

Please sign in to comment.