From ddf82ce3b03daefca79fb83636def28aa0603059 Mon Sep 17 00:00:00 2001 From: deliaBlue <103108590+deliaBlue@users.noreply.github.com> Date: Tue, 21 Nov 2023 21:46:14 +0100 Subject: [PATCH] test: fix workflow descriptors format and lint test (#124) * refactor: fix formatting * test: fix test call * build: add config file for lint test * test: add lint test config file path --- test/config_lint.yaml | 6 ++++ test/test_snakefmt.sh | 2 +- workflow/Snakefile | 8 +++-- workflow/rules/common.smk | 4 +-- workflow/rules/map.smk | 20 ++++--------- workflow/rules/quantify.smk | 58 ++++++++++--------------------------- 6 files changed, 35 insertions(+), 63 deletions(-) create mode 100644 test/config_lint.yaml diff --git a/test/config_lint.yaml b/test/config_lint.yaml new file mode 100644 index 0000000..658d126 --- /dev/null +++ b/test/config_lint.yaml @@ -0,0 +1,6 @@ +samples: test/test_files/samples_table.tsv +genome_file: test/test_files/genome.fa.gz +gtf_file: test/test_files/gene_annotations.gtf.gz +mirna_file: test/test_files/mirna_annotations.gff3 +map_chr_file: test/test_files/ucsc_to_ensembl.tsv +mir_list: ['isomir', 'mirna', 'pri-mir'] diff --git a/test/test_snakefmt.sh b/test/test_snakefmt.sh index f6128e6..be7ecf6 100755 --- a/test/test_snakefmt.sh +++ b/test/test_snakefmt.sh @@ -17,4 +17,4 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" cd $script_dir # Run tests -snakefmt --check -l 80 ../workflow +snakefmt --check ../workflow diff --git a/workflow/Snakefile b/workflow/Snakefile index 06570ec..4dcd95c 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -18,6 +18,10 @@ from pathlib import Path ### Configuration validation ############################################################################### + +configfile: Path(f"{workflow.basedir}/../test/config_lint.yaml") + + validate(config, Path("../config/config_schema.json")) @@ -64,9 +68,7 @@ rule finish: sample=pd.unique(samples_table.index.values), ), intersect_sam=expand( - OUT_DIR - / "{sample}" - / "alignments_intersecting_mirna_sorted_tag.sam", + OUT_DIR / "{sample}" / "alignments_intersecting_mirna_sorted_tag.sam", sample=pd.unique(samples_table.index.values), ), table=expand( diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 4bf25bc..1565371 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -6,9 +6,7 @@ def get_sample(column_id: str, sample_id: int = None) -> str: """Get relevant per sample information.""" if sample_id: - return str( - samples_table[column_id][samples_table.index == sample_id].iloc[0] - ) + return str(samples_table[column_id][samples_table.index == sample_id].iloc[0]) else: return str(samples_table[column_id].iloc[0]) diff --git a/workflow/rules/map.smk b/workflow/rules/map.smk index 672b9c5..effa9a6 100644 --- a/workflow/rules/map.smk +++ b/workflow/rules/map.smk @@ -83,16 +83,13 @@ rule finish_map: rule start: input: reads=lambda wildcards: expand( - pd.Series( - samples_table.loc[wildcards.sample, "sample_file"] - ).values, + pd.Series(samples_table.loc[wildcards.sample, "sample_file"]).values, format=convert_lib_format(get_sample("format")), ), output: reads=OUT_DIR / "{sample}" / "{format}" / "reads.{format}", params: - cluster_log=CLUSTER_LOG - / "uncompress_zipped_files_{sample}_{format}.log", + cluster_log=CLUSTER_LOG / "uncompress_zipped_files_{sample}_{format}.log", log: LOCAL_LOG / "uncompress_zipped_files_{sample}_{format}.log", container: @@ -189,9 +186,7 @@ rule remove_adapters: output: reads=OUT_DIR / "{sample}" / "reads_trimmed_adapters.fasta", params: - adapter=lambda wildcards: get_sample( - "adapter", wildcards.sample - ).upper(), + adapter=lambda wildcards: get_sample("adapter", wildcards.sample).upper(), error_rate=config["error_rate"], minimum_length=config["minimum_length"], overlap=config["overlap"], @@ -283,8 +278,7 @@ rule map_transcriptome_segemehl: input: reads=OUT_DIR / "{sample}" / "reads_collapsed.fasta", transcriptome=OUT_DIR / "transcriptome_trimmed_id.fa", - transcriptome_index_segemehl=OUT_DIR - / "segemehl_transcriptome_index.idx", + transcriptome_index_segemehl=OUT_DIR / "segemehl_transcriptome_index.idx", output: tmap=OUT_DIR / "{sample}" / "segemehl_transcriptome_mappings.sam", params: @@ -650,8 +644,7 @@ rule remove_header_transcriptome_mappings: output: tmap=OUT_DIR / "{sample}" / "transcriptome_mappings_no_header.sam", params: - cluster_log=CLUSTER_LOG - / "remove_header_transcriptome_mappings_{sample}.log", + cluster_log=CLUSTER_LOG / "remove_header_transcriptome_mappings_{sample}.log", log: LOCAL_LOG / "remove_header_transcriptome_mappings_{sample}.log", container: @@ -762,8 +755,7 @@ rule sort_maps_by_id: rule remove_inferiors: input: sort=OUT_DIR / "{sample}" / "mappings_all_sorted_by_id.sam", - script=SCRIPTS_DIR - / "sam_remove_duplicates_inferior_alignments_multimappers.pl", + script=SCRIPTS_DIR / "sam_remove_duplicates_inferior_alignments_multimappers.pl", output: remove_inf=OUT_DIR / "{sample}" / "mappings_all_removed_inferiors.sam", params: diff --git a/workflow/rules/quantify.smk b/workflow/rules/quantify.smk index 422635a..15aa1e7 100644 --- a/workflow/rules/quantify.smk +++ b/workflow/rules/quantify.smk @@ -61,12 +61,8 @@ localrules: rule finish_quantify: input: - primir_intersect_sam=OUT_DIR - / "{sample}" - / "alignments_intersecting_primir.sam", - mirna_intersect_sam=OUT_DIR - / "{sample}" - / "alignments_intersecting_mirna.sam", + primir_intersect_sam=OUT_DIR / "{sample}" / "alignments_intersecting_primir.sam", + mirna_intersect_sam=OUT_DIR / "{sample}" / "alignments_intersecting_mirna.sam", intersect_sam=OUT_DIR / "{sample}" / "alignments_intersecting_mirna_sorted_tag.sam", @@ -131,8 +127,7 @@ rule filter_sam_by_intersecting_primir: output: sam=OUT_DIR / "{sample}" / "alignments_intersecting_primir.sam", params: - cluster_log=CLUSTER_LOG - / "filter_sam_by_intersecting_primir_{sample}.log", + cluster_log=CLUSTER_LOG / "filter_sam_by_intersecting_primir_{sample}.log", log: LOCAL_LOG / "filter_sam_by_intersecting_primir_{sample}.log", container: @@ -159,8 +154,7 @@ rule convert_intersecting_primir_sam_to_bam: output: maps=OUT_DIR / "{sample}" / "alignments_intersecting_primir.bam", params: - cluster_log=CLUSTER_LOG - / "convert_intersecting_primir_sam_to_bam_{sample}.log", + cluster_log=CLUSTER_LOG / "convert_intersecting_primir_sam_to_bam_{sample}.log", log: LOCAL_LOG / "convert_intersecting_primir_sam_to_bam_{sample}.log", container: @@ -203,9 +197,7 @@ rule index_intersecting_primir_bam: input: maps=OUT_DIR / "{sample}" / "alignments_intersecting_primir_sorted.bam", output: - maps=OUT_DIR - / "{sample}" - / "alignments_intersecting_primir_sorted.bam.bai", + maps=OUT_DIR / "{sample}" / "alignments_intersecting_primir_sorted.bam.bai", params: cluster_log=CLUSTER_LOG / "index_intersecting_primir_bam_{sample}.log", log: @@ -225,9 +217,7 @@ rule index_intersecting_primir_bam: rule intersect_extended_mirna: input: - alignment=OUT_DIR - / "{sample}" - / "alignments_intersecting_primir_sorted.bam", + alignment=OUT_DIR / "{sample}" / "alignments_intersecting_primir_sorted.bam", mirna=expand( OUT_DIR / "extended_mirna_annotation_{extension}_nt.gff3", extension=config["extension"], @@ -266,8 +256,7 @@ rule filter_sam_by_intersecting_mirna: output: sam=OUT_DIR / "{sample}" / "alignments_intersecting_mirna.sam", params: - cluster_log=CLUSTER_LOG - / "filter_sam_by__intersecting_mirna_{sample}.log", + cluster_log=CLUSTER_LOG / "filter_sam_by__intersecting_mirna_{sample}.log", log: LOCAL_LOG / "filter_sam_by_intersecting_mirna_{sample}.log", container: @@ -322,12 +311,9 @@ rule sort_intersecting_mirna_by_feat_tag: input: sam=OUT_DIR / "{sample}" / "alignments_intersecting_mirna_tag.sam", output: - sam=OUT_DIR - / "{sample}" - / "alignments_intersecting_mirna_sorted_tag.sam", + sam=OUT_DIR / "{sample}" / "alignments_intersecting_mirna_sorted_tag.sam", params: - cluster_log=CLUSTER_LOG - / "sort_intersecting_mirna_by_feat_tag_{sample}.log", + cluster_log=CLUSTER_LOG / "sort_intersecting_mirna_by_feat_tag_{sample}.log", log: LOCAL_LOG / "sort_intersecting_mirna_by_feat_tag_{sample}.log", container: @@ -345,9 +331,7 @@ rule sort_intersecting_mirna_by_feat_tag: rule quantify_mirna: input: - alignments=OUT_DIR - / "{sample}" - / "alignments_intersecting_mirna_sorted_tag.sam", + alignments=OUT_DIR / "{sample}" / "alignments_intersecting_mirna_sorted_tag.sam", script=SCRIPTS_DIR / "mirna_quantification.py", output: table=OUT_DIR / "TABLES" / "mirna_counts_{sample}", @@ -448,9 +432,7 @@ rule uncollapse_reads: maps=OUT_DIR / "{sample}" / "alignments_intersecting_mirna.sam", script=SCRIPTS_DIR / "sam_uncollapse.pl", output: - maps=OUT_DIR - / "{sample}" - / "alignments_intersecting_mirna_uncollapsed.sam", + maps=OUT_DIR / "{sample}" / "alignments_intersecting_mirna_uncollapsed.sam", params: cluster_log=CLUSTER_LOG / "uncollapse_reads_{sample}.log", log: @@ -474,16 +456,11 @@ rule uncollapse_reads: rule convert_uncollpased_reads_sam_to_bam: input: - maps=OUT_DIR - / "{sample}" - / "alignments_intersecting_mirna_uncollapsed.sam", + maps=OUT_DIR / "{sample}" / "alignments_intersecting_mirna_uncollapsed.sam", output: - maps=OUT_DIR - / "{sample}" - / "alignments_intersecting_mirna_uncollapsed.bam", + maps=OUT_DIR / "{sample}" / "alignments_intersecting_mirna_uncollapsed.bam", params: - cluster_log=CLUSTER_LOG - / "convert_uncollapsed_reads_sam_to_bam_{sample}.log", + cluster_log=CLUSTER_LOG / "convert_uncollapsed_reads_sam_to_bam_{sample}.log", log: LOCAL_LOG / "convert_uncollapsed_reads_sam_to_bam_{sample}.log", container: @@ -501,16 +478,13 @@ rule convert_uncollpased_reads_sam_to_bam: rule sort_uncollpased_reads_bam_by_position: input: - maps=OUT_DIR - / "{sample}" - / "alignments_intersecting_mirna_uncollapsed.bam", + maps=OUT_DIR / "{sample}" / "alignments_intersecting_mirna_uncollapsed.bam", output: maps=OUT_DIR / "{sample}" / "alignments_intersecting_mirna_uncollapsed_sorted.bam", params: - cluster_log=CLUSTER_LOG - / "sort_uncollapsed_reads_bam_by_position_{sample}.log", + cluster_log=CLUSTER_LOG / "sort_uncollapsed_reads_bam_by_position_{sample}.log", log: LOCAL_LOG / "sort_uncollapsed_reads_bam_by_position_{sample}.log", container: