diff --git a/CHANGELOG.md b/CHANGELOG.md index 70d4f552..72171997 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -# v2.8.0.0 +# v2.8.0.0 (in development) - Major changes to many parts of the pipeline as part of a general performance overhaul - Modified most processes in the RUN and RUN_VALIDATION workflows to stream data in and out rather than reading whole files - As part of the previous change, modified most processes in the RUN and RUN_VALIDATION workflows to work with interleaved rather than paired sequence data @@ -18,6 +18,15 @@ - Numerous changes to column names in viral hits TSV, mainly to improve clarity - Updated mislabeled processes +# v2.7.0.2 +- Updated `pipeline-version.txt` + +# v2.7.0.1 +- Fixed index-related issues from v2.7.0.0: + - Updated `EXTRACT_VIRAL_READS` to expect updated path to viral genome DB + - Added `adapters` param to the index config file used to run our tests + - Updated `RUN` and `RUN_VALIDATION` tests to use up-to-date test index (location: `s3://nao-testing/index/20250130`) + # v2.7.0.0 - Implemented masking of viral genome reference in index workflow with MASK_GENOME_FASTA to remove adapter, low-entropy and repeat sequences. - Removed TRIMMOMATIC and BBMAP from EXTRACT_VIRAL_READS. diff --git a/configs/index-for-run-test.config b/configs/index-for-run-test.config index 227a3ce2..4991c442 100644 --- a/configs/index-for-run-test.config +++ b/configs/index-for-run-test.config @@ -27,14 +27,10 @@ params { // Other reference files host_taxon_db = "${projectDir}/ref/host-taxa.tsv" contaminants = "${projectDir}/ref/contaminants.fasta.gz" + adapters = "${projectDir}/ref/adapters.fasta" genome_patterns_exclude = "${projectDir}/ref/hv_patterns_exclude.txt" - - // Kraken viral DB kraken_db = "https://genome-idx.s3.amazonaws.com/kraken/k2_viral_20240904.tar.gz" - // Smallest possible BLAST DB blast_db_name = "nt_others" - - // Pull information from GenBank or Ref Seq ncbi_viral_params = "--section refseq --assembly-level complete" // Other input values @@ -52,4 +48,4 @@ includeConfig "${projectDir}/configs/containers.config" includeConfig "${projectDir}/configs/resources.config" includeConfig "${projectDir}/configs/profiles.config" includeConfig "${projectDir}/configs/output.config" -process.queue = "harmon-queue" // AWS Batch job queue +process.queue = "will-batch-queue" // AWS Batch job queue diff --git a/pipeline-version.txt b/pipeline-version.txt index f225a78a..2635525f 100644 --- a/pipeline-version.txt +++ b/pipeline-version.txt @@ -1 +1 @@ -2.5.2 +2.7.0.2 diff --git a/subworkflows/local/extractViralReads/main.nf b/subworkflows/local/extractViralReads/main.nf index fb806418..4811a7da 100644 --- a/subworkflows/local/extractViralReads/main.nf +++ b/subworkflows/local/extractViralReads/main.nf @@ -42,8 +42,8 @@ workflow EXTRACT_VIRAL_READS { bbduk_suffix bracken_threshold main: - // 0. Get reference paths - viral_genome_path = "${ref_dir}/results/virus-genomes-filtered.fasta.gz" + // Get reference paths + viral_genome_path = "${ref_dir}/results/virus-genomes-masked.fasta.gz" genome_meta_path = "${ref_dir}/results/virus-genome-metadata-gid.tsv.gz" bt2_virus_index_path = "${ref_dir}/results/bt2-virus-index" bt2_human_index_path = "${ref_dir}/results/bt2-human-index" diff --git a/test-data/gold-standard-results/bracken_reports_merged.tsv.gz b/test-data/gold-standard-results/bracken_reports_merged.tsv.gz index 1a830d51..73853322 100644 Binary files a/test-data/gold-standard-results/bracken_reports_merged.tsv.gz and b/test-data/gold-standard-results/bracken_reports_merged.tsv.gz differ diff --git a/test-data/gold-standard-results/kraken_reports_merged.tsv.gz b/test-data/gold-standard-results/kraken_reports_merged.tsv.gz index f9290033..b0fe1786 100644 Binary files a/test-data/gold-standard-results/kraken_reports_merged.tsv.gz and b/test-data/gold-standard-results/kraken_reports_merged.tsv.gz differ diff --git a/test-data/gold-standard-results/merged_blast_filtered.tsv.gz b/test-data/gold-standard-results/merged_blast_filtered.tsv.gz index fabbe596..957bda37 100644 Binary files a/test-data/gold-standard-results/merged_blast_filtered.tsv.gz and b/test-data/gold-standard-results/merged_blast_filtered.tsv.gz differ diff --git a/test-data/gold-standard-results/read_counts.tsv.gz b/test-data/gold-standard-results/read_counts.tsv.gz index 382e26d8..eacfb360 100644 Binary files a/test-data/gold-standard-results/read_counts.tsv.gz and b/test-data/gold-standard-results/read_counts.tsv.gz differ diff --git a/test-data/gold-standard-results/subset_qc_adapter_stats.tsv.gz b/test-data/gold-standard-results/subset_qc_adapter_stats.tsv.gz index fff68330..d7c98a62 100644 Binary files a/test-data/gold-standard-results/subset_qc_adapter_stats.tsv.gz and b/test-data/gold-standard-results/subset_qc_adapter_stats.tsv.gz differ diff --git a/test-data/gold-standard-results/subset_qc_basic_stats.tsv.gz b/test-data/gold-standard-results/subset_qc_basic_stats.tsv.gz index 2dbff162..548c2f13 100644 Binary files a/test-data/gold-standard-results/subset_qc_basic_stats.tsv.gz and b/test-data/gold-standard-results/subset_qc_basic_stats.tsv.gz differ diff --git a/test-data/gold-standard-results/subset_qc_length_stats.tsv.gz b/test-data/gold-standard-results/subset_qc_length_stats.tsv.gz index 98a7bba5..c08544f3 100644 Binary files a/test-data/gold-standard-results/subset_qc_length_stats.tsv.gz and b/test-data/gold-standard-results/subset_qc_length_stats.tsv.gz differ diff --git a/test-data/gold-standard-results/subset_qc_quality_base_stats.tsv.gz b/test-data/gold-standard-results/subset_qc_quality_base_stats.tsv.gz index 327b596b..a7bd9b01 100644 Binary files a/test-data/gold-standard-results/subset_qc_quality_base_stats.tsv.gz and b/test-data/gold-standard-results/subset_qc_quality_base_stats.tsv.gz differ diff --git a/test-data/gold-standard-results/subset_qc_quality_sequence_stats.tsv.gz b/test-data/gold-standard-results/subset_qc_quality_sequence_stats.tsv.gz index 59d30100..7c40eddc 100644 Binary files a/test-data/gold-standard-results/subset_qc_quality_sequence_stats.tsv.gz and b/test-data/gold-standard-results/subset_qc_quality_sequence_stats.tsv.gz differ diff --git a/test-data/gold-standard-results/virus_hits_filtered.tsv.gz b/test-data/gold-standard-results/virus_hits_filtered.tsv.gz index 7c80816f..c74a57ca 100644 Binary files a/test-data/gold-standard-results/virus_hits_filtered.tsv.gz and b/test-data/gold-standard-results/virus_hits_filtered.tsv.gz differ diff --git a/tests/modules/local/bbduk/bbduk.nf.test b/tests/modules/local/bbduk/bbduk.nf.test index d0b82782..4729e2e2 100644 --- a/tests/modules/local/bbduk/bbduk.nf.test +++ b/tests/modules/local/bbduk/bbduk.nf.test @@ -35,7 +35,7 @@ nextflow_process { process { ''' input[0] = INTERLEAVE_FASTQ.out.output - input[1] = "${params.ref_dir}/results/virus-genomes-filtered.fasta.gz" + input[1] = "${params.ref_dir}/results/virus-genomes-masked.fasta.gz" input[2] = "0.4" input[3] = "27" input[4] = "ribo" @@ -89,7 +89,7 @@ nextflow_process { process { ''' input[0] = LOAD_SAMPLESHEET.out.samplesheet - input[1] = "${params.ref_dir}/results/virus-genomes-filtered.fasta.gz" + input[1] = "${params.ref_dir}/results/virus-genomes-masked.fasta.gz" input[2] = "0.4" input[3] = "27" input[4] = "ribo" diff --git a/tests/modules/local/bbduk/bbduk_hits.nf.test b/tests/modules/local/bbduk/bbduk_hits.nf.test index d9ea4c69..2dfa2dcc 100644 --- a/tests/modules/local/bbduk/bbduk_hits.nf.test +++ b/tests/modules/local/bbduk/bbduk_hits.nf.test @@ -28,7 +28,7 @@ nextflow_process { process { ''' input[0] = LOAD_SAMPLESHEET.out.samplesheet - input[1] = "${params.ref_dir}/results/virus-genomes-filtered.fasta.gz" + input[1] = "${params.ref_dir}/results/virus-genomes-masked.fasta.gz" input[2] = "1" input[3] = "24" input[4] = "viral" diff --git a/tests/run.config b/tests/run.config index 3ee11bb9..9bc5e4fa 100644 --- a/tests/run.config +++ b/tests/run.config @@ -12,7 +12,7 @@ params { // Directories base_dir = "./" // Parent for working and output directories (can be S3) - ref_dir = "s3://nao-testing/index-test/output" // Reference/index directory (generated by index workflow) + ref_dir = "s3://nao-testing/index/20250130/output/" // Reference/index directory (generated by index workflow) // Files sample_sheet = "${projectDir}/test-data/samplesheet.csv" // Path to library TSV diff --git a/tests/run_dev_se.config b/tests/run_dev_se.config index 8d7d9490..6b7cc056 100644 --- a/tests/run_dev_se.config +++ b/tests/run_dev_se.config @@ -10,7 +10,7 @@ params { // Directories base_dir = "./" // Parent for working and output directories (can be S3) - ref_dir = "s3://nao-testing/index-test/output" // Reference/index directory (generated by index workflow) + ref_dir = "s3://nao-testing/index/20250130/output/" // Reference/index directory (generated by index workflow) // Files sample_sheet = "${projectDir}/test-data/single-end-samplesheet.csv" // Path to library TSV diff --git a/tests/run_validation.config b/tests/run_validation.config index 6bd1f2ca..088b0962 100644 --- a/tests/run_validation.config +++ b/tests/run_validation.config @@ -7,7 +7,7 @@ params { // Directories base_dir = "./" // Parent for working and output directories (can be S3) - ref_dir = "s3://nao-testing/index-test/output" // Reference/index directory (generated by index workflow) + ref_dir = "s3://nao-testing/index/20250130/output/" // Reference/index directory (generated by index workflow) // Files viral_tsv = "${projectDir}/test-data/gold-standard-results/virus_hits_filtered.tsv.gz" diff --git a/tests/workflows/run.nf.test.snap b/tests/workflows/run.nf.test.snap index 9667a87a..6069541b 100644 --- a/tests/workflows/run.nf.test.snap +++ b/tests/workflows/run.nf.test.snap @@ -3,7 +3,7 @@ "content": [ "bracken_reports_merged.tsv.gz:md5,6c504fa837ef97ef2096f2569d8c6902", "kraken_reports_merged.tsv.gz:md5,84f070b42b948d36ae38eaee4a61982e", - "merged_blast_filtered.tsv.gz:md5,be7002de8c1878da615ba4379b84feab", + "merged_blast_filtered.tsv.gz:md5,b26a764f7b7271256c0d58a89b5517eb", "read_counts.tsv.gz:md5,8dc2e3ad82f42202262a5e67a9d91e1b", "subset_qc_adapter_stats.tsv.gz:md5,43a90fc81f11a57e191f10176d3b7caf", "subset_qc_basic_stats.tsv.gz:md5,98699e1e92085c89771f0a46fa54df0d", @@ -16,6 +16,6 @@ "nf-test": "0.9.2", "nextflow": "24.10.4" }, - "timestamp": "2025-01-30T14:46:04.796716034" + "timestamp": "2025-01-31T16:27:43.310277911" } } \ No newline at end of file