From 02f185b162ed1af7d17082e98dceecb29c8a00be Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 30 Jan 2025 19:24:36 +0000 Subject: [PATCH 1/8] New index uses virus-genomes-masked.fasta.gz instead of virus-genomes-filtered.fasta.gz --- CHANGELOG.md | 3 +++ subworkflows/local/extractViralReads/main.nf | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2217a35..2d84b7bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +# v2.7.0.1 +- Fixing mislabeled index that causes the pipeline to fail + # v2.7.0.0 - Implemented masking of viral genome reference in index workflow with MASK_GENOME_FASTA to remove adapter, low-entropy and repeat sequences. - Removed TRIMMOMATIC and BBMAP from EXTRACT_VIRAL_READS. diff --git a/subworkflows/local/extractViralReads/main.nf b/subworkflows/local/extractViralReads/main.nf index 69877a96..6ed8dd1e 100644 --- a/subworkflows/local/extractViralReads/main.nf +++ b/subworkflows/local/extractViralReads/main.nf @@ -53,7 +53,7 @@ workflow EXTRACT_VIRAL_READS { single_end main: // Get reference paths. - viral_genome_path = "${ref_dir}/results/virus-genomes-filtered.fasta.gz" + viral_genome_path = "${ref_dir}/results/virus-genomes-masked.fasta.gz" genome_meta_path = "${ref_dir}/results/virus-genome-metadata-gid.tsv.gz" bt2_virus_index_path = "${ref_dir}/results/bt2-virus-index" bt2_human_index_path = "${ref_dir}/results/bt2-human-index" From b6f3199c69e9ef334003fd36cabf816a26524fab Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 30 Jan 2025 19:34:47 +0000 Subject: [PATCH 2/8] index-for-run-test.config was missing adapters param, and organized the format to match index.config --- CHANGELOG.md | 2 +- configs/index-for-run-test.config | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d84b7bb..feaa07b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,5 @@ # v2.7.0.1 -- Fixing mislabeled index that causes the pipeline to fail +- Fixing mislabeled index that causes the pipeline to fail, and added `adapters` param to the index config file used to run our tests # v2.7.0.0 - Implemented masking of viral genome reference in index workflow with MASK_GENOME_FASTA to remove adapter, low-entropy and repeat sequences. diff --git a/configs/index-for-run-test.config b/configs/index-for-run-test.config index 227a3ce2..4991c442 100644 --- a/configs/index-for-run-test.config +++ b/configs/index-for-run-test.config @@ -27,14 +27,10 @@ params { // Other reference files host_taxon_db = "${projectDir}/ref/host-taxa.tsv" contaminants = "${projectDir}/ref/contaminants.fasta.gz" + adapters = "${projectDir}/ref/adapters.fasta" genome_patterns_exclude = "${projectDir}/ref/hv_patterns_exclude.txt" - - // Kraken viral DB kraken_db = "https://genome-idx.s3.amazonaws.com/kraken/k2_viral_20240904.tar.gz" - // Smallest possible BLAST DB blast_db_name = "nt_others" - - // Pull information from GenBank or Ref Seq ncbi_viral_params = "--section refseq --assembly-level complete" // Other input values @@ -52,4 +48,4 @@ includeConfig "${projectDir}/configs/containers.config" includeConfig "${projectDir}/configs/resources.config" includeConfig "${projectDir}/configs/profiles.config" includeConfig "${projectDir}/configs/output.config" -process.queue = "harmon-queue" // AWS Batch job queue +process.queue = "will-batch-queue" // AWS Batch job queue From 482606250d98333b2112fad978b7c2b722245e34 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 30 Jan 2025 19:38:52 +0000 Subject: [PATCH 3/8] Updated the RUN workflow test to use the new index --- CHANGELOG.md | 2 +- tests/run.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index feaa07b2..a1a36bcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,5 @@ # v2.7.0.1 -- Fixing mislabeled index that causes the pipeline to fail, and added `adapters` param to the index config file used to run our tests +- Fixing mislabeled index that causes the pipeline to fail, added `adapters` param to the index config file used to run our tests, updated run test to use new index # v2.7.0.0 - Implemented masking of viral genome reference in index workflow with MASK_GENOME_FASTA to remove adapter, low-entropy and repeat sequences. diff --git a/tests/run.config b/tests/run.config index c6d68ffb..ab05c7f1 100644 --- a/tests/run.config +++ b/tests/run.config @@ -12,7 +12,7 @@ params { // Directories base_dir = "./" // Parent for working and output directories (can be S3) - ref_dir = "s3://nao-testing/index-test/output" // Reference/index directory (generated by index workflow) + ref_dir = "s3://nao-testing/index/20250130/output/" // Reference/index directory (generated by index workflow) // Files sample_sheet = "${projectDir}/test-data/samplesheet.csv" // Path to library TSV From 82f1d1c35ddffe74b679cd814dfe2e867074720f Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 30 Jan 2025 19:44:30 +0000 Subject: [PATCH 4/8] Forgot to update se test. --- tests/run_dev_se.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/run_dev_se.config b/tests/run_dev_se.config index 1846c3c8..d44e2eb6 100644 --- a/tests/run_dev_se.config +++ b/tests/run_dev_se.config @@ -10,7 +10,7 @@ params { // Directories base_dir = "./" // Parent for working and output directories (can be S3) - ref_dir = "s3://nao-testing/index-test/output" // Reference/index directory (generated by index workflow) + ref_dir = "s3://nao-testing/index/20250130/output/" // Reference/index directory (generated by index workflow) // Files sample_sheet = "${projectDir}/test-data/single-end-samplesheet.csv" // Path to library TSV From 68b30aa1a1e862ea919d096154685c640743d8d5 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 30 Jan 2025 21:43:44 +0000 Subject: [PATCH 5/8] Updated run validation test with new index. --- tests/run_validation.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/run_validation.config b/tests/run_validation.config index 1fa40fb6..cfb39813 100644 --- a/tests/run_validation.config +++ b/tests/run_validation.config @@ -7,7 +7,7 @@ params { // Directories base_dir = "./" // Parent for working and output directories (can be S3) - ref_dir = "s3://nao-testing/index-test/output" // Reference/index directory (generated by index workflow) + ref_dir = "s3://nao-testing/index/20250130/output/" // Reference/index directory (generated by index workflow) // Files viral_tsv_collapsed = "${projectDir}/test-data/gold-standard-results/virus_hits_db.tsv.gz" From 098f9435d59685420f12625b362ef3beec0f21dd Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 30 Jan 2025 21:46:36 +0000 Subject: [PATCH 6/8] Updated changelog to reflect change in run validation as well. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a1a36bcf..7f296043 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,5 @@ # v2.7.0.1 -- Fixing mislabeled index that causes the pipeline to fail, added `adapters` param to the index config file used to run our tests, updated run test to use new index +- Fixing mislabeled index that causes the pipeline to fail, added `adapters` param to the index config file used to run our tests, updated `RUN` and `RUN_VALIDATION` tests to use new index (location: `s3://nao-testing/index/20250130`) # v2.7.0.0 - Implemented masking of viral genome reference in index workflow with MASK_GENOME_FASTA to remove adapter, low-entropy and repeat sequences. From 9f0823891b39be718a19789785865244795ee13f Mon Sep 17 00:00:00 2001 From: Harmon Date: Fri, 31 Jan 2025 14:25:08 +0000 Subject: [PATCH 7/8] Updated snapshot, and gold standard output. --- .../blast_hits_paired.tsv.gz | Bin 1525 -> 1514 bytes tests/main.nf.test.snap | 6 +++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test-data/gold-standard-results/blast_hits_paired.tsv.gz b/test-data/gold-standard-results/blast_hits_paired.tsv.gz index ba70e32fdb691d4e60de72381912b7e49cd32ece..e796d9bbd6ab04a06c779a0a63ec98164e2d74ce 100644 GIT binary patch literal 1514 zcmVB(PR4i)KBSdzrEXCntgrz`rBV`i5AOF5 z_nVt0^bXSQ+x)w2@7o{y_Cp^Q{MiR9;QQ@k1+34CR2vdSz%O6W-IM;fZy-1ffxv2Q>0RixU`Pi!vtM=0+%pGGgco3DO;^Trc7Ni5bo$`U0< zbG3W&sT7xmb9lOX`}D(wV;7RQ{>dCi48>A}gYn``v68m0*cs1aymPfPvyC|$V&ly1 z^~b&OwHs@lVSEPf7hf;??JG_rC*XAAT=0tg$2Xo`Vp8PC5raOTJ%4%ROi@Ch+>k?h zEUBc5{DMnPE;QuB2#`w&oQ=mCvbnhN4{u+E0?@wkz?4!f`2yKT8t9dfoDqC*H5w5} z*_~rLz-a@33&0dM1=Kj1;%!L)I|i=ZO3QJ?>s(1*Fxy6g1VVqT5`<08HskX?jy23C z=?sr4hAJMcI8Ak-t-XNiEJd1YaTPWinSfhUx0eg)5Su8Oc{B&p;xntqPVE_(=tUTNYEOrOKAIhvlDg;OP~3-EW6>@r2agzQYk zu1z*%az*5fB&~+@JiX;(by8XZ$u8#Z zaVgKm1j{H4BUCKiCeT@u&4@AMGYBhiByuP=4`w$*l$Q(<$wNfaee7nJRYXZV5rJM( z=RtK4n$=-GU_E0?2&Yg&g@CE3+M;ju$S*Nv6eJbGVZ^|O^^7HmWpGdcL5afLfruu+ zjMqu*6hjWh2-0NeT;~YV;w;(%5n2xC&b1ZVb$4l6XoH1le0GhjV#?_s$S%tq0(WPg zoYDfEAKMGV9FbSerYZS=qlO7kB4EvqFAK?QSz?0cu4%IC z;k(y_s3@Wk4g-i-kz;Jur81#uL^_E_lZ!Z_kO@Rd8R=7hv{3>bPMdQjHZp(14 zAla{xT@-Yo5a_H67WfvFF)3+IYY3)*ki!e>bewP0_psxxT{>`0x{~l1s|T8KVpvxU z)r^ZLc`h`?^LfrL$yeg3hmqiND8SuRK4@#ck<3?*9{-*6lSFLCe^&4DD2n9S=^s^- zf>)pW{0VZ3d)WO&xMyzns<_p)qvQWzzZI@6J^z-&3Bucq^ZoRa53?iJd$%+X_P@B@ zE5g=-2?=Gq-Ka6)-V1RfMaL*X!qes3Kc2q-CJ{%f;rTdJ?>tdqrCw(Ald@=GtJf{hF!Jj0&)U4(Uv@W-wRyeouXm5{TL0DV?)Gl@VBbGJ z>~7km_n7UWjlb`PzW&(Pk9~OI@8Mwu{Lp=@fc09DYGXjE%>&Xppd!`QfK;0UQf(fP z>dlC>&M+XYA8Dv+vm5&QV_!e^RixU`PwX!HBb4jir^(I!=9^#NyzxX%5{vbYvP8+z zTA}gYn{xx1n?wRE%da-nrVD*~Xj=v2o_^ z{fFN8+KqReVSEPnufARPJG^lkIRU2==Ym({Kfm$p5|bi7J~8O?+4IXIXNnR6<%S%y z$C65_$X{{E$%Te|d;;WB0%zm#j@S%3#~dH|20;-E<+M23FmRw>u!>jnBCFw5MRWi!Yc~i)`R1BtSICL2P zBG!P39LiSVI0e4aN9%w`%n?)EEE~)$F0AMzxoYw63#F8DwfNzQAwrfa&EjGMTu&M{ zWD?a&4wi~&aM6I=*Rh2%{xLYwHpoNfT-c|xG5}k`vIvM?0xC_!$Xvn3027Zf+ZK=y z3}P{E7AjNRtC#c3;Ld_55a%fscaFUoNg_&_{-i_7X!Y-8&hW`&Tn@4-18a>X!lJ?r zUR6uMCOKNss#~U0VD_l*WRjMc6jPAX3{);SspvnHWohGx&C=`&!7%}SgWL9cT_0add40{%{tUFYa8 zA-mAATayi$ToE~=+#-+&wmK?{(!?Lz6il9Q)q!{$X42O-$wX7s>s<75sEei$g6e|c zU#+{jm~0Do7ZGlh!SCoS32L0obi=?390eK9%VTIQn92*RU_+W^Da@DDj6v3Q6L~vkB7lt|bm>>jRon*ElL3pR} zC1YLSd0lu;cV!HJ5#5DByfNL3m|?d#v1eZer?&k8*Tg!uFxqFcBuoajd<2i^6Ri)Ti)@?lNTJjJ@ z@@y9m)udE(1%7^)IA<~XVik*p#{Y^eblFG8|AQ|8m#v4?()5(eVbkDk#+H75LWkjz z>!Dd%hWjg8e@on|nNV(~28YbV;$I0I1w2Lx5}q#K{`K_ZcZoPsP2R^x^~5q+mO*z6 zyJJypt4As_P1SRI(&ewmyvo_aX|eqqL^WAx*Grhsf7Lp diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap index bdbbb0e9..db987675 100644 --- a/tests/main.nf.test.snap +++ b/tests/main.nf.test.snap @@ -10,15 +10,15 @@ "subset_qc_length_stats.tsv.gz:md5,032d70cbd5bfa40c163c13885447b92a", "virus_clade_counts.tsv.gz:md5,1c5712b6d4726908058cd1b5f91ce9f1", "virus_hits_db.tsv.gz:md5,5f1f53ac7aba3c241b06e71ce0abf46d", - "blast_hits_paired.tsv.gz:md5,5b6eb0055bc60be196b2a1b09006f9af", + "blast_hits_paired.tsv.gz:md5,bb5ad68a287734537a72984deeac003d", "virus_hits_1.fasta.gz:md5,1bb499ee5557f8db2ffa9ddad1481ca3", "virus_hits_2.fasta.gz:md5,c7123d3ae68212cf911b90eadf749e28", "read_counts.tsv.gz:md5,042434b274310ce5ac28391c95bd2322" ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.1" + "nextflow": "24.10.4" }, - "timestamp": "2025-01-27T14:42:02.13541549" + "timestamp": "2025-01-31T14:10:14.960119159" } } \ No newline at end of file From 96b786f63d7d5f6787f902bfac9d6f6a84de8b52 Mon Sep 17 00:00:00 2001 From: Harmon Date: Fri, 31 Jan 2025 15:22:11 +0000 Subject: [PATCH 8/8] Updating pipeline version --- CHANGELOG.md | 3 +++ pipeline-version.txt | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f296043..426a49a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +# v2.7.0.2 +- Updating `pipeline-version.txt` + # v2.7.0.1 - Fixing mislabeled index that causes the pipeline to fail, added `adapters` param to the index config file used to run our tests, updated `RUN` and `RUN_VALIDATION` tests to use new index (location: `s3://nao-testing/index/20250130`) diff --git a/pipeline-version.txt b/pipeline-version.txt index f225a78a..2635525f 100644 --- a/pipeline-version.txt +++ b/pipeline-version.txt @@ -1 +1 @@ -2.5.2 +2.7.0.2