From d5de7ad5fb71a914333a1a51593160b7cd69dee4 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Tue, 4 Feb 2025 15:23:01 +0000 Subject: [PATCH 1/3] Ungzipping gs files. --- .../bracken_reports_merged.tsv | 3 + .../bracken_reports_merged.tsv.gz | Bin 177 -> 0 bytes .../kraken_reports_merged.tsv | 78 +++++++++++++ .../kraken_reports_merged.tsv.gz | Bin 1504 -> 0 bytes .../merged_blast_filtered.tsv | 31 +++++ .../merged_blast_filtered.tsv.gz | Bin 1166 -> 0 bytes .../gold-standard-results/read_counts.tsv | 2 + .../gold-standard-results/read_counts.tsv.gz | Bin 85 -> 0 bytes .../subset_qc_adapter_stats.tsv | 109 ++++++++++++++++++ .../subset_qc_adapter_stats.tsv.gz | Bin 568 -> 0 bytes .../subset_qc_basic_stats.tsv | 3 + .../subset_qc_basic_stats.tsv.gz | Bin 311 -> 0 bytes .../subset_qc_length_stats.tsv | 43 +++++++ .../subset_qc_length_stats.tsv.gz | Bin 260 -> 0 bytes .../subset_qc_quality_base_stats.tsv | 77 +++++++++++++ .../subset_qc_quality_base_stats.tsv.gz | Bin 779 -> 0 bytes .../subset_qc_quality_sequence_stats.tsv | 44 +++++++ .../subset_qc_quality_sequence_stats.tsv.gz | Bin 309 -> 0 bytes .../virus_hits_filtered.tsv | 38 ++++++ .../virus_hits_filtered.tsv.gz | Bin 11079 -> 0 bytes 20 files changed, 428 insertions(+) create mode 100644 test-data/gold-standard-results/bracken_reports_merged.tsv delete mode 100644 test-data/gold-standard-results/bracken_reports_merged.tsv.gz create mode 100644 test-data/gold-standard-results/kraken_reports_merged.tsv delete mode 100644 test-data/gold-standard-results/kraken_reports_merged.tsv.gz create mode 100644 test-data/gold-standard-results/merged_blast_filtered.tsv delete mode 100644 test-data/gold-standard-results/merged_blast_filtered.tsv.gz create mode 100644 test-data/gold-standard-results/read_counts.tsv delete mode 100644 test-data/gold-standard-results/read_counts.tsv.gz create mode 100644 test-data/gold-standard-results/subset_qc_adapter_stats.tsv delete mode 100644 test-data/gold-standard-results/subset_qc_adapter_stats.tsv.gz create mode 100644 test-data/gold-standard-results/subset_qc_basic_stats.tsv delete mode 100644 test-data/gold-standard-results/subset_qc_basic_stats.tsv.gz create mode 100644 test-data/gold-standard-results/subset_qc_length_stats.tsv delete mode 100644 test-data/gold-standard-results/subset_qc_length_stats.tsv.gz create mode 100644 test-data/gold-standard-results/subset_qc_quality_base_stats.tsv delete mode 100644 test-data/gold-standard-results/subset_qc_quality_base_stats.tsv.gz create mode 100644 test-data/gold-standard-results/subset_qc_quality_sequence_stats.tsv delete mode 100644 test-data/gold-standard-results/subset_qc_quality_sequence_stats.tsv.gz create mode 100644 test-data/gold-standard-results/virus_hits_filtered.tsv delete mode 100644 test-data/gold-standard-results/virus_hits_filtered.tsv.gz diff --git a/test-data/gold-standard-results/bracken_reports_merged.tsv b/test-data/gold-standard-results/bracken_reports_merged.tsv new file mode 100644 index 00000000..0ffd0319 --- /dev/null +++ b/test-data/gold-standard-results/bracken_reports_merged.tsv @@ -0,0 +1,3 @@ +name taxid rank kraken_assigned_reads added_reads new_est_reads fraction_total_reads sample ribosomal +Viruses 10239 D 1 0 1 1.00000 gold_standard TRUE +Viruses 10239 D 41 0 41 1.00000 gold_standard FALSE diff --git a/test-data/gold-standard-results/bracken_reports_merged.tsv.gz b/test-data/gold-standard-results/bracken_reports_merged.tsv.gz deleted file mode 100644 index 73853322ebca806b6e9ccc988429c91a47141636..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 177 zcmV;i08alOiwFo8`kZF~17dPvV{2t@Uvgz|Z*p{VUu|V_XJuqAbaQq9b&EX&=gmMAKLb#>&dC_88qY9J%bt?uq_<4Tz06Ffec6DMq1!XJj(yK!afr4RDXW3I`Lg z6PUkS+}7Cf90lWO$9gBs``yu^~dC>Ql&P@*4DP`citgN7*7$#KTLgSbh9`mLLA*u#Dqvi62l~npJCbmn%bz@p?dEEQd|MK_Yipjfn8}(lSi5Sis(Kd zN=3{jAb(0n)?s~7JH&}#5sw3`2@N;d8z{ce)r+oQb=54bl=gi1L!?w(M(+69y{Xc| zWIVfE&NrZ4P(P#B*z5l0^X}6JDL0g($jzXHP?RQe&Smox*39N-(hW-yPhe)6!WGBY zdTu&DNnyVHE9>>kNjkWq4QFF2{!85_-_z)g(tXrj<> z_65Wl5@wG|H!?whfNiDSwd+7>KeMC1PIxgx;5 zJ!FRx%!prWt*axtw2d;^!YA+z;|K~Cf<+(K9bJa<7=kRb{lbr*u>z+XNqhebH+py| zSDoNj6FG6CBMKfQma4r5*m*8?X6L|#%yJa@kowIQhfFQYd`dd1JVE1?Eh~J=xko&W zd4eW8d^t2r0uPW}I+BFM(cUoLnH`_m@CyoWxWGaBaWGuztcHFf!_D5d3&LUX2Bb{g+99VyIDjkoFkd$t~S`(Q_G1U&n{~StH;kBKga}9CgCMTy2 zR|<}WjB<6R_oOEj$%v%m4dhDu~x&WgLPYI#-F1skW-t+1t83dytbnaY-(?!66IZESueBCKoS8 zWT;tkul)!Yj@Rixp|kC30^`u^UdI2Q;(+2wN~eweGeqNxQq-vvu_TE@Rz5*yM1GPI^eajCzztC$zNY<9 zu!a6;!sjxc{pcqj*YQ!6zige)KW&evZ~gRh>-P1K720O+?eeqs7~|^_8%ASmfB)9a zvO2~TFAp|ucW>WJyC{$`ZxC9$?^&z0sAE02J#m(X zTKK2>{Xb7zt4BS{DJ4lF63&RD#B(*9_v{@sLL>=2>2|8pHx(8MRQiSIi~j-NMNMbI G761So9O+8{ diff --git a/test-data/gold-standard-results/merged_blast_filtered.tsv b/test-data/gold-standard-results/merged_blast_filtered.tsv new file mode 100644 index 00000000..d012a1b5 --- /dev/null +++ b/test-data/gold-standard-results/merged_blast_filtered.tsv @@ -0,0 +1,31 @@ +qseqid sseqid sgi staxid qlen evalue bitscore qcovs length pident mismatch gapopen sstrand qstart qend sstart send bitscore_rank_dense bitscore_fraction +SRR12204734.102846486 gi|1799084385|gb|MN030571.1| 1799084385 2699435 141 9.47e-30 134 100 141 83.688 23 0 minus 1 141 3990 3850 1 1.0 +SRR12204734.102846486 gi|2571040051|gb|OR130732.1| 2571040051 3071813 141 9.47e-30 134 100 141 83.688 23 0 minus 1 141 3990 3850 2 1.0 +SRR12204734.132645528 gi|2510069797|gb|ON398705.1| 2510069797 3049538 128 2.89e-59 231 100 128 99.219 1 0 minus 1 128 4697 4570 1 1.0 +SRR12204734.136705306 gi|1799084385|gb|MN030571.1| 1799084385 2699435 140 1.53e-47 193 98 137 91.971 11 0 minus 4 140 1036 900 1 1.0 +SRR12204734.136705306 gi|2571040051|gb|OR130732.1| 2571040051 3071813 140 1.53e-47 193 98 137 91.971 11 0 minus 4 140 1036 900 2 1.0 +SRR12204734.146348965 gi|1799084385|gb|MN030571.1| 1799084385 2699435 142 7.38e-31 137 71 101 92.079 3 1 minus 1 101 6359 6264 1 1.0 +SRR12204734.146348965 gi|2571040051|gb|OR130732.1| 2571040051 3071813 142 7.38e-31 137 71 101 92.079 3 1 minus 1 101 6760 6665 2 1.0 +SRR12204734.146348965 gi|1430400470|gb|MF927778.1| 1430400470 2268895 142 2.69e-20 102 72 102 85.294 11 2 minus 1 102 9379 9282 2 0.7445255474452555 +SRR12204734.158389317 gi|1799084385|gb|MN030571.1| 1799084385 2699435 140 1.95e-56 222 99 138 95.652 6 0 minus 1 138 1669 1532 1 1.0 +SRR12204734.158389317 gi|2571040051|gb|OR130732.1| 2571040051 3071813 140 1.95e-56 222 99 138 95.652 6 0 minus 1 138 1669 1532 2 1.0 +SRR12204848.140127907 gi|1799084385|gb|MN030571.1| 1799084385 2699435 143 1.58e-42 176 100 143 88.811 16 0 minus 1 143 1854 1712 1 1.0 +SRR12204848.140127907 gi|2571040051|gb|OR130732.1| 2571040051 3071813 143 1.58e-42 176 100 143 88.811 16 0 minus 1 143 1854 1712 2 1.0 +SRR12204848.15085913 gi|1799084385|gb|MN030571.1| 1799084385 2699435 142 2.04e-36 156 72 103 94.175 4 2 plus 41 142 1938 2039 1 1.0 +SRR12204848.15085913 gi|2571040051|gb|OR130732.1| 2571040051 3071813 142 2.04e-36 156 72 103 94.175 4 2 plus 41 142 1938 2039 2 1.0 +SRR12204848.28156033 gi|1799084385|gb|MN030571.1| 1799084385 2699435 136 1.90e-46 189 99 135 91.852 11 0 plus 1 135 1633 1767 1 1.0 +SRR12204848.28156033 gi|2571040051|gb|OR130732.1| 2571040051 3071813 136 1.90e-46 189 99 135 91.852 11 0 plus 1 135 1633 1767 2 1.0 +SRR12204848.31434798 gi|1799084385|gb|MN030571.1| 1799084385 2699435 147 4.47e-53 211 100 147 92.517 11 0 plus 1 147 1180 1326 1 1.0 +SRR12204848.31434798 gi|2571040051|gb|OR130732.1| 2571040051 3071813 147 4.47e-53 211 100 147 92.517 11 0 plus 1 147 1180 1326 2 1.0 +SRR12204849.140297134 gi|2510069797|gb|ON398705.1| 2510069797 3049538 148 5.71e-67 257 100 148 97.973 3 0 plus 1 148 2798 2945 1 1.0 +SRR12204849.142401718 gi|2294264610|gb|MW678777.1| 2294264610 32630 144 9.31e-60 233 100 144 95.833 6 0 minus 1 144 2225 2082 1 1.0 +SRR12204849.142401718 gi|2571040051|gb|OR130732.1| 2571040051 3071813 144 4.33e-58 228 100 144 95.139 7 0 minus 1 144 5510 5367 2 0.9785407725321889 +SRR12204849.147306033 gi|2450664545|dbj|LC723624.1| 2450664545 2973485 146 4.37e-63 244 99 144 97.222 4 0 plus 3 146 499 642 1 1.0 +SRR12204849.16071359 gi|2784409127|gb|PQ072823.1| 2784409127 32630 138 1.51e-42 176 99 139 89.928 10 3 plus 1 137 63 199 1 1.0 +SRR12204850.120094795 gi|380719094|gb|JQ281544.1| 380719094 1163660 148 1.24e-63 246 100 148 96.622 5 0 minus 1 148 646 499 1 1.0 +SRR12204850.140730292 gi|1799084385|gb|MN030571.1| 1799084385 2699435 139 1.16e-53 213 100 139 94.245 8 0 plus 1 139 2108 2246 1 1.0 +SRR12204850.140730292 gi|2571040051|gb|OR130732.1| 2571040051 3071813 139 1.16e-53 213 100 139 94.245 8 0 plus 1 139 2108 2246 2 1.0 +SRR12204850.28709236 gi|380719094|gb|JQ281544.1| 380719094 1163660 148 3.51e-49 198 95 140 92.143 11 0 plus 3 142 4290 4429 1 1.0 +SRR12204850.8190301 gi|380719094|gb|JQ281544.1| 380719094 1163660 147 2.69e-50 202 99 145 91.724 12 0 minus 1 145 5481 5337 1 1.0 +SRR12204850.89255206 gi|1799084385|gb|MN030571.1| 1799084385 2699435 148 4.51e-53 211 99 147 92.517 11 0 minus 2 148 553 407 1 1.0 +SRR12204850.89255206 gi|2571040051|gb|OR130732.1| 2571040051 3071813 148 4.51e-53 211 99 147 92.517 11 0 minus 2 148 553 407 2 1.0 diff --git a/test-data/gold-standard-results/merged_blast_filtered.tsv.gz b/test-data/gold-standard-results/merged_blast_filtered.tsv.gz deleted file mode 100644 index 957bda37b7beecb4679b4f35b3c655be79a84194..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1166 zcmV;91abQxiwFo8`kZF~18rq;XJuqxVr*e^bYEs^Y;1bGhW(mcdx4rR=`THJ9_Chb^Z2vTR*?8c7vzB-e8UGafh4FudBP=`f2mi zpV!xc*=rVc!E@21CDFrrAF`P*Dkbh8Zuh@^^x|UQ?K5~P%(bL4&t+L#W>Co#zG15e=ztG zLpW)STzU*Fl`LCs7$3z-_OZu!n*oMeltKaTuq)PGMZ=z<$H1VO<6zi(M1f(&>!dMH ziXjObiC+>EtmmS1SDGo9kbz7xS#XxI(UC&?XT2mc|1QMLh%AJdh=(T8Vks_Af+;&G zNQ=;RuRD9B3Ok>TKzR%YhLa#{3BjLIVhk};iFpt?c}fWzy{Cjq-AS`=O8jD+b1qGZ z!wfMa{jFDmT|#DZ)fzCY?6rKvt7-B)9E`T@cb@>YB`*1ae*;Yi^UidYD8b6Vbvv7i`q~W3UyNKaZ2QGNu%K{zh*3LYlfe*#{%#77S3o^4=XoCoCI3(7p*S{0Hp1TwOo6#gDBl&BC4pTOZ71Rlb@ zmqLG+{FQRSWzrxHGlav`?n)J{mQ(~>FE!kL7>usq>#0R(wN!-CmrJKQXP6fbMK}cQ zMZ^iwN{uK2$}pc%L}*D-T4H)g1>f>HGkiSYemsN9Xn5X=MWavX1~u;QZ~xwZ_&KvA z)<$)hg2cFkO3l<%sgNWINm6O7hsv(rEczB5>7T@G(n(zsj#-wWt{J@x?7g8%TALvSEQarO`(Hm* zC((BP!vu0YNlJYp2FE%aKOM6(xrFn_lb{qFz87JF0MQW?2`?~BtOo&yIvvszgK%y{ zjLO)sVAsLd{Lu@qIER?&nhPQL0)pWNhMZdv3h7e;C|28<*6F~l+Pxjdsff&E{2WsJ zosgkmWkF*)7a$J}XZ?-k`?gBk#gQ40@kuU3idq5+R4#6>N?>R@dC@N+{Kjf345r;6 gPCdJa;NDvC7=r!|!c3!DhR|oizZm=rG(Hdj0CqARng9R* diff --git a/test-data/gold-standard-results/read_counts.tsv b/test-data/gold-standard-results/read_counts.tsv new file mode 100644 index 00000000..3a0daacf --- /dev/null +++ b/test-data/gold-standard-results/read_counts.tsv @@ -0,0 +1,2 @@ +sample n_reads_single n_read_pairs +gold_standard 330 165 diff --git a/test-data/gold-standard-results/read_counts.tsv.gz b/test-data/gold-standard-results/read_counts.tsv.gz deleted file mode 100644 index eacfb360314363d15a289c9cf3eeda8aea92f7f0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 85 zcmV-b0IL5ViwFo8`kZF~19D|yWM5-%b#8QXE_8Et04q++Eyzjb%!@BdO-w0{FV4(M rhjZf#5;Kd6xzh7}LyiwFo8`kZF~19Np^b7gd2absU$WMOc0WpZD0bYXOJE_8Et0JWD}Zi6ro zMZev}AZ+}=tBI^aBP2>tlD6vZYx)MomT*t=VJT7OCWrmFI3 zHdU1muBod0Y)Dn*<7lcXpFC4lOUT@GQ0p{k8e$@7XozGrkKcavf;jE!Y%EMwyq8@qayJaM8$Z#MZ3HeCKM?pRc@@XKS2J(qX4B_lQvyL}I*zTj|7N%h^ zn!y+bV;M}rX?_dSG8oTbB-A((x;qN;QIJnVZ7j$~Lp~bvF_4ded@ST+As+|%IBNPp zJ}ucrzlerqdwB%zW;$YIq&K4@0>Sh6W=4_ G9smGh9}o8c diff --git a/test-data/gold-standard-results/subset_qc_basic_stats.tsv b/test-data/gold-standard-results/subset_qc_basic_stats.tsv new file mode 100644 index 00000000..b60aec9c --- /dev/null +++ b/test-data/gold-standard-results/subset_qc_basic_stats.tsv @@ -0,0 +1,3 @@ +percent_gc mean_seq_len n_reads_single n_read_pairs percent_duplicates n_bases_approx per_base_sequence_quality per_sequence_quality_scores per_base_sequence_content per_sequence_gc_content per_base_n_content sequence_length_distribution sequence_duplication_levels overrepresented_sequences adapter_content stage sample +46 150.24761904761905 210 105 4.285714285714278 31500 pass pass fail fail pass warn pass fail fail raw gold_standard +47 145.31764705882352 170 85 7.058823529411768 24700 pass pass fail fail pass warn pass fail pass cleaned gold_standard diff --git a/test-data/gold-standard-results/subset_qc_basic_stats.tsv.gz b/test-data/gold-standard-results/subset_qc_basic_stats.tsv.gz deleted file mode 100644 index 548c2f13d02114837d142eba98bc751c4f10d8eb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 311 zcmV-70m%LziwFo8`kZF~19Np^b7gd2absU%VRLC?UvqR}baO6rb9MlgQqgY1APjtV zKPCz(3F+4$7rYFK5{gN?{r%ju&D3fydolR#>^mef6-TMs$-#**S*Ej%lpxtc=-F~e zQ=~_0Gls&DA2WW*F*vNm!XJ@|E#_S2ubW~8ZtE6Gj_i5C80z=Ryn~kAT!{aea&xNU z^g1=U7kr&a8??#f6;u7RK5#AJxYRJGU-B7UKoMRjGRzk$g|Y;ZHRLxx2K1P#IBgac zCj!P(jqGO*i&N=|?pq>_y!|M&^6@z!2|cZLe6;J5lfo z(;OvI#pJR0oi?C!-|Uqcv}yZs?Dl;J%Cs={V45d#&`J~|NVtD^t~VFuH;{kx{Q*t( JP>wDF003C5l=%Pv diff --git a/test-data/gold-standard-results/subset_qc_length_stats.tsv b/test-data/gold-standard-results/subset_qc_length_stats.tsv new file mode 100644 index 00000000..5d447a32 --- /dev/null +++ b/test-data/gold-standard-results/subset_qc_length_stats.tsv @@ -0,0 +1,43 @@ +length n_sequences file stage sample +150 158 gold_standard_interleaved raw gold_standard +151 52 gold_standard_interleaved raw gold_standard +112 6 gold_standard_fastp cleaned gold_standard +113 0 gold_standard_fastp cleaned gold_standard +114 0 gold_standard_fastp cleaned gold_standard +115 2 gold_standard_fastp cleaned gold_standard +116 2 gold_standard_fastp cleaned gold_standard +117 2 gold_standard_fastp cleaned gold_standard +118 0 gold_standard_fastp cleaned gold_standard +119 0 gold_standard_fastp cleaned gold_standard +120 0 gold_standard_fastp cleaned gold_standard +121 1 gold_standard_fastp cleaned gold_standard +122 0 gold_standard_fastp cleaned gold_standard +123 0 gold_standard_fastp cleaned gold_standard +124 0 gold_standard_fastp cleaned gold_standard +125 0 gold_standard_fastp cleaned gold_standard +126 0 gold_standard_fastp cleaned gold_standard +127 0 gold_standard_fastp cleaned gold_standard +128 0 gold_standard_fastp cleaned gold_standard +129 4 gold_standard_fastp cleaned gold_standard +130 1 gold_standard_fastp cleaned gold_standard +131 0 gold_standard_fastp cleaned gold_standard +132 0 gold_standard_fastp cleaned gold_standard +133 0 gold_standard_fastp cleaned gold_standard +134 0 gold_standard_fastp cleaned gold_standard +135 2 gold_standard_fastp cleaned gold_standard +136 4 gold_standard_fastp cleaned gold_standard +137 2 gold_standard_fastp cleaned gold_standard +138 0 gold_standard_fastp cleaned gold_standard +139 2 gold_standard_fastp cleaned gold_standard +140 4 gold_standard_fastp cleaned gold_standard +141 4 gold_standard_fastp cleaned gold_standard +142 4 gold_standard_fastp cleaned gold_standard +143 2 gold_standard_fastp cleaned gold_standard +144 2 gold_standard_fastp cleaned gold_standard +145 1 gold_standard_fastp cleaned gold_standard +146 0 gold_standard_fastp cleaned gold_standard +147 1 gold_standard_fastp cleaned gold_standard +148 0 gold_standard_fastp cleaned gold_standard +149 0 gold_standard_fastp cleaned gold_standard +150 79 gold_standard_fastp cleaned gold_standard +151 45 gold_standard_fastp cleaned gold_standard diff --git a/test-data/gold-standard-results/subset_qc_length_stats.tsv.gz b/test-data/gold-standard-results/subset_qc_length_stats.tsv.gz deleted file mode 100644 index c08544f3fedf4ff47166ddd01a0c7dae284832a2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 260 zcmV+f0sHWo~D5XkT-5VRUmYbaQq9m6bsff-nq4 z*YPxM(}J9a=}1IpYE?kJzTL{gnYZaC?`KjTfns&uE>g$Uo-eB>TjhK$ChL~xcIMlo z*uf_!J{@JgmuYO3I^|^=uXVGfSbp0iOaA;;TgFQYEaHQ7yTm!K+e1z*Q(I=wt2o3* z$S2v~;6L9%zK8q~&Ob0dILLePh!3zo^c$exkRabdzK1;Y8wQDlSI2y*XY(lZtI)4P zzY6zPJNP`|{wnmV1M43hn?HKU2goa&KSDmS_c_A-jXmrS{YLnGZolDvK!GMWfxs8x K3SA8j2mk=j4t;h2 diff --git a/test-data/gold-standard-results/subset_qc_quality_base_stats.tsv b/test-data/gold-standard-results/subset_qc_quality_base_stats.tsv new file mode 100644 index 00000000..77ea08a7 --- /dev/null +++ b/test-data/gold-standard-results/subset_qc_quality_base_stats.tsv @@ -0,0 +1,77 @@ +position mean_phred_score file stage sample +1 33.39047619047619 gold_standard_interleaved raw gold_standard +2 32.91904761904762 gold_standard_interleaved raw gold_standard +3 34.10476190476191 gold_standard_interleaved raw gold_standard +4 34.80952380952381 gold_standard_interleaved raw gold_standard +5 34.56190476190476 gold_standard_interleaved raw gold_standard +6 36.06190476190476 gold_standard_interleaved raw gold_standard +7 35.84285714285714 gold_standard_interleaved raw gold_standard +8 34.804761904761904 gold_standard_interleaved raw gold_standard +9 35.58571428571429 gold_standard_interleaved raw gold_standard +12 35.266666666666666 gold_standard_interleaved raw gold_standard +17 34.97619047619048 gold_standard_interleaved raw gold_standard +22 34.66571428571429 gold_standard_interleaved raw gold_standard +27 34.80761904761905 gold_standard_interleaved raw gold_standard +32 34.59809523809524 gold_standard_interleaved raw gold_standard +37 34.80571428571429 gold_standard_interleaved raw gold_standard +42 34.81333333333334 gold_standard_interleaved raw gold_standard +47 34.20666666666666 gold_standard_interleaved raw gold_standard +52 34.27047619047619 gold_standard_interleaved raw gold_standard +57 34.359047619047615 gold_standard_interleaved raw gold_standard +62 34.20761904761905 gold_standard_interleaved raw gold_standard +67 34.18571428571429 gold_standard_interleaved raw gold_standard +72 33.986666666666665 gold_standard_interleaved raw gold_standard +77 34.2552380952381 gold_standard_interleaved raw gold_standard +82 34.25333333333334 gold_standard_interleaved raw gold_standard +87 34.16190476190476 gold_standard_interleaved raw gold_standard +92 33.894285714285715 gold_standard_interleaved raw gold_standard +97 34.1447619047619 gold_standard_interleaved raw gold_standard +102 34.0352380952381 gold_standard_interleaved raw gold_standard +107 34.01619047619048 gold_standard_interleaved raw gold_standard +112 33.71142857142858 gold_standard_interleaved raw gold_standard +117 34.01714285714286 gold_standard_interleaved raw gold_standard +122 34.03809523809524 gold_standard_interleaved raw gold_standard +127 33.72190476190476 gold_standard_interleaved raw gold_standard +132 33.597142857142856 gold_standard_interleaved raw gold_standard +137 33.59142857142857 gold_standard_interleaved raw gold_standard +142 33.45047619047619 gold_standard_interleaved raw gold_standard +147 33.289523809523814 gold_standard_interleaved raw gold_standard +150 29.86474358974359 gold_standard_interleaved raw gold_standard +1 34.16470588235294 gold_standard_fastp cleaned gold_standard +2 34.03529411764706 gold_standard_fastp cleaned gold_standard +3 35.00588235294118 gold_standard_fastp cleaned gold_standard +4 35.970588235294116 gold_standard_fastp cleaned gold_standard +5 35.76470588235294 gold_standard_fastp cleaned gold_standard +6 37.258823529411764 gold_standard_fastp cleaned gold_standard +7 36.98235294117647 gold_standard_fastp cleaned gold_standard +8 36.588235294117645 gold_standard_fastp cleaned gold_standard +9 37.294117647058826 gold_standard_fastp cleaned gold_standard +12 36.737647058823534 gold_standard_fastp cleaned gold_standard +17 36.72 gold_standard_fastp cleaned gold_standard +22 36.30705882352941 gold_standard_fastp cleaned gold_standard +27 36.21058823529412 gold_standard_fastp cleaned gold_standard +32 36.423529411764704 gold_standard_fastp cleaned gold_standard +37 36.31058823529412 gold_standard_fastp cleaned gold_standard +42 36.21529411764706 gold_standard_fastp cleaned gold_standard +47 35.65647058823529 gold_standard_fastp cleaned gold_standard +52 35.80588235294117 gold_standard_fastp cleaned gold_standard +57 35.65176470588235 gold_standard_fastp cleaned gold_standard +62 35.76705882352941 gold_standard_fastp cleaned gold_standard +67 35.512941176470584 gold_standard_fastp cleaned gold_standard +72 35.45529411764706 gold_standard_fastp cleaned gold_standard +77 35.511764705882356 gold_standard_fastp cleaned gold_standard +82 35.83529411764706 gold_standard_fastp cleaned gold_standard +87 35.628235294117644 gold_standard_fastp cleaned gold_standard +92 35.35411764705882 gold_standard_fastp cleaned gold_standard +97 35.69411764705882 gold_standard_fastp cleaned gold_standard +102 35.46 gold_standard_fastp cleaned gold_standard +107 35.32352941176471 gold_standard_fastp cleaned gold_standard +112 35.2574175035868 gold_standard_fastp cleaned gold_standard +117 35.36279730255108 gold_standard_fastp cleaned gold_standard +122 35.22118842215593 gold_standard_fastp cleaned gold_standard +127 35.04713375796179 gold_standard_fastp cleaned gold_standard +132 34.982791537667694 gold_standard_fastp cleaned gold_standard +137 34.84032087639189 gold_standard_fastp cleaned gold_standard +142 34.218702482306774 gold_standard_fastp cleaned gold_standard +147 34.23558341013825 gold_standard_fastp cleaned gold_standard +150 30.611648745519716 gold_standard_fastp cleaned gold_standard diff --git a/test-data/gold-standard-results/subset_qc_quality_base_stats.tsv.gz b/test-data/gold-standard-results/subset_qc_quality_base_stats.tsv.gz deleted file mode 100644 index a7bd9b0171adf4f2eb0f241b810ae40ea98d4e6b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 779 zcmV+m1N8hKiwFo8`kZF~19Np^b7gd2absU`bzy92ba`K5VRL0)b97;Jb1rmqb^x7M z(T?LV2z<_dOk}_q`)gLwHoI4;*H&$H_xoq#=*o7bNVpGqsfHQGfbsQmKR%9^^YnXN z&hOV>xApLT|G3=N>GOD6r~701s^8`J^|ZdhL^M+gQ4(z5>FaW;fX8w^EVsk^@%&hC zr*-+W9;Vy!_xrzZI1$bTOaOG@X`(oTr3V*2Hhd0+k+!c7&kfJXCg8%SiP9X#@iGz5 zIbvpk?d!tlp1<)g_yUwBfDJwR3IMPL!1V3siy$qBVl&9T8pIYLrQsYI$K za507GI|bSl45tuPK?ZvMd&@pDWn0mi#Qu+7tw zt&2Vh($9fM2U_kP@NgmI?!ov-E9yZmcBuJ0C|yDvtt1F)LZDF-A*g}?!!rp$T@Zk$ z4c-WwhG#8i=U~OnhT*XUZ8Fv@aJU$N)CVpm3-*SP9>PQrUxcV4A`j0dh^hgzT{N6t za+t8pIYo)YxwNlSL~qO}fpf-M+~rll=W>5sr;i%pTqFG0)Ot&lSWBx#c|C?bG__kk zJc$8bt9cI{8@h;vd&s%LWi;`+qDdWyPwE=U0hhMn6$^eXzCCnq=-YT+h7;3$;<0XzguQXNx>er2d<0=2JK-dWsml-e(b}$c%`RN`iNcq zV3S3C4J5Xe?r9TU#k1`)PpR_84yIZ>ZHB!|2M<0eJA*BDSDPw*@VR?G zI$m@49_^g)WM7n?*bWL0ywjIKxb9okeAj3S%1O3WJ@o?H!;>X~aJ|=Z^4v(eT~+#o zQiMX?w;{Oj*y*ueg!TDeTb>KK@XB9Tk@{p4mXbg`y$=1!S??t&z;(bUNv>NFwkKx{ zgdt0!0@)>?zc>-H1dO$ALlSWb*j}8(oGC&8%IKLlZJ(krr@BhSEVc2W?wFnm{0Afd Jy=x~E0002*fRO+I diff --git a/test-data/gold-standard-results/subset_qc_quality_sequence_stats.tsv b/test-data/gold-standard-results/subset_qc_quality_sequence_stats.tsv new file mode 100644 index 00000000..d63a802b --- /dev/null +++ b/test-data/gold-standard-results/subset_qc_quality_sequence_stats.tsv @@ -0,0 +1,44 @@ +mean_phred_score n_sequences file stage sample +14 2 gold_standard_interleaved raw gold_standard +15 0 gold_standard_interleaved raw gold_standard +16 0 gold_standard_interleaved raw gold_standard +17 0 gold_standard_interleaved raw gold_standard +18 0 gold_standard_interleaved raw gold_standard +19 1 gold_standard_interleaved raw gold_standard +20 1 gold_standard_interleaved raw gold_standard +21 2 gold_standard_interleaved raw gold_standard +22 1 gold_standard_interleaved raw gold_standard +23 7 gold_standard_interleaved raw gold_standard +24 3 gold_standard_interleaved raw gold_standard +25 3 gold_standard_interleaved raw gold_standard +26 2 gold_standard_interleaved raw gold_standard +27 1 gold_standard_interleaved raw gold_standard +28 4 gold_standard_interleaved raw gold_standard +29 5 gold_standard_interleaved raw gold_standard +30 4 gold_standard_interleaved raw gold_standard +31 7 gold_standard_interleaved raw gold_standard +32 11 gold_standard_interleaved raw gold_standard +33 10 gold_standard_interleaved raw gold_standard +34 13 gold_standard_interleaved raw gold_standard +35 16 gold_standard_interleaved raw gold_standard +36 89 gold_standard_interleaved raw gold_standard +37 12 gold_standard_interleaved raw gold_standard +38 6 gold_standard_interleaved raw gold_standard +39 5 gold_standard_interleaved raw gold_standard +40 5 gold_standard_interleaved raw gold_standard +25 1 gold_standard_fastp cleaned gold_standard +26 0 gold_standard_fastp cleaned gold_standard +27 2 gold_standard_fastp cleaned gold_standard +28 1 gold_standard_fastp cleaned gold_standard +29 5 gold_standard_fastp cleaned gold_standard +30 5 gold_standard_fastp cleaned gold_standard +31 6 gold_standard_fastp cleaned gold_standard +32 7 gold_standard_fastp cleaned gold_standard +33 7 gold_standard_fastp cleaned gold_standard +34 11 gold_standard_fastp cleaned gold_standard +35 17 gold_standard_fastp cleaned gold_standard +36 77 gold_standard_fastp cleaned gold_standard +37 14 gold_standard_fastp cleaned gold_standard +38 7 gold_standard_fastp cleaned gold_standard +39 5 gold_standard_fastp cleaned gold_standard +40 5 gold_standard_fastp cleaned gold_standard diff --git a/test-data/gold-standard-results/subset_qc_quality_sequence_stats.tsv.gz b/test-data/gold-standard-results/subset_qc_quality_sequence_stats.tsv.gz deleted file mode 100644 index 7c40eddcc0ae2ce69751459ed7dcbd9da10a9439..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 309 zcmV-50m}X#iwFo8`kZF~19Np^b7gd2absU`bzy92ba`KMWpQ<7ZewL%b97;Jb1rmq zb^w)>L2kq#3`N(T(*#TcIZcET&vc{=lS0~Fzuq=fmi{K2ya$+X^O9Va#C?_KFLmo3+Ad6rY^F8NzFs`=;d-Pj2Rj?)*ze+JQSLD7(hX%;+oz)xAUM?m7+>F6W8cWJa!{w;t4xU ze>)b={oR%uZ|-wlq5_wSvFhknXTUSoyUpD04S_Wt!@G5Lg*izWL{*IW26#oMrG HR0se7IZl>F diff --git a/test-data/gold-standard-results/virus_hits_filtered.tsv b/test-data/gold-standard-results/virus_hits_filtered.tsv new file mode 100644 index 00000000..8cc91bee --- /dev/null +++ b/test-data/gold-standard-results/virus_hits_filtered.tsv @@ -0,0 +1,38 @@ +seq_id bowtie2_genome_id_best bowtie2_genome_id_all bowtie2_taxid_best bowtie2_taxid_all bowtie2_fragment_length bowtie2_genome_id_fwd bowtie2_genome_id_rev bowtie2_taxid_fwd bowtie2_taxid_rev bowtie2_fragment_length_fwd bowtie2_fragment_length_rev bowtie2_best_alignment_score_fwd bowtie2_best_alignment_score_rev bowtie2_next_alignment_score_fwd bowtie2_next_alignment_score_rev bowtie2_edit_distance_fwd bowtie2_edit_distance_rev bowtie2_ref_start_fwd bowtie2_ref_start_rev bowtie2_map_qual_fwd bowtie2_map_qual_rev bowtie2_cigar_fwd bowtie2_cigar_rev query_len_fwd query_len_rev query_seq_fwd query_seq_rev query_qual_fwd query_qual_rev bowtie2_length_normalized_score_fwd bowtie2_length_normalized_score_rev bowtie2_length_normalized_score_max bowtie2_pair_status kraken_classified kraken_assigned_name kraken_assigned_taxid kraken_assigned_host_virus kraken_length kraken_encoded_hits bbmerge_frag_length sample +SRR12204734.102846486 NC_001943.1 NC_001943.1 1868658 1868658 370 NC_001943.1 NC_001943.1 1868658 1868658 370 370 121 146 45 14 17 16 3915 3672 12 12 21S120M 7S101M1D3M1I26M5S 141 143 AATGGGCCGAACAAGGATCTCTGGAAGACTTATGACACCGTAGTCTATGGAGATGATAGGCTCTCCACCACACCTTCAGTGCCAGACAATTATGAAGAAAGAGTAATTGCCATGTATAGAGACATCTTTGGCATGTGGGTT ATTTAGGCACATTAAAGAGATTAGATGGAATTTCATAAACAAAGACCAACGTGAAAAATACAGACATGTGCATGAATGGTATGTTGACAACCTCCTCAATCGTCATGTCTTATTACCATCTGGTGAGGTAACCGTGCAAACAC FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FF:FFFFFFFFFFFF:FFFFFFF:FFFF:FFFFFFFFFFF,:FFFFFFFFFFFFFFFFFFFFFFFFFF:FFF:FFFFFFFFFFFFF,FFFFFFFFFFF:FFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFF:F,F 24.45056997718949 29.418611880330797 29.418611880330797 CP False unclassified 0 0 285 0:107 A:34 0:51 1239565:5 0:54 NA gold_standard +SRR12204734.122670570 NC_007574.1 NC_007574.1 36427 36427 479 NC_007574.1 NC_007574.1 36427 36427 479 479 162 162 24 10 19 17 353 19 17 17 145M 136M5S 145 141 TACATATTATAAAGATAATGAATTTGTTGTTAGTGATGAATTTTGGTTACATACTAATATAAATGAGTTAATACCATATAAGTTACTATATTATGAACGGGGATTGAGAAAATTATATGATGGTAAAGAGTACATATTGTATAAT GCAATGCGTTCATTATAATTCCGTTCACGGGTGTAAGGAGGGTATTGGGGCTTTTTGAACGCGGGAATCACTTGAATTTTGCAGACACCTACGTATATACGTGGAATCAACAGTATTCATACCATGAGAATGCATTATTAA F:FFFF:FFFF,FFFFF,FFFFFF:FFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFF,F:FF,F:,FFFFFF,FFFFFFFFFFFFFFF:FF,FFFFFF,FFFFFFFFFFFFFFFFFF::FF:FFF:F:F:FFFF,F FFF,F:FF:FF,,F,F,F,FF,FFFF,,,FFFFFF,,FF::F,F,FFFFF,FF,F:F,:FFFF,FF,FFFFFFFFF:,FF:F:FFF:FFF,F,,,FF,,FFFFFFFFFFFFFF,,:F:FFFFFFF,FFFFFF,,FF,FFFF 32.55147017794984 32.73547385375783 32.73547385375783 CP False unclassified 0 0 287 0:111 A:34 0:108 NA gold_standard +SRR12204734.132645528 NC_019028.1 NC_019028.1 1247114 1247114 458 NC_019028.1 NC_019028.1 1247114 1247114 458 458 76 15 221 14 12 1 4598 4358 2 2 29S80M19S 119S10M13S 128 142 ATGCTGCCTCTCCAGCCCTTGCTACACTCACACAACATAAAGATGACAGTGAGACCACAAAAACACTTGCAATTAGTTCTGATGGTGCTAATGAACCAGTTGAGATGCTAATACCTGTGAATGAATGG TATGTAGGCTCAATTTGCTAGAAGTCAGACATCAGCCTTTGGTGTTCAACTCAGCTGTGAGTGGTACTGAATCTCGTGTAAGTCTTAACAAGACTACTGGTCCCACTCTAAATTCATGTTCTGGACTTGCAGCTCTAATTCA FFFFFFFFFF,FFF:FFF:FFFFFFFFFFFFFFFFFFFFFF,FFFF:F:FFFFFFFF:F,:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFF,:FFFFFFFFFFFFFFF:FFF FF:F,F,:FFFFF,::F,:,FF,F,,FF:F:::FF,F:,,,FFF,:FF:FFFF:FF:FFFFFFFFFFF:,F,:FF,,FF,:FFFF::FFF,FF,:FFFFF:FFFFFF,F,:FFF,FFF,F,FFF,F::F:FFFF:,FFF,FF 15.663546158223031 3.0267400023560063 15.663546158223031 CP True Astrovirus MLB1 568715 1 271 568715:9 0:9 568715:3 0:52 568715:1 0:6 568715:3 0:2 568715:3 0:6 A:34 0:109 NA gold_standard +SRR12204734.136705306 NC_001943.1 NC_001943.1 1868658 1868658 264 NC_001943.1 NC_001943.1 1868658 1868658 264 264 191 199 217 16 12 10 899 784 18 18 137M3S 9S134M3S 140 146 TTACACTTGCAACATCCCATTTGTCTGGTTTTAGGATGGCTGTTTTACCCACAATTCCATTCCACACCACTATGACTTTGTGGGTTATGAACATGCTTATGGTTTGCTACTACTTTGACAATTTGTTATCAATAACAATG CCTAGGGCACATTGGTTAAGAACTGTTTTCTATTACATCCATTATTATGAGATGTGGAATATTTTTATGTTTGTTCTTGCAATTGGCACTGTCATGAGAAGCGCCCGCCCCGGCACAGACTTGATTACACTTGCAACATCCCATTT FFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFF:FF:FFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFF 38.65111711161559 39.93092053718007 39.93092053718007 CP True Mamastrovirus 1 1239565 1 264 0:45 1239565:5 0:3 1239565:39 0:8 39733:1 0:4 1239565:1 0:5 1239565:3 0:3 1239565:5 0:1 1239565:8 0:6 1239565:7 0:28 1868658:1 0:5 1239565:1 0:5 1868658:5 0:10 1239565:8 0:23 264 gold_standard +SRR12204734.146348965 NC_022249.1 NC_022249.1 1389204 1389204 242 NC_022249.1 NC_022249.1 1389204 1389204 242 242 140 16 152 14 7 0 6677 6617 2 2 47S37M1D3M4I1M1I49M 91S8M44S 142 143 ATCAAGCACGCAAAGTAGGCGATAATCATGACCTACAAGCTTCAGGAAGCCGCGGCCACGCCGAGTAGGATCGAGGGTACAGCTTCCTTTTCTTCTTTTCTGTCTCTGTTTAGATTATTTTAATCAACATTTAAAATTGATT TTACTTTGAGATGCTTACTTCTCTGCCACAGGCCGGGGCAGCACAATTTGACCAAAGTGAACAAGCAGTAACATTTCATGATAGCCCTGAACAAGCACGATTAAGTGCTGAAGAGACAGATAGTGACTTCGAGAGTACTGAGG FFFFFF:FFFFFFFFF:F:FFFFF,FF:FFFF:FFFFFFF::FF:FFFFFFFF:FFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,F,FFFFFFFFF:FFFFFFFF,:FF,,FFFFFFFFFFFFFFFFF,F ::FFFFFF:FFFFFF,FF,FFFFFFFFF::FFFF:,FFFFFFFFFF,FF,FFFFFF:FF:FFFFFFFFFFF,FF:FF,::FF,F:,FFFFFFFFF::,,,:FFFFFFFFFFFF,FF,FFF:FFFF:FFF:F:FFFFFFF,FFF 28.249573355322724 3.2239574663376214 28.249573355322724 CP True Feline astrovirus 2 1389204 1 286 1389204:15 39733:2 1868658:3 0:33 39733:6 2732408:5 1141625:2 0:42 A:34 0:110 NA gold_standard +SRR12204734.158389317 NC_001943.1 NC_001943.1 1868658 1868658 410 NC_001943.1 NC_001943.1 1868658 1868658 410 410 220 113 210 18 8 24 1531 1259 14 14 2S138M 126M8S 140 134 TTCGGTACATGCCCGAAAAGGATATAGCATTCATAACTTGCCCTGGTGATTTGCATCCAACAGCAAGACTAAAATTATCAAAGAATCCAGATTATAGTTGTGTAACAGTTATGGCTTATGTGAATGAAGATCTTGTGGTT GTGATGCTAATGGGAAGTTTGTTGCCACTGTACCAACTATTATCAAAAATGTTGCATTTGAATTCTTACAGAAGCTTAAAAAGTCAGTTGTGAGATTTTTAGTCAATGAGTTTGTAGTTATAAAACAATTTGCT FFF,:FFFFFFFF,,FFFF:FFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFF:,FFFFFFFFF,FFFFFFFFFFFFFFFFFF,FFFFFF:FFFFFFFFFFFFFFFFFFFFFFF:FFFFF:,FFFFFFFFFFFFFFFFFFF ,FF,:F:F::FF:,F:F,FFF::FF,F,FF:F,:FF,FF,,:F,::::,FFFF:::FFF,F,FF:F:F:,F,FFFF,:FF:,FF,::,FFF:FFF,,F,,F,,FFF:FF,FFF::F:FFFFF:F,,FFF,FF:: 44.51961133275095 23.071395679608088 44.51961133275095 CP True Human astrovirus 1868658 1 275 0:1 1239565:1 0:16 1868658:1 0:35 1868658:5 0:34 1868658:2 0:5 39733:1 0:5 A:34 0:58 1239565:5 0:38 NA gold_standard +SRR12204734.194975304 NC_035475.1 NC_035475.1 2021738 2021738 240 NC_035475.1 NC_035475.1 2021738 2021738 240 240 97 133 20 21 15 9 924 996 12 12 24S101M6S 2S95M49S 131 146 ATGTTGCTCTTGAGCCATGTCATTCTGTTCCTGCGCTTGAGGATGAGGAAGCCCTGGTAGTGTGGAGTGCCGTTCTCACCCACTTCCTGCTGCACTATGAGGTATTCAAGGTGCTCCCTCTGCTCATCGTT CTATGAGGTATTCAAGGTGCTCCCTCTGCTCATCGTTCTCCCAGAACTTGTCTTCGTCTTTAGGGTTGTTGATGGTGAAGCACAACCGCTTTGCTGCTCCTTCTGCAACTGGCATCGTGTCGTTCATTCAGATAAAGAATTAGGAT FFFF:FFFFFFFFF,FF:FFFFFFFFF:::FFFFFFFFFFFFFFFFFFF,FFFFFFF:,FFFFFFFFFFFFF:FFFFFF:FFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF F,,F,FF,FFFF,F,:FFFF:,,FF,,F:FF,F,FFFF,FFFF:FF,FFFFF,FF:FFF,:FF:F,F,F,FFFFFFFF::FF,,FFFF::FFF,FFFFFFFF,FFF,F,F:FFFF::F,FFFF:,FFFF,::,FFFFFF,FFFFF: 19.89663055039337 26.687499655502258 26.687499655502258 CP False unclassified 0 0 240 0:140 2021738:5 0:61 240 gold_standard +SRR12204735.74874701 NC_029052.1 NC_029052.1 1776109 1776109 408 NC_029052.1 NC_029052.1 1776109 1776109 408 408 235 233 None 19 8 7 5619 5357 18 18 1S145M 1S141M 146 142 CTAGATAACAAACAAAAAGTTTGTTTGCTCAATACAATGAGTTTTGACAAACATGTTTCTCCAGTTGTTTATGGTGATGATAACATATTAAATGTAAGTGATTATATTAGTGATGTTTTCAATCAGCAAACTTTAACAGATGCGTT AAATACTCAAGTATTATGGTTAGTTTATGAGATTATTGAAAATTTCTACAAGCAGTATGACAAGAATTATAGTGATAAGGATGCAAAGATTCGTTATTCATTGTGGCTCCATATAGTGAATTCAATTCATGTTTATGGAGAT FFFFFFFFFFFFFFFFFF:FFFFFFFFF:FFFFFFF:FFFF:FFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFF:FFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFF 47.15460465445888 47.01536136992996 47.15460465445888 CP True Goose dicistrovirus 1776109 1 289 0:1 1776109:1 0:24 1776109:4 0:15 1776109:5 0:33 1776109:5 0:2 1776109:3 0:13 1776109:6 A:34 0:24 1776109:2 0:9 1776109:4 0:7 1776109:1 0:1 1776109:2 0:35 1776109:1 0:11 1776109:1 0:11 NA gold_standard +SRR12204847.194714938 NC_001943.1 NC_001943.1 1868658 1868658 453 NC_001943.1 NC_001943.1 1868658 1868658 453 453 17 138 16 144 1 20 5091 4661 2 2 124S12M11S 136M1S 147 137 ATTGGCTAGTCAAAGGTGGATGGGGGTTTGTCAAGCTGATTGCAGGTAGAACAAGGAATGGTACGCGTAGCTTCTATGTGTATCCAAGCTACCAGGATGCGCTGTCTAATAAGCCAGCTCTTTGCACTGGAGGGCTTCCAAGTGGGC GTGGAACAACCGAGATTGAGGCGTGTATTCTCCTCAATCCGGTACTTGTTAAGGATGCTACAGGGAGTACTCAATTTGGACCAGTGCAGGCGCTAGGGGCGCAGTATTCAATGTGGAAGCTGAAGTATCTTAATGTG F,F,FF:FFFF::FFFFFFFFFF,:FFFFFFFF,:F:FF:FFFFFFFFFFFFFFFFFF,,F:FFFFFFFFFFFFFFFFFF:FFFFFFF:FFFFFFF,FFF,F,FFFFF:FFFFFFFFFF:FFFFFFFF:FFFFF:FFFFF::FFF:F F,:,F:FFF::FFFFFFFF::FF,:FFFFFF,,FFF:FFFF:FFFFFF,FFFF,FFFF,,:FF:F:F,,FFFF:,:FFFF:,FF:FFF::FFF,FFFFFFF:FFFF::FFFF:F:FF::F:FFF,:F:,FF:::FF, 3.4065183136705377 28.048889229539444 28.048889229539444 CP False unclassified 0 0 285 0:113 A:34 0:104 NA gold_standard +SRR12204848.140127907 NC_030922.1 NC_030922.1 1239565 1239565 170 NC_030922.1 NC_030922.1 1239565 1239565 170 170 230 219 14 149 8 9 1626 1599 18 18 143M 141M 143 141 GCAGTTCGCACCCAAGATGGGATGTCGGGTGCACCAGTTTGTGACAAGTATGGTCGAGTATTGGCAGTTCATCAAACGAATACTGGATATACCGGAGGGGCTGTTATAATAGATCCAGCAGATTTCCATCCAGTGAAAGCCCC ATTGTACATGGCAACACCCTCTCATATGCAGTTCGCACCCAAGATGGGATGTCGGGTGCACCAGTTTGTGACAAGTATGGTCGAGTATTGGCAGTTCATCAAACGAATACTGGATATACCGGAGGGGCTGTTATAATAGAT FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFF 46.34438857860331 44.2535109504504 46.34438857860331 CP True Mamastrovirus 1 1239565 1 170 0:2 1239565:2 0:51 1239565:1 0:18 1239565:1 0:7 1239565:2 0:16 1239565:3 0:1 1239565:5 0:11 1239565:5 0:5 1239565:2 0:4 170 gold_standard +SRR12204848.15085913 NC_030922.1 NC_030922.1 1239565 1239565 369 NC_030922.1 NC_030922.1 1239565 1239565 369 369 123 12 110 23 1 0 2227 2028 0 0 75S65M 105S6M31S 140 142 CTGGCTACCCTGACTATGATGATGAGGATTACTATGATGAAGATGATGATGGATGGGGAATGGTTGGTGATGATGTAGAATTTGATTATACTGAAGTAATTAATTTTGACCAAACAAAACCAACTCCTGCCCCGAGAACA GCCCAATTGAATTCCGCTGTTGAAAACCCAGTAACTGCCATTACACAACAACCTGTCGTTGCACTAGAACAGAAAAGTGTTAGCGATAGCGATGTGGTTGACCTTGTCAGAACTGCAATGGAACGTGAGATGAAGGTGCTGC FF:FFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF:FFFFF:FFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFF:FFFFFFFFFFFFFFFFFFFFFF FFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F,FFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF::FFF:FFFFFFF:FFFFF, 24.890509972401667 2.421392001884805 24.890509972401667 CP True Mamastrovirus 1 1239565 1 283 0:6 1239565:13 39733:3 1239565:8 39733:2 0:74 A:34 1868658:9 39733:5 1868658:1 0:94 NA gold_standard +SRR12204848.28156033 NC_030922.1 NC_030922.1 1239565 1239565 442 NC_030922.1 NC_030922.1 1239565 1239565 442 442 164 234 16 None 15 6 1855 1547 17 17 134M 136M 134 136 CACAACAACCTGTCGTTGCACTAGAACAGAAAAGTGTTAGCGATAGCGATGTGGTTGACCTCGTCAGAACTGCAATGGAACGTGAGATGAAGGTGCTGCGTGATGAAATTAATGGTATACTTGCACCATTCCTA CACAGTAATGGCTTATGTGAATGAAGATCTTGTGGTCTCAACCGCAGCAGCTATTGTACATGGCAACACCCTCTCATATGCAGTTCGCACCCAAGATGGGATGTCGGGTGCACCAGTTTGTGACAAGTATGGTCGA ,F:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFF::FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFF:F:FF,FFFFFFFFFFFFFFF,:FFFFFFF:FFFFFFFF:FFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFF 33.484149481909085 47.63208599883977 47.63208599883977 CP True Human astrovirus 1868658 1 271 0:11 1868658:5 0:6 1868658:17 0:9 1868658:1 0:51 A:34 0:15 1239565:3 0:1 1239565:5 0:11 1239565:5 0:5 1239565:2 0:54 1239565:2 NA gold_standard +SRR12204848.31434798 NC_001943.1 NC_001943.1 1868658 1868658 235 NC_001943.1 NC_001943.1 1868658 1868658 235 235 216 225 None 18 8 9 1278 1179 18 18 136M 144M3S 136 147 TGTGGCCACTGTACCAACTAGGATAAAAAATGTAGCATTTGACTTCTTTCAGAAGCTGAAACAGTCAGGGGTGCGTGTTGGAGTCAATGAATTTGTTGTCATAAAACCAGGTGCATTATGTGTTATAGACACTCCT CACGTCATTGTTTGTTGTCATACTAACCTGTAGGTTTATACGTATGGCAACAGTTTTTATTGGCACCAGGTTTGAGATCCGTGATGCTAACGGGAAGGTTGTGGCCACTGTACCAACTAGGATAAAAAATGTAGCATTTGACTTCTT FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF 43.9680793835444 45.08627179858065 45.08627179858065 CP True Human astrovirus 1868658 1 235 1868658:12 0:92 1868658:3 0:16 1239565:2 0:63 1868658:8 0:2 1868658:3 235 gold_standard +SRR12204848.39902754 NC_022249.1 NC_022249.1 1389204 1389204 277 NC_022249.1 NC_022249.1 1389204 1389204 277 277 23 103 18 133 1 23 4486 4337 2 2 30S15M97S 16S52M1D6M2D71M 142 145 TCCATACTTTGGGTAGGACTATGTCATCCTACCAAAACCAGCAATTTACCGGAGGGCTGTTTCTTGTGGAGCTTGCCTCTGAGTGGTGCTTTACAGGCTATGCTGCAAATCCAAACCTTGTTAACTTAATGAAGTCGACCGA ATCTAGGCCTCGATCTCAATCCAGAGGCCGAGACAAATCAGTCAAAATTACAGTTAATTCCAGAAACAAAGGTAGAAGACAAAACGGACGCAACAAACATCAATCTAATCAACGTGTCCGTAACATTGTCAATAAAAAACTCAGG FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFF FFFFFFFFFFFFFFFF,FFFFF::FFFFF:F:F,FFFF,FFFF:FFF:FF,FFFFFFF:FFFF:FFF:F,F,FFFFF,::FF,,FFFF,FFFF::FFFFF:FFF:FFF:,FF,F,FFF::FFF,F,F,FFFFFF:F,FFFFF:F, 4.641001336945877 20.696305113140944 20.696305113140944 CP False unclassified 0 0 288 0:108 A:34 0:112 NA gold_standard +SRR12204848.58209445 NC_030922.1 NC_030922.1 1239565 1239565 458 NC_030922.1 NC_030922.1 1239565 1239565 458 458 165 229 18 159 13 7 2868 2538 17 17 1S128M 138M 129 138 CCCATTATTAGGTCATTTACCAATCAATAGACCCATTTTTGATGATAAGAAACCCAGGGATGATCTCCTTGGTCTACTCCCAGAACCAACCTGGCATGCTTTTGAGGAATATGGACCAACTACATGGGG TTTATCTTGCCAGCGCCACACCGACTAACTAAAGCTGATGAAATAGTTCTTGGATCAAAAATTGTTAAGCTTAGGACGATTATTGAAACAGCCATAAAGACCCAGAACTATAGTGCACTACCTGAAGCTGTGTTTGAG FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF F:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFF:FFFFFFFFFF:FFFFFFFFFFFFF:FFFFFFF,F,FFFFFFFFFFFFFFFFFFF:F,FFFFFFFFFF,FF:FFFFFFFFFFFFF,FFFFFFFFFFF 33.951927825838055 46.47619437372113 46.47619437372113 CP True Mamastrovirus 1 1239565 1 268 0:71 1868658:5 0:19 A:34 0:1 1239565:5 0:17 1239565:1 0:35 1239565:5 0:1 1239565:10 0:30 NA gold_standard +SRR12204849.101995294 NC_025675.1 NC_025675.1 1562064 1562064 421 NC_025675.1 NC_025675.1 1562064 1562064 421 421 221 179 None 18 4 13 6033 5737 18 18 125M 135M 125 135 GCTGGAGCACACGGGATAGGCATGGCTGCCATCATCTCGCAAGAGATGCTGGAGGCTGTGTTTACACTCGGTGAGTTCCAAGGTAAAATACACACAGTGAAAGAACACCCCTACATCTATACCCC TGAATTGGGCAAAAACACCAGGTAGACAATTCAGAGAGATCTCTGGATACTTTCCTCGCGGCAAAGAAGGACATTTCAAACTGAGCCCTGCCGCAAAAATTACAGGGGTAGTGGCCCATCAAAATCCCAGCTTTA FA-JJJJJJJJJJJFA@a6W@&76WpZU?E_8Et0L6V>liN6!67&M`s3#>&zqC4c36+_>G>bGN4VNyJ;L+*58r-x|KaPmr%yj_Pu(6_pY!GUuRl=p zmv5W?OY6Gt$EUyj@b_Pze%W-L8`nL5{P5G$mrXacaG$^b`uyep@}E$SFMG?euQZvK zW4yNFC)8Q!$fEsDPaL>#~rOPyB z>O({EnKZ1NOP_h8)2V@vP4sc%7V1N%Q)7`vx`}COsFMuL2Pd4#ABtR;4^n%QJLLiB zjc$yuK*tiU%R^kmG(p&N!6O+WadvBSaBJT<28Tj$z1#A{9Y z- zZy@!r`t8OWuO0WLU%Tslk8{(NQI5w7iDSKorVhy`?neB>xw~B* zX(y-dmRsgCcE@>V9)CCL$VWLOBkV42WVbucUw3r@`Pevn2*LQ+nivUUz2}zb0?qe6 zIGU}!yV7zCVv!+cOof?*`|!5Dyv42|?y^E^ z!d}!m&>=_1EdLuG`}V;Mge>}H=%b^LbW_ILhG!Jh3kZom@a*LIn0U?4LI4$jMW$o{ zfn*^M%w~`l<3Qn4vZ9s81i-Y7Cwl?O2+=gcG@wL)J2PuBXDZY2O#$HoG`X*WL5e^% z%TSr=NdPGPB%I7>7lz7NzK<*m;4n}%Zoc3UA6fC%ZoIbUZw`;k?3*X?tFO7&i~hXd zfFbnQj-hhgVgAeIj*3`8!tS_m*oJ^GfItDqfuTC=v3`Y}wksZY-0`R*$N`#Jhpr=p zbSCuHy4KMGe`tv-eA~9xg|2NocdZxUX!`Q&bCbS)`|?@D#CH$>=U+_|x<1l$HI0pR z=Qt3UhIhK-n5}HxGql&O583rjw?J`t4T?-}uDxHfBTFJCP6czH78!oyiyxRZursEsG%6E@8u0M2v z$cd=I5DddWD$QYV?Os9@S%Xm-anI*%78-FWTR;^^FXd?|)X-{yGSblpm` z3#*QohcDI+-2JW+{c4EaZYwhE79nL8#pffWWcCM|qk(`Kg0nV~^5qVep%Oi@wTqpl zsd=SWT@=RWUp{<#ez#f}>@wf!#IEigP0$s<&d?ORJ8kaX+32iq8L=d~k}4ReD=ijw zCU7Fk+XNnZx|k#Q+)K*d+;1APui;O z9(v_(=Nq1#P#A^Q7&eSOnd7R(fz`6wOrZZw8Jb=dS5*&%okorVlp;n?fDd?GDZR?l zVt*12CZs|&h7c|bgKEtg6bu8`6#v%cD@@gRd9Cdb;S6Q0JR&n}xY`i}NGjue`oQ6nI@-+lY0zPt@2UOPf5?HN{nhh`w44ntS1w_eP)-6Mf@2oIih zUk9VFPIUoGQoE`ERD@L~vw}mS@G7ecWSND;ipw|>$u>g_*Yl=Q1@NMVn*>!jXRst+ zhfi$o>Lo1@yJ2(9@>>LRpc$%mQohY{Wu4?wFIGq*m#ej6IfUFj$dfW4wQ?=arPIj-_(~k(&uS&&7VGhdS=^fPl9Xc=tFQJR{ix6 zwpw~K4}vI`R>f!o^-(QDN;#7ncGAKXP&?sqnAGlszB-{N9SU~Nq*fi}!P)RD(rbc& zC<#hHj%uN8;Mx#;sgeq#@vN_kjTVGERY_X<78s8z@5L*IQ-`MoY=thqvE1)oe)=nF$x;4fSb`*#P|z;DjYGa z(N0iYA?{9)nZ=Bi6_*8StXQzbkDB2j6>(RzUmSzA=E?T6 z%BS`7Y%Qzr>PEe{FK^bdFsnKyQ?GbU*@83-m3qBn#ueIi;v$tfHt|%;GP|MfiKHh& zrTSk)ZBrs-Z%9e%pv37=0&@Xjs@BSr9}{^}%}yv@zy%dBLwiLWf!J{y_)4>Ar{Wxf zV@dp~|LwQ>dtwm&OX*Kv|FoEhm#;g_%CvMJocFHpL+rcOk$Oa0rzJ^`m?yNJey((R z-CoRdS?E%gM!Stg5K8MeyK&RkbQ_cLHj2Sq1T}2V^m6SRI1X(e2IKCACP^J{>dPDL ziw?@RB>Kb)jO7#=$opIdr=aHjfySb{E~v@EvHq;fD6(P?Q(Wj>@l3Zh&@ zXK?5=Rf)N>j!4tfVwM#-9hM-`Uo*&%+M6O6t;QhGD`aKSNyU^^;Qmn;G5or@DCuGx zSiumikzwvtKM`&M{14BP&}7lLB>ko^1m`7u=lig{YSz6=?)+Yd?{sQ5on9N}N=U_P z*S&FceOqek=1`)Z)Vg*UB59DO>u9nayftpHB-Qma@!N2%(;c3l-Zg8#jH%XKw{o~t zvoFEegHfwRY0zU~V}Tl8AzRy>4brsV0LLCnon0rbYi zzG_GM-LLP{>gDB*i`i&-?T(kdU!HdBwT#y|5?6aq(wpzi&VOrRgnwWh1y+8x*oV zL}VMOqlvSKEO_;ZtZerdFfUJSXgUsE-^Ri9F0fkZ!=ddvLL<%f(6>w}FIw(_GtK51tYR$M z`b}3dFdyyQe6)uts`izUZoqOFS{sAEKYAg?H}#c`#N5^ad``X`actkwAI~Aw!L^Sj zJPeT#**ymPVE)iO*gr&3W35E{!hVVkJ0 zki(10+5DvdX@oUN$fCwsh;Js>*odN`SaO;yKK~UcUGd&MAi34&#qFqhsmkETt)phm zx~o`)IRtod)a*9H=(ei{tl1SiJAtcNYuETg=M6E4BOT0#o~LDd=mvJ&7(?vT4s0iS z*>uZJ)Js5pzeFg!LV}^UJZ89G9Mxdod;9WcIu7*(-El;MYuF^#5laTP&q8v2G?oJt z5T!7P|FkCdkQ2nL}UJs?83@x=0 z8UsWT`-k)x@Sk`i^DO)(4IC_Ue1h6wGyIi#<-W=A;%Z`x%Ghwmh*v zH}&OBBJMd6M&(ha#zu74K!2nvkXlJ>;a?=W(ubzNB_Y#a(3OCnq9&D$SoW+rT-tq} zzmO)8b)`OyGSxh;oMwBhYIy))hM6?y22!hp>1q6a^}49nvf#3s%LY~!Cd@_sp8`O{ z@Fj%eW!R5O#cEQCIYAm#t1V)ALCVdT_UGj_rjxuy{wVwBaet2U;XRu9BfU=hcupsD zloo#Hf9JWI99duBtbOQ}h~Y7ij(6ryeBypg;!8kkg z9ZmU;CjFI86?;Gb`svdL052``laV*iT13;5Zn)Zw^PadLyLorz6KR(|uV9UC*3g2vV9=&H@2(=ve(42&#`-4xkpIzg9--E+$>LNRI3P)bD)e$ za*zh6RO9auA)8C}*X0`DrrKH3L=(jGp_A}b(bHk+5dknah_Tg>1EoB*{-m1UmNdRD zP3qo2EFqH79kMUA;lB5q*IJQ7~z;^aw4eEIOp+nPUjhxTL=PydthJzzko}SlK{;EUm&IMY@ojKSr^oC{0j+ygH%MIaj5PMF$kfFJ07ZsR! z>*(M89*JV`w+( z8JV`8k@C7{q#&N?!ZT9Myjf#ms|pE!4RW{B5`s|R$$?CZ^cLnkhD!vWF~)$q6${3_ z@|qwi#dxlmn|s;k9!~~G|6JHX{p_D;kNs>djf?4J z-C-@PE-nxInjA`i?E`(foA2BkxRLwvW_m7HV3`yliV|>56I*KPA}3yJhBdo0eE)z5 z9#4NaowB(G;1x}g79y0|2H>KE$*<{?NNR>;sah7$GHH0KB2u+T$hkERupoX_!&bBE znR0|38yaqtE1p49S^fWNNI`m-VpcQR5a*gw%@YxW3QV=)wm>BW&5IMKjGBUqD!~Oy z7oq@>_a;={Bq77Z;vC^Kil|7q6)}zxDKKO9qb`IE<`JSxra}fci?j@n6Qg@XM+Pgs z?`hoeJQEb*l#Y!|L&9cfJ!8h8t_!@1ocDS}1XYISXeIyO(K!nu=XWGt$ z$cFnXO?O7`4dRJh;^f#2>gG}h{VEs_#?y5i-zIP`zDBd2;1=28Cz;o@+tw^>{jB+` z`o@y>{XJ#lI3Mq4w02CYgP~ts;Gudk!h@uNi4|v<(VEpZ0#S5=y+b2Um4Ry20S&aR z-ac6WQAS`=6;3%A5~0@eNHtn*(zNLe18YSM!;iUD^GYC z$g7ZKw9=#pF5{>ZJ}vNogv+Yj20_g9;|w1fc?9X6`HCzS??pF^s<;Y8E6Fa9JTs*E zqJ&!!(Rsu15zv*>c_?0b_AK?1d<^kd6wq|$C5cf_W6&kohC?{$eQ1drNR4!j?+?8t z96Hx`&e0rv(eQc}cgK=!zfRZo?%gV{JFN4y-Q1D&P*1=$oO#w)U!)%N>!fdSZq9ok zW8IOkgSnsF%PnYg=tDGb2$b#KkZeYMd9$4G*)_J6Y78C8`oR+$2VPPnPL^rdcaQx; zFUj8?F|sHdGQ$k$Qkp9ed|}rtn$dxJ2e78B;$m}+i-qY(20(_H#>FNinIt4#V?X2S zS!HY1R2Oqn#)>j84H;nrN|`0qz&srUu?CvsrjmPByOP#$Q2WrN@P!e1daiJ0T z5L)*tFGt~77ABcxPt`0W!Z9PCWoXe=N2Y7~d}6CktR^Bcz!34q6XOdKG9X7PZDRIS zEDL*%mv|>^(}JK&0qjovJH`kJ0r+I_iY&}%q}4-4Iwf98PKsBNppz_EYz^A)6V@&k zizA5+krYu{laeUHqKt|}&ua~@2c&Y3O=u61nAEi+=KRh;(>)m5x1AN2<;za@ zI4FDDEDp(<6*jvpg?`&5cdP}pH{m}L*|+pDqUGKw(g29kn$vZ|pgbDd{$MwsZQsYB zzk?QYxTha)q_;L4$}X9mT!4rx5w#qkr!nmI(OC1)xJRzh&^;oSk4rPC6bwOF%_y4x zq1q_~(>Wj&^%1m)Pf<&cMzU%Q{Iuky)Cvu&m^;0qcukgaUXyz9k!v}r#0x1|O+#gl zAV302(wZy@<%u)`pjv-`Nb#5CnsHN|n)sez^-29JMTmA$4QHlYw9B|@8pXbICTTN5 z1Qy#0QRaLGa~6MOQUj6FZ#56`q-XkK78aZ~dl-tD1EH%b3g=t7F#EEYz1+^Y?LiBH zVq5J|bqp36R4bMaY;QV2zF^4GYO-jz99w7F;A1qNRK1pERns20#z5EG-cfsq-L+m7 zhkyS3?MaQeI%7HDN3z7;$-aoYeY@C-+-oq3uAPJE2!Y*PN=QG`%yy1bo%P)s^QOMM zk!H=e+Dp+B(nOmu5ZN*6nC2dd=Sch)b5!AU{6b)I@A~mrCEFxtHiio(9!cl7;OqC5M6^8YKjb@c4bz=ZplLpzrp)5k-h^6!Roc%026x%ri`FqDZD{J`%oTDRJCG?G(eT zxG$1Gh5L%F_9PAtHPKf5j*x!21goj2ju|-god&L5qmVGwPqAODs^WsSW zRYm5rYO9r7AxEE1s$(j;U;{pjhC42%r6d9X(>jSx2b$+G2FafuI`*)IL+c0UX;N~u zYv6Q>mrWIVe|!4!{Nv9m*e0IqQq4%_H&Gf_C-5~cj*c8P&JIY>3Pg1;Xz`PpxWx|5 zR=l1hY_si1v{Nn*Vs4759e)PhH7eQp`_6ZQzc>)8_6|}HE9^mOxaVe&JffgnKV=m3T4t@d9`1qlxisy zG#8a~QNvTjVfI~$Id-oe$g@bMViZj*TW%>Ro|VqjI&%^|lPPhgPnHP^@q%8 znGv_z6%{irmr87rfzO2{V}bced>cuu!tBjt*hOJE0T9W~5vnBJwCIm4gR(BUXA2Q= zw3vj|lFQO48-v(TMS3ltF815gzOjeCi-ZrF*zFK|uBJ+YXzxj`@{uN>V}HqumfQ5l z_#5Wfn^>7p3;tk7WXf=x3!QI!OPb$(Fp1&re!Ou_0c2-ciF;$qP0!WPV)aUfho;o(3co7KX=dHc_-=@L%Rm*pJo>Hc2`V*9 z==>y(3C6+V{1bqzeeel_A}M>RdON^CE5jvwEKVX$rnBgfG^3eGB0~@EN}1SEG&KmM z;#FZ)jPWpvco^B_+l(v9J1eOE%uCn#Q&RiwLU-#WVjhs$+V zYEU#dYSJ{rDzE8@NIg`iMOJGkG%&d15$BR;<0VQnYfNdSFyuq4u>|O^s7mmTz;>fL z9AaM8CWL!buLRSv@od~lE-MoC!r!2F~k0R5+PIQ>lD>+auS;%dlAIP zzoHm+;|WiVQA`)YRhZf1#$UfCCRkg=zH%|^#p9QfB0a73Gx5@I@gdfgH69q>v#Kbe z0DG~u5|@r5>}qU5m01_F&g|iYA_sIeE*t_0N1o6#41^*!*u_KVEeC)1eQ=VFd!^UC z6$rSFUg$T@iLh}_cynh?@^I*a>#Yg*!_X+T-P>0-%LWoQmR6&Aot%2A)uvt=6AJ>>8#rtQQRqC9TpS0ogjaHT72` z2Ic@Xjcr3mq&n4L9L=53u-6HJb}4CEgvzN?jh+!lVo9s(p@c_n_Rww4v-?aOe(?3x z=q_E>#q_^gzGwZZPHUQ@QRg@Fha!flOmr_EOUw>?Rkb((?Nm({N)vQ2jv}4`1Tu=m z%s>Z}=m2p&fI!C7xC2LzMAODT#eoJ58h!RoSt-8o9&E-gJ*>oH`T|MNF#LFh=N5=}$3%%H^6FScSMe6WXMsK@^oQ zP&7xXk+~a_`j;VnoiJ}x(lAc8jj#0Oc)ON=>cFrjlL|^iI!`59E7-OU5%m+@d?R%h!3w}n}9NM zkku5UnitBjba!~9eCpGeoa4v}Vo@3i;7#WB%F87$u*IqTs20czI z&)o{$FEG3QeY!X*lf~7kt>10q(u_Nh?f7ul^h6l?A!QnycIt`8FxuQrC zQr9I01kT-z&qb>qAuc*;Ds9&6)}jt&b&{2&>9y*ssv+1gn7~d@)E2KKoaPdS0AG`l zo1oF&lLoIOt(LCQEX!kdG+!aZf}#*Er6%2~Hvu{Ob9FaOZqn}|44P}H*R1J;)ZVcD z?R5GVaYUc?bv{Lj01;cpSOc@*;M$8DN;IX#E}zyiWI~LY%;Kw7hL@kVv$=+Re&_H? zf2(aPoQ*5ZPINB`m7Yg~F%*V;_|eaE3^=>M*`y=UYHUGF`#o1}>VoH(PTSIoPc+F- z<6u#6Fu@OoSkqi-dHwkut1O1@6=2!69ef<-Ty)bfVIK0?IOD7d^SiVCoBHx*IV^

*lnPPP>>QXAT`bw_pK}BnkDrPd|QrJ0QG{|@+O%`N|K~IY( z);j(g%>_#X=mRzm&Evt7YE<|{fF&Pkiq`R$B;A}59F|oCt%*c6T}UgHW=M@0qO%$# zk>!d6_o+IeYh(&4p&~G~XhCzW2u`OqNdmiI*gmN{Ry;=>28rm#QuT(Ufg$LqF_4dK zY=S#@JH&nn90L-4;Ak#mdqd(`h>^(3cUL-9f%ECZk3WA{9AR}X(=;{E@3aYFl=EKK zL2}exP342+xjLbn_ab64bx$NsyFIwwUCuPOO#O^z-$vImz203}BEGFJZ&KNb{*vr# z><2uUaY6XHX>K5wqp_Oc4jhro_a+=D_5<6C^7XgAQkYA8|1g8KJhLe_= zDKKDaUX%1Gc({rDL7FH*P-2CQqh!xXG#2~HB(jUGlN@BlIXWCAB{@lPznIMTNwh?X z+Yn0#hlp^8$d0X4LpuQS@xm5)9#2a~k}eE0TUHTtw%1(rJ45T0Ooliv@zi^vc3xOg zWmmnw^_8kQNR(Wpbj;K%9y=Bf#>UzM;g~csXI%_!pm|4{S8yDu>08@#w(7N}yMn#q zQjLNET>Wku_tjk#313HeEyp4Pu~(_SzP)^g$GQ&GZ7j1EJ6MMRuju2Plv_VvlC4#j z#JzoaGn`2{2xR4LU;)@WE;B1pJUD`f(uFxFOdkK(HxnMQq5LP>m#`&@f)@=tL4=4p z?G$IoeOI2;{CTbVpz+uW3Js&2Y9kyAS@Z8D7BeA1QENh>f`tTS7A14I!%-(g6P9X? zj0_`^mbK6a3TetNIJ|oG^jj6JUvuX-Z~moD^O|=c`4D5QcGR>_<2?CKYf)1%$uNzz z3He250BrUIY8G$xKFk8^Mv>zq&GfLhy`oDZGB^3-UNA8@|@&(l7lz}S^1XdbI6Zh{DFw9E*cGz(p<74gUGZDe!-L%(~UUT zHKe&@6;!ou1>A8NbgTts)bFldKm=iGOc$P?F+b%7_92d*1UHNRtLkm31)Zjv&mp$} z*F-hpW*t=(U#+`{i^IGF>kqvD1quMH)&=cq9o7f@<*8xJT}Q-fz9uopvOc$-*|W25 zcKc~xyuBQC#hGjN+jLxvQEQgHc6-p@sIb&+J}Xj(3I)RGWq z{UHur@4abBrelA?%RaYsQ~UBMT3vgezmEXM1?608?xr|r{G0mn#*ttqA|ebqgoLf& zd`lpkEkVSCo)uMbl!$EGHVG1AeHsSpx@ctz%||a_I(3U9eiB7S<%1;d>Ko#yq1z(g z2gmThWKo4{)g+9WP+h6a&&*?aGB1^CDw~!1pbmHeCw(qf(Hkzc=68)35G$xWiTMA#ZKk5&)yDVo`!)~+d+*@XiIxKYO01CaZf9yocITb5!jYs!&$$UPkH zNCM{c{XN22q}Bx5&*Ln_a++@1QsDer`9U>3e{s#6s{aG#RA`w3&v?x%xwY}?Dzxn= zySNn&lM_i`=I>c(tr<<{CUEX9X=JAB`hl28Oi84OatYJk(ABtsGha~_1_L?xn zA-M2{$H=;NcvD~A? zsTEkUSUo&SYxcs)Cnh|$M6ti}By2s-xrY)MH^+aTYW%kZK2*&YM&(PZ zQ$6Q9?VCPA!m_F~TX_G@*KMu%#R64Z7qb Date: Tue, 4 Feb 2025 15:24:10 +0000 Subject: [PATCH 2/3] updated changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72171997..0c29c0b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ - Viral hits TSV moved from `virus_hits_db.tsv.gz` to `virus_hits_filtered.tsv.gz` - Numerous changes to column names in viral hits TSV, mainly to improve clarity - Updated mislabeled processes +- Unzipped gold standard reference output in `test-data/gold-standard-results` # v2.7.0.2 - Updated `pipeline-version.txt` From a541c9ce531a2cc89d63c0aa36f2edb907dbc432 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Wed, 5 Feb 2025 16:19:06 +0000 Subject: [PATCH 3/3] Updated test to use ungzipped version of hits. --- tests/run_validation.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/run_validation.config b/tests/run_validation.config index 088b0962..2d80e3bc 100644 --- a/tests/run_validation.config +++ b/tests/run_validation.config @@ -10,7 +10,7 @@ params { ref_dir = "s3://nao-testing/index/20250130/output/" // Reference/index directory (generated by index workflow) // Files - viral_tsv = "${projectDir}/test-data/gold-standard-results/virus_hits_filtered.tsv.gz" + viral_tsv = "${projectDir}/test-data/gold-standard-results/virus_hits_filtered.tsv" viral_fastq = "" // BLAST