diff --git a/.github/scripts/download_assemblies.sh b/.github/scripts/download_assemblies.sh index 232d43a..bd237ab 100755 --- a/.github/scripts/download_assemblies.sh +++ b/.github/scripts/download_assemblies.sh @@ -9,6 +9,6 @@ curl -o .github/data/assemblies/MK583613.1_segment_4_HA_H3N2.fa "https://eutils. curl -o .github/data/assemblies/MK583614.1_segment_5_NP_H3N2.fa "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=text&id=MK583614.1&db=nucleotide&rettype=fasta" curl -o .github/data/assemblies/MK583615.1_segment_6_NA_H3N2.fa "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=text&id=MK583615.1&db=nucleotide&rettype=fasta" -cat .github/data/assemblies/MK58361*.fa > .github/data/assemblies/MK58361X-H3N2.fa +cat .github/data/assemblies/MK58361*_segment_*.fa > .github/data/assemblies/MK58361X-H3N2.fa rm .github/data/assemblies/MK58361*.1_segment_*.fa diff --git a/.github/scripts/run_pipeline.sh b/.github/scripts/run_pipeline.sh index 37e1360..cbf69f5 100755 --- a/.github/scripts/run_pipeline.sh +++ b/.github/scripts/run_pipeline.sh @@ -15,7 +15,7 @@ if [ -z "${GITHUB_ACTIONS}" ]; then else echo "In GitHub Actions environment. Modifying nextflow.config and FluViewer.nf." sed -i 's/cpus = 8/cpus = 4/g' nextflow.config - sed -i '/memory/d' modules/FluViewer.nf + sed -i '/memory/d' modules/fluviewer.nf fi nextflow -log artifacts/nextflow.log \ diff --git a/bin/tools.py b/bin/tools.py index e1897fc..e832d1f 100644 --- a/bin/tools.py +++ b/bin/tools.py @@ -12,7 +12,7 @@ def flex_translate(nt_seq, debug=False): - An integer indicating the best reading frame that was used for translation (0, 1, or 2) - An integer indicating the number of stop codons in this best translation candidate """ - min_count = np.Inf + min_count = np.inf best_seq = None best_frame = -1 diff --git a/environments/environment.yml b/environments/environment.yml index c9df87d..7b75d98 100644 --- a/environments/environment.yml +++ b/environments/environment.yml @@ -4,263 +4,8 @@ channels: - bioconda - defaults dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_gnu - - _r-mutex=1.0.1=anacondar_1 - - alsa-lib=1.2.10=hd590300_0 - - argcomplete=3.1.1=pyhd8ed1ab_0 - - binutils_impl_linux-64=2.40=hf600244_0 - - bioconductor-biobase=2.58.0=r42ha9d7317_1 - - bioconductor-biocgenerics=0.44.0=r42hdfd78af_0 - - bioconductor-biocio=1.8.0=r42hdfd78af_0 - - bioconductor-biocparallel=1.32.5=r42hf17093f_1 - - bioconductor-biostrings=2.66.0=r42ha9d7317_1 - - bioconductor-data-packages=20230718=hdfd78af_1 - - bioconductor-delayedarray=0.24.0=r42ha9d7317_1 - - bioconductor-genomeinfodb=1.34.9=r42hdfd78af_0 - - bioconductor-genomeinfodbdata=1.2.9=r42hdfd78af_0 - - bioconductor-genomicalignments=1.34.0=r42ha9d7317_1 - - bioconductor-genomicranges=1.50.0=r42ha9d7317_1 - - bioconductor-iranges=2.32.0=r42ha9d7317_1 - - bioconductor-matrixgenerics=1.10.0=r42hdfd78af_0 - - bioconductor-noiseq=2.42.0=r42hdfd78af_0 - - bioconductor-rhtslib=2.0.0=r42ha9d7317_1 - - bioconductor-rsamtools=2.14.0=r42hf17093f_1 - - bioconductor-rtracklayer=1.58.0=r42h58c1800_2 - - bioconductor-s4vectors=0.36.0=r42ha9d7317_1 - - bioconductor-summarizedexperiment=1.28.0=r42hdfd78af_0 - - bioconductor-xvector=0.38.0=r42ha9d7317_1 - - bioconductor-zlibbioc=1.44.0=r42ha9d7317_2 - - blast=2.14.1=pl5321h6f7f691_0 - - brotli-bin=1.1.0=hd590300_0 - - brotli-python=1.1.0=py310hc6cd4ac_0 - - bwidget=1.9.14=ha770c72_1 - - bzip2=1.0.8=h7f98852_4 - - c-ares=1.19.1=hd590300_0 - - ca-certificates=2024.2.2=hbcca054_0 - - cairo=1.16.0=h0c91306_1017 - - certifi=2024.2.2=pyhd8ed1ab_0 - - charset-normalizer=3.2.0=pyhd8ed1ab_0 - - click=8.1.7=unix_pyh707e725_0 - - coloredlogs=15.0.1=pyhd8ed1ab_3 - - colormath=3.0.0=py_2 - - curl=8.2.1=hca28451_0 - - cycler=0.11.0=pyhd8ed1ab_0 - - entrez-direct=16.2=he881be0_1 - - et_xmlfile=1.1.0=pyhd8ed1ab_0 - - expat=2.5.0=hcb278e6_1 - - fastp=0.23.4=hadf994f_2 - - fastqc=0.12.1=hdfd78af_0 - - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 - - font-ttf-inconsolata=3.000=h77eed37_0 - - font-ttf-source-code-pro=2.038=h77eed37_0 - - font-ttf-ubuntu=0.83=hab24e00_0 - - fontconfig=2.14.2=h14ed4e7_0 - - fonts-conda-ecosystem=1=0 - - fonts-conda-forge=1=0 - - freetype=2.12.1=hca18f0e_1 - - fribidi=1.0.10=h36c2ea0_0 - - future=0.18.3=pyhd8ed1ab_0 - - gcc_impl_linux-64=13.2.0=h338b0a0_0 - - genoflu=1.03=hdfd78af_0 - - gettext=0.21.1=h27087fc_0 - - gfortran_impl_linux-64=13.2.0=h76e1118_0 - - giflib=5.2.1=h0b41bf4_3 - - graphite2=1.3.13=h58526e2_1001 - - gsl=2.7=he838d99_0 - - gxx_impl_linux-64=13.2.0=h338b0a0_0 - - harfbuzz=8.2.0=h3d44ed6_0 - - htslib=1.18=h81da01d_0 - - icu=73.2=h59595ed_0 - - idna=3.4=pyhd8ed1ab_0 - - importlib-metadata=6.8.0=pyha770c72_0 - - isa-l=2.30.0=ha770c72_4 - - jinja2=3.1.2=pyhd8ed1ab_1 - - jq=1.6=h36c2ea0_1000 - - kernel-headers_linux-64=2.6.32=he073ed8_16 - - keyutils=1.6.1=h166bdaf_0 - - krb5=1.21.2=h659d440_0 - - lcms2=2.15=haa2dc70_1 - - ld_impl_linux-64=2.40=h41732ed_0 - - lerc=4.0.0=h27087fc_0 - - libblas=3.9.0=18_linux64_openblas - - libbrotlicommon=1.1.0=hd590300_0 - - libbrotlidec=1.1.0=hd590300_0 - - libbrotlienc=1.1.0=hd590300_0 - - libcblas=3.9.0=18_linux64_openblas - - libcups=2.3.3=h4637d8d_4 - - libcurl=8.2.1=hca28451_0 - - libdeflate=1.18=h0b41bf4_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=h516909a_1 - - libexpat=2.5.0=hcb278e6_1 - - libffi=3.4.2=h7f98852_5 - - libgcc-devel_linux-64=13.2.0=ha9c7c90_0 - - libgcc-ng=13.2.0=h807b86a_0 - - libgfortran-ng=13.2.0=h69a702a_0 - - libgfortran5=13.2.0=ha4646dd_0 - - libglib=2.78.0=hebfc3b9_0 - - libgomp=13.2.0=h807b86a_0 - - libiconv=1.17=h166bdaf_0 - - libidn2=2.3.4=h166bdaf_0 - - libjpeg-turbo=2.1.5.1=h0b41bf4_0 - - liblapack=3.9.0=18_linux64_openblas - - libnghttp2=1.52.0=h61bc06f_0 - - libnsl=2.0.0=h7f98852_0 - - libopenblas=0.3.24=pthreads_h413a1c8_0 - - libpng=1.6.39=h753d276_0 - - libsanitizer=13.2.0=h7e041cc_0 - - libsqlite=3.43.0=h2797004_0 - - libssh2=1.11.0=h0841786_0 - - libstdcxx-devel_linux-64=13.2.0=ha9c7c90_0 - - libstdcxx-ng=13.2.0=h7e041cc_0 - - libtiff=4.5.1=h8b53f26_1 - - libunistring=0.9.10=h7f98852_0 - - libuuid=2.38.1=h0b41bf4_0 - - libwebp-base=1.3.1=hd590300_0 - - libxcb=1.15=h0b41bf4_0 - - libxml2=2.11.5=h232c23b_1 - - libzlib=1.2.13=hd590300_5 - - lzstring=1.0.4=py_1001 - - make=4.3=hd18ef5c_1 - - markdown=3.4.4=pyhd8ed1ab_0 - - markdown-it-py=3.0.0=pyhd8ed1ab_0 - - matplotlib-base=3.7.2=py310hf38f957_0 - - mdurl=0.1.0=pyhd8ed1ab_0 - - multiqc=1.15=pyhdfd78af_0 - - munkres=1.1.4=pyh9f0ad1d_0 - - ncbi-vdb=3.0.0=pl5321h87f3376_0 - - ncurses=6.4=hcb278e6_0 - - networkx=3.1=pyhd8ed1ab_0 - - oniguruma=6.9.8=h166bdaf_0 - - openjdk=20.0.2=hfea2f88_1 - - openjpeg=2.5.0=hfec8fc6_2 - - openssl=3.3.0=hd590300_0 - - packaging=23.1=pyhd8ed1ab_0 - - pango=1.50.14=ha41ecd1_2 - - pbzip2=1.1.13=0 - - pcre=8.45=h9c3ff4c_0 - - pcre2=10.40=hc3806b6_0 - - perl=5.32.1=4_hd590300_perl5 - - perl-archive-tar=2.40=pl5321hdfd78af_0 - - perl-carp=1.50=pl5321hd8ed1ab_0 - - perl-common-sense=3.75=pl5321hd8ed1ab_0 - - perl-compress-raw-bzip2=2.201=pl5321h166bdaf_0 - - perl-compress-raw-zlib=2.202=pl5321h166bdaf_0 - - perl-encode=3.19=pl5321h166bdaf_0 - - perl-exporter=5.74=pl5321hd8ed1ab_0 - - perl-exporter-tiny=1.002002=pl5321hd8ed1ab_0 - - perl-extutils-makemaker=7.70=pl5321hd8ed1ab_0 - - perl-io-compress=2.201=pl5321hdbdd923_2 - - perl-io-zlib=1.14=pl5321hdfd78af_0 - - perl-json=4.10=pl5321hdfd78af_0 - - perl-json-xs=2.34=pl5321h4ac6f70_6 - - perl-list-moreutils=0.430=pl5321hdfd78af_0 - - perl-list-moreutils-xs=0.430=pl5321h031d066_2 - - perl-parent=0.241=pl5321hd8ed1ab_0 - - perl-pathtools=3.75=pl5321h166bdaf_0 - - perl-scalar-list-utils=1.63=pl5321h166bdaf_0 - - perl-storable=3.15=pl5321h166bdaf_0 - - perl-types-serialiser=1.01=pl5321hdfd78af_0 - - pigz=2.6=h27826a3_0 - - pip=23.2.1=pyhd8ed1ab_0 - - pixman=0.40.0=h36c2ea0_0 - - pthread-stubs=0.4=h36c2ea0_1001 - - pycparser=2.21=pyhd8ed1ab_0 - - pygments=2.16.1=pyhd8ed1ab_0 - - pyparsing=3.0.9=pyhd8ed1ab_0 - - pysocks=1.7.1=pyha2e5f31_6 - - python=3.10.12=hd12c33a_0_cpython - - python-dateutil=2.8.2=pyhd8ed1ab_0 - - python-isal=1.2.0=py310h2372a71_0 - - python-tzdata=2023.3=pyhd8ed1ab_0 - - python_abi=3.10=3_cp310 - - pytz=2023.3.post1=pyhd8ed1ab_0 - - qualimap=2.2.2d=hdfd78af_2 - - r-base=4.2.3=hbfee4d0_7 - - r-bh=1.81.0_1=r42hc72bb7e_1 - - r-bitops=1.0_7=r42h57805ef_2 - - r-codetools=0.2_19=r42hc72bb7e_1 - - r-cpp11=0.4.6=r42hc72bb7e_0 - - r-crayon=1.5.2=r42hc72bb7e_2 - - r-formatr=1.14=r42hc72bb7e_1 - - r-futile.logger=1.4.3=r42hc72bb7e_1005 - - r-futile.options=1.0.1=r42hc72bb7e_1004 - - r-getopt=1.20.3=r42ha770c72_4 - - r-lambda.r=1.2.4=r42hc72bb7e_3 - - r-lattice=0.21_8=r42h57805ef_1 - - r-matrix=1.6_1=r42h316c678_0 - - r-matrixstats=1.0.0=r42h57805ef_1 - - r-optparse=1.7.3=r42hc72bb7e_2 - - r-rcurl=1.98_1.12=r42hf9611b0_3 - - r-restfulr=0.0.15=r42h56115f1_2 - - r-rjson=0.2.21=r42ha503ecb_3 - - r-snow=0.4_4=r42hc72bb7e_2 - - r-xml=3.99_0.14=r42hc38eee6_2 - - r-yaml=2.3.7=r42h57805ef_1 - - readline=8.2=h8228510_1 - - requests=2.31.0=pyhd8ed1ab_0 - - rich=13.5.1=pyhd8ed1ab_0 - - rich-click=1.6.1=pyhd8ed1ab_0 - - samtools=1.17=hd87286a_1 - - sed=4.8=he412f7d_0 - - setuptools=68.1.2=pyhd8ed1ab_0 - - six=1.16.0=pyh6c4a22f_0 - - spectra=0.0.11=py_1 - - sysroot_linux-64=2.12=he073ed8_16 - - tk=8.6.12=h27826a3_0 - - tktable=2.10=hb7b940f_3 - - toml=0.10.2=pyhd8ed1ab_0 - - tomlkit=0.12.1=pyha770c72_0 - - typing_extensions=4.7.1=pyha770c72_0 - - tzdata=2023c=h71feb2d_0 - - urllib3=2.0.4=pyhd8ed1ab_0 - - wget=1.20.3=ha35d2d1_1 - - wheel=0.41.2=pyhd8ed1ab_0 - - xmltodict=0.13.0=pyhd8ed1ab_0 - - xorg-fixesproto=5.0=h7f98852_1002 - - xorg-inputproto=2.3.2=h7f98852_1002 - - xorg-kbproto=1.0.7=h7f98852_1002 - - xorg-libice=1.1.1=hd590300_0 - - xorg-libsm=1.2.4=h7391055_0 - - xorg-libx11=1.8.6=h8ee46fc_0 - - xorg-libxau=1.0.11=hd590300_0 - - xorg-libxdmcp=1.1.3=h7f98852_0 - - xorg-libxext=1.3.4=h0b41bf4_2 - - xorg-libxfixes=5.0.3=h7f98852_1004 - - xorg-libxi=1.7.10=h7f98852_0 - - xorg-libxrender=0.9.11=hd590300_0 - - xorg-libxt=1.3.0=hd590300_1 - - xorg-libxtst=1.2.3=h7f98852_1002 - - xorg-recordproto=1.14.2=h7f98852_1002 - - xorg-renderproto=0.11.1=h7f98852_1002 - - xorg-xextproto=7.3.0=h0b41bf4_1003 - - xorg-xproto=7.0.31=h7f98852_1007 - - xz=5.2.6=h166bdaf_0 - - yaml=0.2.5=h7f98852_2 - - yq=3.2.3=pyhd8ed1ab_0 - - zipp=3.16.2=pyhd8ed1ab_0 - - zlib=1.2.13=hd590300_5 - - zstd=1.5.2=hfc55251_7 - - pip: - - biopython==1.81 - - brotli==1.1.0 - - cffi==1.15.1 - - contourpy==1.1.0 - - cutadapt==4.4 - - dnaio==1.0.0 - - fonttools==4.42.1 - - humanfriendly==10.0 - - isal==1.2.0 - - kiwisolver==1.4.5 - - markupsafe==2.1.3 - - matplotlib==3.7.2 - - numpy==1.25.2 - - openpyxl==3.1.2 - - pandas==2.1.0 - - pillow==10.0.0 - - pyyaml==6.0.1 - - simplejson==3.19.1 - - unicodedata2==15.0.0 - - xopen==1.7.0 - - zstandard==0.19.0 + - fastp=0.23.2 + - cutadapt=4.4 + - multiqc=1.15 + - fastqc=0.12.1 + - genoflu=1.03 diff --git a/environments/fluviewer.yml b/environments/fluviewer.yml index 7b52e30..b65ad79 100644 --- a/environments/fluviewer.yml +++ b/environments/fluviewer.yml @@ -4,209 +4,15 @@ channels: - bioconda - defaults dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_gnu - - _sysroot_linux-64_curr_repodata_hack=3=h69a702a_13 - - alsa-lib=1.2.3.2=h166bdaf_0 - - bbmap=39.01=h92535d8_1 - - bcftools=1.17=h3cc50cf_1 - - bedtools=2.31.0=hf5e1c6e_2 - - blast=2.14.1=pl5321h6f7f691_0 - - brotli=1.0.9=h166bdaf_9 - - brotli-bin=1.0.9=h166bdaf_9 - - brotli-python=1.0.9=py38hfa26641_9 - - bwa=0.7.17=he4a0461_11 - - bzip2=1.0.8=h7f98852_4 - - c-ares=1.19.1=hd590300_0 - - ca-certificates=2023.7.22=hbcca054_0 - - cairo=1.16.0=h18b612c_1001 - - certifi=2023.7.22=pyhd8ed1ab_0 - - charset-normalizer=3.2.0=pyhd8ed1ab_0 - - clustalw=2.1=h4ac6f70_9 - - cmake=3.25.2=h077f3f9_0 - - contourpy=1.1.0=py38h7f3f72f_0 - - curl=7.87.0=h6312ad2_0 - - cycler=0.11.0=pyhd8ed1ab_0 - - eigen=3.2=3 - - entrez-direct=16.2=he881be0_1 - - expat=2.5.0=hcb278e6_1 - - fontconfig=2.14.2=h14ed4e7_0 - - fonttools=4.42.1=py38h01eb140_0 - - freebayes=1.3.6=hb0f3ef8_7 - - freetype=2.12.1=hca18f0e_1 - - gettext=0.21.1=h27087fc_0 - - giflib=5.2.1=h0b41bf4_3 - - glib=2.66.3=h58526e2_0 - - graphite2=1.3.13=h58526e2_1001 - - gsl=2.7=he838d99_0 - - harfbuzz=2.4.0=h37c48d4_1 - - htslib=1.17=h6bc39ce_1 - - icu=58.2=hf484d3e_1000 - - idna=3.4=pyhd8ed1ab_0 - - importlib-resources=6.0.1=pyhd8ed1ab_0 - - importlib_resources=6.0.1=pyhd8ed1ab_0 - - jpeg=9e=h0b41bf4_3 - - jsoncpp=1.9.5=h4bd325d_1 - - kernel-headers_linux-64=3.10.0=h4a8ded7_13 - - keyutils=1.6.1=h166bdaf_0 - - kiwisolver=1.4.5=py38h7f3f72f_0 - - krb5=1.20.1=hf9c8cef_0 - - lcms2=2.15=hfd0df8a_0 - - ld_impl_linux-64=2.40=h41732ed_0 - - lerc=4.0.0=h27087fc_0 - - libblas=3.9.0=17_linux64_openblas - - libbrotlicommon=1.0.9=h166bdaf_9 - - libbrotlidec=1.0.9=h166bdaf_9 - - libbrotlienc=1.0.9=h166bdaf_9 - - libcblas=3.9.0=17_linux64_openblas - - libcurl=7.87.0=h6312ad2_0 - - libdeflate=1.17=h0b41bf4_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=h516909a_1 - - libexpat=2.5.0=hcb278e6_1 - - libffi=3.2.1=he1b5a44_1007 - - libgcc-ng=13.1.0=he5830b7_0 - - libgfortran-ng=13.1.0=h69a702a_0 - - libgfortran5=13.1.0=h15d22d2_0 - - libglib=2.66.3=hbe7bbb4_0 - - libgomp=13.1.0=he5830b7_0 - - libiconv=1.17=h166bdaf_0 - - libidn2=2.3.4=h166bdaf_0 - - liblapack=3.9.0=17_linux64_openblas - - libnghttp2=1.51.0=hdcd2b5c_0 - - libnsl=2.0.0=h7f98852_0 - - libopenblas=0.3.23=pthreads_h80387f5_0 - - libpng=1.6.39=h753d276_0 - - libsqlite=3.43.0=h2797004_0 - - libssh2=1.10.0=haa6b8db_3 - - libstdcxx-ng=13.1.0=hfd8a6a1_0 - - libtiff=4.5.0=h6adf6a1_2 - - libunistring=0.9.10=h7f98852_0 - - libuuid=2.38.1=h0b41bf4_0 - - libuv=1.44.2=hd590300_1 - - libwebp-base=1.3.1=hd590300_0 - - libxcb=1.15=h0b41bf4_0 - - libxml2=2.9.14=h74e7548_0 - - libzlib=1.2.13=hd590300_5 - - llvm-openmp=8.0.1=hc9558a2_0 - - matplotlib-base=3.7.2=py38hf5b0b65_0 - - munkres=1.1.4=pyh9f0ad1d_0 - - ncbi-vdb=3.0.6=hdbdd923_0 - - ncurses=6.4=hcb278e6_0 - - numpy=1.24.4=py38h59b608b_0 - - olefile=0.46=pyh9f0ad1d_1 - - openjdk=11.0.8=hacce0ff_0 - - openjpeg=2.5.0=hfec8fc6_2 - - openmp=8.0.1=0 - - openssl=1.1.1v=hd590300_0 - - ossuuid=1.6.2=hf484d3e_1000 - - packaging=23.1=pyhd8ed1ab_0 - - pandas=2.0.3=py38h01efb38_1 - - parallel=20160622=1 - - patsy=0.5.3=pyhd8ed1ab_0 - - pbzip2=1.1.13=0 - - pcre=8.45=h9c3ff4c_0 - - perl=5.32.1=4_hd590300_perl5 - - perl-alien-build=2.48=pl5321hec16e2b_0 - - perl-alien-libxml2=0.17=pl5321hec16e2b_0 - - perl-archive-tar=2.40=pl5321hdfd78af_0 - - perl-business-isbn=3.007=pl5321hdfd78af_0 - - perl-business-isbn-data=20210112.006=pl5321hdfd78af_0 - - perl-capture-tiny=0.48=pl5321hdfd78af_2 - - perl-carp=1.38=pl5321hdfd78af_4 - - perl-common-sense=3.75=pl5321hdfd78af_0 - - perl-compress-raw-bzip2=2.201=pl5321h87f3376_1 - - perl-compress-raw-zlib=2.105=pl5321h87f3376_0 - - perl-constant=1.33=pl5321hdfd78af_2 - - perl-data-dumper=2.183=pl5321hec16e2b_1 - - perl-encode=3.19=pl5321hec16e2b_1 - - perl-exporter=5.72=pl5321hdfd78af_2 - - perl-exporter-tiny=1.002002=pl5321hdfd78af_0 - - perl-extutils-makemaker=7.70=pl5321hd8ed1ab_0 - - perl-ffi-checklib=0.28=pl5321hdfd78af_0 - - perl-file-chdir=0.1010=pl5321hdfd78af_3 - - perl-file-path=2.18=pl5321hd8ed1ab_0 - - perl-file-temp=0.2304=pl5321hd8ed1ab_0 - - perl-file-which=1.24=pl5321hd8ed1ab_0 - - perl-importer=0.026=pl5321hdfd78af_0 - - perl-io-compress=2.201=pl5321hdbdd923_2 - - perl-io-zlib=1.14=pl5321hdfd78af_0 - - perl-json=4.10=pl5321hdfd78af_0 - - perl-json-xs=2.34=pl5321h4ac6f70_6 - - perl-list-moreutils=0.430=pl5321hdfd78af_0 - - perl-list-moreutils-xs=0.430=pl5321h031d066_2 - - perl-mime-base64=3.16=pl5321hec16e2b_2 - - perl-parent=0.236=pl5321hdfd78af_2 - - perl-path-tiny=0.122=pl5321hdfd78af_0 - - perl-pathtools=3.75=pl5321hec16e2b_3 - - perl-scalar-list-utils=1.62=pl5321hec16e2b_1 - - perl-scope-guard=0.21=pl5321hdfd78af_3 - - perl-sub-info=0.002=pl5321hdfd78af_1 - - perl-term-table=0.016=pl5321hdfd78af_0 - - perl-test2-suite=0.000145=pl5321hdfd78af_0 - - perl-threaded=5.32.1=hdfd78af_1 - - perl-types-serialiser=1.01=pl5321hdfd78af_0 - - perl-uri=5.12=pl5321hdfd78af_0 - - perl-xml-libxml=2.0207=pl5321h661654b_0 - - perl-xml-namespacesupport=1.12=pl5321hdfd78af_1 - - perl-xml-sax=1.02=pl5321hdfd78af_1 - - perl-xml-sax-base=1.09=pl5321hdfd78af_1 - - pillow=8.3.1=py38h2c7a002_0 - - pip=23.2.1=pyhd8ed1ab_0 - - pixman=0.38.0=h516909a_1003 - - platformdirs=3.10.0=pyhd8ed1ab_0 - - pooch=1.7.0=pyha770c72_3 - - pthread-stubs=0.4=h36c2ea0_1001 - - pyparsing=3.0.9=pyhd8ed1ab_0 - - pysocks=1.7.1=pyha2e5f31_6 - - python=3.8.5=h1103e12_9_cpython - - python-dateutil=2.8.2=pyhd8ed1ab_0 - - python-tzdata=2023.3=pyhd8ed1ab_0 - - python_abi=3.8=3_cp38 - - pytz=2023.3=pyhd8ed1ab_0 - - readline=8.2=h8228510_1 - - requests=2.31.0=pyhd8ed1ab_0 - - rhash=1.4.3=hd590300_1 - - samtools=1.17=hd87286a_1 - - scipy=1.10.1=py38h59b608b_3 - - seaborn=0.12.2=hd8ed1ab_0 - - seaborn-base=0.12.2=pyhd8ed1ab_0 - - setuptools=68.1.2=pyhd8ed1ab_0 - - six=1.16.0=pyh6c4a22f_0 - - spades=3.15.3=h95f258a_1 - - sqlite=3.43.0=h2c6b66d_0 - - statsmodels=0.14.0=py38h31356c5_1 - - sysroot_linux-64=2.17=h4a8ded7_13 - - tabixpp=1.1.2=hd68fcf3_1 - - tk=8.6.12=h27826a3_0 - - typing-extensions=4.7.1=hd8ed1ab_0 - - typing_extensions=4.7.1=pyha770c72_0 - - unicodedata2=15.0.0=py38h0a891b7_0 - - urllib3=2.0.4=pyhd8ed1ab_0 - - vcflib=1.0.9=h146fbdb_2 - - wfa2-lib=2.3.3=h4ac6f70_1 - - wget=1.20.3=ha56f1ee_1 - - wheel=0.41.2=pyhd8ed1ab_0 - - xorg-fixesproto=5.0=h7f98852_1002 - - xorg-inputproto=2.3.2=h7f98852_1002 - - xorg-kbproto=1.0.7=h7f98852_1002 - - xorg-libice=1.1.1=hd590300_0 - - xorg-libsm=1.2.4=h7391055_0 - - xorg-libx11=1.8.6=h8ee46fc_0 - - xorg-libxau=1.0.11=hd590300_0 - - xorg-libxdmcp=1.1.3=h7f98852_0 - - xorg-libxext=1.3.4=h0b41bf4_2 - - xorg-libxfixes=5.0.3=h7f98852_1004 - - xorg-libxi=1.7.10=h7f98852_0 - - xorg-libxrender=0.9.11=hd590300_0 - - xorg-libxtst=1.2.3=h7f98852_1002 - - xorg-recordproto=1.14.2=h7f98852_1002 - - xorg-renderproto=0.11.1=h7f98852_1002 - - xorg-xextproto=7.3.0=h0b41bf4_1003 - - xorg-xproto=7.0.31=h7f98852_1007 - - xz=5.2.6=h166bdaf_0 - - zipp=3.16.2=pyhd8ed1ab_0 - - zlib=1.2.13=hd590300_5 - - zstd=1.5.5=hfc55251_0 + - bbmap=39.01 + - bcftools=1.17 + - blast=2.14.1 + - bwa=0.7.17 + - samtools=1.17 + - spades=3.15.3 + - clustalw=2.1 + - freebayes=1.3.6 + - pandas=2.0.3 + - seaborn=0.12.2 - pip: - - FluViewer==0.1.11 + - git+https://github.com/BCCDC-PHL/FluViewer.git@v0.1.11-3 diff --git a/main.nf b/main.nf index fe85487..d6d47b9 100644 --- a/main.nf +++ b/main.nf @@ -14,7 +14,7 @@ include { pipeline_provenance } from './modules/provenance.nf' include { collect_provenance } from './modules/provenance.nf' include { fastp } from './modules/fastp.nf' include { cutadapt} from './modules/cutadapt.nf' -include { fluviewer } from './modules/FluViewer.nf' +include { fluviewer } from './modules/fluviewer.nf' include { multiqc } from './modules/multiqc.nf' include { fastqc } from './modules/fastqc.nf' include { clade_calling } from './modules/clade_calling.nf' diff --git a/modules/FluViewer.nf b/modules/FluViewer.nf deleted file mode 100644 index 2c95f38..0000000 --- a/modules/FluViewer.nf +++ /dev/null @@ -1,93 +0,0 @@ -process fluviewer { - - tag { sample_id } - - memory { 50.GB * task.attempt } - errorStrategy { (task.exitStatus == 2 && task.attempt <= maxRetries) ? 'retry' : 'ignore' } - maxRetries 5 - - publishDir "${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}", pattern: "${sample_id}_fluviewer/${sample_id}*", mode:'copy', saveAs: { filename -> filename.split("/").last() } - publishDir "${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}", pattern: "${sample_id}_fluviewer/*tsv", mode:'copy', saveAs: { filename -> filename.split("/").last() } - publishDir "${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}", pattern: "${sample_id}_fluviewer/spades_output", mode:'copy', saveAs: { filename -> "spades_output" } - publishDir "${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}", pattern: ".*", mode:'copy' - publishDir "${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}", pattern: "${sample_id}_fluviewer/logs", mode:'copy', saveAs: { filename -> "fluviewer_logs" } - publishDir "${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}", pattern: ".exitcode", mode:'copy' - publishDir "${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}", pattern: ".command.*", mode:'copy' - - input: - tuple val(sample_id), path(reads_1), path(reads_2), path(db) - - output: - tuple val(sample_id), path("${sample_id}_fluviewer/${sample_id}*.bam"), emit: alignment - tuple val(sample_id), path("${sample_id}_fluviewer/${sample_id}*.bam.bai"), emit: alignmentindex, optional: true - tuple val(sample_id), path("${sample_id}_fluviewer/${sample_id}*report.tsv"), emit: reports, optional: true - tuple val(sample_id), path("${sample_id}_fluviewer/${sample_id}*_consensus.fa"), emit: consensus_seqs, optional: true - tuple val(sample_id), path("${sample_id}_fluviewer/${sample_id}*consensus_seqs.fa"), emit: consensus_main - tuple val(sample_id), path("${sample_id}_fluviewer/${sample_id}*_HPAI.tsv"), emit: HPAI, optional: true - tuple val(sample_id), path("${sample_id}_fluviewer/${sample_id}*_cov.png"), emit: coverage_plot, optional: true - tuple val(sample_id), path("${sample_id}_fluviewer/${sample_id}*_variants.vcf"), emit: vcf, optional: true - tuple val(sample_id), path("${sample_id}_fluviewer/logs"), emit: fluviewer_logs - tuple val(sample_id), path("${sample_id}_FluViewer_provenance.yml"), emit: provenance - tuple val(sample_id), path("${sample_id}_fluviewer/${sample_id}*_mapping_refs.fa"), emit: ref_seqs_for_mapping, optional: true - tuple val(sample_id), path("${sample_id}_fluviewer/contigs_blast.tsv"), emit: contig_blast_results, optional: true - tuple val(sample_id), path("${sample_id}_fluviewer/spades_output"), emit: spades_results, optional: true - tuple val(sample_id), path("${sample_id}_fluviewer/${sample_id}*.png"), emit: depth_cov_plot, optional: true - - script: - garbage_collection = params.keep_interfiles ? '-g' : '' - """ - printf -- "- process_name: fluviewer\\n" >> ${sample_id}_FluViewer_provenance.yml - printf -- " tools:\\n" >> ${sample_id}_FluViewer_provenance.yml - printf -- " - tool_name: FluViewer\\n" >> ${sample_id}_FluViewer_provenance.yml - printf -- " tool_version: \$(FluViewer | sed -n '4 p')\\n" >> ${sample_id}_FluViewer_provenance.yml - printf -- " databases:\\n" >> ${sample_id}_FluViewer_provenance.yml - printf -- " - database_name: ${db}\\n" >> ${sample_id}_FluViewer_provenance.yml - printf -- " database_path: \$(readlink -f ${db})\\n" >> ${sample_id}_FluViewer_provenance.yml - printf -- " database_sha256: \$(shasum -a 256 ${db}|awk '{print \$1}')\\n" >> ${sample_id}_FluViewer_provenance.yml - - EXITCODE=0 - (FluViewer \ - ${garbage_collection} \ - -T ${task.cpus} \ - -f ${reads_1} -r ${reads_2} \ - -n ${sample_id}_fluviewer \ - -d ${db} \ - -D ${params.min_depth} \ - -q ${params.min_q} \ - -i ${params.min_ident} \ - -M 40 && EXITCODE=\$?) \ - || EXITCODE=\$? - - echo "Extracting NA and HA consensus sequences..." - - if [ `grep "|HA|" ${sample_id}_fluviewer/${sample_id}*consensus_seqs.fa` ]; then - grep -A1 "|HA|" ${sample_id}_fluviewer/${sample_id}*consensus_seqs.fa > ${sample_id}_fluviewer/${sample_id}_HA_consensus.fa - else - echo "No HA consensus sequence generated." - fi - - if [ `grep "|NA|" ${sample_id}_fluviewer/${sample_id}*consensus_seqs.fa` ]; then - grep -A1 "|NA|" ${sample_id}_fluviewer/${sample_id}*consensus_seqs.fa > ${sample_id}_fluviewer/${sample_id}_NA_consensus.fa - else - echo "No NA consensus sequence generated." - fi - - if [[ ! -f ${sample_id}_fluviewer/${sample_id}_HA_consensus.fa ]]; then - echo "HA segment consensus not generated. Skipping FindCleave.py..." - else - python ${projectDir}/bin/FindCleave.py -i ${sample_id}_fluviewer/${sample_id}_HA_consensus.fa -o ${sample_id}_fluviewer/${sample_id}_HPAI.tsv - echo "Finished running FindCleave.py." - fi - - echo \$EXITCODE > .exitcode - - OUTPATH=${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id} - if [[ \$OUTPATH != /* ]]; then - OUTPATH=${workflow.launchDir}/\$OUTPATH - fi - - cp .command.* \$OUTPATH - cp .exitcode \$OUTPATH - exit \$EXITCODE - """ -} diff --git a/modules/clade_calling.nf b/modules/clade_calling.nf index d6be48d..eaedbeb 100644 --- a/modules/clade_calling.nf +++ b/modules/clade_calling.nf @@ -14,7 +14,7 @@ process clade_calling { output: tuple val(sample_id), path("*nextclade*"), emit: nextclade, optional: true - tuple val(sample_id), path("${sample_id}_clade_calling__provenance.yml"), emit: provenance, optional: true + tuple val(sample_id), path("${sample_id}_clade_calling_provenance.yml"), emit: provenance, optional: true script: """ diff --git a/modules/fluviewer.nf b/modules/fluviewer.nf new file mode 100644 index 0000000..be904b3 --- /dev/null +++ b/modules/fluviewer.nf @@ -0,0 +1,114 @@ +process fluviewer { + + tag { sample_id } + + memory { 50.GB * task.attempt } + errorStrategy { (task.exitStatus == 2 && task.attempt <= maxRetries) ? 'retry' : 'ignore' } + maxRetries 5 + + publishDir "${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}", pattern: "${sample_id}*", mode:'copy', saveAs: { filename -> filename.split("/").last() } + publishDir "${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}", pattern: "*tsv", mode:'copy', saveAs: { filename -> filename.split("/").last() } + //publishDir "${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}", pattern: "${sample_id}_fluviewer/spades_output", mode:'copy', saveAs: { filename -> "spades_output" } + publishDir "${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}", pattern: ".*", mode:'copy' + publishDir "${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}", pattern: "logs", mode:'copy', saveAs: { filename -> "fluviewer_logs" } + publishDir "${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}", pattern: ".exitcode", mode:'copy' + publishDir "${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}", pattern: ".command.*", mode:'copy' + + input: + tuple val(sample_id), path(reads_1), path(reads_2), path(db) + + output: + tuple val(sample_id), path("${sample_id}*.bam"), emit: alignment + tuple val(sample_id), path("${sample_id}*.bam.bai"), emit: alignmentindex, optional: true + tuple val(sample_id), path("${sample_id}*report.tsv"), emit: reports, optional: true + tuple val(sample_id), path("${sample_id}*_consensus.fa"), emit: consensus_seqs, optional: true + tuple val(sample_id), path("${sample_id}*consensus_seqs.fa"), emit: consensus_main + tuple val(sample_id), path("${sample_id}*_HPAI.tsv"), emit: HPAI, optional: true + tuple val(sample_id), path("${sample_id}*_cov.png"), emit: coverage_plot, optional: true + tuple val(sample_id), path("${sample_id}*_variants.vcf"), emit: vcf, optional: true + tuple val(sample_id), path("logs"), emit: fluviewer_logs + tuple val(sample_id), path("${sample_id}_fluviewer_provenance.yml"), emit: provenance + tuple val(sample_id), path("${sample_id}*_mapping_refs.fa"), emit: ref_seqs_for_mapping, optional: true + tuple val(sample_id), path("${sample_id}_contigs_blast.tsv"), emit: contig_blast_results, optional: true + //tuple val(sample_id), path("${sample_id}_fluviewer/spades_output"), emit: spades_results, optional: true + tuple val(sample_id), path("${sample_id}*.png"), emit: depth_cov_plot, optional: true + + script: + garbage_collection = params.keep_interfiles ? '-g' : '' + OUTPATH="${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id}" + + """ + printf -- "- process_name: fluviewer\\n" >> ${sample_id}_fluviewer_provenance.yml + printf -- " tools:\\n" >> ${sample_id}_fluviewer_provenance.yml + printf -- " - tool_name: fluviewer\\n" >> ${sample_id}_fluviewer_provenance.yml + printf -- " tool_version: \$(fluviewer --version)\\n" >> ${sample_id}_fluviewer_provenance.yml + printf -- " databases:\\n" >> ${sample_id}_fluviewer_provenance.yml + printf -- " - database_name: ${db}\\n" >> ${sample_id}_fluviewer_provenance.yml + printf -- " database_path: \$(readlink -f ${db})\\n" >> ${sample_id}_fluviewer_provenance.yml + printf -- " database_sha256: \$(shasum -a 256 ${db}|awk '{print \$1}')\\n" >> ${sample_id}_fluviewer_provenance.yml + + EXITCODE=0 + (fluviewer \ + --threads ${task.cpus} \ + --forward-reads ${reads_1} \ + --reverse-reads ${reads_2} \ + --outdir . \ + --output-name ${sample_id} \ + --db ${db} \ + --min-depth ${params.min_depth} \ + --min-mapping-quality ${params.min_q} \ + --min-identity ${params.min_ident} \ + --max-memory 40 \ + --disable-garbage-collection \ + --force && EXITCODE=\$?) \ + || EXITCODE=\$? + + function SAFE_EXIT { + EXITCODE=\$1 + OUTPATH=\$2 + + echo \$EXITCODE > .exitcode + cp .command.* \$OUTPATH + cp .exitcode \$OUTPATH + exit \$EXITCODE + } + + OUTPATH=${params.outdir}/${params.run_name}/${params.pipeline_short_name}-v${params.pipeline_minor_version}/${sample_id} + + if [[ \$OUTPATH != /* ]]; then # catch case where params.outdir is relative path and fix OUTPATH variable + OUTPATH=${workflow.launchDir}/\$OUTPATH + fi + + + if [ \$EXITCODE -ne 0 ]; then + echo "fluviewer exited with non-zero exit code. Skipping remaining analyses." + SAFE_EXIT \$EXITCODE \$OUTPATH + fi + + echo "Extracting NA and HA consensus sequences..." + + + if [ `grep "|HA|" ${sample_id}*consensus_seqs.fa` ]; then + grep -A1 "|HA|" ${sample_id}*consensus_seqs.fa > ${sample_id}_HA_consensus.fa + else + echo "No HA consensus sequence generated." + fi + + if [ `grep "|NA|" ${sample_id}*consensus_seqs.fa` ]; then + grep -A1 "|NA|" ${sample_id}*consensus_seqs.fa > ${sample_id}_NA_consensus.fa + else + echo "No NA consensus sequence generated." + fi + + if [[ ! -f ${sample_id}_HA_consensus.fa ]]; then + echo "HA segment consensus not generated. Skipping FindCleave.py..." + else + python ${projectDir}/bin/FindCleave.py -i ${sample_id}_HA_consensus.fa -o ${sample_id}_HPAI.tsv + echo "Finished running FindCleave.py." + fi + + cp analysis_by_stage/02_blast_contigs/${sample_id}_contigs_blast.tsv . + + SAFE_EXIT \$EXITCODE \$OUTPATH + """ +}