diff --git a/.editorconfig b/.editorconfig index de059a6..09f78a3 100644 --- a/.editorconfig +++ b/.editorconfig @@ -22,17 +22,12 @@ indent_style = unset charset = unset end_of_line = unset insert_final_newline = unset - +trim_trailing_whitespace = unset indent_style = unset [/assets/email*] indent_size = unset -# ignore Readme -[README.md] -indent_style = unset -trim_trailing_whitespace = unset - # ignore python [*.{py,md}] indent_style = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index e4d6351..2d11236 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -7,7 +7,7 @@ We try to manage the required tasks for phac-nml/viralassembly using GitHub issu Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! -Contributions to the code are even more welcome ;) +Contributions to the code are even more welcome ## Contribution workflow @@ -16,7 +16,7 @@ If you'd like to write some code for phac-nml/viralassembly, the standard workfl 1. Check that there isn't already an issue about your idea in the [phac-nml/viralassembly issues](https://github.com/phac-nml/viralassembly/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this 2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [phac-nml/viralassembly repository](https://github.com/phac-nml/viralassembly) to your GitHub account 3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) -4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 3.0.2). 5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). @@ -49,9 +49,9 @@ These tests are run both with the latest available version of `Nextflow` and als :warning: Only in the unlikely and regretful event of a release happening with a bug. -- On your own fork, make a new branch `patch` based on `upstream/master`. +- On your own fork, make a new branch `patch` based on `upstream/main`. - Fix the bug, and bump version (X.Y.Z+1). -- A PR should be made on `master` from patch to directly this particular bug. +- A PR should be made on `main` from patch to directly this particular bug. ## Pipeline contribution conventions diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index ffda452..2c54c08 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -2,53 +2,53 @@ name: Bug report description: Report something that is broken or incorrect labels: bug body: -- type: textarea - id: description - attributes: - label: Description of the bug - description: A clear and concise description of what the bug is. - validations: - required: true -- type: textarea - id: command_used - attributes: - label: Command used and terminal output - description: Steps to reproduce the behaviour. 
Please paste the command you used - to launch the pipeline and the output from your terminal. - render: console - placeholder: '$ nextflow run ... - - - Some output where something broke - - ' -- type: textarea - id: files - attributes: - label: Relevant files - description: 'Please drag and drop the relevant files here. Create a `.zip` archive - if the extension is not allowed. - - Your verbose log file `.nextflow.log` is often useful _(this is a hidden file - in the directory where you launched the pipeline)_ as well as custom Nextflow - configuration files. - - ' -- type: textarea - id: system - attributes: - label: System information - description: '* Nextflow version _(eg. 23.04.0)_ - - * Hardware _(eg. HPC, Desktop, Cloud)_ - - * Executor _(eg. slurm, local, awsbatch)_ - - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, - or Apptainer)_ - - * OS _(eg. CentOS Linux, macOS, Linux Mint)_ - - * Version of phac-nml/viralassembly _(eg. 1.1, 1.5, 1.8.2)_ - - ' + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used + to launch the pipeline and the output from your terminal. + render: console + placeholder: "$ nextflow run ... + + + Some output where something broke + + " + - type: textarea + id: files + attributes: + label: Relevant files + description: "Please drag and drop the relevant files here. Create a `.zip` archive + if the extension is not allowed. + + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file + in the directory where you launched the pipeline)_ as well as custom Nextflow + configuration files. + + " + - type: textarea + id: system + attributes: + label: System information + description: "* Nextflow version _(eg. 23.04.0)_ + + * Hardware _(eg. HPC, Desktop, Cloud)_ + + * Executor _(eg. slurm, local, awsbatch)_ + + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, + or Apptainer)_ + + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + + * Version of phac-nml/viralassembly _(eg. 1.1, 1.5, 1.8.2)_ + + " diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8a0ba3a..f37faf5 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -8,14 +8,14 @@ These are the most common things requested on pull requests (PRs). Remember that PRs should be made against the dev branch, unless you're preparing a pipeline release. -Learn more about contributing: [CONTRIBUTING.md](https://github.com/phac-nml/viralassembly/tree/master/.github/CONTRIBUTING.md) +Learn more about contributing: [CONTRIBUTING.md](https://github.com/phac-nml/viralassembly/tree/main/.github/CONTRIBUTING.md) --> ## PR checklist - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! 
-- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/phac-nml/viralassembly/tree/master/.github/CONTRIBUTING.md) +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/phac-nml/viralassembly/tree/main/.github/CONTRIBUTING.md) - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index c5c93ab..c6324f7 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -1,15 +1,15 @@ name: nf-core branch protection -# This workflow is triggered on PRs to master branch on the repository -# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` +# This workflow is triggered on PRs to main branch on the repository +# It fails when someone tries to make a PR against the nf-core `main` branch instead of `dev` on: pull_request_target: - branches: [master] + branches: [main] jobs: test: runs-on: ubuntu-latest steps: - # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches + # PRs to the nf-core repo main branch are only ok if coming from the nf-core repo `dev` or any `patch` branches - name: Check PRs if: github.repository == 'phac-nml/viralassembly' run: | @@ -22,7 +22,7 @@ jobs: uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | - ## This PR is against the `master` branch :x: + ## This PR is against the `main` branch :x: * Do not close this PR * Click _Edit_ and change the `base` to `dev` @@ -32,9 +32,9 @@ jobs: Hi @${{ github.event.pull_request.user.login }}, - It looks like this pull-request is has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. - The `master` branch on nf-core repositories should always contain code from the latest release. - Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. + It looks like this pull-request is has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `main` branch. + The `main` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `main` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. Note that even after this, the test will continue to show as failing until you push a new commit. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a1eaebc..88f881f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,6 +7,7 @@ on: pull_request: release: types: [published] + workflow_dispatch: env: NXF_ANSI_LOG: false @@ -28,16 +29,25 @@ jobs: - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - name: Install Nextflow - uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1 + - name: Set up Nextflow + uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" - - name: Disk space cleanup + - name: Clean up Disk space uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: Install nf-test + run: | + wget -qO- https://get.nf-test.com | bash + sudo mv nf-test /usr/local/bin/ + + - name: Run nf-test + run: | + nf-test test --verbose + - name: Run pipeline with test data # TODO nf-core: You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index cc7c037..6bfe937 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,6 +1,6 @@ name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure +# It runs the `nf-core pipelines lint` and markdown lint tests to ensure # that the code meets the nf-core guidelines. on: push: @@ -14,13 +14,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - name: Set up Python 3.11 - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: 3.11 - cache: "pip" + python-version: "3.12" - name: Install pre-commit run: pip install pre-commit @@ -32,27 +31,42 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@b9f764e8ba5c76b712ace14ecbfcef0e40ae2dd8 # v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" + - name: read .nf-core.yml + uses: pietrobolcato/action-read-yaml@1.1.0 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Run nf-core pipelines lint + if: ${{ github.base_ref != 'main' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - - name: Run nf-core lint + - name: Run nf-core pipelines lint --release + if: ${{ 
github.base_ref == 'main' }} env: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: echo "nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md" + run: nf-core -l lint_log.txt pipelines lint --release --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - name: Save PR number if: ${{ always() }} @@ -60,7 +74,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.gitignore b/.gitignore index 5124c9a..5347ceb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,14 @@ .nextflow* work/ -data/ results/ +data/ .DS_Store testing/ testing* *.pyc +null/ +slurm* +.nf-test.log +.nf-test/ +lint_log.txt +lint_results.md diff --git a/.nf-core.yml b/.nf-core.yml index 59723d8..c86751f 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,16 +1,34 @@ +repository_type: pipeline +nf_core_version: "3.0.2" +bump_version: null lint: files_exist: - - CODE_OF_CONDUCT.md - - .github/ISSUE_TEMPLATE/config.yml - - .github/workflows/awstest.yml - - .github/workflows/awsfulltest.yml + - CODE_OF_CONDUCT.md + - .github/ISSUE_TEMPLATE/config.yml + - .github/workflows/awstest.yml + - .github/workflows/awsfulltest.yml + - assets/multiqc_config.yml + - assets/nf-core-viralassembly_logo_light.png + - conf/igenomes.config + - conf/igenomes_ignored.config + - docs/images/nf-core-viralassembly_logo_light.png + - docs/images/nf-core-viralassembly_logo_dark.png + - lib/NfcoreTemplate.groovy + - lib/Utils.groovy + - lib/WorkflowMain.groovy + - lib/nfcore_external_java_deps.jar files_unchanged: - - CODE_OF_CONDUCT.md + - .github/PULL_REQUEST_TEMPLATE.md + - .github/CONTRIBUTING.md + - .github/ISSUE_TEMPLATE/bug_report.yml + - .github/workflows/linting_comment.yml + - docs/README.md + - LICENSE + - .gitignore + multiqc_config: False pipeline_name_conventions: False actions_awsfulltest: False - nextflow_config: - - custom_config - - manifest.name - - manifest.homePage + nextflow_config: False -repository_type: pipeline +template: + prefix: phac-nml diff --git a/CHANGELOG.md b/CHANGELOG.md index c6a43f0..7ffdf76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,16 +6,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v1.1.0 - [Unreleased] ### `Added` + - Input schema JSON and validation - FORMAT_INPUT workflow - - Handles the input data now + - Handles the input data now - `nf-schema@2.0.0` plugin ### `Changed` + - `--input SAMPLESHEET_CSV` header - - Went from `reads` with path to barcode directories to `fastq_1` with path to fastq files + - Went from `reads` with path to barcode directories to `fastq_1` with path to fastq files - Fixed bug so that SNPEff will now work with given gff files - - Issue was typo related in the build module + - Issue was typo related in the build module - Fixed bug with `calc_bam_variation` caused by genome case - Log and error statements - Fixed the cache directory statements @@ -25,5 +27,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Initial release of `phac-nml/viralassembly`, created from combining the [nf-core](https://nf-co.re/) template with the artic steps. 
### `Added` + - All initial pipeline features and logic - All initial docs and images diff --git a/README.md b/README.md index 910b2c8..3304fd0 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,28 @@ # viralassembly + A generic viral assembly and QC pipeline which utilises a re-implementation of the [artic pipeline](https://github.com/artic-network/fieldbioinformatics/tree/master/artic) to separate out the individual steps allowing greater control on tool versions along with how data is run through the processes. This pipeline can be used as a starting point for analyses on viruses without dedicated workflows already available. This pipeline is intended to be run on either Nanopore Amplicon Sequencing data or Basic Nanopore NGS Sequencing data that can utilize a reference genome for mapping variant calling, and other downstream analyses. It generates variant calls, consensus sequences, and quality control information based on the reference. To do this, there are three different variant callers that can be utilized which includes: `clair3`, `medaka`, and `nanopolish` (For R9.4.1 flowcells and below only). Some of the goals of this pipeline are: + 1. Rework the artic nanopore pipeline steps as nextflow modules to deal with specific bugs and version incompatibilities - - Example: BCFtools consensus error seen in artic pipeline sometimes - - Allows adding in clair3 as a new variant calling tool - - Potentially eventually work to remove `artic` as a dependency + - Example: BCFtools consensus error seen in artic pipeline sometimes + - Allows adding in clair3 as a new variant calling tool + - Potentially eventually work to remove `artic` as a dependency 2. Allow the pipeline to be used on other viruses with or without amplicon schemes - - Due to the QC steps there is unfortunately a current limitation at working with segmented viruses - - The pipeline will automatically exit after assembly and not generate QC and Reports for these at this time - - This will hopefully be fully implemented at some point in the future + - Due to the QC steps there is unfortunately a current limitation at working with segmented viruses + - The pipeline will automatically exit after assembly and not generate QC and Reports for these at this time + - This will hopefully be fully implemented at some point in the future 3. Provide `Run` level and `Sample` level final reports ## Index + - [Installation](#installation) - [Base Run Commands](#running-commands) - - [Nanopore - Clair3](#nanopore---clair3) - - [Nanopore - Medaka](#nanopore---medaka) - - [Nanopore - Nanopolish](#nanopore---nanopolish) + - [Nanopore - Clair3](#nanopore---clair3) + - [Nanopore - Medaka](#nanopore---medaka) + - [Nanopore - Nanopolish](#nanopore---nanopolish) - [Outputs](#outputs) - [Limitations](#limitations) - [Citations](#citations) @@ -27,37 +30,44 @@ Some of the goals of this pipeline are: - [Legal](#legal) ## Installation + 1. Download and install nextflow - 1. Download and install with [conda](https://docs.conda.io/en/latest/miniconda.html) - - Conda command: `conda create on nextflow -c conda-forge -c bioconda nextflow` - 2. Install with the instructions at https://www.nextflow.io/ + + 1. Download and install with [conda](https://docs.conda.io/en/latest/miniconda.html) + - Conda command: `conda create on nextflow -c conda-forge -c bioconda nextflow` + 2. Install with the instructions at https://www.nextflow.io/ 2. 
Run the pipeline with one of the following profiles to handle dependencies (or use your own profile if you have one!): - - `conda` - - `mamba` - - `singularity` - - `docker` + - `conda` + - `mamba` + - `singularity` + - `docker` ## Running Commands + Simple commands to run input data. Input data can be done in three different ways: + 1. Passing `--fastq_pass ` where `fastq_pass` is a directory containing `barcode##` subdirectories with fastq files 2. Passing `--fastq_pass ` where `fastqs` is a directory containing `.fastq*` files 3. Passing `--input ` where `samplesheet.csv` is a CSV file with two columns - 1. `sample` - The name of the sample - 2. `fastq_1` - Path to one fastq file per sample in `.fastq*` format + 1. `sample` - The name of the sample + 2. `fastq_1` - Path to one fastq file per sample in `.fastq*` format The basic examples will show how to run the pipeline using the `--fastq_pass` input but it could be subbed in for the `--input` CSV file if wanted. -*All detailed running information is available in the [usage docs](./docs/usage.md)* +_All detailed running information is available in the [usage docs](./docs/usage.md)_ ### Nanopore - Clair3 + Running the pipeline with [Clair3](https://github.com/HKU-BAL/Clair3) for variant calls requires fastq files and a clair3 model. When running, the pipeline will either: + - Look for subdirectories off of the input "--fastq_pass" directory called `barcode##` to be used in the pipeline - Look for fastq files in the input "--fastq_pass" directory called `*.fastq*` to be used in the pipeline This pipeline utilizes the same steps as the artic fieldbioinformatics minion pipeline but with each step run using nextflow to allow clair3 to be easily slotted in. See the [clair3 section](./docs/usage.md#clair3) of the usage docs for more information Basic command: + ```bash nextflow run /PATH/TO/artic-generic-nf/main.nf \ -profile \ @@ -68,6 +78,7 @@ nextflow run /PATH/TO/artic-generic-nf/main.nf \ ``` [Optional inputs](./docs/usage.md#all-parameters) could include: + - [Amplicon scheme](./docs/usage.md#schemes-and-reference) instead of just a reference fasta file - Metadata - Filtering options @@ -75,13 +86,16 @@ nextflow run /PATH/TO/artic-generic-nf/main.nf \ - Output reporting options ### Nanopore - Medaka + Running the pipeline with [medaka](https://github.com/nanoporetech/medaka) for variant calls requires fastq files and a medaka model. When running, the pipeline will either: + - Look for subdirectories off of the input "--fastq_pass" directory called `barcode##` to be used in the pipeline - Look for fastq files in the input "--fastq_pass" directory called `*.fastq*` to be used in the pipeline See the [medaka section](./docs/usage.md#medaka) of the usage docs for more information Basic command: + ```bash nextflow run /PATH/TO/artic-generic-nf/main.nf \ -profile \ @@ -93,6 +107,7 @@ nextflow run /PATH/TO/artic-generic-nf/main.nf \ ``` [Optional inputs](./docs/usage.md#all-parameters) could include: + - [Amplicon scheme](./docs/usage.md#schemes-and-reference) instead of just a reference fasta file - Metadata - Filtering options @@ -103,11 +118,13 @@ nextflow run /PATH/TO/artic-generic-nf/main.nf \ Medaka model information [can be found here](https://github.com/nanoporetech/medaka#models) ### Nanopore - Nanopolish + Running the pipeline with [nanopolish](https://github.com/jts/nanopolish) for variant calls requires fastq files, fast5 files, and the sequencing summary file instead of providing a model. 
As such, nanopolish requires that the read ids in the fastq files are linked by the sequencing summary file to their signal-level data in the fast5 files. This makes it **a lot** easier to run using barcoded directories but it can be run with individual read files See the [nanopolish section](./docs/usage.md#nanopolish) of the usage docs for more information Basic command: + ```bash nextflow run /PATH/TO/artic-generic-nf/main.nf \ -profile \ @@ -120,6 +137,7 @@ nextflow run /PATH/TO/artic-generic-nf/main.nf \ ``` [Optional inputs](./docs/usage.md#all-parameters) could include: + - [Amplicon scheme](./docs/usage.md#schemes-and-reference) instead of just a reference fasta file - Metadata - Filtering options @@ -128,34 +146,38 @@ nextflow run /PATH/TO/artic-generic-nf/main.nf \ - Output reporting options ## Outputs + Outputs are separated based off of their tool or file format and found in the `results/` directory by default. Outputs include: + - Consensus fasta files - VCF files - Bam files - HTML summary files (either custom or MultiQC) -*More output information on pipeline steps and output files can be found in the [output docs](./docs/output.md)* +_More output information on pipeline steps and output files can be found in the [output docs](./docs/output.md)_ ## Limitations + Current limitations include: 1. Nanopore data only at this time 2. Currently runs for viruses using a reference genome - - Segmented viruses will exit before the QC section for now + - Segmented viruses will exit before the QC section for now 3. Custom report can only work when running with `conda` 4. SnpEff issues in running and database building/downloading - - Database building/downloading requires one of three things: - - The reference ID is in the SnpEff database - - This allows the database to be downloaded - - A gff3 file - - This is used with the reference sequence to build a database - - A well annotated NCBI genome matching the reference ID - - This will pull the genbank file and use that to build a database - - Running SnpEff with singularity sometimes leads to a lock issue which is hopefully fixed + - Database building/downloading requires one of three things: + - The reference ID is in the SnpEff database + - This allows the database to be downloaded + - A gff3 file + - This is used with the reference sequence to build a database + - A well annotated NCBI genome matching the reference ID + - This will pull the genbank file and use that to build a database + - Running SnpEff with singularity sometimes leads to a lock issue which is hopefully fixed ## Citations + This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE). > The nf-core framework for community-curated bioinformatics pipelines. @@ -168,9 +190,11 @@ This pipeline uses code and infrastructure developed and maintained by the [nf-c Detailed citations for utilized tools are found in [citations.md](./citations.md) ## Contributing + Contributions are welcome through creating PRs or Issues ## Legal + Copyright 2023 Government of Canada Licensed under the MIT License (the "License"); you may not use this work except in compliance with the License. 
You may obtain a copy of the License at: diff --git a/assets/multiqc_config_overall.yaml b/assets/multiqc_config_overall.yaml index e1f6d78..e57db2e 100644 --- a/assets/multiqc_config_overall.yaml +++ b/assets/multiqc_config_overall.yaml @@ -60,12 +60,12 @@ custom_data: title: "QC Pass" description: "Overall status of the sample" cond_formatting_rules: - warn: [{"s_ne": "PASS"}] + warn: [{ "s_ne": "PASS" }] run_status: title: "Run Status" description: "Overall status of the run taking negative control samples into account" cond_formatting_rules: - warn: [{"s_ne": "PASS"}] + warn: [{ "s_ne": "PASS" }] num_aligned_reads: title: "# Aligned Reads" description: "Number of aligned reads used for variant calling and consensus generation" @@ -114,9 +114,9 @@ custom_data: description: "Variants detected by SnpEFF as containing a frameshift or if SnpEFF was not run, indels that fail a divisible by 3 check" # Note that the order here must be kept or the no_data will be yellow cond_formatting_rules: - pass: [{"s_eq": "none"}] - frameshift: [{"s_ne": "none"}] - no_data: [{"s_eq": "NA"}] + pass: [{ "s_eq": "none" }] + frameshift: [{ "s_ne": "none" }] + no_data: [{ "s_eq": "NA" }] cond_formatting_colours: - 1: frameshift: "#f0ad4e" # Yellow Warn colour @@ -132,66 +132,67 @@ custom_data: parent_name: "Amplicon Summary" parent_description: > Custom amplicon plots to show how deep each amplicon is sequenced and how complete each amplicon is - id: 'amplicon_depth' - file_format: 'tsv' - section_name: 'Amplicon Depth Lineplot' + id: "amplicon_depth" + file_format: "tsv" + section_name: "Amplicon Depth Lineplot" description: > Calculated from the primertrimmed sorted bam file using bedtools coverage looking for reads that overlap 85% of the given amplicon region - plot_type: 'linegraph' + plot_type: "linegraph" pconfig: - id: 'amplicon_depth' - title: 'Amplicon Depth' - xlab: 'Amplicon ID' - ylab: 'Read Depth' + id: "amplicon_depth" + title: "Amplicon Depth" + xlab: "Amplicon ID" + ylab: "Read Depth" logswitch: True logswitch_active: False categories: True ymin: 0 amplicon_completeness: parent_id: amplicons_qc - file_format: 'csv' - section_name: 'Amplicon Completeness' + file_format: "csv" + section_name: "Amplicon Completeness" description: > Heatmap showing how complete each amplicon is based on the consensus sequence. Amplicon completeness is calculated using the number of Ns in the amplicon compared to the amplicon length. A 1.0 means the amplicon is fully sequenced while a 0 means that there were no bases called in the amplicon. 
Note that as amplicons overlap, a fully dropped amplicon could still have some completeness from its two neighbours - id: 'amplicon_completeness' - plot_type: 'heatmap' + id: "amplicon_completeness" + plot_type: "heatmap" pconfig: - id: 'amplicon_completeness' - title: 'Amplicon Completeness' - xTitle: 'Amplicon ID' + id: "amplicon_completeness" + title: "Amplicon Completeness" + xTitle: "Amplicon ID" xcats_samples: False ycats_samples: True square: False min: 0 max: 1 - colstops: [ - [0, '#a50026'], - [0.1, '#d73027'], - [0.2, '#f46d43'], - [0.3, '#fdae61'], - [0.4, '#fee08b'], - [0.5, '#ffffbf'], - [0.6, '#d9ef8b'], - [0.7, '#a6d96a'], - [0.8, '#66bd63'], - [0.9, '#1a9850'], - [1, '#006837'], - ] + colstops: + [ + [0, "#a50026"], + [0.1, "#d73027"], + [0.2, "#f46d43"], + [0.3, "#fdae61"], + [0.4, "#fee08b"], + [0.5, "#ffffbf"], + [0.6, "#d9ef8b"], + [0.7, "#a6d96a"], + [0.8, "#66bd63"], + [0.9, "#1a9850"], + [1, "#006837"], + ] # Extensions to clean from names extra_fn_clean_exts: - - '.nanostat' - - '.ann' + - ".nanostat" + - ".ann" # Search Pathes sp: qc_csv_table: - fn: '*.qc.csv' + fn: "*.qc.csv" amplicon_depth: - fn: '*_ampdepth.tsv' + fn: "*_ampdepth.tsv" amplicon_completeness: fn: "merged_amplicon_completeness.csv" diff --git a/assets/multiqc_config_sample.yaml b/assets/multiqc_config_sample.yaml index df77717..311d0da 100644 --- a/assets/multiqc_config_sample.yaml +++ b/assets/multiqc_config_sample.yaml @@ -53,7 +53,7 @@ custom_data: title: "QC Pass" description: "Overall status of the sample" cond_formatting_rules: - warn: [{"s_ne": "PASS"}] + warn: [{ "s_ne": "PASS" }] num_aligned_reads: title: "# Aligned Reads" description: "Number of aligned reads used for variant calling and consensus generation" @@ -102,9 +102,9 @@ custom_data: description: "Variants detected by SnpEFF as containing a frameshift or if SnpEFF was not run, indels that fail a divisible by 3 check" # Note that the order here must be kept or the no_data will be yellow cond_formatting_rules: - pass: [{"s_eq": "none"}] - frameshift: [{"s_ne": "none"}] - no_data: [{"s_eq": "NA"}] + pass: [{ "s_eq": "none" }] + frameshift: [{ "s_ne": "none" }] + no_data: [{ "s_eq": "NA" }] cond_formatting_colours: - 1: frameshift: "#f0ad4e" # Yellow Warn colour @@ -113,18 +113,18 @@ custom_data: # Amplicons section amplicon_depth: - id: 'amplicon_depth' - file_format: 'tsv' - section_name: 'Amplicon Depth Lineplot' + id: "amplicon_depth" + file_format: "tsv" + section_name: "Amplicon Depth Lineplot" description: > Calculated from the primertrimmed sorted bam file using bedtools coverage looking for reads that overlap 85% of the given amplicon region - plot_type: 'linegraph' + plot_type: "linegraph" pconfig: - id: 'amplicon_depth' - title: 'Amplicon Depth' - xlab: 'Amplicon ID' - ylab: 'Read Depth' + id: "amplicon_depth" + title: "Amplicon Depth" + xlab: "Amplicon ID" + ylab: "Read Depth" logswitch: True logswitch_active: False categories: True @@ -161,9 +161,9 @@ custom_data: Consequence: description: "SnpEff consequence" cond_formatting_rules: - missnese: [{"s_contains": "missense"}] - nonsense: [{"s_contains": "nonsense"}] - frameshift: [{"s_contains": "frameshift"}] + missnese: [{ "s_contains": "missense" }] + nonsense: [{ "s_contains": "nonsense" }] + frameshift: [{ "s_contains": "frameshift" }] cond_formatting_colours: - 1: missnese: "#080180" # Blue @@ -198,12 +198,12 @@ custom_data: title: "Variant Type" description: "Type of variation occuring at the specific site" cond_formatting_rules: - A: [{"s_eq": "A SNP"}] - T: 
[{"s_eq": "T SNP"}] - G: [{"s_eq": "G SNP"}] - C: [{"s_eq": "C SNP"}] - Del: [{"s_eq": "Del"}] - mixed: [{"s_eq": "Mixed"}] + A: [{ "s_eq": "A SNP" }] + T: [{ "s_eq": "T SNP" }] + G: [{ "s_eq": "G SNP" }] + C: [{ "s_eq": "C SNP" }] + Del: [{ "s_eq": "Del" }] + mixed: [{ "s_eq": "Mixed" }] cond_formatting_colours: - 1: A: "#05a605" # Green @@ -256,15 +256,15 @@ custom_data: # Extensions to clean from names extra_fn_clean_exts: - - '.nanostat' + - ".nanostat" # Search Pathes sp: qc_csv_table: - fn: '*.qc.csv' + fn: "*.qc.csv" amplicon_depth: - fn: '*_ampdepth.tsv' + fn: "*_ampdepth.tsv" variation_table: - fn: '*_variation.csv' + fn: "*_variation.csv" consensus_variants_table: - fn: '*.vcf.tsv' + fn: "*.vcf.tsv" diff --git a/conf/modules.config b/conf/modules.config index 7064ed8..5d5b280 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -11,6 +11,7 @@ */ process { + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, @@ -18,5 +19,4 @@ process { pattern: '*_versions.yml' ] } - } diff --git a/conf/nml.config b/conf/nml.config index 58176be..fbff1dd 100644 --- a/conf/nml.config +++ b/conf/nml.config @@ -6,7 +6,7 @@ params { // Config params config_profile_name = "nml" config_profile_description = "National Microbiology Laboratory Canada basic cluster config" - config_profile_contact = 'Darian Hole (darian.hole@phac-aspc.gc.ca)' + config_profile_contact = "" // Cluster specific params partition = '' diff --git a/conf/test.config b/conf/test.config index cafad4b..834c822 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,4 +25,6 @@ params { // Args variant_caller = "clair3" reference = "$projectDir/.github/test-data/nanopore/MN908947.3.reference.fasta" + skip_snpeff = true + skip_qc = true } diff --git a/docs/example_commands.md b/docs/example_commands.md index 90f8d85..84de2d2 100644 --- a/docs/example_commands.md +++ b/docs/example_commands.md @@ -1,9 +1,11 @@ # phac-nml/viralassembly: Example Commands + A variety of example commands using different parameter options to display how to use each ## Amplicon ### Clair3 + Clair3 with a local model, local scheme, fastq directory, conda, and the custom report output ```bash @@ -20,6 +22,7 @@ nextflow run phac-nml/viralassembly \ ``` ### Medaka + Minimal input medaka with conda, an input csv file for data, and the nCoV-2019 scheme ```bash @@ -33,6 +36,7 @@ nextflow run phac-nml/viralassembly \ ``` ### Nanopolish + Nanopolish run using singularity and the base artic command line tool (instead of the default nextflow implementation) ```bash @@ -48,11 +52,12 @@ nextflow run phac-nml/viralassembly \ --outdir ./results ``` --------------------------- +--- ## Non-Amplicon ### Clair3 + Minimal clair3 with docker using a fastq input directory along wth a gff3 reference file for SnpEff ```bash @@ -65,6 +70,7 @@ nextflow run phac-nml/viralassembly \ ``` ### Medaka + Medaka with conda skipping QC and SnpEff ```bash @@ -78,6 +84,7 @@ nextflow run phac-nml/viralassembly \ ``` ### Nanopolish + Nanopolish running with conda, filtering the read lengths to be shorter, and creating a custom report ```bash diff --git a/docs/output.md b/docs/output.md index 9cd5f71..1ae6098 100644 --- a/docs/output.md +++ b/docs/output.md @@ -11,42 +11,47 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps (where a `*` indicates a final output kept in the top level results 
directory): - [Preprocessing](#preprocessing) - - [Reference Stats](#reference-stats)* - Get reference genome information needed for variant calling and QC + + - [Reference Stats](#reference-stats)\* - Get reference genome information needed for variant calling and QC - [Artic Guppyplex](#artic-guppyplex) - Read length filtering - [Chopper](#chopper) - Additional Read QC - [Nanostat](#nanostat) - Read statistics - [Variant Calling](#variant-calling) - - [Minimap2](#minimap2)* - Read mapping - - [Artic Align Trim](#artic-align_trim)* - Primer trimming and normalisation + + - [Minimap2](#minimap2)\* - Read mapping + - [Artic Align Trim](#artic-align_trim)\* - Primer trimming and normalisation - [Clair3](#clair3) - Determine initial variants with clair3 - [Medaka](#medaka) - Determine initial variants with medaka - [Nanopolish](#nanopolish) - Determine initial variants with nanopolish - - [Longshot](#longshot)* - Genotype and phase called medaka variants - - [Variant Filter](#variant-filter)* - Filter variants not matching required criteria + - [Longshot](#longshot)\* - Genotype and phase called medaka variants + - [Variant Filter](#variant-filter)\* - Filter variants not matching required criteria - [Consensus Generation](#consensus-generation) + - [Artic Mask](#artic-mask) - Mask failing variants and low depth sites in preparation for consensus generation - - [BCFtools Norm](#bcftools-norm)* - Left-align and normalize indels along with make sure the reference alleles match - - [BCFtools Consensus](#bcftools-consensus)* - Create consensus sequence from VCF variants and Masked sites + - [BCFtools Norm](#bcftools-norm)\* - Left-align and normalize indels along with make sure the reference alleles match + - [BCFtools Consensus](#bcftools-consensus)\* - Create consensus sequence from VCF variants and Masked sites - [QC and Reporting](#qc-and-reporting) - - [SnpEff](#snpeff)* - Variant annotation and functional prediction + - [SnpEff](#snpeff)\* - Variant annotation and functional prediction - [Qualimap BAMQC](#qualimap-bamqc) - Alignment quality and metrics - [Samtools Flagstat](#samtools-flagstat) - Alignment flag stats - [BCFtools Stats](#bcftools-stats) - Variant quality and statistics - - [Variation CSV](#variation-csv)* - Custom reporting script for finding and calculating variation in the BAM pileups + - [Variation CSV](#variation-csv)\* - Custom reporting script for finding and calculating variation in the BAM pileups - [Amplicon Completeness](#amplicon-completeness) - Custom reporting script for calculating amplicon completeness based on bedtools output - - [QC Compilation](#qc-compilation)* - Custom reporting scripts for each sample and the overall run - - [MultiQC](#multiqc)* - Sample and Run HTML visual report - - [Custom Report](#custom-report)* - Custom single HTML report including the run and all individual samples + - [QC Compilation](#qc-compilation)\* - Custom reporting scripts for each sample and the overall run + - [MultiQC](#multiqc)\* - Sample and Run HTML visual report + - [Custom Report](#custom-report)\* - Custom single HTML report including the run and all individual samples Additionally [Pipeline information](#pipeline-information) which includes report metrics generated during the workflow execution can also be found ### Preprocessing + Initial processing steps and statistic gathering. The reference statistics are output to their own final folder while the other statistics are passed to the final multiqc report. #### Reference Stats +
<details markdown="1">
<summary>Output files</summary>

- `genome.bed`: Genomic information in bed format that has the coordinates of the reference genome needed for nanopolish
- `refstats.txt`: Genomic information in a format needed for clair3
- `*.fai`: Samtools faidx fai file for reference genome
-</details>
+
+</details>

The reference files are generated with both `awk` and `samtools` and are needed as different inputs for downstream tools.
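
As a rough sketch of where these files come from (file names are placeholders; the pipeline's own modules are the authoritative source), the `samtools`/`awk` steps look like:

```bash
# Index the reference; samtools writes reference.fasta.fai (name, length, offsets, ...)
samtools faidx reference.fasta

# Turn the fai index (column 1 = name, column 2 = length) into the genome bed nanopolish needs
awk 'BEGIN{OFS="\t"} {print $1, 0, $2}' reference.fasta.fai > genome.bed
```
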
#### Artic Guppyplex
+
Select reads by size and generate size-selected fastq files.

#### Chopper
+
[Chopper](https://github.com/wdecoster/chopper) filters and trims fastq reads by quality and length.
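
For reference, a standalone Chopper call follows this stdin/stdout pattern (the quality and length cutoffs here are illustrative, not necessarily the pipeline's defaults):

```bash
# Keep reads with mean quality >= 10 and length between 200 and 3000 bases
gunzip -c reads.fastq.gz \
    | chopper --quality 10 --minlength 200 --maxlength 3000 \
    | gzip > filtered.fastq.gz
```
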
#### Nanostat
+
[Nanostat](https://github.com/wdecoster/nanostat) generates plots and statistics on trimmed fastq files for the final multiqc reports.

![nanostats_mqc](./images/nanostat_mqc.png)

-----------
+---

### Variant Calling
+
Read mapping and variant calling. Note that only one of `clair3`, `medaka`, and `nanopolish` is used. In the end, final normalized passing and failing variants are output along with the BAM files to their respective folders.

#### Minimap2
+
<details markdown="1">
<summary>Output files</summary>

- `bam/`
  - `*.sorted.bam`: Sorted bam file from minimap2 and samtools
-</details>
+
+</details>

The sorted BAM file from minimap2 and samtools.
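
Conceptually, the mapping step is the standard minimap2/samtools pattern for Nanopore reads (names are placeholders; the pipeline's module sets its own flags):

```bash
# Map ONT reads with the map-ont preset and coordinate-sort the alignments
minimap2 -ax map-ont reference.fasta sample.fastq.gz \
    | samtools sort -o sample.sorted.bam
samtools index sample.sorted.bam
```
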
#### Artic Align_Trim
-*Amplicon only*
+
+_Amplicon only_
+
<details markdown="1">
<summary>Output files</summary>

- `bam/`
  - `*.trimmed.rg.sorted.bam`: Artic align_trim output which normalises coverage and assigns reads to amplicons
-  - `*.primertrimmed.rg.sorted.bam`: Artic align_trim output which normalises coverage and assigns reads to amplicons along with softmasking the primer sequences
-    - The primertrimmed file is used for subsequent variant calling
-</details>
+  - `*.primertrimmed.rg.sorted.bam`: Artic align_trim output which normalises coverage and assigns reads to amplicons along with softmasking the primer sequences - The primertrimmed file is used for subsequent variant calling
+
+</details>

See [the artic core pipeline](https://artic.readthedocs.io/en/latest/minion/#core-pipeline) for more info on how `align_trim` trims the BAM files.

#### Clair3
+
Run clair3 variant caller on BAM files to create initial variant calls in VCF format.

#### Medaka
+
Run medaka variant caller on BAM files to create initial variant calls in VCF format.
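
As an illustration of the medaka step (this mirrors how the artic minion pathway drives medaka; the model and file names are placeholders and should match your data):

```bash
# Generate per-read consensus probabilities, then call variants against the reference
medaka consensus --model r941_min_hac_g507 sample.primertrimmed.rg.sorted.bam sample.hdf
medaka variant reference.fasta sample.hdf sample.medaka.vcf
```
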
#### Nanopolish
+
Run nanopolish variant caller on BAM files, fast5 files, and the sequencing summary file to create initial variant calls in VCF format.

#### Longshot
+
<details markdown="1">
<summary>Output files</summary>

- `vcf/`
  - `*.longshot.merged.vcf`: Longshot phased VCF file
-</details>
+
+</details>

Genotype and phase the variants from the initial medaka VCF variant file using [Longshot](https://github.com/pjedge/longshot).
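
A minimal sketch of that call, assuming Longshot's standard flags (`--potential_variants` restricts genotyping to the medaka calls; file names are placeholders):

```bash
# Genotype and phase the medaka variants against the primer-trimmed BAM
longshot --bam sample.primertrimmed.rg.sorted.bam --ref reference.fasta \
    --potential_variants sample.medaka.vcf.gz --out sample.longshot.merged.vcf
```
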
#### Variant Filter
+
<details markdown="1">
<summary>Output files</summary>

- `vcf/`
  - `*.pass.vcf.gz`: VCF file containing variants passing quality filters
  - `*.pass.vcf.gz.tbi`: VCF index file containing variants passing quality filters
  - `*.fail.vcf`: VCF file containing variants failing quality filters
-</details>
+
+</details>

Variants are passed or failed based on quality for the final consensus sequence generation.
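
The pipeline uses the artic filtering logic for this, but the idea can be illustrated with a simple quality split in bcftools (the cutoff here is arbitrary, purely for illustration):

```bash
# Illustrative only: split calls into pass/fail on a QUAL threshold
bcftools view -e 'QUAL<20' sample.merged.vcf.gz -Oz -o sample.pass.vcf.gz
tabix -p vcf sample.pass.vcf.gz
bcftools view -i 'QUAL<20' sample.merged.vcf.gz -o sample.fail.vcf
```
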
-----------
+---

### Consensus Generation
+
Final consensus sequence generation based on passing/failing variants and sequencing depth.

#### Artic Mask
+
Mask low-depth sites and failing variants to create a preconsensus sequence for BCFtools consensus.

#### BCFtools Norm
+
<details markdown="1">
<summary>Output files</summary>

- `vcf/`
-  - `*.pass.norm.vcf.gz`: VCF file containing variants passing quality filters that have their indels normalized and reference positions fixed
-    - Reference positions may need to be fixed if there are overlapping variants
-</details>
+  - `*.pass.norm.vcf.gz`: VCF file containing variants passing quality filters that have their indels normalized and reference positions fixed - Reference positions may need to be fixed if there are overlapping variants
+
+</details>

BCFtools norm is utilized to fix locations where two variants overlap, which previously would crash the pipeline during BCFtools consensus. [BCFtools](https://samtools.github.io/bcftools/bcftools.html#norm)
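
The normalisation itself boils down to a call of this shape (illustrative names):

```bash
# Left-align and normalise indels against the reference, then re-index
bcftools norm -f reference.fasta sample.pass.vcf.gz -Oz -o sample.pass.norm.vcf.gz
tabix -p vcf sample.pass.norm.vcf.gz
```
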
#### BCFtools Consensus
+
<details markdown="1">
<summary>Output files</summary>

- `consensus/`
  - `*.consensus.fasta`: Fasta file containing the final output consensus sequence with applied variants and masked sites
-</details>
+
+</details>

Final output consensus sequence for the sample with variants applied and low coverage/failing variants masked with N's. [BCFtools](https://samtools.github.io/bcftools/bcftools.html#consensus)
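
A minimal sketch of the consensus call, assuming the masked preconsensus and depth mask produced by the Artic Mask step (names are placeholders):

```bash
# Apply passing variants and hard-mask low-coverage regions with N's
bcftools consensus -f preconsensus.fasta -m mask.bed \
    sample.pass.norm.vcf.gz > sample.consensus.fasta
```
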
-----------
+---

### QC and Reporting
+
All QC and reporting is currently only done on non-segmented viruses.

#### SnpEff
+
<details markdown="1">
<summary>Output files</summary>

- `snpeff/`
  - `*.ann.vcf`: VCF file with variant annotations
  - `*.csv`: Variant annotation csv file
-</details>
+
+</details>

[SnpEff](https://pcingola.github.io/SnpEff/) is a genetic variant annotation and functional effect prediction toolbox. It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes).

![snpeff_mqc](./images/snpeff_mqc.png)

#### Qualimap BAMQC
+
[Qualimap BAMQC](http://qualimap.conesalab.org/) is a platform-independent application written in Java and R that provides a command-line interface to facilitate the quality control of alignment sequencing data and its derivatives like feature counts. The output is used in the final MultiQC reports.

![qualimap_mqc](./images/qualimap_mqc.png)

#### Samtools Flagstat
+
[Samtools flagstat](http://www.htslib.org/doc/samtools-flagstat.html) counts the number of alignments for each FLAG type. The output is used in the final MultiQC reports.

![samtools_mqc](./images/samtools_mqc.png)

#### BCFtools Stats
+
[BCFtools stats](https://samtools.github.io/bcftools/bcftools.html#stats) produces machine-readable variant quality and statistics. The output is used in the final MultiQC reports.

![bcftools_mqc](./images/bcftools_mqc.png)

#### Variation CSV
+
<details markdown="1">
<summary>Output files</summary>

- `variation_csvs/`
  - `*_variation.csv`: CSV file displaying positions where there is >= 15% variation from the reference base call
-</details>
+
+</details>

Custom python script using [pysam](https://pysam.readthedocs.io/en/latest/api.html) to find positions in the pileup which have >= 15% variation from the reference sequence. This gives information on any mixed sites along with identifying spots in the genome where there may be sequencing artifacts or issues. The CSV file can be viewed or a coloured table can be found in each sample MultiQC report or custom report.

![variation_mqc](./images/variation_mqc.png)
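
The pipeline's script uses pysam, but the underlying idea can be sketched with a plain pileup (a rough heuristic only; it ignores indel records and base-quality handling):

```bash
# Flag positions where >= 15% of pileup bases disagree with the reference
samtools mpileup -f reference.fasta sample.primertrimmed.rg.sorted.bam \
    | awk '{mm = gsub(/[ACGTacgt]/, "", $5); if ($4 > 0 && mm / $4 >= 0.15) print $1, $2, mm / $4}'
```
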
#### Amplicon Completeness
+
Amplicon completeness is calculated using a custom python script along with an amplicon bed file and the final consensus sequence. It reports how many bases were called in each amplicon and gives a final completeness value from `0` - `1.00`.

![completeness_mqc](./images/completeness_mqc.png)
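
For a single amplicon the calculation reduces to something like the following (the region and file names are illustrative; the real script iterates over every amplicon in the bed file):

```bash
# completeness = 1 - (N count in the amplicon) / (amplicon length)
region="MN908947.3:30-410"
ncount=$(samtools faidx sample.consensus.fasta "$region" | tail -n +2 | tr -cd 'Nn' | wc -c)
length=$((410 - 30 + 1))
echo "1 - $ncount / $length" | bc -l
```
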
#### QC Compilation
+
<details markdown="1">
<summary>Output files</summary>

@@ -221,6 +248,7 @@

Final CSV file(s) for both individual samples and the overall run that combine the QC metrics, status, and other wanted metrics together.

![qc_mqc](./images/qc_mqc.png)

#### MultiQC
+
<details markdown="1">
<summary>Output files</summary>

@@ -234,6 +262,7 @@

Final output reports generated by [MultiQC](https://multiqc.info/docs/) based on the overall run and each individual sample.

![sample_mqc_mqc](./images/sample_mqc_mqc.png)

#### Custom Report
+
<details markdown="1">
<summary>Output files</summary>

@@ -251,9 +280,10 @@

Example sample page

![amplicons_custom](./images/amplicons_custom.png)

Amplicons page

-----------
+---

### Pipeline information
+
<details markdown="1">
<summary>Output files</summary>

@@ -262,6 +292,6 @@

- Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline.
- Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
- Parameters used by the pipeline run: `params.json`.
-</details>
+
+</details>
[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/usage.md b/docs/usage.md index 2821abc..3371a63 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,20 +1,24 @@ # phac-nml/viralassembly: Usage ## Introduction + This pipeline is intended to be run on either Nanopore Amplicon Sequencing data or Basic Nanopore NGS Sequencing data that can utilize a reference genome for mapping variant calling, and other downstream analyses. It generates variant calls, consensus sequences, and quality control information based on the reference. To do this, there are three different variant callers that can be utilized which includes: `clair3`, `medaka`, and `nanopolish` (which is for R9.4.1 flowcells and below only!). For Amplicon Sequencing data it is at minimum required to: + 1. Specify a path to the reads/input file 2. Specify the scheme name 3. Specify the scheme version 4. Pick a variant caller and caller model For Basic NGS Sequencing data it is at minimum required to: + 1. Specify a path to the reads/input file 2. Specify a path to the reference genome 3. Pick a variant caller and caller model ## Index + - [Profiles](#profiles) - [Data Inputs](#data-inputs) - [Fastq Pass Directory](#fastq-pass-directory---fastq_pass) @@ -37,21 +41,26 @@ For Basic NGS Sequencing data it is at minimum required to: - [Core Nextflow Arguments](#core-nextflow-arguments) ## Profiles -Profiles are used to specify dependency installation, resources, and how to handle pipeline jobs. You can specify more than one profile but *avoid* passing in more than one dependency managment profiles. They can be passed with `-profile ` + +Profiles are used to specify dependency installation, resources, and how to handle pipeline jobs. You can specify more than one profile but _avoid_ passing in more than one dependency managment profiles. They can be passed with `-profile ` Available: + - `conda`: Utilize conda to install dependencies and environment management - `mamba`: Utilize mamba to install dependencies and environment management - `singularity`: Utilize singularity for dependencies and environment management - `docker`: Utilize docker to for dependencies and environment management ## Data Inputs + Two options for fastq data input: `--fastq_pass ` or `--input ` ### Fastq Pass Directory (--fastq_pass) + Specify fastq data to input based on a given directory. The directory can either contain barcoded directories (barcodexx), as would be seen after demultiplexing, or it could contain sample fastq files (one fastq per sample). The barcoded fastq data will be output with the barcode number but can be renamed with a [metadata tsv](#metadata) file input. The flat fastq files will keep their basename (separated out at the first `.`). Example: Barcoded: + ``` ├── barcode01 @@ -65,6 +74,7 @@ Barcoded: ``` Flat: + ``` ├── sample1.fastq @@ -75,7 +85,9 @@ Flat: ``` ### Input CSV (--input) + You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. 
Use this parameter to pass in an input CSV file containing 2 columns, `sample`, and `fastq_1` where: + - `sample` is the sample name to use - `fastq_1` is the path to one fastq file per sample in `.fastq*` format @@ -91,25 +103,31 @@ Ex. A sample can be given multiple fastq files if it was resequenced or needed a top up run. If there are multiple fastq files for a sample they will be concatenated and gzipped. If not, the input fastq file will just be gzipped (if it isn't already). ## Variant Callers + Three different variant callers are available with slightly different options regarding running with them. For the most accurate results when running with `clair3` or `medaka` pick a model that best matches the input data!! ### [Clair3](https://github.com/HKU-BAL/Clair3) + Clair3 is a germline small variant caller for long-reads. Running with `clair3` requires the following parameters: + - `--variant_caller clair3`: Sets clair3 as the variant caller And has the optional parameters of: + - `--clair3_model `: Specify the base clair3 model -- `--clair3_user_variant_model `: Specify the path to an additionally downloaded model directory +- `--clair3_user_variant_model `: Specify the path to an additionally downloaded model directory - `clair3_no_pool_split`: Do not split inputs into pools Clair3 comes with some models available and is defaulted to `r941_prom_sup_g5014`. Additional models can be downloaded from [ONT Rerio](https://github.com/nanoporetech/rerio/tree/master) and then specified in the `--clair3_user_variant_model ` parameter shown above. Remember to pick a model that best represents the data! ### [Medaka](https://github.com/nanoporetech/medaka) + Medaka is a tool to create consensus sequences and variant calls from nanopore sequencing data using neural networks and provied by ONT. Running with `medaka` requires the following parameters: + - `--variant_caller medaka`: Sets medaka as the variant caller And has the optional parameters of: @@ -118,9 +136,11 @@ And has the optional parameters of: Medaka models come built in with the tool itself with the default set to `r941_min_hac_g507` which can be changed with `--medaka_model ` parameter shown above. More information on models [can be found here](https://github.com/nanoporetech/medaka#models). Remember to pick a model that best represents the data! ### [Nanopolish](https://github.com/jts/nanopolish) -Nanopolish is a software package for signal-level analysis of Oxford Nanopore sequencing data. It *does not presently support the R10.4 flowcells* so as a variant caller it should only be used with R9.4 flowcells. + +Nanopolish is a software package for signal-level analysis of Oxford Nanopore sequencing data. It _does not presently support the R10.4 flowcells_ so as a variant caller it should only be used with R9.4 flowcells. 
Running with `nanopolish` requires the following parameters: + - `--variant_caller nanopolish` - `--fast5_pass ` - `--sequencing_summary ` @@ -130,6 +150,7 @@ Nanopolish requires the fast5 directory along with the sequencing summary file t ## Running the pipeline ### Amplicon + The typical command for running the pipeline with an [amplicon scheme](#schemes-and-reference) using medaka and a different medaka model is as follows: ```bash @@ -146,6 +167,7 @@ nextflow run phac-nml/viralassembly \ This will launch the pipeline with the `docker` configuration profile, the `medaka` variant caller, and the `nCoV-2019` version `V5.3.2` primer scheme from https://github.com/artic-network/primer-schemes/tree/master/nCoV-2019 (default scheme repo to pull). Profile information [can be found above](#profiles) ### Non-Amplicon + The typical command for running the pipeline without an amplicon scheme using medaka and a different medaka model is as follows: ```bash @@ -188,11 +210,11 @@ nextflow run phac-nml/viralassembly -profile docker -params-file params.yaml with `params.yaml` containing: ```yaml -fastq_pass: './fastq_pass' -variant_caller: 'medaka' -medaka_model: 'r1041_e82_400bps_sup_v4.3.0' -reference: 'reference.fa' -outdir: './results/' +fastq_pass: "./fastq_pass" +variant_caller: "medaka" +medaka_model: "r1041_e82_400bps_sup_v4.3.0" +reference: "reference.fa" +outdir: "./results/" ``` ### Updating the pipeline @@ -218,70 +240,73 @@ If you wish to share such profile (such as upload as supplementary material for ::: ## Input Parameters + Use `--help` to see all options formatted on the command line Use `--version` to see version information ### All Parameters -| Parameter | Description | Type | Default | Notes | -| - | - | - | - | - | -| --fastq_pass | Path to directory containing `barcode##` subdirectories OR Path to directory containing `*.fastq*` files | Path | null | [Option for input params](#input-parameters) | -| --input | Path to samplesheet with information about the samples you would like to analyse | Path | null | [Option for input params](#input-parameters) | -| --variant_caller | Pick from the 3 variant callers: 'clair3', 'medaka', 'nanopolish' | Choice | '' | Details above | -| --clair3_model | Clair3 base model to be used in the pipeline | Str | 'r941_prom_sup_g5014' | Default model will not work the best for all inputs. [See clair3 docs](https://github.com/HKU-BAL/Clair3#pre-trained-models) for additional info | -| --clair3_user_variant_model | Path to clair3 additional model directory to use instead of a base model | Path | '' | Default model will not work the best for all inputs. [See clair3 docs](https://github.com/HKU-BAL/Clair3#pre-trained-models) for additional info | -| --clair3_no_pool_split | Do not split reads into separate pools | Bool | False | Clair3 amplicon sequencing only | -| --medaka_model | Medaka model to be used in the pipeline | Str | 'r941_min_hac_g507' | Default model will not work the best for all inputs. 
[See medaka docs](https://github.com/nanoporetech/medaka#models) for additional info | -| --fast5_pass | Path to directory containing `barcode##` fast5 subdirectories | Path | null | Only for nanopolish | -| --sequencing_summary | Path to run `sequencing_summary*.txt` file | Path | null | Only for nanopolish | -| --min_length | Minimum read length to be kept | Int | 200 | For artic guppyplex | -| --max_length | Maximum read length to be kept | Int | 3000 | For artic guppyplex | -| --min_reads | Minimum size selected reads to be used in pipeline | Int | 20 | | -| --reference | Specify the path to a reference fasta file to run pipeline without a primer scheme | Path | '' | Ignores all scheme inputs. See [schemes and reference](#schemes-and-reference) | -| --scheme | Name of the primer scheme to use | Str | '' | See [schemes and reference](#schemes-and-reference) | -| --scheme_version | Version name of primer scheme to use | Str | '' | See [schemes and reference](#schemes-and-reference) | -| --scheme_repo | Github repository URL to download scheme from | Str | 'https://github.com/artic-network/primer-schemes.git' | See [schemes and reference](#schemes-and-reference) | -| --local_scheme | Path to directory containing local scheme files | Path | null | See [schemes and reference](#schemes-and-reference) | -| --metadata | Path to metadata TSV file with columns 'sample' and 'barcode' | Path | null | See [metadata](#metadata) for more info | -| --use_artic_tool | Run the artic tool itself instead of nextflow implementation | Bool | False | Not available with clair3 | -| --normalise | Artic minion normalise coverage option | Int | 1000 | Entering `0` turns off normalisation. Only for amplicon sequencing | -| --no_frameshift | Use the Artic minion no frameshift vcf filter | Bool | False | Simple `%3 == 0` check for variants | -| --use_bwa | Use BWA instead of minimap2 for read mapping | Bool | False | | -| --skip_longshot | When running with `medaka`, skip running longshot | Bool | False | Medaka only!! | -| --skip_snpeff | Skip running SnpEff | Bool | False | | -| --gff | Path to gff3 formatted file to use in SnpEff database build | Path | False | Not required to run [SnpEff](#snpeff). 
See below for details | -| --skip_qc | Skip running all QC and reporting steps | Bool | false | | -| --custom_report | Run the custom HTML report | Bool | false | Currently requires the use of conda as there is not a singularity container yet | -| --pcr_primer_bed | Path to PCR primer bed file to check for mutations against | Path | null | For output QC checks | -| --neg_control_threshold | Coverage threshold at which to fail negative control samples | Float | 0.10 | | -| --neg_ctrl_substrings | Negative control sample substrings separated by a `,` | Str | 'ntc,neg,blank' | | -| --outdir | Directory name to output results to | Str | 'results' | | -| --cache | Specify a location to store conda/singularity envs/containers for reuse | Path | null | | + +| Parameter | Description | Type | Default | Notes | +| --------------------------- | -------------------------------------------------------------------------------------------------------- | ------ | ----------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | +| --fastq_pass | Path to directory containing `barcode##` subdirectories OR Path to directory containing `*.fastq*` files | Path | null | [Option for input params](#input-parameters) | +| --input | Path to samplesheet with information about the samples you would like to analyse | Path | null | [Option for input params](#input-parameters) | +| --variant_caller | Pick from the 3 variant callers: 'clair3', 'medaka', 'nanopolish' | Choice | '' | Details above | +| --clair3_model | Clair3 base model to be used in the pipeline | Str | 'r941_prom_sup_g5014' | Default model will not work the best for all inputs. [See clair3 docs](https://github.com/HKU-BAL/Clair3#pre-trained-models) for additional info | +| --clair3_user_variant_model | Path to clair3 additional model directory to use instead of a base model | Path | '' | Default model will not work the best for all inputs. [See clair3 docs](https://github.com/HKU-BAL/Clair3#pre-trained-models) for additional info | +| --clair3_no_pool_split | Do not split reads into separate pools | Bool | False | Clair3 amplicon sequencing only | +| --medaka_model | Medaka model to be used in the pipeline | Str | 'r941_min_hac_g507' | Default model will not work the best for all inputs. [See medaka docs](https://github.com/nanoporetech/medaka#models) for additional info | +| --fast5_pass | Path to directory containing `barcode##` fast5 subdirectories | Path | null | Only for nanopolish | +| --sequencing_summary | Path to run `sequencing_summary*.txt` file | Path | null | Only for nanopolish | +| --min_length | Minimum read length to be kept | Int | 200 | For artic guppyplex | +| --max_length | Maximum read length to be kept | Int | 3000 | For artic guppyplex | +| --min_reads | Minimum size selected reads to be used in pipeline | Int | 20 | | +| --reference | Specify the path to a reference fasta file to run pipeline without a primer scheme | Path | '' | Ignores all scheme inputs. 
See [schemes and reference](#schemes-and-reference) | +| --scheme | Name of the primer scheme to use | Str | '' | See [schemes and reference](#schemes-and-reference) | +| --scheme_version | Version name of primer scheme to use | Str | '' | See [schemes and reference](#schemes-and-reference) | +| --scheme_repo | Github repository URL to download scheme from | Str | 'https://github.com/artic-network/primer-schemes.git' | See [schemes and reference](#schemes-and-reference) | +| --local_scheme | Path to directory containing local scheme files | Path | null | See [schemes and reference](#schemes-and-reference) | +| --metadata | Path to metadata TSV file with columns 'sample' and 'barcode' | Path | null | See [metadata](#metadata) for more info | +| --use_artic_tool | Run the artic tool itself instead of nextflow implementation | Bool | False | Not available with clair3 | +| --normalise | Artic minion normalise coverage option | Int | 1000 | Entering `0` turns off normalisation. Only for amplicon sequencing | +| --no_frameshift | Use the Artic minion no frameshift vcf filter | Bool | False | Simple `%3 == 0` check for variants | +| --use_bwa | Use BWA instead of minimap2 for read mapping | Bool | False | | +| --skip_longshot | When running with `medaka`, skip running longshot | Bool | False | Medaka only!! | +| --skip_snpeff | Skip running SnpEff | Bool | False | | +| --gff | Path to gff3 formatted file to use in SnpEff database build | Path | False | Not required to run [SnpEff](#snpeff). See below for details | +| --skip_qc | Skip running all QC and reporting steps | Bool | false | | +| --custom_report | Run the custom HTML report | Bool | false | Currently requires the use of conda as there is not a singularity container yet | +| --pcr_primer_bed | Path to PCR primer bed file to check for mutations against | Path | null | For output QC checks | +| --neg_control_threshold | Coverage threshold at which to fail negative control samples | Float | 0.10 | | +| --neg_ctrl_substrings | Negative control sample substrings separated by a `,` | Str | 'ntc,neg,blank' | | +| --outdir | Directory name to output results to | Str | 'results' | | ### Schemes and Reference + Amplicon schemes are a highly targeted approach to sequencing focusing on a specific target genome. If using an amplicon scheme with this pipeline, either a local directory or a URL that contains the wanted primer scheme formatted according to the below information must be provided. If not running with an amplicon scheme, pass the `--reference ` argument with a reference fasta file and the pipeline will run without amplicon specific checks/outputs. The primer scheme must contain: + - A reference genome fasta sequence titled `*reference.fasta` - A primer bed file titled `*primer.bed` - - Minimum of 6 columns - - Primer pairs with names containing `_LEFT` and `_RIGHT` - - Primer pools + - Minimum of 6 columns + - Primer pairs with names containing `_LEFT` and `_RIGHT` + - Primer pools Example Primer file: -| MN908947.3 | 30 | 54 | nCoV-2019_1_LEFT | 1 | + | -| - | - | - | - | - | - | -| MN908947.3 | 1183 | 1205 | nCoV-2019_1_RIGHT | 1 | - | -| MN908947.3 | 1100 | 1128 | nCoV-2019_2_LEFT | 2 | + | -| MN908947.3 | 2244 | 2266 | nCoV-2019_2_RIGHT | 2 | - | -| ... | ... | ... | ... | ... | ... 
| -| REF ID | Start | Stop | Primer Name | Primer Pool | Direction - +| MN908947.3 | 30 | 54 | nCoV-2019_1_LEFT | 1 | + | +| ---------- | ----- | ---- | ----------------- | ----------- | --------- | +| MN908947.3 | 1183 | 1205 | nCoV-2019_1_RIGHT | 1 | - | +| MN908947.3 | 1100 | 1128 | nCoV-2019_2_LEFT | 2 | + | +| MN908947.3 | 2244 | 2266 | nCoV-2019_2_RIGHT | 2 | - | +| ... | ... | ... | ... | ... | ... | +| REF ID | Start | Stop | Primer Name | Primer Pool | Direction | The directory structure must follow the basic structure as follows: + ``` primer-schemes └── @@ -291,6 +316,7 @@ primer-schemes ``` Example for Sars-CoV2: + ``` primer-schemes └── nCoV-2019 @@ -303,18 +329,21 @@ primer-schemes ``` ### Metadata + Input metadata is used to rename barcoded fastq files along with adding additional lines to the final overall QC csv file. Note that the metadata input is expected to be of a `TSV` format Structure for example `metadata.tsv` file: | sample | barcode | \ | -| - | - | - | -| SR-1 | 1 | X | -| SR-2 | 02 | Y | -| NTC-12 | 12 | Z | +| ------ | ------- | --------------------------------- | +| SR-1 | 1 | X | +| SR-2 | 02 | Y | +| NTC-12 | 12 | Z | ### SnpEff + SnpEff is run by default on all non-segmented viruses (due to current implementation) by using the reference sequence ID to either: + 1. Check if there is a SnpEff database available to download 2. Build a SnpEff database by downloading the sequence genbank file from NCBI @@ -339,4 +368,3 @@ You can also supply a run name to resume a specific run: `-resume [run-name]`. U ### `-c` Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. - diff --git a/lib/Utils.groovy b/lib/Utils.groovy index 8d030f4..13cb02d 100755 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -22,7 +22,7 @@ class Utils { // Check that all channels are present // This channel list is ordered by required channel priority. 
- def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def required_channels_in_order = ['conda-forge', 'bioconda'] def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 6db097f..7db030c 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -17,7 +17,7 @@ class WorkflowMain { "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" + " https://github.com/${workflow.manifest.name}/blob/main/CITATIONS.md" } // diff --git a/modules/local/artic/guppyplex/environment.yml b/modules/local/artic/guppyplex/environment.yml index 93a6fa6..5c73bf6 100644 --- a/modules/local/artic/guppyplex/environment.yml +++ b/modules/local/artic/guppyplex/environment.yml @@ -2,6 +2,6 @@ name: artic channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::artic=1.2.4 + - bioconda::medaka=1.11.3 diff --git a/modules/local/artic/guppyplex/main.nf b/modules/local/artic/guppyplex/main.nf index 3ffee6e..3c4b3b3 100644 --- a/modules/local/artic/guppyplex/main.nf +++ b/modules/local/artic/guppyplex/main.nf @@ -11,11 +11,10 @@ process ARTIC_GUPPYPLEX { tuple val(meta), path(fastq) output: - tuple val(meta), path("${sampleName}.fastq"), emit: fastq + tuple val(meta), path("${meta.id}.fastq"), emit: fastq path("versions.yml"), emit: versions script: - sampleName = "$meta.id" // Fastq input can either be a directory or a set of fastq files // Outputs are the same then after allowing a streamlined pipeline if ( fastq.isDirectory() ) { @@ -23,7 +22,7 @@ process ARTIC_GUPPYPLEX { artic guppyplex \\ --min-length ${params.min_length} \\ --max-length ${params.max_length} \\ - --output ${sampleName}.fastq \\ + --output ${meta.id}.fastq \\ --directory $fastq # Versions # @@ -39,7 +38,7 @@ process ARTIC_GUPPYPLEX { artic guppyplex \\ --min-length ${params.min_length} \\ --max-length ${params.max_length} \\ - --output ${sampleName}.fastq \\ + --output ${meta.id}.fastq \\ --directory input_fastq # Versions # @@ -49,4 +48,15 @@ process ARTIC_GUPPYPLEX { END_VERSIONS """ } + + stub: + """ + touch ${meta.id}.fastq + + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //') + END_VERSIONS + """ } diff --git a/modules/local/artic/minion/environment.yml b/modules/local/artic/minion/environment.yml index 93a6fa6..5c73bf6 100644 --- a/modules/local/artic/minion/environment.yml +++ b/modules/local/artic/minion/environment.yml @@ -2,6 +2,6 @@ name: artic channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::artic=1.2.4 + - bioconda::medaka=1.11.3 diff --git a/modules/local/artic/minion/main.nf b/modules/local/artic/minion/main.nf index 2603e21..37f1961 100644 --- a/modules/local/artic/minion/main.nf +++ b/modules/local/artic/minion/main.nf @@ -2,10 +2,10 @@ process ARTIC_MINION { label 'process_high' label 'error_retry' tag "$meta.id" - publishDir "${params.outdir}/consensus", pattern: "${sampleName}.consensus.fasta", mode: "copy" - publishDir "${params.outdir}/bam", pattern: "${sampleName}.*bam*", mode: "copy" - publishDir "${params.outdir}/vcf", pattern: "${sampleName}.pass.vcf*", mode: "copy" - publishDir "${params.outdir}/vcf", pattern: "${sampleName}.fail.vcf", mode: "copy" + publishDir "${params.outdir}/consensus", 
pattern: "${meta.id}.consensus.fasta", mode: "copy"
+ publishDir "${params.outdir}/bam", pattern: "${meta.id}.*bam*", mode: "copy"
+ publishDir "${params.outdir}/vcf", pattern: "${meta.id}.pass.vcf*", mode: "copy"
+ publishDir "${params.outdir}/vcf", pattern: "${meta.id}.fail.vcf", mode: "copy"
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -19,15 +19,14 @@
path scheme

output:
- tuple val(meta), path("${sampleName}.primertrimmed.rg.sorted.bam"), path("${sampleName}.primertrimmed.rg.sorted.bam.bai"), emit: bam
- tuple val(meta), path("${sampleName}.pass.vcf.gz"), emit: vcf
- tuple val(meta), path("${sampleName}.consensus.fasta"), emit: consensus
- tuple val(meta), path("${sampleName}.fail.vcf"), emit: fail_vcf
- path "${sampleName}*", emit: all
+ tuple val(meta), path("${meta.id}.primertrimmed.rg.sorted.bam"), path("${meta.id}.primertrimmed.rg.sorted.bam.bai"), emit: bam
+ tuple val(meta), path("${meta.id}.pass.vcf.gz"), emit: vcf
+ tuple val(meta), path("${meta.id}.consensus.fasta"), emit: consensus
+ tuple val(meta), path("${meta.id}.fail.vcf"), emit: fail_vcf
+ path "${meta.id}*", emit: all
path "versions.yml", emit: versions

script:
- sampleName = "$meta.id"
// Setup args for medaka vs nanopolish
def argsList = []
if ( params.variant_caller == 'medaka' ) {
@@ -60,7 +59,22 @@
--read-file $fastq \\
--scheme-version ${params.scheme_version} \\
${params.scheme} \\
- $sampleName
+ ${meta.id}
+
+ # Versions #
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //')
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ touch ${meta.id}.primertrimmed.rg.sorted.bam
+ touch ${meta.id}.primertrimmed.rg.sorted.bam.bai
+ touch ${meta.id}.pass.vcf.gz
+ touch ${meta.id}.consensus.fasta
+ touch ${meta.id}.fail.vcf

# Versions #
cat <<-END_VERSIONS > versions.yml
diff --git a/modules/local/artic_subcommands/environment.yml index 93a6fa6..5c73bf6 100644
--- a/modules/local/artic_subcommands/environment.yml
+++ b/modules/local/artic_subcommands/environment.yml
@@ -2,6 +2,6 @@ name: artic
channels:
- conda-forge
- bioconda
- - defaults
dependencies:
- bioconda::artic=1.2.4
+ - bioconda::medaka=1.11.3
diff --git a/modules/local/artic_subcommands/main.nf index 10e0a77..c80da82 100644
--- a/modules/local/artic_subcommands/main.nf
+++ b/modules/local/artic_subcommands/main.nf
@@ -15,8 +15,8 @@ def transformVCFList (inputList) {
process ARTIC_ALIGN_TRIM {
label 'process_single'
tag "$meta.id"
- publishDir "${params.outdir}/bam", pattern: "${sampleName}.*trimmed.rg.sorted.bam", mode: "copy"
- // publishDir "${params.outdir}/articMinionNextflow", pattern: "${sampleName}.alignreport-*", mode: "copy"
+ publishDir "${params.outdir}/bam", pattern: "${meta.id}.*trimmed.rg.sorted.bam", mode: "copy"
+ // publishDir "${params.outdir}/articMinionNextflow", pattern: "${meta.id}.alignreport-*", mode: "copy"
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
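An aside on the `stub:` blocks being introduced throughout these modules: they let the whole workflow graph be exercised without invoking the underlying tools, which keeps tests fast. A stub run is triggered with Nextflow's `-stub-run` flag; a sketch, assuming a `test` profile exists in the pipeline config:

```bash
nextflow run phac-nml/viralassembly -profile test,docker -stub-run
```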
@@ -29,19 +29,18 @@ process ARTIC_ALIGN_TRIM { val mode output: - tuple val(meta), path("${sampleName}.*trimmed.rg.sorted.bam"), path("${sampleName}.*trimmed.rg.sorted.bam.bai"), emit: bam + tuple val(meta), path("${meta.id}.*trimmed.rg.sorted.bam"), path("${meta.id}.*trimmed.rg.sorted.bam.bai"), emit: bam path "versions.yml", emit: versions script: - sampleName = "$meta.id" def argsList = [] if ( params.normalise ) { argsList.add("--normalise ${params.normalise}") } - outName = "${sampleName}.primertrimmed.rg.sorted.bam" + outName = "${meta.id}.primertrimmed.rg.sorted.bam" // Start mode = Trim to start of primers instead of ends if ( mode == "start" ) { - outName = "${sampleName}.trimmed.rg.sorted.bam" + outName = "${meta.id}.trimmed.rg.sorted.bam" argsList.add("--start") } def argsConfig = argsList.join(" ") @@ -49,13 +48,25 @@ process ARTIC_ALIGN_TRIM { align_trim \\ $argsConfig \\ --remove-incorrect-pairs \\ - --report ${sampleName}.alignreport-${mode}.txt \\ + --report ${meta.id}.alignreport-${mode}.txt \\ $primer_bed \\ - < $bam 2> ${sampleName}.alignreport-${mode}.er | samtools sort -T ${sampleName} - -o $outName + < $bam 2> ${meta.id}.alignreport-${mode}.er | samtools sort -T ${meta.id} - -o $outName samtools index $outName - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.*trimmed.rg.sorted.bam + touch ${meta.id}.*trimmed.rg.sorted.bam.bai + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //') @@ -65,7 +76,7 @@ process ARTIC_ALIGN_TRIM { process ARTIC_VCF_MERGE { label 'process_single' tag "$meta.id" - // publishDir "${params.outdir}/articMinionNextflow", pattern: "${sampleName}.merged.vcf", mode: "copy" + // publishDir "${params.outdir}/articMinionNextflow", pattern: "${meta.id}.merged.vcf", mode: "copy" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
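For context on the `ARTIC_ALIGN_TRIM` hunk above: the process runs in two modes, and the `--start` flag is what separates the two output names. Roughly, with illustrative file names:

```bash
# default mode: trim reads back to the amplicon ends -> <sample>.primertrimmed.rg.sorted.bam
align_trim --normalise 1000 --remove-incorrect-pairs \
    --report sample.alignreport-end.txt primer.bed \
    < sample.sorted.bam 2> sample.alignreport-end.er \
    | samtools sort -T sample - -o sample.primertrimmed.rg.sorted.bam

# start mode: trim to the start of the primers instead -> <sample>.trimmed.rg.sorted.bam
align_trim --normalise 1000 --start --remove-incorrect-pairs \
    --report sample.alignreport-start.txt primer.bed \
    < sample.sorted.bam 2> sample.alignreport-start.er \
    | samtools sort -T sample - -o sample.trimmed.rg.sorted.bam
```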
@@ -80,20 +91,30 @@
path primer_bed

output:
- tuple val(meta), path("${sampleName}.merged.vcf"), emit: vcf
+ tuple val(meta), path("${meta.id}.merged.vcf"), emit: vcf
path "versions.yml", emit: versions

script:
- sampleName = "$meta.id"
def vcfs = transformVCFList(vcf_tuples)
"""
artic_vcf_merge \\
- ${sampleName} \\
+ ${meta.id} \\
$primer_bed \\
- 2> ${sampleName}.primersitereport.txt \\
+ 2> ${meta.id}.primersitereport.txt \\
$vcfs

- # Versions from nf-core #
+ # Versions #
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //')
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ touch ${meta.id}.merged.vcf
+
+ # Versions #
cat <<-END_VERSIONS > versions.yml
"${task.process}":
artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //')
@@ -113,27 +134,38 @@ process ZIP_AND_INDEX_VCF {
tuple val(meta), path(vcf)

output:
- tuple val(meta), path("${sampleName}*.vcf.gz"), path("${sampleName}*.vcf.gz.tbi"), emit: vcf
+ tuple val(meta), path("${meta.id}*.vcf.gz"), path("${meta.id}*.vcf.gz.tbi"), emit: vcf
path "versions.yml", emit: versions

script:
- sampleName = "$meta.id"
"""
bgzip -f $vcf
tabix -f -p vcf ${vcf}.gz

- # Versions from nf-core #
+ # Versions #
cat <<-END_VERSIONS > versions.yml
"${task.process}":
tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
END_VERSIONS
"""
+
+ stub:
+ """
+ touch ${meta.id}.vcf.gz
+ touch ${meta.id}.vcf.gz.tbi
+
+ # Versions #
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+ END_VERSIONS
+ """
}
process CUSTOM_VCF_FILTER {
label 'process_single'
tag "$meta.id"
- publishDir "${params.outdir}/vcf", pattern: "${sampleName}.pass.vcf.gz*", mode: "copy"
- publishDir "${params.outdir}/vcf", pattern: "${sampleName}.fail.vcf", mode: "copy"
+ publishDir "${params.outdir}/vcf", pattern: "${meta.id}.pass.vcf.gz*", mode: "copy"
+ publishDir "${params.outdir}/vcf", pattern: "${meta.id}.fail.vcf", mode: "copy"
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -144,12 +176,11 @@ process CUSTOM_VCF_FILTER { tuple val(meta), path(vcf) output: - tuple val(meta), path("${sampleName}.pass.vcf.gz"), path("${sampleName}.pass.vcf.gz.tbi"), emit: pass_vcf - tuple val(meta), path("${sampleName}.fail.vcf"), emit: fail_vcf + tuple val(meta), path("${meta.id}.pass.vcf.gz"), path("${meta.id}.pass.vcf.gz.tbi"), emit: pass_vcf + tuple val(meta), path("${meta.id}.fail.vcf"), emit: fail_vcf path "versions.yml", emit: versions script: - sampleName = "$meta.id" def filterArg = '--nanopolish' if ( params.variant_caller == 'medaka' ) { filterArg = '--medaka' @@ -160,12 +191,25 @@ process CUSTOM_VCF_FILTER { cs_vcf_filter.py \\ $filterArg \\ $vcf \\ - ${sampleName}.pass.vcf \\ - ${sampleName}.fail.vcf - bgzip -f ${sampleName}.pass.vcf - tabix -p vcf ${sampleName}.pass.vcf.gz + ${meta.id}.pass.vcf \\ + ${meta.id}.fail.vcf + bgzip -f ${meta.id}.pass.vcf + tabix -p vcf ${meta.id}.pass.vcf.gz - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.pass.vcf.gz + touch ${meta.id}.pass.vcf.gz.tbi + touch ${meta.id}.fail.vcf + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //') @@ -175,7 +219,7 @@ process CUSTOM_VCF_FILTER { process ARTIC_MAKE_DEPTH_MASK{ label 'process_single' tag "$meta.id" - // publishDir "${params.outdir}/articMinionNextflow", pattern: "${sampleName}.coverage_mask.txt", mode: "copy" + // publishDir "${params.outdir}/articMinionNextflow", pattern: "${meta.id}.coverage_mask.txt", mode: "copy" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -187,19 +231,29 @@ process ARTIC_MAKE_DEPTH_MASK{ path reference output: - tuple val(meta), path("${sampleName}.coverage_mask.txt"), emit: coverage_mask + tuple val(meta), path("${meta.id}.coverage_mask.txt"), emit: coverage_mask path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ artic_make_depth_mask \\ --store-rg-depths \\ $reference \\ $bam \\ - ${sampleName}.coverage_mask.txt + ${meta.id}.coverage_mask.txt - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.coverage_mask.txt + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //') @@ -212,7 +266,7 @@ process CUSTOM_MAKE_DEPTH_MASK { label 'process_medium' label 'error_retry' tag "${meta.id}" - // publishDir "${params.outdir}/articMinionNextflow", pattern: "${sampleName}.coverage_mask.txt", mode: "copy" + // publishDir "${params.outdir}/articMinionNextflow", pattern: "${meta.id}.coverage_mask.txt", mode: "copy" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
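Before the `CUSTOM_MAKE_DEPTH_MASK` hunk below: a depth mask is simply a list of reference positions whose coverage falls under the cutoff (`--depth 20` here), which later get N-masked in the consensus. A rough standalone illustration of the idea, not the `cs_make_depth_mask.py` script itself:

```bash
# list positions covered by fewer than 20 reads (sketch)
samtools depth -a sample.primertrimmed.rg.sorted.bam \
    | awk '$3 < 20 { print $1 "\t" $2 }' > low_coverage_positions.txt
```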
@@ -224,16 +278,20 @@ process CUSTOM_MAKE_DEPTH_MASK { path(reference) output: - tuple val(meta), path("${sampleName}.coverage_mask.txt"), emit: coverage_mask + tuple val(meta), path("${meta.id}.coverage_mask.txt"), emit: coverage_mask script: - sampleName = "$meta.id" """ cs_make_depth_mask.py \\ --depth 20 \\ $reference \\ $bam \\ - ${sampleName}.coverage_mask.txt + ${meta.id}.coverage_mask.txt + """ + + stub: + """ + touch ${meta.id}.coverage_mask.txt """ } process ARTIC_MASK { @@ -250,19 +308,29 @@ process ARTIC_MASK { path reference output: - tuple val(meta), path("${sampleName}.preconsensus.fasta"), emit: preconsensus + tuple val(meta), path("${meta.id}.preconsensus.fasta"), emit: preconsensus path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ artic_mask \\ $reference \\ $coverage_mask \\ $fail_vcf \\ - ${sampleName}.preconsensus.fasta + ${meta.id}.preconsensus.fasta + + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.preconsensus.fasta - # Versions from nf-core # + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //') diff --git a/modules/local/bcftools/consensus/environment.yml b/modules/local/bcftools/consensus/environment.yml index bcf745c..d38a7a7 100644 --- a/modules/local/bcftools/consensus/environment.yml +++ b/modules/local/bcftools/consensus/environment.yml @@ -2,6 +2,5 @@ name: bcftools_consensus channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::bcftools=1.19 diff --git a/modules/local/bcftools/consensus/main.nf b/modules/local/bcftools/consensus/main.nf index 0ff24a3..647ed95 100644 --- a/modules/local/bcftools/consensus/main.nf +++ b/modules/local/bcftools/consensus/main.nf @@ -1,7 +1,7 @@ process BCFTOOLS_CONSENSUS { label 'process_single' tag "$meta.id" - publishDir "${params.outdir}/consensus", pattern: "${sampleName}.consensus.fasta", mode: "copy" + publishDir "${params.outdir}/consensus", pattern: "${meta.id}.consensus.fasta", mode: "copy" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
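For orientation, the masking and consensus pieces in the surrounding hunks chain together roughly as follows (sketch; file names illustrative, argument order and flags as they appear in these modules):

```bash
# N-mask low-coverage regions and failed variants into a pre-consensus
artic_mask reference.fasta sample.coverage_mask.txt sample.fail.vcf sample.preconsensus.fasta

# then apply the normalised passing variants on top of it
bcftools consensus -f sample.preconsensus.fasta sample.pass.norm.vcf.gz \
    -m sample.coverage_mask.txt -o sample.consensus.fasta
```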
@@ -12,23 +12,33 @@ process BCFTOOLS_CONSENSUS { tuple val(meta), path(preconsensus), path(coverage_mask), path(pass_vcf), path(pass_vcf_tbi) output: - tuple val(meta), path("${sampleName}.consensus.fasta"), emit: consensus + tuple val(meta), path("${meta.id}.consensus.fasta"), emit: consensus path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ # Command # bcftools consensus \\ -f $preconsensus \\ - ${sampleName}.pass.norm.vcf.gz \\ + ${meta.id}.pass.norm.vcf.gz \\ -m $coverage_mask \\ - -o ${sampleName}.consensus.fasta + -o ${meta.id}.consensus.fasta # Apply samplename as header but keep existing info # - sed -i "s/>/>$sampleName /" ${sampleName}.consensus.fasta + sed -i "s/>/>$meta.id /" ${meta.id}.consensus.fasta - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.consensus.fasta + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') diff --git a/modules/local/bcftools/norm/environment.yml b/modules/local/bcftools/norm/environment.yml index 2fe64fa..6a3bbf7 100644 --- a/modules/local/bcftools/norm/environment.yml +++ b/modules/local/bcftools/norm/environment.yml @@ -2,6 +2,5 @@ name: bcftools_norm channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::bcftools=1.19 diff --git a/modules/local/bcftools/norm/main.nf b/modules/local/bcftools/norm/main.nf index 04e1b0d..a324f81 100644 --- a/modules/local/bcftools/norm/main.nf +++ b/modules/local/bcftools/norm/main.nf @@ -1,7 +1,7 @@ process BCFTOOLS_NORM { label 'process_single' tag "$meta.id" - publishDir "${params.outdir}/vcf", pattern: "${sampleName}.pass.norm.vcf.gz", mode: "copy" + publishDir "${params.outdir}/vcf", pattern: "${meta.id}.pass.norm.vcf.gz", mode: "copy" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
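One note on the `bcftools norm --check-ref s` call in the next hunk: per bcftools' documented `--check-ref` modes, `s` sets REF alleles that disagree with the supplied fasta (here the masked pre-consensus) rather than erroring out, which is what the in-code comment about pass/fail overlap relies on. A standalone sketch:

```bash
bcftools norm --check-ref s -f sample.preconsensus.fasta sample.pass.vcf.gz > sample.pass.norm.vcf
bgzip sample.pass.norm.vcf
tabix sample.pass.norm.vcf.gz
```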
@@ -12,22 +12,33 @@
tuple val(meta), path(preconsensus), path(pass_vcf), path(pass_vcf_tbi)

output:
- tuple val(meta), path("${sampleName}.pass.norm.vcf.gz"), path("${sampleName}.pass.norm.vcf.gz.tbi"), emit: vcf
+ tuple val(meta), path("${meta.id}.pass.norm.vcf.gz"), path("${meta.id}.pass.norm.vcf.gz.tbi"), emit: vcf
path "versions.yml", emit: versions

script:
- sampleName = "$meta.id"
"""
# Fixes variants that are in both the pass and fail vcf that were masked #
bcftools norm \\
--check-ref s \\
-f $preconsensus \\
$pass_vcf \\
- > ${sampleName}.pass.norm.vcf
- bgzip ${sampleName}.pass.norm.vcf
- tabix ${sampleName}.pass.norm.vcf.gz
+ > ${meta.id}.pass.norm.vcf
+ bgzip ${meta.id}.pass.norm.vcf
+ tabix ${meta.id}.pass.norm.vcf.gz

- # Versions from nf-core #
+ # Versions #
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ touch ${meta.id}.pass.norm.vcf.gz
+ touch ${meta.id}.pass.norm.vcf.gz.tbi
+
+ # Versions #
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
diff --git a/modules/local/bcftools/stats/environment.yml index 4f40061..effaaa1 100644
--- a/modules/local/bcftools/stats/environment.yml
+++ b/modules/local/bcftools/stats/environment.yml
@@ -2,6 +2,5 @@ name: bcftools_stats
channels:
- conda-forge
- bioconda
- - defaults
dependencies:
- bioconda::bcftools=1.19
diff --git a/modules/local/bcftools/stats/main.nf index 9f424c1..37d30aa 100644
--- a/modules/local/bcftools/stats/main.nf
+++ b/modules/local/bcftools/stats/main.nf
@@ -16,15 +16,25 @@ process BCFTOOLS_STATS {
path "versions.yml", emit: versions

script:
- sampleName = "$meta.id"
"""
# Command #
bcftools stats \\
--fasta-ref $reference \\
$vcf \\
- > ${sampleName}.stats.txt
+ > ${meta.id}.stats.txt

- # Versions from nf-core #
+ # Versions #
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ touch ${meta.id}.stats.txt
+
+ # Versions #
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
diff --git a/modules/local/bedtools/coverage/environment.yml index 1df149e..b3368f0 100644
--- a/modules/local/bedtools/coverage/environment.yml
+++ b/modules/local/bedtools/coverage/environment.yml
@@ -2,6 +2,5 @@ name: bedtools
channels:
- conda-forge
- bioconda
- - defaults
dependencies:
- bioconda::bedtools=2.31.1
diff --git a/modules/local/bedtools/coverage/main.nf index 7385985..496701b 100644
--- a/modules/local/bedtools/coverage/main.nf
+++ b/modules/local/bedtools/coverage/main.nf
@@ -12,21 +12,31 @@ process BEDTOOLS_COVERAGE_GENOME_BED {
path genome_bed

output:
- tuple val(meta), path("${sampleName}.per_base_coverage.bed"), emit: cov_bed
+ tuple val(meta), path("${meta.id}.per_base_coverage.bed"), emit: cov_bed
path "versions.yml", emit: versions

script:
- sampleName = "$meta.id"
"""
echo -e "reference_name start end position depth" \\
- > ${sampleName}.per_base_coverage.bed
+ > ${meta.id}.per_base_coverage.bed
bedtools coverage \\
-d \\
-a $genome_bed \\
-b $bam \\
- >> ${sampleName}.per_base_coverage.bed
+ >> ${meta.id}.per_base_coverage.bed

- # Versions from nf-core #
+ # Versions #
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bedtools: \$(echo \$(bedtools --version 2>&1) | sed 's/^.*bedtools v//' )
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ touch ${meta.id}.per_base_coverage.bed
+
+ # Versions #
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bedtools: \$(echo \$(bedtools --version 2>&1) | sed 's/^.*bedtools v//' )
@@ -49,17 +59,27 @@ process BEDTOOLS_COVERAGE_AMPLICON_BED {
path "versions.yml", emit: versions

script:
- sampleName = "$meta.id"
"""
echo -e "reference_name start end amplicon_id pool strand read_count covered_bases amplicon_length fraction_covered" \\
- > ${sampleName}.amplicon_coverage.bed
+ > ${meta.id}.amplicon_coverage.bed
bedtools coverage \\
-a $amplicon_bed \\
-b $bam \\
-F 0.85 \\
- >> ${sampleName}.amplicon_coverage.bed
+ >> ${meta.id}.amplicon_coverage.bed
+
+ # Versions #
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bedtools: \$(echo \$(bedtools --version 2>&1) | sed 's/^.*bedtools v//' )
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ touch ${meta.id}.amplicon_coverage.bed

- # Versions from nf-core #
+ # Versions #
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bedtools: \$(echo \$(bedtools --version 2>&1) | sed 's/^.*bedtools v//' )
diff --git a/modules/local/chopper/environment.yml index 920bc79..2b63dcc 100644
--- a/modules/local/chopper/environment.yml
+++ b/modules/local/chopper/environment.yml
@@ -2,6 +2,5 @@ name: chopper
channels:
- conda-forge
- bioconda
- - defaults
dependencies:
- bioconda::chopper=0.7.0
diff --git a/modules/local/chopper/main.nf index 12dbdf1..9304d94 100644
--- a/modules/local/chopper/main.nf
+++ b/modules/local/chopper/main.nf
@@ -15,7 +15,6 @@ process CHOPPER {
path "versions.yml", emit: versions

script:
- sampleName = "$meta.id"
// Checking if gzipped or not for stdin to chopper
// Note that pipeline should always be just cat
def cat_cmd = "cat"
@@ -29,8 +28,29 @@
--threads $task.cpus \\
--quality 8 \\
--minlength 100 \\
- | gzip > ${sampleName}.processed.fastq.gz
+ | gzip > ${meta.id}.processed.fastq.gz

+ # Versions #
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ chopper: \$(chopper --version 2>&1 | cut -d ' ' -f 2)
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ # Adding a read to get the option to pass the filtering read check
+ # if we want to
+ read="@read1
+ TTT
+ +
+ CCC
+ "
+
+ echo -e "\$read" > ${meta.id}.processed.fastq
+ gzip ${meta.id}.processed.fastq
+
+ # Versions #
cat <<-END_VERSIONS > versions.yml
"${task.process}":
chopper: \$(chopper --version 2>&1 | cut -d ' ' -f 2)
diff --git a/modules/local/custom/filtering.nf index a25b5c5..d5f5ef7 100644
--- a/modules/local/custom/filtering.nf
+++ b/modules/local/custom/filtering.nf
@@ -38,4 +38,9 @@ process TRACK_FILTERED_SAMPLES {
echo "sample,qc_pass" > $SAMPLENAME.status.csv
echo "$SAMPLENAME,!{fail_message}" >> $SAMPLENAME.status.csv
'''
+
+ stub:
+ """
+ touch ${meta.id}.status.csv
+ """
}
diff --git a/modules/local/custom/utils.nf index 761aeb9..a11c4f2 100644
--- a/modules/local/custom/utils.nf
+++ b/modules/local/custom/utils.nf
@@ -29,6 +29,11 @@ process CAT_FASTQ {
cat $reads | pigz -ck >> $outName
fi
"""
+
+ stub:
+ """
+ touch ${meta.id}.merged.fastq.gz
+ """
}
process DOWNLOAD_SCHEME {
label 'process_single'
@@ -42,6 +47,13 @@ process
DOWNLOAD_SCHEME { """ git clone ${params.scheme_repo} primer-schemes """ + + stub: + """ + mkdir -p primer-schemes/stub/V1 + touch primer-schemes/stub/V1/stub.reference.fasta + touch primer-schemes/stub/V1/stub.scheme.bed + """ } process SIMPLE_SCHEME_VALIDATE { label 'process_single' @@ -95,7 +107,20 @@ process GET_REF_STATS { cat ${reference}.fai | awk '{print \$1 ":1-" \$2+1}' > refstats.txt cat ${reference}.fai | awk '{ print \$1 " 0 " \$2 }' > genome.bed - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${reference}.fai + touch refstats.txt + touch genome.bed + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') @@ -123,7 +148,19 @@ process CREATE_AMPLICON_BED { primers_to_amplicons.py \\ --bed $bed - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + touch amplicon.bed + touch tiling_region.bed + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') @@ -146,14 +183,24 @@ process RENAME_FASTQ { path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ rename_fastq.py \\ --fastq $fastq \\ --metadata $metadata \\ - --barcode $sampleName + --barcode $meta.id + + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.fastq - # Versions from nf-core # + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') @@ -174,4 +221,10 @@ process SPLIT_BED_BY_POOL { """ awk -F'\t' -v OFS='\t' 'NR>0{print \$1, \$2, \$3, \$4, \$5, \$6 > \$5".split.bed"}' $bed """ + + stub: + """ + touch 1.split.bed + touch 2.split.bed + """ } diff --git a/modules/local/longshot/environment.yml b/modules/local/longshot/environment.yml index 99f3d56..d7cd738 100644 --- a/modules/local/longshot/environment.yml +++ b/modules/local/longshot/environment.yml @@ -2,6 +2,5 @@ name: longshot channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::longshot=0.4.5 diff --git a/modules/local/longshot/main.nf b/modules/local/longshot/main.nf index fb70f10..17112b6 100644 --- a/modules/local/longshot/main.nf +++ b/modules/local/longshot/main.nf @@ -1,7 +1,7 @@ process LONGSHOT { label 'process_medium' tag "$meta.id" - publishDir "${params.outdir}/vcf", pattern: "${sampleName}.longshot.merged.vcf", mode: "copy" + publishDir "${params.outdir}/vcf", pattern: "${meta.id}.longshot.merged.vcf", mode: "copy" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
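Returning to the `GET_REF_STATS` awk one-liners a little further up: given a `.fai` line such as `MN908947.3<TAB>29903<TAB>...`, they emit the region string used downstream and a whole-genome BED line. A standalone sketch:

```bash
# the first two .fai columns are sequence name and length
printf 'MN908947.3\t29903\n' | awk '{print $1 ":1-" $2+1}'   # -> MN908947.3:1-29904
printf 'MN908947.3\t29903\n' | awk '{ print $1 " 0 " $2 }'   # -> MN908947.3 0 29903
```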
@@ -14,11 +14,10 @@ process LONGSHOT { path reference_fai output: - tuple val(meta), path("${sampleName}.longshot.merged.vcf"), emit: vcf + tuple val(meta), path("${meta.id}.longshot.merged.vcf"), emit: vcf path "versions.yml", emit: versions script: - sampleName = "$meta.id" def VERSION = '0.4.5' // Longshot version does not seem to be being printed out """ longshot \\ @@ -28,10 +27,22 @@ process LONGSHOT { --no_haps \\ --bam $bam \\ --ref $reference \\ - --out ${sampleName}.longshot.merged.vcf \\ + --out ${meta.id}.longshot.merged.vcf \\ --potential_variants $vcf - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longshot: $VERSION + END_VERSIONS + """ + + stub: + def VERSION = '0.4.5' // Longshot version does not seem to be being printed out + """ + touch ${meta.id}.longshot.merged.vcf + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": longshot: $VERSION diff --git a/modules/local/minimap2/environment.yml b/modules/local/minimap2/environment.yml index 72b67d5..fb10aed 100644 --- a/modules/local/minimap2/environment.yml +++ b/modules/local/minimap2/environment.yml @@ -2,7 +2,6 @@ name: minimap2 channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::minimap2=2.24 - bioconda::samtools=1.18 diff --git a/modules/local/minimap2/main.nf b/modules/local/minimap2/main.nf index cc7ab1c..590983a 100644 --- a/modules/local/minimap2/main.nf +++ b/modules/local/minimap2/main.nf @@ -1,7 +1,7 @@ process MINIMAP2_ALIGN { label 'process_medium' tag "$meta.id" - publishDir "${params.outdir}/bam", pattern: "${sampleName}.sorted.bam*", mode: "copy" + publishDir "${params.outdir}/bam", pattern: "${meta.id}.sorted.bam*", mode: "copy" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -13,11 +13,10 @@ process MINIMAP2_ALIGN { path reference output: - tuple val(meta), path("${sampleName}.sorted.bam"), path("${sampleName}.sorted.bam.bai"), emit: bam + tuple val(meta), path("${meta.id}.sorted.bam"), path("${meta.id}.sorted.bam.bai"), emit: bam path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ minimap2 \\ -a \\ @@ -26,11 +25,11 @@ process MINIMAP2_ALIGN { $reference \\ $fastq \\ | samtools view -bS -F 4 - \\ - | samtools sort -o ${sampleName}.sorted.bam + | samtools sort -o ${meta.id}.sorted.bam - samtools index ${sampleName}.sorted.bam + samtools index ${meta.id}.sorted.bam - # Versions from nf-core # + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": minimap2: \$(minimap2 --version 2>&1) diff --git a/modules/local/multiqc/environment.yml b/modules/local/multiqc/environment.yml index 2212096..5e30e5e 100644 --- a/modules/local/multiqc/environment.yml +++ b/modules/local/multiqc/environment.yml @@ -2,6 +2,5 @@ name: multiqc channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::multiqc=1.20 diff --git a/modules/local/multiqc/main.nf b/modules/local/multiqc/main.nf index 19e42de..28aeee8 100644 --- a/modules/local/multiqc/main.nf +++ b/modules/local/multiqc/main.nf @@ -19,16 +19,20 @@ process MULTIQC_SAMPLE { path "*.html", emit: html script: - sampleName = "$meta.id" """ multiqc \\ -f \\ -k yaml \\ --config $multiqc_config \\ - --filename ${sampleName}.report.html \\ - --title "NML ${sampleName} Sample Report" \\ + --filename ${meta.id}.report.html \\ + --title "NML ${meta.id} Sample Report" \\ . 
""" + + stub: + """ + touch ${meta.id}.report.html + """ } process MULTIQC_OVERALL { label 'process_single' @@ -63,4 +67,9 @@ process MULTIQC_OVERALL { --config $multiqc_config \\ . """ + + stub: + """ + touch Overall-Run-MultiQC.report.html + """ } diff --git a/modules/local/nanopore_amplicon/env-clair3.yml b/modules/local/nanopore_amplicon/env-clair3.yml index e099b88..fcc6385 100644 --- a/modules/local/nanopore_amplicon/env-clair3.yml +++ b/modules/local/nanopore_amplicon/env-clair3.yml @@ -2,6 +2,5 @@ name: clair3 channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::clair3=1.0.6 diff --git a/modules/local/nanopore_amplicon/env-medaka.yml b/modules/local/nanopore_amplicon/env-medaka.yml index cdba783..2772779 100644 --- a/modules/local/nanopore_amplicon/env-medaka.yml +++ b/modules/local/nanopore_amplicon/env-medaka.yml @@ -2,6 +2,5 @@ name: medaka channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::medaka=1.11.3 diff --git a/modules/local/nanopore_amplicon/env-nanopolish.yml b/modules/local/nanopore_amplicon/env-nanopolish.yml index ef8827a..cba5284 100644 --- a/modules/local/nanopore_amplicon/env-nanopolish.yml +++ b/modules/local/nanopore_amplicon/env-nanopolish.yml @@ -2,6 +2,5 @@ name: nanopolish channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::nanopolish=0.14.0 diff --git a/modules/local/nanopore_amplicon/main.nf b/modules/local/nanopore_amplicon/main.nf index 756cd8d..a1f84b2 100644 --- a/modules/local/nanopore_amplicon/main.nf +++ b/modules/local/nanopore_amplicon/main.nf @@ -3,7 +3,7 @@ The parameters match those for shotgun data The main difference is that the amplicon processes are heavily coupled to use of the readgroup/bed file/regions - while the shotgun callers are not + while the shotgun processes are not */ process MEDAKA_CONSENSUS { label 'process_medium' @@ -19,11 +19,10 @@ process MEDAKA_CONSENSUS { tuple val(meta), path(bam), path(bai), val(pool) output: - tuple val(meta), path("${sampleName}.${pool}.hdf"), val(pool), emit: hdf + tuple val(meta), path("${meta.id}.${pool}.hdf"), val(pool), emit: hdf path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ medaka consensus \\ --model ${params.medaka_model} \\ @@ -32,9 +31,20 @@ process MEDAKA_CONSENSUS { --chunk_ovlp 400 \\ --RG ${pool} \\ $bam \\ - ${sampleName}.${pool}.hdf + ${meta.id}.${pool}.hdf - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.${pool}.hdf + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) @@ -44,7 +54,7 @@ process MEDAKA_CONSENSUS { process MEDAKA_VARIANT { label 'process_medium' tag "${meta.id}-${pool}" - // publishDir "${params.outdir}/articMinionNextflow", pattern: "${sampleName}.${pool}.vcf", mode: "copy" + // publishDir "${params.outdir}/articMinionNextflow", pattern: "${meta.id}.${pool}.vcf", mode: "copy" conda "${moduleDir}/env-medaka.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
@@ -56,18 +66,28 @@ process MEDAKA_VARIANT { path reference output: - tuple val(meta), path("${sampleName}.${pool}.vcf"), val(pool), emit: vcf + tuple val(meta), path("${meta.id}.${pool}.vcf"), val(pool), emit: vcf path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ medaka variant \\ $reference \\ $hdf \\ - ${sampleName}.${pool}.vcf + ${meta.id}.${pool}.vcf + + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.${pool}.vcf - # Versions from nf-core # + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) @@ -78,7 +98,7 @@ process NANOPOLISH_VARIANTS { label 'process_high' label 'error_retry' tag "${meta.id}-${pool}" - // publishDir "${params.outdir}/articMinionNextflow", pattern: "${sampleName}.${pool}.vcf", mode: "copy" + // publishDir "${params.outdir}/articMinionNextflow", pattern: "${meta.id}.${pool}.vcf", mode: "copy" conda "${moduleDir}/env-nanopolish.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -93,13 +113,12 @@ process NANOPOLISH_VARIANTS { path reference_stats // Example: MN908947.3:1-29904 output: - tuple val(meta), path("${sampleName}.${pool}.vcf"), val(pool), emit: vcf + tuple val(meta), path("${meta.id}.${pool}.vcf"), val(pool), emit: vcf path "versions.yml", emit: versions // Should look into if index can be a separate step per fastq file to speed up time // As then instead of doing it 2x (for 2 pools) it'd only be 1x script: - sampleName = "$meta.id" """ refstats=\$(cat $reference_stats) nanopolish index \\ @@ -118,9 +137,20 @@ process NANOPOLISH_VARIANTS { --ploidy 1 \\ -m 0.15 \\ --read-group ${pool} \\ - -o ${sampleName}.${pool}.vcf + -o ${meta.id}.${pool}.vcf - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nanopolish: \$(echo \$(nanopolish --version | grep nanopolish | sed 's/nanopolish version //')) + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.${pool}.vcf + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": nanopolish: \$(echo \$(nanopolish --version | grep nanopolish | sed 's/nanopolish version //')) @@ -131,7 +161,7 @@ process CLAIR3_VARIANTS { label 'process_high' label 'error_retry' tag "${meta.id}-${pool}" - // publishDir "${params.outdir}/articMinionNextflow", pattern: "${sampleName}.${pool}.vcf", mode: "copy" + // publishDir "${params.outdir}/articMinionNextflow", pattern: "${meta.id}.${pool}.vcf", mode: "copy" conda "${moduleDir}/env-clair3.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
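Since the `CLAIR3_VARIANTS` body below resolves `MODEL_PATH` from either the base model name or a user-supplied directory, the user-facing side from the docs earlier in this PR is worth recalling: an extra model downloaded from ONT Rerio is passed as a directory path (sketch; the path is a placeholder):

```bash
nextflow run phac-nml/viralassembly \
  -profile docker \
  --variant_caller clair3 \
  --fastq_pass ./fastq_pass \
  --scheme nCoV-2019 \
  --scheme_version V5.3.2 \
  --clair3_user_variant_model /path/to/rerio/clair3_model_dir \
  --outdir ./results
```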
@@ -146,11 +176,10 @@ process CLAIR3_VARIANTS { path model_path output: - tuple val(meta), path("${sampleName}.${pool}.vcf"), val(pool), emit: vcf + tuple val(meta), path("${meta.id}.${pool}.vcf"), val(pool), emit: vcf path "versions.yml", emit: versions script: - sampleName = "$meta.id" // Using some of the nf-flu work to get clair3 working model_suffix = "models/${params.clair3_model}" using_conda = (workflow.containerEngine == null || workflow.containerEngine == '') @@ -173,7 +202,7 @@ process CLAIR3_VARIANTS { --threads=${task.cpus} \\ --platform='ont' \\ --model_path="\$MODEL_PATH" \\ - --output="${sampleName}-out" \\ + --output="${meta.id}-out" \\ --min_coverage=10 \\ --haploid_precise \\ --enable_long_indel \\ @@ -181,10 +210,21 @@ process CLAIR3_VARIANTS { --include_all_ctgs \\ --no_phasing_for_fa - gunzip ${sampleName}-out/merge_output.vcf.gz - ln -s ${sampleName}-out/merge_output.vcf ${sampleName}.${pool}.vcf + gunzip ${meta.id}-out/merge_output.vcf.gz + ln -s ${meta.id}-out/merge_output.vcf ${meta.id}.${pool}.vcf + + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clair3: \$(echo \$(run_clair3.sh -v) | sed 's/Clair3 //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.${pool}.vcf - # Versions from nf-core # + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": clair3: \$(echo \$(run_clair3.sh -v) | sed 's/Clair3 //') diff --git a/modules/local/nanopore_shotgun/env-clair3.yml b/modules/local/nanopore_shotgun/env-clair3.yml index e099b88..fcc6385 100644 --- a/modules/local/nanopore_shotgun/env-clair3.yml +++ b/modules/local/nanopore_shotgun/env-clair3.yml @@ -2,6 +2,5 @@ name: clair3 channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::clair3=1.0.6 diff --git a/modules/local/nanopore_shotgun/env-medaka.yml b/modules/local/nanopore_shotgun/env-medaka.yml index cdba783..2772779 100644 --- a/modules/local/nanopore_shotgun/env-medaka.yml +++ b/modules/local/nanopore_shotgun/env-medaka.yml @@ -2,6 +2,5 @@ name: medaka channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::medaka=1.11.3 diff --git a/modules/local/nanopore_shotgun/env-nanopolish.yml b/modules/local/nanopore_shotgun/env-nanopolish.yml index ef8827a..cba5284 100644 --- a/modules/local/nanopore_shotgun/env-nanopolish.yml +++ b/modules/local/nanopore_shotgun/env-nanopolish.yml @@ -2,6 +2,5 @@ name: nanopolish channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::nanopolish=0.14.0 diff --git a/modules/local/nanopore_shotgun/main.nf b/modules/local/nanopore_shotgun/main.nf index 977589c..a20a461 100644 --- a/modules/local/nanopore_shotgun/main.nf +++ b/modules/local/nanopore_shotgun/main.nf @@ -3,7 +3,7 @@ The parameters match those for shotgun data The main difference is that the amplicon processes are heavily coupled to use of the readgroup/bed file/regions - while the shotgun callers are not + while the shotgun processes are not */ process MEDAKA_CONSENSUS { label 'process_medium' @@ -19,11 +19,10 @@ process MEDAKA_CONSENSUS { tuple val(meta), path(bam), path(bai) output: - tuple val(meta), path("${sampleName}.hdf"), emit: hdf + tuple val(meta), path("${meta.id}.hdf"), emit: hdf path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ medaka consensus \\ --model ${params.medaka_model} \\ @@ -31,9 +30,20 @@ process MEDAKA_CONSENSUS { --chunk_len 800 \\ --chunk_ovlp 400 \\ $bam \\ - ${sampleName}.hdf + ${meta.id}.hdf - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + 
"${task.process}": + medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.hdf + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) @@ -43,7 +53,7 @@ process MEDAKA_CONSENSUS { process MEDAKA_VARIANT { label 'process_medium' tag "${meta.id}" - // publishDir "${params.outdir}/articMinionNextflow", pattern: "${sampleName}.vcf", mode: "copy" + // publishDir "${params.outdir}/articMinionNextflow", pattern: "${meta.id}.vcf", mode: "copy" conda "${moduleDir}/env-medaka.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -55,18 +65,28 @@ process MEDAKA_VARIANT { path reference output: - tuple val(meta), path("${sampleName}.vcf"), emit: vcf + tuple val(meta), path("${meta.id}.vcf"), emit: vcf path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ medaka variant \\ $reference \\ $hdf \\ - ${sampleName}.vcf + ${meta.id}.vcf + + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.vcf - # Versions from nf-core # + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) @@ -77,7 +97,7 @@ process NANOPOLISH_VARIANTS { label 'process_high' label 'error_retry' tag "${meta.id}" - // publishDir "${params.outdir}/articMinionNextflow", pattern: "${sampleName}.vcf", mode: "copy" + // publishDir "${params.outdir}/articMinionNextflow", pattern: "${meta.id}.vcf", mode: "copy" conda "${moduleDir}/env-nanopolish.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -92,13 +112,12 @@ process NANOPOLISH_VARIANTS { path reference_stats // Example: MN908947.3:1-29904 output: - tuple val(meta), path("${sampleName}.vcf"), emit: vcf + tuple val(meta), path("${meta.id}.vcf"), emit: vcf path "versions.yml", emit: versions // Should look into if index can be a separate step per fastq file to speed up time // As then instead of doing it 2x (for 2 pools) it'd only be 1x script: - sampleName = "$meta.id" """ refstats=\$(cat $reference_stats) nanopolish index \\ @@ -116,9 +135,20 @@ process NANOPOLISH_VARIANTS { -w "\$refstats" \\ --ploidy 1 \\ -m 0.15 \\ - -o ${sampleName}.vcf + -o ${meta.id}.vcf - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nanopolish: \$(echo \$(nanopolish --version | grep nanopolish | sed 's/nanopolish version //')) + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.vcf + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": nanopolish: \$(echo \$(nanopolish --version | grep nanopolish | sed 's/nanopolish version //')) @@ -129,7 +159,7 @@ process CLAIR3_VARIANTS { label 'process_high' label 'error_retry' tag "${meta.id}" - // publishDir "${params.outdir}/articMinionNextflow", pattern: "${sampleName}.vcf", mode: "copy" + // publishDir "${params.outdir}/articMinionNextflow", pattern: "${meta.id}.vcf", mode: "copy" conda "${moduleDir}/env-clair3.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
@@ -144,11 +174,10 @@ process CLAIR3_VARIANTS { path model_path output: - tuple val(meta), path("${sampleName}.vcf"), emit: vcf + tuple val(meta), path("${meta.id}.vcf"), emit: vcf path "versions.yml", emit: versions script: - sampleName = "$meta.id" // Using some of the nf-flu work to get clair3 working model_suffix = "models/${params.clair3_model}" using_conda = (workflow.containerEngine == null || workflow.containerEngine == '') @@ -170,7 +199,7 @@ process CLAIR3_VARIANTS { --threads=${task.cpus} \\ --platform='ont' \\ --model_path="\$MODEL_PATH" \\ - --output="${sampleName}-out" \\ + --output="${meta.id}-out" \\ --min_coverage=10 \\ --haploid_precise \\ --enable_long_indel \\ @@ -178,10 +207,21 @@ process CLAIR3_VARIANTS { --include_all_ctgs \\ --no_phasing_for_fa - gunzip ${sampleName}-out/merge_output.vcf.gz - ln -s ${sampleName}-out/merge_output.vcf ${sampleName}.vcf + gunzip ${meta.id}-out/merge_output.vcf.gz + ln -s ${meta.id}-out/merge_output.vcf ${meta.id}.vcf + + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clair3: \$(echo \$(run_clair3.sh -v) | sed 's/Clair3 //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.vcf - # Versions from nf-core # + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": clair3: \$(echo \$(run_clair3.sh -v) | sed 's/Clair3 //') diff --git a/modules/local/nanostat/environment.yml b/modules/local/nanostat/environment.yml index 6828529..c27c678 100644 --- a/modules/local/nanostat/environment.yml +++ b/modules/local/nanostat/environment.yml @@ -2,6 +2,5 @@ name: nanostat channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::nanostat=1.6.0 diff --git a/modules/local/nanostat/main.nf b/modules/local/nanostat/main.nf index cb72d9b..a0c4307 100644 --- a/modules/local/nanostat/main.nf +++ b/modules/local/nanostat/main.nf @@ -11,15 +11,24 @@ process NANOSTAT { tuple val(meta), path(fastq) output: - tuple val(meta), path("${sampleName}.nanostat.txt"), emit: stats + tuple val(meta), path("${meta.id}.nanostat.txt"), emit: stats path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ NanoStat \\ --fastq $fastq \\ - > ${sampleName}.nanostat.txt + > ${meta.id}.nanostat.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + NanoStat: \$(NanoStat --version | cut -d ' ' -f 2) + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.nanostat.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/qc/environment.yml b/modules/local/qc/environment.yml index 25ecfba..5c73bf6 100644 --- a/modules/local/qc/environment.yml +++ b/modules/local/qc/environment.yml @@ -1,7 +1,7 @@ -name: qc +name: artic channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::artic=1.2.4 + - bioconda::medaka=1.11.3 diff --git a/modules/local/qc/main.nf b/modules/local/qc/main.nf index d758aa2..e0373aa 100644 --- a/modules/local/qc/main.nf +++ b/modules/local/qc/main.nf @@ -18,11 +18,10 @@ process MAKE_SAMPLE_QC_CSV { path pcr_primers output: - tuple val(meta), path ("${sampleName}.qc.csv"), emit: csv + tuple val(meta), path ("${meta.id}.qc.csv"), emit: csv path "versions.yml", emit: versions script: - sampleName = "$meta.id" // Need to structure args based on what we have def version = workflow.manifest.version def metadataArg = metadata ? 
"--metadata $metadata" : "" @@ -44,9 +43,20 @@ process MAKE_SAMPLE_QC_CSV { $metadataArg \\ $seqArg \\ $pcrArg \\ - --sample $sampleName + --sample $meta.id - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //') + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.qc.csv + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //') @@ -89,7 +99,18 @@ process FINAL_QC_CSV { --reference $reference \\ --version $version - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //') + END_VERSIONS + """ + + stub: + """ + touch overall.qc.csv + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": artic: \$(echo \$(artic --version 2>&1) | sed 's/artic //') diff --git a/modules/local/qualimap/bamqc/environment.yml b/modules/local/qualimap/bamqc/environment.yml index 3f30d0c..77ed7d5 100644 --- a/modules/local/qualimap/bamqc/environment.yml +++ b/modules/local/qualimap/bamqc/environment.yml @@ -2,6 +2,5 @@ name: qualimap_bamqc channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::qualimap=2.3 diff --git a/modules/local/qualimap/bamqc/main.nf b/modules/local/qualimap/bamqc/main.nf index 14b2fc5..6361e1c 100644 --- a/modules/local/qualimap/bamqc/main.nf +++ b/modules/local/qualimap/bamqc/main.nf @@ -11,12 +11,10 @@ process QUALIMAP_BAMQC { tuple val(meta), path(bam) output: - tuple val(meta), path("${sampleName}"), emit: results + tuple val(meta), path("${meta.id}"), emit: results path "versions.yml", emit: versions script: - sampleName = "$meta.id" - def memory = (task.memory.mega*0.8).intValue() + 'M' def strandedness = 'non-strand-specific' if (meta.strandedness == 'forward') { @@ -33,9 +31,21 @@ process QUALIMAP_BAMQC { bamqc \\ -bam $bam \\ -p $strandedness \\ - -outdir $sampleName \\ + -outdir $meta.id \\ -nt ${task.cpus} + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') + END_VERSIONS + """ + + stub: + """ + mkdir ${meta.id} + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": qualimap: \$(echo \$(qualimap 2>&1) | sed 's/^.*QualiMap v.//; s/Built.*\$//') diff --git a/modules/local/samtools/depth/environment.yml b/modules/local/samtools/depth/environment.yml index 3fd0e7c..f00f8ca 100644 --- a/modules/local/samtools/depth/environment.yml +++ b/modules/local/samtools/depth/environment.yml @@ -2,7 +2,6 @@ name: samtools_depth channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::samtools=1.19.2 - bioconda::htslib=1.19.1 diff --git a/modules/local/samtools/depth/main.nf b/modules/local/samtools/depth/main.nf index 1f2b0cb..4197c99 100644 --- a/modules/local/samtools/depth/main.nf +++ b/modules/local/samtools/depth/main.nf @@ -11,20 +11,30 @@ process SAMTOOLS_DEPTH { tuple val(meta), path(bam), path(bai) output: - tuple val(meta), path("${sampleName}.depth.bed"), emit: bed + tuple val(meta), path("${meta.id}.depth.bed"), emit: bed path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ echo -e "chrom\tpos\tdepth" \\ - > ${sampleName}.depth.bed + > ${meta.id}.depth.bed samtools depth \\ -a \\ $bam \\ - >> ${sampleName}.depth.bed + >> ${meta.id}.depth.bed - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > 
versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + ${meta.id}.depth.bed + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/local/samtools/reheader/environment.yml b/modules/local/samtools/reheader/environment.yml index 8e6408d..53a06c8 100644 --- a/modules/local/samtools/reheader/environment.yml +++ b/modules/local/samtools/reheader/environment.yml @@ -2,7 +2,6 @@ name: samtools_reheader channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::samtools=1.19.2 - bioconda::htslib=1.19.1 diff --git a/modules/local/samtools/reheader/main.nf b/modules/local/samtools/reheader/main.nf index b719983..e32659f 100644 --- a/modules/local/samtools/reheader/main.nf +++ b/modules/local/samtools/reheader/main.nf @@ -12,18 +12,29 @@ process SAMTOOLS_REHEADER { val command output: - tuple val(meta), path("${sampleName}.bam"), emit: bam + tuple val(meta), path("${meta.id}.bam"), emit: bam path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ samtools \\ reheader \\ ${command} \\ $bam \\ - > ${sampleName}.bam + > ${meta.id}.bam + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.bam + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/local/snpeff/environment.yml b/modules/local/snpeff/environment.yml index 0c0656a..68553c1 100644 --- a/modules/local/snpeff/environment.yml +++ b/modules/local/snpeff/environment.yml @@ -2,6 +2,5 @@ name: snpeff channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::snpeff=5.2 diff --git a/modules/local/snpeff/main.nf b/modules/local/snpeff/main.nf index b4b895e..0c5908b 100644 --- a/modules/local/snpeff/main.nf +++ b/modules/local/snpeff/main.nf @@ -16,7 +16,7 @@ process SNPEFF_DATABASE { output: path("snpeff_db"), emit: db - path('snpeff.config'), optional: true, emit: config + path("snpeff.config"), optional: true, emit: config path "versions.yml", emit: versions script: @@ -52,6 +52,12 @@ process SNPEFF_DATABASE { -dataDir ./snpeff_db \\ -gff3 \\ ${genome} + + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') + END_VERSIONS """ } else { """ @@ -86,12 +92,25 @@ process SNPEFF_DATABASE { ${genome} fi + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') END_VERSIONS """ } + + stub: + """ + mkdir snpeff_db + touch snpeff.config + + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') + END_VERSIONS + """ } process SNPEFF_ANNOTATE { tag "$meta.id" @@ -115,9 +134,6 @@ process SNPEFF_ANNOTATE { tuple val(meta), path("*.csv"), emit: csv path "versions.yml", emit: versions - when: - task.ext.when == null || task.ext.when - script: // Memory def avail_mem = 6144 @@ -126,7 +142,6 @@ process SNPEFF_ANNOTATE { } else { avail_mem = (task.memory.mega*0.8).intValue() } - sampleName = "$meta.id" // Args for db and config def snpeff_db_command = snpeff_db ? 
"-dataDir \${PWD}/${snpeff_db}" : "" def config_command = config ? "-config \${PWD}/${config}" : "" @@ -139,7 +154,7 @@ process SNPEFF_ANNOTATE { # Run command snpEff \\ -Xmx${avail_mem}M \\ - -csvStats ${sampleName}.csv \\ + -csvStats ${meta.id}.csv \\ $snpeff_db_command \\ $config_command \\ -no-intergenic \\ @@ -147,8 +162,21 @@ process SNPEFF_ANNOTATE { -hgvs1LetterAa \\ ${genome} \\ $vcf \\ - > ${sampleName}.ann.vcf + > ${meta.id}.ann.vcf + + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.ann.vcf + touch ${meta.id}.csv + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') diff --git a/modules/local/visualization/env-artic.yml b/modules/local/visualization/env-artic.yml index 93a6fa6..5c73bf6 100644 --- a/modules/local/visualization/env-artic.yml +++ b/modules/local/visualization/env-artic.yml @@ -2,6 +2,6 @@ name: artic channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::artic=1.2.4 + - bioconda::medaka=1.11.3 diff --git a/modules/local/visualization/env-csvtk.yml b/modules/local/visualization/env-csvtk.yml index f7f40ba..d9b8bf2 100644 --- a/modules/local/visualization/env-csvtk.yml +++ b/modules/local/visualization/env-csvtk.yml @@ -2,6 +2,5 @@ name: csvtk channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::csvtk=0.29.0 diff --git a/modules/local/visualization/env-custom-report.yml b/modules/local/visualization/env-custom-report.yml index 4121641..07b5a11 100644 --- a/modules/local/visualization/env-custom-report.yml +++ b/modules/local/visualization/env-custom-report.yml @@ -2,7 +2,6 @@ name: custom-report channels: - conda-forge - bioconda - - defaults dependencies: - conda-forge::r-base=4.3.2 - conda-forge::r-rmarkdown=2.25 diff --git a/modules/local/visualization/env-pandas.yml b/modules/local/visualization/env-pandas.yml index 6be88b2..3239bd8 100644 --- a/modules/local/visualization/env-pandas.yml +++ b/modules/local/visualization/env-pandas.yml @@ -2,7 +2,6 @@ name: pandas channels: - conda-forge - bioconda - - defaults dependencies: - conda-forge::python=3.10.2 - conda-forge::pandas=1.5.2 diff --git a/modules/local/visualization/main.nf b/modules/local/visualization/main.nf index 8a37e24..abd7d4d 100644 --- a/modules/local/visualization/main.nf +++ b/modules/local/visualization/main.nf @@ -15,18 +15,29 @@ process CREATE_READ_VARIATION_CSV { path reference output: - tuple val(meta), path("${sampleName}_variation.csv"), optional: true, emit: csv + tuple val(meta), path("${meta.id}_variation.csv"), optional: true, emit: csv path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ calc_bam_variation.py \\ --bam $bam \\ --reference $reference \\ - --sample $sampleName + --sample $meta.id - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + calc_bam_variation.py: 0.1.0 + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}_variation.csv + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') @@ -47,19 +58,30 @@ process CREATE_VARIANT_TSV { tuple val(meta), path(vcf) output: - tuple val(meta), path("${sampleName}.vcf.tsv"), optional: true, emit: tsv + tuple val(meta), path("${meta.id}.vcf.tsv"), optional: true, emit: tsv path "versions.yml", 
emit: versions script: - sampleName = "$meta.id" def annotated_arg = params.skip_snpeff ? "" : "--annotated" """ vcf_to_tsv.py \\ - --sample $sampleName \\ + --sample $meta.id \\ $annotated_arg \\ --vcf $vcf - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + vcf_to_tsv.py: 0.1.0 + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.vcf.tsv + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') @@ -109,6 +131,11 @@ process COMBINE_AMPLICON_COVERAGE { df.sort_index(axis=0, inplace=True) df.to_csv('merged_amplicon_depth.csv', index=True) """ + + stub: + """ + touch merged_amplicon_depth.csv + """ } process CSVTK_SAMPLE_AMPLICON_DEPTH { // Just to get the two columns of the amplicon depth file with no header @@ -124,11 +151,10 @@ process CSVTK_SAMPLE_AMPLICON_DEPTH { tuple val(meta), path(bed) output: - tuple val(meta), path("${sampleName}_ampdepth.tsv"), emit: tsv + tuple val(meta), path("${meta.id}_ampdepth.tsv"), emit: tsv path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ csvtk cut \\ -tT \\ @@ -139,9 +165,20 @@ process CSVTK_SAMPLE_AMPLICON_DEPTH { -p "^0\$" \\ -r 0.1 \\ | tail -n +2 \\ - > ${sampleName}_ampdepth.tsv + > ${meta.id}_ampdepth.tsv - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + csvtk: \$(csvtk version | sed 's/csvtk v//g') + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}_ampdepth.tsv + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": csvtk: \$(csvtk version | sed 's/csvtk v//g') @@ -166,14 +203,25 @@ process CREATE_AMPLICON_COMPLETENESS { path "versions.yml", emit: versions script: - sampleName = "$meta.id" """ calc_amplicon_completeness.py \\ - --sample $sampleName \\ + --sample $meta.id \\ --amplicon_bed $amplicon_bed \\ --consensus $consensus - # Versions from nf-core # + # Versions # + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + calc_amplicon_completeness.py: 0.1.0 + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}_amplicon_completeness.csv + + # Versions # cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') @@ -221,4 +269,9 @@ process CREATE_ALL_SAMPLE_SUMMARY_REPORT { -e "library(flexdashboard)" \\ -e "rmarkdown::render('reportDashboard.Rmd', params=list($amp_arg))" """ + + stub: + """ + touch reportDashboard.html + """ } diff --git a/nextflow.config b/nextflow.config index 5819c2a..a45302b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -68,7 +68,6 @@ params { // Other options outdir = 'results' - cache = '' help = false version = false show_hidden_params = false @@ -78,12 +77,14 @@ params { max_memory = '256.GB' max_cpus = 16 max_time = '120.h' - } // Load base.config by default for all pipelines includeConfig 'conf/base.config' +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + profiles { debug { dumpHashes = true @@ -100,10 +101,6 @@ profiles { charliecloud.enabled = false channels = ['conda-forge', 'bioconda', 'defaults'] apptainer.enabled = false - // Use cache if arg given - if ( params.cache ) { - conda.cacheDir = params.cache - } } mamba { conda.enabled = true @@ -114,10 +111,6 @@ profiles { shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false - // Use cache if arg given - if ( params.cache ) 
{ - conda.cacheDir = params.cache - } } docker { docker.enabled = true @@ -141,10 +134,6 @@ profiles { shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false - // Use cache if arg given - if ( params.cache ) { - singularity.cacheDir = params.cache - } } podman { podman.enabled = true @@ -190,7 +179,7 @@ profiles { } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } - nml { includeConfig 'conf/nml.config' } + nml { includeConfig 'conf/nml.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile @@ -201,9 +190,19 @@ docker.registry = 'quay.io' podman.registry = 'quay.io' singularity.registry = 'quay.io' -// Nextflow plugins - When I have a chance to adjust inputs +// Nextflow plugins plugins { - id 'nf-schema@2.0.0' + id 'nf-schema@2.2.1' + id 'nf-prov@1.2.4' +} +prov { + enabled = true + formats { + legacy { + file = "${params.outdir}/pipeline_info/manifest.json" + overwrite = true + } + } } // Export these variables to prevent local Python/R libraries from conflicting with those in the container @@ -251,10 +250,6 @@ manifest { defaultBranch = 'main' } -// Load modules.config for DSL2 module specific options -// ToDo - Probably have to convert more args to the modules config -includeConfig 'conf/modules.config' - // Function to ensure that resource requirements don't go beyond // a maximum limit def check_max(obj, type) { diff --git a/nextflow_schema.json b/nextflow_schema.json index 5f113b4..20af383 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/phac-nml/viralassembly/master/nextflow_schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/phac-nml/viralassembly/main/nextflow_schema.json", "title": "phac-nml/viralassembly pipeline parameters", "description": "Assemble and QC viral reads", "type": "object", - "definitions": { + "$defs": { "input_data_option_choose_1": { "title": "Input data option (choose 1)", "type": "object", @@ -15,6 +15,7 @@ "type": "string", "format": "file-path", "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", @@ -24,60 +25,46 @@ "fastq_pass": { "type": "string", "description": "Path to directory containing either barcoded fastq files or individually named fastq files", - "help_text": "Barcodes must be formatted as 'barcode##' to be found. Fastq files must end in '.fastq' or '.fastq.gz'", "fa_icon": "fas fa-folder-open", - "format": "directory-path" + "help_text": "Barcodes must be formatted as 'barcode##' to be found. Fastq files must end in '.fastq' or '.fastq.gz'" } } }, "important_input_values": { "title": "Important input values", "type": "object", - "fa_icon": "fas fa-sign-out-alt", "description": "Important pipeline values for that are based on which variant caller is being used along with if any metadata is to be added", + "default": "", + "fa_icon": "fas fa-sign-in-alt", "properties": { "variant_caller": { "type": "string", - "enum": [ - "medaka", - "nanopolish", - "clair3" - ], - "fa_icon": "fas fa-code", + "fa_icon": "fas fa-terminal", "description": "Choose which variant caller to use. 
Options are 'medaka', 'clair3', and 'nanopolish'" }, "metadata": { "type": "string", "fa_icon": "fas fa-file-csv", - "format": "file-path", - "mimetype": "text/plain", "description": "Path to metadata TSV file containing at least columns 'sample' and 'barcode'" }, "fast5_pass": { "type": "string", - "format": "directory-path", "fa_icon": "fas fa-folder", - "description": "Path to directory containing fast5 files for nanopolish", - "help_text": "Only for nanopolish" + "description": "Path to directory containing fast5 files for nanopolish only" }, "sequencing_summary": { "type": "string", - "format": "file-path", "fa_icon": "fas fa-file-alt", - "description": "Path to sequencing_summary file needed for nanopolish", - "help_text": "Only for nanopolish" + "description": "Path to sequencing_summary file needed for nanopolish only" } }, - "required": [ - "variant_caller" - ] + "required": ["variant_caller"] }, "variant_model_settings": { "title": "Variant model settings", "type": "object", "description": "Settings pertaining to the specific variant callers models. Be as accurate as possible for best results", "default": "", - "fa_icon": "fas fa-tools", "properties": { "medaka_model": { "type": "string", @@ -91,14 +78,14 @@ }, "clair3_user_variant_model": { "type": "string", - "description": "Path to local clair3 model folder", - "format": "directory-path" + "description": "Path to local clair3 model folder" }, "clair3_no_pool_split": { "type": "boolean", "description": "Pass to not split amplicon pools when running clair3" } - } + }, + "fa_icon": "fas fa-tools" }, "read_filtering_options": { "title": "Read filtering options", @@ -136,25 +123,23 @@ }, "scheme": { "type": "string", - "default": "nCoV-2019", "description": "Name of the pathogen to look for a scheme to use" }, "scheme_version": { "type": "string", - "default": "freed_nml_test_V2", "description": "Name of the amplicon scheme" }, "scheme_repo": { "type": "string", - "default": "https://github.com/DarianHole/primer-schemes.git", + "default": "https://github.com/artic-network/primer-schemes.git", "description": "URL to the online scheme git repo to download" }, "local_scheme": { "type": "string", - "format": "directory-path", "description": "Path to local scheme repo use instead of downloading" } - } + }, + "fa_icon": "fas fa-book-open" }, "artic_minion_options": { "title": "Artic minion options", @@ -183,9 +168,7 @@ }, "use_artic_tool": { "type": "boolean", - "fa_icon": "fas fa-caret-right", - "description": "Pass to run tool artic pipeline instead of the nextflow adaptation this pipeline uses", - "help_text": "Clair3 always runs this" + "description": "Pass to run tool artic pipeline instead of the nextflow adaptation this pipeline uses" } } }, @@ -202,7 +185,6 @@ }, "gff": { "type": "string", - "format": "file-path", "description": "Path to gff3 formatted file to be used to build SnpEff database" } } @@ -212,16 +194,14 @@ "type": "object", "description": "Options pertaining to QC outputs/stats", "default": "", - "fa_icon": "fas fa-star", "properties": { "skip_qc": { "type": "boolean", - "description": "If passed, skip running all QC and reporting steps", - "fa_icon": "fas fa-forward" + "fa_icon": "fas fa-forward", + "description": "If passed, skip running all QC and reporting steps" }, "pcr_primer_bed": { "type": "string", - "format": "file-path", "description": "Path to PCR Primer bed to check for any mutations occuring in defined regions" }, "neg_control_threshold": { @@ -234,40 +214,39 @@ "default": "neg,ntc,blank", 
"description": "Control prefixes separated by a ',' to determine which samples are negative controls" } - } + }, + "fa_icon": "fas fa-star" }, "report_generation_options": { "title": "Report generation options", "type": "object", "description": "Options pertaining to final report generation", "default": "", - "fa_icon": "fas fa-file-code", "properties": { "custom_report": { "type": "boolean", - "description": "Create custom report instead of the MultiQC report (singularity not yet supported for custom reports)", - "fa_icon": "fas fa-file-signature" + "fa_icon": "fas fa-file-signature", + "description": "Create custom report instead of the MultiQC report (singularity not yet supported for custom reports)" }, "multiqc_config_overall": { "type": "string", "default": "$projectDir/assets/multiqc_config_overall.yaml", - "description": "MultiQC overall run config yaml file", - "format": "file-path" + "description": "MultiQC overall run config yaml file" }, "multiqc_config_sample": { "type": "string", "default": "$projectDir/assets/multiqc_config_sample.yaml", - "description": "MultiQC sample config yaml file", - "format": "file-path" + "description": "MultiQC sample config yaml file" } - } + }, + "fa_icon": "fas fa-file-code" }, "generic_options": { "title": "Generic options", "type": "object", - "description": "Less common options for the pipeline, typically set in a config file.", - "help_text": "These options are generic and don't quite all under any other categories", "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", "properties": { "outdir": { "type": "string", @@ -276,19 +255,6 @@ "fa_icon": "fas fa-folder-open", "default": "results" }, - "cache": { - "type": "string", - "help_text": "Path to directory to store or find environments/containers", - "fa_icon": "fas fa-folder", - "format": "directory-path", - "description": "Path to directory containing env/container shared store" - }, - "help": { - "type": "boolean", - "description": "Display help text.", - "fa_icon": "fas fa-question-circle", - "hidden": true - }, "version": { "type": "boolean", "description": "Display version and exit.", @@ -297,78 +263,65 @@ }, "monochrome_logs": { "type": "boolean", - "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "hidden": true + "description": "Do not use coloured log outputs." }, "show_hidden_params": { "type": "boolean", - "hidden": true, "fa_icon": "fas fa-minus-square" - } - }, - "required": [ - "outdir" - ] - }, - "resource_options": { - "title": "Resource options", - "type": "object", - "description": "Options pertaining to resource usage", - "default": "", - "fa_icon": "fas fa-cog", - "properties": { + }, "max_memory": { "type": "string", "default": "256.GB", + "fa_icon": "fas fa-database", "description": "Maximum memory to use in pipeline for a single process. 
Given as a string with int.GB" }, "max_cpus": { "type": "integer", "default": 16, + "fa_icon": "fas fa-microchip", "description": "Maximum number of CPUs available to use for a single process" }, "max_time": { "type": "string", - "description": "Maximum time available for a single process to run with", - "default": "120.h" + "default": "120.h", + "fa_icon": "fas fa-clock", + "description": "Maximum time available for a single process to run with" } - } + }, + "required": ["outdir"] } }, "allOf": [ { - "$ref": "#/definitions/input_data_option_choose_1" - }, - { - "$ref": "#/definitions/important_input_values" + "$ref": "#/$defs/input_data_option_choose_1" }, { - "$ref": "#/definitions/variant_model_settings" + "$ref": "#/$defs/important_input_values" }, { - "$ref": "#/definitions/read_filtering_options" + "$ref": "#/$defs/variant_model_settings" }, { - "$ref": "#/definitions/scheme_options" + "$ref": "#/$defs/read_filtering_options" }, { - "$ref": "#/definitions/artic_minion_options" + "$ref": "#/$defs/scheme_options" }, { - "$ref": "#/definitions/snpeff_options" + "$ref": "#/$defs/artic_minion_options" }, { - "$ref": "#/definitions/qc_options" + "$ref": "#/$defs/snpeff_options" }, { - "$ref": "#/definitions/report_generation_options" + "$ref": "#/$defs/qc_options" }, { - "$ref": "#/definitions/generic_options" + "$ref": "#/$defs/report_generation_options" }, { - "$ref": "#/definitions/resource_options" + "$ref": "#/$defs/generic_options" } ] }
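
A note for reviewers on the recurring pattern in this patch: every local module now pairs its `script:` block with a `stub:` block that simply touches the declared output files and still writes `versions.yml`, so the full workflow DAG can be dry-run without the underlying tools. A minimal sketch of the convention, using a hypothetical module (the process and tool names below are illustrative, not part of this changeset):

    process EXAMPLE_TOOL {
        tag "${meta.id}"

        input:
        tuple val(meta), path(bam)

        output:
        tuple val(meta), path("${meta.id}.txt"), emit: txt
        path "versions.yml", emit: versions

        script:
        """
        example_tool $bam > ${meta.id}.txt

        # Versions #
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            example_tool: \$(example_tool --version)
        END_VERSIONS
        """

        // Mirrors the script outputs so stub runs can still wire every channel
        stub:
        """
        touch ${meta.id}.txt

        # Versions #
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            example_tool: \$(example_tool --version)
        END_VERSIONS
        """
    }

Something like `nextflow run . -profile test,docker -stub-run --outdir results-stub` should then exercise every process cheaply, which is presumably the motivation for adding stubs across all the modules here.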
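Likewise, the `nextflow_schema.json` changes (draft 2020-12, `definitions` renamed to `$defs`, and the new `"schema": "assets/schema_input.json"` pointer on `input`) match the layout that the nf-schema 2.x plugin validates against. A rough sketch of how a pipeline entry point typically consumes this, assuming the main workflow is wired the usual nf-schema way (that file is not shown in this diff, so the wiring below is an assumption):

    include { validateParameters; samplesheetToList } from 'plugin/nf-schema'

    workflow {
        // Fails fast when supplied params violate nextflow_schema.json
        validateParameters()

        // Each --input row is checked against assets/schema_input.json
        ch_input = Channel.fromList(samplesheetToList(params.input, "assets/schema_input.json"))
    }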