# conf-files/new_multiqc_config.yaml

# HTML blurb for the report. The folded block scalar (>) joins the lines
# below with single spaces, so the wrapping here does not affect the value.
report_comment: >
  This report has been generated by the
  <a href="https://github.com/nf-core/viralrecon" target="_blank">nf-core/viralrecon</a>
  analysis pipeline. For information about how to interpret these results,
  please see the
  <a href="https://github.com/nf-core/viralrecon" target="_blank">documentation</a>.

# Format used for MultiQC's exported data files (see the MultiQC configuration docs).
data_format: "yaml"

# Row limit for report tables.
# NOTE(review): presumably set high so large runs are still rendered as tables — confirm against MultiQC docs.
max_table_rows: 10000

# Coverage cut-offs (1x, 10x, 100x) passed to the mosdepth module.
mosdepth_config:
  general_stats_coverage: [1, 10, 100]

# Modules enabled for this report (MultiQC `run_modules` option).
# Every module referenced in `module_order` is listed here.
run_modules:
  - "custom_content"
  - "fastqc"
  - "fastp"
  - "kraken"
  - "bowtie2"
  - "samtools"
  - "mosdepth"
  - "bcftools"
  - "snpeff"
  - "quast"
  - "pangolin"
  - "cutadapt"

# Module sections of the report, in display order, each with a custom title (`name`)
# and description (`info`). A module that appears more than once (samtools, quast)
# has one entry per input set: each entry carries its own `anchor` and is limited
# to the files matched by its `path_filters`.
module_order:
  # --- PREPROCESS: raw-read QC, adapter/quality trimming, Kraken 2 classification ---
  - fastqc:
      name: "PREPROCESS: FastQC (raw reads)"
      info: "This section of the report shows FastQC results for the raw reads before adapter trimming."
      path_filters:
        - "./fastqc/*.zip"
  - fastp:
      name: "PREPROCESS: fastp (adapter trimming)"
      info: "This section of the report shows fastp results for reads after adapter and quality trimming."
  - kraken:
      name: "PREPROCESS: Kraken 2"
      info: "This section of the report shows Kraken 2 classification results for reads after adapter trimming with fastp."
  # --- VARIANTS: mapping, alignment statistics, coverage and variant-calling results ---
  - bowtie2:
      name: "VARIANTS: Bowtie 2"
      info: "This section of the report shows Bowtie 2 mapping results for reads after adapter trimming and quality trimming."
  # samtools is reported three times, once per alignment stage (separate directories).
  - samtools:
      name: "VARIANTS: SAMTools (raw)"
      anchor: "samtools_bowtie2"
      info: "This section of the report shows SAMTools counts/statistics after mapping with Bowtie 2."
      path_filters:
        - "./bowtie2/*"
  - samtools:
      name: "VARIANTS: SAMTools (iVar)"
      anchor: "samtools_ivar"
      info: "This section of the report shows SAMTools counts/statistics after primer sequence removal with iVar."
      path_filters:
        - "./ivar_trim/*"
  - samtools:
      name: "VARIANTS: SAMTools (MarkDuplicates)"
      anchor: "samtools_markduplicates"
      info: "This section of the report shows SAMTools counts/statistics after duplicate removal with picard MarkDuplicates."
      path_filters:
        - "./picard_markduplicates/*"
  - mosdepth:
      name: "VARIANTS: mosdepth"
      info: "This section of the report shows genome-wide coverage metrics generated by mosdepth."
  # The next four entries all read from ./variants/ and are told apart by file extension.
  - pangolin:
      name: "VARIANTS: Pangolin"
      info: "This section of the report shows Pangolin lineage analysis results for the called variants."
      path_filters:
        - "./variants/*.pangolin.csv"
  - bcftools:
      name: "VARIANTS: BCFTools"
      info: "This section of the report shows BCFTools stats results for the called variants."
      path_filters:
        - "./variants/*.txt"
  - snpeff:
      name: "VARIANTS: SnpEff"
      info: "This section of the report shows SnpEff results for the called variants."
      path_filters:
        - "./variants/*.csv"
  - quast:
      name: "VARIANTS: QUAST"
      anchor: "quast_variants"
      info: "This section of the report shows QUAST QC results for the consensus sequence."
      path_filters:
        - "./variants/*.tsv"
  # --- ASSEMBLY: primer trimming and one QUAST section per de novo assembler ---
  - cutadapt:
      name: "ASSEMBLY: Cutadapt (primer trimming)"
      info: "This section of the report shows Cutadapt results for reads after primer sequence trimming."
  - quast:
      name: "ASSEMBLY: QUAST (SPAdes)"
      anchor: "quast_spades"
      info: "This section of the report shows QUAST results from SPAdes de novo assembly."
      path_filters:
        - "./assembly_spades/*.tsv"
  - quast:
      name: "ASSEMBLY: QUAST (Unicycler)"
      anchor: "quast_unicycler"
      info: "This section of the report shows QUAST results from Unicycler de novo assembly."
      path_filters:
        - "./assembly_unicycler/*.tsv"
  - quast:
      name: "ASSEMBLY: QUAST (minia)"
      anchor: "quast_minia"
      info: "This section of the report shows QUAST results from minia de novo assembly."
      path_filters:
        - "./assembly_minia/*.tsv"

# Placement of individual report sections relative to the others.
report_section_order:
  # Relative placement: each section goes before the one it names.
  summary_assembly_metrics: { before: summary_variants_metrics }
  amplicon_heatmap: { before: summary_assembly_metrics }
  ivar_variants: { before: mosdepth }
  # Absolute placement by numeric order.
  # NOTE(review): large negative values presumably push these sections to the end of the report — confirm.
  software_versions: { order: -1001 }
  nf-core-viralrecon-summary: { order: -1002 }

# Option for the bcftools module.
# NOTE(review): presumably merges complementary substitution types in the stats plot — confirm against MultiQC docs.
bcftools: { collapse_complementary_changes: true }

# Custom-content sections (plots and tables) configured for this report.
# For the header/plot option layout see
# https://github.com/ewels/MultiQC_TestData/blob/master/data/custom_content/with_config/table_headerconfig/multiqc_config.yaml
custom_data:
  # Heatmap of per-amplicon coverage across samples.
  amplicon_heatmap:
    section_name: "Amplicon coverage heatmap"
    description: "Heatmap to show median log10(coverage+1) per amplicon across samples."
    plot_type: "heatmap"
    pconfig:
      id: "amplicon_heatmap"
      xTitle: "Amplicon"
      namespace: "Heatmap to show median log10(coverage+1) per amplicon across samples"
      square: false
      # Colour scale as [position, hex colour] pairs, positions running 0 to 1
      # in steps of 0.05 (dark purple at 0 through to yellow at 1).
      colstops:
        - [0, "#440154"]
        - [0.05, "#471365"]
        - [0.1, "#482475"]
        - [0.15, "#463480"]
        - [0.2, "#414487"]
        - [0.25, "#3b528b"]
        - [0.3, "#355f8d"]
        - [0.35, "#2f6c8e"]
        - [0.4, "#2a788e"]
        - [0.45, "#25848e"]
        - [0.5, "#21918c"]
        - [0.55, "#1e9c89"]
        - [0.6, "#22a884"]
        - [0.65, "#2fb47c"]
        - [0.7, "#44bf70"]
        - [0.75, "#5ec962"]
        - [0.8, "#7ad151"]
        - [0.85, "#9bd93c"]
        - [0.9, "#bddf26"]
        - [0.95, "#dfe318"]
        - [1, "#fde725"]
  # Per-sample summary table for the variant-calling branch of the pipeline.
  # Each header key is a column title; `description` is the column's help text and
  # `format` is its number format ("{:,.0f}" for counts, "{:,.2f}" for
  # percentages and other fractional values).
  summary_variants_metrics:
    section_name: "Variant calling metrics"
    description: "generated by the nf-core/viralrecon pipeline"
    plot_type: "table"
    headers:
      "# Input reads":
        description: "Total number of reads in raw fastq file"
        format: "{:,.0f}"
      # This column is a percentage, so the description says so (it previously read "Total number").
      "% Non-host reads (Kraken 2)":
        description: "Percentage of non-host reads identified by Kraken2"
        format: "{:,.2f}"
      "# Trimmed reads (fastp)":
        description: "Total number of reads remaining after adapter/quality trimming with fastp"
        format: "{:,.0f}"
      "# Mapped reads":
        description: "Total number of Bowtie2 mapped reads relative to the viral genome"
        format: "{:,.0f}"
      "% Mapped reads":
        description: "Percentage of Bowtie2 mapped reads relative to the viral genome"
        format: "{:,.2f}"
      "# Trimmed reads (iVar)":
        description: "Total number of reads remaining after primer trimming with iVar"
        format: "{:,.0f}"
      "Coverage median":
        description: "Median coverage calculated by mosdepth"
        format: "{:,.2f}"
      "% Coverage > 1x":
        description: "Coverage > 1x calculated by mosdepth"
        format: "{:,.2f}"
      "% Coverage > 10x":
        description: "Coverage > 10x calculated by mosdepth"
        format: "{:,.2f}"
      "# SNPs":
        description: "Total number of SNPs"
        format: "{:,.0f}"
      "# INDELs":
        description: "Total number of INDELs"
        format: "{:,.0f}"
      "# Missense variants":
        description: "Total number of variants identified as missense mutations with SnpEff"
        format: "{:,.0f}"
      "# Ns per 100kb consensus":
        description: "Number of N bases per 100kb in consensus sequence"
        format: "{:,.2f}"
      # Text columns: no number format.
      "Pangolin lineage":
        description: "Pangolin lineage inferred from the consensus sequence"
      "Nextclade clade":
        description: "Nextclade clade inferred from the consensus sequence"
    pconfig:
      id: "summary_variants_metrics_plot"
      table_title: "Variant calling metrics"
      namespace: "Variant calling metrics"
      # NOTE(review): presumably keeps columns that are not listed under `headers` — confirm.
      only_defined_headers: false
      # Table-level number format, alongside the per-column formats above.
      format: "{:.0f}"
  # Per-sample summary table for the de novo assembly branch of the pipeline.
  # Each header key is a column title; `description` is the column's help text and
  # `format` is its number format. The QUAST-derived columns repeat once per
  # assembler (SPAdes, Unicycler, minia).
  summary_assembly_metrics:
    section_name: "De novo assembly metrics"
    description: "generated by the nf-core/viralrecon pipeline"
    plot_type: "table"
    headers:
      "# Input reads":
        description: "Total number of reads in raw fastq file"
        format: "{:,.0f}"
      # Cutadapt performs primer trimming in this pipeline (see the cutadapt entry
      # in module_order); the description previously described fastp instead.
      "# Trimmed reads (Cutadapt)":
        description: "Total number of reads remaining after primer sequence trimming with Cutadapt"
        format: "{:,.0f}"
      # This column is a percentage, so the description says so (it previously read "Total number").
      "% Non-host reads (Kraken 2)":
        description: "Percentage of non-host reads identified by Kraken2"
        format: "{:,.2f}"
      "# Contigs (SPAdes)":
        description: "Total number of contigs in SPAdes assembly as calculated by QUAST"
        format: "{:,.0f}"
      "Largest contig (SPAdes)":
        description: "Size of largest contig in SPAdes assembly as calculated by QUAST"
        format: "{:,.0f}"
      "% Genome fraction (SPAdes)":
        description: "% genome fraction for SPAdes assembly as calculated by QUAST"
        format: "{:,.2f}"
      "N50 (SPAdes)":
        description: "N50 metric for SPAdes assembly as calculated by QUAST"
        format: "{:,.2f}"
      "# Contigs (Unicycler)":
        description: "Total number of contigs in Unicycler assembly as calculated by QUAST"
        format: "{:,.0f}"
      "Largest contig (Unicycler)":
        description: "Size of largest contig in Unicycler assembly as calculated by QUAST"
        format: "{:,.0f}"
      "% Genome fraction (Unicycler)":
        description: "% genome fraction for Unicycler assembly as calculated by QUAST"
        format: "{:,.2f}"
      "N50 (Unicycler)":
        description: "N50 metric for Unicycler assembly as calculated by QUAST"
        format: "{:,.2f}"
      "# Contigs (minia)":
        description: "Total number of contigs in minia assembly as calculated by QUAST"
        format: "{:,.0f}"
      "Largest contig (minia)":
        description: "Size of largest contig in minia assembly as calculated by QUAST"
        format: "{:,.0f}"
      "% Genome fraction (minia)":
        description: "% genome fraction for minia assembly as calculated by QUAST"
        format: "{:,.2f}"
      "N50 (minia)":
        description: "N50 metric for minia assembly as calculated by QUAST"
        format: "{:,.2f}"
    pconfig:
      id: "summary_assembly_metrics_plot"
      table_title: "De novo assembly metrics"
      namespace: "De novo assembly metrics"
      # NOTE(review): presumably keeps columns that are not listed under `headers` — confirm.
      only_defined_headers: false
      # Table-level number format, alongside the per-column formats above.
      format: "{:.0f}"
  # Warning table: samples dropped because no reads remained after adapter trimming.
  fail_mapped_reads:
    section_name: "WARNING: Fail Reads Check"
    description: "List of samples that had no reads after adapter trimming, and hence were ignored for the downstream processing steps."
    plot_type: "table"
    pconfig:
      id: "fail_mapped_reads_table"
      table_title: "Samples failed read threshold"
      namespace: "Samples failed read threshold"
      format: "{:,.0f}"
  # Warning table: samples dropped for falling below the '--min_mapped_reads' threshold.
  fail_mapped_samples:
    section_name: "WARNING: Fail Alignment Check"
    description: "List of samples that failed the Bowtie2 minimum mapped reads threshold specified via the '--min_mapped_reads' parameter, and hence were ignored for the downstream processing steps."
    plot_type: "table"
    pconfig:
      id: "fail_mapped_samples_table"
      table_title: "Samples failed mapped read threshold"
      # Kept identical to table_title, as in every other table in this file
      # (it previously read "mapping read").
      namespace: "Samples failed mapped read threshold"
      format: "{:,.0f}"

# Extra sample-name cleaning rules, on top of MultiQC's defaults.
# NOTE(review): per MultiQC docs, names are cut at the first match of an `exts` entry — confirm.
extra_fn_clean_exts: [".markduplicates", ".unclassified", "_MN908947.3"]

# NOTE(review): per MultiQC docs, `trim` entries are stripped from the ends of sample names — confirm.
extra_fn_clean_trim: ["Consensus_"]

# Customise the module search patterns to speed up execution time:
#   - Skip module sub-tools that we are not interested in
#   - Replace file-content searching with filename pattern searching
#   - Don't add anything that is the same as the MultiQC default
# See https://multiqc.info/docs/#optimise-file-search-patterns for details.
sp:
  fastp: { fn: "*.fastp.json" }
  bowtie2: { fn: "*.bowtie2.log" }
  mosdepth/global_dist: { fn: "*.global.dist.txt" }
  cutadapt: { fn: "*.cutadapt.log" }