v1.0/ChIP-seq_pipeline/pipeline-se.cwl

#!/usr/bin/env cwl-runner
 # Workflow header. Key order inside a YAML mapping carries no meaning.
 cwlVersion: v1.0
 class: Workflow
 doc: 'ChIP-seq pipeline - reads: SE, samples: treatment.'
 # Feature requirements declared by this workflow.
 requirements:
   - class: ScatterFeatureRequirement
   - class: SubworkflowFeatureRequirement
   - class: StepInputExpressionRequirement
 inputs:
    # --- Sample data -------------------------------------------------------
    input_treatment_fastq_files:
      doc: Input treatment fastq files
      type: File[]

    # --- Genome / reference resources --------------------------------------
    genome_sizes_file:
      doc: Genome sizes tab-delimited file (used in samtools)
      type: File
    genome_effective_size:
      default: hs
      # Folded scalar: the three lines below are joined with single spaces.
      doc: >-
        Effective genome size used by MACS2. It can be numeric or a shortcut:
        'hs' for human (2.7e9), 'mm' for mouse (1.87e9), 'ce' for C. elegans
        (9e7) and 'dm' for fruitfly (1.2e8). Default: hs
      type: string
    default_adapters_file:
      doc: Adapters file
      type: File
    ENCODE_blacklist_bedfile:
      doc: Bedfile containing ENCODE consensus blacklist regions to be excluded.
      type: File
    genome_ref_first_index_file:
      # The previous value carried literal double quotes, a stray backslash
      # and a trailing space left over from a line-continuation conversion.
      doc: >-
        First index file of Bowtie reference genome with extension 1.ebwt.
        (Note: the rest of the index files MUST be in the same folder)
      type: File
      # Companion index files. Per the CWL secondaryFiles pattern rules each
      # leading '^' strips one extension from the primary file name before
      # the remainder is appended (so "x.1.ebwt" -> "x" -> "x.2.ebwt").
      secondaryFiles:
        - ^^.2.ebwt
        - ^^.3.ebwt
        - ^^.4.ebwt
        - ^^.rev.1.ebwt
        - ^^.rev.2.ebwt

    # --- AutoSql definitions passed to the peak-calling step ---------------
    as_narrowPeak_file:
      doc: Definition narrowPeak file in AutoSql format (used in bedToBigBed)
      type: File
    as_broadPeak_file:
      doc: Definition broadPeak file in AutoSql format (used in bedToBigBed)
      type: File
    # --- Java tool settings ------------------------------------------------
    # The *_java_opts inputs are optional (type "string?"); the jar paths are
    # required plain strings.
    trimmomatic_java_opts:
      type: string?
      doc: 'JVM arguments should be a quoted, space separated list (e.g. "-Xms128m -Xmx512m")'
    trimmomatic_jar_path:
      type: string
      doc: 'Trimmomatic Java jar file'
    picard_java_opts:
      type: string?
      doc: 'JVM arguments should be a quoted, space separated list (e.g. "-Xms128m -Xmx512m")'
    picard_jar_path:
      type: string
      doc: 'Picard Java jar file'
    # --- Thread counts, one required integer per pipeline step -------------
    nthreads_qc:
      type: int
      doc: 'Number of threads required for the 01-qc step'
    nthreads_trimm:
      type: int
      doc: 'Number of threads required for the 02-trim step'
    nthreads_map:
      type: int
      doc: 'Number of threads required for the 03-map step'
    nthreads_peakcall:
      type: int
      doc: 'Number of threads required for the 04-peakcall step'
    nthreads_quant:
      type: int
      doc: 'Number of threads required for the 05-quantification step'
 outputs:
    # --- Results exposed from the qc_treatment step -------------------------
    qc_treatment_count_raw_reads:
      type: File[]
      outputSource: qc_treatment/output_count_raw_reads
      doc: 'Raw read counts of fastq files after QC for treatment'
    qc_treatment_fastqc_data_files:
      type: File[]
      outputSource: qc_treatment/output_fastqc_data_files
      doc: 'FastQC data files'
    qc_treatment_fastqc_report_files:
      type: File[]
      outputSource: qc_treatment/output_fastqc_report_files
      doc: 'FastQC report files'
    qc_treatment_diff_counts:
      type: File[]
      outputSource: qc_treatment/output_diff_counts
      doc: 'Diff file between number of raw reads and number of reads counted by FASTQC, for treatment'
    # --- Results exposed from the trimm_treatment step ----------------------
    # Both outputs come from the treatment samples; this workflow has no
    # control branch, so the docs say "treatment".
    trimm_treatment_fastq_files:
      doc: FASTQ files after trimming step for treatment
      type: File[]
      outputSource: trimm_treatment/output_data_fastq_trimmed_files
    trimm_treatment_raw_counts:
      doc: Raw read counts for fastq files after trimming for treatment
      type: File[]
      outputSource: trimm_treatment/output_trimmed_fastq_read_count
    # --- Results exposed from the map_treatment step ------------------------
    map_treatment_mark_duplicates_files:
      doc: Summary of duplicates removed with Picard tool MarkDuplicates (for multiple reads aligned to the same positions) for treatment
      type: File[]
      outputSource: map_treatment/output_picard_mark_duplicates_files
    map_treatment_dedup_bam_files:
      doc: Filtered BAM files (post-processing end point) for treatment
      type: File[]
      outputSource: map_treatment/output_data_sorted_dedup_bam_files
    map_treatment_dups_marked_bam_files:
      doc: Filtered BAM files with duplicates marked (post-processing end point) for treatment
      type: File[]
      outputSource: map_treatment/output_data_sorted_dups_marked_bam_files
    map_treatment_pbc_files:
      # Sourced from the treatment mapping step, so the doc says "treatment".
      doc: PCR Bottleneck Coefficient files (used to flag samples when pbc<0.5) for treatment
      type: File[]
      outputSource: map_treatment/output_pbc_files
    map_treatment_preseq_percentage_uniq_reads:
      doc: Preseq percentage of uniq reads
      type: File[]
      outputSource: map_treatment/output_percentage_uniq_reads
    map_treatment_read_count_mapped:
      doc: Read counts of the mapped BAM files
      type: File[]
      outputSource: map_treatment/output_read_count_mapped
    map_treatment_bowtie_log_files:
      doc: Bowtie log file with mapping stats for treatment
      type: File[]
      outputSource: map_treatment/output_bowtie_log
    map_treatment_preseq_c_curve_files:
      doc: Preseq c_curve output files for treatment
      type: File[]
      outputSource: map_treatment/output_preseq_c_curve_files
    # --- Results exposed from the peak_call_treatment step ------------------
    # NOTE(review): the doc text "Peak counts within replicate" is attached to
    # four different outputs below (the read_in_*peak_count_within_replicate
    # and *peak_count pairs). It looks copy-pasted; confirm the intended
    # wording against 04-peakcall.cwl before changing it.
    peak_call_treatment_spp_x_cross_corr:
      type: File[]
      outputSource: peak_call_treatment/output_spp_x_cross_corr
      doc: 'SPP strand cross correlation summary'
    peak_call_treatment_spp_x_cross_corr_plot:
      type: File[]
      outputSource: peak_call_treatment/output_spp_cross_corr_plot
      doc: 'SPP strand cross correlation plot'
    peak_call_treatment_filtered_read_count_file:
      type: File[]
      outputSource: peak_call_treatment/output_filtered_read_count_file
      doc: 'Filtered read count after peak calling'

    # narrowPeak results
    peak_call_treatment_narrowpeak_peak_xls_file:
      type: File[]
      outputSource: peak_call_treatment/output_narrowpeak_xls_file
      doc: 'Peak calling report file'
    peak_call_treatment_read_in_narrowpeak_count_within_replicate:
      type: File[]
      outputSource: peak_call_treatment/output_read_in_narrowpeak_count_within_replicate
      doc: 'Peak counts within replicate'
    peak_call_treatment_narrowpeak_count:
      type: File[]
      outputSource: peak_call_treatment/output_narrowpeak_count
      doc: 'Peak counts within replicate'
    peak_call_treatment_narrowpeak_file:
      type: File[]
      outputSource: peak_call_treatment/output_narrowpeak_file
      doc: 'Peaks in narrowPeak file format'
    peak_call_treatment_narrowpeak_summits_file:
      # Array whose items are each either null or an array of File.
      type:
        type: array
        items:
          - 'null'
          - type: array
            items: File
      outputSource: peak_call_treatment/output_narrowpeak_summits_file
      doc: 'Peaks summits in bedfile format'
    peak_call_treatment_narrowpeak_bigbed_file:
      type: File[]
      outputSource: peak_call_treatment/output_narrowpeak_bigbed_file
      doc: 'narrowPeaks in bigBed format'

    # broadPeak results
    peak_call_treatment_read_in_broadpeak_count_within_replicate:
      type: File[]
      outputSource: peak_call_treatment/output_read_in_broadpeak_count_within_replicate
      doc: 'Peak counts within replicate'
    peak_call_treatment_broadpeak_count:
      type: File[]
      outputSource: peak_call_treatment/output_broadpeak_count
      doc: 'Peak counts within replicate'
    peak_call_treatment_broadpeak_file:
      type: File[]
      outputSource: peak_call_treatment/output_broadpeak_file
      doc: 'Peaks in broadPeak file format'
    peak_call_treatment_broadpeak_bigbed_file:
      type: File[]
      outputSource: peak_call_treatment/output_broadpeak_bigbed_file
      doc: 'broadPeaks in bigBed format'
    # --- Results exposed from the quant step --------------------------------
    quant_bigwig_raw_files:
      type: File[]
      outputSource: quant/bigwig_raw_files
      doc: 'Raw reads bigWig (signal) files'
    quant_bigwig_rpkm_extended_files:
      type: File[]
      outputSource: quant/bigwig_rpkm_extended_files
      doc: 'Fragment extended reads bigWig (signal) files'
 steps:
    # Step 1: runs 01-qc-se.cwl on the treatment FASTQ files.
    qc_treatment:
      run: 01-qc-se.cwl
      in:
        input_fastq_files: input_treatment_fastq_files
        default_adapters_file: default_adapters_file
        nthreads: nthreads_qc
      out:
        - output_count_raw_reads
        - output_diff_counts
        - output_fastqc_report_files
        - output_fastqc_data_files
        # Consumed by trimm_treatment as its input_adapters_files.
        - output_custom_adapters
    # Step 2: runs 02-trim-se.cwl on the treatment FASTQ files, using the
    # adapters files emitted by qc_treatment.
    trimm_treatment:
      run: 02-trim-se.cwl
      in:
        input_read1_fastq_files: input_treatment_fastq_files
        input_adapters_files: qc_treatment/output_custom_adapters
        trimmomatic_jar_path: trimmomatic_jar_path
        trimmomatic_java_opts: trimmomatic_java_opts
        nthreads: nthreads_trimm
      out:
        - output_data_fastq_trimmed_files
        - output_trimmed_fastq_read_count
    # Step 3: runs 03-map-se.cwl on the trimmed FASTQ files from
    # trimm_treatment.
    map_treatment:
      run: 03-map-se.cwl
      in:
        input_fastq_files: trimm_treatment/output_data_fastq_trimmed_files
        genome_ref_first_index_file: genome_ref_first_index_file
        genome_sizes_file: genome_sizes_file
        ENCODE_blacklist_bedfile: ENCODE_blacklist_bedfile
        picard_jar_path: picard_jar_path
        picard_java_opts: picard_java_opts
        nthreads: nthreads_map
      out:
        # The dedup BAM files feed both peak_call_treatment and quant.
        - output_data_sorted_dedup_bam_files
        - output_data_sorted_dups_marked_bam_files
        - output_picard_mark_duplicates_files
        - output_pbc_files
        - output_bowtie_log
        - output_preseq_c_curve_files
        - output_percentage_uniq_reads
        - output_read_count_mapped
    # Step 4: runs 04-peakcall.cwl on the dedup BAM files from map_treatment.
    peak_call_treatment:
      run: 04-peakcall.cwl
      in:
        input_bam_files: map_treatment/output_data_sorted_dedup_bam_files
        input_genome_sizes: genome_sizes_file
        genome_effective_size: genome_effective_size
        as_narrowPeak_file: as_narrowPeak_file
        as_broadPeak_file: as_broadPeak_file
        nthreads: nthreads_peakcall
      out:
        - output_spp_x_cross_corr
        - output_spp_cross_corr_plot
        - output_filtered_read_count_file
        - output_read_in_narrowpeak_count_within_replicate
        - output_narrowpeak_count
        - output_narrowpeak_file
        - output_narrowpeak_summits_file
        - output_narrowpeak_bigbed_file
        - output_narrowpeak_xls_file
        - output_read_in_broadpeak_count_within_replicate
        - output_broadpeak_count
        - output_broadpeak_file
        # NOTE(review): listed here but not referenced by any workflow output
        # in this file - confirm 04-peakcall.cwl actually declares it.
        - output_broadpeak_summits_file
        - output_broadpeak_bigbed_file
    # Step 5: runs 05-quantification.cwl on the dedup BAM files from
    # map_treatment.
    quant:
      run: 05-quantification.cwl
      in:
        input_trt_bam_files: map_treatment/output_data_sorted_dedup_bam_files
        input_genome_sizes: genome_sizes_file
        nthreads: nthreads_quant
      out:
        - bigwig_raw_files
        - bigwig_rpkm_extended_files