Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trainmultiqc #6

Closed
wants to merge 9 commits into from
70 changes: 70 additions & 0 deletions modules/local/custommodule.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
 * CUSTOMMODULE
 *
 * Turns a set of per-genome JSON reports into three MultiQC "custom content"
 * tables, one row per input file (row id = file name without ".json"):
 *
 *   gc_summary_mqc.tsv     - .contig_percent_{a,c,g,t,n} and .contig_non_acgtn
 *   contig_length_mqc.tsv  - .total_contig_length, .min_contig_length, .max_contig_length
 *   contig_total_mqc.tsv   - .total_contig, .contigs_greater_1k, .contigs_greater_10k
 *
 * input:  path(json) - one or more JSON files staged in the task directory
 * output: tsv        - every "*_mqc.tsv" file written by the script
 *
 * NOTE(review): this file sits next to custommoduletrain.nf in modules/local,
 * so "${moduleDir}/environment.yml" is shared by both processes - confirm that
 * the environment file provides jq.
 */
process CUSTOMMODULE {
    label 'process_single'
    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/jq:1.6':
        'biocontainers/jq:1.6' }"

    input:
    path(json)

    output:
    path "*_mqc.tsv", emit: tsv

    when:
    task.ext.when == null || task.ext.when

    script:
    // "\t" below is expanded to a literal tab by Groovy before bash runs,
    // while "\$" keeps the dollar sign for bash instead of Groovy interpolation.
    """
    # Each table is produced inside one brace group with a single redirection,
    # so a header line can never end up in the wrong output file.
    # The keys below "pconfig:" are indented so they parse as children of pconfig.
    {
        echo "# id: 'base content summary'"
        echo "# section_name: 'base and contigs summary statistics'"
        echo "# format: 'tsv'"
        echo "# plot_type: 'bargraph'"
        echo "# description: 'This plot shows a brief summary of each base content/percentage in the query genomes'"
        echo "# pconfig:"
        echo "#     id: 'base content summary'"
        echo "#     title: 'per_base content and percentage'"
        echo "#     ylab: ''"
        echo "id\tpercent_A\tpercent_C\tpercent_G\tpercent_T\tpercent_N\tcontig_non_ACGTN"
        for i in $json
        do
            # "%s" keeps a '%' in a file name from being read as a printf directive
            printf "%s\t" "\$(basename "\$i" .json)"
            jq -r '[.contig_percent_a, .contig_percent_c, .contig_percent_g, .contig_percent_t, .contig_percent_n, .contig_non_acgtn] | @tsv' "\$i"
        done
    } > gc_summary_mqc.tsv

    {
        echo "# id: 'contigs length statistics'"
        echo "# section_name: 'base and contigs summary statistics'"
        echo "# format: 'tsv'"
        echo "# plot_type: 'heatmap'"
        echo "# description: 'This plot shows a short statistics about the length of contigs in the query genomes'"
        echo "# pconfig:"
        echo "#     id: 'contigs length statistics'"
        echo "#     title: 'contigs length statistics'"
        echo "#     ylab: 'length'"
        echo "id\tTOTALcontiglen\tMINcontiglen\tMAXcontiglen"
        for i in $json
        do
            printf "%s\t" "\$(basename "\$i" .json)"
            jq -r '[.total_contig_length, .min_contig_length, .max_contig_length] | @tsv' "\$i"
        done
    } > contig_length_mqc.tsv

    {
        echo "# id: 'contigs number'"
        echo "# section_name: 'base and contigs summary statistics'"
        echo "# format: 'tsv'"
        echo "# plot_type: 'heatmap'"
        echo "# description: 'This plot shows the total number of contigs in each test with threshold length'"
        echo "# pconfig:"
        echo "#     id: 'number of contigs'"
        echo "#     title: 'number of contigs'"
        echo "#     ylab: 'number'"
        echo "id\ttotalcontigs\tcontigs>1k\tcontigs>10k"
        for i in $json
        do
            printf "%s\t" "\$(basename "\$i" .json)"
            jq -r '[.total_contig, .contigs_greater_1k, .contigs_greater_10k] | @tsv' "\$i"
        done
    } > contig_total_mqc.tsv
    """
}
43 changes: 43 additions & 0 deletions modules/local/custommoduletrain.nf
U13bs1125 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
 * CUSTOMMODULETRAIN
 *
 * Summarises training parameter files into one MultiQC "custom content" table,
 * lastid_mqc.tsv, with one row per input file (row id = file name without
 * ".target.train").
 *
 * For every column the LAST matching line of the file wins:
 *   "substitution percent identity" -> 5th whitespace-separated field
 *   "last -t"                       -> 2nd field with the "-t" prefix removed
 *   "last -a/-A/-b/-B/-S"           -> 3rd field
 *
 * input:  path(param_file) - one or more parameter files staged in the task directory
 * output: tsv              - every "*_mqc.tsv" file written by the script
 *
 * NOTE(review): this file sits next to custommodule.nf in modules/local, so
 * "${moduleDir}/environment.yml" is shared by both processes - confirm that
 * the environment file suits this process too.
 */
process CUSTOMMODULETRAIN {
    label 'process_single'
    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/last:1542--h43eeafb_1':
        'biocontainers/last:1542--h43eeafb_1' }"

    input:
    path(param_file)

    output:
    path "*_mqc.tsv", emit: tsv

    when:
    task.ext.when == null || task.ext.when

    script:
    // "\t" below is expanded to a literal tab by Groovy before bash/awk run,
    // while "\$" keeps the dollar sign for bash/awk instead of Groovy interpolation.
    """
    # The keys below "pconfig:" are indented so they parse as children of pconfig.
    {
        echo "# id: 'alignment parameters'"
        echo "# section_name: 'Alignment parameters and summary'"
        echo "# format: 'tsv'"
        echo "# plot_type: 'table'"
        echo "# description: 'This plot shows the last alignment parameters'"
        echo "# pconfig:"
        echo "#     id: 'alignment parameters'"
        echo "#     title: 'alignment parameters'"
        echo "#     ylab: ''"
        echo "id\tsubstitution_percent_identity\tlast -t\tlast -a\tlast -A\tlast -b\tlast -B\tlast -S"
        for i in $param_file
        do
            # "%s" keeps a '%' in a file name from being read as a printf directive
            printf "%s\t" "\$(basename "\$i" .target.train)"
            # One awk pass per file. Each rule overwrites its variable, so the last
            # matching line wins. END always prints seven fields, so a parameter
            # that is absent yields an empty cell instead of shifting the columns.
            awk '
                BEGIN { OFS = "\t" }
                /substitution percent identity/ { identity = \$5 }
                /last -t/ { temperature = \$2; sub(/-t/, "", temperature) }
                /last -a/ { gap_open_del = \$3 }
                /last -A/ { gap_open_ins = \$3 }
                /last -b/ { gap_ext_del = \$3 }
                /last -B/ { gap_ext_ins = \$3 }
                /last -S/ { strand_flag = \$3 }
                END { print identity, temperature, gap_open_del, gap_open_ins, gap_ext_del, gap_ext_ins, strand_flag }
            ' "\$i"
        done
    } > lastid_mqc.tsv
    """
}
23 changes: 23 additions & 0 deletions modules/nf-core/last/train/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions modules/nf-core/multiqc/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions subworkflows/local/pairalign_m2m/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ workflow PAIRALIGN_M2M {

emit:

train = LAST_TRAIN.out.param_file
U13bs1125 marked this conversation as resolved.
Show resolved Hide resolved
tsv = LAST_TRAIN.out.tsv
U13bs1125 marked this conversation as resolved.
Show resolved Hide resolved
m2m = LAST_LASTAL_M2M.out.maf
m2o = LAST_SPLIT_M2O.out.maf
o2m = LAST_SPLIT_O2M.out.maf
Expand Down
2 changes: 2 additions & 0 deletions subworkflows/local/pairalign_m2o/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ workflow PAIRALIGN_M2O {

emit:

tsv = LAST_TRAIN.out.tsv
train = LAST_TRAIN.out.param_file
m2o = LAST_LASTAL_M2O.out.maf
o2o = LAST_SPLIT_O2O.out.maf
}
Expand Down
15 changes: 13 additions & 2 deletions workflows/pairgenomealign.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/

include { ASSEMBLYSCAN } from '../modules/nf-core/assemblyscan/main'
include { CUSTOMMODULE } from '../modules/local/custommodule.nf'
include { PAIRALIGN_M2M } from '../subworkflows/local/pairalign_m2m/main'
include { SEQTK_CUTN as SEQTK_CUTN_TARGET } from '../modules/nf-core/seqtk/cutn/main'
include { SEQTK_CUTN as SEQTK_CUTN_QUERY } from '../modules/nf-core/seqtk/cutn/main'
Expand Down Expand Up @@ -52,7 +53,15 @@ workflow PAIRGENOMEALIGN {
ASSEMBLYSCAN (
ch_samplesheet
)
ch_multiqc_files = ch_multiqc_files.mix(ASSEMBLYSCAN.out.json.collect{it[1]})

//
// MODULE: CUSTOMMODULE
//
CUSTOMMODULE (
ASSEMBLYSCAN.out.json.collect{it[1]}
)

ch_multiqc_files = ch_multiqc_files.mix(CUSTOMMODULE.out.tsv)
ch_versions = ch_versions.mix(ASSEMBLYSCAN.out.versions.first())

// Prefix id with target genome name before producing alignment files
Expand All @@ -67,6 +76,7 @@ workflow PAIRGENOMEALIGN {
ch_targetgenome,
ch_samplesheet
)
ch_train = PAIRALIGN_M2O.out.tsv
} else {

//
Expand All @@ -76,8 +86,9 @@ workflow PAIRGENOMEALIGN {
ch_targetgenome,
ch_samplesheet
)
ch_train = PAIRALIGN_M2M.out.tsv
}

ch_multiqc_files = ch_multiqc_files.mix(ch_train.collect{it[1]})
// Collate and save software versions
//
softwareVersionsToYAML(ch_versions)
Expand Down
Loading