From d1e58845f2173783e5b310666bd1364608d2bfda Mon Sep 17 00:00:00 2001 From: Erin Young Date: Tue, 18 Jul 2023 13:20:02 -0600 Subject: [PATCH 1/6] now works with -b and -m --- bin/HeatCluster.py | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/bin/HeatCluster.py b/bin/HeatCluster.py index 01147ec..419efef 100644 --- a/bin/HeatCluster.py +++ b/bin/HeatCluster.py @@ -11,23 +11,21 @@ import scipy.cluster.hierarchy as sch from sklearn.cluster import KMeans from sklearn.metrics import silhouette_score -from io import StringIO -# Read the SNP matrix file -with open("snp_matrix.txt", "r") as infile: - lines = infile.readlines() -numSamples = len(lines) -1 #counts data lines - -# Remove 'snp-dists 0.8.2', '_contigs' and '_genomic', & replace commas with tabs -cleaned_lines = [line.replace('snp-dists 0.8.2\t', '').replace('snp-dists 0.8.2,', ''). - replace(",", "\t").replace('_contigs', '').replace('_genomic', '').replace("^\t", '') - for line in lines] +tabs = pd.read_csv("snp_matrix.txt", nrows=1, sep='\t').shape[1] +commas = pd.read_csv("snp_matrix.txt", nrows=1, sep=',').shape[1] +if tabs > commas: + df = pd.read_csv("snp_matrix.txt", sep='\t', index_col=0) +else: + df = pd.read_csv("snp_matrix.txt", sep=',', index_col=0) -# Combine the cleaned lines into a single string instead of a file -snp_matrix_string = "\n".join(cleaned_lines) +print("Found ", len(df.columns), " samples in snp_matrix.txt") -# Read the tab-delimited string into a DataFrame -df = pd.read_csv(StringIO(snp_matrix_string), sep='\t') +if len(df.columns) <= 2: + print("This matrix has too few samples or has been melted. Sorry!") + exit(0) +else: + numSamples = len(df.columns) #Define colormap for heatmap cmap = 'Reds_r' @@ -45,14 +43,10 @@ sorted_cluster_matrix=sorted_cluster_matrix.reindex(columns=sorted_cluster_matrix.index) #Change output figure size tuple based on number of samples -if (numSamples <= 20): +if (numSamples <= 20): figureSize = (10, 8) -elif (numSamples <= 40): - figureSize = (20, 16) -elif (numSamples <= 60): - figureSize = (30, 24) -else: - figureSize = (40, 32) +else: + figureSize = (round(numSamples / 2,0) , round(numSamples / 2.5,0)) print("\n\nNumber of samples: ", numSamples,"\nFigure size: ", figureSize) # Compute clusters @@ -142,4 +136,4 @@ plt.show() plt.close() -print("Saved heatmap as Heatmap.{pdf,png}") \ No newline at end of file +print("Saved heatmap as SNP_matrix.{pdf,png}") From a870d8479b1c86fde8e6fd7dbbe7db53004297db Mon Sep 17 00:00:00 2001 From: Erin Young Date: Tue, 18 Jul 2023 13:20:48 -0600 Subject: [PATCH 2/6] made snp matrix optional --- modules/grandeur.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/grandeur.nf b/modules/grandeur.nf index 94fb4f8..10019f1 100644 --- a/modules/grandeur.nf +++ b/modules/grandeur.nf @@ -381,9 +381,9 @@ process snp_matrix_heatmap { tuple file(snp_matrix), file(script) output: - path "snp-dists/SNP_matrix*" - path "snp-dists/SNP_matrix_mqc.png" , emit: for_multiqc - path "logs/${task.process}/snp_matrix.${workflow.sessionId}.log" , emit: log_files + path "snp-dists/SNP_matrix*", optional : true + path "snp-dists/SNP_matrix_mqc.png", optional : true, emit: for_multiqc + path "logs/${task.process}/snp_matrix.${workflow.sessionId}.log", emit: log_files shell: ''' From 254a7b7f1c8f2d84a163bd86428bd756353320a0 Mon Sep 17 00:00:00 2001 From: Erin Young Date: Tue, 18 Jul 2023 13:21:04 -0600 Subject: [PATCH 3/6] no longer creates empty file --- modules/quast.nf | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/quast.nf b/modules/quast.nf index 985ec13..8560bce 100644 --- a/modules/quast.nf +++ b/modules/quast.nf @@ -39,8 +39,11 @@ process quast { if [ -f "quast/!{sample}/report.tsv" ] ; then cp quast/!{sample}/report.tsv quast/!{sample}_quast_report.tsv ; fi - head -n 1 quast/!{sample}/transposed_report.tsv | awk '{print "sample\\t" $0 }' > quast/!{sample}/transposed_report.tsv.tmp - tail -n 1 quast/!{sample}/transposed_report.tsv | awk -v sample=!{sample} '{print sample "\\t" $0}' >> quast/!{sample}/transposed_report.tsv.tmp - mv quast/!{sample}/transposed_report.tsv.tmp quast/!{sample}/transposed_report.tsv + if [ -f "quast/!{sample}/transposed_report.tsv" ] + then + head -n 1 quast/!{sample}/transposed_report.tsv | awk '{print "sample\\t" $0 }' > quast/!{sample}/transposed_report.tsv.tmp + tail -n 1 quast/!{sample}/transposed_report.tsv | awk -v sample=!{sample} '{print sample "\\t" $0}' >> quast/!{sample}/transposed_report.tsv.tmp + mv quast/!{sample}/transposed_report.tsv.tmp quast/!{sample}/transposed_report.tsv + fi ''' } From 7250c9fd8bb03b77fba0497aa8b49fa35bce9e6d Mon Sep 17 00:00:00 2001 From: Erin Young Date: Tue, 18 Jul 2023 13:21:39 -0600 Subject: [PATCH 4/6] updates to version 2.0.5 --- modules/shigatyper.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/shigatyper.nf b/modules/shigatyper.nf index 4be5502..92b9183 100644 --- a/modules/shigatyper.nf +++ b/modules/shigatyper.nf @@ -2,7 +2,7 @@ process shigatyper { tag "${sample}" label "medcpus" publishDir params.outdir, mode: 'copy' - container 'staphb/shigatyper:2.0.3' + container 'staphb/shigatyper:2.0.5' stageInMode 'copy' maxForks 10 //#UPHLICA errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} From c9155e28fe67041dad95f7b3228937756115367f Mon Sep 17 00:00:00 2001 From: Erin Young Date: Tue, 18 Jul 2023 13:23:34 -0600 Subject: [PATCH 5/6] updated to version 3.2.20230718 --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index fc1f522..96c7826 100644 --- a/nextflow.config +++ b/nextflow.config @@ -3,7 +3,7 @@ manifest { author = 'Erin Young' homePage = 'https://github.com/UPHL-BioNGS/Grandeur' mainScript = 'grandeur.nf' - version = '3.2.20230711' + version = '3.2.20230718' defaultBranch = 'main' description = 'Grandeur is short-read de novo assembly pipeline with serotyping.' } From 8ec00cb97daf4737ab3ea99c4d94a57e21e61a0a Mon Sep 17 00:00:00 2001 From: Erin Young Date: Tue, 18 Jul 2023 13:32:50 -0600 Subject: [PATCH 6/6] updated to version 2.23.0-2023-07 --- modules/mlst.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/mlst.nf b/modules/mlst.nf index ed882bf..d802ba1 100644 --- a/modules/mlst.nf +++ b/modules/mlst.nf @@ -1,7 +1,7 @@ process mlst { tag "${sample}" publishDir params.outdir, mode: 'copy' - container 'staphb/mlst:2.23.0' + container 'staphb/mlst:2.23.0-2023-07' maxForks 10 //#UPHLICA errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'} //#UPHLICA pod annotation: 'scheduler.illumina.com/presetSize', value: 'standard-medium'