Skip to content

Commit

Permalink
Merge pull request #92 from UPHL-BioNGS/update20230717
Browse files (browse the repository at this point in the history)
Update 20230717
  • Loading branch information
erinyoung authored Jul 18, 2023
2 parents 986b26d + 8ec00cb commit 8de2193
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 31 deletions.
38 changes: 16 additions & 22 deletions bin/HeatCluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,21 @@
import scipy.cluster.hierarchy as sch
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from io import StringIO

# Read the SNP matrix file
with open("snp_matrix.txt", "r") as infile:
lines = infile.readlines()
numSamples = len(lines) -1 #counts data lines

# Remove 'snp-dists 0.8.2', '_contigs' and '_genomic', & replace commas with tabs
cleaned_lines = [line.replace('snp-dists 0.8.2\t', '').replace('snp-dists 0.8.2,', '').
replace(",", "\t").replace('_contigs', '').replace('_genomic', '').replace("^\t", '')
for line in lines]
tabs = pd.read_csv("snp_matrix.txt", nrows=1, sep='\t').shape[1]
commas = pd.read_csv("snp_matrix.txt", nrows=1, sep=',').shape[1]
if tabs > commas:
df = pd.read_csv("snp_matrix.txt", sep='\t', index_col=0)
else:
df = pd.read_csv("snp_matrix.txt", sep=',', index_col=0)

# Combine the cleaned lines into a single string instead of a file
snp_matrix_string = "\n".join(cleaned_lines)
print("Found ", len(df.columns), " samples in snp_matrix.txt")

# Read the tab-delimited string into a DataFrame
df = pd.read_csv(StringIO(snp_matrix_string), sep='\t')
if len(df.columns) <= 2:
print("This matrix has too few samples or has been melted. Sorry!")
exit(0)
else:
numSamples = len(df.columns)

#Define colormap for heatmap
cmap = 'Reds_r'
Expand All @@ -45,14 +43,10 @@
sorted_cluster_matrix=sorted_cluster_matrix.reindex(columns=sorted_cluster_matrix.index)

#Change output figure size tuple based on number of samples
if (numSamples <= 20):
if (numSamples <= 20):
figureSize = (10, 8)
elif (numSamples <= 40):
figureSize = (20, 16)
elif (numSamples <= 60):
figureSize = (30, 24)
else:
figureSize = (40, 32)
else:
figureSize = (round(numSamples / 2,0) , round(numSamples / 2.5,0))
print("\n\nNumber of samples: ", numSamples,"\nFigure size: ", figureSize)

# Compute clusters
Expand Down Expand Up @@ -142,4 +136,4 @@
plt.show()
plt.close()

print("Saved heatmap as Heatmap.{pdf,png}")
print("Saved heatmap as SNP_matrix.{pdf,png}")
6 changes: 3 additions & 3 deletions modules/grandeur.nf
Original file line number Diff line number Diff line change
Expand Up @@ -381,9 +381,9 @@ process snp_matrix_heatmap {
tuple file(snp_matrix), file(script)

output:
path "snp-dists/SNP_matrix*"
path "snp-dists/SNP_matrix_mqc.png" , emit: for_multiqc
path "logs/${task.process}/snp_matrix.${workflow.sessionId}.log" , emit: log_files
path "snp-dists/SNP_matrix*", optional : true
path "snp-dists/SNP_matrix_mqc.png", optional : true, emit: for_multiqc
path "logs/${task.process}/snp_matrix.${workflow.sessionId}.log", emit: log_files

shell:
'''
Expand Down
2 changes: 1 addition & 1 deletion modules/mlst.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
process mlst {
tag "${sample}"
publishDir params.outdir, mode: 'copy'
container 'staphb/mlst:2.23.0'
container 'staphb/mlst:2.23.0-2023-07'
maxForks 10
//#UPHLICA errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}
//#UPHLICA pod annotation: 'scheduler.illumina.com/presetSize', value: 'standard-medium'
Expand Down
9 changes: 6 additions & 3 deletions modules/quast.nf
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,11 @@ process quast {
if [ -f "quast/!{sample}/report.tsv" ] ; then cp quast/!{sample}/report.tsv quast/!{sample}_quast_report.tsv ; fi
head -n 1 quast/!{sample}/transposed_report.tsv | awk '{print "sample\\t" $0 }' > quast/!{sample}/transposed_report.tsv.tmp
tail -n 1 quast/!{sample}/transposed_report.tsv | awk -v sample=!{sample} '{print sample "\\t" $0}' >> quast/!{sample}/transposed_report.tsv.tmp
mv quast/!{sample}/transposed_report.tsv.tmp quast/!{sample}/transposed_report.tsv
if [ -f "quast/!{sample}/transposed_report.tsv" ]
then
head -n 1 quast/!{sample}/transposed_report.tsv | awk '{print "sample\\t" $0 }' > quast/!{sample}/transposed_report.tsv.tmp
tail -n 1 quast/!{sample}/transposed_report.tsv | awk -v sample=!{sample} '{print sample "\\t" $0}' >> quast/!{sample}/transposed_report.tsv.tmp
mv quast/!{sample}/transposed_report.tsv.tmp quast/!{sample}/transposed_report.tsv
fi
'''
}
2 changes: 1 addition & 1 deletion modules/shigatyper.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ process shigatyper {
tag "${sample}"
label "medcpus"
publishDir params.outdir, mode: 'copy'
container 'staphb/shigatyper:2.0.3'
container 'staphb/shigatyper:2.0.5'
stageInMode 'copy'
maxForks 10
//#UPHLICA errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ manifest {
author = 'Erin Young'
homePage = 'https://github.com/UPHL-BioNGS/Grandeur'
mainScript = 'grandeur.nf'
version = '3.2.20230711'
version = '3.2.20230718'
defaultBranch = 'main'
description = 'Grandeur is short-read de novo assembly pipeline with serotyping.'
}
Expand Down

0 comments on commit 8de2193

Please sign in to comment.