Skip to content

Commit

Permalink
Merge pull request #13 from phac-nml/fix/missing-rgi-results
Browse files Browse the repository at this point in the history
Fix/missing rgi results
  • Loading branch information
JeffreyThiessen authored Mar 30, 2021
2 parents 51ed11c + 702ed20 commit 1571af2
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 74 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* The plugin will now save the additional fields provided by the newer staramr version in the metadata table.
* Updated RGI version to `5.1.1`.
* Updated Shovill to version `1.1.0`.
* Fixed bug where empty RGI results would lead to metadata not being written (#8)

# 0.1.0

Expand Down
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@

<groupId>ca.corefacility.bioinformatics.irida</groupId>
<artifactId>amr-detection</artifactId>
<version>0.3.0-SNAPSHOT</version>
<version>0.2.0</version>

<properties>
<plugin.id>amr-detection</plugin.id>
<plugin.class>ca.corefacility.bioinformatics.irida.plugin.amrdetection.AMRDetectionPlugin</plugin.class>
<plugin.version>0.3.0</plugin.version>
<plugin.version>0.2.0</plugin.version>
<plugin.provider>Aaron Petkau</plugin.provider>
<plugin.dependencies></plugin.dependencies>
<plugin.requires.runtime>1.1.0</plugin.requires.runtime>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

Expand Down Expand Up @@ -47,17 +48,37 @@ public class AMRDetectionUpdater implements AnalysisSampleUpdater {

private static final String RGI_DRUG_CLASS = "rgi/drug-class";
private static final String RGI_GENE = "rgi/gene";

private static final String STARAMR_QUALITY_MODULE = "staramr/quality";
private static final String STARAMR_GENE = "staramr/gene";
private static final String STARAMR_DRUG_CLASS = "staramr/drug-class";
private static final String STARAMR_PLASMID = "staramr/plasmid";
private static final String STARAMR_MLST_SCHEME = "staramr/mlst-scheme";
private static final String STARAMR_MLST_TYPE = "staramr/mlst-sequence-type";
private static final String STARAMR_GENOME_LENGTH = "staramr/genome-length";
private static final String STARAMR_N50 = "staramr/n50";
private static final String STARAMR_NUMBER_CONTIGS = "staramr/number-contigs";
private static final String STARAMR_QUALITY_FEEDBACK = "staramr/quality-feedback";
//
// The "Number of Contigs" column is a special case since the name can change
// (e.g., the full name is like "Number of Contigs Greater Than Or Equal To 300
// bp" where "300 bp" can change).
private static final String STARAMR_RESULTS_CONTIGS_PREFIX = "Number of Contigs Greater Than Or Equal To";

// Maps IRIDA metadata field name to the column name of the staramr results
// (e.g., "staramr/quality" => "Quality Module")
//@formatter:off
private static final Map<String, String> STARAMR_RESULTS_METADATA_MAP = ImmutableMap.<String, String>builder()
.put("staramr/quality" , "Quality Module")
.put("staramr/gene" , "Genotype")
.put("staramr/drug-class" , "Predicted Phenotype")
.put("staramr/plasmid" , "Plasmid")
.put("staramr/mlst-scheme" , "Scheme")
.put("staramr/mlst-sequence-type", "Sequence Type")
.put("staramr/genome-length" , "Genome Length")
.put("staramr/n50" , "N50 value")
.put("staramr/quality-feedback" , "Quality Module Feedback")
.put("staramr/number-contigs" , STARAMR_RESULTS_CONTIGS_PREFIX)
.build();
//@formatter:on

// Maps IRIDA metadata field name to the column name of the RGI results
// (e.g., "rgi/gene" => "Best_Hit_ARO")
//@formatter:off
private static final Map<String, String> RGI_RESULTS_METADATA_MAP = ImmutableMap.<String, String>builder()
.put(RGI_GENE , "Best_Hit_ARO")
.put(RGI_DRUG_CLASS, "Drug Class")
.build();
//@formatter:on

private MetadataTemplateService metadataTemplateService;
private SampleService sampleService;
Expand All @@ -77,6 +98,47 @@ public AMRDetectionUpdater(MetadataTemplateService metadataTemplateService, Samp
this.iridaWorkflowsService = iridaWorkflowsService;
}

/**
* Parses a line of the results file and gets a Map linking the column to the
* value in the line. (e.g., "N50 value" => "100").
*
* @param columnNames A List of names of the columns in the results file.
* @param line The line to parse.
* @param singleColumnPrefix A prefix for a special case in the staramr results
* where the column prefix is constant but the suffix
* changes. Set to null to ignore.
* @param resultsFile The specific file being parsed (for error
* messages).
* @param analysis The analysis submission being parsed (for error
* messages).
* @return A Map linking the column to the value for the line.
* @throws PostProcessingException If there was an error parsing the results.
*/
private Map<String, String> getDataMapForLine(List<String> columnNames, String line, String singleColumnPrefix,
Path resultsFile, AnalysisSubmission analysis) throws PostProcessingException {
Map<String, String> dataMap = new HashMap<>();

List<String> values = SPLITTER.splitToList(line);

if (columnNames.size() != values.size()) {
throw new PostProcessingException("Mismatch in number of column names [" + columnNames.size()
+ "] and number of files [" + values.size() + "] in results file [" + resultsFile + "]");
}

for (int i = 0; i < columnNames.size(); i++) {
String column = columnNames.get(i);
String value = values.get(i);

if (singleColumnPrefix != null && column.startsWith(singleColumnPrefix)) {
dataMap.put(singleColumnPrefix, value);
} else {
dataMap.put(column, value);
}
}

return dataMap;
}

/**
* Gets the staramr results from the given output file.
*
Expand All @@ -89,47 +151,36 @@ public AMRDetectionUpdater(MetadataTemplateService metadataTemplateService, Samp
*/
private Map<String, PipelineProvidedMetadataEntry> getStarAMRResults(Path staramrFilePath,
AnalysisSubmission analysis) throws IOException, PostProcessingException {
final int QUALITY_MODULE = 1;
final int GENOTYPE = 2;
final int DRUG = 3;
final int PLASMID = 4;
final int MLST_SCHEME = 5;
final int MLST_SEQUENCE_TYPE = 6;
final int GENOME_LENGTH = 7;
final int N50 = 8;
final int NUMBER_CONTIGS = 9;
final int QUALITY_FEEDBACK = 10;

final int MAX_TOKENS = 11;
final int MIN_TOKENS = 2;

Map<String, PipelineProvidedMetadataEntry> results = new HashMap<>();
Map<String, String> dataMap;

@SuppressWarnings("resource")
BufferedReader reader = new BufferedReader(new FileReader(staramrFilePath.toFile()));
String line = reader.readLine();
List<String> tokens = SPLITTER.splitToList(line);
if (tokens.size() != MAX_TOKENS) {
List<String> columnNames = SPLITTER.splitToList(line);
if (columnNames.size() < MIN_TOKENS) {
throw new PostProcessingException("Invalid number of columns in staramr results file [" + staramrFilePath
+ "], expected [" + MAX_TOKENS + "] got [" + tokens.size() + "]");
+ "], expected at least [" + MIN_TOKENS + "] got [" + columnNames.size() + "]");
}

line = reader.readLine();
tokens = SPLITTER.splitToList(line);
results.put(STARAMR_QUALITY_MODULE,
new PipelineProvidedMetadataEntry(tokens.get(QUALITY_MODULE), "text", analysis));
results.put(STARAMR_GENE, new PipelineProvidedMetadataEntry(tokens.get(GENOTYPE), "text", analysis));
results.put(STARAMR_DRUG_CLASS, new PipelineProvidedMetadataEntry(tokens.get(DRUG), "text", analysis));
results.put(STARAMR_PLASMID, new PipelineProvidedMetadataEntry(tokens.get(PLASMID), "text", analysis));
results.put(STARAMR_MLST_SCHEME, new PipelineProvidedMetadataEntry(tokens.get(MLST_SCHEME), "text", analysis));
results.put(STARAMR_MLST_TYPE,
new PipelineProvidedMetadataEntry(tokens.get(MLST_SEQUENCE_TYPE), "text", analysis));
results.put(STARAMR_GENOME_LENGTH,
new PipelineProvidedMetadataEntry(tokens.get(GENOME_LENGTH), "text", analysis));
results.put(STARAMR_N50, new PipelineProvidedMetadataEntry(tokens.get(N50), "text", analysis));
results.put(STARAMR_NUMBER_CONTIGS,
new PipelineProvidedMetadataEntry(tokens.get(NUMBER_CONTIGS), "text", analysis));
results.put(STARAMR_QUALITY_FEEDBACK,
new PipelineProvidedMetadataEntry(tokens.get(QUALITY_FEEDBACK), "text", analysis));

if (line == null || line.length() == 0) {
dataMap = new HashMap<>();
logger.info(
"Got empty results for staramr file [" + staramrFilePath + "] for analysis submission " + analysis);
} else {
dataMap = getDataMapForLine(columnNames, line, STARAMR_RESULTS_CONTIGS_PREFIX, staramrFilePath, analysis);
}

for (String resultsFieldName : STARAMR_RESULTS_METADATA_MAP.keySet()) {
String staramrColumnName = STARAMR_RESULTS_METADATA_MAP.get(resultsFieldName);
String value = dataMap.containsKey(staramrColumnName) ? dataMap.get(staramrColumnName) : "-";

results.put(resultsFieldName, new PipelineProvidedMetadataEntry(value, "text", analysis));
}

line = reader.readLine();

Expand All @@ -153,11 +204,6 @@ private Map<String, PipelineProvidedMetadataEntry> getStarAMRResults(Path staram
*/
private Map<String, PipelineProvidedMetadataEntry> getRgiResults(Path rgiFilePath, AnalysisSubmission analysis)
throws IOException, PostProcessingException {
final int MAX_TOKENS = 25;

final int BEST_HIT_ARO_INDEX = 8;
final int DRUG_CLASS_INDEX = 14;

final String DRUG_CLASS_SPLIT = ";";

final Joiner joiner = Joiner.on(", ");
Expand All @@ -170,48 +216,53 @@ private Map<String, PipelineProvidedMetadataEntry> getRgiResults(Path rgiFilePat

String line = reader.readLine();

List<String> tokens = SPLITTER.splitToList(line);
if (tokens.size() != MAX_TOKENS) {
throw new PostProcessingException("Invalid number of columns in RGI results file [" + rgiFilePath
+ "], expected [" + MAX_TOKENS + "] got [" + tokens.size() + "]");
List<String> columnNames = SPLITTER.splitToList(line);
if (columnNames.isEmpty()) {
logger.warn("Missing columns in RGI results file [" + rgiFilePath + "] for analysis submission " + analysis);
}

line = reader.readLine();
while (line != null) {
tokens = SPLITTER.splitToList(line);

if (tokens.size() != MAX_TOKENS) {
line = reader.readLine();
continue;
Map<String, String> lineDataMap = getDataMapForLine(columnNames, line, null, rgiFilePath, analysis);

if (lineDataMap.containsKey(RGI_RESULTS_METADATA_MAP.get(RGI_GENE))) {
genotypes.add(lineDataMap.get(RGI_RESULTS_METADATA_MAP.get(RGI_GENE)));
}

if (lineDataMap.containsKey(RGI_RESULTS_METADATA_MAP.get(RGI_DRUG_CLASS))) {
drugs.add(lineDataMap.get(RGI_RESULTS_METADATA_MAP.get(RGI_DRUG_CLASS)));
}

String genotype = tokens.get(BEST_HIT_ARO_INDEX);
String drugClass = tokens.get(DRUG_CLASS_INDEX);

genotypes.add(genotype);
drugs.add(drugClass);

line = reader.readLine();
}

if (!genotypes.isEmpty()) {
String genotypesString = "-";
String drugsString = "-";

if (genotypes.isEmpty()) {
logger.info("No genotype results found in rgi output file [" + rgiFilePath + "], for analysis submission "
+ analysis);
} else {
Collections.sort(genotypes);

String genotypesString = joiner.join(genotypes);
genotypesString = joiner.join(genotypes);
}

if (drugs.isEmpty()) {
logger.info("No drug results found in rgi output file [" + rgiFilePath + "], for analysis submission "
+ analysis);
} else {
Set<String> drugsSet = Sets.newTreeSet();
drugs.forEach(t -> drugsSet.addAll(Lists.newArrayList(t.split(DRUG_CLASS_SPLIT))));

String drugsString = joiner.join(drugsSet);
drugsString = joiner.join(drugsSet);
}

Map<String, PipelineProvidedMetadataEntry> results = new HashMap<>();
results.put(RGI_GENE, new PipelineProvidedMetadataEntry(genotypesString, "text", analysis));
results.put(RGI_DRUG_CLASS, new PipelineProvidedMetadataEntry(drugsString, "text", analysis));
Map<String, PipelineProvidedMetadataEntry> results = new HashMap<>();
results.put(RGI_GENE, new PipelineProvidedMetadataEntry(genotypesString, "text", analysis));
results.put(RGI_DRUG_CLASS, new PipelineProvidedMetadataEntry(drugsString, "text", analysis));

return results;
} else {
throw new PostProcessingException("No results found in rgi output file [" + rgiFilePath + "]");
}
return results;
}

/**
Expand Down

0 comments on commit 1571af2

Please sign in to comment.