diff --git a/IslandCompare/analysis/components.py b/IslandCompare/analysis/components.py
index 92e1aa4d..16e79c2f 100644
--- a/IslandCompare/analysis/components.py
+++ b/IslandCompare/analysis/components.py
@@ -1,5 +1,6 @@
 from analysis.pipeline import PipelineComponent
 from analysis.models import Analysis, Genome
+from genomes.models import Gene, GenomicIsland, UserGenomicIsland
 from tempfile import mkdtemp, NamedTemporaryFile
 from shutil import rmtree
 from Bio import SeqIO, Phylo
@@ -9,7 +10,6 @@
 from io import StringIO
 import csv
 from datetime import datetime
-import copy
 from Bio.SeqRecord import SeqRecord
 import numpy as np
 from analysis.lib.mcl_clustering import mcl
@@ -42,11 +42,55 @@ def analysis(self, report):
 
 class SetupGbkPipelineComponent(PipelineComponent):
     """
-    Pipeline component that adds the gbk paths to the report.
+    Pipeline component that adds the gbk paths to the report and genes to the database.
     """
     name = "setup_gbk"
     result_types = ["gbk_paths"]
 
+    @staticmethod
+    def create_genes(genome):
+        with open(genome.gbk.path) as genbank_handle:
+            record = SeqIO.read(genbank_handle, "genbank")
+        last_gene = None
+        for feature in record.features:
+            if feature.type in ["gene", "CDS", "tRNA", "rRNA"]:
+                start = feature.location.start
+                end = feature.location.end
+                # Check if the current gene is in the same position as the last gene.
+                if last_gene is not None and start == last_gene.start and end == last_gene.end:
+                    if feature.type != "gene":
+                        # we don't want to replace the more specific types with "gene".
+                        last_gene.type = feature.type
+                    gene = last_gene
+                else:
+                    gene = Gene(
+                        type=feature.type,
+                        gene="",
+                        locus_tag="",
+                        product="",
+                        start=start,
+                        end=end,
+                        strand=feature.location.strand,
+                        genome=genome
+                    )
+                last_gene = gene
+                if gene.gene == "":
+                    try:
+                        gene.gene = feature.qualifiers['gene'][0]
+                    except KeyError:
+                        pass
+                if gene.locus_tag == "":
+                    try:
+                        gene.locus_tag = feature.qualifiers['locus_tag'][0]
+                    except KeyError:
+                        pass
+                if gene.product == "":
+                    try:
+                        gene.product = feature.qualifiers['product'][0]
+                    except KeyError:
+                        pass
+                gene.save()
+
     def analysis(self, report):
         analysis_entry = Analysis.objects.get(id=report['analysis'])
         genomes = analysis_entry.genomes
@@ -54,6 +98,11 @@ def analysis(self, report):
         report['gbk_paths'] = dict()
         for genome in genomes.all():
             report['gbk_paths'][str(genome.id)] = genome.gbk.path
+            if genome.gene_set.exists():
+                self.logger.info("{} gene set found".format(genome.name))
+            else:
+                self.logger.info("Creating gene set for genome {}".format(genome.name))
+                self.create_genes(genome)
 
 
 class GbkMetadataComponent(PipelineComponent):
@@ -78,6 +127,9 @@ def analysis(self, report):
 
 
 class RGIPipelineComponent(PipelineComponent):
+    """
+    Pipeline component that runs and adds RGI data to the report.
+    """
     name = "rgi"
     dependencies = ["gbk_paths"]
     result_types = ["amr_genes"]
@@ -95,7 +147,7 @@ def parse_rgi_json(json_file):
         for entry_key in json_dict[key]:
             entry = json_dict[key][entry_key]
             if type(entry) is dict and "orf_start" in entry:
-                amr_genes.append({k : entry.get(k) for k in ("orf_start", "orf_end", "orf_strand", "type_match")})
+                amr_genes.append({k : entry.get(k) for k in ("orf_start", "orf_end", "orf_strand")})
     # Keep only unique entries
     amr_genes = [dict(y) for y in set(tuple(x.items()) for x in amr_genes)]
     amr_sorted = sorted(amr_genes, key=lambda gene: gene['orf_start'])
@@ -166,10 +218,13 @@ def convert_gbk_to_fna(input_path, output_path):
         input_handle.close()
 
     @staticmethod
-    def parse_newick(input_path):
+    def parse_newick(input_path, num_genomes):
         tree = Phylo.read(input_path, 'newick')
         terminals = tree.get_terminals()
 
+        # Parsnp may fail to include all genomes in the newick in specific cases
+        assert (len(terminals) == num_genomes), "Newick produced by Parsnp does not contain all genomes"
+
         for leaf in terminals:
             leaf.name = leaf.name.split('.')[0]
 
@@ -203,7 +258,14 @@ def analysis(self, report):
             subprocess.check_call(script_file.name, stdout=logs)
         script_file.close()
 
-        report["newick"] = self.parse_newick(self.temp_results_dir + "/parsnp.tree")
+        try:
+            # parse_newick also validates that newick contains all genomes
+            report["newick"] = self.parse_newick(self.temp_results_dir + "/parsnp.tree", len(report["gbk_paths"]))
+        except AssertionError:
+            # temp dir with incorrect newick file is deleted so parsnp can remake on retry
+            if self.temp_results_dir is not None and os.path.exists(self.temp_results_dir):
+                rmtree(self.temp_results_dir)
+            raise
 
     def cleanup(self):
         if self.temp_dir_path is not None and os.path.exists(self.temp_dir_path):
@@ -226,7 +288,7 @@ def set_newick(self, user_newick):
         self.param['user_file_contents'] = user_newick
 
     def analysis(self, report):
-        selected_genomes = Genome.objects.filter(id__in=[int(_) for _ in report["gbk_paths"].keys()])
+        selected_genomes = Genome.objects.filter(id__in=report["gbk_paths"].keys())
 
         tree = Phylo.read(StringIO(self.param['user_file_contents']), 'newick')
         terminals = tree.get_terminals()
@@ -251,7 +313,6 @@ class UserGIPipelineComponent(PipelineComponent):
     """
     name = "user_gi"
    dependencies = ["gbk_paths"]
-    result_types = ["user_gis"]
 
     def set_gi(self, user_gi):
         self.logger.info("Set User GI as:\n{}".format(user_gi))
@@ -278,17 +339,24 @@ def parse_gi_file(gifile):
         return genomeDict
 
     def analysis(self, report):
-        selected_genomes = Genome.objects.filter(id__in=[int(_) for _ in report["gbk_paths"].keys()])
-        output_dict = dict()
+        analysis = Analysis.objects.get(id=report["analysis"])
+        selected_genomes = Genome.objects.filter(id__in=report["gbk_paths"].keys())
 
         gi_dict = self.parse_gi_file(StringIO(self.param['user_file_contents']))
 
         for key in gi_dict.keys():
-            selected_genome = selected_genomes.filter(name__exact=key)
-            key_id = selected_genome.get().id
-            output_dict[str(key_id)] = gi_dict[key]
-
-        report["user_gis"] = output_dict
+            selected_genome = selected_genomes.get(name=key)
+            for island in gi_dict[key]:
+                if 'color' in island:
+                    color = island['color']
+                else:
+                    color = ""
+                UserGenomicIsland(method="user",
+                                  start=island['start'],
+                                  end=island['end'],
+                                  genome=selected_genome,
+                                  analysis=analysis,
+                                  color=color).save()
 
 
 class MauvePipelineComponent(PipelineComponent):
@@ -389,16 +457,16 @@ def cleanup(self):
 
 class SigiHMMPipelineComponent(PipelineComponent):
     """
-    Pipeline component that runs and adds sigi-hmm data for each genome in the report to the report.
+    Pipeline component that runs and adds sigi-hmm data for each genome in the report to the database.
""" name = "sigi" dependencies = ["gbk_paths"] - result_types = ["sigi_gis"] output_dir = settings.BIO_APP_TEMP_DIR + "sigi/" temp_dir_path = None embl_files = {} SIGIHMM_PATH = settings.SIGIHMM_PATH SIGIHMM_EXE = settings.SIGIHMM_EXE + exception = None @staticmethod def parse_sigi_gff(gff_file): @@ -416,25 +484,25 @@ def parse_sigi_gff(gff_file): else: cleaned_line = ' '.join(line.split()) gene_dict = cleaned_line.split(' ') - # at the start of a genomic island, set start and end of possible genomic island - if gene_dict[2] == 'PUTAL' and not island_flag: - start = gene_dict[3] - end = gene_dict[4] - island_flag = True - # continuation of current genomic island, change end and continue - elif gene_dict[2] == 'PUTAL' and island_flag: - end = gene_dict[4] - # end of genomic island, append current start and end to list + # Some results contain lines where start is 1 / end is 0. Skip to avoid erroneous GIs + if gene_dict[3] == '1' or gene_dict[4] == '0': + continue + if gene_dict[2] == 'PUTAL': + # At the start of a genomic island, set start and end of possible genomic island + if not island_flag: + start = gene_dict[3] + end = gene_dict[4] + island_flag = True + # Continuation of current genomic island, change end and continue + else: + end = gene_dict[4] + # End of genomic island, append current start and end to list elif island_flag: gi_list.append([int(start), int(end)]) island_flag = False - # not currently in a genomic island, continue to next line - elif not island_flag: - continue - # condition not included in above reached, throw an error - else: - raise Exception("Error occurred in sigi, unexpected condition reached") - + # For cases where the last line is part of a genomic island + if island_flag: + gi_list.append([int(start), int(end)]) return gi_list def setup(self, report): @@ -442,16 +510,20 @@ def setup(self, report): os.makedirs(self.temp_dir_path, 0o777) for gbk_id in report["gbk_paths"]: - basename = os.path.splitext(os.path.basename(report["gbk_paths"][gbk_id]))[0] - with open(self.output_dir + str(report["analysis"]) + "/" + basename + ".embl", 'w') as embl_output: - SeqIO.convert(report["gbk_paths"][gbk_id], "genbank", embl_output, "embl") - self.embl_files[gbk_id] = os.path.abspath(embl_output.name) + genome = Genome.objects.get(id=gbk_id) + if genome.genomicisland_set.filter(method="sigi").exists(): + self.logger.info("{} Sigi-HMM genomic islands found".format(genome.name)) + else: + basename = os.path.splitext(os.path.basename(report["gbk_paths"][gbk_id]))[0] + with open(self.output_dir + str(report["analysis"]) + "/" + basename + ".embl", 'w') as embl_output: + SeqIO.convert(report["gbk_paths"][gbk_id], "genbank", embl_output, "embl") + self.embl_files[gbk_id] = os.path.abspath(embl_output.name) def analysis(self, report): script_file = NamedTemporaryFile(delete=True) - report["sigi_gis"] = {} for embl_id in self.embl_files: + genome = Genome.objects.get(id=embl_id) sigi_gff_output = os.path.abspath(self.temp_dir_path + "/" + str(embl_id) + ".gff") sigi_output = os.path.abspath(self.temp_dir_path +"/" + str(embl_id) + ".embl") with open(script_file.name, 'w') as script: @@ -463,28 +535,43 @@ def analysis(self, report): os.chmod(script_file.name, 0o755) script_file.file.close() - with open(self.temp_dir_path + "/logs", 'w') as logs: - subprocess.check_call(script_file.name, stdout=logs, cwd=self.SIGIHMM_PATH) - script_file.close() - - report["sigi_gis"][str(embl_id)] = self.parse_sigi_gff(sigi_gff_output) + try: + with open(self.temp_dir_path + "/logs", 'w') as logs: 
+ subprocess.check_call(script_file.name, stdout=logs, cwd=self.SIGIHMM_PATH) + script_file.close() + + for sigi_gi in self.parse_sigi_gff(sigi_gff_output): + GenomicIsland(method="sigi", + start=sigi_gi[0], + end=sigi_gi[1], + genome=genome + ).save() + except subprocess.CalledProcessError as err: + self.logger.info("Sigi-HMM Failed! analysis {}, genome {}".format(report["analysis"], embl_id)) + self.exception = err + if "sigi" not in report["failed_components"]: + report["failed_components"]["sigi"] = [genome.name] + else: + report["failed_components"]["sigi"].append(genome.name) def cleanup(self): if self.temp_dir_path is not None and os.path.exists(self.temp_dir_path): rmtree(self.temp_dir_path) + if self.exception: + raise self.exception class IslandPathPipelineComponent(PipelineComponent): """ - Pipeline component that runs and adds islandpath data to the report. + Pipeline component that runs and adds islandpath data to the database. """ name = "islandpath" dependencies = ["gbk_paths"] - result_types = ["islandpath_gis"] output_dir = settings.BIO_APP_TEMP_DIR + "islandpath/" ISLANDPATH_PATH = settings.ISLANDPATH_PATH log_path = None temp_dir_path = None + exception = None @staticmethod def parse_islandpath(islandpath_file): @@ -502,74 +589,86 @@ def setup(self, report): os.mkdir(self.temp_dir_path, 0o777) def analysis(self, report): - report["islandpath_gis"] = {} for gbk_id in report["gbk_paths"]: - script_file = NamedTemporaryFile(delete=True) - temp_path = self.temp_dir_path + "/" + str(gbk_id) - - with open(script_file.name, 'w') as script: - script.write("#!/bin/bash\n") - script.write(os.path.abspath(self.ISLANDPATH_PATH) - + " " + os.path.abspath(report["gbk_paths"][gbk_id]) - + " " + os.path.abspath(temp_path)) - script.close() - - os.chmod(script_file.name, 0o755) - script_file.file.close() - - self.log_path = self.temp_dir_path + "/logs_" + str(report["analysis"]) - with open(self.log_path, 'w') as logs: - subprocess.check_call(script_file.name, stdout=logs, cwd=self.temp_dir_path) - script_file.close() - - report["islandpath_gis"][gbk_id] = self.parse_islandpath(temp_path) + genome = Genome.objects.get(id=gbk_id) + if genome.genomicisland_set.filter(method="islandpath").exists(): + self.logger.info("{} IslandPath genomic islands found".format(genome.name)) + else: + script_file = NamedTemporaryFile(delete=True) + temp_path = self.temp_dir_path + "/" + str(gbk_id) + + with open(script_file.name, 'w') as script: + script.write("#!/bin/bash\n") + script.write(os.path.abspath(self.ISLANDPATH_PATH) + + " " + os.path.abspath(report["gbk_paths"][gbk_id]) + + " " + os.path.abspath(temp_path)) + script.close() + + os.chmod(script_file.name, 0o755) + script_file.file.close() + + self.log_path = self.temp_dir_path + "/logs_" + str(report["analysis"]) + try: + with open(self.log_path, 'w') as logs: + subprocess.check_call(script_file.name, stdout=logs, cwd=self.temp_dir_path) + script_file.close() + + for islandpath_gi in self.parse_islandpath(temp_path): + GenomicIsland(method="islandpath", + start=islandpath_gi[0], + end=islandpath_gi[1], + genome=genome + ).save() + except subprocess.CalledProcessError as err: + self.logger.info("IslandPath Failed! 
analysis {}, genome {}".format(report["analysis"], gbk_id)) + self.exception = err + if "islandpath" not in report["failed_components"]: + report["failed_components"]["islandpath"] = [genome.name] + else: + report["failed_components"]["islandpath"].append(genome.name) def cleanup(self): if self.temp_dir_path is not None and os.path.exists(self.temp_dir_path): rmtree(self.temp_dir_path) + if self.exception: + raise self.exception class MergeIslandsPipelineComponent(PipelineComponent): """ - Pipeline component that merges islands and adds the merged islands to the report. + Pipeline component that merges islands and adds the merged islands to the database. """ name = "merge_gis" - dependencies = ["islandpath_gis", "sigi_gis"] - result_types = ["merge_gis"] threshold = 500 - def merge_gi_list(self, first_list, second_list): - merged_gis = first_list + second_list - merged_gis.sort(key=lambda x: int(x[0])) - output_gis = [] - - current_gi = None - for gi in merged_gis: - if current_gi is None: - current_gi = copy.deepcopy(gi) - elif int(gi[0]) < (int(current_gi[1]) + self.threshold) and int(gi[1]) > int(current_gi[1]): - current_gi[1] = gi[1] - else: - output_gis.append(current_gi) - current_gi = copy.deepcopy(gi) + def merge_gi_queryset(self, gi_queryset): + ordered_gis = gi_queryset.order_by('start') - if current_gi is not None: - output_gis.append(current_gi) - - return output_gis + current_gi = ordered_gis[0] + current_gi.pk = None # Creates a copy of the gi object + for i in range(1, len(ordered_gis)): + next_gi = ordered_gis[i] + if current_gi.end + self.threshold >= next_gi.start: + if current_gi.end < next_gi.end: + current_gi.end = next_gi.end + else: + current_gi.method = "merge" + current_gi.save() # Will create a new entry in the database because pk was set to None + current_gi = next_gi + current_gi.pk = None + current_gi.method = "merge" + current_gi.save() def set_threshold(self, threshold): self.threshold = threshold def analysis(self, report): - merged_gi_dict = dict() - - for genome_id in report["islandpath_gis"]: - merged_gi_dict[genome_id] = self.merge_gi_list(report["islandpath_gis"][str(genome_id)], - report["sigi_gis"][str(genome_id)]) - - report["merge_gis"] = merged_gi_dict + genomes = Genome.objects.filter(id__in=report['gbk_paths'].keys()) + for genome in genomes: + gis = genome.genomicisland_set + if gis.filter(method__in=["sigi", "islandpath"]).exists() and not gis.filter(method="merge").exists(): + self.merge_gi_queryset(gis.all()) class MashMclClusterPipelineComponent(PipelineComponent): @@ -577,8 +676,8 @@ class MashMclClusterPipelineComponent(PipelineComponent): Pipeline component that runs mash and clusters gis. 
""" name = "mash_mcl" - dependencies = ["gbk_paths", "merge_gis"] - result_types = ["cluster_gis"] + dependencies = ["gbk_paths"] + result_types = ["numberClusters"] output_dir = settings.BIO_APP_TEMP_DIR + "mash/" MASH_PATH = settings.MASH_PATH temp_dir_path = None @@ -614,106 +713,104 @@ def calculate_mash_distance(self, referenceFile, queryFastaFile, outputFile): subprocess.check_call(scriptFile.name) scriptFile.close() - def parse_mash_output(self, outputFile): - outputList = [] - with open(outputFile, 'r') as output: + def parse_mash_output(self, output_file): + with open(output_file, 'r') as output: reader = csv.reader(output, delimiter='\t') - for row in reader: - outputList.append({'referenceId': row[0], - 'queryId': row[1], - 'mashDistance': row[2], - 'pValue': row[3], - 'matchingHashes': row[4]}) - return outputList - - def getSubsequence(self, genbankFile, startPosition, endPosition, islandNumber, description=None): - record_dict = SeqIO.index(genbankFile, "genbank") - sequenceName = list(record_dict.keys())[0] + # The third column contains the mash distance + return [row[2] for row in reader] + + def get_subsequence(self, record, startPosition, endPosition, islandNumber, description=None): if description is not None: - return SeqRecord(record_dict[sequenceName].seq[int(startPosition):int(endPosition)], id=sequenceName + "-" + str(islandNumber), description=description) + return SeqRecord(record.seq[int(startPosition):int(endPosition)], id=record.id + "-" + str(islandNumber), description=description) else: - return SeqRecord(record_dict[sequenceName].seq[int(startPosition):int(endPosition)], id=sequenceName + "-" + str(islandNumber)) + return SeqRecord(record.seq[int(startPosition):int(endPosition)], id=record.id + "-" + str(islandNumber)) def writeFastaFile(self, outputFileName, seqRecordList): with open(outputFileName, 'w') as outputFileHandle: SeqIO.write(seqRecordList, outputFileHandle, "fasta") - def setup(self, report): - self.temp_dir_path = self.output_dir + str(report["analysis"]) - os.mkdir(self.temp_dir_path, 0o777) - - self.fna_dir_path = self.temp_dir_path + "/fna" - os.mkdir(self.fna_dir_path) - def create_gi_fasta_files(self, report): - genome_list = report["gbk_paths"] island_path_list = [] + if "user_gi" in report["pipeline_components"]: + method = "user" + else: + method = "merge" - for genome_id in genome_list.keys(): + for genome_id in report["gbk_paths"]: + record = SeqIO.read(report['gbk_paths'][genome_id], "genbank") genome_fna_path = self.fna_dir_path + "/" + str(genome_id) os.mkdir(genome_fna_path) gi_counter = 0 - for gi in report['merge_gis'][genome_id]: + gis = Genome.objects.get(id=genome_id).genomicisland_set.filter(method=method) + if method == "user": + gis = gis.filter(usergenomicisland__analysis__id=report['analysis']) + for gi in gis.all(): self.logger.info("Adding GI: " + str(genome_id) + "-" + str(gi_counter)) - entrySequence = self.getSubsequence(report['gbk_paths'][genome_id], gi[0], gi[1], gi_counter) + entrySequence = self.get_subsequence(record, gi.start, gi.end, gi_counter) self.writeFastaFile(self.fna_dir_path + "/" + str(genome_id) + "/" + str(gi_counter), entrySequence) island_path_list.append(self.fna_dir_path + "/" + str(genome_id) + "/" + str(gi_counter)) gi_counter += 1 return island_path_list + def apply_cutoff(self, matrix, cutoff=0.96): + n = len(matrix) + for i in range(n - 1): + for j in range(i + 1, n): + if matrix[i][j] < cutoff: + matrix[i][j] = matrix[j][i] = 0 + return matrix + + def setup(self, report): + 
self.temp_dir_path = self.output_dir + str(report["analysis"]) + os.mkdir(self.temp_dir_path, 0o777) + + self.fna_dir_path = self.temp_dir_path + "/fna" + os.mkdir(self.fna_dir_path) + def analysis(self, report): island_path_list = self.create_gi_fasta_files(report) - self.create_compound_sketch(island_path_list, self.temp_dir_path + "/compoundScratch") - - distance_matrix = [] - - for island in island_path_list: - self.logger.info("Processing island: " + island) - if os.path.isfile(self.temp_dir_path + "/output"): - os.remove(self.temp_dir_path + "/output") - self.calculate_mash_distance(self.temp_dir_path + "/compoundScratch.msh", island, self.temp_dir_path + "/output") - distance_matrix.append([float(i['mashDistance']) for i in self.parse_mash_output(self.temp_dir_path + "/output")]) - - baseMatrix = [] - for i in range(len(island_path_list)): - nextRow = [] - for j in range(len(island_path_list)): - nextRow.append(1) - baseMatrix.append(nextRow) - numpyBaseMatrix = np.array(baseMatrix) - numpyDistanceMatrix = np.array(distance_matrix) - - mclAdjacencyMatrix = np.subtract(numpyBaseMatrix, numpyDistanceMatrix) - np.set_printoptions(threshold='nan') - - M, clusters = mcl(mclAdjacencyMatrix) - outputList = {} - - for sequenceId in report["gbk_paths"].keys(): - outputList[str(sequenceId)] = {} - islandIdList = list([i for i in range(len(island_path_list))]) - - numberClusters = 0 - - while len(islandIdList) > 0: - numberClusters += 1 - currentCluster = clusters[islandIdList[0]] - for i in currentCluster: - self.logger.info("Assigning clusters for cluster {}".format(numberClusters)) - island = island_path_list[i] - splitIsland = island.split('/')[-2:] - currentSequenceId = splitIsland[0] - islandId = splitIsland[1] - self.logger.info("Current Sequence Id: " + str(currentSequenceId)) - self.logger.info("Current Island Id: " + str(islandId)) - outputList[str(currentSequenceId)][str(islandId)] = numberClusters - 1 - remainingIslands = filter(lambda x: x not in currentCluster, islandIdList) - islandIdList = list(remainingIslands) - - outputList['numberClusters'] = numberClusters - 1 - report["cluster_gis"] = outputList + if len(island_path_list) > 0: + self.create_compound_sketch(island_path_list, self.temp_dir_path + "/compoundScratch") + + # Calculates the distance [0,1] from each genomic island to each genomic island + distance_matrix = [] + for island in island_path_list: + self.logger.info("Processing island: " + island) + if os.path.isfile(self.temp_dir_path + "/output"): + os.remove(self.temp_dir_path + "/output") + self.calculate_mash_distance(self.temp_dir_path + "/compoundScratch.msh", island, self.temp_dir_path + "/output") + distance_matrix.append([float(i) for i in self.parse_mash_output(self.temp_dir_path + "/output")]) + + numpy_distance_matrix = np.array(distance_matrix) + # Convert to adjacency matrix by setting each value as 1 - value + mcl_adjacency_matrix = np.vectorize(lambda i: 1 - i)(numpy_distance_matrix) + np.set_printoptions(threshold='nan') + + # The cutoff ensures only very closely matches islands will be placed in a cluster together + mcl_adjacency_matrix = self.apply_cutoff(mcl_adjacency_matrix) + + # mcl computes genomic island clusters + M, raw_clusters = mcl(mcl_adjacency_matrix) + + processed_clusters = {str(genome_id): {} for genome_id in report["gbk_paths"]} + cluster_number = 0 + for island in raw_clusters: + advance_flag = 0 # Only advance the cluster_number if it is used + for cluster_mate in raw_clusters[island]: + # island_path_list to get the genome ID 
and genome-specific GI number from the unspecific GI number + genome_id, island_num = island_path_list[cluster_mate].split("/")[-2:] + # Only add GIs to the same cluster if they both list each other in their cluster + if island in raw_clusters[cluster_mate] and island_num not in processed_clusters[genome_id]: + processed_clusters[genome_id][island_num] = cluster_number + advance_flag = 1 + cluster_number += advance_flag + + report['numberClusters'] = cluster_number + analysis = Analysis.objects.get(id=report['analysis']) + analysis.clusters = str(processed_clusters) + analysis.save() def cleanup(self): if self.temp_dir_path is not None and os.path.exists(self.temp_dir_path): diff --git a/IslandCompare/analysis/integration_tests.py b/IslandCompare/analysis/integration_tests.py index 1164e294..d55e8d5a 100644 --- a/IslandCompare/analysis/integration_tests.py +++ b/IslandCompare/analysis/integration_tests.py @@ -1,5 +1,6 @@ from django.test import TestCase, override_settings -from genomes.models import Genome +from genomes.models import Genome, GenomicIsland +from analysis.models import Analysis from django.contrib.auth.models import User from django.core.files import File from analysis.components import ParsnpPipelineComponent, MauvePipelineComponent, \ @@ -155,10 +156,12 @@ def setUp(self): self.test_genome_1 = Genome.objects.create(name=self.test_genome_1_name, owner=self.test_user, gbk=self.test_genome_1_gbk) + self.test_genome_1.save() self.test_genome_2 = Genome.objects.create(name=self.test_genome_2_name, owner=self.test_user, gbk=self.test_genome_2_gbk) + self.test_genome_2.save() def test_sigihmm_analysis(self): report = { @@ -175,8 +178,8 @@ def test_sigihmm_analysis(self): component.analysis(report) component.cleanup() - self.assertTrue("sigi_gis" in report) - self.assertEqual(2, len(report["sigi_gis"])) + self.assertTrue(self.test_genome_1.genomicisland_set.filter(method="sigi").exists()) + self.assertTrue(self.test_genome_2.genomicisland_set.filter(method="sigi").exists()) def tearDown(self): for genome in Genome.objects.all(): @@ -202,10 +205,12 @@ def setUp(self): self.test_genome_1 = Genome.objects.create(name=self.test_genome_1_name, owner=self.test_user, gbk=self.test_genome_1_gbk) + self.test_genome_1.save() self.test_genome_2 = Genome.objects.create(name=self.test_genome_2_name, owner=self.test_user, gbk=self.test_genome_2_gbk) + self.test_genome_2.save() def test_islandpath_analysis(self): report = { @@ -222,10 +227,8 @@ def test_islandpath_analysis(self): component.analysis(report) component.cleanup() - self.assertTrue("islandpath_gis" in report) - self.assertEqual(2, len(report["islandpath_gis"])) - self.assertTrue(self.test_genome_1.id in report["islandpath_gis"]) - self.assertTrue(self.test_genome_2.id in report["islandpath_gis"]) + self.assertTrue(self.test_genome_1.genomicisland_set.filter(method="islandpath").exists()) + self.assertTrue(self.test_genome_2.genomicisland_set.filter(method="islandpath").exists()) def tearDown(self): for genome in Genome.objects.all(): @@ -284,6 +287,8 @@ class MashMCLTestCase(TestCase): test_username = "username" test_user = None + test_cluster_analysis = None + test_genome_1 = None test_genome_1_name = "genome_1" test_genome_1_gbk = File(open("../TestFiles/AE009952.gbk")) @@ -292,32 +297,62 @@ class MashMCLTestCase(TestCase): test_genome_2_name = "genome_2" test_genome_2_gbk = File(open("../TestFiles/BX936398.gbk")) + test_gi_1 = None + test_gi_2 = None + + report = None + def setUp(self): self.test_user = 
User(username=self.test_username) self.test_user.save() + self.test_cluster_analysis = Analysis( + celery_task_id="1", + name="test_cluster_analysis", + owner=self.test_user + ) + self.test_cluster_analysis.save() + self.test_genome_1 = Genome.objects.create(name=self.test_genome_1_name, owner=self.test_user, gbk=self.test_genome_1_gbk) + self.test_genome_1.save() self.test_genome_2 = Genome.objects.create(name=self.test_genome_2_name, owner=self.test_user, gbk=self.test_genome_2_gbk) - - def test_generate_gi_fna(self): - component = MashMclClusterPipelineComponent() - - report = { - "analysis": 1, - "available_dependencies": ["merge_gis", "gbk_paths"], + self.test_genome_2.save() + + self.test_cluster_analysis.genomes.add(self.test_genome_1) + self.test_cluster_analysis.genomes.add(self.test_genome_2) + self.test_cluster_analysis.save() + + self.test_gi_1 = GenomicIsland( + method="merge", + start=0, + end=1000, + genome=self.test_genome_1 + ).save() + self.test_gi_2 = GenomicIsland( + method="merge", + start=0, + end=1000, + genome=self.test_genome_2 + ).save() + + self.report = { + "analysis": self.test_cluster_analysis.id, + "available_dependencies": ["gbk_paths"], "gbk_paths": {self.test_genome_1.id: self.test_genome_1.gbk.path, self.test_genome_2.id: self.test_genome_2.gbk.path}, - "merge_gis": {self.test_genome_1.id: [[0, 1000]], - self.test_genome_2.id: [[0, 1000]]} + "pipeline_components": "merge" } - component.setup(report) - component.create_gi_fasta_files(report) + def test_generate_gi_fna(self): + component = MashMclClusterPipelineComponent() + + component.setup(self.report) + component.create_gi_fasta_files(self.report) self.assertTrue(os.path.exists(component.temp_dir_path + "/fna/1/0")) self.assertTrue(os.path.exists(component.temp_dir_path + "/fna/2/0")) @@ -327,25 +362,18 @@ def test_generate_gi_fna(self): def test_analysis(self): component = MashMclClusterPipelineComponent() - report = { - "analysis": 1, - "available_dependencies": ["merge_gis", "gbk_paths"], - "gbk_paths": {self.test_genome_1.id: self.test_genome_1.gbk.path, - self.test_genome_2.id: self.test_genome_2.gbk.path}, - "merge_gis": {self.test_genome_1.id: [[0, 1000]], - self.test_genome_2.id: [[0, 1000]]} - } - - component.setup(report) - component.analysis(report) + component.setup(self.report) + component.analysis(self.report) component.cleanup() - self.assertTrue("cluster_gis" in report) - self.assertEqual(0, report["cluster_gis"]['numberClusters']) + self.assertTrue("numberClusters" in self.report) + self.assertTrue(Analysis.objects.get(id=self.report["analysis"]).clusters != "") def tearDown(self): for genome in Genome.objects.all(): genome.delete() + for analysis in Analysis.objects.all(): + analysis.delete() class RGIComponentIntegrationTestCase(TestCase): @@ -364,7 +392,6 @@ def setUp(self): owner=self.test_user, gbk=self.test_genome_1_gbk) - @skip("Complete writing tests when component is defined and implemented") def test_rgi_component(self): report = { "analysis": 1, @@ -372,7 +399,7 @@ def test_rgi_component(self): "gbk_paths": {self.test_genome_1.id: self.test_genome_1.gbk.path}, } - component = RGIPipelineComponent()#mock.MagicMock(RGIPipelineComponent) + component = RGIPipelineComponent() component.setup(report) component.analysis(report) component.cleanup() diff --git a/IslandCompare/analysis/migrations/0002_auto_20170718_1718.py b/IslandCompare/analysis/migrations/0002_auto_20170718_1718.py new file mode 100644 index 00000000..5e9e2a66 --- /dev/null +++ 
b/IslandCompare/analysis/migrations/0002_auto_20170718_1718.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10.5 on 2017-07-18 17:18 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('analysis', '0001_initial'), + ] + + operations = [ + migrations.AlterField( + model_name='analysis', + name='celery_task_id', + field=models.CharField(max_length=765), + ), + migrations.AlterField( + model_name='analysiscomponent', + name='celery_task_id', + field=models.CharField(max_length=765), + ), + ] diff --git a/IslandCompare/analysis/migrations/0003_analysis_clusters.py b/IslandCompare/analysis/migrations/0003_analysis_clusters.py new file mode 100644 index 00000000..87425b55 --- /dev/null +++ b/IslandCompare/analysis/migrations/0003_analysis_clusters.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10.5 on 2017-07-31 17:44 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('analysis', '0002_auto_20170718_1718'), + ] + + operations = [ + migrations.AddField( + model_name='analysis', + name='clusters', + field=models.TextField(blank=True), + ), + ] diff --git a/IslandCompare/analysis/models.py b/IslandCompare/analysis/models.py index 8fd71294..1e92ab61 100644 --- a/IslandCompare/analysis/models.py +++ b/IslandCompare/analysis/models.py @@ -3,8 +3,6 @@ from genomes.models import Genome from django.utils import timezone -# Create your models here. - class Analysis(models.Model): id = models.AutoField(primary_key=True) @@ -15,6 +13,7 @@ class Analysis(models.Model): start_time = models.DateTimeField(null=True) complete_time = models.DateTimeField(null=True) owner = models.ForeignKey(User) + clusters = models.TextField(blank=True) class Meta: unique_together = ('name', 'owner') diff --git a/IslandCompare/analysis/pipeline.py b/IslandCompare/analysis/pipeline.py index b1bdcf66..8d1d0f8f 100644 --- a/IslandCompare/analysis/pipeline.py +++ b/IslandCompare/analysis/pipeline.py @@ -133,6 +133,8 @@ class Pipeline(object): """A list of available dependencies""" analysis = None """The analysis""" + failed_components = None + """A list of failed components""" def __init__(self): """ @@ -141,6 +143,7 @@ def __init__(self): self.pipeline_components = [] self.available_dependencies = [] self.analysis = None + self.failed_components = {} def create_database_entry(self, name, genomes, owner): """ @@ -188,7 +191,8 @@ def serialize(pipeline): 'analysis': pipeline.analysis.id, 'available_dependencies': pipeline.available_dependencies, 'pipeline_components': serialized_pipeline_components, - 'component_param': serialized_pipeline_param + 'component_param': serialized_pipeline_param, + 'failed_components': pipeline.failed_components } return serialized_dict @@ -208,5 +212,6 @@ def deserialize(serialized_dict, pipeline_component_factory): ) pipeline.available_dependencies = serialized_dict['available_dependencies'] pipeline.analysis = Analysis.objects.get(id=serialized_dict['analysis']) + pipeline.failed_components = serialized_dict['failed_components'] return pipeline diff --git a/IslandCompare/analysis/serializers.py b/IslandCompare/analysis/serializers.py index 126c56f6..6490a51b 100644 --- a/IslandCompare/analysis/serializers.py +++ b/IslandCompare/analysis/serializers.py @@ -1,8 +1,9 @@ from rest_framework import serializers from analysis.models import Analysis, 
AnalysisComponent, AnalysisType
 from genomes.models import Genome
+from genomes.serializers import GenomeSerializer
 from io import StringIO
-import csv, re
+import csv, ast, re
 from celery.result import AsyncResult
 from Bio import Phylo
@@ -82,23 +83,56 @@ class RunAnalysisSerializer(serializers.Serializer):
 
     def validate_genomes(self, value):
         if len(value) <= 1:
-            raise serializers.ValidationError("Need 2 or more genomes to create an analysis. Only received {}"
+            raise serializers.ValidationError("Need 2 or more genomes to create an analysis. Received {}"
                                               .format(len(value)))
         return value
 
     def validate(self, data):
+        cluster_flag = True
         if 'newick' in data.keys():
-            selected_genomes = Genome.objects.filter(id__in=[genome.id for genome in data['genomes']])
+            selected_genome_names = [genome.name for genome in data['genomes']]
 
             tree = Phylo.read(StringIO(data['newick'].read().decode('utf-8')), 'newick')
             terminals = tree.get_terminals()
 
             for leaf in terminals:
                 leaf.name = re.sub(r'(\.genbank|\.gbff)$', ".gbk", leaf.name)
-                selected_genome = selected_genomes.filter(owner__exact=self.context['request'].user,
+                selected_genome = Genome.objects.filter(owner__exact=self.context['request'].user,
                                                           name__exact=leaf.name)
                 if not selected_genome.exists():
-                    raise serializers.ValidationError("Genome with Name: {} Does not Exist".format(leaf))
+                    raise serializers.ValidationError("Genome {} does not exist".format(leaf.name))
+                if leaf.name not in selected_genome_names:
+                    raise serializers.ValidationError("Genome {} in newick file not included in this analysis".format(leaf))
+            if len(terminals) != len(selected_genome_names):
+                if len(selected_genome_names) - len(terminals) > 1:
+                    error_message = "Selected genomes missing from newick file"
+                else:
+                    error_message = "Selected genome missing from newick file"
+                raise serializers.ValidationError(error_message)
+
+        if 'gi' in data.keys():
+            selected_genomes = [genome.name for genome in data['genomes']]
+            formatted_lines = []
+
+            # Replace carriage returns to avoid decode errors
+            user_gis = re.sub(b'\r\n?', b'\n', data["gi"].read())
+            # Replace problematic file extensions
+            user_gis = re.sub(rb'\.genbank|\.gbff', b'.gbk', user_gis)
+            for line in user_gis.decode("utf-8").split("\n"):
+                if line != "":
+                    formatted_lines.append(line)
+                    columns = line.split("\t")
+                    if len(columns) == 4:
+                        cluster_flag = False
+                    elif len(columns) != 3:
+                        raise serializers.ValidationError("Improperly formatted genomic islands file")
+                    if columns[0] not in selected_genomes:
+                        raise serializers.ValidationError("Genome {} in GI file not included in this analysis".format(columns[0]))
+
+            # Replace user supplied gi data with properly formatted version
+            data["gi"] = "\n".join(formatted_lines)
+        # Tells AnalysisRunView whether to add MashMclClusterPipelineComponent
+        data["cluster_gis"] = cluster_flag
         return data
 
     def create(self, validated_data):
@@ -124,37 +158,48 @@ def get_spaced_colors(n):
 
     def to_representation(self, instance):
         analysis = Analysis.objects.get(id=instance["analysis"])
-        genomes = analysis.genomes
 
         output = dict()
         output["genomes"] = dict()
 
-        for genome in genomes.all():
+        if "user_gi" in instance["pipeline_components"]:
+            gi_method = "user"
+        else:
+            gi_method = "merge"
+        # numberClusters signifies that clustering of genomic islands was run
+        if "numberClusters" in instance:
+            clustering = True
+            color_list = self.get_spaced_colors(instance["numberClusters"])
+            clusters = ast.literal_eval(analysis.clusters)
+        else:
+            clustering = False
+        for genome in analysis.genomes.all():
output["genomes"][genome.id] = dict() output["genomes"][genome.id]["name"] = genome.name output["genomes"][genome.id]["length"] = instance["gbk_metadata"][str(genome.id)]['size'] output["genomes"][genome.id]["amr_genes"] = [{'start': amr['orf_start'], 'end': amr['orf_end'], 'strand': amr['orf_strand']} for amr in instance["amr_genes"][str(genome.id)]] output["genomes"][genome.id]["genomic_islands"] = dict() - - if "user_gis" in instance.keys(): - output["genomes"][genome.id]["genomic_islands"]["user"] = instance["user_gis"][str(genome.id)] + if gi_method == "user": + gis = genome.genomicisland_set.filter(method="user").filter(usergenomicisland__analysis=analysis) + gi_dicts = [{'start': gi.start, 'end': gi.end, 'color': gi.usergenomicisland.color} for gi in gis] + output["genomes"][genome.id]["genomic_islands"]["user"] = gi_dicts else: - output["genomes"][genome.id]["genomic_islands"]["sigi"] = [{'start': island[0], 'end': island[1]} for island in instance["sigi_gis"][str(genome.id)]] - output["genomes"][genome.id]["genomic_islands"]["islandpath"] = [{'start': island[0], 'end':island[1]} for island in instance["islandpath_gis"][str(genome.id)]] - output["genomes"][genome.id]["genomic_islands"]["merged"] = [{'start': island[0], 'end':island[1]} for island in instance["merge_gis"][str(genome.id)]] - - if "user_gis" not in instance.keys(): - number_clusters = instance["cluster_gis"]["numberClusters"] - color_index = self.get_spaced_colors(number_clusters) - - for genome_id in instance["gbk_paths"].keys(): - for gi_index in range(len(output["genomes"][int(genome_id)]["genomic_islands"]["merged"])): - clusters = instance['cluster_gis'][str(genome_id)] - cluster_index = int(clusters[str(gi_index)]) - output["genomes"][int(genome_id)]["genomic_islands"]["merged"][gi_index]['color'] = color_index[cluster_index] + for gi_type in ["sigi", "islandpath", "merge"]: + gis = genome.genomicisland_set.filter(method=gi_type) + if gis.exists(): + gi_dicts = [{'start': gi.start, 'end': gi.end} for gi in gis] + output["genomes"][genome.id]["genomic_islands"][gi_type] = gi_dicts + if clustering and gi_method in output["genomes"][genome.id]["genomic_islands"]: + for gi_index in range(len(output["genomes"][genome.id]["genomic_islands"][gi_method])): + cluster_index = int(clusters[str(genome.id)][str(gi_index)]) + output["genomes"][genome.id]["genomic_islands"][gi_method][gi_index]["cluster"] = cluster_index + output["genomes"][genome.id]["genomic_islands"][gi_method][gi_index]["color"] = color_list[cluster_index] output["newick"] = instance["newick"] output["alignment"] = instance["alignment"] + output["failed_components"] = instance["failed_components"] + output["cluster_gis"] = clustering and gi_method # 'user', 'merge', or False + output["analysis_name"] = analysis.name return output @@ -172,34 +217,34 @@ class ReportCsvSerializer(serializers.BaseSerializer): """ Serializer needed to return a csv file to the user """ - def to_representation(self, instance): + def to_representation(self, analysis): output = StringIO() fieldnames = ['name', 'start', 'end', 'method', 'cluster_id'] writer = csv.DictWriter(output, fieldnames=fieldnames) writer.writeheader() - method_keys = ["islandpath_gis", "sigi_gis"] - - for key in instance["gbk_paths"]: - counter = 0 - for island in instance["merge_gis"][key]: - genome = Genome.objects.get(id=key) - writer.writerow({'name': genome.name, - 'start': island[0], - 'end': island[1], - 'method': 'merge_gis', - 'cluster_id': instance["cluster_gis"][str(genome.id)][str(counter)] - }) - 
counter += 1 + if analysis.usergenomicisland_set.exists(): + method_keys = ["user"] + else: + method_keys = ["merge", "islandpath", "sigi"] + clusters = ast.literal_eval(analysis.clusters) for method in method_keys: - for key in instance["gbk_paths"]: - for island in instance[method][key]: - genome = Genome.objects.get(id=key) - writer.writerow({'name': genome.name, - 'start': island[0], - 'end': island[1], - 'method': method}) + for genome in analysis.genomes.all(): + if method == "user": + gis = analysis.usergenomicisland_set.filter(genome=genome) + else: + gis = genome.genomicisland_set.filter(method=method) + for i, island in enumerate(gis): + row = { + 'name': genome.name, + 'start': island.start, + 'end': island.end, + 'method': method + "_gis" + } + if method in ["merge", "user"]: + row['cluster_id'] = clusters[str(genome.id)][str(i)] + writer.writerow(row) contents = output.getvalue() output.close() @@ -214,3 +259,44 @@ def create(self, validated_data): def update(self, instance, validated_data): return + +class ReportGeneCsvSerializer(serializers.BaseSerializer): + """ + Serializer to return a csv of genes contained within GIs + """ + def to_representation(self, analysis): + output = StringIO() + fieldnames = ['genome', 'gene_name', 'locus_tag', 'product', 'start', 'end', 'strand', 'gi_start', 'gi_end', 'gi_method'] + writer = csv.DictWriter(output, fieldnames=fieldnames) + writer.writeheader() + + method_keys = ["merge", "islandpath", "sigi"] + + for genome in analysis.genomes.all(): + for method in method_keys: + for island in genome.genomicisland_set.filter(method=method): + for gi_gene in genome.gene_set.filter(start__gte=island.start).filter(end__lte=island.end): + writer.writerow({'genome': genome.name, + 'gene_name': gi_gene.gene, + 'locus_tag': gi_gene.locus_tag, + 'product': gi_gene.product, + 'start': gi_gene.start, + 'end': gi_gene.end, + 'strand': gi_gene.strand, + 'gi_start': island.start, + 'gi_end': island.end, + 'gi_method': method + "_gis"}) + + contents = output.getvalue() + output.close() + + return contents + +class AnalysisGenomicIslandSerializer(serializers.Serializer): + """ + Serializer for GenomicIsland objects + """ + method = serializers.CharField(max_length=10) + start = serializers.IntegerField() + end = serializers.IntegerField() + genome = GenomeSerializer() diff --git a/IslandCompare/analysis/tasks.py b/IslandCompare/analysis/tasks.py index 70905d68..e472df49 100644 --- a/IslandCompare/analysis/tasks.py +++ b/IslandCompare/analysis/tasks.py @@ -1,5 +1,5 @@ from __future__ import absolute_import, unicode_literals -from celery import shared_task +from celery import shared_task, chain from analysis.models import AnalysisComponent from analysis import components from analysis.pipeline import PipelineSerializer @@ -75,26 +75,23 @@ def run_pipeline(self, serialized_pipeline): pipeline.analysis.celery_task_id = self.request.id pipeline.analysis.save() - index = len(pipeline.pipeline_components) - 1 - next_component = None - - while index != 0: - if next_component is None: - next_component = run_pipeline_component.s(pipeline.analysis.id, - pipeline.pipeline_components[index].name, - pipeline.pipeline_components[index].param) + tasks = [run_pipeline_component.s(serialized_pipeline, pipeline.analysis.id, "start_pipeline")] + for component in pipeline.pipeline_components[1:]: + if component.name == "sigi": + tasks.append(run_pipeline_component.s(pipeline.analysis.id, + component.name, + component.param) + .on_error(sigi_error_handler.s(pipeline.analysis.id))) 
+ elif component.name == "islandpath": + tasks.append(run_pipeline_component.s(pipeline.analysis.id, + component.name, + component.param) + .on_error(ipath_error_handler.s(pipeline.analysis.id))) else: - current_component = run_pipeline_component.s(pipeline.analysis.id, - pipeline.pipeline_components[index].name, - pipeline.pipeline_components[index].param) - current_component.link(next_component) - next_component = current_component - index -= 1 - - task_chain = run_pipeline_component.s(serialized_pipeline, - pipeline.analysis.id, - pipeline.pipeline_components[index].name) - task_chain.link(next_component) + tasks.append(run_pipeline_component.s(pipeline.analysis.id, + component.name, + component.param)) + task_chain = chain(*tasks) logger.info("End scheduling of pipeline components with celery task id: {}".format(self.request.id)) return task_chain.apply_async() @@ -103,7 +100,7 @@ def run_pipeline(self, serialized_pipeline): @shared_task(bind=True) def run_pipeline_component(self, report, analysis_id, pipeline_component_name, pipeline_components_param=None): """ - Runs a pipleine component + Runs a pipeline component :param self: :param report: :param analysis_id: @@ -112,12 +109,35 @@ def run_pipeline_component(self, report, analysis_id, pipeline_component_name, p :return: """ pipeline_component = PipelineComponentFactory.create_component(pipeline_component_name, pipeline_components_param) - logger.info("Attempting to run component: {} for analysis with id: {}".format(pipeline_component_name, - analysis_id)) + logger.info("Attempting to run component: {} for analysis with id: {}".format(pipeline_component_name, analysis_id)) - analysis_component = AnalysisComponent.objects.get(analysis__id=analysis_id, - type__name=pipeline_component.name) + analysis_component = AnalysisComponent.objects.get(analysis__id=analysis_id, type__name=pipeline_component.name) analysis_component.celery_task_id = self.request.id analysis_component.save() - return pipeline_component.run(report) + try: + return pipeline_component.run(report) + except (AssertionError, FileNotFoundError) as exc: + # Parsnp may fail to include all genomes in the newick in specific cases - retry until success or 6 failures + if pipeline_component_name == "parsnp": + raise self.retry(exc=exc, countdown=10, max_retries=5) + else: + raise + +# Error handlers are used here because if exceptions are handled in the component, status will be 'SUCCESS' +@shared_task() +def sigi_error_handler(context, exc, traceback, pipeline_id): + task_chain = chain(run_pipeline_component.s(context.args[0], pipeline_id, "islandpath") + .on_error(ipath_error_handler.s(pipeline_id)), + run_pipeline_component.s(pipeline_id, "merge_gis"), + run_pipeline_component.s(pipeline_id, "mash_mcl"), + run_pipeline_component.s(pipeline_id, "rgi"), + run_pipeline_component.s(pipeline_id, "end_pipeline")) + return task_chain.apply_async() + +@shared_task() +def ipath_error_handler(context, exc, traceback, pipeline_id): + return chain(run_pipeline_component.s(context.args[0], pipeline_id, "merge_gis"), + run_pipeline_component.s(pipeline_id, "mash_mcl"), + run_pipeline_component.s(pipeline_id, "rgi"), + run_pipeline_component.s(pipeline_id, "end_pipeline")).apply_async() diff --git a/IslandCompare/analysis/tests_components.py b/IslandCompare/analysis/tests_components.py index e1bff010..fd794ce0 100644 --- a/IslandCompare/analysis/tests_components.py +++ b/IslandCompare/analysis/tests_components.py @@ -1,11 +1,11 @@ from django.test import TestCase, mock -from 
django.core.files.uploadedfile import SimpleUploadedFile from django.contrib.auth.models import User -from genomes.models import Genome +from genomes.models import Genome, GenomicIsland from analysis.components import SetupGbkPipelineComponent, ParsnpPipelineComponent, MauvePipelineComponent, \ SigiHMMPipelineComponent, GbkMetadataComponent, MergeIslandsPipelineComponent from analysis.pipeline import Pipeline, PipelineSerializer from django.core.files import File +from django.core.files.uploadedfile import SimpleUploadedFile import filecmp import os @@ -16,15 +16,11 @@ class GbkComponentTestCase(TestCase): test_genome_1 = None test_genome_1_name = "genome_1" - test_genome_1_gbk_name = "test.gbk" - test_genome_1_gbk_contents = bytes("test", 'utf-8') - test_genome_1_gbk = SimpleUploadedFile(test_genome_1_gbk_name, test_genome_1_gbk_contents) + test_genome_1_gbk = File(open("../TestFiles/AE009952.gbk")) test_genome_2 = None test_genome_2_name = "genome_2" - test_genome_2_gbk_name = "test_2.gbk" - test_genome_2_gbk_contents = bytes("test2", 'utf-8') - test_genome_2_gbk = SimpleUploadedFile(test_genome_1_gbk_name, test_genome_1_gbk_contents) + test_genome_2_gbk = File(open("../TestFiles/BX936398.gbk")) serialized_pipeline = None component = SetupGbkPipelineComponent() @@ -56,6 +52,8 @@ def test_run_gbk_component(self): self.assertEqual(self.test_genome_2.gbk.path, result['gbk_paths'][str(self.test_genome_2.id)]) self.assertTrue('gbk_paths' in result['available_dependencies']) self.assertTrue('setup_gbk' in result['pipeline_components']) + for genome in Genome.objects.filter(owner=self.test_user): + self.assertTrue(genome.gene_set.exists()) def tearDown(self): for genome in Genome.objects.all(): @@ -275,19 +273,49 @@ def tearDown(self): class MergeGITestCase(TestCase): + test_username = "test_user" + test_user = None + + test_analysis_name = "test_analysis" + + test_genome = None + test_genome_name = "genome_1" + test_genome_gbk_name = "test.gbk" + test_genome_gbk_contents = bytes("test", 'utf-8') + test_genome_gbk = SimpleUploadedFile(test_genome_gbk_name, test_genome_gbk_contents) + report = None def setUp(self): + self.test_user = User(username=self.test_username) + self.test_user.save() + + self.test_genome = Genome.objects.create(name=self.test_genome_name, + owner=self.test_user, + gbk=self.test_genome_gbk) + self.test_genome.save() + self.report = { "analysis": 1, "available_dependencies": ["sigi_gis", "islandpath_gis"], + "gbk_paths": {self.test_genome.id: self.test_genome_gbk}, "sigi_gis": {}, "islandpath_gis": {} } def test_merge_single_gi_list(self): - self.report["sigi_gis"] = {"1": [["0", "100"], ["400", "600"]]} - self.report["islandpath_gis"] = {"1": []} + GenomicIsland( + method="sigi", + start=0, + end=100, + genome=self.test_genome + ).save() + GenomicIsland( + method="sigi", + start=400, + end=600, + genome=self.test_genome + ).save() component = MergeIslandsPipelineComponent() @@ -295,11 +323,28 @@ def test_merge_single_gi_list(self): component.analysis(self.report) component.cleanup() - self.assertEqual(len(self.report["sigi_gis"]), len(self.report["merge_gis"])) + self.assertTrue(self.test_genome.genomicisland_set.filter(method="merge").exists()) + self.assertEqual(len(self.test_genome.genomicisland_set.filter(method="merge")), 1) def test_no_merge_gi_list(self): - self.report["sigi_gis"] = {"1": [["0", "100"], ["400", "600"]]} - self.report["islandpath_gis"] = {"1": [["1000", "1200"]]} + GenomicIsland( + method="sigi", + start=0, + end=100, + genome=self.test_genome + 
).save() + GenomicIsland( + method="sigi", + start=400, + end=600, + genome=self.test_genome + ).save() + GenomicIsland( + method="islandpath", + start=1000, + end=1200, + genome=self.test_genome + ).save() component = MergeIslandsPipelineComponent() component.set_threshold(100) @@ -308,12 +353,21 @@ def test_no_merge_gi_list(self): component.analysis(self.report) component.cleanup() - self.assertEqual(len(self.report["sigi_gis"]["1"]) + len(self.report["islandpath_gis"]["1"]), - len(self.report["merge_gis"]["1"])) + self.assertEqual(len(self.test_genome.genomicisland_set.filter(method="merge")), 3) def test_merge_gi_list(self): - self.report["sigi_gis"] = {"1": [["0", "100"]]} - self.report["islandpath_gis"] = {"1": [["199", "1200"]]} + GenomicIsland( + method="sigi", + start=0, + end=100, + genome=self.test_genome + ).save() + GenomicIsland( + method="islandpath", + start=199, + end=1200, + genome=self.test_genome + ).save() component = MergeIslandsPipelineComponent() @@ -321,11 +375,54 @@ def test_merge_gi_list(self): component.analysis(self.report) component.cleanup() - self.assertListEqual([["0", "1200"]], self.report["merge_gis"]["1"]) + self.assertEqual(len(self.test_genome.genomicisland_set.filter(method="merge")), 1) + self.assertEqual(self.test_genome.genomicisland_set.filter(method="merge")[0].start, 0) + self.assertEqual(self.test_genome.genomicisland_set.filter(method="merge")[0].end, 1200) def test_merge_secondlist_gi_list(self): - self.report["sigi_gis"] = {"1": [["199", "1200"]]} - self.report["islandpath_gis"] = {"1": [["0", "100"]]} + GenomicIsland( + method="sigi", + start=199, + end=1200, + genome=self.test_genome + ).save() + GenomicIsland( + method="islandpath", + start=0, + end=100, + genome=self.test_genome + ).save() + + component = MergeIslandsPipelineComponent() + + component.setup(self.report) + component.analysis(self.report) + component.cleanup() + + self.assertEqual(len(self.test_genome.genomicisland_set.filter(method="merge")), 1) + self.assertEqual(self.test_genome.genomicisland_set.filter(method="merge")[0].start, 0) + self.assertEqual(self.test_genome.genomicisland_set.filter(method="merge")[0].end, 1200) + + def test_merge_encompassed_gi(self): + GenomicIsland( + method="sigi", + start=1000, + end=2000, + genome=self.test_genome + ).save() + GenomicIsland( + method="sigi", + start=3000, + end=4000, + genome=self.test_genome + ).save() + GenomicIsland( + method="islandpath", + start=100, + end=5000, + genome=self.test_genome + ).save() + component = MergeIslandsPipelineComponent() @@ -333,4 +430,6 @@ def test_merge_secondlist_gi_list(self): component.analysis(self.report) component.cleanup() - self.assertListEqual([["0", "1200"]], self.report["merge_gis"]["1"]) + self.assertEqual(len(self.test_genome.genomicisland_set.filter(method="merge")), 1) + self.assertEqual(self.test_genome.genomicisland_set.filter(method="merge")[0].start, 100) + self.assertEqual(self.test_genome.genomicisland_set.filter(method="merge")[0].end, 5000) diff --git a/IslandCompare/analysis/tests_pipeline.py b/IslandCompare/analysis/tests_pipeline.py index cb05fa50..f85cda26 100644 --- a/IslandCompare/analysis/tests_pipeline.py +++ b/IslandCompare/analysis/tests_pipeline.py @@ -161,6 +161,7 @@ class PipelineSerializerTestCase(TestCase): test_component = PipelineComponentStub() test_component.name = "component_1" test_pipeline_components = [test_component] + test_failed_components = {'failure_1': 1} test_username = "test_username" test_user = None @@ -179,6 +180,7 @@ def setUp(self): 
self.pipeline.available_dependencies = self.test_available_dependencies self.pipeline.pipeline_components = self.test_pipeline_components self.pipeline.analysis = self.test_analysis + self.pipeline.failed_components = self.test_failed_components def test_serialize_pipeline(self): json_dict = PipelineSerializer.serialize(self.pipeline) @@ -186,13 +188,15 @@ def test_serialize_pipeline(self): self.assertEqual(self.test_available_dependencies, json_dict['available_dependencies']) self.assertTrue(self.test_component.name in json_dict['pipeline_components']) self.assertEqual(self.test_analysis.id, json_dict['analysis']) + self.assertEqual(self.test_failed_components, json_dict['failed_components']) def test_deserialize_pipeline(self): json_dict = { 'available_dependencies': self.test_available_dependencies, 'pipeline_components': [self.test_component.name], 'analysis': self.test_analysis.id, - 'component_param': {self.test_component.name: {}} + 'component_param': {self.test_component.name: {}}, + 'failed_components': self.test_failed_components } mock_factory = mock.MagicMock() @@ -204,3 +208,4 @@ def test_deserialize_pipeline(self): self.assertEqual(self.test_available_dependencies, deserialized_pipeline.available_dependencies) self.assertEqual(self.test_pipeline_components, deserialized_pipeline.pipeline_components) self.assertEqual(self.test_analysis, deserialized_pipeline.analysis) + self.assertEqual(self.test_failed_components, deserialized_pipeline.failed_components) diff --git a/IslandCompare/analysis/urls.py b/IslandCompare/analysis/urls.py index 7d6ea063..66fe43d4 100644 --- a/IslandCompare/analysis/urls.py +++ b/IslandCompare/analysis/urls.py @@ -5,6 +5,9 @@ url(r'^$', views.AnalysisListView.as_view(), name="analysis"), url(r'^details/(?P[0-9]+)$', views.AnalysisRetrieveUpdateView.as_view(), name="analysis_details"), url(r'^export/(?P[0-9]+)$', views.ExportAnalysisResultView.as_view(), name="analysis_export"), + url(r'^export-genes/(?P[0-9]+)$', views.ExportAnalysisGenesView.as_view(), name="analysis_genes_export"), url(r'^results/(?P[0-9]+)$', views.AnalysisResultsView.as_view(), name="analysis_results"), url(r'^run/', views.AnalysisRunView.as_view(), name="analysis_run"), + url(r'^delete/(?P[0-9]+)$', views.AnalysisDestroyView.as_view(), name="analysis_destroy"), + url(r'^islands/(?P[0-9]+)$', views.AnalysisGenomicIslandRetrieveView.as_view(), name="analysis_islands"), ] diff --git a/IslandCompare/analysis/views.py b/IslandCompare/analysis/views.py index 79daba64..ff0fb797 100644 --- a/IslandCompare/analysis/views.py +++ b/IslandCompare/analysis/views.py @@ -3,7 +3,7 @@ from rest_framework.permissions import IsAuthenticated from rest_framework.views import APIView from analysis.serializers import AnalysisSerializer, RunAnalysisSerializer, ReportCsvSerializer, \ - ReportVisualizationOverviewSerializer + ReportVisualizationOverviewSerializer, ReportGeneCsvSerializer, AnalysisGenomicIslandSerializer from analysis.models import Analysis, AnalysisComponent from rest_framework.response import Response from analysis.pipeline import Pipeline @@ -11,6 +11,7 @@ from analysis.tasks import run_pipeline_wrapper from celery.result import AsyncResult from rest_framework.parsers import FormParser, MultiPartParser +from genomes.models import GenomicIsland @@ -41,6 +42,15 @@ class AnalysisRetrieveUpdateView(generics.RetrieveUpdateAPIView): def get_queryset(self): return Analysis.objects.filter(owner=self.request.user) +class AnalysisDestroyView(generics.RetrieveDestroyAPIView): + """ + Destroy 
Analysis + """ + permission_classes = [IsAuthenticated] + serializer_class = AnalysisSerializer + + def get_queryset(self): + return Analysis.objects.filter(owner=self.request.user) class AnalysisRunView(APIView): """ @@ -58,7 +68,6 @@ def post(self, request): pipeline.append_component(components.StartPipelineComponent()) pipeline.append_component(components.SetupGbkPipelineComponent()) pipeline.append_component(components.GbkMetadataComponent()) - pipeline.append_component(components.RGIPipelineComponent()) if 'newick' in serializer.validated_data: serializer.validated_data['newick'].seek(0) @@ -73,18 +82,17 @@ def post(self, request): pipeline.append_component(components.MauvePipelineComponent()) if 'gi' in serializer.validated_data: - serializer.validated_data['gi'].seek(0) user_gi_component = components.UserGIPipelineComponent() - user_gi_file_contents = serializer.validated_data['gi'].read().decode('utf-8') - user_gi_file_contents = re.sub(r'(\.genbank|\.gbff)', ".gbk", user_gi_file_contents) - user_gi_component.set_gi(user_gi_file_contents) + user_gi_component.set_gi(serializer.validated_data['gi']) pipeline.append_component(user_gi_component) else: pipeline.append_component(components.SigiHMMPipelineComponent()) pipeline.append_component(components.IslandPathPipelineComponent()) pipeline.append_component(components.MergeIslandsPipelineComponent()) + if serializer.validated_data["cluster_gis"]: pipeline.append_component(components.MashMclClusterPipelineComponent()) + pipeline.append_component(components.RGIPipelineComponent()) pipeline.append_component(components.EndPipelineComponent()) pipeline.create_database_entry(name=serializer.validated_data['name'], genomes=serializer.validated_data['genomes'], @@ -132,7 +140,7 @@ def retrieve(self, request, *args, **kwargs): class ExportAnalysisResultView(generics.RetrieveAPIView): """ - Retrieve a CSV of the Analysis + Retrieve a CSV of the Analysis GIs """ permission_classes = [IsAuthenticated] serializer_class = AnalysisSerializer @@ -145,10 +153,30 @@ def retrieve(self, request, *args, **kwargs): task = AsyncResult(analysis.celery_task_id) if task.status == 'SUCCESS': - end_pipeline = AnalysisComponent.objects.get(analysis=analysis, - type__name="end_pipeline") - result = AsyncResult(end_pipeline.celery_task_id).get() - return Response(ReportCsvSerializer(result).data) + return Response(ReportCsvSerializer(analysis).data) + elif task.status == 'FAILURE': + response = Response() + response.status_code = 500 + response.data = {'content': "Job has errored"} + return response + else: + response = Response() + response.status_code = 202 + response.data = {'content': "Job has not completed"} + return response + +class ExportAnalysisGenesView(generics.RetrieveAPIView): + """ + Retrieve a CSV of the Analysis GI genes + """ + permission_classes = [IsAuthenticated] + + def retrieve(self, request, *args, **kwargs): + analysis = Analysis.objects.get(id=kwargs['pk']) + task = AsyncResult(analysis.celery_task_id) + + if task.status == 'SUCCESS': + return Response(ReportGeneCsvSerializer(analysis).data) elif task.status == 'FAILURE': response = Response() response.status_code = 500 @@ -159,3 +187,21 @@ def retrieve(self, request, *args, **kwargs): response.status_code = 202 response.data = {'content': "Job has not completed"} return response + +class AnalysisGenomicIslandRetrieveView(generics.RetrieveAPIView): + """ + Retrieve GIs of the analysis + """ + permission_classes = [IsAuthenticated] + serializer_class = AnalysisGenomicIslandSerializer + + def 
get_queryset(self): + return Analysis.objects.filter(owner=self.request.user) + + def retrieve(self, request, *args, **kwargs): + analysis = Analysis.objects.get(id=kwargs['pk']) + gis = GenomicIsland.objects.filter(genome__in=analysis.genomes.all()) + + serializer = self.serializer_class(gis, many=True) + + return Response(serializer.data) diff --git a/IslandCompare/genomes/migrations/0002_gene.py b/IslandCompare/genomes/migrations/0002_gene.py new file mode 100644 index 00000000..603e786c --- /dev/null +++ b/IslandCompare/genomes/migrations/0002_gene.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10.5 on 2017-07-13 21:23 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('genomes', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='Gene', + fields=[ + ('id', models.AutoField(primary_key=True, serialize=False)), + ('name', models.CharField(max_length=50)), + ('start', models.IntegerField()), + ('end', models.IntegerField()), + ('strand', models.SmallIntegerField()), + ('type', models.CharField(max_length=4)), + ('genome', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='genomes.Genome')), + ], + ), + ] diff --git a/IslandCompare/genomes/migrations/0003_auto_20170720_2135.py b/IslandCompare/genomes/migrations/0003_auto_20170720_2135.py new file mode 100644 index 00000000..5465dbd6 --- /dev/null +++ b/IslandCompare/genomes/migrations/0003_auto_20170720_2135.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10.5 on 2017-07-20 21:35 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('genomes', '0002_gene'), + ] + + operations = [ + migrations.RemoveField( + model_name='gene', + name='name', + ), + migrations.AddField( + model_name='gene', + name='gene', + field=models.CharField(default='', max_length=12), + preserve_default=False, + ), + migrations.AddField( + model_name='gene', + name='locus_tag', + field=models.CharField(default='', max_length=12), + preserve_default=False, + ), + migrations.AddField( + model_name='gene', + name='product', + field=models.CharField(default='', max_length=200), + preserve_default=False, + ), + ] diff --git a/IslandCompare/genomes/migrations/0004_genomicisland.py b/IslandCompare/genomes/migrations/0004_genomicisland.py new file mode 100644 index 00000000..f4739a15 --- /dev/null +++ b/IslandCompare/genomes/migrations/0004_genomicisland.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10.5 on 2017-07-26 21:44 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('genomes', '0003_auto_20170720_2135'), + ] + + operations = [ + migrations.CreateModel( + name='GenomicIsland', + fields=[ + ('id', models.AutoField(primary_key=True, serialize=False)), + ('method', models.CharField(max_length=10)), + ('start', models.IntegerField()), + ('end', models.IntegerField()), + ('genome', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='genomes.Genome')), + ], + ), + ] diff --git a/IslandCompare/genomes/migrations/0005_usergenomicisland.py b/IslandCompare/genomes/migrations/0005_usergenomicisland.py new file mode 100644 index 00000000..da2ff347 --- /dev/null +++ 
b/IslandCompare/genomes/migrations/0005_usergenomicisland.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.10.5 on 2017-08-01 22:41 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('analysis', '0003_analysis_clusters'), + ('genomes', '0004_genomicisland'), + ] + + operations = [ + migrations.CreateModel( + name='UserGenomicIsland', + fields=[ + ('genomicisland_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='genomes.GenomicIsland')), + ('color', models.CharField(max_length=30)), + ('analysis', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='analysis.Analysis')), + ], + bases=('genomes.genomicisland',), + ), + ] diff --git a/IslandCompare/genomes/models.py b/IslandCompare/genomes/models.py index 92ebf63f..f97fb361 100644 --- a/IslandCompare/genomes/models.py +++ b/IslandCompare/genomes/models.py @@ -13,8 +13,29 @@ class Genome(models.Model): class Meta: unique_together = ('name', 'owner') - @receiver(post_delete, sender=Genome) def delete_gbk(sender, instance, **kwargs): if instance.gbk is not None: instance.gbk.delete(save=False) + +class Gene(models.Model): + id = models.AutoField(primary_key=True) + type = models.CharField(max_length=4) + gene = models.CharField(max_length=12) + locus_tag = models.CharField(max_length=12) + product = models.CharField(max_length=200) + start = models.IntegerField() + end = models.IntegerField() + strand = models.SmallIntegerField() + genome = models.ForeignKey(Genome, on_delete=models.CASCADE) + +class GenomicIsland(models.Model): + id = models.AutoField(primary_key=True) + method = models.CharField(max_length=10) + start = models.IntegerField() + end = models.IntegerField() + genome = models.ForeignKey(Genome, on_delete=models.CASCADE) + +class UserGenomicIsland(GenomicIsland): + analysis = models.ForeignKey("analysis.Analysis") + color = models.CharField(max_length=30) diff --git a/IslandCompare/genomes/serializers.py b/IslandCompare/genomes/serializers.py index 427f682c..8a24b905 100644 --- a/IslandCompare/genomes/serializers.py +++ b/IslandCompare/genomes/serializers.py @@ -1,4 +1,4 @@ -from genomes.models import Genome +from genomes.models import Genome, Gene from rest_framework import serializers from Bio import SeqIO from Bio.Seq import UnknownSeq @@ -10,6 +10,9 @@ class GenomeSerializer(serializers.ModelSerializer): Serializer for genomes submitted by the user. 
Ensures that genome file names are unique and that gbk files are valid """ + name = serializers.CharField(max_length=100) + gbk = serializers.FileField() + class Meta: model = Genome fields = ('id', 'name', 'gbk') @@ -32,7 +35,7 @@ def validate_gbk(self, value): """ # 'value' is of type # SeqIO.parse cannot read this as its contents are of type bytes rather than str - # 'gbk' is created as a decded 'value', to be passed to SeqIO.parse + # 'gbk' is created as a decoded 'value', to be passed to SeqIO.parse with StringIO(value.read().decode("utf-8")) as gbk: value.seek(0) gbk_records = list(SeqIO.parse(gbk, 'genbank')) @@ -82,7 +85,7 @@ def update(self, instance, validated_data): class GenomeGenesSerializer(serializers.Serializer): """ - Serializer for returning genes for a genome + Serializer for returning genes for a genome by parsing the genbank file """ logger = logging.getLogger(__name__) start_cut_off = None @@ -94,16 +97,17 @@ def get_genes_from_gbk(self, filePath, start_cut_off=None, end_cut_off=None): if start_cut_off is None and end_cut_off is not None: self.logger.warning("End cut off is set but start cut off is not. Not using specified cut offs") - # Given a path to a gbk file, this will return all CDS + # Given a path to a gbk file, this will return all genes geneList = [] for record in SeqIO.parse(open(filePath), "genbank"): for feature in record.features: geneInfo = {} - if feature.type == 'gene' or feature.type == 'CDS': + if feature.type in ["gene", "rRNA", "tRNA"]: # Bio.SeqIO returns 1 for (+) and -1 for (-) geneInfo['strand'] = feature.location.strand geneInfo['start'] = feature.location.start geneInfo['end'] = feature.location.end + geneInfo['type'] = feature.type try: geneInfo['note'] = feature.qualifiers['note'] except: @@ -113,7 +117,7 @@ def get_genes_from_gbk(self, filePath, start_cut_off=None, end_cut_off=None): except: logging.info("No Name Found For This Gene") try: - geneInfo['name'] = feature.qualifiers['locus_tag'] + geneInfo['name'] = feature.qualifiers['locus_tag'][0] except: logging.info("No Locus Found For This Gene") if start_cut_off is not None and end_cut_off is not None: @@ -137,4 +141,24 @@ def to_internal_value(self, data): return data def update(self, instance, validated_data): - return validated_data \ No newline at end of file + return validated_data + +class GeneSerializer(serializers.Serializer): + """ + Serializer for Gene objects + """ + type = serializers.CharField(max_length=4) + gene = serializers.CharField(max_length=12) + locus_tag = serializers.CharField(max_length=12) + product = serializers.CharField(max_length=200) + start = serializers.IntegerField() + end = serializers.IntegerField() + strand = serializers.IntegerField() + +class GenomicIslandSerializer(serializers.Serializer): + """ + Serializer for GenomicIsland objects + """ + method = serializers.CharField(max_length=10) + start = serializers.IntegerField() + end = serializers.IntegerField() diff --git a/IslandCompare/genomes/tests.py b/IslandCompare/genomes/tests.py old mode 100644 new mode 100755 index 4acba78f..acec1b1e --- a/IslandCompare/genomes/tests.py +++ b/IslandCompare/genomes/tests.py @@ -1,6 +1,6 @@ from django.test import TestCase from rest_framework.test import APIRequestFactory, force_authenticate -from genomes.models import Genome +from genomes.models import Genome, Gene from genomes.views import GenomeListView, GenomeUploadView from django.contrib.auth.models import User from django.core.files.uploadedfile import SimpleUploadedFile @@ -8,7 +8,7 @@ from 
rest_framework.reverse import reverse from rest_framework.test import APIClient from django.core.files import File -from genomes.serializers import GenomeGenesSerializer +from genomes.serializers import GenomeSerializer, GenomeGenesSerializer, GeneSerializer # Create your tests here. @@ -182,6 +182,7 @@ class UpdateGenomeTestCase(TestCase): test_name = "test_genome" test_gbk = None test_genome = None + test_gbk_path = '../TestFiles/AE009952.gbk' def setUp(self): self.factory = APIRequestFactory() @@ -198,10 +199,7 @@ def test_authenticated_update_genome(self): url = reverse('genome_details', kwargs={'pk': self.test_genome.id}) updated_name = "updated_genome" - updated_gbk_name = "test.gbk" - with open('../TestFiles/AE009952.gbk', mode='rb') as test_gbk: - updated_gbk_content = test_gbk.read() - updated_gbk = SimpleUploadedFile(updated_gbk_name, updated_gbk_content) + updated_gbk = File(open(self.test_gbk_path)) client = APIClient() client.force_authenticate(user=self.test_user) @@ -213,7 +211,8 @@ self.assertEqual(200, response.status_code) self.assertEqual(updated_name, genome.name) - self.assertEqual(updated_gbk_content, genome.gbk.read()) + updated_gbk.seek(0) + self.assertEqual(updated_gbk.read().encode('utf-8'), genome.gbk.read()) def test_authenticated_update_genome_no_records(self): url = reverse('genome_details', kwargs={'pk': self.test_genome.id}) @@ -387,8 +386,60 @@ def test_gene_filter_serializer(self): serializer.start_cut_off = 4598500 serializer.end_cut_off = 4744561 serializer.is_valid() - self.assertEqual(2, len(serializer.data['genes'])) + self.assertEqual(1, len(serializer.data['genes'])) def tearDown(self): for genome in Genome.objects.all(): genome.delete() + + +class GeneSerializerTestCase(TestCase): + test_username = "username" + test_user = None + + test_name = "test_genome" + test_gbk_path = '../TestFiles/AE009952.gbk' + test_genome = None + + test_genes = None + + def setUp(self): + self.test_user = User(username=self.test_username) + self.test_user.save() + + test_gbk = File(open(self.test_gbk_path)) + + self.test_genome = Genome(name=self.test_name, + owner=self.test_user, + gbk=test_gbk) + + self.test_genome.save() + test_gbk.close() + Gene(gene="test_gene_1", + start=0, + end=1000, + strand=1, + type="gene", + genome=self.test_genome).save() + Gene(gene="test_gene_2", + start=5000, + end=10000, + strand=-1, + type="tRNA", + genome=self.test_genome).save() + self.test_genes = self.test_genome.gene_set.all() + + def test_gene_serializer(self): + serializer = GeneSerializer(data=self.test_genes, many=True) + serializer.is_valid() + self.assertEqual(2, len(serializer.data)) + + def test_gene_filter_serializer(self): + filtered_genes = self.test_genes.filter(start__gte=2500).filter(end__lte=1000000) + serializer = GeneSerializer(data=filtered_genes, many=True) + serializer.is_valid() + self.assertEqual(1, len(serializer.data)) + + def tearDown(self): + for genome in Genome.objects.all(): + genome.delete() \ No newline at end of file diff --git a/IslandCompare/genomes/urls.py b/IslandCompare/genomes/urls.py index cb6ff191..531d7cc9 100644 --- a/IslandCompare/genomes/urls.py +++ b/IslandCompare/genomes/urls.py @@ -6,4 +6,5 @@ url(r'^genes/(?P<pk>[0-9]+)$', views.GenomeGeneRetrieveView.as_view(), name="genome_genes"), url(r'^upload/$', views.GenomeUploadView.as_view(), name="genome_upload"), url(r'^details/(?P<pk>[0-9]+)$', views.GenomeRetrieveUpdateDestroyView.as_view(), name="genome_details"), + url(r'^islands/(?P<pk>[0-9]+)$',
views.GenomicIslandRetrieveView.as_view(), name="genomic_islands") ] diff --git a/IslandCompare/genomes/views.py b/IslandCompare/genomes/views.py index c953c426..f953cb42 100644 --- a/IslandCompare/genomes/views.py +++ b/IslandCompare/genomes/views.py @@ -1,6 +1,6 @@ from rest_framework import generics, response from rest_framework.permissions import IsAuthenticated -from genomes.serializers import GenomeSerializer, GenomeUploadSerializer, GenomeGenesSerializer +from genomes.serializers import GenomeSerializer, GenomeUploadSerializer, GeneSerializer, GenomicIslandSerializer from genomes.models import Genome from rest_framework.parsers import MultiPartParser, FormParser from rest_framework.response import Response @@ -38,7 +38,7 @@ def create(self, request, *args, **kwargs): ) if genome_serializer.is_valid(): genome_serializer.save() - return response.Response(status=201) + return response.Response(data=genome_serializer.data['id'], status=201) else: errors = "" for key in genome_serializer.errors: @@ -59,13 +59,20 @@ class GenomeRetrieveUpdateDestroyView(generics.RetrieveUpdateDestroyAPIView): def get_queryset(self): return Genome.objects.filter(owner=self.request.user) + def delete(self, request, *args, **kwargs): + genome = self.get_object() + for analysis in genome.analysis_set.all(): + analysis.delete() + genome.delete() + return response.Response(status=204) + class GenomeGeneRetrieveView(generics.RetrieveAPIView): """ - Retrieve Gene Information for a Group of Genomes + Retrieve Gene Information for a Genome """ permission_classes = [IsAuthenticated] - serializer_class = GenomeGenesSerializer + serializer_class = GeneSerializer def get_queryset(self): return Genome.objects.filter(owner=self.request.user) @@ -74,10 +81,37 @@ def retrieve(self, request, *args, **kwargs): queryset = self.get_queryset() url_queries = self.request.query_params - serializer = GenomeGenesSerializer(queryset.get(id=kwargs['pk'])) + genes = queryset.get(id=kwargs['pk']).gene_set.all() + + if 'start' in url_queries: + genes = genes.filter(end__gte=url_queries['start']) + if 'end' in url_queries: + genes = genes.filter(start__lte=url_queries['end']) + + serializer = GeneSerializer(genes, many=True) + + return Response({'genes': serializer.data}) + +class GenomicIslandRetrieveView(generics.RetrieveAPIView): + """ + Retrieve Genomic Islands for a Genome + """ + permission_classes = [IsAuthenticated] + serializer_class = GenomicIslandSerializer + + def get_queryset(self): + return Genome.objects.filter(owner=self.request.user) + + def retrieve(self, request, *args, **kwargs): + url_queries = self.request.query_params + genome = Genome.objects.get(id=kwargs['pk']) + gis = genome.genomicisland_set.all() + + if 'start' in url_queries: + gis = gis.filter(end__gte=url_queries['start']) + if 'end' in url_queries: + gis = gis.filter(start__lte=url_queries['end']) - if 'start' in url_queries and 'end' in url_queries: - serializer.start_cut_off = int(url_queries['start']) - serializer.end_cut_off = int(url_queries['end']) + serializer = self.serializer_class(gis, many=True) return Response(serializer.data) diff --git a/web/dist/css/linearplot.css b/web/dist/css/linearplot.css index 470080ce..b2eba5d8 100644 --- a/web/dist/css/linearplot.css +++ b/web/dist/css/linearplot.css @@ -49,7 +49,7 @@ g.root.node circle{ stroke: #0d0d0d; } -.homologous-region polygon, #homologosRegionLegend{ +.homologous-region polygon, #homologousRegionLegend{ fill: #d1d1d1; stroke: #A4A4A4; } @@ -67,14 +67,24 @@ g.root.node circle{ stroke:#ff9933; 
} -.genes.print polygon, #genesLegend.print{ - fill: #0d0d0d; - stroke: #0d0d0d; +.genes polygon.gene, .genes polygon.CDS, #genesLegend{ + fill: #6acbff; + /*stroke: #6acbff;*/ } -.genes polygon, #genesLegend{ - fill: #6acbff; - stroke: #6acbff; +.genes polygon.tRNA, #tRNALegend{ + fill: #c26dff; + /*stroke: #c26dff;*/ +} + +.genes polygon.rRNA, #rRNALegend{ + fill: #7ceeb2; + /*stroke: #7ceeb2;*/ +} + +.genes.print polygon, #genesLegend.print, #tRNALegend.print, #rRNALegend.print{ + fill: #0d0d0d; + stroke: #0d0d0d; } .amrs.print polygon, #amrLegend.print{ @@ -83,7 +93,7 @@ g.root.node circle{ } .amrs polygon, #amrLegend{ - fill: red; + fill: #ff34f3; } #alignmentControls{ @@ -105,14 +115,34 @@ g.root.node circle{ margin-right: 8px; } +.legendBody, .predictorLegend, #clusterLegend{ + padding: 3px; +} + +.GIColourBody label{ + font-weight: normal; +} + .controlSide{ display: table-cell; } -.islandpath{ +.islandpath, #islandpathLegend{ fill: red; } -.sigi{ +.sigi, #sigiLegend{ fill: blue; } + +.GIColourBody label{ + margin: 2px; +} + +.loadingGenes{ + margin: 5px; +} + +.legendText{ + margin-right: 5px; +} \ No newline at end of file diff --git a/web/dist/js/clustervis.js b/web/dist/js/clustervis.js new file mode 100644 index 00000000..7724cf95 --- /dev/null +++ b/web/dist/js/clustervis.js @@ -0,0 +1,200 @@ +function createClusterVisualization() { + var SEQSTART = 115; // Islands will begin at this horizontal distance from the left - makes room for position. + var SVGPADDING = 30; // Accounts for padding of each div + var SEQPADDING = 25; // Vertical distance between each island in a genome + var TEXTOFFSET = 15; // Adjustment to align position text with island + var RECTHEIGHT = 10; // Height of the island rect elements + + var clusterDict = window.clusterDict; + if (clusterDict === undefined) { + clusterDict = JSON.parse(sessionStorage.getItem("clusterDict")); + } else { + // Store the clusterDict so it can be retrieved if the page is refreshed + sessionStorage.setItem("clusterDict", JSON.stringify(clusterDict)); + } + + var cluster = clusterDict.cluster; + $("#header").html("Cluster " + cluster); + var color = clusterDict.color; + var visWidth = $("#visualizationBody").width(); + var scale = getScale(clusterDict.sequences, visWidth - SVGPADDING - SEQSTART); + + // D3.js // + var seq = d3.select("#visualizationBody") + .selectAll("div") + .data(clusterDict.sequences); + + var div = seq.enter().append("div"); + div.attr("class", function(seq) { + return seq.name; + }); + // Add genome name + div.append("p").append("b") + .html(function(seq) { + return seq.name; + }); + + var svg = div.append("svg") + .attr("height", function(seq) { + return seq.islands.length * SEQPADDING; + }) + .attr("width", visWidth - SVGPADDING); + + var rectGroup = svg.selectAll("g") + .data(function(seq) { + return seq.islands; + }); + + rectGroup.enter().append("g"); + // Add start and Stop text + rectGroup.append("text") + .text(function(d) { + return d[0] + " - " + d[1]; + }) + .attr("y", function(d, i) { + return (i * SEQPADDING) + TEXTOFFSET; + }); + // Add the rect representing the genomic island + rectGroup.append("rect") + .attr("y", function(d, i) { + return (i * SEQPADDING) + RECTHEIGHT / 2; + }) + .attr("height", RECTHEIGHT) + .attr("width", function(d) { + return scale(Math.abs(d[1] - d[0])); + }) + .attr("rx", 5) + .attr("ry", 5) + .attr("fill", color) + .attr("transform", "translate(" + SEQSTART +",0)"); + // Add the genes + rectGroup.append("g") + .attr("class", "genes") + .attr("transform", 
function(d, i) { + return "translate(" + SEQSTART + "," + i * SEQPADDING + ")"; + }).each(function(island) { + var geneGroup = d3.select(this); + // For each island, an asynchronous call is made to fetch the genes within its boundaries. + // This prevents the page from freezing up while it gets all the genes. + $.ajax({ + url: genomesGenesUrl + island[2] + "?start=" + island[0] + "&end=" + island[1], + headers: { + "Authorization": 'Token' + ' ' + getAuthenticationCookie() + }, + success: function(data) { + // The genes are added to the island once they have been retrieved from the database. + geneGroup.selectAll("polygon") + .data(data.genes) + .enter().append("polygon") + .attr("points", function(gene) { + var geneStart = gene.start - island[0]; + // Cut off genes that extend beyond the island boundary + var geneEnd = Math.min(gene.end - island[0], island[1] - island[0]); + return scale(geneStart) + "," + RECTHEIGHT / 2 + "," + + scale(geneEnd) + "," + RECTHEIGHT / 2 + "," + + scale(geneEnd) + "," + RECTHEIGHT + "," + + scale(geneStart) + "," + RECTHEIGHT; + }) + .attr("class", function(gene) { + return gene.type; // gene, rRNA, or tRNA + }) + .attr("transform", function(gene) { + // Move negative strand genes to the bottom half of the island + if (gene.strand === -1) { + return "translate(0," + RECTHEIGHT / 2 +")"; + } + }) + .append("title") + .text(function(gene) { + var hover = ""; + if(gene.gene) { + hover = hover.concat("Gene name: " + gene.gene + "\n"); + } + if(gene.locus_tag) { + hover = hover.concat("Locus tag: " + gene.locus_tag + "\n"); + } + if(gene.product) { + hover = hover.concat("Product: " + gene.product) + } + return hover; + }); + } + }); + }); + // Add the AMR genes + rectGroup.append("g").attr("class", "amrs") + .attr("transform", function(d, i) { + return "translate(" + SEQSTART + "," + i * SEQPADDING + ")" + }) + .each(function(island, i) { + var start = island[0]; + var end = island[1]; + var seq = island[2]; + var index = clusterDict.sequences.findIndex(i => i.id === seq); + var amrs = clusterDict.sequences[index].amr; + for (var i = 0; i < amrs.length; i++) { + if (amrs[i].start < end && amrs[i].end > start) { + var amr_start = scale(amrs[i].start - start); + var amr_end = scale(amrs[i].end - start); + var neg_strand = (amrs[i].strand === "-"); + d3.select(this).append("polygon") + .attr("points", function() { + // Returns a trapezoid shape.
+ // Shape depends on whether it is on negative strand and + // whether it is within the boundaries of the GI + var points = ""; + if (neg_strand) { + if (amrs[i].start < start) { + points += "0,0,0," + RECTHEIGHT / 2 + ","; + } else { + points += amr_start + ",0," + + (amr_start + RECTHEIGHT / 2) + "," + RECTHEIGHT / 2 + ","; + } + if (amrs[i].end > end) { + points += scale(end - start) + "," + RECTHEIGHT / 2 + + "," + scale(end - start) + ",0"; + } else { + points += (amr_end - RECTHEIGHT / 2) + "," + RECTHEIGHT / 2 + "," + + amr_end + ",0"; + } + } else { + if (amrs[i].start < start) { + points += "0,0,0," + RECTHEIGHT / 2 + ","; + } else { + points += (amr_start + RECTHEIGHT / 2) + ",0," + amr_start + "," + RECTHEIGHT / 2 + ","; + } + if (amrs[i].end > end) { + points += scale(end - start) + "," + RECTHEIGHT / 2 + + "," + scale(end - start) + ",0"; + } else { + points += amr_end + "," + RECTHEIGHT / 2 + "," + + (amr_end - RECTHEIGHT / 2) + ",0"; + } + } + return points; + }) + .attr("transform", function() { + // Move negative strand AMR genes to the underside of the GI + if (neg_strand) { + return "translate(0, "+ 3 / 2 * RECTHEIGHT +")" + } + }); + } + } + }); +} + +function getScale(sequences, containerWidth) { + var lengths = []; + for (var seqIndex = 0; seqIndex < sequences.length; seqIndex++) { + var islands = sequences[seqIndex].islands; + for (var islandIndex = 0; islandIndex < islands.length; islandIndex++) { + lengths.push(Math.abs(islands[islandIndex][1] - islands[islandIndex][0])); + } + } + var max = Math.max.apply(null, lengths); + return d3.scale.linear() + .domain([0,max]) + .range([0,containerWidth]) + .clamp(true); +} \ No newline at end of file diff --git a/web/dist/js/genomecomparevis.js b/web/dist/js/genomecomparevis.js index 9836ec26..41799b70 100644 --- a/web/dist/js/genomecomparevis.js +++ b/web/dist/js/genomecomparevis.js @@ -30,6 +30,7 @@ function MultiVis(targetNode){ this.newickData = null; this.newickRoot = null; this.trueBranchLengths = false; + this.cluster = null; this.toggleTrueBranchLengths = function(){ this.trueBranchLengths = !this.trueBranchLengths; @@ -175,7 +176,7 @@ function MultiVis(targetNode){ //Readjusts the graph for updated sequence domains, TODO (improve later, currently just re-renders graph) this.transition = function(){ - this.container.select("svg").remove(); + this.container.select("svg").remove(); //Redundant? 
this.render(); }; @@ -192,12 +193,25 @@ function MultiVis(targetNode){ this.transition(); }; + // Post-order tree traversal to get the order of the sequences + this.traverseTreeForOrder = function (node) { + var tree = []; + if (node.branchset != null) { + for (var nodeIndex = 0; nodeIndex < node.branchset.length; nodeIndex++) { + var output = this.traverseTreeForOrder(node.branchset[nodeIndex]); + Array.prototype.push.apply(tree, output); + } + } + else { + return [node['name']]; + } + return tree; + }; + //Renders the graph this.render = function (){ this.container.select("svg").remove(); - if (self.scale == null){ - self.setScale(0,this.getLargestSequenceSize()); - } + // Gets the sequenceOrder of the graph var seqOrder = self.getSequenceOrder(); @@ -209,6 +223,11 @@ function MultiVis(targetNode){ .attr("width",this.containerWidth()) .attr("height",(this.containerHeight()+this.getSequenceModHeight()*2)+TOPPADDING); + //Scale is set after adding svg as adding svg changes container width + if (self.scale == null){ + self.setScale(0,this.getLargestSequenceSize()); + } + //Add the visualization container var visContainer = svg.append("g") .attr("class","visContainer"); @@ -233,26 +252,8 @@ function MultiVis(targetNode){ self.newickRoot = d; //Set the order of the sequences (as linear plot and tree should match) //Do a post-order traversal on tree looking for leaves. - var tree = []; - traverseTreeForOrder = function (node) { - if (node['children'] != null) { - for (var nodeIndex = 0; nodeIndex < node['children'].length; nodeIndex++) { - output = traverseTreeForOrder(node['children'][nodeIndex]); - if (output != null) { - tree.push(output); - } - } - } - else { - var memory = (node['name'].replace(/'/g, "").split(/[\\/]/).pop()); - memory = memory.split(".")[0]; - return self.backbone.getSequenceIdFromName(memory); - } - return null; - }; - traverseTreeForOrder(self.newickRoot); - //Set the sequenceOrder of the tree to the sequences obtained from traversal of the new treeRoot - self.sequenceOrder = tree; + var seqIds = self.traverseTreeForOrder(self.newickRoot); + self.sequenceOrder = self.backbone.getIndicesFromIds(seqIds); //Re-renders the graph self.transition(); } @@ -263,29 +264,28 @@ function MultiVis(targetNode){ var sequenceHolder = visContainer.append("svg") .attr("width",this.visualizationWidth()) .append("g") - .attr("transform","translate("+ 0 +","+GISIZE+")"); + .attr("transform","translate("+ 0 +","+(GISIZE / 2)+")"); //Draw Homologous Region Lines var lines = []; - - for (var i=0; i= 0) { + return width; + } }); - //Add the gis to the SVG - var giFiltervalue = self.getGIFilterValue(); - var gis = seq.each(function(d, i){ - var genomicIslandcontainer = seq.append("g") - .attr("class","genomicIslands") - .attr("transform","translate("+ 0 +"," - +0+")"); - for (var prediction_name in d.gi){ - var giClassContainer = genomicIslandcontainer.append("g") - .attr("class", prediction_name); - - for (var giIndex=0;giIndex< d.gi[prediction_name].length;giIndex++){ - var startPosition = parseInt(d.gi[prediction_name][giIndex]['start']); - var endPosition = parseInt(d.gi[prediction_name][giIndex]['end']); - - if ((endPosition - startPosition)>giFiltervalue) { - var rectpoints = self.scale(startPosition) + "," + (self.getSequenceModHeight() * i + GISIZE / 2) + " "; - rectpoints += self.scale(endPosition) + "," + (self.getSequenceModHeight() * i + GISIZE / 2) + " "; - rectpoints += self.scale(endPosition) + "," + (self.getSequenceModHeight() * i - GISIZE / 2) + " "; - rectpoints += 
self.scale(startPosition) + "," + (self.getSequenceModHeight() * i - GISIZE / 2) + " "; - - var gi = giClassContainer.append("polygon") - .attr("points", rectpoints) - .attr("stroke-width", 1) - .attr("transform", "translate(" + 0 + "," + (GISIZE / 2) + ")"); - - // if color was given for this gi, then color the fill and stroke of this gi to the given color - if (d.gi[prediction_name][giIndex]['color'] != null) { - gi.attr("fill", d.gi[prediction_name][giIndex]['color']) - .attr("stroke", d.gi[prediction_name][giIndex]['color']); + //Add GIs to each sequence + seq.append("g").attr("class","genomicIslands").each(function(seqData, i){ + var methods = d3.select(this).selectAll("g") + .data(Object.keys(seqData.gi)) + .enter().append("g") + .attr("class", function(method) { + return method; + }); + methods.each(function(method) { + d3.select(this).selectAll("polygon") + .data(seqData.gi[method].filter(function(gi) { + // Filter out the GIs that aren't within the current scale or are too small + var withinScale = gi.start < self.scale.domain()[1] && gi.end > self.scale.domain()[0]; + var withinLength = gi.end - gi.start > self.getGIFilterValue(); + return withinScale && withinLength; + })) + .enter().append("polygon") + .attr("points", function(gi) { + var startPosition = self.scale(parseInt(gi.start)); + var endPosition = self.scale(parseInt(gi.end)); + return startPosition + "," + (self.getSequenceModHeight() * i + GISIZE) + " " + + endPosition + "," + (self.getSequenceModHeight() * i + GISIZE) + " " + + endPosition + "," + (self.getSequenceModHeight() * i) + " " + + startPosition + "," + (self.getSequenceModHeight() * i) + " "; + }) + .attr("start", function(gi) { return gi.start; }) + .attr("end",function(gi) { return gi.end; }) + .attr("fill", function(gi) { + if (gi.color != null) { return gi.color; } + }) + .attr("stroke", function(gi) { + if (gi.color != null) { return gi.color; } + }) + .attr("class", function(gi) { + if (gi.cluster != null) { return "cluster-" + gi.cluster; } + }) + .on("mouseover", function(gi) { + if(gi.cluster != null) { self.highlightCluster(".cluster-" + gi.cluster); } + }) + .on("mouseout", function(gi) { + if (gi.cluster != null) { self.unhighlightCluster(".cluster-" + gi.cluster); } + }) + .on("click", function(gi) { + if (gi.cluster != null) { + try { + self.openClusterPage(self, gi.cluster, gi.color); + } catch(err) { + alert("Problem encountered when constructing cluster view: \n\n" + err); + } } - } - } - } - }); - - //Add AMR genes to the SVG - var amrcontainer = seq.append("g") - .attr("class", "amrs"); - seq.each(function(d, i) { - for (var amrIndex = 0; amrIndex < d.amr.length; amrIndex++) { - var amr = d.amr[amrIndex]; - var startPosition = parseInt(amr['start']); - var endPosition = parseInt(amr['end']); - var plus_strand = (amr['strand'] == "+") ? 
true : false; - var adjust = GISIZE * 2; - var polypoints = ""; - - // If scale is close, AMRs will be rendered as trapezoids - if ((self.scale.domain()[1] - self.scale.domain()[0]) < self.getGeneFilterValue() * 3) { - var width = Math.abs(self.scale(endPosition) - self.scale(startPosition)); - // min ensures the angles on the trapezoids are at most 45 degrees - polypoints = self.scale(startPosition) + Math.min(+!plus_strand * width / 3, GISIZE) + "," + (self.getSequenceModHeight() * i) + " "; - polypoints += self.scale(endPosition) - Math.min(+!plus_strand * width / 3, GISIZE) + "," + (self.getSequenceModHeight() * i) + " "; - polypoints += self.scale(endPosition) - Math.min(+plus_strand * width / 3, GISIZE) + "," + (self.getSequenceModHeight() * i - GISIZE) + " "; - polypoints += self.scale(startPosition) + Math.min(+plus_strand * width / 3, GISIZE) + "," + (self.getSequenceModHeight() * i - GISIZE); - // Else AMRs will be rectangles of reduced height to improve appearance from a wider view - } else { - // Each rectangle is given a min width of 1 so they will be visible - polypoints = self.scale(startPosition) + "," + (self.getSequenceModHeight() * i) + " "; - polypoints += Math.max(self.scale(endPosition), self.scale(startPosition) + 1) + "," + (self.getSequenceModHeight() * i) + " "; - polypoints += Math.max(self.scale(endPosition), self.scale(startPosition) + 1) + "," + (self.getSequenceModHeight() * i - GISIZE / 2) + " "; - polypoints += self.scale(startPosition) + "," + (self.getSequenceModHeight() * i - GISIZE / 2); - adjust = 3 * GISIZE / 2; - } - var p = amrcontainer.append("polygon") - .attr("points", polypoints); - // Positive strand AMRs are in place, negative strand AMRs must be moved down to the other side - if (!plus_strand) { - p.attr("transform", "translate(0," + ( adjust ) +")"); - } - } + }) + .append("title").text(function(gi) { + if(gi.cluster != null) { return "Click to open GI cluster " + gi.cluster + " view"; } + }); + }); }); - - //Add the brush for zooming and focusing - var brush = d3.svg.brush() - .x(self.scale) - .on("brush", brushmove) - .on("brushend", brushend); - - sequenceHolder.append("g") - .attr("class", "brush") - .call(brush) - .selectAll('rect') - .attr('height', this.containerHeight()) - .attr("transform","translate(0,"+(-0.7)*this.getSequenceModHeight()+")"); - - function brushmove() { - var extent = brush.extent(); + // Hide GIs if a cluster is selected + if (self.cluster) { + $("svg .genomicIslands polygon").not(".cluster-" + self.cluster).hide(); } - function brushend() { - var extent = brush.extent(); - self.setScale(extent[0],extent[1]); - self.transition(); - } + //Add AMR genes to each sequence + seq.append("g").attr("class", "amrs").each(function(d, i) { + // AMRs are given more detail at a smaller scale + var details = ((self.scale.domain()[1] - self.scale.domain()[0]) < self.getGeneFilterValue() * 3); + d3.select(this).selectAll("polygon") + .data(d.amr.filter(function(d) { + // Filter out the AMRs that aren't within the current scale + return (d.start < self.scale.domain()[1] && d.end > self.scale.domain()[0]); + })) + .enter().append("polygon") + .attr("points", function(d) { + var startPosition = self.scale(parseInt(d.start)); + var endPosition = self.scale(parseInt(d.end)); + var plus_strand = (d.strand === "+"); + var polypoints = ""; + // If scale is close, AMRs will be rendered as trapezoids + if (details) { + var width = Math.abs(endPosition - startPosition); + // min ensures the angles on the trapezoids are at most 45 degrees + 
polypoints += startPosition + Math.min(+!plus_strand * width / 3, GISIZE) + "," + (self.getSequenceModHeight() * i) + " "; + polypoints += endPosition - Math.min(+!plus_strand * width / 3, GISIZE) + "," + (self.getSequenceModHeight() * i) + " "; + polypoints += endPosition - Math.min(+plus_strand * width / 3, GISIZE) + "," + (self.getSequenceModHeight() * i - GISIZE / 2) + " "; + polypoints += startPosition + Math.min(+plus_strand * width / 3, GISIZE) + "," + (self.getSequenceModHeight() * i - GISIZE / 2); + // Else AMRs will be rectangles to improve appearance from a wider view + } else { + polypoints += startPosition + "," + (self.getSequenceModHeight() * i) + " "; + polypoints += startPosition + 1 + "," + (self.getSequenceModHeight() * i) + " "; + polypoints += startPosition + 1 + "," + (self.getSequenceModHeight() * i - GISIZE / 2) + " "; + polypoints += startPosition + "," + (self.getSequenceModHeight() * i - GISIZE / 2); + } + return polypoints; + }) + .attr("transform", function(d) { + // Positive strand AMRs are in place, negative strand AMRs need to be moved to the other side + if (d.strand === "-") { + return "translate(0," + (3 * GISIZE / 2) + ")"; + } + }); + }); //Add the genes to the plot var geneFilterValue = self.getGeneFilterValue(); - if((self.scale.domain()[1]-self.scale.domain()[0]) LEFTPADDING - TREECONTAINERWIDTH - TEXTPADDING - 5) { + d3.select(this) + .attr("textLength", LEFTPADDING - TREECONTAINERWIDTH - TEXTPADDING - 5) + .attr("lengthAdjust", "spacingAndGlyphs"); + } + }); + textContainer.attr("transform","translate("+(TREECONTAINERWIDTH+TEXTPADDING)+","+(TEXTTOPPADDING+TOPPADDING)+")"); //Aligns the viscontainer to the right to make room for other containers @@ -505,8 +569,20 @@ function MultiVis(targetNode){ if (this.isPrinterColors){ this.setPrinterColors(); } + + // Colour GIs by specified method + this.predictorToggle(); + this.GIColourToggle(); }; + // Resets the scale when the sidebar is toggled + $(".sidebar-toggle").on("click", function() { + setTimeout(function() { + self.setScale(self.scale.domain()[0], self.scale.domain()[1]); + self.transition(); + }, 500); + }); + this.togglePrinterColors = function(){ this.isPrinterColors=!this.isPrinterColors; }; @@ -519,13 +595,180 @@ function MultiVis(targetNode){ $(".amrs").attr("class", "amrs print"); }; + this.predictorToggle = function() { + if ($(".GIColourBody :checkbox[name=sigi]").is(":checked")) { + $(".sigi").show(); + } else { + $(".sigi").hide(); + } + if ($(".GIColourBody :checkbox[name=islandpath]").is(":checked")) { + $(".islandpath").show(); + } else { + $(".islandpath").hide(); + } + }; + + this.GIColourToggle = function() { + if ($("input[name=GIColour]:checked").val() === "similarity") { + $(".GIColourBody :checkbox").attr("disabled", true); + $(".predictorLegend").hide(); + $(".merged").show(); + } else { + $(".GIColourBody :checkbox").removeAttr("disabled"); + $(".predictorLegend").show(); + $(".merged").hide(); + } + }; + + this.saveImage = function(mode) { + // https://stackoverflow.com/a/38085847 + var svg = this.container.select("svg"), + img = new Image(), + serializer = new XMLSerializer(), + width = svg.node().getBoundingClientRect().width, + height = svg.node().getBoundingClientRect().height; + + // get the css from the stylesheet + var style = "\nsvg {background-color: white;}\n"; + for (var i=0; i i.id === seqID); + if (index === -1) { + clusterDict.sequences.push({"id": seqID, "islands": []}); + index = clusterDict.sequences.length - 1; + } + 
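openClusterPage hands its data to the new cluster page through window.clusterDict (with a sessionStorage fallback on refresh, per clustervis.js above). Pieced together from how that code reads it, the handoff appears to take roughly the following shape; the keys come from the code above, while the concrete values here are invented for illustration.

```python
# Hypothetical example values; only the key layout is taken from the code above.
cluster_dict = {
    "cluster": 3,                      # cluster id shown in the page header
    "color": "#1f77b4",                # fill colour applied to the island rects
    "sequences": [
        {
            "id": 12,                  # genome id, used to fetch genes for each island
            "name": "AE009952",        # genome name printed above its islands
            "islands": [[401432, 455678, 12]],                 # [start, end, genome id]
            "amr": [{"start": 402000, "end": 403500, "strand": "-"}],
        },
    ],
    "alignment": {},                   # multiVis.backbone.backbone (homologous regions)
}
```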
clusterDict.sequences[index].islands.push([start, end, seqID]); + }); + // Assign sequence names + for (var i = 0; i < multiVis.sequences.length; i++) { + var currentSeq = multiVis.sequences[i]; + var index = clusterDict.sequences.findIndex(i => i.id === currentSeq.sequenceId); + if (index !== -1) { + clusterDict.sequences[index].name = currentSeq.sequenceName; + clusterDict.sequences[index].amr = currentSeq.amr; + } + } + + clusterDict.alignment = multiVis.backbone.backbone; + + var clusterPage = open("cluster.html"); + clusterPage.clusterDict = clusterDict; + }; + return this; } function Backbone() { var self = this; this.sequences = []; - this.backbone = [[]]; + this.backbone = {}; // Retrieves the sequenceId from sequences given the name of a sequence. // Will return -1 if no sequence is found @@ -538,6 +781,20 @@ function Backbone() { return -1; }; + this.getIndicesFromIds = function(ids) { + var adjuster = 0; + var indices = []; + for (var seq_i = 0; seq_i < this.sequences.length; seq_i++) { + var index = ids.indexOf(this.sequences[seq_i].sequenceId) + if (index != -1) { + indices.push(index + adjuster); + } else { + adjuster++; + } + } + return indices; + }; + this.addSequence = function (sequenceId, sequenceSize, sequenceName, givenName) { sequenceName = sequenceName || null; givenName = givenName || null; @@ -550,29 +807,16 @@ function Backbone() { return self.sequences; }; - this.addHomologousRegion = function (seqId1, seqId2, start1, end1, start2, end2) { - if (this.backbone[seqId1] === undefined) { - this.backbone[seqId1] = []; - } - - if (this.backbone[seqId1][seqId2] === undefined) { - this.backbone[seqId1][seqId2] = []; - } - - if (this.backbone[seqId2] === undefined) { - this.backbone[seqId2] = []; - } - - if (this.backbone[seqId2][seqId1] === undefined) { - this.backbone[seqId2][seqId1] = []; + this.addHomologousRegion = function (seqId, start1, end1, start2, end2) { + if (this.backbone[seqId] === undefined) { + this.backbone[seqId] = []; } - this.backbone[seqId1][seqId2].push(new HomologousRegion(start1, end1, start2, end2)); - this.backbone[seqId2][seqId1].push(new HomologousRegion(start2, end2, start1, end1)); + this.backbone[seqId].push(new HomologousRegion(start1, end1, start2, end2)); }; - this.retrieveHomologousRegions = function (seqId1, seqId2) { + this.retrieveHomologousRegions = function (seqId) { try { - var homologousRegions = this.backbone[seqId1][seqId2]; + var homologousRegions = this.backbone[seqId]; } catch (e) { return []; } diff --git a/web/dist/js/islandcompare-rest-urls.js b/web/dist/js/islandcompare-rest-urls.js index 1b66ae35..4a410f30 100644 --- a/web/dist/js/islandcompare-rest-urls.js +++ b/web/dist/js/islandcompare-rest-urls.js @@ -4,8 +4,12 @@ var authUrl = rootUrl + "accounts/auth-token/"; var registerUrl = rootUrl + "accounts/register/"; var genomesUrl = rootUrl + "genomes/"; var genomesUploadUrl = rootUrl + "genomes/upload/"; +var genomesUpdateUrl = rootUrl + "genomes/details/"; var genomesGenesUrl = rootUrl + "genomes/genes/"; var analysisUrl = rootUrl + "analysis/"; var analysisRunUrl = rootUrl + "analysis/run/"; var analysisResultsUrl = rootUrl + "analysis/results/"; -var analysisExportUrl = rootUrl + "analysis/export/"; \ No newline at end of file +var analysisExportUrl = rootUrl + "analysis/export/"; +var analysisGenesExportUrl = rootUrl + "analysis/export-genes/"; +var analysisDeleteUrl = rootUrl + "analysis/delete/"; +var analysisUpdateUrl = rootUrl + "analysis/details/"; \ No newline at end of file diff --git a/web/pages/cluster.html 
b/web/pages/cluster.html new file mode 100644 index 00000000..b4edf5f1 --- /dev/null +++ b/web/pages/cluster.html @@ -0,0 +1,159 @@
+ [new cluster view page, titled "IslandCompare Cluster": provides the #header and #visualizationBody containers that clustervis.js renders into, and a legend with the entries AntiMicrobial Resistance Gene (AMR Gene), Gene, tRNA and rRNA]
diff --git a/web/pages/history.html b/web/pages/history.html index 57348845..fccbc695 100644 --- a/web/pages/history.html +++ b/web/pages/history.html @@ -24,8 +24,9 @@

Job History
- [removed header cell: Analysis ID] + [added header cells: ID, RGI Status] @@ -33,6 +34,8 @@ Job History
+ [added header cells: Rename, Delete] @@ -64,7 +67,7 @@ Job History
); } - function export_job(jobid) { + function export_gis(jobid) { $.ajax({ url: analysisExportUrl + jobid, async: false, @@ -87,6 +90,93 @@
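export_gis above is a thin client for the analysis export endpoint added earlier in this change. For reference, the same call from Python; the base URL and token are placeholders, and the status handling mirrors ExportAnalysisResultView (202 while the pipeline is still running, 500 if it errored).

```python
import requests

ROOT = "http://localhost:8000/"                        # assumed deployment root
HEADERS = {"Authorization": "Token <your-api-token>"}  # placeholder token

def export_gi_csv(analysis_id, out_path):
    resp = requests.get(ROOT + "analysis/export/{}".format(analysis_id), headers=HEADERS)
    if resp.status_code == 202:
        print("Job has not completed")        # pipeline still running
    elif resp.status_code == 500:
        print("Job has errored")              # pipeline failed
    else:
        resp.raise_for_status()
        with open(out_path, "w") as handle:
            handle.write(resp.text)           # CSV body produced by ReportCsvSerializer
```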


}); } + function export_genes(jobid) { + $.ajax({ + url: analysisGenesExportUrl + jobid, + async: false, + beforeSend: function(request) { + request.setRequestHeader("Authorization", 'Token'+' '+getAuthenticationCookie()); + }, + success: function(data) { + // Construct the element + var link = document.createElement("a"); + link.download = "IslandCompareGenes-" + jobid + ".csv"; + // Construct the uri + var uri = 'data:text/csv;charset=utf-8;base64,' + btoa(data); + link.href = uri; + document.body.appendChild(link); + link.click(); + // Cleanup the DOM + document.body.removeChild(link); + delete link; + } + }); + } + + function rename_job(jobid) { + //var new_name = prompt("Please enter new name"); + BootstrapDialog.confirm({ + title: 'Please enter new name', + message: $(""), + callback: function(result) { + if (result) { + var new_name = $("#new_name").val(); + $.ajax({ + type: "PATCH", + url: analysisUpdateUrl + jobid, + data: {name: new_name}, + beforeSend: function (request) { + request.setRequestHeader("Authorization", 'Token'+' '+getAuthenticationCookie()); + }, + success: function() { + var jobtable = $("#job-history").DataTable(); + var row_values = jobtable.row('#'+jobid).data(); + row_values.name = new_name; + jobtable.row('#'+jobid).data(row_values).draw(paging=false); + }, + error: function(message) { + BootstrapDialog.warning(message.responseText); + } + }); + } + } + }); + + } + + function delete_job(jobid) { + BootstrapDialog.show({ + title: 'Warning', + message: 'Are you sure you want to delete analysis ' + jobid + '?', + type: BootstrapDialog.TYPE_WARNING, + buttons: [{ + label: 'Delete', + action: function(dialog) { + $.ajax({ + type: "DELETE", + url: analysisDeleteUrl + jobid, + beforeSend: function (request) { + request.setRequestHeader("Authorization", 'Token'+' '+getAuthenticationCookie()); + }, + success: function(data){ + var jobtable = $("#job-history").DataTable(); + jobtable.row('#' + jobid).remove().draw(paging=false); + }, + error: function(message) { + BootstrapDialog.warning(message.responseText); + } + }); + dialog.close(); + } + },{ + label: 'Cancel', + action: function(dialog) { + dialog.close(); + } + }] + }); + } + $(document).ready(function () { jobtable = $("#job-history").DataTable({ ajax: { @@ -99,23 +189,43 @@
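rename_job and delete_job above target the PATCH analysis/details/<id> and DELETE analysis/delete/<id> endpoints (see analysis/urls.py and the new AnalysisDestroyView earlier in this diff), while export_genes uses analysis/export-genes/<id>. A rough Python equivalent, again with placeholder base URL and token:

```python
import requests

ROOT = "http://localhost:8000/"                        # assumed deployment root
HEADERS = {"Authorization": "Token <your-api-token>"}  # placeholder token

def rename_analysis(analysis_id, new_name):
    # Same call rename_job makes: PATCH the name on AnalysisRetrieveUpdateView.
    resp = requests.patch(ROOT + "analysis/details/{}".format(analysis_id),
                          data={"name": new_name}, headers=HEADERS)
    resp.raise_for_status()
    return resp.json()

def delete_analysis(analysis_id):
    # Same call delete_job makes: AnalysisDestroyView returns 204 on success.
    resp = requests.delete(ROOT + "analysis/delete/{}".format(analysis_id), headers=HEADERS)
    resp.raise_for_status()
    return resp.status_code

def export_gene_csv(analysis_id):
    # export_genes equivalent: CSV of GI genes from ExportAnalysisGenesView.
    resp = requests.get(ROOT + "analysis/export-genes/{}".format(analysis_id), headers=HEADERS)
    resp.raise_for_status()
    return resp.text
```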


columns: [ { data: 'id'}, { data: 'name' }, + { data: 'analysiscomponent_set.rgi.status'}, { data: 'analysiscomponent_set.parsnp.status', defaultContent: "USER DEFINED TREE"}, { data: 'analysiscomponent_set.mauve.status'}, { data: 'analysiscomponent_set.sigi.status', defaultContent: "USER DEFINED GIS"}, - { data: 'analysiscomponent_set.islandpath.status', defaultContent: "USER DEFINED GIS"}, - { data: 'analysiscomponent_set.mash_mcl.status', defaultContent: "USER DEFINED GIS"} + { data: 'analysiscomponent_set.islandpath.status', defaultContent: "USER DEFINED GIS"} ], aoColumnDefs: [ { "aTargets": [7], "mData": null, + "mRender": function (data, type, full) { + if ( typeof full['analysiscomponent_set']['mash_mcl'] !== 'undefined') { + if (full['analysiscomponent_set']['mash_mcl']['status'] === "PENDING") { + if (full['analysiscomponent_set']['sigi']['status'] === "FAILURE" && + full['analysiscomponent_set']['islandpath']['status'] === "FAILURE") { + return "SKIPPED" + } + return "PENDING" + } else { + return full['analysiscomponent_set']['mash_mcl']['status'] + } + } else { + return "USER DEFINED CLUSTERS" + } + } + }, + { + "aTargets": [8], + "mData": null, "mRender": function (data, type, full) { var analysis_id = full['id']; var end_status = null; + var rgi_status = null; var parsnp_status = null; var mauve_status = null; var sigi_status = null; - var island_path_status = null; + var islandpath_status = null; if( typeof full['analysiscomponent_set']['end_pipeline'] !== 'undefined' ) { end_status = full['analysiscomponent_set']['end_pipeline']['status']; @@ -124,6 +234,13 @@
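The aTargets 7 renderer above folds several component states into a single display value for the Mash-MCL column. The same decision logic as a small Python sketch; the dictionary layout follows the analysiscomponent_set structure used above, and the function itself is illustrative rather than part of the application.

```python
def mash_mcl_display(components):
    """components: dict keyed by component name, each value holding a 'status' entry."""
    if "mash_mcl" not in components:
        return "USER DEFINED CLUSTERS"
    if components["mash_mcl"]["status"] == "PENDING":
        # Clustering can never start if both GI predictors already failed.
        if (components.get("sigi", {}).get("status") == "FAILURE"
                and components.get("islandpath", {}).get("status") == "FAILURE"):
            return "SKIPPED"
        return "PENDING"
    return components["mash_mcl"]["status"]

assert mash_mcl_display({}) == "USER DEFINED CLUSTERS"
assert mash_mcl_display({"mash_mcl": {"status": "PENDING"},
                         "sigi": {"status": "FAILURE"},
                         "islandpath": {"status": "FAILURE"}}) == "SKIPPED"
```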


end_status = "UNKNOWN" } + if( typeof full['analysiscomponent_set']['rgi'] !== 'undefined' ) { + rgi_status = full['analysiscomponent_set']['rgi']['status']; + } + else{ + rgi_status = "UNKNOWN" + } + if( typeof full['analysiscomponent_set']['parsnp'] !== 'undefined' ) { parsnp_status = full['analysiscomponent_set']['parsnp']['status']; } @@ -147,18 +264,16 @@


else{ sigi_status = "UNKNOWN" } - - if( typeof full['analysiscomponent_set']['island_path'] !== 'undefined' ) { - island_path_status = full['analysiscomponent_set']['island_path']['status']; + if( typeof full['analysiscomponent_set']['islandpath'] !== 'undefined' ) { + islandpath_status = full['analysiscomponent_set']['islandpath']['status']; } else{ - island_path_status = "UNKNOWN" + islandpath_status = "UNKNOWN" } - if (parsnp_status === "FAILURE" || - mauve_status === "FAILURE" || - sigi_status === "FAILURE" || - island_path_status === "FAILURE") { + if (rgi_status === "FAILURE" || + parsnp_status === "FAILURE" || + mauve_status === "FAILURE") { return ''; } else if (end_status === "SUCCESS") { @@ -170,26 +285,80 @@


} }, { - "aTargets": [8], + "aTargets": [9], "mData": null, "mRender": function (data, type, full) { var analysis_id = full['id']; + var sigi_status = null; + var islandpath_status = null; + var mash_mcl_status = true; + var end_status = null; + if( typeof full['analysiscomponent_set']['sigi'] !== 'undefined' ) { + sigi_status = full['analysiscomponent_set']['sigi']['status']; + } + else{ + sigi_status = "UNKNOWN" + } + if( typeof full['analysiscomponent_set']['islandpath'] !== 'undefined' ) { + islandpath_status = full['analysiscomponent_set']['islandpath']['status']; + } + else{ + islandpath_status = "UNKNOWN" + } + if ( typeof full['analysiscomponent_set']['mash_mcl'] === 'undefined') { + mash_mcl_status = false; + } if( typeof full['analysiscomponent_set']['end_pipeline'] !== 'undefined' ) { end_status = full['analysiscomponent_set']['end_pipeline']['status']; } else{ end_status = "UNKNOWN" } - if (end_status === "FAILURE") { + if (end_status === "FAILURE" || (sigi_status === "FAILURE" && islandpath_status === "FAILURE")) { return '
[job-history table header cells referenced by the hunks above: Analysis ID (removed), ID, Analysis Name, RGI Status, Parsnp Status, Mauve Status, SigiHMM Status, Mash-MCL Status, Results, Export, Rename, Delete]
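Both of the new retrieve views earlier in this change (GenomeGeneRetrieveView and GenomicIslandRetrieveView) and the client-side withinScale filter in genomecomparevis.js window features the same way: keep anything whose interval overlaps the requested [start, end] range. A minimal sketch of that test, with the equivalent ORM filters noted as comments:

```python
def overlaps(feature_start, feature_end, window_start, window_end):
    # A feature overlaps the window iff it ends at or after the window start
    # and starts at or before the window end.
    return feature_end >= window_start and feature_start <= window_end

# ORM equivalents used by the views above:
#   genes = genome.gene_set.filter(end__gte=start).filter(start__lte=end)
#   gis   = genome.genomicisland_set.filter(end__gte=start).filter(start__lte=end)

assert overlaps(950, 1050, 1000, 2000)       # straddles the window start
assert not overlaps(100, 900, 1000, 2000)    # lies entirely before the window
```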