From 12b197e4021935261f720304c1c5712c751b11ed Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Feb 2018 14:36:23 -0500
Subject: [PATCH 001/122] START: added test for new ectyper call

---
 app/tests/test_modules.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py
index bb92ecac..63466e49 100644
--- a/app/tests/test_modules.py
+++ b/app/tests/test_modules.py
@@ -3,6 +3,7 @@
 import pytest
 import os
+import subprocess
 import cPickle as pickle

 from modules.qc.qc import qc, check_header_parsing, check_ecoli
@@ -57,7 +58,10 @@ def test_qc():
     for non_ecoli_genome in GENOMES_LIST_NOT_ECOLI:
         assert qc(non_ecoli_genome) == False

-def test_ectyper():
+def test_ectyper_vf():
+    """Check the ECTyper from `superphy` which is used for virulence factor
+    identification. Installed as a submodule in the `modules` directory.
+    """
     for ecoli_genome in GENOMES_LIST_ECOLI:
         # basic ECTyper check
         single_dict = dict(ARGS_DICT)
@@ -70,6 +74,14 @@ def test_ectyper():
         json_return = beautify(single_dict, pickled_ectyper_dict)
         assert type(json_return) == list

+def test_ectyper_serotype():
+    """Check the ECTyper from `master` which only performs serotyping.
+    Installed in the conda environment.
+    """
+    for ecoli_genome in GENOMES_LIST_ECOLI:
+        ret_code = subprocess.call(['ectyper', '-i', ecoli_genome])
+        assert ret_code == 0
+
 def test_amr():
     ecoli_genome = GENOMES_LIST_ECOLI[0]
     # this generates the .tsv

From 4635e6e9c1d798bfc13ded12208ec6f58274439d Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Feb 2018 22:46:38 -0500
Subject: [PATCH 002/122] ADD: new calls + tests for them

---
 app/modules/ectyper/call_ectyper.py | 64 ++++++++++++++++++++++-------
 app/modules/spfy.py                 | 52 ++++++++++++++++++-----
 app/tests/test_modules.py           | 13 +++++-
 3 files changed, 101 insertions(+), 28 deletions(-)

diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py
index 91f11f23..73273e2c 100644
--- a/app/modules/ectyper/call_ectyper.py
+++ b/app/modules/ectyper/call_ectyper.py
@@ -4,6 +4,7 @@
 import subprocess
 import cPickle as pickle
 import tempfile
+import pandas as pd
 from ast import literal_eval
 from os.path import basename
 from modules.loggingFunctions import initialize_logging
@@ -11,23 +12,21 @@
 log_file = initialize_logging()
 log = logging.getLogger(__name__)

-def call_ectyper(args_dict):
-    # i don't intend to import anything from ECTyper (there are a lot of
-    # imports in it - not sure if we'll use them all)
-    # concurrency is handled at the batch level, not here (note: this might change)
-    # we only use ectyper for serotyping and vf, amr is handled by rgi directly
-
+def call_ectyper_vf(args_dict):
+    """ Use the old version of ECTyper at `superphy` for VF.
+    """
+    # Init return.
     p = 'no pickle'
-
-    if args_dict['options']['serotype'] or args_dict['options']['vf']:
-        #hack to allow ectyper to run in docker
+    if args_dict['options']['vf']:
+        # Workaround to allow ECTyper to run in Docker.
         filepath=(args_dict['i'])
         wrapper_dir = os.path.dirname(os.path.abspath(__file__))
-        # this temp file path is req for ectyper
+        # This temp file path is required for ectyper.
         temp = tempfile.NamedTemporaryFile()
+        # Copy the actual genome file into the tempfile.
        shutil.copyfile(args_dict['i'], temp.name)
-        # create a copy of args_dict so we don't modify it from calling functions
+        # Create a copy of args_dict and update with the tempfile.
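#  A minimal sketch of why the copy below matters: dict() is a shallow copy,
#  so rebinding a top-level key is safe while nested values stay shared:
#      original = {'i': 'genome.fasta', 'options': {'vf': True}}
#      copied = dict(original)
#      copied['i'] = '/tmp/tmpXYZ'      # original['i'] is untouched
#      copied['options']['vf'] = False  # original['options'] changes too
#  Only the top-level 'i' key is rebound here; the file names above are
#  illustrative values, not paths from the patch.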
        args_dict = dict(args_dict)
        args_dict['i']= temp.name
        log.debug(temp.name)
@@ -42,14 +41,15 @@ def call_ectyper(args_dict):
             int(args_dict['options']['vf'])),
             '-pi',
             str(args_dict['pi'])
         ])
-        # removing that temp file we created
+        # Removing that temp file we created.
         temp.close()

-        # because we are using check_output, this catches any print messages from tools_controller
+        # Because we are using check_output, this catches any print messages
+        # from tools_controller.
         # TODO: switch to pipes
         if 'error' in ectyper_dict.lower():
             log.fatal('ECTyper failed for ' + args_dict['i'])
-            raise Exception('ECTper failed for' + filepath)
+            raise Exception('ECTyper VF failed for ' + filepath)

         ectyper_dict = literal_eval(ectyper_dict)
@@ -57,7 +57,41 @@ def call_ectyper(args_dict):
         # we are calling tools_controller on only one file, so grab that dict
         key, ectyper_dict = ectyper_dict.popitem()

-        p = os.path.join(filepath + '_ectyper.p')
+        p = os.path.join(filepath + '_ectyper_vf.p')
         pickle.dump(ectyper_dict,open(p,'wb'))

     return p
+
+def call_ectyper_serotype(args_dict):
+    """Use the new version of ECTyper at `master` for serotyping.
+    """
+    genome_file = args_dict['i']
+    pi = args_dict['options']['pi']
+    pl = '50' # This is the default in ECTyper.
+    output_dir = tempfile.mkdtemp()
+    ret_code = subprocess.call([
+        "ectyper",
+        "-i",
+        genome_file,
+        "-pi",
+        pi,
+        "-pl",
+        pl,
+        "-o",
+        output_dir
+    ])
+    if ret_code == 0:
+        output_file = os.path.join(output_dir, 'output.csv')
+        df = pd.read_csv(output_file)
+        # Add the PI to our DataFrame.
+        df['pi'] = pi
+        # Add the PL to our DataFrame.
+        df['pl'] = pl
+        # The final result file from ECTyper serotyping. This copies it back to
+        # config.DATASTORE
+        p = os.path.join(genome_file + '_ectyper_serotype.p')
+        with open(p, 'w') as fh:
+            df.to_csv(fh, header=True, index_label='genome')
+        return p
+    else:
+        raise Exception('ECTyper Serotyping failed for ' + genome_file)

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index 7d4754e3..3cc679c4 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -72,18 +72,48 @@ def blob_savvy_enqueue(single_dict):

     # ECTYPER PIPELINE
     def ectyper_pipeline(singles, multiples):
-        # the ectyper call is special in that it requires the entire arguments
-        # to decide whether to carry the serotype option flag, virulance
-        # factors option flag, and percent identity field
-        job_ectyper = singles.enqueue(
-            call_ectyper, single_dict, depends_on=job_id)
-        # after this call, the result is stored in Blazegraph
-        # new to 4.3.3
-        # if bulk uploading is set, we return the datastruct as the end task
-        # to poll for job completion, therefore must set ttl of -1
+        """The ectyper call is special in that it requires the entire arguments
+        to decide whether to carry the serotype option flag, virulence
+        factors option flag, and percent identity field. We use the old ECTyper
+        for VF and the new ECTyper for Serotyping.
+        """
+        if single_dict['options']['vf']:
+            # Create a copy of the arguments dictionary and disable Serotype.
+            # This copy is passed to the old ECTyper.
+            single_dict_vf = dict(single_dict)
+            single_dict_vf['options']['serotype'] = False
+            # Enqueue the old ECTyper
+            job_ectyper_vf = singles.enqueue(
+                call_ectyper_vf,
+                single_dict_vf,
+                depends_on=job_id)
+        if single_dict['options']['serotype']:
+            # Enqueue the new ECTyper
+            job_ectyper_serotype = multiples.enqueue(
+                call_ectyper_serotype,
+                single_dict,
+                depends_on=job_id)
+
+        # datastruct_savvy() stores result to Blazegraph.
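#  A note on the TTLs used just below, assuming stock python-rq semantics:
#  result_ttl=-1 keeps a job's return value in Redis indefinitely, 0 drops it
#  immediately, and leaving it unset keeps it for the RQ default of 500 s
#  (the same 500 that config.py later pins as DEFAULT_RESULT_TTL), e.g.:
#      q.enqueue(datastruct_savvy, args, result_ttl=-1)  # poll-able forever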
        if single_dict['options']['bulk']:
-            job_ectyper_datastruct = multiples.enqueue(
-                datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_ectyper.p', depends_on=job_ectyper, result_ttl=-1)
+            # If bulk uploading is set, we return the datastruct as the end task
+            # to poll for job completion, therefore must set ttl of -1.
+            if single_dict['options']['vf']:
+                job_ectyper_datastruct = multiples.enqueue(
+                    datastruct_savvy,
+                    query_file,
+                    query_file + '_id.txt',
+                    query_file + '_ectyper_vf.p',
+                    depends_on=job_ectyper_vf,
+                    result_ttl=-1)
+            if single_dict['options']['serotype']:
+                job_ectyper_datastruct = multiples.enqueue(
+                    datastruct_savvy,
+                    query_file,
+                    query_file + '_id.txt',
+                    query_file + '_ectyper_serotype.p',
+                    depends_on=job_ectyper_serotype,
+                    result_ttl=-1)
         else:
             job_ectyper_datastruct = multiples.enqueue(
                 datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_ectyper.p', depends_on=job_ectyper)

diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py
index 63466e49..53da8277 100644
--- a/app/tests/test_modules.py
+++ b/app/tests/test_modules.py
@@ -5,10 +5,11 @@
 import os
 import subprocess
 import cPickle as pickle
+import pandas as pd

 from modules.qc.qc import qc, check_header_parsing, check_ecoli
 from modules.blazeUploader.reserve_id import write_reserve_id
-from modules.ectyper.call_ectyper import call_ectyper
+from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype
 from modules.amr.amr import amr
 from modules.amr.amr_to_dict import amr_to_dict
 from modules.beautify.beautify import beautify
@@ -66,7 +67,7 @@ def test_ectyper_vf():
         # basic ECTyper check
         single_dict = dict(ARGS_DICT)
         single_dict.update({'i':ecoli_genome})
-        pickled_ectyper_dict = call_ectyper(single_dict)
+        pickled_ectyper_dict = call_ectyper_vf(single_dict)
         ectyper_dict = pickle.load(open(pickled_ectyper_dict,'rb'))
         assert type(ectyper_dict) == dict

@@ -79,9 +80,17 @@ def test_ectyper_serotype():
     Installed in the conda environment.
     """
     for ecoli_genome in GENOMES_LIST_ECOLI:
+        # Check that the conda env can run ectyper.
         ret_code = subprocess.call(['ectyper', '-i', ecoli_genome])
         assert ret_code == 0

+        # Check the actual call from Spfy's code.
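#  ARGS_DICT is the shared fixture from tests/constants.py; the copy-then-update
#  idiom keeps the fixture pristine between test cases. Roughly (key names as
#  used elsewhere in this series; the exact contents live in constants.py):
#      single_dict = dict(ARGS_DICT)            # e.g. {'pi': 90, 'options': {...}}
#      single_dict.update({'i': ecoli_genome})  # point 'i' at the genome under test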
+        single_dict = dict(ARGS_DICT)
+        single_dict.update({'i':ecoli_genome})
+        pickled_serotype_df = call_ectyper_serotype(single_dict)
+        ectyper_serotype_df = pickle.load(open(pickled_serotype_df,'rb'))
+        assert isinstance(ectyper_serotype_df, pd.DataFrame)
+
 def test_amr():
     ecoli_genome = GENOMES_LIST_ECOLI[0]

From 48ca4b94631cc86fb100366f26c02b6057b3eaf8 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Feb 2018 23:18:29 -0500
Subject: [PATCH 003/122] FIX: imports

---
 app/modules/pan_spfy.py | 1 -
 app/modules/savvy.py    | 2 +-
 app/modules/spfy.py     | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/app/modules/pan_spfy.py b/app/modules/pan_spfy.py
index 7bfc4840..3af6caf0 100644
--- a/app/modules/pan_spfy.py
+++ b/app/modules/pan_spfy.py
@@ -17,7 +17,6 @@
 from modules.qc.qc import qc
 from modules.blazeUploader.reserve_id import write_reserve_id
-from modules.ectyper.call_ectyper import call_ectyper
 from modules.amr.amr import amr
 from modules.amr.amr_to_dict import amr_to_dict
 from modules.beautify.beautify import beautify

diff --git a/app/modules/savvy.py b/app/modules/savvy.py
index f5d9cc8a..de958cee 100755
--- a/app/modules/savvy.py
+++ b/app/modules/savvy.py
@@ -17,7 +17,7 @@
 import json
 from modules.qc.qc import qc
 from modules.blazeUploader.reserve_id import write_reserve_id
-from modules.ectyper.call_ectyper import call_ectyper
+from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype
 from modules.amr.amr import amr
 from modules.amr.amr_to_dict import amr_to_dict
 from modules.beautify.beautify import beautify

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index 3cc679c4..b04355ab 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -17,7 +17,7 @@
 from modules.qc.qc import qc
 from modules.blazeUploader.reserve_id import write_reserve_id
-from modules.ectyper.call_ectyper import call_ectyper
+from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype
 from modules.amr.amr import amr
 from modules.amr.amr_to_dict import amr_to_dict
 from modules.beautify.beautify import beautify

From 725071ee13ae02369eeb5652cf380e9258fcda60 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Tue, 13 Feb 2018 12:58:18 -0500
Subject: [PATCH 004/122] FIX: calls

---
 app/modules/ectyper/call_ectyper.py | 2 +-
 app/modules/savvy.py                | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py
index 73273e2c..c2a1c07a 100644
--- a/app/modules/ectyper/call_ectyper.py
+++ b/app/modules/ectyper/call_ectyper.py
@@ -66,7 +66,7 @@ def call_ectyper_serotype(args_dict):
     """Use the new version of ECTyper at `master` for serotyping.
     """
     genome_file = args_dict['i']
-    pi = args_dict['options']['pi']
+    pi = args_dict['pi']
     pl = '50' # This is the default in ECTyper.
     output_dir = tempfile.mkdtemp()
     ret_code = subprocess.call([

diff --git a/app/modules/savvy.py b/app/modules/savvy.py
index de958cee..355d9064 100755
--- a/app/modules/savvy.py
+++ b/app/modules/savvy.py
@@ -119,7 +119,7 @@ def write_json(json_r, analysis):
     reservation_ttl = write_graph(reservation_graph, 'reservation')

     # (3) ECTyper Step:
-    ectyper_p = call_ectyper(args_dict)
+    ectyper_p = call_ectyper_vf(args_dict) # call_ectyper_vf is the older ver.
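#  At this point the split is: the `superphy` ECTyper submodule handles only
#  virulence factors (call_ectyper_vf), while serotyping goes through the
#  conda-installed `master` ECTyper (call_ectyper_serotype); savvy.py's
#  step (3) runs just the VF half.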
log.debug("Pickled ECTyper File: " + ectyper_p) # (4) ECTyper Beautify Step: From d02ef1d01c805f5550bcab0310b2a4f6308444f6 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 13 Feb 2018 16:06:58 -0500 Subject: [PATCH 005/122] FIX: more calls --- app/modules/ectyper/call_ectyper.py | 2 +- app/modules/savvy.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py index c2a1c07a..26c9070d 100644 --- a/app/modules/ectyper/call_ectyper.py +++ b/app/modules/ectyper/call_ectyper.py @@ -66,7 +66,7 @@ def call_ectyper_serotype(args_dict): """Use the new version of ECTyper at `master` for serotyping. """ genome_file = args_dict['i'] - pi = args_dict['pi'] + pi = str(args_dict['pi']) # Cast to str to execvp() in subprocess(). pl = '50' # This is the default in ECTyper. output_dir = tempfile.mkdtemp() ret_code = subprocess.call([ diff --git a/app/modules/savvy.py b/app/modules/savvy.py index 355d9064..f286b682 100755 --- a/app/modules/savvy.py +++ b/app/modules/savvy.py @@ -128,7 +128,7 @@ def write_json(json_r, analysis): ectyper_json = write_json(ectyper_beautify, 'ectyper') # (5) Graphing ECTyper Result: - ectyper_graph = generate_datastruct(query_file, query_file + '_id.txt', query_file + '_ectyper.p') + ectyper_graph = generate_datastruct(query_file, query_file + '_id.txt', ectyper_p) ectyper_ttl = write_graph(ectyper_graph, 'ectyper') log.debug('Graph Result for ECtyper: ' + ectyper_ttl) From b4d7c1253099d84fb852becd024b7f613af0d96d Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 13 Feb 2018 16:41:43 -0500 Subject: [PATCH 006/122] FIX: that should do it for the calls --- app/modules/ectyper/call_ectyper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py index 26c9070d..3540f913 100644 --- a/app/modules/ectyper/call_ectyper.py +++ b/app/modules/ectyper/call_ectyper.py @@ -73,9 +73,9 @@ def call_ectyper_serotype(args_dict): "ectyper", "-i", genome_file, - "-pi", + "-d", # Percent Identity pi, - "-pl", + "-l", # Percent Length pl, "-o", output_dir From 7db9630a9f9f056b0be2fe554a4ded5a79cc6d03 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 13 Feb 2018 17:20:28 -0500 Subject: [PATCH 007/122] CHANGE: use csv as inbetween for ectyper --- app/modules/ectyper/call_ectyper.py | 6 +++--- app/tests/test_modules.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py index 3540f913..e6e981d6 100644 --- a/app/modules/ectyper/call_ectyper.py +++ b/app/modules/ectyper/call_ectyper.py @@ -89,9 +89,9 @@ def call_ectyper_serotype(args_dict): df['pl'] = pl # The final result file from ECTyper serotyping. This copies it back to # config.DATASTORE - p = os.path.join(genome_file + '_ectyper_serotype.p') - with open(p, 'w') as fh: + csv_file = os.path.join(genome_file + '_ectyper_serotype.csv') + with open(csv_file, 'w') as fh: df.to_csv(fh, header=True, index_label='genome') - return p + return csv_file else: raise Exception('ECTyper Serotyping failed for' + genome_file) diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index 53da8277..454d017d 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -87,8 +87,8 @@ def test_ectyper_serotype(): # Check the actual call from Spfy's code. 
single_dict = dict(ARGS_DICT) single_dict.update({'i':ecoli_genome}) - pickled_serotype_df = call_ectyper_serotype(single_dict) - ectyper_serotype_df = pickle.load(open(pickled_serotype_df,'rb')) + serotype_csv = call_ectyper_serotype(single_dict) + ectyper_serotype_df = pd.read_csv(serotype_csv) assert isinstance(ectyper_serotype_df, pd.DataFrame) def test_amr(): From 522641a2ca3f1f43fcadceb30067d32814a9f1af Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 13 Feb 2018 21:59:05 -0500 Subject: [PATCH 008/122] START: ctrl+f approach to refactoring --- app/batch_download_insert.py | 74 ------------------- app/batch_insert.py | 21 ------ .../blazegraph}/__init__.py | 0 .../blazegraph}/reserve_id.py | 4 +- .../blazegraph}/upload_graph.py | 0 app/{modules => middleware}/decorators.py | 0 .../display}/__init__.py | 0 .../display}/beautify.py | 4 +- .../display}/find_widest.py | 0 .../graphers}/__init__.py | 0 .../graphers}/datastruct_savvy.py | 6 +- .../graphers}/turtle_grapher.py | 6 +- .../graphers}/turtle_utils.py | 0 app/modules/PanPredic | 2 +- .../__init__.py | 0 .../fishers.py | 0 .../frontend_queries.py | 6 +- .../groupcomparisons.py | 8 +- .../handle_logical.py | 2 +- .../logical_queries.py | 8 +- .../sparql_utils.py | 0 .../spfyOntology.rdf | 0 app/modules/database/status_queries.py | 2 +- app/modules/gc.py | 2 +- app/modules/metadata/metadata.py | 6 +- app/modules/pan_spfy.py | 12 +-- app/modules/phylotyper/graph_refs.py | 4 +- app/modules/phylotyper/ontology.py | 6 +- app/modules/phylotyper/phylotyper.py | 4 +- app/modules/phylotyper/sequences.py | 4 +- app/modules/qc/qc.py | 2 +- app/modules/spfy.py | 10 +-- app/routes/ra_views.py | 4 +- app/scripts/generate_ontology.py | 8 +- app/{modules => scripts}/savvy.py | 14 ++-- app/tests/test_beautify.py | 2 +- app/tests/test_modules.py | 8 +- app/tests/test_savvy.py | 2 +- app/tests/test_turtle_utils.py | 4 +- docs/source/contributing.rst | 4 +- 40 files changed, 72 insertions(+), 167 deletions(-) delete mode 100644 app/batch_download_insert.py delete mode 100644 app/batch_insert.py rename app/{modules/beautify => middleware/blazegraph}/__init__.py (100%) rename app/{modules/blazeUploader => middleware/blazegraph}/reserve_id.py (97%) rename app/{modules/blazeUploader => middleware/blazegraph}/upload_graph.py (100%) rename app/{modules => middleware}/decorators.py (100%) rename app/{modules/blazeUploader => middleware/display}/__init__.py (100%) rename app/{modules/beautify => middleware/display}/beautify.py (97%) rename app/{modules/beautify => middleware/display}/find_widest.py (100%) rename app/{modules/groupComparisons => middleware/graphers}/__init__.py (100%) rename app/{modules/turtleGrapher => middleware/graphers}/datastruct_savvy.py (97%) rename app/{modules/turtleGrapher => middleware/graphers}/turtle_grapher.py (96%) rename app/{modules/turtleGrapher => middleware/graphers}/turtle_utils.py (100%) rename app/modules/{turtleGrapher => comparisons}/__init__.py (100%) rename app/modules/{groupComparisons => comparisons}/fishers.py (100%) rename app/modules/{groupComparisons => comparisons}/frontend_queries.py (94%) rename app/modules/{groupComparisons => comparisons}/groupcomparisons.py (96%) rename app/modules/{groupComparisons => comparisons}/handle_logical.py (96%) rename app/modules/{groupComparisons => comparisons}/logical_queries.py (94%) rename app/modules/{groupComparisons => comparisons}/sparql_utils.py (100%) rename app/modules/{groupComparisons => comparisons}/spfyOntology.rdf (100%) rename app/{modules => 
scripts}/savvy.py (93%) diff --git a/app/batch_download_insert.py b/app/batch_download_insert.py deleted file mode 100644 index 10439324..00000000 --- a/app/batch_download_insert.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -#usage python batch_download_insert.py - -from Bio import SeqIO - -def download_to_insert(accession): - import subprocess - - r = from_nuccore(accession) - - if r is None: - print 'OH CRAP' - else: - subprocess.call(['./savvy.py', '-i', from_nuccore(accession)]) - - print 'woogle' - -def downloadFasta_to_insert(url): - import subprocess, os - - from time import sleep - - print 'working on ' + url - - i = 1 - - while i < 4: - try: - r = download_fasta(url) - i = 4 - except: - sleep(60 * i) #'linear backoff equation', for those of us too impatiant for the exponential kind - i += 1 - continue - - print 'done downloading, file at ' + r - - print 'now generating .ttl' - if r is None: - print 'OH CRAP' - else: - print 'calling subproces' - subprocess.call(['./savvy.py', '-i', r]) - print 'done generating turtle' - #os.remove(r) need to add way to check after process completes, for now added it to insert.py script - print 'woogle' - -if __name__ == "__main__": - from multiprocessing import Pool, cpu_count - - '''this is testing code using the .csv file - import pandas #this is the .csv parser - from _utils import from_nuccore - - metadata_table = pandas.read_csv('data/metadata_table.csv') - accessions = metadata_table['primary_dbxref'].apply(lambda s: s.strip().split(':')[1]) - - p = Pool(multiprocessing.cpu_count()) #you can use an int instead, just don't go crazy - #note: you may want to write out the fasta file, but I'm unsure whether it will improve performance as concurrency requires them all to be loaded into memory anyways - p.map(download_to_insert, accessions) - ''' - - #testing using the .txt file as source - from _utils import download_fasta - - with open('data/download_files.txt') as f: - lines = f.read().splitlines() - #p = Pool(cpu_count()) - p = Pool(2) - p.map(downloadFasta_to_insert, lines) - - print 'ALL DONE XD!!!!!!!!' 
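#  The retry loop deleted above used linear backoff, sleep(60 * i); the
#  "exponential kind" its comment jokes about would look like this (a sketch
#  only - download_fasta() came from the also-removed _utils helper):
#      for attempt in range(3):
#          try:
#              r = download_fasta(url)
#              break
#          except Exception:
#              sleep(60 * 2 ** attempt)  # 60 s, 120 s, 240 s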
diff --git a/app/batch_insert.py b/app/batch_insert.py deleted file mode 100644 index 4174a03b..00000000 --- a/app/batch_insert.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -import subprocess - -from multiprocessing import Pool, cpu_count -from os import listdir -from time import time - -def batch_call(filename): - subprocess.call(['./savvy.py', '-i', 'tmp/' + filename]) -if __name__ == "__main__": - - start = time() - print 'Starting batch insert at: ', start - - p = Pool(cpu_count()) - p.map(batch_call, listdir('tmp')) - - print '***ALL DONE***' - print 'Completed at: ', time() - print 'Elapsed: ', time() - s diff --git a/app/modules/beautify/__init__.py b/app/middleware/blazegraph/__init__.py similarity index 100% rename from app/modules/beautify/__init__.py rename to app/middleware/blazegraph/__init__.py diff --git a/app/modules/blazeUploader/reserve_id.py b/app/middleware/blazegraph/reserve_id.py similarity index 97% rename from app/modules/blazeUploader/reserve_id.py rename to app/middleware/blazegraph/reserve_id.py index eba6f98b..fdc27d21 100644 --- a/app/modules/blazeUploader/reserve_id.py +++ b/app/middleware/blazegraph/reserve_id.py @@ -1,8 +1,8 @@ import os import logging from datetime import datetime -from modules.turtleGrapher.turtle_utils import generate_hash, generate_uri as gu, link_uris -from modules.blazeUploader.upload_graph import upload_graph +from middleware.graphers.turtle_utils import generate_hash, generate_uri as gu, link_uris +from middleware.blazegraph.upload_graph import upload_graph from SPARQLWrapper import SPARQLWrapper, JSON from rdflib import Literal, Graph import config diff --git a/app/modules/blazeUploader/upload_graph.py b/app/middleware/blazegraph/upload_graph.py similarity index 100% rename from app/modules/blazeUploader/upload_graph.py rename to app/middleware/blazegraph/upload_graph.py diff --git a/app/modules/decorators.py b/app/middleware/decorators.py similarity index 100% rename from app/modules/decorators.py rename to app/middleware/decorators.py diff --git a/app/modules/blazeUploader/__init__.py b/app/middleware/display/__init__.py similarity index 100% rename from app/modules/blazeUploader/__init__.py rename to app/middleware/display/__init__.py diff --git a/app/modules/beautify/beautify.py b/app/middleware/display/beautify.py similarity index 97% rename from app/modules/beautify/beautify.py rename to app/middleware/display/beautify.py index c91ca566..51bb6254 100644 --- a/app/modules/beautify/beautify.py +++ b/app/middleware/display/beautify.py @@ -3,8 +3,8 @@ import cPickle as pickle from os.path import basename from modules.loggingFunctions import initialize_logging -from modules.beautify.find_widest import check_alleles -from modules.turtleGrapher.turtle_utils import actual_filename +from middleware.display.find_widest import check_alleles +from middleware.graphers.turtle_utils import actual_filename # logging log_file = initialize_logging() diff --git a/app/modules/beautify/find_widest.py b/app/middleware/display/find_widest.py similarity index 100% rename from app/modules/beautify/find_widest.py rename to app/middleware/display/find_widest.py diff --git a/app/modules/groupComparisons/__init__.py b/app/middleware/graphers/__init__.py similarity index 100% rename from app/modules/groupComparisons/__init__.py rename to app/middleware/graphers/__init__.py diff --git a/app/modules/turtleGrapher/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py similarity index 97% rename from 
app/modules/turtleGrapher/datastruct_savvy.py rename to app/middleware/graphers/datastruct_savvy.py index 7c72b567..ba374a2c 100644 --- a/app/modules/turtleGrapher/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -1,8 +1,8 @@ import cPickle as pickle from rdflib import BNode, Literal, Graph -from modules.turtleGrapher.turtle_utils import generate_uri as gu, generate_hash, link_uris -from modules.turtleGrapher.turtle_grapher import generate_graph -from modules.blazeUploader.upload_graph import queue_upload +from middleware.graphers.turtle_utils import generate_uri as gu, generate_hash, link_uris +from middleware.graphers.turtle_grapher import generate_graph +from middleware.blazegraph.upload_graph import queue_upload from modules.PanPredic.pan_utils import contig_name_parse # working with Serotype, Antimicrobial Resistance, & Virulence Factor data # structures diff --git a/app/modules/turtleGrapher/turtle_grapher.py b/app/middleware/graphers/turtle_grapher.py similarity index 96% rename from app/modules/turtleGrapher/turtle_grapher.py rename to app/middleware/graphers/turtle_grapher.py index 9bcfd19a..e725a677 100644 --- a/app/modules/turtleGrapher/turtle_grapher.py +++ b/app/middleware/graphers/turtle_grapher.py @@ -5,9 +5,9 @@ # to data structure(rdf triple organization) of the modules you're dev'ing import config -from modules.turtleGrapher.turtle_utils import generate_hash, generate_uri as gu, link_uris -from modules.blazeUploader.upload_graph import queue_upload -from modules.turtleGrapher.turtle_utils import actual_filename +from middleware.graphers.turtle_utils import generate_hash, generate_uri as gu, link_uris +from middleware.blazegraph.upload_graph import queue_upload +from middleware.graphers.turtle_utils import actual_filename from rdflib import Namespace, Graph, Literal, plugin from Bio import SeqIO from os.path import basename diff --git a/app/modules/turtleGrapher/turtle_utils.py b/app/middleware/graphers/turtle_utils.py similarity index 100% rename from app/modules/turtleGrapher/turtle_utils.py rename to app/middleware/graphers/turtle_utils.py diff --git a/app/modules/PanPredic b/app/modules/PanPredic index 50e53502..59eb6025 160000 --- a/app/modules/PanPredic +++ b/app/modules/PanPredic @@ -1 +1 @@ -Subproject commit 50e53502289723406109cb7a504110f1753a56e6 +Subproject commit 59eb60253859abd61c7a1f44e0e7fd2ec9bf7e40 diff --git a/app/modules/turtleGrapher/__init__.py b/app/modules/comparisons/__init__.py similarity index 100% rename from app/modules/turtleGrapher/__init__.py rename to app/modules/comparisons/__init__.py diff --git a/app/modules/groupComparisons/fishers.py b/app/modules/comparisons/fishers.py similarity index 100% rename from app/modules/groupComparisons/fishers.py rename to app/modules/comparisons/fishers.py diff --git a/app/modules/groupComparisons/frontend_queries.py b/app/modules/comparisons/frontend_queries.py similarity index 94% rename from app/modules/groupComparisons/frontend_queries.py rename to app/modules/comparisons/frontend_queries.py index 919afe38..d9e1727e 100644 --- a/app/modules/groupComparisons/frontend_queries.py +++ b/app/modules/comparisons/frontend_queries.py @@ -6,9 +6,9 @@ from functools import wraps from SPARQLWrapper import SPARQLWrapper, JSON from modules.loggingFunctions import initialize_logging -from modules.turtleGrapher.turtle_utils import generate_uri as gu -from modules.groupComparisons.sparql_utils import generate_prefixes -from modules.decorators import toset, tolist, submit +from 
middleware.graphers.turtle_utils import generate_uri as gu +from modules.comparisons.sparql_utils import generate_prefixes +from middleware.decorators import toset, tolist, submit # logging log_file = initialize_logging() diff --git a/app/modules/groupComparisons/groupcomparisons.py b/app/modules/comparisons/groupcomparisons.py similarity index 96% rename from app/modules/groupComparisons/groupcomparisons.py rename to app/modules/comparisons/groupcomparisons.py index 68c8f8a0..a63b930d 100644 --- a/app/modules/groupComparisons/groupcomparisons.py +++ b/app/modules/comparisons/groupcomparisons.py @@ -1,9 +1,9 @@ import logging from modules.loggingFunctions import initialize_logging -from modules.groupComparisons.handle_logical import handle_logical -from modules.groupComparisons.logical_queries import query_targets -from modules.groupComparisons.fishers import fishers -from modules.decorators import tofromHumanReadable +from modules.comparisons.handle_logical import handle_logical +from modules.comparisons.logical_queries import query_targets +from modules.comparisons.fishers import fishers +from middleware.decorators import tofromHumanReadable # logging log_file = initialize_logging() diff --git a/app/modules/groupComparisons/handle_logical.py b/app/modules/comparisons/handle_logical.py similarity index 96% rename from app/modules/groupComparisons/handle_logical.py rename to app/modules/comparisons/handle_logical.py index 2c7ae7c5..8610f6d1 100644 --- a/app/modules/groupComparisons/handle_logical.py +++ b/app/modules/comparisons/handle_logical.py @@ -1,6 +1,6 @@ import logging from modules.loggingFunctions import initialize_logging -from modules.groupComparisons.logical_queries import resolve_spfyids, resolve_spfyids_negated +from modules.comparisons.logical_queries import resolve_spfyids, resolve_spfyids_negated # logging log_file = initialize_logging() diff --git a/app/modules/groupComparisons/logical_queries.py b/app/modules/comparisons/logical_queries.py similarity index 94% rename from app/modules/groupComparisons/logical_queries.py rename to app/modules/comparisons/logical_queries.py index 1de2be74..522ef5a2 100644 --- a/app/modules/groupComparisons/logical_queries.py +++ b/app/modules/comparisons/logical_queries.py @@ -3,10 +3,10 @@ import logging import time from modules.loggingFunctions import initialize_logging -from modules.turtleGrapher.turtle_utils import generate_uri as gu -from modules.groupComparisons.sparql_utils import generate_prefixes -from modules.decorators import toset, tolist, tostring, prefix, submit -from modules.groupComparisons.frontend_queries import is_group +from middleware.graphers.turtle_utils import generate_uri as gu +from modules.comparisons.sparql_utils import generate_prefixes +from middleware.decorators import toset, tolist, tostring, prefix, submit +from modules.comparisons.frontend_queries import is_group # logging log_file = initialize_logging() diff --git a/app/modules/groupComparisons/sparql_utils.py b/app/modules/comparisons/sparql_utils.py similarity index 100% rename from app/modules/groupComparisons/sparql_utils.py rename to app/modules/comparisons/sparql_utils.py diff --git a/app/modules/groupComparisons/spfyOntology.rdf b/app/modules/comparisons/spfyOntology.rdf similarity index 100% rename from app/modules/groupComparisons/spfyOntology.rdf rename to app/modules/comparisons/spfyOntology.rdf diff --git a/app/modules/database/status_queries.py b/app/modules/database/status_queries.py index 5ae82ace..82cde656 100644 --- 
a/app/modules/database/status_queries.py +++ b/app/modules/database/status_queries.py @@ -1,6 +1,6 @@ import logging from modules.loggingFunctions import initialize_logging -from modules.decorators import tojson, prefix, submit +from middleware.decorators import tojson, prefix, submit # logging log_file = initialize_logging() diff --git a/app/modules/gc.py b/app/modules/gc.py index eefbba56..4227a88e 100644 --- a/app/modules/gc.py +++ b/app/modules/gc.py @@ -2,7 +2,7 @@ import config import redis from rq import Queue -from modules.groupComparisons.groupcomparisons import groupcomparisons +from modules.comparisons.groupcomparisons import groupcomparisons from modules.loggingFunctions import initialize_logging # logging diff --git a/app/modules/metadata/metadata.py b/app/modules/metadata/metadata.py index 5f0c8d6e..585875f1 100644 --- a/app/modules/metadata/metadata.py +++ b/app/modules/metadata/metadata.py @@ -2,9 +2,9 @@ import pandas as pd from rdflib import Graph, Literal from werkzeug.utils import secure_filename -from modules.groupComparisons.logical_queries import resolve_spfyids -from modules.turtleGrapher.turtle_utils import generate_uri as gu -from modules.blazeUploader.upload_graph import upload_graph +from modules.comparisons.logical_queries import resolve_spfyids +from middleware.graphers.turtle_utils import generate_uri as gu +from middleware.blazegraph.upload_graph import upload_graph from modules.metadata.mappings import mapping d = {'Human': 'http://purl.bioontology.org/ontology/NCBITAXON/9606', diff --git a/app/modules/pan_spfy.py b/app/modules/pan_spfy.py index 3af6caf0..73389bcf 100644 --- a/app/modules/pan_spfy.py +++ b/app/modules/pan_spfy.py @@ -16,19 +16,19 @@ from rdflib import Graph from modules.qc.qc import qc -from modules.blazeUploader.reserve_id import write_reserve_id +from middleware.blazegraph.reserve_id import write_reserve_id from modules.amr.amr import amr from modules.amr.amr_to_dict import amr_to_dict -from modules.beautify.beautify import beautify -from modules.turtleGrapher.datastruct_savvy import datastruct_savvy, parse_gene_dict -from modules.turtleGrapher.turtle_grapher import turtle_grapher, generate_graph +from middleware.display.beautify import beautify +from middleware.graphers.datastruct_savvy import datastruct_savvy, parse_gene_dict +from middleware.graphers.turtle_grapher import turtle_grapher, generate_graph from modules.PanPredic.pan import pan -from modules.turtleGrapher.turtle_utils import generate_uri as gu +from middleware.graphers.turtle_utils import generate_uri as gu from modules.PanPredic.queries import get_single_region from datetime import datetime import ast import cPickle as pickle -from modules.blazeUploader import upload_graph +from middleware.blazegraph import upload_graph # the only ONE time for global variables # when naming queues, make sure you actually set a worker to listen to that queue diff --git a/app/modules/phylotyper/graph_refs.py b/app/modules/phylotyper/graph_refs.py index 786b6e9c..de8ee3c4 100644 --- a/app/modules/phylotyper/graph_refs.py +++ b/app/modules/phylotyper/graph_refs.py @@ -4,8 +4,8 @@ import requests from tempfile import NamedTemporaryFile from rdflib import Literal -from modules.turtleGrapher.turtle_grapher import generate_graph -from modules.turtleGrapher.turtle_utils import generate_uri as gu +from middleware.graphers.turtle_grapher import generate_graph +from middleware.graphers.turtle_utils import generate_uri as gu def get_ref_vfs(): # we use a tempfile.TemporaryFile to store the ref diff 
--git a/app/modules/phylotyper/ontology.py b/app/modules/phylotyper/ontology.py index 9ad3d24d..3fd71eb0 100644 --- a/app/modules/phylotyper/ontology.py +++ b/app/modules/phylotyper/ontology.py @@ -11,9 +11,9 @@ from rdflib import Graph, Literal, XSD from modules.phylotyper.exceptions import ValuesError, DatabaseError -from modules.turtleGrapher.turtle_utils import generate_uri as gu -from modules.decorators import submit, prefix, tojson -from modules.blazeUploader.upload_graph import upload_turtle, upload_graph +from middleware.graphers.turtle_utils import generate_uri as gu +from middleware.decorators import submit, prefix, tojson +from middleware.blazegraph.upload_graph import upload_turtle, upload_graph from modules.phylotyper.graph_refs import graph_refs diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py index 0903686d..6c8af7fd 100644 --- a/app/modules/phylotyper/phylotyper.py +++ b/app/modules/phylotyper/phylotyper.py @@ -22,8 +22,8 @@ import config -from modules.turtleGrapher.turtle_utils import generate_uri as gu, fulluri_to_basename as u2b, normalize_rdfterm as normalize -from modules.blazeUploader.upload_graph import upload_graph +from middleware.graphers.turtle_utils import generate_uri as gu, fulluri_to_basename as u2b, normalize_rdfterm as normalize +from middleware.blazegraph.upload_graph import upload_graph from modules.phylotyper import ontology, exceptions from modules.phylotyper.sequences import MarkerSequences, phylotyper_query, genename_query diff --git a/app/modules/phylotyper/sequences.py b/app/modules/phylotyper/sequences.py index 58539d20..91e5fe76 100644 --- a/app/modules/phylotyper/sequences.py +++ b/app/modules/phylotyper/sequences.py @@ -6,8 +6,8 @@ """ -from modules.decorators import submit, prefix, tojson -from modules.turtleGrapher import turtle_utils +from middleware.decorators import submit, prefix, tojson +from middleware.graphers import turtle_utils @submit @prefix diff --git a/app/modules/qc/qc.py b/app/modules/qc/qc.py index fd404f3c..3286bb17 100755 --- a/app/modules/qc/qc.py +++ b/app/modules/qc/qc.py @@ -4,7 +4,7 @@ import subprocess import argparse import pandas as pd -from modules.turtleGrapher.turtle_grapher import generate_turtle_skeleton +from middleware.graphers.turtle_grapher import generate_turtle_skeleton def create_blast_db(query_file): ''' diff --git a/app/modules/spfy.py b/app/modules/spfy.py index b04355ab..8962b77e 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -16,14 +16,14 @@ from rdflib import Graph from modules.qc.qc import qc -from modules.blazeUploader.reserve_id import write_reserve_id +from middleware.blazegraph.reserve_id import write_reserve_id from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype from modules.amr.amr import amr from modules.amr.amr_to_dict import amr_to_dict -from modules.beautify.beautify import beautify -from modules.turtleGrapher.datastruct_savvy import datastruct_savvy -from modules.turtleGrapher.turtle_grapher import turtle_grapher -from modules.turtleGrapher.turtle_utils import actual_filename +from middleware.display.beautify import beautify +from middleware.graphers.datastruct_savvy import datastruct_savvy +from middleware.graphers.turtle_grapher import turtle_grapher +from middleware.graphers.turtle_utils import actual_filename from modules.phylotyper import phylotyper from modules.loggingFunctions import initialize_logging diff --git a/app/routes/ra_views.py b/app/routes/ra_views.py index e2f44e64..e31fdece 100644 --- 
a/app/routes/ra_views.py +++ b/app/routes/ra_views.py @@ -1,7 +1,7 @@ from flask import Blueprint, request, jsonify, current_app -from modules.groupComparisons.frontend_queries import get_all_attribute_types, get_attribute_values, get_types +from modules.comparisons.frontend_queries import get_all_attribute_types, get_attribute_values, get_types from routes.file_utils import fix_uri -from modules.decorators import tofromHumanReadable +from middleware.decorators import tofromHumanReadable bp_ra_views = Blueprint('reactapp_views', __name__) diff --git a/app/scripts/generate_ontology.py b/app/scripts/generate_ontology.py index dcbfbb2f..30d9db25 100644 --- a/app/scripts/generate_ontology.py +++ b/app/scripts/generate_ontology.py @@ -1,10 +1,10 @@ # baseURI: https://www.github.com/superphy# from datetime import datetime from rdflib import Literal -from modules.turtleGrapher.turtle_grapher import generate_graph -from modules.turtleGrapher.turtle_utils import generate_uri as gu, link_uris -from modules.blazeUploader.reserve_id import reservation_triple -from modules.savvy import savvy +from middleware.graphers.turtle_grapher import generate_graph +from middleware.graphers.turtle_utils import generate_uri as gu, link_uris +from middleware.blazegraph.reserve_id import reservation_triple +from scripts.savvy import savvy def write_graph(graph): ''' diff --git a/app/modules/savvy.py b/app/scripts/savvy.py similarity index 93% rename from app/modules/savvy.py rename to app/scripts/savvy.py index f286b682..bf037cf2 100755 --- a/app/modules/savvy.py +++ b/app/scripts/savvy.py @@ -1,7 +1,7 @@ #!/usr/bin/env python2 # -*- coding: UTF-8 -*- -# use: python -m modules.savvy -i /home/kevin/Desktop/nonGenBankEcoli/ECI-2866_lcl.fasta +# use: python -m scripts.savvy -i /home/kevin/Desktop/nonGenBankEcoli/ECI-2866_lcl.fasta # S:erotype # A:ntimicrobial Resistance @@ -16,16 +16,16 @@ import shutil import json from modules.qc.qc import qc -from modules.blazeUploader.reserve_id import write_reserve_id +from middleware.blazegraph.reserve_id import write_reserve_id from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype from modules.amr.amr import amr from modules.amr.amr_to_dict import amr_to_dict -from modules.beautify.beautify import beautify -from modules.turtleGrapher.datastruct_savvy import generate_datastruct -from modules.turtleGrapher.turtle_grapher import generate_turtle_skeleton -from modules.turtleGrapher.turtle_utils import generate_hash, generate_uri as gu +from middleware.display.beautify import beautify +from middleware.graphers.datastruct_savvy import generate_datastruct +from middleware.graphers.turtle_grapher import generate_turtle_skeleton +from middleware.graphers.turtle_utils import generate_hash, generate_uri as gu from modules.loggingFunctions import initialize_logging -from modules.blazeUploader.reserve_id import reservation_triple +from middleware.blazegraph.reserve_id import reservation_triple log_file = initialize_logging() log = logging.getLogger(__name__) diff --git a/app/tests/test_beautify.py b/app/tests/test_beautify.py index abe03570..ea7546f0 100644 --- a/app/tests/test_beautify.py +++ b/app/tests/test_beautify.py @@ -2,7 +2,7 @@ import pytest import cPickle as pickle import pandas as pd -from modules.beautify.beautify import beautify, json_return, has_failed +from middleware.display.beautify import beautify, json_return, has_failed from tests.constants import ARGS_DICT, BEAUTIFY_VF_SEROTYPE vf_serotype_gene_dict = os.path.join('tests/refs', 
'GCA_000005845.2_ASM584v2_genomic.fna_ectyper-vf_serotype.p') diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index 454d017d..b5d2c006 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -8,13 +8,13 @@ import pandas as pd from modules.qc.qc import qc, check_header_parsing, check_ecoli -from modules.blazeUploader.reserve_id import write_reserve_id +from middleware.blazegraph.reserve_id import write_reserve_id from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype from modules.amr.amr import amr from modules.amr.amr_to_dict import amr_to_dict -from modules.beautify.beautify import beautify -from modules.turtleGrapher.datastruct_savvy import datastruct_savvy -from modules.turtleGrapher.turtle_grapher import turtle_grapher +from middleware.display.beautify import beautify +from middleware.graphers.datastruct_savvy import datastruct_savvy +from middleware.graphers.turtle_grapher import turtle_grapher from tests.constants import ARGS_DICT diff --git a/app/tests/test_savvy.py b/app/tests/test_savvy.py index 4a89857d..fe507445 100644 --- a/app/tests/test_savvy.py +++ b/app/tests/test_savvy.py @@ -2,7 +2,7 @@ import shutil import pytest from hashlib import sha1 -from modules.savvy import mock_reserve_id, get_spfyid_file, savvy +from scripts.savvy import mock_reserve_id, get_spfyid_file, savvy from tests.constants import ARGS_DICT def sha1_hash(f): diff --git a/app/tests/test_turtle_utils.py b/app/tests/test_turtle_utils.py index 53934cea..725b5ae0 100644 --- a/app/tests/test_turtle_utils.py +++ b/app/tests/test_turtle_utils.py @@ -1,7 +1,7 @@ from hashlib import sha1 from rdflib import URIRef -from modules.turtleGrapher.turtle_utils import generate_uri as gu -from modules.turtleGrapher.turtle_utils import actual_filename +from middleware.graphers.turtle_utils import generate_uri as gu +from middleware.graphers.turtle_utils import actual_filename def test_generate_uri(): # test generate usage: diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index 047be560..d2e5ed9e 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -289,7 +289,7 @@ Directly Adding a New Module .. code-block:: python - from modules.blazeUploader.reserve_id import write_reserve_id + from middleware.blazegraph.reserve_id import write_reserve_id The top-most directory is used to build Docker Images and copies the contents of ``/app`` to run inside the containers. This is done as the apps (Flask, Reactapp) themselves don't need copies of the Dockerfiles, other apps, etc. @@ -471,7 +471,7 @@ If you're integrating your codebase with Spfy, add your code to a new directory import config import redis from rq import Queue - from modules.groupComparisons.groupcomparisons import groupcomparisons + from modules.comparisons.groupcomparisons import groupcomparisons from modules.loggingFunctions import initialize_logging # logging From 2306caebdb797b04627bceee609611f52900cda1 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 13 Feb 2018 22:32:58 -0500 Subject: [PATCH 009/122] DEBUG: `scripts` dir not picked up during tests? 
--- app/{scripts => modules}/savvy.py | 2 +- app/scripts/generate_ontology.py | 2 +- app/tests/test_savvy.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename app/{scripts => modules}/savvy.py (99%) diff --git a/app/scripts/savvy.py b/app/modules/savvy.py similarity index 99% rename from app/scripts/savvy.py rename to app/modules/savvy.py index bf037cf2..b354a9cf 100755 --- a/app/scripts/savvy.py +++ b/app/modules/savvy.py @@ -1,7 +1,7 @@ #!/usr/bin/env python2 # -*- coding: UTF-8 -*- -# use: python -m scripts.savvy -i /home/kevin/Desktop/nonGenBankEcoli/ECI-2866_lcl.fasta +# use: python -m modules.savvy -i /home/kevin/Desktop/nonGenBankEcoli/ECI-2866_lcl.fasta # S:erotype # A:ntimicrobial Resistance diff --git a/app/scripts/generate_ontology.py b/app/scripts/generate_ontology.py index 30d9db25..38312e47 100644 --- a/app/scripts/generate_ontology.py +++ b/app/scripts/generate_ontology.py @@ -4,7 +4,7 @@ from middleware.graphers.turtle_grapher import generate_graph from middleware.graphers.turtle_utils import generate_uri as gu, link_uris from middleware.blazegraph.reserve_id import reservation_triple -from scripts.savvy import savvy +from modules.savvy import savvy def write_graph(graph): ''' diff --git a/app/tests/test_savvy.py b/app/tests/test_savvy.py index fe507445..4a89857d 100644 --- a/app/tests/test_savvy.py +++ b/app/tests/test_savvy.py @@ -2,7 +2,7 @@ import shutil import pytest from hashlib import sha1 -from scripts.savvy import mock_reserve_id, get_spfyid_file, savvy +from modules.savvy import mock_reserve_id, get_spfyid_file, savvy from tests.constants import ARGS_DICT def sha1_hash(f): From e258414505a10a6bafa7479fa5e5e7394e1a2d38 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 13 Feb 2018 22:33:44 -0500 Subject: [PATCH 010/122] FIX: right the __init__.py --- app/scripts/__init__.py | 0 app/{modules => scripts}/savvy.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 app/scripts/__init__.py rename app/{modules => scripts}/savvy.py (100%) diff --git a/app/scripts/__init__.py b/app/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/modules/savvy.py b/app/scripts/savvy.py similarity index 100% rename from app/modules/savvy.py rename to app/scripts/savvy.py From 4d4963c05b364b71680d4dbdc1e2c26075bff9bf Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 13 Feb 2018 22:34:06 -0500 Subject: [PATCH 011/122] FIX: right the __init__.py --- app/scripts/generate_ontology.py | 2 +- app/scripts/savvy.py | 2 +- app/tests/test_savvy.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/scripts/generate_ontology.py b/app/scripts/generate_ontology.py index 38312e47..30d9db25 100644 --- a/app/scripts/generate_ontology.py +++ b/app/scripts/generate_ontology.py @@ -4,7 +4,7 @@ from middleware.graphers.turtle_grapher import generate_graph from middleware.graphers.turtle_utils import generate_uri as gu, link_uris from middleware.blazegraph.reserve_id import reservation_triple -from modules.savvy import savvy +from scripts.savvy import savvy def write_graph(graph): ''' diff --git a/app/scripts/savvy.py b/app/scripts/savvy.py index b354a9cf..bf037cf2 100755 --- a/app/scripts/savvy.py +++ b/app/scripts/savvy.py @@ -1,7 +1,7 @@ #!/usr/bin/env python2 # -*- coding: UTF-8 -*- -# use: python -m modules.savvy -i /home/kevin/Desktop/nonGenBankEcoli/ECI-2866_lcl.fasta +# use: python -m scripts.savvy -i /home/kevin/Desktop/nonGenBankEcoli/ECI-2866_lcl.fasta # S:erotype # A:ntimicrobial Resistance diff --git 
a/app/tests/test_savvy.py b/app/tests/test_savvy.py index 4a89857d..fe507445 100644 --- a/app/tests/test_savvy.py +++ b/app/tests/test_savvy.py @@ -2,7 +2,7 @@ import shutil import pytest from hashlib import sha1 -from modules.savvy import mock_reserve_id, get_spfyid_file, savvy +from scripts.savvy import mock_reserve_id, get_spfyid_file, savvy from tests.constants import ARGS_DICT def sha1_hash(f): From 56064372101a79ff9c31dd3191aa3883814edfae Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 01:39:40 -0500 Subject: [PATCH 012/122] START: define a model for the frontend returns --- app/middleware/models.py | 16 ++++++++++++++++ app/tests/test_models.py | 17 +++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 app/middleware/models.py create mode 100644 app/tests/test_models.py diff --git a/app/middleware/models.py b/app/middleware/models.py new file mode 100644 index 00000000..d17e1a16 --- /dev/null +++ b/app/middleware/models.py @@ -0,0 +1,16 @@ +from jsonmodels import models, fields + + +class SubtypingRow(models.Base): + analysis = fields.StringField(required=True) + contigid = fields.StringField(required=True) + filename = fields.StringField(required=True) + hitcutoff = fields.StringField(nullable=True) + hitname = fields.StringField(required=True) + hitorientation = fields.StringField(nullable=True) + hitstart = fields.StringField(nullable=True) + hitstop = fields.StringField(nullable=True) + + +class SubtypingResult(models.Base): + rows = fields.ListField([SubtypingRow], nullable=True) diff --git a/app/tests/test_models.py b/app/tests/test_models.py new file mode 100644 index 00000000..1566d1ee --- /dev/null +++ b/app/tests/test_models.py @@ -0,0 +1,17 @@ +from middleware.models import SubtypingRow, SubtypingResult +from tests.constants import BEAUTIFY_VF_SEROTYPE + +def test_models(): + subtyping_result = [ + SubtypingRow( + analysis=d['analysis'], + contigid=d['contigid'], + filename=d['filename'], + hitcutoff=str(d['hitcutoff']), + hitname=d['hitname'], + hitorientation=d['hitorientation'], + hitstart=str(d['hitstart']), + hitstop=str(d['hitstop']) + ) + for d in BEAUTIFY_VF_SEROTYPE] + subtyping_result.validate() From b94ede6602d0d3a948e2b3e1df00e398a79ad754 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 02:11:49 -0500 Subject: [PATCH 013/122] FIX: convert to the main model before validate --- app/tests/test_models.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 1566d1ee..4911161e 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -2,7 +2,7 @@ from tests.constants import BEAUTIFY_VF_SEROTYPE def test_models(): - subtyping_result = [ + subtyping_list = [ SubtypingRow( analysis=d['analysis'], contigid=d['contigid'], @@ -14,4 +14,7 @@ def test_models(): hitstop=str(d['hitstop']) ) for d in BEAUTIFY_VF_SEROTYPE] + subtyping_result = SubtypingResult( + rows = subtyping_list + ) subtyping_result.validate() From 46c65e618196b26923aad78dae9ae4e3630cf0af Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 11:46:51 -0500 Subject: [PATCH 014/122] CHANGE: have ectyper call convert result into our model --- app/middleware/modellers.py | 34 +++++++++++++++++++++++++++++ app/modules/ectyper/call_ectyper.py | 21 +++++++++--------- app/tests/test_models.py | 5 ++++- app/tests/test_modules.py | 7 +++--- 4 files changed, 52 insertions(+), 15 deletions(-) create mode 100644 app/middleware/modellers.py diff --git 
a/app/middleware/modellers.py b/app/middleware/modellers.py
new file mode 100644
index 00000000..0512bd0a
--- /dev/null
+++ b/app/middleware/modellers.py
@@ -0,0 +1,34 @@
+# We try to keep all model creation in this file so it's easier to reference.
+import pandas as pd
+from middleware.graphers.turtle_utils import actual_filename
+
+
+def model_serotype(pi, pl, output_file):
+    """
+    Creates a SubtypingResult model from ECTyper's serotyping output.
+    """
+    # Read the vanilla output_file from ECTyper.
+    df = pd.read_csv(output_file)
+
+    # TODO: incorporate the pl.
+
+    # Loop.
+    subtyping_list = [
+        SubtypingRow(
+            analysis='Serotype',
+            contigid='n/a',
+            filename=actual_filename(row['genome']),
+            hitcutoff=str(pi),
+            hitname="{0}:{1}".format(row['O_prediction'],row['H_prediction']),
+            hitorientation='n/a',
+            hitstart='n/a',
+            hitstop='n/a'
+        )
+        for index, row in df.iterrows()]
+
+    # SubtypingResult.rows expects a list.
+    subtyping_result = SubtypingResult([
+        subtyping_row
+    ])
+
+    return subtyping_result

diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py
index e6e981d6..4cc52d6f 100644
--- a/app/modules/ectyper/call_ectyper.py
+++ b/app/modules/ectyper/call_ectyper.py
@@ -8,6 +8,7 @@
 from ast import literal_eval
 from os.path import basename
 from modules.loggingFunctions import initialize_logging
+from middleware.modellers import model_serotype

 log_file = initialize_logging()
 log = logging.getLogger(__name__)
@@ -82,16 +83,14 @@ def call_ectyper_serotype(args_dict):
     ])
     if ret_code == 0:
         output_file = os.path.join(output_dir, 'output.csv')
-        df = pd.read_csv(output_file)
-        # Add the PI to our DataFrame.
-        df['pi'] = pi
-        # Add the PL to our DataFrame.
-        df['pl'] = pl
-        # The final result file from ECTyper serotyping. This copies it back to
-        # config.DATASTORE
-        csv_file = os.path.join(genome_file + '_ectyper_serotype.csv')
-        with open(csv_file, 'w') as fh:
-            df.to_csv(fh, header=True, index_label='genome')
-        return csv_file
+        # Create a SubtypingResult model from the output.
+        subtyping_result = model_serotype(
+            pi=pi,
+            pl=pl,
+            output_file=output_file
+        )
+        p = os.path.join(genome_file, '_ectyper_vf.p')
+        pickle.dump(subtyping_result,open(p,'wb'))
+        return p
     else:
         raise Exception('ECTyper Serotyping failed for ' + genome_file)

diff --git a/app/tests/test_models.py b/app/tests/test_models.py
index 4911161e..526e784a 100644
--- a/app/tests/test_models.py
+++ b/app/tests/test_models.py
@@ -1,7 +1,10 @@
 from middleware.models import SubtypingRow, SubtypingResult
 from tests.constants import BEAUTIFY_VF_SEROTYPE

-def test_models():
+def test_subtyping_model_direct():
+    """
+    Use our dataset to directly create a subtyping results model and validate it.
+    """
     subtyping_list = [
         SubtypingRow(
             analysis=d['analysis'],

diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py
index 454d017d..71c133a4 100644
--- a/app/tests/test_modules.py
+++ b/app/tests/test_modules.py
@@ -87,9 +87,10 @@ def test_ectyper_serotype():
         # Check the actual call from Spfy's code.
         single_dict = dict(ARGS_DICT)
         single_dict.update({'i':ecoli_genome})
-        serotype_csv = call_ectyper_serotype(single_dict)
-        ectyper_serotype_df = pd.read_csv(serotype_csv)
-        assert isinstance(ectyper_serotype_df, pd.DataFrame)
+        pickled_serotype_model = call_ectyper_serotype(single_dict)
+        ectyper_serotype_model = pickle.load(open(pickled_serotype_model,'rb'))
+        # Validate (throws an error if invalid).
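#  jsonmodels reports problems by raising (jsonmodels.errors.ValidationError)
#  rather than returning False, so a bad model aborts the test right here;
#  e.g. a SubtypingRow missing its required 'filename' field would raise
#  before any later assert runs.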
+        ectyper_serotype_model.validate()

 def test_amr():
     ecoli_genome = GENOMES_LIST_ECOLI[0]

From 4de78a216bae89c13079c75cbaf91ba89e1b010b Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Wed, 14 Feb 2018 12:06:03 -0500
Subject: [PATCH 015/122] FIX: list in list

---
 app/middleware/modellers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py
index 0512bd0a..c22bedda 100644
--- a/app/middleware/modellers.py
+++ b/app/middleware/modellers.py
@@ -26,9 +26,9 @@ def model_serotype(pi, pl, output_file):
         )
         for index, row in df.iterrows()]

-    # SubtypingResult.rows expects a list.
-    subtyping_result = SubtypingResult([
+    # Convert the list of rows into a SubtypingResult model.
+    subtyping_result = SubtypingResult(
         subtyping_row
-    ])
+    )

     return subtyping_result

From d91bcc71f447335a28a1e53d779dc38778d2d337 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Wed, 14 Feb 2018 12:06:24 -0500
Subject: [PATCH 016/122] FIX: list in list

---
 app/middleware/modellers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py
index c22bedda..18f3d31d 100644
--- a/app/middleware/modellers.py
+++ b/app/middleware/modellers.py
@@ -28,7 +28,7 @@ def model_serotype(pi, pl, output_file):
     # Convert the list of rows into a SubtypingResult model.
     subtyping_result = SubtypingResult(
-        subtyping_row
+        subtyping_list
     )

     return subtyping_result

From 49098e4b79615c0f70126e46a9b2886bcc35050f Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Wed, 14 Feb 2018 12:45:48 -0500
Subject: [PATCH 017/122] FIX: imports

---
 app/middleware/modellers.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py
index 18f3d31d..2a0546ac 100644
--- a/app/middleware/modellers.py
+++ b/app/middleware/modellers.py
@@ -1,5 +1,6 @@
 # We try to keep all model creation in this file so it's easier to reference.
 import pandas as pd
+from middleware.models import SubtypingRow, SubtypingResult
 from middleware.graphers.turtle_utils import actual_filename

From 3b51b706695c43721794d256a693a6d358325731 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Wed, 14 Feb 2018 13:20:39 -0500
Subject: [PATCH 018/122] FIX: model creation

---
 .travis.yml                 | 1 +
 app/middleware/modellers.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index b135076c..982e5bce 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,6 +12,7 @@ before_install:
   - docker build -t superphy/backend-rq-blazegraph:2.0.0 -f Dockerfile-rq-blazegraph .
   - docker-compose up -d
   - docker ps -a
+  - docker-compose logs webserver
   - ls
  #### miniconda install:
  # We do this conditionally because it saves us some downloading if the

diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py
index 2a0546ac..7fed795c 100644
--- a/app/middleware/modellers.py
+++ b/app/middleware/modellers.py
@@ -29,7 +29,7 @@ def model_serotype(pi, pl, output_file):
     # Convert the list of rows into a SubtypingResult model.
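#  The keyword form matches the ListField declared on the model in
#  middleware/models.py; a minimal standalone sketch of the same pattern
#  (field values are illustrative):
#      row = SubtypingRow(analysis='Serotype', contigid='n/a',
#                         filename='g.fasta', hitname='O157:H7')
#      result = SubtypingResult(rows=[row])
#      result.validate()
#  Fields left out here (hitcutoff, hitstart, ...) are nullable per models.py.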
subtyping_result = SubtypingResult( - subtyping_list + rows = subtyping_list ) return subtyping_result From 6fa042fbca37d4ffdfbee5cef363e990a712ba4f Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 13:58:45 -0500 Subject: [PATCH 019/122] FIX: pickling --- app/modules/ectyper/call_ectyper.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py index 4cc52d6f..30838a5d 100644 --- a/app/modules/ectyper/call_ectyper.py +++ b/app/modules/ectyper/call_ectyper.py @@ -58,7 +58,8 @@ def call_ectyper_vf(args_dict): # we are calling tools_controller on only one file, so grab that dict key, ectyper_dict = ectyper_dict.popitem() - p = os.path.join(filepath + '_ectyper_vf.p') + # Path for the pickle dump. + p = filepath + '_ectyper_vf.p' pickle.dump(ectyper_dict,open(p,'wb')) return p @@ -89,7 +90,8 @@ def call_ectyper_serotype(args_dict): output_file=output_file ) - p = os.path.join(genome_file, '_ectyper_vf.p') + # Path for the pickle dump. + p = genome_file + '_ectyper_vf.p' pickle.dump(subtyping_result,open(p,'wb')) return p else: From a888c9030d8ceb60fa98858951bd9ea02336dd0b Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 15:36:48 -0500 Subject: [PATCH 020/122] ADD: conversion to json + tests --- app/middleware/display/beautify.py | 22 ++++++++++++++++++++++ app/tests/test_modules.py | 7 ++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index 51bb6254..7ff19427 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -5,11 +5,33 @@ from modules.loggingFunctions import initialize_logging from middleware.display.find_widest import check_alleles from middleware.graphers.turtle_utils import actual_filename +from middleware.models import SubtypingResult # logging log_file = initialize_logging() log = logging.getLogger(__name__) + +def _convert_subtyping(model): + # Convert the model to a generic JSON structure. + struct = model.to_struct() + # This is not strictly json; more like a list than a dict structure. + rows_list = struct['rows'] + return rows_list + +def model_to_json(model): + """ + Converts models to json for the front-end. + """ + # Validate the model submitted before processing. + model.validate() + # Conversion. + if isinstance(model, SubtypingResult): + return _convert_subtyping(model) + else: + raise Exception('model_to_json() called for a model without a handler.') + + def json_return(args_dict, gene_dict): """ This converts the gene dict into a json format for return to the front end diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index 71c133a4..e8ffde76 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -92,6 +92,11 @@ def test_ectyper_serotype(): # Validate (throws an error if invalid). ectyper_serotype_model.validate() + # Check the conversion for the front-end.
+ json_r = model_to_json(ectyper_serotype_model) + # This is not strictly json; more like a list than a dict structure. + assert isinstance(json_r, list) + def test_amr(): ecoli_genome = GENOMES_LIST_ECOLI[0] # this generates the .tsv From 2332f055e039957758d5a1697b00fc8ae28d9d03 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 19:14:43 -0500 Subject: [PATCH 021/122] STOP: knitted everything together + started some restructuring of how we do pipelines --- app/config.py | 2 + app/middleware/display/beautify.py | 33 +-- app/middleware/graphers/datastruct_savvy.py | 64 ++++-- app/modules/spfy.py | 241 +++++++++++++------- 4 files changed, 229 insertions(+), 111 deletions(-) diff --git a/app/config.py b/app/config.py index 6b33ee9e..0a7a98da 100644 --- a/app/config.py +++ b/app/config.py @@ -14,6 +14,8 @@ # enqueued function to complete before terminating it with an ERROR # If not specified, jobs must execute within 3 mins DEFAULT_TIMEOUT = 600 # in seconds (ie. 10 mins) +# Defines how long results are kept in Redis. 500 is the default for RQ. +DEFAULT_RESULT_TTL=500 PAN_TIMEOUT = 100000 # if BACKLOG_ENABLED = True, then all analyses modules will be run in the # in the background for every submitted file diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index 7ff19427..c93b3f69 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -130,7 +130,8 @@ def handle_failed(json_r, args_dict): ret.append(t) return ret -def beautify(args_dict, pickled_dictionary): +# TODO: convert this to models-only. +def beautify(args_dict=None, pickled_result): ''' Converts a given 'spit' datum (a dictionary with our results from rgi/ectyper) to a json form used by the frontend. This result is to be stored in Redis by the calling RQ Worker. :param args_dict: The arguments supplied by the user. In the case of spfy web-app, this is used to determine which analysis options were set. @@ -139,16 +140,22 @@ def beautify(args_dict, pickled_dictionary): :return: json representation of the results, as required by the front-end. ''' - gene_dict = pickle.load(open(pickled_dictionary, 'rb')) - # this converts our dictionary structure into json and adds metadata (filename, etc.) - json_r = json_return(args_dict, gene_dict) - log.debug('First parse into json_r: ' + str(json_r)) - # if looking for only serotype, skip this step - if args_dict['options']['vf'] or args_dict['options']['amr']: - json_r = check_alleles(json_r) - log.debug('After checking alleles json_r: ' + str(json_r)) - # check if there is an analysis module that has failed in the result - if has_failed(json_r): - return handle_failed(json_r, args_dict) + result = pickle.load(open(pickled_result, 'rb')) + if isinstance(result, dict): + gene_dict = result + # this converts our dictionary structure into json and adds metadata (filename, etc.)
+ json_r = json_return(args_dict, gene_dict) + log.debug('First parse into json_r: ' + str(json_r)) + # if looking for only serotype, skip this step + if args_dict['options']['vf'] or args_dict['options']['amr']: + json_r = check_alleles(json_r) + log.debug('After checking alleles json_r: ' + str(json_r)) + # check if there is an analysis module that has failed in the result + if has_failed(json_r): + return handle_failed(json_r, args_dict) + else: + return json_r + elif isinstance(result, SubtypingResult): + return model_to_json(result) else: - return json_r + raise Exception("beautify() could not handle pickled file: {0}.".format(pickled_result)) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index ba374a2c..9d58bd90 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -4,9 +4,36 @@ from middleware.graphers.turtle_grapher import generate_graph from middleware.blazegraph.upload_graph import queue_upload from modules.PanPredic.pan_utils import contig_name_parse +from middleware.models import SubtypingResult # working with Serotype, Antimicrobial Resistance, & Virulence Factor data # structures +def _convert_subtyping(graph, model, uriIsolate): + # Convert the model to a graph. + struct = model.to_struct() + rows_list = struct['rows'] + for row in rows_list: + graph.add(( + uriIsolate, + gu('ge:0001076'), + Literal(row['O_prediction']) + )) + graph.add(( + uriIsolate, + gu('ge:0001077'), + Literal(serotyper_dict['H_prediction']) + )) + return graph + +def model_to_graph(graph, model, uriIsolate): + # Validate the model submitted before processing. + model.validate() + # Conversion. + if isinstance(model, SubtypingResult): + return _convert_subtyping(graph, model, uriIsolate) + else: + raise Exception('model_to_graph() called for a model without a handler.') + def parse_serotype(graph, serotyper_dict, uriIsolate): if 'O type' in serotyper_dict: graph.add((uriIsolate, gu('ge:0001076'), @@ -148,7 +175,7 @@ def parse_gene_dict(graph, gene_dict, uriGenome, geneType): def generate_datastruct(query_file, id_file, pickled_dictionary): ''' - This is simply to decouple the graph generation code from the + Separates the graph generation code from the upload code. In RQ backend, the datastruct_savvy() method is called where-as in savvy.py (without RQ or Blazegraph) only compute_datastruct() is called. The return type must be the same in datastruct_savvy to @@ -168,22 +195,25 @@ def generate_datastruct(query_file, id_file, pickled_dictionary): spfyid = int(l) uriIsolate = gu(':spfy' + str(spfyid)) - # results dict retrieval - results_dict = pickle.load(open(pickled_dictionary, 'rb')) - - # graphing functions - for key in results_dict.keys(): - if key == 'Serotype': - graph = parse_serotype(graph,results_dict['Serotype'],uriIsolate) - elif key == 'Virulence Factors': - graph = parse_gene_dict(graph, results_dict['Virulence Factors'], uriGenome, 'VirulenceFactor') - elif key == 'Antimicrobial Resistance': - graph = parse_gene_dict(graph, results_dict['Antimicrobial Resistance'], uriGenome, - 'AntimicrobialResistanceGene') - #elif key == 'PanGenomeRegion': - # graph = parse_gene_dict(graph, results_dict[key], uriGenome, key) - - return graph + # Unpickle. + results = pickle.load(open(pickled_dictionary, 'rb')) + # Check if we have a model or a dictionary.
+ if isinstance(results, dict): + # graphing functions + for key in results: + if key == 'Serotype': + graph = parse_serotype(graph,results['Serotype'],uriIsolate) + elif key == 'Virulence Factors': + graph = parse_gene_dict(graph, results['Virulence Factors'], uriGenome, 'VirulenceFactor') + elif key == 'Antimicrobial Resistance': + graph = parse_gene_dict(graph, results['Antimicrobial Resistance'], uriGenome, + 'AntimicrobialResistanceGene') + return graph + elif isinstance(results, SubtypingResult): + graph = model_to_graph(graph, results, uriIsolate) + return graph + else: + raise Exception("generate_datastruct() could not handle pickled file: {0}.".format(pickled_dictionary)) def datastruct_savvy(query_file, id_file, pickled_dictionary): """ diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 8962b77e..33947c84 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -2,6 +2,7 @@ # -*- coding: UTF-8 -*- import os +import copy import redis import config @@ -20,7 +21,7 @@ from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype from modules.amr.amr import amr from modules.amr.amr_to_dict import amr_to_dict -from middleware.display.beautify import beautify +from middleware.display.beautify import beautify, model_to_json from middleware.graphers.datastruct_savvy import datastruct_savvy from middleware.graphers.turtle_grapher import turtle_grapher from middleware.graphers.turtle_utils import actual_filename @@ -50,6 +51,101 @@ backlog_multiples_q = Queue( 'backlog_multiples', connection=redis_conn, default_timeout=config.DEFAULT_TIMEOUT) +def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict): + """ + Enqueue all the jobs required for VF. + """ + # Dictionary of Job instances to return + d = {} + + # Create a copy of the arguments dictionary and disable Serotype. + # This copy is passed to the old ECTyper. + single_dict_vf = copy.deepcopy(single_dict) + single_dict_vf['options']['serotype'] = False + # Enqueue the old ECTyper + job_ectyper_vf = singles.enqueue( + call_ectyper_vf, + single_dict_vf, + depends_on=job_id) + d['job_ectyper_vf'] = job_ectyper_vf + + # If bulk uploading is set, we return the datastruct as the end task + # to poll for job completion, therefore must set ttl of -1. + if single_dict['options']['bulk']: + ttl_value = -1 + else: + ttl_value = config.DEFAULT_RESULT_TTL + + # datastruct_savvy() stores result to Blazegraph. + job_ectyper_datastruct_vf = multiples.enqueue( + datastruct_savvy, + query_file, + query_file + '_id.txt', + query_file + '_ectyper_vf.p', + depends_on=job_ectyper, + result_ttl=ttl_value) + d['job_ectyper_datastruct_vf'] = job_ectyper_datastruct_vf + + if not single_dict['options']['bulk']: + # Only bother parsing into json if user has requested either vf or + # serotype, and we're not in bulk uploading. + job_ectyper_beautify_vf = multiples.enqueue( + beautify, + single_dict, + query_file + '_ectyper.p', + depends_on=job_ectyper_vf, + result_ttl=-1 + ) + d['job_ectyper_beautify_vf'] = job_ectyper_beautify_vf + + return d + +def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict): + """ + Enqueue all the jobs required for Serotyping. + """ + # Dictionary of Job instances to return + d = {} + + # Create a copy of the arguments dictionary and disable Serotype. + # This copy is passed to the old ECTyper.
+ single_dict_vf = copy.deepcopy(single_dict) + # Enqueue the new ECTyper + job_ectyper_serotype = multiples.enqueue( + call_ectyper_serotype, + single_dict, + depends_on=job_id) + d['job_ectyper_serotype'] = job_ectyper_serotype + + # If bulk uploading is set, we return the datastruct as the end task + # to poll for job completion, therefore must set ttl of -1. + if single_dict['options']['bulk']: + ttl_value = -1 + else: + ttl_value = config.DEFAULT_RESULT_TTL + + # datastruct_savvy() stores result to Blazegraph. + job_ectyper_datastruct_serotype = multiples.enqueue( + datastruct_savvy, + query_file, + query_file + '_id.txt', + query_file + '_ectyper_serotype.p', + depends_on=job_ectyper, + result_ttl=-1) + d['job_ectyper_serotype'] = job_ectyper_datastruct_serotype + + if not single_dict['options']['bulk']: + # Only bother parsing into json if user has requested either vf or + # serotype, and we're not in bulk uploading. + job_ectyper_beautify_serotype = multiples.enqueue( + beautify, + pickled_result = query_file + '_ectyper_serotype.p', + depends_on=job_ectyper_vf, + result_ttl=-1 + ) + d['job_ectyper_beautify_serotype'] = job_ectyper_beautify_serotype + + return d def blob_savvy_enqueue(single_dict): ''' @@ -70,77 +166,69 @@ def blob_savvy_enqueue(single_dict): job_id = blazegraph_q.enqueue( write_reserve_id, query_file, depends_on=job_qc, result_ttl=-1) - # ECTYPER PIPELINE - def ectyper_pipeline(singles, multiples): - """The ectyper call is special in that it requires the entire arguments - to decide whether to carry the serotype option flag, virulance - factors option flag, and percent identity field. We use the old ECTyper - for VF and the new ECTyper for Serotyping. - """ - if single_dict['options']['vf']: - # Create a copy of the arguments dictionary and disable Serotype. - # This copy is passed to the old ECTyper. - single_dict_vf = dict(single_dict) - single_dict_vf['options']['serotype'] = False - # Enqueue the old ECTyper - job_ectyper_vf = singles.enqueue( - call_ectyper_vf, - single_dict_vf, - depends_on=job_id) - if single_dict['options']['serotype']: - # Enqueue the new ECTyper - job_ectyper_serotype = multiples.enqueue( - call_ectyper_serotype, - single_dict, - depends_on=job_id) - - # datastruct_savvy() stores result to Blazegraph. + ## ECTyper (VF & Serotype) + # VF + if single_dict['options']['vf']: + ectyper_vf_jobs = _ectyper_pipeline_vf( + singles_q, + multiples_q, + query_file, + single_dict + ) if single_dict['options']['bulk']: - # If bulk uploading is set, we return the datastruct as the end task - # to poll for job completion, therefore must set ttl of -1. 
- if single_dict['options']['vf']: - job_ectyper_datastruct = multiples.enqueue( - datastruct_savvy, - query_file, - query_file + '_id.txt', - query_file + '_ectyper_vf.p', - depends_on=job_ectyper, - result_ttl=-1) - if single_dict['options']['serotype']: - job_ectyper_datastruct = multiples.enqueue( - datastruct_savvy, - query_file, - query_file + '_id.txt', - query_file + '_ectyper_serotype.p', - depends_on=job_ectyper, - result_ttl=-1) + ret_job_ectyper = ectyper_vf_jobs['job_ectyper_datastruct_vf'] + jobs[ret_job_ectyper.get_id()] = { + 'file': single_dict['i'], + 'analysis': 'Virulence Factors'} else: - job_ectyper_datastruct = multiples.enqueue( - datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_ectyper.p', depends_on=job_ectyper) - d = {'job_ectyper': job_ectyper, - 'job_ectyper_datastruct': job_ectyper_datastruct} - # only bother parsing into json if user has requested either vf or - # serotype - if (single_dict['options']['vf'] or single_dict['options']['serotype']) and not single_dict['options']['bulk']: - job_ectyper_beautify = multiples.enqueue( - beautify, single_dict, query_file + '_ectyper.p', depends_on=job_ectyper, result_ttl=-1) - d.update({'job_ectyper_beautify': job_ectyper_beautify}) - return d - - # if user selected any ectyper-dependent options on the front-end - if single_dict['options']['vf'] or single_dict['options']['serotype']: - ectyper_jobs = ectyper_pipeline(singles_q, multiples_q) - job_ectyper = ectyper_jobs['job_ectyper'] - job_ectyper_datastruct = ectyper_jobs['job_ectyper_datastruct'] - if not single_dict['options']['bulk']: - job_ectyper_beautify = ectyper_jobs['job_ectyper_beautify'] - # or if the backlog queue is enabled + ret_job_ectyper = ectyper_vf_jobs['job_ectyper_beautify_vf'] + jobs[ret_job_ectyper.get_id()] = { + 'file': single_dict['i'], + 'analysis': 'Virulence Factors'} elif config.BACKLOG_ENABLED: - # we need to create a dict with these options enabled: - - # just enqueue the jobs, we don't care about returning them - ectyper_jobs = ectyper_pipeline(backlog_singles_q, backlog_multiples_q) - job_ectyper_datastruct = ectyper_jobs['job_ectyper_datastruct'] + # We need to create a dict with the options enabled. + backlog_d = copy.deepcopy(single_dict) + backlog_d['options']['vf'] = True + # Explicitly set serotype to false in case of overlap. + backlog_d['options']['serotype'] = False + # Note: we use different queues. + _ectyper_pipeline_vf( + backlog_singles_q, + backlog_multiples_q, + query_file, + backlog_d + ) + + # Serotype + if single_dict['options']['serotype']: + ectyper_serotype_jobs = _ectyper_pipeline_serotype( + singles_q, + multiples_q, + query_file, + single_dict + ) + if single_dict['options']['bulk']: + ret_job_ectyper = ectyper_serotype_jobs['job_ectyper_datastruct_serotype'] + jobs[ret_job_ectyper.get_id()] = { + 'file': single_dict['i'], + 'analysis': 'Serotype'} + else: + ret_job_ectyper = ectyper_serotype_jobs['job_ectyper_beautify_serotype'] + jobs[ret_job_ectyper.get_id()] = { + 'file': single_dict['i'], + 'analysis': 'Serotype'} + elif config.BACKLOG_ENABLED: + # We need to create a dict with the options enabled. + backlog_d = copy.deepcopy(single_dict) + # Explicitly set vf to false in case of overlap.
+ backlog_d['options']['vf'] = False + backlog_d['options']['serotype'] = True + _ectyper_pipeline_serotype( + backlog_singles_q, + backlog_multiples_q, + query_file, + backlog_d + ) # END ECTYPER PIPELINE # AMR PIPELINE @@ -241,20 +329,11 @@ def phylotyper_pipeline(multiples, subtype): # to poll for completion of all jobs # these two ifs handle the case where amr (or vf or serotype) might not # be selected but bulk is - if (single_dict['options']['vf'] or single_dict['options']['serotype']): - ret_job_ectyper = job_ectyper_datastruct if single_dict['options']['amr']: ret_job_amr = job_amr_datastruct - # if bulk uploading isnt used, return the beautify result as the final task - if not single_dict['options']['bulk']: - if (single_dict['options']['vf'] or single_dict['options']['serotype']): - ret_job_ectyper = job_ectyper_beautify - if single_dict['options']['amr']: - ret_job_amr = job_amr_beautify - # add the jobs to the return - if (single_dict['options']['vf'] or single_dict['options']['serotype']): - jobs[ret_job_ectyper.get_id()] = {'file': single_dict[ - 'i'], 'analysis': 'Virulence Factors and Serotype'} + + # Add the jobs to the return. + # TODO: incorporate this into pipeline calls, as in the ECTyper pipeline. if single_dict['options']['amr']: jobs[ret_job_amr.get_id()] = {'file': single_dict[ 'i'], 'analysis': 'Antimicrobial Resistance'} From 5296c23e5698cdd769ea0537f774ff5aca91624b Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 19:47:56 -0500 Subject: [PATCH 022/122] FIX: reverse order of beautify params --- app/middleware/display/beautify.py | 2 +- app/scripts/savvy.py | 4 ++-- app/tests/test_beautify.py | 6 +++--- app/tests/test_modules.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index c93b3f69..b4980276 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -131,7 +131,7 @@ def handle_failed(json_r, args_dict): return ret # TODO: convert this to models-only. -def beautify(args_dict=None, pickled_result): +def beautify(pickled_result, args_dict=None): ''' Converts a given 'spit' datum (a dictionary with our results from rgi/ectyper) to a json form used by the frontend. This result is to be stored in Redis by the calling RQ Worker. :param args_dict: The arguments supplied by the user. In the case of spfy web-app, this is used to determine which analysis options were set.
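Why patch 022 is needed: Python rejects a required parameter placed after one that has a default, so the beautify(args_dict=None, pickled_result) signature introduced in patch 021 fails with a SyntaxError before the module can even import. A minimal standalone sketch of the failure and the fix (illustrative only, outside the repository):

    # SyntaxError: non-default argument follows default argument
    # def beautify(args_dict=None, pickled_result):
    #     ...

    # Valid: the required parameter comes first, the optional one last,
    # so callers that only have a pickled result can omit args_dict.
    def beautify(pickled_result, args_dict=None):
        return pickled_result, args_dict

    beautify('some_genome.fasta_ectyper_serotype.p')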
diff --git a/app/scripts/savvy.py b/app/scripts/savvy.py index bf037cf2..05aca0c9 100755 --- a/app/scripts/savvy.py +++ b/app/scripts/savvy.py @@ -123,7 +123,7 @@ def write_json(json_r, analysis): log.debug("Pickled ECTyper File: " + ectyper_p) # (4) ECTyper Beautify Step: - ectyper_beautify = beautify(args_dict, ectyper_p) + ectyper_beautify = beautify(ectyper_p, args_dict) log.debug('Beautified ECTyper Result: ' + str(ectyper_beautify)) ectyper_json = write_json(ectyper_beautify, 'ectyper') @@ -142,7 +142,7 @@ def write_json(json_r, analysis): log.debug("Pickled AMR Results File: " + amr_p) # (8) AMR Beautify Step: - amr_beautify = beautify(args_dict, amr_p) + amr_beautify = beautify(amr_p, args_dict) log.debug('Beautified AMR Result: ' + str(amr_beautify)) amr_json = write_json(amr_beautify, 'rgi') diff --git a/app/tests/test_beautify.py b/app/tests/test_beautify.py index ea7546f0..6b97a814 100644 --- a/app/tests/test_beautify.py +++ b/app/tests/test_beautify.py @@ -13,7 +13,7 @@ def test_beautify_vf_serotype(): ## test vf & serotype json return single_dict = dict(ARGS_DICT) single_dict.update({'i': vf_serotype_gene_dict}) - assert len(beautify(single_dict, vf_serotype_gene_dict)) == len(BEAUTIFY_VF_SEROTYPE) + assert len(beautify(vf_serotype_gene_dict, single_dict)) == len(BEAUTIFY_VF_SEROTYPE) def test_beautify_serotype_only(): ## test serotype only json return @@ -24,7 +24,7 @@ def test_beautify_serotype_only(): # this mimics user selection of serotype only single_dict.update({'options':{'vf': False, 'amr': False, 'serotype': True}}) # beautify is what is actually called by the RQ worker & returned to the user - r = beautify(single_dict, vf_serotype_gene_dict) + r = beautify(vf_serotype_gene_dict, single_dict) assert len(r) == 1 def test_beautify_json_r_serotype_only(): @@ -48,7 +48,7 @@ def test_beautify_amr_only(): single_dict.update({'i': amr_gene_dict}) # this mimics user selection of amr only single_dict.update({'options':{'vf': False, 'amr': True, 'serotype': False}}) - r = beautify(single_dict, amr_gene_dict) + r = beautify(amr_gene_dict, single_dict) assert len(r) > 1 def test_beautify_json_r_amr_only(): diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index e8ffde76..af9541c7 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -72,7 +72,7 @@ def test_ectyper_vf(): assert type(ectyper_dict) == dict # beautify ECTyper check - json_return = beautify(single_dict, pickled_ectyper_dict) + json_return = beautify(pickled_ectyper_dict, single_dict) assert type(json_return) == list def test_ectyper_serotype(): @@ -112,5 +112,5 @@ def test_amr(): # beautify amr check single_dict = dict(ARGS_DICT) single_dict.update({'i':ecoli_genome}) - json_return = beautify(single_dict,pickled_amr_dict) + json_return = beautify(pickled_amr_dict, single_dict) assert type(json_return) == list From a48d411915436dae3e24e89bd5a36d5976e2ec36 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 22:57:17 -0500 Subject: [PATCH 023/122] DEBUG: check the logs if tests fail --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 982e5bce..c7fcfa5c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -48,5 +48,8 @@ install: script: #### Run Pytest - python -m pytest --ignore modules/ectyper/ecoli_serotyping -v +after_failure: + # Check the logs if tests fail.
+ - docker-compose logs webserver notifications: email: false From ce63813cba76ee11cf4e56884b8ae259a4dab4ed Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 15 Feb 2018 10:57:22 -0500 Subject: [PATCH 024/122] FIX: start loading some jobs into the new Jobs class --- app/middleware/graphers/datastruct_savvy.py | 2 +- app/middleware/models.py | 21 ++++---- app/modules/spfy.py | 53 ++++++++++++++------- 3 files changed, 50 insertions(+), 26 deletions(-) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index 9d58bd90..5a515e7e 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -21,7 +21,7 @@ def _convert_subtyping(graph, model, uriIsolate): graph.add(( uriIsolate, gu('ge:0001077'), - Literal(serotyper_dict['H_prediction']) + Literal(row['H_prediction']) )) return graph diff --git a/app/middleware/models.py b/app/middleware/models.py index d17e1a16..3ff05164 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -2,15 +2,20 @@ class SubtypingRow(models.Base): - analysis = fields.StringField(required=True) - contigid = fields.StringField(required=True) - filename = fields.StringField(required=True) - hitcutoff = fields.StringField(nullable=True) - hitname = fields.StringField(required=True) - hitorientation = fields.StringField(nullable=True) - hitstart = fields.StringField(nullable=True) - hitstop = fields.StringField(nullable=True) + analysis = fields.StringField(required=True) + contigid = fields.StringField(required=True) + filename = fields.StringField(required=True) + hitcutoff = fields.StringField(nullable=True) + hitname = fields.StringField(required=True) + hitorientation = fields.StringField(nullable=True) + hitstart = fields.StringField(nullable=True) + hitstop = fields.StringField(nullable=True) class SubtypingResult(models.Base): rows = fields.ListField([SubtypingRow], nullable=True) + + +class Pipeline(models.Base): + jobs = fields.EmbeddedField(dict, default={}) + single_dict = fields.EmbeddedField(dict, default={}) \ No newline at end of file diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 33947c84..3931958f 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -26,6 +26,7 @@ from middleware.graphers.turtle_grapher import turtle_grapher from middleware.graphers.turtle_utils import actual_filename from modules.phylotyper import phylotyper +from middleware.models import Pipeline from modules.loggingFunctions import initialize_logging import logging @@ -51,12 +52,14 @@ backlog_multiples_q = Queue( 'backlog_multiples', connection=redis_conn, default_timeout=config.DEFAULT_TIMEOUT) -def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict): +def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=None): """ Enqueue all the jobs required for VF. """ # Dictionary of Job instances to return d = {} + # Alias. + job_id = pipeline.jobs['job_id'] # Create a copy of the arguments dictionary and disable Serotype. # This copy is passed to the old ECTyper. 
@@ -82,7 +85,7 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict): query_file, query_file + '_id.txt', query_file + '_ectyper_vf.p', - depends_on=job_ectyper, + depends_on=job_ectyper_vf, result_ttl=ttl_value) d['job_ectyper_datastruct_vf'] = job_ectyper_datastruct_vf @@ -94,18 +97,20 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict): single_dict, query_file + '_ectyper.p', depends_on=job_ectyper_vf, - result_ttl=-1 + result_ttl=ttl_value ) d['job_ectyper_beautify_vf'] = job_ectyper_beautify_vf @@ -113,7 +118,7 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe # Enqueue the new ECTyper job_ectyper_serotype = multiples.enqueue( call_ectyper_serotype, - single_dict, + single_dict_vf, depends_on=job_id) d['job_ectyper_serotype'] = job_ectyper_serotype @@ -130,8 +135,8 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe query_file, query_file + '_id.txt', query_file + '_ectyper_serotype.p', - depends_on=job_ectyper, - result_ttl=-1) + depends_on=job_ectyper_serotype, + result_ttl=ttl_value) d['job_ectyper_serotype'] = job_ectyper_datastruct_serotype @@ -140,8 +145,8 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe job_ectyper_beautify_serotype = multiples.enqueue( beautify, pickled_result = query_file + '_ectyper_serotype.p', - depends_on=job_ectyper_vf, - result_ttl=-1 + depends_on=job_ectyper_serotype, + result_ttl=ttl_value ) d['job_ectyper_beautify_serotype'] = job_ectyper_beautify_serotype @@ -161,10 +166,14 @@ def blob_savvy_enqueue(single_dict): ''' jobs = {} query_file = single_dict['i'] - + pipeline = Pipeline + pipeline.single_dict = copy.deepcopy(single_dict) + job_qc = multiples_q.enqueue(qc, query_file, result_ttl=-1) + pipeline.jobs.update({'job_qc':job_qc}) job_id = blazegraph_q.enqueue( write_reserve_id, query_file, depends_on=job_qc, result_ttl=-1) + pipeline.jobs.update({'job_id':job_id}) ## ECTyper (VF & Serotype) # VF @@ -173,8 +182,10 @@ def blob_savvy_enqueue(single_dict): singles_q, multiples_q, query_file, - single_dict + single_dict, + pipeline=pipeline ) + pipeline.jobs.update(ectyper_vf_jobs) if single_dict['options']['bulk']: ret_job_ectyper = ectyper_vf_jobs['job_ectyper_datastruct_vf'] jobs[ret_job_ectyper.get_id()] = { @@ -192,11 +203,12 @@ def blob_savvy_enqueue(single_dict): # Explicitly set serotype to false in case of overlap. backlog_d['options']['serotype'] = False # Note: we use different queues.
- _ectyper_pipeline_vf( + _ectyper_pipeline_vf( backlog_singles_q, backlog_multiples_q, query_file, - backlog_d + backlog_d, + pipeline=pipeline ) # Serotype @@ -205,8 +217,10 @@ def blob_savvy_enqueue(single_dict): singles_q, multiples_q, query_file, - single_dict + single_dict, + pipeline=pipeline ) + pipeline.jobs.update(ectyper_serotype_jobs) if single_dict['options']['bulk']: ret_job_ectyper = ectyper_serotype_jobs['job_ectyper_datastruct_serotype'] jobs[ret_job_ectyper.get_id()] = { @@ -227,8 +241,9 @@ def blob_savvy_enqueue(single_dict): backlog_singles_q, backlog_multiples_q, query_file, - backlog_d - ) + backlog_d, + pipeline=pipeline + ) # END ECTYPER PIPELINE # AMR PIPELINE @@ -274,8 +289,12 @@ def phylotyper_pipeline(multiples, subtype): picklefile = query_file + jobname + '.p' job_pt = multiples.enqueue( - phylotyper.phylotyper, None, subtype, tsvfile, id_file=query_file + '_id.txt', - depends_on=job_ectyper_datastruct) + phylotyper.phylotyper, + None, + subtype, + tsvfile, + id_file=query_file + '_id.txt', + depends_on=pipeline.jobs['job_ectyper_datastruct_vf']) job_pt_dict = multiples.enqueue( phylotyper.to_dict, tsvfile, subtype, picklefile, depends_on=job_pt) From 49549b8527a343dc868473c61727c09adeef85c5 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 15 Feb 2018 13:25:00 -0500 Subject: [PATCH 025/122] FIX: embed regular dicts for now --- app/middleware/models.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 3ff05164..182442ca 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -15,7 +15,6 @@ class SubtypingRow(models.Base): class SubtypingResult(models.Base): rows = fields.ListField([SubtypingRow], nullable=True) - class Pipeline(models.Base): - jobs = fields.EmbeddedField(dict, default={}) - single_dict = fields.EmbeddedField(dict, default={}) \ No newline at end of file + jobs = {} + single_dict = fields.EmbeddedField(dict, default={}) From b040d932c3edd62ad705761fcd4c270cdee0f7db Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 15 Feb 2018 13:25:13 -0500 Subject: [PATCH 026/122] FIX: embed regular dicts for now --- app/middleware/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 182442ca..f584bcd7 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -17,4 +17,4 @@ class SubtypingResult(models.Base): class Pipeline(models.Base): jobs = {} - single_dict = fields.EmbeddedField(dict, default={}) + single_dict = {} From df975f9eb91a344a253ae481a045b6cda59b0da5 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 15 Feb 2018 13:37:21 -0500 Subject: [PATCH 027/122] FIX: order of calls for beautify --- app/modules/spfy.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 3931958f..9ea2993c 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -94,8 +94,8 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N # serotype, and we're not in bulk uploading. 
job_ectyper_beautify_vf = multiples.enqueue( beautify, - single_dict, query_file + '_ectyper.p', + query_file + '_ectyper.p', + single_dict, depends_on=job_ectyper_vf, result_ttl=ttl_value ) @@ -266,7 +266,11 @@ ... # blazegraph if single_dict['options']['amr'] and not single_dict['options']['bulk']: job_amr_beautify = multiples.enqueue( - beautify, single_dict, query_file + '_rgi.tsv_rgi.p', depends_on=job_amr_dict, result_ttl=-1) + beautify, + query_file + '_rgi.tsv_rgi.p', + single_dict, + depends_on=job_amr_dict, + result_ttl=-1) d.update({'job_amr_beautify': job_amr_beautify}) return d From 988bee6667d25b00f04cc3badfe470f5c463c0c1 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 15 Feb 2018 13:41:37 -0500 Subject: [PATCH 028/122] FIX: typo --- app/modules/ectyper/call_ectyper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py index 30838a5d..a7bb2aab 100644 --- a/app/modules/ectyper/call_ectyper.py +++ b/app/modules/ectyper/call_ectyper.py @@ -91,7 +91,7 @@ def call_ectyper_serotype(args_dict): output_file=output_file ) # Path for the pickle dump. - p = genome_file + '_ectyper_vf.p' + p = genome_file + '_ectyper_serotype.p' pickle.dump(subtyping_result,open(p,'wb')) return p else: From 0ea77017a659fe0b0e1b2b3ea1d293b028c93a95 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 15 Feb 2018 22:21:29 -0500 Subject: [PATCH 029/122] FIX: more typos --- app/modules/spfy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 9ea2993c..c74be7ee 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -94,7 +94,7 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N # serotype, and we're not in bulk uploading. job_ectyper_beautify_vf = multiples.enqueue( beautify, - query_file + '_ectyper.p', + query_file + '_ectyper_vf.p', single_dict, depends_on=job_ectyper_vf, result_ttl=ttl_value From eb2795f98bb4fdcf6078d99a225906bdb2794db2 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Fri, 16 Feb 2018 11:49:45 -0500 Subject: [PATCH 030/122] FIX: the depends_on check in RQ doesn't validate if it's called on actual jobs --- app/modules/spfy.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index c74be7ee..3567af98 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -36,10 +36,9 @@ logger = logging.getLogger(__name__) -# the only ONE time for global variables -# when naming queues, make sure you actually set a worker to listen to that queue +# When naming queues, make sure you set a worker to listen to that queue # we use the high priority queue for things that should be immediately -# returned to the user +# returned to the user. redis_url = config.REDIS_URL redis_conn = redis.from_url(redis_url) singles_q = Queue('singles', connection=redis_conn) @@ -101,6 +100,8 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N ) d['job_ectyper_beautify_vf'] = job_ectyper_beautify_vf + # Mutate the jobs pipeline from the calling function. + pipeline.jobs.update(d) return d def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipeline=None): @@ -150,6 +151,8 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe ) d['job_ectyper_beautify_serotype'] = job_ectyper_beautify_serotype + # Mutate the jobs pipeline from the calling function.
+ pipeline.jobs.update(d) return d def blob_savvy_enqueue(single_dict): @@ -166,7 +169,7 @@ def blob_savvy_enqueue(single_dict): ''' jobs = {} query_file = single_dict['i'] - pipeline = Pipeline + pipeline = Pipeline() pipeline.single_dict = copy.deepcopy(single_dict) job_qc = multiples_q.enqueue(qc, query_file, result_ttl=-1) @@ -185,7 +188,7 @@ def blob_savvy_enqueue(single_dict): single_dict, pipeline=pipeline ) - pipeline.jobs.update(ectyper_vf_jobs) + # pipeline.jobs.update(ectyper_vf_jobs) if single_dict['options']['bulk']: ret_job_ectyper = ectyper_vf_jobs['job_ectyper_datastruct_vf'] jobs[ret_job_ectyper.get_id()] = { @@ -220,7 +223,7 @@ def blob_savvy_enqueue(single_dict): single_dict, pipeline=pipeline ) - pipeline.jobs.update(ectyper_serotype_jobs) + # pipeline.jobs.update(ectyper_serotype_jobs) if single_dict['options']['bulk']: ret_job_ectyper = ectyper_serotype_jobs['job_ectyper_datastruct_serotype'] jobs[ret_job_ectyper.get_id()] = { From 590aa4391731f9dae5ab254fa5985b76e6c2db3b Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Fri, 16 Feb 2018 12:34:51 -0500 Subject: [PATCH 031/122] CHANGE: use regular python classes instead of inheriting from jsonmodels.models.Base --- app/middleware/models.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index f584bcd7..614ed5ee 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -15,6 +15,17 @@ class SubtypingRow(models.Base): class SubtypingResult(models.Base): rows = fields.ListField([SubtypingRow], nullable=True) -class Pipeline(models.Base): - jobs = {} - single_dict = {} +class Job(): + def __init__(self, job, transitory=True, display=False): + self.job = job # an instance of the RQ Job class + self.transitory = # if the job won't persist in Redis DB + self.display = # used for display to the front-end + +class Pipeline(): + def __init__(self, jobs=None, single_dict=None): + if not jobs: + jobs = {} + if not single_dict: + single_dict = {} + self.jobs = {} + self.single_dict = {} From 8efb9fc07291f12f713226e854097749de4efc51 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Fri, 16 Feb 2018 12:44:17 -0500 Subject: [PATCH 032/122] CHANGE: use regular python classes instead of inheriting from jsonmodels.models.Base --- app/middleware/models.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 614ed5ee..8cbcbc30 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -16,16 +16,16 @@ class SubtypingResult(models.Base): rows = fields.ListField([SubtypingRow], nullable=True) class Job(): - def __init__(self, job, transitory=True, display=False): + def __init__(self, rq_job, transitory=True, display=False): self.job = job # an instance of the RQ Job class - self.transitory = # if the job won't persist in Redis DB - self.display = # used for display to the front-end - + self.transitory = transitory # if the job won't persist in Redis DB + self.display = display # used for display to the front-end + class Pipeline(): def __init__(self, jobs=None, single_dict=None): if not jobs: jobs = {} if not single_dict: single_dict = {} - self.jobs = {} + self.jobs = {} # {'somename': instance of RQ.Job} self.single_dict = {} From 81cf5b8962c4415d7262991e2b2f265f48cf24af Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Fri, 16 Feb 2018 16:42:11 -0500 Subject: [PATCH 033/122] ADD: some tests for the pipeline signatures + CHANGE: moved pipeline creation up 
one level --- app/middleware/models.py | 119 ++++++++++++++++++++++++++++++++++--- app/modules/spfy.py | 21 ++++--- app/routes/ra_posts.py | 11 +++- app/tests/test_models.py | 50 +++++++++++++++- 4 files changed, 181 insertions(+), 20 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 8cbcbc30..de4fe598 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -1,4 +1,10 @@ +import sys +from hashlib import sha1 +from dis import dis +from StringIO import StringIO from jsonmodels import models, fields +from middleware.graphing.turtle_utils import actual_filename +from middleware.display.beautify import model_to_json class SubtypingRow(models.Base): @@ -15,17 +21,114 @@ class SubtypingRow(models.Base): class SubtypingResult(models.Base): rows = fields.ListField([SubtypingRow], nullable=True) + class Job(): - def __init__(self, rq_job, transitory=True, display=False): - self.job = job # an instance of the RQ Job class - self.transitory = transitory # if the job won't persist in Redis DB - self.display = display # used for display to the front-end + def __init__(self, rq_job, transitory=True, backlog=True, display=False): + """ + Args: + rq_job: An instance of the RQ Job class. + transitory: Some intermediate, we only care if it failed. It's ok + if the job isn't found in Redis. + backlog: For background processing, we don't care whatsoever. Will + still be caught by Sentry.io if it fails. + display: To be parsed for the front-end. + """ + self.rq_job = rq_job + self.transitory = transitory + self.backlog = backlog + self.display = display class Pipeline(): - def __init__(self, jobs=None, single_dict=None): + def __init__(self, jobs=None, files=None, func=None, options=None): if not jobs: jobs = {} - if not single_dict: - single_dict = {} - self.jobs = {} # {'somename': instance of RQ.Job} - self.single_dict = {} + if not files: + files = [] + if not options: + options = {} + self.jobs = {} # {'somename': instance of RQ.Job} + self.sig = None # Signature isn't generated until necessary + # TODO: incorporate below into the pipeline. + self.files = [] + self.func = func # Additional attribute for storing pipeline function. + self.options = None + + def complete(self): + """ + Check if all jobs are completed + """ + for j in jobs.itervalues(): + rq_job = j.rq_job + if j.backlog: + # Some backlog job, we don't care (though Sentry will catch it). + continue + elif rq_job.is_failed: + # If the job failed, return the error. + return rq_job.exc_info + elif not job.is_finished: + # One of the jobs hasn't finished. + return False + return True + + def to_json(self): + """ + Reduces all results from self.jobs to json for return. + """ + # Gather all the jobs that have finished and haven't failed. + completed_jobs = [ + j.rq_job for j in jobs.itervalues() + if j.display and j.rq_job.is_finished and not j.rq_job.is_failed + ] + # Merge the json lists together. + l = [] + for rq_job in completed_jobs: + model = rq_job.result + list_json = model_to_json(model) + l += list_json + return l + + def _function_signature(self): + """ + Generates signatures for functions. + """ + # dis.dis() sends output to stdout, we need to capture it to generate + # a signature. + + # Assign the old stdout. + old_stdout = sys.stdout + # Create a buffer for the new output. + result = StringIO() + # Swap the stdout to our buffer. + sys.stdout = result + # dis() call. + dis(self.func) + # Restore the stdout to screen. + sys.stdout = old_stdout + # Grab the output from the dis() call.
result_string = result.getvalue() + return result_string + + def signature(self): + """ + Create a signature that can identify a given task. Used to check + if the same task was requested. + """ + # Create a string of the function signature. + str_func = self._function_signature() + # Start the hashing process with the function signature. + hx = sha1(str_func) + + # Create a string of the files. + str_files = str(self.files) + # Update the hash with our args information. + hx.update(str_files) + + # Create a string of the options. + str_options = str(self.options) + # Update the hash with our args information. + hx.update(str_args) + + # Use the hexdigest as the signature. + sig = hx.hexdigest() + self.sig = sig + return sig diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 3567af98..d98694de 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -155,7 +155,7 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe pipeline.jobs.update(d) return d -def blob_savvy_enqueue(single_dict): +def blob_savvy_enqueue(single_dict, pipeline): ''' Handles enqueueing of single file to multiple queues. :param f: a fasta file @@ -169,9 +169,7 @@ def blob_savvy_enqueue(single_dict, pipeline): ''' jobs = {} query_file = single_dict['i'] - pipeline = Pipeline() - pipeline.single_dict = copy.deepcopy(single_dict) - + job_qc = multiples_q.enqueue(qc, query_file, result_ttl=-1) pipeline.jobs.update({'job_qc':job_qc}) job_id = blazegraph_q.enqueue( write_reserve_id, query_file, depends_on=job_qc, result_ttl=-1) @@ -376,23 +374,28 @@ def phylotyper_pipeline(multiples, subtype): return jobs -def blob_savvy(args_dict): +def blob_savvy(args_dict, pipeline): ''' - Handles enqueuing of all files in a given directory or just a single file + Handles enqueuing of all files in a given directory or just a single file. ''' d = {} if os.path.isdir(args_dict['i']): for f in os.listdir(args_dict['i']): single_dict = dict(args_dict.items() + {'i': os.path.join(args_dict['i'], f)}.items()) - d.update(blob_savvy_enqueue(single_dict)) + d.update( + blob_savvy_enqueue( + single_dict, + pipeline + ) + ) else: d.update(blob_savvy_enqueue(args_dict, pipeline)) return d -def spfy(args_dict): +def spfy(args_dict, pipeline): ''' ''' # abs path resolution should be handled in spfy.py #print 'Starting blob_savvy call' #logger.info('args_dict: ' + str(args_dict)) - jobs_dict = blob_savvy(args_dict) + jobs_dict = blob_savvy(args_dict, pipeline) return jobs_dict diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py index 93440141..ab6d9f8f 100644 --- a/app/routes/ra_posts.py +++ b/app/routes/ra_posts.py @@ -17,6 +17,7 @@ from modules.gc import blob_gc_enqueue from modules.spfy import spfy from middleware.api import subtyping_dependencies +from middleware.models import Pipeline bp_ra_posts = Blueprint('reactapp_posts', __name__) @@ -234,6 +235,12 @@ def upload(): now = now.strftime("%Y-%m-%d-%H-%M-%S-%f") jobs_dict = {} + pipeline = Pipeline( + files = uploaded_files, + func = spfy, + options = options + ) + for file in uploaded_files: if file: # for saving file @@ -250,7 +257,9 @@ def upload(): # for enqueing task jobs_enqueued = spfy( - {'i': filename, 'pi':options['pi'], 'options':options}) + args_dict = {'i': filename, 'pi':options['pi'], 'options':options}, + pipeline = pipeline ) jobs_dict.update(jobs_enqueued) # new in 4.2.0 print 'upload(): all files enqueued, returning...'
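How the signature is computed: dis.dis() prints a function's bytecode to stdout, so _function_signature() temporarily redirects stdout into a StringIO buffer to capture that printout, and signature() then folds the captured disassembly, str(files), and str(options) into a single SHA-1 digest, so two pipelines built from the same function and inputs hash identically. A minimal standalone sketch of the same idea, written for Python 2 like the rest of the codebase (hash_pipeline is illustrative, not the repository's API):

    import sys
    from dis import dis
    from hashlib import sha1
    from StringIO import StringIO

    def hash_pipeline(func, files, options):
        # Capture the disassembly that dis() prints to stdout.
        old_stdout = sys.stdout
        buf = StringIO()
        sys.stdout = buf
        dis(func)
        sys.stdout = old_stdout
        # Fold the function's bytecode, files, and options into one digest.
        hx = sha1(buf.getvalue())
        hx.update(str(files))
        hx.update(str(options))
        return hx.hexdigest()

Calling hash_pipeline() twice with the same function and arguments yields the same hex digest, while a different function or different options changes it, which is exactly what the signature tests below assert.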
diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 526e784a..9384ae18 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -1,5 +1,8 @@ -from middleware.models import SubtypingRow, SubtypingResult -from tests.constants import BEAUTIFY_VF_SEROTYPE +from middleware.models import SubtypingRow, SubtypingResult, Pipeline +from modules.spfy import spfy +from scripts.savvy import savvy +from tests.constants import BEAUTIFY_VF_SEROTYPE, ARGS_DICT + def test_subtyping_model_direct(): """ @@ -21,3 +24,46 @@ def test_subtyping_model_direct(): rows = subtyping_list ) subtyping_result.validate() + +def test_pipeline_model_signature(): + """ + Function signatures should be identical if called on the same function. + """ + p1 = Pipeline( + func = spfy, + options = ARGS_DICT + ) + p2 = Pipeline( + func = spfy, + options = ARGS_DICT + ) + r1 = p1.signature() + r2 = p2.signature() + # These are identical pipelines, should be equal. + assert r1 == r2 + + p1 = Pipeline( + func = spfy, + options = ARGS_DICT + ) + p2 = Pipeline( + func = savvy, + options = ARGS_DICT + ) + r1 = p1.signature() + r2 = p2.signature() + # These pipelines have different functions, should be different. + assert r1 != r2 + + p1 = Pipeline( + func = spfy, + options = ARGS_DICT + ) + p2 = Pipeline( + func = spfy, + options = {'cats':1} + ) + r1 = p1.signature() + r2 = p2.signature() + # These pipelines have different options, should be different. + assert r1 != r2 From 30055a56a3dc3a28119fa9d3f431b974222c32dc Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Fri, 16 Feb 2018 17:07:41 -0500 Subject: [PATCH 034/122] FIX: imports --- app/middleware/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index de4fe598..3596be13 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -3,7 +3,7 @@ from dis import dis from StringIO import StringIO from jsonmodels import models, fields -from middleware.graphing.turtle_utils import actual_filename +from middleware.graphers.turtle_utils import actual_filename from middleware.display.beautify import model_to_json From c844fa3afc7ba91c0f7276b6f34159980cc2325f Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 17 Feb 2018 18:43:01 -0500 Subject: [PATCH 035/122] FIX: circular imports --- app/middleware/display/beautify.py | 23 +---------------------- app/middleware/models.py | 28 +++++++++++++++++++++++----- app/modules/spfy.py | 3 +-- 3 files changed, 25 insertions(+), 29 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index b4980276..f51fb232 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -1,37 +1,16 @@ import logging import pandas as pd import cPickle as pickle -from os.path import basename from modules.loggingFunctions import initialize_logging from middleware.display.find_widest import check_alleles from middleware.graphers.turtle_utils import actual_filename -from middleware.models import SubtypingResult +from middleware.models import SubtypingResult, model_to_json # logging log_file = initialize_logging() log = logging.getLogger(__name__) -def _convert_subtyping(model): - # Convert the model to a generic JSON structure. - struct = model.to_struct() - # This is not strictly json; more like a list than a dict structure. - rows_list = struct['rows'] - return rows_list - -def model_to_json(model): - """ - Converts models to json for the front-end. 
- """ - # Validate the model submitted before processing. - model.validate() - # Conversion. - if isinstance(model, SubtypingResult): - return _convert_subtyping(model) - else: - raise Exception('model_to_json() called for a model without a handler.') - - def json_return(args_dict, gene_dict): """ This converts the gene dict into a json format for return to the front end diff --git a/app/middleware/models.py b/app/middleware/models.py index 3596be13..1e1e3456 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -4,7 +4,25 @@ from StringIO import StringIO from jsonmodels import models, fields from middleware.graphers.turtle_utils import actual_filename -from middleware.display.beautify import model_to_json + +def _convert_subtyping(model): + # Convert the model to a generic JSON structure. + struct = model.to_struct() + # This is not strictly json; more like a list than a dict structure. + rows_list = struct['rows'] + return rows_list + +def model_to_json(model): + """ + Converts models to json for the front-end. + """ + # Validate the model submitted before processing. + model.validate() + # Conversion. + if isinstance(model, SubtypingResult): + return _convert_subtyping(model) + else: + raise Exception('model_to_json() called for a model without a handler.') class SubtypingRow(models.Base): @@ -57,7 +75,7 @@ def complete(self): """ Check if all jobs are completed """ - for j in jobs.itervalues(): + for j in self.jobs.itervalues(): rq_job = j.rq_job if j.backlog: # Some backlog job, we don't care (though Sentry will catch it). @@ -65,7 +83,7 @@ def complete(self): elif rq_job.is_failed: # If the job failed, return the error. return rq_job.exc_info - elif not job.is_finished: + elif not rq_job.is_finished: # One of the jobs hasn't finished. return False return True @@ -76,7 +94,7 @@ def to_json(self): """ # Gather all the jobs that have finished and haven't failed. completed_jobs = [ - j.rq_job for j in jobs.itervalues() + j.rq_job for j in self.jobs.itervalues() if j.display and j.rq_job.is_finished and not j.rq_job.is_failed ] # Merge the json lists together. @@ -126,7 +144,7 @@ def signature(self): # Create a string of the options. str_options = str(self.options) # Update the hash with our args information. - hx.update(str_args) + hx.update(str_options) # Use the hexdigest as the signature. 
sig = hx.hexdigest() diff --git a/app/modules/spfy.py b/app/modules/spfy.py index d98694de..627473bd 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -21,12 +21,11 @@ from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype from modules.amr.amr import amr from modules.amr.amr_to_dict import amr_to_dict -from middleware.display.beautify import beautify, model_to_json +from middleware.display.beautify import beautify from middleware.graphers.datastruct_savvy import datastruct_savvy from middleware.graphers.turtle_grapher import turtle_grapher from middleware.graphers.turtle_utils import actual_filename from modules.phylotyper import phylotyper -from middleware.models import Pipeline from modules.loggingFunctions import initialize_logging import logging From 2698689b86c1de9adfbfdeef57979df95a1f2a34 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 17 Feb 2018 19:13:29 -0500 Subject: [PATCH 036/122] FIX: wasn't saving options --- app/middleware/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 1e1e3456..4cfd58de 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -69,7 +69,7 @@ def __init__(self, jobs=None, files=None, func=None, options=None): # TODO: incorporate below into the pipeline. self.files = [] self.func = func # Additional attribute for storing pipeline function. - self.options = None + self.options = options def complete(self): """ From 2583b4ea1b76b822096499c3d95b92f877a7f1ea Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 17 Feb 2018 20:15:01 -0500 Subject: [PATCH 037/122] CHANGE: switch to Job class in Pipeline + some tests for it --- app/middleware/models.py | 3 +- app/modules/spfy.py | 90 ++++++++++++++++++++++++++++++++++------ app/tests/test_models.py | 24 ++++++++++- 3 files changed, 103 insertions(+), 14 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 4cfd58de..51fc3379 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -41,7 +41,7 @@ class SubtypingResult(models.Base): class Job(): - def __init__(self, rq_job, transitory=True, backlog=True, display=False): + def __init__(self, rq_job, name="", transitory=True, backlog=True, display=False): """ Args: rq_job: An instance of the RQ Job class. @@ -52,6 +52,7 @@ def __init__(self, rq_job, transitory=True, backlog=True, display=False): display: To per parsed for the front-end. """ self.rq_job = rq_job + self.name = name self.transitory = transitory self.backlog = backlog self.display = display diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 627473bd..9ef22f11 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -26,6 +26,7 @@ from middleware.graphers.turtle_grapher import turtle_grapher from middleware.graphers.turtle_utils import actual_filename from modules.phylotyper import phylotyper +from middleware.models import Job from modules.loggingFunctions import initialize_logging import logging @@ -57,7 +58,7 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N # Dictionary of Job instances to return d = {} # Alias. - job_id = pipeline.jobs['job_id'] + job_id = pipeline.jobs['job_id'].rq_job # Create a copy of the arguments dictionary and disable Serotype. # This copy is passed to the old ECTyper.
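The flags on each Job registration below feed the polling contract that Pipeline gained in patches 033 and 035: complete() skips backlog jobs, returns the exc_info string of any failed job, and returns False while a tracked job is still running, while to_json() merges only the results of display-flagged jobs. A minimal sketch of how a caller might poll under those semantics (poll_pipeline and the two-second interval are illustrative, not the repository's API):

    import time

    def poll_pipeline(pipeline, interval=2):
        while True:
            status = pipeline.complete()
            if status is True:
                # All non-backlog jobs finished; merge display results.
                return pipeline.to_json()
            elif status:
                # A truthy non-True value is the exc_info of a failed job.
                raise Exception(status)
            time.sleep(interval)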
@@ -68,7 +69,17 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N
         call_ectyper_vf,
         single_dict_vf,
         depends_on=job_id)
+    # TODO: this is duplicated; switch everything to pipeline once tested
     d['job_ectyper_vf'] = job_ectyper_vf
+    pipeline.jobs.update({
+        'job_ectyper_vf': Job(
+            rq_job=job_ectyper_vf,
+            name='job_ectyper_vf',
+            transitory=True,
+            backlog=False,
+            display=False
+        )
+    })
 
     # If bulk uploading is set, we return the datastruct as the end task
     # to poll for job completion, therefore must set ttl of -1.
@@ -86,6 +97,15 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N
         depends_on=job_ectyper_vf,
         result_ttl=ttl_value)
     d['job_ectyper_datastruct_vf'] = job_ectyper_datastruct_vf
+    pipeline.jobs.update({
+        'job_ectyper_datastruct_vf': Job(
+            rq_job=job_ectyper_datastruct_vf,
+            name='job_ectyper_datastruct_vf',
+            transitory=True,
+            backlog=False,
+            display=False
+        )
+    })
 
     if not single_dict['options']['bulk']:
         # Only bother parsing into json if user has requested either vf or
@@ -98,9 +118,15 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N
             result_ttl=ttl_value
         )
         d['job_ectyper_beautify_vf'] = job_ectyper_beautify_vf
-
-    # Mutate the jobs pipeline from the calling function.
-    pipeline.jobs.update(d)
+        pipeline.jobs.update({
+            'job_ectyper_beautify_vf': Job(
+                rq_job=job_ectyper_beautify_vf,
+                name='job_ectyper_beautify_vf',
+                transitory=True,
+                backlog=False,
+                display=True
+            )
+        })
     return d
 
 def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipeline=None):
@@ -110,7 +136,7 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe
     # Dictionary of Job instances to return
     d = {}
     # Alias.
-    job_id = pipeline.jobs['job_id']
+    job_id = pipeline.jobs['job_id'].rq_job
 
     # Create a copy of the arguments dictionary and disable Serotype.
     # This copy is passed to the old ECTyper.
@@ -121,6 +147,15 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe
         single_dict_vf,
         depends_on=job_id)
     d['job_ectyper_serotype'] = job_ectyper_serotype
+    pipeline.jobs.update({
+        'job_ectyper_serotype': Job(
+            rq_job=job_ectyper_serotype,
+            name='job_ectyper_serotype',
+            transitory=True,
+            backlog=False,
+            display=False
+        )
+    })
 
     # If bulk uploading is set, we return the datastruct as the end task
     # to poll for job completion, therefore must set ttl of -1.
@@ -137,7 +172,16 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe
         query_file + '_ectyper_serotype.p',
         depends_on=job_ectyper_serotype,
         result_ttl=ttl_value)
-    d['job_ectyper_serotype'] = job_ectyper_datastruct_serotype
+    d['job_ectyper_datastruct_serotype'] = job_ectyper_datastruct_serotype
+    pipeline.jobs.update({
+        'job_ectyper_datastruct_serotype': Job(
+            rq_job=job_ectyper_datastruct_serotype,
+            name='job_ectyper_datastruct_serotype',
+            transitory=True,
+            backlog=False,
+            display=False
+        )
+    })
 
     if not single_dict['options']['bulk']:
         # Only bother parsing into json if user has requested either vf or
@@ -149,9 +193,15 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe
             result_ttl=ttl_value
         )
         d['job_ectyper_beautify_serotype'] = job_ectyper_beautify_serotype
-
-    # Mutate the jobs pipeline from the calling function.
- pipeline.jobs.update(d) + pipeline.jobs.update({ + 'job_ectyper_beautify_serotype': Job( + rq_job=job_ectyper_beautify_serotype, + name='job_ectyper_beautify_serotype', + transitory=True, + backlog=False, + display=True + ) + }) return d def blob_savvy_enqueue(single_dict, pipeline): @@ -168,12 +218,28 @@ def blob_savvy_enqueue(single_dict, pipeline): ''' jobs = {} query_file = single_dict['i'] - + job_qc = multiples_q.enqueue(qc, query_file, result_ttl=-1) - pipeline.jobs.update({'job_qc':job_qc}) + pipeline.jobs.update({ + 'job_qc': Job( + rq_job=job_qc, + name='job_qc', + transitory=False, + backlog=False, + display=False + ) + }) job_id = blazegraph_q.enqueue( write_reserve_id, query_file, depends_on=job_qc, result_ttl=-1) - pipeline.jobs.update({'job_id':job_id}) + pipeline.jobs.update({ + 'job_id': Job( + rq_job=job_id, + name='job_id', + transitory=False, + backlog=False, + display=False + ) + }) ## ECTyper (VF & Serotype) # VF diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 9384ae18..fffea95f 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -1,4 +1,4 @@ -from middleware.models import SubtypingRow, SubtypingResult, Pipeline +from middleware.models import SubtypingRow, SubtypingResult, Pipeline, Job from modules.spfy import spfy from scripts.savvy import savvy from tests.constants import BEAUTIFY_VF_SEROTYPE, ARGS_DICT @@ -25,6 +25,28 @@ def test_subtyping_model_direct(): ) subtyping_result.validate() +def test_pipeline_model(): + """ + Test the Pipeline model itself. + """ + p = Pipeline( + func = spfy, + options = ARGS_DICT + ) + pipeline.jobs.update({ + 'job_ectyper_vf': Job( + rq_job='SHOULDBEANACTUALJOB', + name='job_ectyper_vf', + transitory=True, + backlog=False, + display=False + ) + }) + assert isinstance(p, Pipeline) + assert isinstance(p.jobs, dict) + assert isinstance(p.jobs['job_ectyper_vf'], Job) + + def test_pipeline_model_signature(): """ Function signatures should be identical if called on the same function. From e8817a2451148cd63c9b5b87f007cf9e825d9263 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 17 Feb 2018 21:03:15 -0500 Subject: [PATCH 038/122] ADD: cast for VF/AMR + FIX: typo in earlier test --- app/middleware/modellers.py | 23 +++++++++++++++++++++++ app/tests/test_models.py | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index 7fed795c..2ea7a1e0 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -31,5 +31,28 @@ def model_serotype(pi, pl, output_file): subtyping_result = SubtypingResult( rows = subtyping_list ) + return subtyping_result +def model_vf(json_r, analysis="Virulence Factors"): + """ + Casts the output from display.beautify into a SubtypingResult object. + """ + # Type check. + assert isinstance(json_r, list) + subtyping_list = [ + SubtypingRow( + analysis=analysis, + contigid=item['contigid'], + filename=item['filename'], + hitcutoff=item['hitcutoff'], + hitname=item['hitname'], + hitorientation=item['hitorientation'], + hitstart=item['hitstart'], + hitstop=item['hitstop'] + ) + for item in json_r] + # Convert the list of rows into a SubtypingResult model. 
+    subtyping_result = SubtypingResult(
+        rows = subtyping_list
+    )
     return subtyping_result
diff --git a/app/tests/test_models.py b/app/tests/test_models.py
index fffea95f..3b039697 100644
--- a/app/tests/test_models.py
+++ b/app/tests/test_models.py
@@ -33,7 +33,7 @@ def test_pipeline_model():
         func = spfy,
         options = ARGS_DICT
     )
-    pipeline.jobs.update({
+    p.jobs.update({
         'job_ectyper_vf': Job(
             rq_job='SHOULDBEANACTUALJOB',
             name='job_ectyper_vf',

From 6b78e53a83bb3302ff6ad580119d11dbfe7d1305 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 17 Feb 2018 21:39:59 -0500
Subject: [PATCH 039/122] ADD: test the to_json() for merging all job results

---
 app/middleware/models.py |    4 +-
 app/modules/spfy.py      |    4 +-
 app/tests/constants.py   | 1216 ++++++++++++++++++++++++++++++++++++++
 app/tests/test_models.py |   53 +-
 4 files changed, 1270 insertions(+), 7 deletions(-)

diff --git a/app/middleware/models.py b/app/middleware/models.py
index 51fc3379..8b11b3fe 100644
--- a/app/middleware/models.py
+++ b/app/middleware/models.py
@@ -91,7 +91,9 @@ def complete(self):
 
     def to_json(self):
         """
-        Reduces all results from self.jobs to json for return.
+        Reduces all results from self.jobs to json for return. Note: currently
+        using a list as this is what the front-end is expecting, but convert
+        to dict at some point.
         """
         # Gather all the jobs that have finished and haven't failed.
         completed_jobs = [
diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index 9ef22f11..9fa2c4fa 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -122,7 +122,7 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N
             'job_ectyper_beautify_vf': Job(
                 rq_job=job_ectyper_beautify_vf,
                 name='job_ectyper_beautify_vf',
-                transitory=True,
+                transitory=False,
                 backlog=False,
                 display=True
             )
@@ -197,7 +197,7 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe
             'job_ectyper_beautify_serotype': Job(
                 rq_job=job_ectyper_beautify_serotype,
                 name='job_ectyper_beautify_serotype',
-                transitory=True,
+                transitory=False,
                 backlog=False,
                 display=True
             )
diff --git a/app/tests/constants.py b/app/tests/constants.py
index 9a7acbf8..63af007d 100644
--- a/app/tests/constants.py
+++ b/app/tests/constants.py
@@ -1214,3 +1214,1219 @@
         "hitstop": 3095080
     }
 ]
+
+BEAUTIFY_SEROTYPE = [
+    {
+        "analysis": "Serotype",
+        "contigid": "n/a",
+        "filename": "GCA_000005845.2_ASM584v2_genomic.fna",
+        "hitcutoff": "n/a",
+        "hitname": "O16:H48",
+        "hitorientation": "n/a",
+        "hitstart": "n/a",
+        "hitstop": "n/a"
+    }
+]
+
+BEAUTIFY_VF = [
+    {
+        "analysis": "Virulence Factors",
+        "contigid": "U00096.3",
+        "filename": "GCA_000005845.2_ASM584v2_genomic.fna",
+        "hitcutoff": 90,
+        "hitname": "EC958",
+        "hitorientation": "+",
+        "hitstart": 2073473,
+        "hitstop": 2074658
+    },
+    {
+        "analysis": "Virulence Factors",
+        "contigid": "U00096.3",
+        "filename": "GCA_000005845.2_ASM584v2_genomic.fna",
+        "hitcutoff": 90,
+        "hitname": "ECP",
+        "hitorientation": "-",
+        "hitstart": 306807,
+        "hitstop": 309332
+    },
+    {
+        "analysis": "Virulence Factors",
+        "contigid": "U00096.3",
+        "filename": "GCA_000005845.2_ASM584v2_genomic.fna",
+        "hitcutoff": 90,
+        "hitname": "ECP",
+        "hitorientation": "-",
+        "hitstart": 309358,
+        "hitstop": 310075
+    },
+    {
+        "analysis": "Virulence Factors",
+        "contigid": "U00096.3",
+        "filename": "GCA_000005845.2_ASM584v2_genomic.fna",
+        "hitcutoff": 90,
+        "hitname": "ECP",
+        "hitorientation": "-",
+        "hitstart": 310084,
+        "hitstop": 310700
+    },
+    {
+        "analysis": "Virulence Factors",
+        "contigid": "U00096.3",
+        "filename":
"GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ECP", + "hitorientation": "-", + "hitstart": 310746, + "hitstop": 311336 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ECS88", + "hitorientation": "-", + "hitstart": 3308040, + "hitstop": 3308924 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "Z1307", + "hitorientation": "-", + "hitstart": 1019013, + "hitstop": 1020053 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "Z2203", + "hitorientation": "-", + "hitstart": 1588853, + "hitstop": 1590079 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "Z2204", + "hitorientation": "-", + "hitstart": 1588309, + "hitstop": 1588839 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "Z2205", + "hitorientation": "-", + "hitstart": 1587793, + "hitstop": 1588296 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "Z2206", + "hitorientation": "-", + "hitstart": 1586820, + "hitstop": 1587734 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "agn43", + "hitorientation": "+", + "hitstart": 2071539, + "hitstop": 2074658 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "artj", + "hitorientation": "-", + "hitstart": 899844, + "hitstop": 900575 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "aslA", + "hitorientation": "-", + "hitstart": 3984579, + "hitstop": 3986007 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "b2972", + "hitorientation": "-", + "hitstart": 3113543, + "hitstop": 3114352 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "cadA", + "hitorientation": "-", + "hitstart": 4356481, + "hitstop": 4358656 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "cah", + "hitorientation": "+", + "hitstart": 2073486, + "hitstop": 2074658 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "cheA", + "hitorientation": "-", + "hitstart": 1973360, + "hitstop": 1975324 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "cheB", + "hitorientation": "-", + "hitstart": 1967452, + "hitstop": 1968501 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 
90, + "hitname": "cheR", + "hitorientation": "-", + "hitstart": 1968504, + "hitstop": 1969364 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "cheW", + "hitorientation": "-", + "hitstart": 1972836, + "hitstop": 1973339 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "cheZ", + "hitorientation": "-", + "hitstart": 1966393, + "hitstop": 1967037 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "cs3", + "hitorientation": "-", + "hitstart": 2994460, + "hitstop": 2995092 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "csgD", + "hitorientation": "-", + "hitstart": 1102546, + "hitstop": 1103196 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "csgG", + "hitorientation": "-", + "hitstart": 1100851, + "hitstop": 1101684 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "eae", + "hitorientation": "+", + "hitstart": 314420, + "hitstop": 315232 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ecpA", + "hitorientation": "-", + "hitstart": 310084, + "hitstop": 310671 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ecpB", + "hitorientation": "-", + "hitstart": 309358, + "hitstop": 310026 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ecpC", + "hitorientation": "-", + "hitstart": 306807, + "hitstop": 309332 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ecpD", + "hitorientation": "-", + "hitstart": 305174, + "hitstop": 306817 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ecpE", + "hitorientation": "-", + "hitstart": 304497, + "hitstop": 305250 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ecpR", + "hitorientation": "-", + "hitstart": 310746, + "hitstop": 311336 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ehaB", + "hitorientation": "+", + "hitstart": 392973, + "hitstop": 394418 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "entA", + "hitorientation": "+", + "hitstart": 628551, + "hitstop": 629297 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "entB", + "hitorientation": "+", + "hitstart": 627694, + 
"hitstop": 628551 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "entC", + "hitorientation": "+", + "hitstart": 624873, + "hitstop": 626060 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "entD", + "hitorientation": "-", + "hitstart": 609459, + "hitstop": 610229 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "entE", + "hitorientation": "+", + "hitstart": 626070, + "hitstop": 627680 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "entF", + "hitorientation": "+", + "hitstart": 614157, + "hitstop": 617980 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "entS", + "hitorientation": "+", + "hitstart": 622300, + "hitstop": 623550 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "espL1", + "hitorientation": "+", + "hitstart": 1803439, + "hitstop": 1804993 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "espL3", + "hitorientation": "-", + "hitstart": 3861987, + "hitstop": 3863864 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "espL4", + "hitorientation": "-", + "hitstart": 4221348, + "hitstop": 4222487 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "espR1", + "hitorientation": "-", + "hitstart": 1544385, + "hitstop": 1545447 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "espX4", + "hitorientation": "+", + "hitstart": 4250703, + "hitstop": 4252283 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "espX5", + "hitorientation": "-", + "hitstart": 4281783, + "hitstop": 4283075 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "espY1", + "hitorientation": "+", + "hitstart": 58474, + "hitstop": 59103 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fdeC", + "hitorientation": "+", + "hitstart": 314357, + "hitstop": 315232 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fepA", + "hitorientation": "-", + "hitstart": 610254, + "hitstop": 612494 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fepB", + "hitorientation": "-", + "hitstart": 623554, + "hitstop": 624510 + }, + { + "analysis": "Virulence Factors", + 
"contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fepC", + "hitorientation": "-", + "hitstart": 619384, + "hitstop": 620199 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fepD", + "hitorientation": "-", + "hitstart": 621185, + "hitstop": 622201 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fepE", + "hitorientation": "+", + "hitstart": 618254, + "hitstop": 619387 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fepG", + "hitorientation": "-", + "hitstart": 620196, + "hitstop": 621188 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fes", + "hitorientation": "+", + "hitstart": 612737, + "hitstop": 613939 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimA", + "hitorientation": "+", + "hitstart": 4543115, + "hitstop": 4543663 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimB", + "hitorientation": "+", + "hitstart": 4540957, + "hitstop": 4541559 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimC", + "hitorientation": "+", + "hitstart": 4544355, + "hitstop": 4545029 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimD", + "hitorientation": "-", + "hitstart": 1588853, + "hitstop": 1590079 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimD", + "hitorientation": "+", + "hitstart": 4545096, + "hitstop": 4547732 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimE", + "hitorientation": "+", + "hitstart": 4542037, + "hitstop": 4542633 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimF", + "hitorientation": "-", + "hitstart": 1588309, + "hitstop": 1588839 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimF", + "hitorientation": "+", + "hitstart": 4547742, + "hitstop": 4548272 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimG", + "hitorientation": "-", + "hitstart": 1587793, + "hitstop": 1588296 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimG", + "hitorientation": "+", + "hitstart": 4548285, + "hitstop": 4548788 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": 
"GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimH", + "hitorientation": "+", + "hitstart": 4548808, + "hitstop": 4549710 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimI", + "hitorientation": "+", + "hitstart": 4543620, + "hitstop": 4544267 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgA", + "hitorientation": "-", + "hitstart": 1130204, + "hitstop": 1130863 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgD", + "hitorientation": "+", + "hitstart": 1131854, + "hitstop": 1132549 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgE", + "hitorientation": "+", + "hitstart": 1132574, + "hitstop": 1133782 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgF", + "hitorientation": "+", + "hitstart": 1133802, + "hitstop": 1134557 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgG", + "hitorientation": "+", + "hitstart": 1134729, + "hitstop": 1135511 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgH", + "hitorientation": "+", + "hitstart": 1135564, + "hitstop": 1136262 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgI", + "hitorientation": "+", + "hitstart": 1136274, + "hitstop": 1137371 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgJ", + "hitorientation": "+", + "hitstart": 1137371, + "hitstop": 1138312 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgK", + "hitorientation": "+", + "hitstart": 1138378, + "hitstop": 1140021 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgL", + "hitorientation": "+", + "hitstart": 1140033, + "hitstop": 1140986 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flhA", + "hitorientation": "-", + "hitstart": 1962974, + "hitstop": 1965050 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flhB", + "hitorientation": "-", + "hitstart": 1965043, + "hitstop": 1966191 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flhC", + "hitorientation": "-", + "hitstart": 1977266, + "hitstop": 1977844 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, 
+ "hitname": "fliA", + "hitorientation": "-", + "hitstart": 2001070, + "hitstop": 2001789 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliD", + "hitorientation": "+", + "hitstart": 2003872, + "hitstop": 2005278 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliF", + "hitorientation": "+", + "hitstart": 2013229, + "hitstop": 2014887 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliG", + "hitorientation": "+", + "hitstart": 2014880, + "hitstop": 2015875 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliH", + "hitorientation": "+", + "hitstart": 2015868, + "hitstop": 2016554 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliI", + "hitorientation": "+", + "hitstart": 2016554, + "hitstop": 2017927 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliK", + "hitorientation": "+", + "hitstart": 2018386, + "hitstop": 2019513 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliM", + "hitorientation": "+", + "hitstart": 2020087, + "hitstop": 2021091 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliP", + "hitorientation": "+", + "hitstart": 2021869, + "hitstop": 2022606 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliR", + "hitorientation": "+", + "hitstart": 2022893, + "hitstop": 2023678 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliY", + "hitorientation": "-", + "hitstart": 1999585, + "hitstop": 2000385 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliZ", + "hitorientation": "-", + "hitstart": 2000473, + "hitstop": 2001060 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flk", + "hitorientation": "+", + "hitstart": 2437950, + "hitstop": 2438945 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "gadX", + "hitorientation": "-", + "hitstart": 3664986, + "hitstop": 3665618 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "gspC", + "hitorientation": "-", + "hitstart": 3112091, + "hitstop": 3113049 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "gspL", + "hitorientation": "-", + "hitstart": 
3111128, + "hitstop": 3112092 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "gspo", + "hitorientation": "+", + "hitstart": 3465543, + "hitstop": 3466220 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "hcp", + "hitorientation": "-", + "hitstart": 115714, + "hitstop": 117099 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "hlye", + "hitorientation": "-", + "hitstart": 1229483, + "hitstop": 1230538 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "hofq", + "hitorientation": "-", + "hitstart": 3519465, + "hitstop": 3520703 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ibeB", + "hitorientation": "+", + "hitstart": 595600, + "hitstop": 596981 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ibeC", + "hitorientation": "-", + "hitstart": 4148532, + "hitstop": 4150309 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "motA", + "hitorientation": "-", + "hitstart": 1976252, + "hitstop": 1977139 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "motB", + "hitorientation": "-", + "hitstart": 1975329, + "hitstop": 1976255 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "nada", + "hitorientation": "+", + "hitstart": 782085, + "hitstop": 783128 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "nadb", + "hitorientation": "+", + "hitstart": 2710420, + "hitstop": 2712042 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ompA", + "hitorientation": "-", + "hitstart": 1019013, + "hitstop": 1020053 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ompt", + "hitorientation": "-", + "hitstart": 584680, + "hitstop": 585633 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ppdb", + "hitorientation": "-", + "hitstart": 2963153, + "hitstop": 2963716 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "tar/cheM", + "hitorientation": "-", + "hitstart": 1971030, + "hitstop": 1972691 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "upaC", + "hitorientation": "+", + "hitstart": 392973, + "hitstop": 394418 + }, + { + "analysis": "Virulence 
Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ycbF", + "hitorientation": "+", + "hitstart": 1003920, + "hitstop": 1004657 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ycbQ", + "hitorientation": "+", + "hitstart": 997859, + "hitstop": 998407 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ycbR", + "hitorientation": "+", + "hitstart": 998490, + "hitstop": 999191 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ycbS", + "hitorientation": "+", + "hitstart": 999216, + "hitstop": 1001816 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ycbT", + "hitorientation": "+", + "hitstart": 1001807, + "hitstop": 1002784 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ycbV", + "hitorientation": "+", + "hitstart": 1003391, + "hitstop": 1003954 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ycfz", + "hitorientation": "-", + "hitstart": 1180479, + "hitstop": 1181267 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ygeH", + "hitorientation": "+", + "hitstart": 2992094, + "hitstop": 2993470 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "yggr", + "hitorientation": "-", + "hitstart": 3094100, + "hitstop": 3095080 + } +] diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 3b039697..87a14fa7 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -1,10 +1,20 @@ from middleware.models import SubtypingRow, SubtypingResult, Pipeline, Job from modules.spfy import spfy from scripts.savvy import savvy -from tests.constants import BEAUTIFY_VF_SEROTYPE, ARGS_DICT +from tests.constants import BEAUTIFY_VF_SEROTYPE, BEAUTIFY_SEROTYPE, BEAUTIFY_VF, ARGS_DICT +class MockRQJob(): + """ + A mock Job class returned by RQ. Also emulates response the Job gets from + querying Redis DB. + """ + def __init__(self, is_finished=True, is_failed=False, exc_info='', result=None): + self.is_finished = is_finished + self.is_failed = is_failed + self.exc_info = exc_info + self.result = result -def test_subtyping_model_direct(): +def test_subtyping_model_direct(l=BEAUTIFY_VF_SEROTYPE): """ Use our dataset to directly create a subtyping results model and validate it. """ @@ -19,11 +29,13 @@ def test_subtyping_model_direct(): hitstart=str(d['hitstart']), hitstop=str(d['hitstop']) ) - for d in BEAUTIFY_VF_SEROTYPE] + for d in l] subtyping_result = SubtypingResult( rows = subtyping_list ) subtyping_result.validate() + # Return for incorporation into later tests. 
+    return subtyping_result
 
 def test_pipeline_model():
     """
@@ -33,19 +45,52 @@ def test_pipeline_model():
         func = spfy,
         options = ARGS_DICT
     )
+    mock_serotype = MockRQJob(
+        result = test_subtyping_model_direct(BEAUTIFY_SEROTYPE)
+    )
+    mock_vf = MockRQJob(
+        result = test_subtyping_model_direct(BEAUTIFY_VF)
+    )
+    # Flags should exclude the result from conversion to json.
     p.jobs.update({
         'job_ectyper_vf': Job(
-            rq_job='SHOULDBEANACTUALJOB',
+            rq_job="Should throw an error if read.",
             name='job_ectyper_vf',
             transitory=True,
             backlog=False,
             display=False
         )
     })
+    # Mimics a Serotype result that will be converted to json.
+    p.jobs.update({
+        'job_ectyper_beautify_serotype': Job(
+            rq_job=mock_serotype,
+            name='job_ectyper_beautify_serotype',
+            transitory=False,
+            backlog=False,
+            display=True
+        )
+    })
+    # Mimics a VF result that will be converted to json.
+    p.jobs.update({
+        'job_ectyper_beautify_vf': Job(
+            rq_job=mock_vf,
+            name='job_ectyper_beautify_vf',
+            transitory=False,
+            backlog=False,
+            display=True
+        )
+    })
     assert isinstance(p, Pipeline)
     assert isinstance(p.jobs, dict)
     assert isinstance(p.jobs['job_ectyper_vf'], Job)
+
+    # Test Pipeline.complete(), should be True.
+    assert p.complete()
+
+    # Test Pipeline.to_json().
+    json = p.to_json()
+    assert isinstance(json, list)
 
 def test_pipeline_model_signature():
     """
     Function signatures should be identical if called on the same function.

From afbfed63c202eb9725ce9eeb0b86e22a1ff57787 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 17 Feb 2018 22:16:42 -0500
Subject: [PATCH 040/122] FIX: tests

---
 app/middleware/models.py |  2 ++
 app/tests/test_models.py | 13 ++-----------
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/app/middleware/models.py b/app/middleware/models.py
index 8b11b3fe..361364b7 100644
--- a/app/middleware/models.py
+++ b/app/middleware/models.py
@@ -77,6 +77,8 @@ def complete(self):
         Check if all jobs are completed
         """
         for j in self.jobs.itervalues():
+            # Type check.
+            assert isinstance(j, Job)
             rq_job = j.rq_job
             if j.backlog:
                 # Some backlog job, we don't care (though Sentry will catch it).
diff --git a/app/tests/test_models.py b/app/tests/test_models.py
index 87a14fa7..8efec140 100644
--- a/app/tests/test_models.py
+++ b/app/tests/test_models.py
@@ -51,16 +51,6 @@ def test_pipeline_model():
     mock_vf = MockRQJob(
         result = test_subtyping_model_direct(BEAUTIFY_VF)
     )
-    # Flags should exclude the result from conversion to json.
-    p.jobs.update({
-        'job_ectyper_vf': Job(
-            rq_job="Should throw an error if read.",
-            name='job_ectyper_vf',
-            transitory=True,
-            backlog=False,
-            display=False
-        )
-    })
     # Mimics a Serotype result that will be converted to json.
@@ -83,7 +73,8 @@ def test_pipeline_model():
     })
     assert isinstance(p, Pipeline)
     assert isinstance(p.jobs, dict)
-    assert isinstance(p.jobs['job_ectyper_vf'], Job)
+    for k in p.jobs:
+        assert isinstance(p.jobs[k], Job)
 
     # Test Pipeline.complete(), should be True.
assert p.complete() From 850bad7f99132d20265847c391a2222cd61e46cd Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 17 Feb 2018 23:33:00 -0500 Subject: [PATCH 041/122] ADD: models for phylotyper --- app/middleware/models.py | 11 + app/tests/constants.py | 885 +++++++++++++++++++++++++++++++++++++++ app/tests/test_models.py | 122 +++++- 3 files changed, 1014 insertions(+), 4 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 361364b7..b386970c 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -39,6 +39,17 @@ class SubtypingRow(models.Base): class SubtypingResult(models.Base): rows = fields.ListField([SubtypingRow], nullable=True) +class PhylotyperRow(models.Base): + contig = fields.StringField(nullable=True) + genome = fields.StringField() + probability = fields.StringField(nullable=True) # actually float + start = fields.StringField(nullable=True) # actually int + stop = fields.StringField(nullable=True) # actually int + subtype = fields.StringField() + subtype_gene = fields.StringField(nullable=True) + +class PhylotyperResult(models.Base): + rows = fields.ListField([PhylotyperRow], nullable=True) class Job(): def __init__(self, rq_job, name="", transitory=True, backlog=True, display=False): diff --git a/app/tests/constants.py b/app/tests/constants.py index 63af007d..5a761013 100644 --- a/app/tests/constants.py +++ b/app/tests/constants.py @@ -2430,3 +2430,888 @@ "hitstop": 3095080 } ] + +BEAUTIFY_AMR = [ + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "Escherichia coli gyrA conferring resistance to fluoroquinolones", + "hitorientation": "+", + "hitstart": 159252, + "hitstop": 161879 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "PmrE", + "hitorientation": "+", + "hitstart": 388190, + "hitstop": 389356 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "PmrF", + "hitorientation": "-", + "hitstart": 134984, + "hitstop": 135952 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "arnA", + "hitorientation": "-", + "hitstart": 133002, + "hitstop": 134984 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "baeR", + "hitorientation": "-", + "hitstart": 323408, + "hitstop": 324130 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "baeS", + "hitorientation": "-", + "hitstart": 324127, + "hitstop": 325530 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "mdtB", + "hitorientation": "-", + "hitstart": 330021, + "hitstop": 333143 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtD", + "hitorientation": "-", + "hitstart": 325527, + 
"hitstop": 326942 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mexN", + "hitorientation": "-", + "hitstart": 326943, + "hitstop": 330020 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000003.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "PmrA", + "hitorientation": "+", + "hitstart": 28893, + "hitstop": 29561 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000003.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "PmrB", + "hitorientation": "+", + "hitstart": 29562, + "hitstop": 30662 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000003.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "PmrC", + "hitorientation": "+", + "hitstart": 27253, + "hitstop": 28896 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000003.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtN", + "hitorientation": "+", + "hitstart": 58230, + "hitstop": 59261 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000003.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtO", + "hitorientation": "+", + "hitstart": 59261, + "hitstop": 61312 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000003.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtP", + "hitorientation": "+", + "hitstart": 61309, + "hitstop": 62775 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000004.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtK", + "hitorientation": "+", + "hitstart": 126030, + "hitstop": 127403 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000005.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "ACT-7", + "hitorientation": "-", + "hitstart": 4604, + "hitstop": 5737 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000005.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtM", + "hitorientation": "-", + "hitstart": 187550, + "hitstop": 188782 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000005.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "robA", + "hitorientation": "-", + "hitstart": 251658, + "hitstop": 252527 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000006.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "CRP", + "hitorientation": "-", + "hitstart": 176803, + "hitstop": 177435 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000006.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "gadX", + "hitorientation": "+", + "hitstart": 397, + "hitstop": 1221 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000006.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtE", + "hitorientation": "-", + "hitstart": 5818, + "hitstop": 6975 + }, + { + 
"analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000006.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mexD", + "hitorientation": "-", + "hitstart": 2680, + "hitstop": 5793 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000007.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "H-NS", + "hitorientation": "-", + "hitstart": 187722, + "hitstop": 188135 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000007.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "mdtG", + "hitorientation": "-", + "hitstart": 25571, + "hitstop": 26797 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000007.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtH", + "hitorientation": "-", + "hitstart": 35428, + "hitstop": 36636 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000007.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "phoP", + "hitorientation": "-", + "hitstart": 101156, + "hitstop": 101827 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000007.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "phoQ", + "hitorientation": "-", + "hitstart": 99696, + "hitstop": 101156 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000008.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "emrK", + "hitorientation": "-", + "hitstart": 9140, + "hitstop": 10303 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000008.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "emrY", + "hitorientation": "-", + "hitstart": 7602, + "hitstop": 9140 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000008.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "evgA", + "hitorientation": "+", + "hitstart": 10719, + "hitstop": 11333 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000008.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "evgS", + "hitorientation": "+", + "hitstart": 11338, + "hitstop": 14931 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000008.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mexD", + "hitorientation": "+", + "hitstart": 104776, + "hitstop": 107889 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000009.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "cpxA", + "hitorientation": "+", + "hitstart": 22429, + "hitstop": 23802 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000009.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "cpxR", + "hitorientation": "+", + "hitstart": 21734, + "hitstop": 22432 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000011.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "Escherichia coli marR mutant resulting in antibiotic resistance", + "hitorientation": "+", + "hitstart": 51100, + 
"hitstop": 51534 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000011.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "marA", + "hitorientation": "+", + "hitstart": 51554, + "hitstop": 51937 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000012.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "emrA", + "hitorientation": "-", + "hitstart": 312493, + "hitstop": 313665 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000012.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "emrB", + "hitorientation": "-", + "hitstart": 310938, + "hitstop": 312476 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000012.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "emrR", + "hitorientation": "-", + "hitstart": 313792, + "hitstop": 314322 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000013.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "Staphylococcus aureus gyrB conferring resistance to aminocoumarin", + "hitorientation": "-", + "hitstart": 131568, + "hitstop": 133982 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000013.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "emrD", + "hitorientation": "+", + "hitstart": 107782, + "hitstop": 108966 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000013.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "mdtL", + "hitorientation": "+", + "hitstart": 145479, + "hitstop": 146654 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000015.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "Escherichia coli parC conferring resistance to fluoroquinolone", + "hitorientation": "-", + "hitstart": 68709, + "hitstop": 70967 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000015.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "bacA", + "hitorientation": "-", + "hitstart": 104717, + "hitstop": 105538 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000015.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "tolC", + "hitorientation": "+", + "hitstart": 80879, + "hitstop": 82360 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000016.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "ACT-7", + "hitorientation": "+", + "hitstart": 286, + "hitstop": 1431 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000022.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "Mycobacterium tuberculosis rpoB mutants conferring resistance to rifampicin", + "hitorientation": "-", + "hitstart": 22720, + "hitstop": 26748 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000023.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "macA", + "hitorientation": "-", + "hitstart": 5642, + "hitstop": 6757 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": 
"MOHB01000023.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "macB", + "hitorientation": "-", + "hitstart": 3699, + "hitstop": 5645 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000023.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdfA", + "hitorientation": "-", + "hitstart": 39796, + "hitstop": 41028 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000024.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "CTX-M-55", + "hitorientation": "-", + "hitstart": 37702, + "hitstop": 38577 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000026.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "Mycobacterium tuberculosis katG mutations conferring resistance to isoniazid", + "hitorientation": "+", + "hitstart": 8536, + "hitstop": 10716 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000027.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "APH(3'')", + "hitorientation": "-", + "hitstart": 10215, + "hitstop": 11018 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000027.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "APH(6)", + "hitorientation": "-", + "hitstart": 9379, + "hitstop": 10215 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000027.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "floR", + "hitorientation": "+", + "hitstart": 5030, + "hitstop": 6244 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000027.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "sul2", + "hitorientation": "-", + "hitstart": 11079, + "hitstop": 11894 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000027.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "tetG", + "hitorientation": "-", + "hitstart": 6844, + "hitstop": 8043 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000028.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "emrE", + "hitorientation": "+", + "hitstart": 30648, + "hitstop": 30980 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000032.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "acrE", + "hitorientation": "+", + "hitstart": 32702, + "hitstop": 33859 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000032.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "acrS", + "hitorientation": "-", + "hitstart": 31641, + "hitstop": 32303 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000032.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mexD", + "hitorientation": "+", + "hitstart": 33871, + "hitstop": 36975 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000036.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "Klebsiella pneumoniae acrR mutant resulting in high level antibiotic resistance", + 
"hitorientation": "+", + "hitstart": 107902, + "hitstop": 108495 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000036.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "acrE", + "hitorientation": "-", + "hitstart": 106513, + "hitstop": 107706 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000036.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mexD", + "hitorientation": "-", + "hitstart": 103341, + "hitstop": 106490 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000036.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "rosA", + "hitorientation": "-", + "hitstart": 125513, + "hitstop": 126733 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000036.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "rosB", + "hitorientation": "-", + "hitstart": 123599, + "hitstop": 125275 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000036.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "vanG", + "hitorientation": "-", + "hitstart": 19876, + "hitstop": 20970 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000037.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "MCR-1", + "hitorientation": "+", + "hitstart": 13553, + "hitstop": 15178 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000050.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "FosA3", + "hitorientation": "+", + "hitstart": 4459, + "hitstop": 4875 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000050.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "mphA", + "hitorientation": "+", + "hitstart": 89, + "hitstop": 994 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000053.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "ErmB", + "hitorientation": "-", + "hitstart": 1455, + "hitstop": 2192 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000062.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "sul1", + "hitorientation": "+", + "hitstart": 452, + "hitstop": 1291 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000064.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "TEM-1", + "hitorientation": "+", + "hitstart": 3455, + "hitstop": 4315 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000080.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "NDM-1", + "hitorientation": "+", + "hitstart": 724, + "hitstop": 1536 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000090.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "aadA11", + "hitorientation": "+", + "hitstart": 690, + "hitstop": 1535 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000090.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "dfrA25", + "hitorientation": "+", + "hitstart": 
36, + "hitstop": 509 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000098.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "catI", + "hitorientation": "-", + "hitstart": 166, + "hitstop": 825 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000101.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "arr-3", + "hitorientation": "+", + "hitstart": 37, + "hitstop": 489 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000104.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "rmtB", + "hitorientation": "+", + "hitstart": 43, + "hitstop": 798 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000106.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "aadA5", + "hitorientation": "-", + "hitstart": 115, + "hitstop": 903 + } +] + +BEAUTIFY_STX1 = [ + { + "contig": "lcl|ECI-2644|NODE_8_length_178521_cov_25.218_ID_15", + "genome": "ECI-2644_lcl.fasta", + "probability": 0.9561446, + "start": 174535, + "stop": 175491, + "subtype": "a", + "subtype_gene": "stx1A" + }, + { + "contig": "lcl|ECI-2644|NODE_8_length_178521_cov_25.218_ID_15", + "genome": "ECI-2644_lcl.fasta", + "probability": 0.9561446, + "start": 175501, + "stop": 175770, + "subtype": "a", + "subtype_gene": "stx1B" + }, + { + "contig": "lcl|ECI-2644|NODE_8_length_178521_cov_25.218_ID_15", + "genome": "ECI-2644_lcl.fasta", + "probability": 0.9561446, + "start": 174544, + "stop": 175491, + "subtype": "a", + "subtype_gene": "stx1A" + }, + { + "contig": "lcl|ECI-2644|NODE_8_length_178521_cov_25.218_ID_15", + "genome": "ECI-2644_lcl.fasta", + "probability": 0.9561446, + "start": 175501, + "stop": 175770, + "subtype": "a", + "subtype_gene": "stx1B" + } +] + +BEAUTIFY_STX2 = [ + { + "contig": "lcl|ECI-2644|NODE_51_length_5713_cov_24.063_ID_101", + "genome": "ECI-2644_lcl.fasta", + "probability": 0.9460619, + "start": 4390, + "stop": 5349, + "subtype": "a", + "subtype_gene": "stx2A" + }, + { + "contig": "lcl|ECI-2644|NODE_51_length_5713_cov_24.063_ID_101", + "genome": "ECI-2644_lcl.fasta", + "probability": 0.9460619, + "start": 4109, + "stop": 4378, + "subtype": "a", + "subtype_gene": "stx2B" + } +] + +BEAUTIFY_EAE = [ + { + "contig": "N/A", + "genome": "GCA_000005845.2_ASM584v2_genomic.fna", + "probability": "N/A", + "start": "N/A", + "stop": "N/A", + "subtype": "Subtype loci not found in genome", + "subtype_gene": "N/A" + } +] diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 8efec140..aa0b2662 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -1,7 +1,21 @@ -from middleware.models import SubtypingRow, SubtypingResult, Pipeline, Job +from middleware.models import + SubtypingRow, + SubtypingResult, + PhylotyperRow, + PhylotyperResult, + Pipeline, + Job from modules.spfy import spfy from scripts.savvy import savvy -from tests.constants import BEAUTIFY_VF_SEROTYPE, BEAUTIFY_SEROTYPE, BEAUTIFY_VF, ARGS_DICT +from tests.constants import + BEAUTIFY_VF_SEROTYPE, + BEAUTIFY_SEROTYPE, + BEAUTIFY_VF, + BEAUTIFY_AMR, + BEAUTIFY_STX1, + BEAUTIFY_STX2, + BEAUTIFY_EAE, + ARGS_DICT class MockRQJob(): """ @@ -37,9 +51,31 @@ def test_subtyping_model_direct(l=BEAUTIFY_VF_SEROTYPE): # Return for incorporation into later tests. 
return subtyping_result -def test_pipeline_model(): +def test_phylotyper_model_direct(l=BEAUTIFY_STX1): """ - Test the Pipeline model itself. + Use our dataset to directly create a phylotyper results model and validate it. + """ + phylotyper_list = [ + PhylotyperRow( + contig=d['contig'], + genome=d['genome'], + probability=str(d['probability']), + start=str(d['start']), + stop=str(d['stop']), + subtype=d['subtype'], + subtype_gene=d['subtype_gene'] + ) + for d in l] + phylotyper_result = PhylotyperResult( + rows = phylotyper_list + ) + phylotyper_result.validate() + # Return for incorporation into later tests. + return phylotyper_result + +def test_pipeline_model_subtyping(): + """ + Test the Pipeline model itself for subtyping via ECTyper and RGI. """ p = Pipeline( func = spfy, @@ -83,6 +119,84 @@ def test_pipeline_model(): json = p.to_json() assert isinstance(json, list) + # Add an AMR job and re-test. + mock_amr = MockRQJob( + result = test_subtyping_model_direct(BEAUTIFY_AMR) + ) + p.jobs.update({ + 'job_ectyper_beautify_amr': Job( + rq_job=mock_amr, + name='job_ectyper_beautify_amr', + transitory=False, + backlog=False, + display=True + ) + }) + # Test Pipeline.to_json(). + json = p.to_json() + assert isinstance(json, list) + +def test_pipeline_model_phyotyping(): + """ + Test the Pipeline model itself for subtyping via Phylotyper. + """ + p = Pipeline( + func = spfy, + options = ARGS_DICT + ) + mock_stx1 = MockRQJob( + result = test_phylotyper_model_direct(BEAUTIFY_STX1) + ) + mock_stx2 = MockRQJob( + result = test_phylotyper_model_direct(BEAUTIFY_STX2) + ) + p.jobs.update({ + 'job_phylotyper_beautify_stx1': Job( + rq_job=mock_stx1, + name='job_phylotyper_beautify_stx1', + transitory=False, + backlog=False, + display=True + ) + }) + p.jobs.update({ + 'job_phylotyper_beautify_stx2': Job( + rq_job=mock_stx2, + name='job_phylotyper_beautify_stx2', + transitory=False, + backlog=False, + display=True + ) + }) + assert isinstance(p, Pipeline) + assert isinstance(p.jobs, dict) + for k in p.jobs: + assert isinstance(p.jobs[k], Job) + + # Test Pipeline.complete(), should be True. + assert p.complete() + + # Test Pipeline.to_json(). + json = p.to_json() + assert isinstance(json, list) + + # Add an AMR job and re-test. + mock_eae = MockRQJob( + result = test_phylotyper_model_direct(BEAUTIFY_EAE) + ) + p.jobs.update({ + 'job_phylotyper_beautify_eae': Job( + rq_job=mock_eae, + name='job_phylotyper_beautify_stx2', + transitory=False, + backlog=False, + display=True + ) + }) + # Test Pipeline.to_json(). + json = p.to_json() + assert isinstance(json, list) + def test_pipeline_model_signature(): """ Function signatures should be identical if called on the same function. From d9f2bdc055e226120441cd629000fa5f37b0b652 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 17 Feb 2018 23:38:41 -0500 Subject: [PATCH 042/122] FIX: conversion should actually work on any subclasses of models.Base --- app/middleware/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index b386970c..3fd71da1 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -5,7 +5,7 @@ from jsonmodels import models, fields from middleware.graphers.turtle_utils import actual_filename -def _convert_subtyping(model): +def _convert_model(model): # Convert the model to a generic JSON structure. struct = model.to_struct() # This is not strictly json; more like a list than a dict structure. 
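(A quick aside for readers unfamiliar with jsonmodels: a minimal, self-contained
sketch of the to_struct() behaviour that _convert_model() relies on. The Demo*
classes are illustrative stand-ins, not models from this repository.)

    from jsonmodels import models, fields

    class DemoRow(models.Base):
        hitname = fields.StringField(required=True)

    class DemoResult(models.Base):
        rows = fields.ListField([DemoRow])

    result = DemoResult(rows=[DemoRow(hitname='emrA')])
    result.validate()          # raises a ValidationError on missing required fields
    print(result.to_struct())  # {'rows': [{'hitname': 'emrA'}]}

(The 'rows' key holds a plain list, which is why _convert_model() returns
struct['rows'] rather than the wrapping dict.)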
@@ -19,8 +19,8 @@ def model_to_json(model): # Validate the model submitted before processing. model.validate() # Conversion. - if isinstance(model, SubtypingResult): - return _convert_subtyping(model) + if issubclass(model, models.Base)(): + return _convert_model(model) else: raise Exception('model_to_json() called for a model without a handler.') From d53189deca9145394706967cea3b1662a3de2257 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 00:22:47 -0500 Subject: [PATCH 043/122] FIX: imports for tests --- app/middleware/models.py | 9 ++-- app/tests/test_models.py | 92 +++++++++++++++++----------------------- 2 files changed, 45 insertions(+), 56 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 3fd71da1..cb5d632b 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -8,9 +8,12 @@ def _convert_model(model): # Convert the model to a generic JSON structure. struct = model.to_struct() - # This is not strictly json; more like a list than a dict structure. - rows_list = struct['rows'] - return rows_list + if 'rows' in struct: + # This is not strictly json; more like a list than a dict structure. + rows_list = struct['rows'] + return rows_list + else: + return struct def model_to_json(model): """ diff --git a/app/tests/test_models.py b/app/tests/test_models.py index aa0b2662..d37aa0df 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -1,21 +1,7 @@ -from middleware.models import - SubtypingRow, - SubtypingResult, - PhylotyperRow, - PhylotyperResult, - Pipeline, - Job +from middleware import models from modules.spfy import spfy from scripts.savvy import savvy -from tests.constants import - BEAUTIFY_VF_SEROTYPE, - BEAUTIFY_SEROTYPE, - BEAUTIFY_VF, - BEAUTIFY_AMR, - BEAUTIFY_STX1, - BEAUTIFY_STX2, - BEAUTIFY_EAE, - ARGS_DICT +from tests import constants class MockRQJob(): """ @@ -28,12 +14,12 @@ def __init__(self, is_finished=True, is_failed=False, exc_info='', result=None): self.exc_info = exc_info self.result = result -def test_subtyping_model_direct(l=BEAUTIFY_VF_SEROTYPE): +def test_subtyping_model_direct(l=constants.BEAUTIFY_VF_SEROTYPE): """ Use our dataset to directly create a subtyping results model and validate it. """ subtyping_list = [ - SubtypingRow( + models.SubtypingRow( analysis=d['analysis'], contigid=d['contigid'], filename=d['filename'], @@ -44,19 +30,19 @@ def test_subtyping_model_direct(l=BEAUTIFY_VF_SEROTYPE): hitstop=str(d['hitstop']) ) for d in l] - subtyping_result = SubtypingResult( + subtyping_result = models.SubtypingResult( rows = subtyping_list ) subtyping_result.validate() # Return for incorporation into later tests. return subtyping_result -def test_phylotyper_model_direct(l=BEAUTIFY_STX1): +def test_phylotyper_model_direct(l=constants.BEAUTIFY_STX1): """ Use our dataset to directly create a phylotyper results model and validate it. """ phylotyper_list = [ - PhylotyperRow( + models.PhylotyperRow( contig=d['contig'], genome=d['genome'], probability=str(d['probability']), @@ -66,7 +52,7 @@ def test_phylotyper_model_direct(l=BEAUTIFY_STX1): subtype_gene=d['subtype_gene'] ) for d in l] - phylotyper_result = PhylotyperResult( + phylotyper_result = models.PhylotyperResult( rows = phylotyper_list ) phylotyper_result.validate() @@ -77,19 +63,19 @@ def test_pipeline_model_subtyping(): """ Test the Pipeline model itself for subtyping via ECTyper and RGI. 
""" - p = Pipeline( + p = models.Pipeline( func = spfy, - options = ARGS_DICT + options = constants.ARGS_DICT ) mock_serotype = MockRQJob( - result = test_subtyping_model_direct(BEAUTIFY_SEROTYPE) + result = test_subtyping_model_direct(constants.BEAUTIFY_SEROTYPE) ) mock_vf = MockRQJob( - result = test_subtyping_model_direct(BEAUTIFY_VF) + result = test_subtyping_model_direct(constants.BEAUTIFY_VF) ) # Mimicks a Serotype result that will be converted to json. p.jobs.update({ - 'job_ectyper_beautify_serotype': Job( + 'job_ectyper_beautify_serotype': models.Job( rq_job=mock_serotype, name='job_ectyper_beautify_vf', transitory=False, @@ -99,7 +85,7 @@ def test_pipeline_model_subtyping(): }) # Mimicks a VF result that will be converted to json. p.jobs.update({ - 'job_ectyper_beautify_vf': Job( + 'job_ectyper_beautify_vf': models.Job( rq_job=mock_vf, name='job_ectyper_beautify_vf', transitory=False, @@ -107,10 +93,10 @@ def test_pipeline_model_subtyping(): display=True ) }) - assert isinstance(p, Pipeline) + assert isinstance(p, models.Pipeline) assert isinstance(p.jobs, dict) for k in p.jobs: - assert isinstance(p.jobs[k], Job) + assert isinstance(p.jobs[k], models.Job) # Test Pipeline.complete(), should be True. assert p.complete() @@ -121,10 +107,10 @@ def test_pipeline_model_subtyping(): # Add an AMR job and re-test. mock_amr = MockRQJob( - result = test_subtyping_model_direct(BEAUTIFY_AMR) + result = test_subtyping_model_direct(constants.BEAUTIFY_AMR) ) p.jobs.update({ - 'job_ectyper_beautify_amr': Job( + 'job_ectyper_beautify_amr': models.Job( rq_job=mock_amr, name='job_ectyper_beautify_amr', transitory=False, @@ -140,18 +126,18 @@ def test_pipeline_model_phyotyping(): """ Test the Pipeline model itself for subtyping via Phylotyper. """ - p = Pipeline( + p = models.Pipeline( func = spfy, - options = ARGS_DICT + options = constants.ARGS_DICT ) mock_stx1 = MockRQJob( - result = test_phylotyper_model_direct(BEAUTIFY_STX1) + result = test_phylotyper_model_direct(constants.BEAUTIFY_STX1) ) mock_stx2 = MockRQJob( - result = test_phylotyper_model_direct(BEAUTIFY_STX2) + result = test_phylotyper_model_direct(constants.BEAUTIFY_STX2) ) p.jobs.update({ - 'job_phylotyper_beautify_stx1': Job( + 'job_phylotyper_beautify_stx1': models.Job( rq_job=mock_stx1, name='job_phylotyper_beautify_stx1', transitory=False, @@ -160,7 +146,7 @@ def test_pipeline_model_phyotyping(): ) }) p.jobs.update({ - 'job_phylotyper_beautify_stx2': Job( + 'job_phylotyper_beautify_stx2': models.Job( rq_job=mock_stx2, name='job_phylotyper_beautify_stx2', transitory=False, @@ -168,10 +154,10 @@ def test_pipeline_model_phyotyping(): display=True ) }) - assert isinstance(p, Pipeline) + assert isinstance(p, models.Pipeline) assert isinstance(p.jobs, dict) for k in p.jobs: - assert isinstance(p.jobs[k], Job) + assert isinstance(p.jobs[k], models.Job) # Test Pipeline.complete(), should be True. assert p.complete() @@ -182,10 +168,10 @@ def test_pipeline_model_phyotyping(): # Add an AMR job and re-test. mock_eae = MockRQJob( - result = test_phylotyper_model_direct(BEAUTIFY_EAE) + result = test_phylotyper_model_direct(constants.BEAUTIFY_EAE) ) p.jobs.update({ - 'job_phylotyper_beautify_eae': Job( + 'job_phylotyper_beautify_eae': models.Job( rq_job=mock_eae, name='job_phylotyper_beautify_stx2', transitory=False, @@ -201,37 +187,37 @@ def test_pipeline_model_signature(): """ Function signatures should be identical if called on the same function. 
""" - p1 = Pipeline( + p1 = models.Pipeline( func = spfy, - options = ARGS_DICT + options = constants.ARGS_DICT ) - p2 = Pipeline( + p2 = models.Pipeline( func = spfy, - options = ARGS_DICT + options = constants.ARGS_DICT ) r1 = p1.signature() r2 = p2.signature() # These are identical pipelines, should be equal. assert r1 == r2 - p1 = Pipeline( + p1 = models.Pipeline( func = spfy, - options = ARGS_DICT + options = constants.ARGS_DICT ) - p2 = Pipeline( + p2 = models.Pipeline( func = savvy, - options = ARGS_DICT + options = constants.ARGS_DICT ) r1 = p1.signature() r2 = p2.signature() # These pipelines have different functions, should be different. assert r1 != r2 - p1 = Pipeline( + p1 = models.Pipeline( func = spfy, - options = ARGS_DICT + options = constants.ARGS_DICT ) - p2 = Pipeline( + p2 = models.Pipeline( func = spfy, options = {'cats':1} ) From c30dd50db0d8338af671c8aaa5b0d9903d2465c5 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 01:13:10 -0500 Subject: [PATCH 044/122] FIX: isinstance also tests for issubclass --- app/middleware/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index cb5d632b..f765a89e 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -22,7 +22,7 @@ def model_to_json(model): # Validate the model submitted before processing. model.validate() # Conversion. - if issubclass(model, models.Base)(): + if isinstance(model, models.Base): return _convert_model(model) else: raise Exception('model_to_json() called for a model without a handler.') From 892a7ebc99bb313c24f5abcdbe651392b69b19cf Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 10:22:53 -0500 Subject: [PATCH 045/122] ADD: methods for handling multiple files --- app/middleware/models.py | 30 +++++++++++++++++++++++++++--- app/modules/spfy.py | 2 +- app/tests/test_models.py | 14 ++++++++++++++ 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index f765a89e..25ac087f 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -1,4 +1,5 @@ import sys +import copy from hashlib import sha1 from dis import dis from StringIO import StringIO @@ -79,18 +80,41 @@ def __init__(self, jobs=None, files=None, func=None, options=None): files = [] if not options: options = {} - self.jobs = {} # {'somename': instance of RQ.Job} + self.jobs = {} # {'somename': instance of RQ.Job} Only used when enqueing. + self.final_jobs = [] # Jobs for every file in the request. + self.cache = {} # For temporary storage of RQ.Jobs. self.sig = None # Signtaure isn't generated until necessary # TODO: incorporate below into the pipeline. self.files = [] self.func = func # Additional attribute for storing pipeline function. self.options = options + def cache_jobs(self): + """ + Copy current jobs to cache. + """ + self.cache += [copy.deepcopy(self.jobs)] + self.jobs = {} + + def merge_jobs(self): + """ + + """ + # If the jobs dictionary is not empty. + if self.jobs: + self.cache_jobs() + # Actual merge. Notice were converting to list. + self.final_jobs = [ + item + for d in self.cache + for item in d + ] + def complete(self): """ Check if all jobs are completed """ - for j in self.jobs.itervalues(): + for j in self.final_jobs: # Type check. assert isinstance(j, Job) rq_job = j.rq_job @@ -113,7 +137,7 @@ def to_json(self): """ # Gather all the jobs that have finished and haven't failed. 
completed_jobs = [ - j.rq_job for j in self.jobs.itervalues() + j.rq_job for j in self.final_jobs if j.display and j.rq_job.is_finished and not j.rq_job.is_failed ] # Merge the json lists together. diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 9fa2c4fa..8fac574c 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -455,7 +455,7 @@ def blob_savvy(args_dict, pipeline): ) ) else: - d.update(blob_savvy_enqueue(args_dict)) + d.update(blob_savvy_enqueue(args_dict, pipeline)) return d diff --git a/app/tests/test_models.py b/app/tests/test_models.py index d37aa0df..271fe565 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -98,6 +98,10 @@ def test_pipeline_model_subtyping(): for k in p.jobs: assert isinstance(p.jobs[k], models.Job) + # Test Pipeline.cache_jobs() + p.cache_jobs() + # Test Pipeline.merge_jobs() + p.merge_jobs() # Test Pipeline.complete(), should be True. assert p.complete() @@ -118,6 +122,9 @@ def test_pipeline_model_subtyping(): display=True ) }) + p.merge_jobs() + # Test Pipeline.complete(), should be True. + assert p.complete() # Test Pipeline.to_json(). json = p.to_json() assert isinstance(json, list) @@ -159,6 +166,10 @@ def test_pipeline_model_phyotyping(): for k in p.jobs: assert isinstance(p.jobs[k], models.Job) + # Test Pipeline.cache_jobs() + p.cache_jobs() + # Test Pipeline.merge_jobs() + p.merge_jobs() # Test Pipeline.complete(), should be True. assert p.complete() @@ -179,6 +190,9 @@ def test_pipeline_model_phyotyping(): display=True ) }) + p.merge_jobs() + # Test Pipeline.complete(), should be True. + assert p.complete() # Test Pipeline.to_json(). json = p.to_json() assert isinstance(json, list) From 784fbee9de16ec5340fde2963863045a4a119ea1 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 11:22:05 -0500 Subject: [PATCH 046/122] FIX: cache should be a list --- app/middleware/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 25ac087f..1970b60f 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -82,7 +82,7 @@ def __init__(self, jobs=None, files=None, func=None, options=None): options = {} self.jobs = {} # {'somename': instance of RQ.Job} Only used when enqueing. self.final_jobs = [] # Jobs for every file in the request. - self.cache = {} # For temporary storage of RQ.Jobs. + self.cache = [] # For temporary storage of RQ.Jobs. self.sig = None # Signtaure isn't generated until necessary # TODO: incorporate below into the pipeline. self.files = [] From 421d409fd6053e128a80f0e7271493be5a4cfe69 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 11:56:29 -0500 Subject: [PATCH 047/122] FIX: values --- app/middleware/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 1970b60f..156648c4 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -105,9 +105,9 @@ def merge_jobs(self): self.cache_jobs() # Actual merge. Notice were converting to list. 
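# The flatten below is what this fix corrects: iterating a dict yields its
# keys, so the Job objects themselves must be collected with d.values().
# A standalone sketch of the corrected behaviour (hypothetical two-job cache):
cache = [{'job_a': 'A'}, {'job_b': 'B'}]
assert [j for d in cache for j in d] == ['job_a', 'job_b']    # keys only
assert [j for d in cache for j in d.values()] == ['A', 'B']   # the Jobs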
self.final_jobs = [ - item + j for d in self.cache - for item in d + for j in d.values() ] def complete(self): From 306727308dccbf1433196259f2544d7b7bdff81f Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 12:36:25 -0500 Subject: [PATCH 048/122] ADD: try creating a signature on __init__ --- app/middleware/models.py | 4 ++-- app/routes/ra_posts.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 156648c4..b993d5bc 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -83,11 +83,11 @@ def __init__(self, jobs=None, files=None, func=None, options=None): self.jobs = {} # {'somename': instance of RQ.Job} Only used when enqueing. self.final_jobs = [] # Jobs for every file in the request. self.cache = [] # For temporary storage of RQ.Jobs. - self.sig = None # Signtaure isn't generated until necessary - # TODO: incorporate below into the pipeline. + self.sig = None self.files = [] self.func = func # Additional attribute for storing pipeline function. self.options = options + self.signature() # Create & Store a signature for the pipeline. def cache_jobs(self): """ diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py index ab6d9f8f..107c98a6 100644 --- a/app/routes/ra_posts.py +++ b/app/routes/ra_posts.py @@ -261,8 +261,10 @@ def upload(): pipeline = pipeline ) jobs_dict.update(jobs_enqueued) + pipeline.cache_jobs() # new in 4.2.0 print 'upload(): all files enqueued, returning...' + pipeline.merge_jobs() if groupresults: return jsonify(handle_groupresults(jobs_dict)) else: From ca2948ccc831693c2664b224e17fea93c2b348ab Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 13:05:57 -0500 Subject: [PATCH 049/122] ADD: more tests for signatures and method to store pipeline into Redis DB --- app/middleware/models.py | 26 ++++++++++++++++++++++++++ app/routes/ra_posts.py | 3 ++- app/tests/test_models.py | 8 ++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index b993d5bc..8e62584f 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -1,5 +1,7 @@ import sys import copy +import config +import redis from hashlib import sha1 from dis import dis from StringIO import StringIO @@ -193,3 +195,27 @@ def signature(self): sig = hx.hexdigest() self.sig = sig return sig + + def store(self): + """ + Stores the pipeline to Redis DB and creates a pipeline id for return. + :param pipeline: An instance of the models.Pipeline class. + :return: (dict): {"pipeline..." id: "Subtyping"} + """ + pipeline_id = "pipeline{0}".format(self.sig) + + # Start a Redis connection. + redis_url = config['REDIS_URL'] + redis_connection = redis.from_url(redis_url) + + # Store the pipeline instance. + redis_connection.set(pipeline_id, self) + + # Create a similar structure to the old return + d = {} + d[pipeline_id] = {} + d[pipeline_id]['analysis'] = "Subtyping" + + d[pipeline_id]['file'] = self.files + print '_store_pipeline(): finished' + return d diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py index 107c98a6..7d132fee 100644 --- a/app/routes/ra_posts.py +++ b/app/routes/ra_posts.py @@ -266,7 +266,8 @@ def upload(): print 'upload(): all files enqueued, returning...' 
pipeline.merge_jobs() if groupresults: - return jsonify(handle_groupresults(jobs_dict)) + return jsonify(pipeline.store()) + # return jsonify(handle_groupresults(jobs_dict)) else: return jsonify(handle_singleton(jobs_dict)) else: diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 271fe565..77b18946 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -209,11 +209,19 @@ def test_pipeline_model_signature(): func = spfy, options = constants.ARGS_DICT ) + # Signatures should be generated on init. + assert p1.sig == p2.sig + + # Call the signature method to re-generate. r1 = p1.signature() r2 = p2.signature() # These are identical pipelines, should be equal. assert r1 == r2 + # Both methods of signature generation should be the same. + assert p1.sig == r1 + assert p2.sig == r2 + p1 = models.Pipeline( func = spfy, options = constants.ARGS_DICT From 8fe03e32ad5271e9f18eccec9fd5960756eed57c Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 13:43:09 -0500 Subject: [PATCH 050/122] STOP: 1st draft of full circle --- app/middleware/models.py | 5 +++-- app/routes/ra_statuses.py | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 8e62584f..2ee99dcc 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -2,6 +2,7 @@ import copy import config import redis +import cPickle as pickle from hashlib import sha1 from dis import dis from StringIO import StringIO @@ -198,7 +199,7 @@ def signature(self): def store(self): """ - Stores the pipeline to Redis DB and creates a pipeline id for return. + Stores the pipeline (via Pickle) to Redis DB and creates a pipeline id for return. :param pipeline: An instance of the models.Pipeline class. :return: (dict): {"pipeline..." id: "Subtyping"} """ @@ -209,7 +210,7 @@ def store(self): redis_connection = redis.from_url(redis_url) # Store the pipeline instance. - redis_connection.set(pipeline_id, self) + redis_connection.set(pipeline_id, pickle.dumps(self)) # Create a similar structure to the old return d = {} diff --git a/app/routes/ra_statuses.py b/app/routes/ra_statuses.py index 3d32cd6d..bf84dd2c 100644 --- a/app/routes/ra_statuses.py +++ b/app/routes/ra_statuses.py @@ -1,4 +1,5 @@ import redis +import cPickle as pickle from ast import literal_eval from flask import Blueprint, request, jsonify, current_app from routes.job_utils import fetch_job @@ -60,6 +61,27 @@ def job_status_reactapp_grouped(job_id, redis_connection): # if you've gotten to this point, then all jobs are finished return jsonify(merge_job_results(jobs_dict, redis_connection)) +def _status_pipeline(pipeline_id, redis_connection): + """ + Checks the status of a pipeline. Returns "pending", the exc_info if failed, or the result. + :param pipeline_id: + :param redis_connection: + :return: + """ + # Retrieve the models.Pipeline instance. + pipeline = pickle.loads(redis_connection.get(pipeline_id)) + complete = pipeline.complete() # Normally bool, but str if failed. + if isinstance(complete, bool): + if complete: + # Everything finished successfully. + return pipeline.to_json() + else: + # Some job in the pipeline is still pending. + return jsonify("pending") + else: + # Something failed and we have an exc_info. 
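# Note the contract this branch depends on: complete() is effectively
# tri-state -- True when every non-backlog job finished, False while any is
# still pending, and a string of exc_info when one failed -- which is why the
# isinstance(complete, bool) check above cleanly separates the success and
# pending cases from the failure case.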
+ return jsonify(complete) + @bp_ra_statuses.route('/api/v0/results/') def job_status_reactapp(job_id): ''' @@ -72,6 +94,8 @@ def job_status_reactapp(job_id): # check if the job_id is of the new format and should be handled diff if job_id.startswith('blob'): return job_status_reactapp_grouped(job_id, redis_connection) + elif job_id.startswith('pipeline'): + return _status_pipeline(job_id, redis_connection) else: # old code job = fetch_job(job_id, redis_connection) From 5cfd6cde9364a40ededef5d40ac7ef3748f9a20f Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 14:29:14 -0500 Subject: [PATCH 051/122] FIX: access the rq_job --- app/modules/spfy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 8fac574c..8d8b7421 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -364,7 +364,7 @@ def phylotyper_pipeline(multiples, subtype): subtype, tsvfile, id_file=query_file + '_id.txt', - depends_on=pipeline.jobs['job_ectyper_datastruct_vf']) + depends_on=pipeline.jobs['job_ectyper_datastruct_vf'].rq_job) job_pt_dict = multiples.enqueue( phylotyper.to_dict, tsvfile, subtype, picklefile, depends_on=job_pt) From 71f20df8643da5feb353309a8d6e1e0c3990221a Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 14:43:41 -0500 Subject: [PATCH 052/122] DEBUG: try without the depcopy --- app/middleware/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 2ee99dcc..3f9e1392 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -96,7 +96,7 @@ def cache_jobs(self): """ Copy current jobs to cache. """ - self.cache += [copy.deepcopy(self.jobs)] + self.cache += [self.jobs] self.jobs = {} def merge_jobs(self): From 538573bccfbb37b6f47f68d5459346457015e09a Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 16:05:03 -0500 Subject: [PATCH 053/122] FIX: namespace for config --- app/middleware/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 3f9e1392..425fdb6d 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -206,7 +206,7 @@ def store(self): pipeline_id = "pipeline{0}".format(self.sig) # Start a Redis connection. - redis_url = config['REDIS_URL'] + redis_url = config.REDIS_URL redis_connection = redis.from_url(redis_url) # Store the pipeline instance. From c3f8c84570cde3aac3636928dbd9aa38d7f6cc66 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 16:29:10 -0500 Subject: [PATCH 054/122] FIX: tao of pickle --- app/middleware/models.py | 67 ++++++++++++++++++++++++--------------- app/routes/ra_posts.py | 4 +-- app/routes/ra_statuses.py | 7 ++-- 3 files changed, 48 insertions(+), 30 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 425fdb6d..245d90b9 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -31,6 +31,46 @@ def model_to_json(model): else: raise Exception('model_to_json() called for a model without a handler.') +def store(pipeline): + """ + Stores the pipeline (via Pickle) to Redis DB and creates a pipeline id for return. + :param pipeline: An instance of the models.Pipeline class. + :return: (dict): {"pipeline..." id: "Subtyping"} + """ + pipeline_id = "pipeline{0}".format(pipeline.sig) + + # Start a Redis connection. + redis_url = config.REDIS_URL + redis_connection = redis.from_url(redis_url) + + # Store the pipeline instance. 
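# Unpickling later requires that the Pipeline class definition be importable
# wherever loads() runs. A minimal sketch of the round trip (hypothetical key;
# assumes a reachable Redis at config.REDIS_URL):
#
#     r = redis.from_url(config.REDIS_URL)
#     r.set('pipeline<sig>', pickle.dumps(pipeline))
#     restored = pickle.loads(r.get('pipeline<sig>'))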
+ redis_connection.set(pipeline_id, pickle.dumps(pipeline)) + + # Create a similar structure to the old return + d = {} + d[pipeline_id] = {} + d[pipeline_id]['analysis'] = "Subtyping" + + d[pipeline_id]['file'] = pipeline.files + print '_store_pipeline(): finished' + return d + +def load(pipeline_id): + """ + Must load Pipeline instances with this function, as a pickle.loads() needs + access to the Pipeline class definition to correctly load it. + :param pipeline_id: + :return: + """ + # Start a Redis connection. + redis_url = config.REDIS_URL + redis_connection = redis.from_url(redis_url) + + # Get the pipeline instance. + raw = redis_connection.get(pipeline_id) + pipeline = pickle.loads(raw) + return pipeline + class SubtypingRow(models.Base): analysis = fields.StringField(required=True) @@ -58,6 +98,7 @@ class PhylotyperRow(models.Base): class PhylotyperResult(models.Base): rows = fields.ListField([PhylotyperRow], nullable=True) + class Job(): def __init__(self, rq_job, name="", transitory=True, backlog=True, display=False): """ @@ -101,7 +142,7 @@ def cache_jobs(self): def merge_jobs(self): """ - + """ # If the jobs dictionary is not empty. if self.jobs: @@ -196,27 +237,3 @@ def signature(self): sig = hx.hexdigest() self.sig = sig return sig - - def store(self): - """ - Stores the pipeline (via Pickle) to Redis DB and creates a pipeline id for return. - :param pipeline: An instance of the models.Pipeline class. - :return: (dict): {"pipeline..." id: "Subtyping"} - """ - pipeline_id = "pipeline{0}".format(self.sig) - - # Start a Redis connection. - redis_url = config.REDIS_URL - redis_connection = redis.from_url(redis_url) - - # Store the pipeline instance. - redis_connection.set(pipeline_id, pickle.dumps(self)) - - # Create a similar structure to the old return - d = {} - d[pipeline_id] = {} - d[pipeline_id]['analysis'] = "Subtyping" - - d[pipeline_id]['file'] = self.files - print '_store_pipeline(): finished' - return d diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py index 7d132fee..241bbcc3 100644 --- a/app/routes/ra_posts.py +++ b/app/routes/ra_posts.py @@ -17,7 +17,7 @@ from modules.gc import blob_gc_enqueue from modules.spfy import spfy from middleware.api import subtyping_dependencies -from middleware.models import Pipeline +from middleware.models import Pipeline, store bp_ra_posts = Blueprint('reactapp_posts', __name__) @@ -266,7 +266,7 @@ def upload(): print 'upload(): all files enqueued, returning...' pipeline.merge_jobs() if groupresults: - return jsonify(pipeline.store()) + return jsonify(store(pipeline)) # return jsonify(handle_groupresults(jobs_dict)) else: return jsonify(handle_singleton(jobs_dict)) diff --git a/app/routes/ra_statuses.py b/app/routes/ra_statuses.py index bf84dd2c..4998fa01 100644 --- a/app/routes/ra_statuses.py +++ b/app/routes/ra_statuses.py @@ -3,6 +3,7 @@ from ast import literal_eval from flask import Blueprint, request, jsonify, current_app from routes.job_utils import fetch_job +from middleware.models import load bp_ra_statuses = Blueprint('reactapp_statuses', __name__) @@ -61,7 +62,7 @@ def job_status_reactapp_grouped(job_id, redis_connection): # if you've gotten to this point, then all jobs are finished return jsonify(merge_job_results(jobs_dict, redis_connection)) -def _status_pipeline(pipeline_id, redis_connection): +def _status_pipeline(pipeline_id): """ Checks the status of a pipeline. Returns "pending", the exc_info if failed, or the result. 
:param pipeline_id: @@ -69,7 +70,7 @@ def _status_pipeline(pipeline_id, redis_connection): :return: """ # Retrieve the models.Pipeline instance. - pipeline = pickle.loads(redis_connection.get(pipeline_id)) + pipeline = load(pipeline_id) complete = pipeline.complete() # Normally bool, but str if failed. if isinstance(complete, bool): if complete: @@ -95,7 +96,7 @@ def job_status_reactapp(job_id): if job_id.startswith('blob'): return job_status_reactapp_grouped(job_id, redis_connection) elif job_id.startswith('pipeline'): - return _status_pipeline(job_id, redis_connection) + return _status_pipeline(job_id) else: # old code job = fetch_job(job_id, redis_connection) From c3b2a0858fdfd9ca42f0fa3a1d889cb130bd62c3 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 17:13:50 -0500 Subject: [PATCH 055/122] FIX?: use dill instead of cPickle --- app/middleware/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 245d90b9..18cccc3b 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -2,7 +2,7 @@ import copy import config import redis -import cPickle as pickle +import dill as pickle from hashlib import sha1 from dis import dis from StringIO import StringIO @@ -59,8 +59,8 @@ def load(pipeline_id): """ Must load Pipeline instances with this function, as a pickle.loads() needs access to the Pipeline class definition to correctly load it. - :param pipeline_id: - :return: + :param pipeline_id: + :return: """ # Start a Redis connection. redis_url = config.REDIS_URL From efc97c4a90fa8e54e6294270e5d5ced45c0a7068 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 19:14:23 -0500 Subject: [PATCH 056/122] ADD: tests for pickling/unpickling Pipeline instances with dill --- app/tests/test_models.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 77b18946..6605f118 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -1,3 +1,4 @@ +import dill from middleware import models from modules.spfy import spfy from scripts.savvy import savvy @@ -59,19 +60,16 @@ def test_phylotyper_model_direct(l=constants.BEAUTIFY_STX1): # Return for incorporation into later tests. return phylotyper_result -def test_pipeline_model_subtyping(): - """ - Test the Pipeline model itself for subtyping via ECTyper and RGI. - """ +def _create_example_pipeline(): p = models.Pipeline( - func = spfy, - options = constants.ARGS_DICT + func=spfy, + options=constants.ARGS_DICT ) mock_serotype = MockRQJob( - result = test_subtyping_model_direct(constants.BEAUTIFY_SEROTYPE) + result=test_subtyping_model_direct(constants.BEAUTIFY_SEROTYPE) ) mock_vf = MockRQJob( - result = test_subtyping_model_direct(constants.BEAUTIFY_VF) + result=test_subtyping_model_direct(constants.BEAUTIFY_VF) ) # Mimicks a Serotype result that will be converted to json. p.jobs.update({ @@ -93,6 +91,15 @@ def test_pipeline_model_subtyping(): display=True ) }) + return p + +def test_pipeline_model_subtyping(p=None): + """ + Test the Pipeline model itself for subtyping via ECTyper and RGI. 
+ """ + if not p: + p = _create_example_pipeline() + assert isinstance(p, models.Pipeline) assert isinstance(p.jobs, dict) for k in p.jobs: @@ -129,6 +136,15 @@ def test_pipeline_model_subtyping(): json = p.to_json() assert isinstance(json, list) +def test_pipeline_model_dill(): + p = _create_example_pipeline() + # Test dumping the Pipeline into a str. + buffer = dill.dumps(p) + # Test loading the Pipeline from a str. + loaded_pipeline = dill.loads(buffer) + # Run the same tests on the loaded pipeline. + test_pipeline_model_subtyping(p=loaded_pipeline) + def test_pipeline_model_phyotyping(): """ Test the Pipeline model itself for subtyping via Phylotyper. From d8d3e8be538a70041341b45e54af006efa443be0 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 12:15:35 -0500 Subject: [PATCH 057/122] FIX: pipelines being stored in Redis & pipeline ids being generated. fixed call for model-to-graph --- app/middleware/graphers/datastruct_savvy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index 5a515e7e..9c6069e2 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -8,7 +8,7 @@ # working with Serotype, Antimicrobial Resistance, & Virulence Factor data # structures -def _convert_subtyping(graph, model, uriIsolate): +def _graph_subtyping(graph, model, uriIsolate): # Convert the model to a graph. struct = model.to_struct() rows_list = struct['rows'] @@ -30,7 +30,7 @@ def model_to_graph(graph, model, uriIsolate): model.validate() # Conversion. if isinstance(model, SubtypingResult): - return _convert_subtyping(model) + return _graph_subtyping(graph, model, uriIsolate) else: raise Exception('model_to_graph() called for a model without a handler.') From e01193cdec03d642e2e2d0ecd53f61fbd510b149 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 14:05:39 -0500 Subject: [PATCH 058/122] ADD: convert VF to model and return --- app/middleware/display/beautify.py | 14 ++++++++------ app/middleware/modellers.py | 4 ++-- app/modules/ectyper/call_ectyper.py | 3 ++- app/modules/spfy.py | 4 ++-- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index f51fb232..69f18a29 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -5,6 +5,7 @@ from middleware.display.find_widest import check_alleles from middleware.graphers.turtle_utils import actual_filename from middleware.models import SubtypingResult, model_to_json +from middleware.modellers import model_vf # logging log_file = initialize_logging() @@ -122,18 +123,19 @@ def beautify(pickled_result, args_dict=None): result = pickle.load(open(pickled_result, 'rb')) if isinstance(result, dict): gene_dict = result - # this converts our dictionary structure into json and adds metadata (filename, etc.) + # Convert the old ECTYper's dictionary structure into list and adds metadata (filename, etc.). json_r = json_return(args_dict, gene_dict) - log.debug('First parse into json_r: ' + str(json_r)) - # if looking for only serotype, skip this step + # For VF/AMR, find widest gene matched. Strip shorter matches. 
if args_dict['options']['vf'] or args_dict['options']['amr']: json_r = check_alleles(json_r) - log.debug('After checking alleles json_r: ' + str(json_r)) - # check if there is an analysis module that has failed in the result + # Check if there is an analysis module that has failed in the result. if has_failed(json_r): + # If failed, return. return handle_failed(json_r, args_dict) else: - return json_r + # Everything worked, cast result into a model. + model = model_vf(json_r) + return model_to_json(model) elif isinstance(result, SubtypingResult): return model_to_json(result) else: diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index 2ea7a1e0..06aa2b8e 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -33,7 +33,7 @@ def model_serotype(pi, pl, output_file): ) return subtyping_result -def model_vf(json_r, analysis="Virulence Factors"): +def model_vf(json_r): """ Casts the output from display.beautify into a SubtypingResult object. """ @@ -41,7 +41,7 @@ def model_vf(json_r, analysis="Virulence Factors"): assert isinstance(json_r, list) subtyping_list = [ SubtypingRow( - analysis=analysis, + analysis=item('analysis'), contigid=item['contigid'], filename=item['filename'], hitcutoff=item['hitcutoff'], diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py index a7bb2aab..92511343 100644 --- a/app/modules/ectyper/call_ectyper.py +++ b/app/modules/ectyper/call_ectyper.py @@ -58,6 +58,7 @@ def call_ectyper_vf(args_dict): # we are calling tools_controller on only one file, so grab that dict key, ectyper_dict = ectyper_dict.popitem() + # TODO: convert this to a VF model. # Path for the pickle dump. p = filepath + '_ectyper_vf.p' pickle.dump(ectyper_dict,open(p,'wb')) @@ -91,7 +92,7 @@ def call_ectyper_serotype(args_dict): output_file=output_file ) # Path for the pickle dump. - p = genome_file + '_ectyper_serotype.p' + p = genome_file + '_ectyper_serotype.model' pickle.dump(subtyping_result,open(p,'wb')) return p else: diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 8d8b7421..10dd3e13 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -169,7 +169,7 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe datastruct_savvy, query_file, query_file + '_id.txt', - query_file + '_ectyper_serotype.p', + query_file + '_ectyper_serotype.model', depends_on=job_ectyper_serotype, result_ttl=ttl_value) d['job_ectyper_datastruct_serotype'] = job_ectyper_datastruct_serotype @@ -188,7 +188,7 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe # serotype, and we're not in bulk uploading. 
job_ectyper_beautify_serotype = multiples.enqueue( beautify, - pickled_result = query_file + '_ectyper_serotype.p', + pickled_result = query_file + '_ectyper_serotype.model', depends_on=job_ectyper_serotype, result_ttl=ttl_value ) From db9ecf7416330e9081662f0c54f7de216a5535f0 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 14:33:42 -0500 Subject: [PATCH 059/122] FIX: shouldnt record backlog items into the pipeline at all --- app/middleware/models.py | 2 +- app/modules/spfy.py | 126 ++++++++++++++++++++------------------ app/routes/ra_posts.py | 1 - app/routes/ra_statuses.py | 9 +-- 4 files changed, 74 insertions(+), 64 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 18cccc3b..9e43e996 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -149,7 +149,7 @@ def merge_jobs(self): self.cache_jobs() # Actual merge. Notice were converting to list. self.final_jobs = [ - j + j # Where j is our custom Job class, not an rq_job for d in self.cache for j in d.values() ] diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 10dd3e13..cfcd4c07 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -71,15 +71,17 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N depends_on=job_id) # TODO: this is double, switch everything to pipeline once tested d['job_ectyper_vf'] = job_ectyper_vf - pipeline.jobs.update({ - 'job_ectyper_vf': Job( - rq_job=job_ectyper_vf, - name='job_ectyper_vf', - transitory=True, - backlog=False, - display=False - ) - }) + # pipeline is only passed if not running in backlog. + if pipeline: + pipeline.jobs.update({ + 'job_ectyper_vf': Job( + rq_job=job_ectyper_vf, + name='job_ectyper_vf', + transitory=True, + backlog=False, + display=False + ) + }) # If bulk uploading is set, we return the datastruct as the end task # to poll for job completion, therefore must set ttl of -1. @@ -97,15 +99,17 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N depends_on=job_ectyper_vf, result_ttl=ttl_value) d['job_ectyper_datastruct_vf'] = job_ectyper_datastruct_vf - pipeline.jobs.update({ - 'job_ectyper_datastruct_vf': Job( - rq_job=job_ectyper_datastruct_vf, - name='job_ectyper_datastruct_vf', - transitory=True, - backlog=False, - display=False - ) - }) + # pipeline is only passed if not running in backlog. + if pipeline: + pipeline.jobs.update({ + 'job_ectyper_datastruct_vf': Job( + rq_job=job_ectyper_datastruct_vf, + name='job_ectyper_datastruct_vf', + transitory=True, + backlog=False, + display=False + ) + }) if not single_dict['options']['bulk']: # Only bother parsing into json if user has requested either vf or @@ -118,15 +122,17 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N result_ttl=ttl_value ) d['job_ectyper_beautify_vf'] = job_ectyper_beautify_vf - pipeline.jobs.update({ - 'job_ectyper_beautify_vf': Job( - rq_job=job_ectyper_beautify_vf, - name='job_ectyper_beautify_vf', - transitory=False, - backlog=False, - display=True - ) - }) + # pipeline is only passed if not running in backlog. 
+ if pipeline: + pipeline.jobs.update({ + 'job_ectyper_beautify_vf': Job( + rq_job=job_ectyper_beautify_vf, + name='job_ectyper_beautify_vf', + transitory=False, + backlog=False, + display=True + ) + }) return d def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipeline=None): @@ -147,15 +153,17 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe single_dict_vf, depends_on=job_id) d['job_ectyper_serotype'] = job_ectyper_serotype - pipeline.jobs.update({ - 'job_ectyper_serotype': Job( - rq_job=job_ectyper_serotype, - name='job_ectyper_serotype', - transitory=True, - backlog=False, - display=False - ) - }) + # pipeline is only passed if not running in backlog. + if pipeline: + pipeline.jobs.update({ + 'job_ectyper_serotype': Job( + rq_job=job_ectyper_serotype, + name='job_ectyper_serotype', + transitory=True, + backlog=False, + display=False + ) + }) # If bulk uploading is set, we return the datastruct as the end task # to poll for job completion, therefore must set ttl of -1. @@ -173,15 +181,17 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe depends_on=job_ectyper_serotype, result_ttl=ttl_value) d['job_ectyper_datastruct_serotype'] = job_ectyper_datastruct_serotype - pipeline.jobs.update({ - 'job_ectyper_datastruct_serotype': Job( - rq_job=job_ectyper_datastruct_serotype, - name='job_ectyper_datastruct_serotype', - transitory=True, - backlog=False, - display=False - ) - }) + # pipeline is only passed if not running in backlog. + if pipeline: + pipeline.jobs.update({ + 'job_ectyper_datastruct_serotype': Job( + rq_job=job_ectyper_datastruct_serotype, + name='job_ectyper_datastruct_serotype', + transitory=True, + backlog=False, + display=False + ) + }) if not single_dict['options']['bulk']: # Only bother parsing into json if user has requested either vf or @@ -193,15 +203,17 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe result_ttl=ttl_value ) d['job_ectyper_beautify_serotype'] = job_ectyper_beautify_serotype - pipeline.jobs.update({ - 'job_ectyper_beautify_serotype': Job( - rq_job=job_ectyper_beautify_serotype, - name='job_ectyper_beautify_serotype', - transitory=False, - backlog=False, - display=True - ) - }) + # pipeline is only passed if not running in backlog. + if pipeline: + pipeline.jobs.update({ + 'job_ectyper_beautify_serotype': Job( + rq_job=job_ectyper_beautify_serotype, + name='job_ectyper_beautify_serotype', + transitory=False, + backlog=False, + display=True + ) + }) return d def blob_savvy_enqueue(single_dict, pipeline): @@ -273,8 +285,7 @@ def blob_savvy_enqueue(single_dict, pipeline): backlog_singles_q, backlog_multiples_q, query_file, - backlog_d, - pipeline=pipeline + backlog_d ) # Serotype @@ -307,8 +318,7 @@ def blob_savvy_enqueue(single_dict, pipeline): backlog_singles_q, backlog_multiples_q, query_file, - backlog_d, - pipeline=pipeline + backlog_d ) # END ECTYPER PIPELINE diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py index 241bbcc3..28e15d07 100644 --- a/app/routes/ra_posts.py +++ b/app/routes/ra_posts.py @@ -262,7 +262,6 @@ def upload(): ) jobs_dict.update(jobs_enqueued) pipeline.cache_jobs() - # new in 4.2.0 print 'upload(): all files enqueued, returning...' 
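# The request lifecycle these lines implement, reduced to a sketch
# (enqueue_one_file is a hypothetical stand-in for blob_savvy_enqueue):
#
#     for f in request_files:
#         jobs_dict.update(enqueue_one_file(f, pipeline))
#         pipeline.cache_jobs()           # stash this file's jobs, reset .jobs
#     pipeline.merge_jobs()               # flatten the cache into final_jobs
#     pipeline_id_dict = store(pipeline)  # pickle to Redis, return a poll id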
pipeline.merge_jobs() if groupresults: diff --git a/app/routes/ra_statuses.py b/app/routes/ra_statuses.py index 4998fa01..bd95ad1b 100644 --- a/app/routes/ra_statuses.py +++ b/app/routes/ra_statuses.py @@ -3,7 +3,7 @@ from ast import literal_eval from flask import Blueprint, request, jsonify, current_app from routes.job_utils import fetch_job -from middleware.models import load +from middleware.models import load, Pipeline bp_ra_statuses = Blueprint('reactapp_statuses', __name__) @@ -65,12 +65,13 @@ def job_status_reactapp_grouped(job_id, redis_connection): def _status_pipeline(pipeline_id): """ Checks the status of a pipeline. Returns "pending", the exc_info if failed, or the result. - :param pipeline_id: - :param redis_connection: - :return: + :param pipeline_id: + :param redis_connection: + :return: """ # Retrieve the models.Pipeline instance. pipeline = load(pipeline_id) + assert isinstance(pipeline, Pipeline) complete = pipeline.complete() # Normally bool, but str if failed. if isinstance(complete, bool): if complete: From 9956eb4c9261d546a8ded9d4a140775b547856e1 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 14:57:58 -0500 Subject: [PATCH 060/122] CHANGE: pass a backlog flag instead of different queues --- app/modules/spfy.py | 152 +++++++++++++++++++++----------------------- 1 file changed, 74 insertions(+), 78 deletions(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index cfcd4c07..e1fb7cd0 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -51,7 +51,7 @@ backlog_multiples_q = Queue( 'backlog_multiples', connection=redis_conn, default_timeout=config.DEFAULT_TIMEOUT) -def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=None): +def _ectyper_pipeline_vf(query_file, single_dict, pipeline=None, backlog=False): """ Enqueue all the jobs required for VF. """ @@ -59,6 +59,12 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N d = {} # Alias. job_id = pipeline.jobs['job_id'].rq_job + if not backlog: + singles = singles_q + multiples = multiples_q + else: + singles = backlog_singles_q + multiples = backlog_multiples_q # Create a copy of the arguments dictionary and disable Serotype. # This copy is passed to the old ECTyper. @@ -71,17 +77,15 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N depends_on=job_id) # TODO: this is double, switch everything to pipeline once tested d['job_ectyper_vf'] = job_ectyper_vf - # pipeline is only passed if not running in backlog. - if pipeline: - pipeline.jobs.update({ - 'job_ectyper_vf': Job( - rq_job=job_ectyper_vf, - name='job_ectyper_vf', - transitory=True, - backlog=False, - display=False - ) - }) + pipeline.jobs.update({ + 'job_ectyper_vf': Job( + rq_job=job_ectyper_vf, + name='job_ectyper_vf', + transitory=True, + backlog=backlog, + display=False + ) + }) # If bulk uploading is set, we return the datastruct as the end task # to poll for job completion, therefore must set ttl of -1. @@ -99,17 +103,15 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N depends_on=job_ectyper_vf, result_ttl=ttl_value) d['job_ectyper_datastruct_vf'] = job_ectyper_datastruct_vf - # pipeline is only passed if not running in backlog. 
- if pipeline: - pipeline.jobs.update({ - 'job_ectyper_datastruct_vf': Job( - rq_job=job_ectyper_datastruct_vf, - name='job_ectyper_datastruct_vf', - transitory=True, - backlog=False, - display=False - ) - }) + pipeline.jobs.update({ + 'job_ectyper_datastruct_vf': Job( + rq_job=job_ectyper_datastruct_vf, + name='job_ectyper_datastruct_vf', + transitory=True, + backlog=backlog, + display=False + ) + }) if not single_dict['options']['bulk']: # Only bother parsing into json if user has requested either vf or @@ -122,20 +124,18 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N result_ttl=ttl_value ) d['job_ectyper_beautify_vf'] = job_ectyper_beautify_vf - # pipeline is only passed if not running in backlog. - if pipeline: - pipeline.jobs.update({ - 'job_ectyper_beautify_vf': Job( - rq_job=job_ectyper_beautify_vf, - name='job_ectyper_beautify_vf', - transitory=False, - backlog=False, - display=True - ) - }) + pipeline.jobs.update({ + 'job_ectyper_beautify_vf': Job( + rq_job=job_ectyper_beautify_vf, + name='job_ectyper_beautify_vf', + transitory=False, + backlog=backlog, + display=True + ) + }) return d -def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipeline=None): +def _ectyper_pipeline_serotype(query_file, single_dict, pipeline=None, backlog=False): """ Enqueue all the jobs required for VF. """ @@ -143,6 +143,12 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe d = {} # Alias. job_id = pipeline.jobs['job_id'].rq_job + if not backlog: + singles = singles_q + multiples = multiples_q + else: + singles = backlog_singles_q + multiples = backlog_multiples_q # Create a copy of the arguments dictionary and disable Serotype. # This copy is passed to the old ECTyper. @@ -153,17 +159,15 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe single_dict_vf, depends_on=job_id) d['job_ectyper_serotype'] = job_ectyper_serotype - # pipeline is only passed if not running in backlog. - if pipeline: - pipeline.jobs.update({ - 'job_ectyper_serotype': Job( - rq_job=job_ectyper_serotype, - name='job_ectyper_serotype', - transitory=True, - backlog=False, - display=False - ) - }) + pipeline.jobs.update({ + 'job_ectyper_serotype': Job( + rq_job=job_ectyper_serotype, + name='job_ectyper_serotype', + transitory=True, + backlog=backlog, + display=False + ) + }) # If bulk uploading is set, we return the datastruct as the end task # to poll for job completion, therefore must set ttl of -1. @@ -181,17 +185,15 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe depends_on=job_ectyper_serotype, result_ttl=ttl_value) d['job_ectyper_datastruct_serotype'] = job_ectyper_datastruct_serotype - # pipeline is only passed if not running in backlog. 
- if pipeline: - pipeline.jobs.update({ - 'job_ectyper_datastruct_serotype': Job( - rq_job=job_ectyper_datastruct_serotype, - name='job_ectyper_datastruct_serotype', - transitory=True, - backlog=False, - display=False - ) - }) + pipeline.jobs.update({ + 'job_ectyper_datastruct_serotype': Job( + rq_job=job_ectyper_datastruct_serotype, + name='job_ectyper_datastruct_serotype', + transitory=True, + backlog=backlog, + display=False + ) + }) if not single_dict['options']['bulk']: # Only bother parsing into json if user has requested either vf or @@ -203,17 +205,15 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe result_ttl=ttl_value ) d['job_ectyper_beautify_serotype'] = job_ectyper_beautify_serotype - # pipeline is only passed if not running in backlog. - if pipeline: - pipeline.jobs.update({ - 'job_ectyper_beautify_serotype': Job( - rq_job=job_ectyper_beautify_serotype, - name='job_ectyper_beautify_serotype', - transitory=False, - backlog=False, - display=True - ) - }) + pipeline.jobs.update({ + 'job_ectyper_beautify_serotype': Job( + rq_job=job_ectyper_beautify_serotype, + name='job_ectyper_beautify_serotype', + transitory=False, + backlog=backlog, + display=True + ) + }) return d def blob_savvy_enqueue(single_dict, pipeline): @@ -257,8 +257,6 @@ def blob_savvy_enqueue(single_dict, pipeline): # VF if single_dict['options']['vf']: ectyper_vf_jobs = _ectyper_pipeline_vf( - singles_q, - multiples_q, query_file, single_dict, pipeline=pipeline @@ -282,17 +280,15 @@ def blob_savvy_enqueue(single_dict, pipeline): backlog_d['options']['serotype'] = False # Note: we use different queues. _ectyper_pipeline_vf( - backlog_singles_q, - backlog_multiples_q, query_file, - backlog_d + backlog_d, + pipeline=pipeline, + backlog=True ) # Serotype if single_dict['options']['serotype']: ectyper_serotype_jobs = _ectyper_pipeline_serotype( - singles_q, - multiples_q, query_file, single_dict, pipeline=pipeline @@ -315,10 +311,10 @@ def blob_savvy_enqueue(single_dict, pipeline): backlog_d['options']['vf'] = False backlog_d['options']['serotype'] = True _ectyper_pipeline_serotype( - backlog_singles_q, - backlog_multiples_q, query_file, - backlog_d + backlog_d, + pipeline=pipeline, + backlog=True ) # END ECTYPER PIPELINE From 9f0c91173c40330e6721b6df13de822f4c6a052f Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 15:15:59 -0500 Subject: [PATCH 061/122] DEBUG: check why to_json / model_to_json(model) is being sent a list for the model --- app/middleware/models.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/app/middleware/models.py b/app/middleware/models.py index 9e43e996..5b6fe877 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -37,6 +37,7 @@ def store(pipeline): :param pipeline: An instance of the models.Pipeline class. :return: (dict): {"pipeline..." id: "Subtyping"} """ + assert isinstance(pipeline, Pipeline) pipeline_id = "pipeline{0}".format(pipeline.sig) # Start a Redis connection. @@ -69,6 +70,7 @@ def load(pipeline_id): # Get the pipeline instance. 
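
store() and load() bracket a Pipeline's life in Redis: serialize under a deterministic "pipeline{sig}" key, then deserialize and type-check on the way back out. A toy round-trip under the same contract (assumes a reachable Redis; the stdlib pickler stands in for the dill-based one used here):

    import pickle
    import redis

    def roundtrip(obj, key, url='redis://localhost:6379'):
        conn = redis.from_url(url)
        conn.set(key, pickle.dumps(obj))
        loaded = pickle.loads(conn.get(key))
        # Fail fast on a corrupt or mistyped payload, mirroring the asserts above.
        assert type(loaded) is type(obj)
        return loaded
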
    raw = redis_connection.get(pipeline_id)
     pipeline = pickle.loads(raw)
+    assert isinstance(pipeline, Pipeline)
     return pipeline
 
 
@@ -188,6 +190,10 @@ def to_json(self):
         l = []
         for rq_job in completed_jobs:
             model = rq_job.result
+            try:
+                assert isinstance(model, models.Base)
+            except:
+                raise Exception("to_json() called with result of type {0} and info {1}".format(type(model), str(model)))
             list_json = model_to_json(model)
             l += list_json
         return l

From 53ef008b30dca717fed378103e8e7eca580d34fc Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 19 Feb 2018 15:28:26 -0500
Subject: [PATCH 062/122] DEBUG: looks like sometimes gives an empty list
---
 app/middleware/models.py | 2 ++
 app/routes/ra_posts.py   | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/app/middleware/models.py b/app/middleware/models.py
index 5b6fe877..cfabd765 100644
--- a/app/middleware/models.py
+++ b/app/middleware/models.py
@@ -160,6 +160,7 @@ def complete(self):
         """
         Check if all jobs are completed
         """
+        print("complete() checking status for: {0}".format(str(self.final_jobs)))
         for j in self.final_jobs:
             # Type check.
             assert isinstance(j, Job)
@@ -186,6 +187,7 @@ def to_json(self):
             j.rq_job for j in self.final_jobs
             if j.display and j.rq_job.is_finished and not j.rq_job.is_failed
         ]
+        print("to_json() completed_jobs: {0}".format(str(completed_jobs)))
         # Merge the json lists together.
         l = []
diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py
index 28e15d07..b9ecbff7 100644
--- a/app/routes/ra_posts.py
+++ b/app/routes/ra_posts.py
@@ -264,8 +264,10 @@ def upload():
     pipeline.cache_jobs()
     print 'upload(): all files enqueued, returning...'
     pipeline.merge_jobs()
+    print("upload() pipeline jobs: {0}".formate(str(pipeline.all_jobs)))
+    pipeline_id = store(pipeline)
     if groupresults:
-        return jsonify(store(pipeline))
+        return jsonify(pipeline_id)
         # return jsonify(handle_groupresults(jobs_dict))
     else:
         return jsonify(handle_singleton(jobs_dict))

From 283bc3fe01c13fcce0cc14a9c456ef231fb4db95 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 19 Feb 2018 15:30:43 -0500
Subject: [PATCH 063/122] DEBUG: check what's going on with the complete
 function as well
---
 app/middleware/models.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/app/middleware/models.py b/app/middleware/models.py
index cfabd765..701188f4 100644
--- a/app/middleware/models.py
+++ b/app/middleware/models.py
@@ -167,12 +167,15 @@ def complete(self):
             rq_job = j.rq_job
             if j.backlog:
                 # Some backlog job, we don't care (though Sentry will catch it).
+                print("complete(): job {0} is in backlog.".format(j.name))
                 continue
             elif rq_job.is_failed:
                 # If the job failed, return the error.
+                print("complete(): job {0} is failed with exc_info {1}.".format(j.name, rq_job.exc_info))
                 return rq_job.exc_info
             elif not rq_job.is_finished:
                 # One of the jobs hasn't finished.
+                print("complete(): job {0} has not finished.".format(j.name))
                 return False
         return True

From 8b62e29d4d9c3e516dd3f1556e4b47da6736caf5 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 19 Feb 2018 15:43:56 -0500
Subject: [PATCH 064/122] FIX: typo
---
 app/routes/ra_posts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py
index b9ecbff7..ab9124cc 100644
--- a/app/routes/ra_posts.py
+++ b/app/routes/ra_posts.py
@@ -264,7 +264,7 @@ def upload():
     pipeline.cache_jobs()
     print 'upload(): all files enqueued, returning...'
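
The debug prints added above trace complete()'s tri-state contract: True once every non-backlog job finished, False while anything is still pending, and the RQ exc_info string when a job failed. A reduced sketch of consuming that contract on the route side:

    def describe_status(complete_result):
        # Anything non-bool is the traceback string from the failed RQ job.
        if not isinstance(complete_result, bool):
            return 'failed: {0}'.format(complete_result)
        return 'done' if complete_result else 'pending'
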
pipeline.merge_jobs() - print("upload() pipeline jobs: {0}".formate(str(pipeline.all_jobs))) + print("upload() pipeline jobs: {0}".format(str(pipeline.all_jobs))) pipeline_id = store(pipeline) if groupresults: return jsonify(pipeline_id) From 04d7ffab6d9d7d900ca8551466a46756cb9eb78f Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 15:48:45 -0500 Subject: [PATCH 065/122] FIX: typo --- app/routes/ra_posts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py index ab9124cc..1f1f922e 100644 --- a/app/routes/ra_posts.py +++ b/app/routes/ra_posts.py @@ -264,7 +264,7 @@ def upload(): pipeline.cache_jobs() print 'upload(): all files enqueued, returning...' pipeline.merge_jobs() - print("upload() pipeline jobs: {0}".format(str(pipeline.all_jobs))) + print("upload() pipeline jobs: {0}".format(str(pipeline.final_jobs))) pipeline_id = store(pipeline) if groupresults: return jsonify(pipeline_id) From 55af49cfe8a2ac3b6e97b2fc3dd68c3b66680cc0 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 16:39:42 -0500 Subject: [PATCH 066/122] ADD: some checks that beautify() is returning a list not dict --- app/middleware/modellers.py | 1 + app/tests/test_beautify.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index 06aa2b8e..fd10a2c8 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -39,6 +39,7 @@ def model_vf(json_r): """ # Type check. assert isinstance(json_r, list) + print("model_vf() called with type {0} containing {1}".format(type(json_r), str(json_r))) subtyping_list = [ SubtypingRow( analysis=item('analysis'), diff --git a/app/tests/test_beautify.py b/app/tests/test_beautify.py index 6b97a814..aa73caf1 100644 --- a/app/tests/test_beautify.py +++ b/app/tests/test_beautify.py @@ -13,7 +13,9 @@ def test_beautify_vf_serotype(): ## test vf & serotype json return single_dict = dict(ARGS_DICT) single_dict.update({'i': vf_serotype_gene_dict}) - assert len(beautify(vf_serotype_gene_dict, single_dict)) == len(BEAUTIFY_VF_SEROTYPE) + r = beautify(vf_serotype_gene_dict, single_dict) + assert isinstance(r, list) + assert len(r) == len(BEAUTIFY_VF_SEROTYPE) def test_beautify_serotype_only(): ## test serotype only json return @@ -25,6 +27,7 @@ def test_beautify_serotype_only(): single_dict.update({'options':{'vf': False, 'amr': False, 'serotype': True}}) # beautify is what is actually called by the RQ worker & returned to the user r = beautify(vf_serotype_gene_dict, single_dict) + assert isinstance(r, list) assert len(r) == 1 def test_beautify_json_r_serotype_only(): @@ -49,6 +52,7 @@ def test_beautify_amr_only(): # this mimicks user selection of serotype only single_dict.update({'options':{'vf': False, 'amr': True, 'serotype': False}}) r = beautify(amr_gene_dict, single_dict) + assert isinstance(r, list) assert len(r) > 1 def test_beautify_json_r_amr_only(): From 360c02b6bfab32cc6a3de87d0640d3cd4566a435 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 17:22:44 -0500 Subject: [PATCH 067/122] DEBUG: keep same return for beautify() so we can see where tests fail --- app/middleware/display/beautify.py | 35 +++++++++++++++++------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index 69f18a29..ab356beb 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -111,7 +111,7 @@ def 
handle_failed(json_r, args_dict): return ret # TODO: convert this to models-only. -def beautify(pickled_result, args_dict=None): +def beautify(gene_dict, args_dict=None): ''' Converts a given 'spit' datum (a dictionary with our results from rgi/ectyper) to a json form used by the frontend. This result is to be stored in Redis by the calling RQ Worker. :param args_dict: The arguments supplied by the user. In the case of spfy web-app, this is used to determine which analysis options were set. @@ -119,23 +119,28 @@ def beautify(pickled_result, args_dict=None): :param gene_dict: optionally, if using this to test via cli, you can supply the actual dictionary object. :return: json representation of the results, as required by the front-end. ''' + # Convert the old ECTYper's dictionary structure into list and adds metadata (filename, etc.). + json_r = json_return(args_dict, gene_dict) + # For VF/AMR, find widest gene matched. Strip shorter matches. + if args_dict['options']['vf'] or args_dict['options']['amr']: + json_r = check_alleles(json_r) + # Check if there is an analysis module that has failed in the result. + if has_failed(json_r): + # If failed, return. + return handle_failed(json_r, args_dict) + else: + return json_r + # Everything worked, cast result into a model. + model = model_vf(json_r) + return model_to_json(model) +def display_subtyping(pickled_result, args_dict=None): result = pickle.load(open(pickled_result, 'rb')) if isinstance(result, dict): - gene_dict = result - # Convert the old ECTYper's dictionary structure into list and adds metadata (filename, etc.). - json_r = json_return(args_dict, gene_dict) - # For VF/AMR, find widest gene matched. Strip shorter matches. - if args_dict['options']['vf'] or args_dict['options']['amr']: - json_r = check_alleles(json_r) - # Check if there is an analysis module that has failed in the result. - if has_failed(json_r): - # If failed, return. - return handle_failed(json_r, args_dict) - else: - # Everything worked, cast result into a model. - model = model_vf(json_r) - return model_to_json(model) + list_return = beautify(result, args_dict) + assert isinstance(list_return, list) + model = model_vf(json_r) + return model_to_json(model) elif isinstance(result, SubtypingResult): return model_to_json(result) else: From 6c8386aeab9e59d0bf13824736b1ec98d3f7cc2e Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 17:55:20 -0500 Subject: [PATCH 068/122] DEBUG: let beautify load --- app/middleware/display/beautify.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index ab356beb..d0a6a776 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -12,7 +12,7 @@ log = logging.getLogger(__name__) -def json_return(args_dict, gene_dict): +def json_return(gene_dict, args_dict): """ This converts the gene dict into a json format for return to the front end """ @@ -119,8 +119,10 @@ def beautify(gene_dict, args_dict=None): :param gene_dict: optionally, if using this to test via cli, you can supply the actual dictionary object. :return: json representation of the results, as required by the front-end. ''' + if isinstance(gene_dict, str): # For the tests. + gene_dict = pickle.load(open(gene_dict, 'rb')) # Convert the old ECTYper's dictionary structure into list and adds metadata (filename, etc.). - json_r = json_return(args_dict, gene_dict) + json_r = json_return(gene_dict, args_dict) # For VF/AMR, find widest gene matched. 
Strip shorter matches. if args_dict['options']['vf'] or args_dict['options']['amr']: json_r = check_alleles(json_r) From 5d61f695afe4d229e60318817bdaee60f2e753cc Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 18:31:00 -0500 Subject: [PATCH 069/122] FIX: tests call named params --- app/tests/test_beautify.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/tests/test_beautify.py b/app/tests/test_beautify.py index aa73caf1..3fbab9ed 100644 --- a/app/tests/test_beautify.py +++ b/app/tests/test_beautify.py @@ -40,7 +40,7 @@ def test_beautify_json_r_serotype_only(): gene_dict = pickle.load(open(vf_serotype_gene_dict, 'rb')) assert type(gene_dict) == dict assert len(gene_dict.keys()) == 2 - r = json_return(single_dict, gene_dict) + r = json_return(gene_dict=gene_dict, args_dict=single_dict) assert len(r) == 1 failed = has_failed(r) @@ -64,7 +64,7 @@ def test_beautify_json_r_amr_only(): assert type(gene_dict) == dict assert len(gene_dict.keys()) == 1 assert 'Antimicrobial Resistance' in gene_dict.keys() - r = json_return(single_dict, gene_dict) + r = json_return(gene_dict=gene_dict, args_dict=single_dict) assert len(r) > 1 ## test some pandas stuff on the json_r From 31a9c2bf3b4bb3398d10d8a023a851ad1df08023 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 21:38:23 -0500 Subject: [PATCH 070/122] CHANGE: tests passing, try centralizing the unpickling --- app/middleware/display/beautify.py | 10 +++++----- app/middleware/graphers/datastruct_savvy.py | 4 ++-- app/middleware/models.py | 9 +++++++++ app/modules/spfy.py | 13 +++++-------- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index d0a6a776..f439fde9 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -4,7 +4,7 @@ from modules.loggingFunctions import initialize_logging from middleware.display.find_widest import check_alleles from middleware.graphers.turtle_utils import actual_filename -from middleware.models import SubtypingResult, model_to_json +from middleware.models import SubtypingResult, model_to_json, unpickle from middleware.modellers import model_vf # logging @@ -133,15 +133,15 @@ def beautify(gene_dict, args_dict=None): else: return json_r # Everything worked, cast result into a model. 
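
display_subtyping() is becoming a dispatcher on the unpickled payload's type: plain dicts from the old ECTyper/RGI path still go through beautify(), while pre-modelled serotype results convert straight to the front-end form. A sketch of that dispatch with hypothetical handler callables:

    import pickle

    def display(path, handle_gene_dict, handle_model):
        with open(path, 'rb') as fh:
            result = pickle.load(fh)
        if isinstance(result, dict):
            return handle_gene_dict(result)  # old ECTyper / RGI payload
        return handle_model(result)          # pre-modelled serotype result
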
- model = model_vf(json_r) - return model_to_json(model) + # model = model_vf(json_r) + # return model_to_json(model) def display_subtyping(pickled_result, args_dict=None): - result = pickle.load(open(pickled_result, 'rb')) + result = unpickle(pickled_result) if isinstance(result, dict): list_return = beautify(result, args_dict) assert isinstance(list_return, list) - model = model_vf(json_r) + model = model_vf(list_return) return model_to_json(model) elif isinstance(result, SubtypingResult): return model_to_json(result) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index 9c6069e2..43b729ee 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -4,7 +4,7 @@ from middleware.graphers.turtle_grapher import generate_graph from middleware.blazegraph.upload_graph import queue_upload from modules.PanPredic.pan_utils import contig_name_parse -from middleware.models import SubtypingResult +from middleware.models import SubtypingResult, unpickle # working with Serotype, Antimicrobial Resistance, & Virulence Factor data # structures @@ -196,7 +196,7 @@ def generate_datastruct(query_file, id_file, pickled_dictionary): uriIsolate = gu(':spfy' + str(spfyid)) # Unpickle. - results = pickle.load(open(pickled_dictionary, 'rb')) + results = unpickle(pickled_dictionary) # Check if we have a model or a dictionary. if isinstance(results, dict): # graphing functions diff --git a/app/middleware/models.py b/app/middleware/models.py index 701188f4..cc01d2e4 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -73,6 +73,15 @@ def load(pipeline_id): assert isinstance(pipeline, Pipeline) return pipeline +def unpickle(pickled_file): + """ + Define a function for unpickling. Should address issues with unpickling custom classes. + :param pickled_file: + :return: + """ + unpickled = pickle.load(open(pickled_file, 'rb')) + assert isinstance(unpickled, (models.Base, Pipeline, dict, list)) + return unpickled class SubtypingRow(models.Base): analysis = fields.StringField(required=True) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index e1fb7cd0..a1a9b6fd 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -21,7 +21,7 @@ from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype from modules.amr.amr import amr from modules.amr.amr_to_dict import amr_to_dict -from middleware.display.beautify import beautify +from middleware.display.beautify import beautify, display_subtyping from middleware.graphers.datastruct_savvy import datastruct_savvy from middleware.graphers.turtle_grapher import turtle_grapher from middleware.graphers.turtle_utils import actual_filename @@ -117,7 +117,7 @@ def _ectyper_pipeline_vf(query_file, single_dict, pipeline=None, backlog=False): # Only bother parsing into json if user has requested either vf or # serotype, and we're not in bulk uploading. job_ectyper_beautify_vf = multiples.enqueue( - beautify, + display_subtyping, query_file + '_ectyper_vf.p', single_dict, depends_on=job_ectyper_vf, @@ -150,13 +150,10 @@ def _ectyper_pipeline_serotype(query_file, single_dict, pipeline=None, backlog=F singles = backlog_singles_q multiples = backlog_multiples_q - # Create a copy of the arguments dictionary and disable Serotype. - # This copy is passed to the old ECTyper. 
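
The new unpickle() helper centralizes deserialization behind a type whitelist, so a bad payload fails at load time rather than deep inside a worker. The same guard reduced to its core; the allowed-types tuple here is an assumption to be extended as payload types change:

    import pickle

    ALLOWED_TYPES = (dict, list)

    def safe_unpickle(path):
        with open(path, 'rb') as fh:
            payload = pickle.load(fh)
        if not isinstance(payload, ALLOWED_TYPES):
            raise TypeError('unexpected payload {0!r} in {1}'.format(type(payload), path))
        return payload
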
- single_dict_vf = copy.deepcopy(single_dict) # Enqueue the new ECTyper job_ectyper_serotype = multiples.enqueue( call_ectyper_serotype, - single_dict_vf, + single_dict, depends_on=job_id) d['job_ectyper_serotype'] = job_ectyper_serotype pipeline.jobs.update({ @@ -199,8 +196,8 @@ def _ectyper_pipeline_serotype(query_file, single_dict, pipeline=None, backlog=F # Only bother parsing into json if user has requested either vf or # serotype, and we're not in bulk uploading. job_ectyper_beautify_serotype = multiples.enqueue( - beautify, - pickled_result = query_file + '_ectyper_serotype.model', + display_subtyping, + query_file + '_ectyper_serotype.model', depends_on=job_ectyper_serotype, result_ttl=ttl_value ) From 7c72205015d069958b661315fb83c19a327d8636 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 22:14:13 -0500 Subject: [PATCH 071/122] DEBUG: "json_r" is being seen as a dict for some reason --- app/middleware/display/beautify.py | 2 +- app/middleware/modellers.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index f439fde9..e9c57798 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -139,7 +139,7 @@ def beautify(gene_dict, args_dict=None): def display_subtyping(pickled_result, args_dict=None): result = unpickle(pickled_result) if isinstance(result, dict): - list_return = beautify(result, args_dict) + list_return = beautify(gene_dict=result, args_dict=args_dict) assert isinstance(list_return, list) model = model_vf(list_return) return model_to_json(model) diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index fd10a2c8..04cfb3ed 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -33,13 +33,13 @@ def model_serotype(pi, pl, output_file): ) return subtyping_result -def model_vf(json_r): +def model_vf(lst): """ Casts the output from display.beautify into a SubtypingResult object. """ # Type check. - assert isinstance(json_r, list) - print("model_vf() called with type {0} containing {1}".format(type(json_r), str(json_r))) + assert isinstance(lst, list) + print("model_vf() called with type {0} containing {1}".format(type(lst), str(lst))) subtyping_list = [ SubtypingRow( analysis=item('analysis'), @@ -51,7 +51,7 @@ def model_vf(json_r): hitstart=item['hitstart'], hitstop=item['hitstop'] ) - for item in json_r] + for item in lst] # Convert the list of rows into a SubtypingResult model. subtyping_result = SubtypingResult( rows = subtyping_list From 9b4a37f4e6627494a3616bd2295219f1bc368017 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 22:36:53 -0500 Subject: [PATCH 072/122] DEBUG: log of name of the job that causes to_json() to be called with an rq_job.result containing an empty list --- app/middleware/models.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index cc01d2e4..9656ddc4 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -196,18 +196,19 @@ def to_json(self): """ # Gather all the jobs that have finished and haven't failed. completed_jobs = [ - j.rq_job for j in self.final_jobs + j for j in self.final_jobs if j.display and j.rq_job.is_finished and not j.rq_job.is_failed ] print("to_json() completed_jobs: {0}".format(str(completed_jobs))) # Merge the json lists together. 
        l = []
-        for rq_job in completed_jobs:
+        for j in completed_jobs:
+            rq_job = j.rq_job
             model = rq_job.result
             try:
                 assert isinstance(model, models.Base)
             except:
-                raise Exception("to_json() called with result of type {0} and info {1}".format(type(model), str(model)))
+                raise Exception("to_json() called for job {0} with result of type {1} and info {2}".format(j.name, type(model), str(model)))
             list_json = model_to_json(model)
             l += list_json
         return l

From f3801d9d50605f53075900b1a70a027ed2bd68f2 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 19 Feb 2018 23:04:54 -0500
Subject: [PATCH 073/122] STOP: looks like I was also looking in the wrong
 place; it's call_ectyper_serotype that returns a SubtypingResult(rows=[]).
 I wonder if it's the call, or whether the jsonmodels classes treat the
 attributes as instances (and can't be pickled)
---
 app/middleware/models.py            | 4 ++++
 app/modules/ectyper/call_ectyper.py | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/app/middleware/models.py b/app/middleware/models.py
index 9656ddc4..8ae26ad2 100644
--- a/app/middleware/models.py
+++ b/app/middleware/models.py
@@ -83,6 +83,9 @@ def unpickle(pickled_file):
     assert isinstance(unpickled, (models.Base, Pipeline, dict, list))
     return unpickled
 
+def dump(obj, path):
+    pickle.dump(obj, open(path, 'wb'))
+
 class SubtypingRow(models.Base):
     analysis = fields.StringField(required=True)
     contigid = fields.StringField(required=True)
diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py
index 92511343..df904715 100644
--- a/app/modules/ectyper/call_ectyper.py
+++ b/app/modules/ectyper/call_ectyper.py
@@ -9,6 +9,7 @@
 from os.path import basename
 from modules.loggingFunctions import initialize_logging
 from middleware.modellers import model_serotype
+from middleware.models import dump
 
 log_file = initialize_logging()
 log = logging.getLogger(__name__)
@@ -93,7 +94,7 @@ def call_ectyper_serotype(args_dict):
     )
     # Path for the pickle dump.
     p = genome_file + '_ectyper_serotype.model'
-    pickle.dump(subtyping_result,open(p,'wb'))
+    dump(subtyping_result, p)
         return p
     else:
         raise Exception('ECTyper Serotyping failed for' + genome_file)

From a5d0e67e58acf8f78cbc24e2d1610e67687a840c Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 19 Feb 2018 23:12:41 -0500
Subject: [PATCH 074/122] ADD: have tests also check we don't have empties
---
 app/middleware/models.py  | 10 +++++-----
 app/tests/test_modules.py |  7 ++++++-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/app/middleware/models.py b/app/middleware/models.py
index 8ae26ad2..c66e5fe4 100644
--- a/app/middleware/models.py
+++ b/app/middleware/models.py
@@ -2,7 +2,7 @@
 import copy
 import config
 import redis
-import dill as pickle
+import dill
 from hashlib import sha1
 from dis import dis
 from StringIO import StringIO
@@ -45,7 +45,7 @@ def store(pipeline):
     redis_connection = redis.from_url(redis_url)
 
     # Store the pipeline instance.
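
Patches 073/074 above move serialization to dill because the jsonmodels-based results were not round-tripping. The practical difference: dill serializes objects the stdlib pickler rejects, closures being the classic case. A self-contained demonstration (requires the dill package):

    import dill

    def make_counter():
        count = [0]
        def bump():
            count[0] += 1
            return count[0]
        return bump

    # pickle.dumps(make_counter()) raises PicklingError; dill round-trips it.
    counter = dill.loads(dill.dumps(make_counter()))
    assert counter() == 1
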
- redis_connection.set(pipeline_id, pickle.dumps(pipeline)) + redis_connection.set(pipeline_id, dill.dumps(pipeline)) # Create a similar structure to the old return d = {} @@ -69,7 +69,7 @@ def load(pipeline_id): # Get the pipeline instance. raw = redis_connection.get(pipeline_id) - pipeline = pickle.loads(raw) + pipeline = dill.loads(raw) assert isinstance(pipeline, Pipeline) return pipeline @@ -79,12 +79,12 @@ def unpickle(pickled_file): :param pickled_file: :return: """ - unpickled = pickle.load(open(pickled_file, 'rb')) + unpickled = dill.load(open(pickled_file, 'rb')) assert isinstance(unpickled, (models.Base, Pipeline, dict, list)) return unpickled def dump(obj, path): - pickle.dump(obj, open(path, 'wb')) + dill.dump(obj, open(path, 'wb')) class SubtypingRow(models.Base): analysis = fields.StringField(required=True) diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index af9541c7..cb690d9f 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -15,6 +15,7 @@ from middleware.display.beautify import beautify, model_to_json from middleware.graphers.datastruct_savvy import datastruct_savvy from middleware.graphers.turtle_grapher import turtle_grapher +from middleware.models import unpickle from tests.constants import ARGS_DICT @@ -88,14 +89,18 @@ def test_ectyper_serotype(): single_dict = dict(ARGS_DICT) single_dict.update({'i':ecoli_genome}) pickled_serotype_model = call_ectyper_serotype(single_dict) - ectyper_serotype_model = pickle.load(open(pickled_serotype_model,'rb')) + ectyper_serotype_model = unpickle(pickled_serotype_model) # Validate (throws error if invalidate). ectyper_serotype_model.validate() + # Check that the return rows is not some random empty list. + assert ectyper_serotype_model.rows # Check the conversion for the front-end. json_r = model_to_json(ectyper_serotype_model) # This is not strictly json; more like a list than a dict structure. assert isinstance(json_r, list) + # Check that this isn't empty. + assert json_r def test_amr(): ecoli_genome = GENOMES_LIST_ECOLI[0] From 3d103cc0e1b93348e2ce8a4b7dab99d45c19166a Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 20 Feb 2018 11:13:43 -0500 Subject: [PATCH 075/122] ADD: tests agasint call_ectyper_serotype for pickle/not pickle --- app/modules/ectyper/call_ectyper.py | 13 +++++---- app/tests/test_modules.py | 44 ++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py index df904715..416e882c 100644 --- a/app/modules/ectyper/call_ectyper.py +++ b/app/modules/ectyper/call_ectyper.py @@ -66,7 +66,7 @@ def call_ectyper_vf(args_dict): return p -def call_ectyper_serotype(args_dict): +def call_ectyper_serotype(args_dict, pickle=True): """Use the new version of ECTyper at `master` for serotyping. """ genome_file = args_dict['i'] @@ -92,9 +92,12 @@ def call_ectyper_serotype(args_dict): pl=pl, output_file=output_file ) - # Path for the pickle dump. - p = genome_file + '_ectyper_serotype.model' - dump(subtyping_result, p) - return p + if pickle: + # Path for the pickle dump. 
+ p = genome_file + '_ectyper_serotype.model' + dump(subtyping_result, p) + return p + else: + return subtyping_result else: raise Exception('ECTyper Serotyping failed for' + genome_file) diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index cb690d9f..927f41c1 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -76,7 +76,19 @@ def test_ectyper_vf(): json_return = beautify(pickled_ectyper_dict, single_dict) assert type(json_return) == list -def test_ectyper_serotype(): +def _validate_model(model): + # Validate (throws error if invalidate). + model.validate() + # Check that the return rows is not some random empty list. + assert model.rows + # Check the conversion for the front-end. + r = model_to_json(model) + # This is not really json; more like a list than a dict structure. + assert isinstance(r, list) + # Check that this isn't empty. + assert r + +def test_ectyper_serotype_direct(): """Check the ECTyper from `master` which only performs serotyping. Installed in the conda environment. """ @@ -85,22 +97,28 @@ def test_ectyper_serotype(): ret_code = subprocess.call(['ectyper', '-i', ecoli_genome]) assert ret_code == 0 - # Check the actual call from Spfy's code. +def test_ectyper_serotype_call_nopickle(): + """ + Check the actual call from Spfy's code. + """ + for ecoli_genome in GENOMES_LIST_ECOLI: + single_dict = dict(ARGS_DICT) + single_dict.update({'i':ecoli_genome}) + # Have the call return the model without pickling. + serotype_model = call_ectyper_serotype(single_dict, pickle=False) + _validate_model(serotype_model) + +def test_ectyper_serotype_call_pickle(): + """ + Check the actual call from Spfy's code. + """ + for ecoli_genome in GENOMES_LIST_ECOLI: single_dict = dict(ARGS_DICT) single_dict.update({'i':ecoli_genome}) + # Pickle the model, and return the path to the file. pickled_serotype_model = call_ectyper_serotype(single_dict) ectyper_serotype_model = unpickle(pickled_serotype_model) - # Validate (throws error if invalidate). - ectyper_serotype_model.validate() - # Check that the return rows is not some random empty list. - assert ectyper_serotype_model.rows - - # Check the conversion for the front-end. - json_r = model_to_json(ectyper_serotype_model) - # This is not strictly json; more like a list than a dict structure. - assert isinstance(json_r, list) - # Check that this isn't empty. - assert json_r + _validate_model(pickled_serotype_model) def test_amr(): ecoli_genome = GENOMES_LIST_ECOLI[0] From 1edb480356a213ecbb26849dc86f9d8e101dad55 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 20 Feb 2018 12:17:28 -0500 Subject: [PATCH 076/122] CHANGE: not sure if jsonmodels will let me do this --- app/middleware/models.py | 44 +++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index c66e5fe4..2d33556d 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -76,8 +76,8 @@ def load(pipeline_id): def unpickle(pickled_file): """ Define a function for unpickling. Should address issues with unpickling custom classes. 
- :param pickled_file: - :return: + :param pickled_file: + :return: """ unpickled = dill.load(open(pickled_file, 'rb')) assert isinstance(unpickled, (models.Base, Pipeline, dict, list)) @@ -87,30 +87,36 @@ def dump(obj, path): dill.dump(obj, open(path, 'wb')) class SubtypingRow(models.Base): - analysis = fields.StringField(required=True) - contigid = fields.StringField(required=True) - filename = fields.StringField(required=True) - hitcutoff = fields.StringField(nullable=True) - hitname = fields.StringField(required=True) - hitorientation = fields.StringField(nullable=True) - hitstart = fields.StringField(nullable=True) - hitstop = fields.StringField(nullable=True) + def __init__(self, analysis="", contigid="", filename="", hitcutoff="", hitname="", hitorientation="", hitstart="",hitstop=""): + self.analysis = analysis + self.contigid = contigid + self.filename = filename + self.hitcutoff = hitcutoff + self.hitname = hitname + self.hitorientation = hitorientation + self.hitstart = hitstart + self.hitstop = hitstop class SubtypingResult(models.Base): - rows = fields.ListField([SubtypingRow], nullable=True) + def __init__(self, rows=None): + if not rows: + rows = [] + self.rows = rows class PhylotyperRow(models.Base): - contig = fields.StringField(nullable=True) - genome = fields.StringField() - probability = fields.StringField(nullable=True) # actually float - start = fields.StringField(nullable=True) # actually int - stop = fields.StringField(nullable=True) # actually int - subtype = fields.StringField() - subtype_gene = fields.StringField(nullable=True) + def __init__(self): + self.contig = fields.StringField(nullable=True) + self.genome = fields.StringField() + self.probability = fields.StringField(nullable=True) # actually float + self.start = fields.StringField(nullable=True) # actually int + self.stop = fields.StringField(nullable=True) # actually int + self.subtype = fields.StringField() + self.subtype_gene = fields.StringField(nullable=True) class PhylotyperResult(models.Base): - rows = fields.ListField([PhylotyperRow], nullable=True) + def __init__(self): + self.rows = fields.ListField([PhylotyperRow], nullable=True) class Job(): From 5b33d63f251b25de4d30c710dd10683b0163b04a Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 20 Feb 2018 13:13:04 -0500 Subject: [PATCH 077/122] FIX: one of the tests --- app/tests/test_modules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index 927f41c1..cf5d2cf4 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -118,7 +118,7 @@ def test_ectyper_serotype_call_pickle(): # Pickle the model, and return the path to the file. pickled_serotype_model = call_ectyper_serotype(single_dict) ectyper_serotype_model = unpickle(pickled_serotype_model) - _validate_model(pickled_serotype_model) + _validate_model(ectyper_serotype_model) def test_amr(): ecoli_genome = GENOMES_LIST_ECOLI[0] From 4b13e49d929f57137156fda3ea6754f8a5c161db Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 20 Feb 2018 16:25:37 -0500 Subject: [PATCH 078/122] DEBUG: im guessing the to_struct() method from jsonmodels no longer works --- app/middleware/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/middleware/models.py b/app/middleware/models.py index 2d33556d..ce1efa45 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -12,6 +12,8 @@ def _convert_model(model): # Convert the model to a generic JSON structure. 
struct = model.to_struct() + # Check that struct isn't empty. + assert struct if 'rows' in struct: # This is not strictly json; more like a list than a dict structure. rows_list = struct['rows'] From c26710619c817890eda60aa6fa0fad93800b6fb2 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 7 Mar 2018 11:12:24 -0500 Subject: [PATCH 079/122] CHANGE: just define a list generating function --- app/middleware/display/beautify.py | 2 +- app/middleware/modellers.py | 24 ++++++++++----------- app/middleware/models.py | 34 ++++++++++++++++-------------- app/tests/test_modules.py | 10 ++++----- 4 files changed, 36 insertions(+), 34 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index e9c57798..7933b086 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -143,7 +143,7 @@ def display_subtyping(pickled_result, args_dict=None): assert isinstance(list_return, list) model = model_vf(list_return) return model_to_json(model) - elif isinstance(result, SubtypingResult): + elif isinstance(result, list): return model_to_json(result) else: raise Exception("beautify() could not handle pickled file: {0}.".format(pickled_result)) diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index 04cfb3ed..9adcef69 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -15,7 +15,7 @@ def model_serotype(pi, pl, output_file): # Loop. subtyping_list = [ - SubtypingRow( + { analysis='Serotype', contigid='n/a', filename=actual_filename(row['genome']), @@ -24,14 +24,14 @@ def model_serotype(pi, pl, output_file): hitorientation='n/a', hitstart='n/a', hitstop='n/a' - ) + } for index, row in df.iterrows()] # Convert the list of rows into a SubtypingResult model. - subtyping_result = SubtypingResult( - rows = subtyping_list - ) - return subtyping_result + # subtyping_result = SubtypingResult( + # rows = subtyping_list + # ) + return subtyping_list def model_vf(lst): """ @@ -41,7 +41,7 @@ def model_vf(lst): assert isinstance(lst, list) print("model_vf() called with type {0} containing {1}".format(type(lst), str(lst))) subtyping_list = [ - SubtypingRow( + { analysis=item('analysis'), contigid=item['contigid'], filename=item['filename'], @@ -50,10 +50,10 @@ def model_vf(lst): hitorientation=item['hitorientation'], hitstart=item['hitstart'], hitstop=item['hitstop'] - ) + } for item in lst] # Convert the list of rows into a SubtypingResult model. - subtyping_result = SubtypingResult( - rows = subtyping_list - ) - return subtyping_result + # subtyping_result = SubtypingResult( + # rows = subtyping_list + # ) + return subtyping_list diff --git a/app/middleware/models.py b/app/middleware/models.py index ce1efa45..08ce3dc1 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -9,29 +9,31 @@ from jsonmodels import models, fields from middleware.graphers.turtle_utils import actual_filename -def _convert_model(model): - # Convert the model to a generic JSON structure. - struct = model.to_struct() - # Check that struct isn't empty. - assert struct - if 'rows' in struct: - # This is not strictly json; more like a list than a dict structure. - rows_list = struct['rows'] - return rows_list - else: - return struct +# def _convert_model(model): +# # Convert the model to a generic JSON structure. +# struct = model.to_struct() +# # Check that struct isn't empty. +# assert struct +# if 'rows' in struct: +# # This is not strictly json; more like a list than a dict structure. 
+# rows_list = struct['rows'] +# return rows_list +# else: +# return struct def model_to_json(model): """ Converts models to json for the front-end. """ # Validate the model submitted before processing. - model.validate() + assert isinstance(model, list) + # model.validate() # Conversion. - if isinstance(model, models.Base): - return _convert_model(model) - else: - raise Exception('model_to_json() called for a model without a handler.') + return model + # if isinstance(model, models.Base): + # return _convert_model(model) + # else: + # raise Exception('model_to_json() called for a model without a handler.') def store(pipeline): """ diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index cf5d2cf4..db280e68 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -78,15 +78,15 @@ def test_ectyper_vf(): def _validate_model(model): # Validate (throws error if invalidate). - model.validate() + # model.validate() # Check that the return rows is not some random empty list. - assert model.rows + # assert model.rows # Check the conversion for the front-end. - r = model_to_json(model) + # r = model_to_json(model) # This is not really json; more like a list than a dict structure. - assert isinstance(r, list) + assert isinstance(model, list) # Check that this isn't empty. - assert r + assert model def test_ectyper_serotype_direct(): """Check the ECTyper from `master` which only performs serotyping. From f1e2d3068c4770648e36c145b365d5b248ac9b2e Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 7 Mar 2018 11:37:55 -0500 Subject: [PATCH 080/122] CHANGE: just define a list generating function --- app/middleware/modellers.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index 9adcef69..59b1bed0 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -16,14 +16,14 @@ def model_serotype(pi, pl, output_file): # Loop. subtyping_list = [ { - analysis='Serotype', - contigid='n/a', - filename=actual_filename(row['genome']), - hitcutoff=str(pi), - hitname="{0}:{1}".format(row['O_prediction'],row['H_prediction']), - hitorientation='n/a', - hitstart='n/a', - hitstop='n/a' + 'analysis':'Serotype', + 'contigid':'n/a', + 'filename':actual_filename(row['genome']), + 'hitcutoff':str(pi), + 'hitname':"{0}:{1}".format(row['O_prediction'],row['H_prediction']), + 'hitorientation':'n/a', + 'hitstart':'n/a', + 'hitstop':'n/a' } for index, row in df.iterrows()] @@ -42,14 +42,14 @@ def model_vf(lst): print("model_vf() called with type {0} containing {1}".format(type(lst), str(lst))) subtyping_list = [ { - analysis=item('analysis'), - contigid=item['contigid'], - filename=item['filename'], - hitcutoff=item['hitcutoff'], - hitname=item['hitname'], - hitorientation=item['hitorientation'], - hitstart=item['hitstart'], - hitstop=item['hitstop'] + 'analysis':item('analysis'), + 'contigid':item['contigid'], + 'filename':item['filename'], + 'hitcutoff':item['hitcutoff'], + 'hitname':item['hitname'], + 'hitorientation':item['hitorientation'], + 'hitstart':item['hitstart'], + 'hitstop':item['hitstop'] } for item in lst] # Convert the list of rows into a SubtypingResult model. 
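
With the jsonmodels classes retired, model_serotype() reduces to reshaping the ECTyper CSV into plain row dicts. A stand-alone version of that reshaping, assuming pandas and the genome/O_prediction/H_prediction columns read above:

    import pandas as pd

    def rows_from_ectyper_csv(path, pi='90'):
        df = pd.read_csv(path)
        return [{
            'analysis': 'Serotype',
            'filename': row['genome'],
            'hitcutoff': str(pi),
            'hitname': '{0}:{1}'.format(row['O_prediction'], row['H_prediction']),
        } for _, row in df.iterrows()]
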
From 93693616f169e40cba476d06c8fdd3b3ba832d06 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 7 Mar 2018 12:27:33 -0500 Subject: [PATCH 081/122] FIX: have tests use new funcs --- app/middleware/models.py | 2 +- app/tests/test_models.py | 199 ++++++++++++++++++--------------------- 2 files changed, 93 insertions(+), 108 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 08ce3dc1..a3924531 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -220,7 +220,7 @@ def to_json(self): model = rq_job.result try: # TODO: This is not correct as while the new ECTYper call does return a model, the display_subtyping() call that the return job is associated with will already convert the result to a list and return it. - assert isinstance(model, models.Base) + assert isinstance(model, (models.Base,list)) except: raise Exception("to_json() called for job {0} with result of type {1} and info {2}".format(j.name, type(model), str(model))) list_json = model_to_json(model) diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 6605f118..9bd811c7 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -1,5 +1,5 @@ import dill -from middleware import models +from middleware import models, modellers from modules.spfy import spfy from scripts.savvy import savvy from tests import constants @@ -19,46 +19,31 @@ def test_subtyping_model_direct(l=constants.BEAUTIFY_VF_SEROTYPE): """ Use our dataset to directly create a subtyping results model and validate it. """ - subtyping_list = [ - models.SubtypingRow( - analysis=d['analysis'], - contigid=d['contigid'], - filename=d['filename'], - hitcutoff=str(d['hitcutoff']), - hitname=d['hitname'], - hitorientation=d['hitorientation'], - hitstart=str(d['hitstart']), - hitstop=str(d['hitstop']) - ) - for d in l] - subtyping_result = models.SubtypingResult( - rows = subtyping_list - ) - subtyping_result.validate() - # Return for incorporation into later tests. - return subtyping_result - -def test_phylotyper_model_direct(l=constants.BEAUTIFY_STX1): - """ - Use our dataset to directly create a phylotyper results model and validate it. - """ - phylotyper_list = [ - models.PhylotyperRow( - contig=d['contig'], - genome=d['genome'], - probability=str(d['probability']), - start=str(d['start']), - stop=str(d['stop']), - subtype=d['subtype'], - subtype_gene=d['subtype_gene'] - ) - for d in l] - phylotyper_result = models.PhylotyperResult( - rows = phylotyper_list - ) - phylotyper_result.validate() + subtyping_list = modellers.model_vf(l) # Return for incorporation into later tests. - return phylotyper_result + return subtyping_list + +# def test_phylotyper_model_direct(l=constants.BEAUTIFY_STX1): +# """ +# Use our dataset to directly create a phylotyper results model and validate it. +# """ +# phylotyper_list = [ +# models.PhylotyperRow( +# contig=d['contig'], +# genome=d['genome'], +# probability=str(d['probability']), +# start=str(d['start']), +# stop=str(d['stop']), +# subtype=d['subtype'], +# subtype_gene=d['subtype_gene'] +# ) +# for d in l] +# phylotyper_result = models.PhylotyperResult( +# rows = phylotyper_list +# ) +# phylotyper_result.validate() +# # Return for incorporation into later tests. +# return phylotyper_result def _create_example_pipeline(): p = models.Pipeline( @@ -145,73 +130,73 @@ def test_pipeline_model_dill(): # Run the same tests on the loaded pipeline. 
test_pipeline_model_subtyping(p=loaded_pipeline) -def test_pipeline_model_phyotyping(): - """ - Test the Pipeline model itself for subtyping via Phylotyper. - """ - p = models.Pipeline( - func = spfy, - options = constants.ARGS_DICT - ) - mock_stx1 = MockRQJob( - result = test_phylotyper_model_direct(constants.BEAUTIFY_STX1) - ) - mock_stx2 = MockRQJob( - result = test_phylotyper_model_direct(constants.BEAUTIFY_STX2) - ) - p.jobs.update({ - 'job_phylotyper_beautify_stx1': models.Job( - rq_job=mock_stx1, - name='job_phylotyper_beautify_stx1', - transitory=False, - backlog=False, - display=True - ) - }) - p.jobs.update({ - 'job_phylotyper_beautify_stx2': models.Job( - rq_job=mock_stx2, - name='job_phylotyper_beautify_stx2', - transitory=False, - backlog=False, - display=True - ) - }) - assert isinstance(p, models.Pipeline) - assert isinstance(p.jobs, dict) - for k in p.jobs: - assert isinstance(p.jobs[k], models.Job) - - # Test Pipeline.cache_jobs() - p.cache_jobs() - # Test Pipeline.merge_jobs() - p.merge_jobs() - # Test Pipeline.complete(), should be True. - assert p.complete() - - # Test Pipeline.to_json(). - json = p.to_json() - assert isinstance(json, list) - - # Add an AMR job and re-test. - mock_eae = MockRQJob( - result = test_phylotyper_model_direct(constants.BEAUTIFY_EAE) - ) - p.jobs.update({ - 'job_phylotyper_beautify_eae': models.Job( - rq_job=mock_eae, - name='job_phylotyper_beautify_stx2', - transitory=False, - backlog=False, - display=True - ) - }) - p.merge_jobs() - # Test Pipeline.complete(), should be True. - assert p.complete() - # Test Pipeline.to_json(). - json = p.to_json() - assert isinstance(json, list) +# def test_pipeline_model_phyotyping(): +# """ +# Test the Pipeline model itself for subtyping via Phylotyper. +# """ +# p = models.Pipeline( +# func = spfy, +# options = constants.ARGS_DICT +# ) +# mock_stx1 = MockRQJob( +# result = test_phylotyper_model_direct(constants.BEAUTIFY_STX1) +# ) +# mock_stx2 = MockRQJob( +# result = test_phylotyper_model_direct(constants.BEAUTIFY_STX2) +# ) +# p.jobs.update({ +# 'job_phylotyper_beautify_stx1': models.Job( +# rq_job=mock_stx1, +# name='job_phylotyper_beautify_stx1', +# transitory=False, +# backlog=False, +# display=True +# ) +# }) +# p.jobs.update({ +# 'job_phylotyper_beautify_stx2': models.Job( +# rq_job=mock_stx2, +# name='job_phylotyper_beautify_stx2', +# transitory=False, +# backlog=False, +# display=True +# ) +# }) +# assert isinstance(p, models.Pipeline) +# assert isinstance(p.jobs, dict) +# for k in p.jobs: +# assert isinstance(p.jobs[k], models.Job) +# +# # Test Pipeline.cache_jobs() +# p.cache_jobs() +# # Test Pipeline.merge_jobs() +# p.merge_jobs() +# # Test Pipeline.complete(), should be True. +# assert p.complete() +# +# # Test Pipeline.to_json(). +# json = p.to_json() +# assert isinstance(json, list) +# +# # Add an AMR job and re-test. +# mock_eae = MockRQJob( +# result = test_phylotyper_model_direct(constants.BEAUTIFY_EAE) +# ) +# p.jobs.update({ +# 'job_phylotyper_beautify_eae': models.Job( +# rq_job=mock_eae, +# name='job_phylotyper_beautify_stx2', +# transitory=False, +# backlog=False, +# display=True +# ) +# }) +# p.merge_jobs() +# # Test Pipeline.complete(), should be True. +# assert p.complete() +# # Test Pipeline.to_json(). 
+# json = p.to_json() +# assert isinstance(json, list) def test_pipeline_model_signature(): """ From 0f0c71d9b8104a42fc7e1a34e05a618335c45c13 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 7 Mar 2018 13:29:52 -0500 Subject: [PATCH 082/122] DEBUG: checks in model generation --- app/middleware/modellers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index 59b1bed0..77cbbca2 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -39,10 +39,11 @@ def model_vf(lst): """ # Type check. assert isinstance(lst, list) + assert isinstance(lst[0], dict) print("model_vf() called with type {0} containing {1}".format(type(lst), str(lst))) subtyping_list = [ { - 'analysis':item('analysis'), + 'analysis':item['analysis'], 'contigid':item['contigid'], 'filename':item['filename'], 'hitcutoff':item['hitcutoff'], From ccda9f66e44f3d68207d951c2d76d23043155424 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 7 Mar 2018 14:04:10 -0500 Subject: [PATCH 083/122] CHANGE: create test pipelines directly from module calls --- app/tests/test_models.py | 7 +++++-- app/tests/test_modules.py | 8 ++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 9bd811c7..f2e403e1 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -3,6 +3,7 @@ from modules.spfy import spfy from scripts.savvy import savvy from tests import constants +from tests.test_modules import test_ectyper_vf, test_ectyper_serotype_call_pickle class MockRQJob(): """ @@ -50,11 +51,13 @@ def _create_example_pipeline(): func=spfy, options=constants.ARGS_DICT ) + r_serotype = test_ectyper_serotype_call_pickle(return_one=True) mock_serotype = MockRQJob( - result=test_subtyping_model_direct(constants.BEAUTIFY_SEROTYPE) + result=test_subtyping_model_direct(r_serotype) ) + r_vf = test_ectyper_vf(return_one=True) mock_vf = MockRQJob( - result=test_subtyping_model_direct(constants.BEAUTIFY_VF) + result=test_subtyping_model_direct(r_vf) ) # Mimicks a Serotype result that will be converted to json. p.jobs.update({ diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index db280e68..63248bd9 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -60,7 +60,7 @@ def test_qc(): for non_ecoli_genome in GENOMES_LIST_NOT_ECOLI: assert qc(non_ecoli_genome) == False -def test_ectyper_vf(): +def test_ectyper_vf(return_one=False): """Check the ECTyper from `superphy` which is used for virulance factor identification. Installed as a submodule in the `modules` directory. """ @@ -75,6 +75,8 @@ def test_ectyper_vf(): # beautify ECTyper check json_return = beautify(pickled_ectyper_dict, single_dict) assert type(json_return) == list + if return_one: + return json_return def _validate_model(model): # Validate (throws error if invalidate). @@ -108,7 +110,7 @@ def test_ectyper_serotype_call_nopickle(): serotype_model = call_ectyper_serotype(single_dict, pickle=False) _validate_model(serotype_model) -def test_ectyper_serotype_call_pickle(): +def test_ectyper_serotype_call_pickle(return_one=False): """ Check the actual call from Spfy's code. 
""" @@ -119,6 +121,8 @@ def test_ectyper_serotype_call_pickle(): pickled_serotype_model = call_ectyper_serotype(single_dict) ectyper_serotype_model = unpickle(pickled_serotype_model) _validate_model(ectyper_serotype_model) + if return_one: + return ectyper_serotype_model def test_amr(): ecoli_genome = GENOMES_LIST_ECOLI[0] From 2dc2cb63cd0ee4560b49a3323148a842ab2434e1 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 8 Mar 2018 12:31:15 -0500 Subject: [PATCH 084/122] CHANGE: wrap the to_json() return with jsonify --- app/middleware/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index a3924531..159b0375 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -7,6 +7,7 @@ from dis import dis from StringIO import StringIO from jsonmodels import models, fields +from flask import jsonify from middleware.graphers.turtle_utils import actual_filename # def _convert_model(model): @@ -225,7 +226,7 @@ def to_json(self): raise Exception("to_json() called for job {0} with result of type {1} and info {2}".format(j.name, type(model), str(model))) list_json = model_to_json(model) l += list_json - return l + return jsonify(l) def _function_signature(self): """ From d6b3f518d640652d8bd4bb3c2cd8cfbb61fe0f92 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 8 Mar 2018 12:48:16 -0500 Subject: [PATCH 085/122] CHANGE: work from lists for graphing --- app/middleware/graphers/datastruct_savvy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index 43b729ee..be5209f4 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -10,8 +10,8 @@ def _graph_subtyping(graph, model, uriIsolate): # Convert the model to a graph. - struct = model.to_struct() - rows_list = struct['rows'] + # struct = model.to_struct() + rows_list = model for row in rows_list: graph.add(( uriIsolate, @@ -29,7 +29,7 @@ def model_to_graph(graph, model, uriIsolate): # Validate the model submitted before processing. model.validate() # Conversion. 
- if isinstance(model, SubtypingResult): + if isinstance(model, list): return _graph_subtyping(graph, model, uriIsolate) else: raise Exception('model_to_graph() called for a model without a handler.') @@ -209,7 +209,7 @@ def generate_datastruct(query_file, id_file, pickled_dictionary): graph = parse_gene_dict(graph, results['Antimicrobial Resistance'], uriGenome, 'AntimicrobialResistanceGene') return graph - elif isinstance(results, SubtypingResult): + elif isinstance(results, list): graph = model_to_graph(graph, results, uriIsolate) return graph else: From a0baa16073b5ec81056be48b5c46a83ff34cdd2c Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 8 Mar 2018 13:01:43 -0500 Subject: [PATCH 086/122] FIX?: comment out validate and report what model_to_json sees --- app/middleware/display/beautify.py | 2 ++ app/middleware/graphers/datastruct_savvy.py | 2 +- app/middleware/models.py | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index 7933b086..e9a7c403 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -139,11 +139,13 @@ def beautify(gene_dict, args_dict=None): def display_subtyping(pickled_result, args_dict=None): result = unpickle(pickled_result) if isinstance(result, dict): + # VF list_return = beautify(gene_dict=result, args_dict=args_dict) assert isinstance(list_return, list) model = model_vf(list_return) return model_to_json(model) elif isinstance(result, list): + # Serotyping return model_to_json(result) else: raise Exception("beautify() could not handle pickled file: {0}.".format(pickled_result)) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index be5209f4..4e41cf1c 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -27,7 +27,7 @@ def _graph_subtyping(graph, model, uriIsolate): def model_to_graph(graph, model, uriIsolate): # Validate the model submitted before processing. - model.validate() + # model.validate() # Conversion. if isinstance(model, list): return _graph_subtyping(graph, model, uriIsolate) diff --git a/app/middleware/models.py b/app/middleware/models.py index 159b0375..de4ba915 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -30,6 +30,7 @@ def model_to_json(model): assert isinstance(model, list) # model.validate() # Conversion. + print("model_to_json() called with model: {0}".format(str(model))) return model # if isinstance(model, models.Base): # return _convert_model(model) From 8320b8163009d283fe6313f6a4c1e31b3ec3a699 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 8 Mar 2018 13:38:03 -0500 Subject: [PATCH 087/122] DEBUG: should work for serotyping... 
---
 app/middleware/modellers.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py
index 77cbbca2..c0000321 100644
--- a/app/middleware/modellers.py
+++ b/app/middleware/modellers.py
@@ -31,6 +31,8 @@ def model_serotype(pi, pl, output_file):
     # subtyping_result = SubtypingResult(
     #     rows = subtyping_list
     # )
+    assert subtyping_list
+    assert subtyping_list[0]
     return subtyping_list

 def model_vf(lst):

From 5b821d9d9cda3edbc69639d93e292babb85ed185 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Thu, 8 Mar 2018 13:51:58 -0500
Subject: [PATCH 088/122] FIX: wasn't reading the return from new ectyper call
 correctly

---
 app/middleware/graphers/datastruct_savvy.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py
index 4e41cf1c..964a91fe 100644
--- a/app/middleware/graphers/datastruct_savvy.py
+++ b/app/middleware/graphers/datastruct_savvy.py
@@ -13,15 +13,16 @@ def _graph_subtyping(graph, model, uriIsolate):
     # struct = model.to_struct()
     rows_list = model
     for row in rows_list:
+        o_type, h_type = row['hitname'].split(':')
         graph.add((
             uriIsolate,
             gu('ge:0001076'),
-            Literal(row['O_prediction'])
+            Literal(o_type)
         ))
         graph.add((
             uriIsolate,
             gu('ge:0001077'),
-            Literal(row['H_prediction'])
+            Literal(h_type)
         ))
     return graph

From 474be0d70796e29af545c92133d740e86f99eac0 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Thu, 8 Mar 2018 14:39:49 -0500
Subject: [PATCH 089/122] FIX: when collecting finished jobs for display, also
 check if it's a backlog job

---
 app/middleware/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/middleware/models.py b/app/middleware/models.py
index de4ba915..58b62af4 100644
--- a/app/middleware/models.py
+++ b/app/middleware/models.py
@@ -212,7 +212,7 @@ def to_json(self):
         # Gather all the jobs that have finished and haven't failed.
         completed_jobs = [
             j for j in self.final_jobs
-            if j.display and j.rq_job.is_finished and not j.rq_job.is_failed
+            if j.display and not j.backlog and j.rq_job.is_finished and not j.rq_job.is_failed
         ]
         print("to_json() completed_jobs: {0}".format(str(completed_jobs)))
         # Merge the json lists together.

From d51204937f00026d7a54973a8cb35e5832b146c2 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Thu, 8 Mar 2018 14:41:41 -0500
Subject: [PATCH 090/122] FIX: pipeline should not have display copies of
 results in the first place

---
 app/modules/spfy.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index a1a9b6fd..3ebf22c1 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -113,7 +113,7 @@ def _ectyper_pipeline_vf(query_file, single_dict, pipeline=None, backlog=False):
         )
     })

-    if not single_dict['options']['bulk']:
+    if not single_dict['options']['bulk'] or not backlog:
         # Only bother parsing into json if user has requested either vf or
         # serotype, and we're not in bulk uploading.
job_ectyper_beautify_serotype = multiples.enqueue( From 41a152ba8af2e3eb4232a58e1c7daec388d69665 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Fri, 9 Mar 2018 17:04:13 -0500 Subject: [PATCH 091/122] CHANGE: amr pipeline into new system --- app/modules/spfy.py | 112 ++++++++++++++++++++++++++++++++------------ 1 file changed, 83 insertions(+), 29 deletions(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 3ebf22c1..08940f10 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -213,6 +213,88 @@ def _ectyper_pipeline_serotype(query_file, single_dict, pipeline=None, backlog=F }) return d +# AMR PIPELINE +def _amr_pipeline(pipeline=None, backlog=False, bulk=False): + # Alias. + job_id = pipeline.jobs['job_id'].rq_job + if not backlog: + multiples = multiples_q + else: + multiples = backlog_multiples_q + + job_amr = multiples.enqueue(amr, query_file, depends_on=job_id) + pipeline.jobs.update({ + 'job_amr': Job( + rq_job=job_amr, + name='job_amr', + transitory=True, + backlog=backlog, + display=False + ) + }) + + job_amr_dict = multiples.enqueue( + amr_to_dict, query_file + '_rgi.tsv', depends_on=job_amr) + pipeline.jobs.update({ + 'job_amr_dict': Job( + rq_job=job_amr_dict, + name='job_amr_dict', + transitory=True, + backlog=backlog, + display=False + ) + }) + + # Create a graph, and upload to Blazegraph. + if backlog: + job_amr_datastruct = multiples.enqueue( + datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_rgi.tsv_rgi.p', depends_on=job_amr_dict, result_ttl=-1) + pipeline.jobs.update({ + 'job_amr_datastruct': Job( + rq_job=job_amr_datastruct, + name='job_amr_datastruct', + transitory=False, + backlog=backlog, + display=False + ) + }) + else: + job_amr_datastruct = multiples.enqueue( + datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_rgi.tsv_rgi.p', depends_on=job_amr_dict) + pipeline.jobs.update({ + 'job_amr_datastruct': Job( + rq_job=job_amr_datastruct, + name='job_amr_datastruct', + transitory=True, + backlog=backlog, + display=False + ) + }) + d = {'job_amr': job_amr, 'job_amr_dict': job_amr_dict, + 'job_amr_datastruct': job_amr_datastruct} + # we still check for the user-selected amr option again because + # if it was not selected but BACKLOG_ENABLED=True, we dont have to + # enqueue it to backlog_multiples_q since beautify doesnt upload + # blazegraph + if not bulk: + job_amr_beautify = multiples.enqueue( + beautify, + query_file + '_rgi.tsv_rgi.p', + single_dict, + depends_on=job_amr_dict, + result_ttl=-1) + pipeline.jobs.update({ + 'job_amr_beautify': Job( + rq_job=job_amr_beautify, + name='job_amr_beautify', + transitory=False, + backlog=backlog, + display=True + ) + }) + d.update({'job_amr_beautify': job_amr_beautify}) + return d + def blob_savvy_enqueue(single_dict, pipeline): ''' Handles enqueueing of single file to multiple queues. 
@@ -315,36 +397,8 @@ def blob_savvy_enqueue(single_dict, pipeline): ) # END ECTYPER PIPELINE - # AMR PIPELINE - def amr_pipeline(multiples): - job_amr = multiples.enqueue(amr, query_file, depends_on=job_id) - job_amr_dict = multiples.enqueue( - amr_to_dict, query_file + '_rgi.tsv', depends_on=job_amr) - # this uploads result to blazegraph - if single_dict['options']['bulk']: - job_amr_datastruct = multiples.enqueue( - datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_rgi.tsv_rgi.p', depends_on=job_amr_dict, result_ttl=-1) - else: - job_amr_datastruct = multiples.enqueue( - datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_rgi.tsv_rgi.p', depends_on=job_amr_dict) - d = {'job_amr': job_amr, 'job_amr_dict': job_amr_dict, - 'job_amr_datastruct': job_amr_datastruct} - # we still check for the user-selected amr option again because - # if it was not selected but BACKLOG_ENABLED=True, we dont have to - # enqueue it to backlog_multiples_q since beautify doesnt upload - # blazegraph - if single_dict['options']['amr'] and not single_dict['options']['bulk']: - job_amr_beautify = multiples.enqueue( - beautify, - query_file + '_rgi.tsv_rgi.p', - single_dict, - depends_on=job_amr_dict, - result_ttl=-1) - d.update({'job_amr_beautify': job_amr_beautify}) - return d - if single_dict['options']['amr']: - amr_jobs = amr_pipeline(multiples_q) + amr_jobs = _amr_pipeline(bulk=single_dict['options']['bulk']) job_amr = amr_jobs['job_amr'] job_amr_dict = amr_jobs['job_amr_dict'] job_amr_datastruct = amr_jobs['job_amr_datastruct'] From c3709ea49883c0d438363acfd37dbeee6f74cbce Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Fri, 9 Mar 2018 17:23:56 -0500 Subject: [PATCH 092/122] CHANGE: phylotyper into new system --- app/modules/spfy.py | 139 ++++++++++++++++++++++++++++++-------------- 1 file changed, 96 insertions(+), 43 deletions(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 08940f10..9b123842 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -276,7 +276,7 @@ def _amr_pipeline(pipeline=None, backlog=False, bulk=False): # if it was not selected but BACKLOG_ENABLED=True, we dont have to # enqueue it to backlog_multiples_q since beautify doesnt upload # blazegraph - if not bulk: + if not backlog and not bulk: job_amr_beautify = multiples.enqueue( beautify, query_file + '_rgi.tsv_rgi.p', @@ -295,6 +295,84 @@ def _amr_pipeline(pipeline=None, backlog=False, bulk=False): d.update({'job_amr_beautify': job_amr_beautify}) return d +def _phylotyper_pipeline(subtype, pipeline=None, backlog=False): + # Alias. 
+ job_id = pipeline.jobs['job_id'].rq_job + if not backlog: + multiples = multiples_q + else: + multiples = backlog_multiples_q + + jobname = '_pt' +subtype + tsvfile = query_file + jobname + '.tsv' + picklefile = query_file + jobname + '.p' + + job_pt = multiples.enqueue( + phylotyper.phylotyper, + None, + subtype, + tsvfile, + id_file=query_file + '_id.txt', + depends_on=pipeline.jobs['job_ectyper_datastruct_vf'].rq_job) + pipeline.jobs.update({ + 'job'+jobname: Job( + rq_job=job_pt, + name='job'+jobname, + transitory=True, + backlog=backlog, + display=False + ) + }) + + job_pt_dict = multiples.enqueue( + phylotyper.to_dict, tsvfile, subtype, picklefile, + depends_on=job_pt) + pipeline.jobs.update({ + 'job'+jobname+'_dict': Job( + rq_job=job_pt_dict, + name='job'+jobname+'_dict', + transitory=True, + backlog=backlog, + display=False + ) + }) + + job_pt_datastruct = multiples.enqueue( + phylotyper.savvy, picklefile, subtype, + depends_on=job_pt_dict) + pipeline.jobs.update({ + 'job'+jobname+'_datastruct': Job( + rq_job=job_pt_datastruct, + name='job'+jobname+'_datastruct', + transitory=True, + backlog=backlog, + display=False + ) + }) + + d = {'job'+jobname: job_pt, 'job'+jobname+'_dict': job_pt_dict, + 'job'+jobname+'_datastruct': job_pt_datastruct} + # we still check for the user-selected amr option again because + # if it was not selected but BACKLOG_ENABLED=True, we dont have to + # enqueue it to backlog_multiples_q since beautify doesnt upload + # blazegraph + if not backlog: + job_pt_beautify = multiples.enqueue( + phylotyper.beautify, picklefile, actual_filename(query_file), + depends_on=job_pt_dict, result_ttl=-1) + pipeline.jobs.update({ + 'job'+jobname+'_beautify': Job( + rq_job=job_pt_beautify, + name='job'+jobname+'_beautify', + transitory=False, + backlog=backlog, + display=True + ) + }) + d.update({'job'+jobname+'_beautify': job_pt_beautify}) + + return d + def blob_savvy_enqueue(single_dict, pipeline): ''' Handles enqueueing of single file to multiple queues. 
@@ -398,73 +476,48 @@ def blob_savvy_enqueue(single_dict, pipeline): # END ECTYPER PIPELINE if single_dict['options']['amr']: - amr_jobs = _amr_pipeline(bulk=single_dict['options']['bulk']) + amr_jobs = _amr_pipeline(, pipeline=pipeline, bulk=single_dict['options']['bulk']) job_amr = amr_jobs['job_amr'] job_amr_dict = amr_jobs['job_amr_dict'] job_amr_datastruct = amr_jobs['job_amr_datastruct'] if not single_dict['options']['bulk']: job_amr_beautify = amr_jobs['job_amr_beautify'] elif config.BACKLOG_ENABLED: - amr_pipeline(backlog_multiples_q) + _amr_pipeline(pipeline=pipeline, backlog=True) # END AMR PIPELINE # Phylotyper Pipeline - def phylotyper_pipeline(multiples, subtype): - - jobname = '_pt' +subtype - tsvfile = query_file + jobname + '.tsv' - picklefile = query_file + jobname + '.p' - - job_pt = multiples.enqueue( - phylotyper.phylotyper, - None, - subtype, - tsvfile, - id_file=query_file + '_id.txt', - depends_on=pipeline.jobs['job_ectyper_datastruct_vf'].rq_job) - job_pt_dict = multiples.enqueue( - phylotyper.to_dict, tsvfile, subtype, picklefile, - depends_on=job_pt) - job_pt_datastruct = multiples.enqueue( - phylotyper.savvy, picklefile, subtype, - depends_on=job_pt_dict) - - d = {'job'+jobname: job_pt, 'job'+jobname+'_dict': job_pt_dict, - 'job'+jobname+'_datastruct': job_pt_datastruct} - # we still check for the user-selected amr option again because - # if it was not selected but BACKLOG_ENABLED=True, we dont have to - # enqueue it to backlog_multiples_q since beautify doesnt upload - # blazegraph - if single_dict['options'][subtype]: - job_pt_beautify = multiples.enqueue( - phylotyper.beautify, picklefile, actual_filename(query_file), - depends_on=job_pt_dict, result_ttl=-1) - d.update({'job'+jobname+'_beautify': job_pt_beautify}) - - return d - if single_dict['options']['stx1']: - pt_jobs = phylotyper_pipeline(multiples_q, 'stx1') + pt_jobs = _phylotyper_pipeline('stx1', pipeline=pipeline) job_stx1_beautify = pt_jobs['job_ptstx1_beautify'] elif config.BACKLOG_ENABLED: - phylotyper_pipeline(backlog_multiples_q, 'stx1') + _phylotyper_pipeline('stx1', pipeline=pipeline, backlog=True) if single_dict['options']['stx2']: - pt_jobs = phylotyper_pipeline(multiples_q, 'stx2') + pt_jobs = _phylotyper_pipeline('stx2', pipeline=pipeline) job_stx2_beautify = pt_jobs['job_ptstx2_beautify'] elif config.BACKLOG_ENABLED: - phylotyper_pipeline(backlog_multiples_q, 'stx2') + _phylotyper_pipeline('stx2', pipeline=pipeline, backlog=True) if single_dict['options']['eae']: - pt_jobs = phylotyper_pipeline(multiples_q, 'eae') + pt_jobs = _phylotyper_pipeline('eae', pipeline=pipeline) job_eae_beautify = pt_jobs['job_pteae_beautify'] elif config.BACKLOG_ENABLED: - phylotyper_pipeline(backlog_multiples_q, 'eae') + _phylotyper_pipeline('eae', pipeline=pipeline, backlog=True) # END Phylotyper pipeline # the base file data for blazegraph job_turtle = multiples_q.enqueue( turtle_grapher, query_file, depends_on=job_qc) + pipeline.jobs.update({ + 'job_turtle': Job( + rq_job=job_turtle, + name='job_turtle', + transitory=True, + backlog=False, + display=False + ) + }) jobs[job_qc.get_id()] = {'file': single_dict['i'], 'analysis': 'Quality Control'} From 4f73f1647aa74a16193412d8107ba25f9bdb9b7c Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 17:28:37 -0500 Subject: [PATCH 093/122] CHANGE: merge phylotyper results as well --- app/middleware/modellers.py | 26 ++++++++++++++++++++-- app/modules/phylotyper/phylotyper.py | 33 ++++++++++++++-------------- 2 files changed, 41 insertions(+), 18 
deletions(-) diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index c0000321..8981b4ce 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -23,7 +23,8 @@ def model_serotype(pi, pl, output_file): 'hitname':"{0}:{1}".format(row['O_prediction'],row['H_prediction']), 'hitorientation':'n/a', 'hitstart':'n/a', - 'hitstop':'n/a' + 'hitstop':'n/a', + 'probability':'n/a' } for index, row in df.iterrows()] @@ -52,7 +53,8 @@ def model_vf(lst): 'hitname':item['hitname'], 'hitorientation':item['hitorientation'], 'hitstart':item['hitstart'], - 'hitstop':item['hitstop'] + 'hitstop':item['hitstop'], + 'probability':'n/a' } for item in lst] # Convert the list of rows into a SubtypingResult model. @@ -60,3 +62,23 @@ def model_vf(lst): # rows = subtyping_list # ) return subtyping_list + +def model_phylotyper(lst): + """ + Casts phylotyper's return to the same format as VF/Serotyping. + """ + phylotyper_list = [ + { + 'analysis:':d['subtype_gene'], + 'contigid':d['contig'], + 'filename':d['genome'], + 'hitcutoff':'n/a', + 'hitname':d['subtype'], + 'hitorientation':'n/a', + 'hitstart':d['start'], + 'hitstop':d['stop'], + 'probability':d['probability'] + } + for d in lst] + + return phylotyper_list diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py index 6c8af7fd..3c04cde8 100644 --- a/app/modules/phylotyper/phylotyper.py +++ b/app/modules/phylotyper/phylotyper.py @@ -22,8 +22,9 @@ import config -from middleware.graphers.turtle_utils import generate_uri as gu, fulluri_to_basename as u2b, normalize_rdfterm as normalize +from middleware.graphers.turtle_utils import generate_uri as gu, fulluri_to_basename as u2b, normalize_rdfterm as normalize from middleware.blazegraph.upload_graph import upload_graph +from middleware.modellers import model_phylotyper from modules.phylotyper import ontology, exceptions from modules.phylotyper.sequences import MarkerSequences, phylotyper_query, genename_query @@ -42,7 +43,7 @@ def phylotyper(uriIsolate, subtype, result_file, id_file=None): Returns: file to tab-delimited text results - + """ # uriIsolate retrieval @@ -94,7 +95,7 @@ def phylotyper(uriIsolate, subtype, result_file, id_file=None): shutil.move(output_file, result_file) shutil.rmtree(temp_dir) - + return result_file @@ -105,7 +106,7 @@ def to_dict(pt_file, subtype, pickle_file): """ - + pt_results = pd.read_table(pt_file) if pt_results['phylotyper_assignment'].empty or pt_results['phylotyper_assignment'].values[0] == 'Subtype loci not found in genome': @@ -114,7 +115,7 @@ def to_dict(pt_file, subtype, pickle_file): } else: - + pt_results = pt_results[['subtype','probability','loci']] pt_results = pt_results.to_dict() @@ -144,7 +145,7 @@ def to_dict(pt_file, subtype, pickle_file): pt_results['contig'][k] = contigs pt_results['start'][k] = starts pt_results['stop'][k] = stops - + pickle.dump(pt_results, open(pickle_file, 'wb')) return pickle_file @@ -179,7 +180,7 @@ def beautify(p_file, genome): # Expand into table rows - one per loci table_rows = [] for k in pt_dict['loci']: - + # Location info for i in range(len(pt_dict['loci'][k])): instance_dict = {} @@ -194,20 +195,20 @@ def beautify(p_file, genome): allele_rdf = normalize(allele_uri) gene_result = genename_query(allele_rdf) instance_dict['subtype_gene'] = gene_result[0]['markerLabel'] - + # Genome instance_dict['genome'] = genome # Subtype info instance_dict['subtype'] = pt_dict['subtype'][k] instance_dict['probability'] = pt_dict['probability'][k] - - 
table_rows.append(instance_dict) - return table_rows - + table_rows.append(instance_dict) + # Cast + unified_format = model_phylotyper(table_rows) + return unified_format def savvy(p_file, subtype): """ Load phylotyper results into DB @@ -222,7 +223,7 @@ def savvy(p_file, subtype): # Phylotyper scheme phylotyper_uri = gu('subt:'+subtype) - + # Get list of permissable subtype values subtypes_results = ontology.subtypeset_query(normalize(phylotyper_uri)) subtypes = {} @@ -289,7 +290,7 @@ def ignorant(genome_uri, subtype, pickle_file): 'stop': {} } for row in results: - + if row['pt'] in subtype_assignments: k = subtype_assignments[row['pt']] else: @@ -345,9 +346,9 @@ def ignorant(genome_uri, subtype, pickle_file): g = u2b(gu(input_g)) pt_file = os.path.join(config.DATASTORE, g+'_pt.tsv') pickle_file = os.path.join(config.DATASTORE, g+'_pt.p') - + phylotyper(args.g, args.s, pt_file) to_dict(pt_file, args.s, pickle_file) print(beautify(pickle_file, args.g)) #savvy(pickle_file, args.s) - #ignorant(input_g, args.s, pickle_file+'2') \ No newline at end of file + #ignorant(input_g, args.s, pickle_file+'2') From 2fddaf4032b6af08a2b3f66c69f30f226a8c6bfa Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 17:41:20 -0500 Subject: [PATCH 094/122] FIX: typos --- app/modules/spfy.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 9b123842..edad68a7 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -214,7 +214,7 @@ def _ectyper_pipeline_serotype(query_file, single_dict, pipeline=None, backlog=F return d # AMR PIPELINE -def _amr_pipeline(pipeline=None, backlog=False, bulk=False): +def _amr_pipeline(query_file, single_dict, pipeline=None, backlog=False, bulk=False): # Alias. job_id = pipeline.jobs['job_id'].rq_job if not backlog: @@ -295,7 +295,7 @@ def _amr_pipeline(pipeline=None, backlog=False, bulk=False): d.update({'job_amr_beautify': job_amr_beautify}) return d -def _phylotyper_pipeline(subtype, pipeline=None, backlog=False): +def _phylotyper_pipeline(subtype, query_file, pipeline=None, backlog=False): # Alias. 
job_id = pipeline.jobs['job_id'].rq_job if not backlog: @@ -475,35 +475,36 @@ def blob_savvy_enqueue(single_dict, pipeline): ) # END ECTYPER PIPELINE + # AMR Pipeline if single_dict['options']['amr']: - amr_jobs = _amr_pipeline(, pipeline=pipeline, bulk=single_dict['options']['bulk']) + amr_jobs = _amr_pipeline(query_file=query_file, single_dict=single_dict, pipeline=pipeline, bulk=single_dict['options']['bulk']) job_amr = amr_jobs['job_amr'] job_amr_dict = amr_jobs['job_amr_dict'] job_amr_datastruct = amr_jobs['job_amr_datastruct'] if not single_dict['options']['bulk']: job_amr_beautify = amr_jobs['job_amr_beautify'] elif config.BACKLOG_ENABLED: - _amr_pipeline(pipeline=pipeline, backlog=True) + _amr_pipeline(query_file=query_file, single_dict=single_dict, pipeline=pipeline, backlog=True) # END AMR PIPELINE # Phylotyper Pipeline if single_dict['options']['stx1']: - pt_jobs = _phylotyper_pipeline('stx1', pipeline=pipeline) + pt_jobs = _phylotyper_pipeline('stx1', query_file=query_file, pipeline=pipeline) job_stx1_beautify = pt_jobs['job_ptstx1_beautify'] elif config.BACKLOG_ENABLED: - _phylotyper_pipeline('stx1', pipeline=pipeline, backlog=True) + _phylotyper_pipeline('stx1', query_file=query_file, pipeline=pipeline, backlog=True) if single_dict['options']['stx2']: - pt_jobs = _phylotyper_pipeline('stx2', pipeline=pipeline) + pt_jobs = _phylotyper_pipeline('stx2', query_file=query_file, pipeline=pipeline) job_stx2_beautify = pt_jobs['job_ptstx2_beautify'] elif config.BACKLOG_ENABLED: - _phylotyper_pipeline('stx2', pipeline=pipeline, backlog=True) + _phylotyper_pipeline('stx2', query_file=query_file, pipeline=pipeline, backlog=True) if single_dict['options']['eae']: - pt_jobs = _phylotyper_pipeline('eae', pipeline=pipeline) + pt_jobs = _phylotyper_pipeline('eae', query_file=query_file, pipeline=pipeline) job_eae_beautify = pt_jobs['job_pteae_beautify'] elif config.BACKLOG_ENABLED: - _phylotyper_pipeline('eae', pipeline=pipeline, backlog=True) + _phylotyper_pipeline('eae', query_file=query_file, pipeline=pipeline, backlog=True) # END Phylotyper pipeline # the base file data for blazegraph From 5750d97cc7872e75b0a4d9d5056ef6443f56797d Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 19:14:24 -0500 Subject: [PATCH 095/122] DEBUG: just getting N/A in phylotyper return. 
Wondering if datastruct_savvy() is having problems parsing VF --- app/middleware/graphers/datastruct_savvy.py | 4 +++- app/modules/spfy.py | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index 964a91fe..90454055 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -208,7 +208,9 @@ def generate_datastruct(query_file, id_file, pickled_dictionary): graph = parse_gene_dict(graph, results['Virulence Factors'], uriGenome, 'VirulenceFactor') elif key == 'Antimicrobial Resistance': graph = parse_gene_dict(graph, results['Antimicrobial Resistance'], uriGenome, - 'AntimicrobialResistanceGene') + 'AntimicrobialResistanceGene' + else: + raise Exception("generate_datastruct() failed to find key for query_file: {0}, pickled_dictionary: {1}, with results dictionary: {2}".format(query_file, pickled_dictionary, str(results))) return graph elif isinstance(results, list): graph = model_to_graph(graph, results, uriIsolate) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index edad68a7..26ba5c72 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -477,7 +477,12 @@ def blob_savvy_enqueue(single_dict, pipeline): # AMR Pipeline if single_dict['options']['amr']: - amr_jobs = _amr_pipeline(query_file=query_file, single_dict=single_dict, pipeline=pipeline, bulk=single_dict['options']['bulk']) + amr_jobs = _amr_pipeline( + query_file=query_file, + single_dict=single_dict, + pipeline=pipeline, + backlog=False, + bulk=single_dict['options']['bulk']) job_amr = amr_jobs['job_amr'] job_amr_dict = amr_jobs['job_amr_dict'] job_amr_datastruct = amr_jobs['job_amr_datastruct'] From db6abd5f394eef0bb8387c810830fb664cd6b137 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 19:23:04 -0500 Subject: [PATCH 096/122] DEBUG: just getting N/A in phylotyper return. 
Wondering if datastruct_savvy() is having problems parsing VF --- app/middleware/graphers/datastruct_savvy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index 90454055..84bf10cc 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -208,7 +208,7 @@ def generate_datastruct(query_file, id_file, pickled_dictionary): graph = parse_gene_dict(graph, results['Virulence Factors'], uriGenome, 'VirulenceFactor') elif key == 'Antimicrobial Resistance': graph = parse_gene_dict(graph, results['Antimicrobial Resistance'], uriGenome, - 'AntimicrobialResistanceGene' + 'AntimicrobialResistanceGene') else: raise Exception("generate_datastruct() failed to find key for query_file: {0}, pickled_dictionary: {1}, with results dictionary: {2}".format(query_file, pickled_dictionary, str(results))) return graph From 926abc9cd34114dad6f834aa6ed227fdd9ed6c1b Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 20:39:58 -0500 Subject: [PATCH 097/122] CHANGE: submodule name --- .gitmodules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index dc342682..05221613 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,9 +2,9 @@ path = app/modules/ectyper/ecoli_serotyping url = https://github.com/phac-nml/ecoli_serotyping.git branch = superphy -[submodule "reactapp"] +[submodule "grouch"] path = reactapp - url = https://github.com/superphy/reactapp.git + url = https://github.com/superphy/grouch.git [submodule "app/modules/PanPredic"] path = app/modules/PanPredic url = https://github.com/superphy/PanPredic.git From 8ed22bd953fe30d162e22a2adf79aa7d389d1fc6 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 20:40:20 -0500 Subject: [PATCH 098/122] CHANGE: submodule path --- .gitmodules | 2 +- reactapp => grouch | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename reactapp => grouch (100%) diff --git a/.gitmodules b/.gitmodules index 05221613..0238eaee 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,7 +3,7 @@ url = https://github.com/phac-nml/ecoli_serotyping.git branch = superphy [submodule "grouch"] - path = reactapp + path = grouch url = https://github.com/superphy/grouch.git [submodule "app/modules/PanPredic"] path = app/modules/PanPredic diff --git a/reactapp b/grouch similarity index 100% rename from reactapp rename to grouch From ec31fd5131840ddfc8044ae7be25967f4d73cda7 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 20:41:26 -0500 Subject: [PATCH 099/122] FIX: grouch (reactapp) now correct HEAD --- grouch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grouch b/grouch index a6b539ac..2088077b 160000 --- a/grouch +++ b/grouch @@ -1 +1 @@ -Subproject commit a6b539ac33f50d6f44f35c4eebb5c53bc5fd495f +Subproject commit 2088077b734f737d8a362c06283dcd87b4218be7 From 38a171a39267c73f0a8fa053b459501a045a6207 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 20:46:36 -0500 Subject: [PATCH 100/122] UPDATE: names in docker-compose --- Dockerfile-reactapp => Dockerfile-grouch | 2 +- docker-compose.yml | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) rename Dockerfile-reactapp => Dockerfile-grouch (95%) diff --git a/Dockerfile-reactapp b/Dockerfile-grouch similarity index 95% rename from Dockerfile-reactapp rename to Dockerfile-grouch index 0479fec6..2a4c229c 100644 --- a/Dockerfile-reactapp +++ b/Dockerfile-grouch @@ -7,7 +7,7 @@ 
 ENV YARN_VERSION 0.17.6

 RUN mkdir /app
 # Install app dependencies & build
-COPY ./reactapp /app
+COPY ./grouch /app
 WORKDIR /app
 # part of a bug fix; see https://github.com/sass/node-sass/issues/1579
 RUN yarn add node-sass
diff --git a/docker-compose.yml b/docker-compose.yml
index ca0452b7..75f1f0a7 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,5 +1,6 @@
 version: '2'
 services:
+  # The main controlling webserver.
   webserver:
     build:
       context: .
@@ -14,11 +15,12 @@ services:
 #    networks:
 #      - dockernet

-  reactapp:
+  # The ReactJS app for the frontend.
+  grouch:
     build:
       context: .
-      dockerfile: Dockerfile-reactapp
-    image: reactapp
+      dockerfile: Dockerfile-grouch
+    image: grouch
     ports:
       - "8090:5000"
     depends_on:
@@ -26,6 +28,7 @@ services:
 #    networks:
 #      - dockernet

+  # The main set of RQ workers.
   worker:
     build:
       context: .
@@ -40,6 +43,7 @@ services:
 #    networks:
 #      - dockernet

+  # Reserved RQ worker for creating & syncing Spfy IDs.
   worker-blazegraph-ids:
     build:
       context: .
@@ -52,6 +56,7 @@ services:
 #    networks:
 #      - dockernet

+  # Extra priority workers serving the frontend.
   worker-priority:
     build:
       context: .
@@ -62,11 +67,13 @@ services:
     depends_on:
       - webserver

+  # Redis DB for the RQ workers + some frontend tasks.
   redis:
     image: redis:3.2
 #    networks:
 #      - dockernet

+  # Blazegraph DB for LTS.
   blazegraph:
     image: superphy/blazegraph:2.1.4-inferencing
     ports:
@@ -74,6 +81,7 @@ services:
     volumes:
       - /var/lib/jetty/

+  # MongoDB for token-based accounts.
   mongodb:
     image: mongo:3.6.1-jessie
     environment:

From 196a70b166ddae754fa9bb4b0bc3a92615bdfea8 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 10 Mar 2018 21:29:24 -0500
Subject: [PATCH 101/122] DEBUG: some more checks to see why phylotyper doesn't
 see VFs

---
 app/middleware/graphers/datastruct_savvy.py | 2 ++
 app/modules/ectyper/call_ectyper.py | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py
index 84bf10cc..a0c793ab 100644
--- a/app/middleware/graphers/datastruct_savvy.py
+++ b/app/middleware/graphers/datastruct_savvy.py
@@ -198,6 +198,8 @@ def generate_datastruct(query_file, id_file, pickled_dictionary):

     # Unpickle.
     results = unpickle(pickled_dictionary)
+    # Ensure this isn't empty.
+    assert results
     # Check if we have a model or a dictionary.
     if isinstance(results, dict):
         # graphing functions
diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py
index 416e882c..f25bdeb0 100644
--- a/app/modules/ectyper/call_ectyper.py
+++ b/app/modules/ectyper/call_ectyper.py
@@ -58,11 +58,11 @@ def call_ectyper_vf(args_dict):
         # TODO: edit ectyper so we're not using this duct-tape approach
         # we are calling tools_controller on only one file, so grab that dict
         key, ectyper_dict = ectyper_dict.popitem()
-
+        assert isinstance(ectyper_dict, dict)
         # TODO: convert this to a VF model.
         # Path for the pickle dump.
         p = filepath + '_ectyper_vf.p'
-        pickle.dump(ectyper_dict,open(p,'wb'))
+        dump(ectyper_dict, p)

         return p

From 581820656e1c3677d651a428f6cc6f93cc092998 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 10 Mar 2018 21:32:07 -0500
Subject: [PATCH 102/122] DEBUG: have phylotyper throw an exception if it can't
 retrieve anything from the db

---
 app/modules/phylotyper/phylotyper.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py
index 3c04cde8..9522040c 100644
--- a/app/modules/phylotyper/phylotyper.py
+++ b/app/modules/phylotyper/phylotyper.py
@@ -314,6 +314,7 @@ def ignorant(genome_uri, subtype, pickle_file):
                 pt_dict['stop'][k].append(row['endPos'])

     if not results:
+        raise Exception("ignorant() could not find phylotyper results for genome_uri: {0}, subtype: {1}, with pickle_file: {2}".format(genome_uri, subtype, pickle_file))
         pt_dict = {
             'subtype': 'No loci'
         }

From f4e2b3969457bf9fd8f68cf743ddec0f3dbedacc Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 10 Mar 2018 21:59:06 -0500
Subject: [PATCH 103/122] DEBUG: looks like we're not getting that far into it,
 have phylotyper raise an exception if we can't read any output from
 phylotyper directly

---
 app/modules/phylotyper/phylotyper.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py
index 9522040c..85718b23 100644
--- a/app/modules/phylotyper/phylotyper.py
+++ b/app/modules/phylotyper/phylotyper.py
@@ -110,6 +110,12 @@ def to_dict(pt_file, subtype, pickle_file):
     pt_results = pd.read_table(pt_file)

     if pt_results['phylotyper_assignment'].empty or pt_results['phylotyper_assignment'].values[0] == 'Subtype loci not found in genome':
+        raise Exception("phylotyper.to_dict() couldnt find loci for file: {0}, subtype: {1}, pickle_file, {2}, with dataframe {3}".format(
+            pt_file,
+            subtype,
+            pickle_file,
+            str(pt_results)
+        ))
         pt_results = {
             'subtype': 'No loci',
         }

From 943b36bbe78ff93e41551a4f71b0afe2bb89ceda Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 10 Mar 2018 22:25:16 -0500
Subject: [PATCH 104/122] DEBUG: phylotyper.to_dict() getting an empty df, is
 markerseqs = MarkerSequences(loci) blank?
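If it is, a guard along these lines would surface the problem before an empty
query.fasta gets written (hypothetical sketch, not part of this patch; names as
used in phylotyper.phylotyper()):

    # Hypothetical guard: fail fast if the reference marker sequences
    # come back empty instead of writing an empty query.fasta.
    markerseqs = MarkerSequences(loci)
    fasta = markerseqs.fasta(uriIsolate)
    if not fasta:
        raise Exception('MarkerSequences returned no fasta for loci: {0}, isolate: {1}'.format(
            str(loci), str(uriIsolate)))
    with open(query_file, 'w') as fh:
        fh.write(fasta)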
---
 app/modules/phylotyper/phylotyper.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py
index 85718b23..207a7cf2 100644
--- a/app/modules/phylotyper/phylotyper.py
+++ b/app/modules/phylotyper/phylotyper.py
@@ -88,6 +88,11 @@ def phylotyper(uriIsolate, subtype, result_file, id_file=None):

     else:
         # No loci
+        raise Exception('phylotyper.phylotyper() could not retrieve reference sequences for loci: {0}, uriIsolate: {1}, subtype: {2}'.format(
+            str(loci),
+            str(uriIsolate),
+            subtype
+        ))
         # Report no loci status in output
         with open(output_file, 'w') as fh:
             fh.write('\t'.join(['genome','tree_label','subtype','probability','phylotyper_assignment','loci']))

From 2b619879371d4684757c827be67dc2daff948ffc Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 10 Mar 2018 22:51:28 -0500
Subject: [PATCH 105/122] CHANGE: have eae alleles all be called eae

---
 app/middleware/graphers/datastruct_savvy.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py
index a0c793ab..7f251f73 100644
--- a/app/middleware/graphers/datastruct_savvy.py
+++ b/app/middleware/graphers/datastruct_savvy.py
@@ -102,6 +102,10 @@ def parse_gene_dict(graph, gene_dict, uriGenome, geneType):
         # some gene names, esp those which are effectively a description,
         # have spaces
         gene_name = gene_record['GENE_NAME'].replace(' ', '_')
+        # Workaround to assume all eae alleles are just eae.
+        # See https://github.com/superphy/spfy/pull/274
+        if gene_name.startswith('eae'):
+            gene_name = 'eae'
         uriGene = gu(':' + gene_name)
         # define the object type of the gene
         graph.add((uriGene, gu('rdf:type'), gu(':' + geneType)))

From d0dc50c5ed3d48fd13f00c6b74b76c3f4b71cdb0 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 10 Mar 2018 23:20:38 -0500
Subject: [PATCH 106/122] UPDATE: pull the optimization branch for blazegraph
 so we're up to date with production

---
 app/modules/spfy.py | 2 +-
 docker-compose.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index 26ba5c72..53d0846b 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -94,7 +94,7 @@
     else:
         ttl_value = config.DEFAULT_RESULT_TTL

-    # datastruct_savvy() stores result to Blazegraph.
+    # datastruct_savvy() graphs and uploads result to Blazegraph.
     job_ectyper_datastruct_vf = multiples.enqueue(
         datastruct_savvy,
         query_file,
diff --git a/docker-compose.yml b/docker-compose.yml
index 75f1f0a7..7b312419 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -75,7 +75,7 @@ services:
   # Blazegraph DB for LTS.
   blazegraph:
-    image: superphy/blazegraph:2.1.4-inferencing
+    image: superphy/blazegraph:2.1.4-optimization
     ports:
       - "8080:8080"
     volumes:

From 2dcc35a481cb7062708c8b494b1c2e8368a3d73d Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sun, 11 Mar 2018 00:22:33 -0500
Subject: [PATCH 107/122] DEBUG: some checks on phylotyper's job deps

---
 app/modules/spfy.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index 53d0846b..4210386d 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -298,6 +298,10 @@ def _amr_pipeline(query_file, single_dict, pipeline=None, backlog=False, bulk=Fa

 def _phylotyper_pipeline(subtype, query_file, pipeline=None, backlog=False):
     # Alias.
job_id = pipeline.jobs['job_id'].rq_job + job_ectyper_datastruct_vf = pipeline.jobs['job_ectyper_datastruct_vf'].rq_job + assert job_id + assert job_ectyper_datastruct_vf + # Alias queues. if not backlog: multiples = multiples_q else: @@ -313,7 +317,7 @@ def _phylotyper_pipeline(subtype, query_file, pipeline=None, backlog=False): subtype, tsvfile, id_file=query_file + '_id.txt', - depends_on=pipeline.jobs['job_ectyper_datastruct_vf'].rq_job) + depends_on=job_ectyper_datastruct_vf) pipeline.jobs.update({ 'job'+jobname: Job( rq_job=job_pt, From 4d1cf017e282c992110bc983f57a90fbf1bce865 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 11 Mar 2018 01:36:36 -0500 Subject: [PATCH 108/122] UPDATE: submodule blazegraph into spfy (no longer pulling from Docker Hub) --- .gitmodules | 3 +++ docker-blazegraph | 1 + docker-compose.yml | 5 ++++- 3 files changed, 8 insertions(+), 1 deletion(-) create mode 160000 docker-blazegraph diff --git a/.gitmodules b/.gitmodules index 0238eaee..d23df7f5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -8,3 +8,6 @@ [submodule "app/modules/PanPredic"] path = app/modules/PanPredic url = https://github.com/superphy/PanPredic.git +[submodule "docker-blazegraph"] + path = docker-blazegraph + url = https://github.com/superphy/docker-blazegraph.git diff --git a/docker-blazegraph b/docker-blazegraph new file mode 160000 index 00000000..51553836 --- /dev/null +++ b/docker-blazegraph @@ -0,0 +1 @@ +Subproject commit 515538362ddec870f425482958f0e773ef6f1953 diff --git a/docker-compose.yml b/docker-compose.yml index 7b312419..a8f0275f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -75,7 +75,10 @@ services: # Blazegraph DB for LTS. blazegraph: - image: superphy/blazegraph:2.1.4-optimization + build: + context: ./docker-blazegraph/2.1.4-inferencing + dockerfile: ./docker-blazegraph/2.1.4-inferencing/Dockerfile + image: blazegraph ports: - "8080:8080" volumes: From 24749f0472f92045e883111e3dbea89a3ceeb68e Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 11 Mar 2018 01:42:09 -0500 Subject: [PATCH 109/122] FIX: ref Dockerfile for blazegraph --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index a8f0275f..5471b67f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -77,7 +77,7 @@ services: blazegraph: build: context: ./docker-blazegraph/2.1.4-inferencing - dockerfile: ./docker-blazegraph/2.1.4-inferencing/Dockerfile + dockerfile: Dockerfile # inherits the context from above. 
image: blazegraph ports: - "8080:8080" From 29ebd25ae9914e4b4975da12c90b4b41068546b8 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 11 Mar 2018 03:31:52 -0400 Subject: [PATCH 110/122] FIX: l0pht never warned me about the craziness of jvm args --- docker-blazegraph | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-blazegraph b/docker-blazegraph index 51553836..0612a9be 160000 --- a/docker-blazegraph +++ b/docker-blazegraph @@ -1 +1 @@ -Subproject commit 515538362ddec870f425482958f0e773ef6f1953 +Subproject commit 0612a9beea4699eff8fd55820799c312747a0c76 From 87f8192c7a6e8531c48fece5ae77282ecd589c83 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 11 Mar 2018 18:52:39 -0400 Subject: [PATCH 111/122] DEBUG: check that phylotyper is completing correctly --- app/modules/phylotyper/phylotyper.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py index 207a7cf2..e99d835d 100644 --- a/app/modules/phylotyper/phylotyper.py +++ b/app/modules/phylotyper/phylotyper.py @@ -30,7 +30,12 @@ logger = logging.getLogger(__name__) - +def _check_tsv(pt_file): + pt_results = pd.read_table(pt_file) + try: + assert pt_results + except: + raise Exception('_check_tsv() failed (df is empty) for pt_file: ' + pt_file) def phylotyper(uriIsolate, subtype, result_file, id_file=None): """ Wrapper for Phylotyper @@ -71,6 +76,7 @@ def phylotyper(uriIsolate, subtype, result_file, id_file=None): # Get alleles for this genome markerseqs = MarkerSequences(loci) fasta = markerseqs.fasta(uriIsolate) + # fasta = temp_dir = mkdtemp(prefix='pt'+subtype, dir=config.DATASTORE) query_file = os.path.join(temp_dir, 'query.fasta') @@ -81,7 +87,7 @@ def phylotyper(uriIsolate, subtype, result_file, id_file=None): with open(query_file, 'w') as fh: fh.write(fasta) - subprocess.call(['phylotyper', 'genome', '--noplots', + subprocess.check_call(['phylotyper', 'genome', '--noplots', subtype, temp_dir, query_file]) @@ -101,6 +107,8 @@ def phylotyper(uriIsolate, subtype, result_file, id_file=None): shutil.move(output_file, result_file) shutil.rmtree(temp_dir) + _check_tsv(result_file) + return result_file From f018d21f4d8512586fc21f72f1b377b5ea29c5e5 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 11 Mar 2018 23:52:41 -0400 Subject: [PATCH 112/122] CHANGE: pin the RQ worker to the docker-flask-conda image we have up on corefacility --- Dockerfile-rq | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile-rq b/Dockerfile-rq index 1a4b89b9..15471e61 100644 --- a/Dockerfile-rq +++ b/Dockerfile-rq @@ -1,6 +1,6 @@ #this is for RQ for service worker -FROM superphy/docker-flask-conda:latest +FROM superphy/docker-flask-conda:master-6.1.0 COPY ./app /app From 747b849dcc804f3c375582edd5ad1ed28513f2d9 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 12 Mar 2018 02:16:41 -0400 Subject: [PATCH 113/122] ADD: workaround to run phylotyper.phylotyper() in its own set of workers. This might be a better approach for the future as it lets us define deps directly through Dockerfiles instead of with a unified conda env. 
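The routing itself is plain RQ: a job placed on a named queue is only picked up by
workers started for that queue, so the split comes down to something like this
(minimal sketch; redis_conn/config wiring as elsewhere in spfy.py,
some_phylotyper_task is a stand-in):

    # Minimal sketch: the 'phylotyper' queue is consumed only by workers
    # launched as `rq worker -c config phylotyper` (the new worker-phylotyper
    # containers with their own conda env); the generic 'multiples' workers
    # never see these jobs.
    from redis import Redis
    from rq import Queue

    redis_conn = Redis()
    phylotyper_q = Queue('phylotyper', connection=redis_conn)
    job = phylotyper_q.enqueue(some_phylotyper_task, 'stx1')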
--- Dockerfile-rq | 2 +- Dockerfile-rq-phylotyper | 21 +++++++++++++++++++ app/modules/spfy.py | 4 +++- app/supervisord-rq-phylotyper.conf | 33 ++++++++++++++++++++++++++++++ docker-compose.yml | 15 ++++++++++++++ 5 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 Dockerfile-rq-phylotyper create mode 100644 app/supervisord-rq-phylotyper.conf diff --git a/Dockerfile-rq b/Dockerfile-rq index 15471e61..1a4b89b9 100644 --- a/Dockerfile-rq +++ b/Dockerfile-rq @@ -1,6 +1,6 @@ #this is for RQ for service worker -FROM superphy/docker-flask-conda:master-6.1.0 +FROM superphy/docker-flask-conda:latest COPY ./app /app diff --git a/Dockerfile-rq-phylotyper b/Dockerfile-rq-phylotyper new file mode 100644 index 00000000..4b9dc2dd --- /dev/null +++ b/Dockerfile-rq-phylotyper @@ -0,0 +1,21 @@ +#this is for RQ for service worker + +FROM superphy/docker-flask-conda:master-6.1.0 + +COPY ./app /app + +COPY /app/supervisord-rq-phylotyper.conf /etc/supervisor/conf.d/supervisord.conf + +RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh + +ENV PATH /opt/conda/bin:$PATH + +ENV PATH /opt/conda/envs/backend/bin:$PATH + +RUN cat /etc/supervisor/conf.d/supervisord.conf +RUN which python +RUN which conda +RUN which uwsgi +RUN which rq + +CMD ["/usr/bin/supervisord"] diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 4210386d..aea71fb4 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -44,6 +44,8 @@ singles_q = Queue('singles', connection=redis_conn) multiples_q = Queue('multiples', connection=redis_conn, default_timeout=config.DEFAULT_TIMEOUT) +phylotyper_q = Queue('phylotyper', connection=redis_conn, + default_timeout=config.DEFAULT_TIMEOUT) blazegraph_q = Queue('blazegraph', connection=redis_conn) if config.BACKLOG_ENABLED: # backlog queues @@ -311,7 +313,7 @@ def _phylotyper_pipeline(subtype, query_file, pipeline=None, backlog=False): tsvfile = query_file + jobname + '.tsv' picklefile = query_file + jobname + '.p' - job_pt = multiples.enqueue( + job_pt = phylotyper_q.enqueue( phylotyper.phylotyper, None, subtype, diff --git a/app/supervisord-rq-phylotyper.conf b/app/supervisord-rq-phylotyper.conf new file mode 100644 index 00000000..5b0d1cd7 --- /dev/null +++ b/app/supervisord-rq-phylotyper.conf @@ -0,0 +1,33 @@ +[supervisord] +nodaemon=true + +[program:rqworkermultiples] +; Point the command to the specific rq command you want to run. +; If you use virtualenv, be sure to point it to +; /path/to/virtualenv/bin/rq +; Also, you probably want to include a config module to configure this +; worker. For more info on that, see http://python-rq.org/docs/workers/ +environment=PATH='%(ENV_PATH)s:/opt/conda/envs/backend/bin' +command=/opt/conda/envs/backend/bin/rq worker -c config phylotyper +process_name=%(program_name)s-%(process_num)s + +; If you want to run more than one worker instance, increase this +numprocs=2 + +; This is the directory from which RQ is ran. Be sure to point this to the +; directory where your source code is importable from +directory=/app + +; RQ requires the TERM signal to perform a warm shutdown. 
If RQ does not die +; within 10 seconds, supervisor will forcefully kill it +stopsignal=TERM + +; These are up to you +autostart=true +autorestart=true + +; redirect stdout and stderr for docker logs +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 diff --git a/docker-compose.yml b/docker-compose.yml index 5471b67f..b14ccb6e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -43,6 +43,21 @@ services: # networks: # - dockernet + # The small set of workers specific to phylotyper's env. + worker-phylotyper: + build: + context: . + dockerfile: Dockerfile-rq-phylotyper + image: backend-rq-phylotyper + ports: + - "9181:9181" #this is for debugging, drop a shell and run rq-dashboard if you need to see jobs + volumes_from: + - webserver + depends_on: + - webserver + # networks: + # - dockernet + # Reserved RQ worker for creating & syncing Spfy IDs. worker-blazegraph-ids: build: From 8de636e64658a99e5b93f1bf59214523593c7dcd Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 12 Mar 2018 02:27:18 -0400 Subject: [PATCH 114/122] FIX: no ports for phylo --- docker-compose.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index b14ccb6e..6e5f9f9d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -49,8 +49,6 @@ services: context: . dockerfile: Dockerfile-rq-phylotyper image: backend-rq-phylotyper - ports: - - "9181:9181" #this is for debugging, drop a shell and run rq-dashboard if you need to see jobs volumes_from: - webserver depends_on: From e4df53ba6350495efbc54a1912c0ba706de043d7 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 12 Mar 2018 02:32:52 -0400 Subject: [PATCH 115/122] CHANGE: move the model import directly into the beautify func --- app/modules/phylotyper/phylotyper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py index e99d835d..aa6066df 100644 --- a/app/modules/phylotyper/phylotyper.py +++ b/app/modules/phylotyper/phylotyper.py @@ -24,7 +24,6 @@ import config from middleware.graphers.turtle_utils import generate_uri as gu, fulluri_to_basename as u2b, normalize_rdfterm as normalize from middleware.blazegraph.upload_graph import upload_graph -from middleware.modellers import model_phylotyper from modules.phylotyper import ontology, exceptions from modules.phylotyper.sequences import MarkerSequences, phylotyper_query, genename_query @@ -175,6 +174,7 @@ def beautify(p_file, genome): """ + from middleware.modellers import model_phylotyper # See https://github.com/superphy/spfy/issues/271 pt_dict = pickle.load(open(p_file, 'rb')) From 518d7b993428350b06304c087747d80d87a68504 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 12 Mar 2018 02:57:22 -0400 Subject: [PATCH 116/122] FIX: phylotyper is working!!!! 
also fixed our _check_tsv()
---
 app/modules/phylotyper/phylotyper.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py
index aa6066df..73c66625 100644
--- a/app/modules/phylotyper/phylotyper.py
+++ b/app/modules/phylotyper/phylotyper.py
@@ -31,10 +31,8 @@
 def _check_tsv(pt_file):
     pt_results = pd.read_table(pt_file)
-    try:
-        assert pt_results
-    except:
-        raise Exception('_check_tsv() failed (df is empty) for pt_file: ' + pt_file)
+    if pt_results.empty:
+        raise Exception('_check_tsv() failed as pt_results.empty == true for pt_file: {0} with df content: {1}'.format(pt_file, str(pt_results)))

 def phylotyper(uriIsolate, subtype, result_file, id_file=None):
     """ Wrapper for Phylotyper

From a8d0b67796df7b02c6a0f113b8aa7d9726bbe622 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Mar 2018 03:06:57 -0400
Subject: [PATCH 117/122] FIX: typo in new modeller for phylotyper

---
 app/middleware/modellers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py
index 8981b4ce..3bb35184 100644
--- a/app/middleware/modellers.py
+++ b/app/middleware/modellers.py
@@ -69,7 +69,7 @@ def model_phylotyper(lst):
     """
     phylotyper_list = [
         {
-            'analysis:':d['subtype_gene'],
+            'analysis':d['subtype_gene'],
             'contigid':d['contig'],
             'filename':d['genome'],
             'hitcutoff':'n/a',
@@ -80,5 +80,5 @@ def model_phylotyper(lst):
         }
         for d in lst]
-
+
     return phylotyper_list

From 77f6e77c8b0e2a86e672a304e09e393e259f934c Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Mar 2018 03:08:57 -0400
Subject: [PATCH 118/122] CHANGE: re-allow no loci found, but maintain checks
 on phylotyper call

---
 app/modules/phylotyper/phylotyper.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py
index 73c66625..c7df6241 100644
--- a/app/modules/phylotyper/phylotyper.py
+++ b/app/modules/phylotyper/phylotyper.py
@@ -120,12 +120,12 @@ def to_dict(pt_file, subtype, pickle_file):
     pt_results = pd.read_table(pt_file)

     if pt_results['phylotyper_assignment'].empty or pt_results['phylotyper_assignment'].values[0] == 'Subtype loci not found in genome':
-        raise Exception("phylotyper.to_dict() couldnt find loci for file: {0}, subtype: {1}, pickle_file, {2}, with dataframe {3}".format(
-            pt_file,
-            subtype,
-            pickle_file,
-            str(pt_results)
-        ))
+        # raise Exception("phylotyper.to_dict() couldnt find loci for file: {0}, subtype: {1}, pickle_file, {2}, with dataframe {3}".format(
+        #     pt_file,
+        #     subtype,
+        #     pickle_file,
+        #     str(pt_results)
+        # ))
         pt_results = {
             'subtype': 'No loci',
         }
@@ -331,7 +331,7 @@ def ignorant(genome_uri, subtype, pickle_file):
                 pt_dict['stop'][k].append(row['endPos'])

     if not results:
-        raise Exception("ignorant() could not find phylotyper results for genome_uri: {0}, subtype: {1}, with pickle_file: {2}".format(genome_uri, subtype, pickle_file))
+        # raise Exception("ignorant() could not find phylotyper results for genome_uri: {0}, subtype: {1}, with pickle_file: {2}".format(genome_uri, subtype, pickle_file))
         pt_dict = {
             'subtype': 'No loci'
         }

From 7fa4743da0c80521ad7a4653adcf0918f4c86396 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Mar 2018 03:32:04 -0400
Subject: [PATCH 119/122] FIX: hide VF results if not chosen

---
 app/modules/spfy.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index aea71fb4..beaa8f51 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -53,7 +53,7 @@ backlog_multiples_q = Queue(
     'backlog_multiples', connection=redis_conn, default_timeout=config.DEFAULT_TIMEOUT)

-def _ectyper_pipeline_vf(query_file, single_dict, pipeline=None, backlog=False):
+def _ectyper_pipeline_vf(query_file, single_dict, display_vf=True, pipeline=None, backlog=False):
     """
     Enqueue all the jobs required for VF.
     """
@@ -132,7 +132,7 @@ def _ectyper_pipeline_vf(query_file, single_dict, pipeline=None, backlog=False):
             name='job_ectyper_beautify_vf',
             transitory=False,
             backlog=backlog,
-            display=True
+            display=display_vf
         )
     })
     return d
@@ -416,12 +416,24 @@ def blob_savvy_enqueue(single_dict, pipeline):
             )
         })

+    # A check to allow hiding of VF results if only Phylotyper chosen.
+    if single_dict['options']['stx1'] or single_dict['options']['stx2'] or single_dict['options']['eae']:
+        chose_phylotyper = True
+    else:
+        chose_phylotyper = False
+    # Didn't choose VF, but chose phylotyper.
+    if not single_dict['options']['vf'] and chose_phylotyper:
+        # Don't display VF.
+        display_vf = False
+    else:
+        display_vf = True
     ## ECTyper (VF & Serotype)
     # VF
     if single_dict['options']['vf']:
         ectyper_vf_jobs = _ectyper_pipeline_vf(
             query_file,
             single_dict,
+            display_vf=display_vf,
             pipeline=pipeline
         )
         # pipeline.jobs.update(ectyper_vf_jobs)

From ecc84be10a6d30fc4a34d2c98e821fb40a148395 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Mar 2018 03:34:48 -0400
Subject: [PATCH 120/122] CHANGE: also no longer "require" user to select VF in
 display (will hide)

---
 app/modules/spfy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index beaa8f51..a18f01a9 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -429,7 +429,7 @@ def blob_savvy_enqueue(single_dict, pipeline):
         display_vf = True
     ## ECTyper (VF & Serotype)
     # VF
-    if single_dict['options']['vf']:
+    if single_dict['options']['vf'] or chose_phylotyper:
         ectyper_vf_jobs = _ectyper_pipeline_vf(
             query_file,
             single_dict,

From b7e672318cbaefe0e188e9d56a275c412cecdbaf Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Mar 2018 03:49:51 -0400
Subject: [PATCH 121/122] FIX: rewrite the option for vf as well for phylo

---
 app/modules/spfy.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index a18f01a9..262fae56 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -72,6 +72,8 @@ def _ectyper_pipeline_vf(query_file, single_dict, display_vf=True, pipeline=None
     # This copy is passed to the old ECTyper.
     single_dict_vf = copy.deepcopy(single_dict)
     single_dict_vf['options']['serotype'] = False
+    # Rewrite the VF option too, in case this was called for Phylotyper.
+    single_dict_vf['options']['vf'] = True
     # Enqueue the old ECTyper
     job_ectyper_vf = singles.enqueue(
         call_ectyper_vf,

From 11a2e3bc811672d2738eb379fc60efe715831dfb Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Mar 2018 04:07:47 -0400
Subject: [PATCH 122/122] UPDATE: reactapp 6.2.0

---
 grouch | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/grouch b/grouch
index 2088077b..ebb121e7 160000
--- a/grouch
+++ b/grouch
@@ -1 +1 @@
-Subproject commit 2088077b734f737d8a362c06283dcd87b4218be7
+Subproject commit ebb121e7b2befd7df3c2733c22e06de69d8de1cf
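Taken together, patches 119-121 reduce the VF gating in blob_savvy_enqueue() to the
following (a condensed sketch, not a verbatim excerpt; option keys as used throughout
spfy.py):

    # Phylotyper depends on the VF pipeline, so VF is enqueued whenever any
    # phylotyper subtype is requested, but its results are only displayed
    # when the user actually asked for VF.
    options = single_dict['options']
    chose_phylotyper = options['stx1'] or options['stx2'] or options['eae']
    display_vf = options['vf'] or not chose_phylotyper

    if options['vf'] or chose_phylotyper:
        ectyper_vf_jobs = _ectyper_pipeline_vf(
            query_file, single_dict,
            display_vf=display_vf, pipeline=pipeline)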