From 12b197e4021935261f720304c1c5712c751b11ed Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Feb 2018 14:36:23 -0500
Subject: [PATCH 001/122] START: added test for new ectyper call

---
 app/tests/test_modules.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py
index bb92ecac..63466e49 100644
--- a/app/tests/test_modules.py
+++ b/app/tests/test_modules.py
@@ -3,6 +3,7 @@
 import pytest
 import os
+import subprocess
 import cPickle as pickle

 from modules.qc.qc import qc, check_header_parsing, check_ecoli
@@ -57,7 +58,10 @@ def test_qc():
     for non_ecoli_genome in GENOMES_LIST_NOT_ECOLI:
         assert qc(non_ecoli_genome) == False

-def test_ectyper():
+def test_ectyper_vf():
+    """Check the ECTyper from `superphy` which is used for virulence factor
+    identification. Installed as a submodule in the `modules` directory.
+    """
     for ecoli_genome in GENOMES_LIST_ECOLI:
         # basic ECTyper check
         single_dict = dict(ARGS_DICT)
@@ -70,6 +74,14 @@ def test_ectyper():
         json_return = beautify(single_dict, pickled_ectyper_dict)
         assert type(json_return) == list

+def test_ectyper_serotype():
+    """Check the ECTyper from `master` which only performs serotyping.
+    Installed in the conda environment.
+    """
+    for ecoli_genome in GENOMES_LIST_ECOLI:
+        ret_code = subprocess.call(['ectyper', '-i', ecoli_genome])
+        assert ret_code == 0
+
 def test_amr():
     ecoli_genome = GENOMES_LIST_ECOLI[0]
     # this generates the .tsv

From 4635e6e9c1d798bfc13ded12208ec6f58274439d Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Feb 2018 22:46:38 -0500
Subject: [PATCH 002/122] ADD: new calls + tests for them

---
 app/modules/ectyper/call_ectyper.py | 64 ++++++++++++++++++++++-------
 app/modules/spfy.py                 | 52 ++++++++++++++++++-----
 app/tests/test_modules.py           | 13 +++++-
 3 files changed, 101 insertions(+), 28 deletions(-)

diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py
index 91f11f23..73273e2c 100644
--- a/app/modules/ectyper/call_ectyper.py
+++ b/app/modules/ectyper/call_ectyper.py
@@ -4,6 +4,7 @@
 import subprocess
 import cPickle as pickle
 import tempfile
+import pandas as pd
 from ast import literal_eval
 from os.path import basename
 from modules.loggingFunctions import initialize_logging
@@ -11,23 +12,21 @@
 log_file = initialize_logging()
 log = logging.getLogger(__name__)

-def call_ectyper(args_dict):
-    # i don't intend to import anything from ECTyper (there are a lot of
-    # imports in it - not sure if we'll use them all)
-    # concurrency is handled at the batch level, not here (note: this might change)
-    # we only use ectyper for serotyping and vf, amr is handled by rgi directly
-
+def call_ectyper_vf(args_dict):
+    """ Use the old version of ECTyper at `superphy` for VF.
+    """
+    # Init return.
     p = 'no pickle'
-
-    if args_dict['options']['serotype'] or args_dict['options']['vf']:
-        #hack to allow ectyper to run in docker
+    if args_dict['options']['vf']:
+        # Workaround to allow ECTyper to run in Docker.
         filepath=(args_dict['i'])
         wrapper_dir = os.path.dirname(os.path.abspath(__file__))
-        # this temp file path is req for ectyper
+        # This temp file path is required for ectyper.
         temp = tempfile.NamedTemporaryFile()
+        # Copy the actual genome file into the tempfile.
        shutil.copyfile(args_dict['i'], temp.name)
-        # create a copy of args_dict so we don't modify it from calling functions
+        # Create a copy of args_dict and update with the tempfile.
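#  A minimal sketch of why the copy below matters: dict() is a shallow copy,
#  so rebinding a top-level key is safe while nested values stay shared:
#      original = {'i': 'genome.fasta', 'options': {'vf': True}}
#      copied = dict(original)
#      copied['i'] = '/tmp/tmpXYZ'      # original['i'] is untouched
#      copied['options']['vf'] = False  # original['options'] changes too
#  Only the top-level 'i' key is rebound here; the file names above are
#  illustrative values, not paths from the patch.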
        args_dict = dict(args_dict)
        args_dict['i']= temp.name
        log.debug(temp.name)
@@ -42,14 +41,15 @@ def call_ectyper(args_dict):
             int(args_dict['options']['vf'])),
             '-pi',
             str(args_dict['pi'])
         ])
-        # removing that temp file we created
+        # Removing that temp file we created.
         temp.close()

-        # because we are using check_output, this catches any print messages from tools_controller
+        # Because we are using check_output, this catches any print messages
+        # from tools_controller.
         # TODO: switch to pipes
         if 'error' in ectyper_dict.lower():
             log.fatal('ECTyper failed for ' + args_dict['i'])
-            raise Exception('ECTper failed for' + filepath)
+            raise Exception('ECTyper VF failed for ' + filepath)

         ectyper_dict = literal_eval(ectyper_dict)
@@ -57,7 +57,41 @@ def call_ectyper(args_dict):
         # we are calling tools_controller on only one file, so grab that dict
         key, ectyper_dict = ectyper_dict.popitem()

-        p = os.path.join(filepath + '_ectyper.p')
+        p = os.path.join(filepath + '_ectyper_vf.p')
         pickle.dump(ectyper_dict,open(p,'wb'))

     return p
+
+def call_ectyper_serotype(args_dict):
+    """Use the new version of ECTyper at `master` for serotyping.
+    """
+    genome_file = args_dict['i']
+    pi = args_dict['options']['pi']
+    pl = '50' # This is the default in ECTyper.
+    output_dir = tempfile.mkdtemp()
+    ret_code = subprocess.call([
+        "ectyper",
+        "-i",
+        genome_file,
+        "-pi",
+        pi,
+        "-pl",
+        pl,
+        "-o",
+        output_dir
+    ])
+    if ret_code == 0:
+        output_file = os.path.join(output_dir, 'output.csv')
+        df = pd.read_csv(output_file)
+        # Add the PI to our DataFrame.
+        df['pi'] = pi
+        # Add the PL to our DataFrame.
+        df['pl'] = pl
+        # The final result file from ECTyper serotyping. This copies it back to
+        # config.DATASTORE
+        p = os.path.join(genome_file + '_ectyper_serotype.p')
+        with open(p, 'w') as fh:
+            df.to_csv(fh, header=True, index_label='genome')
+        return p
+    else:
+        raise Exception('ECTyper Serotyping failed for ' + genome_file)

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index 7d4754e3..3cc679c4 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -72,18 +72,48 @@ def blob_savvy_enqueue(single_dict):

     # ECTYPER PIPELINE
     def ectyper_pipeline(singles, multiples):
-        # the ectyper call is special in that it requires the entire arguments
-        # to decide whether to carry the serotype option flag, virulance
-        # factors option flag, and percent identity field
-        job_ectyper = singles.enqueue(
-            call_ectyper, single_dict, depends_on=job_id)
-        # after this call, the result is stored in Blazegraph
-        # new to 4.3.3
-        # if bulk uploading is set, we return the datastruct as the end task
-        # to poll for job completion, therefore must set ttl of -1
+        """The ectyper call is special in that it requires the entire arguments
+        to decide whether to carry the serotype option flag, virulence
+        factors option flag, and percent identity field. We use the old ECTyper
+        for VF and the new ECTyper for Serotyping.
+        """
+        if single_dict['options']['vf']:
+            # Create a copy of the arguments dictionary and disable Serotype.
+            # This copy is passed to the old ECTyper.
+            single_dict_vf = dict(single_dict)
+            single_dict_vf['options']['serotype'] = False
+            # Enqueue the old ECTyper
+            job_ectyper_vf = singles.enqueue(
+                call_ectyper_vf,
+                single_dict_vf,
+                depends_on=job_id)
+        if single_dict['options']['serotype']:
+            # Enqueue the new ECTyper
+            job_ectyper_serotype = multiples.enqueue(
+                call_ectyper_serotype,
+                single_dict,
+                depends_on=job_id)
+
+        # datastruct_savvy() stores result to Blazegraph.
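#  A note on the TTLs used just below, assuming stock python-rq semantics:
#  result_ttl=-1 keeps a job's return value in Redis indefinitely, 0 drops it
#  immediately, and leaving it unset keeps it for the RQ default of 500 s
#  (the same 500 that config.py later pins as DEFAULT_RESULT_TTL), e.g.:
#      q.enqueue(datastruct_savvy, args, result_ttl=-1)  # poll-able forever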
        if single_dict['options']['bulk']:
-            job_ectyper_datastruct = multiples.enqueue(
-                datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_ectyper.p', depends_on=job_ectyper, result_ttl=-1)
+            # If bulk uploading is set, we return the datastruct as the end task
+            # to poll for job completion, therefore must set ttl of -1.
+            if single_dict['options']['vf']:
+                job_ectyper_datastruct = multiples.enqueue(
+                    datastruct_savvy,
+                    query_file,
+                    query_file + '_id.txt',
+                    query_file + '_ectyper_vf.p',
+                    depends_on=job_ectyper_vf,
+                    result_ttl=-1)
+            if single_dict['options']['serotype']:
+                job_ectyper_datastruct = multiples.enqueue(
+                    datastruct_savvy,
+                    query_file,
+                    query_file + '_id.txt',
+                    query_file + '_ectyper_serotype.p',
+                    depends_on=job_ectyper_serotype,
+                    result_ttl=-1)
         else:
             job_ectyper_datastruct = multiples.enqueue(
                 datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_ectyper.p', depends_on=job_ectyper)

diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py
index 63466e49..53da8277 100644
--- a/app/tests/test_modules.py
+++ b/app/tests/test_modules.py
@@ -5,10 +5,11 @@
 import os
 import subprocess
 import cPickle as pickle
+import pandas as pd

 from modules.qc.qc import qc, check_header_parsing, check_ecoli
 from modules.blazeUploader.reserve_id import write_reserve_id
-from modules.ectyper.call_ectyper import call_ectyper
+from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype
 from modules.amr.amr import amr
 from modules.amr.amr_to_dict import amr_to_dict
 from modules.beautify.beautify import beautify
@@ -66,7 +67,7 @@ def test_ectyper_vf():
         # basic ECTyper check
         single_dict = dict(ARGS_DICT)
         single_dict.update({'i':ecoli_genome})
-        pickled_ectyper_dict = call_ectyper(single_dict)
+        pickled_ectyper_dict = call_ectyper_vf(single_dict)
         ectyper_dict = pickle.load(open(pickled_ectyper_dict,'rb'))
         assert type(ectyper_dict) == dict

@@ -79,9 +80,17 @@ def test_ectyper_serotype():
     Installed in the conda environment.
     """
     for ecoli_genome in GENOMES_LIST_ECOLI:
+        # Check that the conda env can run ectyper.
         ret_code = subprocess.call(['ectyper', '-i', ecoli_genome])
         assert ret_code == 0

+        # Check the actual call from Spfy's code.
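#  ARGS_DICT is the shared fixture from tests/constants.py; the copy-then-update
#  idiom keeps the fixture pristine between test cases. Roughly (key names as
#  used elsewhere in this series; the exact contents live in constants.py):
#      single_dict = dict(ARGS_DICT)            # e.g. {'pi': 90, 'options': {...}}
#      single_dict.update({'i': ecoli_genome})  # point 'i' at the genome under test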
+        single_dict = dict(ARGS_DICT)
+        single_dict.update({'i':ecoli_genome})
+        pickled_serotype_df = call_ectyper_serotype(single_dict)
+        ectyper_serotype_df = pickle.load(open(pickled_serotype_df,'rb'))
+        assert isinstance(ectyper_serotype_df, pd.DataFrame)
+
 def test_amr():
     ecoli_genome = GENOMES_LIST_ECOLI[0]

From 48ca4b94631cc86fb100366f26c02b6057b3eaf8 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Feb 2018 23:18:29 -0500
Subject: [PATCH 003/122] FIX: imports

---
 app/modules/pan_spfy.py | 1 -
 app/modules/savvy.py    | 2 +-
 app/modules/spfy.py     | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/app/modules/pan_spfy.py b/app/modules/pan_spfy.py
index 7bfc4840..3af6caf0 100644
--- a/app/modules/pan_spfy.py
+++ b/app/modules/pan_spfy.py
@@ -17,7 +17,6 @@
 from modules.qc.qc import qc
 from modules.blazeUploader.reserve_id import write_reserve_id
-from modules.ectyper.call_ectyper import call_ectyper
 from modules.amr.amr import amr
 from modules.amr.amr_to_dict import amr_to_dict
 from modules.beautify.beautify import beautify

diff --git a/app/modules/savvy.py b/app/modules/savvy.py
index f5d9cc8a..de958cee 100755
--- a/app/modules/savvy.py
+++ b/app/modules/savvy.py
@@ -17,7 +17,7 @@
 import json
 from modules.qc.qc import qc
 from modules.blazeUploader.reserve_id import write_reserve_id
-from modules.ectyper.call_ectyper import call_ectyper
+from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype
 from modules.amr.amr import amr
 from modules.amr.amr_to_dict import amr_to_dict
 from modules.beautify.beautify import beautify

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index 3cc679c4..b04355ab 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -17,7 +17,7 @@
 from modules.qc.qc import qc
 from modules.blazeUploader.reserve_id import write_reserve_id
-from modules.ectyper.call_ectyper import call_ectyper
+from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype
 from modules.amr.amr import amr
 from modules.amr.amr_to_dict import amr_to_dict
 from modules.beautify.beautify import beautify

From 725071ee13ae02369eeb5652cf380e9258fcda60 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Tue, 13 Feb 2018 12:58:18 -0500
Subject: [PATCH 004/122] FIX: calls

---
 app/modules/ectyper/call_ectyper.py | 2 +-
 app/modules/savvy.py                | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py
index 73273e2c..c2a1c07a 100644
--- a/app/modules/ectyper/call_ectyper.py
+++ b/app/modules/ectyper/call_ectyper.py
@@ -66,7 +66,7 @@ def call_ectyper_serotype(args_dict):
     """Use the new version of ECTyper at `master` for serotyping.
     """
     genome_file = args_dict['i']
-    pi = args_dict['options']['pi']
+    pi = args_dict['pi']
     pl = '50' # This is the default in ECTyper.
     output_dir = tempfile.mkdtemp()
     ret_code = subprocess.call([

diff --git a/app/modules/savvy.py b/app/modules/savvy.py
index de958cee..355d9064 100755
--- a/app/modules/savvy.py
+++ b/app/modules/savvy.py
@@ -119,7 +119,7 @@ def write_json(json_r, analysis):
     reservation_ttl = write_graph(reservation_graph, 'reservation')

     # (3) ECTyper Step:
-    ectyper_p = call_ectyper(args_dict)
+    ectyper_p = call_ectyper_vf(args_dict) # call_ectyper_vf is the older ver.
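#  At this point the split is: the `superphy` ECTyper submodule handles only
#  virulence factors (call_ectyper_vf), while serotyping goes through the
#  conda-installed `master` ECTyper (call_ectyper_serotype); savvy.py's
#  step (3) runs just the VF half.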
log.debug("Pickled ECTyper File: " + ectyper_p) # (4) ECTyper Beautify Step: From d02ef1d01c805f5550bcab0310b2a4f6308444f6 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 13 Feb 2018 16:06:58 -0500 Subject: [PATCH 005/122] FIX: more calls --- app/modules/ectyper/call_ectyper.py | 2 +- app/modules/savvy.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py index c2a1c07a..26c9070d 100644 --- a/app/modules/ectyper/call_ectyper.py +++ b/app/modules/ectyper/call_ectyper.py @@ -66,7 +66,7 @@ def call_ectyper_serotype(args_dict): """Use the new version of ECTyper at `master` for serotyping. """ genome_file = args_dict['i'] - pi = args_dict['pi'] + pi = str(args_dict['pi']) # Cast to str to execvp() in subprocess(). pl = '50' # This is the default in ECTyper. output_dir = tempfile.mkdtemp() ret_code = subprocess.call([ diff --git a/app/modules/savvy.py b/app/modules/savvy.py index 355d9064..f286b682 100755 --- a/app/modules/savvy.py +++ b/app/modules/savvy.py @@ -128,7 +128,7 @@ def write_json(json_r, analysis): ectyper_json = write_json(ectyper_beautify, 'ectyper') # (5) Graphing ECTyper Result: - ectyper_graph = generate_datastruct(query_file, query_file + '_id.txt', query_file + '_ectyper.p') + ectyper_graph = generate_datastruct(query_file, query_file + '_id.txt', ectyper_p) ectyper_ttl = write_graph(ectyper_graph, 'ectyper') log.debug('Graph Result for ECtyper: ' + ectyper_ttl) From b4d7c1253099d84fb852becd024b7f613af0d96d Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 13 Feb 2018 16:41:43 -0500 Subject: [PATCH 006/122] FIX: that should do it for the calls --- app/modules/ectyper/call_ectyper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py index 26c9070d..3540f913 100644 --- a/app/modules/ectyper/call_ectyper.py +++ b/app/modules/ectyper/call_ectyper.py @@ -73,9 +73,9 @@ def call_ectyper_serotype(args_dict): "ectyper", "-i", genome_file, - "-pi", + "-d", # Percent Identity pi, - "-pl", + "-l", # Percent Length pl, "-o", output_dir From 7db9630a9f9f056b0be2fe554a4ded5a79cc6d03 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 13 Feb 2018 17:20:28 -0500 Subject: [PATCH 007/122] CHANGE: use csv as inbetween for ectyper --- app/modules/ectyper/call_ectyper.py | 6 +++--- app/tests/test_modules.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py index 3540f913..e6e981d6 100644 --- a/app/modules/ectyper/call_ectyper.py +++ b/app/modules/ectyper/call_ectyper.py @@ -89,9 +89,9 @@ def call_ectyper_serotype(args_dict): df['pl'] = pl # The final result file from ECTyper serotyping. This copies it back to # config.DATASTORE - p = os.path.join(genome_file + '_ectyper_serotype.p') - with open(p, 'w') as fh: + csv_file = os.path.join(genome_file + '_ectyper_serotype.csv') + with open(csv_file, 'w') as fh: df.to_csv(fh, header=True, index_label='genome') - return p + return csv_file else: raise Exception('ECTyper Serotyping failed for' + genome_file) diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index 53da8277..454d017d 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -87,8 +87,8 @@ def test_ectyper_serotype(): # Check the actual call from Spfy's code. 
single_dict = dict(ARGS_DICT) single_dict.update({'i':ecoli_genome}) - pickled_serotype_df = call_ectyper_serotype(single_dict) - ectyper_serotype_df = pickle.load(open(pickled_serotype_df,'rb')) + serotype_csv = call_ectyper_serotype(single_dict) + ectyper_serotype_df = pd.read_csv(serotype_csv) assert isinstance(ectyper_serotype_df, pd.DataFrame) def test_amr(): From 522641a2ca3f1f43fcadceb30067d32814a9f1af Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 13 Feb 2018 21:59:05 -0500 Subject: [PATCH 008/122] START: ctrl+f approach to refactoring --- app/batch_download_insert.py | 74 ------------------- app/batch_insert.py | 21 ------ .../blazegraph}/__init__.py | 0 .../blazegraph}/reserve_id.py | 4 +- .../blazegraph}/upload_graph.py | 0 app/{modules => middleware}/decorators.py | 0 .../display}/__init__.py | 0 .../display}/beautify.py | 4 +- .../display}/find_widest.py | 0 .../graphers}/__init__.py | 0 .../graphers}/datastruct_savvy.py | 6 +- .../graphers}/turtle_grapher.py | 6 +- .../graphers}/turtle_utils.py | 0 app/modules/PanPredic | 2 +- .../__init__.py | 0 .../fishers.py | 0 .../frontend_queries.py | 6 +- .../groupcomparisons.py | 8 +- .../handle_logical.py | 2 +- .../logical_queries.py | 8 +- .../sparql_utils.py | 0 .../spfyOntology.rdf | 0 app/modules/database/status_queries.py | 2 +- app/modules/gc.py | 2 +- app/modules/metadata/metadata.py | 6 +- app/modules/pan_spfy.py | 12 +-- app/modules/phylotyper/graph_refs.py | 4 +- app/modules/phylotyper/ontology.py | 6 +- app/modules/phylotyper/phylotyper.py | 4 +- app/modules/phylotyper/sequences.py | 4 +- app/modules/qc/qc.py | 2 +- app/modules/spfy.py | 10 +-- app/routes/ra_views.py | 4 +- app/scripts/generate_ontology.py | 8 +- app/{modules => scripts}/savvy.py | 14 ++-- app/tests/test_beautify.py | 2 +- app/tests/test_modules.py | 8 +- app/tests/test_savvy.py | 2 +- app/tests/test_turtle_utils.py | 4 +- docs/source/contributing.rst | 4 +- 40 files changed, 72 insertions(+), 167 deletions(-) delete mode 100644 app/batch_download_insert.py delete mode 100644 app/batch_insert.py rename app/{modules/beautify => middleware/blazegraph}/__init__.py (100%) rename app/{modules/blazeUploader => middleware/blazegraph}/reserve_id.py (97%) rename app/{modules/blazeUploader => middleware/blazegraph}/upload_graph.py (100%) rename app/{modules => middleware}/decorators.py (100%) rename app/{modules/blazeUploader => middleware/display}/__init__.py (100%) rename app/{modules/beautify => middleware/display}/beautify.py (97%) rename app/{modules/beautify => middleware/display}/find_widest.py (100%) rename app/{modules/groupComparisons => middleware/graphers}/__init__.py (100%) rename app/{modules/turtleGrapher => middleware/graphers}/datastruct_savvy.py (97%) rename app/{modules/turtleGrapher => middleware/graphers}/turtle_grapher.py (96%) rename app/{modules/turtleGrapher => middleware/graphers}/turtle_utils.py (100%) rename app/modules/{turtleGrapher => comparisons}/__init__.py (100%) rename app/modules/{groupComparisons => comparisons}/fishers.py (100%) rename app/modules/{groupComparisons => comparisons}/frontend_queries.py (94%) rename app/modules/{groupComparisons => comparisons}/groupcomparisons.py (96%) rename app/modules/{groupComparisons => comparisons}/handle_logical.py (96%) rename app/modules/{groupComparisons => comparisons}/logical_queries.py (94%) rename app/modules/{groupComparisons => comparisons}/sparql_utils.py (100%) rename app/modules/{groupComparisons => comparisons}/spfyOntology.rdf (100%) rename app/{modules => 
scripts}/savvy.py (93%) diff --git a/app/batch_download_insert.py b/app/batch_download_insert.py deleted file mode 100644 index 10439324..00000000 --- a/app/batch_download_insert.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -#usage python batch_download_insert.py - -from Bio import SeqIO - -def download_to_insert(accession): - import subprocess - - r = from_nuccore(accession) - - if r is None: - print 'OH CRAP' - else: - subprocess.call(['./savvy.py', '-i', from_nuccore(accession)]) - - print 'woogle' - -def downloadFasta_to_insert(url): - import subprocess, os - - from time import sleep - - print 'working on ' + url - - i = 1 - - while i < 4: - try: - r = download_fasta(url) - i = 4 - except: - sleep(60 * i) #'linear backoff equation', for those of us too impatiant for the exponential kind - i += 1 - continue - - print 'done downloading, file at ' + r - - print 'now generating .ttl' - if r is None: - print 'OH CRAP' - else: - print 'calling subproces' - subprocess.call(['./savvy.py', '-i', r]) - print 'done generating turtle' - #os.remove(r) need to add way to check after process completes, for now added it to insert.py script - print 'woogle' - -if __name__ == "__main__": - from multiprocessing import Pool, cpu_count - - '''this is testing code using the .csv file - import pandas #this is the .csv parser - from _utils import from_nuccore - - metadata_table = pandas.read_csv('data/metadata_table.csv') - accessions = metadata_table['primary_dbxref'].apply(lambda s: s.strip().split(':')[1]) - - p = Pool(multiprocessing.cpu_count()) #you can use an int instead, just don't go crazy - #note: you may want to write out the fasta file, but I'm unsure whether it will improve performance as concurrency requires them all to be loaded into memory anyways - p.map(download_to_insert, accessions) - ''' - - #testing using the .txt file as source - from _utils import download_fasta - - with open('data/download_files.txt') as f: - lines = f.read().splitlines() - #p = Pool(cpu_count()) - p = Pool(2) - p.map(downloadFasta_to_insert, lines) - - print 'ALL DONE XD!!!!!!!!' 
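#  The retry loop deleted above used linear backoff, sleep(60 * i); the
#  "exponential kind" its comment jokes about would look like this (a sketch
#  only - download_fasta() came from the also-removed _utils helper):
#      for attempt in range(3):
#          try:
#              r = download_fasta(url)
#              break
#          except Exception:
#              sleep(60 * 2 ** attempt)  # 60 s, 120 s, 240 s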
diff --git a/app/batch_insert.py b/app/batch_insert.py deleted file mode 100644 index 4174a03b..00000000 --- a/app/batch_insert.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -import subprocess - -from multiprocessing import Pool, cpu_count -from os import listdir -from time import time - -def batch_call(filename): - subprocess.call(['./savvy.py', '-i', 'tmp/' + filename]) -if __name__ == "__main__": - - start = time() - print 'Starting batch insert at: ', start - - p = Pool(cpu_count()) - p.map(batch_call, listdir('tmp')) - - print '***ALL DONE***' - print 'Completed at: ', time() - print 'Elapsed: ', time() - s diff --git a/app/modules/beautify/__init__.py b/app/middleware/blazegraph/__init__.py similarity index 100% rename from app/modules/beautify/__init__.py rename to app/middleware/blazegraph/__init__.py diff --git a/app/modules/blazeUploader/reserve_id.py b/app/middleware/blazegraph/reserve_id.py similarity index 97% rename from app/modules/blazeUploader/reserve_id.py rename to app/middleware/blazegraph/reserve_id.py index eba6f98b..fdc27d21 100644 --- a/app/modules/blazeUploader/reserve_id.py +++ b/app/middleware/blazegraph/reserve_id.py @@ -1,8 +1,8 @@ import os import logging from datetime import datetime -from modules.turtleGrapher.turtle_utils import generate_hash, generate_uri as gu, link_uris -from modules.blazeUploader.upload_graph import upload_graph +from middleware.graphers.turtle_utils import generate_hash, generate_uri as gu, link_uris +from middleware.blazegraph.upload_graph import upload_graph from SPARQLWrapper import SPARQLWrapper, JSON from rdflib import Literal, Graph import config diff --git a/app/modules/blazeUploader/upload_graph.py b/app/middleware/blazegraph/upload_graph.py similarity index 100% rename from app/modules/blazeUploader/upload_graph.py rename to app/middleware/blazegraph/upload_graph.py diff --git a/app/modules/decorators.py b/app/middleware/decorators.py similarity index 100% rename from app/modules/decorators.py rename to app/middleware/decorators.py diff --git a/app/modules/blazeUploader/__init__.py b/app/middleware/display/__init__.py similarity index 100% rename from app/modules/blazeUploader/__init__.py rename to app/middleware/display/__init__.py diff --git a/app/modules/beautify/beautify.py b/app/middleware/display/beautify.py similarity index 97% rename from app/modules/beautify/beautify.py rename to app/middleware/display/beautify.py index c91ca566..51bb6254 100644 --- a/app/modules/beautify/beautify.py +++ b/app/middleware/display/beautify.py @@ -3,8 +3,8 @@ import cPickle as pickle from os.path import basename from modules.loggingFunctions import initialize_logging -from modules.beautify.find_widest import check_alleles -from modules.turtleGrapher.turtle_utils import actual_filename +from middleware.display.find_widest import check_alleles +from middleware.graphers.turtle_utils import actual_filename # logging log_file = initialize_logging() diff --git a/app/modules/beautify/find_widest.py b/app/middleware/display/find_widest.py similarity index 100% rename from app/modules/beautify/find_widest.py rename to app/middleware/display/find_widest.py diff --git a/app/modules/groupComparisons/__init__.py b/app/middleware/graphers/__init__.py similarity index 100% rename from app/modules/groupComparisons/__init__.py rename to app/middleware/graphers/__init__.py diff --git a/app/modules/turtleGrapher/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py similarity index 97% rename from 
app/modules/turtleGrapher/datastruct_savvy.py rename to app/middleware/graphers/datastruct_savvy.py index 7c72b567..ba374a2c 100644 --- a/app/modules/turtleGrapher/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -1,8 +1,8 @@ import cPickle as pickle from rdflib import BNode, Literal, Graph -from modules.turtleGrapher.turtle_utils import generate_uri as gu, generate_hash, link_uris -from modules.turtleGrapher.turtle_grapher import generate_graph -from modules.blazeUploader.upload_graph import queue_upload +from middleware.graphers.turtle_utils import generate_uri as gu, generate_hash, link_uris +from middleware.graphers.turtle_grapher import generate_graph +from middleware.blazegraph.upload_graph import queue_upload from modules.PanPredic.pan_utils import contig_name_parse # working with Serotype, Antimicrobial Resistance, & Virulence Factor data # structures diff --git a/app/modules/turtleGrapher/turtle_grapher.py b/app/middleware/graphers/turtle_grapher.py similarity index 96% rename from app/modules/turtleGrapher/turtle_grapher.py rename to app/middleware/graphers/turtle_grapher.py index 9bcfd19a..e725a677 100644 --- a/app/modules/turtleGrapher/turtle_grapher.py +++ b/app/middleware/graphers/turtle_grapher.py @@ -5,9 +5,9 @@ # to data structure(rdf triple organization) of the modules you're dev'ing import config -from modules.turtleGrapher.turtle_utils import generate_hash, generate_uri as gu, link_uris -from modules.blazeUploader.upload_graph import queue_upload -from modules.turtleGrapher.turtle_utils import actual_filename +from middleware.graphers.turtle_utils import generate_hash, generate_uri as gu, link_uris +from middleware.blazegraph.upload_graph import queue_upload +from middleware.graphers.turtle_utils import actual_filename from rdflib import Namespace, Graph, Literal, plugin from Bio import SeqIO from os.path import basename diff --git a/app/modules/turtleGrapher/turtle_utils.py b/app/middleware/graphers/turtle_utils.py similarity index 100% rename from app/modules/turtleGrapher/turtle_utils.py rename to app/middleware/graphers/turtle_utils.py diff --git a/app/modules/PanPredic b/app/modules/PanPredic index 50e53502..59eb6025 160000 --- a/app/modules/PanPredic +++ b/app/modules/PanPredic @@ -1 +1 @@ -Subproject commit 50e53502289723406109cb7a504110f1753a56e6 +Subproject commit 59eb60253859abd61c7a1f44e0e7fd2ec9bf7e40 diff --git a/app/modules/turtleGrapher/__init__.py b/app/modules/comparisons/__init__.py similarity index 100% rename from app/modules/turtleGrapher/__init__.py rename to app/modules/comparisons/__init__.py diff --git a/app/modules/groupComparisons/fishers.py b/app/modules/comparisons/fishers.py similarity index 100% rename from app/modules/groupComparisons/fishers.py rename to app/modules/comparisons/fishers.py diff --git a/app/modules/groupComparisons/frontend_queries.py b/app/modules/comparisons/frontend_queries.py similarity index 94% rename from app/modules/groupComparisons/frontend_queries.py rename to app/modules/comparisons/frontend_queries.py index 919afe38..d9e1727e 100644 --- a/app/modules/groupComparisons/frontend_queries.py +++ b/app/modules/comparisons/frontend_queries.py @@ -6,9 +6,9 @@ from functools import wraps from SPARQLWrapper import SPARQLWrapper, JSON from modules.loggingFunctions import initialize_logging -from modules.turtleGrapher.turtle_utils import generate_uri as gu -from modules.groupComparisons.sparql_utils import generate_prefixes -from modules.decorators import toset, tolist, submit +from 
middleware.graphers.turtle_utils import generate_uri as gu +from modules.comparisons.sparql_utils import generate_prefixes +from middleware.decorators import toset, tolist, submit # logging log_file = initialize_logging() diff --git a/app/modules/groupComparisons/groupcomparisons.py b/app/modules/comparisons/groupcomparisons.py similarity index 96% rename from app/modules/groupComparisons/groupcomparisons.py rename to app/modules/comparisons/groupcomparisons.py index 68c8f8a0..a63b930d 100644 --- a/app/modules/groupComparisons/groupcomparisons.py +++ b/app/modules/comparisons/groupcomparisons.py @@ -1,9 +1,9 @@ import logging from modules.loggingFunctions import initialize_logging -from modules.groupComparisons.handle_logical import handle_logical -from modules.groupComparisons.logical_queries import query_targets -from modules.groupComparisons.fishers import fishers -from modules.decorators import tofromHumanReadable +from modules.comparisons.handle_logical import handle_logical +from modules.comparisons.logical_queries import query_targets +from modules.comparisons.fishers import fishers +from middleware.decorators import tofromHumanReadable # logging log_file = initialize_logging() diff --git a/app/modules/groupComparisons/handle_logical.py b/app/modules/comparisons/handle_logical.py similarity index 96% rename from app/modules/groupComparisons/handle_logical.py rename to app/modules/comparisons/handle_logical.py index 2c7ae7c5..8610f6d1 100644 --- a/app/modules/groupComparisons/handle_logical.py +++ b/app/modules/comparisons/handle_logical.py @@ -1,6 +1,6 @@ import logging from modules.loggingFunctions import initialize_logging -from modules.groupComparisons.logical_queries import resolve_spfyids, resolve_spfyids_negated +from modules.comparisons.logical_queries import resolve_spfyids, resolve_spfyids_negated # logging log_file = initialize_logging() diff --git a/app/modules/groupComparisons/logical_queries.py b/app/modules/comparisons/logical_queries.py similarity index 94% rename from app/modules/groupComparisons/logical_queries.py rename to app/modules/comparisons/logical_queries.py index 1de2be74..522ef5a2 100644 --- a/app/modules/groupComparisons/logical_queries.py +++ b/app/modules/comparisons/logical_queries.py @@ -3,10 +3,10 @@ import logging import time from modules.loggingFunctions import initialize_logging -from modules.turtleGrapher.turtle_utils import generate_uri as gu -from modules.groupComparisons.sparql_utils import generate_prefixes -from modules.decorators import toset, tolist, tostring, prefix, submit -from modules.groupComparisons.frontend_queries import is_group +from middleware.graphers.turtle_utils import generate_uri as gu +from modules.comparisons.sparql_utils import generate_prefixes +from middleware.decorators import toset, tolist, tostring, prefix, submit +from modules.comparisons.frontend_queries import is_group # logging log_file = initialize_logging() diff --git a/app/modules/groupComparisons/sparql_utils.py b/app/modules/comparisons/sparql_utils.py similarity index 100% rename from app/modules/groupComparisons/sparql_utils.py rename to app/modules/comparisons/sparql_utils.py diff --git a/app/modules/groupComparisons/spfyOntology.rdf b/app/modules/comparisons/spfyOntology.rdf similarity index 100% rename from app/modules/groupComparisons/spfyOntology.rdf rename to app/modules/comparisons/spfyOntology.rdf diff --git a/app/modules/database/status_queries.py b/app/modules/database/status_queries.py index 5ae82ace..82cde656 100644 --- 
a/app/modules/database/status_queries.py +++ b/app/modules/database/status_queries.py @@ -1,6 +1,6 @@ import logging from modules.loggingFunctions import initialize_logging -from modules.decorators import tojson, prefix, submit +from middleware.decorators import tojson, prefix, submit # logging log_file = initialize_logging() diff --git a/app/modules/gc.py b/app/modules/gc.py index eefbba56..4227a88e 100644 --- a/app/modules/gc.py +++ b/app/modules/gc.py @@ -2,7 +2,7 @@ import config import redis from rq import Queue -from modules.groupComparisons.groupcomparisons import groupcomparisons +from modules.comparisons.groupcomparisons import groupcomparisons from modules.loggingFunctions import initialize_logging # logging diff --git a/app/modules/metadata/metadata.py b/app/modules/metadata/metadata.py index 5f0c8d6e..585875f1 100644 --- a/app/modules/metadata/metadata.py +++ b/app/modules/metadata/metadata.py @@ -2,9 +2,9 @@ import pandas as pd from rdflib import Graph, Literal from werkzeug.utils import secure_filename -from modules.groupComparisons.logical_queries import resolve_spfyids -from modules.turtleGrapher.turtle_utils import generate_uri as gu -from modules.blazeUploader.upload_graph import upload_graph +from modules.comparisons.logical_queries import resolve_spfyids +from middleware.graphers.turtle_utils import generate_uri as gu +from middleware.blazegraph.upload_graph import upload_graph from modules.metadata.mappings import mapping d = {'Human': 'http://purl.bioontology.org/ontology/NCBITAXON/9606', diff --git a/app/modules/pan_spfy.py b/app/modules/pan_spfy.py index 3af6caf0..73389bcf 100644 --- a/app/modules/pan_spfy.py +++ b/app/modules/pan_spfy.py @@ -16,19 +16,19 @@ from rdflib import Graph from modules.qc.qc import qc -from modules.blazeUploader.reserve_id import write_reserve_id +from middleware.blazegraph.reserve_id import write_reserve_id from modules.amr.amr import amr from modules.amr.amr_to_dict import amr_to_dict -from modules.beautify.beautify import beautify -from modules.turtleGrapher.datastruct_savvy import datastruct_savvy, parse_gene_dict -from modules.turtleGrapher.turtle_grapher import turtle_grapher, generate_graph +from middleware.display.beautify import beautify +from middleware.graphers.datastruct_savvy import datastruct_savvy, parse_gene_dict +from middleware.graphers.turtle_grapher import turtle_grapher, generate_graph from modules.PanPredic.pan import pan -from modules.turtleGrapher.turtle_utils import generate_uri as gu +from middleware.graphers.turtle_utils import generate_uri as gu from modules.PanPredic.queries import get_single_region from datetime import datetime import ast import cPickle as pickle -from modules.blazeUploader import upload_graph +from middleware.blazegraph import upload_graph # the only ONE time for global variables # when naming queues, make sure you actually set a worker to listen to that queue diff --git a/app/modules/phylotyper/graph_refs.py b/app/modules/phylotyper/graph_refs.py index 786b6e9c..de8ee3c4 100644 --- a/app/modules/phylotyper/graph_refs.py +++ b/app/modules/phylotyper/graph_refs.py @@ -4,8 +4,8 @@ import requests from tempfile import NamedTemporaryFile from rdflib import Literal -from modules.turtleGrapher.turtle_grapher import generate_graph -from modules.turtleGrapher.turtle_utils import generate_uri as gu +from middleware.graphers.turtle_grapher import generate_graph +from middleware.graphers.turtle_utils import generate_uri as gu def get_ref_vfs(): # we use a tempfile.TemporaryFile to store the ref diff 
--git a/app/modules/phylotyper/ontology.py b/app/modules/phylotyper/ontology.py index 9ad3d24d..3fd71eb0 100644 --- a/app/modules/phylotyper/ontology.py +++ b/app/modules/phylotyper/ontology.py @@ -11,9 +11,9 @@ from rdflib import Graph, Literal, XSD from modules.phylotyper.exceptions import ValuesError, DatabaseError -from modules.turtleGrapher.turtle_utils import generate_uri as gu -from modules.decorators import submit, prefix, tojson -from modules.blazeUploader.upload_graph import upload_turtle, upload_graph +from middleware.graphers.turtle_utils import generate_uri as gu +from middleware.decorators import submit, prefix, tojson +from middleware.blazegraph.upload_graph import upload_turtle, upload_graph from modules.phylotyper.graph_refs import graph_refs diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py index 0903686d..6c8af7fd 100644 --- a/app/modules/phylotyper/phylotyper.py +++ b/app/modules/phylotyper/phylotyper.py @@ -22,8 +22,8 @@ import config -from modules.turtleGrapher.turtle_utils import generate_uri as gu, fulluri_to_basename as u2b, normalize_rdfterm as normalize -from modules.blazeUploader.upload_graph import upload_graph +from middleware.graphers.turtle_utils import generate_uri as gu, fulluri_to_basename as u2b, normalize_rdfterm as normalize +from middleware.blazegraph.upload_graph import upload_graph from modules.phylotyper import ontology, exceptions from modules.phylotyper.sequences import MarkerSequences, phylotyper_query, genename_query diff --git a/app/modules/phylotyper/sequences.py b/app/modules/phylotyper/sequences.py index 58539d20..91e5fe76 100644 --- a/app/modules/phylotyper/sequences.py +++ b/app/modules/phylotyper/sequences.py @@ -6,8 +6,8 @@ """ -from modules.decorators import submit, prefix, tojson -from modules.turtleGrapher import turtle_utils +from middleware.decorators import submit, prefix, tojson +from middleware.graphers import turtle_utils @submit @prefix diff --git a/app/modules/qc/qc.py b/app/modules/qc/qc.py index fd404f3c..3286bb17 100755 --- a/app/modules/qc/qc.py +++ b/app/modules/qc/qc.py @@ -4,7 +4,7 @@ import subprocess import argparse import pandas as pd -from modules.turtleGrapher.turtle_grapher import generate_turtle_skeleton +from middleware.graphers.turtle_grapher import generate_turtle_skeleton def create_blast_db(query_file): ''' diff --git a/app/modules/spfy.py b/app/modules/spfy.py index b04355ab..8962b77e 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -16,14 +16,14 @@ from rdflib import Graph from modules.qc.qc import qc -from modules.blazeUploader.reserve_id import write_reserve_id +from middleware.blazegraph.reserve_id import write_reserve_id from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype from modules.amr.amr import amr from modules.amr.amr_to_dict import amr_to_dict -from modules.beautify.beautify import beautify -from modules.turtleGrapher.datastruct_savvy import datastruct_savvy -from modules.turtleGrapher.turtle_grapher import turtle_grapher -from modules.turtleGrapher.turtle_utils import actual_filename +from middleware.display.beautify import beautify +from middleware.graphers.datastruct_savvy import datastruct_savvy +from middleware.graphers.turtle_grapher import turtle_grapher +from middleware.graphers.turtle_utils import actual_filename from modules.phylotyper import phylotyper from modules.loggingFunctions import initialize_logging diff --git a/app/routes/ra_views.py b/app/routes/ra_views.py index e2f44e64..e31fdece 100644 --- 
a/app/routes/ra_views.py +++ b/app/routes/ra_views.py @@ -1,7 +1,7 @@ from flask import Blueprint, request, jsonify, current_app -from modules.groupComparisons.frontend_queries import get_all_attribute_types, get_attribute_values, get_types +from modules.comparisons.frontend_queries import get_all_attribute_types, get_attribute_values, get_types from routes.file_utils import fix_uri -from modules.decorators import tofromHumanReadable +from middleware.decorators import tofromHumanReadable bp_ra_views = Blueprint('reactapp_views', __name__) diff --git a/app/scripts/generate_ontology.py b/app/scripts/generate_ontology.py index dcbfbb2f..30d9db25 100644 --- a/app/scripts/generate_ontology.py +++ b/app/scripts/generate_ontology.py @@ -1,10 +1,10 @@ # baseURI: https://www.github.com/superphy# from datetime import datetime from rdflib import Literal -from modules.turtleGrapher.turtle_grapher import generate_graph -from modules.turtleGrapher.turtle_utils import generate_uri as gu, link_uris -from modules.blazeUploader.reserve_id import reservation_triple -from modules.savvy import savvy +from middleware.graphers.turtle_grapher import generate_graph +from middleware.graphers.turtle_utils import generate_uri as gu, link_uris +from middleware.blazegraph.reserve_id import reservation_triple +from scripts.savvy import savvy def write_graph(graph): ''' diff --git a/app/modules/savvy.py b/app/scripts/savvy.py similarity index 93% rename from app/modules/savvy.py rename to app/scripts/savvy.py index f286b682..bf037cf2 100755 --- a/app/modules/savvy.py +++ b/app/scripts/savvy.py @@ -1,7 +1,7 @@ #!/usr/bin/env python2 # -*- coding: UTF-8 -*- -# use: python -m modules.savvy -i /home/kevin/Desktop/nonGenBankEcoli/ECI-2866_lcl.fasta +# use: python -m scripts.savvy -i /home/kevin/Desktop/nonGenBankEcoli/ECI-2866_lcl.fasta # S:erotype # A:ntimicrobial Resistance @@ -16,16 +16,16 @@ import shutil import json from modules.qc.qc import qc -from modules.blazeUploader.reserve_id import write_reserve_id +from middleware.blazegraph.reserve_id import write_reserve_id from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype from modules.amr.amr import amr from modules.amr.amr_to_dict import amr_to_dict -from modules.beautify.beautify import beautify -from modules.turtleGrapher.datastruct_savvy import generate_datastruct -from modules.turtleGrapher.turtle_grapher import generate_turtle_skeleton -from modules.turtleGrapher.turtle_utils import generate_hash, generate_uri as gu +from middleware.display.beautify import beautify +from middleware.graphers.datastruct_savvy import generate_datastruct +from middleware.graphers.turtle_grapher import generate_turtle_skeleton +from middleware.graphers.turtle_utils import generate_hash, generate_uri as gu from modules.loggingFunctions import initialize_logging -from modules.blazeUploader.reserve_id import reservation_triple +from middleware.blazegraph.reserve_id import reservation_triple log_file = initialize_logging() log = logging.getLogger(__name__) diff --git a/app/tests/test_beautify.py b/app/tests/test_beautify.py index abe03570..ea7546f0 100644 --- a/app/tests/test_beautify.py +++ b/app/tests/test_beautify.py @@ -2,7 +2,7 @@ import pytest import cPickle as pickle import pandas as pd -from modules.beautify.beautify import beautify, json_return, has_failed +from middleware.display.beautify import beautify, json_return, has_failed from tests.constants import ARGS_DICT, BEAUTIFY_VF_SEROTYPE vf_serotype_gene_dict = os.path.join('tests/refs', 
'GCA_000005845.2_ASM584v2_genomic.fna_ectyper-vf_serotype.p') diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index 454d017d..b5d2c006 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -8,13 +8,13 @@ import pandas as pd from modules.qc.qc import qc, check_header_parsing, check_ecoli -from modules.blazeUploader.reserve_id import write_reserve_id +from middleware.blazegraph.reserve_id import write_reserve_id from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype from modules.amr.amr import amr from modules.amr.amr_to_dict import amr_to_dict -from modules.beautify.beautify import beautify -from modules.turtleGrapher.datastruct_savvy import datastruct_savvy -from modules.turtleGrapher.turtle_grapher import turtle_grapher +from middleware.display.beautify import beautify +from middleware.graphers.datastruct_savvy import datastruct_savvy +from middleware.graphers.turtle_grapher import turtle_grapher from tests.constants import ARGS_DICT diff --git a/app/tests/test_savvy.py b/app/tests/test_savvy.py index 4a89857d..fe507445 100644 --- a/app/tests/test_savvy.py +++ b/app/tests/test_savvy.py @@ -2,7 +2,7 @@ import shutil import pytest from hashlib import sha1 -from modules.savvy import mock_reserve_id, get_spfyid_file, savvy +from scripts.savvy import mock_reserve_id, get_spfyid_file, savvy from tests.constants import ARGS_DICT def sha1_hash(f): diff --git a/app/tests/test_turtle_utils.py b/app/tests/test_turtle_utils.py index 53934cea..725b5ae0 100644 --- a/app/tests/test_turtle_utils.py +++ b/app/tests/test_turtle_utils.py @@ -1,7 +1,7 @@ from hashlib import sha1 from rdflib import URIRef -from modules.turtleGrapher.turtle_utils import generate_uri as gu -from modules.turtleGrapher.turtle_utils import actual_filename +from middleware.graphers.turtle_utils import generate_uri as gu +from middleware.graphers.turtle_utils import actual_filename def test_generate_uri(): # test generate usage: diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index 047be560..d2e5ed9e 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -289,7 +289,7 @@ Directly Adding a New Module .. code-block:: python - from modules.blazeUploader.reserve_id import write_reserve_id + from middleware.blazegraph.reserve_id import write_reserve_id The top-most directory is used to build Docker Images and copies the contents of ``/app`` to run inside the containers. This is done as the apps (Flask, Reactapp) themselves don't need copies of the Dockerfiles, other apps, etc. @@ -471,7 +471,7 @@ If you're integrating your codebase with Spfy, add your code to a new directory import config import redis from rq import Queue - from modules.groupComparisons.groupcomparisons import groupcomparisons + from modules.comparisons.groupcomparisons import groupcomparisons from modules.loggingFunctions import initialize_logging # logging From 2306caebdb797b04627bceee609611f52900cda1 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 13 Feb 2018 22:32:58 -0500 Subject: [PATCH 009/122] DEBUG: `scripts` dir not picked up during tests? 
--- app/{scripts => modules}/savvy.py | 2 +- app/scripts/generate_ontology.py | 2 +- app/tests/test_savvy.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename app/{scripts => modules}/savvy.py (99%) diff --git a/app/scripts/savvy.py b/app/modules/savvy.py similarity index 99% rename from app/scripts/savvy.py rename to app/modules/savvy.py index bf037cf2..b354a9cf 100755 --- a/app/scripts/savvy.py +++ b/app/modules/savvy.py @@ -1,7 +1,7 @@ #!/usr/bin/env python2 # -*- coding: UTF-8 -*- -# use: python -m scripts.savvy -i /home/kevin/Desktop/nonGenBankEcoli/ECI-2866_lcl.fasta +# use: python -m modules.savvy -i /home/kevin/Desktop/nonGenBankEcoli/ECI-2866_lcl.fasta # S:erotype # A:ntimicrobial Resistance diff --git a/app/scripts/generate_ontology.py b/app/scripts/generate_ontology.py index 30d9db25..38312e47 100644 --- a/app/scripts/generate_ontology.py +++ b/app/scripts/generate_ontology.py @@ -4,7 +4,7 @@ from middleware.graphers.turtle_grapher import generate_graph from middleware.graphers.turtle_utils import generate_uri as gu, link_uris from middleware.blazegraph.reserve_id import reservation_triple -from scripts.savvy import savvy +from modules.savvy import savvy def write_graph(graph): ''' diff --git a/app/tests/test_savvy.py b/app/tests/test_savvy.py index fe507445..4a89857d 100644 --- a/app/tests/test_savvy.py +++ b/app/tests/test_savvy.py @@ -2,7 +2,7 @@ import shutil import pytest from hashlib import sha1 -from scripts.savvy import mock_reserve_id, get_spfyid_file, savvy +from modules.savvy import mock_reserve_id, get_spfyid_file, savvy from tests.constants import ARGS_DICT def sha1_hash(f): From e258414505a10a6bafa7479fa5e5e7394e1a2d38 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 13 Feb 2018 22:33:44 -0500 Subject: [PATCH 010/122] FIX: right the __init__.py --- app/scripts/__init__.py | 0 app/{modules => scripts}/savvy.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 app/scripts/__init__.py rename app/{modules => scripts}/savvy.py (100%) diff --git a/app/scripts/__init__.py b/app/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/modules/savvy.py b/app/scripts/savvy.py similarity index 100% rename from app/modules/savvy.py rename to app/scripts/savvy.py From 4d4963c05b364b71680d4dbdc1e2c26075bff9bf Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 13 Feb 2018 22:34:06 -0500 Subject: [PATCH 011/122] FIX: right the __init__.py --- app/scripts/generate_ontology.py | 2 +- app/scripts/savvy.py | 2 +- app/tests/test_savvy.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/scripts/generate_ontology.py b/app/scripts/generate_ontology.py index 38312e47..30d9db25 100644 --- a/app/scripts/generate_ontology.py +++ b/app/scripts/generate_ontology.py @@ -4,7 +4,7 @@ from middleware.graphers.turtle_grapher import generate_graph from middleware.graphers.turtle_utils import generate_uri as gu, link_uris from middleware.blazegraph.reserve_id import reservation_triple -from modules.savvy import savvy +from scripts.savvy import savvy def write_graph(graph): ''' diff --git a/app/scripts/savvy.py b/app/scripts/savvy.py index b354a9cf..bf037cf2 100755 --- a/app/scripts/savvy.py +++ b/app/scripts/savvy.py @@ -1,7 +1,7 @@ #!/usr/bin/env python2 # -*- coding: UTF-8 -*- -# use: python -m modules.savvy -i /home/kevin/Desktop/nonGenBankEcoli/ECI-2866_lcl.fasta +# use: python -m scripts.savvy -i /home/kevin/Desktop/nonGenBankEcoli/ECI-2866_lcl.fasta # S:erotype # A:ntimicrobial Resistance diff --git 
a/app/tests/test_savvy.py b/app/tests/test_savvy.py index 4a89857d..fe507445 100644 --- a/app/tests/test_savvy.py +++ b/app/tests/test_savvy.py @@ -2,7 +2,7 @@ import shutil import pytest from hashlib import sha1 -from modules.savvy import mock_reserve_id, get_spfyid_file, savvy +from scripts.savvy import mock_reserve_id, get_spfyid_file, savvy from tests.constants import ARGS_DICT def sha1_hash(f): From 56064372101a79ff9c31dd3191aa3883814edfae Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 01:39:40 -0500 Subject: [PATCH 012/122] START: define a model for the frontend returns --- app/middleware/models.py | 16 ++++++++++++++++ app/tests/test_models.py | 17 +++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 app/middleware/models.py create mode 100644 app/tests/test_models.py diff --git a/app/middleware/models.py b/app/middleware/models.py new file mode 100644 index 00000000..d17e1a16 --- /dev/null +++ b/app/middleware/models.py @@ -0,0 +1,16 @@ +from jsonmodels import models, fields + + +class SubtypingRow(models.Base): + analysis = fields.StringField(required=True) + contigid = fields.StringField(required=True) + filename = fields.StringField(required=True) + hitcutoff = fields.StringField(nullable=True) + hitname = fields.StringField(required=True) + hitorientation = fields.StringField(nullable=True) + hitstart = fields.StringField(nullable=True) + hitstop = fields.StringField(nullable=True) + + +class SubtypingResult(models.Base): + rows = fields.ListField([SubtypingRow], nullable=True) diff --git a/app/tests/test_models.py b/app/tests/test_models.py new file mode 100644 index 00000000..1566d1ee --- /dev/null +++ b/app/tests/test_models.py @@ -0,0 +1,17 @@ +from middleware.models import SubtypingRow, SubtypingResult +from tests.constants import BEAUTIFY_VF_SEROTYPE + +def test_models(): + subtyping_result = [ + SubtypingRow( + analysis=d['analysis'], + contigid=d['contigid'], + filename=d['filename'], + hitcutoff=str(d['hitcutoff']), + hitname=d['hitname'], + hitorientation=d['hitorientation'], + hitstart=str(d['hitstart']), + hitstop=str(d['hitstop']) + ) + for d in BEAUTIFY_VF_SEROTYPE] + subtyping_result.validate() From b94ede6602d0d3a948e2b3e1df00e398a79ad754 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 02:11:49 -0500 Subject: [PATCH 013/122] FIX: convert to the main model before validate --- app/tests/test_models.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 1566d1ee..4911161e 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -2,7 +2,7 @@ from tests.constants import BEAUTIFY_VF_SEROTYPE def test_models(): - subtyping_result = [ + subtyping_list = [ SubtypingRow( analysis=d['analysis'], contigid=d['contigid'], @@ -14,4 +14,7 @@ def test_models(): hitstop=str(d['hitstop']) ) for d in BEAUTIFY_VF_SEROTYPE] + subtyping_result = SubtypingResult( + rows = subtyping_list + ) subtyping_result.validate() From 46c65e618196b26923aad78dae9ae4e3630cf0af Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 11:46:51 -0500 Subject: [PATCH 014/122] CHANGE: have ectyper call convert result into our model --- app/middleware/modellers.py | 34 +++++++++++++++++++++++++++++ app/modules/ectyper/call_ectyper.py | 21 +++++++++--------- app/tests/test_models.py | 5 ++++- app/tests/test_modules.py | 7 +++--- 4 files changed, 52 insertions(+), 15 deletions(-) create mode 100644 app/middleware/modellers.py diff --git 
a/app/middleware/modellers.py b/app/middleware/modellers.py
new file mode 100644
index 00000000..0512bd0a
--- /dev/null
+++ b/app/middleware/modellers.py
@@ -0,0 +1,34 @@
+# We try to keep all model creation in this file so it's easier to reference.
+import pandas as pd
+from middleware.graphers.turtle_utils import actual_filename
+
+
+def model_serotype(pi, pl, output_file):
+    """
+    Creates a SubtypingResult model from ECTyper's serotyping output.
+    """
+    # Read the vanilla output_file from ECTyper.
+    df = pd.read_csv(output_file)
+
+    # TODO: incorporate the pl.
+
+    # Loop.
+    subtyping_list = [
+        SubtypingRow(
+            analysis='Serotype',
+            contigid='n/a',
+            filename=actual_filename(row['genome']),
+            hitcutoff=str(pi),
+            hitname="{0}:{1}".format(row['O_prediction'],row['H_prediction']),
+            hitorientation='n/a',
+            hitstart='n/a',
+            hitstop='n/a'
+        )
+        for index, row in df.iterrows()]
+
+    # SubtypingResult.rows expects a list.
+    subtyping_result = SubtypingResult([
+        subtyping_row
+    ])
+
+    return subtyping_result

diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py
index e6e981d6..4cc52d6f 100644
--- a/app/modules/ectyper/call_ectyper.py
+++ b/app/modules/ectyper/call_ectyper.py
@@ -8,6 +8,7 @@
 from ast import literal_eval
 from os.path import basename
 from modules.loggingFunctions import initialize_logging
+from middleware.modellers import model_serotype

 log_file = initialize_logging()
 log = logging.getLogger(__name__)
@@ -82,16 +83,14 @@ def call_ectyper_serotype(args_dict):
     ])
     if ret_code == 0:
         output_file = os.path.join(output_dir, 'output.csv')
-        df = pd.read_csv(output_file)
-        # Add the PI to our DataFrame.
-        df['pi'] = pi
-        # Add the PL to our DataFrame.
-        df['pl'] = pl
-        # The final result file from ECTyper serotyping. This copies it back to
-        # config.DATASTORE
-        csv_file = os.path.join(genome_file + '_ectyper_serotype.csv')
-        with open(csv_file, 'w') as fh:
-            df.to_csv(fh, header=True, index_label='genome')
-        return csv_file
+        # Create a SubtypingResult model from the output.
+        subtyping_result = model_serotype(
+            pi=pi,
+            pl=pl,
+            output_file=output_file
+        )
+        p = os.path.join(genome_file, '_ectyper_vf.p')
+        pickle.dump(subtyping_result,open(p,'wb'))
+        return p
     else:
         raise Exception('ECTyper Serotyping failed for ' + genome_file)

diff --git a/app/tests/test_models.py b/app/tests/test_models.py
index 4911161e..526e784a 100644
--- a/app/tests/test_models.py
+++ b/app/tests/test_models.py
@@ -1,7 +1,10 @@
 from middleware.models import SubtypingRow, SubtypingResult
 from tests.constants import BEAUTIFY_VF_SEROTYPE

-def test_models():
+def test_subtyping_model_direct():
+    """
+    Use our dataset to directly create a subtyping results model and validate it.
+    """
     subtyping_list = [
         SubtypingRow(
             analysis=d['analysis'],

diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py
index 454d017d..71c133a4 100644
--- a/app/tests/test_modules.py
+++ b/app/tests/test_modules.py
@@ -87,9 +87,10 @@ def test_ectyper_serotype():
         # Check the actual call from Spfy's code.
         single_dict = dict(ARGS_DICT)
         single_dict.update({'i':ecoli_genome})
-        serotype_csv = call_ectyper_serotype(single_dict)
-        ectyper_serotype_df = pd.read_csv(serotype_csv)
-        assert isinstance(ectyper_serotype_df, pd.DataFrame)
+        pickled_serotype_model = call_ectyper_serotype(single_dict)
+        ectyper_serotype_model = pickle.load(open(pickled_serotype_model,'rb'))
+        # Validate (throws an error if invalid).
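#  jsonmodels reports problems by raising (jsonmodels.errors.ValidationError)
#  rather than returning False, so a bad model aborts the test right here;
#  e.g. a SubtypingRow missing its required 'filename' field would raise
#  before any later assert runs.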
+        ectyper_serotype_model.validate()

 def test_amr():
     ecoli_genome = GENOMES_LIST_ECOLI[0]

From 4de78a216bae89c13079c75cbaf91ba89e1b010b Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Wed, 14 Feb 2018 12:06:03 -0500
Subject: [PATCH 015/122] FIX: list in list

---
 app/middleware/modellers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py
index 0512bd0a..c22bedda 100644
--- a/app/middleware/modellers.py
+++ b/app/middleware/modellers.py
@@ -26,9 +26,9 @@ def model_serotype(pi, pl, output_file):
         )
         for index, row in df.iterrows()]

-    # SubtypingResult.rows expects a list.
-    subtyping_result = SubtypingResult([
+    # Convert the list of rows into a SubtypingResult model.
+    subtyping_result = SubtypingResult(
         subtyping_row
-    ])
+    )

     return subtyping_result

From d91bcc71f447335a28a1e53d779dc38778d2d337 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Wed, 14 Feb 2018 12:06:24 -0500
Subject: [PATCH 016/122] FIX: list in list

---
 app/middleware/modellers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py
index c22bedda..18f3d31d 100644
--- a/app/middleware/modellers.py
+++ b/app/middleware/modellers.py
@@ -28,7 +28,7 @@ def model_serotype(pi, pl, output_file):
     # Convert the list of rows into a SubtypingResult model.
     subtyping_result = SubtypingResult(
-        subtyping_row
+        subtyping_list
     )

     return subtyping_result

From 49098e4b79615c0f70126e46a9b2886bcc35050f Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Wed, 14 Feb 2018 12:45:48 -0500
Subject: [PATCH 017/122] FIX: imports

---
 app/middleware/modellers.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py
index 18f3d31d..2a0546ac 100644
--- a/app/middleware/modellers.py
+++ b/app/middleware/modellers.py
@@ -1,5 +1,6 @@
 # We try to keep all model creation in this file so it's easier to reference.
 import pandas as pd
+from middleware.models import SubtypingRow, SubtypingResult
 from middleware.graphers.turtle_utils import actual_filename

From 3b51b706695c43721794d256a693a6d358325731 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Wed, 14 Feb 2018 13:20:39 -0500
Subject: [PATCH 018/122] FIX: model creation

---
 .travis.yml                 | 1 +
 app/middleware/modellers.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index b135076c..982e5bce 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,6 +12,7 @@ before_install:
   - docker build -t superphy/backend-rq-blazegraph:2.0.0 -f Dockerfile-rq-blazegraph .
   - docker-compose up -d
   - docker ps -a
+  - docker-compose logs webserver
   - ls
  #### miniconda install:
  # We do this conditionally because it saves us some downloading if the

diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py
index 2a0546ac..7fed795c 100644
--- a/app/middleware/modellers.py
+++ b/app/middleware/modellers.py
@@ -29,7 +29,7 @@ def model_serotype(pi, pl, output_file):
     # Convert the list of rows into a SubtypingResult model.
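#  The keyword form matches the ListField declared on the model in
#  middleware/models.py; a minimal standalone sketch of the same pattern
#  (field values are illustrative):
#      row = SubtypingRow(analysis='Serotype', contigid='n/a',
#                         filename='g.fasta', hitname='O157:H7')
#      result = SubtypingResult(rows=[row])
#      result.validate()
#  Fields left out here (hitcutoff, hitstart, ...) are nullable per models.py.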
subtyping_result = SubtypingResult( - subtyping_list + rows = subtyping_list ) return subtyping_result From 6fa042fbca37d4ffdfbee5cef363e990a712ba4f Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 13:58:45 -0500 Subject: [PATCH 019/122] FIX: pickling --- app/modules/ectyper/call_ectyper.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py index 4cc52d6f..30838a5d 100644 --- a/app/modules/ectyper/call_ectyper.py +++ b/app/modules/ectyper/call_ectyper.py @@ -58,7 +58,8 @@ def call_ectyper_vf(args_dict): # we are calling tools_controller on only one file, so grab that dict key, ectyper_dict = ectyper_dict.popitem() - p = os.path.join(filepath + '_ectyper_vf.p') + # Path for the pickle dump. + p = filepath + '_ectyper_vf.p' pickle.dump(ectyper_dict,open(p,'wb')) return p @@ -89,7 +90,8 @@ def call_ectyper_serotype(args_dict): output_file=output_file ) - p = os.path.join(genome_file, '_ectyper_vf.p') + # Path for the pickle dump. + p = genome_file + '_ectyper_vf.p' pickle.dump(subtyping_result,open(p,'wb')) return p else: From a888c9030d8ceb60fa98858951bd9ea02336dd0b Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 15:36:48 -0500 Subject: [PATCH 020/122] ADD: conversion to json + tests --- app/middleware/display/beautify.py | 22 ++++++++++++++++++++++ app/tests/test_modules.py | 7 ++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index 51bb6254..7ff19427 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -5,11 +5,33 @@ from modules.loggingFunctions import initialize_logging from middleware.display.find_widest import check_alleles from middleware.graphers.turtle_utils import actual_filename +from middleware.models import SubtypingResult # logging log_file = initialize_logging() log = logging.getLogger(__name__) + +def _convert_subtyping(model): + # Convert the model to a generic JSON structure. + struct = model.to_struct() + # This is not strictly json; more like a list than a dict structure. + rows_list = struct['rows'] + return rows_list + +def model_to_json(model): + """ + Converts models to json for the front-end. + """ + # Validate the model submitted before processing. + model.validate() + # Conversion. + if isinstance(model, SubtypingResult): + return _convert_subtyping(model) + else: + raise Exception('model_to_json() called for a model without a handler.') + + def json_return(args_dict, gene_dict): """ This converts the gene dict into a json format for return to the front end diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index 71c133a4..e8ffde76 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -92,6 +92,11 @@ def test_ectyper_serotype(): # Validate (throws an error if invalid). ectyper_serotype_model.validate() + # Check the conversion for the front-end.
+ json_r = model_to_json(ectyper_serotype_model) + # This is not strictly json; more like a list than a dict structure. + assert isinstance(json_r, list) + def test_amr(): ecoli_genome = GENOMES_LIST_ECOLI[0] # this generates the .tsv From 2332f055e039957758d5a1697b00fc8ae28d9d03 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 19:14:43 -0500 Subject: [PATCH 021/122] STOP: knitted everything together + started some restructuring of how we do pipelines --- app/config.py | 2 + app/middleware/display/beautify.py | 33 +-- app/middleware/graphers/datastruct_savvy.py | 64 ++++-- app/modules/spfy.py | 241 +++++++++++++------- 4 files changed, 229 insertions(+), 111 deletions(-) diff --git a/app/config.py b/app/config.py index 6b33ee9e..0a7a98da 100644 --- a/app/config.py +++ b/app/config.py @@ -14,6 +14,8 @@ # enqueued function to complete before terminating it with an ERROR # If not specified, jobs must execute within 3 mins DEFAULT_TIMEOUT = 600 # in seconds (ie. 10 mins) +# Defines how long results are kept in Redis. 500 is the default for RQ. +DEFAULT_RESULT_TTL=500 PAN_TIMEOUT = 100000 # if BACKLOG_ENABLED = True, then all analyses modules will be run in the # in the background for every submitted file diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index 7ff19427..c93b3f69 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -130,7 +130,8 @@ def handle_failed(json_r, args_dict): ret.append(t) return ret -def beautify(args_dict, pickled_dictionary): +# TODO: convert this to models-only. +def beautify(args_dict=None, pickled_result): ''' Converts a given 'spit' datum (a dictionary with our results from rgi/ectyper) to a json form used by the frontend. This result is to be stored in Redis by the calling RQ Worker. :param args_dict: The arguments supplied by the user. In the case of spfy web-app, this is used to determine which analysis options were set. @@ -139,16 +140,22 @@ def beautify(args_dict, pickled_dictionary): :return: json representation of the results, as required by the front-end. ''' - gene_dict = pickle.load(open(pickled_dictionary, 'rb')) - # this converts our dictionary structure into json and adds metadata (filename, etc.) - json_r = json_return(args_dict, gene_dict) - log.debug('First parse into json_r: ' + str(json_r)) - # if looking for only serotype, skip this step - if args_dict['options']['vf'] or args_dict['options']['amr']: - json_r = check_alleles(json_r) - log.debug('After checking alleles json_r: ' + str(json_r)) - # check if there is an analysis module that has failed in the result - if has_failed(json_r): - return handle_failed(json_r, args_dict) + result = pickle.load(open(pickled_result, 'rb')) + if isinstance(result, dict): + gene_dict = result + # this converts our dictionary structure into json and adds metadata (filename, etc.)
+ json_r = json_return(args_dict, gene_dict) + log.debug('First parse into json_r: ' + str(json_r)) + # if looking for only serotype, skip this step + if args_dict['options']['vf'] or args_dict['options']['amr']: + json_r = check_alleles(json_r) + log.debug('After checking alleles json_r: ' + str(json_r)) + # check if there is an analysis module that has failed in the result + if has_failed(json_r): + return handle_failed(json_r, args_dict) + else: + return json_r + elif isinstance(result, SubtypingResult): + return model_to_json(result) else: - return json_r + raise Exception("beautify() could not handle pickled file: {0}.".format(pickled_result)) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index ba374a2c..9d58bd90 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -4,9 +4,36 @@ from middleware.graphers.turtle_grapher import generate_graph from middleware.blazegraph.upload_graph import queue_upload from modules.PanPredic.pan_utils import contig_name_parse +from middleware.models import SubtypingResult # working with Serotype, Antimicrobial Resistance, & Virulence Factor data # structures +def _convert_subtyping(graph, model, uriIsolate): + # Convert the model to a graph. + struct = model.to_struct() + rows_list = struct['rows'] + for row in rows_list: + graph.add(( + uriIsolate, + gu('ge:0001076'), + Literal(row['O_prediction']) + )) + graph.add(( + uriIsolate, + gu('ge:0001077'), + Literal(serotyper_dict['H_prediction']) + )) + return graph + +def model_to_graph(graph, model, uriIsolate): + # Validate the model submitted before processing. + model.validate() + # Conversion. + if isinstance(model, SubtypingResult): + return _convert_subtyping(graph, model, uriIsolate) + else: + raise Exception('model_to_graph() called for a model without a handler.') + def parse_serotype(graph, serotyper_dict, uriIsolate): if 'O type' in serotyper_dict: graph.add((uriIsolate, gu('ge:0001076'), @@ -148,7 +175,7 @@ def parse_gene_dict(graph, gene_dict, uriGenome, geneType): def generate_datastruct(query_file, id_file, pickled_dictionary): ''' - This is simply to decouple the graph generation code from the + Separates the graph generation code from the upload code. In RQ backend, the datastruct_savvy() method is called where-as in savvy.py (without RQ or Blazegraph) only compute_datastruct() is called. The return type must be the same in datastruct_savvy to @@ -168,22 +195,25 @@ def generate_datastruct(query_file, id_file, pickled_dictionary): spfyid = int(l) uriIsolate = gu(':spfy' + str(spfyid)) - # results dict retrieval - results_dict = pickle.load(open(pickled_dictionary, 'rb')) - - # graphing functions - for key in results_dict.keys(): - if key == 'Serotype': - graph = parse_serotype(graph,results_dict['Serotype'],uriIsolate) - elif key == 'Virulence Factors': - graph = parse_gene_dict(graph, results_dict['Virulence Factors'], uriGenome, 'VirulenceFactor') - elif key == 'Antimicrobial Resistance': - graph = parse_gene_dict(graph, results_dict['Antimicrobial Resistance'], uriGenome, - 'AntimicrobialResistanceGene') - #elif key == 'PanGenomeRegion': - # graph = parse_gene_dict(graph, results_dict[key], uriGenome, key) - - return graph + # Unpickle. + results = pickle.load(open(pickled_dictionary, 'rb')) + # Check if we have a model or a dictionary.
+ if isinstance(results, dict): + # graphing functions + for key in results: + if key == 'Serotype': + graph = parse_serotype(graph,results['Serotype'],uriIsolate) + elif key == 'Virulence Factors': + graph = parse_gene_dict(graph, results['Virulence Factors'], uriGenome, 'VirulenceFactor') + elif key == 'Antimicrobial Resistance': + graph = parse_gene_dict(graph, results['Antimicrobial Resistance'], uriGenome, + 'AntimicrobialResistanceGene') + return graph + elif isinstance(results, SubtypingResult): + graph = model_to_graph(graph, results, uriIsolate) + return graph + else: + raise Exception("generate_datastruct() could not handle pickled file: {0}.".format(pickled_dictionary)) def datastruct_savvy(query_file, id_file, pickled_dictionary): """ diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 8962b77e..33947c84 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -2,6 +2,7 @@ # -*- coding: UTF-8 -*- import os +import copy import redis import config @@ -20,7 +21,7 @@ from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype from modules.amr.amr import amr from modules.amr.amr_to_dict import amr_to_dict -from middleware.display.beautify import beautify +from middleware.display.beautify import beautify, model_to_json from middleware.graphers.datastruct_savvy import datastruct_savvy from middleware.graphers.turtle_grapher import turtle_grapher from middleware.graphers.turtle_utils import actual_filename @@ -50,6 +51,101 @@ backlog_multiples_q = Queue( 'backlog_multiples', connection=redis_conn, default_timeout=config.DEFAULT_TIMEOUT) +def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict): + """ + Enqueue all the jobs required for VF. + """ + # Dictionary of Job instances to return + d = {} + + # Create a copy of the arguments dictionary and disable Serotype. + # This copy is passed to the old ECTyper. + single_dict_vf = copy.deepcopy(single_dict) + single_dict_vf['options']['serotype'] = False + # Enqueue the old ECTyper + job_ectyper_vf = singles.enqueue( + call_ectyper_vf, + single_dict_vf, + depends_on=job_id) + d['job_ectyper_vf'] = job_ectyper_vf + + # If bulk uploading is set, we return the datastruct as the end task + # to poll for job completion, therefore must set ttl of -1. + if single_dict['options']['bulk']: + ttl_value = -1 + else: + ttl_value = config.DEFAULT_RESULT_TTL + + # datastruct_savvy() stores result to Blazegraph. + job_ectyper_datastruct_vf = multiples.enqueue( + datastruct_savvy, + query_file, + query_file + '_id.txt', + query_file + '_ectyper_vf.p', + depends_on=job_ectyper, + result_ttl=ttl_value) + d['job_ectyper_datastruct_vf'] = job_ectyper_datastruct_vf + + if not single_dict['options']['bulk']: + # Only bother parsing into json if user has requested either vf or + # serotype, and we're not in bulk uploading. + job_ectyper_beautify_vf = multiples.enqueue( + beautify, + single_dict, + query_file + '_ectyper.p', + depends_on=job_ectyper_vf, + result_ttl=-1 + ) + d['job_ectyper_beautify_vf'] = job_ectyper_beautify_vf + + return d + +def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict): + """ + Enqueue all the jobs required for Serotyping. + """ + # Dictionary of Job instances to return + d = {} + + # Create a copy of the arguments dictionary and disable Serotype. + # This copy is passed to the old ECTyper.
+ single_dict_vf = copy.deepcopy(single_dict) + # Enqueue the new ECTyper + job_ectyper_serotype = multiples.enqueue( + call_ectyper_serotype, + single_dict, + depends_on=job_id) + d['job_ectyper_serotype'] = job_ectyper_serotype + + # If bulk uploading is set, we return the datastruct as the end task + # to poll for job completion, therefore must set ttl of -1. + if single_dict['options']['bulk']: + ttl_value = -1 + else: + ttl_value = config.DEFAULT_RESULT_TTL + + # datastruct_savvy() stores result to Blazegraph. + job_ectyper_datastruct_serotype = multiples.enqueue( + datastruct_savvy, + query_file, + query_file + '_id.txt', + query_file + '_ectyper_serotype.p', + depends_on=job_ectyper, + result_ttl=-1) + d['job_ectyper_serotype'] = job_ectyper_datastruct_serotype + + if not single_dict['options']['bulk']: + # Only bother parsing into json if user has requested either vf or + # serotype, and we're not in bulk uploading. + job_ectyper_beautify_serotype = multiples.enqueue( + beautify, + pickled_result = query_file + '_ectyper_serotype.p', + depends_on=job_ectyper_vf, + result_ttl=-1 + ) + d['job_ectyper_beautify_serotype'] = job_ectyper_beautify_serotype + + return d def blob_savvy_enqueue(single_dict): ''' @@ -70,77 +166,69 @@ def blob_savvy_enqueue(single_dict): job_id = blazegraph_q.enqueue( write_reserve_id, query_file, depends_on=job_qc, result_ttl=-1) - # ECTYPER PIPELINE - def ectyper_pipeline(singles, multiples): - """The ectyper call is special in that it requires the entire arguments - to decide whether to carry the serotype option flag, virulance - factors option flag, and percent identity field. We use the old ECTyper - for VF and the new ECTyper for Serotyping. - """ - if single_dict['options']['vf']: - # Create a copy of the arguments dictionary and disable Serotype. - # This copy is passed to the old ECTyper. - single_dict_vf = dict(single_dict) - single_dict_vf['options']['serotype'] = False - # Enqueue the old ECTyper - job_ectyper_vf = singles.enqueue( - call_ectyper_vf, - single_dict_vf, - depends_on=job_id) - if single_dict['options']['serotype']: - # Enqueue the new ECTyper - job_ectyper_serotype = multiples.enqueue( - call_ectyper_serotype, - single_dict, - depends_on=job_id) - - # datastruct_savvy() stores result to Blazegraph. + ## ECTyper (VF & Serotype) + # VF + if single_dict['options']['vf']: + ectyper_vf_jobs = _ectyper_pipeline_vf( + singles_q, + multiples_q, + query_file, + single_dict + ) if single_dict['options']['bulk']: - # If bulk uploading is set, we return the datastruct as the end task - # to poll for job completion, therefore must set ttl of -1. 
- if single_dict['options']['vf']: - job_ectyper_datastruct = multiples.enqueue( - datastruct_savvy, - query_file, - query_file + '_id.txt', - query_file + '_ectyper_vf.p', - depends_on=job_ectyper, - result_ttl=-1) - if single_dict['options']['serotype']: - job_ectyper_datastruct = multiples.enqueue( - datastruct_savvy, - query_file, - query_file + '_id.txt', - query_file + '_ectyper_serotype.p', - depends_on=job_ectyper, - result_ttl=-1) + ret_job_ectyper = ectyper_vf_jobs['job_ectyper_datastruct_vf'] + jobs[ret_job_ectyper.get_id()] = { + 'file': single_dict['i'], + 'analysis': 'Virulence Factors'} else: - job_ectyper_datastruct = multiples.enqueue( - datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_ectyper.p', depends_on=job_ectyper) - d = {'job_ectyper': job_ectyper, - 'job_ectyper_datastruct': job_ectyper_datastruct} - # only bother parsing into json if user has requested either vf or - # serotype - if (single_dict['options']['vf'] or single_dict['options']['serotype']) and not single_dict['options']['bulk']: - job_ectyper_beautify = multiples.enqueue( - beautify, single_dict, query_file + '_ectyper.p', depends_on=job_ectyper, result_ttl=-1) - d.update({'job_ectyper_beautify': job_ectyper_beautify}) - return d - - # if user selected any ectyper-dependent options on the front-end - if single_dict['options']['vf'] or single_dict['options']['serotype']: - ectyper_jobs = ectyper_pipeline(singles_q, multiples_q) - job_ectyper = ectyper_jobs['job_ectyper'] - job_ectyper_datastruct = ectyper_jobs['job_ectyper_datastruct'] - if not single_dict['options']['bulk']: - job_ectyper_beautify = ectyper_jobs['job_ectyper_beautify'] - # or if the backlog queue is enabled + ret_job_ectyper = ectyper_vf_jobs['job_ectyper_beautify_vf'] + jobs[ret_job_ectyper.get_id()] = { + 'file': single_dict['i'], + 'analysis': 'Virulence Factors'} elif config.BACKLOG_ENABLED: - # we need to create a dict with these options enabled: - - # just enqueue the jobs, we don't care about returning them - ectyper_jobs = ectyper_pipeline(backlog_singles_q, backlog_multiples_q) - job_ectyper_datastruct = ectyper_jobs['job_ectyper_datastruct'] + # We need to create a dict with the options enabled. + backlog_d = copy.deepcopy(single_dict) + backlog_d['options']['vf'] = True + # Explicitly set serotype to false in case of overlap. + backlog_d['options']['serotype'] = False + # Note: we use different queues. + _ectyper_pipeline_vf( + backlog_singles_q, + backlog_multiples_q, + query_file, + backlog_d + ) + + # Serotype + if single_dict['options']['serotype']: + ectyper_serotype_jobs = _ectyper_pipeline_serotype( + singles_q, + multiples_q, + query_file, + single_dict + ) + if single_dict['options']['bulk']: + ret_job_ectyper = ectyper_serotype_jobs['job_ectyper_datastruct_serotype'] + jobs[ret_job_ectyper.get_id()] = { + 'file': single_dict['i'], + 'analysis': 'Serotype'} + else: + ret_job_ectyper = ectyper_serotype_jobs['job_ectyper_beautify_serotype'] + jobs[ret_job_ectyper.get_id()] = { + 'file': single_dict['i'], + 'analysis': 'Serotype'} + elif config.BACKLOG_ENABLED: + # We need to create a dict with the options enabled. + backlog_d = copy.deepcopy(single_dict) + # Explicitly set vf to false in case of overlap.
+ backlog_d['options']['vf'] = False + backlog_d['options']['serotype'] = True + _ectyper_pipeline_serotype( + backlog_singles_q, + backlog_multiples_q, + query_file, + backlog_d + ) # END ECTYPER PIPELINE # AMR PIPELINE @@ -241,20 +329,11 @@ def phylotyper_pipeline(multiples, subtype): # to poll for completion of all jobs # these two ifs handle the case where amr (or vf or serotype) might not # be selected but bulk is - if (single_dict['options']['vf'] or single_dict['options']['serotype']): - ret_job_ectyper = job_ectyper_datastruct if single_dict['options']['amr']: ret_job_amr = job_amr_datastruct - # if bulk uploading isnt used, return the beautify result as the final task - if not single_dict['options']['bulk']: - if (single_dict['options']['vf'] or single_dict['options']['serotype']): - ret_job_ectyper = job_ectyper_beautify - if single_dict['options']['amr']: - ret_job_amr = job_amr_beautify - # add the jobs to the return - if (single_dict['options']['vf'] or single_dict['options']['serotype']): - jobs[ret_job_ectyper.get_id()] = {'file': single_dict[ - 'i'], 'analysis': 'Virulence Factors and Serotype'} + + # Add the jobs to the return. + # TODO: incorporate this into pipeline calls, as in the ECTyper pipeline. if single_dict['options']['amr']: jobs[ret_job_amr.get_id()] = {'file': single_dict[ 'i'], 'analysis': 'Antimicrobial Resistance'} From 5296c23e5698cdd769ea0537f774ff5aca91624b Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 19:47:56 -0500 Subject: [PATCH 022/122] FIX: reverse order of beautify params --- app/middleware/display/beautify.py | 2 +- app/scripts/savvy.py | 4 ++-- app/tests/test_beautify.py | 6 +++--- app/tests/test_modules.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index c93b3f69..b4980276 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -131,7 +131,7 @@ def handle_failed(json_r, args_dict): return ret # TODO: convert this to models-only. -def beautify(args_dict=None, pickled_result): +def beautify(pickled_result, args_dict=None): ''' Converts a given 'spit' datum (a dictionary with our results from rgi/ectyper) to a json form used by the frontend. This result is to be stored in Redis by the calling RQ Worker. :param args_dict: The arguments supplied by the user. In the case of spfy web-app, this is used to determine which analysis options were set.
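Why patch 022 is needed: Python rejects a required parameter placed after one that has a default, so the beautify(args_dict=None, pickled_result) signature introduced in patch 021 fails with a SyntaxError before the module can even import. A minimal standalone sketch of the failure and the fix (illustrative only, outside the repository):

    # SyntaxError: non-default argument follows default argument
    # def beautify(args_dict=None, pickled_result):
    #     ...

    # Valid: the required parameter comes first, the optional one last,
    # so callers that only have a pickled result can omit args_dict.
    def beautify(pickled_result, args_dict=None):
        return pickled_result, args_dict

    beautify('some_genome.fasta_ectyper_serotype.p')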
diff --git a/app/scripts/savvy.py b/app/scripts/savvy.py index bf037cf2..05aca0c9 100755 --- a/app/scripts/savvy.py +++ b/app/scripts/savvy.py @@ -123,7 +123,7 @@ def write_json(json_r, analysis): log.debug("Pickled ECTyper File: " + ectyper_p) # (4) ECTyper Beautify Step: - ectyper_beautify = beautify(args_dict, ectyper_p) + ectyper_beautify = beautify(ectyper_p, args_dict) log.debug('Beautified ECTyper Result: ' + str(ectyper_beautify)) ectyper_json = write_json(ectyper_beautify, 'ectyper') @@ -142,7 +142,7 @@ def write_json(json_r, analysis): log.debug("Pickled AMR Results File: " + amr_p) # (8) AMR Beautify Step: - amr_beautify = beautify(args_dict, amr_p) + amr_beautify = beautify(amr_p, args_dict) log.debug('Beautified AMR Result: ' + str(amr_beautify)) amr_json = write_json(amr_beautify, 'rgi') diff --git a/app/tests/test_beautify.py b/app/tests/test_beautify.py index ea7546f0..6b97a814 100644 --- a/app/tests/test_beautify.py +++ b/app/tests/test_beautify.py @@ -13,7 +13,7 @@ def test_beautify_vf_serotype(): ## test vf & serotype json return single_dict = dict(ARGS_DICT) single_dict.update({'i': vf_serotype_gene_dict}) - assert len(beautify(single_dict, vf_serotype_gene_dict)) == len(BEAUTIFY_VF_SEROTYPE) + assert len(beautify(vf_serotype_gene_dict, single_dict)) == len(BEAUTIFY_VF_SEROTYPE) def test_beautify_serotype_only(): ## test serotype only json return @@ -24,7 +24,7 @@ def test_beautify_serotype_only(): # this mimics user selection of serotype only single_dict.update({'options':{'vf': False, 'amr': False, 'serotype': True}}) # beautify is what is actually called by the RQ worker & returned to the user - r = beautify(single_dict, vf_serotype_gene_dict) + r = beautify(vf_serotype_gene_dict, single_dict) assert len(r) == 1 def test_beautify_json_r_serotype_only(): @@ -48,7 +48,7 @@ def test_beautify_amr_only(): single_dict.update({'i': amr_gene_dict}) # this mimics user selection of amr only single_dict.update({'options':{'vf': False, 'amr': True, 'serotype': False}}) - r = beautify(single_dict, amr_gene_dict) + r = beautify(amr_gene_dict, single_dict) assert len(r) > 1 def test_beautify_json_r_amr_only(): diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index e8ffde76..af9541c7 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -72,7 +72,7 @@ def test_ectyper_vf(): assert type(ectyper_dict) == dict # beautify ECTyper check - json_return = beautify(single_dict, pickled_ectyper_dict) + json_return = beautify(pickled_ectyper_dict, single_dict) assert type(json_return) == list def test_ectyper_serotype(): @@ -112,5 +112,5 @@ def test_amr(): # beautify amr check single_dict = dict(ARGS_DICT) single_dict.update({'i':ecoli_genome}) - json_return = beautify(single_dict,pickled_amr_dict) + json_return = beautify(pickled_amr_dict, single_dict) assert type(json_return) == list From a48d411915436dae3e24e89bd5a36d5976e2ec36 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 14 Feb 2018 22:57:17 -0500 Subject: [PATCH 023/122] DEBUG: check the logs if tests fail --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 982e5bce..c7fcfa5c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -48,5 +48,8 @@ install: script: #### Run Pytest - python -m pytest --ignore modules/ectyper/ecoli_serotyping -v +after_failure: + # Check the logs if tests fail.
+ - docker-compose logs webserver notifications: email: false From ce63813cba76ee11cf4e56884b8ae259a4dab4ed Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 15 Feb 2018 10:57:22 -0500 Subject: [PATCH 024/122] FIX: start loading some jobs into the new Jobs class --- app/middleware/graphers/datastruct_savvy.py | 2 +- app/middleware/models.py | 21 ++++---- app/modules/spfy.py | 53 ++++++++++++++------- 3 files changed, 50 insertions(+), 26 deletions(-) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index 9d58bd90..5a515e7e 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -21,7 +21,7 @@ def _convert_subtyping(graph, model, uriIsolate): graph.add(( uriIsolate, gu('ge:0001077'), - Literal(serotyper_dict['H_prediction']) + Literal(row['H_prediction']) )) return graph diff --git a/app/middleware/models.py b/app/middleware/models.py index d17e1a16..3ff05164 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -2,15 +2,20 @@ class SubtypingRow(models.Base): - analysis = fields.StringField(required=True) - contigid = fields.StringField(required=True) - filename = fields.StringField(required=True) - hitcutoff = fields.StringField(nullable=True) - hitname = fields.StringField(required=True) - hitorientation = fields.StringField(nullable=True) - hitstart = fields.StringField(nullable=True) - hitstop = fields.StringField(nullable=True) + analysis = fields.StringField(required=True) + contigid = fields.StringField(required=True) + filename = fields.StringField(required=True) + hitcutoff = fields.StringField(nullable=True) + hitname = fields.StringField(required=True) + hitorientation = fields.StringField(nullable=True) + hitstart = fields.StringField(nullable=True) + hitstop = fields.StringField(nullable=True) class SubtypingResult(models.Base): rows = fields.ListField([SubtypingRow], nullable=True) + + +class Pipeline(models.Base): + jobs = fields.EmbeddedField(dict, default={}) + single_dict = fields.EmbeddedField(dict, default={}) \ No newline at end of file diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 33947c84..3931958f 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -26,6 +26,7 @@ from middleware.graphers.turtle_grapher import turtle_grapher from middleware.graphers.turtle_utils import actual_filename from modules.phylotyper import phylotyper +from middleware.models import Pipeline from modules.loggingFunctions import initialize_logging import logging @@ -51,12 +52,14 @@ backlog_multiples_q = Queue( 'backlog_multiples', connection=redis_conn, default_timeout=config.DEFAULT_TIMEOUT) -def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict): +def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=None): """ Enqueue all the jobs required for VF. """ # Dictionary of Job instances to return d = {} + # Alias. + job_id = pipeline.jobs['job_id'] # Create a copy of the arguments dictionary and disable Serotype. # This copy is passed to the old ECTyper. 
@@ -82,7 +85,7 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict): query_file, query_file + '_id.txt', query_file + '_ectyper_vf.p', - depends_on=job_ectyper, + depends_on=job_ectyper_vf, result_ttl=ttl_value) d['job_ectyper_datastruct_vf'] = job_ectyper_datastruct_vf @@ -94,18 +97,20 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict): single_dict, query_file + '_ectyper.p', depends_on=job_ectyper_vf, - result_ttl=-1 + result_ttl=ttl_value ) d['job_ectyper_beautify_vf'] = job_ectyper_beautify_vf @@ -113,7 +118,7 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe # Enqueue the new ECTyper job_ectyper_serotype = multiples.enqueue( call_ectyper_serotype, - single_dict, + single_dict_vf, depends_on=job_id) d['job_ectyper_serotype'] = job_ectyper_serotype @@ -130,8 +135,8 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe query_file, query_file + '_id.txt', query_file + '_ectyper_serotype.p', - depends_on=job_ectyper, - result_ttl=-1) + depends_on=job_ectyper_serotype, + result_ttl=ttl_value) d['job_ectyper_serotype'] = job_ectyper_datastruct_serotype @@ -140,8 +145,8 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe job_ectyper_beautify_serotype = multiples.enqueue( beautify, pickled_result = query_file + '_ectyper_serotype.p', - depends_on=job_ectyper_vf, - result_ttl=-1 + depends_on=job_ectyper_serotype, + result_ttl=ttl_value ) d['job_ectyper_beautify_serotype'] = job_ectyper_beautify_serotype @@ -161,10 +166,14 @@ def blob_savvy_enqueue(single_dict): ''' jobs = {} query_file = single_dict['i'] - + pipeline = Pipeline + pipeline.single_dict = copy.deepcopy(single_dict) + job_qc = multiples_q.enqueue(qc, query_file, result_ttl=-1) + pipeline.jobs.update({'job_qc':job_qc}) job_id = blazegraph_q.enqueue( write_reserve_id, query_file, depends_on=job_qc, result_ttl=-1) + pipeline.jobs.update({'job_id':job_id}) ## ECTyper (VF & Serotype) # VF @@ -173,8 +182,10 @@ def blob_savvy_enqueue(single_dict): singles_q, multiples_q, query_file, - single_dict + single_dict, + pipeline=pipeline ) + pipeline.jobs.update(ectyper_vf_jobs) if single_dict['options']['bulk']: ret_job_ectyper = ectyper_vf_jobs['job_ectyper_datastruct_vf'] jobs[ret_job_ectyper.get_id()] = { @@ -192,11 +203,12 @@ def blob_savvy_enqueue(single_dict): # Explicitly set serotype to false in case of overlap. backlog_d['options']['serotype'] = False # Note: we use different queues.
- _ectyper_pipeline_vf( + _ectyper_pipeline_vf( backlog_singles_q, backlog_multiples_q, query_file, - backlog_d + backlog_d, + pipeline=pipeline ) # Serotype @@ -205,8 +217,10 @@ def blob_savvy_enqueue(single_dict): singles_q, multiples_q, query_file, - single_dict + single_dict, + pipeline=pipeline ) + pipeline.jobs.update(ectyper_serotype_jobs) if single_dict['options']['bulk']: ret_job_ectyper = ectyper_serotype_jobs['job_ectyper_datastruct_serotype'] jobs[ret_job_ectyper.get_id()] = { @@ -227,8 +241,9 @@ def blob_savvy_enqueue(single_dict): backlog_singles_q, backlog_multiples_q, query_file, - backlog_d - ) + backlog_d, + pipeline=pipeline + ) # END ECTYPER PIPELINE # AMR PIPELINE @@ -274,8 +289,12 @@ def phylotyper_pipeline(multiples, subtype): picklefile = query_file + jobname + '.p' job_pt = multiples.enqueue( - phylotyper.phylotyper, None, subtype, tsvfile, id_file=query_file + '_id.txt', - depends_on=job_ectyper_datastruct) + phylotyper.phylotyper, + None, + subtype, + tsvfile, + id_file=query_file + '_id.txt', + depends_on=pipeline.jobs['job_ectyper_datastruct_vf']) job_pt_dict = multiples.enqueue( phylotyper.to_dict, tsvfile, subtype, picklefile, depends_on=job_pt) From 49549b8527a343dc868473c61727c09adeef85c5 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 15 Feb 2018 13:25:00 -0500 Subject: [PATCH 025/122] FIX: embed regular dicts for now --- app/middleware/models.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 3ff05164..182442ca 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -15,7 +15,6 @@ class SubtypingRow(models.Base): class SubtypingResult(models.Base): rows = fields.ListField([SubtypingRow], nullable=True) - class Pipeline(models.Base): - jobs = fields.EmbeddedField(dict, default={}) - single_dict = fields.EmbeddedField(dict, default={}) \ No newline at end of file + jobs = {} + single_dict = fields.EmbeddedField(dict, default={}) From b040d932c3edd62ad705761fcd4c270cdee0f7db Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 15 Feb 2018 13:25:13 -0500 Subject: [PATCH 026/122] FIX: embed regular dicts for now --- app/middleware/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 182442ca..f584bcd7 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -17,4 +17,4 @@ class SubtypingResult(models.Base): class Pipeline(models.Base): jobs = {} - single_dict = fields.EmbeddedField(dict, default={}) + single_dict = {} From df975f9eb91a344a253ae481a045b6cda59b0da5 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 15 Feb 2018 13:37:21 -0500 Subject: [PATCH 027/122] FIX: order of calls for beautify --- app/modules/spfy.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 3931958f..9ea2993c 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -94,8 +94,8 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N # serotype, and we're not in bulk uploading. 
job_ectyper_beautify_vf = multiples.enqueue( beautify, - single_dict, query_file + '_ectyper.p', + query_file + '_ectyper.p', + single_dict, depends_on=job_ectyper_vf, result_ttl=ttl_value ) @@ -266,7 +266,11 @@ ... # blazegraph if single_dict['options']['amr'] and not single_dict['options']['bulk']: job_amr_beautify = multiples.enqueue( - beautify, single_dict, query_file + '_rgi.tsv_rgi.p', depends_on=job_amr_dict, result_ttl=-1) + beautify, + query_file + '_rgi.tsv_rgi.p', + single_dict, + depends_on=job_amr_dict, + result_ttl=-1) d.update({'job_amr_beautify': job_amr_beautify}) return d From 988bee6667d25b00f04cc3badfe470f5c463c0c1 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 15 Feb 2018 13:41:37 -0500 Subject: [PATCH 028/122] FIX: typo --- app/modules/ectyper/call_ectyper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py index 30838a5d..a7bb2aab 100644 --- a/app/modules/ectyper/call_ectyper.py +++ b/app/modules/ectyper/call_ectyper.py @@ -91,7 +91,7 @@ def call_ectyper_serotype(args_dict): output_file=output_file ) # Path for the pickle dump. - p = genome_file + '_ectyper_vf.p' + p = genome_file + '_ectyper_serotype.p' pickle.dump(subtyping_result,open(p,'wb')) return p else: From 0ea77017a659fe0b0e1b2b3ea1d293b028c93a95 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 15 Feb 2018 22:21:29 -0500 Subject: [PATCH 029/122] FIX: more typos --- app/modules/spfy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 9ea2993c..c74be7ee 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -94,7 +94,7 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N # serotype, and we're not in bulk uploading. job_ectyper_beautify_vf = multiples.enqueue( beautify, - query_file + '_ectyper.p', + query_file + '_ectyper_vf.p', single_dict, depends_on=job_ectyper_vf, result_ttl=ttl_value From eb2795f98bb4fdcf6078d99a225906bdb2794db2 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Fri, 16 Feb 2018 11:49:45 -0500 Subject: [PATCH 030/122] FIX: the depends_on check in RQ doesn't validate if it's called on actual jobs --- app/modules/spfy.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index c74be7ee..3567af98 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -36,10 +36,9 @@ logger = logging.getLogger(__name__) -# the only ONE time for global variables -# when naming queues, make sure you actually set a worker to listen to that queue +# When naming queues, make sure you set a worker to listen to that queue # we use the high priority queue for things that should be immediately -# returned to the user +# returned to the user. redis_url = config.REDIS_URL redis_conn = redis.from_url(redis_url) singles_q = Queue('singles', connection=redis_conn) @@ -101,6 +100,8 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N ) d['job_ectyper_beautify_vf'] = job_ectyper_beautify_vf + # Mutate the jobs pipeline from the calling function. + pipeline.jobs.update(d) return d def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipeline=None): @@ -150,6 +151,8 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe ) d['job_ectyper_beautify_serotype'] = job_ectyper_beautify_serotype + # Mutate the jobs pipeline from the calling function.
+ pipeline.jobs.update(d) return d def blob_savvy_enqueue(single_dict): @@ -166,7 +169,7 @@ def blob_savvy_enqueue(single_dict): ''' jobs = {} query_file = single_dict['i'] - pipeline = Pipeline + pipeline = Pipeline() pipeline.single_dict = copy.deepcopy(single_dict) job_qc = multiples_q.enqueue(qc, query_file, result_ttl=-1) @@ -185,7 +188,7 @@ def blob_savvy_enqueue(single_dict): single_dict, pipeline=pipeline ) - pipeline.jobs.update(ectyper_vf_jobs) + # pipeline.jobs.update(ectyper_vf_jobs) if single_dict['options']['bulk']: ret_job_ectyper = ectyper_vf_jobs['job_ectyper_datastruct_vf'] jobs[ret_job_ectyper.get_id()] = { @@ -220,7 +223,7 @@ def blob_savvy_enqueue(single_dict): single_dict, pipeline=pipeline ) - pipeline.jobs.update(ectyper_serotype_jobs) + # pipeline.jobs.update(ectyper_serotype_jobs) if single_dict['options']['bulk']: ret_job_ectyper = ectyper_serotype_jobs['job_ectyper_datastruct_serotype'] jobs[ret_job_ectyper.get_id()] = { From 590aa4391731f9dae5ab254fa5985b76e6c2db3b Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Fri, 16 Feb 2018 12:34:51 -0500 Subject: [PATCH 031/122] CHANGE: use regular python classes instead of inheriting from jsonmodels.models.Base --- app/middleware/models.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index f584bcd7..614ed5ee 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -15,6 +15,17 @@ class SubtypingRow(models.Base): class SubtypingResult(models.Base): rows = fields.ListField([SubtypingRow], nullable=True) -class Pipeline(models.Base): - jobs = {} - single_dict = {} +class Job(): + def __init__(self, job, transitory=True, display=False): + self.job = job # an instance of the RQ Job class + self.transitory = # if the job won't persist in Redis DB + self.display = # used for display to the front-end + +class Pipeline(): + def __init__(self, jobs=None, single_dict=None): + if not jobs: + jobs = {} + if not single_dict: + single_dict = {} + self.jobs = {} + self.single_dict = {} From 8efb9fc07291f12f713226e854097749de4efc51 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Fri, 16 Feb 2018 12:44:17 -0500 Subject: [PATCH 032/122] CHANGE: use regular python classes instead of inheriting from jsonmodels.models.Base --- app/middleware/models.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 614ed5ee..8cbcbc30 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -16,16 +16,16 @@ class SubtypingResult(models.Base): rows = fields.ListField([SubtypingRow], nullable=True) class Job(): - def __init__(self, job, transitory=True, display=False): + def __init__(self, rq_job, transitory=True, display=False): self.job = job # an instance of the RQ Job class - self.transitory = # if the job won't persist in Redis DB - self.display = # used for display to the front-end - + self.transitory = transitory # if the job won't persist in Redis DB + self.display = display # used for display to the front-end + class Pipeline(): def __init__(self, jobs=None, single_dict=None): if not jobs: jobs = {} if not single_dict: single_dict = {} - self.jobs = {} + self.jobs = {} # {'somename': instance of RQ.Job} self.single_dict = {} From 81cf5b8962c4415d7262991e2b2f265f48cf24af Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Fri, 16 Feb 2018 16:42:11 -0500 Subject: [PATCH 033/122] ADD: some tests for the pipeline signatures + CHANGE: moved pipeline creation up 
one level --- app/middleware/models.py | 119 ++++++++++++++++++++++++++++++++++--- app/modules/spfy.py | 21 ++++--- app/routes/ra_posts.py | 11 +++- app/tests/test_models.py | 50 +++++++++++++++- 4 files changed, 181 insertions(+), 20 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 8cbcbc30..de4fe598 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -1,4 +1,10 @@ +import sys +from hashlib import sha1 +from dis import dis +from StringIO import StringIO from jsonmodels import models, fields +from middleware.graphing.turtle_utils import actual_filename +from middleware.display.beautify import model_to_json class SubtypingRow(models.Base): @@ -15,17 +21,114 @@ class SubtypingRow(models.Base): class SubtypingResult(models.Base): rows = fields.ListField([SubtypingRow], nullable=True) + class Job(): - def __init__(self, rq_job, transitory=True, display=False): - self.job = job # an instance of the RQ Job class - self.transitory = transitory # if the job won't persist in Redis DB - self.display = display # used for display to the front-end + def __init__(self, rq_job, transitory=True, backlog=True, display=False): + """ + Args: + rq_job: An instance of the RQ Job class. + transitory: Some intermediate, we only care if it failed. It's ok + if the job isn't found in Redis. + backlog: For background processing, we don't care whatsoever. Will + still be caught by Sentry.io if it fails. + display: To be parsed for the front-end. + """ + self.rq_job = rq_job + self.transitory = transitory + self.backlog = backlog + self.display = display class Pipeline(): - def __init__(self, jobs=None, single_dict=None): + def __init__(self, jobs=None, files=None, func=None, options=None): if not jobs: jobs = {} - if not single_dict: - single_dict = {} - self.jobs = {} # {'somename': instance of RQ.Job} - self.single_dict = {} + if not files: + files = [] + if not options: + options = {} + self.jobs = {} # {'somename': instance of RQ.Job} + self.sig = None # Signature isn't generated until necessary + # TODO: incorporate below into the pipeline. + self.files = [] + self.func = func # Additional attribute for storing pipeline function. + self.options = None + + def complete(self): + """ + Check if all jobs are completed + """ + for j in jobs.itervalues(): + rq_job = j.rq_job + if j.backlog: + # Some backlog job, we don't care (though Sentry will catch it). + continue + elif rq_job.is_failed: + # If the job failed, return the error. + return rq_job.exc_info + elif not job.is_finished: + # One of the jobs hasn't finished. + return False + return True + + def to_json(self): + """ + Reduces all results from self.jobs to json for return. + """ + # Gather all the jobs that have finished and haven't failed. + completed_jobs = [ + j.rq_job for j in jobs.itervalues() + if j.display and j.rq_job.is_finished and not j.rq_job.is_failed + ] + # Merge the json lists together. + l = [] + for rq_job in completed_jobs: + model = rq_job.result + list_json = model_to_json(model) + l += list_json + return l + + def _function_signature(self): + """ + Generates signatures for functions. + """ + # dis.dis() sends output to stdout, we need to capture it to generate + # a signature. + + # Assign the old stdout. + old_stdout = sys.stdout + # Create a buffer for the new output. + result = StringIO() + # Swap the stdout to our buffer. + sys.stdout = result + # dis() call. + dis(self.func) + # Restore the stdout to screen. + sys.stdout = old_stdout + # Grab the output from the dis() call.
result_string = result.getvalue() + return result_string + + def signature(self): + """ + Create a signature that can identify a given task. Used to check + if the same task was requested. + """ + # Create a string of the function signature. + str_func = self._function_signature() + # Start the hashing process with the function signature. + hx = sha1(str_func) + + # Create a string of the files. + str_files = str(self.files) + # Update the hash with our args information. + hx.update(str_files) + + # Create a string of the options. + str_options = str(self.options) + # Update the hash with our args information. + hx.update(str_args) + + # Use the hexdigest as the signature. + sig = hx.hexdigest() + self.sig = sig + return sig diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 3567af98..d98694de 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -155,7 +155,7 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe pipeline.jobs.update(d) return d -def blob_savvy_enqueue(single_dict): +def blob_savvy_enqueue(single_dict, pipeline): ''' Handles enqueueing of single file to multiple queues. :param f: a fasta file @@ -169,9 +169,7 @@ def blob_savvy_enqueue(single_dict, pipeline): ''' jobs = {} query_file = single_dict['i'] - pipeline = Pipeline() - pipeline.single_dict = copy.deepcopy(single_dict) - + job_qc = multiples_q.enqueue(qc, query_file, result_ttl=-1) pipeline.jobs.update({'job_qc':job_qc}) job_id = blazegraph_q.enqueue( write_reserve_id, query_file, depends_on=job_qc, result_ttl=-1) @@ -376,23 +374,28 @@ def phylotyper_pipeline(multiples, subtype): return jobs -def blob_savvy(args_dict): +def blob_savvy(args_dict, pipeline): ''' - Handles enqueuing of all files in a given directory or just a single file + Handles enqueuing of all files in a given directory or just a single file. ''' d = {} if os.path.isdir(args_dict['i']): for f in os.listdir(args_dict['i']): single_dict = dict(args_dict.items() + {'i': os.path.join(args_dict['i'], f)}.items()) - d.update(blob_savvy_enqueue(single_dict)) + d.update( + blob_savvy_enqueue( + single_dict, + pipeline + ) + ) else: d.update(blob_savvy_enqueue(args_dict, pipeline)) return d -def spfy(args_dict): +def spfy(args_dict, pipeline): ''' ''' # abs path resolution should be handled in spfy.py #print 'Starting blob_savvy call' #logger.info('args_dict: ' + str(args_dict)) - jobs_dict = blob_savvy(args_dict) + jobs_dict = blob_savvy(args_dict, pipeline) return jobs_dict diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py index 93440141..ab6d9f8f 100644 --- a/app/routes/ra_posts.py +++ b/app/routes/ra_posts.py @@ -17,6 +17,7 @@ from modules.gc import blob_gc_enqueue from modules.spfy import spfy from middleware.api import subtyping_dependencies +from middleware.models import Pipeline bp_ra_posts = Blueprint('reactapp_posts', __name__) @@ -234,6 +235,12 @@ def upload(): now = now.strftime("%Y-%m-%d-%H-%M-%S-%f") jobs_dict = {} + pipeline = Pipeline( + files = uploaded_files, + func = spfy, + options = options + ) + for file in uploaded_files: if file: # for saving file @@ -250,7 +257,9 @@ def upload(): # for enqueing task jobs_enqueued = spfy( - {'i': filename, 'pi':options['pi'], 'options':options}) + args_dict = {'i': filename, 'pi':options['pi'], 'options':options}, + pipeline = pipeline ) jobs_dict.update(jobs_enqueued) # new in 4.2.0 print 'upload(): all files enqueued, returning...'
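How the signature is computed: dis.dis() prints a function's bytecode to stdout, so _function_signature() temporarily redirects stdout into a StringIO buffer to capture that printout, and signature() then folds the captured disassembly, str(files), and str(options) into a single SHA-1 digest, so two pipelines built from the same function and inputs hash identically. A minimal standalone sketch of the same idea, written for Python 2 like the rest of the codebase (hash_pipeline is illustrative, not the repository's API):

    import sys
    from dis import dis
    from hashlib import sha1
    from StringIO import StringIO

    def hash_pipeline(func, files, options):
        # Capture the disassembly that dis() prints to stdout.
        old_stdout = sys.stdout
        buf = StringIO()
        sys.stdout = buf
        dis(func)
        sys.stdout = old_stdout
        # Fold the function's bytecode, files, and options into one digest.
        hx = sha1(buf.getvalue())
        hx.update(str(files))
        hx.update(str(options))
        return hx.hexdigest()

Calling hash_pipeline() twice with the same function and arguments yields the same hex digest, while a different function or different options changes it, which is exactly what the signature tests below assert.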
diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 526e784a..9384ae18 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -1,5 +1,8 @@ -from middleware.models import SubtypingRow, SubtypingResult -from tests.constants import BEAUTIFY_VF_SEROTYPE +from middleware.models import SubtypingRow, SubtypingResult, Pipeline +from modules.spfy import spfy +from scripts.savvy import savvy +from tests.constants import BEAUTIFY_VF_SEROTYPE, ARGS_DICT + def test_subtyping_model_direct(): """ @@ -21,3 +24,46 @@ def test_subtyping_model_direct(): rows = subtyping_list ) subtyping_result.validate() + +def test_pipeline_model_signature(): + """ + Function signatures should be identical if called on the same function. + """ + p1 = Pipeline( + func = spfy, + options = ARGS_DICT + ) + p2 = Pipeline( + func = spfy, + options = ARGS_DICT + ) + r1 = p1.signature() + r2 = p2.signature() + # These are identical pipelines, should be equal. + assert r1 == r2 + + p1 = Pipeline( + func = spfy, + options = ARGS_DICT + ) + p2 = Pipeline( + func = savvy, + options = ARGS_DICT + ) + r1 = p1.signature() + r2 = p2.signature() + # These pipelines have different functions, should be different. + assert r1 != r2 + + p1 = Pipeline( + func = spfy, + options = ARGS_DICT + ) + p2 = Pipeline( + func = spfy, + options = {'cats':1} + ) + r1 = p1.signature() + r2 = p2.signature() + # These pipelines have different options, should be different. + assert r1 != r2 From 30055a56a3dc3a28119fa9d3f431b974222c32dc Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Fri, 16 Feb 2018 17:07:41 -0500 Subject: [PATCH 034/122] FIX: imports --- app/middleware/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index de4fe598..3596be13 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -3,7 +3,7 @@ from dis import dis from StringIO import StringIO from jsonmodels import models, fields -from middleware.graphing.turtle_utils import actual_filename +from middleware.graphers.turtle_utils import actual_filename from middleware.display.beautify import model_to_json From c844fa3afc7ba91c0f7276b6f34159980cc2325f Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 17 Feb 2018 18:43:01 -0500 Subject: [PATCH 035/122] FIX: circular imports --- app/middleware/display/beautify.py | 23 +---------------------- app/middleware/models.py | 28 +++++++++++++++++++++++----- app/modules/spfy.py | 3 +-- 3 files changed, 25 insertions(+), 29 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index b4980276..f51fb232 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -1,37 +1,16 @@ import logging import pandas as pd import cPickle as pickle -from os.path import basename from modules.loggingFunctions import initialize_logging from middleware.display.find_widest import check_alleles from middleware.graphers.turtle_utils import actual_filename -from middleware.models import SubtypingResult +from middleware.models import SubtypingResult, model_to_json # logging log_file = initialize_logging() log = logging.getLogger(__name__) -def _convert_subtyping(model): - # Convert the model to a generic JSON structure. - struct = model.to_struct() - # This is not strictly json; more like a list than a dict structure. - rows_list = struct['rows'] - return rows_list - -def model_to_json(model): - """ - Converts models to json for the front-end. 
- """ - # Validate the model submitted before processing. - model.validate() - # Conversion. - if isinstance(model, SubtypingResult): - return _convert_subtyping(model) - else: - raise Exception('model_to_json() called for a model without a handler.') - - def json_return(args_dict, gene_dict): """ This converts the gene dict into a json format for return to the front end diff --git a/app/middleware/models.py b/app/middleware/models.py index 3596be13..1e1e3456 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -4,7 +4,25 @@ from StringIO import StringIO from jsonmodels import models, fields from middleware.graphers.turtle_utils import actual_filename -from middleware.display.beautify import model_to_json + +def _convert_subtyping(model): + # Convert the model to a generic JSON structure. + struct = model.to_struct() + # This is not strictly json; more like a list than a dict structure. + rows_list = struct['rows'] + return rows_list + +def model_to_json(model): + """ + Converts models to json for the front-end. + """ + # Validate the model submitted before processing. + model.validate() + # Conversion. + if isinstance(model, SubtypingResult): + return _convert_subtyping(model) + else: + raise Exception('model_to_json() called for a model without a handler.') class SubtypingRow(models.Base): @@ -57,7 +75,7 @@ def complete(self): """ Check if all jobs are completed """ - for j in jobs.itervalues(): + for j in self.jobs.itervalues(): rq_job = j.rq_job if j.backlog: # Some backlog job, we don't care (though Sentry will catch it). @@ -65,7 +83,7 @@ def complete(self): elif rq_job.is_failed: # If the job failed, return the error. return rq_job.exc_info - elif not job.is_finished: + elif not rq_job.is_finished: # One of the jobs hasn't finished. return False return True @@ -76,7 +94,7 @@ def to_json(self): """ # Gather all the jobs that have finished and haven't failed. completed_jobs = [ - j.rq_job for j in jobs.itervalues() + j.rq_job for j in self.jobs.itervalues() if j.display and j.rq_job.is_finished and not j.rq_job.is_failed ] # Merge the json lists together. @@ -126,7 +144,7 @@ def signature(self): # Create a string of the options. str_options = str(self.options) # Update the hash with our args information. - hx.update(str_args) + hx.update(str_options) # Use the hexdigest as the signature. 
sig = hx.hexdigest() diff --git a/app/modules/spfy.py b/app/modules/spfy.py index d98694de..627473bd 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -21,12 +21,11 @@ from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype from modules.amr.amr import amr from modules.amr.amr_to_dict import amr_to_dict -from middleware.display.beautify import beautify, model_to_json +from middleware.display.beautify import beautify from middleware.graphers.datastruct_savvy import datastruct_savvy from middleware.graphers.turtle_grapher import turtle_grapher from middleware.graphers.turtle_utils import actual_filename from modules.phylotyper import phylotyper -from middleware.models import Pipeline from modules.loggingFunctions import initialize_logging import logging From 2698689b86c1de9adfbfdeef57979df95a1f2a34 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 17 Feb 2018 19:13:29 -0500 Subject: [PATCH 036/122] FIX: wasn't saving options --- app/middleware/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 1e1e3456..4cfd58de 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -69,7 +69,7 @@ def __init__(self, jobs=None, files=None, func=None, options=None): # TODO: incorporate below into the pipeline. self.files = [] self.func = func # Additional attribute for storing pipeline function. - self.options = None + self.options = options def complete(self): """ From 2583b4ea1b76b822096499c3d95b92f877a7f1ea Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 17 Feb 2018 20:15:01 -0500 Subject: [PATCH 037/122] CHANGE: switch to Job class in Pipeline + some tests for it --- app/middleware/models.py | 3 +- app/modules/spfy.py | 90 ++++++++++++++++++++++++++++++++++------ app/tests/test_models.py | 24 ++++++++++- 3 files changed, 103 insertions(+), 14 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 4cfd58de..51fc3379 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -41,7 +41,7 @@ class SubtypingResult(models.Base): class Job(): - def __init__(self, rq_job, transitory=True, backlog=True, display=False): + def __init__(self, rq_job, name="", transitory=True, backlog=True, display=False): """ Args: rq_job: An instance of the RQ Job class. @@ -52,6 +52,7 @@ def __init__(self, rq_job, transitory=True, backlog=True, display=False): display: To per parsed for the front-end. """ self.rq_job = rq_job + self.name = name self.transitory = transitory self.backlog = backlog self.display = display diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 627473bd..9ef22f11 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -26,6 +26,7 @@ from middleware.graphers.turtle_grapher import turtle_grapher from middleware.graphers.turtle_utils import actual_filename from modules.phylotyper import phylotyper +from middleware.models import Job from modules.loggingFunctions import initialize_logging import logging @@ -57,7 +58,7 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N # Dictionary of Job instances to return d = {} # Alias. - job_id = pipeline.jobs['job_id'] + job_id = pipeline.jobs['job_id'].rq_job # Create a copy of the arguments dictionary and disable Serotype. # This copy is passed to the old ECTyper.
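The flags on each Job registration below feed the polling contract that Pipeline gained in patches 033 and 035: complete() skips backlog jobs, returns the exc_info string of any failed job, and returns False while a tracked job is still running, while to_json() merges only the results of display-flagged jobs. A minimal sketch of how a caller might poll under those semantics (poll_pipeline and the two-second interval are illustrative, not the repository's API):

    import time

    def poll_pipeline(pipeline, interval=2):
        while True:
            status = pipeline.complete()
            if status is True:
                # All non-backlog jobs finished; merge display results.
                return pipeline.to_json()
            elif status:
                # A truthy non-True value is the exc_info of a failed job.
                raise Exception(status)
            time.sleep(interval)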
@@ -68,7 +69,17 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N
         call_ectyper_vf,
         single_dict_vf,
         depends_on=job_id)
+    # TODO: this is duplicated; switch everything to pipeline once tested
     d['job_ectyper_vf'] = job_ectyper_vf
+    pipeline.jobs.update({
+        'job_ectyper_vf': Job(
+            rq_job=job_ectyper_vf,
+            name='job_ectyper_vf',
+            transitory=True,
+            backlog=False,
+            display=False
+        )
+    })
 
     # If bulk uploading is set, we return the datastruct as the end task
     # to poll for job completion, therefore must set ttl of -1.
@@ -86,6 +97,15 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N
         depends_on=job_ectyper_vf,
         result_ttl=ttl_value)
     d['job_ectyper_datastruct_vf'] = job_ectyper_datastruct_vf
+    pipeline.jobs.update({
+        'job_ectyper_datastruct_vf': Job(
+            rq_job=job_ectyper_datastruct_vf,
+            name='job_ectyper_datastruct_vf',
+            transitory=True,
+            backlog=False,
+            display=False
+        )
+    })
 
     if not single_dict['options']['bulk']:
         # Only bother parsing into json if user has requested either vf or
@@ -98,9 +118,15 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N
             result_ttl=ttl_value
         )
         d['job_ectyper_beautify_vf'] = job_ectyper_beautify_vf
-
-    # Mutate the jobs pipeline from the calling function.
-    pipeline.jobs.update(d)
+        pipeline.jobs.update({
+            'job_ectyper_beautify_vf': Job(
+                rq_job=job_ectyper_beautify_vf,
+                name='job_ectyper_beautify_vf',
+                transitory=True,
+                backlog=False,
+                display=True
+            )
+        })
     return d
 
 def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipeline=None):
@@ -110,7 +136,7 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe
     # Dictionary of Job instances to return
     d = {}
     # Alias.
-    job_id = pipeline.jobs['job_id']
+    job_id = pipeline.jobs['job_id'].rq_job
 
     # Create a copy of the arguments dictionary and disable Serotype.
     # This copy is passed to the old ECTyper.
@@ -121,6 +147,15 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe
         single_dict_vf,
         depends_on=job_id)
     d['job_ectyper_serotype'] = job_ectyper_serotype
+    pipeline.jobs.update({
+        'job_ectyper_serotype': Job(
+            rq_job=job_ectyper_serotype,
+            name='job_ectyper_serotype',
+            transitory=True,
+            backlog=False,
+            display=False
+        )
+    })
 
     # If bulk uploading is set, we return the datastruct as the end task
     # to poll for job completion, therefore must set ttl of -1.
@@ -137,7 +172,16 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe
         query_file + '_ectyper_serotype.p',
         depends_on=job_ectyper_serotype,
         result_ttl=ttl_value)
-    d['job_ectyper_serotype'] = job_ectyper_datastruct_serotype
+    d['job_ectyper_datastruct_serotype'] = job_ectyper_datastruct_serotype
+    pipeline.jobs.update({
+        'job_ectyper_datastruct_serotype': Job(
+            rq_job=job_ectyper_datastruct_serotype,
+            name='job_ectyper_datastruct_serotype',
+            transitory=True,
+            backlog=False,
+            display=False
+        )
+    })
 
     if not single_dict['options']['bulk']:
         # Only bother parsing into json if user has requested either vf or
@@ -149,9 +193,15 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe
             result_ttl=ttl_value
         )
         d['job_ectyper_beautify_serotype'] = job_ectyper_beautify_serotype
-
-    # Mutate the jobs pipeline from the calling function.
- pipeline.jobs.update(d) + pipeline.jobs.update({ + 'job_ectyper_beautify_serotype': Job( + rq_job=job_ectyper_beautify_serotype, + name='job_ectyper_beautify_serotype', + transitory=True, + backlog=False, + display=True + ) + }) return d def blob_savvy_enqueue(single_dict, pipeline): @@ -168,12 +218,28 @@ def blob_savvy_enqueue(single_dict, pipeline): ''' jobs = {} query_file = single_dict['i'] - + job_qc = multiples_q.enqueue(qc, query_file, result_ttl=-1) - pipeline.jobs.update({'job_qc':job_qc}) + pipeline.jobs.update({ + 'job_qc': Job( + rq_job=job_qc, + name='job_qc', + transitory=False, + backlog=False, + display=False + ) + }) job_id = blazegraph_q.enqueue( write_reserve_id, query_file, depends_on=job_qc, result_ttl=-1) - pipeline.jobs.update({'job_id':job_id}) + pipeline.jobs.update({ + 'job_id': Job( + rq_job=job_id, + name='job_id', + transitory=False, + backlog=False, + display=False + ) + }) ## ECTyper (VF & Serotype) # VF diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 9384ae18..fffea95f 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -1,4 +1,4 @@ -from middleware.models import SubtypingRow, SubtypingResult, Pipeline +from middleware.models import SubtypingRow, SubtypingResult, Pipeline, Job from modules.spfy import spfy from scripts.savvy import savvy from tests.constants import BEAUTIFY_VF_SEROTYPE, ARGS_DICT @@ -25,6 +25,28 @@ def test_subtyping_model_direct(): ) subtyping_result.validate() +def test_pipeline_model(): + """ + Test the Pipeline model itself. + """ + p = Pipeline( + func = spfy, + options = ARGS_DICT + ) + pipeline.jobs.update({ + 'job_ectyper_vf': Job( + rq_job='SHOULDBEANACTUALJOB', + name='job_ectyper_vf', + transitory=True, + backlog=False, + display=False + ) + }) + assert isinstance(p, Pipeline) + assert isinstance(p.jobs, dict) + assert isinstance(p.jobs['job_ectyper_vf'], Job) + + def test_pipeline_model_signature(): """ Function signatures should be identical if called on the same function. From e8817a2451148cd63c9b5b87f007cf9e825d9263 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 17 Feb 2018 21:03:15 -0500 Subject: [PATCH 038/122] ADD: cast for VF/AMR + FIX: typo in earlier test --- app/middleware/modellers.py | 23 +++++++++++++++++++++++ app/tests/test_models.py | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index 7fed795c..2ea7a1e0 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -31,5 +31,28 @@ def model_serotype(pi, pl, output_file): subtyping_result = SubtypingResult( rows = subtyping_list ) + return subtyping_result +def model_vf(json_r, analysis="Virulence Factors"): + """ + Casts the output from display.beautify into a SubtypingResult object. + """ + # Type check. + assert isinstance(json_r, list) + subtyping_list = [ + SubtypingRow( + analysis=analysis, + contigid=item['contigid'], + filename=item['filename'], + hitcutoff=item['hitcutoff'], + hitname=item['hitname'], + hitorientation=item['hitorientation'], + hitstart=item['hitstart'], + hitstop=item['hitstop'] + ) + for item in json_r] + # Convert the list of rows into a SubtypingResult model. 
+    subtyping_result = SubtypingResult(
+        rows = subtyping_list
+    )
     return subtyping_result
diff --git a/app/tests/test_models.py b/app/tests/test_models.py
index fffea95f..3b039697 100644
--- a/app/tests/test_models.py
+++ b/app/tests/test_models.py
@@ -33,7 +33,7 @@ def test_pipeline_model():
         func = spfy,
         options = ARGS_DICT
     )
-    pipeline.jobs.update({
+    p.jobs.update({
         'job_ectyper_vf': Job(
             rq_job='SHOULDBEANACTUALJOB',
             name='job_ectyper_vf',

From 6b78e53a83bb3302ff6ad580119d11dbfe7d1305 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 17 Feb 2018 21:39:59 -0500
Subject: [PATCH 039/122] ADD: test the to_json() for merging all job results

---
 app/middleware/models.py |    4 +-
 app/modules/spfy.py      |    4 +-
 app/tests/constants.py   | 1216 ++++++++++++++++++++++++++++++++++++++
 app/tests/test_models.py |   53 +-
 4 files changed, 1270 insertions(+), 7 deletions(-)

diff --git a/app/middleware/models.py b/app/middleware/models.py
index 51fc3379..8b11b3fe 100644
--- a/app/middleware/models.py
+++ b/app/middleware/models.py
@@ -91,7 +91,9 @@ def complete(self):
 
     def to_json(self):
         """
-        Reduces all results from self.jobs to json for return.
+        Reduces all results from self.jobs to json for return. Note: currently
+        using a list as this is what the front-end is expecting, but convert
+        to dict at some point.
         """
         # Gather all the jobs that have finished and haven't failed.
         completed_jobs = [
diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index 9ef22f11..9fa2c4fa 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -122,7 +122,7 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N
             'job_ectyper_beautify_vf': Job(
                 rq_job=job_ectyper_beautify_vf,
                 name='job_ectyper_beautify_vf',
-                transitory=True,
+                transitory=False,
                 backlog=False,
                 display=True
             )
@@ -197,7 +197,7 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe
             'job_ectyper_beautify_serotype': Job(
                 rq_job=job_ectyper_beautify_serotype,
                 name='job_ectyper_beautify_serotype',
-                transitory=True,
+                transitory=False,
                 backlog=False,
                 display=True
             )
diff --git a/app/tests/constants.py b/app/tests/constants.py
index 9a7acbf8..63af007d 100644
--- a/app/tests/constants.py
+++ b/app/tests/constants.py
@@ -1214,3 +1214,1219 @@
         "hitstop": 3095080
     }
 ]
+
+BEAUTIFY_SEROTYPE = [
+    {
+        "analysis": "Serotype",
+        "contigid": "n/a",
+        "filename": "GCA_000005845.2_ASM584v2_genomic.fna",
+        "hitcutoff": "n/a",
+        "hitname": "O16:H48",
+        "hitorientation": "n/a",
+        "hitstart": "n/a",
+        "hitstop": "n/a"
+    }
+]
+
+BEAUTIFY_VF = [
+    {
+        "analysis": "Virulence Factors",
+        "contigid": "U00096.3",
+        "filename": "GCA_000005845.2_ASM584v2_genomic.fna",
+        "hitcutoff": 90,
+        "hitname": "EC958",
+        "hitorientation": "+",
+        "hitstart": 2073473,
+        "hitstop": 2074658
+    },
+    {
+        "analysis": "Virulence Factors",
+        "contigid": "U00096.3",
+        "filename": "GCA_000005845.2_ASM584v2_genomic.fna",
+        "hitcutoff": 90,
+        "hitname": "ECP",
+        "hitorientation": "-",
+        "hitstart": 306807,
+        "hitstop": 309332
+    },
+    {
+        "analysis": "Virulence Factors",
+        "contigid": "U00096.3",
+        "filename": "GCA_000005845.2_ASM584v2_genomic.fna",
+        "hitcutoff": 90,
+        "hitname": "ECP",
+        "hitorientation": "-",
+        "hitstart": 309358,
+        "hitstop": 310075
+    },
+    {
+        "analysis": "Virulence Factors",
+        "contigid": "U00096.3",
+        "filename": "GCA_000005845.2_ASM584v2_genomic.fna",
+        "hitcutoff": 90,
+        "hitname": "ECP",
+        "hitorientation": "-",
+        "hitstart": 310084,
+        "hitstop": 310700
+    },
+    {
+        "analysis": "Virulence Factors",
+        "contigid": "U00096.3",
+        "filename":
"GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ECP", + "hitorientation": "-", + "hitstart": 310746, + "hitstop": 311336 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ECS88", + "hitorientation": "-", + "hitstart": 3308040, + "hitstop": 3308924 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "Z1307", + "hitorientation": "-", + "hitstart": 1019013, + "hitstop": 1020053 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "Z2203", + "hitorientation": "-", + "hitstart": 1588853, + "hitstop": 1590079 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "Z2204", + "hitorientation": "-", + "hitstart": 1588309, + "hitstop": 1588839 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "Z2205", + "hitorientation": "-", + "hitstart": 1587793, + "hitstop": 1588296 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "Z2206", + "hitorientation": "-", + "hitstart": 1586820, + "hitstop": 1587734 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "agn43", + "hitorientation": "+", + "hitstart": 2071539, + "hitstop": 2074658 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "artj", + "hitorientation": "-", + "hitstart": 899844, + "hitstop": 900575 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "aslA", + "hitorientation": "-", + "hitstart": 3984579, + "hitstop": 3986007 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "b2972", + "hitorientation": "-", + "hitstart": 3113543, + "hitstop": 3114352 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "cadA", + "hitorientation": "-", + "hitstart": 4356481, + "hitstop": 4358656 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "cah", + "hitorientation": "+", + "hitstart": 2073486, + "hitstop": 2074658 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "cheA", + "hitorientation": "-", + "hitstart": 1973360, + "hitstop": 1975324 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "cheB", + "hitorientation": "-", + "hitstart": 1967452, + "hitstop": 1968501 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 
90, + "hitname": "cheR", + "hitorientation": "-", + "hitstart": 1968504, + "hitstop": 1969364 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "cheW", + "hitorientation": "-", + "hitstart": 1972836, + "hitstop": 1973339 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "cheZ", + "hitorientation": "-", + "hitstart": 1966393, + "hitstop": 1967037 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "cs3", + "hitorientation": "-", + "hitstart": 2994460, + "hitstop": 2995092 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "csgD", + "hitorientation": "-", + "hitstart": 1102546, + "hitstop": 1103196 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "csgG", + "hitorientation": "-", + "hitstart": 1100851, + "hitstop": 1101684 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "eae", + "hitorientation": "+", + "hitstart": 314420, + "hitstop": 315232 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ecpA", + "hitorientation": "-", + "hitstart": 310084, + "hitstop": 310671 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ecpB", + "hitorientation": "-", + "hitstart": 309358, + "hitstop": 310026 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ecpC", + "hitorientation": "-", + "hitstart": 306807, + "hitstop": 309332 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ecpD", + "hitorientation": "-", + "hitstart": 305174, + "hitstop": 306817 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ecpE", + "hitorientation": "-", + "hitstart": 304497, + "hitstop": 305250 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ecpR", + "hitorientation": "-", + "hitstart": 310746, + "hitstop": 311336 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ehaB", + "hitorientation": "+", + "hitstart": 392973, + "hitstop": 394418 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "entA", + "hitorientation": "+", + "hitstart": 628551, + "hitstop": 629297 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "entB", + "hitorientation": "+", + "hitstart": 627694, + 
"hitstop": 628551 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "entC", + "hitorientation": "+", + "hitstart": 624873, + "hitstop": 626060 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "entD", + "hitorientation": "-", + "hitstart": 609459, + "hitstop": 610229 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "entE", + "hitorientation": "+", + "hitstart": 626070, + "hitstop": 627680 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "entF", + "hitorientation": "+", + "hitstart": 614157, + "hitstop": 617980 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "entS", + "hitorientation": "+", + "hitstart": 622300, + "hitstop": 623550 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "espL1", + "hitorientation": "+", + "hitstart": 1803439, + "hitstop": 1804993 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "espL3", + "hitorientation": "-", + "hitstart": 3861987, + "hitstop": 3863864 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "espL4", + "hitorientation": "-", + "hitstart": 4221348, + "hitstop": 4222487 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "espR1", + "hitorientation": "-", + "hitstart": 1544385, + "hitstop": 1545447 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "espX4", + "hitorientation": "+", + "hitstart": 4250703, + "hitstop": 4252283 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "espX5", + "hitorientation": "-", + "hitstart": 4281783, + "hitstop": 4283075 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "espY1", + "hitorientation": "+", + "hitstart": 58474, + "hitstop": 59103 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fdeC", + "hitorientation": "+", + "hitstart": 314357, + "hitstop": 315232 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fepA", + "hitorientation": "-", + "hitstart": 610254, + "hitstop": 612494 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fepB", + "hitorientation": "-", + "hitstart": 623554, + "hitstop": 624510 + }, + { + "analysis": "Virulence Factors", + 
"contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fepC", + "hitorientation": "-", + "hitstart": 619384, + "hitstop": 620199 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fepD", + "hitorientation": "-", + "hitstart": 621185, + "hitstop": 622201 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fepE", + "hitorientation": "+", + "hitstart": 618254, + "hitstop": 619387 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fepG", + "hitorientation": "-", + "hitstart": 620196, + "hitstop": 621188 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fes", + "hitorientation": "+", + "hitstart": 612737, + "hitstop": 613939 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimA", + "hitorientation": "+", + "hitstart": 4543115, + "hitstop": 4543663 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimB", + "hitorientation": "+", + "hitstart": 4540957, + "hitstop": 4541559 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimC", + "hitorientation": "+", + "hitstart": 4544355, + "hitstop": 4545029 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimD", + "hitorientation": "-", + "hitstart": 1588853, + "hitstop": 1590079 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimD", + "hitorientation": "+", + "hitstart": 4545096, + "hitstop": 4547732 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimE", + "hitorientation": "+", + "hitstart": 4542037, + "hitstop": 4542633 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimF", + "hitorientation": "-", + "hitstart": 1588309, + "hitstop": 1588839 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimF", + "hitorientation": "+", + "hitstart": 4547742, + "hitstop": 4548272 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimG", + "hitorientation": "-", + "hitstart": 1587793, + "hitstop": 1588296 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimG", + "hitorientation": "+", + "hitstart": 4548285, + "hitstop": 4548788 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": 
"GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimH", + "hitorientation": "+", + "hitstart": 4548808, + "hitstop": 4549710 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fimI", + "hitorientation": "+", + "hitstart": 4543620, + "hitstop": 4544267 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgA", + "hitorientation": "-", + "hitstart": 1130204, + "hitstop": 1130863 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgD", + "hitorientation": "+", + "hitstart": 1131854, + "hitstop": 1132549 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgE", + "hitorientation": "+", + "hitstart": 1132574, + "hitstop": 1133782 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgF", + "hitorientation": "+", + "hitstart": 1133802, + "hitstop": 1134557 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgG", + "hitorientation": "+", + "hitstart": 1134729, + "hitstop": 1135511 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgH", + "hitorientation": "+", + "hitstart": 1135564, + "hitstop": 1136262 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgI", + "hitorientation": "+", + "hitstart": 1136274, + "hitstop": 1137371 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgJ", + "hitorientation": "+", + "hitstart": 1137371, + "hitstop": 1138312 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgK", + "hitorientation": "+", + "hitstart": 1138378, + "hitstop": 1140021 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flgL", + "hitorientation": "+", + "hitstart": 1140033, + "hitstop": 1140986 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flhA", + "hitorientation": "-", + "hitstart": 1962974, + "hitstop": 1965050 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flhB", + "hitorientation": "-", + "hitstart": 1965043, + "hitstop": 1966191 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flhC", + "hitorientation": "-", + "hitstart": 1977266, + "hitstop": 1977844 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, 
+ "hitname": "fliA", + "hitorientation": "-", + "hitstart": 2001070, + "hitstop": 2001789 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliD", + "hitorientation": "+", + "hitstart": 2003872, + "hitstop": 2005278 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliF", + "hitorientation": "+", + "hitstart": 2013229, + "hitstop": 2014887 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliG", + "hitorientation": "+", + "hitstart": 2014880, + "hitstop": 2015875 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliH", + "hitorientation": "+", + "hitstart": 2015868, + "hitstop": 2016554 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliI", + "hitorientation": "+", + "hitstart": 2016554, + "hitstop": 2017927 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliK", + "hitorientation": "+", + "hitstart": 2018386, + "hitstop": 2019513 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliM", + "hitorientation": "+", + "hitstart": 2020087, + "hitstop": 2021091 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliP", + "hitorientation": "+", + "hitstart": 2021869, + "hitstop": 2022606 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliR", + "hitorientation": "+", + "hitstart": 2022893, + "hitstop": 2023678 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliY", + "hitorientation": "-", + "hitstart": 1999585, + "hitstop": 2000385 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "fliZ", + "hitorientation": "-", + "hitstart": 2000473, + "hitstop": 2001060 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "flk", + "hitorientation": "+", + "hitstart": 2437950, + "hitstop": 2438945 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "gadX", + "hitorientation": "-", + "hitstart": 3664986, + "hitstop": 3665618 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "gspC", + "hitorientation": "-", + "hitstart": 3112091, + "hitstop": 3113049 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "gspL", + "hitorientation": "-", + "hitstart": 
3111128, + "hitstop": 3112092 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "gspo", + "hitorientation": "+", + "hitstart": 3465543, + "hitstop": 3466220 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "hcp", + "hitorientation": "-", + "hitstart": 115714, + "hitstop": 117099 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "hlye", + "hitorientation": "-", + "hitstart": 1229483, + "hitstop": 1230538 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "hofq", + "hitorientation": "-", + "hitstart": 3519465, + "hitstop": 3520703 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ibeB", + "hitorientation": "+", + "hitstart": 595600, + "hitstop": 596981 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ibeC", + "hitorientation": "-", + "hitstart": 4148532, + "hitstop": 4150309 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "motA", + "hitorientation": "-", + "hitstart": 1976252, + "hitstop": 1977139 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "motB", + "hitorientation": "-", + "hitstart": 1975329, + "hitstop": 1976255 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "nada", + "hitorientation": "+", + "hitstart": 782085, + "hitstop": 783128 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "nadb", + "hitorientation": "+", + "hitstart": 2710420, + "hitstop": 2712042 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ompA", + "hitorientation": "-", + "hitstart": 1019013, + "hitstop": 1020053 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ompt", + "hitorientation": "-", + "hitstart": 584680, + "hitstop": 585633 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ppdb", + "hitorientation": "-", + "hitstart": 2963153, + "hitstop": 2963716 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "tar/cheM", + "hitorientation": "-", + "hitstart": 1971030, + "hitstop": 1972691 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "upaC", + "hitorientation": "+", + "hitstart": 392973, + "hitstop": 394418 + }, + { + "analysis": "Virulence 
Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ycbF", + "hitorientation": "+", + "hitstart": 1003920, + "hitstop": 1004657 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ycbQ", + "hitorientation": "+", + "hitstart": 997859, + "hitstop": 998407 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ycbR", + "hitorientation": "+", + "hitstart": 998490, + "hitstop": 999191 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ycbS", + "hitorientation": "+", + "hitstart": 999216, + "hitstop": 1001816 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ycbT", + "hitorientation": "+", + "hitstart": 1001807, + "hitstop": 1002784 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ycbV", + "hitorientation": "+", + "hitstart": 1003391, + "hitstop": 1003954 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ycfz", + "hitorientation": "-", + "hitstart": 1180479, + "hitstop": 1181267 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "ygeH", + "hitorientation": "+", + "hitstart": 2992094, + "hitstop": 2993470 + }, + { + "analysis": "Virulence Factors", + "contigid": "U00096.3", + "filename": "GCA_000005845.2_ASM584v2_genomic.fna", + "hitcutoff": 90, + "hitname": "yggr", + "hitorientation": "-", + "hitstart": 3094100, + "hitstop": 3095080 + } +] diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 3b039697..87a14fa7 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -1,10 +1,20 @@ from middleware.models import SubtypingRow, SubtypingResult, Pipeline, Job from modules.spfy import spfy from scripts.savvy import savvy -from tests.constants import BEAUTIFY_VF_SEROTYPE, ARGS_DICT +from tests.constants import BEAUTIFY_VF_SEROTYPE, BEAUTIFY_SEROTYPE, BEAUTIFY_VF, ARGS_DICT +class MockRQJob(): + """ + A mock Job class returned by RQ. Also emulates response the Job gets from + querying Redis DB. + """ + def __init__(self, is_finished=True, is_failed=False, exc_info='', result=None): + self.is_finished = is_finished + self.is_failed = is_failed + self.exc_info = exc_info + self.result = result -def test_subtyping_model_direct(): +def test_subtyping_model_direct(l=BEAUTIFY_VF_SEROTYPE): """ Use our dataset to directly create a subtyping results model and validate it. """ @@ -19,11 +29,13 @@ def test_subtyping_model_direct(): hitstart=str(d['hitstart']), hitstop=str(d['hitstop']) ) - for d in BEAUTIFY_VF_SEROTYPE] + for d in l] subtyping_result = SubtypingResult( rows = subtyping_list ) subtyping_result.validate() + # Return for incorporation into later tests. 
+    return subtyping_result
 
 def test_pipeline_model():
     """
@@ -33,19 +45,52 @@ def test_pipeline_model():
         func = spfy,
         options = ARGS_DICT
     )
+    mock_serotype = MockRQJob(
+        result = test_subtyping_model_direct(BEAUTIFY_SEROTYPE)
+    )
+    mock_vf = MockRQJob(
+        result = test_subtyping_model_direct(BEAUTIFY_VF)
+    )
+    # Flags should exclude the result from conversion to json.
     p.jobs.update({
         'job_ectyper_vf': Job(
-            rq_job='SHOULDBEANACTUALJOB',
+            rq_job="Should throw an error if read.",
             name='job_ectyper_vf',
             transitory=True,
             backlog=False,
             display=False
         )
     })
+    # Mimics a Serotype result that will be converted to json.
+    p.jobs.update({
+        'job_ectyper_beautify_serotype': Job(
+            rq_job=mock_serotype,
+            name='job_ectyper_beautify_serotype',
+            transitory=False,
+            backlog=False,
+            display=True
+        )
+    })
+    # Mimics a VF result that will be converted to json.
+    p.jobs.update({
+        'job_ectyper_beautify_vf': Job(
+            rq_job=mock_vf,
+            name='job_ectyper_beautify_vf',
+            transitory=False,
+            backlog=False,
+            display=True
+        )
+    })
     assert isinstance(p, Pipeline)
     assert isinstance(p.jobs, dict)
     assert isinstance(p.jobs['job_ectyper_vf'], Job)
+
+    # Test Pipeline.complete(), should be True.
+    assert p.complete()
+
+    # Test Pipeline.to_json().
+    json = p.to_json()
+    assert isinstance(json, list)
 
 def test_pipeline_model_signature():
     """
     Function signatures should be identical if called on the same function.

From afbfed63c202eb9725ce9eeb0b86e22a1ff57787 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 17 Feb 2018 22:16:42 -0500
Subject: [PATCH 040/122] FIX: tests

---
 app/middleware/models.py |  2 ++
 app/tests/test_models.py | 13 ++-----------
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/app/middleware/models.py b/app/middleware/models.py
index 8b11b3fe..361364b7 100644
--- a/app/middleware/models.py
+++ b/app/middleware/models.py
@@ -77,6 +77,8 @@ def complete(self):
         Check if all jobs are completed
         """
         for j in self.jobs.itervalues():
+            # Type check.
+            assert isinstance(j, Job)
             rq_job = j.rq_job
             if j.backlog:
                 # Some backlog job, we don't care (though Sentry will catch it).
diff --git a/app/tests/test_models.py b/app/tests/test_models.py
index 87a14fa7..8efec140 100644
--- a/app/tests/test_models.py
+++ b/app/tests/test_models.py
@@ -51,16 +51,6 @@ def test_pipeline_model():
     mock_vf = MockRQJob(
         result = test_subtyping_model_direct(BEAUTIFY_VF)
     )
-    # Flags should exclude the result from conversion to json.
-    p.jobs.update({
-        'job_ectyper_vf': Job(
-            rq_job="Should throw an error if read.",
-            name='job_ectyper_vf',
-            transitory=True,
-            backlog=False,
-            display=False
-        )
-    })
     # Mimics a Serotype result that will be converted to json.
@@ -83,7 +73,8 @@ def test_pipeline_model():
     })
     assert isinstance(p, Pipeline)
     assert isinstance(p.jobs, dict)
-    assert isinstance(p.jobs['job_ectyper_vf'], Job)
+    for k in p.jobs:
+        assert isinstance(p.jobs[k], Job)
 
     # Test Pipeline.complete(), should be True.
assert p.complete() From 850bad7f99132d20265847c391a2222cd61e46cd Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 17 Feb 2018 23:33:00 -0500 Subject: [PATCH 041/122] ADD: models for phylotyper --- app/middleware/models.py | 11 + app/tests/constants.py | 885 +++++++++++++++++++++++++++++++++++++++ app/tests/test_models.py | 122 +++++- 3 files changed, 1014 insertions(+), 4 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 361364b7..b386970c 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -39,6 +39,17 @@ class SubtypingRow(models.Base): class SubtypingResult(models.Base): rows = fields.ListField([SubtypingRow], nullable=True) +class PhylotyperRow(models.Base): + contig = fields.StringField(nullable=True) + genome = fields.StringField() + probability = fields.StringField(nullable=True) # actually float + start = fields.StringField(nullable=True) # actually int + stop = fields.StringField(nullable=True) # actually int + subtype = fields.StringField() + subtype_gene = fields.StringField(nullable=True) + +class PhylotyperResult(models.Base): + rows = fields.ListField([PhylotyperRow], nullable=True) class Job(): def __init__(self, rq_job, name="", transitory=True, backlog=True, display=False): diff --git a/app/tests/constants.py b/app/tests/constants.py index 63af007d..5a761013 100644 --- a/app/tests/constants.py +++ b/app/tests/constants.py @@ -2430,3 +2430,888 @@ "hitstop": 3095080 } ] + +BEAUTIFY_AMR = [ + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "Escherichia coli gyrA conferring resistance to fluoroquinolones", + "hitorientation": "+", + "hitstart": 159252, + "hitstop": 161879 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "PmrE", + "hitorientation": "+", + "hitstart": 388190, + "hitstop": 389356 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "PmrF", + "hitorientation": "-", + "hitstart": 134984, + "hitstop": 135952 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "arnA", + "hitorientation": "-", + "hitstart": 133002, + "hitstop": 134984 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "baeR", + "hitorientation": "-", + "hitstart": 323408, + "hitstop": 324130 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "baeS", + "hitorientation": "-", + "hitstart": 324127, + "hitstop": 325530 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "mdtB", + "hitorientation": "-", + "hitstart": 330021, + "hitstop": 333143 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtD", + "hitorientation": "-", + "hitstart": 325527, + 
"hitstop": 326942 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000001.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mexN", + "hitorientation": "-", + "hitstart": 326943, + "hitstop": 330020 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000003.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "PmrA", + "hitorientation": "+", + "hitstart": 28893, + "hitstop": 29561 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000003.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "PmrB", + "hitorientation": "+", + "hitstart": 29562, + "hitstop": 30662 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000003.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "PmrC", + "hitorientation": "+", + "hitstart": 27253, + "hitstop": 28896 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000003.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtN", + "hitorientation": "+", + "hitstart": 58230, + "hitstop": 59261 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000003.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtO", + "hitorientation": "+", + "hitstart": 59261, + "hitstop": 61312 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000003.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtP", + "hitorientation": "+", + "hitstart": 61309, + "hitstop": 62775 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000004.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtK", + "hitorientation": "+", + "hitstart": 126030, + "hitstop": 127403 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000005.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "ACT-7", + "hitorientation": "-", + "hitstart": 4604, + "hitstop": 5737 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000005.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtM", + "hitorientation": "-", + "hitstart": 187550, + "hitstop": 188782 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000005.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "robA", + "hitorientation": "-", + "hitstart": 251658, + "hitstop": 252527 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000006.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "CRP", + "hitorientation": "-", + "hitstart": 176803, + "hitstop": 177435 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000006.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "gadX", + "hitorientation": "+", + "hitstart": 397, + "hitstop": 1221 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000006.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtE", + "hitorientation": "-", + "hitstart": 5818, + "hitstop": 6975 + }, + { + 
"analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000006.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mexD", + "hitorientation": "-", + "hitstart": 2680, + "hitstop": 5793 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000007.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "H-NS", + "hitorientation": "-", + "hitstart": 187722, + "hitstop": 188135 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000007.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "mdtG", + "hitorientation": "-", + "hitstart": 25571, + "hitstop": 26797 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000007.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdtH", + "hitorientation": "-", + "hitstart": 35428, + "hitstop": 36636 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000007.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "phoP", + "hitorientation": "-", + "hitstart": 101156, + "hitstop": 101827 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000007.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "phoQ", + "hitorientation": "-", + "hitstart": 99696, + "hitstop": 101156 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000008.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "emrK", + "hitorientation": "-", + "hitstart": 9140, + "hitstop": 10303 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000008.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "emrY", + "hitorientation": "-", + "hitstart": 7602, + "hitstop": 9140 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000008.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "evgA", + "hitorientation": "+", + "hitstart": 10719, + "hitstop": 11333 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000008.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "evgS", + "hitorientation": "+", + "hitstart": 11338, + "hitstop": 14931 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000008.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mexD", + "hitorientation": "+", + "hitstart": 104776, + "hitstop": 107889 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000009.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "cpxA", + "hitorientation": "+", + "hitstart": 22429, + "hitstop": 23802 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000009.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "cpxR", + "hitorientation": "+", + "hitstart": 21734, + "hitstop": 22432 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000011.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "Escherichia coli marR mutant resulting in antibiotic resistance", + "hitorientation": "+", + "hitstart": 51100, + 
"hitstop": 51534 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000011.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "marA", + "hitorientation": "+", + "hitstart": 51554, + "hitstop": 51937 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000012.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "emrA", + "hitorientation": "-", + "hitstart": 312493, + "hitstop": 313665 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000012.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "emrB", + "hitorientation": "-", + "hitstart": 310938, + "hitstop": 312476 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000012.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "emrR", + "hitorientation": "-", + "hitstart": 313792, + "hitstop": 314322 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000013.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "Staphylococcus aureus gyrB conferring resistance to aminocoumarin", + "hitorientation": "-", + "hitstart": 131568, + "hitstop": 133982 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000013.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "emrD", + "hitorientation": "+", + "hitstart": 107782, + "hitstop": 108966 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000013.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "mdtL", + "hitorientation": "+", + "hitstart": 145479, + "hitstop": 146654 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000015.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "Escherichia coli parC conferring resistance to fluoroquinolone", + "hitorientation": "-", + "hitstart": 68709, + "hitstop": 70967 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000015.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "bacA", + "hitorientation": "-", + "hitstart": 104717, + "hitstop": 105538 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000015.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "tolC", + "hitorientation": "+", + "hitstart": 80879, + "hitstop": 82360 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000016.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "ACT-7", + "hitorientation": "+", + "hitstart": 286, + "hitstop": 1431 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000022.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "Mycobacterium tuberculosis rpoB mutants conferring resistance to rifampicin", + "hitorientation": "-", + "hitstart": 22720, + "hitstop": 26748 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000023.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "macA", + "hitorientation": "-", + "hitstart": 5642, + "hitstop": 6757 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": 
"MOHB01000023.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "macB", + "hitorientation": "-", + "hitstart": 3699, + "hitstop": 5645 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000023.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mdfA", + "hitorientation": "-", + "hitstart": 39796, + "hitstop": 41028 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000024.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "CTX-M-55", + "hitorientation": "-", + "hitstart": 37702, + "hitstop": 38577 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000026.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "Mycobacterium tuberculosis katG mutations conferring resistance to isoniazid", + "hitorientation": "+", + "hitstart": 8536, + "hitstop": 10716 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000027.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "APH(3'')", + "hitorientation": "-", + "hitstart": 10215, + "hitstop": 11018 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000027.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "APH(6)", + "hitorientation": "-", + "hitstart": 9379, + "hitstop": 10215 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000027.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "floR", + "hitorientation": "+", + "hitstart": 5030, + "hitstop": 6244 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000027.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "sul2", + "hitorientation": "-", + "hitstart": 11079, + "hitstop": 11894 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000027.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "tetG", + "hitorientation": "-", + "hitstart": 6844, + "hitstop": 8043 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000028.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "emrE", + "hitorientation": "+", + "hitstart": 30648, + "hitstop": 30980 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000032.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "acrE", + "hitorientation": "+", + "hitstart": 32702, + "hitstop": 33859 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000032.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "acrS", + "hitorientation": "-", + "hitstart": 31641, + "hitstop": 32303 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000032.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mexD", + "hitorientation": "+", + "hitstart": 33871, + "hitstop": 36975 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000036.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "Klebsiella pneumoniae acrR mutant resulting in high level antibiotic resistance", + 
"hitorientation": "+", + "hitstart": 107902, + "hitstop": 108495 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000036.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "acrE", + "hitorientation": "-", + "hitstart": 106513, + "hitstop": 107706 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000036.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "mexD", + "hitorientation": "-", + "hitstart": 103341, + "hitstop": 106490 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000036.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "rosA", + "hitorientation": "-", + "hitstart": 125513, + "hitstop": 126733 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000036.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "rosB", + "hitorientation": "-", + "hitstart": 123599, + "hitstop": 125275 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000036.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "vanG", + "hitorientation": "-", + "hitstart": 19876, + "hitstop": 20970 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000037.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "MCR-1", + "hitorientation": "+", + "hitstart": 13553, + "hitstop": 15178 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000050.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "FosA3", + "hitorientation": "+", + "hitstart": 4459, + "hitstop": 4875 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000050.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "mphA", + "hitorientation": "+", + "hitstart": 89, + "hitstop": 994 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000053.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "ErmB", + "hitorientation": "-", + "hitstart": 1455, + "hitstop": 2192 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000062.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "sul1", + "hitorientation": "+", + "hitstart": 452, + "hitstop": 1291 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000064.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "TEM-1", + "hitorientation": "+", + "hitstart": 3455, + "hitstop": 4315 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000080.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "NDM-1", + "hitorientation": "+", + "hitstart": 724, + "hitstop": 1536 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000090.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "aadA11", + "hitorientation": "+", + "hitstart": 690, + "hitstop": 1535 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000090.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Strict", + "hitname": "dfrA25", + "hitorientation": "+", + "hitstart": 
36, + "hitstop": 509 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000098.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "catI", + "hitorientation": "-", + "hitstart": 166, + "hitstop": 825 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000101.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "arr-3", + "hitorientation": "+", + "hitstart": 37, + "hitstop": 489 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000104.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "rmtB", + "hitorientation": "+", + "hitstart": 43, + "hitstop": 798 + }, + { + "analysis": "Antimicrobial Resistance", + "contigid": "MOHB01000106.1", + "filename": "GCA_001891995.1_ASM189199v1_genomic.fna", + "hitcutoff": "Perfect", + "hitname": "aadA5", + "hitorientation": "-", + "hitstart": 115, + "hitstop": 903 + } +] + +BEAUTIFY_STX1 = [ + { + "contig": "lcl|ECI-2644|NODE_8_length_178521_cov_25.218_ID_15", + "genome": "ECI-2644_lcl.fasta", + "probability": 0.9561446, + "start": 174535, + "stop": 175491, + "subtype": "a", + "subtype_gene": "stx1A" + }, + { + "contig": "lcl|ECI-2644|NODE_8_length_178521_cov_25.218_ID_15", + "genome": "ECI-2644_lcl.fasta", + "probability": 0.9561446, + "start": 175501, + "stop": 175770, + "subtype": "a", + "subtype_gene": "stx1B" + }, + { + "contig": "lcl|ECI-2644|NODE_8_length_178521_cov_25.218_ID_15", + "genome": "ECI-2644_lcl.fasta", + "probability": 0.9561446, + "start": 174544, + "stop": 175491, + "subtype": "a", + "subtype_gene": "stx1A" + }, + { + "contig": "lcl|ECI-2644|NODE_8_length_178521_cov_25.218_ID_15", + "genome": "ECI-2644_lcl.fasta", + "probability": 0.9561446, + "start": 175501, + "stop": 175770, + "subtype": "a", + "subtype_gene": "stx1B" + } +] + +BEAUTIFY_STX2 = [ + { + "contig": "lcl|ECI-2644|NODE_51_length_5713_cov_24.063_ID_101", + "genome": "ECI-2644_lcl.fasta", + "probability": 0.9460619, + "start": 4390, + "stop": 5349, + "subtype": "a", + "subtype_gene": "stx2A" + }, + { + "contig": "lcl|ECI-2644|NODE_51_length_5713_cov_24.063_ID_101", + "genome": "ECI-2644_lcl.fasta", + "probability": 0.9460619, + "start": 4109, + "stop": 4378, + "subtype": "a", + "subtype_gene": "stx2B" + } +] + +BEAUTIFY_EAE = [ + { + "contig": "N/A", + "genome": "GCA_000005845.2_ASM584v2_genomic.fna", + "probability": "N/A", + "start": "N/A", + "stop": "N/A", + "subtype": "Subtype loci not found in genome", + "subtype_gene": "N/A" + } +] diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 8efec140..aa0b2662 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -1,7 +1,21 @@ -from middleware.models import SubtypingRow, SubtypingResult, Pipeline, Job +from middleware.models import + SubtypingRow, + SubtypingResult, + PhylotyperRow, + PhylotyperResult, + Pipeline, + Job from modules.spfy import spfy from scripts.savvy import savvy -from tests.constants import BEAUTIFY_VF_SEROTYPE, BEAUTIFY_SEROTYPE, BEAUTIFY_VF, ARGS_DICT +from tests.constants import + BEAUTIFY_VF_SEROTYPE, + BEAUTIFY_SEROTYPE, + BEAUTIFY_VF, + BEAUTIFY_AMR, + BEAUTIFY_STX1, + BEAUTIFY_STX2, + BEAUTIFY_EAE, + ARGS_DICT class MockRQJob(): """ @@ -37,9 +51,31 @@ def test_subtyping_model_direct(l=BEAUTIFY_VF_SEROTYPE): # Return for incorporation into later tests. 
return subtyping_result -def test_pipeline_model(): +def test_phylotyper_model_direct(l=BEAUTIFY_STX1): """ - Test the Pipeline model itself. + Use our dataset to directly create a phylotyper results model and validate it. + """ + phylotyper_list = [ + PhylotyperRow( + contig=d['contig'], + genome=d['genome'], + probability=str(d['probability']), + start=str(d['start']), + stop=str(d['stop']), + subtype=d['subtype'], + subtype_gene=d['subtype_gene'] + ) + for d in l] + phylotyper_result = PhylotyperResult( + rows = phylotyper_list + ) + phylotyper_result.validate() + # Return for incorporation into later tests. + return phylotyper_result + +def test_pipeline_model_subtyping(): + """ + Test the Pipeline model itself for subtyping via ECTyper and RGI. """ p = Pipeline( func = spfy, @@ -83,6 +119,84 @@ def test_pipeline_model(): json = p.to_json() assert isinstance(json, list) + # Add an AMR job and re-test. + mock_amr = MockRQJob( + result = test_subtyping_model_direct(BEAUTIFY_AMR) + ) + p.jobs.update({ + 'job_ectyper_beautify_amr': Job( + rq_job=mock_amr, + name='job_ectyper_beautify_amr', + transitory=False, + backlog=False, + display=True + ) + }) + # Test Pipeline.to_json(). + json = p.to_json() + assert isinstance(json, list) + +def test_pipeline_model_phyotyping(): + """ + Test the Pipeline model itself for subtyping via Phylotyper. + """ + p = Pipeline( + func = spfy, + options = ARGS_DICT + ) + mock_stx1 = MockRQJob( + result = test_phylotyper_model_direct(BEAUTIFY_STX1) + ) + mock_stx2 = MockRQJob( + result = test_phylotyper_model_direct(BEAUTIFY_STX2) + ) + p.jobs.update({ + 'job_phylotyper_beautify_stx1': Job( + rq_job=mock_stx1, + name='job_phylotyper_beautify_stx1', + transitory=False, + backlog=False, + display=True + ) + }) + p.jobs.update({ + 'job_phylotyper_beautify_stx2': Job( + rq_job=mock_stx2, + name='job_phylotyper_beautify_stx2', + transitory=False, + backlog=False, + display=True + ) + }) + assert isinstance(p, Pipeline) + assert isinstance(p.jobs, dict) + for k in p.jobs: + assert isinstance(p.jobs[k], Job) + + # Test Pipeline.complete(), should be True. + assert p.complete() + + # Test Pipeline.to_json(). + json = p.to_json() + assert isinstance(json, list) + + # Add an AMR job and re-test. + mock_eae = MockRQJob( + result = test_phylotyper_model_direct(BEAUTIFY_EAE) + ) + p.jobs.update({ + 'job_phylotyper_beautify_eae': Job( + rq_job=mock_eae, + name='job_phylotyper_beautify_stx2', + transitory=False, + backlog=False, + display=True + ) + }) + # Test Pipeline.to_json(). + json = p.to_json() + assert isinstance(json, list) + def test_pipeline_model_signature(): """ Function signatures should be identical if called on the same function. From d9f2bdc055e226120441cd629000fa5f37b0b652 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 17 Feb 2018 23:38:41 -0500 Subject: [PATCH 042/122] FIX: conversion should actually work on any subclasses of models.Base --- app/middleware/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index b386970c..3fd71da1 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -5,7 +5,7 @@ from jsonmodels import models, fields from middleware.graphers.turtle_utils import actual_filename -def _convert_subtyping(model): +def _convert_model(model): # Convert the model to a generic JSON structure. struct = model.to_struct() # This is not strictly json; more like a list than a dict structure. 
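(A quick aside for readers unfamiliar with jsonmodels: a minimal, self-contained
sketch of the to_struct() behaviour that _convert_model() relies on. The Demo*
classes are illustrative stand-ins, not models from this repository.)

    from jsonmodels import models, fields

    class DemoRow(models.Base):
        hitname = fields.StringField(required=True)

    class DemoResult(models.Base):
        rows = fields.ListField([DemoRow])

    result = DemoResult(rows=[DemoRow(hitname='emrA')])
    result.validate()          # raises a ValidationError on missing required fields
    print(result.to_struct())  # {'rows': [{'hitname': 'emrA'}]}

(The 'rows' key holds a plain list, which is why _convert_model() returns
struct['rows'] rather than the wrapping dict.)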
@@ -19,8 +19,8 @@ def model_to_json(model): # Validate the model submitted before processing. model.validate() # Conversion. - if isinstance(model, SubtypingResult): - return _convert_subtyping(model) + if issubclass(model, models.Base)(): + return _convert_model(model) else: raise Exception('model_to_json() called for a model without a handler.') From d53189deca9145394706967cea3b1662a3de2257 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 00:22:47 -0500 Subject: [PATCH 043/122] FIX: imports for tests --- app/middleware/models.py | 9 ++-- app/tests/test_models.py | 92 +++++++++++++++++----------------------- 2 files changed, 45 insertions(+), 56 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 3fd71da1..cb5d632b 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -8,9 +8,12 @@ def _convert_model(model): # Convert the model to a generic JSON structure. struct = model.to_struct() - # This is not strictly json; more like a list than a dict structure. - rows_list = struct['rows'] - return rows_list + if 'rows' in struct: + # This is not strictly json; more like a list than a dict structure. + rows_list = struct['rows'] + return rows_list + else: + return struct def model_to_json(model): """ diff --git a/app/tests/test_models.py b/app/tests/test_models.py index aa0b2662..d37aa0df 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -1,21 +1,7 @@ -from middleware.models import - SubtypingRow, - SubtypingResult, - PhylotyperRow, - PhylotyperResult, - Pipeline, - Job +from middleware import models from modules.spfy import spfy from scripts.savvy import savvy -from tests.constants import - BEAUTIFY_VF_SEROTYPE, - BEAUTIFY_SEROTYPE, - BEAUTIFY_VF, - BEAUTIFY_AMR, - BEAUTIFY_STX1, - BEAUTIFY_STX2, - BEAUTIFY_EAE, - ARGS_DICT +from tests import constants class MockRQJob(): """ @@ -28,12 +14,12 @@ def __init__(self, is_finished=True, is_failed=False, exc_info='', result=None): self.exc_info = exc_info self.result = result -def test_subtyping_model_direct(l=BEAUTIFY_VF_SEROTYPE): +def test_subtyping_model_direct(l=constants.BEAUTIFY_VF_SEROTYPE): """ Use our dataset to directly create a subtyping results model and validate it. """ subtyping_list = [ - SubtypingRow( + models.SubtypingRow( analysis=d['analysis'], contigid=d['contigid'], filename=d['filename'], @@ -44,19 +30,19 @@ def test_subtyping_model_direct(l=BEAUTIFY_VF_SEROTYPE): hitstop=str(d['hitstop']) ) for d in l] - subtyping_result = SubtypingResult( + subtyping_result = models.SubtypingResult( rows = subtyping_list ) subtyping_result.validate() # Return for incorporation into later tests. return subtyping_result -def test_phylotyper_model_direct(l=BEAUTIFY_STX1): +def test_phylotyper_model_direct(l=constants.BEAUTIFY_STX1): """ Use our dataset to directly create a phylotyper results model and validate it. """ phylotyper_list = [ - PhylotyperRow( + models.PhylotyperRow( contig=d['contig'], genome=d['genome'], probability=str(d['probability']), @@ -66,7 +52,7 @@ def test_phylotyper_model_direct(l=BEAUTIFY_STX1): subtype_gene=d['subtype_gene'] ) for d in l] - phylotyper_result = PhylotyperResult( + phylotyper_result = models.PhylotyperResult( rows = phylotyper_list ) phylotyper_result.validate() @@ -77,19 +63,19 @@ def test_pipeline_model_subtyping(): """ Test the Pipeline model itself for subtyping via ECTyper and RGI. 
""" - p = Pipeline( + p = models.Pipeline( func = spfy, - options = ARGS_DICT + options = constants.ARGS_DICT ) mock_serotype = MockRQJob( - result = test_subtyping_model_direct(BEAUTIFY_SEROTYPE) + result = test_subtyping_model_direct(constants.BEAUTIFY_SEROTYPE) ) mock_vf = MockRQJob( - result = test_subtyping_model_direct(BEAUTIFY_VF) + result = test_subtyping_model_direct(constants.BEAUTIFY_VF) ) # Mimicks a Serotype result that will be converted to json. p.jobs.update({ - 'job_ectyper_beautify_serotype': Job( + 'job_ectyper_beautify_serotype': models.Job( rq_job=mock_serotype, name='job_ectyper_beautify_vf', transitory=False, @@ -99,7 +85,7 @@ def test_pipeline_model_subtyping(): }) # Mimicks a VF result that will be converted to json. p.jobs.update({ - 'job_ectyper_beautify_vf': Job( + 'job_ectyper_beautify_vf': models.Job( rq_job=mock_vf, name='job_ectyper_beautify_vf', transitory=False, @@ -107,10 +93,10 @@ def test_pipeline_model_subtyping(): display=True ) }) - assert isinstance(p, Pipeline) + assert isinstance(p, models.Pipeline) assert isinstance(p.jobs, dict) for k in p.jobs: - assert isinstance(p.jobs[k], Job) + assert isinstance(p.jobs[k], models.Job) # Test Pipeline.complete(), should be True. assert p.complete() @@ -121,10 +107,10 @@ def test_pipeline_model_subtyping(): # Add an AMR job and re-test. mock_amr = MockRQJob( - result = test_subtyping_model_direct(BEAUTIFY_AMR) + result = test_subtyping_model_direct(constants.BEAUTIFY_AMR) ) p.jobs.update({ - 'job_ectyper_beautify_amr': Job( + 'job_ectyper_beautify_amr': models.Job( rq_job=mock_amr, name='job_ectyper_beautify_amr', transitory=False, @@ -140,18 +126,18 @@ def test_pipeline_model_phyotyping(): """ Test the Pipeline model itself for subtyping via Phylotyper. """ - p = Pipeline( + p = models.Pipeline( func = spfy, - options = ARGS_DICT + options = constants.ARGS_DICT ) mock_stx1 = MockRQJob( - result = test_phylotyper_model_direct(BEAUTIFY_STX1) + result = test_phylotyper_model_direct(constants.BEAUTIFY_STX1) ) mock_stx2 = MockRQJob( - result = test_phylotyper_model_direct(BEAUTIFY_STX2) + result = test_phylotyper_model_direct(constants.BEAUTIFY_STX2) ) p.jobs.update({ - 'job_phylotyper_beautify_stx1': Job( + 'job_phylotyper_beautify_stx1': models.Job( rq_job=mock_stx1, name='job_phylotyper_beautify_stx1', transitory=False, @@ -160,7 +146,7 @@ def test_pipeline_model_phyotyping(): ) }) p.jobs.update({ - 'job_phylotyper_beautify_stx2': Job( + 'job_phylotyper_beautify_stx2': models.Job( rq_job=mock_stx2, name='job_phylotyper_beautify_stx2', transitory=False, @@ -168,10 +154,10 @@ def test_pipeline_model_phyotyping(): display=True ) }) - assert isinstance(p, Pipeline) + assert isinstance(p, models.Pipeline) assert isinstance(p.jobs, dict) for k in p.jobs: - assert isinstance(p.jobs[k], Job) + assert isinstance(p.jobs[k], models.Job) # Test Pipeline.complete(), should be True. assert p.complete() @@ -182,10 +168,10 @@ def test_pipeline_model_phyotyping(): # Add an AMR job and re-test. mock_eae = MockRQJob( - result = test_phylotyper_model_direct(BEAUTIFY_EAE) + result = test_phylotyper_model_direct(constants.BEAUTIFY_EAE) ) p.jobs.update({ - 'job_phylotyper_beautify_eae': Job( + 'job_phylotyper_beautify_eae': models.Job( rq_job=mock_eae, name='job_phylotyper_beautify_stx2', transitory=False, @@ -201,37 +187,37 @@ def test_pipeline_model_signature(): """ Function signatures should be identical if called on the same function. 
""" - p1 = Pipeline( + p1 = models.Pipeline( func = spfy, - options = ARGS_DICT + options = constants.ARGS_DICT ) - p2 = Pipeline( + p2 = models.Pipeline( func = spfy, - options = ARGS_DICT + options = constants.ARGS_DICT ) r1 = p1.signature() r2 = p2.signature() # These are identical pipelines, should be equal. assert r1 == r2 - p1 = Pipeline( + p1 = models.Pipeline( func = spfy, - options = ARGS_DICT + options = constants.ARGS_DICT ) - p2 = Pipeline( + p2 = models.Pipeline( func = savvy, - options = ARGS_DICT + options = constants.ARGS_DICT ) r1 = p1.signature() r2 = p2.signature() # These pipelines have different functions, should be different. assert r1 != r2 - p1 = Pipeline( + p1 = models.Pipeline( func = spfy, - options = ARGS_DICT + options = constants.ARGS_DICT ) - p2 = Pipeline( + p2 = models.Pipeline( func = spfy, options = {'cats':1} ) From c30dd50db0d8338af671c8aaa5b0d9903d2465c5 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 01:13:10 -0500 Subject: [PATCH 044/122] FIX: isinstance also tests for issubclass --- app/middleware/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index cb5d632b..f765a89e 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -22,7 +22,7 @@ def model_to_json(model): # Validate the model submitted before processing. model.validate() # Conversion. - if issubclass(model, models.Base)(): + if isinstance(model, models.Base): return _convert_model(model) else: raise Exception('model_to_json() called for a model without a handler.') From 892a7ebc99bb313c24f5abcdbe651392b69b19cf Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 10:22:53 -0500 Subject: [PATCH 045/122] ADD: methods for handling multiple files --- app/middleware/models.py | 30 +++++++++++++++++++++++++++--- app/modules/spfy.py | 2 +- app/tests/test_models.py | 14 ++++++++++++++ 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index f765a89e..25ac087f 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -1,4 +1,5 @@ import sys +import copy from hashlib import sha1 from dis import dis from StringIO import StringIO @@ -79,18 +80,41 @@ def __init__(self, jobs=None, files=None, func=None, options=None): files = [] if not options: options = {} - self.jobs = {} # {'somename': instance of RQ.Job} + self.jobs = {} # {'somename': instance of RQ.Job} Only used when enqueing. + self.final_jobs = [] # Jobs for every file in the request. + self.cache = {} # For temporary storage of RQ.Jobs. self.sig = None # Signtaure isn't generated until necessary # TODO: incorporate below into the pipeline. self.files = [] self.func = func # Additional attribute for storing pipeline function. self.options = options + def cache_jobs(self): + """ + Copy current jobs to cache. + """ + self.cache += [copy.deepcopy(self.jobs)] + self.jobs = {} + + def merge_jobs(self): + """ + + """ + # If the jobs dictionary is not empty. + if self.jobs: + self.cache_jobs() + # Actual merge. Notice were converting to list. + self.final_jobs = [ + item + for d in self.cache + for item in d + ] + def complete(self): """ Check if all jobs are completed """ - for j in self.jobs.itervalues(): + for j in self.final_jobs: # Type check. assert isinstance(j, Job) rq_job = j.rq_job @@ -113,7 +137,7 @@ def to_json(self): """ # Gather all the jobs that have finished and haven't failed. 
completed_jobs = [ - j.rq_job for j in self.jobs.itervalues() + j.rq_job for j in self.final_jobs if j.display and j.rq_job.is_finished and not j.rq_job.is_failed ] # Merge the json lists together. diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 9fa2c4fa..8fac574c 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -455,7 +455,7 @@ def blob_savvy(args_dict, pipeline): ) ) else: - d.update(blob_savvy_enqueue(args_dict)) + d.update(blob_savvy_enqueue(args_dict, pipeline)) return d diff --git a/app/tests/test_models.py b/app/tests/test_models.py index d37aa0df..271fe565 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -98,6 +98,10 @@ def test_pipeline_model_subtyping(): for k in p.jobs: assert isinstance(p.jobs[k], models.Job) + # Test Pipeline.cache_jobs() + p.cache_jobs() + # Test Pipeline.merge_jobs() + p.merge_jobs() # Test Pipeline.complete(), should be True. assert p.complete() @@ -118,6 +122,9 @@ def test_pipeline_model_subtyping(): display=True ) }) + p.merge_jobs() + # Test Pipeline.complete(), should be True. + assert p.complete() # Test Pipeline.to_json(). json = p.to_json() assert isinstance(json, list) @@ -159,6 +166,10 @@ def test_pipeline_model_phyotyping(): for k in p.jobs: assert isinstance(p.jobs[k], models.Job) + # Test Pipeline.cache_jobs() + p.cache_jobs() + # Test Pipeline.merge_jobs() + p.merge_jobs() # Test Pipeline.complete(), should be True. assert p.complete() @@ -179,6 +190,9 @@ def test_pipeline_model_phyotyping(): display=True ) }) + p.merge_jobs() + # Test Pipeline.complete(), should be True. + assert p.complete() # Test Pipeline.to_json(). json = p.to_json() assert isinstance(json, list) From 784fbee9de16ec5340fde2963863045a4a119ea1 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 11:22:05 -0500 Subject: [PATCH 046/122] FIX: cache should be a list --- app/middleware/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 25ac087f..1970b60f 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -82,7 +82,7 @@ def __init__(self, jobs=None, files=None, func=None, options=None): options = {} self.jobs = {} # {'somename': instance of RQ.Job} Only used when enqueing. self.final_jobs = [] # Jobs for every file in the request. - self.cache = {} # For temporary storage of RQ.Jobs. + self.cache = [] # For temporary storage of RQ.Jobs. self.sig = None # Signtaure isn't generated until necessary # TODO: incorporate below into the pipeline. self.files = [] From 421d409fd6053e128a80f0e7271493be5a4cfe69 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 11:56:29 -0500 Subject: [PATCH 047/122] FIX: values --- app/middleware/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 1970b60f..156648c4 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -105,9 +105,9 @@ def merge_jobs(self): self.cache_jobs() # Actual merge. Notice were converting to list. 
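# The flatten below is what this fix corrects: iterating a dict yields its
# keys, so the Job objects themselves must be collected with d.values().
# A standalone sketch of the corrected behaviour (hypothetical two-job cache):
cache = [{'job_a': 'A'}, {'job_b': 'B'}]
assert [j for d in cache for j in d] == ['job_a', 'job_b']    # keys only
assert [j for d in cache for j in d.values()] == ['A', 'B']   # the Jobs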
self.final_jobs = [ - item + j for d in self.cache - for item in d + for j in d.values() ] def complete(self): From 306727308dccbf1433196259f2544d7b7bdff81f Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 12:36:25 -0500 Subject: [PATCH 048/122] ADD: try creating a signature on __init__ --- app/middleware/models.py | 4 ++-- app/routes/ra_posts.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 156648c4..b993d5bc 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -83,11 +83,11 @@ def __init__(self, jobs=None, files=None, func=None, options=None): self.jobs = {} # {'somename': instance of RQ.Job} Only used when enqueing. self.final_jobs = [] # Jobs for every file in the request. self.cache = [] # For temporary storage of RQ.Jobs. - self.sig = None # Signtaure isn't generated until necessary - # TODO: incorporate below into the pipeline. + self.sig = None self.files = [] self.func = func # Additional attribute for storing pipeline function. self.options = options + self.signature() # Create & Store a signature for the pipeline. def cache_jobs(self): """ diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py index ab6d9f8f..107c98a6 100644 --- a/app/routes/ra_posts.py +++ b/app/routes/ra_posts.py @@ -261,8 +261,10 @@ def upload(): pipeline = pipeline ) jobs_dict.update(jobs_enqueued) + pipeline.cache_jobs() # new in 4.2.0 print 'upload(): all files enqueued, returning...' + pipeline.merge_jobs() if groupresults: return jsonify(handle_groupresults(jobs_dict)) else: From ca2948ccc831693c2664b224e17fea93c2b348ab Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 13:05:57 -0500 Subject: [PATCH 049/122] ADD: more tests for signatures and method to store pipeline into Redis DB --- app/middleware/models.py | 26 ++++++++++++++++++++++++++ app/routes/ra_posts.py | 3 ++- app/tests/test_models.py | 8 ++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index b993d5bc..8e62584f 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -1,5 +1,7 @@ import sys import copy +import config +import redis from hashlib import sha1 from dis import dis from StringIO import StringIO @@ -193,3 +195,27 @@ def signature(self): sig = hx.hexdigest() self.sig = sig return sig + + def store(self): + """ + Stores the pipeline to Redis DB and creates a pipeline id for return. + :param pipeline: An instance of the models.Pipeline class. + :return: (dict): {"pipeline..." id: "Subtyping"} + """ + pipeline_id = "pipeline{0}".format(self.sig) + + # Start a Redis connection. + redis_url = config['REDIS_URL'] + redis_connection = redis.from_url(redis_url) + + # Store the pipeline instance. + redis_connection.set(pipeline_id, self) + + # Create a similar structure to the old return + d = {} + d[pipeline_id] = {} + d[pipeline_id]['analysis'] = "Subtyping" + + d[pipeline_id]['file'] = self.files + print '_store_pipeline(): finished' + return d diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py index 107c98a6..7d132fee 100644 --- a/app/routes/ra_posts.py +++ b/app/routes/ra_posts.py @@ -266,7 +266,8 @@ def upload(): print 'upload(): all files enqueued, returning...' 
pipeline.merge_jobs() if groupresults: - return jsonify(handle_groupresults(jobs_dict)) + return jsonify(pipeline.store()) + # return jsonify(handle_groupresults(jobs_dict)) else: return jsonify(handle_singleton(jobs_dict)) else: diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 271fe565..77b18946 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -209,11 +209,19 @@ def test_pipeline_model_signature(): func = spfy, options = constants.ARGS_DICT ) + # Signatures should be generated on init. + assert p1.sig == p2.sig + + # Call the signature method to re-generate. r1 = p1.signature() r2 = p2.signature() # These are identical pipelines, should be equal. assert r1 == r2 + # Both methods of signature generation should be the same. + assert p1.sig == r1 + assert p2.sig == r2 + p1 = models.Pipeline( func = spfy, options = constants.ARGS_DICT From 8fe03e32ad5271e9f18eccec9fd5960756eed57c Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 13:43:09 -0500 Subject: [PATCH 050/122] STOP: 1st draft of full circle --- app/middleware/models.py | 5 +++-- app/routes/ra_statuses.py | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 8e62584f..2ee99dcc 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -2,6 +2,7 @@ import copy import config import redis +import cPickle as pickle from hashlib import sha1 from dis import dis from StringIO import StringIO @@ -198,7 +199,7 @@ def signature(self): def store(self): """ - Stores the pipeline to Redis DB and creates a pipeline id for return. + Stores the pipeline (via Pickle) to Redis DB and creates a pipeline id for return. :param pipeline: An instance of the models.Pipeline class. :return: (dict): {"pipeline..." id: "Subtyping"} """ @@ -209,7 +210,7 @@ def store(self): redis_connection = redis.from_url(redis_url) # Store the pipeline instance. - redis_connection.set(pipeline_id, self) + redis_connection.set(pipeline_id, pickle.dumps(self)) # Create a similar structure to the old return d = {} diff --git a/app/routes/ra_statuses.py b/app/routes/ra_statuses.py index 3d32cd6d..bf84dd2c 100644 --- a/app/routes/ra_statuses.py +++ b/app/routes/ra_statuses.py @@ -1,4 +1,5 @@ import redis +import cPickle as pickle from ast import literal_eval from flask import Blueprint, request, jsonify, current_app from routes.job_utils import fetch_job @@ -60,6 +61,27 @@ def job_status_reactapp_grouped(job_id, redis_connection): # if you've gotten to this point, then all jobs are finished return jsonify(merge_job_results(jobs_dict, redis_connection)) +def _status_pipeline(pipeline_id, redis_connection): + """ + Checks the status of a pipeline. Returns "pending", the exc_info if failed, or the result. + :param pipeline_id: + :param redis_connection: + :return: + """ + # Retrieve the models.Pipeline instance. + pipeline = pickle.loads(redis_connection.get(pipeline_id)) + complete = pipeline.complete() # Normally bool, but str if failed. + if isinstance(complete, bool): + if complete: + # Everything finished successfully. + return pipeline.to_json() + else: + # Some job in the pipeline is still pending. + return jsonify("pending") + else: + # Something failed and we have an exc_info. 
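# Note the contract this branch depends on: complete() is effectively
# tri-state -- True when every non-backlog job finished, False while any is
# still pending, and a string of exc_info when one failed -- which is why the
# isinstance(complete, bool) check above cleanly separates the success and
# pending cases from the failure case.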
+ return jsonify(complete) + @bp_ra_statuses.route('/api/v0/results/') def job_status_reactapp(job_id): ''' @@ -72,6 +94,8 @@ def job_status_reactapp(job_id): # check if the job_id is of the new format and should be handled diff if job_id.startswith('blob'): return job_status_reactapp_grouped(job_id, redis_connection) + elif job_id.startswith('pipeline'): + return _status_pipeline(job_id, redis_connection) else: # old code job = fetch_job(job_id, redis_connection) From 5cfd6cde9364a40ededef5d40ac7ef3748f9a20f Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 14:29:14 -0500 Subject: [PATCH 051/122] FIX: access the rq_job --- app/modules/spfy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 8fac574c..8d8b7421 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -364,7 +364,7 @@ def phylotyper_pipeline(multiples, subtype): subtype, tsvfile, id_file=query_file + '_id.txt', - depends_on=pipeline.jobs['job_ectyper_datastruct_vf']) + depends_on=pipeline.jobs['job_ectyper_datastruct_vf'].rq_job) job_pt_dict = multiples.enqueue( phylotyper.to_dict, tsvfile, subtype, picklefile, depends_on=job_pt) From 71f20df8643da5feb353309a8d6e1e0c3990221a Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 14:43:41 -0500 Subject: [PATCH 052/122] DEBUG: try without the depcopy --- app/middleware/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 2ee99dcc..3f9e1392 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -96,7 +96,7 @@ def cache_jobs(self): """ Copy current jobs to cache. """ - self.cache += [copy.deepcopy(self.jobs)] + self.cache += [self.jobs] self.jobs = {} def merge_jobs(self): From 538573bccfbb37b6f47f68d5459346457015e09a Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 16:05:03 -0500 Subject: [PATCH 053/122] FIX: namespace for config --- app/middleware/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 3f9e1392..425fdb6d 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -206,7 +206,7 @@ def store(self): pipeline_id = "pipeline{0}".format(self.sig) # Start a Redis connection. - redis_url = config['REDIS_URL'] + redis_url = config.REDIS_URL redis_connection = redis.from_url(redis_url) # Store the pipeline instance. From c3f8c84570cde3aac3636928dbd9aa38d7f6cc66 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 16:29:10 -0500 Subject: [PATCH 054/122] FIX: tao of pickle --- app/middleware/models.py | 67 ++++++++++++++++++++++++--------------- app/routes/ra_posts.py | 4 +-- app/routes/ra_statuses.py | 7 ++-- 3 files changed, 48 insertions(+), 30 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 425fdb6d..245d90b9 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -31,6 +31,46 @@ def model_to_json(model): else: raise Exception('model_to_json() called for a model without a handler.') +def store(pipeline): + """ + Stores the pipeline (via Pickle) to Redis DB and creates a pipeline id for return. + :param pipeline: An instance of the models.Pipeline class. + :return: (dict): {"pipeline..." id: "Subtyping"} + """ + pipeline_id = "pipeline{0}".format(pipeline.sig) + + # Start a Redis connection. + redis_url = config.REDIS_URL + redis_connection = redis.from_url(redis_url) + + # Store the pipeline instance. 
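# Unpickling later requires that the Pipeline class definition be importable
# wherever loads() runs. A minimal sketch of the round trip (hypothetical key;
# assumes a reachable Redis at config.REDIS_URL):
#
#     r = redis.from_url(config.REDIS_URL)
#     r.set('pipeline<sig>', pickle.dumps(pipeline))
#     restored = pickle.loads(r.get('pipeline<sig>'))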
+ redis_connection.set(pipeline_id, pickle.dumps(pipeline)) + + # Create a similar structure to the old return + d = {} + d[pipeline_id] = {} + d[pipeline_id]['analysis'] = "Subtyping" + + d[pipeline_id]['file'] = pipeline.files + print '_store_pipeline(): finished' + return d + +def load(pipeline_id): + """ + Must load Pipeline instances with this function, as a pickle.loads() needs + access to the Pipeline class definition to correctly load it. + :param pipeline_id: + :return: + """ + # Start a Redis connection. + redis_url = config.REDIS_URL + redis_connection = redis.from_url(redis_url) + + # Get the pipeline instance. + raw = redis_connection.get(pipeline_id) + pipeline = pickle.loads(raw) + return pipeline + class SubtypingRow(models.Base): analysis = fields.StringField(required=True) @@ -58,6 +98,7 @@ class PhylotyperRow(models.Base): class PhylotyperResult(models.Base): rows = fields.ListField([PhylotyperRow], nullable=True) + class Job(): def __init__(self, rq_job, name="", transitory=True, backlog=True, display=False): """ @@ -101,7 +142,7 @@ def cache_jobs(self): def merge_jobs(self): """ - + """ # If the jobs dictionary is not empty. if self.jobs: @@ -196,27 +237,3 @@ def signature(self): sig = hx.hexdigest() self.sig = sig return sig - - def store(self): - """ - Stores the pipeline (via Pickle) to Redis DB and creates a pipeline id for return. - :param pipeline: An instance of the models.Pipeline class. - :return: (dict): {"pipeline..." id: "Subtyping"} - """ - pipeline_id = "pipeline{0}".format(self.sig) - - # Start a Redis connection. - redis_url = config.REDIS_URL - redis_connection = redis.from_url(redis_url) - - # Store the pipeline instance. - redis_connection.set(pipeline_id, pickle.dumps(self)) - - # Create a similar structure to the old return - d = {} - d[pipeline_id] = {} - d[pipeline_id]['analysis'] = "Subtyping" - - d[pipeline_id]['file'] = self.files - print '_store_pipeline(): finished' - return d diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py index 7d132fee..241bbcc3 100644 --- a/app/routes/ra_posts.py +++ b/app/routes/ra_posts.py @@ -17,7 +17,7 @@ from modules.gc import blob_gc_enqueue from modules.spfy import spfy from middleware.api import subtyping_dependencies -from middleware.models import Pipeline +from middleware.models import Pipeline, store bp_ra_posts = Blueprint('reactapp_posts', __name__) @@ -266,7 +266,7 @@ def upload(): print 'upload(): all files enqueued, returning...' pipeline.merge_jobs() if groupresults: - return jsonify(pipeline.store()) + return jsonify(store(pipeline)) # return jsonify(handle_groupresults(jobs_dict)) else: return jsonify(handle_singleton(jobs_dict)) diff --git a/app/routes/ra_statuses.py b/app/routes/ra_statuses.py index bf84dd2c..4998fa01 100644 --- a/app/routes/ra_statuses.py +++ b/app/routes/ra_statuses.py @@ -3,6 +3,7 @@ from ast import literal_eval from flask import Blueprint, request, jsonify, current_app from routes.job_utils import fetch_job +from middleware.models import load bp_ra_statuses = Blueprint('reactapp_statuses', __name__) @@ -61,7 +62,7 @@ def job_status_reactapp_grouped(job_id, redis_connection): # if you've gotten to this point, then all jobs are finished return jsonify(merge_job_results(jobs_dict, redis_connection)) -def _status_pipeline(pipeline_id, redis_connection): +def _status_pipeline(pipeline_id): """ Checks the status of a pipeline. Returns "pending", the exc_info if failed, or the result. 
:param pipeline_id: @@ -69,7 +70,7 @@ def _status_pipeline(pipeline_id, redis_connection): :return: """ # Retrieve the models.Pipeline instance. - pipeline = pickle.loads(redis_connection.get(pipeline_id)) + pipeline = load(pipeline_id) complete = pipeline.complete() # Normally bool, but str if failed. if isinstance(complete, bool): if complete: @@ -95,7 +96,7 @@ def job_status_reactapp(job_id): if job_id.startswith('blob'): return job_status_reactapp_grouped(job_id, redis_connection) elif job_id.startswith('pipeline'): - return _status_pipeline(job_id, redis_connection) + return _status_pipeline(job_id) else: # old code job = fetch_job(job_id, redis_connection) From c3b2a0858fdfd9ca42f0fa3a1d889cb130bd62c3 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 17:13:50 -0500 Subject: [PATCH 055/122] FIX?: use dill instead of cPickle --- app/middleware/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 245d90b9..18cccc3b 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -2,7 +2,7 @@ import copy import config import redis -import cPickle as pickle +import dill as pickle from hashlib import sha1 from dis import dis from StringIO import StringIO @@ -59,8 +59,8 @@ def load(pipeline_id): """ Must load Pipeline instances with this function, as a pickle.loads() needs access to the Pipeline class definition to correctly load it. - :param pipeline_id: - :return: + :param pipeline_id: + :return: """ # Start a Redis connection. redis_url = config.REDIS_URL From efc97c4a90fa8e54e6294270e5d5ced45c0a7068 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 18 Feb 2018 19:14:23 -0500 Subject: [PATCH 056/122] ADD: tests for pickling/unpickling Pipeline instances with dill --- app/tests/test_models.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 77b18946..6605f118 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -1,3 +1,4 @@ +import dill from middleware import models from modules.spfy import spfy from scripts.savvy import savvy @@ -59,19 +60,16 @@ def test_phylotyper_model_direct(l=constants.BEAUTIFY_STX1): # Return for incorporation into later tests. return phylotyper_result -def test_pipeline_model_subtyping(): - """ - Test the Pipeline model itself for subtyping via ECTyper and RGI. - """ +def _create_example_pipeline(): p = models.Pipeline( - func = spfy, - options = constants.ARGS_DICT + func=spfy, + options=constants.ARGS_DICT ) mock_serotype = MockRQJob( - result = test_subtyping_model_direct(constants.BEAUTIFY_SEROTYPE) + result=test_subtyping_model_direct(constants.BEAUTIFY_SEROTYPE) ) mock_vf = MockRQJob( - result = test_subtyping_model_direct(constants.BEAUTIFY_VF) + result=test_subtyping_model_direct(constants.BEAUTIFY_VF) ) # Mimicks a Serotype result that will be converted to json. p.jobs.update({ @@ -93,6 +91,15 @@ def test_pipeline_model_subtyping(): display=True ) }) + return p + +def test_pipeline_model_subtyping(p=None): + """ + Test the Pipeline model itself for subtyping via ECTyper and RGI. 
+ """ + if not p: + p = _create_example_pipeline() + assert isinstance(p, models.Pipeline) assert isinstance(p.jobs, dict) for k in p.jobs: @@ -129,6 +136,15 @@ def test_pipeline_model_subtyping(): json = p.to_json() assert isinstance(json, list) +def test_pipeline_model_dill(): + p = _create_example_pipeline() + # Test dumping the Pipeline into a str. + buffer = dill.dumps(p) + # Test loading the Pipeline from a str. + loaded_pipeline = dill.loads(buffer) + # Run the same tests on the loaded pipeline. + test_pipeline_model_subtyping(p=loaded_pipeline) + def test_pipeline_model_phyotyping(): """ Test the Pipeline model itself for subtyping via Phylotyper. From d8d3e8be538a70041341b45e54af006efa443be0 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 12:15:35 -0500 Subject: [PATCH 057/122] FIX: pipelines being stored in Redis & pipeline ids being generated. fixed call for model-to-graph --- app/middleware/graphers/datastruct_savvy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index 5a515e7e..9c6069e2 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -8,7 +8,7 @@ # working with Serotype, Antimicrobial Resistance, & Virulence Factor data # structures -def _convert_subtyping(graph, model, uriIsolate): +def _graph_subtyping(graph, model, uriIsolate): # Convert the model to a graph. struct = model.to_struct() rows_list = struct['rows'] @@ -30,7 +30,7 @@ def model_to_graph(graph, model, uriIsolate): model.validate() # Conversion. if isinstance(model, SubtypingResult): - return _convert_subtyping(model) + return _graph_subtyping(graph, model, uriIsolate) else: raise Exception('model_to_graph() called for a model without a handler.') From e01193cdec03d642e2e2d0ecd53f61fbd510b149 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 14:05:39 -0500 Subject: [PATCH 058/122] ADD: convert VF to model and return --- app/middleware/display/beautify.py | 14 ++++++++------ app/middleware/modellers.py | 4 ++-- app/modules/ectyper/call_ectyper.py | 3 ++- app/modules/spfy.py | 4 ++-- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index f51fb232..69f18a29 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -5,6 +5,7 @@ from middleware.display.find_widest import check_alleles from middleware.graphers.turtle_utils import actual_filename from middleware.models import SubtypingResult, model_to_json +from middleware.modellers import model_vf # logging log_file = initialize_logging() @@ -122,18 +123,19 @@ def beautify(pickled_result, args_dict=None): result = pickle.load(open(pickled_result, 'rb')) if isinstance(result, dict): gene_dict = result - # this converts our dictionary structure into json and adds metadata (filename, etc.) + # Convert the old ECTYper's dictionary structure into list and adds metadata (filename, etc.). json_r = json_return(args_dict, gene_dict) - log.debug('First parse into json_r: ' + str(json_r)) - # if looking for only serotype, skip this step + # For VF/AMR, find widest gene matched. Strip shorter matches. 
if args_dict['options']['vf'] or args_dict['options']['amr']: json_r = check_alleles(json_r) - log.debug('After checking alleles json_r: ' + str(json_r)) - # check if there is an analysis module that has failed in the result + # Check if there is an analysis module that has failed in the result. if has_failed(json_r): + # If failed, return. return handle_failed(json_r, args_dict) else: - return json_r + # Everything worked, cast result into a model. + model = model_vf(json_r) + return model_to_json(model) elif isinstance(result, SubtypingResult): return model_to_json(result) else: diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index 2ea7a1e0..06aa2b8e 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -33,7 +33,7 @@ def model_serotype(pi, pl, output_file): ) return subtyping_result -def model_vf(json_r, analysis="Virulence Factors"): +def model_vf(json_r): """ Casts the output from display.beautify into a SubtypingResult object. """ @@ -41,7 +41,7 @@ def model_vf(json_r, analysis="Virulence Factors"): assert isinstance(json_r, list) subtyping_list = [ SubtypingRow( - analysis=analysis, + analysis=item('analysis'), contigid=item['contigid'], filename=item['filename'], hitcutoff=item['hitcutoff'], diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py index a7bb2aab..92511343 100644 --- a/app/modules/ectyper/call_ectyper.py +++ b/app/modules/ectyper/call_ectyper.py @@ -58,6 +58,7 @@ def call_ectyper_vf(args_dict): # we are calling tools_controller on only one file, so grab that dict key, ectyper_dict = ectyper_dict.popitem() + # TODO: convert this to a VF model. # Path for the pickle dump. p = filepath + '_ectyper_vf.p' pickle.dump(ectyper_dict,open(p,'wb')) @@ -91,7 +92,7 @@ def call_ectyper_serotype(args_dict): output_file=output_file ) # Path for the pickle dump. - p = genome_file + '_ectyper_serotype.p' + p = genome_file + '_ectyper_serotype.model' pickle.dump(subtyping_result,open(p,'wb')) return p else: diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 8d8b7421..10dd3e13 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -169,7 +169,7 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe datastruct_savvy, query_file, query_file + '_id.txt', - query_file + '_ectyper_serotype.p', + query_file + '_ectyper_serotype.model', depends_on=job_ectyper_serotype, result_ttl=ttl_value) d['job_ectyper_datastruct_serotype'] = job_ectyper_datastruct_serotype @@ -188,7 +188,7 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe # serotype, and we're not in bulk uploading. 
job_ectyper_beautify_serotype = multiples.enqueue( beautify, - pickled_result = query_file + '_ectyper_serotype.p', + pickled_result = query_file + '_ectyper_serotype.model', depends_on=job_ectyper_serotype, result_ttl=ttl_value ) From db9ecf7416330e9081662f0c54f7de216a5535f0 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 14:33:42 -0500 Subject: [PATCH 059/122] FIX: shouldnt record backlog items into the pipeline at all --- app/middleware/models.py | 2 +- app/modules/spfy.py | 126 ++++++++++++++++++++------------------ app/routes/ra_posts.py | 1 - app/routes/ra_statuses.py | 9 +-- 4 files changed, 74 insertions(+), 64 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 18cccc3b..9e43e996 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -149,7 +149,7 @@ def merge_jobs(self): self.cache_jobs() # Actual merge. Notice were converting to list. self.final_jobs = [ - j + j # Where j is our custom Job class, not an rq_job for d in self.cache for j in d.values() ] diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 10dd3e13..cfcd4c07 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -71,15 +71,17 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N depends_on=job_id) # TODO: this is double, switch everything to pipeline once tested d['job_ectyper_vf'] = job_ectyper_vf - pipeline.jobs.update({ - 'job_ectyper_vf': Job( - rq_job=job_ectyper_vf, - name='job_ectyper_vf', - transitory=True, - backlog=False, - display=False - ) - }) + # pipeline is only passed if not running in backlog. + if pipeline: + pipeline.jobs.update({ + 'job_ectyper_vf': Job( + rq_job=job_ectyper_vf, + name='job_ectyper_vf', + transitory=True, + backlog=False, + display=False + ) + }) # If bulk uploading is set, we return the datastruct as the end task # to poll for job completion, therefore must set ttl of -1. @@ -97,15 +99,17 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N depends_on=job_ectyper_vf, result_ttl=ttl_value) d['job_ectyper_datastruct_vf'] = job_ectyper_datastruct_vf - pipeline.jobs.update({ - 'job_ectyper_datastruct_vf': Job( - rq_job=job_ectyper_datastruct_vf, - name='job_ectyper_datastruct_vf', - transitory=True, - backlog=False, - display=False - ) - }) + # pipeline is only passed if not running in backlog. + if pipeline: + pipeline.jobs.update({ + 'job_ectyper_datastruct_vf': Job( + rq_job=job_ectyper_datastruct_vf, + name='job_ectyper_datastruct_vf', + transitory=True, + backlog=False, + display=False + ) + }) if not single_dict['options']['bulk']: # Only bother parsing into json if user has requested either vf or @@ -118,15 +122,17 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N result_ttl=ttl_value ) d['job_ectyper_beautify_vf'] = job_ectyper_beautify_vf - pipeline.jobs.update({ - 'job_ectyper_beautify_vf': Job( - rq_job=job_ectyper_beautify_vf, - name='job_ectyper_beautify_vf', - transitory=False, - backlog=False, - display=True - ) - }) + # pipeline is only passed if not running in backlog. 
+ if pipeline: + pipeline.jobs.update({ + 'job_ectyper_beautify_vf': Job( + rq_job=job_ectyper_beautify_vf, + name='job_ectyper_beautify_vf', + transitory=False, + backlog=False, + display=True + ) + }) return d def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipeline=None): @@ -147,15 +153,17 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe single_dict_vf, depends_on=job_id) d['job_ectyper_serotype'] = job_ectyper_serotype - pipeline.jobs.update({ - 'job_ectyper_serotype': Job( - rq_job=job_ectyper_serotype, - name='job_ectyper_serotype', - transitory=True, - backlog=False, - display=False - ) - }) + # pipeline is only passed if not running in backlog. + if pipeline: + pipeline.jobs.update({ + 'job_ectyper_serotype': Job( + rq_job=job_ectyper_serotype, + name='job_ectyper_serotype', + transitory=True, + backlog=False, + display=False + ) + }) # If bulk uploading is set, we return the datastruct as the end task # to poll for job completion, therefore must set ttl of -1. @@ -173,15 +181,17 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe depends_on=job_ectyper_serotype, result_ttl=ttl_value) d['job_ectyper_datastruct_serotype'] = job_ectyper_datastruct_serotype - pipeline.jobs.update({ - 'job_ectyper_datastruct_serotype': Job( - rq_job=job_ectyper_datastruct_serotype, - name='job_ectyper_datastruct_serotype', - transitory=True, - backlog=False, - display=False - ) - }) + # pipeline is only passed if not running in backlog. + if pipeline: + pipeline.jobs.update({ + 'job_ectyper_datastruct_serotype': Job( + rq_job=job_ectyper_datastruct_serotype, + name='job_ectyper_datastruct_serotype', + transitory=True, + backlog=False, + display=False + ) + }) if not single_dict['options']['bulk']: # Only bother parsing into json if user has requested either vf or @@ -193,15 +203,17 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe result_ttl=ttl_value ) d['job_ectyper_beautify_serotype'] = job_ectyper_beautify_serotype - pipeline.jobs.update({ - 'job_ectyper_beautify_serotype': Job( - rq_job=job_ectyper_beautify_serotype, - name='job_ectyper_beautify_serotype', - transitory=False, - backlog=False, - display=True - ) - }) + # pipeline is only passed if not running in backlog. + if pipeline: + pipeline.jobs.update({ + 'job_ectyper_beautify_serotype': Job( + rq_job=job_ectyper_beautify_serotype, + name='job_ectyper_beautify_serotype', + transitory=False, + backlog=False, + display=True + ) + }) return d def blob_savvy_enqueue(single_dict, pipeline): @@ -273,8 +285,7 @@ def blob_savvy_enqueue(single_dict, pipeline): backlog_singles_q, backlog_multiples_q, query_file, - backlog_d, - pipeline=pipeline + backlog_d ) # Serotype @@ -307,8 +318,7 @@ def blob_savvy_enqueue(single_dict, pipeline): backlog_singles_q, backlog_multiples_q, query_file, - backlog_d, - pipeline=pipeline + backlog_d ) # END ECTYPER PIPELINE diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py index 241bbcc3..28e15d07 100644 --- a/app/routes/ra_posts.py +++ b/app/routes/ra_posts.py @@ -262,7 +262,6 @@ def upload(): ) jobs_dict.update(jobs_enqueued) pipeline.cache_jobs() - # new in 4.2.0 print 'upload(): all files enqueued, returning...' 
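# The request lifecycle these lines implement, reduced to a sketch
# (enqueue_one_file is a hypothetical stand-in for blob_savvy_enqueue):
#
#     for f in request_files:
#         jobs_dict.update(enqueue_one_file(f, pipeline))
#         pipeline.cache_jobs()           # stash this file's jobs, reset .jobs
#     pipeline.merge_jobs()               # flatten the cache into final_jobs
#     pipeline_id_dict = store(pipeline)  # pickle to Redis, return a poll id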
pipeline.merge_jobs() if groupresults: diff --git a/app/routes/ra_statuses.py b/app/routes/ra_statuses.py index 4998fa01..bd95ad1b 100644 --- a/app/routes/ra_statuses.py +++ b/app/routes/ra_statuses.py @@ -3,7 +3,7 @@ from ast import literal_eval from flask import Blueprint, request, jsonify, current_app from routes.job_utils import fetch_job -from middleware.models import load +from middleware.models import load, Pipeline bp_ra_statuses = Blueprint('reactapp_statuses', __name__) @@ -65,12 +65,13 @@ def job_status_reactapp_grouped(job_id, redis_connection): def _status_pipeline(pipeline_id): """ Checks the status of a pipeline. Returns "pending", the exc_info if failed, or the result. - :param pipeline_id: - :param redis_connection: - :return: + :param pipeline_id: + :param redis_connection: + :return: """ # Retrieve the models.Pipeline instance. pipeline = load(pipeline_id) + assert isinstance(pipeline, Pipeline) complete = pipeline.complete() # Normally bool, but str if failed. if isinstance(complete, bool): if complete: From 9956eb4c9261d546a8ded9d4a140775b547856e1 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 14:57:58 -0500 Subject: [PATCH 060/122] CHANGE: pass a backlog flag instead of different queues --- app/modules/spfy.py | 152 +++++++++++++++++++++----------------------- 1 file changed, 74 insertions(+), 78 deletions(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index cfcd4c07..e1fb7cd0 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -51,7 +51,7 @@ backlog_multiples_q = Queue( 'backlog_multiples', connection=redis_conn, default_timeout=config.DEFAULT_TIMEOUT) -def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=None): +def _ectyper_pipeline_vf(query_file, single_dict, pipeline=None, backlog=False): """ Enqueue all the jobs required for VF. """ @@ -59,6 +59,12 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N d = {} # Alias. job_id = pipeline.jobs['job_id'].rq_job + if not backlog: + singles = singles_q + multiples = multiples_q + else: + singles = backlog_singles_q + multiples = backlog_multiples_q # Create a copy of the arguments dictionary and disable Serotype. # This copy is passed to the old ECTyper. @@ -71,17 +77,15 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N depends_on=job_id) # TODO: this is double, switch everything to pipeline once tested d['job_ectyper_vf'] = job_ectyper_vf - # pipeline is only passed if not running in backlog. - if pipeline: - pipeline.jobs.update({ - 'job_ectyper_vf': Job( - rq_job=job_ectyper_vf, - name='job_ectyper_vf', - transitory=True, - backlog=False, - display=False - ) - }) + pipeline.jobs.update({ + 'job_ectyper_vf': Job( + rq_job=job_ectyper_vf, + name='job_ectyper_vf', + transitory=True, + backlog=backlog, + display=False + ) + }) # If bulk uploading is set, we return the datastruct as the end task # to poll for job completion, therefore must set ttl of -1. @@ -99,17 +103,15 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N depends_on=job_ectyper_vf, result_ttl=ttl_value) d['job_ectyper_datastruct_vf'] = job_ectyper_datastruct_vf - # pipeline is only passed if not running in backlog. 
- if pipeline: - pipeline.jobs.update({ - 'job_ectyper_datastruct_vf': Job( - rq_job=job_ectyper_datastruct_vf, - name='job_ectyper_datastruct_vf', - transitory=True, - backlog=False, - display=False - ) - }) + pipeline.jobs.update({ + 'job_ectyper_datastruct_vf': Job( + rq_job=job_ectyper_datastruct_vf, + name='job_ectyper_datastruct_vf', + transitory=True, + backlog=backlog, + display=False + ) + }) if not single_dict['options']['bulk']: # Only bother parsing into json if user has requested either vf or @@ -122,20 +124,18 @@ def _ectyper_pipeline_vf(singles, multiples, query_file, single_dict, pipeline=N result_ttl=ttl_value ) d['job_ectyper_beautify_vf'] = job_ectyper_beautify_vf - # pipeline is only passed if not running in backlog. - if pipeline: - pipeline.jobs.update({ - 'job_ectyper_beautify_vf': Job( - rq_job=job_ectyper_beautify_vf, - name='job_ectyper_beautify_vf', - transitory=False, - backlog=False, - display=True - ) - }) + pipeline.jobs.update({ + 'job_ectyper_beautify_vf': Job( + rq_job=job_ectyper_beautify_vf, + name='job_ectyper_beautify_vf', + transitory=False, + backlog=backlog, + display=True + ) + }) return d -def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipeline=None): +def _ectyper_pipeline_serotype(query_file, single_dict, pipeline=None, backlog=False): """ Enqueue all the jobs required for VF. """ @@ -143,6 +143,12 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe d = {} # Alias. job_id = pipeline.jobs['job_id'].rq_job + if not backlog: + singles = singles_q + multiples = multiples_q + else: + singles = backlog_singles_q + multiples = backlog_multiples_q # Create a copy of the arguments dictionary and disable Serotype. # This copy is passed to the old ECTyper. @@ -153,17 +159,15 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe single_dict_vf, depends_on=job_id) d['job_ectyper_serotype'] = job_ectyper_serotype - # pipeline is only passed if not running in backlog. - if pipeline: - pipeline.jobs.update({ - 'job_ectyper_serotype': Job( - rq_job=job_ectyper_serotype, - name='job_ectyper_serotype', - transitory=True, - backlog=False, - display=False - ) - }) + pipeline.jobs.update({ + 'job_ectyper_serotype': Job( + rq_job=job_ectyper_serotype, + name='job_ectyper_serotype', + transitory=True, + backlog=backlog, + display=False + ) + }) # If bulk uploading is set, we return the datastruct as the end task # to poll for job completion, therefore must set ttl of -1. @@ -181,17 +185,15 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe depends_on=job_ectyper_serotype, result_ttl=ttl_value) d['job_ectyper_datastruct_serotype'] = job_ectyper_datastruct_serotype - # pipeline is only passed if not running in backlog. 
- if pipeline: - pipeline.jobs.update({ - 'job_ectyper_datastruct_serotype': Job( - rq_job=job_ectyper_datastruct_serotype, - name='job_ectyper_datastruct_serotype', - transitory=True, - backlog=False, - display=False - ) - }) + pipeline.jobs.update({ + 'job_ectyper_datastruct_serotype': Job( + rq_job=job_ectyper_datastruct_serotype, + name='job_ectyper_datastruct_serotype', + transitory=True, + backlog=backlog, + display=False + ) + }) if not single_dict['options']['bulk']: # Only bother parsing into json if user has requested either vf or @@ -203,17 +205,15 @@ def _ectyper_pipeline_serotype(singles, multiples, query_file, single_dict, pipe result_ttl=ttl_value ) d['job_ectyper_beautify_serotype'] = job_ectyper_beautify_serotype - # pipeline is only passed if not running in backlog. - if pipeline: - pipeline.jobs.update({ - 'job_ectyper_beautify_serotype': Job( - rq_job=job_ectyper_beautify_serotype, - name='job_ectyper_beautify_serotype', - transitory=False, - backlog=False, - display=True - ) - }) + pipeline.jobs.update({ + 'job_ectyper_beautify_serotype': Job( + rq_job=job_ectyper_beautify_serotype, + name='job_ectyper_beautify_serotype', + transitory=False, + backlog=backlog, + display=True + ) + }) return d def blob_savvy_enqueue(single_dict, pipeline): @@ -257,8 +257,6 @@ def blob_savvy_enqueue(single_dict, pipeline): # VF if single_dict['options']['vf']: ectyper_vf_jobs = _ectyper_pipeline_vf( - singles_q, - multiples_q, query_file, single_dict, pipeline=pipeline @@ -282,17 +280,15 @@ def blob_savvy_enqueue(single_dict, pipeline): backlog_d['options']['serotype'] = False # Note: we use different queues. _ectyper_pipeline_vf( - backlog_singles_q, - backlog_multiples_q, query_file, - backlog_d + backlog_d, + pipeline=pipeline, + backlog=True ) # Serotype if single_dict['options']['serotype']: ectyper_serotype_jobs = _ectyper_pipeline_serotype( - singles_q, - multiples_q, query_file, single_dict, pipeline=pipeline @@ -315,10 +311,10 @@ def blob_savvy_enqueue(single_dict, pipeline): backlog_d['options']['vf'] = False backlog_d['options']['serotype'] = True _ectyper_pipeline_serotype( - backlog_singles_q, - backlog_multiples_q, query_file, - backlog_d + backlog_d, + pipeline=pipeline, + backlog=True ) # END ECTYPER PIPELINE From 9f0c91173c40330e6721b6df13de822f4c6a052f Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 15:15:59 -0500 Subject: [PATCH 061/122] DEBUG: check why to_json / model_to_json(model) is being sent a list for the model --- app/middleware/models.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/app/middleware/models.py b/app/middleware/models.py index 9e43e996..5b6fe877 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -37,6 +37,7 @@ def store(pipeline): :param pipeline: An instance of the models.Pipeline class. :return: (dict): {"pipeline..." id: "Subtyping"} """ + assert isinstance(pipeline, Pipeline) pipeline_id = "pipeline{0}".format(pipeline.sig) # Start a Redis connection. @@ -69,6 +70,7 @@ def load(pipeline_id): # Get the pipeline instance. 
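
store() and load() bracket a Pipeline's life in Redis: serialize under a deterministic "pipeline{sig}" key, then deserialize and type-check on the way back out. A toy round-trip under the same contract (assumes a reachable Redis; the stdlib pickler stands in for the dill-based one used here):

    import pickle
    import redis

    def roundtrip(obj, key, url='redis://localhost:6379'):
        conn = redis.from_url(url)
        conn.set(key, pickle.dumps(obj))
        loaded = pickle.loads(conn.get(key))
        # Fail fast on a corrupt or mistyped payload, mirroring the asserts above.
        assert type(loaded) is type(obj)
        return loaded
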
    raw = redis_connection.get(pipeline_id)
     pipeline = pickle.loads(raw)
+    assert isinstance(pipeline, Pipeline)
     return pipeline
 
 
@@ -188,6 +190,10 @@ def to_json(self):
         l = []
         for rq_job in completed_jobs:
             model = rq_job.result
+            try:
+                assert isinstance(model, models.Base)
+            except:
+                raise Exception("to_json() called with result of type {0} and info {1}".format(type(model), str(model)))
             list_json = model_to_json(model)
             l += list_json
         return l

From 53ef008b30dca717fed378103e8e7eca580d34fc Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 19 Feb 2018 15:28:26 -0500
Subject: [PATCH 062/122] DEBUG: looks like sometimes gives an empty list
---
 app/middleware/models.py | 2 ++
 app/routes/ra_posts.py   | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/app/middleware/models.py b/app/middleware/models.py
index 5b6fe877..cfabd765 100644
--- a/app/middleware/models.py
+++ b/app/middleware/models.py
@@ -160,6 +160,7 @@ def complete(self):
         """
         Check if all jobs are completed
         """
+        print("complete() checking status for: {0}".format(str(self.final_jobs)))
         for j in self.final_jobs:
             # Type check.
             assert isinstance(j, Job)
@@ -186,6 +187,7 @@ def to_json(self):
             j.rq_job for j in self.final_jobs
             if j.display and j.rq_job.is_finished and not j.rq_job.is_failed
         ]
+        print("to_json() completed_jobs: {0}".format(str(completed_jobs)))
         # Merge the json lists together.
         l = []
diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py
index 28e15d07..b9ecbff7 100644
--- a/app/routes/ra_posts.py
+++ b/app/routes/ra_posts.py
@@ -264,8 +264,10 @@ def upload():
     pipeline.cache_jobs()
     print 'upload(): all files enqueued, returning...'
     pipeline.merge_jobs()
+    print("upload() pipeline jobs: {0}".formate(str(pipeline.all_jobs)))
+    pipeline_id = store(pipeline)
     if groupresults:
-        return jsonify(store(pipeline))
+        return jsonify(pipeline_id)
         # return jsonify(handle_groupresults(jobs_dict))
     else:
         return jsonify(handle_singleton(jobs_dict))

From 283bc3fe01c13fcce0cc14a9c456ef231fb4db95 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 19 Feb 2018 15:30:43 -0500
Subject: [PATCH 063/122] DEBUG: check what's going on with the complete
 function as well
---
 app/middleware/models.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/app/middleware/models.py b/app/middleware/models.py
index cfabd765..701188f4 100644
--- a/app/middleware/models.py
+++ b/app/middleware/models.py
@@ -167,12 +167,15 @@ def complete(self):
             rq_job = j.rq_job
             if j.backlog:
                 # Some backlog job, we don't care (though Sentry will catch it).
+                print("complete(): job {0} is in backlog.".format(j.name))
                 continue
             elif rq_job.is_failed:
                 # If the job failed, return the error.
+                print("complete(): job {0} is failed with exc_info {1}.".format(j.name, rq_job.exc_info))
                 return rq_job.exc_info
             elif not rq_job.is_finished:
                 # One of the jobs hasn't finished.
+                print("complete(): job {0} has not finished.".format(j.name))
                 return False
         return True

From 8b62e29d4d9c3e516dd3f1556e4b47da6736caf5 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 19 Feb 2018 15:43:56 -0500
Subject: [PATCH 064/122] FIX: typo
---
 app/routes/ra_posts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py
index b9ecbff7..ab9124cc 100644
--- a/app/routes/ra_posts.py
+++ b/app/routes/ra_posts.py
@@ -264,7 +264,7 @@ def upload():
     pipeline.cache_jobs()
     print 'upload(): all files enqueued, returning...'
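
The debug prints added above trace complete()'s tri-state contract: True once every non-backlog job finished, False while anything is still pending, and the RQ exc_info string when a job failed. A reduced sketch of consuming that contract on the route side:

    def describe_status(complete_result):
        # Anything non-bool is the traceback string from the failed RQ job.
        if not isinstance(complete_result, bool):
            return 'failed: {0}'.format(complete_result)
        return 'done' if complete_result else 'pending'
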
pipeline.merge_jobs() - print("upload() pipeline jobs: {0}".formate(str(pipeline.all_jobs))) + print("upload() pipeline jobs: {0}".format(str(pipeline.all_jobs))) pipeline_id = store(pipeline) if groupresults: return jsonify(pipeline_id) From 04d7ffab6d9d7d900ca8551466a46756cb9eb78f Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 15:48:45 -0500 Subject: [PATCH 065/122] FIX: typo --- app/routes/ra_posts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/routes/ra_posts.py b/app/routes/ra_posts.py index ab9124cc..1f1f922e 100644 --- a/app/routes/ra_posts.py +++ b/app/routes/ra_posts.py @@ -264,7 +264,7 @@ def upload(): pipeline.cache_jobs() print 'upload(): all files enqueued, returning...' pipeline.merge_jobs() - print("upload() pipeline jobs: {0}".format(str(pipeline.all_jobs))) + print("upload() pipeline jobs: {0}".format(str(pipeline.final_jobs))) pipeline_id = store(pipeline) if groupresults: return jsonify(pipeline_id) From 55af49cfe8a2ac3b6e97b2fc3dd68c3b66680cc0 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 16:39:42 -0500 Subject: [PATCH 066/122] ADD: some checks that beautify() is returning a list not dict --- app/middleware/modellers.py | 1 + app/tests/test_beautify.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index 06aa2b8e..fd10a2c8 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -39,6 +39,7 @@ def model_vf(json_r): """ # Type check. assert isinstance(json_r, list) + print("model_vf() called with type {0} containing {1}".format(type(json_r), str(json_r))) subtyping_list = [ SubtypingRow( analysis=item('analysis'), diff --git a/app/tests/test_beautify.py b/app/tests/test_beautify.py index 6b97a814..aa73caf1 100644 --- a/app/tests/test_beautify.py +++ b/app/tests/test_beautify.py @@ -13,7 +13,9 @@ def test_beautify_vf_serotype(): ## test vf & serotype json return single_dict = dict(ARGS_DICT) single_dict.update({'i': vf_serotype_gene_dict}) - assert len(beautify(vf_serotype_gene_dict, single_dict)) == len(BEAUTIFY_VF_SEROTYPE) + r = beautify(vf_serotype_gene_dict, single_dict) + assert isinstance(r, list) + assert len(r) == len(BEAUTIFY_VF_SEROTYPE) def test_beautify_serotype_only(): ## test serotype only json return @@ -25,6 +27,7 @@ def test_beautify_serotype_only(): single_dict.update({'options':{'vf': False, 'amr': False, 'serotype': True}}) # beautify is what is actually called by the RQ worker & returned to the user r = beautify(vf_serotype_gene_dict, single_dict) + assert isinstance(r, list) assert len(r) == 1 def test_beautify_json_r_serotype_only(): @@ -49,6 +52,7 @@ def test_beautify_amr_only(): # this mimicks user selection of serotype only single_dict.update({'options':{'vf': False, 'amr': True, 'serotype': False}}) r = beautify(amr_gene_dict, single_dict) + assert isinstance(r, list) assert len(r) > 1 def test_beautify_json_r_amr_only(): From 360c02b6bfab32cc6a3de87d0640d3cd4566a435 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 17:22:44 -0500 Subject: [PATCH 067/122] DEBUG: keep same return for beautify() so we can see where tests fail --- app/middleware/display/beautify.py | 35 +++++++++++++++++------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index 69f18a29..ab356beb 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -111,7 +111,7 @@ def 
handle_failed(json_r, args_dict): return ret # TODO: convert this to models-only. -def beautify(pickled_result, args_dict=None): +def beautify(gene_dict, args_dict=None): ''' Converts a given 'spit' datum (a dictionary with our results from rgi/ectyper) to a json form used by the frontend. This result is to be stored in Redis by the calling RQ Worker. :param args_dict: The arguments supplied by the user. In the case of spfy web-app, this is used to determine which analysis options were set. @@ -119,23 +119,28 @@ def beautify(pickled_result, args_dict=None): :param gene_dict: optionally, if using this to test via cli, you can supply the actual dictionary object. :return: json representation of the results, as required by the front-end. ''' + # Convert the old ECTYper's dictionary structure into list and adds metadata (filename, etc.). + json_r = json_return(args_dict, gene_dict) + # For VF/AMR, find widest gene matched. Strip shorter matches. + if args_dict['options']['vf'] or args_dict['options']['amr']: + json_r = check_alleles(json_r) + # Check if there is an analysis module that has failed in the result. + if has_failed(json_r): + # If failed, return. + return handle_failed(json_r, args_dict) + else: + return json_r + # Everything worked, cast result into a model. + model = model_vf(json_r) + return model_to_json(model) +def display_subtyping(pickled_result, args_dict=None): result = pickle.load(open(pickled_result, 'rb')) if isinstance(result, dict): - gene_dict = result - # Convert the old ECTYper's dictionary structure into list and adds metadata (filename, etc.). - json_r = json_return(args_dict, gene_dict) - # For VF/AMR, find widest gene matched. Strip shorter matches. - if args_dict['options']['vf'] or args_dict['options']['amr']: - json_r = check_alleles(json_r) - # Check if there is an analysis module that has failed in the result. - if has_failed(json_r): - # If failed, return. - return handle_failed(json_r, args_dict) - else: - # Everything worked, cast result into a model. - model = model_vf(json_r) - return model_to_json(model) + list_return = beautify(result, args_dict) + assert isinstance(list_return, list) + model = model_vf(json_r) + return model_to_json(model) elif isinstance(result, SubtypingResult): return model_to_json(result) else: From 6c8386aeab9e59d0bf13824736b1ec98d3f7cc2e Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 17:55:20 -0500 Subject: [PATCH 068/122] DEBUG: let beautify load --- app/middleware/display/beautify.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index ab356beb..d0a6a776 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -12,7 +12,7 @@ log = logging.getLogger(__name__) -def json_return(args_dict, gene_dict): +def json_return(gene_dict, args_dict): """ This converts the gene dict into a json format for return to the front end """ @@ -119,8 +119,10 @@ def beautify(gene_dict, args_dict=None): :param gene_dict: optionally, if using this to test via cli, you can supply the actual dictionary object. :return: json representation of the results, as required by the front-end. ''' + if isinstance(gene_dict, str): # For the tests. + gene_dict = pickle.load(open(gene_dict, 'rb')) # Convert the old ECTYper's dictionary structure into list and adds metadata (filename, etc.). - json_r = json_return(args_dict, gene_dict) + json_r = json_return(gene_dict, args_dict) # For VF/AMR, find widest gene matched. 
Strip shorter matches. if args_dict['options']['vf'] or args_dict['options']['amr']: json_r = check_alleles(json_r) From 5d61f695afe4d229e60318817bdaee60f2e753cc Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 18:31:00 -0500 Subject: [PATCH 069/122] FIX: tests call named params --- app/tests/test_beautify.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/tests/test_beautify.py b/app/tests/test_beautify.py index aa73caf1..3fbab9ed 100644 --- a/app/tests/test_beautify.py +++ b/app/tests/test_beautify.py @@ -40,7 +40,7 @@ def test_beautify_json_r_serotype_only(): gene_dict = pickle.load(open(vf_serotype_gene_dict, 'rb')) assert type(gene_dict) == dict assert len(gene_dict.keys()) == 2 - r = json_return(single_dict, gene_dict) + r = json_return(gene_dict=gene_dict, args_dict=single_dict) assert len(r) == 1 failed = has_failed(r) @@ -64,7 +64,7 @@ def test_beautify_json_r_amr_only(): assert type(gene_dict) == dict assert len(gene_dict.keys()) == 1 assert 'Antimicrobial Resistance' in gene_dict.keys() - r = json_return(single_dict, gene_dict) + r = json_return(gene_dict=gene_dict, args_dict=single_dict) assert len(r) > 1 ## test some pandas stuff on the json_r From 31a9c2bf3b4bb3398d10d8a023a851ad1df08023 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 21:38:23 -0500 Subject: [PATCH 070/122] CHANGE: tests passing, try centralizing the unpickling --- app/middleware/display/beautify.py | 10 +++++----- app/middleware/graphers/datastruct_savvy.py | 4 ++-- app/middleware/models.py | 9 +++++++++ app/modules/spfy.py | 13 +++++-------- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index d0a6a776..f439fde9 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -4,7 +4,7 @@ from modules.loggingFunctions import initialize_logging from middleware.display.find_widest import check_alleles from middleware.graphers.turtle_utils import actual_filename -from middleware.models import SubtypingResult, model_to_json +from middleware.models import SubtypingResult, model_to_json, unpickle from middleware.modellers import model_vf # logging @@ -133,15 +133,15 @@ def beautify(gene_dict, args_dict=None): else: return json_r # Everything worked, cast result into a model. 
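
display_subtyping() is becoming a dispatcher on the unpickled payload's type: plain dicts from the old ECTyper/RGI path still go through beautify(), while pre-modelled serotype results convert straight to the front-end form. A sketch of that dispatch with hypothetical handler callables:

    import pickle

    def display(path, handle_gene_dict, handle_model):
        with open(path, 'rb') as fh:
            result = pickle.load(fh)
        if isinstance(result, dict):
            return handle_gene_dict(result)  # old ECTyper / RGI payload
        return handle_model(result)          # pre-modelled serotype result
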
- model = model_vf(json_r) - return model_to_json(model) + # model = model_vf(json_r) + # return model_to_json(model) def display_subtyping(pickled_result, args_dict=None): - result = pickle.load(open(pickled_result, 'rb')) + result = unpickle(pickled_result) if isinstance(result, dict): list_return = beautify(result, args_dict) assert isinstance(list_return, list) - model = model_vf(json_r) + model = model_vf(list_return) return model_to_json(model) elif isinstance(result, SubtypingResult): return model_to_json(result) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index 9c6069e2..43b729ee 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -4,7 +4,7 @@ from middleware.graphers.turtle_grapher import generate_graph from middleware.blazegraph.upload_graph import queue_upload from modules.PanPredic.pan_utils import contig_name_parse -from middleware.models import SubtypingResult +from middleware.models import SubtypingResult, unpickle # working with Serotype, Antimicrobial Resistance, & Virulence Factor data # structures @@ -196,7 +196,7 @@ def generate_datastruct(query_file, id_file, pickled_dictionary): uriIsolate = gu(':spfy' + str(spfyid)) # Unpickle. - results = pickle.load(open(pickled_dictionary, 'rb')) + results = unpickle(pickled_dictionary) # Check if we have a model or a dictionary. if isinstance(results, dict): # graphing functions diff --git a/app/middleware/models.py b/app/middleware/models.py index 701188f4..cc01d2e4 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -73,6 +73,15 @@ def load(pipeline_id): assert isinstance(pipeline, Pipeline) return pipeline +def unpickle(pickled_file): + """ + Define a function for unpickling. Should address issues with unpickling custom classes. + :param pickled_file: + :return: + """ + unpickled = pickle.load(open(pickled_file, 'rb')) + assert isinstance(unpickled, (models.Base, Pipeline, dict, list)) + return unpickled class SubtypingRow(models.Base): analysis = fields.StringField(required=True) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index e1fb7cd0..a1a9b6fd 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -21,7 +21,7 @@ from modules.ectyper.call_ectyper import call_ectyper_vf, call_ectyper_serotype from modules.amr.amr import amr from modules.amr.amr_to_dict import amr_to_dict -from middleware.display.beautify import beautify +from middleware.display.beautify import beautify, display_subtyping from middleware.graphers.datastruct_savvy import datastruct_savvy from middleware.graphers.turtle_grapher import turtle_grapher from middleware.graphers.turtle_utils import actual_filename @@ -117,7 +117,7 @@ def _ectyper_pipeline_vf(query_file, single_dict, pipeline=None, backlog=False): # Only bother parsing into json if user has requested either vf or # serotype, and we're not in bulk uploading. job_ectyper_beautify_vf = multiples.enqueue( - beautify, + display_subtyping, query_file + '_ectyper_vf.p', single_dict, depends_on=job_ectyper_vf, @@ -150,13 +150,10 @@ def _ectyper_pipeline_serotype(query_file, single_dict, pipeline=None, backlog=F singles = backlog_singles_q multiples = backlog_multiples_q - # Create a copy of the arguments dictionary and disable Serotype. - # This copy is passed to the old ECTyper. 
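
The new unpickle() helper centralizes deserialization behind a type whitelist, so a bad payload fails at load time rather than deep inside a worker. The same guard reduced to its core; the allowed-types tuple here is an assumption to be extended as payload types change:

    import pickle

    ALLOWED_TYPES = (dict, list)

    def safe_unpickle(path):
        with open(path, 'rb') as fh:
            payload = pickle.load(fh)
        if not isinstance(payload, ALLOWED_TYPES):
            raise TypeError('unexpected payload {0!r} in {1}'.format(type(payload), path))
        return payload
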
- single_dict_vf = copy.deepcopy(single_dict) # Enqueue the new ECTyper job_ectyper_serotype = multiples.enqueue( call_ectyper_serotype, - single_dict_vf, + single_dict, depends_on=job_id) d['job_ectyper_serotype'] = job_ectyper_serotype pipeline.jobs.update({ @@ -199,8 +196,8 @@ def _ectyper_pipeline_serotype(query_file, single_dict, pipeline=None, backlog=F # Only bother parsing into json if user has requested either vf or # serotype, and we're not in bulk uploading. job_ectyper_beautify_serotype = multiples.enqueue( - beautify, - pickled_result = query_file + '_ectyper_serotype.model', + display_subtyping, + query_file + '_ectyper_serotype.model', depends_on=job_ectyper_serotype, result_ttl=ttl_value ) From 7c72205015d069958b661315fb83c19a327d8636 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 22:14:13 -0500 Subject: [PATCH 071/122] DEBUG: "json_r" is being seen as a dict for some reason --- app/middleware/display/beautify.py | 2 +- app/middleware/modellers.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index f439fde9..e9c57798 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -139,7 +139,7 @@ def beautify(gene_dict, args_dict=None): def display_subtyping(pickled_result, args_dict=None): result = unpickle(pickled_result) if isinstance(result, dict): - list_return = beautify(result, args_dict) + list_return = beautify(gene_dict=result, args_dict=args_dict) assert isinstance(list_return, list) model = model_vf(list_return) return model_to_json(model) diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index fd10a2c8..04cfb3ed 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -33,13 +33,13 @@ def model_serotype(pi, pl, output_file): ) return subtyping_result -def model_vf(json_r): +def model_vf(lst): """ Casts the output from display.beautify into a SubtypingResult object. """ # Type check. - assert isinstance(json_r, list) - print("model_vf() called with type {0} containing {1}".format(type(json_r), str(json_r))) + assert isinstance(lst, list) + print("model_vf() called with type {0} containing {1}".format(type(lst), str(lst))) subtyping_list = [ SubtypingRow( analysis=item('analysis'), @@ -51,7 +51,7 @@ def model_vf(json_r): hitstart=item['hitstart'], hitstop=item['hitstop'] ) - for item in json_r] + for item in lst] # Convert the list of rows into a SubtypingResult model. subtyping_result = SubtypingResult( rows = subtyping_list From 9b4a37f4e6627494a3616bd2295219f1bc368017 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 19 Feb 2018 22:36:53 -0500 Subject: [PATCH 072/122] DEBUG: log of name of the job that causes to_json() to be called with an rq_job.result containing an empty list --- app/middleware/models.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index cc01d2e4..9656ddc4 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -196,18 +196,19 @@ def to_json(self): """ # Gather all the jobs that have finished and haven't failed. completed_jobs = [ - j.rq_job for j in self.final_jobs + j for j in self.final_jobs if j.display and j.rq_job.is_finished and not j.rq_job.is_failed ] print("to_json() completed_jobs: {0}".format(str(completed_jobs))) # Merge the json lists together. 
        l = []
-        for rq_job in completed_jobs:
+        for j in completed_jobs:
+            rq_job = j.rq_job
             model = rq_job.result
             try:
                 assert isinstance(model, models.Base)
             except:
-                raise Exception("to_json() called with result of type {0} and info {1}".format(type(model), str(model)))
+                raise Exception("to_json() called for job {0} with result of type {1} and info {2}".format(j.name, type(model), str(model)))
             list_json = model_to_json(model)
             l += list_json
         return l

From f3801d9d50605f53075900b1a70a027ed2bd68f2 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 19 Feb 2018 23:04:54 -0500
Subject: [PATCH 073/122] STOP: looks like I was also looking in the wrong
 place; it's call_ectyper_serotype that returns a SubtypingResult(rows=[]).
 I wonder if it's the call, or whether the jsonmodels classes treat the
 attributes as instances (and can't be pickled)
---
 app/middleware/models.py            | 4 ++++
 app/modules/ectyper/call_ectyper.py | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/app/middleware/models.py b/app/middleware/models.py
index 9656ddc4..8ae26ad2 100644
--- a/app/middleware/models.py
+++ b/app/middleware/models.py
@@ -83,6 +83,9 @@ def unpickle(pickled_file):
     assert isinstance(unpickled, (models.Base, Pipeline, dict, list))
     return unpickled
 
+def dump(obj, path):
+    pickle.dump(obj, open(path, 'wb'))
+
 class SubtypingRow(models.Base):
     analysis = fields.StringField(required=True)
     contigid = fields.StringField(required=True)
diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py
index 92511343..df904715 100644
--- a/app/modules/ectyper/call_ectyper.py
+++ b/app/modules/ectyper/call_ectyper.py
@@ -9,6 +9,7 @@
 from os.path import basename
 from modules.loggingFunctions import initialize_logging
 from middleware.modellers import model_serotype
+from middleware.models import dump
 
 log_file = initialize_logging()
 log = logging.getLogger(__name__)
@@ -93,7 +94,7 @@ def call_ectyper_serotype(args_dict):
     )
     # Path for the pickle dump.
     p = genome_file + '_ectyper_serotype.model'
-    pickle.dump(subtyping_result,open(p,'wb'))
+    dump(subtyping_result, p)
         return p
     else:
         raise Exception('ECTyper Serotyping failed for' + genome_file)

From a5d0e67e58acf8f78cbc24e2d1610e67687a840c Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 19 Feb 2018 23:12:41 -0500
Subject: [PATCH 074/122] ADD: have tests also check we don't have empties
---
 app/middleware/models.py  | 10 +++++-----
 app/tests/test_modules.py |  7 ++++++-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/app/middleware/models.py b/app/middleware/models.py
index 8ae26ad2..c66e5fe4 100644
--- a/app/middleware/models.py
+++ b/app/middleware/models.py
@@ -2,7 +2,7 @@
 import copy
 import config
 import redis
-import dill as pickle
+import dill
 from hashlib import sha1
 from dis import dis
 from StringIO import StringIO
@@ -45,7 +45,7 @@ def store(pipeline):
     redis_connection = redis.from_url(redis_url)
 
     # Store the pipeline instance.
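
Patches 073/074 above move serialization to dill because the jsonmodels-based results were not round-tripping. The practical difference: dill serializes objects the stdlib pickler rejects, closures being the classic case. A self-contained demonstration (requires the dill package):

    import dill

    def make_counter():
        count = [0]
        def bump():
            count[0] += 1
            return count[0]
        return bump

    # pickle.dumps(make_counter()) raises PicklingError; dill round-trips it.
    counter = dill.loads(dill.dumps(make_counter()))
    assert counter() == 1
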
- redis_connection.set(pipeline_id, pickle.dumps(pipeline)) + redis_connection.set(pipeline_id, dill.dumps(pipeline)) # Create a similar structure to the old return d = {} @@ -69,7 +69,7 @@ def load(pipeline_id): # Get the pipeline instance. raw = redis_connection.get(pipeline_id) - pipeline = pickle.loads(raw) + pipeline = dill.loads(raw) assert isinstance(pipeline, Pipeline) return pipeline @@ -79,12 +79,12 @@ def unpickle(pickled_file): :param pickled_file: :return: """ - unpickled = pickle.load(open(pickled_file, 'rb')) + unpickled = dill.load(open(pickled_file, 'rb')) assert isinstance(unpickled, (models.Base, Pipeline, dict, list)) return unpickled def dump(obj, path): - pickle.dump(obj, open(path, 'wb')) + dill.dump(obj, open(path, 'wb')) class SubtypingRow(models.Base): analysis = fields.StringField(required=True) diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index af9541c7..cb690d9f 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -15,6 +15,7 @@ from middleware.display.beautify import beautify, model_to_json from middleware.graphers.datastruct_savvy import datastruct_savvy from middleware.graphers.turtle_grapher import turtle_grapher +from middleware.models import unpickle from tests.constants import ARGS_DICT @@ -88,14 +89,18 @@ def test_ectyper_serotype(): single_dict = dict(ARGS_DICT) single_dict.update({'i':ecoli_genome}) pickled_serotype_model = call_ectyper_serotype(single_dict) - ectyper_serotype_model = pickle.load(open(pickled_serotype_model,'rb')) + ectyper_serotype_model = unpickle(pickled_serotype_model) # Validate (throws error if invalidate). ectyper_serotype_model.validate() + # Check that the return rows is not some random empty list. + assert ectyper_serotype_model.rows # Check the conversion for the front-end. json_r = model_to_json(ectyper_serotype_model) # This is not strictly json; more like a list than a dict structure. assert isinstance(json_r, list) + # Check that this isn't empty. + assert json_r def test_amr(): ecoli_genome = GENOMES_LIST_ECOLI[0] From 3d103cc0e1b93348e2ce8a4b7dab99d45c19166a Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 20 Feb 2018 11:13:43 -0500 Subject: [PATCH 075/122] ADD: tests agasint call_ectyper_serotype for pickle/not pickle --- app/modules/ectyper/call_ectyper.py | 13 +++++---- app/tests/test_modules.py | 44 ++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py index df904715..416e882c 100644 --- a/app/modules/ectyper/call_ectyper.py +++ b/app/modules/ectyper/call_ectyper.py @@ -66,7 +66,7 @@ def call_ectyper_vf(args_dict): return p -def call_ectyper_serotype(args_dict): +def call_ectyper_serotype(args_dict, pickle=True): """Use the new version of ECTyper at `master` for serotyping. """ genome_file = args_dict['i'] @@ -92,9 +92,12 @@ def call_ectyper_serotype(args_dict): pl=pl, output_file=output_file ) - # Path for the pickle dump. - p = genome_file + '_ectyper_serotype.model' - dump(subtyping_result, p) - return p + if pickle: + # Path for the pickle dump. 
+ p = genome_file + '_ectyper_serotype.model' + dump(subtyping_result, p) + return p + else: + return subtyping_result else: raise Exception('ECTyper Serotyping failed for' + genome_file) diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index cb690d9f..927f41c1 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -76,7 +76,19 @@ def test_ectyper_vf(): json_return = beautify(pickled_ectyper_dict, single_dict) assert type(json_return) == list -def test_ectyper_serotype(): +def _validate_model(model): + # Validate (throws error if invalidate). + model.validate() + # Check that the return rows is not some random empty list. + assert model.rows + # Check the conversion for the front-end. + r = model_to_json(model) + # This is not really json; more like a list than a dict structure. + assert isinstance(r, list) + # Check that this isn't empty. + assert r + +def test_ectyper_serotype_direct(): """Check the ECTyper from `master` which only performs serotyping. Installed in the conda environment. """ @@ -85,22 +97,28 @@ def test_ectyper_serotype(): ret_code = subprocess.call(['ectyper', '-i', ecoli_genome]) assert ret_code == 0 - # Check the actual call from Spfy's code. +def test_ectyper_serotype_call_nopickle(): + """ + Check the actual call from Spfy's code. + """ + for ecoli_genome in GENOMES_LIST_ECOLI: + single_dict = dict(ARGS_DICT) + single_dict.update({'i':ecoli_genome}) + # Have the call return the model without pickling. + serotype_model = call_ectyper_serotype(single_dict, pickle=False) + _validate_model(serotype_model) + +def test_ectyper_serotype_call_pickle(): + """ + Check the actual call from Spfy's code. + """ + for ecoli_genome in GENOMES_LIST_ECOLI: single_dict = dict(ARGS_DICT) single_dict.update({'i':ecoli_genome}) + # Pickle the model, and return the path to the file. pickled_serotype_model = call_ectyper_serotype(single_dict) ectyper_serotype_model = unpickle(pickled_serotype_model) - # Validate (throws error if invalidate). - ectyper_serotype_model.validate() - # Check that the return rows is not some random empty list. - assert ectyper_serotype_model.rows - - # Check the conversion for the front-end. - json_r = model_to_json(ectyper_serotype_model) - # This is not strictly json; more like a list than a dict structure. - assert isinstance(json_r, list) - # Check that this isn't empty. - assert json_r + _validate_model(pickled_serotype_model) def test_amr(): ecoli_genome = GENOMES_LIST_ECOLI[0] From 1edb480356a213ecbb26849dc86f9d8e101dad55 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 20 Feb 2018 12:17:28 -0500 Subject: [PATCH 076/122] CHANGE: not sure if jsonmodels will let me do this --- app/middleware/models.py | 44 +++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index c66e5fe4..2d33556d 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -76,8 +76,8 @@ def load(pipeline_id): def unpickle(pickled_file): """ Define a function for unpickling. Should address issues with unpickling custom classes. 
- :param pickled_file: - :return: + :param pickled_file: + :return: """ unpickled = dill.load(open(pickled_file, 'rb')) assert isinstance(unpickled, (models.Base, Pipeline, dict, list)) @@ -87,30 +87,36 @@ def dump(obj, path): dill.dump(obj, open(path, 'wb')) class SubtypingRow(models.Base): - analysis = fields.StringField(required=True) - contigid = fields.StringField(required=True) - filename = fields.StringField(required=True) - hitcutoff = fields.StringField(nullable=True) - hitname = fields.StringField(required=True) - hitorientation = fields.StringField(nullable=True) - hitstart = fields.StringField(nullable=True) - hitstop = fields.StringField(nullable=True) + def __init__(self, analysis="", contigid="", filename="", hitcutoff="", hitname="", hitorientation="", hitstart="",hitstop=""): + self.analysis = analysis + self.contigid = contigid + self.filename = filename + self.hitcutoff = hitcutoff + self.hitname = hitname + self.hitorientation = hitorientation + self.hitstart = hitstart + self.hitstop = hitstop class SubtypingResult(models.Base): - rows = fields.ListField([SubtypingRow], nullable=True) + def __init__(self, rows=None): + if not rows: + rows = [] + self.rows = rows class PhylotyperRow(models.Base): - contig = fields.StringField(nullable=True) - genome = fields.StringField() - probability = fields.StringField(nullable=True) # actually float - start = fields.StringField(nullable=True) # actually int - stop = fields.StringField(nullable=True) # actually int - subtype = fields.StringField() - subtype_gene = fields.StringField(nullable=True) + def __init__(self): + self.contig = fields.StringField(nullable=True) + self.genome = fields.StringField() + self.probability = fields.StringField(nullable=True) # actually float + self.start = fields.StringField(nullable=True) # actually int + self.stop = fields.StringField(nullable=True) # actually int + self.subtype = fields.StringField() + self.subtype_gene = fields.StringField(nullable=True) class PhylotyperResult(models.Base): - rows = fields.ListField([PhylotyperRow], nullable=True) + def __init__(self): + self.rows = fields.ListField([PhylotyperRow], nullable=True) class Job(): From 5b33d63f251b25de4d30c710dd10683b0163b04a Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 20 Feb 2018 13:13:04 -0500 Subject: [PATCH 077/122] FIX: one of the tests --- app/tests/test_modules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index 927f41c1..cf5d2cf4 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -118,7 +118,7 @@ def test_ectyper_serotype_call_pickle(): # Pickle the model, and return the path to the file. pickled_serotype_model = call_ectyper_serotype(single_dict) ectyper_serotype_model = unpickle(pickled_serotype_model) - _validate_model(pickled_serotype_model) + _validate_model(ectyper_serotype_model) def test_amr(): ecoli_genome = GENOMES_LIST_ECOLI[0] From 4b13e49d929f57137156fda3ea6754f8a5c161db Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Tue, 20 Feb 2018 16:25:37 -0500 Subject: [PATCH 078/122] DEBUG: im guessing the to_struct() method from jsonmodels no longer works --- app/middleware/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/middleware/models.py b/app/middleware/models.py index 2d33556d..ce1efa45 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -12,6 +12,8 @@ def _convert_model(model): # Convert the model to a generic JSON structure. 
struct = model.to_struct() + # Check that struct isn't empty. + assert struct if 'rows' in struct: # This is not strictly json; more like a list than a dict structure. rows_list = struct['rows'] From c26710619c817890eda60aa6fa0fad93800b6fb2 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 7 Mar 2018 11:12:24 -0500 Subject: [PATCH 079/122] CHANGE: just define a list generating function --- app/middleware/display/beautify.py | 2 +- app/middleware/modellers.py | 24 ++++++++++----------- app/middleware/models.py | 34 ++++++++++++++++-------------- app/tests/test_modules.py | 10 ++++----- 4 files changed, 36 insertions(+), 34 deletions(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index e9c57798..7933b086 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -143,7 +143,7 @@ def display_subtyping(pickled_result, args_dict=None): assert isinstance(list_return, list) model = model_vf(list_return) return model_to_json(model) - elif isinstance(result, SubtypingResult): + elif isinstance(result, list): return model_to_json(result) else: raise Exception("beautify() could not handle pickled file: {0}.".format(pickled_result)) diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index 04cfb3ed..9adcef69 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -15,7 +15,7 @@ def model_serotype(pi, pl, output_file): # Loop. subtyping_list = [ - SubtypingRow( + { analysis='Serotype', contigid='n/a', filename=actual_filename(row['genome']), @@ -24,14 +24,14 @@ def model_serotype(pi, pl, output_file): hitorientation='n/a', hitstart='n/a', hitstop='n/a' - ) + } for index, row in df.iterrows()] # Convert the list of rows into a SubtypingResult model. - subtyping_result = SubtypingResult( - rows = subtyping_list - ) - return subtyping_result + # subtyping_result = SubtypingResult( + # rows = subtyping_list + # ) + return subtyping_list def model_vf(lst): """ @@ -41,7 +41,7 @@ def model_vf(lst): assert isinstance(lst, list) print("model_vf() called with type {0} containing {1}".format(type(lst), str(lst))) subtyping_list = [ - SubtypingRow( + { analysis=item('analysis'), contigid=item['contigid'], filename=item['filename'], @@ -50,10 +50,10 @@ def model_vf(lst): hitorientation=item['hitorientation'], hitstart=item['hitstart'], hitstop=item['hitstop'] - ) + } for item in lst] # Convert the list of rows into a SubtypingResult model. - subtyping_result = SubtypingResult( - rows = subtyping_list - ) - return subtyping_result + # subtyping_result = SubtypingResult( + # rows = subtyping_list + # ) + return subtyping_list diff --git a/app/middleware/models.py b/app/middleware/models.py index ce1efa45..08ce3dc1 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -9,29 +9,31 @@ from jsonmodels import models, fields from middleware.graphers.turtle_utils import actual_filename -def _convert_model(model): - # Convert the model to a generic JSON structure. - struct = model.to_struct() - # Check that struct isn't empty. - assert struct - if 'rows' in struct: - # This is not strictly json; more like a list than a dict structure. - rows_list = struct['rows'] - return rows_list - else: - return struct +# def _convert_model(model): +# # Convert the model to a generic JSON structure. +# struct = model.to_struct() +# # Check that struct isn't empty. +# assert struct +# if 'rows' in struct: +# # This is not strictly json; more like a list than a dict structure. 
+# rows_list = struct['rows'] +# return rows_list +# else: +# return struct def model_to_json(model): """ Converts models to json for the front-end. """ # Validate the model submitted before processing. - model.validate() + assert isinstance(model, list) + # model.validate() # Conversion. - if isinstance(model, models.Base): - return _convert_model(model) - else: - raise Exception('model_to_json() called for a model without a handler.') + return model + # if isinstance(model, models.Base): + # return _convert_model(model) + # else: + # raise Exception('model_to_json() called for a model without a handler.') def store(pipeline): """ diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index cf5d2cf4..db280e68 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -78,15 +78,15 @@ def test_ectyper_vf(): def _validate_model(model): # Validate (throws error if invalidate). - model.validate() + # model.validate() # Check that the return rows is not some random empty list. - assert model.rows + # assert model.rows # Check the conversion for the front-end. - r = model_to_json(model) + # r = model_to_json(model) # This is not really json; more like a list than a dict structure. - assert isinstance(r, list) + assert isinstance(model, list) # Check that this isn't empty. - assert r + assert model def test_ectyper_serotype_direct(): """Check the ECTyper from `master` which only performs serotyping. From f1e2d3068c4770648e36c145b365d5b248ac9b2e Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 7 Mar 2018 11:37:55 -0500 Subject: [PATCH 080/122] CHANGE: just define a list generating function --- app/middleware/modellers.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index 9adcef69..59b1bed0 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -16,14 +16,14 @@ def model_serotype(pi, pl, output_file): # Loop. subtyping_list = [ { - analysis='Serotype', - contigid='n/a', - filename=actual_filename(row['genome']), - hitcutoff=str(pi), - hitname="{0}:{1}".format(row['O_prediction'],row['H_prediction']), - hitorientation='n/a', - hitstart='n/a', - hitstop='n/a' + 'analysis':'Serotype', + 'contigid':'n/a', + 'filename':actual_filename(row['genome']), + 'hitcutoff':str(pi), + 'hitname':"{0}:{1}".format(row['O_prediction'],row['H_prediction']), + 'hitorientation':'n/a', + 'hitstart':'n/a', + 'hitstop':'n/a' } for index, row in df.iterrows()] @@ -42,14 +42,14 @@ def model_vf(lst): print("model_vf() called with type {0} containing {1}".format(type(lst), str(lst))) subtyping_list = [ { - analysis=item('analysis'), - contigid=item['contigid'], - filename=item['filename'], - hitcutoff=item['hitcutoff'], - hitname=item['hitname'], - hitorientation=item['hitorientation'], - hitstart=item['hitstart'], - hitstop=item['hitstop'] + 'analysis':item('analysis'), + 'contigid':item['contigid'], + 'filename':item['filename'], + 'hitcutoff':item['hitcutoff'], + 'hitname':item['hitname'], + 'hitorientation':item['hitorientation'], + 'hitstart':item['hitstart'], + 'hitstop':item['hitstop'] } for item in lst] # Convert the list of rows into a SubtypingResult model. 
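
With the jsonmodels classes retired, model_serotype() reduces to reshaping the ECTyper CSV into plain row dicts. A stand-alone version of that reshaping, assuming pandas and the genome/O_prediction/H_prediction columns read above:

    import pandas as pd

    def rows_from_ectyper_csv(path, pi='90'):
        df = pd.read_csv(path)
        return [{
            'analysis': 'Serotype',
            'filename': row['genome'],
            'hitcutoff': str(pi),
            'hitname': '{0}:{1}'.format(row['O_prediction'], row['H_prediction']),
        } for _, row in df.iterrows()]
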
From 93693616f169e40cba476d06c8fdd3b3ba832d06 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 7 Mar 2018 12:27:33 -0500 Subject: [PATCH 081/122] FIX: have tests use new funcs --- app/middleware/models.py | 2 +- app/tests/test_models.py | 199 ++++++++++++++++++--------------------- 2 files changed, 93 insertions(+), 108 deletions(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index 08ce3dc1..a3924531 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -220,7 +220,7 @@ def to_json(self): model = rq_job.result try: # TODO: This is not correct as while the new ECTYper call does return a model, the display_subtyping() call that the return job is associated with will already convert the result to a list and return it. - assert isinstance(model, models.Base) + assert isinstance(model, (models.Base,list)) except: raise Exception("to_json() called for job {0} with result of type {1} and info {2}".format(j.name, type(model), str(model))) list_json = model_to_json(model) diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 6605f118..9bd811c7 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -1,5 +1,5 @@ import dill -from middleware import models +from middleware import models, modellers from modules.spfy import spfy from scripts.savvy import savvy from tests import constants @@ -19,46 +19,31 @@ def test_subtyping_model_direct(l=constants.BEAUTIFY_VF_SEROTYPE): """ Use our dataset to directly create a subtyping results model and validate it. """ - subtyping_list = [ - models.SubtypingRow( - analysis=d['analysis'], - contigid=d['contigid'], - filename=d['filename'], - hitcutoff=str(d['hitcutoff']), - hitname=d['hitname'], - hitorientation=d['hitorientation'], - hitstart=str(d['hitstart']), - hitstop=str(d['hitstop']) - ) - for d in l] - subtyping_result = models.SubtypingResult( - rows = subtyping_list - ) - subtyping_result.validate() - # Return for incorporation into later tests. - return subtyping_result - -def test_phylotyper_model_direct(l=constants.BEAUTIFY_STX1): - """ - Use our dataset to directly create a phylotyper results model and validate it. - """ - phylotyper_list = [ - models.PhylotyperRow( - contig=d['contig'], - genome=d['genome'], - probability=str(d['probability']), - start=str(d['start']), - stop=str(d['stop']), - subtype=d['subtype'], - subtype_gene=d['subtype_gene'] - ) - for d in l] - phylotyper_result = models.PhylotyperResult( - rows = phylotyper_list - ) - phylotyper_result.validate() + subtyping_list = modellers.model_vf(l) # Return for incorporation into later tests. - return phylotyper_result + return subtyping_list + +# def test_phylotyper_model_direct(l=constants.BEAUTIFY_STX1): +# """ +# Use our dataset to directly create a phylotyper results model and validate it. +# """ +# phylotyper_list = [ +# models.PhylotyperRow( +# contig=d['contig'], +# genome=d['genome'], +# probability=str(d['probability']), +# start=str(d['start']), +# stop=str(d['stop']), +# subtype=d['subtype'], +# subtype_gene=d['subtype_gene'] +# ) +# for d in l] +# phylotyper_result = models.PhylotyperResult( +# rows = phylotyper_list +# ) +# phylotyper_result.validate() +# # Return for incorporation into later tests. +# return phylotyper_result def _create_example_pipeline(): p = models.Pipeline( @@ -145,73 +130,73 @@ def test_pipeline_model_dill(): # Run the same tests on the loaded pipeline. 
test_pipeline_model_subtyping(p=loaded_pipeline) -def test_pipeline_model_phyotyping(): - """ - Test the Pipeline model itself for subtyping via Phylotyper. - """ - p = models.Pipeline( - func = spfy, - options = constants.ARGS_DICT - ) - mock_stx1 = MockRQJob( - result = test_phylotyper_model_direct(constants.BEAUTIFY_STX1) - ) - mock_stx2 = MockRQJob( - result = test_phylotyper_model_direct(constants.BEAUTIFY_STX2) - ) - p.jobs.update({ - 'job_phylotyper_beautify_stx1': models.Job( - rq_job=mock_stx1, - name='job_phylotyper_beautify_stx1', - transitory=False, - backlog=False, - display=True - ) - }) - p.jobs.update({ - 'job_phylotyper_beautify_stx2': models.Job( - rq_job=mock_stx2, - name='job_phylotyper_beautify_stx2', - transitory=False, - backlog=False, - display=True - ) - }) - assert isinstance(p, models.Pipeline) - assert isinstance(p.jobs, dict) - for k in p.jobs: - assert isinstance(p.jobs[k], models.Job) - - # Test Pipeline.cache_jobs() - p.cache_jobs() - # Test Pipeline.merge_jobs() - p.merge_jobs() - # Test Pipeline.complete(), should be True. - assert p.complete() - - # Test Pipeline.to_json(). - json = p.to_json() - assert isinstance(json, list) - - # Add an AMR job and re-test. - mock_eae = MockRQJob( - result = test_phylotyper_model_direct(constants.BEAUTIFY_EAE) - ) - p.jobs.update({ - 'job_phylotyper_beautify_eae': models.Job( - rq_job=mock_eae, - name='job_phylotyper_beautify_stx2', - transitory=False, - backlog=False, - display=True - ) - }) - p.merge_jobs() - # Test Pipeline.complete(), should be True. - assert p.complete() - # Test Pipeline.to_json(). - json = p.to_json() - assert isinstance(json, list) +# def test_pipeline_model_phyotyping(): +# """ +# Test the Pipeline model itself for subtyping via Phylotyper. +# """ +# p = models.Pipeline( +# func = spfy, +# options = constants.ARGS_DICT +# ) +# mock_stx1 = MockRQJob( +# result = test_phylotyper_model_direct(constants.BEAUTIFY_STX1) +# ) +# mock_stx2 = MockRQJob( +# result = test_phylotyper_model_direct(constants.BEAUTIFY_STX2) +# ) +# p.jobs.update({ +# 'job_phylotyper_beautify_stx1': models.Job( +# rq_job=mock_stx1, +# name='job_phylotyper_beautify_stx1', +# transitory=False, +# backlog=False, +# display=True +# ) +# }) +# p.jobs.update({ +# 'job_phylotyper_beautify_stx2': models.Job( +# rq_job=mock_stx2, +# name='job_phylotyper_beautify_stx2', +# transitory=False, +# backlog=False, +# display=True +# ) +# }) +# assert isinstance(p, models.Pipeline) +# assert isinstance(p.jobs, dict) +# for k in p.jobs: +# assert isinstance(p.jobs[k], models.Job) +# +# # Test Pipeline.cache_jobs() +# p.cache_jobs() +# # Test Pipeline.merge_jobs() +# p.merge_jobs() +# # Test Pipeline.complete(), should be True. +# assert p.complete() +# +# # Test Pipeline.to_json(). +# json = p.to_json() +# assert isinstance(json, list) +# +# # Add an AMR job and re-test. +# mock_eae = MockRQJob( +# result = test_phylotyper_model_direct(constants.BEAUTIFY_EAE) +# ) +# p.jobs.update({ +# 'job_phylotyper_beautify_eae': models.Job( +# rq_job=mock_eae, +# name='job_phylotyper_beautify_stx2', +# transitory=False, +# backlog=False, +# display=True +# ) +# }) +# p.merge_jobs() +# # Test Pipeline.complete(), should be True. +# assert p.complete() +# # Test Pipeline.to_json(). 
+# json = p.to_json() +# assert isinstance(json, list) def test_pipeline_model_signature(): """ From 0f0c71d9b8104a42fc7e1a34e05a618335c45c13 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 7 Mar 2018 13:29:52 -0500 Subject: [PATCH 082/122] DEBUG: checks in model generation --- app/middleware/modellers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index 59b1bed0..77cbbca2 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -39,10 +39,11 @@ def model_vf(lst): """ # Type check. assert isinstance(lst, list) + assert isinstance(lst[0], dict) print("model_vf() called with type {0} containing {1}".format(type(lst), str(lst))) subtyping_list = [ { - 'analysis':item('analysis'), + 'analysis':item['analysis'], 'contigid':item['contigid'], 'filename':item['filename'], 'hitcutoff':item['hitcutoff'], From ccda9f66e44f3d68207d951c2d76d23043155424 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Wed, 7 Mar 2018 14:04:10 -0500 Subject: [PATCH 083/122] CHANGE: create test pipelines directly from module calls --- app/tests/test_models.py | 7 +++++-- app/tests/test_modules.py | 8 ++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/app/tests/test_models.py b/app/tests/test_models.py index 9bd811c7..f2e403e1 100644 --- a/app/tests/test_models.py +++ b/app/tests/test_models.py @@ -3,6 +3,7 @@ from modules.spfy import spfy from scripts.savvy import savvy from tests import constants +from tests.test_modules import test_ectyper_vf, test_ectyper_serotype_call_pickle class MockRQJob(): """ @@ -50,11 +51,13 @@ def _create_example_pipeline(): func=spfy, options=constants.ARGS_DICT ) + r_serotype = test_ectyper_serotype_call_pickle(return_one=True) mock_serotype = MockRQJob( - result=test_subtyping_model_direct(constants.BEAUTIFY_SEROTYPE) + result=test_subtyping_model_direct(r_serotype) ) + r_vf = test_ectyper_vf(return_one=True) mock_vf = MockRQJob( - result=test_subtyping_model_direct(constants.BEAUTIFY_VF) + result=test_subtyping_model_direct(r_vf) ) # Mimicks a Serotype result that will be converted to json. p.jobs.update({ diff --git a/app/tests/test_modules.py b/app/tests/test_modules.py index db280e68..63248bd9 100644 --- a/app/tests/test_modules.py +++ b/app/tests/test_modules.py @@ -60,7 +60,7 @@ def test_qc(): for non_ecoli_genome in GENOMES_LIST_NOT_ECOLI: assert qc(non_ecoli_genome) == False -def test_ectyper_vf(): +def test_ectyper_vf(return_one=False): """Check the ECTyper from `superphy` which is used for virulance factor identification. Installed as a submodule in the `modules` directory. """ @@ -75,6 +75,8 @@ def test_ectyper_vf(): # beautify ECTyper check json_return = beautify(pickled_ectyper_dict, single_dict) assert type(json_return) == list + if return_one: + return json_return def _validate_model(model): # Validate (throws error if invalidate). @@ -108,7 +110,7 @@ def test_ectyper_serotype_call_nopickle(): serotype_model = call_ectyper_serotype(single_dict, pickle=False) _validate_model(serotype_model) -def test_ectyper_serotype_call_pickle(): +def test_ectyper_serotype_call_pickle(return_one=False): """ Check the actual call from Spfy's code. 
""" @@ -119,6 +121,8 @@ def test_ectyper_serotype_call_pickle(): pickled_serotype_model = call_ectyper_serotype(single_dict) ectyper_serotype_model = unpickle(pickled_serotype_model) _validate_model(ectyper_serotype_model) + if return_one: + return ectyper_serotype_model def test_amr(): ecoli_genome = GENOMES_LIST_ECOLI[0] From 2dc2cb63cd0ee4560b49a3323148a842ab2434e1 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 8 Mar 2018 12:31:15 -0500 Subject: [PATCH 084/122] CHANGE: wrap the to_json() return with jsonify --- app/middleware/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/middleware/models.py b/app/middleware/models.py index a3924531..159b0375 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -7,6 +7,7 @@ from dis import dis from StringIO import StringIO from jsonmodels import models, fields +from flask import jsonify from middleware.graphers.turtle_utils import actual_filename # def _convert_model(model): @@ -225,7 +226,7 @@ def to_json(self): raise Exception("to_json() called for job {0} with result of type {1} and info {2}".format(j.name, type(model), str(model))) list_json = model_to_json(model) l += list_json - return l + return jsonify(l) def _function_signature(self): """ From d6b3f518d640652d8bd4bb3c2cd8cfbb61fe0f92 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 8 Mar 2018 12:48:16 -0500 Subject: [PATCH 085/122] CHANGE: work from lists for graphing --- app/middleware/graphers/datastruct_savvy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index 43b729ee..be5209f4 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -10,8 +10,8 @@ def _graph_subtyping(graph, model, uriIsolate): # Convert the model to a graph. - struct = model.to_struct() - rows_list = struct['rows'] + # struct = model.to_struct() + rows_list = model for row in rows_list: graph.add(( uriIsolate, @@ -29,7 +29,7 @@ def model_to_graph(graph, model, uriIsolate): # Validate the model submitted before processing. model.validate() # Conversion. 
- if isinstance(model, SubtypingResult): + if isinstance(model, list): return _graph_subtyping(graph, model, uriIsolate) else: raise Exception('model_to_graph() called for a model without a handler.') @@ -209,7 +209,7 @@ def generate_datastruct(query_file, id_file, pickled_dictionary): graph = parse_gene_dict(graph, results['Antimicrobial Resistance'], uriGenome, 'AntimicrobialResistanceGene') return graph - elif isinstance(results, SubtypingResult): + elif isinstance(results, list): graph = model_to_graph(graph, results, uriIsolate) return graph else: From a0baa16073b5ec81056be48b5c46a83ff34cdd2c Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 8 Mar 2018 13:01:43 -0500 Subject: [PATCH 086/122] FIX?: comment out validate and report what model_to_json sees --- app/middleware/display/beautify.py | 2 ++ app/middleware/graphers/datastruct_savvy.py | 2 +- app/middleware/models.py | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/app/middleware/display/beautify.py b/app/middleware/display/beautify.py index 7933b086..e9a7c403 100644 --- a/app/middleware/display/beautify.py +++ b/app/middleware/display/beautify.py @@ -139,11 +139,13 @@ def beautify(gene_dict, args_dict=None): def display_subtyping(pickled_result, args_dict=None): result = unpickle(pickled_result) if isinstance(result, dict): + # VF list_return = beautify(gene_dict=result, args_dict=args_dict) assert isinstance(list_return, list) model = model_vf(list_return) return model_to_json(model) elif isinstance(result, list): + # Serotyping return model_to_json(result) else: raise Exception("beautify() could not handle pickled file: {0}.".format(pickled_result)) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index be5209f4..4e41cf1c 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -27,7 +27,7 @@ def _graph_subtyping(graph, model, uriIsolate): def model_to_graph(graph, model, uriIsolate): # Validate the model submitted before processing. - model.validate() + # model.validate() # Conversion. if isinstance(model, list): return _graph_subtyping(graph, model, uriIsolate) diff --git a/app/middleware/models.py b/app/middleware/models.py index 159b0375..de4ba915 100644 --- a/app/middleware/models.py +++ b/app/middleware/models.py @@ -30,6 +30,7 @@ def model_to_json(model): assert isinstance(model, list) # model.validate() # Conversion. + print("model_to_json() called with model: {0}".format(str(model))) return model # if isinstance(model, models.Base): # return _convert_model(model) From 8320b8163009d283fe6313f6a4c1e31b3ec3a699 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Thu, 8 Mar 2018 13:38:03 -0500 Subject: [PATCH 087/122] DEBUG: should work for serotyping... 
---
 app/middleware/modellers.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py
index 77cbbca2..c0000321 100644
--- a/app/middleware/modellers.py
+++ b/app/middleware/modellers.py
@@ -31,6 +31,8 @@ def model_serotype(pi, pl, output_file):
     # subtyping_result = SubtypingResult(
     #     rows = subtyping_list
     # )
+    assert subtyping_list
+    assert subtyping_list[0]
     return subtyping_list

 def model_vf(lst):

From 5b821d9d9cda3edbc69639d93e292babb85ed185 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Thu, 8 Mar 2018 13:51:58 -0500
Subject: [PATCH 088/122] FIX: wasn't reading the return from new ectyper call
 correctly

---
 app/middleware/graphers/datastruct_savvy.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py
index 4e41cf1c..964a91fe 100644
--- a/app/middleware/graphers/datastruct_savvy.py
+++ b/app/middleware/graphers/datastruct_savvy.py
@@ -13,15 +13,16 @@ def _graph_subtyping(graph, model, uriIsolate):
     # struct = model.to_struct()
     rows_list = model
     for row in rows_list:
+        o_type, h_type = row['hitname'].split(':')
         graph.add((
             uriIsolate,
             gu('ge:0001076'),
-            Literal(row['O_prediction'])
+            Literal(o_type)
         ))
         graph.add((
             uriIsolate,
             gu('ge:0001077'),
-            Literal(row['H_prediction'])
+            Literal(h_type)
         ))
     return graph

From 474be0d70796e29af545c92133d740e86f99eac0 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Thu, 8 Mar 2018 14:39:49 -0500
Subject: [PATCH 089/122] FIX: when collecting finished jobs for display, also
 check if it's a backlog job

---
 app/middleware/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/middleware/models.py b/app/middleware/models.py
index de4ba915..58b62af4 100644
--- a/app/middleware/models.py
+++ b/app/middleware/models.py
@@ -212,7 +212,7 @@ def to_json(self):
         # Gather all the jobs that have finished and haven't failed.
         completed_jobs = [
             j for j in self.final_jobs
-            if j.display and j.rq_job.is_finished and not j.rq_job.is_failed
+            if j.display and not j.backlog and j.rq_job.is_finished and not j.rq_job.is_failed
         ]
         print("to_json() completed_jobs: {0}".format(str(completed_jobs)))
         # Merge the json lists together.

From d51204937f00026d7a54973a8cb35e5832b146c2 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Thu, 8 Mar 2018 14:41:41 -0500
Subject: [PATCH 090/122] FIX: pipeline should not have display copies of
 results in the first place

---
 app/modules/spfy.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index a1a9b6fd..3ebf22c1 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -113,7 +113,7 @@ def _ectyper_pipeline_vf(query_file, single_dict, pipeline=None, backlog=False):
         )
     })

-    if not single_dict['options']['bulk']:
+    if not single_dict['options']['bulk'] or not backlog:
         # Only bother parsing into json if user has requested either vf or
         # serotype, and we're not in bulk uploading.
job_ectyper_beautify_serotype = multiples.enqueue( From 41a152ba8af2e3eb4232a58e1c7daec388d69665 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Fri, 9 Mar 2018 17:04:13 -0500 Subject: [PATCH 091/122] CHANGE: amr pipeline into new system --- app/modules/spfy.py | 112 ++++++++++++++++++++++++++++++++------------ 1 file changed, 83 insertions(+), 29 deletions(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 3ebf22c1..08940f10 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -213,6 +213,88 @@ def _ectyper_pipeline_serotype(query_file, single_dict, pipeline=None, backlog=F }) return d +# AMR PIPELINE +def _amr_pipeline(pipeline=None, backlog=False, bulk=False): + # Alias. + job_id = pipeline.jobs['job_id'].rq_job + if not backlog: + multiples = multiples_q + else: + multiples = backlog_multiples_q + + job_amr = multiples.enqueue(amr, query_file, depends_on=job_id) + pipeline.jobs.update({ + 'job_amr': Job( + rq_job=job_amr, + name='job_amr', + transitory=True, + backlog=backlog, + display=False + ) + }) + + job_amr_dict = multiples.enqueue( + amr_to_dict, query_file + '_rgi.tsv', depends_on=job_amr) + pipeline.jobs.update({ + 'job_amr_dict': Job( + rq_job=job_amr_dict, + name='job_amr_dict', + transitory=True, + backlog=backlog, + display=False + ) + }) + + # Create a graph, and upload to Blazegraph. + if backlog: + job_amr_datastruct = multiples.enqueue( + datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_rgi.tsv_rgi.p', depends_on=job_amr_dict, result_ttl=-1) + pipeline.jobs.update({ + 'job_amr_datastruct': Job( + rq_job=job_amr_datastruct, + name='job_amr_datastruct', + transitory=False, + backlog=backlog, + display=False + ) + }) + else: + job_amr_datastruct = multiples.enqueue( + datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_rgi.tsv_rgi.p', depends_on=job_amr_dict) + pipeline.jobs.update({ + 'job_amr_datastruct': Job( + rq_job=job_amr_datastruct, + name='job_amr_datastruct', + transitory=True, + backlog=backlog, + display=False + ) + }) + d = {'job_amr': job_amr, 'job_amr_dict': job_amr_dict, + 'job_amr_datastruct': job_amr_datastruct} + # we still check for the user-selected amr option again because + # if it was not selected but BACKLOG_ENABLED=True, we dont have to + # enqueue it to backlog_multiples_q since beautify doesnt upload + # blazegraph + if not bulk: + job_amr_beautify = multiples.enqueue( + beautify, + query_file + '_rgi.tsv_rgi.p', + single_dict, + depends_on=job_amr_dict, + result_ttl=-1) + pipeline.jobs.update({ + 'job_amr_beautify': Job( + rq_job=job_amr_beautify, + name='job_amr_beautify', + transitory=False, + backlog=backlog, + display=True + ) + }) + d.update({'job_amr_beautify': job_amr_beautify}) + return d + def blob_savvy_enqueue(single_dict, pipeline): ''' Handles enqueueing of single file to multiple queues. 
@@ -315,36 +397,8 @@ def blob_savvy_enqueue(single_dict, pipeline): ) # END ECTYPER PIPELINE - # AMR PIPELINE - def amr_pipeline(multiples): - job_amr = multiples.enqueue(amr, query_file, depends_on=job_id) - job_amr_dict = multiples.enqueue( - amr_to_dict, query_file + '_rgi.tsv', depends_on=job_amr) - # this uploads result to blazegraph - if single_dict['options']['bulk']: - job_amr_datastruct = multiples.enqueue( - datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_rgi.tsv_rgi.p', depends_on=job_amr_dict, result_ttl=-1) - else: - job_amr_datastruct = multiples.enqueue( - datastruct_savvy, query_file, query_file + '_id.txt', query_file + '_rgi.tsv_rgi.p', depends_on=job_amr_dict) - d = {'job_amr': job_amr, 'job_amr_dict': job_amr_dict, - 'job_amr_datastruct': job_amr_datastruct} - # we still check for the user-selected amr option again because - # if it was not selected but BACKLOG_ENABLED=True, we dont have to - # enqueue it to backlog_multiples_q since beautify doesnt upload - # blazegraph - if single_dict['options']['amr'] and not single_dict['options']['bulk']: - job_amr_beautify = multiples.enqueue( - beautify, - query_file + '_rgi.tsv_rgi.p', - single_dict, - depends_on=job_amr_dict, - result_ttl=-1) - d.update({'job_amr_beautify': job_amr_beautify}) - return d - if single_dict['options']['amr']: - amr_jobs = amr_pipeline(multiples_q) + amr_jobs = _amr_pipeline(bulk=single_dict['options']['bulk']) job_amr = amr_jobs['job_amr'] job_amr_dict = amr_jobs['job_amr_dict'] job_amr_datastruct = amr_jobs['job_amr_datastruct'] From c3709ea49883c0d438363acfd37dbeee6f74cbce Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Fri, 9 Mar 2018 17:23:56 -0500 Subject: [PATCH 092/122] CHANGE: phylotyper into new system --- app/modules/spfy.py | 139 ++++++++++++++++++++++++++++++-------------- 1 file changed, 96 insertions(+), 43 deletions(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 08940f10..9b123842 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -276,7 +276,7 @@ def _amr_pipeline(pipeline=None, backlog=False, bulk=False): # if it was not selected but BACKLOG_ENABLED=True, we dont have to # enqueue it to backlog_multiples_q since beautify doesnt upload # blazegraph - if not bulk: + if not backlog and not bulk: job_amr_beautify = multiples.enqueue( beautify, query_file + '_rgi.tsv_rgi.p', @@ -295,6 +295,84 @@ def _amr_pipeline(pipeline=None, backlog=False, bulk=False): d.update({'job_amr_beautify': job_amr_beautify}) return d +def _phylotyper_pipeline(subtype, pipeline=None, backlog=False): + # Alias. 
+ job_id = pipeline.jobs['job_id'].rq_job + if not backlog: + multiples = multiples_q + else: + multiples = backlog_multiples_q + + jobname = '_pt' +subtype + tsvfile = query_file + jobname + '.tsv' + picklefile = query_file + jobname + '.p' + + job_pt = multiples.enqueue( + phylotyper.phylotyper, + None, + subtype, + tsvfile, + id_file=query_file + '_id.txt', + depends_on=pipeline.jobs['job_ectyper_datastruct_vf'].rq_job) + pipeline.jobs.update({ + 'job'+jobname: Job( + rq_job=job_pt, + name='job'+jobname, + transitory=True, + backlog=backlog, + display=False + ) + }) + + job_pt_dict = multiples.enqueue( + phylotyper.to_dict, tsvfile, subtype, picklefile, + depends_on=job_pt) + pipeline.jobs.update({ + 'job'+jobname+'_dict': Job( + rq_job=job_pt_dict, + name='job'+jobname+'_dict', + transitory=True, + backlog=backlog, + display=False + ) + }) + + job_pt_datastruct = multiples.enqueue( + phylotyper.savvy, picklefile, subtype, + depends_on=job_pt_dict) + pipeline.jobs.update({ + 'job'+jobname+'_datastruct': Job( + rq_job=job_pt_datastruct, + name='job'+jobname+'_datastruct', + transitory=True, + backlog=backlog, + display=False + ) + }) + + d = {'job'+jobname: job_pt, 'job'+jobname+'_dict': job_pt_dict, + 'job'+jobname+'_datastruct': job_pt_datastruct} + # we still check for the user-selected amr option again because + # if it was not selected but BACKLOG_ENABLED=True, we dont have to + # enqueue it to backlog_multiples_q since beautify doesnt upload + # blazegraph + if not backlog: + job_pt_beautify = multiples.enqueue( + phylotyper.beautify, picklefile, actual_filename(query_file), + depends_on=job_pt_dict, result_ttl=-1) + pipeline.jobs.update({ + 'job'+jobname+'_beautify': Job( + rq_job=job_pt_beautify, + name='job'+jobname+'_beautify', + transitory=False, + backlog=backlog, + display=True + ) + }) + d.update({'job'+jobname+'_beautify': job_pt_beautify}) + + return d + def blob_savvy_enqueue(single_dict, pipeline): ''' Handles enqueueing of single file to multiple queues. 
@@ -398,73 +476,48 @@ def blob_savvy_enqueue(single_dict, pipeline): # END ECTYPER PIPELINE if single_dict['options']['amr']: - amr_jobs = _amr_pipeline(bulk=single_dict['options']['bulk']) + amr_jobs = _amr_pipeline(, pipeline=pipeline, bulk=single_dict['options']['bulk']) job_amr = amr_jobs['job_amr'] job_amr_dict = amr_jobs['job_amr_dict'] job_amr_datastruct = amr_jobs['job_amr_datastruct'] if not single_dict['options']['bulk']: job_amr_beautify = amr_jobs['job_amr_beautify'] elif config.BACKLOG_ENABLED: - amr_pipeline(backlog_multiples_q) + _amr_pipeline(pipeline=pipeline, backlog=True) # END AMR PIPELINE # Phylotyper Pipeline - def phylotyper_pipeline(multiples, subtype): - - jobname = '_pt' +subtype - tsvfile = query_file + jobname + '.tsv' - picklefile = query_file + jobname + '.p' - - job_pt = multiples.enqueue( - phylotyper.phylotyper, - None, - subtype, - tsvfile, - id_file=query_file + '_id.txt', - depends_on=pipeline.jobs['job_ectyper_datastruct_vf'].rq_job) - job_pt_dict = multiples.enqueue( - phylotyper.to_dict, tsvfile, subtype, picklefile, - depends_on=job_pt) - job_pt_datastruct = multiples.enqueue( - phylotyper.savvy, picklefile, subtype, - depends_on=job_pt_dict) - - d = {'job'+jobname: job_pt, 'job'+jobname+'_dict': job_pt_dict, - 'job'+jobname+'_datastruct': job_pt_datastruct} - # we still check for the user-selected amr option again because - # if it was not selected but BACKLOG_ENABLED=True, we dont have to - # enqueue it to backlog_multiples_q since beautify doesnt upload - # blazegraph - if single_dict['options'][subtype]: - job_pt_beautify = multiples.enqueue( - phylotyper.beautify, picklefile, actual_filename(query_file), - depends_on=job_pt_dict, result_ttl=-1) - d.update({'job'+jobname+'_beautify': job_pt_beautify}) - - return d - if single_dict['options']['stx1']: - pt_jobs = phylotyper_pipeline(multiples_q, 'stx1') + pt_jobs = _phylotyper_pipeline('stx1', pipeline=pipeline) job_stx1_beautify = pt_jobs['job_ptstx1_beautify'] elif config.BACKLOG_ENABLED: - phylotyper_pipeline(backlog_multiples_q, 'stx1') + _phylotyper_pipeline('stx1', pipeline=pipeline, backlog=True) if single_dict['options']['stx2']: - pt_jobs = phylotyper_pipeline(multiples_q, 'stx2') + pt_jobs = _phylotyper_pipeline('stx2', pipeline=pipeline) job_stx2_beautify = pt_jobs['job_ptstx2_beautify'] elif config.BACKLOG_ENABLED: - phylotyper_pipeline(backlog_multiples_q, 'stx2') + _phylotyper_pipeline('stx2', pipeline=pipeline, backlog=True) if single_dict['options']['eae']: - pt_jobs = phylotyper_pipeline(multiples_q, 'eae') + pt_jobs = _phylotyper_pipeline('eae', pipeline=pipeline) job_eae_beautify = pt_jobs['job_pteae_beautify'] elif config.BACKLOG_ENABLED: - phylotyper_pipeline(backlog_multiples_q, 'eae') + _phylotyper_pipeline('eae', pipeline=pipeline, backlog=True) # END Phylotyper pipeline # the base file data for blazegraph job_turtle = multiples_q.enqueue( turtle_grapher, query_file, depends_on=job_qc) + pipeline.jobs.update({ + 'job_turtle': Job( + rq_job=job_turtle, + name='job_turtle', + transitory=True, + backlog=False, + display=False + ) + }) jobs[job_qc.get_id()] = {'file': single_dict['i'], 'analysis': 'Quality Control'} From 4f73f1647aa74a16193412d8107ba25f9bdb9b7c Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 17:28:37 -0500 Subject: [PATCH 093/122] CHANGE: merge phylotyper results as well --- app/middleware/modellers.py | 26 ++++++++++++++++++++-- app/modules/phylotyper/phylotyper.py | 33 ++++++++++++++-------------- 2 files changed, 41 insertions(+), 18 
deletions(-) diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py index c0000321..8981b4ce 100644 --- a/app/middleware/modellers.py +++ b/app/middleware/modellers.py @@ -23,7 +23,8 @@ def model_serotype(pi, pl, output_file): 'hitname':"{0}:{1}".format(row['O_prediction'],row['H_prediction']), 'hitorientation':'n/a', 'hitstart':'n/a', - 'hitstop':'n/a' + 'hitstop':'n/a', + 'probability':'n/a' } for index, row in df.iterrows()] @@ -52,7 +53,8 @@ def model_vf(lst): 'hitname':item['hitname'], 'hitorientation':item['hitorientation'], 'hitstart':item['hitstart'], - 'hitstop':item['hitstop'] + 'hitstop':item['hitstop'], + 'probability':'n/a' } for item in lst] # Convert the list of rows into a SubtypingResult model. @@ -60,3 +62,23 @@ def model_vf(lst): # rows = subtyping_list # ) return subtyping_list + +def model_phylotyper(lst): + """ + Casts phylotyper's return to the same format as VF/Serotyping. + """ + phylotyper_list = [ + { + 'analysis:':d['subtype_gene'], + 'contigid':d['contig'], + 'filename':d['genome'], + 'hitcutoff':'n/a', + 'hitname':d['subtype'], + 'hitorientation':'n/a', + 'hitstart':d['start'], + 'hitstop':d['stop'], + 'probability':d['probability'] + } + for d in lst] + + return phylotyper_list diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py index 6c8af7fd..3c04cde8 100644 --- a/app/modules/phylotyper/phylotyper.py +++ b/app/modules/phylotyper/phylotyper.py @@ -22,8 +22,9 @@ import config -from middleware.graphers.turtle_utils import generate_uri as gu, fulluri_to_basename as u2b, normalize_rdfterm as normalize +from middleware.graphers.turtle_utils import generate_uri as gu, fulluri_to_basename as u2b, normalize_rdfterm as normalize from middleware.blazegraph.upload_graph import upload_graph +from middleware.modellers import model_phylotyper from modules.phylotyper import ontology, exceptions from modules.phylotyper.sequences import MarkerSequences, phylotyper_query, genename_query @@ -42,7 +43,7 @@ def phylotyper(uriIsolate, subtype, result_file, id_file=None): Returns: file to tab-delimited text results - + """ # uriIsolate retrieval @@ -94,7 +95,7 @@ def phylotyper(uriIsolate, subtype, result_file, id_file=None): shutil.move(output_file, result_file) shutil.rmtree(temp_dir) - + return result_file @@ -105,7 +106,7 @@ def to_dict(pt_file, subtype, pickle_file): """ - + pt_results = pd.read_table(pt_file) if pt_results['phylotyper_assignment'].empty or pt_results['phylotyper_assignment'].values[0] == 'Subtype loci not found in genome': @@ -114,7 +115,7 @@ def to_dict(pt_file, subtype, pickle_file): } else: - + pt_results = pt_results[['subtype','probability','loci']] pt_results = pt_results.to_dict() @@ -144,7 +145,7 @@ def to_dict(pt_file, subtype, pickle_file): pt_results['contig'][k] = contigs pt_results['start'][k] = starts pt_results['stop'][k] = stops - + pickle.dump(pt_results, open(pickle_file, 'wb')) return pickle_file @@ -179,7 +180,7 @@ def beautify(p_file, genome): # Expand into table rows - one per loci table_rows = [] for k in pt_dict['loci']: - + # Location info for i in range(len(pt_dict['loci'][k])): instance_dict = {} @@ -194,20 +195,20 @@ def beautify(p_file, genome): allele_rdf = normalize(allele_uri) gene_result = genename_query(allele_rdf) instance_dict['subtype_gene'] = gene_result[0]['markerLabel'] - + # Genome instance_dict['genome'] = genome # Subtype info instance_dict['subtype'] = pt_dict['subtype'][k] instance_dict['probability'] = pt_dict['probability'][k] - - 
table_rows.append(instance_dict) - return table_rows - + table_rows.append(instance_dict) + # Cast + unified_format = model_phylotyper(table_rows) + return unified_format def savvy(p_file, subtype): """ Load phylotyper results into DB @@ -222,7 +223,7 @@ def savvy(p_file, subtype): # Phylotyper scheme phylotyper_uri = gu('subt:'+subtype) - + # Get list of permissable subtype values subtypes_results = ontology.subtypeset_query(normalize(phylotyper_uri)) subtypes = {} @@ -289,7 +290,7 @@ def ignorant(genome_uri, subtype, pickle_file): 'stop': {} } for row in results: - + if row['pt'] in subtype_assignments: k = subtype_assignments[row['pt']] else: @@ -345,9 +346,9 @@ def ignorant(genome_uri, subtype, pickle_file): g = u2b(gu(input_g)) pt_file = os.path.join(config.DATASTORE, g+'_pt.tsv') pickle_file = os.path.join(config.DATASTORE, g+'_pt.p') - + phylotyper(args.g, args.s, pt_file) to_dict(pt_file, args.s, pickle_file) print(beautify(pickle_file, args.g)) #savvy(pickle_file, args.s) - #ignorant(input_g, args.s, pickle_file+'2') \ No newline at end of file + #ignorant(input_g, args.s, pickle_file+'2') From 2fddaf4032b6af08a2b3f66c69f30f226a8c6bfa Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 17:41:20 -0500 Subject: [PATCH 094/122] FIX: typos --- app/modules/spfy.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 9b123842..edad68a7 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -214,7 +214,7 @@ def _ectyper_pipeline_serotype(query_file, single_dict, pipeline=None, backlog=F return d # AMR PIPELINE -def _amr_pipeline(pipeline=None, backlog=False, bulk=False): +def _amr_pipeline(query_file, single_dict, pipeline=None, backlog=False, bulk=False): # Alias. job_id = pipeline.jobs['job_id'].rq_job if not backlog: @@ -295,7 +295,7 @@ def _amr_pipeline(pipeline=None, backlog=False, bulk=False): d.update({'job_amr_beautify': job_amr_beautify}) return d -def _phylotyper_pipeline(subtype, pipeline=None, backlog=False): +def _phylotyper_pipeline(subtype, query_file, pipeline=None, backlog=False): # Alias. 
job_id = pipeline.jobs['job_id'].rq_job if not backlog: @@ -475,35 +475,36 @@ def blob_savvy_enqueue(single_dict, pipeline): ) # END ECTYPER PIPELINE + # AMR Pipeline if single_dict['options']['amr']: - amr_jobs = _amr_pipeline(, pipeline=pipeline, bulk=single_dict['options']['bulk']) + amr_jobs = _amr_pipeline(query_file=query_file, single_dict=single_dict, pipeline=pipeline, bulk=single_dict['options']['bulk']) job_amr = amr_jobs['job_amr'] job_amr_dict = amr_jobs['job_amr_dict'] job_amr_datastruct = amr_jobs['job_amr_datastruct'] if not single_dict['options']['bulk']: job_amr_beautify = amr_jobs['job_amr_beautify'] elif config.BACKLOG_ENABLED: - _amr_pipeline(pipeline=pipeline, backlog=True) + _amr_pipeline(query_file=query_file, single_dict=single_dict, pipeline=pipeline, backlog=True) # END AMR PIPELINE # Phylotyper Pipeline if single_dict['options']['stx1']: - pt_jobs = _phylotyper_pipeline('stx1', pipeline=pipeline) + pt_jobs = _phylotyper_pipeline('stx1', query_file=query_file, pipeline=pipeline) job_stx1_beautify = pt_jobs['job_ptstx1_beautify'] elif config.BACKLOG_ENABLED: - _phylotyper_pipeline('stx1', pipeline=pipeline, backlog=True) + _phylotyper_pipeline('stx1', query_file=query_file, pipeline=pipeline, backlog=True) if single_dict['options']['stx2']: - pt_jobs = _phylotyper_pipeline('stx2', pipeline=pipeline) + pt_jobs = _phylotyper_pipeline('stx2', query_file=query_file, pipeline=pipeline) job_stx2_beautify = pt_jobs['job_ptstx2_beautify'] elif config.BACKLOG_ENABLED: - _phylotyper_pipeline('stx2', pipeline=pipeline, backlog=True) + _phylotyper_pipeline('stx2', query_file=query_file, pipeline=pipeline, backlog=True) if single_dict['options']['eae']: - pt_jobs = _phylotyper_pipeline('eae', pipeline=pipeline) + pt_jobs = _phylotyper_pipeline('eae', query_file=query_file, pipeline=pipeline) job_eae_beautify = pt_jobs['job_pteae_beautify'] elif config.BACKLOG_ENABLED: - _phylotyper_pipeline('eae', pipeline=pipeline, backlog=True) + _phylotyper_pipeline('eae', query_file=query_file, pipeline=pipeline, backlog=True) # END Phylotyper pipeline # the base file data for blazegraph From 5750d97cc7872e75b0a4d9d5056ef6443f56797d Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 19:14:24 -0500 Subject: [PATCH 095/122] DEBUG: just getting N/A in phylotyper return. 
Wondering if datastruct_savvy() is having problems parsing VF --- app/middleware/graphers/datastruct_savvy.py | 4 +++- app/modules/spfy.py | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index 964a91fe..90454055 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -208,7 +208,9 @@ def generate_datastruct(query_file, id_file, pickled_dictionary): graph = parse_gene_dict(graph, results['Virulence Factors'], uriGenome, 'VirulenceFactor') elif key == 'Antimicrobial Resistance': graph = parse_gene_dict(graph, results['Antimicrobial Resistance'], uriGenome, - 'AntimicrobialResistanceGene') + 'AntimicrobialResistanceGene' + else: + raise Exception("generate_datastruct() failed to find key for query_file: {0}, pickled_dictionary: {1}, with results dictionary: {2}".format(query_file, pickled_dictionary, str(results))) return graph elif isinstance(results, list): graph = model_to_graph(graph, results, uriIsolate) diff --git a/app/modules/spfy.py b/app/modules/spfy.py index edad68a7..26ba5c72 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -477,7 +477,12 @@ def blob_savvy_enqueue(single_dict, pipeline): # AMR Pipeline if single_dict['options']['amr']: - amr_jobs = _amr_pipeline(query_file=query_file, single_dict=single_dict, pipeline=pipeline, bulk=single_dict['options']['bulk']) + amr_jobs = _amr_pipeline( + query_file=query_file, + single_dict=single_dict, + pipeline=pipeline, + backlog=False, + bulk=single_dict['options']['bulk']) job_amr = amr_jobs['job_amr'] job_amr_dict = amr_jobs['job_amr_dict'] job_amr_datastruct = amr_jobs['job_amr_datastruct'] From db6abd5f394eef0bb8387c810830fb664cd6b137 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 19:23:04 -0500 Subject: [PATCH 096/122] DEBUG: just getting N/A in phylotyper return. 
Wondering if datastruct_savvy() is having problems parsing VF --- app/middleware/graphers/datastruct_savvy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py index 90454055..84bf10cc 100644 --- a/app/middleware/graphers/datastruct_savvy.py +++ b/app/middleware/graphers/datastruct_savvy.py @@ -208,7 +208,7 @@ def generate_datastruct(query_file, id_file, pickled_dictionary): graph = parse_gene_dict(graph, results['Virulence Factors'], uriGenome, 'VirulenceFactor') elif key == 'Antimicrobial Resistance': graph = parse_gene_dict(graph, results['Antimicrobial Resistance'], uriGenome, - 'AntimicrobialResistanceGene' + 'AntimicrobialResistanceGene') else: raise Exception("generate_datastruct() failed to find key for query_file: {0}, pickled_dictionary: {1}, with results dictionary: {2}".format(query_file, pickled_dictionary, str(results))) return graph From 926abc9cd34114dad6f834aa6ed227fdd9ed6c1b Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 20:39:58 -0500 Subject: [PATCH 097/122] CHANGE: submodule name --- .gitmodules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index dc342682..05221613 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,9 +2,9 @@ path = app/modules/ectyper/ecoli_serotyping url = https://github.com/phac-nml/ecoli_serotyping.git branch = superphy -[submodule "reactapp"] +[submodule "grouch"] path = reactapp - url = https://github.com/superphy/reactapp.git + url = https://github.com/superphy/grouch.git [submodule "app/modules/PanPredic"] path = app/modules/PanPredic url = https://github.com/superphy/PanPredic.git From 8ed22bd953fe30d162e22a2adf79aa7d389d1fc6 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 20:40:20 -0500 Subject: [PATCH 098/122] CHANGE: submodule path --- .gitmodules | 2 +- reactapp => grouch | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename reactapp => grouch (100%) diff --git a/.gitmodules b/.gitmodules index 05221613..0238eaee 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,7 +3,7 @@ url = https://github.com/phac-nml/ecoli_serotyping.git branch = superphy [submodule "grouch"] - path = reactapp + path = grouch url = https://github.com/superphy/grouch.git [submodule "app/modules/PanPredic"] path = app/modules/PanPredic diff --git a/reactapp b/grouch similarity index 100% rename from reactapp rename to grouch From ec31fd5131840ddfc8044ae7be25967f4d73cda7 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 20:41:26 -0500 Subject: [PATCH 099/122] FIX: grouch (reactapp) now correct HEAD --- grouch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grouch b/grouch index a6b539ac..2088077b 160000 --- a/grouch +++ b/grouch @@ -1 +1 @@ -Subproject commit a6b539ac33f50d6f44f35c4eebb5c53bc5fd495f +Subproject commit 2088077b734f737d8a362c06283dcd87b4218be7 From 38a171a39267c73f0a8fa053b459501a045a6207 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sat, 10 Mar 2018 20:46:36 -0500 Subject: [PATCH 100/122] UPDATE: names in docker-compose --- Dockerfile-reactapp => Dockerfile-grouch | 2 +- docker-compose.yml | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) rename Dockerfile-reactapp => Dockerfile-grouch (95%) diff --git a/Dockerfile-reactapp b/Dockerfile-grouch similarity index 95% rename from Dockerfile-reactapp rename to Dockerfile-grouch index 0479fec6..2a4c229c 100644 --- a/Dockerfile-reactapp +++ b/Dockerfile-grouch @@ -7,7 +7,7 @@ 
 ENV YARN_VERSION 0.17.6

 RUN mkdir /app
 # Install app dependencies & build
-COPY ./reactapp /app
+COPY ./grouch /app
 WORKDIR /app
 # part of a bug fix; see https://github.com/sass/node-sass/issues/1579
 RUN yarn add node-sass
diff --git a/docker-compose.yml b/docker-compose.yml
index ca0452b7..75f1f0a7 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,5 +1,6 @@
 version: '2'
 services:
+  # The main controlling webserver.
   webserver:
     build:
       context: .
@@ -14,11 +15,12 @@ services:
 #    networks:
 #      - dockernet

-  reactapp:
+  # The ReactJS app for the frontend.
+  grouch:
     build:
       context: .
-      dockerfile: Dockerfile-reactapp
-    image: reactapp
+      dockerfile: Dockerfile-grouch
+    image: grouch
     ports:
       - "8090:5000"
     depends_on:
@@ -26,6 +28,7 @@ services:
 #    networks:
 #      - dockernet

+  # The main set of RQ workers.
   worker:
     build:
       context: .
@@ -40,6 +43,7 @@ services:
 #    networks:
 #      - dockernet

+  # Reserved RQ worker for creating & syncing Spfy IDs.
   worker-blazegraph-ids:
     build:
       context: .
@@ -52,6 +56,7 @@ services:
 #    networks:
 #      - dockernet

+  # Extra priority workers serving the frontend.
   worker-priority:
     build:
       context: .
@@ -62,11 +67,13 @@ services:
     depends_on:
       - webserver

+  # Redis DB for the RQ workers + some frontend tasks.
   redis:
     image: redis:3.2
 #    networks:
 #      - dockernet

+  # Blazegraph DB for LTS.
   blazegraph:
     image: superphy/blazegraph:2.1.4-inferencing
     ports:
@@ -74,6 +81,7 @@ services:
     volumes:
       - /var/lib/jetty/

+  # MongoDB for token-based accounts.
   mongodb:
     image: mongo:3.6.1-jessie
     environment:

From 196a70b166ddae754fa9bb4b0bc3a92615bdfea8 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 10 Mar 2018 21:29:24 -0500
Subject: [PATCH 101/122] DEBUG: some more checks to see why phylotyper doesn't
 see VFs

---
 app/middleware/graphers/datastruct_savvy.py | 2 ++
 app/modules/ectyper/call_ectyper.py | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py
index 84bf10cc..a0c793ab 100644
--- a/app/middleware/graphers/datastruct_savvy.py
+++ b/app/middleware/graphers/datastruct_savvy.py
@@ -198,6 +198,8 @@ def generate_datastruct(query_file, id_file, pickled_dictionary):

     # Unpickle.
     results = unpickle(pickled_dictionary)
+    # Ensure this isn't empty.
+    assert results
     # Check if we have a model or a dictionary.
     if isinstance(results, dict):
         # graphing functions
diff --git a/app/modules/ectyper/call_ectyper.py b/app/modules/ectyper/call_ectyper.py
index 416e882c..f25bdeb0 100644
--- a/app/modules/ectyper/call_ectyper.py
+++ b/app/modules/ectyper/call_ectyper.py
@@ -58,11 +58,11 @@ def call_ectyper_vf(args_dict):
         # TODO: edit ectyper so we're not using this duct-tape approach
         # we are calling tools_controller on only one file, so grab that dict
         key, ectyper_dict = ectyper_dict.popitem()
-
+        assert isinstance(ectyper_dict, dict)
         # TODO: convert this to a VF model.
         # Path for the pickle dump.
         p = filepath + '_ectyper_vf.p'
-        pickle.dump(ectyper_dict,open(p,'wb'))
+        dump(ectyper_dict, p)

         return p

From 581820656e1c3677d651a428f6cc6f93cc092998 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 10 Mar 2018 21:32:07 -0500
Subject: [PATCH 102/122] DEBUG: have phylotyper throw an exception if it can't
 retrieve anything from the db

---
 app/modules/phylotyper/phylotyper.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py
index 3c04cde8..9522040c 100644
--- a/app/modules/phylotyper/phylotyper.py
+++ b/app/modules/phylotyper/phylotyper.py
@@ -314,6 +314,7 @@ def ignorant(genome_uri, subtype, pickle_file):
                 pt_dict['stop'][k].append(row['endPos'])

     if not results:
+        raise Exception("ignorant() could not find phylotyper results for genome_uri: {0}, subtype: {1}, with pickle_file: {2}".format(genome_uri, subtype, pickle_file))
         pt_dict = {
             'subtype': 'No loci'
         }

From f4e2b3969457bf9fd8f68cf743ddec0f3dbedacc Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 10 Mar 2018 21:59:06 -0500
Subject: [PATCH 103/122] DEBUG: looks like we're not getting that far into it,
 have phylotyper raise an exception if we can't read any output from
 phylotyper directly

---
 app/modules/phylotyper/phylotyper.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py
index 9522040c..85718b23 100644
--- a/app/modules/phylotyper/phylotyper.py
+++ b/app/modules/phylotyper/phylotyper.py
@@ -110,6 +110,12 @@ def to_dict(pt_file, subtype, pickle_file):
     pt_results = pd.read_table(pt_file)

     if pt_results['phylotyper_assignment'].empty or pt_results['phylotyper_assignment'].values[0] == 'Subtype loci not found in genome':
+        raise Exception("phylotyper.to_dict() couldnt find loci for file: {0}, subtype: {1}, pickle_file, {2}, with dataframe {3}".format(
+            pt_file,
+            subtype,
+            pickle_file,
+            str(pt_results)
+        ))
         pt_results = {
             'subtype': 'No loci',
         }

From 943b36bbe78ff93e41551a4f71b0afe2bb89ceda Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 10 Mar 2018 22:25:16 -0500
Subject: [PATCH 104/122] DEBUG: phylotyper.to_dict() getting an empty df, is
 markerseqs = MarkerSequences(loci) blank?
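If it is, a guard along these lines would surface the problem before an empty
query.fasta gets written (hypothetical sketch, not part of this patch; names as
used in phylotyper.phylotyper()):

    # Hypothetical guard: fail fast if the reference marker sequences
    # come back empty instead of writing an empty query.fasta.
    markerseqs = MarkerSequences(loci)
    fasta = markerseqs.fasta(uriIsolate)
    if not fasta:
        raise Exception('MarkerSequences returned no fasta for loci: {0}, isolate: {1}'.format(
            str(loci), str(uriIsolate)))
    with open(query_file, 'w') as fh:
        fh.write(fasta)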
---
 app/modules/phylotyper/phylotyper.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py
index 85718b23..207a7cf2 100644
--- a/app/modules/phylotyper/phylotyper.py
+++ b/app/modules/phylotyper/phylotyper.py
@@ -88,6 +88,11 @@ def phylotyper(uriIsolate, subtype, result_file, id_file=None):

     else:
         # No loci
+        raise Exception('phylotyper.phylotyper() could not retrieve reference sequences for loci: {0}, uriIsolate: {1}, subtype: {2}'.format(
+            str(loci),
+            str(uriIsolate),
+            subtype
+        ))
         # Report no loci status in output
         with open(output_file, 'w') as fh:
             fh.write('\t'.join(['genome','tree_label','subtype','probability','phylotyper_assignment','loci']))

From 2b619879371d4684757c827be67dc2daff948ffc Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 10 Mar 2018 22:51:28 -0500
Subject: [PATCH 105/122] CHANGE: have eae alleles all be called eae

---
 app/middleware/graphers/datastruct_savvy.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/app/middleware/graphers/datastruct_savvy.py b/app/middleware/graphers/datastruct_savvy.py
index a0c793ab..7f251f73 100644
--- a/app/middleware/graphers/datastruct_savvy.py
+++ b/app/middleware/graphers/datastruct_savvy.py
@@ -102,6 +102,10 @@ def parse_gene_dict(graph, gene_dict, uriGenome, geneType):
         # some gene names, esp those which are effectively a description,
         # have spaces
         gene_name = gene_record['GENE_NAME'].replace(' ', '_')
+        # Workaround to assume all eae alleles are just eae.
+        # See https://github.com/superphy/spfy/pull/274
+        if gene_name.startswith('eae'):
+            gene_name = 'eae'
         uriGene = gu(':' + gene_name)
         # define the object type of the gene
         graph.add((uriGene, gu('rdf:type'), gu(':' + geneType)))

From d0dc50c5ed3d48fd13f00c6b74b76c3f4b71cdb0 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sat, 10 Mar 2018 23:20:38 -0500
Subject: [PATCH 106/122] UPDATE: pull the optimization branch for blazegraph
 so we're up to date with production

---
 app/modules/spfy.py | 2 +-
 docker-compose.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index 26ba5c72..53d0846b 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -94,7 +94,7 @@
     else:
         ttl_value = config.DEFAULT_RESULT_TTL

-    # datastruct_savvy() stores result to Blazegraph.
+    # datastruct_savvy() graphs and uploads result to Blazegraph.
     job_ectyper_datastruct_vf = multiples.enqueue(
         datastruct_savvy,
         query_file,
diff --git a/docker-compose.yml b/docker-compose.yml
index 75f1f0a7..7b312419 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -75,7 +75,7 @@ services:
   # Blazegraph DB for LTS.
   blazegraph:
-    image: superphy/blazegraph:2.1.4-inferencing
+    image: superphy/blazegraph:2.1.4-optimization
     ports:
       - "8080:8080"
     volumes:

From 2dcc35a481cb7062708c8b494b1c2e8368a3d73d Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Sun, 11 Mar 2018 00:22:33 -0500
Subject: [PATCH 107/122] DEBUG: some checks on phylotyper's job deps

---
 app/modules/spfy.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index 53d0846b..4210386d 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -298,6 +298,10 @@ def _amr_pipeline(query_file, single_dict, pipeline=None, backlog=False, bulk=Fa

 def _phylotyper_pipeline(subtype, query_file, pipeline=None, backlog=False):
     # Alias.
job_id = pipeline.jobs['job_id'].rq_job + job_ectyper_datastruct_vf = pipeline.jobs['job_ectyper_datastruct_vf'].rq_job + assert job_id + assert job_ectyper_datastruct_vf + # Alias queues. if not backlog: multiples = multiples_q else: @@ -313,7 +317,7 @@ def _phylotyper_pipeline(subtype, query_file, pipeline=None, backlog=False): subtype, tsvfile, id_file=query_file + '_id.txt', - depends_on=pipeline.jobs['job_ectyper_datastruct_vf'].rq_job) + depends_on=job_ectyper_datastruct_vf) pipeline.jobs.update({ 'job'+jobname: Job( rq_job=job_pt, From 4d1cf017e282c992110bc983f57a90fbf1bce865 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 11 Mar 2018 01:36:36 -0500 Subject: [PATCH 108/122] UPDATE: submodule blazegraph into spfy (no longer pulling from Docker Hub) --- .gitmodules | 3 +++ docker-blazegraph | 1 + docker-compose.yml | 5 ++++- 3 files changed, 8 insertions(+), 1 deletion(-) create mode 160000 docker-blazegraph diff --git a/.gitmodules b/.gitmodules index 0238eaee..d23df7f5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -8,3 +8,6 @@ [submodule "app/modules/PanPredic"] path = app/modules/PanPredic url = https://github.com/superphy/PanPredic.git +[submodule "docker-blazegraph"] + path = docker-blazegraph + url = https://github.com/superphy/docker-blazegraph.git diff --git a/docker-blazegraph b/docker-blazegraph new file mode 160000 index 00000000..51553836 --- /dev/null +++ b/docker-blazegraph @@ -0,0 +1 @@ +Subproject commit 515538362ddec870f425482958f0e773ef6f1953 diff --git a/docker-compose.yml b/docker-compose.yml index 7b312419..a8f0275f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -75,7 +75,10 @@ services: # Blazegraph DB for LTS. blazegraph: - image: superphy/blazegraph:2.1.4-optimization + build: + context: ./docker-blazegraph/2.1.4-inferencing + dockerfile: ./docker-blazegraph/2.1.4-inferencing/Dockerfile + image: blazegraph ports: - "8080:8080" volumes: From 24749f0472f92045e883111e3dbea89a3ceeb68e Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 11 Mar 2018 01:42:09 -0500 Subject: [PATCH 109/122] FIX: ref Dockerfile for blazegraph --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index a8f0275f..5471b67f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -77,7 +77,7 @@ services: blazegraph: build: context: ./docker-blazegraph/2.1.4-inferencing - dockerfile: ./docker-blazegraph/2.1.4-inferencing/Dockerfile + dockerfile: Dockerfile # inherits the context from above. 
image: blazegraph ports: - "8080:8080" From 29ebd25ae9914e4b4975da12c90b4b41068546b8 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 11 Mar 2018 03:31:52 -0400 Subject: [PATCH 110/122] FIX: l0pht never warned me about the craziness of jvm args --- docker-blazegraph | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-blazegraph b/docker-blazegraph index 51553836..0612a9be 160000 --- a/docker-blazegraph +++ b/docker-blazegraph @@ -1 +1 @@ -Subproject commit 515538362ddec870f425482958f0e773ef6f1953 +Subproject commit 0612a9beea4699eff8fd55820799c312747a0c76 From 87f8192c7a6e8531c48fece5ae77282ecd589c83 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 11 Mar 2018 18:52:39 -0400 Subject: [PATCH 111/122] DEBUG: check that phylotyper is completing correctly --- app/modules/phylotyper/phylotyper.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py index 207a7cf2..e99d835d 100644 --- a/app/modules/phylotyper/phylotyper.py +++ b/app/modules/phylotyper/phylotyper.py @@ -30,7 +30,12 @@ logger = logging.getLogger(__name__) - +def _check_tsv(pt_file): + pt_results = pd.read_table(pt_file) + try: + assert pt_results + except: + raise Exception('_check_tsv() failed (df is empty) for pt_file: ' + pt_file) def phylotyper(uriIsolate, subtype, result_file, id_file=None): """ Wrapper for Phylotyper @@ -71,6 +76,7 @@ def phylotyper(uriIsolate, subtype, result_file, id_file=None): # Get alleles for this genome markerseqs = MarkerSequences(loci) fasta = markerseqs.fasta(uriIsolate) + # fasta = temp_dir = mkdtemp(prefix='pt'+subtype, dir=config.DATASTORE) query_file = os.path.join(temp_dir, 'query.fasta') @@ -81,7 +87,7 @@ def phylotyper(uriIsolate, subtype, result_file, id_file=None): with open(query_file, 'w') as fh: fh.write(fasta) - subprocess.call(['phylotyper', 'genome', '--noplots', + subprocess.check_call(['phylotyper', 'genome', '--noplots', subtype, temp_dir, query_file]) @@ -101,6 +107,8 @@ def phylotyper(uriIsolate, subtype, result_file, id_file=None): shutil.move(output_file, result_file) shutil.rmtree(temp_dir) + _check_tsv(result_file) + return result_file From f018d21f4d8512586fc21f72f1b377b5ea29c5e5 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Sun, 11 Mar 2018 23:52:41 -0400 Subject: [PATCH 112/122] CHANGE: pin the RQ worker to the docker-flask-conda image we have up on corefacility --- Dockerfile-rq | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile-rq b/Dockerfile-rq index 1a4b89b9..15471e61 100644 --- a/Dockerfile-rq +++ b/Dockerfile-rq @@ -1,6 +1,6 @@ #this is for RQ for service worker -FROM superphy/docker-flask-conda:latest +FROM superphy/docker-flask-conda:master-6.1.0 COPY ./app /app From 747b849dcc804f3c375582edd5ad1ed28513f2d9 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 12 Mar 2018 02:16:41 -0400 Subject: [PATCH 113/122] ADD: workaround to run phylotyper.phylotyper() in its own set of workers. This might be a better approach for the future as it lets us define deps directly through Dockerfiles instead of with a unified conda env. 
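The routing itself is plain RQ: a job placed on a named queue is only picked up by
workers started for that queue, so the split comes down to something like this
(minimal sketch; redis_conn/config wiring as elsewhere in spfy.py,
some_phylotyper_task is a stand-in):

    # Minimal sketch: the 'phylotyper' queue is consumed only by workers
    # launched as `rq worker -c config phylotyper` (the new worker-phylotyper
    # containers with their own conda env); the generic 'multiples' workers
    # never see these jobs.
    from redis import Redis
    from rq import Queue

    redis_conn = Redis()
    phylotyper_q = Queue('phylotyper', connection=redis_conn)
    job = phylotyper_q.enqueue(some_phylotyper_task, 'stx1')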
--- Dockerfile-rq | 2 +- Dockerfile-rq-phylotyper | 21 +++++++++++++++++++ app/modules/spfy.py | 4 +++- app/supervisord-rq-phylotyper.conf | 33 ++++++++++++++++++++++++++++++ docker-compose.yml | 15 ++++++++++++++ 5 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 Dockerfile-rq-phylotyper create mode 100644 app/supervisord-rq-phylotyper.conf diff --git a/Dockerfile-rq b/Dockerfile-rq index 15471e61..1a4b89b9 100644 --- a/Dockerfile-rq +++ b/Dockerfile-rq @@ -1,6 +1,6 @@ #this is for RQ for service worker -FROM superphy/docker-flask-conda:master-6.1.0 +FROM superphy/docker-flask-conda:latest COPY ./app /app diff --git a/Dockerfile-rq-phylotyper b/Dockerfile-rq-phylotyper new file mode 100644 index 00000000..4b9dc2dd --- /dev/null +++ b/Dockerfile-rq-phylotyper @@ -0,0 +1,21 @@ +#this is for RQ for service worker + +FROM superphy/docker-flask-conda:master-6.1.0 + +COPY ./app /app + +COPY /app/supervisord-rq-phylotyper.conf /etc/supervisor/conf.d/supervisord.conf + +RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh + +ENV PATH /opt/conda/bin:$PATH + +ENV PATH /opt/conda/envs/backend/bin:$PATH + +RUN cat /etc/supervisor/conf.d/supervisord.conf +RUN which python +RUN which conda +RUN which uwsgi +RUN which rq + +CMD ["/usr/bin/supervisord"] diff --git a/app/modules/spfy.py b/app/modules/spfy.py index 4210386d..aea71fb4 100644 --- a/app/modules/spfy.py +++ b/app/modules/spfy.py @@ -44,6 +44,8 @@ singles_q = Queue('singles', connection=redis_conn) multiples_q = Queue('multiples', connection=redis_conn, default_timeout=config.DEFAULT_TIMEOUT) +phylotyper_q = Queue('phylotyper', connection=redis_conn, + default_timeout=config.DEFAULT_TIMEOUT) blazegraph_q = Queue('blazegraph', connection=redis_conn) if config.BACKLOG_ENABLED: # backlog queues @@ -311,7 +313,7 @@ def _phylotyper_pipeline(subtype, query_file, pipeline=None, backlog=False): tsvfile = query_file + jobname + '.tsv' picklefile = query_file + jobname + '.p' - job_pt = multiples.enqueue( + job_pt = phylotyper_q.enqueue( phylotyper.phylotyper, None, subtype, diff --git a/app/supervisord-rq-phylotyper.conf b/app/supervisord-rq-phylotyper.conf new file mode 100644 index 00000000..5b0d1cd7 --- /dev/null +++ b/app/supervisord-rq-phylotyper.conf @@ -0,0 +1,33 @@ +[supervisord] +nodaemon=true + +[program:rqworkermultiples] +; Point the command to the specific rq command you want to run. +; If you use virtualenv, be sure to point it to +; /path/to/virtualenv/bin/rq +; Also, you probably want to include a config module to configure this +; worker. For more info on that, see http://python-rq.org/docs/workers/ +environment=PATH='%(ENV_PATH)s:/opt/conda/envs/backend/bin' +command=/opt/conda/envs/backend/bin/rq worker -c config phylotyper +process_name=%(program_name)s-%(process_num)s + +; If you want to run more than one worker instance, increase this +numprocs=2 + +; This is the directory from which RQ is ran. Be sure to point this to the +; directory where your source code is importable from +directory=/app + +; RQ requires the TERM signal to perform a warm shutdown. 
If RQ does not die +; within 10 seconds, supervisor will forcefully kill it +stopsignal=TERM + +; These are up to you +autostart=true +autorestart=true + +; redirect stdout and stderr for docker logs +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 diff --git a/docker-compose.yml b/docker-compose.yml index 5471b67f..b14ccb6e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -43,6 +43,21 @@ services: # networks: # - dockernet + # The small set of workers specific to phylotyper's env. + worker-phylotyper: + build: + context: . + dockerfile: Dockerfile-rq-phylotyper + image: backend-rq-phylotyper + ports: + - "9181:9181" #this is for debugging, drop a shell and run rq-dashboard if you need to see jobs + volumes_from: + - webserver + depends_on: + - webserver + # networks: + # - dockernet + # Reserved RQ worker for creating & syncing Spfy IDs. worker-blazegraph-ids: build: From 8de636e64658a99e5b93f1bf59214523593c7dcd Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 12 Mar 2018 02:27:18 -0400 Subject: [PATCH 114/122] FIX: no ports for phylo --- docker-compose.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index b14ccb6e..6e5f9f9d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -49,8 +49,6 @@ services: context: . dockerfile: Dockerfile-rq-phylotyper image: backend-rq-phylotyper - ports: - - "9181:9181" #this is for debugging, drop a shell and run rq-dashboard if you need to see jobs volumes_from: - webserver depends_on: From e4df53ba6350495efbc54a1912c0ba706de043d7 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 12 Mar 2018 02:32:52 -0400 Subject: [PATCH 115/122] CHANGE: move the model import directly into the beautify func --- app/modules/phylotyper/phylotyper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py index e99d835d..aa6066df 100644 --- a/app/modules/phylotyper/phylotyper.py +++ b/app/modules/phylotyper/phylotyper.py @@ -24,7 +24,6 @@ import config from middleware.graphers.turtle_utils import generate_uri as gu, fulluri_to_basename as u2b, normalize_rdfterm as normalize from middleware.blazegraph.upload_graph import upload_graph -from middleware.modellers import model_phylotyper from modules.phylotyper import ontology, exceptions from modules.phylotyper.sequences import MarkerSequences, phylotyper_query, genename_query @@ -175,6 +174,7 @@ def beautify(p_file, genome): """ + from middleware.modellers import model_phylotyper # See https://github.com/superphy/spfy/issues/271 pt_dict = pickle.load(open(p_file, 'rb')) From 518d7b993428350b06304c087747d80d87a68504 Mon Sep 17 00:00:00 2001 From: Kevin Le Date: Mon, 12 Mar 2018 02:57:22 -0400 Subject: [PATCH 116/122] FIX: phylotyper is working!!!! 
also fixed our _check_tsv()
---
 app/modules/phylotyper/phylotyper.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py
index aa6066df..73c66625 100644
--- a/app/modules/phylotyper/phylotyper.py
+++ b/app/modules/phylotyper/phylotyper.py
@@ -31,10 +31,8 @@
 def _check_tsv(pt_file):
     pt_results = pd.read_table(pt_file)
-    try:
-        assert pt_results
-    except:
-        raise Exception('_check_tsv() failed (df is empty) for pt_file: ' + pt_file)
+    if pt_results.empty:
+        raise Exception('_check_tsv() failed as pt_results.empty == true for pt_file: {0} with df content: {1}'.format(pt_file, str(pt_results)))

 def phylotyper(uriIsolate, subtype, result_file, id_file=None):
     """ Wrapper for Phylotyper

From a8d0b67796df7b02c6a0f113b8aa7d9726bbe622 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Mar 2018 03:06:57 -0400
Subject: [PATCH 117/122] FIX: typo in new modeller for phylotyper

---
 app/middleware/modellers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/middleware/modellers.py b/app/middleware/modellers.py
index 8981b4ce..3bb35184 100644
--- a/app/middleware/modellers.py
+++ b/app/middleware/modellers.py
@@ -69,7 +69,7 @@ def model_phylotyper(lst):
     """
     phylotyper_list = [
         {
-            'analysis:':d['subtype_gene'],
+            'analysis':d['subtype_gene'],
             'contigid':d['contig'],
             'filename':d['genome'],
             'hitcutoff':'n/a',
@@ -80,5 +80,5 @@ def model_phylotyper(lst):
         }
         for d in lst]
-
+
     return phylotyper_list

From 77f6e77c8b0e2a86e672a304e09e393e259f934c Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Mar 2018 03:08:57 -0400
Subject: [PATCH 118/122] CHANGE: re-allow no loci found, but maintain checks
 on phylotyper call

---
 app/modules/phylotyper/phylotyper.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/app/modules/phylotyper/phylotyper.py b/app/modules/phylotyper/phylotyper.py
index 73c66625..c7df6241 100644
--- a/app/modules/phylotyper/phylotyper.py
+++ b/app/modules/phylotyper/phylotyper.py
@@ -120,12 +120,12 @@ def to_dict(pt_file, subtype, pickle_file):
     pt_results = pd.read_table(pt_file)

     if pt_results['phylotyper_assignment'].empty or pt_results['phylotyper_assignment'].values[0] == 'Subtype loci not found in genome':
-        raise Exception("phylotyper.to_dict() couldnt find loci for file: {0}, subtype: {1}, pickle_file, {2}, with dataframe {3}".format(
-            pt_file,
-            subtype,
-            pickle_file,
-            str(pt_results)
-        ))
+        # raise Exception("phylotyper.to_dict() couldnt find loci for file: {0}, subtype: {1}, pickle_file, {2}, with dataframe {3}".format(
+        #     pt_file,
+        #     subtype,
+        #     pickle_file,
+        #     str(pt_results)
+        # ))
         pt_results = {
             'subtype': 'No loci',
         }
@@ -331,7 +331,7 @@ def ignorant(genome_uri, subtype, pickle_file):
                 pt_dict['stop'][k].append(row['endPos'])

     if not results:
-        raise Exception("ignorant() could not find phylotyper results for genome_uri: {0}, subtype: {1}, with pickle_file: {2}".format(genome_uri, subtype, pickle_file))
+        # raise Exception("ignorant() could not find phylotyper results for genome_uri: {0}, subtype: {1}, with pickle_file: {2}".format(genome_uri, subtype, pickle_file))
         pt_dict = {
             'subtype': 'No loci'
         }

From 7fa4743da0c80521ad7a4653adcf0918f4c86396 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Mar 2018 03:32:04 -0400
Subject: [PATCH 119/122] FIX: hide VF results if not chosen

---
 app/modules/spfy.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index aea71fb4..beaa8f51 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -53,7 +53,7 @@ backlog_multiples_q = Queue(
     'backlog_multiples', connection=redis_conn, default_timeout=config.DEFAULT_TIMEOUT)

-def _ectyper_pipeline_vf(query_file, single_dict, pipeline=None, backlog=False):
+def _ectyper_pipeline_vf(query_file, single_dict, display_vf=True, pipeline=None, backlog=False):
     """
     Enqueue all the jobs required for VF.
     """
@@ -132,7 +132,7 @@ def _ectyper_pipeline_vf(query_file, single_dict, pipeline=None, backlog=False):
             name='job_ectyper_beautify_vf',
             transitory=False,
             backlog=backlog,
-            display=True
+            display=display_vf
         )
     })
     return d
@@ -416,12 +416,24 @@ def blob_savvy_enqueue(single_dict, pipeline):
             )
         })

+    # A check to allow hiding of VF results if only Phylotyper chosen.
+    if single_dict['options']['stx1'] or single_dict['options']['stx2'] or single_dict['options']['eae']:
+        chose_phylotyper = True
+    else:
+        chose_phylotyper = False
+    # Didn't choose VF, but chose phylotyper.
+    if not single_dict['options']['vf'] and chose_phylotyper:
+        # Don't display VF.
+        display_vf = False
+    else:
+        display_vf = True
     ## ECTyper (VF & Serotype)
     # VF
     if single_dict['options']['vf']:
         ectyper_vf_jobs = _ectyper_pipeline_vf(
             query_file,
             single_dict,
+            display_vf=display_vf,
             pipeline=pipeline
         )
         # pipeline.jobs.update(ectyper_vf_jobs)

From ecc84be10a6d30fc4a34d2c98e821fb40a148395 Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Mar 2018 03:34:48 -0400
Subject: [PATCH 120/122] CHANGE: also no longer "require" user to select VF in
 display (will hide)

---
 app/modules/spfy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index beaa8f51..a18f01a9 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -429,7 +429,7 @@ def blob_savvy_enqueue(single_dict, pipeline):
         display_vf = True
     ## ECTyper (VF & Serotype)
     # VF
-    if single_dict['options']['vf']:
+    if single_dict['options']['vf'] or chose_phylotyper:
         ectyper_vf_jobs = _ectyper_pipeline_vf(
             query_file,
             single_dict,

From b7e672318cbaefe0e188e9d56a275c412cecdbaf Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Mar 2018 03:49:51 -0400
Subject: [PATCH 121/122] FIX: rewrite the option for vf as well for phylo

---
 app/modules/spfy.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/app/modules/spfy.py b/app/modules/spfy.py
index a18f01a9..262fae56 100644
--- a/app/modules/spfy.py
+++ b/app/modules/spfy.py
@@ -72,6 +72,8 @@ def _ectyper_pipeline_vf(query_file, single_dict, display_vf=True, pipeline=None
     # This copy is passed to the old ECTyper.
     single_dict_vf = copy.deepcopy(single_dict)
     single_dict_vf['options']['serotype'] = False
+    # Rewrite the VF option too, in case this was called for Phylotyper.
+    single_dict_vf['options']['vf'] = True
     # Enqueue the old ECTyper
     job_ectyper_vf = singles.enqueue(
         call_ectyper_vf,

From 11a2e3bc811672d2738eb379fc60efe715831dfb Mon Sep 17 00:00:00 2001
From: Kevin Le
Date: Mon, 12 Mar 2018 04:07:47 -0400
Subject: [PATCH 122/122] UPDATE: reactapp 6.2.0

---
 grouch | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/grouch b/grouch
index 2088077b..ebb121e7 160000
--- a/grouch
+++ b/grouch
@@ -1 +1 @@
-Subproject commit 2088077b734f737d8a362c06283dcd87b4218be7
+Subproject commit ebb121e7b2befd7df3c2733c22e06de69d8de1cf
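Taken together, patches 119-121 reduce the VF gating in blob_savvy_enqueue() to the
following (a condensed sketch, not a verbatim excerpt; option keys as used throughout
spfy.py):

    # Phylotyper depends on the VF pipeline, so VF is enqueued whenever any
    # phylotyper subtype is requested, but its results are only displayed
    # when the user actually asked for VF.
    options = single_dict['options']
    chose_phylotyper = options['stx1'] or options['stx2'] or options['eae']
    display_vf = options['vf'] or not chose_phylotyper

    if options['vf'] or chose_phylotyper:
        ectyper_vf_jobs = _ectyper_pipeline_vf(
            query_file, single_dict,
            display_vf=display_vf, pipeline=pipeline)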