Skip to content

Commit

Permalink
Merge pull request #274 from superphy/271-ectyper-bp-dicts-amrphylo
Browse files Browse the repository at this point in the history
MERGE: 6.2.0 Integrated Pipelines
  • Loading branch information
kevinkle authored Mar 12, 2018
2 parents 5df5127 + f6db78e commit fc4a7a0
Show file tree
Hide file tree
Showing 58 changed files with 3,639 additions and 380 deletions.
9 changes: 6 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
path = app/modules/ectyper/ecoli_serotyping
url = https://github.com/phac-nml/ecoli_serotyping.git
branch = superphy
[submodule "reactapp"]
path = reactapp
url = https://github.com/superphy/reactapp.git
[submodule "grouch"]
path = grouch
url = https://github.com/superphy/grouch.git
[submodule "app/modules/PanPredic"]
path = app/modules/PanPredic
url = https://github.com/superphy/PanPredic.git
[submodule "docker-blazegraph"]
path = docker-blazegraph
url = https://github.com/superphy/docker-blazegraph.git
4 changes: 4 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ before_install:
- docker build -t superphy/backend-rq-blazegraph:2.0.0 -f Dockerfile-rq-blazegraph .
- docker-compose up -d
- docker ps -a
- docker-compose logs webserver
- ls
#### miniconda install:
# We do this conditionally because it saves us some downloading if the
Expand Down Expand Up @@ -47,5 +48,8 @@ install:
script:
#### Run Pytest
- python -m pytest --ignore modules/ectyper/ecoli_serotyping -v
after_failure:
# Check the logs if tests fail.
- docker-compose logs webserver
notifications:
email: false
2 changes: 1 addition & 1 deletion Dockerfile-reactapp → Dockerfile-grouch
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ ENV YARN_VERSION 0.17.6
RUN mkdir /app

# Install app dependencies & build
COPY ./reactapp /app
COPY ./grouch /app
WORKDIR /app
# part of a bug fix; see https://github.com/sass/node-sass/issues/1579
RUN yarn add node-sass
Expand Down
21 changes: 21 additions & 0 deletions Dockerfile-rq-phylotyper
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# This is the RQ image for the phylotyper service worker.

FROM superphy/docker-flask-conda:master-6.1.0

COPY ./app /app

COPY /app/supervisord-rq-phylotyper.conf /etc/supervisor/conf.d/supervisord.conf

RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh

ENV PATH /opt/conda/bin:$PATH

ENV PATH /opt/conda/envs/backend/bin:$PATH

RUN cat /etc/supervisor/conf.d/supervisord.conf
RUN which python
RUN which conda
RUN which uwsgi
RUN which rq

CMD ["/usr/bin/supervisord"]
74 changes: 0 additions & 74 deletions app/batch_download_insert.py

This file was deleted.

21 changes: 0 additions & 21 deletions app/batch_insert.py

This file was deleted.

2 changes: 2 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# enqueued function to complete before terminating it with an ERROR
# If not specified, jobs must execute within 3 mins
DEFAULT_TIMEOUT = 600 # in seconds (ie. 10 mins)
# Defines how long results are kept in Redis. 500 is the default for RQ.
DEFAULT_RESULT_TTL=500
PAN_TIMEOUT = 100000
# if BACKLOG_ENABLED = True, then all analyses modules will be run in the
# in the background for every submitted file
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import os
import logging
from datetime import datetime
from modules.turtleGrapher.turtle_utils import generate_hash, generate_uri as gu, link_uris
from modules.blazeUploader.upload_graph import upload_graph
from middleware.graphers.turtle_utils import generate_hash, generate_uri as gu, link_uris
from middleware.blazegraph.upload_graph import upload_graph
from SPARQLWrapper import SPARQLWrapper, JSON
from rdflib import Literal, Graph
import config
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import logging
import pandas as pd
import cPickle as pickle
from os.path import basename
from modules.loggingFunctions import initialize_logging
from modules.beautify.find_widest import check_alleles
from modules.turtleGrapher.turtle_utils import actual_filename
from middleware.display.find_widest import check_alleles
from middleware.graphers.turtle_utils import actual_filename
from middleware.models import SubtypingResult, model_to_json, unpickle
from middleware.modellers import model_vf

# logging
log_file = initialize_logging()
log = logging.getLogger(__name__)

def json_return(args_dict, gene_dict):

def json_return(gene_dict, args_dict):
"""
This converts the gene dict into a json format for return to the front end
"""
Expand Down Expand Up @@ -108,25 +110,42 @@ def handle_failed(json_r, args_dict):
ret.append(t)
return ret

def beautify(args_dict, pickled_dictionary):
# TODO: convert this to models-only.
def beautify(gene_dict, args_dict=None):
'''
Converts a given 'spit' datum (a dictionary with our results from rgi/ectyper) to a json form used by the frontend. This result is to be stored in Redis by the calling RQ Worker.
:param args_dict: The arguments supplied by the user. In the case of spfy web-app, this is used to determine which analysis options were set.
:param pickled_dictionary: location of the .p pickled dictionary object. This is supplied by the enqueue call in spfy.py
:param gene_dict: optionally, if using this to test via cli, you can supply the actual dictionary object.
:return: json representation of the results, as required by the front-end.
'''

gene_dict = pickle.load(open(pickled_dictionary, 'rb'))
# this converts our dictionary structure into json and adds metadata (filename, etc.)
json_r = json_return(args_dict, gene_dict)
log.debug('First parse into json_r: ' + str(json_r))
# if looking for only serotype, skip this step
if isinstance(gene_dict, str): # For the tests.
gene_dict = pickle.load(open(gene_dict, 'rb'))
# Convert the old ECTYper's dictionary structure into list and adds metadata (filename, etc.).
json_r = json_return(gene_dict, args_dict)
# For VF/AMR, find widest gene matched. Strip shorter matches.
if args_dict['options']['vf'] or args_dict['options']['amr']:
json_r = check_alleles(json_r)
log.debug('After checking alleles json_r: ' + str(json_r))
# check if there is an analysis module that has failed in the result
# Check if there is an analysis module that has failed in the result.
if has_failed(json_r):
# If failed, return.
return handle_failed(json_r, args_dict)
else:
return json_r
# Everything worked, cast result into a model.
# model = model_vf(json_r)
# return model_to_json(model)

def display_subtyping(pickled_result, args_dict=None):
    """Load a pickled subtyping result and render it for the front-end.

    :param pickled_result: pickled result (or path to one) handed off by the
        RQ worker; a dict is a VF/AMR gene dictionary, a list is a collection
        of serotyping models.
    :param args_dict: user-supplied analysis options, forwarded to beautify()
        when the result is a gene dictionary.
    :return: JSON-serializable representation of the result.
    :raises Exception: if the unpickled result is neither a dict nor a list.
    """
    result = unpickle(pickled_result)
    if isinstance(result, dict):
        # VF/AMR: convert the gene dict into a flat list of row dicts,
        # then cast into a model for serialization.
        list_return = beautify(gene_dict=result, args_dict=args_dict)
        assert isinstance(list_return, list)
        model = model_vf(list_return)
        return model_to_json(model)
    elif isinstance(result, list):
        # Serotyping: already a list of models, serialize directly.
        return model_to_json(result)
    else:
        # Bug fix: the old message blamed beautify(), but this function is
        # the one raising — name it correctly so logs point to the right place.
        raise Exception("display_subtyping() could not handle pickled file: {0}.".format(pickled_result))
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,12 +1,40 @@
import cPickle as pickle
from rdflib import BNode, Literal, Graph
from modules.turtleGrapher.turtle_utils import generate_uri as gu, generate_hash, link_uris
from modules.turtleGrapher.turtle_grapher import generate_graph
from modules.blazeUploader.upload_graph import queue_upload
from middleware.graphers.turtle_utils import generate_uri as gu, generate_hash, link_uris
from middleware.graphers.turtle_grapher import generate_graph
from middleware.blazegraph.upload_graph import queue_upload
from modules.PanPredic.pan_utils import contig_name_parse
from middleware.models import SubtypingResult, unpickle
# working with Serotype, Antimicrobial Resistance, & Virulence Factor data
# structures

def _graph_subtyping(graph, model, uriIsolate):
    """Attach O-type / H-type triples from a subtyping result to the graph.

    :param graph: rdflib Graph to extend in place.
    :param model: iterable of row dicts; each row's 'hitname' is 'Otype:Htype'.
    :param uriIsolate: URI of the isolate the triples describe.
    :return: the same graph, with one O-type and one H-type triple per row.
    """
    for entry in model:
        otype, htype = entry['hitname'].split(':')
        # ge:0001076 / ge:0001077 are the O-type / H-type predicates,
        # matching the ones used by parse_serotype().
        graph.add((uriIsolate, gu('ge:0001076'), Literal(otype)))
        graph.add((uriIsolate, gu('ge:0001077'), Literal(htype)))
    return graph

def model_to_graph(graph, model, uriIsolate):
    """Dispatch a results model to the matching graph-conversion routine.

    Only list-shaped subtyping results currently have a handler; any other
    model type raises.
    """
    # Guard clause: reject unhandled model types up front.
    if not isinstance(model, list):
        raise Exception('model_to_graph() called for a model without a handler.')
    return _graph_subtyping(graph, model, uriIsolate)

def parse_serotype(graph, serotyper_dict, uriIsolate):
if 'O type' in serotyper_dict:
graph.add((uriIsolate, gu('ge:0001076'),
Expand Down Expand Up @@ -74,6 +102,10 @@ def parse_gene_dict(graph, gene_dict, uriGenome, geneType):
# some gene names, esp those which are effectively a description,
# have spaces
gene_name = gene_record['GENE_NAME'].replace(' ', '_')
# Workaround to assume all eae alleles are just eae.
# See https://github.com/superphy/spfy/pull/274
if gene_name.startswith('eae'):
gene_name = 'eae'
uriGene = gu(':' + gene_name)
# define the object type of the gene
graph.add((uriGene, gu('rdf:type'), gu(':' + geneType)))
Expand Down Expand Up @@ -148,7 +180,7 @@ def parse_gene_dict(graph, gene_dict, uriGenome, geneType):

def generate_datastruct(query_file, id_file, pickled_dictionary):
'''
This is simply to decouple the graph generation code from the
Separates the graph generation code from the
upload code. In RQ backend, the datastruct_savvy() method is called
where-as in savvy.py (without RQ or Blazegraph) only compute_datastruct()
is called. The return type must be the same in datastruct_savvy to
Expand All @@ -168,22 +200,29 @@ def generate_datastruct(query_file, id_file, pickled_dictionary):
spfyid = int(l)
uriIsolate = gu(':spfy' + str(spfyid))

# results dict retrieval
results_dict = pickle.load(open(pickled_dictionary, 'rb'))

# graphing functions
for key in results_dict.keys():
if key == 'Serotype':
graph = parse_serotype(graph,results_dict['Serotype'],uriIsolate)
elif key == 'Virulence Factors':
graph = parse_gene_dict(graph, results_dict['Virulence Factors'], uriGenome, 'VirulenceFactor')
elif key == 'Antimicrobial Resistance':
graph = parse_gene_dict(graph, results_dict['Antimicrobial Resistance'], uriGenome,
'AntimicrobialResistanceGene')
#elif key == 'PanGenomeRegion':
# graph = parse_gene_dict(graph, results_dict[key], uriGenome, key)

return graph
# Unpickle.
results = unpickle(pickled_dictionary)
# Ensure this isn't empty.
assert results
# Check if we have a model or a dictionary.
if isinstance(results, dict):
# graphing functions
for key in results:
if key == 'Serotype':
graph = parse_serotype(graph,results['Serotype'],uriIsolate)
elif key == 'Virulence Factors':
graph = parse_gene_dict(graph, results['Virulence Factors'], uriGenome, 'VirulenceFactor')
elif key == 'Antimicrobial Resistance':
graph = parse_gene_dict(graph, results['Antimicrobial Resistance'], uriGenome,
'AntimicrobialResistanceGene')
else:
raise Exception("generate_datastruct() failed to find key for query_file: {0}, pickled_dictionary: {1}, with results dictionary: {2}".format(query_file, pickled_dictionary, str(results)))
return graph
elif isinstance(results, list):
graph = model_to_graph(graph, results, uriIsolate)
return graph
else:
raise Exception("generate_datastruct() could not handle pickled file: {0}.".format(pickled_dictionary))

def datastruct_savvy(query_file, id_file, pickled_dictionary):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
# to data structure(rdf triple organization) of the modules you're dev'ing

import config
from modules.turtleGrapher.turtle_utils import generate_hash, generate_uri as gu, link_uris
from modules.blazeUploader.upload_graph import queue_upload
from modules.turtleGrapher.turtle_utils import actual_filename
from middleware.graphers.turtle_utils import generate_hash, generate_uri as gu, link_uris
from middleware.blazegraph.upload_graph import queue_upload
from middleware.graphers.turtle_utils import actual_filename
from rdflib import Namespace, Graph, Literal, plugin
from Bio import SeqIO
from os.path import basename
Expand Down
File renamed without changes.
Loading

0 comments on commit fc4a7a0

Please sign in to comment.