Skip to content

Commit

Permalink
Merge pull request #274 from superphy/271-ectyper-bp-dicts-amrphylo
Browse files Browse the repository at this point in the history
MERGE: 6.2.0 Integrated Pipelines
  • Loading branch information
kevinkle authored Mar 12, 2018
2 parents 5df5127 + f6db78e commit fc4a7a0
Show file tree
Hide file tree
Showing 58 changed files with 3,639 additions and 380 deletions.
9 changes: 6 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
path = app/modules/ectyper/ecoli_serotyping
url = https://github.com/phac-nml/ecoli_serotyping.git
branch = superphy
[submodule "reactapp"]
path = reactapp
url = https://github.com/superphy/reactapp.git
[submodule "grouch"]
path = grouch
url = https://github.com/superphy/grouch.git
[submodule "app/modules/PanPredic"]
path = app/modules/PanPredic
url = https://github.com/superphy/PanPredic.git
[submodule "docker-blazegraph"]
path = docker-blazegraph
url = https://github.com/superphy/docker-blazegraph.git
4 changes: 4 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ before_install:
- docker build -t superphy/backend-rq-blazegraph:2.0.0 -f Dockerfile-rq-blazegraph .
- docker-compose up -d
- docker ps -a
- docker-compose logs webserver
- ls
#### miniconda install:
# We do this conditionally because it saves us some downloading if the
Expand Down Expand Up @@ -47,5 +48,8 @@ install:
script:
#### Run Pytest
- python -m pytest --ignore modules/ectyper/ecoli_serotyping -v
after_failure:
# Check the logs if tests fail.
- docker-compose logs webserver
notifications:
email: false
2 changes: 1 addition & 1 deletion Dockerfile-reactapp → Dockerfile-grouch
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ ENV YARN_VERSION 0.17.6
RUN mkdir /app

# Install app dependencies & build
COPY ./reactapp /app
COPY ./grouch /app
WORKDIR /app
# part of a bug fix; see https://github.com/sass/node-sass/issues/1579
RUN yarn add node-sass
Expand Down
21 changes: 21 additions & 0 deletions Dockerfile-rq-phylotyper
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# This is the RQ image for the phylotyper service worker.

FROM superphy/docker-flask-conda:master-6.1.0

COPY ./app /app

COPY /app/supervisord-rq-phylotyper.conf /etc/supervisor/conf.d/supervisord.conf

RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh

ENV PATH /opt/conda/bin:$PATH

ENV PATH /opt/conda/envs/backend/bin:$PATH

RUN cat /etc/supervisor/conf.d/supervisord.conf
RUN which python
RUN which conda
RUN which uwsgi
RUN which rq

CMD ["/usr/bin/supervisord"]
74 changes: 0 additions & 74 deletions app/batch_download_insert.py

This file was deleted.

21 changes: 0 additions & 21 deletions app/batch_insert.py

This file was deleted.

2 changes: 2 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# enqueued function to complete before terminating it with an ERROR
# If not specified, jobs must execute within 3 mins
DEFAULT_TIMEOUT = 600 # in seconds (ie. 10 mins)
# Defines how long results are kept in Redis. 500 is the default for RQ.
DEFAULT_RESULT_TTL=500
PAN_TIMEOUT = 100000
# if BACKLOG_ENABLED = True, then all analyses modules will be run in the
# in the background for every submitted file
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import os
import logging
from datetime import datetime
from modules.turtleGrapher.turtle_utils import generate_hash, generate_uri as gu, link_uris
from modules.blazeUploader.upload_graph import upload_graph
from middleware.graphers.turtle_utils import generate_hash, generate_uri as gu, link_uris
from middleware.blazegraph.upload_graph import upload_graph
from SPARQLWrapper import SPARQLWrapper, JSON
from rdflib import Literal, Graph
import config
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import logging
import pandas as pd
import cPickle as pickle
from os.path import basename
from modules.loggingFunctions import initialize_logging
from modules.beautify.find_widest import check_alleles
from modules.turtleGrapher.turtle_utils import actual_filename
from middleware.display.find_widest import check_alleles
from middleware.graphers.turtle_utils import actual_filename
from middleware.models import SubtypingResult, model_to_json, unpickle
from middleware.modellers import model_vf

# logging
log_file = initialize_logging()
log = logging.getLogger(__name__)

def json_return(args_dict, gene_dict):

def json_return(gene_dict, args_dict):
"""
This converts the gene dict into a json format for return to the front end
"""
Expand Down Expand Up @@ -108,25 +110,42 @@ def handle_failed(json_r, args_dict):
ret.append(t)
return ret

def beautify(args_dict, pickled_dictionary):
# TODO: convert this to models-only.
def beautify(gene_dict, args_dict=None):
'''
Converts a given 'spit' datum (a dictionary with our results from rgi/ectyper) to a json form used by the frontend. This result is to be stored in Redis by the calling RQ Worker.
:param args_dict: The arguments supplied by the user. In the case of spfy web-app, this is used to determine which analysis options were set.
:param pickled_dictionary: location of the .p pickled dictionary object. This is supplied by the enqueue call in spfy.py
:param gene_dict: optionally, if using this to test via cli, you can supply the actual dictionary object.
:return: json representation of the results, as required by the front-end.
'''

gene_dict = pickle.load(open(pickled_dictionary, 'rb'))
# this converts our dictionary structure into json and adds metadata (filename, etc.)
json_r = json_return(args_dict, gene_dict)
log.debug('First parse into json_r: ' + str(json_r))
# if looking for only serotype, skip this step
if isinstance(gene_dict, str): # For the tests.
gene_dict = pickle.load(open(gene_dict, 'rb'))
# Convert the old ECTYper's dictionary structure into list and adds metadata (filename, etc.).
json_r = json_return(gene_dict, args_dict)
# For VF/AMR, find widest gene matched. Strip shorter matches.
if args_dict['options']['vf'] or args_dict['options']['amr']:
json_r = check_alleles(json_r)
log.debug('After checking alleles json_r: ' + str(json_r))
# check if there is an analysis module that has failed in the result
# Check if there is an analysis module that has failed in the result.
if has_failed(json_r):
# If failed, return.
return handle_failed(json_r, args_dict)
else:
return json_r
# Everything worked, cast result into a model.
# model = model_vf(json_r)
# return model_to_json(model)

def display_subtyping(pickled_result, args_dict=None):
    """Load a pickled subtyping result and render it for the front-end.

    :param pickled_result: pickled result (or path to one) handed off by the
        RQ worker; a dict is a VF/AMR gene dictionary, a list is a collection
        of serotyping models.
    :param args_dict: user-supplied analysis options, forwarded to beautify()
        when the result is a gene dictionary.
    :return: JSON-serializable representation of the result.
    :raises Exception: if the unpickled result is neither a dict nor a list.
    """
    result = unpickle(pickled_result)
    if isinstance(result, dict):
        # VF/AMR: convert the gene dict into a flat list of row dicts,
        # then cast into a model for serialization.
        list_return = beautify(gene_dict=result, args_dict=args_dict)
        assert isinstance(list_return, list)
        model = model_vf(list_return)
        return model_to_json(model)
    elif isinstance(result, list):
        # Serotyping: already a list of models, serialize directly.
        return model_to_json(result)
    else:
        # Bug fix: the old message blamed beautify(), but this function is
        # the one raising — name it correctly so logs point to the right place.
        raise Exception("display_subtyping() could not handle pickled file: {0}.".format(pickled_result))
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,12 +1,40 @@
import cPickle as pickle
from rdflib import BNode, Literal, Graph
from modules.turtleGrapher.turtle_utils import generate_uri as gu, generate_hash, link_uris
from modules.turtleGrapher.turtle_grapher import generate_graph
from modules.blazeUploader.upload_graph import queue_upload
from middleware.graphers.turtle_utils import generate_uri as gu, generate_hash, link_uris
from middleware.graphers.turtle_grapher import generate_graph
from middleware.blazegraph.upload_graph import queue_upload
from modules.PanPredic.pan_utils import contig_name_parse
from middleware.models import SubtypingResult, unpickle
# working with Serotype, Antimicrobial Resistance, & Virulence Factor data
# structures

def _graph_subtyping(graph, model, uriIsolate):
    """Attach O-type / H-type triples from a subtyping result to the graph.

    :param graph: rdflib Graph to extend in place.
    :param model: iterable of row dicts; each row's 'hitname' is 'Otype:Htype'.
    :param uriIsolate: URI of the isolate the triples describe.
    :return: the same graph, with one O-type and one H-type triple per row.
    """
    for entry in model:
        otype, htype = entry['hitname'].split(':')
        # ge:0001076 / ge:0001077 are the O-type / H-type predicates,
        # matching the ones used by parse_serotype().
        graph.add((uriIsolate, gu('ge:0001076'), Literal(otype)))
        graph.add((uriIsolate, gu('ge:0001077'), Literal(htype)))
    return graph

def model_to_graph(graph, model, uriIsolate):
    """Dispatch a results model to the matching graph-conversion routine.

    Only list-shaped subtyping results currently have a handler; any other
    model type raises.
    """
    # Guard clause: reject unhandled model types up front.
    if not isinstance(model, list):
        raise Exception('model_to_graph() called for a model without a handler.')
    return _graph_subtyping(graph, model, uriIsolate)

def parse_serotype(graph, serotyper_dict, uriIsolate):
if 'O type' in serotyper_dict:
graph.add((uriIsolate, gu('ge:0001076'),
Expand Down Expand Up @@ -74,6 +102,10 @@ def parse_gene_dict(graph, gene_dict, uriGenome, geneType):
# some gene names, esp those which are effectively a description,
# have spaces
gene_name = gene_record['GENE_NAME'].replace(' ', '_')
# Workaround to assume all eae alleles are just eae.
# See https://github.com/superphy/spfy/pull/274
if gene_name.startswith('eae'):
gene_name = 'eae'
uriGene = gu(':' + gene_name)
# define the object type of the gene
graph.add((uriGene, gu('rdf:type'), gu(':' + geneType)))
Expand Down Expand Up @@ -148,7 +180,7 @@ def parse_gene_dict(graph, gene_dict, uriGenome, geneType):

def generate_datastruct(query_file, id_file, pickled_dictionary):
'''
This is simply to decouple the graph generation code from the
Separates the graph generation code from the
upload code. In RQ backend, the datastruct_savvy() method is called
where-as in savvy.py (without RQ or Blazegraph) only compute_datastruct()
is called. The return type must be the same in datastruct_savvy to
Expand All @@ -168,22 +200,29 @@ def generate_datastruct(query_file, id_file, pickled_dictionary):
spfyid = int(l)
uriIsolate = gu(':spfy' + str(spfyid))

# results dict retrieval
results_dict = pickle.load(open(pickled_dictionary, 'rb'))

# graphing functions
for key in results_dict.keys():
if key == 'Serotype':
graph = parse_serotype(graph,results_dict['Serotype'],uriIsolate)
elif key == 'Virulence Factors':
graph = parse_gene_dict(graph, results_dict['Virulence Factors'], uriGenome, 'VirulenceFactor')
elif key == 'Antimicrobial Resistance':
graph = parse_gene_dict(graph, results_dict['Antimicrobial Resistance'], uriGenome,
'AntimicrobialResistanceGene')
#elif key == 'PanGenomeRegion':
# graph = parse_gene_dict(graph, results_dict[key], uriGenome, key)

return graph
# Unpickle.
results = unpickle(pickled_dictionary)
# Ensure this isn't empty.
assert results
# Check if we have a model or a dictionary.
if isinstance(results, dict):
# graphing functions
for key in results:
if key == 'Serotype':
graph = parse_serotype(graph,results['Serotype'],uriIsolate)
elif key == 'Virulence Factors':
graph = parse_gene_dict(graph, results['Virulence Factors'], uriGenome, 'VirulenceFactor')
elif key == 'Antimicrobial Resistance':
graph = parse_gene_dict(graph, results['Antimicrobial Resistance'], uriGenome,
'AntimicrobialResistanceGene')
else:
raise Exception("generate_datastruct() failed to find key for query_file: {0}, pickled_dictionary: {1}, with results dictionary: {2}".format(query_file, pickled_dictionary, str(results)))
return graph
elif isinstance(results, list):
graph = model_to_graph(graph, results, uriIsolate)
return graph
else:
raise Exception("generate_datastruct() could not handle pickled file: {0}.".format(pickled_dictionary))

def datastruct_savvy(query_file, id_file, pickled_dictionary):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
# to data structure(rdf triple organization) of the modules you're dev'ing

import config
from modules.turtleGrapher.turtle_utils import generate_hash, generate_uri as gu, link_uris
from modules.blazeUploader.upload_graph import queue_upload
from modules.turtleGrapher.turtle_utils import actual_filename
from middleware.graphers.turtle_utils import generate_hash, generate_uri as gu, link_uris
from middleware.blazegraph.upload_graph import queue_upload
from middleware.graphers.turtle_utils import actual_filename
from rdflib import Namespace, Graph, Literal, plugin
from Bio import SeqIO
from os.path import basename
Expand Down
File renamed without changes.
Loading

0 comments on commit fc4a7a0

Please sign in to comment.