Skip to content

Commit

Permalink
Merge pull request #142 from McTavishLab/pylint
Browse files Browse the repository at this point in the history
Pylint
  • Loading branch information
snacktavish authored Sep 2, 2020
2 parents d03f856 + ff061b7 commit 34c1152
Show file tree
Hide file tree
Showing 14 changed files with 301 additions and 272 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ and tools for

Physcraper relies on:

[Dendropy](https://dendropy.org/primer/index.html) *Sukumaran, J and
[Dendropy](https://dendropy.org/primer/index.html) *Sukumaran, J and
MT Holder. 2010. DendroPy: a Python library for phylogenetic computing.
Bioinformatics 26: 1569-1571*.

[The Open Tree of Life
[The Open Tree of Life
Project](https://tree.opentreeoflife.org/opentree/argus/opentree12.3@ott93302)
*Open Tree of Life, Benjamin Redelings, Luna Luisa Sanchez Reyes, Karen
A. Cranston, Jim Allman, Mark T. Holder, & Emily Jane McTavish. (2019).
Expand Down
2 changes: 1 addition & 1 deletion physcraper/aligntreetax.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ def read_in_tree(self, tree, tree_schema=None):
elif isinstance(tree, datamodel.treemodel.Tree):
self.tre = tree
assert isinstance(self.tre, datamodel.treemodel.Tree)

def read_in_aln(self, alignment, aln_schema):
"""Reads in an alignment to the object taxon namespace."""
assert isinstance(alignment, str)
Expand Down
49 changes: 16 additions & 33 deletions physcraper/helpers.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
"""Some minor handy functions"""
import os
import sys
import subprocess
import contextlib

if sys.version_info[0] < 3:
str_type = unicode
else:
str_type = bytes


_DEBUG = 0
Expand All @@ -17,30 +14,29 @@ def debug(msg):
print(msg)



def get_raxml_ex():
    """Return the name of the RAxML executable, exiting if it is not on PATH.

    Returns:
        str: ``"raxmlHPC"`` when an executable with that name is found on PATH.

    Raises:
        SystemExit: raised (after a message is written to stderr) when no
            ``raxmlHPC`` executable can be located.
    """
    # Imported locally so this block does not rely on a new file-level import.
    import shutil

    # The previous implementation compared the result of
    # ``subprocess.check_call(["which", "raxmlHPC"])`` with 0. ``check_call``
    # raises CalledProcessError on a non-zero exit status instead of returning
    # it, so the "not found" message below could never be reached.
    # ``shutil.which`` returns None for a missing executable, needs no external
    # ``which`` binary, and does not echo the resolved path to stdout.
    if shutil.which("raxmlHPC") is not None:
        return "raxmlHPC"
    sys.stderr.write("Did not find raxml executable. Exiting \n")
    # NOTE(review): exits with status 0, as before; consider a non-zero status.
    sys.exit()



def to_string(inputstr):
    """Decode ``bytes`` to ``str``; return any other value unchanged.

    Args:
        inputstr: the value to convert. Only ``bytes`` instances are decoded.

    Returns:
        The ASCII-decoded text when ``inputstr`` is ``bytes`` (bytes that are
        not valid ASCII are substituted via the ``'replace'`` error handler);
        otherwise ``inputstr`` itself, untouched.
    """
    # Single definition: the parameter no longer shadows the builtin ``input``
    # and the check uses ``bytes`` directly rather than a module-level
    # ``str_type`` alias.
    if isinstance(inputstr, bytes):
        return inputstr.decode('ascii', 'replace')
    return inputstr


@contextlib.contextmanager
def cd(path):
"""Change directories and return to original directory"""
# print 'initially inside {0}'.format(os.getcwd())
CWD = os.getcwd()
curr = os.getcwd()
os.chdir(path)
# print 'inside {0}'.format(os.getcwd())
try:
Expand All @@ -49,18 +45,7 @@ def cd(path):
print('Exception caught: ', sys.exc_info()[0])
finally:
# print 'finally inside {0}'.format(os.getcwd())
os.chdir(CWD)


#def generate_from_run(workdir,
# seqaln='physcraper.fas',
# mattype='fasta',
# configfi='config.out',
# treefile='physcraper.tre',
# schema_trf='newick',
# search_taxon = 'mrca.txt'):


os.chdir(curr)

def standardize_label(item):
"""Make sure that the tip names are unicode.
Expand All @@ -72,5 +57,3 @@ def standardize_label(item):
"""
item_edit = item.replace(" ", "_")
return item_edit


47 changes: 19 additions & 28 deletions physcraper/ids.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,9 @@
"""Link together NCBI and Open Tree identifiers and names, with Gen Bank information for updated sequences"""
import sys
import os
import json
import time


if sys.version_info < (3,):
from urllib2 import HTTPError
else:
from urllib.error import HTTPError

from urllib.error import HTTPError
from Bio import Entrez

from physcraper import ncbi_data_parser, ConfigObj # is the ncbi data parser class and associated functions
from physcraper.helpers import debug

Expand All @@ -19,7 +12,7 @@



class IdDicts(object):
class IdDicts():
"""Class contains different taxonomic identifiers and helps to find the corresponding ids between ncbi and OToL
To build the class the following is needed:
Expand Down Expand Up @@ -61,13 +54,14 @@ class IdDicts(object):
* **Optional**:
* depending on blasting method:
* self.ncbi_parser: for local blast, initializes the ncbi_parser class, that contains information about rank and identifiers
* depending on blasting method:
* self.ncbi_parser: for local blast,
initializes the ncbi_parser class, that contains information about rank and identifiers
"""

def __init__(self, configfile = None, workdir=None):
def __init__(self, configfile=None):
"""Generates a series of name disambiguation dicts"""
if configfile == None:
if configfile is None:
self.config = ConfigObj()
elif isinstance(configfile, ConfigObj):
self.config = configfile
Expand All @@ -82,9 +76,9 @@ def __init__(self, configfile = None, workdir=None):
self.ott_to_name = {} # used in add_otu to get name from otuId
self.acc_ncbi_dict = {} # filled by ncbi_parser (by subprocess in earlier versions of the code).
self.spn_to_ncbiid = {} # spn to ncbi_id, it's only fed by the ncbi_data_parser, but makes it faster
self.ncbiid_to_spn = {} #TODO when is this generated? MK: well, here. it is filled with information from genbank to speed up translation between ncbi_taxon_ids and names. similar to acc_ncbi_dict and spn_to_ncbiid.
tax_folder = self.config.taxonomy_dir
fi = open(self.config.ott_ncbi) # This is in the taxonomy folder of the repo, needs to be updated by devs when OpenTree taxonomy changes.
self.ncbiid_to_spn = {}
fi = open(self.config.ott_ncbi)
# This is in the taxonomy folder of the repo, needs to be updated by devs when OpenTree taxonomy changes.
for lin in fi:
lii = lin.split(",")
self.ott_to_ncbi[int(lii[0])] = int(lii[1])
Expand All @@ -107,7 +101,7 @@ def __init__(self, configfile = None, workdir=None):
def get_ncbiid_from_acc(self, acc):
'''checks local dicts, and then runs efetch to get ncbi id for accession'''
gb_id = acc
if gb_id in self.acc_ncbi_dict:#TODO if the accession number and tax id are here, does that mean the name is in ncbiid_to_spn?
if gb_id in self.acc_ncbi_dict:
ncbi_id = self.acc_ncbi_dict[gb_id]
elif gb_id in self.acc_tax_seq_dict:
ncbi_id = self.acc_tax_seq_dict[gb_id]["^ncbi:taxon"]
Expand All @@ -121,6 +115,7 @@ def get_ncbiid_from_acc(self, acc):

#removed function find_tax_id because it wasn't being used
def get_tax_seq_acc(self, acc):
"""Pulls the taxon ID and the full sequences from NCBI"""
if not os.path.exists(self.full_seq_path):
os.makedirs(self.full_seq_path)
gb_id = acc
Expand All @@ -136,23 +131,19 @@ def get_tax_seq_acc(self, acc):
elif os.path.exists(seq_path):
fi = open(seq_path)
header = fi.readline().strip('>')
#try:
assert(header.split()[1].startswith('taxname:'))
assert header.split()[1].startswith('taxname:')
tax_name = header.split()[1].strip('taxname:')
ncbi_id = header.split()[2].strip('ncbi:')
seq = "".join(fi.readlines())
# except IndexError:
# print("IndexError")
# pass
if seq == None:
if seq is None:
read_handle = self.entrez_efetch(gb_id)
tax_name = ncbi_data_parser.get_ncbi_tax_name(read_handle)
ncbi_id = ncbi_data_parser.get_ncbi_tax_id(read_handle)
ncbi_id = ncbi_data_parser.get_ncbi_tax_id(read_handle)
seq = read_handle[0][u'GBSeq_sequence']
tax_name = tax_name.replace(" ","_") #TODO check that searches are using names without spaces
tax_name = tax_name.replace(" ", "_") #TODO check that searches are using names without spaces
self.ncbiid_to_spn[ncbi_id] = tax_name
self.acc_ncbi_dict[gb_id] = ncbi_id
self.acc_tax_seq_dict[gb_id] = {'taxname':tax_name, "^ncbi:taxon":ncbi_id, 'seq':seq} #This is going to be a memory hog...
self.acc_tax_seq_dict[gb_id] = {'taxname':tax_name, "^ncbi:taxon":ncbi_id, 'seq':seq}#ToDo memory hog
with open(seq_path, 'w') as fi:
fi.write("> {} taxname:{} ncbi:{}\n".format(gb_id, tax_name, ncbi_id))
fi.write(self.acc_tax_seq_dict[gb_id]['seq'])
Expand Down Expand Up @@ -209,7 +200,7 @@ def entrez_efetch(self, gb_id):
if i < tries - 1: # i is zero indexed
continue
else:
raise
raise e
# break
assert handle is not None, ("your handle file to access data from efetch does not exist. "
"Likely an issue with the internet connection of ncbi. Try rerun...")
Expand Down
30 changes: 9 additions & 21 deletions physcraper/opentree_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,8 @@ def check_if_ottid_in_synth(ottid):
elif r.status_code == 502:
sys.stderr.write("Bad OpenTree taxon ID: {}".format(ottid))
return 0
else:
sys.stderr.write("unexpected status code from node_info call: {}".format(r.status_code))
return 0
sys.stderr.write("unexpected status code from node_info call: {}".format(r.status_code))
return 0
except requests.ConnectionError:
sys.stderr.write("Connection Error - coud not get taxon information from OpenTree\n")

Expand Down Expand Up @@ -235,10 +234,9 @@ def get_citations_from_json(synth_response, citations_file):
# use append

def conflict_tree(inputtree, otu_dict):
"""Write out a tree with labels taht work for the OPenTree Conflict API
"""Write out a tree with labels that work for the OPenTree Conflict API
"""
tmp_tree = copy.deepcopy(inputtree)
new_names = set()
i = 1
for node in tmp_tree:
i += 1
Expand All @@ -257,9 +255,6 @@ def get_tree_from_synth(ott_ids, label_format="name", citation="cites.txt"):
get_citations_from_json(synth_json.response_dict, citation)
return synth_json.tree




def get_tree_from_study(study_id, tree_id, label_format="ot:originallabel"):
"""Create a dendropy Tree object from OpenTree data.
:param study_id: OpenTree Study Id
Expand All @@ -275,8 +270,6 @@ def get_tree_from_study(study_id, tree_id, label_format="ot:originallabel"):
cites = study_nexson['nexml']['^ot:studyPublicationReference']
return tree_obj, cites



# ATT is a dumb acronym for Alignment Tree Taxa object
def generate_ATT_from_phylesystem(alnfile,
aln_schema,
Expand Down Expand Up @@ -317,9 +310,9 @@ def generate_ATT_from_phylesystem(alnfile,
orig_lab_to_otu = {}
treed_taxa = {}
ingroup_otus = nexson_helpers.get_subtree_otus(study_nexson,
tree_id=tree_id,
subtree_id="ingroup",
return_format="otu_id")
tree_id=tree_id,
subtree_id="ingroup",
return_format="otu_id")
if not ingroup_otus:
sys.stdout.write("No ingroup annotation found in tree; using all taxa.\n \
Please update tree annotation through OpenTree curation app.\n")
Expand Down Expand Up @@ -491,14 +484,9 @@ def OtuJsonDict(id_to_spn, id_dict):
if info:
ottid, ottname, ncbiid = info
if not info:
ncbi = NCBITaxa()
name2taxid = ncbi.get_name_translator([spn])
if len(name2taxid.items()) >= 1:
ncbiid = name2taxid.items()[0][1][0]
else:
sys.stderr.write("match to taxon {} not found in open tree taxonomy or NCBI. "
"Proceeding without taxon info\n".format(spn))
nosp.append(spn)
sys.stderr.write("match to taxon {} not found in open tree taxonomy or NCBI. "
"Proceeding without taxon info\n".format(spn))
nosp.append(spn)
ncbi_spn = None
if ncbiid is not None:
ncbi_spn = spn
Expand Down
Loading

0 comments on commit 34c1152

Please sign in to comment.