Merge pull request #142 from McTavishLab/pylint

Pylint
McTavishLab · Sep 2, 2020 · 34c1152 · 34c1152
2 parents d03f856 + ff061b7
commit 34c1152
Show file tree

Hide file tree

Showing 14 changed files with 301 additions and 272 deletions.
diff --git a/README.md b/README.md
@@ -45,11 +45,11 @@ and tools for
 
 Physcraper relies on:
 
-• [Dendropy](https://dendropy.org/primer/index.html) *Sukumaran, J and
+[Dendropy](https://dendropy.org/primer/index.html) *Sukumaran, J and
 MT Holder. 2010. DendroPy: a Python library for phylogenetic computing.
 Bioinformatics 26: 1569-1571*.
 
-• [The Open Tree of Life
+[The Open Tree of Life
 Project](https://tree.opentreeoflife.org/opentree/argus/opentree12.3@ott93302)
 *Open Tree of Life, Benjamin Redelings, Luna Luisa Sanchez Reyes, Karen
 A. Cranston, Jim Allman, Mark T. Holder, & Emily Jane McTavish. (2019).

diff --git a/physcraper/aligntreetax.py b/physcraper/aligntreetax.py
@@ -383,7 +383,7 @@ def read_in_tree(self, tree, tree_schema=None):
         elif isinstance(tree, datamodel.treemodel.Tree):
             self.tre = tree
         assert isinstance(self.tre, datamodel.treemodel.Tree)
-        
+
     def read_in_aln(self, alignment, aln_schema):
         """Reads in an alignment to the object taxon namespace."""
         assert isinstance(alignment, str)

diff --git a/physcraper/helpers.py b/physcraper/helpers.py
@@ -1,12 +1,9 @@
+"""Some minor handy functions"""
 import os
 import sys
 import subprocess
 import contextlib
 
-if sys.version_info[0] < 3:
-    str_type = unicode
-else:
-    str_type = bytes 
 
 
 _DEBUG = 0
@@ -17,30 +14,29 @@ def debug(msg):
         print(msg)
 
 
-
 def get_raxml_ex():
-            if subprocess.check_call(["which", "raxmlHPC"]) == 0:
-                rax_ex = "raxmlHPC"
-            else:
-                sys.stderr.write("Did not find raxml executable. Exiting \n")
-                sys.exit()
-            return rax_ex
+    """Check location of RaxML executable"""
+    if subprocess.check_call(["which", "raxmlHPC"]) == 0:
+        rax_ex = "raxmlHPC"
+        return rax_ex
+    sys.stderr.write("Did not find raxml executable. Exiting \n")
+    sys.exit()
+
 
 
-def to_string(input):
-    if isinstance(input, str):
-        return input
-    elif isinstance(input, str_type):
-        output = input.decode('ascii','replace')
+def to_string(inputstr):
+    """Coerce to string"""
+    if isinstance(inputstr, bytes):
+        output = inputstr.decode('ascii', 'replace')
         return output
-    else:
-        return input
+    return inputstr
 
 
 @contextlib.contextmanager
 def cd(path):
+    """Change directories and return to original directory"""
     # print 'initially inside {0}'.format(os.getcwd())
-    CWD = os.getcwd()
+    curr = os.getcwd()
     os.chdir(path)
     # print 'inside {0}'.format(os.getcwd())
     try:
@@ -49,18 +45,7 @@ def cd(path):
         print('Exception caught: ', sys.exc_info()[0])
     finally:
         # print 'finally inside {0}'.format(os.getcwd())
-        os.chdir(CWD)
-
-
-#def generate_from_run(workdir,
-#                      seqaln='physcraper.fas',
-                      # mattype='fasta',
-                      # configfi='config.out',
-                      # treefile='physcraper.tre',
-                      # schema_trf='newick',
-                      # search_taxon = 'mrca.txt'):
-
-
+        os.chdir(curr)
 
 def standardize_label(item):
     """Make sure that the tip names are unicode.
@@ -72,5 +57,3 @@ def standardize_label(item):
     """
     item_edit = item.replace(" ", "_")
     return item_edit
-
-
diff --git a/physcraper/ids.py b/physcraper/ids.py
@@ -1,16 +1,9 @@
+"""Link together NCBI and Open Tree identifiers and names, with GenBank information for updated sequences"""
 import sys
 import os
-import json
 import time
-
-
-if sys.version_info < (3,):
-    from urllib2 import HTTPError
-else:
-    from urllib.error import HTTPError
-
+from urllib.error import HTTPError
 from Bio import Entrez
-
 from physcraper import ncbi_data_parser, ConfigObj  # is the ncbi data parser class and associated functions
 from physcraper.helpers import debug
 
@@ -19,7 +12,7 @@
 
 
 
-class IdDicts(object):
+class IdDicts():
     """Class contains different taxonomic identifiers and helps to find the corresponding ids between ncbi and OToL
 
         To build the class the following is needed:
@@ -61,13 +54,14 @@ class IdDicts(object):
 
           * **Optional**:
 
-              * depending on blasting method:
-               * self.ncbi_parser: for local blast, initializes the ncbi_parser class, that contains information about rank and identifiers
+            * depending on blasting method:
+            * self.ncbi_parser: for local blast,
+               initializes the ncbi_parser class, that contains information about rank and identifiers
     """
 
-    def __init__(self, configfile = None, workdir=None):
+    def __init__(self, configfile=None):
         """Generates a series of name disambiguation dicts"""
-        if configfile == None:
+        if configfile is None:
             self.config = ConfigObj()
         elif isinstance(configfile, ConfigObj):
             self.config = configfile
@@ -82,9 +76,9 @@ def __init__(self, configfile = None, workdir=None):
         self.ott_to_name = {}  # used in add_otu to get name from otuId
         self.acc_ncbi_dict = {}  # filled by ncbi_parser (by subprocess in earlier versions of the code).
         self.spn_to_ncbiid = {}  # spn to ncbi_id, it's only fed by the ncbi_data_parser, but makes it faster
-        self.ncbiid_to_spn = {} #TODO when is this generated? MK: well, here. it is filled with information from genbank to speed up translation between ncbi_taxon_ids and names. similar to  acc_ncbi_dict and spn_to_ncbiid.
-        tax_folder = self.config.taxonomy_dir
-        fi = open(self.config.ott_ncbi)  # This is in the taxonomy folder of the repo, needs to be updated by devs when OpenTree taxonomy changes.
+        self.ncbiid_to_spn = {}
+        fi = open(self.config.ott_ncbi)
+        # This is in the taxonomy folder of the repo, needs to be updated by devs when OpenTree taxonomy changes.
         for lin in fi:
             lii = lin.split(",")
             self.ott_to_ncbi[int(lii[0])] = int(lii[1])
@@ -107,7 +101,7 @@ def __init__(self, configfile = None, workdir=None):
     def get_ncbiid_from_acc(self, acc):
         '''checks local dicts, and then runs eftech to get ncbi id for accession'''
         gb_id = acc
-        if gb_id in self.acc_ncbi_dict:#TODO if the accession number and tax id are here, does that mean the name is in ncbiid_to_spn?
+        if gb_id in self.acc_ncbi_dict:
             ncbi_id = self.acc_ncbi_dict[gb_id]
         elif gb_id in self.acc_tax_seq_dict:
             ncbi_id = self.acc_tax_seq_dict[gb_id]["^ncbi:taxon"]
@@ -121,6 +115,7 @@ def get_ncbiid_from_acc(self, acc):
 
  #removed function find_tax_id because it wasn't being used
     def get_tax_seq_acc(self, acc):
+        """Pulls the taxon ID and the full sequences from NCBI"""
         if not os.path.exists(self.full_seq_path):
             os.makedirs(self.full_seq_path)
         gb_id = acc
@@ -136,23 +131,19 @@ def get_tax_seq_acc(self, acc):
         elif os.path.exists(seq_path):
             fi = open(seq_path)
             header = fi.readline().strip('>')
-            #try:
-            assert(header.split()[1].startswith('taxname:'))
+            assert header.split()[1].startswith('taxname:')
             tax_name = header.split()[1].strip('taxname:')
             ncbi_id = header.split()[2].strip('ncbi:')
             seq = "".join(fi.readlines())
-#            except IndexError:
- #               print("IndexError")
-  #              pass
-        if seq == None:
+        if seq is None:
             read_handle = self.entrez_efetch(gb_id)
             tax_name = ncbi_data_parser.get_ncbi_tax_name(read_handle)
-            ncbi_id =  ncbi_data_parser.get_ncbi_tax_id(read_handle)
+            ncbi_id = ncbi_data_parser.get_ncbi_tax_id(read_handle)
             seq = read_handle[0][u'GBSeq_sequence']
-            tax_name = tax_name.replace(" ","_") #TODO check that searches are using names without spaces
+            tax_name = tax_name.replace(" ", "_") #TODO check that searches are using names without spaces
             self.ncbiid_to_spn[ncbi_id] = tax_name
             self.acc_ncbi_dict[gb_id] = ncbi_id
-            self.acc_tax_seq_dict[gb_id] = {'taxname':tax_name, "^ncbi:taxon":ncbi_id, 'seq':seq} #This is going to be a memory hog...
+            self.acc_tax_seq_dict[gb_id] = {'taxname':tax_name, "^ncbi:taxon":ncbi_id, 'seq':seq}#ToDo memory hog
             with open(seq_path, 'w') as fi:
                 fi.write("> {} taxname:{} ncbi:{}\n".format(gb_id, tax_name, ncbi_id))
                 fi.write(self.acc_tax_seq_dict[gb_id]['seq'])
@@ -209,7 +200,7 @@ def entrez_efetch(self, gb_id):
                 if i < tries - 1:  # i is zero indexed
                     continue
                 else:
-                    raise
+                    raise e
             # break
         assert handle is not None, ("your handle file to access data from efetch does not exist. "
                                     "Likely an issue with the internet connection of ncbi. Try rerun...")

diff --git a/physcraper/opentree_helpers.py b/physcraper/opentree_helpers.py
@@ -136,9 +136,8 @@ def check_if_ottid_in_synth(ottid):
         elif r.status_code == 502:
             sys.stderr.write("Bad OpenTree taxon ID: {}".format(ottid))
             return 0
-        else:
-            sys.stderr.write("unexpected status code from node_info call: {}".format(r.status_code))
-            return 0
+        sys.stderr.write("unexpected status code from node_info call: {}".format(r.status_code))
+        return 0
     except requests.ConnectionError:
         sys.stderr.write("Connection Error - coud not get taxon information from OpenTree\n")
 
@@ -235,10 +234,9 @@ def get_citations_from_json(synth_response, citations_file):
 # use append
 
 def conflict_tree(inputtree, otu_dict):
-    """Write out a tree with labels taht work for the OPenTree Conflict API
+    """Write out a tree with labels that work for the OpenTree Conflict API
     """
     tmp_tree = copy.deepcopy(inputtree)
-    new_names = set()
     i = 1
     for node in tmp_tree:
         i += 1
@@ -257,9 +255,6 @@ def get_tree_from_synth(ott_ids, label_format="name", citation="cites.txt"):
     get_citations_from_json(synth_json.response_dict, citation)
     return synth_json.tree
 
-
-
-
 def get_tree_from_study(study_id, tree_id, label_format="ot:originallabel"):
     """Create a dendropy Tree object from OpenTree data.
     :param study_id: OpenTree Study Id
@@ -275,8 +270,6 @@ def get_tree_from_study(study_id, tree_id, label_format="ot:originallabel"):
     cites = study_nexson['nexml']['^ot:studyPublicationReference']
     return tree_obj, cites
 
-
-
 # ATT is a dumb acronym for Alignment Tree Taxa object
 def generate_ATT_from_phylesystem(alnfile,
                                   aln_schema,
@@ -317,9 +310,9 @@ def generate_ATT_from_phylesystem(alnfile,
     orig_lab_to_otu = {}
     treed_taxa = {}
     ingroup_otus = nexson_helpers.get_subtree_otus(study_nexson,
-                                                       tree_id=tree_id,
-                                                       subtree_id="ingroup",
-                                                       return_format="otu_id")
+                                                   tree_id=tree_id,
+                                                   subtree_id="ingroup",
+                                                   return_format="otu_id")
     if not ingroup_otus:
         sys.stdout.write("No ingroup annotation found in tree; using all taxa.\n \
                           Please update tree annotation through OpenTree curation app.\n")
@@ -491,14 +484,9 @@ def OtuJsonDict(id_to_spn, id_dict):
             if info:
                 ottid, ottname, ncbiid = info
             if not info:
-                ncbi = NCBITaxa()
-                name2taxid = ncbi.get_name_translator([spn])
-                if len(name2taxid.items()) >= 1:
-                    ncbiid = name2taxid.items()[0][1][0]
-                else:
-                    sys.stderr.write("match to taxon {} not found in open tree taxonomy or NCBI. "
-                                     "Proceeding without taxon info\n".format(spn))
-                    nosp.append(spn)
+                sys.stderr.write("match to taxon {} not found in open tree taxonomy or NCBI. "
+                                 "Proceeding without taxon info\n".format(spn))
+                nosp.append(spn)
             ncbi_spn = None
             if ncbiid is not None:
                 ncbi_spn = spn