Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support multiple names for gnr_resolve(), Add POST. Fixes #12. Resolution from TNRS Fixes #8. #19

Merged
merged 2 commits into from
Mar 30, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pytaxize/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
6 6971717 Apis nigrocincta Species]
'''

from .tnrs import tnrs_resolve
from .gnr import gnr_datasources, gnr_resolve
from .gni import gni_parse, gni_search, gni_details
from .col import col_children, col_downstream, col_search
Expand Down
48 changes: 35 additions & 13 deletions pytaxize/gnr.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,9 @@ def gnr_datasources(todf=True):
return df

def gnr_resolve(names='Homo sapiens', source=None, format='json', resolve_once='false',
                with_context='false', best_match_only='false', header_only='false',
                preferred_data_sources='false', http='get'):
    '''
    Uses the Global Names Resolver to resolve scientific names.

    :param names: List of taxonomic names, or a single name string
    :param source: Source to pull from, one of x, y, z
    :param format: One of json or xml
    :param resolve_once: Logical, stop at the first resolved match
    :param with_context: Logical, use surrounding names as context
    :param best_match_only: Logical, if true (default) return the best match only
    :param header_only: Return header only, logical
    :param preferred_data_sources: Return only preferred data sources.
    :param http: The HTTP method to use, one of "get" or "post". Default="get"

    Usage:
    >>> import pytaxize
    >>> pytaxize.gnr_resolve('Helianthus annus')
    >>> pytaxize.gnr_resolve(['Helianthus annus','Poa annua'])

    :return: list with one entry per queried name; [] for names with no result
    '''
    url = 'http://resolver.globalnames.org/name_resolvers'
    payload = {'data_source_ids': source, 'format': format,
               'resolve_once': resolve_once, 'with_context': with_context,
               'best_match_only': best_match_only, 'header_only': header_only,
               'preferred_data_sources': preferred_data_sources}
    if isinstance(names, list):
        # Long name lists would overflow a GET query string, so force POST.
        if len(names) > 300 and http == 'get':
            http = 'post'
        else:
            names = "|".join(names)
            payload['names'] = names
    else:
        payload['names'] = names

    if http == 'get':
        out = requests.get(url, params=payload)
        out.raise_for_status()
        result_json = out.json()
    else:
        if not isinstance(names, list):
            out = requests.post(url, params=payload)
            out.raise_for_status()
            result_json = out.json()
        else:
            # Very long lists are uploaded as a file; the service then works
            # asynchronously, so poll the returned URL until it finishes.
            # Text mode ('w'): names are str, and 'wb' + str fails on Python 3.
            with open('names_list.txt', 'w') as f:
                for name in names:
                    f.write(name + "\n")
            # Open the upload handle in a `with` so it is closed afterwards
            # (the original leaked this file handle).
            with open('names_list.txt', 'rb') as upload:
                out = requests.post(url, params=payload, files={'file': upload})
            out.raise_for_status()
            result_json = out.json()
            while result_json['status'] == 'working':
                result_url = result_json['url']
                time.sleep(10)
                out = requests.get(url=result_url)
                result_json = out.json()

    # Return [] for each query with no returned result.
    data = []
    for each_result in result_json['data']:
        data.append(each_result['results'] if 'results' in each_result else [])
    return data

if __name__ == "__main__":
    # Run the module's docstring examples as tests when executed directly.
    import doctest
    doctest.testmod()
46 changes: 46 additions & 0 deletions pytaxize/tnrs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import sys
import requests
import json

class NoResultException(Exception):
    '''Signals that a TNRS query produced no result.

    NOTE(review): declared here but never raised in this module --
    confirm whether callers or future code are meant to use it.
    '''
    pass

def tnrs_resolve(names='Homo sapiens', retrieve='all'):
    '''
    Uses the iPlant Taxonomic Name Resolution Service (TNRS) to resolve
    scientific names.  (The original docstring said "Global Names Resolver",
    a copy-paste from gnr.py -- this function queries TNRS.)

    :param names: List of taxonomic names, or a single name string
    :param retrieve: all OR best

    Usage:
    >>> import pytaxize
    >>> pytaxize.tnrs_resolve('Helianthus annus')
    >>> pytaxize.tnrs_resolve(['Helianthus annus','Poa annua'])

    :return: list with one sub-list of candidate matches per queried name
    '''
    url = 'http://tnrs.iplantc.org/tnrsm-svc/matchNames'
    if isinstance(names, list):
        names = ",".join(names)
    payload = {'retrieve': retrieve, 'names': names}
    out = requests.get(url, params=payload)
    out.raise_for_status()
    result_json = out.json()

    # Group consecutive items by their 'group' field: the service tags every
    # candidate match with the index of the query it answers.
    data = []
    items = result_json['items']
    if not items:
        # Guard: the original indexed items[0] and raised IndexError when the
        # service returned no items at all.
        return data
    single_list = []        # accumulates results for one queried name
    index = items[0]['group']
    for each_result in items:
        if each_result['group'] == index:
            single_list.append(each_result)
        else:
            data.append(single_list)
            single_list = [each_result]
            index = each_result['group']
    data.append(single_list)
    return data

if __name__ == "__main__":
    # Run the module's docstring examples as tests when executed directly.
    import doctest
    doctest.testmod()