Skip to content

Commit

Permalink
Updated to Python3
Browse files — browse the repository at this point in the history
  • Loading branch information
nole-lin committed Oct 20, 2019
1 parent 0837a7d commit 2461b17
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 8 deletions.
34 changes: 27 additions & 7 deletions PySysrev/funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,42 @@ def getAnnotations(project_id):
data['external_id'] = [x['pmid'] for x in result]
data['sysrev_id'] = [x['article-id'] for x in result]
data['text'] = [x['context']['text-context'] for x in result]
data['start'] = [x['context']['start-offset'] if 'start-offset' in x['context'].keys() else None for x in result]
data['end'] = [x['context']['end-offset'] if 'end-offset' in x['context'].keys() else None for x in result]
data['start'] = [x['context']['start-offset'] if 'start-offset' in list(x['context'].keys()) else None for x in result]
data['end'] = [x['context']['end-offset'] if 'end-offset' in list(x['context'].keys()) else None for x in result]
data['datasource'] = ['pubmed'] * len(result)

df = pandas.DataFrame.from_dict(data)
return df

def processAnnotations(project_id, label):
response = requests.get('https://sysrev.com/web-api/project-annotations?project-id=' + str(project_id))
SYSREV_DATA = response.json()
annotations = [x for x in SYSREV_DATA['result'] if 'start-offset' in x['context'].keys()]

def remove_overlapping_entities(df):
    """Return the index labels of annotations that overlap an earlier one.

    Rows are grouped by their ``text`` value (in order of first appearance)
    and visited in DataFrame order within each group. The first annotation
    to claim a character range wins: a later row of the same text whose
    half-open span ``[start, end)`` shares at least one offset with an
    already accepted span is reported for removal. Rejected rows do not
    claim their offsets, and empty spans (``start >= end``) never overlap.

    Args:
        df: DataFrame with ``text``, ``start`` and ``end`` columns. The
            ``start``/``end`` values must be convertible with ``int()``,
            so null offsets have to be filtered out beforehand.

    Returns:
        list: index labels of ``df`` identifying the rows to drop.
    """
    idx_to_remove = []
    for text_id in df['text'].unique():
        # Offsets already covered by an accepted annotation of this text.
        # A set keeps each overlap check proportional to the span length
        # instead of rescanning an ever-growing list for every offset.
        claimed = set()
        sub_df = df[df['text'] == text_id]
        for index, start, end in zip(sub_df.index, sub_df['start'], sub_df['end']):
            span = range(int(start), int(end))
            if claimed.isdisjoint(span):
                claimed.update(span)
            else:
                idx_to_remove.append(index)
    return idx_to_remove

df = getAnnotations(project_id)
df = df.drop_duplicates(subset=['text', 'start', 'end'])
df = df[(df['start'].notnull()) & (df['end'].notnull())]
df = df[df['end'] - df['start'] < 50]
df = df.reset_index(drop=True)
overlapping_idx = remove_overlapping_entities(df)
df = df.drop(df.index[overlapping_idx])
annotations = df.to_dict('records')

def process_annotation(annotation):
return [annotation["context"]["text-context"], {"entities": [(annotation["context"]["start-offset"],annotation["context"]["end-offset"],label)]}]
return [annotation["text"], {"entities": [(int(annotation["start"]), int(annotation["end"]),label)]}]

processed_annotations = map(process_annotation, annotations)
processed_annotations = list(map(process_annotation, annotations))

def combine_annotations(processed_annotations):
combined_annotations = {}
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import setup

setup(name='PySysrev',
version='1.0',
version='1.2.1',
description='Gets annotations from Sysrev API',
url='https://github.com/sysrev/PySysrev',
author='nole-lin',
Expand All @@ -12,4 +12,5 @@
'requests',
'pathlib'
],
python_requires='>=3.6',
zip_safe=False)

0 comments on commit 2461b17

Please sign in to comment.