Skip to content

Commit

Permalink
add autolabels, full_texts, and csl_citations
Browse files (browse the repository at this point in the history)
  • Loading branch information
tomlue committed Apr 16, 2024
1 parent 841e65e commit 203385a
Showing 1 changed file with 22 additions and 1 deletion.
23 changes: 22 additions & 1 deletion sysrev/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,33 @@ def sync(self, client, project_id):
article_data_df = pd.DataFrame(article_data)
article_data_df['resolve'] = article_data_df['resolve'].apply(json.dumps)

article_info = []
for article_id in tqdm.tqdm(article_data_df['article-id'], total=n_articles):
article_info.append(client.get_article_info(project_id, article_id))

full_texts = pd.DataFrame([{**ft} for a in article_info for ft in a['article'].get('full-texts', []) ])
full_texts.columns = [col.split('/')[-1] for col in full_texts.columns]

auto_labels = pd.DataFrame([
{**{'article-id': a['article'].get('article-id'), 'label-id': label_id}, **details} for a in article_info
for label_id, details in a['article'].get('auto-labels', {}).items() ])
auto_labels['answer'] = auto_labels['answer'].apply(json.dumps)

csl_citations = pd.DataFrame([
{**{k: json.dumps(v) if isinstance(v, (dict, list)) else v for k, v in item['itemData'].items()},
'article-id': a['article'].get('article-id')}
for a in article_info for item in a['article'].get('csl-citation', {}).get('citationItems', [])])

# write everything to .sr/sr.sqlite
conn = sqlite3.connect('.sr/sr.sqlite')

# Writing data to tables
labels_df.to_sql('labels', conn, if_exists='replace', index=False)
article_label_df.to_sql('article_label', conn, if_exists='replace', index=False)
article_data_df.to_sql('article_data', conn, if_exists='replace', index=False)
full_texts.to_sql('full_texts', conn, if_exists='replace', index=False)
auto_labels.to_sql('auto_labels', conn, if_exists='replace', index=False)
csl_citations.to_sql('csl_citations', conn, if_exists='replace', index=False)

conn.close()
class Client():
Expand Down Expand Up @@ -185,7 +205,8 @@ def get_article_info(self, project_id, article_id):
endpoint = f"{self.base_url}/api-json/article-info/{article_id}"
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
body = {"project-id": project_id,}
- return requests.get(endpoint, headers=headers, json=body)
+ response = requests.get(endpoint, headers=headers, json=body)
+ return response.json()['result']

def upload_jsonlines(self, file_path, project_id):
url = f"{self.base_url}/api-json/import-files/{project_id}"
Expand Down

0 comments on commit 203385a

Please sign in to comment.