From 8eb806b84804baa201edc363f5c4b0eaabc373d1 Mon Sep 17 00:00:00 2001 From: Thomas Luechtefeld Date: Mon, 3 Jun 2024 08:51:10 -0400 Subject: [PATCH] fix issue with get article info --- setup.py | 2 +- sysrev/client.py | 26 +++++++++++++++++--------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index d840904..cbcbb80 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='sysrev', - version='1.3.7', + version='1.3.10', description='get sysrev project data and use the sysrev api', long_description=long_description, long_description_content_type='text/markdown', # Specify the content type here diff --git a/sysrev/client.py b/sysrev/client.py index 998a041..83320ba 100644 --- a/sysrev/client.py +++ b/sysrev/client.py @@ -32,14 +32,21 @@ class Client(): def __init__(self, api_key, base_url="https://www.sysrev.com"): self.api_key = api_key self.base_url = base_url + self.synchronizer = Synchronizer() def sync(self, project_id): - Synchronizer().sync(self, project_id) + self.synchronizer.sync(self, project_id) + + def get(self, endpoint, headers, params): + response = requests.get(endpoint, headers=headers, params=params) + if response.status_code != 200: + raise Exception(f"Request to {endpoint} failed with status code {response.status_code}") + return response def get_project_info(self, project_id): endpoint = f"{self.base_url}/api-json/project-info" headers = {"Authorization": f"Bearer {self.api_key}"} - response = requests.get(endpoint, headers=headers, params={"project-id": project_id}) + response = self.get(endpoint, headers=headers, params={"project-id": project_id}) return response.json() def get_labels(self, project_id): @@ -93,7 +100,7 @@ def get_article_info(self, project_id, article_id): headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"} body = {"project-id": project_id,} response = requests.get(endpoint, headers=headers, json=body) - return response.json()['result'] + return response.json().get('result', None) def upload_jsonlines(self, file_path, project_id): url = f"{self.base_url}/api-json/import-files/{project_id}" @@ -199,7 +206,7 @@ def write_df(self, df, name, db_path='.sr/sr.sqlite'): def sync_article_info(self, client:Client, project_id, article_ids): article_info = [] - for article_id in tqdm.tqdm(article_ids, total=len(article_ids)): + for article_id in tqdm.tqdm(article_ids, total=len(article_ids), desc="Fetching article info"): article_info.append(client.get_article_info(project_id, article_id)) full_texts = pd.DataFrame([{**ft} for a in article_info for ft in a['article'].get('full-texts', []) ]) @@ -228,7 +235,7 @@ def sync_labels(self, client, project_id): self.write_df(labels_df,'labels') # TODO - this could be made more efficient by checking sqlite state and updating the sysrev api - def sync(self, client, project_id): + def sync(self, client : Client, project_id): if not pathlib.Path('.sr/sr.sqlite').exists(): self.create_sqlite_db() @@ -236,21 +243,22 @@ def sync(self, client, project_id): project_info = client.get_project_info(project_id) n_articles = project_info['result']['project']['stats']['articles'] - articles = [resp for resp in tqdm.tqdm(client.fetch_all_articles(project_id), total=n_articles)] + articles = [resp for resp in tqdm.tqdm(client.fetch_all_articles(project_id), total=n_articles, desc="Fetching articles")] article_labels = [a['labels'] for a in articles if a['labels'] is not None] article_labels = [lbl for lbls in article_labels for lbl in lbls] article_label_df = pd.DataFrame(article_labels) article_label_df['answer'] = article_label_df['answer'].apply(json.dumps) + self.write_df(article_label_df,'article_label') article_data = [{k: v for k, v in a.items() if k != 'labels'} for a in articles] article_data_df = pd.DataFrame(article_data) article_data_df['notes'] = article_data_df['notes'].apply(json.dumps) article_data_df['resolve'] = article_data_df['resolve'].apply(json.dumps) + self.write_df(article_data_df,'article_data') self.sync_article_info(client, project_id, article_data_df['article-id']) self.sync_labels(client, project_id) - # Writing data to tables - self.write_df(article_label_df,'article_label') - self.write_df(article_data_df,'article_data') \ No newline at end of file + + \ No newline at end of file