Skip to content

Commit

Permalink
fixes #2
Browse files Browse the repository at this point in the history
  • Loading branch information
tomlue committed Apr 20, 2024
1 parent bd45621 commit 83838bf
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 10 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name='sysrev',
version='1.3.4',
version='1.3.5',
description='get sysrev project data and use the sysrev api',
long_description=long_description,
long_description_content_type='text/markdown', # Specify the content type here
Expand Down
29 changes: 20 additions & 9 deletions sysrev/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def create_sqlite_db(self):

# TODO - this could be made more efficient by checking sqlite state and updating the sysrev api
def sync(self, client, project_id):
# check that db exists

if not pathlib.Path('.sr/sr.sqlite').exists():
self.create_sqlite_db()

Expand All @@ -111,11 +111,13 @@ def sync(self, client, project_id):
article_labels = [a['labels'] for a in articles if a['labels'] is not None]
article_labels = [lbl for lbls in article_labels for lbl in lbls]
article_label_df = pd.DataFrame(article_labels)
article_label_df['answer'] = article_label_df['answer'].apply(json.dumps)

article_data = [{k: v for k, v in a.items() if k != 'labels'} for a in articles]
article_data_df = pd.DataFrame(article_data)
article_data_df['notes'] = article_data_df['notes'].apply(json.dumps)
article_data_df['resolve'] = article_data_df['resolve'].apply(json.dumps)

article_info = []
for article_id in tqdm.tqdm(article_data_df['article-id'], total=n_articles):
article_info.append(client.get_article_info(project_id, article_id))
Expand All @@ -132,17 +134,26 @@ def sync(self, client, project_id):
{**{k: json.dumps(v) if isinstance(v, (dict, list)) else v for k, v in item['itemData'].items()},
'article-id': a['article'].get('article-id')}
for a in article_info for item in a['article'].get('csl-citation', {}).get('citationItems', [])])

csl_citations['issued'] = csl_citations['issued'].apply(json.dumps)
csl_citations['author'] = csl_citations['author'].apply(json.dumps)

# write everything to .sr/sr.sqlite
conn = sqlite3.connect('.sr/sr.sqlite')

def write_df(df,name):
# replace any - with _ in column names and remove duplicates
df.columns = df.columns.str.replace('-', '_')
df = df.loc[:,~df.columns.duplicated()]
df.to_sql(name, conn, if_exists='replace', index=False) if not df.empty else None


# Writing data to tables
labels_df.to_sql('labels', conn, if_exists='replace', index=False)
article_label_df.to_sql('article_label', conn, if_exists='replace', index=False)
article_data_df.to_sql('article_data', conn, if_exists='replace', index=False)
full_texts.to_sql('full_texts', conn, if_exists='replace', index=False)
auto_labels.to_sql('auto_labels', conn, if_exists='replace', index=False)
csl_citations.to_sql('csl_citations', conn, if_exists='replace', index=False)
write_df(labels_df,'labels')
write_df(article_label_df,'article_label')
write_df(article_data_df,'article_data')
write_df(full_texts,'full_texts')
write_df(auto_labels,'auto_labels')
write_df(csl_citations,'csl_citations')

conn.close()
class Client():
Expand Down

0 comments on commit 83838bf

Please sign in to comment.