Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retry OpenAlex SSL exceptions #48

Merged
merged 1 commit into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ dependencies = [
"pandas",
"requests",
"python-dotenv",
"dimcli"
"dimcli",
"tenacity"
]

[tool.pytest.ini_options]
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,14 @@ sphinxcontrib-serializinghtml==1.1.10
# via sphinx
stack-data==0.6.3
# via ipython
tenacity==8.4.1
# via rialto-airflow (pyproject.toml)
tqdm==4.66.4
# via dimcli
traitlets==5.14.3
# via
# ipython
# matplotlib-inline
typing-extensions==4.12.2
# via ipython
tzdata==2024.1
# via pandas
urllib3==2.2.1
Expand Down
11 changes: 10 additions & 1 deletion rialto_airflow/harvest/openalex.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import time

import requests
from requests.exceptions import SSLError
from tenacity import retry, retry_if_exception_type, stop_after_delay, wait_random

from rialto_airflow.utils import invert_dict

Expand All @@ -27,6 +29,11 @@ def doi_orcids_pickle(authors_csv, pickle_file, limit=None):
pickle.dump(invert_dict(orcid_dois), handle, protocol=pickle.HIGHEST_PROTOCOL)


@retry(
wait=wait_random(1, 5),
stop=stop_after_delay(60),
retry=retry_if_exception_type(SSLError),
)
def dois_from_orcid(orcid: str):
"""
Pass in the ORCID ID and get back an iterator of DOIs for publications authored by that person.
Expand Down Expand Up @@ -79,5 +86,7 @@ def works_from_author_id(author_id, limit=None):
else:
yield result
else:
- logging.error(f"encountered non-200 response: {url} {params}")
+ logging.error(
+ f"encountered HTTP {resp.status_code} response from {url} {params}: {resp.text}"
+ )
has_more = False
Loading