Skip to content

Commit

Permalink
advanced sleep
Browse files: browse the repository at this point in the history
  • Loading branch information
yindaheng98 committed Apr 22, 2024
1 parent 21b0c93 commit 06268fa
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
8 changes: 8 additions & 0 deletions citation_crawler/crawlers/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def getenv_headers(key) -> Dict:
file_sem = Semaphore(512)
http_headers = getenv_headers('HTTP_HEADERS')
http_sleep = getenv_float('HTTP_SLEEP') or 0
last_request_time = datetime.now()


def get_cache_datetime(path) -> datetime:
Expand Down Expand Up @@ -78,6 +79,13 @@ async def download_item(url: str, path: str, cache_days: int, is_valid: Callable
async with http_sem:
try:
async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(verify_ssl=False), headers=http_headers) as session:
if http_sleep is not None:
global last_request_time
last_request_timedelta = datetime.now() - last_request_time
last_request_time += last_request_timedelta
wait = http_sleep - last_request_timedelta.total_seconds()
if wait > 0:
await asyncio.sleep(wait)
async with session.get(url,
proxy=os.getenv("HTTP_PROXY"),
timeout=os.getenv("HTTP_TIMEOUT") or 30) as response:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

setup(
name='citation_crawler',
version='2.9.1',
version='2.9.2',
author='yindaheng98',
author_email='[email protected]',
url='https://github.com/yindaheng98/citation-crawler',
Expand Down

0 comments on commit 06268fa

Please sign in to comment.