Skip to content

Commit

Permalink
sleep request
Browse files Browse the repository at this point in the history
  • Loading branch information
yindaheng98 committed Apr 22, 2024
1 parent 4c402b0 commit 21b0c93
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 2 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,10 @@ optional arguments:
* default: `8`
* `HTTP_HEADERS`
* Headers for HTTP requests
* default: None
* default: `None`
* `HTTP_SLEEP`
* Time to sleep after each HTTP request (in seconds)
* default: `0`

### Write to a JSON file

Expand Down
14 changes: 14 additions & 0 deletions citation_crawler/crawlers/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import aiohttp
import logging
from aiofile import async_open
import asyncio
from asyncio import Semaphore

logger = logging.getLogger("common")
Expand All @@ -21,6 +22,16 @@ def getenv_int(key) -> int:
return None


def getenv_float(key) -> "float | None":
    """Read environment variable *key* and parse it as a float.

    Args:
        key: Name of the environment variable to look up.

    Returns:
        The parsed value, or ``None`` when the variable is unset or its
        text is not a valid float literal.
    """
    raw = os.getenv(key)
    if raw is None:
        return None
    try:
        return float(raw)
    except ValueError:
        # os.getenv always yields a str, so ValueError is the only parse
        # failure; a narrow except keeps KeyboardInterrupt/SystemExit alive.
        return None


def getenv_headers(key) -> Dict:
headers = os.getenv(key)
if headers is not None:
Expand All @@ -35,6 +46,7 @@ def getenv_headers(key) -> Dict:
# Semaphore for HTTP work; sized by http_concorent when it is set, else 8.
http_sem = Semaphore(http_concorent if http_concorent is not None else 8)
# Semaphore for file work, fixed at 512 permits.
file_sem = Semaphore(512)
# Request headers parsed from the HTTP_HEADERS environment variable.
http_headers = getenv_headers('HTTP_HEADERS')
# Seconds to sleep after a request, from HTTP_SLEEP; the `or 0` fallback
# turns an unset or unparsable value into 0, so this is never None.
http_sleep = getenv_float('HTTP_SLEEP') or 0


def get_cache_datetime(path) -> datetime:
Expand Down Expand Up @@ -75,6 +87,8 @@ async def download_item(url: str, path: str, cache_days: int, is_valid: Callable
os.makedirs(os.path.dirname(save_path), exist_ok=True)
async with async_open(save_path, 'w') as f:
await f.write(text)
if http_sleep is not None:
await asyncio.sleep(http_sleep)
return text
except Exception as e:
logger.error(" down err: %s" % e)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

setup(
name='citation_crawler',
version='2.9',
version='2.9.1',
author='yindaheng98',
author_email='[email protected]',
url='https://github.com/yindaheng98/citation-crawler',
Expand Down

0 comments on commit 21b0c93

Please sign in to comment.