Skip to content

Commit

Permalink
Chinese papers
Browse files (browse the repository at this point in the history)
  • Loading branch information
yindaheng98 committed May 18, 2024
1 parent c2fc27e commit 4cde227
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 5 deletions.
5 changes: 2 additions & 3 deletions citation_crawler/graph.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import abc
import logging
import asyncio
from tqdm.asyncio import tqdm
from typing import Tuple, Optional, AsyncIterable, List
from typing import Tuple, Optional, AsyncIterable, List, Dict
import random
from dblp_crawler.gather import gather
from .items import Paper
Expand Down Expand Up @@ -150,7 +149,7 @@ async def _init_papers(self):
async for paper, news in tqdm(gather(*tasks), desc="Writing init papers", total=len(tasks)):
yield paper, news

async def _bfs_once(self) -> int:
async def _bfs_once(self):
# 初始化
if not self.inited:
async for paper, news in self._init_papers():
Expand Down
2 changes: 1 addition & 1 deletion citation_crawler/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def title(self) -> str:
return None

def title_hash(self) -> str:
return re.sub(r"[^0-9a-z]", "", self.title().lower())
return re.sub(r"[^0-9a-z\u4e00-\u9fa5]", "", self.title().lower())

@abc.abstractmethod
def year(self) -> Optional[int]:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

setup(
name='citation_crawler',
version='2.10.1',
version='2.10.2',
author='yindaheng98',
author_email='[email protected]',
url='https://github.com/yindaheng98/citation-crawler',
Expand Down

0 comments on commit 4cde227

Please sign in to comment.