Skip to content

Commit

Permalink
add abstract
Browse files Browse the repository at this point in the history
  • Loading branch information
yindaheng98 committed Apr 15, 2024
1 parent b5684ad commit fd8cc68
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 3 deletions.
8 changes: 6 additions & 2 deletions citation_crawler/crawlers/ss.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ def doi(self) -> Optional[str]:
doi = re.sub(r"^/+", "", u.path)
return doi

def abstract(self) -> Optional[str]:
    """Return the paper's abstract text, or None when it is not present.

    The value is read from the 'abstract' entry of ``self.data``. The
    return annotation is ``Optional[str]`` (it was ``Optional[int]``) so
    that it agrees with the abstract-method declaration of ``abstract``
    in citation_crawler/items.py.
    """
    if 'abstract' in self.data:
        return self.data['abstract']
    # No 'abstract' key in the record: report "no abstract" explicitly.
    return None

async def _get_authors_from_author_data(self) -> Iterable[Author]:
if not self.author_data:
authors = []
Expand Down Expand Up @@ -146,7 +150,7 @@ async def get_citations(self) -> Iterable[Paper]:
yield paper


fields_references = f"title,year,authors,externalIds,publicationTypes,journal"
fields_references = f"title,abstract,year,authors,externalIds,publicationTypes,journal"
root_references = f"semanticscholar/references--{fields_references.replace(',', '-')}"
root_citations = f"semanticscholar/citations--{fields_references.replace(',', '-')}"

Expand Down Expand Up @@ -192,7 +196,7 @@ def paper_is_valid(text):


fields_authors_sub = ','.join([("authors." + f) for f in fields_authors.split(',')])
fields_paper = f"title,year,publicationDate,{fields_authors_sub},externalIds,publicationTypes,journal"
fields_paper = f"title,abstract,year,publicationDate,{fields_authors_sub},externalIds,publicationTypes,journal"
root_paper = f"semanticscholar/paper--{fields_paper.replace(',', '-')}"


Expand Down
6 changes: 6 additions & 0 deletions citation_crawler/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ def date(self) -> Optional[str]:
def doi(self) -> Optional[str]:
return None

@abc.abstractmethod
def abstract(self) -> Optional[str]:
    """Return the abstract text of this paper, if any.

    Declared as an abstract method; this base body always returns None.
    """
    return None

@abc.abstractmethod
async def authors(self) -> Iterable[Author]:
return
Expand Down Expand Up @@ -91,6 +95,8 @@ async def __dict__(self) -> dict:
d['date'] = self.date()
if self.doi():
d['doi'] = self.doi()
if self.abstract():
d['abstract'] = self.abstract()
d['authors'] = []
async for author in self.authors():
d['authors'].append(author.__dict__())
Expand Down
3 changes: 3 additions & 0 deletions citation_crawler/summarizers/neo4j.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ async def add_paper(tx, paper: Paper):
"SET p.title=$title, p.year=$year"
if paper.doi():
n4jset += ", p.doi=$doi"
if paper.abstract():
n4jset += ", p.abstract=$abstract"
if paper.dblp_id():
n4jset += ", p.dblp_key=$dblp_id"
if paper.paperId():
Expand All @@ -39,6 +41,7 @@ async def add_paper(tx, paper: Paper):
paperId=paper.paperId(),
dblp_id=paper.dblp_id(),
doi=paper.doi(),
abstract=paper.abstract(),
date=date)


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

setup(
name='citation_crawler',
version='2.8.2',
version='2.8.3',
author='yindaheng98',
author_email='[email protected]',
url='https://github.com/yindaheng98/citation-crawler',
Expand Down

0 comments on commit fd8cc68

Please sign in to comment.