Skip to content

Commit

Permalink
fix pmid keywords, get journal fullnames, get pmid url to paper for labelbuddy, and other small changes
Browse files Browse the repository at this point in the history
  • Loading branch information
koudyk committed Feb 28, 2024
1 parent cfbf6cd commit 7d3d020
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 5 deletions.
7 changes: 6 additions & 1 deletion src/pubget/_labelbuddy.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,13 +123,18 @@ def _prepare_document(
)
doc_info["metadata"]["efetch_url"] = efetch_url
elif "pmid" in id:
doc_info["display_title"] = f'pmid: {doc_meta["pmid"]}'
url = f"https://pubmed.ncbi.nlm.nih.gov/{doc_meta['pmid']}/"
doc_info["metadata"]["pmid_url"] = url
doc_info["display_title"] = (
f'pmid: <a href="{url}">{doc_meta["pmid"]}</a>'
)
doc_info["list_title"] = f"PMID{doc_meta['pmid']} {doc_text['title']}"
efetch_url = (
"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
f"efetch.fcgi?db=pubmed&id={doc_meta['pmid']}"
)
doc_info["metadata"]["efetch_url"] = efetch_url
doc_info["text"] = doc_info["text"].replace("\n\n# Body\n\n", "")
return doc_info


Expand Down
12 changes: 10 additions & 2 deletions src/pubget/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class MetadataExtractor(Extractor):
"doi",
"title",
"journal",
"journal_fullname",
"publication_year",
"license",
)
Expand Down Expand Up @@ -65,15 +66,22 @@ def _add_journal(
) -> None:
if id_type == "pmcid":
journal_elem = article.find(
# "front/journal-meta/journal-id[@journal-id-type='nlm-ta']"
"front/journal-meta/journal-id[@journal-id-type='nlm-ta']"
)
journal_fullname_elem = article.find(
"front/journal-meta/journal-title-group/journal-title"
)
if journal_elem is not None:
metadata["journal"] = journal_elem.text
elif journal_fullname_elem is not None:
metadata["journal_fullname"] = journal_fullname_elem.text
elif id_type == "pmid":
journal_elem = article.find(".//Journal/Title")
journal_elem = article.find(".//Journal/ISOAbbreviation")
journal_fullname_elem = article.find(".//Journal/Title")
if journal_elem is not None:
metadata["journal"] = journal_elem.text
if journal_fullname_elem is not None:
metadata["journal_fullname"] = journal_fullname_elem.text


def _add_pub_date(
Expand Down
4 changes: 2 additions & 2 deletions src/pubget/_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def extract(
result["id"] = id
result["title"] = article.find(".//ArticleTitle").text
keywords = []
for item in article.iterfind(".//DescriptorName"):
for item in article.iterfind(".//KeywordList/Keyword"):
keywords.append(item.text)
keywords = "\n".join(keywords)
result["keywords"] = keywords
Expand All @@ -58,7 +58,7 @@ def extract(
for section in abstract_sections:
try:
abstract = abstract + section.text + " "
except:
except TypeError:
continue
result["abstract"] = abstract
result["body"] = ""
Expand Down

0 comments on commit 7d3d020

Please sign in to comment.