Skip to content

Commit

Permalink
Merge pull request titipata#89 from thomascpan/master
Browse files (browse the repository at this point in the history)
Fix subjects xpath in parse_pubmed_xml. Fixes titipata#87
  • Loading branch information
titipata authored Apr 21, 2020
2 parents 1376aa6 + 95cdc51 commit d958d74
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pubmed_parser/pubmed_oa_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def parse_pubmed_xml(path, include_path=False, nxml=False):
pub_day_node = tree.find(".//pub-date/day")
pub_day = pub_day_node.text if pub_day_node is not None else "01"

- subjects_node = tree.findall(".//article-categories.//subj-group/subject")
+ subjects_node = tree.findall(".//article-categories//subj-group/subject")
subjects = list()
if subjects_node is not None:
for s in subjects_node:
Expand Down
1 change: 1 addition & 0 deletions tests/test_pubmed_oa_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def test_parse_pubmed_xml():
assert len(parsed_xml.get("full_title")) > 0
assert parsed_xml.get("pmc") == "3460867"
assert parsed_xml.get("doi") == "10.1371/journal.pone.0046493"
+ assert parsed_xml.get("subjects") == "Research Article; Biology; Biochemistry; Enzymes; Enzyme Metabolism; Lipids; Fatty Acids; Glycerides; Lipid Metabolism; Neutral Lipids; Metabolism; Lipid Metabolism; Proteins; Globular Proteins; Protein Classes; Recombinant Proteins; Biotechnology; Microbiology; Bacterial Pathogens; Bacteriology; Emerging Infectious Diseases; Host-Pathogen Interaction; Microbial Growth and Development; Microbial Metabolism; Microbial Pathogens; Microbial Physiology; Proteomics; Sequence Analysis; Spectrometric Identification of Proteins"


def test_parse_pubmed_paragraph():
Expand Down

0 comments on commit d958d74

Please sign in to comment.