Skip to content

Commit

Permalink
Define citekey aliases with link reference syntax
Browse files Browse the repository at this point in the history
  • Loading branch information
dhimmel committed Jan 4, 2020
1 parent a423432 commit 11cdf12
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 7 deletions.
52 changes: 45 additions & 7 deletions manubot/pandoc/cite.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,12 @@
http://scorreia.com/software/panflute/code.html#panflute.elements.Citation
"""
import argparse
import logging
import sys

import panflute as pf

from manubot.cite.citekey import (
is_valid_citekey,
)
from manubot.cite.citekey import is_valid_citekey
from manubot.process.util import (
get_citekeys_df,
generate_csl_items,
Expand Down Expand Up @@ -65,7 +64,7 @@ def parse_args():
return args


def _get_citation_string_action(elem, doc):
def _get_citekeys_action(elem, doc):
"""
Panflute action to extract citationId from all Citations in the AST.
"""
Expand All @@ -89,6 +88,39 @@ def _citation_to_id_action(elem, doc):
return None


def _get_reference_link_citekey_aliases(elem, doc):
    """
    Panflute action to collect citekey aliases defined with markdown's
    link reference syntax, i.e. paragraphs that start with one or more
    definitions of the form `[@citekey]: destination`.

    Based on TypeScript implementation by phiresky at
    https://github.com/phiresky/pandoc-url2cite/blob/b28374a9a037a5ce1747b8567160d8dffd64177e/index.ts#L118-L152
    Uses markdown's link reference syntax to define citekey aliases (tags)
    https://spec.commonmark.org/0.29/#link-reference-definitions

    For every definition found at the start of a Para, the destination text
    is stored in global_variables["citekey_aliases"] under the citekey
    (a warning is logged when the citekey was already defined, and the new
    value overwrites the old one), and the definition is removed from the
    paragraph in place. Elements that are not a Para are left untouched.

    Scanning stops at the first leading sequence that is not a complete
    definition (for example `[@key]:` with nothing after it, or a
    destination that is not a plain Str); the remaining content is kept
    unchanged in the document.

    Always returns None; elem is modified in place.
    """
    if not isinstance(elem, pf.Para):
        return None
    while (
        len(elem.content) >= 3
        and isinstance(elem.content[0], pf.Cite)
        and len(elem.content[0].citations) == 1  # differs from pandoc-url2cite
        and isinstance(elem.content[1], pf.Str)
        and elem.content[1].text == ":"
    ):
        # The destination follows the colon, optionally separated by a Space.
        destination_index = 3 if isinstance(elem.content[2], pf.Space) else 2
        if destination_index >= len(elem.content):
            # "[@key]: " with nothing after the space: not a definition.
            break
        destination = elem.content[destination_index]
        if not isinstance(destination, pf.Str):
            # Not a definition. Stop rather than loop forever on unchanged
            # content or drop inline elements that are not a destination.
            break
        # paragraph starts with [@something]: something
        # save info to citekey_aliases and remove from paragraph
        citekey = elem.content[0].citations[0].id  # differs from pandoc-url2cite
        citekey_aliases = global_variables["citekey_aliases"]
        if citekey in citekey_aliases:
            logging.warning(f"duplicate citekey {citekey}")
        citekey_aliases[citekey] = destination.text
        # drop the consumed definition from the front of the paragraph
        elem.content = elem.content[destination_index + 1 :]
        # a SoftBreak separates consecutive definitions on adjacent lines;
        # remove it so the next definition is at the start of the content
        if len(elem.content) > 0 and isinstance(elem.content[0], pf.SoftBreak):
            elem.content.pop(0)
    return None


def process_citations(doc):
"""
Apply citation-by-identifier to a Python object representation of
Expand All @@ -100,8 +132,13 @@ def process_citations(doc):
- manubot-requests-cache-path
- manubot-clear-requests-cache
"""
global_variables["citekey_aliases"] = doc.get_metadata(
"citekey-aliases", default={}, builtin=True
)
manuscript_citekeys = set(global_variables["manuscript_citekeys"])

doc.walk(_get_citation_string_action)
doc.walk(_get_reference_link_citekey_aliases)
doc.walk(_get_citekeys_action)
manuscript_citekeys = set(global_variables["manuscript_citekeys"])
manuscript_citekeys = sorted(
filter(
Expand All @@ -112,8 +149,9 @@ def process_citations(doc):
)
)
global_variables["manuscript_citekeys"] = manuscript_citekeys
tag_to_string = doc.get_metadata("citekey-aliases", default={}, builtin=True)
citekeys_df = get_citekeys_df(manuscript_citekeys, tag_to_string)
citekeys_df = get_citekeys_df(
manuscript_citekeys, global_variables["citekey_aliases"]
)
global_variables["citekeys_df"] = citekeys_df
global_variables["citekey_shortener"] = dict(
zip((citekeys_df["manuscript_citekey"]), citekeys_df["short_citekey"])
Expand Down
7 changes: 7 additions & 0 deletions manubot/pandoc/tests/input-with-cites.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ This is a sentence with many citations [@pmid:20170387; @doi:10.7717/peerj.705].
Citations in code blocks should not be modified [@pmid:20170387].
```

Defining citekeys with the link reference syntax [@tag:issue; @tag:bad-doi; @tag:bad-url].

[@tag:issue]: url:https://github.com/manubot/manubot/pull/189
[@tag:bad-doi]: doi:10.1016/S0022-2836(05)80360-2

[@tag:bad-url]: url:https://openreview.net/forum?id=HkwoSDPgg

## References

::: {#refs}
Expand Down
13 changes: 13 additions & 0 deletions manubot/pandoc/tests/output-with-cites.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ This is a sentence with many citations [1,2].

Citations in code blocks should not be modified [@pmid:20170387].

Defining citekeys with the link reference syntax [3,4,5].

References

1. Orthodontic treatment with tooth transplantation for patients with cleft lip and palate.
Expand All @@ -17,3 +19,14 @@ DOI: 10.1597/08-134 · PMID: 20170387
Kamen P. Simeonov, Daniel S. Himmelstein
PeerJ (2015-01-13) https://doi.org/98p
DOI: 10.7717/peerj.705 · PMID: 25648772 · PMCID: PMC4304851

3. CSL_Item: add date IO functionality by dhimmel · Pull Request #189 · manubot/manubotGitHub https://github.com/manubot/manubot/pull/189

4. Basic local alignment search tool
Stephen F. Altschul, Warren Gish, Webb Miller, Eugene W. Myers, David J. Lipman
Journal of Molecular Biology (1990-10) https://doi.org/cnsjsz
DOI: 10.1016/s0022-2836(05)80360-2

5. Semi-supervised Knowledge Transfer for Deep Learning from Private Training Data
Nicolas Papernot, Martí, N Abadi, Ú, Lfar Erlingsson, Ian Goodfellow, Kunal Talwar
(2016-11-02) https://openreview.net/forum?id=HkwoSDPgg

0 comments on commit 11cdf12

Please sign in to comment.