forked from kaushalmodi/ox-hugo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathox-hugo-pandoc-cite.el
415 lines (346 loc) · 18.2 KB
/
ox-hugo-pandoc-cite.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
;;; ox-hugo-pandoc-cite.el --- Pandoc Citations support for ox-hugo -*- lexical-binding: t -*-
;; Authors: Kaushal Modi <[email protected]>
;; URL: https://ox-hugo.scripter.co
;;; Commentary:
;; *This is NOT a stand-alone package.*
;;
;; It is used by ox-hugo to add support for parsing Pandoc Citations.
;;; Code:
(require 'org)
(declare-function org-hugo--plist-get-true-p "ox-hugo")
(declare-function org-hugo--front-matter-value-booleanize "ox-hugo")
(defcustom org-hugo-pandoc-cite-references-heading "References {#references}"
"Markdown title for Pandoc inserted references section."
:group 'org-export-hugo
:type 'string)
(defvar org-hugo--fm-yaml) ;Silence byte-compiler
(defvar org-hugo-pandoc-cite-pandoc-args-list
`("-f" "markdown"
"-t" ,(concat "markdown-citations"
"-simple_tables"
"+pipe_tables"
"-raw_attribute"
"-fenced_divs"
"-fenced_code_attributes"
"-bracketed_spans")
"--markdown-headings=atx"
"--id-prefix=fn:"
"--citeproc")
"Pandoc arguments used in `org-hugo-pandoc-cite--run-pandoc'.
-f markdown : Convert *from* Markdown
-t markdown : Convert *to* Markdown
-citations : Remove the \"citations\" extension. This will cause
citations to be expanded instead of being included as
markdown citations.
-simple_tables : Remove the \"simple_tables\" style.
+pipe_tables : Add the \"pipe_tables\" style insted that Blackfriday
understands.
-fenced_divs : Do not replace HTML <div> tags with Pandoc fenced
divs \":::\".
-fenced_code_attributes : Create fenced code blocks like
\"``` lang .. ```\" instead of \"``` {.lang} .. ```\".
-bracketed_spans : Do not replace HTML <span> tags with Pandoc
bracketed class notation \"{.some-class}\".
--atx-headers : Use \"# foo\" style heading for output markdown.
--id-prefix=fn: : Create footnote ID's like \"[^fn:1]\" instead of
\"[^1]\" to be consistent with default ox-hugo
exported Markdown footnote style.
These arguments are added to the `pandoc' call in addition to the
\"--bibliography\", output file (\"-o\") and input file
arguments.")
(defvar org-hugo-pandoc-cite-pandoc-meta-data
'("nocite" "csl" "link-citations")
"List of meta-data fields specific to Pandoc.")
(defvar org-hugo-pandoc-cite--run-pandoc-buffer "*Pandoc Citations*"
"Buffer to contain the `pandoc' run output and errors.")
(defvar org-hugo-pandoc-cite--references-header-regexp
"^<div id=\"refs\" class=\"references[^>]+>"
"Regexp to match the Pandoc-inserted references header string.
This string is present only if Pandoc has resolved one or more
references.
Pandoc 2.11.4.")
(defvar org-hugo-pandoc-cite--reference-entry-regexp
"^<div id=\"ref-[^\"]+\" .*csl-entry[^>]+>"
"Regexp to match the Pandoc-inserted reference entry strings.
Pandoc 2.11.4.")
(defun org-hugo-pandoc-cite--restore-fm-in-orig-outfile (orig-outfile fm &optional orig-full-contents)
"Restore the intended front-matter format in ORIG-OUTFILE.
ORIG-OUTFILE is the Org exported file name.
FM is the intended front-matter format.
ORIG-FULL-CONTENTS is a string of ORIG-OUTFILE contents. If this
is nil it is created in this function.
If FM is already in YAML format, this function doesn't do
anything. Otherwise, the YAML format front-matter in
ORIG-OUTFILE is replaced with TOML format."
(unless (string= fm org-hugo--fm-yaml)
(unless orig-full-contents
(setq orig-full-contents (with-temp-buffer
(insert-file-contents orig-outfile)
(buffer-substring-no-properties
(point-min) (point-max)))))
(setq fm (org-hugo-pandoc-cite--remove-pandoc-meta-data fm))
(let* ((orig-contents-only
(replace-regexp-in-string
;; The `orig-contents-only' will always be in YAML.
;; Delete that first.
"\\`---\n\\(.\\|\n\\)+\n---\n" "" orig-full-contents))
(toml-fm-plus-orig-contents (concat fm orig-contents-only)))
;; (message "[ox-hugo-pandoc-cite] orig-contents-only: %S" orig-contents-only)
(write-region toml-fm-plus-orig-contents nil orig-outfile))))
(defun org-hugo-pandoc-cite--run-pandoc (orig-outfile bib-list)
"Run the `pandoc' process and return the generated file name.
ORIG-OUTFILE is the Org exported file name.
BIB-LIST is a list of one or more bibliography files."
;; First kill the Pandoc run buffer if already exists (from a
;; previous run).
(when (get-buffer org-hugo-pandoc-cite--run-pandoc-buffer)
(kill-buffer org-hugo-pandoc-cite--run-pandoc-buffer))
(let* ((pandoc-outfile (make-temp-file ;ORIG_FILE_BASENAME.RANDOM.md
(concat (file-name-base orig-outfile) ".")
nil ".md"))
(bib-args (mapcar (lambda (bib-file)
(concat "--bibliography="
bib-file))
bib-list))
(pandoc-arg-list (append
org-hugo-pandoc-cite-pandoc-args-list
bib-args
`("-o" ,pandoc-outfile ,orig-outfile))) ;-o <OUTPUT FILE> <INPUT FILE>
(pandoc-arg-list-str (mapconcat #'identity pandoc-arg-list " "))
exit-code)
(message (concat "[ox-hugo] Post-processing citations using Pandoc command:\n"
" pandoc " pandoc-arg-list-str))
(setq exit-code (apply 'call-process
(append
`("pandoc" nil
,org-hugo-pandoc-cite--run-pandoc-buffer :display)
pandoc-arg-list)))
(unless (= 0 exit-code)
(user-error (format "[ox-hugo] Pandoc execution failed. See the %S buffer"
org-hugo-pandoc-cite--run-pandoc-buffer)))
pandoc-outfile))
(defun org-hugo-pandoc-cite--remove-pandoc-meta-data (fm)
"Remove Pandoc meta-data from front-matter string FM and return it.
The list of Pandoc specific meta-data is defined in
`org-hugo-pandoc-cite-pandoc-meta-data'."
(with-temp-buffer
(insert fm)
(goto-char (point-min))
(let ((regexp (format "^%s\\(:\\| =\\) "
(regexp-opt org-hugo-pandoc-cite-pandoc-meta-data 'words))))
(delete-matching-lines regexp))
(buffer-substring-no-properties (point-min) (point-max))))
(defun org-hugo-pandoc-cite--fix-pandoc-output (content loffset info)
"Fix the Pandoc output CONTENT and return it.
LOFFSET is the heading level offset.
Required fixes:
- Prepend Pandoc inserted \"references\" class div with
`org-hugo-pandoc-cite-references-heading'.
- When not using Goldmark (Hugo v0.60.0+), add the Blackfriday
required \"<div></div>\" hack to Pandoc divs with \"ref\" id's.
- Unescape the Hugo shortcodes: \"{{\\\\=< shortcode \\\\=>}}\" ->
\"{{< shortcode >}}\"
INFO is a plist used as a communication channel."
(with-temp-buffer
(insert content)
(let ((case-fold-search nil))
(goto-char (point-min))
;; Prepend the Pandoc inserted "references" class div with
;; `org-hugo-pandoc-cite-references-heading' heading in Markdown.
(save-excursion
;; There should be at max only one replacement needed for
;; this.
(when (re-search-forward org-hugo-pandoc-cite--references-header-regexp nil :noerror)
(let ((references-heading ""))
(when (org-string-nw-p org-hugo-pandoc-cite-references-heading)
(let ((level-mark (make-string (+ loffset 1) ?#)))
(setq references-heading (concat level-mark " " org-hugo-pandoc-cite-references-heading))))
(replace-match (concat references-heading "\n\n\\&"
(unless (org-hugo--plist-get-true-p info :hugo-goldmark)
"\n <div></div>\n")))))) ;See footnote 1
;; Add the Blackfriday required hack to Pandoc ref divs.
(unless (org-hugo--plist-get-true-p info :hugo-goldmark)
(save-excursion
(while (re-search-forward org-hugo-pandoc-cite--reference-entry-regexp nil :noerror)
(replace-match "\\&\n <div></div>")))) ;See footnote 1
;; Fix Hugo shortcodes.
(save-excursion
(let ((regexp (concat "{{\\\\<"
"\\(\\s-\\|\n\\)+"
"\\(?1:[[:ascii:][:nonascii:]]+?\\)"
"\\(\\s-\\|\n\\)+"
"\\\\>}}")))
(while (re-search-forward regexp nil :noerror)
(let* ((sc-body (match-string-no-properties 1))
(sc-body-no-newlines (replace-regexp-in-string "\n" " " sc-body))
;; Remove all backslashes except for the one
;; preceding double-quotes, like in:
;; {{< figure src="nested-boxes.svg" caption="<span class=\"figure-number\">Figure 1: </span>
;; PlantUML generated figure showing nested boxes" >}}
(sc-body-no-backlash (replace-regexp-in-string
"\"\"" "\\\\\\\\\""
(replace-regexp-in-string
(rx "\\" (group anything)) "\\1" sc-body-no-newlines))))
(replace-match (format "{{< %s >}}" sc-body-no-backlash) :fixedcase)))))
;; Fix square bracket. \[ abc \] -> [ abc ]
(save-excursion
(let ((regexp (concat
"\\\\\\["
"\\(.+\\)"
"\\\\\\]")))
(while (re-search-forward regexp nil :noerror)
(let* ((sc-body (match-string-no-properties 1)))
;; (message "square bracket [%s]" sc-body)
(replace-match (format "[%s]" sc-body) :fixedcase)))))
(buffer-substring-no-properties (point-min) (point-max)))))
(defun org-hugo-pandoc-cite--parse-citations (info orig-outfile)
"Parse Pandoc Citations in ORIG-OUTFILE and update that file.
INFO is a plist used as a communication channel.
ORIG-OUTFILE is the Org exported file name."
(let ((bib-list (let ((bib-raw
(org-string-nw-p
(or (org-entry-get nil "EXPORT_BIBLIOGRAPHY" :inherit)
(format "%s" (plist-get info :bibliography))))))
(when bib-raw
;; Multiple bibliographies can be comma or
;; newline separated. The newline separated
;; bibliographies work only for the
;; #+bibliography keyword; example:
;;
;; #+bibliography: bibliographies-1.bib
;; #+bibliography: bibliographies-2.bib
;;
;; If using the subtree properties they need to
;; be comma-separated (now don't use commas in
;; those file names, you will suffer):
;;
;; :EXPORT_BIBLIOGRAPHY: bibliographies-1.bib, bibliographies-2.bib
(let ((bib-list-1 (org-split-string bib-raw "[,\n]")))
;; - Don't allow spaces around bib names.
;; - Remove duplicate bibliographies.
(delete-dups
(mapcar (lambda (bib-file)
(let ((fname (org-trim bib-file)))
(unless (file-exists-p fname)
(user-error "[ox-hugo] Bibliography file %S does not exist"
fname))
fname))
bib-list-1)))))))
(if bib-list
(let ((fm (plist-get info :front-matter))
(loffset (string-to-number
(or (org-entry-get nil "EXPORT_HUGO_LEVEL_OFFSET" :inherit)
(plist-get info :hugo-level-offset))))
(pandoc-outfile (org-hugo-pandoc-cite--run-pandoc orig-outfile bib-list)))
;; (message "[ox-hugo parse citations] fm :: %S" fm)
;; (message "[ox-hugo parse citations] loffset :: %S" loffset)
;; (message "[ox-hugo parse citations] pandoc-outfile :: %S" pandoc-outfile)
(let* ((pandoc-outfile-contents (with-temp-buffer
(insert-file-contents pandoc-outfile)
(buffer-substring-no-properties
(point-min) (point-max))))
(content-has-references (string-match-p
org-hugo-pandoc-cite--references-header-regexp
pandoc-outfile-contents)))
;; Prepend the original ox-hugo generated front-matter to
;; Pandoc output, only if the Pandoc output contains
;; references.
(if content-has-references
(let* ((contents-fixed (org-hugo-pandoc-cite--fix-pandoc-output
pandoc-outfile-contents loffset info))
(fm (org-hugo-pandoc-cite--remove-pandoc-meta-data fm))
(fm-plus-content (concat fm "\n" contents-fixed)))
(write-region fm-plus-content nil orig-outfile))
(org-hugo-pandoc-cite--restore-fm-in-orig-outfile orig-outfile fm)
(message (concat "[ox-hugo] Using the original Ox-hugo output instead "
"of Pandoc output as it contained no References"))))
(delete-file pandoc-outfile)
(with-current-buffer org-hugo-pandoc-cite--run-pandoc-buffer
(if (> (point-max) 1) ;buffer is not empty
(message
(format
(concat "[ox-hugo] See the %S buffer for possible Pandoc warnings.\n"
" Review the exported Markdown file for possible missing citations.")
org-hugo-pandoc-cite--run-pandoc-buffer))
;; Kill the Pandoc run buffer if it is empty.
(kill-buffer org-hugo-pandoc-cite--run-pandoc-buffer))))
(message "[ox-hugo] No bibliography file was specified"))))
(defun org-hugo-pandoc-cite--parse-citations-maybe (info)
"Check if Pandoc needs to be run to parse citations; and run it.
INFO is a plist used as a communication channel."
;; (message "pandoc citations keyword: %S"
;; (org-hugo--plist-get-true-p info :hugo-pandoc-citations))
;; (message "pandoc citations prop: %S"
;; (org-entry-get nil "EXPORT_HUGO_PANDOC_CITATIONS" :inherit))
(let* ((orig-outfile (plist-get info :outfile))
(fm (plist-get info :front-matter))
(has-nocite (string-match-p "^nocite\\(:\\| =\\) " fm))
(orig-outfile-contents (with-temp-buffer
(insert-file-contents orig-outfile)
(buffer-substring-no-properties
(point-min) (point-max))))
;; http://pandoc.org/MANUAL.html#citations
;; Each citation must have a key, composed of `@' + the
;; citation identifier from the database, and may optionally
;; have a prefix, a locator, and a suffix. The citation key
;; must begin with a letter, digit, or _, and may contain
;; alphanumerics, _, and internal punctuation characters
;; (:.#$%&-+?<>~/).
;; A minus sign (-) before the @ will suppress mention of the
;; author in the citation.
(valid-citation-key-char-regexp "a-zA-Z0-9_:.#$%&+?<>~/-")
(citation-key-regexp (concat "[^" valid-citation-key-char-regexp "]"
"\\(-?@[a-zA-Z0-9_]"
"[" valid-citation-key-char-regexp "]+\\)"))
(has-@ (string-match-p citation-key-regexp orig-outfile-contents)))
;; Either the nocite front-matter should be there, or the
;; citation keys should be present in the `orig-outfile'.
(if (or has-nocite has-@)
(progn
(unless (executable-find "pandoc")
(user-error "[ox-hugo] pandoc executable not found in PATH"))
(org-hugo-pandoc-cite--parse-citations info orig-outfile))
(org-hugo-pandoc-cite--restore-fm-in-orig-outfile
orig-outfile fm orig-outfile-contents))))
(defun org-hugo-pandoc-cite--meta-data-generator (data)
"Return YAML front-matter to pass citation meta-data to Pandoc.
DATA is the alist containing all the post meta-data for
front-matter generation.
Pandoc accepts `csl', `nocite' and `link-citations' variables via
a YAML front-matter.
References:
- https://pandoc.org/MANUAL.html#citation-rendering
- https://pandoc.org/MANUAL.html#including-uncited-items-in-the-bibliography
- https://pandoc.org/MANUAL.html#other-relevant-metadata-fields"
(let* ((yaml ())
(link-citations (cdr (assoc 'link-citations data)))
(link-citations (if (symbolp link-citations)
(symbol-name link-citations)
link-citations))
(csl (cdr (assoc 'csl data)))
(nocite (cdr (assoc 'nocite data))))
(push "---" yaml)
(when link-citations
(push (format "link-citations: %s"
(org-hugo--front-matter-value-booleanize link-citations))
yaml))
(when csl
(push (format "csl: %S" csl) yaml))
(when nocite
(push (format "nocite: [%s]"
(string-join
(mapcar (lambda (elem)
(format "%S" (symbol-name elem)))
nocite)
", "))
yaml))
(push "---\n" yaml)
;; (message "[org-hugo-pandoc-cite--meta-data-generator DBG] yaml: %S" yaml)
(string-join (nreverse yaml) "\n")))
(provide 'ox-hugo-pandoc-cite)
;;; Footnotes
;;;; Footnote 1
;; The empty HTML element tags like "<div></div>" is a hack to get
;; around a Blackfriday limitation. Details:
;; https://github.com/kaushalmodi/ox-hugo/issues/93.
;;; ox-hugo-pandoc-cite.el ends here