forked from backtracking/bibtex2html
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlatexscan.mll
367 lines (336 loc) · 12.7 KB
/
latexscan.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
(**************************************************************************)
(* bibtex2html - A BibTeX to HTML translator *)
(* Copyright (C) 1997-2014 Jean-Christophe Filliâtre and Claude Marché *)
(* *)
(* This software is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU General Public *)
(* License version 2, as published by the Free Software Foundation. *)
(* *)
(* This software is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *)
(* *)
(* See the GNU General Public License version 2 for more details *)
(* (enclosed in the file GPL). *)
(**************************************************************************)
(*
* bibtex2html - A BibTeX to HTML translator
* Copyright (C) 1997 Jean-Christophe FILLIATRE
*
* This software is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* See the GNU General Public License version 2 for more details
* (enclosed in the file GPL).
*)
(*i $Id: latexscan.mll,v 1.40 2010-02-22 07:38:19 filliatr Exp $ i*)
(*s This code is Copyright (C) 1997 Xavier Leroy. *)
{
open Printf
open Latexmacros
type math_mode = MathNone | MathDisplay | MathNoDisplay
let brace_nesting = ref 0
let math_mode = ref MathNone
let is_math_mode () =
match !math_mode with
| MathNone -> false
| MathDisplay | MathNoDisplay -> true
let hevea_url = ref false
let html_entities = ref false
let save_nesting f arg =
let n = !brace_nesting in
brace_nesting := 0;
f arg;
brace_nesting := n
let save_state f arg =
let n = !brace_nesting and m = !math_mode in
brace_nesting := 0;
math_mode := MathNone;
f arg;
brace_nesting := n;
math_mode := m
let verb_delim = ref (Char.chr 0)
let r = Str.regexp "[ \t\n]+"
let remove_whitespace u = Str.global_replace r "" u
let amp = Str.regexp_string "&"
let url s = Str.global_replace amp "&" s
let print_latex_url u =
let u = url (remove_whitespace u) in
print_s (sprintf "<a href=\"%s\">%s</a>" u u)
let print_hevea_url u t =
let u = url (remove_whitespace u) in
print_s (sprintf "<a href=\"%s\">%s</a>" u t)
let chop_last_space s =
let n = String.length s in
if s.[n-1] = ' ' then String.sub s 0 (n-1) else s
let def_macro s n b =
if not !Options.quiet then begin
eprintf "macro: %s = %s\n" s b;
flush stderr
end;
let n = match n with None -> 0 | Some n -> int_of_string n in
let rec code i subst =
if i <= n then
let r = Str.regexp ("#" ^ string_of_int i) in
[Parameterized
(fun arg ->
let subst s = Str.global_replace r (subst s) arg in
code (i+1) subst)]
else begin
let _s = subst b in
(* eprintf "subst b = %s\n" s; flush stderr; *)
[Recursive (subst b)]
end
in
def s (code 1 (fun s -> s))
let exec_macro ~main ~print_arg ~raw_arg ~skip_arg lexbuf m =
let rec exec = function
| Print str -> print_s str
| Print_arg -> print_arg lexbuf
| Raw_arg f -> let s = raw_arg lexbuf in f s
| Skip_arg -> save_nesting skip_arg lexbuf
| Recursive s -> main (Lexing.from_string s)
| Parameterized f ->
List.iter exec (f (raw_arg lexbuf))
in List.iter exec (find_macro m)
}
let space = [' ' '\t' '\n' '\r']
let float = '-'? (['0'-'9']+ | ['0'-'9']* '.' ['0'-'9']*)
let dimension = float ("sp" | "pt" | "bp" | "dd" | "mm" | "pc" |
"cc" | "cm" | "in" | "ex" | "em" | "mu")
rule main = parse
(* Comments *)
'%' [^ '\n'] * '\n' { main lexbuf }
(* Paragraphs *)
| "\n\n" '\n' *
{ print_s "<p>\n"; main lexbuf }
(* Font changes *)
| "{\\it" " "* | "{\\itshape" " "*
{ print_s "<i>";
save_state main lexbuf;
print_s "</i>"; main lexbuf }
| "{\\em" " "* | "{\\sl" " "* | "{\\slshape" " "*
{ print_s "<em>";
save_state main lexbuf;
print_s "</em>"; main lexbuf }
| "{\\bf" " "* | "{\\sf" " "* | "{\\bfseries" " "* | "{\\sffamily" " "*
{ print_s "<b>";
save_state main lexbuf;
print_s "</b>"; main lexbuf }
| "{\\sc" " "* | "{\\scshape" " "* | "{\\normalfont" " "*
| "{\\upshape" " "* | "{\\mdseries" " "* | "{\\rmfamily" " "*
{ save_state main lexbuf; main lexbuf }
| "{\\tt" " "* | "{\\ttfamily" " "*
{ print_s "<tt>";
save_state main lexbuf;
print_s "</tt>"; main lexbuf }
| "{\\small" " "*
{ print_s "<font size=\"-1\">";
save_state main lexbuf;
print_s "</font>"; main lexbuf }
| "{\\rm" " "*
{ print_s "<span style=\"font-style: normal\">";
save_state main lexbuf;
print_s "</span>"; main lexbuf }
| "{\\cal" " "*
{ save_state main lexbuf; main lexbuf }
| "\\cal" " "* { main lexbuf }
(* Double quotes *)
(***
| '"' { print_s "<tt>"; indoublequote lexbuf;
print_s "</tt>"; main lexbuf }
***)
(* Verb, verbatim *)
| ("\\verb" | "\\path") _
{ verb_delim := Lexing.lexeme_char lexbuf 5;
print_s "<tt>"; inverb lexbuf; print_s "</tt>";
main lexbuf }
| "\\begin{verbatim}"
{ print_s "<pre>"; inverbatim lexbuf;
print_s "</pre>"; main lexbuf }
(* Raw html, latex only *)
| "\\begin{rawhtml}"
{ rawhtml lexbuf; main lexbuf }
| "\\begin{latexonly}"
{ latexonly lexbuf; main lexbuf }
(* Itemize and similar environments *)
| "\\item[" [^ ']']* "]"
{ print_s "<dt>";
let s = Lexing.lexeme lexbuf in
print_s (String.sub s 6 (String.length s - 7));
print_s "<dd>"; main lexbuf }
| "\\item" { print_s "<li>"; main lexbuf }
(* Math mode (hmph) *)
| "$" { math_mode :=
begin
match !math_mode with
| MathNone -> MathNoDisplay
| MathNoDisplay -> MathNone
| MathDisplay -> (* syntax error *) MathNone
end;
main lexbuf }
| "$$" { math_mode :=
begin
match !math_mode with
| MathNone ->
print_s "<blockquote>";
MathDisplay
| MathNoDisplay -> MathNoDisplay
| MathDisplay ->
print_s "\n</blockquote>";
MathNone
end;
main lexbuf }
(* \hkip *)
| "\\hskip" space* dimension
(space* "plus" space* dimension)? (space* "minus" space* dimension)?
{ print_s " "; main lexbuf }
(* Special characters *)
| "\\char" ['0'-'9']+
{ let lxm = Lexing.lexeme lexbuf in
let code = String.sub lxm 5 (String.length lxm - 5) in
print_c(Char.chr(int_of_string code));
main lexbuf }
| "<" { print_s "<"; main lexbuf }
| ">" { print_s ">"; main lexbuf }
| "~" { print_s " "; main lexbuf }
| "``" { print_s "“"; main lexbuf }
| "''" { print_s "”"; main lexbuf }
| "--" { exec_macro ~main ~print_arg ~raw_arg ~skip_arg lexbuf "--";
main lexbuf }
| "---" { exec_macro ~main ~print_arg ~raw_arg ~skip_arg lexbuf "---";
main lexbuf }
| "^" { if is_math_mode() then begin
let buf = Lexing.from_string (raw_arg lexbuf) in
print_s "<sup>";
save_state main buf;
print_s"</sup>"
end else
print_s "^";
main lexbuf }
| "_" { if is_math_mode() then begin
let buf = Lexing.from_string (raw_arg lexbuf) in
print_s "<sub>";
save_state main buf;
print_s"</sub>"
end else
print_s "_";
main lexbuf }
(* URLs *)
| "\\url" { let url = raw_arg lexbuf in
if !hevea_url then
let text = raw_arg lexbuf in print_hevea_url url text
else
print_latex_url url;
main lexbuf }
| "\\" " "
{ print_s " "; main lexbuf }
(* General case for environments and commands *)
| ("\\begin{" | "\\end{") ['A'-'Z' 'a'-'z' '@']+ "}" |
"\\" (['A'-'Z' 'a'-'z' '@']+ '*'? " "? | [^ 'A'-'Z' 'a'-'z'])
{ let m = chop_last_space (Lexing.lexeme lexbuf) in
exec_macro ~main ~print_arg ~raw_arg ~skip_arg lexbuf m;
main lexbuf }
(* Nesting of braces *)
| '{' { incr brace_nesting; main lexbuf }
| '}' { if !brace_nesting <= 0
then ()
else begin decr brace_nesting; main lexbuf end }
(* Default rule for other characters *)
| eof { () }
| ['A'-'Z' 'a'-'z']+
{ if is_math_mode() then print_s "<em>";
print_s(Lexing.lexeme lexbuf);
if is_math_mode() then print_s "</em>";
main lexbuf }
| _ { print_c(Lexing.lexeme_char lexbuf 0); main lexbuf }
and indoublequote = parse
'"' { () }
| "<" { print_s "<"; indoublequote lexbuf }
| ">" { print_s ">"; indoublequote lexbuf }
| "&" { print_s "&"; indoublequote lexbuf }
| "\\\"" { print_s "\""; indoublequote lexbuf }
| "\\\\" { print_s "\\"; indoublequote lexbuf }
| eof { () }
| _ { print_c(Lexing.lexeme_char lexbuf 0); indoublequote lexbuf }
and inverb = parse
"<" { print_s "<"; inverb lexbuf }
| ">" { print_s ">"; inverb lexbuf }
| "&" { print_s "&"; inverb lexbuf }
| eof { () }
| _ { let c = Lexing.lexeme_char lexbuf 0 in
if c == !verb_delim then ()
else (print_c c; inverb lexbuf) }
and inverbatim = parse
"<" { print_s "<"; inverbatim lexbuf }
| ">" { print_s ">"; inverbatim lexbuf }
| "&" { print_s "&"; inverbatim lexbuf }
| "\\end{verbatim}" { () }
| eof { () }
| _ { print_c(Lexing.lexeme_char lexbuf 0); inverbatim lexbuf }
and rawhtml = parse
"\\end{rawhtml}" { () }
| eof { () }
| _ { print_c(Lexing.lexeme_char lexbuf 0); rawhtml lexbuf }
and latexonly = parse
"\\end{latexonly}" { () }
| eof { () }
| _ { latexonly lexbuf }
and print_arg = parse
"{" { save_nesting main lexbuf }
| "[" { skip_optional_arg lexbuf; print_arg lexbuf }
| " " { print_arg lexbuf }
| eof { () }
| _ { print_c(Lexing.lexeme_char lexbuf 0); main lexbuf }
and skip_arg = parse
"{" { incr brace_nesting; skip_arg lexbuf }
| "}" { decr brace_nesting;
if !brace_nesting > 0 then skip_arg lexbuf }
| "[" { if !brace_nesting = 0 then skip_optional_arg lexbuf;
skip_arg lexbuf }
| " " { skip_arg lexbuf }
| eof { () }
| _ { if !brace_nesting > 0 then skip_arg lexbuf }
and raw_arg = parse
| " " | "\n" { raw_arg lexbuf }
| '{' { nested_arg lexbuf }
| "[" { skip_optional_arg lexbuf; raw_arg lexbuf }
| '\\' ['A'-'Z' 'a'-'z']+
{ Lexing.lexeme lexbuf }
| eof { "" }
| _ { Lexing.lexeme lexbuf }
and nested_arg = parse
'}' { "" }
| '{' { let l = nested_arg lexbuf in
"{" ^ l ^ "}" ^ (nested_arg lexbuf) }
| eof { "" }
| [^ '{' '}']+{ let x = Lexing.lexeme lexbuf in
x ^ (nested_arg lexbuf) }
and skip_optional_arg = parse
"]" { () }
| eof { () }
| _ { skip_optional_arg lexbuf }
(* ajout personnel: [read_macros] pour lire les macros (La)TeX *)
and read_macros = parse
| "\\def" ('\\' ['a'-'z' 'A'-'Z' '@']+ as s) ("#" (['0'-'9']+ as n))?
{ let b = raw_arg lexbuf in
def_macro s n b;
read_macros lexbuf }
| "\\newcommand" space*
"{" ("\\" ['a'-'z' 'A'-'Z']+ as s) "}" ("[" (['0'-'9']+ as n) "]")?
{ let b = raw_arg lexbuf in
def_macro s n b;
read_macros lexbuf }
| "\\let" ('\\' ['a'-'z' 'A'-'Z' '@']+ as s) '='
{ let b = raw_arg lexbuf in
def_macro s None b;
read_macros lexbuf }
| eof
{ () }
| _
{ read_macros lexbuf }