-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtex2markdown.py
101 lines (77 loc) · 3.55 KB
/
tex2markdown.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
import sys, re
from operator import itemgetter
from typing import Any, Dict, List, Tuple
from replaceEnvironments import replaceEnvironments
from replaceSpecialChars import replaceSpecialChars
from replaceTextModifiers import replaceTextModifiers
class tex2markdown:
    """
    Namespace of helper functions that convert a LaTeX string to markdown.

    All functions are static; call them on the class, e.g.
    ``tex2markdown.tex2markdown(text)``.
    """

    @staticmethod
    def pairDelimiters(
        starts: List[int],
        ends: List[int]
    ) -> List[Tuple[int, int]]:
        """
        Match every start delimiter with its end delimiter, honouring nesting.

        starts: a sorted list of positions where starting delimiters occur
        ends: a sorted list of positions where ending delimiters occur
        returns: a list of (start, end) pairs, corresponding to delimiters
                 that are paired. Pairs appear in the order in which their
                 end delimiter occurs, so a nested pair precedes the pair
                 that encloses it.
        raises: AssertionError if the two lists differ in length;
                IndexError if an end delimiter occurs while no start
                delimiter is waiting to be closed.
        """
        if len(starts) != len(ends):
            # Raised explicitly instead of using ``assert`` so the check is
            # not stripped when Python runs with -O.
            raise AssertionError(
                "Numbers of start and end delimiters inequal"
            )
        # Tag each position with its kind: 0 = start, 1 = end.
        tagged = [(pos, 0) for pos in starts] + [(pos, 1) for pos in ends]
        # The sort is stable, so a start sharing a position with an end is
        # still handled first.
        delimiters = sorted(tagged, key=itemgetter(0))
        begin_stack = []
        paired_delimiters = []
        for position, kind in delimiters:
            if kind == 0:
                begin_stack.append(position)
            else:
                # An end delimiter closes the most recently opened start.
                paired_delimiters.append((begin_stack.pop(), position))
        return paired_delimiters

    @staticmethod
    def getEnvs(input_text: str) -> List[Tuple[int, int]]:
        r"""
        Locate the environments of a LaTeX string.

        input_text: LaTeX text
        output: A list of pairs of indices (index of the start of \begin
                and index just past the opening brace of the matching
                \end{) of environments. Environments whose type is listed
                in skip_envs are left out.
        """
        skip_envs = [
            "align",
        ]
        env_starts = [m.start() for m in re.finditer(r'\\begin{', input_text)]
        env_ends = [m.end() for m in re.finditer(r'\\end{', input_text)]
        envs = tex2markdown.pairDelimiters(env_starts, env_ends)
        begin_prefix_length = len("\\begin{")
        # Build a new list rather than removing from ``envs`` while looping
        # over it: removing during iteration skips the element that follows
        # each removed one.
        kept_envs = []
        for start_idx, end_idx in envs:
            # The type is the text between "\begin{" and the first "}".
            env_type = input_text[
                start_idx + begin_prefix_length:end_idx
            ].split('}')[0]
            if env_type not in skip_envs:
                kept_envs.append((start_idx, end_idx))
        return kept_envs

    @staticmethod
    def tex2markdown(tex_contents: str) -> str:
        """
        Convert a LaTeX string to markdown.

        tex_contents: A LaTeX string
        returns: a markdown-compatible string
        """
        tex_contents = replaceSpecialChars.replaceSpecialChars(tex_contents)
        tex_contents = replaceTextModifiers.replaceTextModifiers(tex_contents)
        # NOTE(review): thmcounter is never updated in this function, so
        # every replaceEnvironments call receives 0 - confirm whether the
        # counter is meant to advance between environments.
        thmcounter = 0
        #This block is presently really inefficient, since it works with each
        #environment and recomputes the indices every time (because the changes
        #will usually change the indices). Probably the right way to do this is
        #to compute all the changes and then stitch it all together, but there
        #seemed to be some annoyances with nested environments and I got bored
        #of coding
        # NOTE(review): the loop only ends once getEnvs finds no environment
        # left; this presumably relies on replaceEnvironments not emitting
        # \begin{...} for non-skipped types - verify against that module.
        output = tex_contents
        envs = tex2markdown.getEnvs(output)
        while envs:
            # getEnvs lists nested environments before the ones enclosing
            # them, so the first entry never contains another listed
            # environment.
            start_idx, end_idx = envs[0]
            env_type = output[start_idx+len("\\begin{"):end_idx].split('}')[0]
            begin_length = len("\\begin{}") + len(env_type)
            # end_idx points just past "\end{", so the content stops where
            # that "\end{" starts.
            env_content = output[start_idx+begin_length:end_idx-len("\\end{")]
            environment_markdown = replaceEnvironments.replaceEnvironments(env_type, env_content, thmcounter)
            # Skip "<env_type>}" after "\end{" when splicing the tail back
            # on; this assumes \end names the same type as \begin.
            output = output[0:start_idx]+environment_markdown+output[end_idx+len(env_type)+1:]
            # Indices are stale after the splice, so recompute them.
            envs = tex2markdown.getEnvs(output)
        return output
if __name__ == "__main__":
    # Filter mode: read LaTeX from standard input and write the converted
    # markdown to standard output.
    latex_source = sys.stdin.read()
    markdown_text = tex2markdown.tex2markdown(latex_source)
    sys.stdout.write(markdown_text)