-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpasture_parser.py
170 lines (134 loc) · 5.42 KB
/
pasture_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
from brace_expansion import expand
from pasture_orthography import \
line_delimiter, \
line_escape, \
assignment, \
application, \
delayed_application, \
argument, \
prior, \
comment_single, \
comment_begin, \
comment_end, \
string
# Operator tokens the tokenizer splits lines on (parentheses are handled
# separately inside tokenize); values come from pasture_orthography.
tokens = [assignment, application, delayed_application, argument, prior]
# Return [text] but with all commented text removed
def remove_comments(text):
    """Strip oneline and (nestable) multiline comments from [text].

    Delimiters come from pasture_orthography: comment_single starts a comment
    that runs to the next line_delimiter; comment_begin/comment_end bracket
    multiline comments, which may nest.  Line delimiters themselves are kept
    so line structure survives (assumes a single-character line_delimiter,
    since only text[i] is ever copied — TODO confirm).
    """
    noncomment_text = ""  # accumulator for every character outside comments
    i = 0 # current index in the text string
    depth = 0 # current depth of nested multiline comments
    oneline_comment = False # whether current text is in a oneline comment
    wait = False # true iff a comment just finished, but text shouldn't be considered valid yet
    while i < len(text):
        jump = 1 # number of spaces to step forward in the string (default is 1)
        if text[i:i+len(comment_begin)] == comment_begin:
            depth += 1
            jump = len(comment_begin)
        elif text[i:i+len(comment_end)] == comment_end:
            depth -= 1
            # depth may now be 0 again; [wait] keeps the end marker itself
            # from being copied into the output below
            wait = True
            jump = len(comment_end)
        elif text[i:i+len(comment_single)] == comment_single:
            oneline_comment = True
            jump = len(comment_single)
        elif text[i:i+len(line_delimiter)] == line_delimiter:
            # a delimiter ends a oneline comment; its (first) character is
            # then copied back to the output by the check below
            oneline_comment = False
            jump = len(line_delimiter)
        if wait: wait = False # waiting only lasts one step
        elif depth == 0 and not oneline_comment: # add to noncomment text iff not in a multiline comment, not in a oneline comment, and not waiting
            noncomment_text += text[i]
        assert depth >= 0  # more comment_ends than comment_begins is malformed
        i += jump
    return noncomment_text
# transforms [text] from raw string to list of lines of code
def group(text):
    """Turn raw source [text] into a flat list of code lines.

    Normalizes newlines to line_delimiter, applies line continuations
    (line_escape before a delimiter joins lines), strips comments, splits on
    the delimiter, and brace-expands each resulting line.
    """
    normalized = text.replace("\n", line_delimiter)  # consistent line delimiter
    joined = normalized.replace(line_escape + line_delimiter, "")  # line continuation
    stripped = remove_comments(joined)  # drop commented text
    return flatten([expand(raw_line) for raw_line in stripped.split(line_delimiter)])
# transforms raw text of [line] into a list of tokens
def tokenize(line):
    """Split one line of code into a list of tokens.

    A token is an operator string from [tokens], a parenthesis, or a maximal
    run of other characters (a name).  A second pass then drops application
    tokens that do not actually denote application (see the per-clause
    comments below).
    """
    tokenized_line = []
    i = 0
    token = ""  # characters of the name token currently being accumulated
    while i < len(line):
        jump = 1
        token += line[i]
        for t in tokens + list("()"):
            if line[i:i+len(t)] == t:
                jump = len(t)
                # flush the accumulated name, minus the operator's first
                # character which was just appended above
                if len(token) > 1: tokenized_line.append(token[:-1])
                token = ""
                tokenized_line.append(t)
        i += jump
    if len(token) > 0: tokenized_line.append(token)  # flush any trailing name
    application_cleaned_line = [] # remove spaces that are not really application
    for i in range(len(tokenized_line)):
        if tokenized_line[i] == application and \
        (len(application_cleaned_line) == 0 \
        or application_cleaned_line[-1] == assignment \
        or i + 1 == len(tokenized_line) \
        or tokenized_line[i+1] in [application, assignment]
        or (i > 0 and tokenized_line[i-1] == delayed_application)
        or (i + 1 < len(tokenized_line) and tokenized_line[i+1] == delayed_application)): continue
        # clauses above, in order:
        # remove spaces at the beginning
        # remove spaces after assignment
        # remove spaces at the end
        # remove duplicate spaces, and spaces before assignment
        # remove spaces bordering delayed application
        application_cleaned_line.append(tokenized_line[i])
    return application_cleaned_line
# flatten a list of lists
def flatten(l):
    """Return one list holding the elements of every sub-iterable of [l], in order."""
    return [element for sub in l for element in sub]
# return the ast of an expression
def ast_exp(exp):
    """Return the AST of the token list [exp].

    Results: [] for an empty expression; ["argument"] / ["prior"] for those
    special tokens; ["argument", <part>] for an argument followed by one part;
    a bare token list for a single name; or ["application", <lhs>, <rhs>]
    split at the appropriate (delayed) application operator.

    Raises AssertionError on unbalanced parentheses or a trailing delayed
    application, and RuntimeError if no split point can be found.
    """
    if exp == []: return []
    depths = [] # depths[i] is the depth in () at position [i] of [exp]
    depth = 0 # the current depth
    all_one_part = True # whether exp is all one part (one parenthetical)
    for i in range(len(exp)):
        if exp[i] == "(": depth += 1
        elif exp[i] == ")": depth -= 1
        all_one_part = all_one_part and (depth > 0 or i == len(exp) - 1)
        depths.append(depth)
        assert depth >= 0  # a ")" must never precede its "("
    assert depths[-1] == 0 # no dangling parens
    if all_one_part and len(exp) > 1: return ast_exp(exp[1:-1]) # remove parens around (...)
    parts = [] # list of all parts (direct subexpressions)
    last_part = 0
    for i in range(len(exp)):
        if depths[i] == 0:  # a depth-0 position closes a part
            parts.append(exp[last_part:i+1])
            last_part = i+1
    if len(parts) == 0: return []
    if len(parts) == 1:
        if parts[0][0] == argument: return ["argument"]
        if parts[0][0] == prior: return ["prior"]
        return parts[0]
    if len(parts) == 2 and parts[0] == [argument]: return ["argument", parts[1]]
    # the application/delayed-application operators appearing at part level,
    # and the part indexes where they occur
    operations = [part[0] for part in parts if part[0] in [application, delayed_application]]
    operation_indexes = [i for i in range(len(parts)) if parts[i][0] in [application, delayed_application]]
    assert operations[-1] != delayed_application  # cannot end in delayed application
    # split at the final operator when it is a lone operator or preceded by a
    # plain application
    if len(operations) == 1 or operations[-2] == application:
        return ["application", ast_exp(flatten(parts[:operation_indexes[-1]])), ast_exp(flatten(parts[operation_indexes[-1]+1:]))]
    # otherwise split at the last delayed application that begins a run of
    # delayed applications (preceded by a plain application, or first)
    last_pos = -1
    for i in range(len(operations)):
        if operations[i] == delayed_application and (i == 0 or operations[i-1] == application): last_pos = i
    if last_pos != -1:
        return ["application", ast_exp(flatten(parts[:operation_indexes[last_pos]])), ast_exp(flatten(parts[operation_indexes[last_pos]+1:]))]
    if len(operations) > 0:
        return ["application", ast_exp(flatten(parts[:operation_indexes[-1]])), ast_exp(flatten(parts[operation_indexes[-1]+1:]))]
    # BUG FIX: the original ended with a bare `raise`, which outside an
    # except block surfaces as the confusing "RuntimeError: No active
    # exception to re-raise".  Raise the same type explicitly, with context.
    raise RuntimeError("ast_exp: could not split expression " + repr(exp))
# return the ast of a line of code (a rule)
def ast(line):
    """Build the AST of one rule: ["assignment", <lhs ast>, <rhs ast>]."""
    if not line:
        return []
    split_at = line.index(assignment)  # ValueError if the rule has no assignment
    lhs, rhs = line[:split_at], line[split_at + 1:]
    return ["assignment", ast_exp(lhs), ast_exp(rhs)]
# return a list of rules, in abstract syntax tree form, specified by the pasture code in [text]
def parse(text):
    """Parse pasture source [text] into rule ASTs, dropping empty rules."""
    rules = (ast(tokenize(code_line)) for code_line in group(text))
    return [rule for rule in rules if rule != []]