-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
301 lines (262 loc) · 11.8 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
from pprint import pprint
import prettytable as pt
from collections import OrderedDict as odict
class Parser:
'解析文法文件'
lines = []
terminal = set() # 终结符
nonterminal = set() # 非终结符
follow = odict() # follow集
first = odict()
start = ''
pretable = odict() # 预测分析表
can_be_empty = [] # 能推导出空串的非终
iserror = False
def open(self, path):
with open(path) as f:
for line in f:
self.lines.append(line.strip('\n').replace(' ',''))
self.nonterminal.add(line[0])
# 分割一遍
_tosplit = []
for line in self.lines:
if '|' in line:
_tosplit.append(line)
for _line in _tosplit:
self._split(_line)
# 初始化终结符和非终结符集
for line in self.lines:
for char in line.split('->')[1]:
if char not in self.nonterminal:
self.terminal.add(char)
self.terminal.add('#')
# 初始化可推导出空串的非终结符集合,两次扫描
for line in self.lines:
left, right = line.split('->')
if right=='@':
self.can_be_empty.append(left)
for line in self.lines:
left, right = line.split('->')
_flag = True
for char in right:
if char not in self.can_be_empty:
_flag = False
if _flag:
self.can_be_empty.append(left)
left,right = self.lines[0].split('->')
self.start = left
for non in self.nonterminal:
self.first[non] = [set(), set()]
self.follow[non] = [set(), set(), set()] # 三个集合分别为,终结符集合,非终结符First集,非终结符Follow集
for ter in self.terminal:
if ter != '@':
self.pretable[ter] = odict()
for non in self.nonterminal:
if ter!='@':
self.pretable[ter][non] = 'error'
def make_first(self):
# 终结符直接放到第一个set,非终结符暂时放到第二个set
for line in self.lines:
left,right = line.split('->')
if right[0] in self.terminal:
assert isinstance(self.first[left][0], set)
self.first[left][0].add(right[0])
elif right[0] in self.nonterminal:
# 如果第一个非终结符能推导出空串的情况
if right[0] in self.can_be_empty:
self.first[left][1].add(right[1])
self.first[left][1].add(right[0])
#处理第二个set,将非终结符的first集合添加倒第一个set,移除非终结符,直到第二个set为空
for item in self.nonterminal:
while(len(self.first[item][1])>0):
for _non in self.first[item][1].copy():
self.first[item][0] |= (self.first[_non][0]-{'@'})
# self.first[item][1] |= self.first[_non][1]
self.first[item][1].remove(_non)
for item in self.can_be_empty:
self.first[item][0].add('@')
for item in self.nonterminal:
del self.first[item][1]
def make_follow(self):
# 把#放入开始符号的follow集中
self.follow[self.start][0].add('#')
# 非终结符直接放到第一个set,非终结符的first集和follow集暂时放到第2和第3个set
for production in self.lines:
left,right = production.split('->')
for i in range(len(right)-1):
# print("debug:current nonterminal{}".format(right[i]))
if right[i] in self.nonterminal:
# 非终结符紧跟一个终结符的情况
if right[i+1] in self.terminal:
self.follow[right[i]][0].add(right[i+1])
# 非终结符紧跟一个非终结符的情况
elif right[i+1] in self.nonterminal:
self.follow[right[i]][1].add(right[i+1])
# 如果后跟的非终结符含有空串@,就把其follow集也加入
if right[i+1] in self.can_be_empty:
self.follow[right[i]][2].add(right[i+1])
# 对形如U->…P的产生式的情况,把Follow(U)添加倒Follow(P)
_tmp = right[len(right)-1]
if _tmp in self.nonterminal:
if _tmp is not left:
# print("1.add {}'s follow to {}".format(left, _tmp))
self.follow[_tmp][2].add(left)
# 对形如U->…PB的产生式,且B可以为空的情况,把Follow(U)添加倒Follow(P)
if _tmp in self.can_be_empty:
_tmp2 = right[len(right)-2]
if _tmp2 in self.nonterminal:
self.follow[_tmp2][2].add(left)
# print("2.add {}'s follow to {}".format(left, _tmp2))
# 处理第2,3个集合直到为空
notempty = 1
while(notempty>0):
for item in self.follow:
if len(self.follow[item][2])>0:
_toremove = []
tmp_set = set()
for ans in self.follow[item][2]:
self.follow[item][0] |= self.follow[ans][0]
self.follow[item][1] |= self.follow[ans][1]
tmp_set |= self.follow[ans][2]
_toremove.append(ans)
self.follow[item][2] |= tmp_set
# 移除第3个集合中非终结符
for i in _toremove:
self.follow[item][2].remove(i)
for item in self.follow:
if len(self.follow[item][1])>0:
_toremove = []
for ans in self.follow[item][1]:
self.follow[item][0] |= (self.first[ans][0]-{'@'})
_toremove.append(ans)
# 移除第2个集合中非终结符
for i in _toremove:
self.follow[item][1].remove(i)
for item in self.follow:
if self.follow[item][1]!=set() or (self.follow[item][2]!=set()):
notempty += 1
break
notempty -= 1
def make_pretable(self):
'''
(1)对First(A)中的每一个终结符a,把A->XXX加入M[A,a]中。
(2)若产生式的右部可以为空,则把产生式加入左部的follow集中的每个元素的对应位置
'''
# 执行步骤(1)
for line in self.lines:
# left和x作为坐标确定分析表中的位置
left, right = line.split('->')
# (1) 如果右部是终结符开头,直接把右部加入开头的终结符
if right[0] in self.terminal:
self._set_tableitem(right[0], left, right)
# (2) 如果右部是非终结符开头,则把右部加入此非终结符的first集的每个元素
elif right[0] in self.nonterminal:
for i in self.first[right[0]][0]:
self._set_tableitem(i, left, right)
# (3) 如果右部可以为空
m_canbeempty = True
for ch in right:
if ch in self.terminal and ch != '@':
m_canbeempty = False
elif ch in self.nonterminal and ch not in self.can_be_empty:
m_canbeempty = False
if m_canbeempty:
for element in self.follow[left][0]:
self._set_tableitem(element, left, right)
print("we taking {} into {}'s ".format(line, element))
# isempty_exist = False
# for x in self.first[left][0]:
# if x == '@':
# isempty_exist = True
# self._set_tableitem(x, left, line)
# # 执行步骤(2)
# if isempty_exist:
# for x in self.follow[left][0]:
# self._set_tableitem(x, left, line)
def ll1(self, _str):
symstack = ['#', self.start]
print(symstack)
inputstack = list(_str)
print(inputstack)
current_sym_top = symstack.pop()
current_in_top = inputstack.pop(0)
counter = 0
try:
while(current_sym_top != '#' ):
counter += 1
print("第{}步 ,\33[4m符号栈\33[0m:{}\33[4m输入栈\33[0m{}"
.format(str(counter).ljust(3),
(str(symstack)[:-1]+','+current_sym_top+']').ljust(20),
('['+current_in_top+','+str(inputstack)[1:]).ljust(10)))
if current_sym_top in self.nonterminal:
m_production = self.pretable[current_in_top][current_sym_top].strip('->')
if m_production == 'error':
print('\33[35merror,分析停止')
break
elif m_production == '@':
current_sym_top = symstack.pop()
else:
# 把产生式的每个非终结符入栈
tmp = list(m_production)
for i in range(len(m_production)):
symstack.append(tmp.pop())
current_sym_top = symstack.pop()
# print("产生式:->{}".format(m_production))
elif current_sym_top in self.terminal:
current_sym_top = symstack.pop()
current_in_top = inputstack.pop(0)
if current_sym_top == current_in_top == '#':
print("\33[35;1m分析成功\33[4m")
except:
print('\33[35merror,分析停止')
def _split(self, line):
# 分割带有 '|' 的产生式
left, right = line.split('->')
factions = right.split('|')
for faction in factions:
self.lines.append(left + '->' + faction)
# 删除被分割的产生式
self.lines.remove(line)
def _set_tableitem(self, x, y, content):
try:
if self.pretable[x][y] == 'error' or self.pretable[x][y] == content:
self.pretable[x][y] = '->' + content
else:
# print("{}将被替换为{}".format(self.pretable[x][y], content))
self._error(self.pretable[x][y], content, x, y)
except:
print("x:{}y:{}content:{}".format(x, y, content))
def _error(self, str1, str2, x, y):
print("不是LL1文法,冲突为{},{},位置为{},{}".format(str1, str2, x, y))
self.iserror = True
if __name__ == '__main__':
parser = Parser()
parser.open('data1')
print(parser.lines)
print(parser.nonterminal)
print(parser.terminal)
print("start symbol:{}".format(parser.start))
parser.make_first()
parser.make_follow()
parser.make_pretable()
print ("\033[;32m first\033[0m")
pprint(parser.first,width=40)
print ("\033[;32m follow\033[0m")
pprint(parser.follow,width=70)
print ("\033[;32m can be empty\033[0m")
pprint(parser.can_be_empty,width=40)
# pprint(parser.pretable,width=200)
if not parser.iserror:
tb = pt.PrettyTable()
# tb.field_names = parser.pretable.keys()
_flag = True
for key1 in parser.pretable.keys():
if _flag:
tb.add_column(" ", list(parser.pretable[key1]))
_flag = False
tb.add_column(key1,list(parser.pretable[key1].values()))
print(tb)
target = 'i+i+i*i#'
parser.ll1(target)
else:
print("\33[不是LL1文法")