-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpyparser.py
938 lines (842 loc) · 22.7 KB
/
pyparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
###Parser used to create simple analytical representations of python code
#It parses the python into a bunch of nested dictionaries that contain
#the arguments and statements inside various functions / classes.
#Thanks to the public domain "GardenSnake" parser, as I took a small
#portion of the token list and regex from there, and it helped me to
#learn David Beazley's PLY from http://www.dabeaz.com/ply/
import sys
import ply.lex as lex
import ply.yacc as yacc
import pprint
###Options###
# Debug switches read by the lexer and parser rules below; all default to off.
PARSER_PRINTTOKENS = False #prints every tag generated by the parser on a new line
PARSER_DEBUG = False #prints helpful parser debug messages; also passed to yacc.yacc(debug=...)
LEXER_PRINTTOKENS = False #prints every token synthesized by the tokeniser
LEXER_DEBUG = False #prints helpful tokeniser debug messages; also passed to lex.lex(debug=...)
                    # and makes t_error raise SyntaxError instead of skipping the character
####Lexer####
# Every token type the lexer may emit. PLY requires this tuple to be named
# `tokens`; the grammar rules further down refer to these names.
tokens = (
    # keywords (see RESERVED for the source-text spelling)
    'CLASS', 'DEF', 'IF', 'ELIF', 'ELSE', 'AND', 'OR', 'FOR', 'IN', 'FROM',
    'IMPORT', 'WHILE', 'WITH', 'AS', 'TRY', 'EXCEPT', 'FINALLY', 'PASS',
    'BREAK', 'ASSERT', 'YIELD', 'PRINT', 'GLOBAL', 'RETURN',
    # identifiers and literals
    'NAME', 'NUMBER', 'STRING',
    # round, square and curly brackets
    'LPAR', 'RPAR', 'LSPAR', 'RSPAR', 'LCPAR', 'RCPAR',
    # punctuation and operators
    'COLON', 'EQ', 'NEQ', 'ASSIGN', 'LT', 'LTEQ', 'GT', 'GTEQ',
    'PLUS', 'MINUS', 'MULT', 'DIV', 'MOD', 'COMMA', 'DOT', 'SEMICOLON',
    # layout
    'WS', 'NEWLINE', 'ENDMARKER', 'INDENT',
)
# Numeric literal. PLY uses the function's docstring as the token regex, so the
# docstring must stay exactly as written:
#   (\d+(\.\d*)?|\.\d+)   digits with an optional fraction, or a bare fraction
#   ([eE][-+]? \d+)?      optional exponent (scientific notation, not hex)
# NOTE(review): the literal space inside the exponent group is only harmless if
# the pattern is compiled with re.VERBOSE (PLY's documented default) - confirm.
def t_NUMBER(t):
    r"(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"
    if LEXER_PRINTTOKENS:
        # sys.stdout.write works on Python 2 and 3 and keeps tokens on one line
        sys.stdout.write('NUMBER ')
    return t
# String literal: an optional `r` prefix followed by a string delimited by
# three or one quotation marks (double or single); backslash-escaped quotation
# marks inside the string do not terminate it. The docstring is the regex.
def t_STRING(t):
    r'(r?)((\"{3}(\\"|[^"])*\"{3})|(\'{3}(\\\'|[^\'])*\'{3})|(\"{1}(\\"|[^"])*\"{1})|(\'{1}(\\\'|[^\'])*\'{1}))'
    if LEXER_PRINTTOKENS:
        # sys.stdout.write works on Python 2 and 3 and keeps tokens on one line
        sys.stdout.write('STRING ')
    return t
# Simple tokens given as plain regex strings (no action code needed).
# NOTE(review): per the PLY documentation, string rules are tried after all
# function rules and are sorted by decreasing regex length. That would mean the
# word 'is' is claimed by t_NAME before t_EQ sees it, and '==' may be lexed as
# two ASSIGN tokens (p_equals has an `ASSIGN ASSIGN` alternative) - confirm.
t_COLON = r':'
t_EQ = r'==|(is)'              # equality test
t_NEQ = r'!='
t_ASSIGN = r'=|\+=|-=|\*=|/='  # plain and augmented assignment
t_LT = r'<'
t_LTEQ = r'<='
t_GT = r'>'
t_GTEQ = r'>='
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_MOD = r'%'
t_DIV = r'/'
t_COMMA = r','
t_DOT = r'\.'
t_SEMICOLON = r';'
# Maps each reserved word as written in source to its token type; t_NAME looks
# identifiers up here. Every token type is simply the upper-cased keyword.
RESERVED = dict(
    (word, word.upper())
    for word in (
        "class", "def", "if", "elif", "else", "return", "and", "or",
        "for", "in", "import", "while", "with", "as", "try", "except",
        "finally", "pass", "break", "assert", "yield", "print", "from",
        "global",
    )
)
# Identifier or keyword: the matched text is looked up in RESERVED and the
# token is retyped to the keyword's token type, defaulting to NAME.
def t_NAME(t):
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    t.type = RESERVED.get(t.value, "NAME")
    if LEXER_PRINTTOKENS:
        # sys.stdout.write works on Python 2 and 3 and keeps tokens on one line
        sys.stdout.write('NAME ')
    return t
# Defined before t_WS so that it consumes lines containing only a comment,
# including their leading spaces, before the whitespace rules see them.
# The newline itself is not consumed. Needed for "if 1: #comment".
def t_comment(t):
    r"[ ]*\043[^\n]*"  # \043 is '#'
    if LEXER_PRINTTOKENS:
        # the original evaluated the bare string 'COMMENT' and printed nothing
        sys.stdout.write('COMMENT ')
    # nothing is returned, so the comment is discarded
# One level of indentation: a tab or a run of four spaces.
# The token is only emitted outside brackets (paren_count == 0).
def t_INDENT(t):
    r"(\t)|(\ {4})"
    # Normalise four spaces to a tab. The original called str.replace with a
    # regex-looking literal and threw the result away, so it did nothing.
    t.value = "\t"
    if LEXER_PRINTTOKENS:
        # the original evaluated the bare string 'INDENT' and printed nothing
        sys.stdout.write('INDENT ')
    if t.lexer.paren_count == 0:
        return t
# A single space that is not part of an indent: matched and thrown away.
def t_WS(t):
    r'\ '
    return None  # no token is produced for plain whitespace
# Don't generate newline tokens when inside of parenthesis, eg
#   a = (1,
#        2, 3)
# A run of newlines becomes one NEWLINE token; the lexer's line counter is
# advanced by the number of newlines matched.
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
    t.type = "NEWLINE"
    if LEXER_PRINTTOKENS:
        # sys.stdout.write works on Python 2 and 3 and keeps tokens on one line
        sys.stdout.write('NEWLINE ')
    if t.lexer.paren_count == 0:
        return t
# '(' - increments paren_count; while it is non-zero t_newline and t_INDENT
# return nothing, so a bracketed expression may span several lines.
def t_LPAR(t):
    r'\('
    if LEXER_PRINTTOKENS:
        sys.stdout.write('LPAR ')  # valid on Python 2 and 3
    t.lexer.paren_count += 1
    return t
# ')' - decrements paren_count, the counter t_LPAR increments.
def t_RPAR(t):
    r'\)'
    if LEXER_PRINTTOKENS:
        sys.stdout.write('RPAR ')  # valid on Python 2 and 3
    t.lexer.paren_count -= 1
    return t
# '[' - counted in paren_count exactly like '(' so lists may span lines.
def t_LSPAR(t):
    r'\['
    if LEXER_PRINTTOKENS:
        sys.stdout.write('LSPAR ')  # valid on Python 2 and 3
    t.lexer.paren_count += 1
    return t
# ']' - decrements paren_count, the counter t_LSPAR increments.
def t_RSPAR(t):
    r'\]'
    if LEXER_PRINTTOKENS:
        sys.stdout.write('RSPAR ')  # valid on Python 2 and 3
    t.lexer.paren_count -= 1
    return t
# '{' - counted in paren_count exactly like '(' so dicts may span lines.
def t_LCPAR(t):
    r'\{'
    if LEXER_PRINTTOKENS:
        sys.stdout.write('LCPAR ')  # valid on Python 2 and 3
    t.lexer.paren_count += 1
    return t
# '}' - decrements paren_count, the counter t_LCPAR increments.
def t_RCPAR(t):
    r'\}'
    if LEXER_PRINTTOKENS:
        sys.stdout.write('RCPAR ')  # valid on Python 2 and 3
    t.lexer.paren_count -= 1
    return t
# Called by the lexer for a character no rule matches.
# With LEXER_DEBUG on this raises SyntaxError; otherwise the offending
# character is skipped silently. (The original also had a "Skipping" print
# behind the same LEXER_DEBUG guard, which could never run after the raise.)
def t_error(t):
    if LEXER_DEBUG:
        raise SyntaxError("Unknown symbol %r" % (t,))
    t.lexer.skip(1)
# Build the lexer from the t_* rules defined above in this module.
lexer = lex.lex(debug=LEXER_DEBUG)
# Bracket nesting depth, maintained by the bracket rules (t_LPAR ... t_RCPAR)
# and read by t_newline / t_INDENT to suppress layout tokens inside brackets.
lexer.paren_count = 0
# Builds a bare LexToken of the given type at the given line, with no value.
# Not called anywhere in this file.
def _new_token(type, lineno):
    token = lex.LexToken()
    token.type, token.value, token.lineno = type, None, lineno
    return token
###Parser Stuff#########################################################
# `tree` collects what the parser learns about the parsed file, keyed as
#     tree[class name][function name] -> branch dictionary
# "base" stands for "outside any class" at the first level and "outside any
# function" at the second. A branch dictionary holds a "statements" list
# (statements that are not followed by an indented body) and, for everything
# except base, an "arguments" entry for the parent's arguments.
tree = {"base": {"base": {"statements": []}}}
# Class and function the parser is currently inside ("base" = none).
currentclass = "base"
currentfunc = "base"
# Indent counts of the current and the previous line; current < previous
# means the line was dedented.
numindents = 0
lastnumindents = 0
# Statement type of the previous line.
laststmttype = ""
# Stack of open indentation layers (ids of branching statements); one entry
# is removed per level of dedent.
indentlayer = []
# Running counters used to build unique ids for branching statements so that
# repeated statements of the same kind do not overwrite each other.
tryid = exceptid = finallyid = ifid = withid = forid = whileid = 0
# Operator precedence table read by yacc, lowest precedence first.
# NODOT, OPER and POSTDENT are not tokens: they are names referenced through
# %prec in the grammar rules below.
precedence = (
    ('left', 'NUMBER', 'STRING'),
    ('left', 'OR'),
    ('left', 'AND'),
    ('left', 'PLUS', 'MINUS', 'MULT', 'DIV'),
    ('left', 'COMMA'),
    ('left', 'NODOT'),
    ('left', 'OPER', 'DOT'),
    ('left', 'POSTDENT'),
)
#this rule encapsulates the entire code
# A file is one or more `indented` lines. There is no action body: the
# docstring is the grammar and nothing is assigned to p[0].
def p_lines(p):
    '''
    lines : lines indented
          | indented
    '''
#indentation tracker for each line. Also encapsulates literally every line recognisable by the parser
# For every statement line this rule:
#   1. counts the line's indents,
#   2. on a dedent, closes one layer per level: first entries of `indentlayer`,
#      then the current function, then the current class,
#   3. hands the statement to parse_stmt (after the layers are up to date),
#   4. remembers the indent count and statement type for the next line.
def p_indented(p):
    '''
    indented : indent stmt NEWLINE
             | stmt NEWLINE
    '''
    global numindents
    global lastnumindents
    global currentfunc
    global currentclass
    global laststmttype
    if len(p) == 4:  # line starts with indents
        indent, stmt = p[1], p[2]
    else:  # line starts at column 0
        indent, stmt = (), p[1]
    # used for the bookkeeping in laststmttype
    stmttype = str(stmt[0])
    if len(indent) > 1:
        # p_indent yields the string "(indent)" for one indent and nested
        # tuples for several; make_list flattens either to one item per indent
        numindents = len(make_list(indent))
    else:
        numindents = len(indent)
    relativeindents = numindents - lastnumindents
    if relativeindents < 0:  # dedented
        for _ in range(-relativeindents):
            if indentlayer:
                # leave the innermost open layer, whatever it is
                del indentlayer[-1]
            elif currentfunc != "base":
                currentfunc = "base"
            elif currentclass != "base":
                currentclass = "base"
    parse_stmt(stmt)  # parse the statement AFTER indentations are calculated
    if PARSER_DEBUG:
        if numindents != lastnumindents or numindents == 0:
            # format the text first, then print it: print(...).format(...)
            # only works where print is a statement (Python 2)
            print("\t the indentation layers are now class {} func {} layers {}".format(
                currentclass, currentfunc, indentlayer))
    laststmttype = stmttype
    lastnumindents = numindents
#skips empty lines in the indentation tracker
# Blank lines, with or without leading indents, are accepted and ignored:
# there is no action body, so the indentation trackers are left untouched.
def p_indented_emptyline(p):
    '''
    indented : indent NEWLINE
             | NEWLINE
    '''
#statement tag for a statement with indents after it (they are ignored)
# The trailing `indent` is dropped: the result is the inner statement unchanged.
def p_stmt_postdent(p):
    '''
    stmt : stmt indent %prec POSTDENT
    '''
    p[0] = p[1]
#statement tag for one-line if statements (or other statements)
# Result: (branch tuple, nonbranch statement). The first element is a tuple,
# not the string "branch"/"nonbranch" that the other stmt rules put there.
def p_stmt_oneline(p):
    '''
    stmt : branch nonbranch
    '''
    header, inline_stmt = p[1], p[2]
    p[0] = (header, inline_stmt)
#statement tag for things that require an indented submethod (classes, functions, for loops etc)
# Result: ("branch", branch tuple).
def p_stmt_branching(p):
    '''
    stmt : branch
    '''
    header = p[1]
    p[0] = ("branch", header)
#statement tag for things to be added to the "statements" list
# Result: ("nonbranch", statement).
def p_stmt(p):
    '''
    stmt : nonbranch
    '''
    plain_stmt = p[1]
    p[0] = ("nonbranch", plain_stmt)
#tag for statements that don't require a new branch on the tree,
# can either be placed directly after branching statements in the same line or on its own in one line
# Each alternative is a single symbol, so its value is passed through unchanged.
def p_nonbranch(p):
    '''
    nonbranch : assign
              | variable
              | constant
              | boring
              | import
              | print
              | global
              | return
    '''
    p[0] = p[1]
#generic tag for all statements that will require a new tree branch
# Each alternative is a single symbol, so its value is passed through unchanged.
def p_branch(p):
    '''
    branch : try
           | finally
           | def
           | if
           | for
           | while
           | with
           | except
    '''
    p[0] = p[1] #branching statements should be specially structured as a tuple
                # eg (str(tagname), str(uniqueID), args, statements)
                # or (str(tagname), str(uniqueID), args)
                # or (str(tagname), str(uniqueID)) if no args or statements are necessary
#tag for try statements
# Result: ("try", "try_<n>") where <n> is the running tryid counter.
# The original also wrote an entry under the fixed key "try" straight into the
# tree from here. That ran at reduction time, before p_indented had applied
# this line's dedents, and every try statement shared the same key. The branch
# is created under its unique id by grow_new_branch, as for every other
# branching statement, so that extra write is removed.
def p_try(p):
    '''
    try : TRY COLON
    '''
    global tryid
    p[0] = ("try", "try_" + str(tryid))
    tryid += 1
#tag for finally statements
# Result: ("finally", "finally_<n>") where <n> is the running finallyid counter.
# The counter is an int, so it must go through str() before concatenation (the
# original raised TypeError here); the "_" separator matches the other ids.
def p_finally(p):
    '''
    finally : FINALLY COLON
    '''
    global finallyid
    p[0] = ("finally", "finally_" + str(finallyid))
    finallyid += 1
#function or class definitions
# Result: (keyword, name, parameter list), where keyword is "def" or "class"
# and name / parameter list come from the (name, params) pair of nameprmtrs.
def p_def(p):
    '''
    def : DEF nameprmtrs COLON
        | CLASS nameprmtrs COLON
    '''
    keyword = str(p[1])
    signature = p[2]
    p[0] = (keyword, str(signature[0]), signature[1])
#class definitions that don't specify any inheritance
# Result: ("class", name, []) - same shape as p_def with no arguments.
def p_def_class(p):
    '''
    def : CLASS NAME COLON
    '''
    keyword, class_name = str(p[1]), str(p[2])
    p[0] = (keyword, class_name, [])
#if / elif / else statements
# Result: (keyword, "<keyword>_<n>", condition), where <n> is the running ifid
# counter and condition is () for a bare `else:`.
def p_if(p):
    '''
    if : IF checks COLON
       | ELIF checks COLON
       | IF value COLON
       | ELIF value COLON
       | ELSE COLON
    '''
    global ifid
    keyword = str(p[1])
    branch_id = keyword + "_" + str(ifid)
    symbol_count = len(p)
    if symbol_count == 4:
        p[0] = (keyword, branch_id, p[2])
    elif symbol_count == 3:
        p[0] = (keyword, branch_id, ())
    ifid += 1
#for loops
# Result: ("for", "for_<n>", "<target> in <iterable>"), where the last element
# is text rebuilt from the ("in", target, iterable) tuple produced by p_in.
def p_for(p):
    '''
    for : FOR in COLON
    '''
    global forid
    membership = p[2]
    header_text = " in ".join((str(membership[1]), str(membership[2])))
    p[0] = ("for", "for_" + str(forid), header_text)
    forid += 1
#while loops
# Result: ("while", "while_<n>", condition), <n> being the running whileid counter.
def p_while(p):
    '''
    while : WHILE checks COLON
    '''
    global whileid
    branch_id = "while_" + str(whileid)
    condition = p[2]
    p[0] = ("while", branch_id, condition)
    whileid += 1
#with statements
# Result: ("with", "with_<n>", text), where text is "<expr> as <name>" or just
# "<expr>" and <n> is the running withid counter.
def p_with(p):
    '''
    with : WITH variable AS uname COLON
         | WITH variable COLON
    '''
    global withid
    branch_id = "with_" + str(withid)
    context_text = str(p[2])
    if len(p) == 6:  # the "as <name>" form
        context_text = context_text + " as " + str(p[4])
    p[0] = ("with", branch_id, context_text)
    withid += 1
#except clauses
# Result, by form (<n> is the running exceptid counter):
#   except:            -> ("except", "except_<n>")
#   except X:          -> ("except", "except_<n>", X)
#   except X as Y:     -> ("except", "except_<n>", "<X> as <Y>")
def p_except(p):
    '''
    except : EXCEPT COLON
           | EXCEPT NAME COLON
           | EXCEPT argslist COLON
           | EXCEPT argslist AS argslist COLON
    '''
    global exceptid
    branch_id = "except_" + str(exceptid)
    symbol_count = len(p)
    if symbol_count == 3:
        p[0] = ("except", branch_id)
    elif symbol_count == 4:
        p[0] = ("except", branch_id, p[2])
    else:
        p[0] = ("except", branch_id, str(p[2]) + " as " + str(p[4]))
    exceptid += 1
#for import tags at the beginning of the document
# Result: a tuple of every matched symbol in order, keywords included, e.g.
# ("from", module, "import", names). The grammar only produces right-hand
# sides of 2, 4 or 6 symbols.
def p_import(p):
    '''
    import : IMPORT valuelist
           | FROM variable IMPORT valuelist
           | IMPORT variable AS valuelist
           | FROM variable IMPORT variable AS valuelist
    '''
    p[0] = tuple(p[i] for i in range(1, len(p)))
#for print statements
# Result: ("print", list of printed values). The values are flattened by
# make_list; a trailing comma is accepted and ignored.
def p_print(p):
    '''
    print : PRINT valuelist
          | PRINT valuelist COMMA
    '''
    p[0] = (p[1], make_list(p[2]))
#for global declarations
# Result: ("global", name). Only a single name is accepted by the grammar.
def p_global(p):
    '''
    global : GLOBAL NAME
    '''
    keyword, name = p[1], p[2]
    p[0] = (keyword, name)
#for return statements
# Result: ("return", returned value list). A bare `return` is not matched by
# this rule.
def p_return(p):
    '''
    return : RETURN valuelist
    '''
    keyword, returned = p[1], p[2]
    p[0] = (keyword, returned)
#tag used for running functions as well as for function/class definitions
# Result: (name, parameters) - the parameters are the list built by p_prmtrs.
def p_nameprmtrs(p):
    '''
    nameprmtrs : uname prmtrs
    '''
    name, parameters = p[1], p[2]
    p[0] = (name, parameters)
#for checks surrounded by brackets
# The brackets are dropped: the result is the inner `checks` value unchanged.
def p_checks_par(p):
    '''
    checks : LPAR checks RPAR
    '''
    p[0] = p[2]
#for combining conditions with and / or
# Result: (connective, left, right) for a combination, or the single check
# passed through unchanged.
def p_checks(p):
    '''
    checks : checks andor check
           | checks andor value
           | value andor value
           | check
    '''
    p[0] = (p[2], p[1], p[3]) if len(p) == 4 else p[1]
#a boolean statement like x > y
# Result: (comparison operator, left, right), or the `in` tuple passed through.
def p_check(p):
    '''
    check : value equals value
          | in
    '''
    p[0] = (p[2], p[1], p[3]) if len(p) == 4 else p[1]
#something in something, eg for something in something or if something in something
# Result: ("in", left, right).
def p_in(p):
    '''
    in : value IN value
    '''
    left, keyword, right = p[1], p[2], p[3]
    p[0] = (keyword, left, right)
#combines the indentations in nested tuples (to later be tallied up for each line)
# One indent gives the plain string "(indent)"; each further indent wraps the
# previous value as (previous, "(indent)"). p_indented counts the levels.
def p_indent(p):
    '''
    indent : indent INDENT
           | INDENT
    '''
    if len(p) == 3:
        p[0] = (p[1], "(indent)")
    else:
        # a string, not a 1-tuple: the original's ("(indent)") had no comma
        p[0] = "(indent)"
    if PARSER_PRINTTOKENS:
        # sys.stdout.write works on Python 2 and 3 and keeps tags on one line
        sys.stdout.write('(indent) ')
#a list of arguments for use as parameters of some function
# Result: the bracketed contents flattened into a Python list by make_list.
def p_prmtrs(p):
    '''
    prmtrs : LPAR argslist RPAR
           | LPAR assign RPAR
           | tuple
    '''
    contents = p[2] if len(p) == 4 else p[1]
    p[0] = make_list(contents)
#a list of arguments, mostly riding on the back of "valuelist"
# Result: (everything before the comma, item after the comma) - the same
# nested-pair encoding that make_list flattens.
def p_argslist(p):
    '''
    argslist : argslist COMMA valuelist
             | argslist COMMA assign
             | valuelist COMMA assign
             | assign COMMA assign
    '''
    leading, trailing = p[1], p[3]
    p[0] = (leading, trailing)
#a tuple in python, also can be reduced to a arglist
# Result: () for empty brackets, otherwise the inner valuelist unchanged.
def p_tuple(p):
    '''
    tuple : LPAR valuelist RPAR
          | LPAR RPAR
    '''
    p[0] = () if len(p) == 3 else p[2]
#a python-syntax list like l = ["jingle", 2, 4], uses valuelist rather than arglist because a list like "[2, hong= 1]" is not allowed
# Result: [] for empty brackets; make_list(contents) when the contents have a
# length greater than 1; otherwise the contents unchanged.
def p_pythonlist(p):
    '''
    pythonlist : LSPAR valuelist RSPAR
               | LSPAR RSPAR
    '''
    if len(p) == 3:
        p[0] = []
        return
    contents = p[2]
    try:
        has_several = len(contents) > 1
    except TypeError:
        # a lone signed number arrives as a float (see p_number), which has no
        # len(); the original crashed here on input such as [-1]
        has_several = False
    p[0] = make_list(contents) if has_several else contents
#dictionary which contains a pairlist between curly brackets
# Result: a Python dict. A single pair arrives as (":", key, value) and is
# converted directly; several pairs arrive as nested tuples and go through
# make_dict.
def p_dictionary(p):
    '''
    dictionary : LCPAR pairlist RCPAR
               | LCPAR RCPAR
    '''
    if len(p) == 3:
        # an empty {} is a dict; the original produced a list here
        p[0] = {}
    elif p[2][0] != ":":
        p[0] = make_dict(p[2], appenddict={})
    else:
        p[0] = {p[2][1]: p[2][2]}
#list of key:value pairs
# Result: a lone pair unchanged, or (earlier pairs, newest pair).
def p_pairlist(p):
    '''
    pairlist : pairlist COMMA pair
             | pair
    '''
    p[0] = (p[1], p[3]) if len(p) == 4 else p[1]
#a key:value pair in a dictionary
# Result: (":", key, value).
def p_pair(p):
    '''
    pair : value COLON value
    '''
    key, colon, value = p[1], p[2], p[3]
    p[0] = (colon, key, value)
#list of values
# Result: a lone value unchanged, or (earlier values, newest value) - the
# nested-pair encoding that make_list flattens.
def p_valuelist(p):
    '''
    valuelist : valuelist COMMA value
              | value
    '''
    p[0] = (p[1], p[3]) if len(p) == 4 else p[1]
#groups variables, constants and operations because mostly there is no distinction in usage
# Each alternative is a single symbol, so its value is passed through unchanged.
def p_value(p):
    '''
    value : variable %prec NODOT
          | constant %prec NODOT
          | operation
    '''
    p[0] = p[1] #the operation tag is circularly referenced in the value tag, but this should be ok as chaining operations is allowed anyway
#generates operation tags for any type of operation (all are considered the same)
# Result: (operator, left operand, right operand).
def p_operation(p):
    '''
    operation : value operator value %prec OPER
    '''
    left, op, right = p[1], p[2], p[3]
    p[0] = (op, left, right)
#variables can be NAMEs or they can be NAME[0]['dingle'] for lists, dicts, tuples etc
# Result, by form:
#   single symbol           -> that symbol's value unchanged
#   <left> DOT <right>      -> (".", left, right)
#   uname accesslist        -> the two pieces joined into one string
def p_variable(p):
    '''
    variable : uname
             | uname accesslist
             | nameprmtrs
             | variable DOT variable
             | constant DOT variable
             | print DOT variable
    '''
    symbol_count = len(p)
    if symbol_count == 2:
        p[0] = p[1]
        return
    if symbol_count == 4:
        p[0] = (".", p[1], p[3])
        return
    p[0] = str(p[1]) + str(p[2])
#for variables preceded by - or + signs
# Result: the name, with the sign glued on in front when one is present.
def p_uname(p):
    '''
    uname : NAME
          | MINUS NAME
          | PLUS NAME
    '''
    p[0] = str(p[1]) + str(p[2]) if len(p) == 3 else p[1]
#a tag for access to lists, tuples and dicts (for example, the "[0][1]" part in "p[0][1] = bob")
# Result: the accessor strings concatenated in source order.
def p_accesslist(p):
    '''
    accesslist : accesslist accessor
               | accessor
    '''
    p[0] = str(p[1]) + str(p[2]) if len(p) == 3 else p[1]
#this tag enables [:1], [1:], and [1:2] formats to be used to access lists
# Result: the subscript rebuilt as text, "[" + contents + "]". The two
# open-ended slice forms have two inner symbols; the other forms have one.
def p_accessor(p):
    '''
    accessor : LSPAR value RSPAR
             | LSPAR pair RSPAR
             | LSPAR value COLON RSPAR
             | LSPAR COLON value RSPAR
    '''
    inner = str(p[2])
    if len(p) == 5:
        inner = inner + str(p[3])
    p[0] = "[" + inner + "]"
#represents an assignment operation, eg: hours = seconds/360, size = 12, name = "helga", xspeed = yspeed
# Result: (assignment operator, target, value). Chained assignments nest on
# the target side.
def p_assign(p):
    '''
    assign : assign ASSIGN value
           | variable ASSIGN value
    '''
    target, op, value = p[1], p[2], p[3]
    p[0] = (op, target, value)
#groups NUMBERS, STRINGS, lists together as constants
# Each alternative is a single symbol, so its value is passed through unchanged.
def p_constant(p):
    '''
    constant : number
             | STRING
             | pythonlist
             | tuple
             | dictionary
    '''
    p[0] = p[1]
#groups OR/AND because for analysis purposes at the moment they are identical (although parser could be extended for analysis of logic)
# The matched keyword text is passed through unchanged.
def p_andor(p):
    '''
    andor : AND
          | OR
    '''
    p[0] = p[1]
#for keywords that stand alone as a whole statement on their own line
# Result: the keyword as a string.
def p_boring(p):
    '''
    boring : PASS
           | BREAK
           | YIELD
    '''
    keyword = p[1]
    p[0] = str(keyword)
#groups numbers and numbers with preceding + or -
# Result: a signed number is converted to float (negated for '-'); an
# unsigned number is passed through as the matched text, unconverted.
def p_number(p):
    '''
    number : NUMBER
           | MINUS NUMBER
           | PLUS NUMBER
    '''
    first = p[1]
    if first == '+':
        p[0] = float(p[2])
    elif first == '-':
        p[0] = -float(p[2])
    else:
        p[0] = first
#groups plus, minus, divide, multiply and modulo together cos we don't need them for analysis (not yet at least)
# The matched operator text is passed through unchanged.
def p_operator(p):
    '''
    operator : PLUS
             | MINUS
             | MULT
             | DIV
             | MOD
    '''
    p[0] = p[1]
#groups ==, >=, <=, !=, <, and >
# Result: the comparison operator as text. The `ASSIGN ASSIGN` alternative
# covers a comparison that reaches the parser as two consecutive ASSIGN
# tokens; both tokens are joined so the result reads "==" rather than "=",
# which would be indistinguishable from the operator of an assignment tuple.
def p_equals(p):
    '''
    equals : EQ
           | NEQ
           | LT
           | LTEQ
           | GT
           | GTEQ
           | ASSIGN ASSIGN
    '''
    if len(p) == 3:
        p[0] = str(p[1]) + str(p[2])
    else:
        p[0] = p[1]
#error function for parser if it finds a bad tag
# Reports the offending token only when PARSER_DEBUG is on; otherwise the
# error is ignored silently. Nothing is raised either way.
def p_error(p):
    if PARSER_DEBUG:
        # a single-argument print(...) behaves the same on Python 2 and 3
        print("Error! " + repr(p))
# Build the parser from the p_* rules and the precedence table defined above.
parser = yacc.yacc(debug=PARSER_DEBUG)
###Methods used by Parser###############################################
def make_list(p, appendlist=None):
    """Flatten the parser's nested-pair "list" encoding into a Python list.

    The grammar encodes comma-separated items as nested 2-tuples of the form
    (earlier items, newest item). This walks that nesting recursively.

    p          -- the value to flatten: a tuple or a string; any other type
                  contributes nothing.
    appendlist -- list to append to; a new list is created when omitted.

    Returns the list that was appended to.

    NOTE(review): items come out last-to-first, because the newest item is
    appended before recursing into the earlier ones. For a 3-tuple such as an
    (operator, left, right) operation only element 0 is kept. Both look
    unintended but are preserved, since the contents of `tree` depend on them.
    """
    if appendlist is None:
        appendlist = []
    if isinstance(p, tuple):
        if len(p) == 2:
            # (earlier items, newest item): take the newest, then recurse
            appendlist.append(p[1])
            appendlist = make_list(p[0], appendlist)
        elif len(p) == 1 or len(p) == 3:
            # an argument token rather than a list pair
            appendlist.append(p[0])
        elif len(p) != 0:
            if PARSER_DEBUG:
                # format first, then print: print(...).format(...) only works
                # where print is a statement (Python 2)
                print("!! parser tried to make a list out of something that isn't a list! at {}".format(p))
    elif isinstance(p, str):
        # a lone string is a one-item list
        appendlist.append(p)
    return appendlist
def make_dict(p, appenddict=None):
    """Convert the parser's nested pair tuples into a Python dict.

    Works like make_list, but on tuples formatted as (":", key, value): a
    3-tuple is one pair, and a 2-tuple is (earlier pairs, newest pair).

    p          -- the pair structure; anything that is not a tuple is ignored.
    appenddict -- dict to fill; a new dict is created when omitted.

    Returns the dict that was filled. A tuple of any other length prints a
    warning (regardless of PARSER_DEBUG) and is skipped.
    """
    if appenddict is None:
        appenddict = {}
    if isinstance(p, tuple):
        if len(p) == 2:
            # (earlier pairs, newest pair): store the newest, then recurse
            newest = p[1]
            appenddict[newest[1]] = newest[2]
            appenddict = make_dict(p[0], appenddict)
        elif len(p) == 3:
            # a single (":", key, value) pair
            appenddict[p[1]] = p[2]
        else:
            # format first, then print: print(...).format(...) only works
            # where print is a statement (Python 2)
            print("!! parser tried to make a dict out of something that isn't a dict! at {}".format(p))
    return appenddict
#finds the last layer of "unclosed" indents inside the tree, starting at currentclass/currentfunc
# Walks down from tree[currentclass][currentfunc], following one entry of
# `indentlayer` per step, and returns the dictionary reached. Entries whose
# name equals the current class or function are skipped.
def find_unclosed_indentlayer():
    node = tree[currentclass][currentfunc]
    for layer in indentlayer:
        if str(layer) == currentfunc or str(layer) == currentclass:
            continue  # class/function layers are not branches of their own
        node = node[layer]  # one branch deeper per open layer
    return node
#creates a new tree branch at the deepest layer of unclosed indentation with the given arguments
# p is a tuple from a parser tag: p[0] = statement type, p[1] = statement id,
# and optionally p[2] = arguments and p[3] = statements.
#   class -> new entry at the root of the tree, with its own "base" function;
#            becomes the current class
#   def   -> new entry under the current class; becomes the current function
#   other -> new entry inside the deepest open indentation layer
def grow_new_branch(p):
    global currentclass
    global currentfunc
    stmttype = p[0]
    stmtid = p[1]
    args = p[2] if len(p) > 2 else []
    stmts = p[3] if len(p) > 3 else []
    if stmttype == 'class':
        currentclass = stmtid
        tree[currentclass] = {
            "arguments": args,
            "base": {"arguments": [], "statements": []},
        }
    elif stmttype == 'def':
        currentfunc = stmtid
        tree[currentclass][currentfunc] = {"arguments": args, "statements": stmts}
    else:
        target = find_unclosed_indentlayer()
        target[stmtid] = {"arguments": args, "statements": stmts}
    if PARSER_DEBUG:
        # one pre-built string prints identically on Python 2 and 3; the
        # double space reproduces the separator of the original print statement
        print("\t\t branch called " + str(stmtid) + "  was grown on the tree")
#adds statements to the currently open indent layer
# A list argument extends the layer's "statements" list in place; anything
# else is appended as one statement. The list is created if it is missing.
def add_stmts_to_tree(stmts):
    layer = find_unclosed_indentlayer()
    if "statements" not in layer:
        layer["statements"] = []
    if type(stmts) == list:
        layer["statements"] += stmts
        return
    layer["statements"].append(stmts)
#parses a statement and does the appropriate action to get it into the tree
# stmt comes from the stmt rules in one of three shapes:
#   ("branch", (stmttype, stmtid, ...))      -> a branching statement
#   ("nonbranch", statement)                 -> a plain statement
#   ((stmttype, stmtid, ...), statement)     -> a one-line branch such as "if x: y"
def parse_stmt(stmt):
    if PARSER_PRINTTOKENS:
        # format first, then print: print(...).format(...) only works where
        # print is a statement (Python 2)
        print("found {}ing statement that reads {}".format(stmt[0], stmt[1]))
    kind = stmt[0]
    if kind == "branch":
        header = stmt[1]
        grow_new_branch(header)
        # functions and classes are tracked through currentfunc/currentclass,
        # not through the indentlayer stack
        if header[0] != "def" and header[0] != "class":
            indentlayer.append(header[1])
    elif kind == "nonbranch":
        add_stmts_to_tree(stmt[1])
    else:
        # one-line statement: open the branch, add the statement to it, then
        # close it again straight away
        grow_new_branch(kind)
        indentlayer.append(kind[1])
        add_stmts_to_tree(stmt[1])
        del indentlayer[-1]
###TEST
#~ while True:
#~ with open("./example.py", "r") as f:
#~ data = f.read();
#~ try:
#~ s = data
#~ except EOFError:
#~ break
#~ parser.parse(s)
#~ print("class\t\tfunc\t\tcontents")
#~ pp = pprint.PrettyPrinter(indent=4)
#~ pp.pprint(tree)
#~ break
def parse_file(filename):
    """Parse the Python source file `filename` and return the module-level tree.

    The result is the shared `tree` dictionary, which is filled in as a side
    effect of parsing and is not reset here, so successive calls accumulate
    into the same object.
    """
    with open(filename, "r") as f:
        data = f.read()
    # Every statement rule in the grammar ends with NEWLINE, so a final line
    # without a trailing newline would never be reduced.
    if not data.endswith("\n"):
        data += "\n"
    # The original wrapped a plain assignment in try/except EOFError, which
    # could never trigger; it has been dropped.
    parser.parse(data)
    return tree