-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathopy
executable file
·475 lines (384 loc) · 13.3 KB
/
opy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
#!/usr/bin/env python3
import sys
import ast
import re
import codecs
from collections import defaultdict
# Package metadata; usage() prints these four values to stderr.
__version__ = "2.1.3"
__author__ = "Ryuichi Ueda"
__license__ = "MIT license"
__url__ = "https://github.com/ryuichiueda/opy"
def usage():
    '''
    Print the version, copyright, license, project URL and the
    version of the running Python interpreter to the standard error.
    '''
    banner = [
        "opy " + __version__ + "\n",
        "Copyright 2019 " + __author__,
        "Released under " + __license__,
        __url__,
        "\nYou are using Python {}.{}.{} with this command."
        .format(*sys.version_info[0:3]),
    ]
    for text in banner:
        print(text, file=sys.stderr)
class Rule:
    '''
    One rule of an opy program.

    A rule consists of a pattern, an action, or both. Each of
    them holds Python code (sentences or expressions) as text.

    An action is either a normal action or a list action;
    has_normal_action is True for the former.
    '''

    def __init__(self, pattern, action, has_normal=False):
        self.pattern, self.action = pattern, action
        self.has_normal_action = has_normal
class Parser:
    '''
    When a code is given, the instance of this class
    parses the code immediately. Then it sorts rules
    by patterns.

    When the pattern of a rule is B/BEGIN or E/END,
    the rule is categorized to "begins" or "ends"
    respectively. Otherwise it is appended to "lines".
    Within each category the rules keep the order in
    which they were written.
    '''

    def __init__(self, code):
        rules = []
        while True:
            rule, code = self.__get_rule(code)
            if rule is None:
                break
            rules.append(rule)

        # The scanners peel rules off the right end of the code, so the
        # collected list is reversed once to restore the written order.
        rules.reverse()

        self.begins = [r for r in rules if r.pattern in ("B", "BEGIN")]
        self.ends = [r for r in rules if r.pattern in ("E", "END")]
        self.lines = [r for r in rules
                      if r.pattern not in ("B", "BEGIN", "E", "END")]

    def __get_rule(self, code) -> "tuple[Rule | None, str]":
        '''
        This method judges the type of the rightmost
        rule by the rightmost character. It uses one
        of three scanners based on the judgement.
        The scanner returns the rightmost rule and
        the residual code and this method returns them.
        When no code is left, (None, "") is returned.
        '''
        code = code.strip()
        if code == "":
            return None, ""
        if code[-1] == "]":    # The rule has a list action.
            return self.__list(code)
        if code[-1] == "}":    # It has a normal action.
            return self.__action(code)
        return self.__pattern(code)    # It has no action.

    def __pattern(self, code) -> "tuple[Rule, str]":
        '''
        This method searches ";" from the right.
        Whenever ";" is found, the part on its right is
        tested whether it is valid Python code or not.
        The first candidate that passes becomes the pattern.
        The process exits with status 1 when none passes.
        '''
        n = len(code)
        while n != -1:
            n = code.rfind(";", 0, n)
            pattern = code[n+1:].strip()
            if self.__test(pattern):
                return Rule(pattern, ""), code[:max(n, 0)]

        print("pattern parse error", file=sys.stderr)
        sys.exit(1)

    def __list(self, code) -> "tuple[Rule, str]":
        '''
        This method searches a list action from the right.
        If the rule to which the list action belongs has
        a pattern, the pattern is also searched and added
        to the rule. The rule and the left residual string
        are returned. The process exits with status 1 when
        no candidate is valid Python code.
        '''
        n = len(code)
        while n != -1:
            # Nearest ";" or ":" on the left of the previous candidate.
            n = max(code.rfind(c, 0, n) for c in ";:")
            proc = code[n+1:].lstrip()
            if not self.__test(proc):
                continue

            if n == -1 or code[n] == ";":
                return Rule("", proc), code[:max(n, 0)]

            # code[n] is ":", so a pattern precedes the action. The
            # validated (left-trimmed) text is stored in both cases.
            s, r = self.__pattern(code[:n])
            return Rule(s.pattern, proc), r

        print("list action parse error", file=sys.stderr)
        sys.exit(1)

    def __action(self, code) -> "tuple[Rule, str]":
        '''
        This method searches a normal action from the right.
        Its procedure is almost the same with that of __list,
        except that the candidate must be enclosed in braces
        and that the text between the braces is tested.
        '''
        n = len(code)
        while n != -1:
            n = max(code.rfind(c, 0, n) for c in ";:")
            action = code[n+1:].strip()
            if not (action.startswith("{") and action.endswith("}")):
                continue

            proc = action[1:-1].strip()
            if not self.__test(proc):
                continue

            if n == -1 or code[n] != ":":
                return Rule("", proc, True), code[:max(n, 0)]

            # code[n] is ":", so a pattern precedes the action.
            s, r = self.__pattern(code[:n])
            return Rule(s.pattern, proc, True), r

        print("normal action parse error", file=sys.stderr)
        sys.exit(1)

    def __test(self, code) -> bool:
        '''
        This method tests whether the code is
        appropriate as a sentence or an expression
        of Python. It returns True when ast.parse
        accepts the code and False otherwise.
        '''
        try:
            ast.parse(code)
        except Exception:
            # Any parse failure means "not Python code". KeyboardInterrupt
            # and SystemExit are deliberately not swallowed here.
            return False
        return True
def __split_fields_normal(line):
    '''
    Split a line into fields with IFS (input field separator) and
    return them as a list. IFS is applied with re.split when IFSREGEX
    is true and with str.split otherwise. Every field is passed to
    num(), which converts it to an int or a float when possible.
    The line without its trailing newline is placed in front of the
    fields as the zeroth element.
    '''
    stripped = line.rstrip('\n')
    if IFSREGEX:
        pieces = re.split(IFS, stripped)
    else:
        pieces = stripped.split(IFS)

    fields = [stripped]
    fields.extend(num(piece) for piece in pieces)
    return fields
def __split_fields_strmode(line):
    '''
    Split a line into fields with IFS (input field separator) and
    return them as a list of strings. IFS is applied with re.split
    when IFSREGEX is true and with str.split otherwise. The line
    without its trailing newline is placed in front of the fields
    as the zeroth element.
    '''
    stripped = line.rstrip('\n')
    if IFSREGEX:
        pieces = re.split(IFS, stripped)
    else:
        pieces = stripped.split(IFS)

    fields = [stripped]
    fields.extend(pieces)
    return fields
def __split_fields_null(line):
    '''
    __split_fields when the IFS is a null string: every character
    of the line becomes a field, converted by num() when possible.

    The zeroth element is the line without its trailing newline,
    which is the same convention as that of __split_fields_normal.
    '''
    line = line.rstrip('\n')
    return [line] + [num(ch) for ch in line]
def __split_fields_strmode_null(line):
    '''
    __split_fields when the IFS is a null string and the string mode
    is on: every character of the line becomes a field as it is.

    The zeroth element is the line without its trailing newline,
    which is the same convention as that of __split_fields_strmode.
    '''
    line = line.rstrip('\n')
    return [line] + list(line)
def __dynamic_module_import(msg):
    '''
    This function tries to import a package with the name extracted
    from a given NameError message. The name is the first text
    enclosed in single quotes in the message.

    The module is imported into the global namespace of this file.
    When no name can be extracted or the import fails, "Name error"
    is printed to the standard error and the process exits with
    status 1.
    '''
    found = re.search(r'\'[^\']+\'', str(msg))
    if found is None:
        print("Name error", file=sys.stderr)
        sys.exit(1)

    module = found.group().strip("'")
    try:
        exec("import " + module, globals())
    except (ImportError, NameError, SyntaxError):
        # A missing module raises ImportError (ModuleNotFoundError), and
        # a quoted text that is not an identifier raises SyntaxError.
        print("Name error", file=sys.stderr)
        sys.exit(1)
def __print_list(rule, f, glo, loc):
    '''
    This function outputs the list with the delimiter given in OFS.
    The list is the value of rule.action evaluated with glo and loc,
    or f[1:] when the rule has no action.

    At a name error, it calls __dynamic_module_import so as to import
    a module with the name. If successful, it retries the output.
    When the retry fails with the very same name error, the import
    did not resolve the name, so the error is raised to the caller
    instead of being retried without end.
    '''
    last_failure = None
    while True:
        try:
            lst = eval(rule.action, glo, loc) if rule.action else f[1:]
            print(OFS.join([str(e) for e in lst]))
            return
        except NameError as e:
            if str(e) == last_failure:
                raise
            last_failure = str(e)
            __dynamic_module_import(e)
'''
The following __check... and __get... functions search sys.argv
for options. When an option is found, they return the data that
reflects the option. They also remove the option and its
associated argument from sys.argv.
'''
def __check_option(opt):
    '''
    Return True and delete the first occurrence of opt from sys.argv
    when opt is in sys.argv. Return False otherwise.
    '''
    try:
        sys.argv.remove(opt)
    except ValueError:
        # list.remove raises ValueError when opt is not in the list.
        return False
    return True
def __get_header():
    '''
    Return the argument of the -m option (names of modules to be
    imported) and delete the option and the argument from sys.argv.
    Return "" when -m is not given.

    The two elements are deleted by position. Deleting the argument
    by value would remove an earlier element of sys.argv that has the
    same text. The process exits with status 1 when -m is the last
    element of sys.argv.
    '''
    if "-m" not in sys.argv:
        return ""

    pos = sys.argv.index("-m")
    if pos + 1 >= len(sys.argv):
        print("opy: option -m requires an argument", file=sys.stderr)
        sys.exit(1)

    modules = sys.argv[pos + 1]
    del sys.argv[pos:pos + 2]
    return modules
def __get_values():
    '''
    Return the arguments of all -v options in the order of appearance
    and delete the options and the arguments from sys.argv.

    The elements are deleted by position. Deleting an argument by
    value would remove an earlier element of sys.argv that has the
    same text. The process exits with status 1 when -v is the last
    element of sys.argv.
    '''
    equations = []
    while "-v" in sys.argv:
        pos = sys.argv.index("-v")
        if pos + 1 >= len(sys.argv):
            print("opy: option -v requires an argument", file=sys.stderr)
            sys.exit(1)

        equations.append(sys.argv[pos + 1])
        del sys.argv[pos:pos + 2]

    return equations
def __get_ifs():
    '''
    Return the input field separator and a flag that tells whether
    the separator is a regular expression.

    -i gives a plain string and -I gives a regular expression. When
    both are given, only -i is used and -I is left in sys.argv.
    The default is (" ", False). The option and its argument are
    deleted from sys.argv by position, so that an earlier element
    with the same text as the argument is not removed by mistake.
    The process exits with status 1 when the argument is missing.
    '''
    for flag, is_regex in (("-i", False), ("-I", True)):
        if flag not in sys.argv:
            continue

        pos = sys.argv.index(flag)
        if pos + 1 >= len(sys.argv):
            print("opy: option " + flag + " requires an argument",
                  file=sys.stderr)
            sys.exit(1)

        ifs = sys.argv[pos + 1]
        del sys.argv[pos:pos + 2]
        return ifs, is_regex

    return " ", False
def __get_ofs():
    '''
    Return the argument of the -o option (output field separator)
    and delete the option and the argument from sys.argv.
    Return " " when -o is not given.

    The two elements are deleted by position. Deleting the argument
    by value would remove an earlier element of sys.argv that has the
    same text. The process exits with status 1 when -o is the last
    element of sys.argv.
    '''
    if "-o" not in sys.argv:
        return " "

    pos = sys.argv.index("-o")
    if pos + 1 >= len(sys.argv):
        print("opy: option -o requires an argument", file=sys.stderr)
        sys.exit(1)

    ofs = sys.argv[pos + 1]
    del sys.argv[pos:pos + 2]
    return ofs
'''
The following variables and functions, defined before
the main part, are for users.
'''
# Fields of the current line; F[0] is what r_() searches by default.
F = []
# Output field separator, used to join fields when they are printed.
OFS = " "
# Input field separator, used by the __split_fields_* functions.
IFS = " "
# When true, IFS is applied with re.split instead of str.split.
IFSREGEX = False
def r_(rgx, s=None):
    '''
    Search the regular expression rgx in str(s) with re.search and
    return the result. F[0] is searched when s is not given.
    '''
    target = F[0] if s is None else s
    return re.search(rgx, str(target))
def num(s):
    '''
    Convert s to a number when possible. int(s, 0) is tried first,
    so prefixes such as 0x and 0b are accepted, and float(s) is tried
    next. When both raise ValueError, s is returned as it is.
    '''
    for convert in (lambda text: int(text, 0), float):
        try:
            return convert(s)
        except ValueError:
            continue
    return s
def join(fs, pos=None):
    '''
    Join elements with OFS and return the string.

    Without pos, every element of fs is joined. With pos, which is
    an index or an iterable of indexes, only the elements at those
    indexes are joined in the given order. When fs is not a list,
    it is split into fields with __split_fields beforehand.
    '''
    # "is None" avoids calling __eq__ of pos, which does not return a
    # plain bool for some types (array-like objects, for example).
    if pos is None:
        return OFS.join([str(f) for f in fs])

    outindex = [pos] if isinstance(pos, int) else pos
    outlist = fs if isinstance(fs, list) else __split_fields(fs)
    return OFS.join([str(outlist[p]) for p in outindex])
def dropjoin(fs, pos):
    '''
    Join the elements at the indexes 1 to NF with OFS, except for
    those at the indexes given in pos, and return the string.

    pos is an index or an iterable of indexes. When fs is not a
    list, it is split into fields with __split_fields beforehand.
    The remaining elements are always joined in ascending order of
    the index; the order does not depend on how a set is iterated.
    '''
    dropped = {pos} if isinstance(pos, int) else set(pos)
    outlist = fs if isinstance(fs, list) else __split_fields(fs)
    return OFS.join([str(outlist[e]) for e in range(1, NF + 1)
                     if e not in dropped])
def p_(var, end=None):
    '''
    Print var. A list is printed as one line whose elements are
    joined with OFS. A dict is printed as one "key OFS value" output
    per key. Any other object is handed to print as it is.
    The argument end is passed to every print call.
    '''
    if isinstance(var, dict):
        for key in var:
            pair = [str(key), str(var[key])]
            print(OFS.join(pair), end=end)
        return

    if isinstance(var, list):
        texts = [str(f) for f in var]
        print(OFS.join(texts), end=end)
        return

    print(var, end=end)
'''
The following is the main part. To keep the scopes of variables
consistent, the process is not divided into functions.
If you have a better idea, please tell me.
'''
if __name__ == "__main__":
    # Main part: read the options, parse the code in sys.argv[1] and
    # run the begin rules, the line rules for every input line and the
    # end rules. Everything stays at the module level so that the user
    # code run by exec/eval shares one namespace with the predefined
    # variables.
    if len(sys.argv) < 2 or sys.argv[1] == "--help":
        usage()
        sys.exit(1)

    IFS, IFSREGEX = __get_ifs()

    # The following lines extract information from options.
    # Some names of variables have prefix "__" so as not to
    # collide with the names that users define.
    # The names with upper case letters are predefined variables
    # for users.
    __str_mode = __check_option("-s")
    if __str_mode and IFS != "":
        __split_fields = __split_fields_strmode
    elif IFS != "":
        __split_fields = __split_fields_normal
    elif __str_mode and IFS == "":
        __split_fields = __split_fields_strmode_null
    else:
        __split_fields = __split_fields_null

    __buffer_mode = __check_option("-b")
    __modules = __get_header()

    for eq in __get_values():
        # Only the first "=" separates the name from the value, so the
        # value itself may contain "=".
        token = eq.split("=", 1)
        if len(token) != 2:
            print("opy: -v requires NAME=VALUE: " + eq, file=sys.stderr)
            sys.exit(1)
        # At the module level locals() is the global namespace, so this
        # defines a variable that the user code can read.
        locals()[token[0]] = token[1] if __str_mode else num(token[1])

    OFS = __get_ofs()
    FILES = sys.argv[2:] if len(sys.argv) > 2 else ["-"]

    # The following line parses the code. If the code is composed
    # of only begin/end patterns, the standard input is removed
    # from the file list. Otherwise, the procedure stops when data
    # is not given from it. "-" is absent from the list when only
    # file names are given, hence the membership test.
    __p = Parser(sys.argv[1])
    if __p.lines == [] and "-" in FILES:
        FILES.remove("-")

    # Here the main process starts.
    NF = 0
    NR = 0
    FNR = 0
    D = defaultdict(int)    # general purpose dictionary
    L = []                  # general purpose list

    # The modules after -m option are imported here.
    if __modules != "":
        exec("import " + __modules)

    # This loop executes begin rules one by one.
    for __r in __p.begins:
        # This if-sentence appears repeatedly. However, it cannot be
        # grouped into a function due to scope problems.
        if __r.has_normal_action:
            exec(__r.action)
        else:
            __print_list(__r, F, globals(), locals())

        if not __buffer_mode:
            sys.stdout.flush()

    try:
        for FILENAME in FILES:
            __h_file = sys.stdin if FILENAME == "-" else open(FILENAME, "r")
            try:
                FNR = 0    # row number in each file
                for F0 in __h_file:
                    F0 = F0.rstrip()
                    F = __split_fields(F0)
                    NF = len(F) - 1
                    NR += 1
                    FNR += 1

                    # Each element in F is copied to Fn (F1, F2, ...)
                    # variable. The values bound to Fn variables are
                    # not references but duplications. Though it may
                    # give rise to confusion, now it is not dealt with.
                    globals().update(
                        {"F"+str(n): F[n] for n in range(1, NF+1)})

                    for __r in __p.lines:
                        if __r.pattern != "" and not eval(__r.pattern):
                            continue

                        if __r.has_normal_action:
                            exec(__r.action)
                        else:
                            __print_list(__r, F, globals(), locals())

                        if not __buffer_mode:
                            sys.stdout.flush()
            finally:
                # A file opened here is closed even when a rule raises.
                # The standard input is left open because it is not
                # owned by this loop.
                if __h_file is not sys.stdin:
                    __h_file.close()

        # This loop executes end rules one by one.
        for __r in __p.ends:
            if __r.has_normal_action:
                exec(__r.action)
            else:
                __print_list(__r, F, globals(), locals())

            if not __buffer_mode:
                sys.stdout.flush()

    # Opy exits quietly when the reader of its output has gone away.
    except BrokenPipeError:
        sys.exit(0)