# -*- coding: utf-8; -*-
"""Quasiquotes. Build ASTs in your macros, using syntax that mostly looks like regular code.
The macro operators `q`, `u`, `n`, `a`, `s`, `t`, `h` are the primary API.
The functions `capture_value` and `capture_as_macro` are public, so you can get the
benefits of hygienic capture also in old-school macros that build ASTs manually
without using quasiquotes.
The `astify` and `unastify` functions are the low-level quasiquote compiler
and uncompiler, respectively.
"""
__all__ = ["capture_value", "capture_macro", "capture_as_macro",
"is_captured_value", "is_captured_macro",
"astify", "unastify",
"q", "u", "n", "a", "s", "t", "h"]
import ast
import copy
import pickle
import sys
from .core import Done, MacroExpansionError, global_bindings
from .coreutils import _mcpyrate_attr
from .expander import MacroExpander, isnamemacro
from .markers import ASTMarker, check_no_markers_remaining, delete_markers
from .unparser import unparse, unparse_with_fallbacks
from .utils import (NestingLevelTracker, extract_bindings, flatten, gensym,
scrub_uuid)
def _mcpyrate_quotes_attr(attr, *, force_import=False):
"""Create an AST that, when compiled and run, looks up `mcpyrate.quotes.attr`.
If `force_import` is `True`, use the builtin `__import__` function to
first import the `mcpyrate.quotes` module. This is useful for e.g.
hygienically unquoted values, whose eventual use site might not import
any `mcpyrate` modules.
"""
return _mcpyrate_attr(f"quotes.{attr}", force_import=force_import)
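# Illustrative example: `_mcpyrate_quotes_attr("lookup_value")` builds an AST
# roughly equivalent to the source `mcpyrate.quotes.lookup_value`; with
# `force_import=True`, roughly `__import__("mcpyrate.quotes").quotes.lookup_value`
# (plus the usual extra `__import__` arguments), so the reference resolves even
# if the use site imports nothing from `mcpyrate`.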
class QuasiquoteMarker(ASTMarker):
"""Base class for AST markers used by quasiquotes. Compiled away by `astify`."""
pass
class SpliceNodes(QuasiquoteMarker):
"""Splice a `list` of AST nodes into the surrounding context.
Command sent by `ast_literal` (run-time part of `a`)
to `splice_ast_literals` (run-time part of the surrounding `q`).
"""
pass
class QuasiquoteSearchDone(Done, QuasiquoteMarker):
"""Marker used by nested quasiquotes to tell the expander a subtree is already done.
This inherits from, but is separate from, the usual `Done`, because:
1. We need to tell the expander that is processing the nested quasiquotes
to stop expanding an invocation that has already been considered.
2. We need to be able to eliminate these (and only these) before
generating the final quoted output.
"""
pass
# --------------------------------------------------------------------------------
# Unquote commands for `astify`. Each type corresponds to an unquote macro.
class Unquote(QuasiquoteMarker):
"""Interpolate the value of the given subtree into the quoted tree. Emitted by `u[]`."""
pass
class LiftSourcecode(QuasiquoteMarker):
"""Parse a string as a Python expression, interpolate the resulting AST. Emitted by `n[]`.
This allows e.g. computing names of lexical variables.
"""
def __init__(self, body, filename):
super().__init__(body)
self.filename = filename
self._fields += ["filename"]
class ASTLiteral(QuasiquoteMarker): # similar to `macropy`'s `Literal`, but supports block mode, too.
"""Interpolate the given AST. Emitted by `a`."""
def __init__(self, body, syntax):
super().__init__(body)
self.syntax = syntax
self._fields += ["syntax"]
class ASTList(QuasiquoteMarker):
"""Interpolate the given iterable of AST nodes as an `ast.List` node. Emitted by `s[]`."""
pass
class ASTTuple(QuasiquoteMarker):
"""Interpolate the given iterable of AST nodes as an `ast.Tuple` node. Emitted by `t[]`."""
pass
class Capture(QuasiquoteMarker): # like `macropy`'s `Captured`
"""Capture given subtree hygienically. Emitted by `h[]`.
Details: capture the value or macro name the given subtree evaluates to,
at the use site of `q`. The value or macro reference is frozen (by pickle)
so that it can be restored also in another Python process later.
(It is important that hygienic captures can be restored across process boundaries,
to support bytecode caching for source files that invoke a macro that uses
`h[]` in its output.)
"""
def __init__(self, body, name):
super().__init__(body)
self.name = name
self._fields += ["name"]
# --------------------------------------------------------------------------------
# Run-time parts of the operators.
# Unquote doesn't have its own function here, because it's a special case of `astify`.
def lift_sourcecode(value, filename="<unknown>"):
"""Parse a string as a Python expression. Run-time part of `n[]`.
The main use case is to access lexical variables with names computed at your macro definition site::
lift_sourcecode("kitty") -> Name(id='kitty')
More complex expressions work, too::
lift_sourcecode("kitty.tail") -> Attribute(value=Name(id='kitty'),
attr='tail')
lift_sourcecode("kitty.tail.color") -> Attribute(value=Attribute(value=Name(id='kitty'),
attr='tail'),
attr='color')
lift_sourcecode("kitties[3].paws[2].claws")
"""
if not isinstance(value, str):
raise TypeError(f"`n[]`: expected an expression that evaluates to str, result was {type(value)} with value {repr(value)}")
return ast.parse(value, filename=f"<invocation of n[] in '{filename}'>", mode="eval").body
def _typecheck(node, cls, macroname):
if isinstance(node, ASTMarker):
if isinstance(node.body, list): # statement suite inside a marker
for child in node.body:
_typecheck(child, cls, macroname)
return
# single AST node inside a marker
_typecheck(node.body, cls, macroname)
return
if not isinstance(node, cls):
raise TypeError(f"{macroname}: expected {cls}, got {type(node)} with value {repr(node)}")
def _flatten_and_typecheck_iterable(nodes, cls, macroname):
try:
lst = list(nodes)
except TypeError:
raise TypeError(f"{macroname}: expected an iterable of AST nodes, got {type(nodes)} with value {repr(nodes)}")
lst = flatten(lst)
for node in lst:
_typecheck(node, cls, macroname)
return lst
def ast_literal(tree, syntax):
"""Perform run-time typecheck on AST literal `tree`. Run-time part of `a`.
If `tree` is a run-time iterable, convert it to a `list`, flatten that `list`
locally, and inject a run-time marker for `splice_ast_literals`, to indicate
where splicing into the surrounding context is needed.
"""
if syntax not in ("expr", "block"):
raise ValueError(f"expected `syntax` either 'expr' or 'block', got {repr(syntax)}")
if syntax == "expr":
if isinstance(tree, ast.AST):
_typecheck(tree, ast.expr, "`a` (expr mode)")
return tree
else:
lst = _flatten_and_typecheck_iterable(tree, ast.expr, "`a` (expr mode)")
return SpliceNodes(lst)
assert syntax == "block"
# Block mode `a` always produces a `list` of the items in its body.
# Each item may refer, at run time, to a statement AST node or to a `list`
# of statement AST nodes.
#
# We flatten locally here to get rid of the sublists, so that all statement
# nodes injected by this invocation of block mode `a` become gathered into
# a single flat "master list".
#
# However, there's a piece of postprocessing we cannot do here: the splice
# of the master list into the surrounding context. For that, we mark the
# place for `splice_ast_literals`, which is the run-time part of the
# surrounding block mode `q` (which allows it to operate on the whole
# quoted tree).
#
# The splicer must splice only places marked by us, because lists occur
# in many places in a Python AST beside statement suites (e.g. `Assign`
# targets, the parameter list in a function definition, ...).
lst = _flatten_and_typecheck_iterable(tree, ast.stmt, "`a` (block mode)")
return SpliceNodes(lst)
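# Sketch of the two expr-mode behaviors (illustrative):
#
#     ast_literal(ast.Name(id="x"), "expr")
#     # -> the `Name` node itself, passed through after the typecheck
#
#     ast_literal([ast.Name(id="x"), ast.Name(id="y")], "expr")
#     # -> SpliceNodes([Name, Name]); the surrounding `q`'s run-time part
#     #    (`splice_ast_literals`) later splices the list into place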
def splice_ast_literals(tree, filename):
"""Splice list-valued `a` AST literals into the surrounding context. Run-time part of `q`."""
# We do this recursively to splice also at any inner levels of the quoted
# AST (e.g. `with a` inside an `if`).
def doit(thing):
if isinstance(thing, list):
newthing = []
for item in thing:
if isinstance(item, SpliceNodes):
doit(item.body)
# Discard the `SpliceNodes` marker and splice the `list` that was contained in it.
newthing.extend(item.body)
else:
doit(item)
newthing.append(item)
thing[:] = newthing
# As of Python 3.9, `Global` and `Nonlocal` are the only AST node types
# where a field contains a `list` of bare strings.
elif isinstance(thing, (ast.Global, ast.Nonlocal)):
pass
elif isinstance(thing, ast.AST):
for fieldname, value in ast.iter_fields(thing):
if isinstance(value, list):
doit(value)
else:
raise TypeError(f"Expected `list` or AST node, got {type(thing)} with value {repr(thing)}")
doit(tree)
try:
check_no_markers_remaining(tree, filename=filename, cls=SpliceNodes)
except MacroExpansionError:
err = RuntimeError("`q`: `SpliceNodes` markers remaining after expansion, likely a misplaced `a` unquote; did you mean `s[]` or `t[]`?")
# The list of remaining markers is not very useful, so suppress it
# (but leave it available for introspection in the `__context__` attribute).
err.__suppress_context__ = True
raise err
return tree
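# Illustrative example: if, at run time, a statement suite contains
# `[stmt1, SpliceNodes([stmt2, stmt3]), stmt4]`, then after `splice_ast_literals`
# that suite reads `[stmt1, stmt2, stmt3, stmt4]`, with the marker gone.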
def ast_list(nodes):
"""Interpolate an iterable of expression AST nodes as an `ast.List` node. Run-time part of `s[]`."""
lst = _flatten_and_typecheck_iterable(nodes, ast.expr, "`s[]`")
return ast.List(elts=lst)
def ast_tuple(nodes):
"""Interpolate an iterable of expression AST nodes as an `ast.Tuple` node. Run-time part of `t[]`."""
lst = _flatten_and_typecheck_iterable(nodes, ast.expr, "`t[]`")
return ast.Tuple(elts=lst)
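# Sketch (illustrative): both helpers accept any iterable of expression nodes.
#
#     ast_list([ast.Constant(value=1), ast.Constant(value=2)])
#     # -> ast.List(elts=[...]), i.e. the AST for the literal `[1, 2]`
#
#     ast_tuple(ast.Constant(value=n) for n in (1, 2))
#     # -> ast.Tuple(elts=[...]), i.e. the AST for the literal `(1, 2)`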
def capture_value(value, name):
"""Hygienically capture a run-time value. Used by `h[]`.
`value`: A run-time value. Must be picklable.
`name`: For human-readability.
The return value is an AST that, when compiled and run, returns the
captured value (even in another Python process later).
"""
# If we didn't need to consider bytecode caching, we could just store the
# value in a dictionary (that lives at the top level of `mcpyrate.quotes`)
# that is populated at macro expansion time. Each unique value (by `id`)
# could be stored only once.
#
# But we want to support bytecode caching. To avoid introducing hard-to-find
# bugs into user code, we must provide consistent semantics, regardless of
# whether updating of the bytecode cache is actually enabled or not (see
# `sys.dont_write_bytecode`). So we must do the same thing regardless of
# whether the captured value is used in the current process, or in another
# Python process later.
#
# If the macro expansion result is to remain available for re-use from a
# `.pyc`, we must serialize and store the captured value to disk, so that
# values from "macro expansion time last week" are still available when the
# `.pyc` is loaded in another Python process later.
#
# Modules are macro-expanded independently (no global finalization for the
# whole codebase), and a `.pyc` may indeed later get loaded into some other
# codebase that imports the same module, so we can't make a centralized
# registry, like we could without bytecode caching.
#
# So really pretty much the only thing we can do reliably and simply is to
# store a fresh serialized copy of the value at the capture location in the
# source code, independently at each capture location.
#
# Putting these considerations together, we pickle the value, causing a copy
# and serialization.
#
frozen_value = pickle.dumps(value)
return ast.Call(_mcpyrate_quotes_attr("lookup_value", force_import=True),
[ast.Tuple(elts=[ast.Constant(value=name),
ast.Constant(value=frozen_value)])],
[])
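# Illustrative example: `capture_value(42, "x")` returns an AST roughly
# equivalent to the source
#
#     __import__("mcpyrate.quotes").quotes.lookup_value(("x", b"<pickle of 42>"))
#
# so the captured value can be recovered even in a later Python process that
# loads the expansion from a `.pyc`.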
_lookup_cache = {}
def lookup_value(key):
"""Look up a hygienically captured run-time value. Used by `h[]`.
Usually there's no need to call this function manually; `capture_value`
(and thus also `h[]`) will generate an AST that calls this automatically.
**NOTE**: For advanced macrology: if your own macros need to detect hygienic
captures using `is_captured_value`, and you want to look up the captured
value based on a key returned by that function, be aware that `lookup_value`
will only succeed if a value has been captured.
Trying to look up a key that was extracted from a pre-capture AST
raises `ValueError`. In terms of the discussion in the docstring of
`is_captured_value`, you need a `lookup_value` AST for a value to
be present; a `capture_value` AST is too early. The transition occurs
when the use site of `q` runs.
In that scenario, before you call `lookup_value` on your key, check that
`frozen_value is not None` (see docstring of `is_captured_value`);
that indicates that a value has been captured and can be decoded by
this function.
"""
name, frozen_value = key
# Trying to look up a result of `is_captured_value` that isn't captured yet.
if frozen_value is None:
raise ValueError(f"The given key does not (yet) point to a value: {repr(key)}")
cachekey = (name, id(frozen_value)) # id() so each capture instance behaves independently
if cachekey not in _lookup_cache:
_lookup_cache[cachekey] = pickle.loads(frozen_value)
return _lookup_cache[cachekey]
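# Round-trip sketch (illustrative):
#
#     key = ("answer", pickle.dumps(42))
#     lookup_value(key)  # -> 42 (unpickled on first lookup, then served from `_lookup_cache`)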
def capture_macro(macro, name):
"""Hygienically capture a macro. Used by `h[]`.
`macro`: A macro function. Must be picklable.
`name`: For human-readability. The recommended value is the name of
the macro, as it appeared in the bindings of the expander
it was captured from.
The name of the captured macro is automatically uniqified using
`gensym(name)`.
The return value is an AST that, when compiled and run, injects the macro
into the expander's global macro bindings table (even in another Python
process later), and then evaluates to the uniqified macro name as an
`ast.Name`.
"""
if not callable(macro):
raise TypeError(f"`macro` must be callable (a macro function), got {type(macro)} with value {repr(macro)}")
# Scrub any previous UUID suffix from the macro name. We'll get those when
# `unastify` uncompiles a hygienic macro capture, and then `astify`
# compiles the result again.
frozen_macro = pickle.dumps(macro)
name = scrub_uuid(name)
return ast.Call(_mcpyrate_quotes_attr("lookup_macro"),
[ast.Tuple(elts=[ast.Constant(value=name),
ast.Constant(value=gensym(name)),
ast.Constant(value=frozen_macro)])],
[])
def capture_as_macro(macro):
"""Hygienically capture a macro function as a macro, manually.
Like `capture_macro`, but with one less level of delay. This injects the
macro into the expander's global bindings table immediately, and returns
the uniqified `ast.Name` that can be used to refer to it hygienically,
using `a[]`.
The name is taken automatically from the name of the macro function.
"""
if not callable(macro):
raise TypeError(f"`macro` must be callable (a macro function), got {type(macro)} with value {repr(macro)}")
frozen_macro = pickle.dumps(macro)
name = macro.__name__
return lookup_macro((name, gensym(name), frozen_macro))
def lookup_macro(key):
"""Look up a hygienically captured macro. Used by `h[]`.
This injects the macro into the expander's global macro bindings table,
and then returns the macro name, as an `ast.Name`.
Usually there's no need to call this function manually; `capture_macro`
(and thus also `h[]`) will generate an AST that calls this automatically.
"""
name, unique_name, frozen_macro = key
if unique_name not in global_bindings:
global_bindings[unique_name] = pickle.loads(frozen_macro)
return ast.Name(id=unique_name)
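# Sketch (illustrative; `mymacro` is a hypothetical macro function):
#
#     name_node = capture_as_macro(mymacro)
#     # `mymacro` is now in `global_bindings` under a uniqified name, and
#     # `name_node` is an `ast.Name` referring to it; splicing `name_node`
#     # into quoted output (e.g. with `a[]`) yields a hygienic macro reference.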
# --------------------------------------------------------------------------------
# Advanced macrology support.
# TODO: In a future version, do we want to add an ASTMarker for captured values
# TODO: that are ready for consumption? We could save the actual AST (which is
# TODO: now detected directly) into the `body` attribute of the marker, and make
# TODO: the compiler delete `HygienicValue` markers (replacing each by its `.body`)
# TODO: as the last step before handing the AST over to Python.
def is_captured_value(tree):
"""Test whether `tree` is a hygienically captured run-time value.
This function is sometimes useful for advanced macrology. It helps
user-defined macros work together in an environment where hygienic
captures are present. One macro, using quasiquotes, builds an AST, and
another macro analyzes the expanded AST later.
Consider first, however, if you can arrange things so that the second macro
could analyze an *unexpanded* AST; that's often much easier. When the first
macro simply must expand first (for whatever reason), that's where this function
comes in.
With this function, you can check (either by name or by value) whether some
`q[h[myfunction]]` points to the desired `"myfunction"`, so that e.g. the AST
produced by `q[h[myfunction](a0, ...)]` can be recognized as a call to your
`myfunction`. This allows your second macro to know it's `myfunction`,
so that it'll know how to interpret the args of that call.
Real-world examples of where this is useful are too unwieldy to explain
here, but can be found in `unpythonic.syntax`. Particularly, see any use
sites of the helper function `unpythonic.syntax.nameutil.isx`.
To detect a hygienically captured *macro*, use `is_captured_macro` instead.
Return value:
- On no match, return `False`.
- On match, return a tuple `(name, frozen_value)`, where:
- `name` (str) is the name of the captured identifier, or when the captured
value is from an arbitrary expression, the unparsed source code of that
expression. There is no name mangling for identifiers; it's the exact
original name that appeared in the source code.
- `frozen_value` is either a `bytes` object that stores the frozen value
as opaque binary data, or `None` if the value has not been captured yet.
The `bytes` object can be decoded by passing the whole return value as `key`
to `lookup_value`. That function will decode the data and return the actual
value, just as if the hygienic reference was decoded normally at run time.
**NOTE**:
Stages in the life of a hygienically captured *run-time value* in `mcpyrate`:
1. When the surrounding `q` expands, it first expands any unquotes nested
within it, but only those where the quote level hits zero. The `h[]` is
converted into a `Capture` AST marker; see the `h` operator for details.
2. Then, still while the surrounding `q` expands, `q` compiles quasiquote
markers. A `Capture` marker, in particular, compiles into a call to
the function `capture_value`. This is the output at macro expansion time
(of the use site of `q`).
3. When the use site of `q` reaches run time, the `capture_value` runs
(thus actually performing the capture), and replaces itself (in the
AST that was produced by `q`) with a call to the function `lookup_value`.
That `lookup_value` call is still an AST node.
4. In typical usage, that use site of `q` is inside the implementation
of some user-defined macro. When *that macro's use site* reaches run
time, the `lookup_value` runs (each time that expression is executed).
So in the macro expansion of `q`, we have a call to `capture_value`
representing the hygienically captured run-time value. But once the macro
that uses `q` has returned its output, then we instead have a call to
`lookup_value`. The latter is the most likely scenario for advanced
user-defined macros that work together.
"""
if type(tree) is not ast.Call:
return False
# The format is one of:
#
# - direct reference: `(mcpyrate.quotes).xxx`
# - reference by import: `(__import__("mcpyrate.quotes", ...).quotes).xxx`
#
# First check the `xxx` part:
callee = tree.func
if not (type(callee) is ast.Attribute and callee.attr in ("capture_value", "lookup_value")):
return False
# Then the rest:
if not _is_mcpyrate_quotes_reference(callee.value):
return False
# This AST destructuring and constant extraction must match the format
# of the argument lists produced by the quasiquote system for calls to
# `capture_value` and `lookup_value`.
if callee.attr == "capture_value": # the call is `capture_value(..., name)`
name_node = tree.args[1]
assert type(name_node) is ast.Constant and type(name_node.value) is str
return (name_node.value, None) # the value hasn't been captured yet
elif callee.attr == "lookup_value": # the call is `lookup_value(key)`
key_node = tree.args[0]
name_node, frozen_value_node = key_node.elts
assert type(name_node) is ast.Constant and type(name_node.value) is str
assert type(frozen_value_node) is ast.Constant and type(frozen_value_node.value) is bytes
return (name_node.value, frozen_value_node.value)
assert False # cannot happen
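# Detection sketch (illustrative; `myfunction` is a hypothetical name). To
# recognize the expansion of `q[h[myfunction](...)]` in an already expanded AST:
#
#     if type(tree) is ast.Call:
#         key = is_captured_value(tree.func)
#         if key and key[0] == "myfunction":
#             name, frozen_value = key
#             if frozen_value is not None:
#                 value = lookup_value(key)  # the actual captured object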
def is_captured_macro(tree):
"""Just like `is_captured_value`, but detect a hygienically captured macro instead.
To detect a hygienically captured *run-time value*, use `is_captured_value` instead.
Return value:
- On no match, return `False`.
- On match, return a tuple `(name, unique_name, frozen_macro)`, where:
- `name` (str) is the name of the macro, as it appeared in the bindings
of the expander instance it was captured from.
- `unique_name` (str) is `name` with an underscore and UUID appended,
to make it unique. This is the name the macro will be injected as
into the expander's global bindings table.
(By unique, we mean "universally unique anywhere for approximately
the next one thousand years"; see `mcpyrate.gensym`, which links to
the UUID spec used by the implementation.)
- `frozen_macro` is a `bytes` object that stores a reference to the
frozen macro function as opaque binary data.
The `bytes` object can be decoded by passing the whole return value as `key`
to `lookup_macro`. That function will decode the data, inject the macro into
the expander's global bindings table (if not already there), and give you an
`ast.Name` node whose `id` attribute contains the unique name (str), just as
if the hygienic reference was decoded normally at macro expansion time.
Then, once the injection has taken place, you can obtain the actual macro
function object by calling `expander.isbound(id)`.
**NOTE**:
Stages in the life of a hygienically captured *macro* in `mcpyrate` are as follows.
Note that unlike `capture_value`, a call to `capture_macro` never appears in the AST.
1. When the surrounding `q` expands, it first expands any unquotes nested
within it, but only those where the quote level hits zero. The `h[]` is
converted into a `Capture` AST marker; see the `h` operator for details.
2. Then, still while the surrounding `q` expands, `q` compiles quasiquote
markers. A `Capture` marker for a macro, in particular, triggers an
immediate call to the function `capture_macro`. The result is an AST
representing a call to the function `lookup_macro`. This gets injected
into the AST produced by `q`.
3. When the use site of `q` reaches run time, the `lookup_macro` runs,
injecting the macro (under its unique name) into the expander's global
bindings table. The `lookup_macro` call replaces itself with an `ast.Name`
whose `id` attribute contains the unique name of the macro.
4. In typical usage, that use site of `q` is inside the implementation
of some user-defined macro. Upon further macro expansion of *that macro's
use site*, the expander finds the now-bound unique name of the macro, and
proceeds to expand that macro.
So in the macro expansion of `q`, we have a call to `lookup_macro`
representing the hygienically captured macro. But this disappears after
a very brief window of time, namely when the use site of `q` reaches run
time. Thus, this function likely has far fewer use cases than
`is_captured_value`, but is provided for completeness.
(The point of hygienic macro capture is that a macro can safely return a further
macro invocation, and guarantee that this will invoke the intended macro - without
requiring the user to import that other macro, and without being forced to expand
it away before returning from the original macro.)
"""
if type(tree) is not ast.Call:
return False
callee = tree.func
if not (type(callee) is ast.Attribute and callee.attr == "lookup_macro"):
return False
if not _is_mcpyrate_quotes_reference(callee.value):
return False
# This AST destructuring and constant extraction must match the format
# of the argument lists produced by the quasiquote system for calls to
# `lookup_macro`.
key_node = tree.args[0] # the call is `lookup_macro(key)`
name_node, unique_name_node, frozen_macro_node = key_node.elts
assert type(name_node) is ast.Constant and type(name_node.value) is str
assert type(unique_name_node) is ast.Constant and type(unique_name_node.value) is str
assert type(frozen_macro_node) is ast.Constant and type(frozen_macro_node.value) is bytes
return (name_node.value, unique_name_node.value, frozen_macro_node.value)
def _is_mcpyrate_quotes_reference(tree):
"""Detect whether `tree` is a reference to `mcpyrate.quotes`.
This matches the ASTs corresponding to:
- direct reference: `mcpyrate.quotes`
- reference by import: `__import__("mcpyrate.quotes", ...).quotes`
Note `__import__` of a dotted module name returns the top-level module,
so we have the name `quotes` appear twice in different places.
See `_mcpyrate_quotes_attr` and `mcpyrate.coreutils._mcpyrate_attr`.
"""
if not (type(tree) is ast.Attribute and tree.attr == "quotes"):
return False
moduleref = tree.value
if type(moduleref) is ast.Name and moduleref.id == "mcpyrate":
return "direct" # ok, direct reference
elif (type(moduleref) is ast.Call and type(moduleref.func) is ast.Name and
moduleref.func.id == "__import__" and type(moduleref.args[0]) is ast.Constant and
moduleref.args[0].value == "mcpyrate.quotes"):
return "import" # ok, reference by import
else:
return False
# --------------------------------------------------------------------------------
# The quasiquote compiler and uncompiler.
def astify(x, expander=None): # like `macropy`'s `ast_repr`
"""Quasiquote compiler. Lift a value into its AST representation, if possible.
When the AST is compiled and run, it will evaluate to `x`.
Note the above implies that if `x` itself is an AST, then this produces
an AST that, when compiled and run, will generate the AST `x`. This is
the mechanism that `q` uses to produce the quoted AST.
If the input is a `list` of ASTs (e.g. body of block mode `q`), the return value
is a single `ast.List` node, with its `elts` taken from the input list
(after recursing into each element).
`expander` is a `BaseMacroExpander` instance, used for detecting macros
inside `Capture` markers. Macros can be hygienically captured only if
an `expander` is provided.
Raises `TypeError` if the lifting fails.
"""
def recurse(x): # second layer just to auto-pass `expander` by closure.
T = type(x)
# Compile the unquote commands.
#
# Minimally, `astify` must support `ASTLiteral`; the others could be
# implemented inside the unquote operators, as `ASTLiteral(ast.Call(...), "expr")`.
# But maybe this approach is cleaner.
if T is Unquote: # `u[]`
# We want to generate an AST that compiles to the *value* of `x.body`,
# evaluated at the use site of `q`. But when the `q` expands, it is
# too early. We must `astify` *at the use site* of `q`. So use an
# `ast.Call` to delay until run-time, and pass in `x.body` as-is.
return ast.Call(_mcpyrate_quotes_attr("astify"), [x.body], [])
elif T is LiftSourcecode: # `n[]`
# Delay the identifier lifting, so it runs at the use site of `q`,
# where the actual value of `x.body` becomes available.
return ast.Call(_mcpyrate_quotes_attr("lift_sourcecode"),
[x.body,
ast.Constant(value=x.filename)],
[])
elif T is ASTLiteral: # `a`
# Pass through this subtree as-is, but apply a run-time typecheck,
# as well as some special run-time handling for `list`s of AST nodes.
return ast.Call(_mcpyrate_quotes_attr("ast_literal"),
[x.body,
ast.Constant(value=x.syntax)],
[])
elif T is ASTList: # `s[]`
return ast.Call(_mcpyrate_quotes_attr("ast_list"), [x.body], [])
elif T is ASTTuple: # `t[]`
return ast.Call(_mcpyrate_quotes_attr("ast_tuple"), [x.body], [])
elif T is Capture: # `h[]`
if expander and type(x.body) is ast.Name:
function = expander.isbound(x.body.id)
if function:
# Hygienically capture a macro. We do this immediately,
# during the expansion of `q`, because the value we want to
# store, i.e. the macro function, is available only at
# macro-expansion time.
#
# This allows macros in scope at the use site of `q` to be
# hygienically propagated out to the use site of the macro
# that used `q`. So you can write macros that `q[h[macroname][...]]`,
# and `macroname` doesn't have to be macro-imported wherever
# that code gets spliced in.
return capture_macro(function, x.body.id)
# Hygienically capture a garden variety run-time value.
# At the use site of q[], this captures the value, and rewrites itself
# into an AST that represents a lookup. At the use site of the macro
# that used q[], that code runs, and looks up the captured value.
return ast.Call(_mcpyrate_quotes_attr("capture_value"),
[x.body,
ast.Constant(value=x.name)],
[])
# Builtin types. Mainly support for `u[]`, but also used by the
# general case for AST node fields that contain bare values.
elif T in (int, float, str, bytes, bool, type(None), type(...)):
return ast.Constant(value=x)
elif T is list:
return ast.List(elts=list(recurse(elt) for elt in x))
elif T is tuple:
return ast.Tuple(elts=list(recurse(elt) for elt in x))
elif T is dict:
return ast.Dict(keys=list(recurse(k) for k in x.keys()),
values=list(recurse(v) for v in x.values()))
elif T is set:
return ast.Set(elts=list(recurse(elt) for elt in x))
# We must support at least the `Done` AST marker, so that things like
# coverage dummy nodes and expanded name macros can be astified.
# (Note we support only exactly `Done`, not arbitrary descendants.)
elif T is Done:
fields = [ast.keyword(a, recurse(b)) for a, b in ast.iter_fields(x)]
# We have imported `Done`, so we can refer to it as `mcpyrate.quotes.Done`.
node = ast.Call(_mcpyrate_quotes_attr("Done"),
[],
fields)
return node
# General case.
elif isinstance(x, ast.AST):
# TODO: Add support for astifying general ASTMarkers?
# Otherwise the same as regular AST node, but need to refer to the
# module it is defined in, and we don't have everything in scope here.
if isinstance(x, ASTMarker):
raise TypeError(f"Cannot astify internal AST markers, got {unparse(x)}")
# The magic is in the Call. Take apart the input AST, and construct a
# new AST, that (when compiled and run) will re-generate the input AST.
#
# We refer to the stdlib `ast` module as `mcpyrate.quotes.ast` to avoid
# name conflicts at the use site of `q[]`.
fields = [ast.keyword(a, recurse(b)) for a, b in ast.iter_fields(x)]
node = ast.Call(ast.Attribute(value=_mcpyrate_quotes_attr("ast"),
attr=x.__class__.__name__),
[],
fields)
# Copy source location info for correct coverage reporting of a quoted block.
#
# The location info we fill in here is for the use site of `q`, which is
# typically inside a macro definition. Coverage for a quoted line of code
# means that the expansion of the quote contains input from that line.
# It says nothing about the run-time behavior of that code.
#
# Running the AST produced by the quote re-produces the input AST, which is
# indeed the whole point of quoting stuff. The AST is re-produced **without
# any source location info**. The fact that *this* location info is missing,
# on purpose, is the magic that allows the missing location fixer to fill
# the correct location info at the final use site, i.e. the use site of the
# macro that used `q`.
node = ast.copy_location(node, x)
return node
raise TypeError(f"Don't know how to astify {repr(x)}")
return recurse(x)
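# Sketch (illustrative): `astify` adds one "representation layer".
#
#     tree = ast.Name(id="x")
#     lifted = astify(tree)
#     unparse(lifted)  # roughly: "mcpyrate.quotes.ast.Name(id='x')"
#
# Compiling and running `lifted` (in a context where `mcpyrate` is importable)
# yields a fresh `ast.Name(id='x')`, equivalent to `tree`.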
def unastify(tree):
"""Quasiquote uncompiler. Approximate inverse of `astify`.
`tree` must have been produced by `astify`. Otherwise raises `TypeError`.
This turns an "astified" AST, that represents code to construct a run-time
AST value, back into a direct AST. So in a sense, `unastify` is a top-level
unquote operator.
Note the subtle difference in meaning compared to `u[]`. The `u[]` operator interpolates
a value from outside the quote context into the quoted representation - so
that the value actually becomes quoted! - whereas `unastify` inverts the
quote operation.
Note also that `astify` compiles unquote AST markers into ASTs for calls to
the run-time parts of the unquote operators. `unastify` uncompiles those
calls back into the corresponding AST markers. That's the best we can do;
the only context that has the user-provided names (where the unquoted data
comes from) in scope is each particular use site of `q`, at its run time.
The use case of `unastify` is to transform a quoted AST at macro expansion
time when the extra AST layer added by `astify` is still present. The
recipe is `unastify`, process just like any AST, then quote again.
(`expands` and `expand1s` in `mcpyrate.metatools` are examples of this.)
If you just want to macro-expand a quoted AST in the REPL, see the `expand`
family of macros. Prefer the `r` variants; they expand at run time, so
you'll get the final AST with the actual unquoted values spliced in.
"""
# CAUTION: in `unastify`, we implement only what we minimally need.
our_module_globals = globals()
def lookup_thing(dotted_name):
if not dotted_name.startswith("mcpyrate.quotes"):
raise NotImplementedError(f"Don't know how to look up {repr(dotted_name)}")
path = dotted_name.split(".")
if not all(component.isidentifier() for component in path):
raise NotImplementedError(f"Dotted name {repr(dotted_name)} contains at least one non-identifier component")
if len(path) < 3:
raise NotImplementedError(f"Dotted name {repr(dotted_name)} has fewer than two dots (expected 'mcpyrate.quotes.something')")
name_of_thing = path[2]
thing = our_module_globals[name_of_thing]
if len(path) > 3:
for attrname in path[3:]:
thing = getattr(thing, attrname)
return thing
T = type(tree)
if T is ast.Constant:
return tree.value
# Support machinery for `Call` AST node. This serendipitously supports also
# *args and **kwargs, because at least in Pythons 3.6, 3.7, 3.8, 3.9, 3.10
# those appear in `args` and `keywords`, and `Starred` needs no special support here.
elif T is list:
return [unastify(elt) for elt in tree]
elif T is ast.keyword:
return tree.arg, unastify(tree.value)
elif T is ast.List:
return [unastify(elt) for elt in tree.elts]
elif T is ast.Tuple:
return tuple(unastify(elt) for elt in tree.elts)
elif T is ast.Dict:
return {unastify(k): unastify(v) for k, v in zip(tree.keys, tree.values)}
elif T is ast.Set:
return {unastify(elt) for elt in tree.elts}
elif T is ast.Call:
dotted_name = unparse(tree.func)
# Drop the run-time part of `q`, if present. This is added by `q` itself,
# not `astify`, but `unastify` is usually applied to the output of `q`.
if dotted_name == "mcpyrate.quotes.splice_ast_literals": # `q[]`
body = tree.args[0]
return unastify(body)
# Even though the unquote operators compile into calls, `unastify`
# must not apply their run-time parts, because it's running in the
# wrong context. Those only work properly at run time, and they
# must run at the use site of `q`, where the user-provided names
# (where the unquoted data comes from) will be in scope.
#
# So we undo what `astify` did, converting the unquote calls back into
# the corresponding AST markers.
elif dotted_name == "mcpyrate.quotes.astify": # `u[]`
body = tree.args[0]
return Unquote(body)
elif dotted_name == "mcpyrate.quotes.lift_sourcecode": # `n[]`
body, filename = tree.args[0], tree.args[1].value
return LiftSourcecode(body, filename)
elif dotted_name == "mcpyrate.quotes.ast_literal": # `a[]`
body, syntax = tree.args[0], tree.args[1].value
return ASTLiteral(body, syntax)
elif dotted_name == "mcpyrate.quotes.ast_list": # `s[]`
body = tree.args[0]
return ASTList(body)
elif dotted_name == "mcpyrate.quotes.ast_tuple": # `t[]`
body = tree.args[0]
return ASTTuple(body)
elif dotted_name == "mcpyrate.quotes.capture_value": # `h[]` (run-time value)
body, name = tree.args[0], tree.args[1].value
return Capture(body, name)
elif dotted_name == "mcpyrate.quotes.lookup_macro": # `h[]` (macro)
# `capture_macro` is done and gone by the time we get here.
# `astify` has generated an `ast.Call` to `lookup_macro`.
#
# To make this work properly even across process boundaries,
# we cannot simply run the `lookup_macro`. It injects the binding
# once, and then becomes an inert lexical name (pointing to that
# binding) - so that strategy only works inside the same process.
#
# We can't just leave the `lookup_macro` call in the AST, either,
# since that doesn't make any sense when the tree is later sent
# to `astify` to compile it again (we don't want another `ast.Call`
# layer around it).
#
# So we need something that triggers `capture_macro` when the
# result is astified again.
#
# Hence, we uncompile the `lookup_macro` into a `Capture` marker.
#
# But if the astified tree comes from an earlier run (in another
# Python process), the original macro name might not be in the
# expander's bindings any more.
#
# So we inject the captured macro into the expander's global
# bindings table now (by calling `lookup_macro`), and make the
# uncompiled capture command capture that macro.
#
# This does make the rather mild assumption that our input tree
# will be astified again in the same Python process, in order for
# the uncompiled capture to succeed when `astify` compiles it.
key = tree.args[0]
assert type(key) is ast.Tuple
assert all(type(elt) is ast.Constant for elt in key.elts)
name, unique_name, frozen_macro = [elt.value for elt in key.elts]
uniquename_node = lookup_macro((name, unique_name, frozen_macro))
return Capture(uniquename_node, name)
else:
# General case: an astified AST node.
callee = lookup_thing(dotted_name)
args = unastify(tree.args)
kwargs = {k: v for k, v in unastify(tree.keywords)}
node = callee(*args, **kwargs)
node = ast.copy_location(node, tree)
return node
raise TypeError(f"Don't know how to unastify {unparse_with_fallbacks(tree, debug=True, color=True)}")
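# Round-trip sketch (illustrative): `unastify` approximately inverts `astify`, so
#
#     unastify(astify(tree))
#
# rebuilds an AST equivalent to `tree`; any unquote calls compiled in by `astify`
# reappear as the corresponding `QuasiquoteMarker` instances.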
# --------------------------------------------------------------------------------
# Quasiquote macros
#
# These operators are named after Qu'nasth, the goddess of quasiquotes in high-tech-elven mythology.
_quotelevel = NestingLevelTracker()
def _expand_quasiquotes(tree, expander):
"""Expand quasiquote macros only."""
# Use a second expander instance, with different bindings. Copy only the
# bindings of the quasiquote macros from the main `expander`, accounting
# for possible as-imports. This second expander won't even see other macros,
# thus leaving them alone.
bindings = extract_bindings(expander.bindings, q, u, n, a, s, t, h)
return MacroExpander(bindings, expander.filename).visit(tree)
# TODO: maybe make the rest of this a method of `MacroExpander`, and only wrap with `QuasiquoteSearchDone` here?
def _replace_tree_in_macro_invocation(invocation, newtree):
"""Helper function for handling nested quasiquotes.
Output a new invocation of the same macro, but wrapped in `QuasiquoteSearchDone`,
and with the `tree` inside replaced by `newtree`.
`expr` and `block` modes are supported; this is autodetected from `invocation`.
"""
new_invocation = copy.copy(invocation)
if type(new_invocation) is ast.Subscript:
if sys.version_info >= (3, 9, 0): # Python 3.9+: no ast.Index wrapper
new_invocation.slice = newtree
else:
new_invocation.slice = copy.copy(invocation.slice)
new_invocation.slice.value = newtree
elif type(new_invocation) is ast.With:
new_invocation.body = newtree
else:
raise NotImplementedError
return QuasiquoteSearchDone(body=new_invocation)
def q(tree, *, syntax, expander, invocation, **kw):
"""[syntax, expr/block] quasiquote. Lift code into its AST representation."""
if syntax not in ("expr", "block"):
raise SyntaxError("`q` is an expr and block macro only")
with _quotelevel.changed_by(+1):
tree = _expand_quasiquotes(tree, expander) # expand any unquotes corresponding to this level first
if _quotelevel.value > 1: # nested inside an outer quote?
# TODO: Implications when in block mode and not the only context manager in the `with`?
# TODO: Probably doesn't work in that case. Document that `q`, when used,
# TODO: should be the only ctxmgr in that particular `with`.
return _replace_tree_in_macro_invocation(invocation, tree)
tree = delete_markers(tree, cls=QuasiquoteSearchDone)
tree = astify(tree, expander) # Magic part of `q`. Supply `expander` for `h[macro]` detection.
# `astify` should compile the unquote command markers away, and `SpliceNodes`
# markers only spring into existence when the run-time part of `a` runs
# (for communication with the run-time part of the surrounding `q`).
# So at this point, there should be no quasiquote markers in `tree`.
try:
check_no_markers_remaining(tree, filename=expander.filename, cls=QuasiquoteMarker)
except MacroExpansionError as err:
raise RuntimeError("`q`: internal error in quasiquote system") from err
# `a` introduces the need to splice any interpolated `list`s of ASTs at
# run time into the surrounding context (which is only available to the
# surrounding `q`). Inject a handler for that.
#
# Block mode `a` always produces a `list` of statement AST nodes.
#
# For expression mode `a`, a `list` of expression AST nodes is valid
# e.g. in a function call argument position, to splice the list into