Skip to content

Commit

Permalink
refactor[venom]: make venom repr parseable (vyperlang#4402)
Browse files Browse the repository at this point in the history
make the `__repr__()` implementations for venom data structures
(`IRContext`, `IRFunction`, `IRBasicBlock`) emit strings which will
round-trip through the parser.

for labels generated in the frontend which are not necessarily
valid identifiers (e.g. `"internal 5 foo()"`), these are represented as
escaped strings. the expedient way to implement this was to simply use
`json.loads` / `json.dumps`; there did not seem to be any convenient
stdlib or lark function to do this. since this adds grammar complexity,
the other method that was considered was to map all labels to valid
identifiers in `ir_node_to_venom.py`. but this approach seems easier
than cleaning up all the non-identifier labels generated by the
frontend; plus, being able to have arbitrary strings in labels seems
like it will come in handy during debugging some time.

a couple other grammar updates/fixes:
- update instruction order in the text format for `phi` and `invoke`
- ensure instructions are terminated with newline (otherwise they can
  continue slurping tokens from the next line).
- allow signed ints inside `CONST` nodes as `IRLiteral` accepts negative
  numbers

misc/refactor:
- remove a dead function (`str_short()`).
- remove a dead branch in `ir_node_to_venom.py`
- when optimization level is set to `CODESIZE`, the roundtrip test
  is set to xfail, as the data section contains bytestrings (which do
  not parse yet).
  • Loading branch information
charles-cooper authored Dec 19, 2024
1 parent f6030fb commit eee31e7
Show file tree
Hide file tree
Showing 8 changed files with 111 additions and 54 deletions.
36 changes: 36 additions & 0 deletions tests/functional/venom/test_venom_repr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import glob

import pytest

from tests.venom_utils import assert_ctx_eq, parse_venom
from vyper.compiler import compile_code
from vyper.compiler.settings import OptimizationLevel
from vyper.venom.context import IRContext

"""
Check that venom text format round-trips through parser
"""


def get_example_vy_filenames():
return glob.glob("**/*.vy", root_dir="examples/", recursive=True)


@pytest.mark.parametrize("vy_filename", get_example_vy_filenames())
def test_round_trip(vy_filename, optimize, request):
    """
    Compile an example contract to venom, print the resulting context,
    parse the printed text back, and check that both contexts are equal.
    """
    if optimize == OptimizationLevel.CODESIZE:
        # codesize optimization emits things like `db b"\x12\x34"` which we
        # don't handle.
        request.node.add_marker(pytest.mark.xfail(strict=False, reason="unimplemented in parser"))

    path = f"examples/{vy_filename}"
    # explicit encoding so the read does not depend on the platform default
    with open(path, encoding="utf-8") as f:
        vyper_source = f.read()

    out = compile_code(vyper_source, output_formats=["bb_runtime"])
    bb_runtime = out["bb_runtime"]
    venom_code = IRContext.__repr__(bb_runtime)

    ctx = parse_venom(venom_code)

    assert_ctx_eq(bb_runtime, ctx)
1 change: 0 additions & 1 deletion tests/venom_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def assert_fn_eq(fn1: IRFunction, fn2: IRFunction):


def assert_ctx_eq(ctx1: IRContext, ctx2: IRContext):
assert ctx1.last_label == ctx2.last_label
assert len(ctx1.functions) == len(ctx2.functions)
for label1, fn1 in ctx1.functions.items():
assert label1 in ctx2.functions
Expand Down
48 changes: 22 additions & 26 deletions vyper/venom/basicblock.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json
import re
from typing import TYPE_CHECKING, Any, Iterator, Optional, Union

import vyper.venom.effects as effects
Expand Down Expand Up @@ -105,7 +107,7 @@ def __init__(self, line_no: int, src: str) -> None:

def __repr__(self) -> str:
src = self.src if self.src else ""
return f"\t# line {self.line_no}: {src}".expandtabs(20)
return f"\t; line {self.line_no}: {src}".expandtabs(20)


class IROperand:
Expand Down Expand Up @@ -189,10 +191,19 @@ class IRLabel(IROperand):
value: str

def __init__(self, value: str, is_symbol: bool = False) -> None:
assert isinstance(value, str), "value must be an str"
assert isinstance(value, str), f"not a str: {value} ({type(value)})"
assert len(value) > 0
super().__init__(value)
self.is_symbol = is_symbol

# labels matching this pattern are printed bare. `+` rather than `*` so that
# an empty value never counts as an identifier and is quoted instead.
_IS_IDENTIFIER = re.compile(r"[0-9a-zA-Z_]+")

def __repr__(self):
    """
    Return the label text: bare when it consists only of ASCII letters,
    digits and underscores, otherwise as a JSON-escaped quoted string.
    """
    if self.__class__._IS_IDENTIFIER.fullmatch(self.value):
        return self.value

    return json.dumps(self.value)  # escape it


class IRInstruction:
"""
Expand Down Expand Up @@ -366,35 +377,22 @@ def get_ast_source(self) -> Optional[IRnode]:
return inst.ast_source
return self.parent.parent.ast_source

def str_short(self) -> str:
s = ""
if self.output:
s += f"{self.output} = "
opcode = f"{self.opcode} " if self.opcode != "store" else ""
s += opcode
operands = self.operands
if opcode not in ["jmp", "jnz", "invoke"]:
operands = list(reversed(operands))
s += ", ".join(
[(f"label %{op}" if isinstance(op, IRLabel) else str(op)) for op in operands]
)
return s

def __repr__(self) -> str:
s = ""
if self.output:
s += f"{self.output} = "
opcode = f"{self.opcode} " if self.opcode != "store" else ""
s += opcode
operands = self.operands
if opcode not in ("jmp", "jnz", "invoke"):
if self.opcode == "invoke":
operands = [operands[0]] + list(reversed(operands[1:]))
elif self.opcode not in ("jmp", "jnz", "phi"):
operands = reversed(operands) # type: ignore
s += ", ".join(
[(f"label %{op}" if isinstance(op, IRLabel) else str(op)) for op in operands]
)

s += ", ".join([(f"@{op}" if isinstance(op, IRLabel) else str(op)) for op in operands])

if self.annotation:
s += f" <{self.annotation}>"
s += f" ; {self.annotation}"

return f"{s: <30}"

Expand Down Expand Up @@ -659,10 +657,8 @@ def copy(self):
return bb

def __repr__(self) -> str:
s = (
f"{repr(self.label)}: IN={[bb.label for bb in self.cfg_in]}"
f" OUT={[bb.label for bb in self.cfg_out]} => {self.out_vars}\n"
)
s = f"{self.label}: ; IN={[bb.label for bb in self.cfg_in]}"
s += f" OUT={[bb.label for bb in self.cfg_out]} => {self.out_vars}\n"
for instruction in self.instructions:
s += f" {str(instruction).strip()}\n"
s += f" {str(instruction).strip()}\n"
return s
6 changes: 3 additions & 3 deletions vyper/venom/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,14 @@ def as_graph(self) -> str:
return "\n".join(s)

def __repr__(self) -> str:
s = ["IRContext:"]
s = []
for fn in self.functions.values():
s.append(fn.__repr__())
s.append("\n")

if len(self.data_segment) > 0:
s.append("\nData segment:")
s.append("\n[data]")
for inst in self.data_segment:
s.append(f"{inst}")
s.append(f" {inst}")

return "\n".join(s)
11 changes: 7 additions & 4 deletions vyper/venom/function.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import textwrap
from typing import Iterator, Optional

from vyper.codegen.ir_node import IRnode
Expand Down Expand Up @@ -41,7 +42,7 @@ def append_basic_block(self, bb: IRBasicBlock):
Append basic block to function.
"""
assert isinstance(bb, IRBasicBlock), bb
assert bb.label.name not in self._basic_block_dict
assert bb.label.name not in self._basic_block_dict, bb.label
self._basic_block_dict[bb.label.name] = bb

def remove_basic_block(self, bb: IRBasicBlock):
Expand Down Expand Up @@ -222,7 +223,9 @@ def _make_label(bb):
return "\n".join(ret)

def __repr__(self) -> str:
str = f"IRFunction: {self.name}\n"
ret = f"function {self.name} {{\n"
for bb in self.get_basic_blocks():
str += f"{bb}\n"
return str.strip()
bb_str = textwrap.indent(str(bb), " ")
ret += f"{bb_str}\n"
ret = ret.strip() + "\n}"
return ret.strip()
5 changes: 1 addition & 4 deletions vyper/venom/ir_node_to_venom.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,10 +369,7 @@ def _convert_ir_bb(fn, ir, symbols):
label = IRLabel(ir.args[0].value)
ctx.append_data("dbname", [label])
for c in ir.args[1:]:
if isinstance(c, int):
assert 0 <= c <= 255, "data with invalid size"
ctx.append_data("db", [c]) # type: ignore
elif isinstance(c.value, bytes):
if isinstance(c.value, bytes):
ctx.append_data("db", [c.value]) # type: ignore
elif isinstance(c, IRnode):
data = _convert_ir_bb(fn, c, symbols)
Expand Down
52 changes: 39 additions & 13 deletions vyper/venom/parser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

from lark import Lark, Transformer

from vyper.venom.basicblock import (
Expand All @@ -11,13 +13,14 @@
from vyper.venom.context import IRContext
from vyper.venom.function import IRFunction

VENOM_PARSER = Lark(
"""
VENOM_GRAMMAR = """
%import common.CNAME
%import common.DIGIT
%import common.LETTER
%import common.WS
%import common.INT
%import common.SIGNED_INT
%import common.ESCAPED_STRING
# Allow multiple comment styles
COMMENT: ";" /[^\\n]*/ | "//" /[^\\n]*/ | "#" /[^\\n]*/
Expand All @@ -26,13 +29,13 @@
# TODO: consider making entry block implicit, e.g.
# `"{" instruction+ block* "}"`
function: "function" NAME "{" block* "}"
function: "function" LABEL_IDENT "{" block* "}"
data_section: "[data]" instruction*
block: NAME ":" statement*
block: LABEL_IDENT ":" "\\n" statement*
statement: instruction | assignment
statement: (instruction | assignment) "\\n"
assignment: VAR_IDENT "=" expr
expr: instruction | operand
instruction: OPCODE operands_list?
Expand All @@ -41,16 +44,22 @@
operand: VAR_IDENT | CONST | LABEL
CONST: INT
CONST: SIGNED_INT
OPCODE: CNAME
VAR_IDENT: "%" NAME
LABEL: "@" NAME
VAR_IDENT: "%" (DIGIT|LETTER|"_"|":")+
# handy for identifier to be an escaped string sometimes
# (especially for machine-generated labels)
LABEL_IDENT: (NAME | ESCAPED_STRING)
LABEL: "@" LABEL_IDENT
NAME: (DIGIT|LETTER|"_")+
%ignore WS
%ignore COMMENT
"""
)

VENOM_PARSER = Lark(VENOM_GRAMMAR)


def _set_last_var(fn: IRFunction):
Expand Down Expand Up @@ -83,6 +92,15 @@ def _ensure_terminated(bb):
# TODO: raise error if still not terminated.


def _unescape(s: str):
"""
Unescape the escaped string. This is the inverse of `IRLabel.__repr__()`.
"""
if s.startswith('"'):
return json.loads(s)
return s


class _DataSegment:
def __init__(self, instructions):
self.instructions = instructions
Expand All @@ -100,7 +118,7 @@ def start(self, children) -> IRContext:
fn._basic_block_dict.clear()

for block_name, instructions in blocks:
bb = IRBasicBlock(IRLabel(block_name), fn)
bb = IRBasicBlock(IRLabel(block_name, True), fn)
fn.append_basic_block(bb)

for instruction in instructions:
Expand Down Expand Up @@ -152,8 +170,12 @@ def instruction(self, children) -> IRInstruction:

# reverse operands, venom internally represents top of stack
# as rightmost operand
if opcode not in ("jmp", "jnz", "invoke", "phi"):
# special cases: operands with labels look better un-reversed
if opcode == "invoke":
# reverse stack arguments but not label arg
# invoke <target> <stack arguments>
operands = [operands[0]] + list(reversed(operands[1:]))
# special cases: operands with labels look better un-reversed
elif opcode not in ("jmp", "jnz", "phi"):
operands.reverse()
return IRInstruction(opcode, operands)

Expand All @@ -166,8 +188,12 @@ def operand(self, children) -> IROperand:
def OPCODE(self, token):
return token.value

def LABEL_IDENT(self, label) -> str:
return _unescape(label)

def LABEL(self, label) -> IRLabel:
return IRLabel(label[1:])
label = _unescape(label[1:])
return IRLabel(label, True)

def VAR_IDENT(self, var_ident) -> IRVariable:
return IRVariable(var_ident[1:])
Expand Down
6 changes: 3 additions & 3 deletions vyper/venom/venom_to_assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,8 @@ def generate_evm(self, no_optimize: bool = False) -> list[str]:
data_segments: dict = dict()
for inst in ctx.data_segment:
if inst.opcode == "dbname":
label = inst.operands[0].value
data_segments[label] = [DataHeader(f"_sym_{label}")]
label = inst.operands[0]
data_segments[label] = [DataHeader(f"_sym_{label.value}")]
elif inst.opcode == "db":
data = inst.operands[0]
if isinstance(data, IRLabel):
Expand Down Expand Up @@ -293,7 +293,7 @@ def _generate_evm_for_basicblock_r(
asm = []

# assembly entry point into the block
asm.append(f"_sym_{basicblock.label}")
asm.append(f"_sym_{basicblock.label.value}")
asm.append("JUMPDEST")

if len(basicblock.cfg_in) == 1:
Expand Down

0 comments on commit eee31e7

Please sign in to comment.