From 417d8b5b70e87f442623140db1423f0d7cc243a3 Mon Sep 17 00:00:00 2001 From: Andrew Byers Date: Sun, 28 Apr 2024 18:25:19 -0500 Subject: [PATCH] =?UTF-8?q?Adds=20basic=20parametric=20polymorphism=20for?= =?UTF-8?q?=20functions=20and=20structures=20At=20this=20point,=20variable?= =?UTF-8?q?s=20with=20generic=20types=20must=20be=20treated=20completely?= =?UTF-8?q?=20generically.=20This=20will=20allow=20abstract=20data=20types?= =?UTF-8?q?,=20like=20'Vec'=20and=20'Map',=20but=20doesn't=20allow=20creat?= =?UTF-8?q?ing,=20for=20example,=20a=20polymorphic=20function=20that=20sum?= =?UTF-8?q?s=20a=20list=20of=20numbers=20(either=20'int'=20or=20'float').?= =?UTF-8?q?=20That=20behavior=20will=20be=20implemented=20later,=20when=20?= =?UTF-8?q?the=20codebase=20becomes=20stable.=20Also=20using=20the=20Boehm?= =?UTF-8?q?=E2=80=93Demers=E2=80=93Weiser=20garbage=20collector=20for=20no?= =?UTF-8?q?w=20(using=20FetchContent=20from=20an=20unofficial=20git=20repo?= =?UTF-8?q?).=20At=20some=20point,=20paw=20will=20get=20its=20own=20garbag?= =?UTF-8?q?e=20collector.=20At=20first,=20it=20will=20be=20a=20simple=20tr?= =?UTF-8?q?acing=20GC,=20like=20the=20one=20we=20had=20before=20static=20t?= =?UTF-8?q?yping=20was=20added,=20except=20that=20it=20needs=20to=20coordi?= =?UTF-8?q?nate=20with=20a=20custom=20allocator=20so=20it=20can=20find=20p?= =?UTF-8?q?ointers=20(or=20maintain=20a=20lookup=20table).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 16 +- GRAMMER.md | 148 +++ README.md | 176 ++-- src/api.c | 60 +- src/api.h | 5 +- src/array.c | 2 +- src/ast.c | 1507 +++++++++++++++++++++++++++++ src/ast.h | 642 +++++++++++++ src/bigint.c | 2 +- src/call.c | 4 +- src/check.c | 1940 ++++++++++++++++++++++++-------------- src/code.c | 463 ++------- src/code.h | 421 +-------- src/codegen.c | 1231 +++++++++++++++--------- src/ctx.h | 0 src/debug.c | 210 ++++- src/debug.h | 3 +- src/env.c | 4 +- src/env.h | 76 +- src/{gc.c => 
gc_aux.c} | 44 +- src/{gc.h => gc_aux.h} | 0 src/lex.c | 369 ++++---- src/lex.h | 17 +- src/lib.c | 359 ++++--- src/lib.h | 20 +- src/map.c | 4 +- src/map.h | 31 +- src/mem.c | 2 +- src/mem.h | 3 +- src/opcode.h | 47 +- src/parse.c | 1335 ++++++++++++-------------- src/parse.h | 182 +++- src/paw.h | 9 +- src/rt.c | 145 ++- src/str.c | 2 +- src/type.c | 270 +----- src/type.h | 165 ++-- src/unify.c | 298 ++++++ src/value.c | 107 ++- src/value.h | 68 +- test/scripts/basic.paw | 262 ++--- test/scripts/block.paw | 2 +- test/scripts/closure.paw | 14 +- test/scripts/loop.paw | 912 +++++++++--------- test/scripts/string.paw | 111 +-- test/scripts/types.paw | 923 +++++++++++++++++- test/test.c | 21 +- test/test_error.c | 170 ++-- test/test_rt.c | 6 +- 49 files changed, 8234 insertions(+), 4574 deletions(-) create mode 100644 GRAMMER.md create mode 100644 src/ast.c create mode 100644 src/ast.h create mode 100644 src/ctx.h rename src/{gc.c => gc_aux.c} (94%) rename src/{gc.h => gc_aux.h} (100%) create mode 100644 src/unify.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 769ba63..6eb397a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,11 +31,18 @@ else() INTERFACE m) endif() +include(FetchContent) +FetchContent_Declare(gc + GIT_REPOSITORY https://github.com/ivmai/bdwgc.git + GIT_TAG v8.2.6) +FetchContent_MakeAvailable(gc) + set(PAW_SOURCE_DIR ${PROJECT_SOURCE_DIR}/src) add_library(paw STATIC) target_sources(paw PUBLIC ${PAW_SOURCE_DIR}/api.h + ${PAW_SOURCE_DIR}/ast.h ${PAW_SOURCE_DIR}/auxlib.h ${PAW_SOURCE_DIR}/array.h ${PAW_SOURCE_DIR}/bigint.h @@ -43,7 +50,7 @@ target_sources(paw ${PAW_SOURCE_DIR}/code.h ${PAW_SOURCE_DIR}/debug.h ${PAW_SOURCE_DIR}/env.h - ${PAW_SOURCE_DIR}/gc.h + ${PAW_SOURCE_DIR}/gc_aux.h ${PAW_SOURCE_DIR}/lex.h ${PAW_SOURCE_DIR}/lib.h ${PAW_SOURCE_DIR}/map.h @@ -59,6 +66,7 @@ target_sources(paw ${PAW_SOURCE_DIR}/util.h ${PAW_SOURCE_DIR}/value.h PRIVATE ${PAW_SOURCE_DIR}/api.c + ${PAW_SOURCE_DIR}/ast.c ${PAW_SOURCE_DIR}/auxlib.c 
${PAW_SOURCE_DIR}/array.c ${PAW_SOURCE_DIR}/bigint.c @@ -68,7 +76,7 @@ target_sources(paw ${PAW_SOURCE_DIR}/codegen.c ${PAW_SOURCE_DIR}/debug.c ${PAW_SOURCE_DIR}/env.c - ${PAW_SOURCE_DIR}/gc.c + ${PAW_SOURCE_DIR}/gc_aux.c #${PAW_SOURCE_DIR}/iolib.c ${PAW_SOURCE_DIR}/lex.c ${PAW_SOURCE_DIR}/lib.c @@ -82,10 +90,12 @@ target_sources(paw ${PAW_SOURCE_DIR}/rt.c ${PAW_SOURCE_DIR}/str.c ${PAW_SOURCE_DIR}/type.c + ${PAW_SOURCE_DIR}/unify.c ${PAW_SOURCE_DIR}/util.c ${PAW_SOURCE_DIR}/value.c) target_link_libraries(paw - PRIVATE paw_context) + PRIVATE paw_context + PUBLIC gc) target_include_directories(paw PUBLIC ${PAW_SOURCE_DIR}) target_compile_definitions(paw diff --git a/GRAMMER.md b/GRAMMER.md new file mode 100644 index 0000000..bc1ea95 --- /dev/null +++ b/GRAMMER.md @@ -0,0 +1,148 @@ +# Paw language grammar (EBNF) +**TODO: get rid of requirement that "break" | "return" | "continue" is the last statement in the block** +** just don't emit unreachable code** +** use a tool to validate this...** + +## Statements +``` +Stmt ::= ExprStmt | WhileLoop | DoWhileLoop | + ForLoop | IfElse | Declaration | + Block . +Chunk ::= {Stmt [";"]} [LastStmt [";"]] . +Block ::= "{" Chunk "}" . +LastStmt ::= "return" [Expr] | "continue" | "break" . +ExprStmt ::= Operand "=" Expr | Call | Match . +``` + +### Control flow +``` +IfElse ::= "if" Expr Block [{"else" IfElse} | "else" Block] . +WhileLoop ::= "while" Expr Block . +DoWhileLoop ::= "do" Block "while" Expr . +ForLoop ::= ForIn | ForNum . +ForIn ::= "for" name "in" Expr Block . +ForNum ::= "for" name "=" Expr "," Expr ["," Expr] Block . +Match ::= "match" Expr MatchBody . +MatchBody ::= "{" {MatchClause ","} MatchClause "}" . +MatchClause ::= Expr "=>" MatchArm . +MatchArm ::= Expr | Block . +``` + +## Declarations +``` +Declaration ::= VarDecl | FunctionDecl | + ClassDecl | EnumDecl | TypeDecl . +VarDecl ::= "let" name [":" Type] "=" Expr . +TypeDecl ::= "type" name [TypeParam] "=" Type . +TypeParam ::= "[" {name ","} name "]" . 
+``` + +### Functions +``` +FunctionDecl ::= "fn" Function . +Function ::= name [TypeParam] FuncType Block . +FuncType ::= "(" [{Field ","} Field] ")" ["->" Type] . +Field ::= name ":" Type . +``` + +### Classes +``` +ClassDecl ::= "class" ClassType . +ClassType ::= name [TypeParam] ClassBody . +ClassBody ::= "{" {Attribute [";"]} "}" . +Attribute ::= Method | Field . +Method ::= ["static"] Function . +``` + +### Enumerators +``` +EnumDecl ::= "enum" name EnumBody . +EnumBody ::= "{" [{Variant ","} Variant] "}" . +Variant ::= name [Payload] . +Payload ::= "(" {Type ","} Type ")" . +``` + +## Operators +``` +BinOp ::= "+" | "-" | "*" | "/" | + "%" | "&" | "^" | "|" | + "<" | "<=" | ">" | ">=" | + "==" | "!=" | "&&" | "||" . +UnOp ::= "-" | "~" | "!" | "#" . +``` + +## Expressions +``` +Expr ::= PrimaryExpr | Expr BinOp Expr | UnOp Expr . +PrimaryExpr ::= Operand | Call | Literal | "(" Expr ")" . +Call ::= PrimaryExpr "(" [ExprList] ")" . +Operand ::= name | Index | Selector . +Index ::= PrimaryExpr "[" ExprList "]" . +Selector ::= PrimaryExpr "." name . +ExprList ::= {Expr ","} Expr . +``` + +## Types +``` +Type ::= name [TypeArgs] | TypeLit . +TypeLit ::= FuncType | ArrayType | TupleType . +FuncType ::= "fn" "(" [TypeList] ")" ["->" Type] . +TypeList ::= {Type ","} Type . +TypeArgs ::= "[" TypeList "]" . +NamedType ::= name [TypeArgs] . +ArrayType ::= "[" Type ";" int_lit "]" . +TupleType ::= "(" [Type "," [Type]] ")" . +``` + +## Operands +``` +Operand ::= Literal | name [TypeArgs] . +Literal ::= BasicLit | CompositeLit . +BasicLit ::= int_lit | bool_lit | float_lit | string_lit . +``` + +### Composite literals +Note that the unit type is just a 0-tuple (a tuple with 0 elements). +A 1-tuple must have a trailing `,` to distinguish it from a parenthesized expression. +``` +CompositeLit ::= ClassLit | ArrayLit | TupleLit | VariantLit . +ClassLit ::= NamedType "{" [ItemList [","]] "}" . +ArrayLit ::= "[" [ExprList [","]] "]" . 
+TupleLit ::= "(" [Expr "," [Expr [","]]] ")" . +VariantLit ::= name ["(" {Expr ","} Expr ")"] . +ItemList ::= KeyedItem {"," KeyedItem} [","] . +KeyedItem ::= [Key ":"] Expr . +Key ::= name | Expr . +``` + +### Integer literals +``` +int_lit ::= decimal_lit | binary_lit | octal_lit | hex_lit . +decimal_lit ::= "0" | ("1" … "9") [decimal_digits] . +binary_lit ::= "0" ("b" | "B") binary_digits . +octal_lit ::= "0" ("o" | "O") octal_digits . +hex_lit ::= "0" ("x" | "X") hex_digits . +``` + +### Float literals +``` +float_lit ::= decimal_digits "." [decimal_digits] [decimal_exponent] | + decimal_digits decimal_exponent | + "." decimal_digits [decimal_exponent] . +decimal_exponent ::= ("e" | "E") ["+" | "-"] decimal_digits . +``` + +## Miscellaneous +``` +name ::= letter {letter | decimal_digit} . +letter ::= "A" … "Z" | "a" … "z" | "_" . +decimal_digit ::= "0" … "9" . +binary_digit ::= "0" | "1" . +octal_digit ::= "0" … "7" . +hex_digit ::= "0" … "9" | "A" … "F" | "a" … "f" . +decimal_digits ::= decimal_digit {decimal_digit} . +binary_digits ::= binary_digit {binary_digit} . +octal_digits ::= octal_digit {octal_digit} . +hex_digits ::= hex_digit {hex_digit} . +``` + diff --git a/README.md b/README.md index 4f2a84e..985dbcf 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,13 @@ # paw +> NOTE: This branch is being used to implement static typing and templates. +> It is likely to be very broken for a while. +> So far, the compiler has been rewritten to work in multiple passes. +> Pass 1 constructs an AST (lexical + syntax analysis). +> Pass 2 is semantic analysis, where types are checked and symbol tables built. +> Templates are also instantiated during pass 2. +> Pass 3 generates code (backend). + An unobtrusive scripting language paw is a high-level, imperative, statically-typed programming language intended for embedding into larger projects. @@ -25,8 +33,8 @@ Local variables can be shadowed and 'rebound', but globals cannot. A global can be shadowed by a local, however. 
Locals can also be captured in a function or method body (see [functions](#functions)). ``` -// short for 'let x: int = null' -let x: int +// initializer (' = 0') is required +let x: int = 0 // rebind 'x' to a float (type is inferred from RHS) let x = 6.63e-34 @@ -42,28 +50,25 @@ paw supports type inference on variable definitions. The following example demonstrates creation of the basic value types. ``` -// variables without an initializer (right-hand side) are set to the default -// value for the type given in the annotation -let b: bool -let i: int - // initializer is validated against the type annotation +let b: bool = true +let i: int = 123 let f: float = 10.0e-1 -let a: [int] = [1, 2, 3] -let m: string[int] = {'a': 1, 'b': 2} -let f: (): int = fn(): int {return 42} +let v: Vec[int] = Vec[int] {1, 2, 3} +let m: Map[string, int] = Map[string, int] {'a': 1, 'b': 2} +let f: fn() -> int = some_function // supports type inference let b = false let i = 40 + 2 let f = 1.0 * 2 -let a = ['a', 'b', 'c'] -let m = {1: 1, 2: 2} -let f = fn(): float {return 42.0} +let a = Vec {'a', 'b', 'c'} +let m = Map {1: 1, 2: 2} +let f = some_other_function class Class { value: int - times2(a: int): int { + times2(a: int) -> int { return a * 2 } } @@ -73,9 +78,9 @@ let method = instance.method // Class.method(int): int ### Scope paw implements lexical scoping, meaning variables declared in a given block can only be referenced from within that block, or one of its subblocks. -A block begins when a '{' token is encountered that is not the start of a map literal, and ends when a matching '}' is found. +A block begins when a '{' token is encountered, and ends when a matching '}' is found. Many language constructs use blocks to create their own scope, like functions, classes, for loops, etc. -paw also provides raw blocks for exerting finer control over variable lifetimes. +Explicit scoping blocks are also supported. 
``` { let x = 42 @@ -86,18 +91,13 @@ paw also provides raw blocks for exerting finer control over variable lifetimes. Functions are first-class in paw, which means they are treated like any other paw value. Functions can be stored in variables, or passed as parameters to compose higher-order functions. ``` -fn fib(n: int) { +fn fib(n: int) -> int { if n < 2 { return n } return fib(n - 2) + fib(n - 1) } fib(10) - -// Anonymous functions: -let add = fn(a: str, b: str) { - return a + b -} ``` ### Classes @@ -286,19 +286,62 @@ let a = s.split(',') assert(s == ','.join(a)) ``` -### Arrays +### Generics +``` +// function template +fn fib[T](n: T) -> T { + if n < 2 { + return n + } + return fib(n - 2) + fib(n - 1) +} + +fib[int](10) + +// A template has no value representation. 'func' must be explicitly +// instantiated before it is stored in a variable (there are no +// arguments from which to infer the type parameters). +let fib_i = fib[int] +fib_i(10) + +fib(10) // infer T = int + +// class template +class Cls[S, T] { + a: S + b: T + f(s: S, t: T) -> T { + self.a = self.a + s + return self.b + t + } + // method template + g[U](u: U) -> U { + return u + } +} + +let c = Cls { + a: 123, // infer S = int + b: 'abc', // infer T = string +} +let g_i = c.g[int] +g_i(42) +c.g(123) +``` + +### Vectors +TODO: implement as class template Vec[T] ``` -// inferred as array -let a = [1, 2, 3] -assert(a[:1] == [1]) -assert(a[1:-1] == [2]) -assert(a[-1:] == [3]) +let a = Vec {1, 2, 3} // infer T = int +assert(a[:1] == Vec {1}) +assert(a[1:-1] == Vec {2}) +assert(a[-1:] == Vec {3}) ``` ### Maps +TODO: implement as class template Map[K, V] ``` -// inferred as map -let m = {1: 'a', 2: 'b'} +let m = Map {1: 'a', 2: 'b'} // infer K = int, V = string m[3] = 42 m.erase(1) @@ -308,7 +351,7 @@ print(m.get(1, 'default')) ### Error handling ``` -fn divide_by_0(n: int): int { +fn divide_by_0(n: int) -> int { return n / 0 } let status = try(divide_by_0, 42) @@ -317,37 +360,50 @@ assert(status != 0) 
## Operators -|Precedence|Operator |Description |Associativity| -|:---------|:-------------|:-----------------------------------------|:------------| -|16 |`() [] . ?` |Call, Subscript, Member access, Null chain|Left | -|15 |`! - ~` |Not, Negate, Bitwise not |Right | -|14 |`* / // %` |Multiply, Divide, Integer divide, Modulus |Left | -|13 |`+ -` |Add, Subtract |Left | -|12 |`++` |Concatenate |Left | -|11 |`<< >>` |Shift left, Shift right |Left | -|10 |`&` |Bitwise and |Left | -|9 |`^` |Bitwise xor |Left | -|8 |||Bitwise or |Left | -|7 |`in < <= > >=`|Inclusion, Relational comparisons |Left | -|6 |`== !=` |Equality comparisons |Left | -|5 |`&&` |And |Left | -|4 ||||Or |Left | -|3 |`?:` |Null coalesce |Left | -|2 |`??::` |Conditional |Right | -|1 |`=` |Assignment |Right | +|Precedence|Operator |Description |Associativity| +|:---------|:-------------|:---------------------------------------------|:------------| +|16 |`() [] . ?` |Call, Subscript, Member access, Question mark |Left | +|15 |`! - ~` |Not, Negate, Bitwise not |Right | +|14 |`* / %` |Multiply, Divide, Modulus |Left | +|13 |`+ -` |Add, Subtract |Left | +|12 |`++` |Concatenate |Left | +|11 |`<< >>` |Shift left, Shift right |Left | +|10 |`&` |Bitwise and |Left | +|9 |`^` |Bitwise xor |Left | +|8 |||Bitwise or |Left | +|7 |`in < <= > >=`|Inclusion, Relational comparisons |Left | +|6 |`== !=` |Equality comparisons |Left | +|5 |`&&` |And |Left | +|4 ||||Or |Left | +|3 |`?:` |Coalesce |Left | +|2 |`??::` |Conditional |Right | +|1 |`=` |Assignment |Right | ## TODO -+ Add a few things to the C API: - + Better way to call builtin functions and methods on builtin types - + Better API for arrays: `paw_*_item` will throw an error if the index is out of bounds -+ For loops won't work with bigint right now. -+ Finish designing things first... 
- + Language features: - + `**` (pow) operator - + Slicing syntax - + Spread operator, used in call expressions, assignments/let statements, and array literals - + Multi-return/let/assign with Lua semantics? - + Concurrency: fibers, coroutines? ++ Known defects that need to be fixed! + + The compiler has difficulty distinguishing between instances of a `struct` and the `struct` itself + + Code like `let a = B.c` may cause problems ++ Test bad syntax, and get the fuzzer to work again ++ Write a new garbage collector... + + First, get the project to work with a third party GC + + I feel like it will be painful to try and maintain bookkeeping info about what `Value`s are pointers + + Doing so would be required if we want the GC to know what objects it should try to collect + + The other option, and one I feel would be more feasible, would be to keep track of what regions of memory are in use. + + When the GC is scanning, it can check to see if a value looks like a pointer (a numeric value that seemingly references some place in an allocated block) + + If so, that block can be marked (this allows false positives, but never false negatives, and false positives should only cause memory to not be freed when it actually can be) + + This seems to be what general-purpose GCs do, and also some GCs for statically-typed languages, like Go + + Seems to assume that heap allocations come from the high end of the address space, which is usually the case, but is definitely not guaranteed ++ Error on missing return type ++ Implement the rest of the builtin types: tuple and enum (maybe array), and the builtin classes: Vector, etc. 
++ When values are guaranteed to stay within the paw runtime (not exposed to C), we can elide some allocations by reserving more than 1 stack slot + + For example, a tuple `(int, float)` can just be 2 slots, 1 for an `int` and the other for a `float` + + The compiler builds an array of locals as it performs the codegen pass, and each local variable description can store how many slots it occupies, and maybe its starting slot number + + The tuple is still treated like a single object, from the user's point-of-view, and code like `t[1]` is translated to an `OP_GETLOCAL` from the proper stack slot + + Allows an `Option[int]` that doesn't result in a heap allocation (`Option[object]` uses an unused pointer bit for its discriminator) ++ `?` and `?:` should work on `Some(...)` and `None` variants of `Option` enumerator ++ Allow some type params to be specified explicitly and some to be inferred in a given call or composite literal ++ Clean up the compiler code ++ Redesign the C API + Documentation + Make it fast! 
diff --git a/src/api.c b/src/api.c index 94ea380..961e894 100644 --- a/src/api.c +++ b/src/api.c @@ -7,7 +7,7 @@ #include "auxlib.h" #include "bigint.h" #include "call.h" -#include "gc.h" +#include "gc_aux.h" #include "lib.h" #include "map.h" #include "mem.h" @@ -21,16 +21,20 @@ #include #include +#include + static void *default_alloc(void *ud, void *ptr, size_t size0, size_t size) { paw_unused(ud); if (size0 == 0) { - return malloc(cast_size(size)); + return GC_MALLOC(size); + //return malloc(size); } else if (size == 0) { - free(ptr); + GC_FREE(ptr); /* blocks come from GC_MALLOC/GC_REALLOC, so releasing them with libc free() is undefined */ return NULL; } - return realloc(ptr, cast_size(size)); + return GC_REALLOC(ptr, size); + //return realloc(ptr, size); } static StackPtr access(paw_Env *P, int index) @@ -61,11 +65,8 @@ static void open_aux(paw_Env *P, void *arg) paw_unused(arg); pawG_init(P); pawC_init(P); - -// P->globals = pawH_new(P); - - pawS_init(P); pawY_init(P); + pawS_init(P); pawP_init(P); pawR_init(P); pawL_init(P); @@ -94,8 +95,8 @@ paw_Env *paw_open(paw_Alloc alloc, void *ud) void paw_close(paw_Env *P) { pawG_uninit(P); - pawY_uninit(P); pawC_uninit(P); + pawY_uninit(P); pawS_uninit(P); P->alloc(P->ud, P, sizeof *P, 0); @@ -143,9 +144,9 @@ paw_Bool paw_is_tuple(paw_Env *P, int index) return paw_type(P, index) == PAW_TTUPLE; } -paw_Bool paw_is_class(paw_Env *P, int index) +paw_Bool paw_is_struct(paw_Env *P, int index) { - return paw_type(P, index) == PAW_TCLASS; + return paw_type(P, index) == PAW_TSTRUCT; } paw_Bool paw_is_foreign(paw_Env *P, int index) @@ -173,8 +174,8 @@ int paw_type(paw_Env *P, int index) // return PAW_TARRAY; // case VMAP: // return PAW_TMAP; - // case VCLASS: - // return PAW_TCLASS; + // case VSTRUCT: + // return PAW_TSTRUCT; // case VBIGINT: // case VNUMBER: // return PAW_TINT; @@ -311,7 +312,7 @@ const char *paw_string(paw_Env *P, int index) paw_Function paw_native(paw_Env *P, int index) { - return v_native(*access(P, 
index))->call; + return v_native(*access(P, index))->func; } paw_Digit *paw_bigint(paw_Env *P, int index) @@ -389,10
+390,13 @@ int paw_load(paw_Env *P, paw_Reader input, const char *name, void *ud) }; const int status = pawC_try(P, parse_aux, &p); pawM_free_vec(P, p.mem.scratch.data, p.mem.scratch.alloc); - pawM_free_vec(P, p.mem.st.scopes, p.mem.st.capacity); // TODO: free nested scope tables, symbols - pawM_free_vec(P, p.mem.ll.values, p.mem.ll.capacity); - pawM_free(P, p.mem.st.globals); - pawM_free(P, p.mem.st.toplevel); + pawM_free_vec(P, p.mem.symbols.scopes, p.mem.symbols.capacity); // TODO: free nested scope tables, symbols + pawM_free_vec(P, p.mem.labels.values, p.mem.labels.capacity); + pawM_free(P, p.mem.symbols.globals); + pawM_free(P, p.mem.symbols.toplevel); + while (p.mem.unifier.table) { + pawP_unifier_leave(&p.mem.unifier); + } return status; } @@ -555,16 +559,16 @@ static int upvalue_index(int nup, int index) // } //} // -//void paw_set_global(paw_Env *P, const char *name) -//{ +void paw_set_global(paw_Env *P, const char *name) +{ // paw_push_string(P, name); // paw_rotate(P, -2, 1); // Swap // // const Value key = P->top.p[-2]; // pawR_write_global(P, key, PAW_TRUE); // paw_pop(P, 1); // Pop 'key' -//} -// +} + //void paw_set_itemi(paw_Env *P, int index, paw_Int idx) //{ // const int abs = paw_abs_index(P, index); @@ -587,27 +591,27 @@ static int upvalue_index(int nup, int index) // pawR_setattr_raw(P); //} // -//void paw_create_array(paw_Env *P, int n) -//{ +void paw_create_array(paw_Env *P, int n) +{ // pawR_literal_array(P, n); -//} +} // //void paw_create_map(paw_Env *P, int n) //{ // pawR_literal_map(P, n); //} // -//void paw_create_class(paw_Env *P) +//void paw_create_struct(paw_Env *P) //{ -// pawV_push_class(P); +// pawV_push_struct(P); //} // //void paw_create_instance(paw_Env *P, int index) //{ // const Value cls = *access(P, index); -// if (pawV_is_class(cls)) { +// if (pawV_is_struct(cls)) { // Value *pv = pawC_push0(P); -// pawV_new_instance(P, pv, v_class(cls)); +// pawV_new_instance(P, pv, v_struct(cls)); // } //} // diff --git a/src/api.h 
b/src/api.h index c3f013b..e98d24a 100644 --- a/src/api.h +++ b/src/api.h @@ -6,6 +6,7 @@ #include "paw.h" #include "value.h" +// TODO: not necessary, just use the 'name' field of Type static inline const char *api_typename(int type) { switch (type) { @@ -23,8 +24,8 @@ static inline const char *api_typename(int type) return "array"; case PAW_TFUNCTION: return "function"; - case PAW_TCLASS: - return "class"; + case PAW_TSTRUCT: + return "struct"; case PAW_TFOREIGN: return "foreign"; default: diff --git a/src/array.c b/src/array.c index 073b692..0b726ee 100644 --- a/src/array.c +++ b/src/array.c @@ -2,7 +2,7 @@ // This source code is licensed under the MIT License, which can be found in // LICENSE.md. See AUTHORS.md for a list of contributor names. #include "array.h" -#include "gc.h" +#include "gc_aux.h" #include "mem.h" //#include "rt.h" #include "util.h" diff --git a/src/ast.c b/src/ast.c new file mode 100644 index 0000000..da563fd --- /dev/null +++ b/src/ast.c @@ -0,0 +1,1507 @@ +// Copyright (c) 2024, The paw Authors. All rights reserved. +// This source code is licensed under the MIT License, which can be found in +// LICENSE.md. See AUTHORS.md for a list of contributor names. 
+ +#include "ast.h" +#include "mem.h" + +#define FIRST_ARENA_SIZE 512 + +Ast *pawA_new_ast(Lex *lex) +{ + paw_Env *P = env(lex); + Ast *tree = pawM_new(P, Ast); + tree->lex = lex; + // initialize memory pools for storing AST components + pawK_pool_init(P, &tree->nodes, FIRST_ARENA_SIZE, sizeof(AstDecl)); + pawK_pool_init(P, &tree->symbols, FIRST_ARENA_SIZE, sizeof(Symbol)); + pawK_pool_init(P, &tree->sequences, FIRST_ARENA_SIZE, sizeof(void *) * 8); + return tree; +} + +void pawA_free_ast(Ast *ast) +{ + paw_Env *P = env(ast->lex); + pawK_pool_uninit(P, &ast->nodes); + pawK_pool_uninit(P, &ast->symbols); + pawK_pool_uninit(P, &ast->sequences); + pawM_free(P, ast); +} + +#define make_node_constructor(name, T) \ + T *pawA_new_ ## name(Ast *ast, T ## Kind kind) \ + { \ + T *r = pawK_pool_alloc(env((ast)->lex), &(ast)->nodes, sizeof(T), paw_alignof(T)); \ + r->hdr.line = (ast)->lex->line; \ + r->hdr.kind = kind; \ + return r; \ + } +make_node_constructor(expr, AstExpr) +make_node_constructor(decl, AstDecl) +make_node_constructor(stmt, AstStmt) + +#define make_list_constructor(name, T) \ + T *pawA_new_ ## name ## _list(Ast *ast) \ + { \ + return pawK_pool_alloc(env((ast)->lex), &(ast)->nodes, sizeof(T), paw_alignof(T)); \ + } +make_list_constructor(expr, AstExprList) +make_list_constructor(decl, AstDeclList) +make_list_constructor(stmt, AstStmtList) + +void *pawA_new_pointer_vec(Ast *ast, int nptrs) +{ + return pawK_pool_alloc(env(ast->lex), &ast->sequences, + sizeof(void *) * cast_size(nptrs), paw_alignof(void *)); +} + +Symbol *pawA_new_symbol(Lex *lex) +{ + return pawK_pool_alloc(env(lex), &lex->pm->ast->symbols, + sizeof(Symbol), paw_alignof(Symbol)); +} + +#define make_list_visitor(name, base, T, List, source, link) \ + static void visit_ ## name ## _list_aux(AstVisitor *V, List *list, T ## Pass cb) \ + { \ + T *head = list->first; \ + while (head != NULL) { \ + cb(V, head); \ + head = head->source.link; \ + } \ + } +make_list_visitor(decl, decl, AstDecl, 
AstDeclList, hdr, next) +make_list_visitor(expr, expr, AstExpr, AstExprList, hdr, next) +make_list_visitor(stmt, stmt, AstStmt, AstStmtList, hdr, next) +make_list_visitor(method, decl, AstDecl, AstDeclList, func, sibling) + +#define visit_stmts(V, list) (V)->visit_stmt_list(V, list, (V)->visit_stmt) +#define visit_exprs(V, list) (V)->visit_expr_list(V, list, (V)->visit_expr) +#define visit_decls(V, list) (V)->visit_decl_list(V, list, (V)->visit_decl) +#define visit_methods(V, list) (V)->visit_method_list(V, list, (V)->visit_decl) + +static void visit_block_stmt(AstVisitor *V, Block *s) +{ + visit_stmts(V, s->stmts); +} + +static void visit_logical_expr(AstVisitor *V, LogicalExpr *e) +{ + V->visit_expr(V, e->lhs); + V->visit_expr(V, e->rhs); +} + +static void visit_item_expr(AstVisitor *V, ItemExpr *e) +{ + V->visit_expr(V, e->value); +} + +static void visit_literal_expr(AstVisitor *V, LiteralExpr *e) +{ + switch (e->lit_kind) { + case LIT_BASIC: + break; + case LIT_TUPLE: + case LIT_ARRAY: + paw_assert(0); // TODO + break; // TODO + default: + paw_assert(e->lit_kind == LIT_COMPOSITE); + V->visit_expr(V, e->comp.target); + visit_exprs(V, e->comp.items); + } +} + +static void visit_chain_expr(AstVisitor *V, ChainExpr *e) +{ + V->visit_expr(V, e->target); +} + +static void visit_cond_expr(AstVisitor *V, CondExpr *e) +{ + V->visit_expr(V, e->cond); + V->visit_expr(V, e->lhs); + V->visit_expr(V, e->rhs); +} + +static void visit_unop_expr(AstVisitor *V, UnOpExpr *e) +{ + V->visit_expr(V, e->target); +} + +static void visit_binop_expr(AstVisitor *V, BinOpExpr *e) +{ + V->visit_expr(V, e->lhs); + V->visit_expr(V, e->rhs); +} + +static void visit_expr_stmt(AstVisitor *V, AstExprStmt *s) +{ + V->visit_expr(V, s->lhs); + V->visit_expr(V, s->rhs); +} + +static void visit_signature_expr(AstVisitor *V, FuncType *e) +{ + visit_exprs(V, e->params); + V->visit_expr(V, e->return_); +} + +static void visit_type_name_expr(AstVisitor *V, TypeName *e) +{ + if (e->args != NULL) { + 
visit_exprs(V, e->args); + } +} + +static void visit_field_decl(AstVisitor *V, FieldDecl *d) +{ + V->visit_expr(V, d->tag); +} + +static void visit_type_decl(AstVisitor *V, TypeDecl *d) +{ + if (d->generics != NULL) { + visit_decls(V, d->generics); + } + V->visit_expr(V, d->rhs); +} + +static void visit_generic_decl(AstVisitor *V, GenericDecl *d) +{ + paw_unused(V); + paw_unused(d); +} + +static void visit_struct_decl(AstVisitor *V, StructDecl *d) +{ + if (d->is_poly) { + visit_decls(V, d->generics); + } + visit_decls(V, d->fields); + visit_methods(V, d->methods); +} + +static void visit_var_decl(AstVisitor *V, VarDecl *d) +{ + V->visit_expr(V, d->init); + V->visit_expr(V, d->tag); +} + +static void visit_return_stmt(AstVisitor *V, ReturnStmt *s) +{ + V->visit_expr(V, s->expr); +} + +static void visit_call_expr(AstVisitor *V, CallExpr *e) +{ + V->visit_expr(V, e->target); + visit_exprs(V, e->args); +} + +static void visit_ident_expr(AstVisitor *V, AstIdent *e) +{ + paw_unused(V); + paw_unused(e); +} + +static void visit_func_decl(AstVisitor *V, FuncDecl *d) +{ + if (d->is_poly) { + visit_decls(V, d->generics); + } + visit_decls(V, d->params); + V->visit_expr(V, d->return_); + V->visit_block_stmt(V, d->body); +} + +static void visit_if_stmt(AstVisitor *V, IfStmt *s) +{ + V->visit_expr(V, s->cond); + V->visit_stmt(V, s->then_arm); + V->visit_stmt(V, s->else_arm); +} + +static void visit_while_stmt(AstVisitor *V, WhileStmt *s) +{ + V->visit_expr(V, s->cond); + V->visit_block_stmt(V, s->block); +} + +static void visit_dowhile_stmt(AstVisitor *V, WhileStmt *s) +{ + V->visit_block_stmt(V, s->block); + V->visit_expr(V, s->cond); +} + +static void visit_label_stmt(AstVisitor *V, LabelStmt *s) +{ + paw_unused(V); + paw_unused(s); +} + +static void visit_for_stmt(AstVisitor *V, ForStmt *s) +{ + if (s->kind == STMT_FORNUM) { + V->visit_expr(V, s->fornum.begin); + V->visit_expr(V, s->fornum.end); + V->visit_expr(V, s->fornum.step); + } else { + V->visit_expr(V, 
s->forin.target); + } + V->visit_block_stmt(V, s->block); +} + +static void visit_index_expr(AstVisitor *V, Index *e) +{ + V->visit_expr(V, e->target); + visit_exprs(V, e->elems); +} + +static void visit_selector_expr(AstVisitor *V, Selector *e) +{ + V->visit_expr(V, e->target); +} + +static void visit_expr(AstVisitor *V, AstExpr *expr) +{ + if (expr == NULL) { + return; + } + switch (a_kind(expr)) { + case EXPR_LITERAL: + V->visit_literal_expr(V, &expr->literal); + break; + case EXPR_CHAIN: + V->visit_chain_expr(V, &expr->chain); + break; + case EXPR_LOGICAL: + V->visit_logical_expr(V, &expr->logical); + break; + case EXPR_UNOP: + V->visit_unop_expr(V, &expr->unop); + break; + case EXPR_BINOP: + V->visit_binop_expr(V, &expr->binop); + break; + case EXPR_CALL: + V->visit_call_expr(V, &expr->call); + break; + case EXPR_COND: + V->visit_cond_expr(V, &expr->cond); + break; + case EXPR_NAME: + V->visit_ident_expr(V, &expr->name); + break; + case EXPR_INDEX: + V->visit_index_expr(V, &expr->index); + break; + case EXPR_ITEM: + V->visit_item_expr(V, &expr->item); + break; + case EXPR_FUNC_TYPE: + V->visit_signature_expr(V, &expr->func); + break; + case EXPR_TYPE_NAME: + V->visit_type_name_expr(V, &expr->type_name); + break; + default: + paw_assert(a_kind(expr) == EXPR_SELECTOR); + V->visit_selector_expr(V, &expr->selector); + } +} + +static void visit_decl(AstVisitor *V, AstDecl *decl) +{ + if (decl == NULL) { + return; + } + switch (a_kind(decl)) { + case DECL_VAR: + V->visit_var_decl(V, &decl->var); + break; + case DECL_FIELD: + V->visit_field_decl(V, &decl->field); + break; + case DECL_TYPE: + V->visit_type_decl(V, &decl->type); + break; + case DECL_GENERIC: + V->visit_generic_decl(V, &decl->generic); + break; + case DECL_FUNC: + V->visit_func_decl(V, &decl->func); + break; + default: + paw_assert(a_kind(decl) == DECL_STRUCT); + V->visit_struct_decl(V, &decl->struct_); + } +} + +static void visit_decl_stmt(AstVisitor *V, AstDeclStmt *s) +{ + V->visit_decl(V, s->decl); 
+ cast_stmt(s); +} + +static void visit_stmt(AstVisitor *V, AstStmt *stmt) +{ + if (stmt == NULL) { + return; + } + switch (a_kind(stmt)) { + case STMT_EXPR: + V->visit_expr_stmt(V, &stmt->expr); + break; + case STMT_DECL: + V->visit_decl_stmt(V, &stmt->decl); + break; + case STMT_RETURN: + V->visit_return_stmt(V, &stmt->return_); + break; + case STMT_IF: + V->visit_if_stmt(V, &stmt->if_); + break; + case STMT_FORIN: + case STMT_FORNUM: + V->visit_for_stmt(V, &stmt->for_); + break; + case STMT_WHILE: + V->visit_while_stmt(V, &stmt->while_); + break; + case STMT_DOWHILE: + V->visit_dowhile_stmt(V, &stmt->while_); + break; + case STMT_LABEL: + V->visit_label_stmt(V, &stmt->label); + break; + default: + paw_assert(a_kind(stmt) == STMT_BLOCK); + V->visit_block_stmt(V, &stmt->block); + } +} + +void pawA_visitor_init(AstVisitor *V, Ast *ast, AstState state) +{ + *V = (AstVisitor){ + .ast = ast, + .state = state, + .visit_expr = visit_expr, + .visit_decl = visit_decl, + .visit_stmt = visit_stmt, + .visit_expr_list = visit_expr_list_aux, + .visit_decl_list = visit_decl_list_aux, + .visit_stmt_list = visit_stmt_list_aux, + .visit_method_list = visit_method_list_aux, + .visit_literal_expr = visit_literal_expr, + .visit_logical_expr = visit_logical_expr, + .visit_ident_expr = visit_ident_expr, + .visit_chain_expr = visit_chain_expr, + .visit_unop_expr = visit_unop_expr, + .visit_binop_expr = visit_binop_expr, + .visit_cond_expr = visit_cond_expr, + .visit_call_expr = visit_call_expr, + .visit_index_expr = visit_index_expr, + .visit_selector_expr = visit_selector_expr, + .visit_item_expr = visit_item_expr, + .visit_type_name_expr = visit_type_name_expr, + .visit_signature_expr = visit_signature_expr, + .visit_block_stmt = visit_block_stmt, + .visit_expr_stmt = visit_expr_stmt, + .visit_decl_stmt = visit_decl_stmt, + .visit_if_stmt = visit_if_stmt, + .visit_for_stmt = visit_for_stmt, + .visit_while_stmt = visit_while_stmt, + .visit_dowhile_stmt = visit_dowhile_stmt, + 
.visit_label_stmt = visit_label_stmt, + .visit_return_stmt = visit_return_stmt, + .visit_var_decl = visit_var_decl, + .visit_func_decl = visit_func_decl, + .visit_struct_decl = visit_struct_decl, + .visit_field_decl = visit_field_decl, + .visit_generic_decl = visit_generic_decl, + .visit_type_decl = visit_type_decl, + }; +} + +void pawA_visit(AstVisitor *V) +{ + Ast *ast = V->ast; + visit_stmts(V, ast->stmts); +} + +// Generate code for folding a linked list of AST nodes +// +// fold_*_list: Pass over the list, calling the supplied callback on each node. The +// callback should return (a) the next node n->source.link (after macro substitution) +// if the current node n should be removed, (b) n if no folding needs to be +// performed, (c) a freshly-allocated node if creating a new list, or (d) NULL to +// end the list at the current position. A removed node is unlinked and is not +// counted; 'prev' always refers to the last node that was kept. +// fold_*s: Run fold_*_list with the default callback, F->fold_<base>. +#define make_list_folder(name, base, T, List, source, link) \ + static void fold_ ## name ## _list(AstFolder *F, List *list, T ## Fold cb) \ + { \ + int count = 0; \ + T *head = list->first; \ + T *prev = NULL; \ + while (head != NULL) { \ + T *next = cb(F, head); \ + T *rest = head->source.link; \ + if (prev == NULL) { \ + list->first = next; \ + } else { \ + prev->source.link = next; \ + } \ + if (next == NULL) { \ + break; \ + } \ + if (next != rest) { \ + prev = next; \ + ++count; \ + } \ + head = rest; \ + } \ + list->count = count; \ + } \ + static void fold_ ## name ## s(AstFolder *F, List *list) \ + { \ + fold_ ## name ## _list(F, list, F->fold_ ## base); \ + } +make_list_folder(decl, decl, AstDecl, AstDeclList, hdr, next) +make_list_folder(expr, expr, AstExpr, AstExprList, hdr, next) +make_list_folder(stmt, stmt, AstStmt, AstStmtList, hdr, next) +make_list_folder(method, decl, AstDecl, AstDeclList, func, sibling) + +static AstStmt *fold_block_stmt(AstFolder *F, Block *s) +{ + fold_stmts(F, s->stmts); + return cast_stmt(s); +} +#define fold_block(F, s) cast((F)->fold_block_stmt(F, s), Block *) + +static AstExpr
*fold_logical_expr(AstFolder *F, LogicalExpr *e) +{ + e->lhs = F->fold_expr(F, e->lhs); + e->rhs = F->fold_expr(F, e->rhs); + return cast_expr(e); +} + +static AstExpr *fold_item_expr(AstFolder *F, ItemExpr *e) +{ + e->value = F->fold_expr(F, e->value); + return cast_expr(e); +} + +static AstExpr *fold_literal_expr(AstFolder *F, LiteralExpr *e) +{ + switch (e->lit_kind) { + case LIT_BASIC: + break; + case LIT_TUPLE: + case LIT_ARRAY: + paw_assert(0); // TODO + break; // TODO + default: + paw_assert(e->lit_kind == LIT_COMPOSITE); + e->comp.target = F->fold_expr(F, e->comp.target); + fold_exprs(F, e->comp.items); + } + return cast_expr(e); +} + +static AstExpr *fold_chain_expr(AstFolder *F, ChainExpr *e) +{ + e->target = F->fold_expr(F, e->target); + return cast_expr(e); +} + +static AstExpr *fold_cond_expr(AstFolder *F, CondExpr *e) +{ + e->cond = F->fold_expr(F, e->cond); + e->lhs = F->fold_expr(F, e->lhs); + e->rhs = F->fold_expr(F, e->rhs); + return cast_expr(e); +} + +static AstExpr *fold_unop_expr(AstFolder *F, UnOpExpr *e) +{ + e->target = F->fold_expr(F, e->target); + return cast_expr(e); +} + +static AstExpr *fold_binop_expr(AstFolder *F, BinOpExpr *e) +{ + e->lhs = F->fold_expr(F, e->lhs); + e->rhs = F->fold_expr(F, e->rhs); + return cast_expr(e); +} + +static AstStmt *fold_expr_stmt(AstFolder *F, AstExprStmt *s) +{ + s->lhs = F->fold_expr(F, s->lhs); + s->rhs = F->fold_expr(F, s->rhs); + return cast_stmt(s); +} + +static AstExpr *fold_signature_expr(AstFolder *F, FuncType *e) +{ + fold_exprs(F, e->params); + e->return_ = F->fold_expr(F, e->return_); + return cast_expr(e); +} + +static AstExpr *fold_type_name_expr(AstFolder *F, TypeName *e) +{ + if (e->args != NULL) { + fold_exprs(F, e->args); + } + return cast_expr(e); +} + +static AstDecl *fold_field_decl(AstFolder *F, FieldDecl *d) +{ + d->tag = F->fold_expr(F, d->tag); + return cast_decl(d); +} + +static AstDecl *fold_type_decl(AstFolder *F, TypeDecl *d) +{ + fold_decls(F, d->generics); + d->rhs = 
F->fold_expr(F, d->rhs); + return cast_decl(d); +} + +static AstDecl *fold_generic_decl(AstFolder *F, GenericDecl *d) +{ + paw_unused(F); + return cast_decl(d); +} + +static AstDecl *fold_struct_decl(AstFolder *F, StructDecl *d) +{ + fold_decls(F, d->generics); + fold_decls(F, d->fields); + fold_methods(F, d->methods); + return cast_decl(d); +} + +static AstDecl *fold_var_decl(AstFolder *F, VarDecl *d) +{ + d->init = F->fold_expr(F, d->init); + d->tag = F->fold_expr(F, d->tag); + return cast_decl(d); +} + +static AstStmt *fold_return_stmt(AstFolder *F, ReturnStmt *s) +{ + s->expr = F->fold_expr(F, s->expr); + return cast_stmt(s); +} + +static AstExpr *fold_call_expr(AstFolder *F, CallExpr *e) +{ + e->target = F->fold_expr(F, e->target); + fold_exprs(F, e->args); + return cast_expr(e); +} + +static AstExpr *fold_ident_expr(AstFolder *F, AstIdent *e) +{ + paw_unused(F); + return cast_expr(e); +} + +static AstDecl *fold_func_decl(AstFolder *F, FuncDecl *d) +{ + fold_decls(F, d->generics); + fold_decls(F, d->params); + d->return_ = F->fold_expr(F, d->return_); + d->body = fold_block(F, d->body); + return cast_decl(d); +} + +static AstStmt *fold_if_stmt(AstFolder *F, IfStmt *s) +{ + s->cond = F->fold_expr(F, s->cond); + s->then_arm = F->fold_stmt(F, s->then_arm); + s->else_arm = F->fold_stmt(F, s->else_arm); + return cast_stmt(s); +} + +static AstStmt *fold_while_stmt(AstFolder *F, WhileStmt *s) +{ + s->cond = F->fold_expr(F, s->cond); + s->block = fold_block(F, s->block); + return cast_stmt(s); +} + +static AstStmt *fold_label_stmt(AstFolder *F, LabelStmt *s) +{ + paw_unused(F); + return cast_stmt(s); +} + +static AstStmt *fold_for_stmt(AstFolder *F, ForStmt *s) +{ + if (s->kind == STMT_FORNUM) { + s->fornum.begin = F->fold_expr(F, s->fornum.begin); + s->fornum.end = F->fold_expr(F, s->fornum.end); + s->fornum.step = F->fold_expr(F, s->fornum.step); + } else { + s->forin.target = F->fold_expr(F, s->forin.target); + } + s->block = fold_block(F, s->block); + return 
cast_stmt(s); +} + +static AstExpr *fold_index_expr(AstFolder *F, Index *e) +{ + e->target = F->fold_expr(F, e->target); + fold_exprs(F, e->elems); + return cast_expr(e); +} + +static AstExpr *fold_selector_expr(AstFolder *F, Selector *e) +{ + e->target = F->fold_expr(F, e->target); + return cast_expr(e); +} + +static AstExpr *fold_expr(AstFolder *F, AstExpr *expr) +{ + if (expr == NULL) { + return NULL; + } + switch (a_kind(expr)) { + case EXPR_LITERAL: + return F->fold_literal_expr(F, &expr->literal); + case EXPR_CHAIN: + return F->fold_chain_expr(F, &expr->chain); + case EXPR_LOGICAL: + return F->fold_logical_expr(F, &expr->logical); + case EXPR_UNOP: + return F->fold_unop_expr(F, &expr->unop); + case EXPR_BINOP: + return F->fold_binop_expr(F, &expr->binop); + case EXPR_CALL: + return F->fold_call_expr(F, &expr->call); + case EXPR_COND: + return F->fold_cond_expr(F, &expr->cond); + case EXPR_NAME: + return F->fold_ident_expr(F, &expr->name); + case EXPR_INDEX: + return F->fold_index_expr(F, &expr->index); + case EXPR_ITEM: + return F->fold_item_expr(F, &expr->item); + case EXPR_FUNC_TYPE: + return F->fold_signature_expr(F, &expr->func); + case EXPR_TYPE_NAME: + return F->fold_type_name_expr(F, &expr->type_name); + default: + paw_assert(a_kind(expr) == EXPR_SELECTOR); + return F->fold_selector_expr(F, &expr->selector); + } +} + +static AstDecl *fold_decl(AstFolder *F, AstDecl *decl) +{ + if (decl == NULL) { + return NULL; + } + switch (a_kind(decl)) { + case DECL_VAR: + return F->fold_var_decl(F, &decl->var); + case DECL_FIELD: + return F->fold_field_decl(F, &decl->field); + case DECL_TYPE: + return F->fold_type_decl(F, &decl->type); + case DECL_GENERIC: + return F->fold_generic_decl(F, &decl->generic); + case DECL_FUNC: + return F->fold_func_decl(F, &decl->func); + default: + paw_assert(a_kind(decl) == DECL_STRUCT); + return F->fold_struct_decl(F, &decl->struct_); + } +} + +static AstStmt *fold_decl_stmt(AstFolder *F, AstDeclStmt *s) +{ + s->decl = 
F->fold_decl(F, s->decl); + return cast_stmt(s); +} + +static AstStmt *fold_stmt(AstFolder *F, AstStmt *stmt) +{ + if (stmt == NULL) { + return NULL; + } + switch (a_kind(stmt)) { + case STMT_EXPR: + return F->fold_expr_stmt(F, &stmt->expr); + case STMT_DECL: + return F->fold_decl_stmt(F, &stmt->decl); + case STMT_RETURN: + return F->fold_return_stmt(F, &stmt->return_); + case STMT_IF: + return F->fold_if_stmt(F, &stmt->if_); + case STMT_FORIN: + case STMT_FORNUM: + return F->fold_for_stmt(F, &stmt->for_); + case STMT_WHILE: + case STMT_DOWHILE: + return F->fold_while_stmt(F, &stmt->while_); + case STMT_LABEL: + return F->fold_label_stmt(F, &stmt->label); + default: + paw_assert(a_kind(stmt) == STMT_BLOCK); + return F->fold_block_stmt(F, &stmt->block); + } +} + +void pawA_fold_init(AstFolder *F, Ast *ast, AstState state) +{ + *F = (AstFolder){ + .ast = ast, + .state = state, + .fold_expr = fold_expr, + .fold_decl = fold_decl, + .fold_stmt = fold_stmt, + .fold_expr_list = fold_expr_list, + .fold_decl_list = fold_decl_list, + .fold_stmt_list = fold_stmt_list, + .fold_method_list = fold_method_list, + .fold_literal_expr = fold_literal_expr, + .fold_logical_expr = fold_logical_expr, + .fold_ident_expr = fold_ident_expr, + .fold_chain_expr = fold_chain_expr, + .fold_unop_expr = fold_unop_expr, + .fold_binop_expr = fold_binop_expr, + .fold_cond_expr = fold_cond_expr, + .fold_call_expr = fold_call_expr, + .fold_index_expr = fold_index_expr, + .fold_selector_expr = fold_selector_expr, + .fold_item_expr = fold_item_expr, + .fold_type_name_expr = fold_type_name_expr, + .fold_signature_expr = fold_signature_expr, + .fold_block_stmt = fold_block_stmt, + .fold_expr_stmt = fold_expr_stmt, + .fold_decl_stmt = fold_decl_stmt, + .fold_if_stmt = fold_if_stmt, + .fold_for_stmt = fold_for_stmt, + .fold_while_stmt = fold_while_stmt, + .fold_label_stmt = fold_label_stmt, + .fold_return_stmt = fold_return_stmt, + .fold_var_decl = fold_var_decl, + .fold_func_decl = fold_func_decl, + 
.fold_struct_decl = fold_struct_decl, + .fold_field_decl = fold_field_decl, + .fold_generic_decl = fold_generic_decl, + .fold_type_decl = fold_type_decl, + }; +} + +void pawA_fold(AstFolder *F) +{ + Ast *ast = F->ast; + fold_stmts(F, ast->stmts); +} + +// **************************** +// AST stenciling routines +// **************************** + +typedef struct Stenciler { + Lex *lex; // lexical state + AstDecl *struct_; // enclosing struct AstDecl + Ast *ast; // AST being copied +} Stenciler; + +#define make_stencil_prep(name, T) \ + static T *stencil_prep_ ## name ## _aux(AstFolder *F, T *t) \ + { \ + T *r = pawA_new_ ## name(F->ast, a_kind(t)); \ + r->hdr.kind = t->hdr.kind; \ + r->hdr.line = t->hdr.line; \ + return r; \ + } +make_stencil_prep(expr, AstExpr) +make_stencil_prep(decl, AstDecl) +make_stencil_prep(stmt, AstStmt) + +// Helpers for stenciling: create a new node of the given type and kind, +// and copy the common fields +#define stencil_prep_expr(F, e) stencil_prep_expr_aux(F, cast_expr(e)) +#define stencil_prep_decl(F, d) stencil_prep_decl_aux(F, cast_decl(d)) +#define stencil_prep_stmt(F, s) stencil_prep_stmt_aux(F, cast_stmt(s)) + +#define make_stencil_list(name, base, T) \ + static T ## List *stencil_ ## name ## s(AstFolder *F, T ## List *old_list) \ + { \ + if (old_list == NULL) { \ + return NULL; \ + } \ + T ## List *new_list = pawA_new_ ## base ## _list(F->ast); \ + new_list->first = old_list->first; \ + F->fold_ ## name ## _list(F, new_list, F->fold_ ## base); \ + return new_list; \ + } +make_stencil_list(decl, decl, AstDecl) +make_stencil_list(expr, expr, AstExpr) +make_stencil_list(stmt, stmt, AstStmt) +make_stencil_list(method, decl, AstDecl) + +static AstStmt *stencil_block_stmt(AstFolder *F, Block *s) +{ + AstStmt *r = stencil_prep_stmt(F, s); + r->block.stmts = stencil_stmts(F, s->stmts); + return r; +} +#define stencil_block(F, s) cast((F)->fold_block_stmt(F, s), Block *) + +static AstExpr *stencil_logical_expr(AstFolder *F, LogicalExpr 
*e) +{ + AstExpr *r = stencil_prep_expr(F, e); + // Copy the operator flag too: stencil_prep_expr() only copies 'kind' and 'line' + r->logical.is_and = e->is_and; + r->logical.lhs = F->fold_expr(F, e->lhs); + r->logical.rhs = F->fold_expr(F, e->rhs); + return r; +} + +static AstExpr *stencil_item_expr(AstFolder *F, ItemExpr *e) +{ + AstExpr *r = stencil_prep_expr(F, e); + r->item.value = F->fold_expr(F, e->value); + r->item.name = e->name; + r->item.index = e->index; + return r; +} + +static AstExpr *stencil_literal_expr(AstFolder *F, LiteralExpr *e) +{ + AstExpr *r = stencil_prep_expr(F, e); + r->literal.lit_kind = e->lit_kind; + switch (e->lit_kind) { + case LIT_BASIC: + r->literal.basic = e->basic; + break; + case LIT_TUPLE: + case LIT_ARRAY: + paw_assert(0); // TODO + break; // TODO + default: + paw_assert(e->lit_kind == LIT_COMPOSITE); + r->literal.comp.target = F->fold_expr(F, e->comp.target); + r->literal.comp.items = stencil_exprs(F, e->comp.items); + break; + } + return r; +} + +static AstExpr *stencil_chain_expr(AstFolder *F, ChainExpr *e) +{ + AstExpr *r = stencil_prep_expr(F, e); + r->chain.target = F->fold_expr(F, e->target); + return r; +} + +static AstExpr *stencil_cond_expr(AstFolder *F, CondExpr *e) +{ + AstExpr *r = stencil_prep_expr(F, e); + r->cond.cond = F->fold_expr(F, e->cond); + r->cond.lhs = F->fold_expr(F, e->lhs); + r->cond.rhs = F->fold_expr(F, e->rhs); + return r; +} + +static AstExpr *stencil_unop_expr(AstFolder *F, UnOpExpr *e) +{ + AstExpr *r = stencil_prep_expr(F, e); + r->unop.target = F->fold_expr(F, e->target); + r->unop.op = e->op; + return r; +} + +static AstExpr *stencil_binop_expr(AstFolder *F, BinOpExpr *e) +{ + AstExpr *r = stencil_prep_expr(F, e); + r->binop.lhs = F->fold_expr(F, e->lhs); + r->binop.rhs = F->fold_expr(F, e->rhs); + r->binop.op = e->op; + return r; +} + +static AstStmt *stencil_expr_stmt(AstFolder *F, AstExprStmt *s) +{ + AstStmt *r = stencil_prep_stmt(F, s); + r->expr.lhs = F->fold_expr(F, s->lhs); + r->expr.rhs = F->fold_expr(F, s->rhs); + return r; +} + +static AstExpr *stencil_signature_expr(AstFolder *F, FuncType
*e) +{ + AstExpr *r = stencil_prep_expr(F, e); + r->func.params = stencil_exprs(F, e->params); + r->func.return_ = F->fold_expr(F, e->return_); + return r; +} + +static AstExpr *stencil_type_name_expr(AstFolder *F, TypeName *e) +{ + AstExpr *r = stencil_prep_expr(F, e); + r->type_name.name = e->name; + r->type_name.args = stencil_exprs(F, e->args); + return r; +} + +static AstDecl *stencil_field_decl(AstFolder *F, FieldDecl *d) +{ + AstDecl *r = stencil_prep_decl(F, d); + r->field.name = d->name; + r->field.tag = F->fold_expr(F, d->tag); + return r; +} + +static AstDecl *stencil_type_decl(AstFolder *F, TypeDecl *d) +{ + AstDecl *r = stencil_prep_decl(F, d); + r->type.name = d->name; + r->type.generics = stencil_decls(F, d->generics); + r->type.rhs = F->fold_expr(F, d->rhs); + return r; +} + +static AstDecl *stencil_generic_decl(AstFolder *F, GenericDecl *d) +{ + AstDecl *r = stencil_prep_decl(F, d); + r->generic.name = d->name; + return r; +} + +static AstDecl *stencil_struct_decl(AstFolder *F, StructDecl *d) +{ + Stenciler *S = F->state.S; + AstDecl *r = stencil_prep_decl(F, d); + + // Keep track of the enclosing struct, so that methods can find the proper + // receiver. 
+ AstDecl *enclosing = S->struct_; + S->struct_ = r; + { + r->struct_.is_global = d->is_global; + r->struct_.name = d->name; + r->struct_.generics = stencil_decls(F, d->generics); + r->struct_.fields = stencil_decls(F, d->fields); + r->struct_.methods = stencil_methods(F, d->methods); + } + S->struct_ = enclosing; + return r; +} + +static AstDecl *stencil_var_decl(AstFolder *F, VarDecl *d) +{ + AstDecl *r = stencil_prep_decl(F, d); + r->var.is_global = d->is_global; + r->var.is_const = d->is_const; + r->var.name = d->name; + r->var.init = F->fold_expr(F, d->init); + r->var.tag = F->fold_expr(F, d->tag); + return r; +} + +static AstStmt *stencil_return_stmt(AstFolder *F, ReturnStmt *s) +{ + AstStmt *r = stencil_prep_stmt(F, s); + r->return_.expr = F->fold_expr(F, s->expr); + return r; +} + +static AstExpr *stencil_call_expr(AstFolder *F, CallExpr *e) +{ + AstExpr *r = stencil_prep_expr(F, e); + r->call.target = F->fold_expr(F, e->target); + r->call.args = stencil_exprs(F, e->args); + return r; +} + +static AstExpr *stencil_ident_expr(AstFolder *F, AstIdent *e) +{ + AstExpr *r = stencil_prep_expr(F, e); + r->name.name = e->name; + return r; +} + +static AstDecl *stencil_func_decl(AstFolder *F, FuncDecl *d) +{ + AstDecl *r = stencil_prep_decl(F, d); + r->func.is_global = d->is_global; + r->func.receiver = NULL; // set during visit_*() + r->func.name = d->name; + r->func.generics = stencil_decls(F, d->generics); + r->func.params = stencil_decls(F, d->params); + r->func.return_ = F->fold_expr(F, d->return_); + r->func.body = stencil_block(F, d->body); + r->func.fn_kind = d->fn_kind; + return r; +} + +static AstStmt *stencil_if_stmt(AstFolder *F, IfStmt *s) +{ + AstStmt *r = stencil_prep_stmt(F, s); + r->if_.cond = F->fold_expr(F, s->cond); + r->if_.then_arm = F->fold_stmt(F, s->then_arm); + r->if_.else_arm = F->fold_stmt(F, s->else_arm); + return r; +} + +static AstStmt *stencil_while_stmt(AstFolder *F, WhileStmt *s) +{ + AstStmt *r = stencil_prep_stmt(F, s); + 
r->while_.cond = F->fold_expr(F, s->cond); + r->while_.block = stencil_block(F, s->block); + return r; +} + +static AstStmt *stencil_label_stmt(AstFolder *F, LabelStmt *s) +{ + AstStmt *r = stencil_prep_stmt(F, s); + r->label.label = s->label; + return r; +} + +static AstStmt *stencil_for_stmt(AstFolder *F, ForStmt *s) +{ + AstStmt *r = stencil_prep_stmt(F, s); + // Copy 'name' as well: stencil_prep_stmt() only copies 'kind' and 'line' + r->for_.name = s->name; + if (s->kind == STMT_FORNUM) { + r->for_.fornum.begin = F->fold_expr(F, s->fornum.begin); + r->for_.fornum.end = F->fold_expr(F, s->fornum.end); + r->for_.fornum.step = F->fold_expr(F, s->fornum.step); + } else { + r->for_.forin.target = F->fold_expr(F, s->forin.target); + } + r->for_.block = stencil_block(F, s->block); + return r; +} + +static AstExpr *stencil_index_expr(AstFolder *F, Index *e) +{ + AstExpr *r = stencil_prep_expr(F, e); + r->index.target = F->fold_expr(F, e->target); + r->index.elems = stencil_exprs(F, e->elems); + return r; +} + +static AstExpr *stencil_selector_expr(AstFolder *F, Selector *e) +{ + AstExpr *r = stencil_prep_expr(F, e); + r->selector.target = F->fold_expr(F, e->target); + r->selector.name = e->name; + return r; +} + +static AstStmt *stencil_decl_stmt(AstFolder *F, AstDeclStmt *s) +{ + AstStmt *r = stencil_prep_stmt(F, s); + r->decl.decl = F->fold_decl(F, s->decl); + return r; +} + +static void setup_stencil_pass(AstFolder *F, Stenciler *S) +{ + const AstState state = {.S = S}; + pawA_fold_init(F, S->ast, state); + F->fold_literal_expr = stencil_literal_expr; + F->fold_logical_expr = stencil_logical_expr; + F->fold_ident_expr = stencil_ident_expr; + F->fold_chain_expr = stencil_chain_expr; + F->fold_unop_expr = stencil_unop_expr; + F->fold_binop_expr = stencil_binop_expr; + F->fold_cond_expr = stencil_cond_expr; + F->fold_call_expr = stencil_call_expr; + F->fold_index_expr = stencil_index_expr; + F->fold_selector_expr = stencil_selector_expr; + F->fold_item_expr = stencil_item_expr; + F->fold_type_name_expr = stencil_type_name_expr; + F->fold_signature_expr =
stencil_signature_expr; + F->fold_block_stmt = stencil_block_stmt; + F->fold_expr_stmt = stencil_expr_stmt; + F->fold_decl_stmt = stencil_decl_stmt; + F->fold_if_stmt = stencil_if_stmt; + F->fold_for_stmt = stencil_for_stmt; + F->fold_while_stmt = stencil_while_stmt; + F->fold_label_stmt = stencil_label_stmt; + F->fold_return_stmt = stencil_return_stmt; + F->fold_var_decl = stencil_var_decl; + F->fold_func_decl = stencil_func_decl; + F->fold_struct_decl = stencil_struct_decl; + F->fold_field_decl = stencil_field_decl; + F->fold_generic_decl = stencil_generic_decl; + F->fold_type_decl = stencil_type_decl; +} + +AstDecl *pawA_stencil(Ast *ast, AstDecl *decl) +{ + Stenciler S = { + .lex = ast->lex, + .ast = ast, + }; + AstFolder F; + setup_stencil_pass(&F, &S); + return F.fold_decl(&F, decl); +} + +typedef struct Printer { + FILE *out; + int indent; +} Printer; + +static void indent_line(Printer *P) +{ + for (int i = 0; i < P->indent; ++i) { + fprintf(P->out, " "); + } +} + +#define dump_fmt(P, fmt, ...) 
(indent_line(P), fprintf((P)->out, fmt, __VA_ARGS__)) +#define dump_msg(P, msg) (indent_line(P), fprintf((P)->out, msg)) + +static void dump_stmt(Printer *, AstStmt *); +static void dump_expr(Printer *, AstExpr *); + +static void print_decl_kind(Printer *P, void *node) +{ + AstDecl *d = node; + switch (a_kind(d)) { + case DECL_FUNC: + fprintf(P->out, "FuncDecl"); + break; + case DECL_FIELD: + fprintf(P->out, "FieldDecl"); + break; + case DECL_VAR: + fprintf(P->out, "VarDecl"); + break; + case DECL_STRUCT: + fprintf(P->out, "StructDecl"); + break; + case DECL_GENERIC: + fprintf(P->out, "GenericDecl"); + break; + case DECL_TYPE: + fprintf(P->out, "TypeDecl"); + break; + default: + fprintf(P->out, "?"); + } +} + +static void print_expr_kind(Printer *P, void *node) +{ + AstExpr *e = node; + switch (a_kind(e)) { + case EXPR_LITERAL: + fprintf(P->out, "LiteralExpr"); + break; + case EXPR_UNOP: + fprintf(P->out, "UnOpExpr"); + break; + case EXPR_BINOP: + fprintf(P->out, "BinOpExpr"); + break; + case EXPR_CALL: + fprintf(P->out, "CallExpr"); + break; + case EXPR_COND: + fprintf(P->out, "CondExpr"); + break; + case EXPR_NAME: + fprintf(P->out, "AstIdent"); + break; + case EXPR_FUNC_TYPE: + fprintf(P->out, "FuncType"); + break; + case EXPR_TYPE_NAME: + fprintf(P->out, "TypeName"); + break; + default: + fprintf(P->out, "?"); + break; + } +} + +static void print_stmt_kind(Printer *P, void *node) +{ + AstStmt *s = node; + switch (a_kind(s)) { + case STMT_EXPR: + fprintf(P->out, "AstExprStmt"); + break; + case STMT_DECL: + fprintf(P->out, "AstDeclStmt"); + break; + case STMT_BLOCK: + fprintf(P->out, "Block"); + break; + case STMT_IF: + fprintf(P->out, "IfStmt"); + break; + case STMT_FORIN: + case STMT_FORNUM: + fprintf(P->out, "ForStmt"); + break; + case STMT_WHILE: + case STMT_DOWHILE: + fprintf(P->out, "WhileStmt"); + break; + case STMT_RETURN: + fprintf(P->out, "ReturnStmt"); + break; + default: + fprintf(P->out, "?"); + } +} + +static int predump_node(Printer *P, void *node, 
void (*print)(Printer *, void *)) +{ + if (node != NULL) { + print(P, node); + fprintf(P->out, "(%p) {\n", node); + return 0; + } + return -1; +} + +#define dump_block(P, b) check_exp((b)->kind == STMT_BLOCK, dump_stmt(P, cast_stmt(b))) +// Print a 'name' field; a missing name is printed as "(null)" rather than passing NULL to '%s' +#define dump_name(P, s) dump_fmt(P, "name: %s\n", (s) != NULL ? (s)->text : "(null)") + +static void dump_expr(Printer *P, AstExpr *e); +static void dump_decl(Printer *P, AstDecl *d); +static void dump_stmt(Printer *P, AstStmt *s); + +#define make_list_dumper(name, T) \ + static void dump_ ## name ## _list(Printer *P, T ## List *list, const char *name) \ + { \ + dump_fmt(P, "%s: {\n", name); \ + ++P->indent; \ + if (list != NULL) { \ + dump_ ## name(P, list->first); \ + } \ + --P->indent; \ + dump_msg(P, "}\n"); \ + } +make_list_dumper(expr, AstExpr) +make_list_dumper(decl, AstDecl) +make_list_dumper(stmt, AstStmt) + +static void dump_decl(Printer *P, AstDecl *d) +{ + if (predump_node(P, d, print_decl_kind)) { + fprintf(P->out, "(null)\n"); + return; + } + ++P->indent; + dump_fmt(P, "line: %d\n", d->hdr.line); + switch (a_kind(d)) { + case DECL_FUNC: + dump_fmt(P, "is_global: %d\n", d->func.is_global); + dump_fmt(P, "receiver: %p\n", (void *)d->func.receiver); + dump_name(P, d->func.name); + dump_decl_list(P, d->func.generics, "generics"); + dump_decl_list(P, d->func.params, "params"); + dump_msg(P, "return_: "); + dump_expr(P, d->func.return_); + break; + case DECL_FIELD: + dump_name(P, d->field.name); + dump_msg(P, "tag: "); + dump_expr(P, d->field.tag); + break; + case DECL_VAR: + dump_fmt(P, "is_global: %d\n", d->var.is_global); + dump_name(P, d->var.name); + dump_msg(P, "tag: "); + dump_expr(P, d->var.tag); + dump_msg(P, "init: "); + dump_expr(P, d->var.init); + break; + case DECL_STRUCT: + dump_name(P, d->struct_.name); + dump_fmt(P, "type: %d\n", d->struct_.type->hdr.def); + dump_decl_list(P, d->struct_.generics, "generics"); + dump_decl_list(P, d->struct_.fields, "fields"); + dump_decl_list(P, d->struct_.methods,
"methods"); + break; + case DECL_GENERIC: + dump_name(P, d->generic.name); + break; + case DECL_TYPE: + dump_name(P, d->type.name); + dump_msg(P, "rhs: "); + dump_expr(P, d->type.rhs); + dump_decl_list(P, d->type.generics, "generics"); + break; + default: + paw_assert(a_is_func_decl(d)); + dump_fmt(P, "is_global: %d\n", d->func.is_global); + dump_name(P, d->func.name); + dump_decl_list(P, d->func.generics, "generics"); + } + --P->indent; + dump_msg(P, "}\n"); + if (d->hdr.next != NULL) { + dump_decl(P, d->hdr.next); + } +} + +static void dump_stmt(Printer *P, AstStmt *s) +{ + if (predump_node(P, s, print_stmt_kind)) { + fprintf(P->out, "(null)\n"); + return; + } + ++P->indent; + dump_fmt(P, "line: %d\n", s->hdr.line); + switch (a_kind(s)) { + case STMT_EXPR: + dump_msg(P, "lhs: "); + dump_expr(P, s->expr.lhs); + dump_msg(P, "rhs: "); + dump_expr(P, s->expr.rhs); + break; + case STMT_BLOCK: + dump_stmt_list(P, s->block.stmts, "stmts"); + break; + case STMT_DECL: + dump_msg(P, "decl: "); + dump_decl(P, s->decl.decl); + break; + case STMT_IF: + dump_msg(P, "cond: "); + dump_expr(P, s->if_.cond); + dump_msg(P, "then_arm: "); + dump_stmt(P, s->if_.then_arm); + dump_msg(P, "else_arm: "); + dump_stmt(P, s->if_.else_arm); + break; + case STMT_FORIN: + dump_name(P, s->for_.name); + dump_msg(P, "target: "); + dump_expr(P, s->for_.forin.target); + dump_msg(P, "block: "); + dump_block(P, s->for_.block); + break; + case STMT_FORNUM: + dump_name(P, s->for_.name); + dump_msg(P, "begin: "); + dump_expr(P, s->for_.fornum.begin); + dump_msg(P, "end: "); + dump_expr(P, s->for_.fornum.end); + dump_msg(P, "step: "); + dump_expr(P, s->for_.fornum.step); + dump_msg(P, "block: "); + dump_block(P, s->for_.block); + break; + case STMT_WHILE: + dump_msg(P, "cond: "); + dump_expr(P, s->while_.cond); + dump_msg(P, "block: "); + dump_block(P, s->while_.block); + break; + case STMT_DOWHILE: + dump_msg(P, "block: "); + dump_block(P, s->while_.block); + dump_msg(P, "cond: "); + dump_expr(P, 
s->while_.cond); + break; + case STMT_RETURN: + dump_msg(P, "expr: "); + // 'return_.expr' is an AstExpr: dump it as an expression, not as a statement + dump_expr(P, s->return_.expr); + break; + default: + break; + } + --P->indent; + dump_msg(P, "}\n"); + if (s->hdr.next != NULL) { + dump_stmt(P, s->hdr.next); + } +} + +static void dump_expr(Printer *P, AstExpr *e) +{ + if (predump_node(P, e, print_expr_kind)) { + fprintf(P->out, "(null)\n"); + return; + } + ++P->indent; + dump_fmt(P, "line: %d\n", e->hdr.line); + switch (a_kind(e)) { + case EXPR_LITERAL: + break; + case EXPR_UNOP: + dump_fmt(P, "op: %d\n", e->unop.op); + dump_msg(P, "target: "); + dump_expr(P, e->unop.target); + break; + case EXPR_BINOP: + dump_fmt(P, "op: %d\n", e->binop.op); + dump_msg(P, "lhs: "); + dump_expr(P, e->binop.lhs); + dump_msg(P, "rhs: "); + dump_expr(P, e->binop.rhs); + break; + case EXPR_CALL: + dump_msg(P, "target: "); + dump_expr(P, e->call.target); + dump_expr_list(P, e->call.args, "args"); + break; + case EXPR_COND: + dump_msg(P, "cond: "); + dump_expr(P, e->cond.cond); + dump_msg(P, "lhs: "); + dump_expr(P, e->cond.lhs); + dump_msg(P, "rhs: "); + dump_expr(P, e->cond.rhs); + break; + case EXPR_NAME: + dump_name(P, e->name.name); + break; + case EXPR_FUNC_TYPE: + dump_expr_list(P, e->func.params, "params"); + dump_msg(P, "return_: "); + dump_expr(P, e->func.return_); + break; + case EXPR_TYPE_NAME: + dump_name(P, e->type_name.name); + dump_expr_list(P, e->type_name.args, "args"); + break; + default: + break; + } + --P->indent; + dump_msg(P, "}\n"); + if (e->hdr.next != NULL) { + dump_expr(P, e->hdr.next); + } +} + +void pawA_dump_decl(FILE *out, AstDecl *decl) +{ + Printer P; + P.out = out; + P.indent = 0; + dump_decl(&P, decl); +} + +void pawA_dump_expr(FILE *out, AstExpr *expr) +{ + Printer P; + P.out = out; + P.indent = 0; + dump_expr(&P, expr); +} + +void pawA_dump_stmt(FILE *out, AstStmt *stmt) +{ + Printer P; + P.out = out; + P.indent = 0; + dump_stmt(&P, stmt); +} diff --git a/src/ast.h b/src/ast.h new file mode 100644 index
0000000..061aa51 --- /dev/null +++ b/src/ast.h @@ -0,0 +1,642 @@ +// Copyright (c) 2024, The paw Authors. All rights reserved. +// This source code is licensed under the MIT License, which can be found in +// LICENSE.md. See AUTHORS.md for a list of contributor names. +#ifndef PAW_AST_H +#define PAW_AST_H + +#include "code.h" +#include "paw.h" +#include "type.h" + +// TODO: Prefix the rest of the structs with 'Ast' +typedef struct Ast Ast; +typedef struct AstVisitor AstVisitor; +typedef struct AstFolder AstFolder; +typedef struct AstDecl AstDecl; +typedef struct AstExpr AstExpr; +typedef struct AstStmt AstStmt; +typedef struct Block Block; + +// Represents an entry in the symbol table +// +// During the type checking pass, a Symbol is created for each declaration that is +// encountered. When an identifier is referenced, it is looked up in the list of +// symbol tables representing the enclosing scopes (as well as the global symbol +// table). +// +// The symbol table is used for all symbols, but not every symbol will end up on the +// stack. In particular, symbols with 'is_type' equal to 1 will not get a stack slot. 
+typedef struct Symbol { + paw_Bool is_init: 1; + paw_Bool is_type: 1; + paw_Bool is_const: 1; + paw_Bool is_generic: 1; + String *name; // name of the symbol + AstDecl *decl; // corresponding declaration +} Symbol; + +typedef enum VarKind { + VAR_GLOBAL, + VAR_UPVALUE, + VAR_LOCAL, + VAR_FIELD, + VAR_METHOD, +} VarKind; + +typedef struct VarInfo { + Symbol *symbol; + VarKind kind; + int index; +} VarInfo; + +//**************************************************************** +// Node containers +//**************************************************************** + +typedef struct AstDeclList { + AstDecl *first; + int count; +} AstDeclList; + +typedef struct AstExprList { + AstExpr *first; + int count; +} AstExprList; + +typedef struct AstStmtList { + AstStmt *first; + int count; +} AstStmtList; + +//**************************************************************** +// Declarations +//**************************************************************** + +typedef enum AstDeclKind { + DECL_VAR, + DECL_TYPE, + DECL_FUNC, + DECL_STRUCT, + DECL_FIELD, + DECL_GENERIC, +} AstDeclKind; + +#define DECL_HEADER \ + Type *type; \ + struct AstDecl *next; \ + String *name; \ + int line; \ + DefId def; \ + AstDeclKind kind: 8 +typedef struct AstDeclHeader { + DECL_HEADER; // common initial sequence +} AstDeclHeader; + +typedef struct VarDecl { + DECL_HEADER; // common initial sequence + paw_Bool is_global: 1; // uses 'global' keyword + paw_Bool is_const: 1; // uses 'const' keyword + AstExpr *tag; // type annotation + AstExpr *init; // initial value +} VarDecl; + +// Node representing a type declaration +// Used for type aliases and builtin types. 
+typedef struct TypeDecl { + DECL_HEADER; // common initial sequence + AstExpr *rhs; // type right of '=' + AstDeclList *generics; +} TypeDecl; + +typedef struct FuncDecl { + DECL_HEADER; // common initial sequence + paw_Bool is_global: 1; // 1 for global functions, 0 otherwise + paw_Bool is_poly: 1; // 1 for templates, 0 otherwise + paw_Bool is_visited: 1; // 1 if types resolved, 0 otherwise + FuncKind fn_kind: 5; // kind of function (module, method, etc.) + AstDecl *receiver; // pointer to receiver (StructDecl) + AstDecl *sibling; // pointer to next method (FuncDecl) + UniTable *unify; // unification table + Scope *scope; // function-scoped symbols, including generics + AstDeclList *generics; // generic type parameters (FieldDecl) + AstDeclList *params; // parameter declarations + AstExpr *return_; // return type + Block *body; // function body +} FuncDecl; + +// TODO: Need to prevent recursive types, or introduce the concept of indirection... +typedef struct StructDecl { + DECL_HEADER; // common initial sequence + paw_Bool is_global: 1; // uses 'global' keyword + paw_Bool is_poly: 1; // 1 for templates, 0 otherwise + paw_Bool is_visited: 1; // 1 if types resolved, 0 otherwise + UniTable *unify; // unification table + Scope *scope; // scope for struct-level symbols + Scope *field_scope; + Scope *method_scope; + AstDeclList *fields; // list of FieldDecl + AstDeclList *methods; // list of FuncDecl + AstDeclList *generics; // generic type parameters (GenericDecl) +} StructDecl; + +// Represents a template instance +typedef struct InstanceDecl { + DECL_HEADER; // common initial sequence + UniTable *unify; // unification table +} InstanceDecl; + +// AST node representing a 'Field' production +typedef struct FieldDecl { + DECL_HEADER; // common initial sequence + AstExpr *tag; // type annotation +} FieldDecl; + +typedef struct GenericDecl { + DECL_HEADER; // common initial sequence +} GenericDecl; + +typedef struct AstDecl { + union { + AstDeclHeader hdr; + VarDecl 
var; + FuncDecl func; + StructDecl struct_; + FieldDecl field; + GenericDecl generic; + TypeDecl type; + }; +} AstDecl; + +#define NO_DECL UINT16_MAX + +//**************************************************************** +// AstExpressions +//**************************************************************** + +typedef enum AstExprKind { + EXPR_NAME, + EXPR_CALL, + EXPR_LITERAL, + EXPR_CHAIN, + EXPR_UNOP, + EXPR_BINOP, + EXPR_COALESCE, + EXPR_LOGICAL, + EXPR_COND, + EXPR_INDEX, + EXPR_ACCESS, + EXPR_SELECTOR, + EXPR_INVOKE, + EXPR_SYMBOL, + EXPR_ITEM, + EXPR_FUNC_TYPE, + EXPR_TYPE_NAME, +} AstExprKind; + +#define EXPR_HEADER \ + int line; \ + AstExprKind kind: 8; \ + Type *type; \ + struct AstExpr *next +typedef struct AstExprHeader { + EXPR_HEADER; +} AstExprHeader; + +// TODO: rename LiteralType -> LiteralKind +typedef enum LiteralType { + LIT_BASIC, + LIT_COMPOSITE, + LIT_TUPLE, + LIT_ARRAY, +} LiteralType; + +typedef struct BasicLit { + Value value; + paw_Type t; +} BasicLit; + +typedef struct ArrayLit { + AstExprList *elems; +} ArrayLit; + +typedef struct TupleLit { + AstExprList *elems; +} TupleLit; + +typedef struct CompositeLit { + AstExpr *target; + AstExprList *items; +} CompositeLit; + +typedef struct LiteralExpr { + EXPR_HEADER; + LiteralType lit_kind; + union { + BasicLit basic; + TupleLit tuple; + ArrayLit array; + CompositeLit comp; + }; +} LiteralExpr; + +typedef struct AstIdent { + EXPR_HEADER; + String *name; +} AstIdent; + +typedef struct ItemExpr { + EXPR_HEADER; + int index; + String *name; // attribute name + AstExpr *value; +} ItemExpr; + +typedef struct UnOpExpr { + EXPR_HEADER; + UnaryOp op: 8; + AstExpr *target; +} UnOpExpr; + +typedef struct BinOpExpr { + EXPR_HEADER; + BinaryOp op: 8; + AstExpr *lhs; + AstExpr *rhs; +} BinOpExpr; + +typedef struct CondExpr { + EXPR_HEADER; + AstExpr *cond; + AstExpr *lhs; + AstExpr *rhs; +} CondExpr; + +typedef struct LogicalExpr { + EXPR_HEADER; + paw_Bool is_and: 1; + AstExpr *lhs; + AstExpr *rhs; +} 
LogicalExpr; + +#define SUFFIXED_HEADER EXPR_HEADER; \ + AstExpr *target +typedef struct SuffixedExpr { + SUFFIXED_HEADER; +} SuffixedExpr; + +typedef struct ChainExpr { + SUFFIXED_HEADER; +} ChainExpr; + +typedef struct CallExpr { + SUFFIXED_HEADER; + Type *func; + AstExprList *args; +} CallExpr; + +typedef struct Selector { + SUFFIXED_HEADER; // common fields + paw_Bool is_method: 1; // 1 if selecting a method, 0 otherwise + String *name; // name of the field +} Selector; + +typedef struct Access { + SUFFIXED_HEADER; // common fields + String *name; // field name +} Access; + +typedef struct Index { + SUFFIXED_HEADER; // common fields + AstExprList *elems; // list of elements +} Index; + +// A valid TypeName is related to a AstDecl through the symbol table. +typedef struct TypeName { + EXPR_HEADER; // common initial sequence + String *name; // name of the struct or enum + AstExprList *args; +} TypeName; + +typedef struct FuncType { + EXPR_HEADER; // common initial sequence + AstExpr *return_; // return type annotation + AstExprList *params; // parameter types +} FuncType; + +typedef struct AstExpr { + union { + AstExprHeader hdr; + LiteralExpr literal; + LogicalExpr logical; + AstIdent name; + ChainExpr chain; + UnOpExpr unop; + BinOpExpr binop; + CondExpr cond; + SuffixedExpr suffix; + CallExpr call; + Index index; + Access access; + Selector selector; + ItemExpr item; + TypeName type_name; + FuncType func; + }; +} AstExpr; + +//**************************************************************** +// Statements +//**************************************************************** + +typedef enum AstStmtKind { + STMT_EXPR, + STMT_DECL, + STMT_BLOCK, + STMT_IF, + STMT_FORIN, + STMT_FORNUM, + STMT_WHILE, + STMT_DOWHILE, + STMT_LABEL, + STMT_RETURN, +} AstStmtKind; + +#define STMT_HEADER \ + int line; \ + AstStmtKind kind: 8; \ + struct AstStmt *next +typedef struct AstStmtHeader { + STMT_HEADER; +} AstStmtHeader; + +typedef struct AstDeclStmt { + STMT_HEADER; + AstDecl 
*decl; +} AstDeclStmt; + +typedef struct AstExprStmt { + STMT_HEADER; + AstExpr *lhs; + AstExpr *rhs; +} AstExprStmt; + +typedef struct Block { + STMT_HEADER; + Scope *scope; // scope for block + AstStmtList *stmts; +} Block; + +typedef struct ReturnStmt { + STMT_HEADER; + AstExpr *expr; +} ReturnStmt; + +typedef struct IfStmt { + STMT_HEADER; + AstExpr *cond; + AstStmt *then_arm; // BlockStmt + AstStmt *else_arm; // BlockStmt | IfStmt +} IfStmt; + +typedef struct WhileStmt { + STMT_HEADER; + Scope *scope; + AstExpr *cond; + Block *block; +} WhileStmt; + +typedef struct LabelStmt { + STMT_HEADER; + LabelKind label; +} LabelStmt; + +typedef struct ForIn { + AstExpr *target; +} ForIn; + +typedef struct ForNum { + AstExpr *begin; + AstExpr *end; + AstExpr *step; +} ForNum; + +typedef struct ForStmt { + STMT_HEADER; + Scope *scope; // scope for entire loop + String *name; // loop control variable name + Block *block; // body of loop + union { + ForIn forin; + ForNum fornum; + }; +} ForStmt; + +typedef struct AstStmt { + union { + AstStmtHeader hdr; + Block block; + AstExprStmt expr; + AstDeclStmt decl; + IfStmt if_; + ForStmt for_; + WhileStmt while_; + LabelStmt label; + ReturnStmt return_; + }; +} AstStmt; + +// Pointer to a context variable for each compilation pass. 
+typedef union AstState { + void *state; + struct Resolver *R; // symbol resolution state (pass 2) + struct Stenciler *S; // template expansion state (pass 3) + struct Generator *G; // code generation state (pass 4) + struct Checker *C; +} AstState; + +// TODO: Should be able to use the entrypoint routines on list elements, may need slightly more specific nodes, like ParamDecl for parameters instead of overloading FieldDecl +typedef void (*AstExprPass)(AstVisitor *pass, AstExpr *e); +typedef void (*AstStmtPass)(AstVisitor *pass, AstStmt *s); +typedef void (*AstDeclPass)(AstVisitor *pass, AstDecl *d); + +// Represents a single pass over an AST +struct AstVisitor { + AstState state; + Ast *ast; + + // Entrypoints for each type of node + AstExprPass visit_expr; + AstStmtPass visit_stmt; + AstDeclPass visit_decl; + + void (*visit_expr_list)(AstVisitor *V, AstExprList *list, AstExprPass cb); + void (*visit_decl_list)(AstVisitor *V, AstDeclList *list, AstDeclPass cb); + void (*visit_stmt_list)(AstVisitor *V, AstStmtList *list, AstStmtPass cb); + + // Special case for methods (FuncDecl), which are linked by the 'sibling' field, + // rather than the 'next' field. 'next' is used by method template instances. 
+ void (*visit_method_list)(AstVisitor *V, AstDeclList *list, AstDeclPass cb); + + void (*visit_literal_expr)(AstVisitor *V, LiteralExpr *e); + void (*visit_logical_expr)(AstVisitor *V, LogicalExpr *e); + void (*visit_ident_expr)(AstVisitor *V, AstIdent *e); + void (*visit_chain_expr)(AstVisitor *V, ChainExpr *e); + void (*visit_unop_expr)(AstVisitor *V, UnOpExpr *e); + void (*visit_binop_expr)(AstVisitor *V, BinOpExpr *e); + void (*visit_cond_expr)(AstVisitor *V, CondExpr *e); + void (*visit_suffix_expr)(AstVisitor *V, SuffixedExpr *e); + void (*visit_call_expr)(AstVisitor *V, CallExpr *e); + void (*visit_index_expr)(AstVisitor *V, Index *e); + void (*visit_access_expr)(AstVisitor *V, Access *e); + void (*visit_selector_expr)(AstVisitor *V, Selector *e); + void (*visit_item_expr)(AstVisitor *V, ItemExpr *e); + void (*visit_type_name_expr)(AstVisitor *V, TypeName *e); + void (*visit_signature_expr)(AstVisitor *V, FuncType *e); + + void (*visit_block_stmt)(AstVisitor *V, Block *s); + void (*visit_expr_stmt)(AstVisitor *V, AstExprStmt *s); + void (*visit_decl_stmt)(AstVisitor *V, AstDeclStmt *s); + void (*visit_if_stmt)(AstVisitor *V, IfStmt *s); + void (*visit_for_stmt)(AstVisitor *V, ForStmt *s); + void (*visit_while_stmt)(AstVisitor *V, WhileStmt *s); + void (*visit_dowhile_stmt)(AstVisitor *V, WhileStmt *s); + void (*visit_label_stmt)(AstVisitor *V, LabelStmt *s); + void (*visit_return_stmt)(AstVisitor *V, ReturnStmt *s); + + void (*visit_var_decl)(AstVisitor *V, VarDecl *d); + void (*visit_func_decl)(AstVisitor *V, FuncDecl *d); + void (*visit_struct_decl)(AstVisitor *V, StructDecl *d); + void (*visit_field_decl)(AstVisitor *V, FieldDecl *d); + void (*visit_generic_decl)(AstVisitor *V, GenericDecl *d); + void (*visit_type_decl)(AstVisitor *V, TypeDecl *d); +}; + +void pawA_visitor_init(AstVisitor *V, Ast *ast, AstState state); +void pawA_visit(AstVisitor *V); + +typedef AstExpr *(*AstExprFold)(AstFolder *F, AstExpr *e); +typedef AstStmt *(*AstStmtFold)(AstFolder 
*F, AstStmt *s); +typedef AstDecl *(*AstDeclFold)(AstFolder *F, AstDecl *d); + +struct AstFolder { + AstState state; + Ast *ast; + + // Entrypoints for each type of node + AstExprFold fold_expr; + AstStmtFold fold_stmt; + AstDeclFold fold_decl; + + void (*fold_expr_list)(AstFolder *F, AstExprList *list, AstExprFold cb); + void (*fold_decl_list)(AstFolder *F, AstDeclList *list, AstDeclFold cb); + void (*fold_stmt_list)(AstFolder *F, AstStmtList *list, AstStmtFold cb); + void (*fold_method_list)(AstFolder *F, AstDeclList *list, AstDeclFold cb); + + AstExpr *(*fold_literal_expr)(AstFolder *F, LiteralExpr *e); + AstExpr *(*fold_logical_expr)(AstFolder *F, LogicalExpr *e); + AstExpr *(*fold_ident_expr)(AstFolder *F, AstIdent *e); + AstExpr *(*fold_chain_expr)(AstFolder *F, ChainExpr *e); + AstExpr *(*fold_unop_expr)(AstFolder *F, UnOpExpr *e); + AstExpr *(*fold_binop_expr)(AstFolder *F, BinOpExpr *e); + AstExpr *(*fold_cond_expr)(AstFolder *F, CondExpr *e); + AstExpr *(*fold_suffix_expr)(AstFolder *F, SuffixedExpr *e); + AstExpr *(*fold_call_expr)(AstFolder *F, CallExpr *e); + AstExpr *(*fold_index_expr)(AstFolder *F, Index *e); + AstExpr *(*fold_access_expr)(AstFolder *F, Access *e); + AstExpr *(*fold_selector_expr)(AstFolder *F, Selector *e); + AstExpr *(*fold_item_expr)(AstFolder *F, ItemExpr *e); + AstExpr *(*fold_type_name_expr)(AstFolder *F, TypeName *e); + AstExpr *(*fold_signature_expr)(AstFolder *F, FuncType *e); + + AstStmt *(*fold_block_stmt)(AstFolder *F, Block *s); + AstStmt *(*fold_expr_stmt)(AstFolder *F, AstExprStmt *s); + AstStmt *(*fold_decl_stmt)(AstFolder *F, AstDeclStmt *s); + AstStmt *(*fold_if_stmt)(AstFolder *F, IfStmt *s); + AstStmt *(*fold_for_stmt)(AstFolder *F, ForStmt *s); + AstStmt *(*fold_while_stmt)(AstFolder *F, WhileStmt *s); + AstStmt *(*fold_label_stmt)(AstFolder *F, LabelStmt *s); + AstStmt *(*fold_return_stmt)(AstFolder *F, ReturnStmt *s); + + AstDecl *(*fold_var_decl)(AstFolder *F, VarDecl *d); + AstDecl 
*(*fold_func_decl)(AstFolder *F, FuncDecl *d); + AstDecl *(*fold_struct_decl)(AstFolder *F, StructDecl *d); + AstDecl *(*fold_field_decl)(AstFolder *F, FieldDecl *d); + AstDecl *(*fold_generic_decl)(AstFolder *F, GenericDecl *d); + AstDecl *(*fold_type_decl)(AstFolder *F, TypeDecl *d); +}; + +void pawA_folder_init(AstFolder *F, Ast *ast, AstState state); +void pawA_fold(AstFolder *F); + +typedef struct Ast { + Pool nodes; + Pool symbols; + Pool sequences; + AstStmtList *stmts; + Lex *lex; +} Ast; + +//**************************************************************** +// Helper routines +//**************************************************************** + +Symbol *pawA_new_symbol(Lex *lex); +AstDecl *pawA_new_decl(Ast *ast, AstDeclKind kind); +AstExpr *pawA_new_expr(Ast *ast, AstExprKind kind); +AstStmt *pawA_new_stmt(Ast *ast, AstStmtKind kind); +AstDeclList *pawA_new_decl_list(Ast *ast); +AstExprList *pawA_new_expr_list(Ast *ast); +AstStmtList *pawA_new_stmt_list(Ast *ast); + +void *pawA_new_pointer_vec(Ast *ast, int nptrs); + +#define cast_decl(x) ((AstDecl *)(x)) +#define cast_expr(x) ((AstExpr *)(x)) +#define cast_stmt(x) ((AstStmt *)(x)) + +//**************************************************************** +// AST manipulation +//**************************************************************** + +Ast *pawA_new_ast(Lex *lex); +void pawA_free_ast(Ast *ast); + +AstDecl *pawA_stencil(Ast *ast, AstDecl *decl); + +//**************************************************************** +// AST helpers +//**************************************************************** + +#define a_type(x) ((x)->hdr.type) +#define a_kind(x) ((x)->hdr.kind) +#define a_next(x) ((x)->hdr.next) + +// Macros for checking node types +#define a_is_basic(e) (a_kind(e) == EXPR_BASIC_TYPE) +#define a_is_unit(e) (a_is_basic(e) && (e)->basic.code == PAW_TUNIT) +#define a_is_bool(e) (a_is_basic(e) && (e)->basic.code == PAW_TBOOL) +#define a_is_int(e) (a_is_basic(e) && (e)->basic.code == PAW_TINT) 
+#define a_is_float(e) (a_is_basic(e) && (e)->basic.code == PAW_TFLOAT) +#define a_is_string(e) (a_is_basic(e) && (e)->basic.code == PAW_TSTRING) + +#define a_is_struct_layout(e) (a_kind(e) == EXPR_STRUCT_LAYOUT) + +#define a_is_generic_type(e) (a_kind(e) == EXPR_GENERIC_TYPE) +#define a_is_named_type(e) (a_kind(e) == EXPR_TYPE_NAME) +#define a_is_func_type(e) (a_kind(e) == EXPR_FUNC_TYPE) +#define a_is_generic_decl(e) (a_kind(e) == DECL_GENERIC) +#define a_is_struct_decl(d) (a_kind(d) == DECL_STRUCT) +#define a_is_func_decl(d) (a_kind(d) == DECL_FUNC) + +#define a_has_receiver(d) (a_is_func_decl(d) && (d)->func.receiver != NULL) +#define a_is_template_decl(d) (a_is_func_template_decl(d) || \ + a_is_struct_template_decl(d)) + +#define a_is_func_template_decl(d) (a_is_func_decl(d) && (d)->func.is_poly) /* 'd' is parenthesized so expression arguments expand correctly */ +#define a_is_struct_template_decl(d) (a_is_struct_decl(d) && (d)->struct_.is_poly) /* same as above, for StructDecl */ + +void pawA_dump_decl(FILE *out, AstDecl *decl); +void pawA_dump_expr(FILE *out, AstExpr *expr); +void pawA_dump_stmt(FILE *out, AstStmt *stmt); + +#endif // PAW_AST_H diff --git a/src/bigint.c b/src/bigint.c index 949be74..7e3a0ca 100644 --- a/src/bigint.c +++ b/src/bigint.c @@ -6,7 +6,7 @@ TODO: Make BigInt a separate type from int, needs special GC instructions that w emit for every single int. 
#include "bigint.h" #include "auxlib.h" -#include "gc.h" +#include "gc_aux.h" #include "mem.h" #include "rt.h" diff --git a/src/call.c b/src/call.c index fbaaef6..a40e11c 100644 --- a/src/call.c +++ b/src/call.c @@ -109,6 +109,8 @@ void pawC_stack_overflow(paw_Env *P) void pawC_stack_grow(paw_Env *P, int n) { + paw_assert(n > 0); + paw_assert(P->bound.p >= P->stack.p); const int alloc = cast_size(P->bound.p - P->stack.p); pawC_stack_realloc(P, next_alloc(alloc, n)); } @@ -168,7 +170,7 @@ static void handle_ccall(paw_Env *P, StackPtr base, Native *ccall) cf->top.p = base; // call the C function - const int nret = ccall->call(P); // TODO: Multi-return + const int nret = ccall->func(P); base = restore_pointer(P, pos); call_return(P, base, nret); //pawR_close_upvalues(P, base); diff --git a/src/check.c b/src/check.c index 4a7b804..6fb7fb6 100644 --- a/src/check.c +++ b/src/check.c @@ -1,72 +1,323 @@ // Copyright (c) 2024, The paw Authors. All rights reserved. // This source code is licensed under the MIT License, which can be found in // LICENSE.md. See AUTHORS.md for a list of contributor names. +// +// check.c: Implementation of the type checker. This code transforms an AST +// from the parser into a graph by unifying types based on lexical scope. +#include "ast.h" #include "array.h" #include "check.h" #include "code.h" #include "env.h" -#include "gc.h" +#include "gc_aux.h" #include "map.h" #include "mem.h" #include "str.h" #include "type.h" -static Type *cannonicalize(Lex *lex, const Type *type) +// Helper macros +#define syntax_error(C, ...) pawX_error((C)->lex, __VA_ARGS__) +#define type_error(C, ...) pawX_error((C)->lex, __VA_ARGS__) +#define resolve_expr(V, e) ((V)->visit_expr(V, e), a_type(e)) +#define cached_str(C, i) pawE_cstr(env((C)->lex), cast_size(i)) +#define basic_decl(C, code) basic_symbol(C, code)->decl +#define type2code(e) (y_is_basic(e) ? 
(e)->hdr.def : -1) +#define is_unit(e) (type2code(e) == PAW_TUNIT) +#define normalize(C, e) pawP_normalize((C)->U, e) +#define flag2code(flag) (-(flag) - 1) + +// Entrypoint for type unification +#define unify(C, a, b) pawP_unify((C)->U, a, b) + +// Common state for type-checking routines +typedef struct Checker { + Lex *lex; // lexical state + StructDecl *struct_; // enclosing struct declaration + Type *return_; // enclosing function return type + SymbolTable *sym; // scoped symbol table + Ast *ast; // AST being checked + ParseMemory *pm; // dynamic memory + Unifier *U; // unification tables + GenericState *gs; // generic context + AstVisitor *V1; + int func_depth; // number of nested functions + int nexpanded; +} Checker; + +static Type *get_raw_type(Checker *C, DefId id) +{ + paw_assert(id < C->pm->decls.size); + return a_type(C->pm->decls.data[id]); +} + +static Type *get_type(Checker *C, DefId id) +{ + paw_assert(id < C->pm->decls.size); + Type *raw = get_raw_type(C, id); + return normalize(C, raw); +} + +#define are_types_same(a, b) ((a) == (b)) + +static paw_Bool test_types(Checker *C, Type *a, Type *b); + +static paw_Bool test_binders(Checker *C, Binder *a, Binder *b) +{ + for (int i = 0; i < a->count; ++i) { + if (i >= b->count || !test_types(C, a->types[i], b->types[i])) { /* never read past the end of 'b' */ + return PAW_FALSE; + } + } + return a->count == b->count; /* binders of different lengths never match */ +} + +static paw_Bool test_types(Checker *C, Type *a, Type *b) +{ + if (y_kind(a) != y_kind(b)) { + return PAW_FALSE; + } + switch (y_kind(a)) { + case TYPE_FUNC: + return test_binders(C, &a->func.params, &b->func.params); + case TYPE_ADT: { + if (a->adt.target != b->adt.target) { + return PAW_FALSE; + } + return test_binders(C, &a->adt.types, &b->adt.types); + } + default: + return are_types_same(a, b); + } +} + +static Symbol *basic_symbol(Checker *C, paw_Type code) { - return pawY_add_type(env(lex), 
env(lex)->mod, type); + paw_assert(code >= 0 && code <= PAW_TSTRING); + + // basic types have fixed locations + Scope *toplevel = C->sym->scopes[0]; + return 
toplevel->symbols[1 + code]; // TODO } -static void push_symbol_table(Lex *lex) +static void push_symbol_table(Checker *C) { - pawP_add_scope(lex, &lex->pm->st); + pawP_new_scope(C->lex, C->sym); } -static void pop_symbol_table(Lex *lex) +static void pop_symbol_table(Checker *C) { // Last symbol table should have been assigned to an AST node. The // next call to push_symbol_table() will allocate a new table. - SymbolTable *st = &lex->pm->st; + SymbolTable *st = C->sym; paw_assert(st->nscopes > 0); --st->nscopes; } -static Scope *get_symbols(Lex *lex) +static DefId add_decl(Checker *C, AstDecl *decl) +{ + paw_Env *P = env(C->lex); + ParseMemory *pm = C->pm; + pawM_grow(P, pm->decls.data, pm->decls.size, pm->decls.alloc); + const DefId id = pm->decls.size++; + pm->decls.data[id] = decl; + decl->hdr.def = id; + return id; +} + +static AstDecl *get_decl(Checker *C, DefId id) +{ + ParseMemory *pm = C->pm; + paw_assert(id < pm->decls.size); + return pm->decls.data[id]; +} + +static Type *new_type(Checker *C, DefId id, TypeKind kind) +{ + paw_Env *P = env(C->lex); + Type *type = pawY_type_new(P, P->mod); + type->hdr.kind = kind; + type->hdr.def = id; + if (id != NO_DECL) { + // set type of associated definition + AstDecl *d = get_decl(C, id); + d->hdr.type = type; + } + return type; +} + +// TODO: Move this elsewhere, and have it write to a Buffer: use for error messages +static void dump_type(Checker *C, Type *type); + +static void dump_binder(Checker *C, Binder *binder) +{ + for (int i = 0; i < binder->count; ++i) { + dump_type(C, binder->types[i]); + if (i < binder->count - 1) { + printf(", "); + } + } +} + +static void dump_type(Checker *C, Type *type) +{ + paw_Env *P = env(C->lex); + const String *basic[] = { + pawE_cstr(P, CSTR_UNIT), + pawE_cstr(P, CSTR_BOOL), + pawE_cstr(P, CSTR_INT), + pawE_cstr(P, CSTR_FLOAT), + pawE_cstr(P, CSTR_STRING), + }; + switch (y_kind(type)) { + case TYPE_VAR: { + TypeVar *var = &type->var; + printf("%s", var->name->text); + break; + } 
+ case TYPE_ADT: { + Adt *adt = &type->adt; + AstDecl *decl = get_decl(C, adt->target); + printf("%s", decl->struct_.name->text); + if (adt->types.count > 0) { + printf("["); + dump_binder(C, &adt->types); + printf("]"); + } + break; + } + case TYPE_FUNC: { + FuncSig *func = &type->func; + if (func->types.count > 0) { + printf("["); + dump_binder(C, &func->types); + printf("]"); + } + printf("("); + dump_binder(C, &func->params); + printf(") -> "); + dump_type(C, func->return_); + break; + } + default: + paw_assert(y_is_basic(type)); + printf("%s", basic[type->hdr.def]->text); + } +} + +static Type *type_collector(AstVisitor *V, AstExpr *expr) +{ + return resolve_expr(V, expr); +} + +static Type *param_collector(AstVisitor *V, AstDecl *decl) +{ + FieldDecl *d = &decl->field; + d->type = resolve_expr(V, d->tag); + return d->type; +} + +static Type *generic_collector(AstVisitor *V, AstDecl *decl) +{ + GenericDecl *d = &decl->generic; + DefId id = add_decl(V->state.C, decl); + d->type = new_type(V->state.C, id, TYPE_VAR); + d->type->var.name = d->name; + return d->type; +} + +static Binder *temp_binder(Checker *C, int count) +{ + ParseMemory *pm = C->pm; + if (pm->temp.size == paw_countof(pm->temp.data)) { + syntax_error(C, "too many nested binders"); + } + Binder *binder = &pm->temp.data[pm->temp.size++]; + binder->types = pawM_new_vec(env(C->lex), count, Type *); + binder->count = count; + return binder; +} + +#define make_collector(name, T, collect) \ + static Binder *collect_ ## name(AstVisitor *V, T ## List *list) \ + { \ + Checker *C = V->state.C; \ + Binder *binder = temp_binder(C, list->count); \ + T *node = list->first; \ + for (int i = 0; i < list->count; ++i) { \ + Type *type = collect(V, node); \ + binder->types[i] = type; \ + node = node->hdr.next; \ + } \ + return binder; \ + } +make_collector(types, AstExpr, type_collector) +make_collector(params, AstDecl, param_collector) +make_collector(generics, AstDecl, generic_collector) + +static void 
lose_binders(Checker *C, int count) { - SymbolTable *st = &lex->pm->st; + ParseMemory *pm = C->pm; + paw_assert(pm->temp.size >= count); + pm->temp.size -= count; +} + +static void free_last_binder(Checker *C) +{ + ParseMemory *pm = C->pm; + paw_assert(pm->temp.size > 0); + pawM_free_vec(env(C->lex), pm->temp.data[pm->temp.size - 1].types, pm->temp.data[pm->temp.size - 1].count); /* release the top binder's 'types' vector (allocated in temp_binder), not the binder stack itself */ + --pm->temp.size; +} + +static void enter_generic_ctx(Checker *C, GenericState *gs, UniTable *table) +{ + gs->outer = C->gs; + pawP_unifier_enter(C->U, table); + C->gs = gs; +} + +static UniTable *leave_generic_ctx(Checker *C) +{ + C->gs = C->gs->outer; + return pawP_unifier_leave(C->U); +} + +static Scope *enclosing_scope(Checker *C) +{ + SymbolTable *st = C->sym; return st->scopes[st->nscopes - 1]; } -static Symbol *register_var(Lex *lex, Scope *st, String *name, Type *type) +static Symbol *add_symbol(Checker *C, Scope *scope, String *name, AstDecl *decl) { - Symbol *sym = pawP_add_symbol(lex, st); - sym->name = name; - sym->type = type; - return sym; + Symbol *symbol = pawP_add_symbol(C->lex, scope); + symbol->name = name; + symbol->decl = decl; + return symbol; } -static Symbol *add_local(Lex *lex, String *name, Type *type) +static Symbol *add_local(Checker *C, String *name, AstDecl *decl) { - Scope *st = get_symbols(lex); - return register_var(lex, st, name, type); + return add_symbol(C, enclosing_scope(C), name, decl); } -static Symbol *add_global(Lex *lex, String *name, Type *type) +static Symbol *add_global(Checker *C, String *name, AstDecl *decl) { - Scope *st = lex->pm->st.globals; + Scope *st = C->sym->globals; for (int i = 0; i < st->nsymbols; ++i) { Symbol *sym = st->symbols[i]; if (pawS_eq(sym->name, name)) { - pawX_error(lex, "duplicate global '%s'", name->text); + syntax_error(C, "duplicate global '%s'", name->text); } } - return register_var(lex, st, name, 
type); + return add_symbol(C, st, name, decl); } -static Symbol *resolve_symbol(Lex *lex, String *name) +static Symbol *resolve_symbol(Checker *C, String *name) { // 
search the scoped symbols - SymbolTable *scopes = &lex->pm->st; + SymbolTable *scopes = C->sym; for (int depth = scopes->nscopes - 1; depth >= 0; --depth) { Scope *scope = scopes->scopes[depth]; const int index = pawP_find_symbol(scope, name); @@ -77,236 +328,290 @@ static Symbol *resolve_symbol(Lex *lex, String *name) // search the global symbols const int index = pawP_find_symbol(scopes->globals, name); if (index < 0) { - pawX_error(lex, "undefined symbol '%s'", name->text); + syntax_error(C, "undefined symbol '%s'", name->text); } return scopes->globals->symbols[index]; } -static Symbol *resolve_var(Lex *lex, String *name) +static AstDecl *resolve_attr(AstDeclList *attrs, String *name) { - Symbol *var = resolve_symbol(lex, name); - if (var->is_type) { - pawX_error(lex, "identifier '%s' is not a variable", var->name->text); + AstDecl *decl = attrs->first; + while (decl != NULL) { + if (pawS_eq(name, decl->hdr.name)) { + return decl; + } + if (a_kind(decl) == DECL_FUNC) { + decl = decl->func.sibling; + } else { + decl = decl->hdr.next; + } } - return var; + return NULL; } // Register the name and type of a variable // If 'global' is true, then the variable is a global, otherwise, it is a local. -// Must be called prior to 'define_var', -static Symbol *declare_var(Lex *lex, String *name, Type *tag, paw_Bool global) +// Must be called prior to 'define_symbol', +static Symbol *declare_symbol(Checker *C, String *name, AstDecl *decl, paw_Bool global) { - return global ? add_global(lex, name, tag) - : add_local(lex, name, tag); + return global ? 
add_global(C, name, decl) + : add_local(C, name, decl); } // Allow a previously-declared variable to be accessed -static void define_var(Symbol *symbol) +static void define_symbol(Symbol *symbol) { symbol->is_init = PAW_TRUE; } -static void new_var(Lex *lex, String *name, Type *tag, paw_Bool global) +static Symbol *new_symbol(Checker *C, String *name, AstDecl *decl, paw_Bool global) { - Symbol *symbol = declare_var(lex, name, tag, global); - define_var(symbol); + Symbol *symbol = declare_symbol(C, name, decl, global); + define_symbol(symbol); + return symbol; } -static void new_local_var(Lex *lex, String *name, Type *tag) -{ - new_var(lex, name, tag, PAW_FALSE); -} +#define new_local(C, name, decl) new_symbol(C, name, decl, PAW_FALSE) +#define new_global(C, name, decl) new_symbol(C, name, decl, PAW_TRUE) /* 'global' flag set: declare_symbol() routes the symbol through add_global() */ -static Scope *leave_block(Lex *lex) +static Scope *leave_block(Checker *C) { - Scope *st = get_symbols(lex); - pop_symbol_table(lex); - return st; + Scope *scope = enclosing_scope(C); + pop_symbol_table(C); + return scope; } -static void enter_block(Lex *lex) +static void enter_block(Checker *C, Scope *scope) { - push_symbol_table(lex); + if (scope == NULL) { + push_symbol_table(C); + } else { + // use an existing scope + pawP_add_scope(C->lex, C->sym, scope); + } } -static Scope *leave_function(Lex *lex) +static Scope *leave_function(Checker *C) { - Scope *scope = get_symbols(lex); - pop_symbol_table(lex); // leave function body - check_gc(env(lex)); - --lex->fn_depth; + Scope *scope = leave_block(C); + check_gc(env(C->lex)); + --C->func_depth; return scope; } -static String *context_name(const Lex *lex, String *name, FnKind kind) +static void enter_function(Checker *C, String *name, Scope *scope, FuncDecl *func, FuncKind kind) { - if (fn_has_self(kind)) { - return v_string(pawE_cstr(env(lex), CSTR_SELF)); + // Enter the function body. 
+ ++C->func_depth; + enter_block(C, scope); + + // for methods, slot 0 is the context variable ('self') + if (kind == FUNC_METHOD) { + name = cached_str(C, CSTR_SELF); + new_local(C, name, func->receiver); + } else { + new_local(C, name, cast_decl(func)); } - return name; } -static void enter_function(Lex *lex, String *name, Type *sig, FnKind kind) +static void new_local_literal(Checker *C, const char *name, paw_Type code) { - ++lex->fn_depth; - - // Enter the function body. - enter_block(lex); - - // Create the context variable in slot 0. For VCLOSURE, this slot holds the closure - // object being called. For VMETHOD, it holds the class instance that the method is - // being called on, i.e. the implicit 'self' parameter. - new_local_var(lex, context_name(lex, name, kind), sig); + Symbol *symbol = basic_symbol(C, code); + new_local(C, scan_string(C->lex, name), symbol->decl); } -static void new_local_literal(Lex *lex, const char *name, int type) +static void visit_block_stmt(AstVisitor *V, Block *block) { - new_local_var(lex, scan_string(lex, name), x_base_type(lex, type)); + enter_block(V->state.C, block->scope); + V->visit_stmt_list(V, block->stmts, V->visit_stmt); + block->scope = leave_block(V->state.C); } -static void type_error(Visitor *V) +static StructDecl *instantiate_struct(AstVisitor *V, StructDecl *base, Binder *types); + +static void create_type_vars(Checker *C, AstDeclList *types) { - pawX_error(V->lex, "invalid type"); + int index = 0; + Unifier *U = C->U; + AstDecl *decl = types->first; + while (decl != NULL) { /* test before the first dereference: an empty generics list is a no-op */ + Type *type = get_raw_type(C, decl->hdr.def); + type->var.depth = U->depth; + type->var.index = index++; + pawP_new_type_var(U, type); + decl = decl->hdr.next; + } } -static Type *get_type(Visitor *V, int type) +static void check_template_param(Checker *C, AstDeclList *params, Binder *args) { - if (type < 0) { - type_error(V); + if (args->count > params->count) { + 
syntax_error(C, "too many generics"); + } else if (args->count < 
params->count) { + syntax_error(C, "not enough generics"); } - return x_base_type(V->lex, type); } -static void expected_type(Visitor *V, Type *have, Type *want) +static StructDecl *init_struct_template(AstVisitor *V, StructDecl *base, Binder *types) { - pawX_error(V->lex, "expected '%s' type but found '%s'", - pawY_name(y_id(have)), pawY_name(y_id(want))); + GenericState gs; + Checker *C = V->state.C; + enter_generic_ctx(C, &gs, NULL); + + StructDecl *inst; + check_template_param(C, base->generics, types); + inst = instantiate_struct(V, base, types); + + inst->unify = leave_generic_ctx(C); + return inst; } -static void check_same(Visitor *V, Type *lhs, Type *rhs) +static void bind_types(Checker *C, Binder *base, Binder *inst, paw_Bool is_generic) { - if (!pawY_is_same(lhs, rhs)) { - pawX_error(V->lex, "expected equal types but found '%s' and '%s'", - pawY_name(y_id(lhs)), pawY_name(y_id(rhs))); + paw_assert(base->count == inst->count); + for (int i = 0; i < base->count; ++i) { + Type *bt = base->types[i]; + Type *it = inst->types[i]; + AstDecl *decl = get_decl(C, it->hdr.def); + Symbol *symbol = new_local(C, bt->var.name, decl); + symbol->is_generic = is_generic; + symbol->is_type = PAW_TRUE; } } -static void check_similar(Visitor *V, Type *lhs, Type *rhs) +static void setup_poly_func(AstVisitor *V, FuncDecl *d, Type *type) { - if (!pawY_is_same(lhs, rhs)) { - pawX_error(V->lex, "expected compatible types but found '%s' and '%s'", - pawY_name(y_id(lhs)), pawY_name(y_id(rhs))); - } + Checker *C = V->state.C; + enter_block(C, d->scope); + d->type = type; + + // TODO: NOTE: This uses the base's type vars, may need to use instance vars (currently set to NULL) + Type *base = get_raw_type(C, type->func.base); + bind_types(C, &base->func.types, &type->func.types, d->is_poly); + + type->func.params = *collect_params(V, d->params); + type->func.return_ = resolve_expr(V, d->return_); + lose_binders(C, 1); + + d->scope = leave_block(C); } -static Type *get_common(Visitor *V, 
Type *a, Type *b) +static void setup_poly_struct(AstVisitor *V, StructDecl *d) { - if (!pawY_is_same(a, b)) { - pawX_error(V->lex, "incompatible types '%s' and '%s'", - pawY_name(y_id(a)), pawY_name(y_id(b))); - } - return a; + Checker *C = V->state.C; + enter_block(C, d->scope); + Type *type = d->type; + + Type *base = get_raw_type(C, type->adt.target); + bind_types(C, &base->adt.types, &type->adt.types, d->is_poly); + + d->scope = leave_block(C); } -static void check_primitive(Visitor *V, Type *t) +static void define_func(AstVisitor *V, FuncDecl *d) { - if (!y_is_primitive(t)) { - pawX_error(V->lex, "expected primitive ('bool', 'int', 'float', or 'string') but found '%s'", - pawY_name(y_id(t))); - } + Checker *C = V->state.C; + DefId id = add_decl(C, cast_decl(d)); + Type *r = new_type(C, id, TYPE_FUNC); + d->type = r; + + r->func.params = *collect_params(V, d->params); + r->func.return_ = resolve_expr(V, d->return_); + lose_binders(C, 1); } -static void check_integral(Visitor *V, Type *t) +static void define_poly_func(AstVisitor *V, FuncDecl *d, DefId base, Binder *types) { - if (!y_is_int(t) && !y_is_bool(t)) { - pawX_error(V->lex, "expected integral ('int' or 'bool') but found '%s'", - pawY_name(y_id(t))); - } + Checker *C = V->state.C; + DefId id = add_decl(C, cast_decl(d)); + Type *r = new_type(C, id, TYPE_FUNC); + d->type = r; + + r->func.base = base == NO_DECL ? 
d->def : base; + r->func.types = *types; + + setup_poly_func(V, d, r); + lose_binders(C, 1); } -static void check_string(Visitor *V, Type *t) +static void define_struct(AstVisitor *V, StructDecl *d) { - if (!y_is_string(t)) { - pawX_error(V->lex, "expected string but found '%s'", - pawY_name(y_id(t))); - } + DefId id = add_decl(V->state.C, cast_decl(d)); + d->type = new_type(V->state.C, id, TYPE_ADT); + d->type->adt.target = d->def; } -static void check_sequence(Visitor *V, Type *t) +static void define_poly_struct(AstVisitor *V, StructDecl *d, DefId base, Binder *types) { - if (!y_is_string(t) && !y_is_array(t) && !y_is_tuple(t)) { - pawX_error(V->lex, "expected sequence ('string', 'array', or 'tuple') but found '%s'", - pawY_name(y_id(t))); - } + define_struct(V, d); + d->type->adt.types = *types; + d->type->adt.target = base == NO_DECL ? d->def : base; + + setup_poly_struct(V, d); + lose_binders(V->state.C, 1); } -static void check_accessible(Visitor *V, Type *t) +static void expect_bool_expr(AstVisitor *V, AstExpr *e) { - if (!y_is_class(t) && !y_is_foreign(t)) { - pawX_error(V->lex, "expected class or foreign object but found '%s'", - pawY_name(y_id(t))); - } + Checker *C = V->state.C; + Type *type = resolve_expr(V, e); + unify(C, type, get_raw_type(C, PAW_TBOOL)); +} + +static void expect_int_expr(AstVisitor *V, AstExpr *e) +{ + Checker *C = V->state.C; + Type *type = resolve_expr(V, e); + unify(C, type, get_raw_type(C, PAW_TINT)); } -static void check_indexable(Visitor *V, Type *t) +static void visit_type_name_expr(AstVisitor *V, TypeName *e) { - if (!y_is_string(t) && !y_is_array(t) && !y_is_tuple(t)) { - pawX_error(V->lex, "expected container ('string' or 'array', or 'map') but found '%s'", - pawY_name(y_id(t))); + Checker *C = V->state.C; + Symbol *symbol = resolve_symbol(C, e->name); + AstDecl *decl = symbol->decl; + if (a_kind(decl) == DECL_VAR) { + type_error(C, "'%s' is not a type", symbol->name->text); + } else if (a_is_struct_template_decl(decl)) { + 
StructDecl *base = &decl->struct_; + Binder *types = collect_types(V, e->args); + StructDecl *inst = init_struct_template(V, base, types); + decl = cast_decl(inst); } + e->type = a_type(decl); } -static String *meta_key(paw_Env *P, Metamethod mm) +static void visit_ident_expr(AstVisitor *V, AstIdent *e) { - return v_string(P->meta_keys[mm]); + Symbol *symbol = resolve_symbol(V->state.C, e->name); + e->type = get_type(V->state.C, symbol->decl->hdr.def); } -static FunctionType *resolve_mm(paw_Env *P, const Type *tag, Metamethod mm) +static void visit_logical_expr(AstVisitor *V, LogicalExpr *e) { - paw_assert(y_is_class(tag)); - String *name = meta_key(P, mm); - for (int i = 0; i < tag->cls.nattrs; ++i) { - NamedField *a = &tag->cls.attrs[i]; - if (pawS_eq(a->name, name) && y_is_function(a->type)) { - return &a->type->sig; - } - } - return NULL; + expect_bool_expr(V, e->lhs); + expect_bool_expr(V, e->rhs); + e->type = a_type(e->lhs); // same as 'rhs' } -static Type *try_meta_unop(Visitor *V, UnOpExpr *e, const Type *tag) +// TODO: Needs a constraint +static void visit_chain_expr(AstVisitor *V, ChainExpr *e) { - paw_Env *P = env(V->lex); - const Metamethod mm = unop2meta(e->op); - FunctionType *sig = resolve_mm(P, tag, mm); - if (sig->nargs != 0) { - type_error(V); - } - e->mm = sig; - return sig->ret; + Type *type = resolve_expr(V, e->target); +// if (!a_is_object(e->target->hdr.type)) { +// type_error(C, "'?' operator requires an object"); +// } } -static Type *try_meta_binop(Visitor *V, BinOpExpr *e, const Type *lhs, const Type *rhs, paw_Bool is_r) +static void visit_cond_expr(AstVisitor *V, CondExpr *e) { - paw_Env *P = env(V->lex); - Metamethod mm = binop2meta(e->op); - mm = is_r ? mm_get_r(mm) : mm; - const Type *self = is_r ? rhs : lhs; - const Type *other = is_r ? 
lhs : rhs; - FunctionType *sig = resolve_mm(P, self, mm); - if (sig->nargs != 1 || !pawY_is_same(sig->args[0], other)) { - type_error(V); - } - if (is_r) { - Expr *tmp = e->lhs; - e->lhs = e->rhs; - e->rhs = tmp; - } - e->mm = sig; - return sig->ret; + expect_bool_expr(V, e->cond); + Type *lhs = resolve_expr(V, e->lhs); + Type *rhs = resolve_expr(V, e->rhs); + unify(V->state.C, lhs, rhs); } -static Type *check_unop(Visitor *V, UnOpExpr *e) +static void visit_unop_expr(AstVisitor *V, UnOpExpr *e) { // clang-format off static const int8_t kValidOps[NUNARYOPS][PAW_NTYPES] = { @@ -318,20 +623,20 @@ static Type *check_unop(Visitor *V, UnOpExpr *e) }; // clang-format on - Type *type = e->target->type; - if (y_is_class(type)) { - return try_meta_unop(V, e, type); - } else if (!kValidOps[e->op][y_id(type)]) { - pawX_error(V->lex, "unsupported operand type for unary '%s': '%s'", - "? TODO", pawY_name(y_id(type))); + Checker *C = V->state.C; + Type *type = resolve_expr(V, e->target); + const paw_Type code = type2code(type); + if (!kValidOps[e->op][code]) { + type_error(C, "unsupported operand type for unary '%s'"); } else if (unop_is_bool(e->op)) { - return get_type(V, PAW_TBOOL); + e->type = get_raw_type(C, PAW_TBOOL); + } else { + e->type = type; } - return type; } // TODO: BINARY_IN should be handled separately -static Type *check_binop(Visitor *V, BinOpExpr *e) +static void visit_binop_expr(AstVisitor *V, BinOpExpr *e) { // clang-format off static const uint8_t kValidOps[NBINARYOPS][PAW_NTYPES] = { @@ -355,726 +660,923 @@ static Type *check_binop(Visitor *V, BinOpExpr *e) }; // clang-format on - Type *lhs = e->lhs->type; - Type *rhs = e->rhs->type; - if (y_is_class(lhs)) { - return try_meta_binop(V, e, lhs, rhs, PAW_FALSE); - } else if (y_is_class(rhs)) { - return try_meta_binop(V, e, lhs, rhs, PAW_TRUE); - } else if (!pawY_is_same(lhs, rhs) || - !kValidOps[e->op][y_id(lhs)]) { - pawX_error(V->lex, "unsupported operand types for binary '%s': '%s' and '%s'", - "? 
TODO", pawY_name(y_id(lhs)), pawY_name(y_id(rhs))); + Checker *C = V->state.C; + Type *lhs = resolve_expr(V, e->lhs); + Type *rhs = resolve_expr(V, e->rhs); + unify(C, lhs, rhs); + + const paw_Type left = type2code(lhs); + const paw_Type right = type2code(rhs); + if (left < 0 || right < 0 || left != right || !kValidOps[e->op][left]) { + type_error(C, "unsupported operand types for binary '%s'"); } else if (binop_is_bool(e->op)) { - return get_type(V, PAW_TBOOL); + e->type = get_raw_type(C, PAW_TBOOL); } else { - return lhs; + e->type = lhs; } - return NULL; } -static void visit_assignment(Visitor *V, Expr *lhs, Expr *rhs) +static void visit_param_decl(AstVisitor *V, AstDecl *decl) { - if (lhs->kind == EXPR_VAR) { - V->expr(V, rhs); // variable assignment - VarExpr *e = cast_to(lhs, VarExpr); - Symbol *var = resolve_var(V->lex, e->name); - check_similar(V, var->type, rhs->type); - e->type = var->type; - return; - } + FieldDecl *d = &decl->field; + d->type = resolve_expr(V, d->tag); - // index, range, or attribute assignment - SuffixedExpr *base = cast_to(lhs, SuffixedExpr); // common base - V->expr(V, base->target); // visit up to last expression - V->expr(V, rhs); - if (lhs->kind == EXPR_INDEX) { - IndexExpr *last = cast_to(lhs, IndexExpr); - V->expr(V, last->first); - if (last->second) { - V->expr(V, last->second); - V->expr(V, rhs); - check_same(V, last->type, rhs->type); - } else { - // V->expr(V, rhs); - // Symbol *elem = pawY_unwrap(env(V->lex), lhs->type); - // check_similar(V, elem, rhs->type); - } - } else { - paw_assert(lhs->kind == EXPR_ACCESS); - const AccessExpr *e = cast_to(lhs, AccessExpr); - V->expr(V, rhs); - - // TODO: Lookup field type - } + new_local(V->state.C, d->name, decl); + add_decl(V->state.C, decl); } -static Type *resolve_type(Lex *lex, TypeDecl *tn); +static void visit_signature_expr(AstVisitor *V, FuncType *e) +{ + Checker *C = V->state.C; + e->type = new_type(C, NO_DECL, TYPE_FUNC); + e->type->func.params = *collect_types(V, 
e->params); + e->type->func.return_ = resolve_expr(V, e->return_); + lose_binders(C, 1); +} -static Type *resolve_fn_type(Lex *lex, TypeDecl *tn) +static void visit_func(AstVisitor *V, FuncDecl *d, FuncKind kind) { - const int nargs = tn->sig.nargs; - Type **args = NULL; - if (nargs > 0) { - Expr *arg = tn->sig.args; - args = pawM_new_vec(env(lex), nargs, Type *); - for (int i = 0; i < nargs; ++i) { - TypeDecl *decl = cast_to(arg, TypeDecl); - args[i] = resolve_type(lex, decl); - arg = arg->next; - } + Checker *C = V->state.C; + Type *type = get_raw_type(C, d->def); + d->fn_kind = kind; + + enter_function(C, d->name, d->scope, d, kind); + V->visit_decl_list(V, d->params, visit_param_decl); + + Type *outer = C->return_; + C->return_ = type->func.return_; + + V->visit_block_stmt(V, d->body); + d->scope = leave_function(C); + C->return_ = outer; + d->is_visited = PAW_TRUE; +} + +static void register_func(AstVisitor *V, FuncDecl *d) +{ + Checker *C = V->state.C; + if (!d->is_poly) { + define_func(V, d); + return; } - Type fake = {0}; - fake.sig.args = args; - fake.sig.nargs = nargs; - fake.sig.ret = resolve_type(lex, tn->sig.ret); - fake.sig.kind = TYPE_SIGNATURE; - fake.sig.id = PAW_TFUNCTION; // temporary - tn->type = cannonicalize(lex, &fake); - return tn->type; -} - -//static Type *resolve_array_type(Lex *lex, TypeDecl *tn) -//{ -// Type *elem = resolve_type(lex, tn->arr.elem); -// tn->tag = pawY_register_array(env(lex), elem); -// tn->resolved = PAW_TRUE; -// return tn->tag; -//} -// -//static Type *resolve_map_type(Lex *lex, TypeDecl *tn) -//{ -// Type *key = resolve_type(lex, tn->map.key); -// Type *value = resolve_type(lex, tn->map.value); -// tn->tag = pawY_register_map(env(lex), key, value); -// tn->resolved = PAW_TRUE; -// return tn->tag; -//} - -static Type *resolve_class_type(Lex *lex, TypeDecl *tn) -{ - Symbol *var = resolve_symbol(lex, tn->named.name); - if (!var->is_type || !y_is_class(var->type)) { - pawX_error(lex, "invalid class type '%s'", 
var->name->text); + if (C->func_depth > 1) { + // TODO: This restriction makes it far easier to implement generics. It will + // likely be lifted in the future. + syntax_error(C, "templates must be toplevel"); } - return var->type; + GenericState gs; + enter_generic_ctx(C, &gs, d->unify); + Binder *types = collect_generics(V, d->generics); + create_type_vars(C, d->generics); + define_poly_func(V, d, NO_DECL, types); + d->unify = leave_generic_ctx(C); } -static Type *resolve_type(Lex *lex, TypeDecl *tn) +static void traverse_func(AstVisitor *V, FuncDecl *d, FuncKind kind) { - Type *type; - if (tn->group == TYPE_PRIMITIVE) { - type = lex->P->mod->types[tn->basic.t]; - } else if (tn->group == TYPE_CLASS) { - type = resolve_class_type(lex, tn); + Checker *C = V->state.C; + if (!d->is_poly) { + visit_func(V, d, kind); } else { - type = resolve_fn_type(lex, tn); + GenericState gs; + enter_generic_ctx(C, &gs, d->unify); + visit_func(V, d, kind); + d->unify = leave_generic_ctx(C); } - tn->type = type; - return type; } -static void visit_var_expr(Visitor *V, VarExpr *e) +static void visit_return_stmt(AstVisitor *V, ReturnStmt *s) { - Lex *lex = V->lex; // lookup type - Symbol *var = resolve_var(lex, e->name); - e->type = var->type; + Checker *C = V->state.C; + Type *want = C->return_; // function return type + Type *have = s->expr ? 
resolve_expr(V, s->expr) : NULL; + + if (y_is_unit(want)) { + if (have != NULL && !y_is_unit(have)) { + type_error(C, "expected '()' or empty return"); + } + } else if (have != NULL) { + unify(C, have, want); + } else { + type_error(C, "expected nonempty return"); + } } -static void visit_primitive_expr(Visitor *V, PrimitiveExpr *e) -{ - e->type = get_type(V, e->t); +static void register_method_decl(AstVisitor *V, AstDecl *method) +{ + FuncDecl *d = &method->func; + register_func(V, d); + + Checker *C = V->state.C; + d->receiver = cast_decl(C->struct_); } -static void visit_literal_expr(Visitor *V, LiteralExpr *e) -{ - V->expr(V, e->expr); - e->type = e->expr->type; - new_local_literal(V->lex, e->label, e->t); - e->type = get_type(V, e->t); +static void visit_method_decl(AstVisitor *V, AstDecl *method) +{ + FuncDecl *d = &method->func; + traverse_func(V, d, FUNC_METHOD); } -static void visit_logical_expr(Visitor *V, LogicalExpr *e) +static void visit_field_decl(AstVisitor *V, AstDecl *decl) { - V->expr(V, e->lhs); - V->expr(V, e->rhs); - e->type = get_type(V, PAW_TBOOL); + add_decl(V->state.C, decl); + FieldDecl *d = &decl->field; + d->type = resolve_expr(V, d->tag); } -static void visit_chain_expr(Visitor *V, ChainExpr *e) +static void register_struct(AstVisitor *V, StructDecl *d) { - V->expr(V, e->target); -// if (!y_is_object(e->target->type)) { -// pawX_error(V->lex, "'?' operator requires an object"); -// } + Checker *C = V->state.C; + StructDecl *enclosing = C->struct_; + C->struct_ = d; // enter struct context + enter_block(C, d->scope); + + // Resolve the fields and method signatures, but don't visit the method + // bodies. This prevents a situation where we could end up visiting a + // template instance before we are finished with the template itself. 
+ V->visit_decl_list(V, d->fields, visit_field_decl); + V->visit_method_list(V, d->methods, register_method_decl); + + d->scope = leave_block(C); + C->struct_ = enclosing; } -static void visit_cond_expr(Visitor *V, CondExpr *e) +static void visit_struct(AstVisitor *V, StructDecl *d) { - V->expr(V, e->cond); - V->expr(V, e->lhs); - V->expr(V, e->rhs); - e->type = get_common(V, e->lhs->type, e->rhs->type); + Checker *C = V->state.C; + StructDecl *enclosing = C->struct_; + C->struct_ = d; // enter struct context + enter_block(C, d->scope); + + V->visit_method_list(V, d->methods, visit_method_decl); + + d->scope = leave_block(C); + C->struct_ = enclosing; + d->is_visited = PAW_TRUE; } -static void visit_coalesce_expr(Visitor *V, CoalesceExpr *e) +static void visit_struct_decl(AstVisitor *V, StructDecl *d) { - V->expr(V, e->lhs); - V->expr(V, e->rhs); - e->type = get_common(V, e->lhs->type, e->rhs->type); -// if (!y_is_object(e->lhs->type)) { -// pawX_error(V->lex, "'?:' operator requires an object"); -// } + Checker *C = V->state.C; + Symbol *symbol = new_symbol(C, d->name, cast_decl(d), d->is_global); + if (!d->is_poly) { + define_struct(V, d); + register_struct(V, d); + visit_struct(V, d); + } else { + if (C->func_depth > 1) { + // TODO: This restriction makes it far easier to implement generics. It will + // likely be lifted in the future. 
+ syntax_error(C, "templates must be toplevel"); + } + symbol->is_type = PAW_TRUE; + + GenericState gs; + enter_generic_ctx(C, &gs, d->unify); + Binder *types = collect_generics(V, d->generics); + create_type_vars(C, d->generics); + define_poly_struct(V, d, NO_DECL, types); + register_struct(V, d); + visit_struct(V, d); + d->unify = leave_generic_ctx(C); + +// AstDecl *inst = d->next; +// while (inst != NULL) { +// d = &inst->struct_; +// enter_generic_ctx(C, &gs, d->unify); +// visit_struct(V, d); +// d->unify = leave_generic_ctx(C); +// } + } } -static void visit_unop_expr(Visitor *V, UnOpExpr *e) +static void visit_var_decl(AstVisitor *V, VarDecl *d) { - V->expr(V, e->target); - e->type = check_unop(V, e); + Checker *C = V->state.C; + Symbol *symbol = declare_symbol(C, d->name, cast_decl(d), d->is_global); + Type *init = resolve_expr(V, d->init); + define_symbol(symbol); + + if (d->tag != NULL) { + // check initializer against annotation + Type *tag = resolve_expr(V, d->tag); + unify(C, init, tag); + } + add_decl(C, cast_decl(d)); + d->type = init; } -static void visit_binop_expr(Visitor *V, BinOpExpr *e) +static void visit_type_decl(AstVisitor *V, TypeDecl *d) { - V->expr(V, e->lhs); - V->expr(V, e->rhs); - e->type = check_binop(V, e); + // TODO: generic parameters for aliases + Symbol *symbol = declare_symbol(V->state.C, d->name, cast_decl(d), PAW_FALSE); + d->type = resolve_expr(V, d->rhs); + //create_type_vars(C, d->generics); + //unify(C, d->name, d->type); + define_symbol(symbol); } -static Type **collect_param_types(Lex *lex, Stmt *head, int nargs) -{ - Type **args = NULL; - if (nargs) { - args = pawM_new_vec(env(lex), nargs, Type *); - for (int i = 0; i < nargs; ++i, head = head->next) { - ParamStmt *s = cast_to(head, ParamStmt); - args[i] = resolve_type(lex, s->tag); +// Run the inference algorithm +// +// parameters +// ------------ +// generics: type parameters from template. 
+// params: formal parameters from function signature +// args: arguments from function call +static Binder *infer_template_param(AstVisitor *V, AstDeclList *generics, AstDeclList *params, AstExprList *args) +{ + Checker *C = V->state.C; + AstDecl *generic = generics->first; + paw_assert(generic != NULL); + do { + GenericDecl *d = &generic->generic; + Type *type = get_raw_type(C, d->def); + pawP_new_type_var(C->U, type); + generic = d->next; + } while (generic != NULL); + + // Attempt to determine a type for each generic parameter, using the + // combination of function parameters and arguments. Any parameter type + // might equal, or contain, one of the generic type parameters. + AstExpr *arg = args->first; + AstDecl *par = params->first; + while (arg && par) { + Type *a = par->field.type; + Type *b = resolve_expr(V, arg); + unify(C, a, b); + par = par->hdr.next; + arg = arg->hdr.next; + } + + // Create a list of type parameters for the instance. + Binder *types = temp_binder(C, generics->count); + generic = generics->first; + for (int i = 0; i < generics->count; ++i) { + GenericDecl *d = &generic->generic; + Type *type = get_type(C, d->def); + if (y_is_type_var(type) && p_is_bound(C->U, type)) { + type_error(C, "unable to infer generic parameter '%s'", d->name->text); } + types->types[i] = type; + generic = d->next; + } + return types; +} + +static FuncDecl *find_func_instance(Checker *C, FuncDecl *base, Binder *types) +{ + paw_assert(types->count > 0); + AstDecl *inst = base->next; + while (inst != NULL) { + FuncDecl *func = &inst->func; + AstDecl *decl = func->generics->first; + // NOTE: must enter the loop: requires at least 1 type argument + for (int i = 0; i < types->count; ++i) { + GenericDecl *generic = &decl->generic; + Type *type = get_type(C, generic->def); + if (!test_types(C, type, types->types[i])) { + goto next_inst; + } + decl = generic->next; + } + // Found an existing function template instance. 
Use its inference + // variables, which should already be resolved to concrete types. + return func; + +next_inst: + inst = inst->hdr.next; } - return args; + return NULL; } -static Type *register_fn(Lex *lex, Function *fn) +static StructDecl *find_struct_instance(Checker *C, StructDecl *base, Binder *types) { - Type fake = {0}; - fake.sig.kind = TYPE_SIGNATURE; - fake.sig.id = PAW_TFUNCTION; // temporary - fake.sig.args = collect_param_types(lex, fn->args, fn->nargs); - fake.sig.nargs = fn->nargs; - fake.sig.ret = fn->ret ? resolve_type(lex, fn->ret) : NULL; - fn->type = cannonicalize(lex, &fake); - return fn->type; + AstDecl *inst = base->next; + while (inst != NULL) { + StructDecl *struct_ = &inst->struct_; + Type *type = get_type(C, struct_->def); + if (test_binders(C, &type->adt.types, types)) { + return struct_; + } + inst = inst->hdr.next; + } + return NULL; } -static void visit_fn(Visitor *V, Function *fn, Type *sig) +static FuncDecl *new_func_instance(AstVisitor *V, FuncDecl *base, Binder *types) { - Lex *lex = V->lex; - Function *outer = V->fn; - V->fn = fn; + // Copy the whole function template subtree. 
+ AstDecl *stencil = pawA_stencil(V->ast, cast_decl(base)); + FuncDecl *inst = &stencil->func; + inst->is_poly = PAW_FALSE; + + inst->next = base->next; + base->next = cast_decl(inst); - enter_function(lex, fn->name, sig, fn->kind); - V->stmt_list(V, fn->args); - V->block_stmt(V, fn->body); - fn->scope = leave_function(lex); - V->fn = outer; + define_poly_func(V, inst, base->def, types); + visit_func(V, inst, inst->fn_kind); + return inst; } -static void visit_attr_stmt(Visitor *V, AttrStmt *s) +static StructDecl *new_struct_instance(AstVisitor *V, StructDecl *base, Binder *types) { - if (s->is_fn) { - Type *sig = register_fn(V->lex, &s->fn); - visit_fn(V, &s->fn, sig); - } + AstDecl *stencil = pawA_stencil(V->ast, cast_decl(base)); + StructDecl *inst = &stencil->struct_; + inst->is_poly = PAW_FALSE; + + inst->next = base->next; + base->next = cast_decl(inst); + + define_poly_struct(V, inst, base->def, types); + // Determine attribute types, but don't visit the method bodies, since we may + // still be in the process of visiting the base template's method bodies. That + // would cause control to visit the instance before finishing with the template + // itself. + register_struct(V, inst); + + // Checker *C = V->state.C; + // if (C->struct_ != base) { + visit_struct(V, inst); + // } + return inst; } -static Type *register_attr(Visitor *V, AttrStmt *s) +static FuncDecl *instantiate_func(AstVisitor *V, FuncDecl *base, Binder *types) { - if (s->is_fn) { - return register_fn(V->lex, &s->fn); + Checker *C = V->state.C; + FuncDecl *inst = find_func_instance(C, base, types); + if (inst != NULL) { + // pawP_unifier_replace(C->U, inst->unify); + } else { + inst = new_func_instance(V, base, types); } - return resolve_type(V->lex, s->tag); + return inst; } - // TODO: 'fake' variable's attrs list needs a 'box'. 'release' the box right after - // calling pawY_add_type, which will take ownership of the - // allocation on success. 
-static void visit_class_stmt(Visitor *V, ClassStmt *s) +static FuncDecl *init_func_template(AstVisitor *V, FuncDecl *base, Binder *types) { - Lex *lex = V->lex; - paw_Env *P = env(lex); - Type fake = {0}; - - Stmt *attr = s->attrs; - NamedField *attrs = NULL; - if (s->nattrs > 0) { - // determine attribute types - attrs = pawM_new_vec(P, s->nattrs, NamedField); - for (int i = 0; i < s->nattrs; ++i) { - AttrStmt *a = cast_to(attr, AttrStmt); - attrs[i].type = register_attr(V, a); - attrs[i].name = a->name; - if (a->is_fn) { - attrs[i].flags = FIELD_IS_METHOD; - } - attr = attr->next; - } - } - fake.cls.id = PAW_TCLASS; // temporary - fake.cls.kind = TYPE_CLASS; - fake.cls.super = s->super ? s->super->type : NULL; - fake.cls.nattrs = s->nattrs; - fake.cls.attrs = attrs; - fake.cls.name = s->name; - Type *type = cannonicalize(lex, &fake); - - Symbol *var = declare_var(lex, s->name, type, s->flags.global); - var->is_type = PAW_TRUE; - - enter_block(lex); // scope for 'super' - V->expr(V, s->super); // before 's->name' defined - define_var(var); // allow access from class body - - // validate attributes - attr = s->attrs; - ClsState cs = {.outer = lex->cs}; - lex->cs = &cs; // enter class context - for (int i = 0; i < s->nattrs; ++i) { - AttrStmt *a = cast_to(attr, AttrStmt); - visit_attr_stmt(V, a); - attr = attr->next; - } - s->scope = leave_block(lex); - lex->cs = cs.outer; + Checker *C = V->state.C; + GenericState gs; + + enter_generic_ctx(C, &gs, NULL); + check_template_param(C, base->generics, types); + FuncDecl *inst = instantiate_func(V, base, types); + inst->unify = leave_generic_ctx(C); + return inst; } -static void visit_block_stmt(Visitor *V, Block *bk) +static FuncDecl *infer_func_template(AstVisitor *V, FuncDecl *base, AstExprList *args) { - Lex *lex = V->lex; - enter_block(lex); - V->stmt_list(V, bk->stmts); - bk->scope = leave_block(lex); + Checker *C = V->state.C; + GenericState gs; + + enter_generic_ctx(C, &gs, NULL); + Binder *types = 
infer_template_param(V, base->generics, base->params, args); + FuncDecl *inst = instantiate_func(V, base, types); + inst->unify = leave_generic_ctx(C); + return inst; } -static void visit_param_stmt(Visitor *V, ParamStmt *s) +static StructDecl *get_struct_decl(Checker *C, Type *type) { - Type *type = resolve_type(V->lex, s->tag); - paw_assert(type != NULL); // checked in parse.c - new_var(V->lex, s->name, type, PAW_FALSE); + paw_assert(y_kind(type) == TYPE_ADT); + AstDecl *decl = get_decl(C, type->adt.def); + paw_assert(a_kind(decl) == DECL_STRUCT); + return &decl->struct_; } -static void visit_def_stmt(Visitor *V, DefStmt *s) +static FuncDecl *infer_method_template(AstVisitor *V, FuncDecl *base, AstExprList *args) { - Lex *lex = V->lex; - Symbol *var = declare_var(lex, s->name, NULL, s->flags.global); - V->expr(V, s->init); - define_var(var); + Checker *C = V->state.C; + if (!base->is_poly) { + return base; + } + StructDecl *parent = &base->receiver->struct_; + StructDecl *outer = C->struct_; + C->struct_ = parent; - if (s->tag == NULL) { - // infer from initializer - if (s->init == NULL) { - pawX_error(lex, "missing initializer"); - } else if (s->init->type == NULL) { - pawX_error(lex, "unable to infer type from 'null'"); - } - var->type = s->init->type; - return; + enter_block(C, parent->scope); + FuncDecl *inst = infer_func_template(V, base, args); + pop_symbol_table(C); + + C->struct_ = outer; + return inst; +} + +static StructDecl *instantiate_struct(AstVisitor *V, StructDecl *base, Binder *types) +{ + Checker *C = V->state.C; + StructDecl *inst = find_struct_instance(C, base, types); + if (inst != NULL) { + //pawP_unifier_replace(C->U, inst->unify); + } else { + inst = new_struct_instance(V, base, types); } - - Type *type = resolve_type(V->lex, s->tag); - var->type = type; - if (s->init == NULL) { - // empty initializer: set default during codegen - } else if (!pawY_is_same(s->init->type, type)) { - pawX_error(lex, "initializer incompatible with type 
annotation"); + return inst; +} + +static Type *setup_func(AstVisitor *V, CallExpr *call, FuncDecl *d) +{ + if (d->is_poly) { + d = infer_func_template(V, d, call->args); } + return d->type; } -static void visit_return_stmt(Visitor *V, ReturnStmt *s) +static Type *setup_method(AstVisitor *V, CallExpr *call, FuncDecl *d) { - Function *fn = V->fn; - V->expr(V, s->expr); + paw_assert(a_kind(call->target) == EXPR_SELECTOR); + if (d->is_poly) { + d = infer_method_template(V, d, call->args); + } + // Lets the codegen V know to generate OP_INVOKE instead of OP_CALL, + // without having to look at the callable. + call->target->selector.is_method = PAW_TRUE; + return d->type; +} - if (fn->ret == NULL) { - if (s->expr != NULL) { - pawX_error(V->lex, "expected empty return"); - } - return; - } else if (s->expr == NULL) { - pawX_error(V->lex, "expected nonempty return"); +static Type *setup_call(AstVisitor *V, CallExpr *e) +{ + Type *target = resolve_expr(V, e->target); + if (!y_is_func(target)) { + type_error(V->state.C, "type is not callable"); } - - Type *ret = resolve_type(V->lex, fn->ret); - if (!pawY_is_same(ret, s->expr->type)) { - pawX_error(V->lex, "return type incompatible with annotation"); + FuncSig *func = &target->func; + if (func->def == NO_DECL) { + // This happens when we don't know where a function was declared. For + // example, the following code can produce different values of 'g' + // depending on what value 'n' is given. This is okay, because the + // function returned by 'f' must already be instantiated, if it is a + // function template: + // let n = + // fn f(n: int) -> fn() {...} + // let g = f(n) + // g() + return target; + } + // Function type has an associated declaration. If that declaration is for a + // function or method template, attempt to infer the type parameters. + AstDecl *decl = get_decl(V->state.C, func->def); + return decl->func.fn_kind == FUNC_METHOD + ? 
setup_method(V, e, &decl->func) + : setup_func(V, e, &decl->func); + +} + +static void visit_call_expr(AstVisitor *V, CallExpr *e) +{ + Checker *C = V->state.C; + // Determine the type of the callable, then find its declaration. Template functions will + // need type inference, which is handled in setup_call(). + e->func = setup_call(V, e); + e->type = e->func->func.return_; + if (y_kind(e->func) != TYPE_FUNC) { + type_error(C, "type is not callable"); + } + FuncSig *func = &e->func->func; + Binder *params = &func->params; + if (params->count != e->args->count) { + syntax_error(C, "expected %d parameter(s) but found %d", + func->params.count, e->args->count); + } + // check call arguments against function parameters + AstExpr *arg = e->args->first; + for (int i = 0; i < params->count; ++i) { + Type *type = resolve_expr(V, arg); + unify(C, type, params->types[i]); + arg = arg->hdr.next; } } -static void visit_call_expr(Visitor *V, CallExpr *e) +// TODO: scratch allocations need to be boxed +// could allow unnamed fields for other classes if they are in the correct order already +static Type *visit_composite_lit(AstVisitor *V, LiteralExpr *lit) { - V->expr(V, e->target); - V->expr_list(V, e->args); + CompositeLit *e = &lit->comp; + Checker *C = V->state.C; + Lex *lex = C->lex; - Type *tag = e->target->type; - if (y_kind(tag) != TYPE_SIGNATURE) { - pawX_error(V->lex, "type is not callable"); + // Replace the AstIdent or IndexExpr with the TypeName of the structure. 
+ Type *target = resolve_expr(V, e->target); + if (!y_is_adt(target)) { + type_error(C, "expected structure type"); } - const FunctionType *sig = &tag->sig; - e->type = sig->ret; // propagate return type - if (sig->nargs != e->nargs) { - pawX_error(V->lex, "expected %d parameters but found %d", - sig->nargs, e->nargs); + StructDecl *struct_ = get_struct_decl(C, target); + if (struct_->is_poly) { + type_error(C, "struct template requires explicit type arguments"); } - Expr *arg = e->args; - for (int i = 0; i < sig->nargs; ++i) { - if (!pawY_is_same(arg->type, sig->args[i])) { - pawX_error(V->lex, "invalid parameter type"); + // Use a temporary Map to avoid searching repeatedly through the + // list of attributes. + paw_Env *P = env(lex); + Value *pv = pawC_push0(P); + Map *map = pawH_new(P); + v_set_object(pv, map); + + Value key; + AstExpr *item = e->items->first; + AstExpr **order = pawM_new_vec(P, e->items->count, AstExpr *); + for (int i = 0; item != NULL; ++i) { + ItemExpr *ie = &item->item; + v_set_object(&key, ie->name); + if (pawH_contains(P, map, key)) { + syntax_error(C, "duplicate attribute '%s' in struct '%s'", + ie->name->text, struct_->name->text); } - arg = arg->next; + Value *value = pawH_action(P, map, key, MAP_ACTION_CREATE); + v_set_int(value, i); + order[i] = item; + item = ie->next; } + AstDecl *decl = struct_->fields->first; + for (int i = 0; i < struct_->fields->count; ++i) { + FieldDecl *field = &decl->field; + v_set_object(&key, field->name); + Value *value = pawH_get(P, map, key); + if (value == NULL) { + syntax_error(C, "missing initializer for field '%s' in struct '%s'", + field->name->text, e->target->type_name.name->text); + } else { + const paw_Int index = v_int(*value); + ItemExpr *ie = &order[index]->item; + ie->index = i; // index of attribute in struct + Type *a = resolve_expr(V, ie->value); + Type *b = get_type(C, field->def); + unify(C, a, b); + } + pawH_remove(P, map, key); + decl = field->next; + } + if (pawH_length(map) > 0) { 
+ syntax_error(C, "found %s extra initializers"); + } + paw_assert(struct_->fields->count == e->items->count); + + pawC_pop(P); // pop map + pawM_free_vec(P, order, e->items->count); + return target; } -static void visit_fn_stmt(Visitor *V, FnStmt *s) +static void visit_literal_expr(AstVisitor *V, LiteralExpr *e) { - Type *sig = register_fn(V->lex, &s->fn); - Symbol *var = declare_var(V->lex, s->fn.name, sig, s->flags.global); - visit_fn(V, &s->fn, sig); - define_var(var); + if (e->lit_kind == LIT_BASIC) { + e->type = get_raw_type(V->state.C, e->basic.t); + } else { + paw_assert(e->lit_kind == LIT_COMPOSITE); + e->type = visit_composite_lit(V, e); + } } -static void visit_ifelse_stmt(Visitor *V, IfElseStmt *s) +static void visit_func_decl(AstVisitor *V, FuncDecl *d) { - V->expr(V, s->cond); - V->stmt(V, s->then_arm); - V->stmt(V, s->else_arm); + Symbol *symbol = declare_symbol(V->state.C, d->name, cast_decl(d), d->is_global); + symbol->is_type = d->is_poly; + register_func(V, d); + traverse_func(V, d, FUNC_FUNCTION); + define_symbol(symbol); } -static void visit_expr_stmt(Visitor *V, ExprStmt *s) +static void visit_if_stmt(AstVisitor *V, IfStmt *s) { + expect_bool_expr(V, s->cond); + V->visit_stmt(V, s->then_arm); + V->visit_stmt(V, s->else_arm); +} + +static void visit_expr_stmt(AstVisitor *V, AstExprStmt *s) +{ + Type *lhs = resolve_expr(V, s->lhs); if (s->rhs != NULL) { - visit_assignment(V, s->lhs, s->rhs); - } else { - V->expr(V, s->lhs); + Type *rhs = resolve_expr(V, s->rhs); + unify(V->state.C, lhs, rhs); } } -static void visit_while_stmt(Visitor *V, WhileStmt *s) +static void visit_while_stmt(AstVisitor *V, WhileStmt *s) { - V->expr(V, s->cond); - V->block_stmt(V, s->block); + enter_block(V->state.C, NULL); + expect_bool_expr(V, s->cond); + V->visit_block_stmt(V, s->block); + s->scope = leave_block(V->state.C); } -static void visit_dowhile_stmt(Visitor *V, WhileStmt *s) +static void visit_dowhile_stmt(AstVisitor *V, WhileStmt *s) { - V->block_stmt(V, 
s->block); - V->expr(V, s->cond); + enter_block(V->state.C, NULL); + V->visit_block_stmt(V, s->block); + expect_bool_expr(V, s->cond); + s->scope = leave_block(V->state.C); } -static void visit_forbody_stmt(Visitor *V, String *iname, Block *bk) +static void visit_for_body(AstVisitor *V, String *iname, Block *b) { - Lex *lex = V->lex; - enter_block(lex); - new_local_var(lex, iname, get_type(V, PAW_TINT)); - V->block_stmt(V, bk); - bk->scope = leave_block(lex); + Checker *C = V->state.C; + enter_block(C, NULL); + new_local(C, iname, basic_decl(C, PAW_TINT)); + V->visit_stmt_list(V, b->stmts, V->visit_stmt); + b->scope = leave_block(C); } -static void visit_fornum_stmt(Visitor *V, ForStmt *s) +static void visit_fornum(AstVisitor *V, ForStmt *s) { ForNum *fornum = &s->fornum; - V->expr(V, fornum->begin); - V->expr(V, fornum->end); - V->expr(V, fornum->step); - check_integral(V, fornum->begin->type); - check_integral(V, fornum->end->type); - check_integral(V, fornum->step->type); + expect_int_expr(V, fornum->begin); + expect_int_expr(V, fornum->end); + expect_int_expr(V, fornum->step); + + new_local_literal(V->state.C, "(for begin)", PAW_TINT); + new_local_literal(V->state.C, "(for end)", PAW_TINT); + new_local_literal(V->state.C, "(for step)", PAW_TINT); - visit_forbody_stmt(V, s->name, s->block); + visit_for_body(V, s->name, s->block); } -static void visit_forin_stmt(Visitor *V, ForStmt *s) // TODO: forin would need to encode the type of object being iterated over. look into function call for loop? +static void visit_forin(AstVisitor *V, ForStmt *s) // TODO: forin would need to encode the type of object being iterated over. look into function call for loop? 
{ -// Lex *lex = V->lex; +// Lex *lex = C->lex; // ForIn *forin = &s->forin; -// new_local_literal(lex, "(for target)", PAW_TINT); -// new_local_literal(lex, "(for iterator)", PAW_TINT); -// V->expr(V, forin->target); +// new_local_literal(C, "(for target)", PAW_TINT); +// new_local_literal(C, "(for iterator)", PAW_TINT); +// V->visit_expr(V, forin->target); // // Type *inner = pawY_unwrap(env(lex), forin->target->type); -// new_local_var(lex, s->name, inner); +// new_local(C, s->name, inner); // -// visit_forbody_stmt(V, s->name, s->block); +// V->visit_for_body(V, s->name, s->block); } -static void visit_for_stmt(Visitor *V, ForStmt *s) +static void visit_for_stmt(AstVisitor *V, ForStmt *s) { - Lex *lex = V->lex; - enter_block(lex); + enter_block(V->state.C, NULL); if (s->kind == STMT_FORNUM) { - visit_fornum_stmt(V, s); + visit_fornum(V, s); } else { - visit_forin_stmt(V, s); + visit_forin(V, s); } - s->scope = leave_block(lex); + s->scope = leave_block(V->state.C); } -static void visit_array_expr(Visitor *V, ArrayExpr *e) +static void check_index(Checker *C, Type *target, Type *elem) { -// NodeVec elems = e->items; -// paw_Env *P = env(V->lex); -// Type *t = get_type(V, ); -// for (int i = 0; i < elems.size; ++i) { -// Expr *elem = cast_expr(elems.nodes[i]); -// V->expr(V, elem); -// if (t == NULL) { -// t = elem->type; -// } else { -// t = get_common(V, t, elem->type); -// } -// } -// int min_level = -1; -// for (int i = 0; i < elems.size; ++i) { -// Expr *elem = cast_expr(elems.nodes[i]); -// Type *tag = elem->type; -// int level; -// if (y_is_array(tag)) { -// level = tag->a.level; -// } else { -// level = 0; -// } -// if (0 <= min_level && min_level < level) { -// pawX_error(V->lex, "inconsistent array type"); -// } -// min_level = level; -// } -// // Register the array type. '[null]' means the array was empty, and the actual -// // type could not be determined (need to keep track of nesting depth). 
-// e->type = pawY_register_array(P, t); -} - -//static void visit_map_expr(Visitor *V, MapExpr *e) -//{ -// NodeVec items = e->items; -// Type *tk = NULL; -// Type *tv = NULL; -// for (int i = 0; i < items.size; i += 2) { -// Expr *key = cast_expr(items.nodes[i]); -// Expr *value = cast_expr(items.nodes[i + 1]); -// V->expr(V, key); -// V->expr(V, value); -// if (tk == NULL) { -// tk = key->type; -// tv = value->type; -// check_primitive(V, tk); -// } else { -// tk = get_common(V, tk, key->type); -// tv = get_common(V, tv, value->type); -// } -// } -// paw_Env *P = env(V->lex); -// e->type = pawY_register_map(P, tk, tv); -//} + // TODO: Unify elem with the type expected by target container, return contained type +} -static void check_index(Visitor *V, IndexExpr *e) +static Type *explicit_func_template(AstVisitor *V, FuncDecl *base, Index *e) { - if (y_is_class(e->target->type)) { -// if (e->second == NULL) { -// Type *tag = resolve_mm(env(V->lex), e->target->type, MM_GETITEM); -// } else { -// resolve_mm(env(V->lex), e->target->type, MM_GETSLICE); -// } - paw_assert(0); // TODO: try to lookup metamethod return type, fail if no metamethod for __getitem - return; - } -// e->type = pawY_unwrap(env(V->lex), e->target->type); -// if (y_is_map(e->target->type)) { -// check_primitive(V, e->first->type); -// } else { -// check_sequence(V, e->target->type); -// check_integral(V, e->first->type); -// } + Binder *types = collect_types(V, e->elems); + FuncDecl *inst = init_func_template(V, base, types); + return inst->type; } -static void check_range(Visitor *V, IndexExpr *e) +static Type *explicit_struct_template(AstVisitor *V, StructDecl *base, Index *e) { - if (y_is_class(e->target->type)) { - paw_assert(0); // TODO: try to lookup metamethod return type, fail if no metamethod for __getslice - return; - } - check_same(V, e->first->type, e->second->type); - check_sequence(V, e->target->type); - check_integral(V, e->first->type); - check_integral(V, e->second->type); - 
e->type = e->target->type; + Binder *types = collect_types(V, e->elems); + StructDecl *inst = init_struct_template(V, base, types); + return inst->type; } -static void visit_index_expr(Visitor *V, IndexExpr *e) +static Type *explicit_method_template(AstVisitor *V, FuncDecl *base, Index *e) { - V->expr(V, e->target); - V->expr(V, e->first); - if (e->second != NULL) { - V->expr(V, e->second); - check_range(V, e); - } else { - check_index(V, e); - } + Checker *C = V->state.C; + StructDecl *parent = &base->receiver->struct_; + StructDecl *outer = C->struct_; + C->struct_ = parent; + + enter_block(C, parent->scope); + Binder *types = collect_types(V, e->elems); + FuncDecl *inst = init_func_template(V, base, types); + pop_symbol_table(C); + + C->struct_ = outer; + return inst->type; } -// TODO: scratch allocations need to be boxed -static void visit_init_expr(Visitor *V, InitExpr *e) -{ - Lex *lex = V->lex; - paw_assert(e->prefix->kind == EXPR_VAR); - VarExpr *ve = cast_to(e->prefix, VarExpr); - Symbol *sym = resolve_symbol(lex, ve->name); - Type *type = sym->type; - if (!y_is_class(type)) { - pawX_error(V->lex, "'%s' is not a class", pawY_name(y_id(type))); +static void visit_index_expr(AstVisitor *V, Index *e) +{ + Checker *C = V->state.C; + Type *target = resolve_expr(V, e->target); + AstDecl *decl = get_decl(C, target->hdr.def); + if (!a_is_template_decl(decl)) { + // 'e' represents a getter for a container element, rather than a + // template instantiation: no folding is necessary + if (e->elems->count != 1) { + paw_assert(e->elems->count > 1); + syntax_error(C, "too many indices (must be 1)"); + } + Type *elem = resolve_expr(V, e->elems->first); + check_index(C, target, elem); // TODO: assign e->type once check_index() returns the contained type + } + else if (a_kind(decl) == DECL_STRUCT) { // template decls only: a plain getter must not be instantiated + e->type = explicit_struct_template(V, &decl->struct_, e); + } else if (a_has_receiver(decl)) { + e->type = explicit_method_template(V, &decl->func, e); + } else { + e->type = explicit_func_template(V, &decl->func, e); } - paw_Env *P = env(lex); - Map 
*map = pawH_new(env(lex)); +} - Value key; - Stmt *attr = e->attrs; - Stmt **before = pawM_new_vec(env(lex), e->nattrs, Stmt *); - for (int i = 0; attr != NULL; ++i) { - ItemStmt *item = cast_to(attr, ItemStmt); - v_set_object(&key, item->name); - if (pawH_contains(P, map, key)) { - pawX_error(lex, "duplicate attribute '%s.%s'", - item->name->text, ve->name->text, item->name->text); - } - Value *value = pawH_action(P, map, key, MAP_ACTION_CREATE); - v_set_int(value, i); - before[i] = attr; +static void visit_item_expr(AstVisitor *V, ItemExpr *e) +{ + e->type = resolve_expr(V, e->value); +} - V->expr(V, item->value); - attr = attr->next; +static void visit_selector_expr(AstVisitor *V, Selector *e) +{ + Checker *C = V->state.C; + Type *type = resolve_expr(V, e->target); + if (!y_is_adt(type)) { + type_error(C, "expected struct instance"); } - Stmt **after = pawM_new_vec(env(lex), e->nattrs, Stmt *); - for (int i = 0; i < type->cls.nattrs; ++i) { - NamedField *a = &type->cls.attrs[i]; - v_set_object(&key, a->name); - Value *value = pawH_get(P, map, key); - if (a->flags & FIELD_IS_METHOD) { - if (value != NULL) { - pawX_error(lex, "initializer not allowed for method '%s.%s'", - ve->name->text, a->name->text); - } - } else if (value == NULL) { - pawX_error(lex, "missing initializer for attribute '%s.%s'", - ve->name->text, a->name->text); - } else { - const paw_Int index = v_int(*value); - after[i] = before[index]; + StructDecl *d = get_struct_decl(C, type); +// e->name = pawI_mangle_attr(C->lex, d->name, e->name); + + AstDecl *attr = resolve_attr(d->fields, e->name); + if (attr == NULL) { + attr = resolve_attr(d->methods, e->name); + if (attr == NULL) { + syntax_error(C, "attribute '%s' does not exist", e->name->text); } + e->is_method = PAW_TRUE; } - paw_assert(type->cls.nattrs >= e->nattrs); - - // Put attributes in the correct order. - e->attrs = after[0]; - for (int i = 0; i < e->nattrs; ++i) { - after[i]->next = i < e->nattrs - 1 - ? 
after[i + 1] - : NULL; + e->type = get_type(C, attr->hdr.def); +} + +static void visit_decl_stmt(AstVisitor *V, AstDeclStmt *s) +{ + V->visit_decl(V, s->decl); +} + +static void expand_func_decl(AstVisitor *V, FuncDecl *d) +{ + Checker *C = V->state.C; + AstDecl *decl = d->next; + while (decl != NULL) { + d = &decl->func; + if (!d->is_visited) { + traverse_func(C->V1, d, FUNC_FUNCTION); + ++V->state.C->nexpanded; + } + decl = d->next; } - pawH_free(P, map); - pawM_free_vec(P, before, e->nattrs); - pawM_free_vec(P, after, e->nattrs); - e->prefix->type = e->type = type; } -static void find_attr(Lex *lex, ClassType *cls, String *name, int *pindex) +static void expand_struct_decl(AstVisitor *V, StructDecl *d) { - for (int i = 0; i < cls->nattrs; ++i) { - if (pawS_eq(cls->attrs[i].name, name)) { - *pindex = i; - return; + Checker *C = V->state.C; + AstDecl *decl = d->next; + while (decl != NULL) { + d = &decl->struct_; + if (!d->is_visited) { + visit_struct(C->V1, d); + ++V->state.C->nexpanded; } + decl = d->next; } - pawX_error(lex, "attribute '%s' does not exist", name->text); } -static void visit_access_expr(Visitor *V, AccessExpr *e) +static void setup_globals(Checker *C) { - Lex *lex = V->lex; - V->expr(V, e->target); - check_accessible(V, e->target->type); - - int index; - ClassType *cls = &e->target->type->cls; - find_attr(lex, cls, e->name, &index); + paw_Env *P = env(C->lex); + for (int i = 0; i < P->gv.size; ++i) { + // TODO: global type info -> AST type node for globals + GlobalVar g = P->gv.data[i]; + AstDecl *d = pawA_new_decl(C->ast, DECL_FUNC); + d->func.type = P->mod->types[g.desc.code]; + d->func.is_global = PAW_TRUE; + add_decl(C, d); + + //AstExpr *type = pawA_new_expr(C->ast, EXPR_FUNC_TYPE); + //if(0==strcmp(g.desc.name->text,"assert")){ // TODO: Write a function that parses type info into an AST type + // type->func.params = get_type(C, PAW_TBOOL); + // type->func.nparams = 1; + // type->func.return_ = get_type(C, PAW_TUNIT); + //}else 
if(0==strcmp(g.desc.name->text,"print")){ + // type->func.params = get_type(C, PAW_TSTRING); + // type->func.nparams = 1; + // type->func.return_ = get_type(C, PAW_TUNIT); + //} else{ + // paw_assert(0); + //} + //d->func.type = type; + //type->func.decl = d; + Symbol *s = add_global(C, g.desc.name, d); + define_symbol(s); + } +} + +static void add_basic_symbol(Checker *C, String *name) +{ + AstExpr *e = pawA_new_expr(C->ast, EXPR_TYPE_NAME); + e->type_name.args = pawA_new_expr_list(C->ast); + e->type_name.name = name; - e->type = cls->attrs[index].type; + AstDecl *d = pawA_new_decl(C->ast, DECL_TYPE); + d->type.name = name; + d->type.line = 0; + d->type.rhs = e; + + add_decl(C, d); + + paw_Env *P = env(C->lex); + const paw_Type code = flag2code(name->flag); + d->hdr.type = P->mod->types[code]; + Symbol *symbol = new_local(C, name, d); + symbol->is_type = PAW_TRUE; } -static void visit_invoke_expr(Visitor *V, InvokeExpr *e) +static void setup_resolver(AstVisitor *V, Checker *C) { - Lex *lex = V->lex; - V->expr(V, e->target); - check_accessible(V, e->target->type); + add_basic_symbol(C, cached_str(C, CSTR_UNIT)); + add_basic_symbol(C, cached_str(C, CSTR_BOOL)); + add_basic_symbol(C, cached_str(C, CSTR_INT)); + add_basic_symbol(C, cached_str(C, CSTR_FLOAT)); + add_basic_symbol(C, cached_str(C, CSTR_STRING)); - ClassType *cls = &e->target->type->cls; - find_attr(lex, cls, e->name, &e->index); + // no generic context + C->U->depth = -1; - FunctionType *sig = &cls->attrs[e->index].type->sig; - if (sig->nargs != e->nargs) { - pawX_error(lex, "expected %d arguments but found %d", - sig->nargs, e->nargs); - } + setup_globals(C); - Expr *arg = e->args; - for (int i = 0; i < e->nargs; ++i) { - V->expr(V, arg); - check_same(V, arg->type, sig->args[i]); - arg = arg->next; - } - e->type = sig->ret; + const AstState state = {.C = C}; + pawA_visitor_init(V, C->ast, state); + V->visit_literal_expr = visit_literal_expr; + V->visit_logical_expr = visit_logical_expr; + 
V->visit_ident_expr = visit_ident_expr; + V->visit_chain_expr = visit_chain_expr; + V->visit_unop_expr = visit_unop_expr; + V->visit_binop_expr = visit_binop_expr; + V->visit_cond_expr = visit_cond_expr; + V->visit_call_expr = visit_call_expr; + V->visit_index_expr = visit_index_expr; + V->visit_selector_expr = visit_selector_expr; + V->visit_item_expr = visit_item_expr; + V->visit_type_name_expr = visit_type_name_expr; + V->visit_signature_expr = visit_signature_expr; + V->visit_block_stmt = visit_block_stmt; + V->visit_expr_stmt = visit_expr_stmt; + V->visit_decl_stmt = visit_decl_stmt; + V->visit_if_stmt = visit_if_stmt; + V->visit_for_stmt = visit_for_stmt; + V->visit_while_stmt = visit_while_stmt; + V->visit_dowhile_stmt = visit_dowhile_stmt; + V->visit_return_stmt = visit_return_stmt; + V->visit_var_decl = visit_var_decl; + V->visit_func_decl = visit_func_decl; + V->visit_struct_decl = visit_struct_decl; + V->visit_type_decl = visit_type_decl; } -void p_check_types(Lex *lex) +static void resolve_module(AstVisitor *V) { - paw_Env *P = env(lex); + pawA_visit(V); +} - Visitor V; - pawK_init_visitor(&V, lex); - V.primitive_expr = visit_primitive_expr; - V.literal_expr = visit_literal_expr; - V.logical_expr = visit_logical_expr; - V.chain_expr = visit_chain_expr; - V.cond_expr = visit_cond_expr; - V.coalesce_expr = visit_coalesce_expr; - V.unop_expr = visit_unop_expr; - V.binop_expr = visit_binop_expr; - V.var_expr = visit_var_expr; - V.array_expr = visit_array_expr; - //V.map_expr = visit_map_expr; - V.init_expr = visit_init_expr; - V.access_expr = visit_access_expr; - V.invoke_expr = visit_invoke_expr; - V.index_expr = visit_index_expr; - V.return_stmt = visit_return_stmt; - V.call_expr = visit_call_expr; - V.param_stmt = visit_param_stmt; - V.class_stmt = visit_class_stmt; - V.block_stmt = visit_block_stmt; - V.def_stmt = visit_def_stmt; - V.fn_stmt = visit_fn_stmt; - V.for_stmt = visit_for_stmt; - V.while_stmt = visit_while_stmt; - V.dowhile_stmt = 
visit_dowhile_stmt; - V.ifelse_stmt = visit_ifelse_stmt; - V.expr_stmt = visit_expr_stmt; +static void setup_expander(AstVisitor *V1, AstVisitor *V2, Checker *C) +{ + C->V1 = V1; + const AstState state = {.C = C}; + pawA_visitor_init(V2, C->ast, state); + V2->visit_func_decl = expand_func_decl; + V2->visit_struct_decl = expand_struct_decl; +} - for (int i = 0; i < P->gv.size; ++i) { - GlobalVar g = P->gv.data[i]; - Symbol *s = add_global(lex, g.desc.name, g.desc.type); - define_var(s); - } +static void expand_templates(AstVisitor *V) +{ + Checker *C = V->state.C; + do { + C->nexpanded = 0; + pawA_visit(V); + } while (C->nexpanded > 0); +} + +static void visit_module(Checker *C) +{ + Lex *lex = C->lex; + SymbolTable *sym = C->sym; + AstDecl *r = pawA_new_decl(C->ast, DECL_FUNC); + enter_function(C, lex->modname, NULL, &r->func, FUNC_MODULE); - enter_function(lex, lex->modname, NULL, FN_MODULE); - pawK_visit(&V, lex->ast); - lex->pm->st.toplevel = leave_function(lex); + AstVisitor V1; + setup_resolver(&V1, C); + resolve_module(&V1); - paw_assert(lex->pm->st.nscopes == 0); +// AstVisitor V2; +// setup_expander(&V1, &V2, C); +// expand_templates(&V2); + + sym->toplevel = leave_function(C); + paw_assert(sym->nscopes == 0); +} + +void p_check_types(Lex *lex) +{ + Checker C = { + .lex = lex, + .pm = lex->pm, + .ast = lex->pm->ast, + .sym = &lex->pm->symbols, + .U = &lex->pm->unifier, + }; + visit_module(&C); } diff --git a/src/code.c b/src/code.c index 47fa44a..c91b5f1 100644 --- a/src/code.c +++ b/src/code.c @@ -6,29 +6,29 @@ #include "mem.h" #include -static void add_line(FnState *fs) +static void add_line(FuncState *fs) { - Lex *lex = fs->lex; + Lex *lex = fs->G->lex; Proto *p = fs->proto; if (fs->nlines == UINT16_MAX) { pawX_error(lex, "too many instructions"); } pawM_grow(lex->P, p->lines, fs->nlines, p->nlines); p->lines[fs->nlines++] = (struct LineInfo){ - .line = lex->lastline, + .line = lex->line, .pc = fs->pc, }; } -void pawK_fix_line(FnState *fs, int line) 
+void pawK_fix_line(FuncState *fs, int line) { paw_assert(fs->nlines > 0); fs->proto->lines[fs->nlines - 1].line = line; } -static void add_opcode(FnState *fs, OpCode code) +static void add_opcode(FuncState *fs, OpCode code) { - Lex *lex = fs->lex; + Lex *lex = fs->G->lex; Proto *p = fs->proto; // While code is being generated, the pc is used to track the number of instructions, and @@ -38,447 +38,104 @@ static void add_opcode(FnState *fs, OpCode code) ++fs->pc; } -void pawK_code_0(FnState *fs, Op op) +void pawK_code_0(FuncState *fs, Op op) { add_line(fs); add_opcode(fs, create_OP(op)); } -void pawK_code_S(FnState *fs, Op op, int s) +void pawK_code_S(FuncState *fs, Op op, int s) { add_line(fs); add_opcode(fs, create_S(op, s)); } -void pawK_code_U(FnState *fs, Op op, int u) +void pawK_code_U(FuncState *fs, Op op, int u) { add_line(fs); add_opcode(fs, create_U(op, u)); } -void pawK_code_AB(FnState *fs, Op op, int a, int b) +void pawK_code_AB(FuncState *fs, Op op, int a, int b) { add_line(fs); add_opcode(fs, create_AB(op, a, b)); } -static Arena *new_arena(paw_Env *P, size_t size) +// Add a new arena large enough to allocate memory of the 'required_size' +// Alignment is not considered, since the start of an Arena is suitably-aligned +// for any objects created by the compiler. 
+static Arena **add_arena(paw_Env *P, Pool *pool, size_t required_size) { + if (required_size > SIZE_MAX / 2) { + pawM_error(P); // sanity check + } + size_t size = pool->last_size > 0 ? pool->last_size : 1; // 0 would never grow when doubled below + while (size < required_size) { + size *= 2; + } + pool->last_size = size; Arena *a = pawM_new_flex(P, Arena, size, 1); memset(a->data, 0, size); a->size = size; - return a; -} - -Node *pawK_add_node_aux(Lex *lex, unsigned kind, size_t size, size_t align) -{ - paw_Env *P = lex->P; - Tree *ast = lex->ast; - Arena *a = ast->arena; - const size_t base = (a->used + align - 1) & ~(align - 1); - if (base + size > a->size) { - if (base + size > (SIZE_MAX / 2) - 1) { - pawM_error(P); // sanity check - } - const size_t nbytes = (base + size + 1) * 2; - Arena *anew = new_arena(P, nbytes); - anew->prev = a; - a = ast->arena = anew; - } - a->used = base + size; - Node *node = (Node *)&a->data[base]; - node->line = lex->lastline; - node->kind = kind; - return node; + // attach to pool + a->prev = pool->arena; + pool->arena = a; + return &pool->arena; } -#define FIRST_ARENA_SIZE 512 - -Tree *pawK_new_ast(paw_Env *P) +void pawK_pool_init(paw_Env *P, Pool *pool, size_t base_size, size_t min_size) { - Tree *tree = pawM_new(P, Tree); - tree->arena = new_arena(P, FIRST_ARENA_SIZE); - return tree; + pool->filled = pool->arena = NULL; // add_arena() links the new arena in front of pool->arena + pool->last_size = base_size; + add_arena(P, pool, 0); + pool->min_size = min_size; } -void pawK_free_ast(paw_Env *P, Tree *ast) +static void free_arena_list(paw_Env *P, Arena *a) { - // Free the list of arenas backing the AST. 
- for (Arena *a = ast->arena; a;) { + while (a) { Arena *prev = a->prev; pawM_free_flex(P, a, a->size, 1); a = prev; } - pawM_free(P, ast); -} - -// ******************** -// AST visitors -// ******************** - -static void visit_expr_list(Visitor *V, Expr *head) -{ - for (; head != NULL; head = head->next) { - V->expr(V, head); - } -} - -static void visit_logical_expr(Visitor *V, LogicalExpr *e) -{ - V->expr(V, e->lhs); - V->expr(V, e->rhs); -} - -static void visit_primitive_expr(Visitor *V, PrimitiveExpr *e) -{ - paw_unused(V); - paw_unused(e); -} - -static void visit_literal_expr(Visitor *V, LiteralExpr *e) -{ - paw_unused(V); - paw_unused(e); -} - -static void visit_chain_expr(Visitor *V, ChainExpr *e) -{ - V->expr(V, e->target); -} - -static void visit_cond_expr(Visitor *V, CondExpr *e) -{ - V->expr(V, e->cond); - V->expr(V, e->lhs); - V->expr(V, e->rhs); -} - -static void visit_coalesce_expr(Visitor *V, CoalesceExpr *e) -{ - V->expr(V, e->lhs); - V->expr(V, e->rhs); -} - -static void visit_unop_expr(Visitor *V, UnOpExpr *e) -{ - V->expr(V, e->target); -} - -static void visit_binop_expr(Visitor *V, BinOpExpr *e) -{ - V->expr(V, e->lhs); - V->expr(V, e->rhs); -} - -static void visit_assignment(Visitor *V, Expr *lhs, Expr *rhs) -{ - V->expr(V, lhs); - V->expr(V, rhs); -} - -static void visit_expr_stmt(Visitor *V, ExprStmt *s) -{ - if (s->rhs != NULL) { - V->assign(V, s->lhs, s->rhs); // assignment - } else { - V->expr(V, s->lhs); // function call - } -} - -static void visit_attr_stmt(Visitor *V, AttrStmt *s) -{ - paw_unused(V); - paw_unused(s); -} - -static void visit_class_stmt(Visitor *V, ClassStmt *s) -{ - V->stmt_list(V, s->attrs); -} - -static void visit_stmt_list(Visitor *V, Stmt *head) -{ - for (; head != NULL; head = head->next) { - V->stmt(V, head); - } -} - -static void visit_block_stmt(Visitor *V, Block *b) -{ - V->stmt_list(V, b->stmts); -} - -static void visit_def_stmt(Visitor *V, DefStmt *s) -{ - V->expr(V, s->init); -} - -static void 
visit_param_stmt(Visitor *V, ParamStmt *s) -{ - paw_unused(V); - paw_unused(s); -} - -static void visit_return_stmt(Visitor *V, ReturnStmt *s) -{ - V->expr(V, s->expr); -} - -static void visit_call_expr(Visitor *V, CallExpr *e) -{ - V->expr(V, e->target); - V->expr_list(V, e->args); -} - -static void visit_var_expr(Visitor *V, VarExpr *e) -{ - paw_unused(V); - paw_unused(e); -} - -static void visit_fn_stmt(Visitor *V, FnStmt *s) -{ - V->stmt_list(V, s->fn.args); // parameters - V->block_stmt(V, s->fn.body); // function body -} - -static void visit_ifelse_stmt(Visitor *V, IfElseStmt *s) -{ - V->expr(V, s->cond); - V->stmt(V, s->then_arm); - V->stmt(V, s->else_arm); -} - -static void visit_while_stmt(Visitor *V, WhileStmt *s) -{ - V->expr(V, s->cond); - V->block_stmt(V, s->block); -} - -static void visit_dowhile_stmt(Visitor *V, WhileStmt *s) -{ - V->block_stmt(V, s->block); - V->expr(V, s->cond); -} - -static void visit_label_stmt(Visitor *V, LabelStmt *s) -{ - paw_unused(V); - paw_unused(s); -} - -static void visit_for_stmt(Visitor *V, ForStmt *s) -{ - if (s->kind == STMT_FORNUM) { - V->expr(V, s->fornum.begin); - V->expr(V, s->fornum.end); - V->expr(V, s->fornum.step); - } else { - V->expr(V, s->forin.target); - } - V->block_stmt(V, s->block); -} - -static void visit_array_expr(Visitor *V, ArrayExpr *e) -{ - V->expr_list(V, e->items); -} - -//static void visit_map_expr(Visitor *V, MapExpr *e) -//{ -// V->expr_list(V, e->items); -//} - -static void visit_init_expr(Visitor *V, InitExpr *e) -{ - Stmt *attr = e->attrs; - while (attr != NULL) { - V->stmt(V, attr); - attr = attr->next; - } } -static void visit_item_stmt(Visitor *V, ItemStmt *s) +void pawK_pool_uninit(paw_Env *P, Pool *pool) { - V->expr(V, s->value); + free_arena_list(P, pool->arena); + free_arena_list(P, pool->filled); } -static void visit_index_expr(Visitor *V, IndexExpr *e) +void *pawK_pool_alloc(paw_Env *P, Pool *pool, size_t size, size_t align) { - V->expr(V, e->target); - V->expr(V, e->first); - 
V->expr(V, e->second); -} - -static void visit_access_expr(Visitor *V, AccessExpr *e) -{ - V->expr(V, e->target); -} + paw_assert(size > 0); -static void visit_invoke_expr(Visitor *V, InvokeExpr *e) -{ - V->expr(V, e->target); - Expr *arg = e->args; - while (arg != NULL) { - V->expr(V, arg); - arg = arg->next; - } -} - -void pawK_visit_expr(Visitor *V, Expr *expr) -{ - if (expr == NULL) { - return; - } - switch (expr->kind) { - case EXPR_PRIMITIVE: - V->primitive_expr(V, cast_to(expr, PrimitiveExpr)); - break; - case EXPR_LITERAL: - V->literal_expr(V, cast_to(expr, LiteralExpr)); - break; - case EXPR_CHAIN: - V->chain_expr(V, cast_to(expr, ChainExpr)); - break; - case EXPR_COALESCE: - V->coalesce_expr(V, cast_to(expr, CoalesceExpr)); + size_t base; + Arena **pa = &pool->arena; + while (*pa != NULL) { + Arena *a = *pa; + base = (a->used + align - 1) & ~(align - 1); + if (base + size <= a->size) { break; - case EXPR_LOGICAL: - V->logical_expr(V, cast_to(expr, LogicalExpr)); - break; - case EXPR_UNOP: - V->unop_expr(V, cast_to(expr, UnOpExpr)); - break; - case EXPR_BINOP: - V->binop_expr(V, cast_to(expr, BinOpExpr)); - break; - case EXPR_CALL: - V->call_expr(V, cast_to(expr, CallExpr)); - break; - case EXPR_COND: - V->cond_expr(V, cast_to(expr, CondExpr)); - break; - case EXPR_VAR: - V->var_expr(V, cast_to(expr, VarExpr)); - break; - case EXPR_INIT: - V->init_expr(V, cast_to(expr, InitExpr)); - break; - case EXPR_ARRAY: - V->array_expr(V, cast_to(expr, ArrayExpr)); - break; - // case EXPR_MAP: - // V->map_expr(V, cast_to(expr, MapExpr)); - // break; - case EXPR_INDEX: - V->index_expr(V, cast_to(expr, IndexExpr)); - break; - case EXPR_INVOKE: - V->invoke_expr(V, cast_to(expr, InvokeExpr)); - break; - default: - paw_assert(expr->kind == EXPR_ACCESS); - V->access_expr(V, cast_to(expr, AccessExpr)); + } + pa = &a->prev; } -} - -void pawK_visit_stmt(Visitor *V, Stmt *stmt) -{ - if (stmt == NULL) { - return; + if (*pa == NULL) { + // add a new arena to the front of the 
list, guaranteed to have at + // least 'size' bytes + pa = add_arena(P, pool, size); + base = 0; } - switch (stmt->kind) { - case STMT_EXPR: - V->expr_stmt(V, cast_to(stmt, ExprStmt)); - break; - case STMT_RETURN: - V->return_stmt(V, cast_to(stmt, ReturnStmt)); - break; - case STMT_IFELSE: - V->ifelse_stmt(V, cast_to(stmt, IfElseStmt)); - break; - case STMT_FORIN: - case STMT_FORNUM: - V->for_stmt(V, cast_to(stmt, ForStmt)); - break; - case STMT_WHILE: - V->while_stmt(V, cast_to(stmt, WhileStmt)); - break; - case STMT_DOWHILE: - V->dowhile_stmt(V, cast_to(stmt, WhileStmt)); - break; - case STMT_LABEL: - V->label_stmt(V, cast_to(stmt, LabelStmt)); - break; - case STMT_PARAM: - V->param_stmt(V, cast_to(stmt, ParamStmt)); - break; - case STMT_DEF: - V->def_stmt(V, cast_to(stmt, DefStmt)); - break; - case STMT_FN: - V->fn_stmt(V, cast_to(stmt, FnStmt)); - break; - case STMT_CLASS: - V->class_stmt(V, cast_to(stmt, ClassStmt)); - break; - case STMT_ITEM: - V->item_stmt(V, cast_to(stmt, ItemStmt)); - break; - case STMT_ATTR: - V->attr_stmt(V, cast_to(stmt, AttrStmt)); - break; - default: - paw_assert(stmt->kind == STMT_BLOCK); - V->block_stmt(V, cast_to(stmt, Block)); + Arena *a = *pa; + a->used = base + size; + if (a->size - a->used < pool->min_size) { + // arena has very little memory left: stash it so that we don't keep + // checking it in the above loop + *pa = a->prev; + a->prev = pool->filled; + pool->filled = a; } + return a->data + base; } -void pawK_init_visitor(Visitor *V, Lex *lex) -{ - *V = (Visitor){ - .lex = lex, - - .expr = pawK_visit_expr, - .stmt = pawK_visit_stmt, - .assign = visit_assignment, - - .expr_list = visit_expr_list, - .primitive_expr = visit_primitive_expr, - .chain_expr = visit_chain_expr, - .coalesce_expr = visit_coalesce_expr, - .logical_expr = visit_logical_expr, - .unop_expr = visit_unop_expr, - .binop_expr = visit_binop_expr, - .call_expr = visit_call_expr, - .cond_expr = visit_cond_expr, - .var_expr = visit_var_expr, - .array_expr = 
visit_array_expr, - //.map_expr = visit_map_expr, - .init_expr = visit_init_expr, - .index_expr = visit_index_expr, - .access_expr = visit_access_expr, - .invoke_expr = visit_invoke_expr, - - .stmt_list = visit_stmt_list, - .expr_stmt = visit_expr_stmt, - .item_stmt = visit_item_stmt, - .return_stmt = visit_return_stmt, - .ifelse_stmt = visit_ifelse_stmt, - .for_stmt = visit_for_stmt, - .while_stmt = visit_while_stmt, - .dowhile_stmt = visit_dowhile_stmt, - .label_stmt = visit_label_stmt, - .fn_stmt = visit_fn_stmt, - .param_stmt = visit_param_stmt, - .def_stmt = visit_def_stmt, - .attr_stmt = visit_attr_stmt, - .class_stmt = visit_class_stmt, - .block_stmt = visit_block_stmt, - }; -} - -void pawK_visit(Visitor *V, Tree *tree) -{ - V->stmt_list(V, tree->stmts); -} diff --git a/src/code.h b/src/code.h index 10f6711..cbf2e28 100644 --- a/src/code.h +++ b/src/code.h @@ -8,75 +8,7 @@ #include "parse.h" #include "paw.h" -typedef enum ExprType { - EXPR_VAR, - EXPR_PRIMITIVE, - EXPR_LITERAL, - EXPR_ARRAY, - EXPR_MAP, - EXPR_CHAIN, - EXPR_COALESCE, - EXPR_LOGICAL, - EXPR_UNOP, - EXPR_BINOP, - EXPR_CALL, - EXPR_COND, - EXPR_INDEX, - EXPR_ACCESS, - EXPR_INVOKE, - EXPR_SYMBOL, - EXPR_TYPE, - EXPR_INIT, -} ExprType; - -typedef enum StmtType { - STMT_EXPR, - STMT_BLOCK, - STMT_IFELSE, - STMT_FORIN, - STMT_FORNUM, - STMT_WHILE, - STMT_DOWHILE, - STMT_LABEL, - STMT_CLASS, - STMT_ATTR, - STMT_FN, - STMT_DEF, - STMT_ITEM, - STMT_PARAM, - STMT_RETURN, - STMT_TYPENAME, -} StmtType; - -#define NODE_HEADER \ - int line; \ - uint8_t kind -#define STMT_HEADER \ - NODE_HEADER; \ - struct Stmt *next -#define EXPR_HEADER \ - NODE_HEADER; \ - Type *type; \ - struct Expr *next - -typedef struct Node { - NODE_HEADER; -} Node; - -typedef struct Expr { - EXPR_HEADER; -} Expr; - -typedef struct Stmt { - STMT_HEADER; -} Stmt; - -typedef struct Symbol { - EXPR_HEADER; - paw_Bool is_init: 1; - paw_Bool is_type: 1; - String *name; -} Symbol; +typedef int NodeId; typedef struct Arena { struct Arena 
*prev; @@ -84,341 +16,40 @@ typedef struct Arena { size_t size; // Must be aligned to at least the strictest alignment required - // by a Node. + // by an AST or IR node. _Alignas(void *) char data[]; } Arena; -typedef struct Block { - STMT_HEADER; - Stmt *stmts; - Scope *scope; // scope for block - int nstmts; -} Block; - -typedef struct TypeDecl TypeDecl; - -typedef struct BasicTypeDecl { - paw_Type t; -} BasicTypeDecl; - -typedef struct NamedTypeDecl { - String *name; -} NamedTypeDecl; - -typedef struct SignatureDecl { - TypeDecl *ret; - Expr *args; - int nargs; -} SignatureDecl; - -struct TypeDecl { - EXPR_HEADER; - TypeKind group; - union { - BasicTypeDecl basic; - NamedTypeDecl named; - SignatureDecl sig; - }; -}; - -typedef struct Function { - Scope *scope; // function-scoped variables - Type *type; - String *name; // name of the function - Stmt *args; // AST nodes for parameters - TypeDecl *ret; // return type annotation - Block *body; // function body - FnKind kind; // type of function - int nargs; -} Function; - -typedef struct Tree { - Arena *arena; - Stmt *stmts; - int nstmts; -} Tree; - -typedef enum VarKind { - VAR_UNDEF, - VAR_GLOBAL, - VAR_UPVALUE, - VAR_LOCAL, -} VarKind; - -typedef struct VarInfo { - Type *type; - enum VarKind kind; - int index; -} VarInfo; - -typedef struct ItemStmt { - STMT_HEADER; - String *name; // attribute name - Expr *value; -} ItemStmt; - -typedef struct InitExpr { - EXPR_HEADER; - Expr *prefix; - Stmt *attrs; // list of ItemStmt - int nattrs; -} InitExpr; - -typedef struct UnOpExpr { - EXPR_HEADER; - UnaryOp op; - FunctionType *mm; - Expr *target; -} UnOpExpr; - -#define make_binary_expr(a, b) \ - typedef struct a ## Expr { \ - EXPR_HEADER; \ - b \ - Expr *lhs; \ - Expr *rhs; \ - FunctionType *mm; \ - } a ## Expr; - -make_binary_expr(BinOp, BinaryOp op;) -make_binary_expr(Cond, Expr *cond;) -make_binary_expr(Logical, uint8_t is_and;) -make_binary_expr(Coalesce, ) - -#define SUFFIXED_HEADER EXPR_HEADER; Expr *target 
-typedef struct SuffixedExpr { - SUFFIXED_HEADER; -} SuffixedExpr; - -typedef struct ChainExpr { - SUFFIXED_HEADER; -} ChainExpr; - -typedef struct CallExpr { - SUFFIXED_HEADER; - Expr *args; - int nargs; -} CallExpr; - -typedef struct IndexExpr { - SUFFIXED_HEADER; // common fields - Expr *first; // index/key or slice beginning - Expr *second; // slice end, or NULL -} IndexExpr; - -typedef struct AccessExpr { - SUFFIXED_HEADER; // common fields - uint16_t index; // index in constants table - String *name; // field name -} AccessExpr; - -typedef struct InvokeExpr { - SUFFIXED_HEADER; // common fields - int index; - String *name; // method name - Expr *args; - int nargs; -} InvokeExpr; +typedef struct Pool { + Arena *filled; // list of filled arenas + Arena *arena; // list of available arenas + size_t last_size; // size of last arena allocated + size_t min_size; // minimum allocation size +} Pool; -typedef struct VarExpr { - EXPR_HEADER; // common fields - String *name; // name of the variable -} VarExpr; +void pawK_pool_init(paw_Env *P, Pool *pool, size_t base_size, size_t min_size); +void pawK_pool_uninit(paw_Env *P, Pool *pool); +void *pawK_pool_alloc(paw_Env *P, Pool *pool, size_t size, size_t align); -#define CONTAINER_HEADER EXPR_HEADER; Type *t -typedef struct ContainerExpr { - CONTAINER_HEADER; -} ContainerExpr; +//**************************************************************** +// Code generation +//**************************************************************** -typedef struct ArrayExpr { - CONTAINER_HEADER; - Expr *items; - int nitems; -} ArrayExpr; - -typedef struct PrimitiveExpr { - EXPR_HEADER; - paw_Type t; - Value v; -} PrimitiveExpr; - -typedef struct LiteralExpr { - EXPR_HEADER; - paw_Type t; - const char *label; - Expr *expr; -} LiteralExpr; - -typedef struct DefStmt { - STMT_HEADER; // common fields - struct { - paw_Bool global: 1; // uses 'global' keyword - } flags; - TypeDecl *tag; // type annotation - String *name; // variable name - Expr 
*init; // initial value -} DefStmt; - -typedef struct ParamStmt { - STMT_HEADER; - TypeDecl *tag; // type annotation - String *name; // variable name -} ParamStmt; - -typedef struct FnStmt { - STMT_HEADER; - struct { - paw_Bool global: 1; // uses 'global' keyword - FnKind kind: 7; - } flags; - Function fn; -} FnStmt; - -typedef struct AttrStmt { - STMT_HEADER; - uint8_t is_fn; // // 1 if 'fn' is valid, 0 otherwise - String *name; // attribute name - union { - TypeDecl *tag; // type annotation - Function fn; // method info - }; -} AttrStmt; - -typedef struct ClassStmt { - STMT_HEADER; - Scope *scope; - struct { - paw_Bool global: 1; // uses 'global' keyword - } flags; - Expr *super; - String *name; - Stmt *attrs; // list of AttrStmt - ClassType *cls; - int nattrs; -} ClassStmt; - -typedef struct ReturnStmt { - STMT_HEADER; - Function *fn; - Expr *expr; -} ReturnStmt; - -typedef struct ExprStmt { - STMT_HEADER; - Expr *lhs; - Expr *rhs; -} ExprStmt; - -typedef struct IfElseStmt { - STMT_HEADER; - Expr *cond; - Stmt *then_arm; - Stmt *else_arm; -} IfElseStmt; - -typedef struct WhileStmt { - STMT_HEADER; - Expr *cond; - Block *block; -} WhileStmt; - -typedef struct LabelStmt { - STMT_HEADER; - LabelKind label; -} LabelStmt; - -typedef struct ForIn { - Expr *target; -} ForIn; - -typedef struct ForNum { - Expr *begin; - Expr *end; - Expr *step; -} ForNum; - -typedef struct ForStmt { - STMT_HEADER; - Scope *scope; // scope for entire loop - String *name; // loop control variable name - Block *block; // body of loop - union { - ForIn forin; - ForNum fornum; - }; -} ForStmt; - -#define cast_node(x) ((Node *)(x)) -#define cast_stmt(x) ((Stmt *)(x)) -#define cast_expr(x) ((Expr *)(x)) -#define cast_to(x, tp) ((tp *)(x)) - -void pawK_dump_ast(Lex *lex, FILE *out); - -Node *pawK_add_node_aux(Lex *lex, unsigned type, size_t size, size_t align); -#define pawK_add_node(x, tt, tp) (tp *)pawK_add_node_aux(x, tt, sizeof(tp), _Alignof(tp)) - -Tree *pawK_new_ast(paw_Env *P); -void 
pawK_free_ast(paw_Env *P, Tree *ast); - -typedef struct Visitor Visitor; - -struct Visitor { +typedef struct Generator { Lex *lex; // lexical state - Function *fn; // enclosing function - - void (*assign)(Visitor *V, Expr *lhs, Expr *rhs); - - void (*expr)(Visitor *V, Expr *expr); - void (*expr_list)(Visitor *V, Expr *head); - void (*primitive_expr)(Visitor *V, PrimitiveExpr *e); - void (*literal_expr)(Visitor *V, LiteralExpr *e); - void (*chain_expr)(Visitor *V, ChainExpr *e); - void (*coalesce_expr)(Visitor *V, CoalesceExpr *e); - void (*logical_expr)(Visitor *V, LogicalExpr *e); - void (*unop_expr)(Visitor *V, UnOpExpr *e); - void (*binop_expr)(Visitor *V, BinOpExpr *e); - void (*call_expr)(Visitor *V, CallExpr *e); - void (*cond_expr)(Visitor *V, CondExpr *e); - void (*var_expr)(Visitor *V, VarExpr *e); - void (*init_expr)(Visitor *V, InitExpr *e); - void (*array_expr)(Visitor *V, ArrayExpr *e); - void (*index_expr)(Visitor *V, IndexExpr *e); - void (*access_expr)(Visitor *V, AccessExpr *e); - void (*invoke_expr)(Visitor *V, InvokeExpr *e); - - void (*stmt)(Visitor *V, Stmt *stmt); - void (*stmt_list)(Visitor *V, Stmt *head); - void (*expr_stmt)(Visitor *V, ExprStmt *s); - void (*return_stmt)(Visitor *V, ReturnStmt *s); - void (*ifelse_stmt)(Visitor *V, IfElseStmt *s); - void (*for_stmt)(Visitor *V, ForStmt *s); - void (*while_stmt)(Visitor *V, WhileStmt *s); - void (*dowhile_stmt)(Visitor *V, WhileStmt *s); - void (*label_stmt)(Visitor *V, LabelStmt *s); - void (*param_stmt)(Visitor *V, ParamStmt *s); - void (*fn_stmt)(Visitor *V, FnStmt *s); - void (*item_stmt)(Visitor *V, ItemStmt *s); - void (*attr_stmt)(Visitor *V, AttrStmt *s); - void (*class_stmt)(Visitor *V, ClassStmt *s); - void (*def_stmt)(Visitor *V, DefStmt *s); - void (*block_stmt)(Visitor *V, Block *b); -}; - -// Initialize a new visitor object -// Sets the default visitor routines, which do nothing but traverse the AST. Function -// pointers in 'V' can be replaced before 'pawK_visit' is called. 
-void pawK_init_visitor(Visitor *V, Lex *lex); - -// visitor entrypoint -void pawK_visit(Visitor *V, Tree *tree); + StructState *cs; // enclosing structure context + FuncState *fs; // enclosing function context + SymbolTable *sym; // scoped symbol table + Scope *globals; // global symbols + struct Ast *ast; // typed AST +} Generator; -void pawK_fix_line(FnState *fs, int line); +void pawK_fix_line(FuncState *fs, int line); // Opcode output routines -void pawK_code_0(FnState *fs, Op op); -void pawK_code_S(FnState *fs, Op op, int s); -void pawK_code_U(FnState *fs, Op op, int u); -void pawK_code_AB(FnState *fs, Op op, int a, int b); +void pawK_code_0(FuncState *fs, Op op); +void pawK_code_S(FuncState *fs, Op op, int s); +void pawK_code_U(FuncState *fs, Op op, int u); +void pawK_code_AB(FuncState *fs, Op op, int a, int b); #endif // PAW_CODE_H diff --git a/src/codegen.c b/src/codegen.c index c04c485..a4ad5cd 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -2,15 +2,134 @@ // This source code is licensed under the MIT License, which can be found in // LICENSE.md. See AUTHORS.md for a list of contributor names. +#include "array.h" +#include "ast.h" #include "code.h" -#include "gc.h" +#include "gc_aux.h" #include "lex.h" +#include "map.h" #include "mem.h" #include "parse.h" #include "type.h" +#define syntax_error(G, ...) 
pawX_error((G)->lex, __VA_ARGS__) #define is_global(lex) (is_toplevel(lex) && (lex)->fs->bs->outer == NULL) +#define code_block(V, b) check_exp((b)->kind == STMT_BLOCK, V->visit_stmt(V, cast_stmt(b))) +#define basic_decl(G, code) basic_symbol(G, code)->decl +#define basic_type(G, code) basic_decl(G, code)->type.type +#define get_decl(G, id) ((G)->lex->pm->decls.data[id]) +#define get_type(G, id) get_decl(G, id)->hdr.type +#define symbol_type(G, symbol) get_type(G, (symbol)->decl->hdr.def) + +// TODO +#define visit_stmts(V, list) (V)->visit_stmt_list(V, list, (V)->visit_stmt) +#define visit_exprs(V, list) (V)->visit_expr_list(V, list, (V)->visit_expr) +#define visit_decls(V, list) (V)->visit_decl_list(V, list, (V)->visit_decl) +#define visit_methods(V, list) (V)->visit_method_list(V, list, (V)->visit_decl) + +static void code_decl(Generator *, AstDecl *); +static void code_expr(Generator *, AstExpr *); +static void code_stmt(Generator *, AstStmt *); + +static void mangle_type(Generator *G, Buffer *buf, Type *type) +{ + paw_Env *P = env(G->lex); + if (y_is_unit(type)) { + pawL_add_literal(P, buf, "0"); + } else if (y_is_bool(type)) { + pawL_add_literal(P, buf, "b"); + } else if (y_is_int(type)) { + pawL_add_literal(P, buf, "i"); + } else if (y_is_float(type)) { + pawL_add_literal(P, buf, "f"); + } else if (y_is_string(type)) { + pawL_add_literal(P, buf, "s"); + } else if (y_is_type_var(type)) { + TypeVar *var = &type->var; + pawL_add_nstring(P, buf, var->name->text, var->name->length); + } else if (y_is_adt(type)) { + Adt *adt = &type->adt; + AstDecl *d = get_decl(G, adt->def); + String *name = d->struct_.name; + pawL_add_nstring(P, buf, name->text, name->length); + for (int i = 0; i < adt->types.count; ++i) { + mangle_type(G, buf, adt->types.types[i]); + } + } else { + paw_assert(y_is_func(type)); + FuncSig *func = &type->func; + pawL_add_char(P, buf, 'F'); + Binder *params = &func->params; + for (int i = 0; i < params->count; ++i) { + mangle_type(G, buf, 
params->types[i]); + } + pawL_add_char(P, buf, '_'); + mangle_type(G, buf, func->return_); + } +} + +// mangle('recv', 'attr') -> %recv.attr +static String *mangle_attr(Generator *G, String *recv, String *attr) +{ + Buffer buf; + paw_Env *P = env(G->lex); + pawL_init_buffer(P, &buf); + pawL_add_char(P, &buf, '%'); + pawL_add_nstring(P, &buf, recv->text, recv->length); + pawL_add_char(P, &buf, '.'); + pawL_add_nstring(P, &buf, attr->text, attr->length); + pawL_push_result(P, &buf); + String *result = v_string(P->top.p[-1]); + pawC_pop(P); // unanchor + return result; +} + +// mangle('name', ()) -> name_ +// mangle('name', ('int', 'A')) -> nameiA_ +// mangle('name', ('A[int]',)) -> nameAi_ +static String *mangle_name(Generator *G, String *name, Binder *binder) +{ + Buffer buf; + paw_Env *P = env(G->lex); + pawL_init_buffer(P, &buf); + pawL_add_nstring(P, &buf, name->text, name->length); + for (int i = 0; i < binder->count; ++i) { + mangle_type(G, &buf, binder->types[i]); + } + pawL_add_char(P, &buf, '_'); + pawL_push_result(P, &buf); + String *result = v_string(P->top.p[-1]); + pawC_pop(P); // unanchor + return result; +} + +static Symbol *basic_symbol(Generator *G, paw_Type code) +{ + paw_assert(code >= 0 && code <= PAW_TSTRING); + // basic types have fixed locations + Scope *toplevel = G->sym->scopes[0]; + return toplevel->symbols[1 + code]; // TODO +} + +static String *mangle_named_type(Generator *G, Type *type) +{ + paw_assert(y_is_adt(type)); + Adt *adt = &type->adt; + AstDecl *decl = get_decl(G, adt->def); + if (adt->types.count == 0) { + return decl->struct_.name; + } + return mangle_name(G, decl->struct_.name, &adt->types); +} + +static paw_Type basic_code(const Type *type) +{ + paw_assert(y_kind(type) == TYPE_BASIC); + return type->hdr.def; +} + +// TODO: Get rid of this static Symbol *fetch_symbol(Scope *scope, const String *name, int *pindex) { *pindex = pawP_find_symbol(scope, name); @@ -18,18 +137,18 @@ static Symbol *fetch_symbol(Scope *scope, const 
String *name, int *pindex) return scope->symbols[*pindex]; } -static void push_local_table(FnState *fs, Scope *symbols) +static void push_local_table(FuncState *fs, Scope *symbols) { - Lex *lex = fs->lex; + Generator *G = fs->G; SymbolTable *st = &fs->scopes; if (st->nscopes == UINT16_MAX) { - limit_error(lex, "symbols", UINT16_MAX); + syntax_error(G, "too many nested scopes"); } - pawM_grow(env(lex), st->scopes, st->nscopes, st->capacity); + pawM_grow(env(G->lex), st->scopes, st->nscopes, st->capacity); st->scopes[st->nscopes++] = symbols; } -static void pop_local_table(FnState *fs) +static void pop_local_table(FuncState *fs) { // Last symbol table should have been assigned to an AST node. The // next call to push_symbol_table() will allocate a new table. @@ -38,17 +157,17 @@ static void pop_local_table(FnState *fs) --st->nscopes; } -static int add_constant(Lex *lex, Value v) +static int add_constant(Generator *G, Value v) { - FnState *fs = lex->fs; + FuncState *fs = G->fs; Proto *p = fs->proto; if (fs->nk == UINT16_MAX) { - limit_error(lex, "constants", UINT16_MAX); + syntax_error(G, "too many constants"); } else if (fs->nk == p->nk) { // 'fs->nk' only ever increases by 1, so this will always give us // enough memory. 
- pawM_grow(env(lex), p->k, fs->nk, p->nk); + pawM_grow(env(G->lex), p->k, fs->nk, p->nk); for (int i = fs->nk + 1; i < p->nk; ++i) { v_set_0(&p->k[i]); // clear for GC } @@ -57,12 +176,29 @@ static int add_constant(Lex *lex, Value v) return fs->nk++; } -static int add_proto(Lex *lex, String *name, Proto **pp) +static int add_struct(Generator *G, Struct *c) +{ + FuncState *fs = G->fs; + Proto *p = fs->proto; + if (fs->nstructs == UINT16_MAX) { + syntax_error(G, "too many structs"); + } else if (fs->nstructs == p->nc) { + pawM_grow(env(G->lex), p->c, fs->nstructs, p->nc); + for (int i = fs->nstructs; i < p->nc; ++i) { + p->c[i] = NULL; // clear for GC (including current) + } + } + p->c[fs->nstructs] = c; + return fs->nstructs++; +} + +static int add_proto(Generator *G, String *name, Proto **pp) { - FnState *fs = lex->fs; + Lex *lex = G->lex; + FuncState *fs = G->fs; Proto *p = fs->proto; if (fs->nproto == UINT16_MAX) { - limit_error(lex, "functions", UINT16_MAX); + syntax_error(G, "too many functions"); } else if (fs->nproto == p->nproto) { pawM_grow(env(lex), p->p, fs->nproto, p->nproto); for (int i = fs->nproto; i < p->nproto; ++i) { @@ -78,31 +214,31 @@ static int add_proto(Lex *lex, String *name, Proto **pp) return id; } -static void add_debug_info(Lex *lex, Symbol *var) +static void add_debug_info(Generator *G, Symbol *symbol) { - FnState *fs = lex->fs; + FuncState *fs = G->fs; Proto *p = fs->proto; if (fs->ndebug == LOCAL_MAX) { - limit_error(lex, "locals", LOCAL_MAX); + syntax_error(G, "too many locals"); } else if (fs->ndebug == p->ndebug) { - pawM_grow(env(lex), p->v, fs->ndebug, p->ndebug); + pawM_grow(env(G->lex), p->v, fs->ndebug, p->ndebug); for (int i = fs->ndebug + 1; i < p->ndebug; ++i) { p->v[i].var = (VarDesc){0}; // clear for GC } } p->v[fs->ndebug] = (struct LocalInfo){ - .var = {var->type, var->name}, + .var = {symbol->name, symbol->decl->hdr.def}, .pc0 = fs->pc, }; ++fs->ndebug; } -static struct LocalInfo *local_info(FnState *fs, int level) 
+static struct LocalInfo *local_info(FuncState *fs, int level) { return &fs->proto->v[level]; } -static void close_vars(FnState *fs, int target) +static void close_vars(FuncState *fs, int target) { const int upper = fs->level - 1; // first slot to pop const int lower = target - 1; // 1 past 'reverse' end @@ -117,102 +253,100 @@ static void close_vars(FnState *fs, int target) } } -static VarInfo add_local(FnState *fs, String *name) +static VarInfo add_local(FuncState *fs, Symbol *symbol) { - int unused; - Lex *lex = fs->lex; - SymbolTable *scopes = &fs->scopes; // all function scopes - Scope *symbols = scopes->scopes[scopes->nscopes - 1]; // last scope - Symbol *symbol = fetch_symbol(symbols, name, &unused); - pawM_grow(env(lex), fs->locals.symbols, fs->locals.nsymbols, fs->locals.capacity); - const int index = fs->locals.nsymbols++; - fs->locals.symbols[index] = symbol; + Lex *lex = fs->G->lex; + pawM_grow(env(lex), fs->locals.slots, fs->locals.nslots, fs->locals.capacity); + const int index = fs->locals.nslots++; + fs->locals.slots[index].symbol = symbol; + fs->locals.slots[index].index = index; // TODO return (VarInfo){ - .type = symbol->type, + .symbol = symbol, .kind = VAR_LOCAL, .index = index, }; } -static VarInfo add_local_literal(FnState *fs, const char *name) +static paw_Bool symbol_iter(FuncState *fs, Scope *scope, Symbol **out) { - String *str = scan_string(fs->lex, name); - return add_local(fs, str); + BlockState *bs = fs->bs; + paw_assert(bs->isymbol < scope->nsymbols); + *out = scope->symbols[bs->isymbol++]; + return (*out)->is_type; // skip types +} + +static VarInfo transfer_local(FuncState *fs) +{ + Symbol *symbol; + // Find the next symbol that belongs on the stack. 
+ SymbolTable *scopes = &fs->scopes; // all function scopes + Scope *scope = scopes->scopes[scopes->nscopes - 1]; // last scope + while (symbol_iter(fs, scope, &symbol)) {} + return add_local(fs, symbol); } -static VarInfo add_global(Lex *lex, String *name) +static VarInfo add_global(Generator *G) { int index; - Symbol *symbol = fetch_symbol(lex->pm->st.globals, name, &index); - pawE_new_global(env(lex), name, symbol->type); + Symbol *symbol;String*name; // TODO: Integer index to track current local + pawE_new_global(env(G->lex), name, symbol->decl); return (VarInfo){ - .type = symbol->type, + .symbol = symbol, .kind = VAR_GLOBAL, .index = index, }; } -static int add_class(Lex *lex, ClassType *cls) +static Symbol *create_global(Generator *G, String *name) { - FnState *fs = lex->fs; - Proto *p = fs->proto; - if (fs->nclasses == UINT16_MAX) { - limit_error(lex, "classes", UINT16_MAX); - } else if (fs->nclasses == p->nclasses) { -// pawM_grow(env(lex), p->c, fs->nclass, p->nclass); -// for (int i = fs->nclass; i < p->nclass; ++i) { -// p->c[i] = NULL; // clear for GC (including current) -// } - } -// p->classes[fs->nclasses] = pawV_new_class(env(lex), cls); - return fs->nclasses++; + Symbol *symbol = pawA_new_symbol(G->lex); + const int index = pawE_new_global(env(G->lex), name, symbol->decl); + return symbol; } #define JUMP_PLACEHOLDER (-1) -static int code_jump(FnState *fs, OpCode op) +static int code_jump(FuncState *fs, OpCode op) { pawK_code_S(fs, op, JUMP_PLACEHOLDER); return fs->pc - 1; } -static void patch_jump(FnState *fs, int from, int to) +static void patch_jump(FuncState *fs, int from, int to) { - Lex *lex = fs->lex; const int jump = to - (from + 1); if (jump > JUMP_MAX) { - limit_error(lex, "instructions to jump", JUMP_MAX); + syntax_error(fs->G, "too many instructions to jump"); } Proto *p = fs->proto; set_S(&p->source[from], jump); } -static void patch_here(FnState *fs, int from) +static void patch_here(FuncState *fs, int from) { patch_jump(fs, from, 
fs->pc); } -static void code_loop(FnState *fs, Op op, int to) +static void code_loop(FuncState *fs, Op op, int to) { - Lex *lex = fs->lex; const int jump = to - (fs->pc + 1); if (jump > JUMP_MAX) { - limit_error(lex, "instructions in loop", JUMP_MAX); + syntax_error(fs->G, "too many instructions in loop"); } pawK_code_S(fs, op, jump); } -static void code_closure(FnState *fs, Proto *p, int id) +static void code_closure(FuncState *fs, Proto *p, int id) { Value v; v_set_object(&v, p); pawK_code_U(fs, OP_CLOSURE, id); } -static void add_label(FnState *fs, LabelKind kind) +static void add_label(FuncState *fs, LabelKind kind) { - Lex *lex = fs->lex; - LabelList *ll = &lex->pm->ll; + Lex *lex = fs->G->lex; + LabelList *ll = &lex->pm->labels; pawM_grow(env(lex), ll->values, ll->length, ll->capacity); ll->values[ll->length] = (Label){ .kind = kind, @@ -223,10 +357,10 @@ static void add_label(FnState *fs, LabelKind kind) ++ll->length; } -static void adjust_labels(FnState *fs, BlkState *bs) +static void adjust_labels(FuncState *fs, BlockState *bs) { - Lex *lex = fs->lex; - LabelList *ll = &lex->pm->ll; + Lex *lex = fs->G->lex; + LabelList *ll = &lex->pm->labels; for (int i = bs->label0; i < ll->length; ++i) { Label *lb = &ll->values[i]; lb->level = bs->level; @@ -242,11 +376,11 @@ static void remove_label(LabelList *ll, int index) --ll->length; } -static void adjust_from(FnState *fs, LabelKind kind) +static void adjust_from(FuncState *fs, LabelKind kind) { - Lex *lex = fs->lex; - BlkState *bs = fs->bs; - LabelList *ll = &lex->pm->ll; + Lex *lex = fs->G->lex; + BlockState *bs = fs->bs; + LabelList *ll = &lex->pm->labels; for (int i = bs->label0; i < ll->length;) { Label *lb = &ll->values[i]; if (lb->kind == kind) { @@ -258,12 +392,12 @@ static void adjust_from(FnState *fs, LabelKind kind) } } -static void adjust_to(FnState *fs, LabelKind kind, int to) +static void adjust_to(FuncState *fs, LabelKind kind, int to) { - Lex *lex = fs->lex; + Lex *lex = fs->G->lex; Proto *p = 
fs->proto; - BlkState *bs = fs->bs; - LabelList *ll = &lex->pm->ll; + BlockState *bs = fs->bs; + LabelList *ll = &lex->pm->labels; for (int i = bs->label0; i < ll->length;) { Label *lb = &ll->values[i]; if (lb->kind == kind) { @@ -276,29 +410,28 @@ static void adjust_to(FnState *fs, LabelKind kind, int to) } } -static void begin_local_scope(FnState *fs, int n) +static void begin_local_scope(FuncState *fs, int n) { - Lex *lex = fs->lex; - Scope *locals = &fs->locals; + LocalStack *locals = &fs->locals; for (int i = 0; i < n; ++i) { const int level = fs->level++; - Symbol *var = locals->symbols[level]; - add_debug_info(lex, var); + LocalSlot slot = locals->slots[level]; + add_debug_info(fs->G, slot.symbol); } } -static void end_local_scope(FnState *fs, BlkState *bs) +static void end_local_scope(FuncState *fs, BlockState *bs) { for (int i = fs->level - 1; i >= bs->level; --i) { local_info(fs, i)->pc1 = fs->pc; } - fs->locals.nsymbols = bs->level; + fs->locals.nslots = bs->level; fs->level = bs->level; } -static void leave_block(FnState *fs) +static void leave_block(FuncState *fs) { - BlkState *bs = fs->bs; + BlockState *bs = fs->bs; if (bs->is_loop) { adjust_from(fs, LBREAK); } @@ -311,10 +444,11 @@ static void leave_block(FnState *fs) pop_local_table(fs); } -static void enter_block(FnState *fs, BlkState *bs, Scope *locals, paw_Bool loop) +static void enter_block(FuncState *fs, BlockState *bs, Scope *locals, paw_Bool loop) { - bs->label0 = fs->lex->pm->ll.length; + bs->label0 = fs->G->lex->pm->labels.length; bs->level = fs->level; + bs->isymbol = 0; bs->is_loop = loop; bs->outer = fs->bs; fs->bs = bs; @@ -322,10 +456,11 @@ static void enter_block(FnState *fs, BlkState *bs, Scope *locals, paw_Bool loop) push_local_table(fs, locals); } -static void leave_function(Lex *lex) +static void leave_function(Generator *G) { - FnState *fs = lex->fs; - BlkState *bs = fs->bs; + Lex *lex = G->lex; + FuncState *fs = G->fs; + BlockState *bs = fs->bs; Proto *p = fs->proto; // end 
function-scoped locals @@ -333,8 +468,8 @@ static void leave_function(Lex *lex) paw_assert(fs->level == 0); paw_assert(bs->outer == NULL); - // TODO: Check for missing return, this won't work for functions that return a value - // It is an error if such a function is missing a return at the end + // TODO: Need a return at the end to handle cleaning up the stack + // Use a landing pad: all returns are just jumps to the landing pad pawK_code_0(fs, OP_PUSHUNIT); pawK_code_0(fs, OP_RETURN); @@ -351,23 +486,27 @@ static void leave_function(Lex *lex) pawM_shrink(env(lex), p->k, p->nk, fs->nk); p->nk = fs->nk; - lex->fs = fs->outer; + G->fs = fs->outer; check_gc(env(lex)); } -static String *context_name(const FnState *fs, FnKind kind) +static String *context_name(const FuncState *fs, FuncKind kind) { if (fn_has_self(kind)) { - return v_string(pawE_cstr(env(fs->lex), CSTR_SELF)); + return scan_string(fs->G->lex, "(self)"); } return fs->proto->name; } -static void enter_function(Lex *lex, FnState *fs, BlkState *bs, Scope *scope, FnKind kind) +static void enter_function(Generator *G, FuncState *fs, BlockState *bs, Scope *scope, FuncKind kind) { + Lex *lex = G->lex; + + fs->id = -1; // TODO: not used fs->bs = NULL; fs->scopes = (SymbolTable){0}; - fs->locals = (Scope){0}; + fs->locals = (LocalStack){0}; + fs->nstructs = 0; fs->ndebug = 0; fs->nproto = 0; fs->nlines = 0; @@ -377,110 +516,119 @@ static void enter_function(Lex *lex, FnState *fs, BlkState *bs, Scope *scope, Fn fs->pc = 0; fs->kind = kind; - fs->outer = lex->fs; - fs->lex = lex; - lex->fs = fs; + fs->outer = G->fs; + fs->G = G; + G->fs = fs; // Enter the function body. enter_block(fs, bs, scope, PAW_FALSE); - // Create the context variable in slot 0. For VCLOSURE, this slot holds the closure - // object being called. For VMETHOD, it holds the class instance that the method is - // being called on, i.e. the implicit 'self' parameter. 
- add_local(fs, context_name(fs, kind)); + transfer_local(fs); begin_local_scope(fs, 1); } -static paw_Bool resolve_global(Lex *lex, String *name, VarInfo *pinfo) +static paw_Bool resolve_global(Generator *G, String *name, VarInfo *pinfo) { int index; - Scope *globals = lex->pm->st.globals; + Lex *lex = G->lex; + Scope *globals = lex->pm->symbols.globals; Symbol *symbol = fetch_symbol(globals, name, &index); - pinfo->type = symbol->type; + pinfo->symbol = symbol; pinfo->kind = VAR_GLOBAL; pinfo->index = index; return PAW_TRUE; } -// Find an active local variable with the given 'name' -// Only consider locals that have been brought into scope, using begin_local_scope(). -static paw_Bool resolve_local(FnState *fs, String *name, VarInfo *pinfo) +static paw_Bool resolve_local(FuncState *fs, String *name, VarInfo *pinfo) { for (int i = fs->level - 1; i >= 0; --i) { - Symbol *var = fs->locals.symbols[i]; - if (pawS_eq(var->name, name)) { - pinfo->index = i; + LocalSlot slot = fs->locals.slots[i]; + if (pawS_eq(slot.symbol->name, name)) { + pinfo->symbol = slot.symbol; pinfo->kind = VAR_LOCAL; - pinfo->type = var->type; + pinfo->index = i; return PAW_TRUE; } } return PAW_FALSE; } -static VarInfo add_upvalue(FnState *fs, String *name, int index, Type *type, paw_Bool is_local) +static VarInfo find_var(Generator *G, String *name); + +static VarInfo resolve_attr(Generator *G, Type *type, String *name) +{ + paw_assert(y_is_adt(type)); + AstDecl *decl = get_decl(G, type->hdr.def); + StructDecl *struct_ = &decl->struct_; + Scope *scope = struct_->field_scope; + VarInfo info = (VarInfo){.kind = VAR_FIELD}; + info.index = pawP_find_symbol(scope, name); + if (info.index < 0) { + scope = struct_->method_scope; + info.index = pawP_find_symbol(scope, name); + info.kind = VAR_METHOD; + } + paw_assert(info.index >= 0); // found in last pass + info.symbol = scope->symbols[info.index]; + return info; +} + +static void add_upvalue(FuncState *fs, String *name, VarInfo *info, paw_Bool 
is_local) { Proto *f = fs->proto; for (int i = 0; i < fs->nup; ++i) { struct UpValueInfo up = f->u[i]; - if (up.index == index && up.is_local == is_local) { - return (VarInfo){ - .index = i, - .kind = VAR_UPVALUE, - .type = up.var.type, - }; + if (up.index == info->index && up.is_local == is_local) { + info->kind = VAR_UPVALUE; + info->index = i; + return; } } if (fs->nup == UPVALUE_MAX) { - limit_error(fs->lex, "upvalues", UPVALUE_MAX); + syntax_error(fs->G, "too many upvalues"); } else if (fs->nup == f->nup) { - pawM_grow(env(fs->lex), f->u, fs->nup, f->nup); + pawM_grow(env(fs->G->lex), f->u, fs->nup, f->nup); for (int i = fs->nup + 1; i < f->nup; ++i) { f->u[i].var = (VarDesc){0}; // clear for GC } } f->u[fs->nup] = (struct UpValueInfo){ - .var = {type, name}, + .var = {name, info->symbol->decl->hdr.def}, .is_local = is_local, - .index = index, - }; - return (VarInfo){ - .index = fs->nup++, - .kind = VAR_UPVALUE, - .type = type, + .index = info->index, }; + info->index = fs->nup++; + info->kind = VAR_UPVALUE; } -static paw_Bool resolve_upvalue(FnState *fs, String *name, VarInfo *pinfo) +static paw_Bool resolve_upvalue(FuncState *fs, String *name, VarInfo *pinfo) { - FnState *caller = fs->outer; + FuncState *caller = fs->outer; if (!caller) { // base case return PAW_FALSE; } // Check the caller's local variables. 
if (resolve_local(caller, name, pinfo)) { - const int local = pinfo->index; - caller->proto->v[local].captured = PAW_TRUE; - *pinfo = add_upvalue(fs, name, local, pinfo->type, PAW_TRUE); + caller->proto->v[pinfo->index].captured = PAW_TRUE; + add_upvalue(fs, name, pinfo, PAW_TRUE); return PAW_TRUE; } if (resolve_upvalue(caller, name, pinfo)) { - const int upvalue = pinfo->index; - *pinfo = add_upvalue(fs, name, upvalue, pinfo->type, PAW_FALSE); + add_upvalue(fs, name, pinfo, PAW_FALSE); return PAW_TRUE; } return PAW_FALSE; } -static VarInfo declare_var(FnState *fs, String *name, paw_Bool global) +static VarInfo declare_var(FuncState *fs, paw_Bool global) { - return global ? add_global(fs->lex, name) - : add_local(fs, name); + return global ? add_global(fs->G) + : transfer_local(fs); } // Allow a previously-declared variable to be accessed -static void define_var(FnState *fs, VarInfo info) +static void define_var(FuncState *fs, VarInfo info) { if (info.kind == VAR_LOCAL) { begin_local_scope(fs, 1); @@ -492,41 +640,53 @@ static void define_var(FnState *fs, VarInfo info) } } -static VarInfo new_var(Visitor *V, String *name, paw_Bool global) +static VarInfo code_var(Generator *G, paw_Bool global) { - FnState *fs = V->lex->fs; - VarInfo info = declare_var(fs, name, global); + FuncState *fs = G->fs; + VarInfo info = declare_var(fs, global); define_var(fs, info); return info; } -static VarInfo find_var(Visitor *V, String *name) +static VarInfo inject_var(FuncState *fs, String *name, AstDecl *decl, paw_Bool global) +{ + paw_assert(!global); + Symbol *symbol = pawA_new_symbol(fs->G->lex); + symbol->name = name; + symbol->decl = decl; + return add_local(fs, symbol); +} + +static VarInfo find_var(Generator *G, String *name) { VarInfo info; - Lex *lex = V->lex; - FnState *fs = lex->fs; + Lex *lex = G->lex; + FuncState *fs = G->fs; if (!resolve_local(fs, name, &info) && // not local !resolve_upvalue(fs, name, &info) && // not local to caller - !resolve_global(lex, name, &info)) 
{ // not defined + !resolve_global(G, name, &info)) { // not found pawX_error(lex, "undefined variable '%s'", name->text); } return info; } -#define needs_conversion(a, b) ((a) != (b) && y_is_scalar(a) && y_is_scalar(b)) +static void push_self(FuncState *fs) +{ + paw_assert(fs->G->cs != NULL); + pawK_code_U(fs, OP_GETLOCAL, 0); +} -static void convert_bool(FnState *fs, Type *src) +static void push_super(FuncState *fs) { - if (!y_is_bool(src)) { - pawK_code_U(fs, OP_CASTBOOL, y_id(src)); - } + paw_assert(fs->G->cs != NULL); + pawK_code_U(fs, OP_GETUPVALUE, 0); } // Push a variable on to the stack -static void code_discharge(Visitor *V, VarExpr *e) +static void code_getter(AstVisitor *V, VarInfo info) { - FnState *fs = V->lex->fs; - const VarInfo info = find_var(V, e->name); + Generator *G = V->state.G; + FuncState *fs = G->fs; switch (info.kind) { case VAR_LOCAL: pawK_code_U(fs, OP_GETLOCAL, info.index); @@ -534,20 +694,27 @@ static void code_discharge(Visitor *V, VarExpr *e) case VAR_UPVALUE: pawK_code_U(fs, OP_GETUPVALUE, info.index); break; + case VAR_FIELD: + pawK_code_U(fs, OP_GETATTR, info.index); + break; + case VAR_METHOD: + // TODO: bind to receiver: this should not be called immediately, that must be + // handled separately so that OP_INVOKE can be generated + pawK_code_U(fs, OP_GETATTR, info.index); + break; default: paw_assert(info.kind == VAR_GLOBAL); pawK_code_U(fs, OP_GETGLOBAL, info.index); } } -static void code_assignment(Visitor *V, Expr *lhs, Expr *rhs) +static void code_setter(AstVisitor *V, AstExpr *lhs, AstExpr *rhs) { - FnState *fs = V->lex->fs; - - if (lhs->kind == EXPR_VAR) { - V->expr(V, rhs); // push RHS - VarExpr *e = cast_to(lhs, VarExpr); - const VarInfo info = find_var(V, e->name); + Generator *G = V->state.G; + FuncState *fs = G->fs; + if (a_kind(lhs) == EXPR_NAME) { + const VarInfo info = find_var(G, lhs->name.name); + V->visit_expr(V, rhs); switch (info.kind) { case VAR_LOCAL: pawK_code_U(fs, OP_SETLOCAL, info.index); @@ -562,80 
+729,105 @@ static void code_assignment(Visitor *V, Expr *lhs, Expr *rhs) return; } - // index, range, or attr assignment - SuffixedExpr *base = cast_to(lhs, SuffixedExpr); // common base - V->expr(V, base->target); // push up to last expression - if (lhs->kind == EXPR_INDEX) { - const IndexExpr *last = cast_to(lhs, IndexExpr); - V->expr(V, last->first); - if (last->second) { - V->expr(V, last->second); - V->expr(V, rhs); - pawK_code_0(fs, OP_SETSLICE); - } else { - V->expr(V, rhs); - pawK_code_0(fs, OP_SETITEM); - } + // index or field assignment + SuffixedExpr *suf = &lhs->suffix; // common base + V->visit_expr(V, suf->target); // push up to last expression + if (a_kind(lhs) == EXPR_SELECTOR) { + V->visit_expr(V, rhs); + // resolve the field index + String *name = lhs->selector.name; + const VarInfo info = resolve_attr(G, a_type(suf->target), name); + pawK_code_U(fs, OP_SETATTR, info.index); } else { - paw_assert(lhs->kind == EXPR_ACCESS); - const AccessExpr *e = cast_to(lhs, AccessExpr); - V->expr(V, rhs); - pawK_code_U(fs, OP_SETATTR, e->index); + paw_assert(a_kind(lhs) == EXPR_INDEX); + visit_exprs(V, lhs->index.elems); + V->visit_expr(V, rhs); + pawK_code_0(fs, OP_SETITEM); } } -static void code_var_expr(Visitor *V, VarExpr *e) +static void code_ident_expr(AstVisitor *V, AstIdent *e) { - code_discharge(V, e); + Generator *G = V->state.G; + const VarInfo info = find_var(G, e->name); + code_getter(V, info); } -static void code_primitive_expr(Visitor *V, PrimitiveExpr *e) +static void code_basic_lit(AstVisitor *V, LiteralExpr *e) { - FnState *fs = V->lex->fs; - if (y_is_unit(e->type)) { + Generator *G = V->state.G; + FuncState *fs = G->fs; + if (e->basic.t == PAW_TUNIT) { pawK_code_0(fs, OP_PUSHUNIT); - } else if (y_is_bool(e->type)) { - pawK_code_0(fs, v_true(e->v) ? 
OP_PUSHTRUE : OP_PUSHFALSE); - } else { - const int k = add_constant(V->lex, e->v); + } else if (e->basic.t != PAW_TBOOL) { + const int k = add_constant(G, e->basic.value); pawK_code_U(fs, OP_PUSHCONST, k); + } else if (v_true(e->basic.value)) { + pawK_code_0(fs, OP_PUSHTRUE); + } else { + pawK_code_0(fs, OP_PUSHFALSE); } } -static void code_literal_expr(Visitor *V, LiteralExpr *e) +static void code_composite_lit(AstVisitor *V, LiteralExpr *lit) { - FnState *fs = V->lex->fs; - V->expr(V, e->expr); - add_local_literal(fs, e->label); + Generator *G = V->state.G; + FuncState *fs = G->fs; + CompositeLit *e = &lit->comp; + String *name = mangle_named_type(G, e->target->hdr.type); + VarInfo info = find_var(G, name); + + StructDecl *struct_ = &info.symbol->decl->struct_; + + code_getter(V, info); // get Struct + pawK_code_U(fs, OP_NEWINSTANCE, struct_->fields->count); + + AstExpr *attr = e->items->first; + while (attr != NULL) { + V->visit_expr(V, attr); + pawK_code_U(fs, OP_INITATTR, attr->item.index); + attr = attr->hdr.next; + } +} + +static void code_literal_expr(AstVisitor *V, LiteralExpr *e) +{ + switch (e->lit_kind) { + case LIT_BASIC: + code_basic_lit(V, e); + break; + default: + code_composite_lit(V, e); + } } -static void code_and(Visitor *V, LogicalExpr *e) +static void code_and(AstVisitor *V, LogicalExpr *e) { - FnState *fs = V->lex->fs; + Generator *G = V->state.G; + FuncState *fs = G->fs; - V->expr(V, e->lhs); - convert_bool(fs, e->lhs->type); + V->visit_expr(V, e->lhs); const int jump = code_jump(fs, OP_JUMPFALSE); pawK_code_0(fs, OP_POP); - V->expr(V, e->rhs); + V->visit_expr(V, e->rhs); patch_here(fs, jump); } -static void code_or(Visitor *V, LogicalExpr *e) +static void code_or(AstVisitor *V, LogicalExpr *e) { - FnState *fs = V->lex->fs; + Generator *G = V->state.G; + FuncState *fs = G->fs; - V->expr(V, e->lhs); - convert_bool(fs, e->lhs->type); + V->visit_expr(V, e->lhs); const int else_jump = code_jump(fs, OP_JUMPFALSE); const int then_jump = 
code_jump(fs, OP_JUMP); patch_here(fs, else_jump); pawK_code_0(fs, OP_POP); - V->expr(V, e->rhs); + V->visit_expr(V, e->rhs); patch_here(fs, then_jump); } -static void code_logical_expr(Visitor *V, LogicalExpr *e) +static void code_logical_expr(AstVisitor *V, LogicalExpr *e) { if (e->is_and) { code_and(V, e); @@ -644,236 +836,356 @@ static void code_logical_expr(Visitor *V, LogicalExpr *e) } } -static void code_chain_expr(Visitor *V, ChainExpr *e) +static void code_chain_expr(AstVisitor *V, ChainExpr *e) { - FnState *fs = V->lex->fs; + Generator *G = V->state.G; + FuncState *fs = G->fs; - V->expr(V, e->target); + V->visit_expr(V, e->target); const int else_jump = code_jump(fs, OP_JUMPNULL); const int then_jump = code_jump(fs, OP_JUMP); patch_here(fs, else_jump); - // Return the value on top, which is either 'null', or an instance that - // returned 'null' from its '__null' metamethod. pawK_code_0(fs, OP_RETURN); patch_here(fs, then_jump); } -static void code_cond_expr(Visitor *V, CondExpr *e) +static void code_cond_expr(AstVisitor *V, CondExpr *e) { - FnState *fs = V->lex->fs; + FuncState *fs = V->state.G->fs; - V->expr(V, e->cond); - convert_bool(fs, e->cond->type); + V->visit_expr(V, e->cond); const int else_jump = code_jump(fs, OP_JUMPFALSEPOP); - V->expr(V, e->lhs); + V->visit_expr(V, e->lhs); const int then_jump = code_jump(fs, OP_JUMP); patch_here(fs, else_jump); - V->expr(V, e->rhs); + // same type as 'e->lhs' + V->visit_expr(V, e->rhs); patch_here(fs, then_jump); } -static void code_coalesce_expr(Visitor *V, CoalesceExpr *e) -{ - FnState *fs = V->lex->fs; +#define code_op(fs, op, subop, type) pawK_code_AB(fs, op, cast(subop, int), basic_code(type)) - V->expr(V, e->lhs); - const int else_jump = code_jump(fs, OP_JUMPNULL); - const int then_jump = code_jump(fs, OP_JUMP); - patch_here(fs, else_jump); - pawK_code_0(fs, OP_POP); - V->expr(V, e->rhs); - patch_here(fs, then_jump); -} - -#define code_op(fs, op, subop, type) pawK_code_AB(fs, op, cast(subop, int), 
y_id(type)) - -static void code_unop_expr(Visitor *V, UnOpExpr *e) +static void code_unop_expr(AstVisitor *V, UnOpExpr *e) { - FnState *fs = V->lex->fs; + FuncState *fs = V->state.G->fs; - V->expr(V, e->target); - code_op(fs, OP_UNOP, e->op, e->target->type); + V->visit_expr(V, e->target); + code_op(fs, OP_UNOP, e->op, e->type); } -static void code_binop_mm(FnState *fs, FunctionType *sig, Type *lhs, Type *rhs) +static void code_binop_expr(AstVisitor *V, BinOpExpr *e) { - + FuncState *fs = V->state.G->fs; + V->visit_expr(V, e->lhs); + V->visit_expr(V, e->rhs); + code_op(fs, OP_BINOP, e->op, e->type); } -static void code_binop_expr(Visitor *V, BinOpExpr *e) +static void code_decl_stmt(AstVisitor *V, AstDeclStmt *s) { - FnState *fs = V->lex->fs; - - Type *lhs_type = e->lhs->type; - Type *rhs_type = e->rhs->type; - V->expr(V, e->lhs); - V->expr(V, e->rhs); - if (e->mm == NULL) { - paw_assert(pawY_is_same(lhs_type, rhs_type)); - code_op(fs, OP_BINOP, e->op, lhs_type); - } else { - code_binop_mm(fs, e->mm, lhs_type, rhs_type); - } + V->visit_decl(V, s->decl); } -static void code_expr_stmt(Visitor *V, ExprStmt *s) +static void code_expr_stmt(AstVisitor *V, AstExprStmt *s) { - FnState *fs = V->lex->fs; + FuncState *fs = V->state.G->fs; if (s->rhs != NULL) { - code_assignment(V, s->lhs, s->rhs); + code_setter(V, s->lhs, s->rhs); return; } - V->expr(V, s->lhs); // function call + V->visit_expr(V, s->lhs); // function call pawK_code_0(fs, OP_POP); // unused return value } -static void code_fn(Visitor *V, Function *fn) +static void code_func(AstVisitor *V, FuncDecl *d, FuncKind kind) { - BlkState bs; - FnState fs; - Function *outer = V->fn; - V->fn = fn; + Generator *G = V->state.G; - fs.name = fn->name; + FuncState fs; + BlockState bs; + fs.name = d->name; + fs.G = G; - Lex *lex = V->lex; - const int id = add_proto(lex, fn->name, &fs.proto); - fs.proto->argc = fn->nargs; - enter_function(lex, &fs, &bs, fn->scope, FN_FUNCTION); - V->stmt_list(V, fn->args); // code parameters - 
V->block_stmt(V, fn->body); // code function body - leave_function(lex); + FuncSig *func = &d->type->func; + const int id = add_proto(G, d->name, &fs.proto); + fs.proto->argc = func->params.count; + enter_function(G, &fs, &bs, d->scope, kind); + visit_decls(V, d->params); // code parameters + V->visit_block_stmt(V, d->body); // code function body + leave_function(G); - // Create, and allow access to, the closure object. - code_closure(lex->fs, fs.proto, id); - V->fn = outer; + code_closure(G->fs, fs.proto, id); } -static void code_attr_stmt(Visitor *V, AttrStmt *s) +static String *func_name(Generator *G, FuncDecl *func) { - Lex *lex = V->lex; - FnState *fs = lex->fs; + FuncSig *sig = &func->type->func; + if (!func->is_poly && sig->types.count > 0) { + // TODO: Consider mangling all symbol names, not just + // template instances. Mangle templates with their + // generic parameter names? + return mangle_name(G, func->name, &sig->types); + } + return func->name; +} - if (s->is_fn) { - code_fn(V, &s->fn); - } else { - pawK_code_0(fs, OP_PUSHUNIT); - } +static String *method_name(Generator *G, StructDecl *parent, FuncDecl *method) +{ + String *name = func_name(G, method); + return mangle_attr(G, parent->name, name); } -static void code_class_stmt(Visitor *V, ClassStmt *s) +static void register_fields(Generator *G, StructDecl *parent) { - ClsState cs; - Lex *lex = V->lex; - FnState *fs = lex->fs; - lex->cs = &cs; + AstDecl *fields = parent->fields->first; + parent->field_scope = pawM_new(env(G->lex), Scope); // TODO + for (int i = 0; i < parent->fields->count; ++i) { + FieldDecl *d = &fields->field; + Symbol *symbol = pawP_add_symbol(G->lex, parent->field_scope); + symbol->is_init = PAW_TRUE; + symbol->name = d->name; + symbol->decl = fields; + fields = d->next; + } +} + +static void register_methods(Generator *G, StructDecl *parent) +{ + AstDecl *methods = parent->methods->first; + parent->method_scope = pawM_new(env(G->lex), Scope); // TODO + for (int i = 0; i < 
parent->methods->count; ++i) { + AstDecl *decl = methods; + if (decl->hdr.next != NULL) { + paw_assert(decl->func.is_poly); + decl = decl->hdr.next; + } + while (decl != NULL) { + FuncDecl *d = &decl->func; + Symbol *symbol = pawP_add_symbol(G->lex, parent->method_scope); + symbol->is_init = PAW_TRUE; + symbol->name = func_name(G, d); + symbol->decl = decl; + decl = d->next; + } + methods = methods->func.sibling; + } +} - const VarInfo var = new_var(V, s->name, s->flags.global); - V->stmt_list(V, s->attrs); - pawK_code_U(fs, OP_NEWCLASS, y_id(var.type)); - lex->cs = cs.outer; +static void code_methods(AstVisitor *V, StructDecl *parent, Struct *struct_) +{ + Generator *G = V->state.G; + FuncState *fs = G->fs; + Scope *scope = parent->method_scope; + for (int i = 0; i < scope->nsymbols; ++i) { + Symbol *symbol = scope->symbols[i]; + FuncDecl *d = &symbol->decl->func; + code_func(V, d, FUNC_METHOD); + pawK_code_U(fs, OP_NEWMETHOD, i); + } + // Make room for the methods to be set at runtime. 
+ if (parent->methods->count > 0) { + pawA_resize(env(G->lex), struct_->methods, cast_size(parent->methods->count)); + } } -static void code_item_stmt(Visitor *V, ItemStmt *s) +static void code_struct(AstVisitor *V, StructDecl *d) { - V->expr(V, s->value); + BlockState bs; + Generator *G = V->state.G; + Lex *lex = G->lex; + paw_Env *P = env(lex); + FuncState *fs = G->fs; + + Value *pv = pawC_push0(P); + Struct *struct_ = pawV_new_struct(P, pv); + const int index = add_struct(G, struct_); + + pawK_code_U(fs, OP_PUSHSTRUCT, index); + enter_block(fs, &bs, d->scope, PAW_FALSE); + + register_fields(G, d); + register_methods(G, d); + code_methods(V, d, struct_); + + leave_block(fs); // layout->scope + pawC_pop(P); // pop 'struct_' } -static void code_init_expr(Visitor *V, InitExpr *e) +// TODO: Never generate code for structs whose types contain free type variables +// Those structures are created +static void code_struct_template(AstVisitor *V, StructDecl *tmpl) { - V->expr(V, e->prefix); - Stmt *attr = e->attrs; - while (attr != NULL) { // TODO: That won't work! Fix me! 
- V->stmt(V, attr); - attr = attr->next; + Generator *G = V->state.G; + FuncState *fs = G->fs; + AstDecl *d = tmpl->next; + while (d != NULL) { + StructDecl *struct_ = &d->struct_; + String *name = mangle_named_type(G, struct_->type); + const VarInfo info = inject_var(fs, name, d, struct_->is_global); + define_var(fs, info); + code_struct(V, struct_); + d = struct_->next; } - Lex *lex = V->lex; - FnState *fs = lex->fs; - ClassType *cls = &e->prefix->type->cls; - pawK_code_U(fs, OP_INIT, cls->nattrs); } -static void code_block_stmt(Visitor *V, Block *bk) +static void code_field_decl(AstVisitor *V, FieldDecl *d) { - BlkState bs; - FnState *fs = V->lex->fs; - enter_block(fs, &bs, bk->scope, PAW_FALSE); - V->stmt_list(V, bk->stmts); - leave_block(fs); + code_var(V->state.G, PAW_FALSE); + paw_unused(d); } -static void code_param_stmt(Visitor *V, ParamStmt *s) +static void code_var_decl(AstVisitor *V, VarDecl *s) { - new_var(V, s->name, PAW_FALSE); + FuncState *fs = V->state.G->fs; + const VarInfo info = declare_var(fs, s->is_global); + V->visit_expr(V, s->init); + define_var(fs, info); } -static void code_def_stmt(Visitor *V, DefStmt *s) +static void code_struct_decl(AstVisitor *V, StructDecl *d) { - Lex *lex = V->lex; - FnState *fs = lex->fs; - const VarInfo info = declare_var(fs, s->name, s->flags.global); - V->expr(V, s->init); + if (d->is_poly) { + code_struct_template(V, d); + return; + } + FuncState *fs = V->state.G->fs; + const VarInfo info = declare_var(fs, d->is_global); define_var(fs, info); + code_struct(V, d); +} + +static void code_item_expr(AstVisitor *V, ItemExpr *e) +{ + V->visit_expr(V, e->value); +} + +static void code_block_stmt(AstVisitor *V, Block *b) +{ + BlockState bs; + FuncState *fs = V->state.G->fs; + enter_block(fs, &bs, b->scope, PAW_FALSE); + visit_stmts(V, b->stmts); + leave_block(fs); } -static void code_return_stmt(Visitor *V, ReturnStmt *s) +static void code_return_stmt(AstVisitor *V, ReturnStmt *s) { - Lex *lex = V->lex; - FnState *fs = 
lex->fs; - if (is_toplevel(lex)) { + Generator *G = V->state.G; + Lex *lex = G->lex; + FuncState *fs = G->fs; + if (is_toplevel(G)) { pawX_error(lex, "return from module is not allowed"); } - V->expr(V, s->expr); + V->visit_expr(V, s->expr); pawK_code_0(fs, OP_RETURN); } -static void code_call_expr(Visitor *V, CallExpr *e) +static paw_Bool is_method_call(const AstExpr *e) +{ + return a_kind(e) == EXPR_SELECTOR && e->selector.is_method; +} + +static paw_Bool is_instance_call(const Type *type) +{ + return type->func.types.count > 0; +} + +static void code_instance_getter(AstVisitor *V, Type *type) +{ + Generator *G = V->state.G; + AstDecl *decl = get_decl(G, type->hdr.def); + Binder binder = y_is_adt(type) ? type->adt.types : type->func.types; + String *name = mangle_name(G, decl->hdr.name, &binder); + const VarInfo info = find_var(G, name); + code_getter(V, info); +} + +static void code_call_expr(AstVisitor *V, CallExpr *e) { - FnState *fs = V->lex->fs; - V->expr(V, e->target); // callable + //const int invoke = start_call(G, e); + Generator *G = V->state.G; + FuncState *fs = G->fs; - // Code the function parameters. 
- Type *fn_type = e->target->type; - paw_assert(y_is_function(fn_type)); - for (Expr *arg = e->args; arg != NULL; arg = arg->next) { - Expr *param = cast_expr(arg); - V->expr(V, param); // code parameter + int invoke = -1; + if (e->func->hdr.def == NO_DECL) { + V->visit_expr(V, e->target); + } else if (is_instance_call(e->func)) { + code_instance_getter(V, e->func); + } else if (is_method_call(e->target)) { + AstDecl *decl = get_decl(G, e->func->hdr.def); + Selector *select = &e->target->selector; + Type *parent = a_type(decl->func.receiver); + String *name = func_name(G, &decl->func); + // emit code for the receiver and save the method index + V->visit_expr(V, select->target); + const VarInfo info = resolve_attr(G, parent, name); + invoke = info.index; + } else { + V->visit_expr(V, e->target); + } + + // push function arguments + visit_exprs(V, e->args); + + if (invoke < 0) { + pawK_code_U(fs, OP_CALL, e->args->count); + } else { + pawK_code_AB(fs, OP_INVOKE, invoke, e->args->count); } - pawK_code_U(fs, OP_CALL, e->nargs); } -static void code_fn_stmt(Visitor *V, FnStmt *s) +static void code_func_template(AstVisitor *V, FuncDecl *tmpl) { - code_fn(V, &s->fn); + Generator *G = V->state.G; + FuncState *fs = G->fs; + AstDecl *d = tmpl->next; + while (d != NULL) { + FuncDecl *func = &d->func; + String *name = func_name(G, func); + const VarInfo info = inject_var(fs, name, d, func->is_global); + code_func(V, func, FUNC_FUNCTION); + define_var(fs, info); + d = func->next; + } +} - // Associate the function object with a variable. 
- new_var(V, s->fn.name, s->flags.global); +static void code_func_decl(AstVisitor *V, FuncDecl *d) +{ + Generator *G = V->state.G; + if (d->is_poly) { + code_func_template(V, d); + return; + } + FuncState *fs = G->fs; + const VarInfo info = declare_var(fs, d->is_global); + code_func(V, d, FUNC_FUNCTION); + define_var(fs, info); } -static void code_ifelse_stmt(Visitor *V, IfElseStmt *s) +static void code_if_stmt(AstVisitor *V, IfStmt *s) { - FnState *fs = V->lex->fs; + Generator *G = V->state.G; + FuncState *fs = G->fs; - V->expr(V, s->cond); - convert_bool(fs, s->cond->type); + V->visit_expr(V, s->cond); const int else_jump = code_jump(fs, OP_JUMPFALSEPOP); - V->stmt(V, s->then_arm); + V->visit_stmt(V, s->then_arm); // NOTE: If there is no 'else' block, this will produce a NOOP jump. const int then_jump = code_jump(fs, OP_JUMP); patch_here(fs, else_jump); - V->stmt(V, s->else_arm); + V->visit_stmt(V, s->else_arm); patch_here(fs, then_jump); } -static void close_until_loop(FnState *fs) +static void close_until_loop(FuncState *fs) { - Lex *lex = fs->lex; - BlkState *bs = fs->bs; + Lex *lex = fs->G->lex; + BlockState *bs = fs->bs; while (bs->outer) { // Emit close/pop instructions, but don't end any lifetimes. Code // paths that doesn't hit this label may still need those locals. 
- BlkState *outer = bs->outer; + BlockState *outer = bs->outer; if (outer->is_loop) { close_vars(fs, bs->level); return; @@ -883,63 +1195,72 @@ static void close_until_loop(FnState *fs) pawX_error(lex, "label outside loop"); } -static void code_label_stmt(Visitor *V, LabelStmt *s) +static void code_label_stmt(AstVisitor *V, LabelStmt *s) { - FnState *fs = V->lex->fs; + Generator *G = V->state.G; + FuncState *fs = G->fs; close_until_loop(fs); // fix the stack add_label(fs, s->label); // emit a jump, to be patched later } -static void code_while_stmt(Visitor *V, WhileStmt *s) +static void code_while_stmt(AstVisitor *V, WhileStmt *s) { - FnState *fs = V->lex->fs; + Generator *G = V->state.G; + FuncState *fs = G->fs; + BlockState bs; + enter_block(fs, &bs, s->scope, PAW_TRUE); const int loop = fs->pc; - V->expr(V, s->cond); - convert_bool(fs, s->cond->type); + V->visit_expr(V, s->cond); const int jump = code_jump(fs, OP_JUMPFALSEPOP); - V->block_stmt(V, s->block); + V->visit_block_stmt(V, s->block); // Finish the loop. 'break' labels jump here, 'continue' labels back to right // before where the conditional expression was evaluated. code_loop(fs, OP_JUMP, loop); adjust_to(fs, LCONTINUE, loop); patch_here(fs, jump); + leave_block(fs); } -static void code_dowhile_stmt(Visitor *V, WhileStmt *s) +static void code_dowhile_stmt(AstVisitor *V, WhileStmt *s) { - FnState *fs = V->lex->fs; + Generator *G = V->state.G; + FuncState *fs = G->fs; + BlockState bs; + enter_block(fs, &bs, s->scope, PAW_TRUE); const int loop = fs->pc; - V->block_stmt(V, s->block); + V->visit_block_stmt(V, s->block); adjust_from(fs, LCONTINUE); - V->expr(V, s->cond); - convert_bool(fs, s->cond->type); + V->visit_expr(V, s->cond); // If the condition is false, jump over the instruction that moves control back // to the top of the loop. 
const int jump = code_jump(fs, OP_JUMPFALSEPOP); code_loop(fs, OP_JUMP, loop); patch_here(fs, jump); + leave_block(fs); } -static void code_forbody(Visitor *V, String *iname, Block *block, Op opinit, Op oploop) +static void code_forbody(AstVisitor *V, Block *block, Op opinit, Op oploop) { - BlkState bs; - FnState *fs = V->lex->fs; + BlockState bs; + Generator *G = V->state.G; + FuncState *fs = G->fs; // Emit OP_FOR*0, which either pushes the loop variables, or jumps over // the loop. const int jump = code_jump(fs, opinit); const int loop = fs->pc; + // Enter a scope for the loop variable: if it is captured in a closure, + // the upvalue must be closed at the end of the iteration. enter_block(fs, &bs, block->scope, PAW_FALSE); - add_local(fs, iname); - begin_local_scope(fs, 1); - V->block_stmt(V, block); - leave_block(fs); // close loop variable + code_var(G, PAW_FALSE); + V->visit_block_stmt(V, block); + leave_block(fs); // Continue statements jump here, right before the loop instruction. adjust_from(fs, LCONTINUE); @@ -947,33 +1268,37 @@ static void code_forbody(Visitor *V, String *iname, Block *block, Op opinit, Op patch_here(fs, jump); } -static void code_fornum_stmt(Visitor *V, ForStmt *s) +static void code_fornum_stmt(AstVisitor *V, ForStmt *s) { - FnState *fs = V->lex->fs; + Generator *G = V->state.G; ForNum *fornum = &s->fornum; - V->expr(V, fornum->begin); - V->expr(V, fornum->end); - V->expr(V, fornum->step); - begin_local_scope(fs, 3); - code_forbody(V, s->name, s->block, OP_FORNUM0, OP_FORNUM); + V->visit_expr(V, fornum->begin); + V->visit_expr(V, fornum->end); + V->visit_expr(V, fornum->step); + code_var(G, PAW_FALSE); + code_var(G, PAW_FALSE); + code_var(G, PAW_FALSE); + + code_forbody(V, s->block, OP_FORNUM0, OP_FORNUM); } -static void code_forin_stmt(Visitor *V, ForStmt *s) // TODO: forin would need to encode the type of object being iterated over. look into function call for loop? 
+static void code_forin_stmt(AstVisitor *V, ForStmt *s) // TODO: forin would need to encode the type of object being iterated over. look into function call for loop? { - FnState *fs = V->lex->fs; + Generator *G = V->state.G; ForIn *forin = &s->forin; - V->expr(V, forin->target); - add_local(fs, s->name); - begin_local_scope(fs, 2); - code_forbody(V, s->name, s->block, OP_FORIN0, OP_FORIN); + V->visit_expr(V, forin->target); + code_var(G, PAW_FALSE); + + code_forbody(V, s->block, OP_FORIN0, OP_FORIN); } -static void code_for_stmt(Visitor *V, ForStmt *s) +static void code_for_stmt(AstVisitor *V, ForStmt *s) { - BlkState bs; - FnState *fs = V->lex->fs; + BlockState bs; + Generator *G = V->state.G; + FuncState *fs = G->fs; enter_block(fs, &bs, s->scope, PAW_TRUE); if (s->kind == STMT_FORNUM) { code_fornum_stmt(V, s); @@ -983,97 +1308,111 @@ static void code_for_stmt(Visitor *V, ForStmt *s) leave_block(fs); } -static void code_array_expr(Visitor *V, ArrayExpr *e) -{ - FnState *fs = V->lex->fs; - - V->expr_list(V, e->items); - pawK_code_U(fs, OP_NEWARRAY, e->nitems); -} - -//static void code_map_expr(Visitor *V, MapExpr *e) +//static void code_array_expr(AstVisitor *V, ArrayExpr *e) +//{ +// FuncState *fs = G->fs; +// +// visit_exprs(V, e->items); +// pawK_code_U(fs, OP_NEWARRAY, e->nitems); +//} +// +//static void code_map_expr(AstVisitor *V, MapExpr *e) //{ -// V->expr_list(V, e->items); -// pawK_code_U(V->lex->fs, OP_NEWARRAY, e->items.size); +// visit_exprs(V, e->items); +// pawK_code_U(G->fs, OP_NEWARRAY, e->items.size); //} -static void code_index_expr(Visitor *V, IndexExpr *e) +static paw_Bool is_index_template(Generator *G, const Type *type) { - V->expr(V, e->target); - V->expr(V, e->first); - - Op op; - if (e->second != NULL) { - V->expr(V, e->second); - op = OP_GETSLICE; + AstDecl *decl = get_decl(G, type->hdr.def); + if (a_is_func_decl(decl)) { + return decl->func.is_poly; } else { - op = OP_GETITEM; + return decl->struct_.is_poly; } - pawK_code_AB(V->lex->fs, 
op, y_id(e->target->type), y_id(e->first->type)); } -static void code_access_expr(Visitor *V, AccessExpr *e) +static void code_index_expr(AstVisitor *V, Index *e) { - V->expr(V, e->target); - pawK_code_U(V->lex->fs, OP_GETATTR, e->index); + Generator *G = V->state.G; + const Type *target = a_type(e->target); + if (is_index_template(G, target)) { + code_instance_getter(V, e->type); + return; + } + V->visit_expr(V, e->target); + V->visit_expr(V, e->elems->first); + + const paw_Type tt = basic_code(target); + const paw_Type et = basic_code(a_type(e->elems->first)); + pawK_code_AB(G->fs, OP_GETITEM, tt, et); } -static void code_invoke_expr(Visitor *V, InvokeExpr *e) +// TODO: Need to handle a.b and a.b[c], where 'b' is a method (need to bind to 'self') +static void code_selector_expr(AstVisitor *V, Selector *e) { - FnState *fs = V->lex->fs; - V->expr(V, e->target); - - Expr *arg = e->args; - while (arg != NULL) { - V->expr(V, arg); - arg = arg->next; - } + Generator *G = V->state.G; + V->visit_expr(V, e->target); + const VarInfo info = resolve_attr(G, a_type(e->target), e->name); + pawK_code_U(G->fs, OP_GETATTR, info.index); +} - pawK_code_AB(fs, OP_INVOKE, e->index, e->nargs); +static void setup_pass(AstVisitor *V, Generator *G) +{ + const AstState state = {.G = G}; + pawA_visitor_init(V, G->ast, state); + V->visit_literal_expr = code_literal_expr; + V->visit_logical_expr = code_logical_expr; + V->visit_ident_expr = code_ident_expr; + V->visit_chain_expr = code_chain_expr; + V->visit_unop_expr = code_unop_expr; + V->visit_binop_expr = code_binop_expr; + V->visit_cond_expr = code_cond_expr; + V->visit_call_expr = code_call_expr; + V->visit_index_expr = code_index_expr; + V->visit_selector_expr = code_selector_expr; + V->visit_item_expr = code_item_expr; + V->visit_block_stmt = code_block_stmt; + V->visit_expr_stmt = code_expr_stmt; + V->visit_decl_stmt = code_decl_stmt; + V->visit_if_stmt = code_if_stmt; + V->visit_for_stmt = code_for_stmt; + V->visit_while_stmt = 
code_while_stmt; + V->visit_dowhile_stmt = code_dowhile_stmt; + V->visit_label_stmt = code_label_stmt; + V->visit_return_stmt = code_return_stmt; + V->visit_var_decl = code_var_decl; + V->visit_func_decl = code_func_decl; + V->visit_struct_decl = code_struct_decl; + V->visit_field_decl = code_field_decl; } +static void code_module(Generator *G) +{ + Lex *lex = G->lex; + ParseMemory *pm = lex->pm; + + FuncState fs; + BlockState bs; + fs.name = lex->modname; + fs.proto = lex->main->p; + + Scope *toplevel = pm->symbols.toplevel; + enter_function(G, &fs, &bs, toplevel, FUNC_MODULE); + + AstVisitor V; + setup_pass(&V, G); + pawA_visit(&V); + + leave_function(G); +} void p_generate_code(Lex *lex) { - Visitor V; - pawK_init_visitor(&V, lex); - V.primitive_expr = code_primitive_expr; - V.literal_expr = code_literal_expr; - V.logical_expr = code_logical_expr; - V.chain_expr = code_chain_expr; - V.cond_expr = code_cond_expr; - V.coalesce_expr = code_coalesce_expr; - V.unop_expr = code_unop_expr; - V.binop_expr = code_binop_expr; - V.var_expr = code_var_expr; - V.array_expr = code_array_expr; -// V.map_expr = code_map_expr; - V.access_expr = code_access_expr; - V.invoke_expr = code_invoke_expr; - V.index_expr = code_index_expr; - V.init_expr = code_init_expr; - V.return_stmt = code_return_stmt; - V.call_expr = code_call_expr; - V.param_stmt = code_param_stmt; - V.block_stmt = code_block_stmt; - V.class_stmt = code_class_stmt; - V.item_stmt = code_item_stmt; - V.attr_stmt = code_attr_stmt; - V.def_stmt = code_def_stmt; - V.fn_stmt = code_fn_stmt; - V.for_stmt = code_for_stmt; - V.while_stmt = code_while_stmt; - V.dowhile_stmt = code_dowhile_stmt; - V.label_stmt = code_label_stmt; - V.ifelse_stmt = code_ifelse_stmt; - V.expr_stmt = code_expr_stmt; - - BlkState bs; - FnState fs = { - .name = lex->modname, - .proto = lex->main->p, + Generator G = { + .lex = lex, + .ast = lex->pm->ast, + .sym = &lex->pm->symbols, }; - enter_function(lex, &fs, &bs, lex->pm->st.toplevel, FN_MODULE); 
- pawK_visit(&V, lex->ast); - leave_function(lex); + code_module(&G); } diff --git a/src/ctx.h b/src/ctx.h new file mode 100644 index 0000000..e69de29 diff --git a/src/debug.c b/src/debug.c index dd050d1..91130dd 100644 --- a/src/debug.c +++ b/src/debug.c @@ -63,7 +63,7 @@ const char *paw_binop_name(BinaryOp binop) } } -const char *paw_opcode_name(Op op) +const char *paw_op_name(Op op) { switch (op) { case OP_CASTBOOL: @@ -72,6 +72,8 @@ const char *paw_opcode_name(Op op) return "CASTINT"; case OP_CASTFLOAT: return "CASTFLOAT"; + case OP_PUSHSTRUCT: + return "PUSHSTRUCT"; case OP_PUSHUNIT: return "PUSHUNIT"; case OP_PUSHTRUE: @@ -80,6 +82,10 @@ const char *paw_opcode_name(Op op) return "PUSHFALSE"; case OP_PUSHCONST: return "PUSHCONST"; + case OP_COPY: + return "COPY"; + case OP_INITATTR: + return "INITATTR"; case OP_POP: return "POP"; case OP_CLOSE: @@ -90,10 +96,6 @@ const char *paw_opcode_name(Op op) return "RETURN"; case OP_CLOSURE: return "CLOSURE"; - case OP_GETSUPER: - return "GETSUPER"; - case OP_INVOKESUPER: - return "INVOKESUPER"; case OP_CALL: return "CALL"; case OP_INVOKE: @@ -120,8 +122,8 @@ const char *paw_opcode_name(Op op) return "GETUPVALUE"; case OP_SETUPVALUE: return "SETUPVALUE"; - case OP_NEWCLASS: - return "NEWCLASS"; + case OP_NEWINSTANCE: + return "NEWINSTANCE"; case OP_NEWMETHOD: return "NEWMETHOD"; case OP_NEWARRAY: @@ -155,6 +157,143 @@ const char *paw_opcode_name(Op op) } } +void paw_dump_opcode(OpCode opcode) +{ + switch (get_OP(opcode)) { + case OP_CASTBOOL: + printf("CASTBOOL\n"); + break; + case OP_CASTINT: + printf("CASTINT\n"); + break; + case OP_CASTFLOAT: + printf("CASTFLOAT\n"); + break; + case OP_PUSHSTRUCT: + printf("PUSHSTRUCT %d\n", get_U(opcode)); + break; + case OP_PUSHUNIT: + printf("PUSHUNIT\n"); + break; + case OP_PUSHTRUE: + printf("PUSHTRUE\n"); + break; + case OP_PUSHFALSE: + printf("PUSHFALSE\n"); + break; + case OP_PUSHCONST: + printf("PUSHCONST %d\n", get_U(opcode)); + break; + case OP_COPY: + printf("COPY\n"); + 
break; + case OP_INITATTR: + printf("INITATTR\n"); + break; + case OP_POP: + printf("POP\n"); + break; + case OP_CLOSE: + printf("CLOSE\n"); + break; + case OP_INIT: + printf("INIT\n"); + break; + case OP_RETURN: + printf("RETURN\n"); + break; + case OP_CLOSURE: + printf("CLOSURE\n"); + break; + case OP_CALL: + printf("CALL\n"); + break; + case OP_INVOKE: + printf("INVOKE\n"); + break; + case OP_JUMP: + printf("JUMP\n"); + break; + case OP_JUMPFALSE: + printf("JUMPFALSE\n"); + break; + case OP_JUMPFALSEPOP: + printf("JUMPFALSEPOP\n"); + break; + case OP_JUMPNULL: + printf("JUMPNULL\n"); + break; + case OP_GLOBAL: + printf("GLOBAL\n"); + break; + case OP_GETGLOBAL: + printf("GETGLOBAL: %d\n", get_U(opcode)); + break; + case OP_SETGLOBAL: + printf("SETGLOBAL: %d\n", get_U(opcode)); + break; + case OP_GETLOCAL: + printf("GETLOCAL: %d\n", get_U(opcode)); + break; + case OP_SETLOCAL: + printf("SETLOCAL: %d\n", get_U(opcode)); + break; + case OP_GETUPVALUE: + printf("GETUPVALUE: %d\n", get_U(opcode)); + break; + case OP_SETUPVALUE: + printf("SETUPVALUE: %d\n", get_U(opcode)); + break; + case OP_NEWINSTANCE: + printf("NEWINSTANCE\n"); + break; + case OP_NEWMETHOD: + printf("NEWMETHOD\n"); + break; + case OP_NEWARRAY: + printf("NEWARRAY\n"); + break; + case OP_NEWMAP: + printf("NEWMAP\n"); + break; + case OP_VARARG: + printf("VARARG\n"); + break; + case OP_FORNUM0: + printf("FORNUM0\n"); + break; + case OP_FORNUM: + printf("FORNUM\n"); + break; + case OP_FORIN0: + printf("FORIN0\n"); + break; + case OP_FORIN: + printf("FORIN\n"); + break; + case OP_UNOP: + printf("UNOP %s %d\n", paw_unop_name(get_A(opcode)), get_B(opcode)); + break; + case OP_BINOP: + printf("BINOP %s %d\n", paw_binop_name(get_A(opcode)), get_B(opcode)); + break; + case OP_GETATTR: + printf("GETATTR %d\n", get_U(opcode)); + break; + case OP_SETATTR: + printf("SETATTR %d\n", get_U(opcode)); + break; + case OP_GETITEM: + printf("GETITEM %d\n", get_U(opcode)); + break; + case OP_SETITEM: + printf("SETITEM 
%d\n", get_U(opcode)); + break; + default: + printf("???\n"); + } +} + void dump_aux(paw_Env *P, Proto *proto, Buffer *print) { const OpCode *pc = proto->source; @@ -165,26 +304,31 @@ void dump_aux(paw_Env *P, Proto *proto, Buffer *print) pawL_add_fstring(P, print, "' (%I bytes)\n", (paw_Int)proto->length); pawL_add_fstring(P, print, "constant(s) = %I, upvalue(s) = %I\n", (paw_Int)proto->nk, (paw_Int)proto->nup); for (int i = 0; pc != end; ++i) { - pawL_add_fstring(P, print, "%d %I %s", i, (paw_Int)(pc - proto->source), paw_opcode_name(get_OP(pc[0]))); + pawL_add_fstring(P, print, "%d %I %s", i, (paw_Int)(pc - proto->source), paw_op_name(get_OP(pc[0]))); const OpCode opcode = *pc++; switch (get_OP(opcode)) { case OP_UNOP: { - pawL_add_fstring(P, print, " ; type = %s", paw_unop_name(get_A(opcode))); + pawL_add_fstring(P, print, " ; op = %s", paw_unop_name(get_A(opcode))); break; } case OP_BINOP: { - pawL_add_fstring(P, print, " ; type = %s", paw_binop_name(get_A(opcode))); + pawL_add_fstring(P, print, " ; op = %s", paw_binop_name(get_A(opcode))); break; } case OP_CLOSE: { - pawL_add_fstring(P, print, " ; npop = %d, close = %d", get_A(opcode), get_B(opcode)); + pawL_add_fstring(P, print, " ; count = %d, close = %d", get_A(opcode), get_B(opcode)); break; } case OP_PUSHCONST: { - pawL_add_fstring(P, print, " ; id = %d", get_U(opcode)); + pawL_add_fstring(P, print, " ; k = %d", get_U(opcode)); + break; + } + + case OP_PUSHSTRUCT: { + pawL_add_fstring(P, print, " ; $ = %d", get_U(opcode)); break; } @@ -218,6 +362,16 @@ void dump_aux(paw_Env *P, Proto *proto, Buffer *print) break; } + case OP_GETATTR: { + pawL_add_fstring(P, print, " ; id = %d", get_U(opcode)); + break; + } + + case OP_SETATTR: { + pawL_add_fstring(P, print, " ; id = %d", get_U(opcode)); + break; + } + case OP_GETLOCAL: { pawL_add_fstring(P, print, " ; id = %d", get_U(opcode)); break; @@ -255,11 +409,6 @@ void dump_aux(paw_Env *P, Proto *proto, Buffer *print) break; } - case OP_NEWCLASS: { - 
pawL_add_fstring(P, print, " ; k = %d, superclass? %d", get_A(opcode), get_B(opcode)); - break; - } - case OP_NEWMETHOD: { pawL_add_fstring(P, print, " ; k = %d", get_U(opcode)); break; @@ -275,25 +424,25 @@ void dump_aux(paw_Env *P, Proto *proto, Buffer *print) } else { pawL_add_string(P, print, ""); } - pawL_add_fstring(P, print, "', nup = %I", (paw_Int)p->nup); + pawL_add_fstring(P, print, "', nupvalues = %I", (paw_Int)p->nup); break; } case OP_INVOKE: { const int id = get_A(opcode); - pawL_add_fstring(P, print, " ; id = %d, # params = %d", id, get_B(opcode)); + pawL_add_fstring(P, print, " ; id = %d, # nargs = %d", id, get_B(opcode)); break; } case OP_CALL: { - pawL_add_fstring(P, print, " ; # params = %d", get_U(opcode)); + pawL_add_fstring(P, print, " ; # nargs = %d", get_U(opcode)); break; } case OP_VARARG: { const int nfixed = get_U(opcode); const int npassed = paw_get_count(P) - 1; - pawL_add_fstring(P, print, " ; # argv = %d", npassed - nfixed); + pawL_add_fstring(P, print, " ; # nargs = %d", npassed - nfixed); break; } @@ -334,7 +483,6 @@ void paw_dump_source(paw_Env *P, Proto *proto) pawL_discard_result(P, &print); } -// TODO: Copy of code in error.c. Maybe merge error.c into this TU static int current_line(CallFrame *cf) { Proto *p = cf->fn->p; @@ -375,6 +523,24 @@ void paw_stacktrace(paw_Env *P) pawL_push_result(P, &buf); } +// TODO: rename paw_dump_locals +void paw_dump_stack(paw_Env *P) +{ + CallFrame *cf = P->main.next; + while (cf != NULL) { + const Proto *func = cf->fn->p; + printf("Frame: %s\n", func->name->text); + for (int i = 0; i < func->ndebug; ++i) { + const struct LocalInfo info = func->v[i]; + const char *capture = info.captured ? 
"*" : ""; + const String *name = info.var.name; + const paw_Type code = info.var.code; + printf(" %3d: %s%s (%d)\n", i, name->text, capture, code); + } + cf = cf->next; + } +} + void paw_dump_value(paw_Env *P, Value v, paw_Type type) { Buffer buf; diff --git a/src/debug.h b/src/debug.h index 39460b3..bfa8c69 100644 --- a/src/debug.h +++ b/src/debug.h @@ -7,7 +7,8 @@ #include "env.h" #include "paw.h" -const char *paw_opcode_name(Op op); +const char *paw_op_name(Op op); +void paw_dump_opcode(OpCode opcode); void paw_dump_source(paw_Env *P, Proto *proto); void paw_dump_stack(paw_Env *P); void paw_stacktrace(paw_Env *P); diff --git a/src/env.c b/src/env.c index 8bf66a1..bd1c027 100644 --- a/src/env.c +++ b/src/env.c @@ -21,7 +21,7 @@ CallFrame *pawE_extend_cf(paw_Env *P, StackPtr top) return cf; } -int pawE_new_global(paw_Env *P, String *name, Type *tag) +int pawE_new_global(paw_Env *P, String *name, paw_Type type) { struct GlobalVec *gv = &P->gv; // enforce uniqueness for (int i = 0; i < gv->size; ++i) { @@ -33,7 +33,7 @@ int pawE_new_global(paw_Env *P, String *name, Type *tag) const int i = gv->size++; GlobalVar *var = &gv->data[i]; var->desc.name = name; - var->desc.type = tag; + var->desc.code = type; v_set_0(&var->value); return i; } diff --git a/src/env.h b/src/env.h index 6eb3a07..42b0265 100644 --- a/src/env.h +++ b/src/env.h @@ -8,12 +8,61 @@ #include "opcode.h" #include "paw.h" #include "str.h" +#include "type.h" #include "value.h" #include #include struct Jump; // call.c +typedef enum DefKind { + DEF_VAR, + DEF_FUNC, + DEF_STRUCT, + DEF_FIELD, + DEF_TYPE, +} DefKind; + +typedef struct Definition Definition; + +#define PAWE_DEF_HEADER Type *type; \ + DefId id: 8; \ + DefKind kind: 8 +typedef struct DefHeader { + PAWE_DEF_HEADER; +} DefHeader; + +typedef struct VarDef { + PAWE_DEF_HEADER; + String *name; +} VarDef; + +typedef struct FuncDef { + PAWE_DEF_HEADER; + String *name; +} FuncDef; + +typedef struct TypeDef { + PAWE_DEF_HEADER; +} TypeDef; + +typedef 
struct AdtDef { + PAWE_DEF_HEADER; + paw_Bool is_struct: 1; + int nattrs; + Definition **attrs; +} AdtDef; + +typedef struct Definition { + union { + DefHeader hdr; + VarDef var; + FuncDef func; + TypeDef type; + AdtDef adt; + }; +} Definition; + #define CFF_C 1 #define CFF_ENTRY 2 @@ -33,10 +82,13 @@ typedef struct CallFrame { enum { CSTR_SELF, - CSTR_SUPER, - CSTR_INIT, CSTR_TRUE, CSTR_FALSE, + CSTR_UNIT, + CSTR_BOOL, + CSTR_INT, + CSTR_FLOAT, + CSTR_STRING, NCSTR, }; @@ -59,12 +111,22 @@ typedef struct paw_Env { StackRel bound; StackRel top; + // Array containing a definition for each program construct. Created during + // type checking, and kept around for RTTI purposes. + Definition *defs; + int ndefs; + Map *libs; Value object; - struct ModuleType *mod; - struct Instance *builtin[NOBJECTS]; + Module *mod; + Instance *builtin[NOBJECTS]; Value meta_keys[NMETAMETHODS]; - Value str_cache[NCSTR]; + + // Array of commonly-used strings. + String *str_cache[NCSTR]; + + // Contains an error message that is served when the system runs out of memory + // (a call to the 'alloc' field below returned NULL). Value mem_errmsg; // TODO: At some point, the globals should go into a struct called Module. Make @@ -87,11 +149,11 @@ typedef struct paw_Env { } paw_Env; CallFrame *pawE_extend_cf(paw_Env *P, StackPtr top); -int pawE_new_global(paw_Env *P, String *name, Type *tag); +int pawE_new_global(paw_Env *P, String *name, paw_Type type); GlobalVar *pawE_find_global(paw_Env *P, String *name); #define pawE_get_global(P, i) (&(P)->gv.data[i]) -static inline Value pawE_cstr(paw_Env *P, unsigned type) +static inline String *pawE_cstr(paw_Env *P, unsigned type) { paw_assert(type < NCSTR); return P->str_cache[type]; diff --git a/src/gc.c b/src/gc_aux.c similarity index 94% rename from src/gc.c rename to src/gc_aux.c index ad88630..0efd9db 100644 --- a/src/gc.c +++ b/src/gc_aux.c @@ -1,9 +1,8 @@ // Copyright (c) 2024, The paw Authors. All rights reserved. 
// This source code is licensed under the MIT License, which can be found in // LICENSE.md. See AUTHORS.md for a list of contributor names. -#include "gc.h" +#include "gc_aux.h" #include "array.h" -//#include "bigint.h" #include "env.h" #include "map.h" #include "mem.h" @@ -13,14 +12,50 @@ #include #include +// TODO: Using Boehm-Demers-Weiser GC for now +#include + +void pawG_init(paw_Env *P) +{ + paw_unused(P); +} + +void pawG_uninit(paw_Env *P) +{ + paw_unused(P); +} + +void pawG_collect(paw_Env *P) +{ + paw_unused(P); + P->gc_bytes = 0; +} + +void pawG_fix_object(paw_Env *P, Object *o) +{ + paw_unused(P); + paw_unused(o); +} + +void pawG_add_object(paw_Env *P, Object *o, ValueKind kind) +{ + paw_unused(P); + o->gc_kind = kind; +} + +void pawG_free_object(paw_Env *P, Object *o) +{ + paw_unused(P); + paw_unused(o); +} + +#if 0 #ifndef PAW_GC_LIMIT #define PAW_GC_LIMIT (1024 * 1024) #endif static void gc_trace_object(const char *msg, void *ptr) { - if (ptr == (void *)0x104406dc0) { - } #ifdef PAW_TRACE_GC fprintf(stdout, "(gc) %s: %p\n", msg, ptr); #else @@ -377,3 +412,4 @@ void pawG_free_object(paw_Env *P, Object *o) paw_assert(PAW_FALSE); } } +#endif // 0 diff --git a/src/gc.h b/src/gc_aux.h similarity index 100% rename from src/gc.h rename to src/gc_aux.h diff --git a/src/lex.c b/src/lex.c index e1b1a8b..c9ede0a 100644 --- a/src/lex.c +++ b/src/lex.c @@ -4,7 +4,7 @@ #include "lex.h" #include "auxlib.h" //#include "bigint.h" -#include "gc.h" +#include "gc_aux.h" #include "map.h" #include "mem.h" #include "parse.h" @@ -16,6 +16,9 @@ #include #define lex_error(x) pawX_error(x, "syntax error") +#define save_and_next(x) (save(x, (x)->c), next(x)) +#define is_eof(x) (cast((x)->c, uint8_t) == TK_END) +#define is_newline(x) ((x)->c == '\r' || (x)->c == '\n') static void add_location(paw_Env *P, Buffer *print, const String *s, int line) { @@ -39,48 +42,7 @@ void pawX_error(Lex *x, const char *fmt, ...) 
pawC_throw(x->P, PAW_ESYNTAX); } -static struct Token make_token(TokenKind kind) -{ - return (struct Token){ - .kind = kind, - }; -} - -static struct Token make_int(struct Lex *x) -{ - const Value v = x->P->top.p[-1]; - Token t = {.value = v, .kind = TK_INTEGER}; - //if (!i_is_small(t.value)) { - // // Anchor big integers in the strings table so they don't get collected. They - // // will eventually end up anchored in the constants table. Use the value - // // representation as an integer key (contains pointer info, so it is unique). TODO: Won't work: GC needs the type - // Value key; - // v_set_int(&key, v.i); - // Value *slot = pawH_action(x->P, x->strings, key, MAP_ACTION_CREATE); - // *slot = t.value; - //} - pawC_stkdec(x->P, 1); - return t; -} - -static struct Token make_float(struct Lex *x) -{ - const Value v = x->P->top.p[-1]; - pawC_stkdec(x->P, 1); - return (Token){.value = v, .kind = TK_FLOAT}; -} - -static struct Token make_string(struct Lex *x, TokenKind kind) -{ - ParseMemory *pm = x->pm; - struct Token t = make_token(kind); - String *s = pawX_scan_string(x, pm->scratch.data, cast_size(pm->scratch.size)); - v_set_object(&t.value, s); - pm->scratch.size = 0; - return t; -} - -static char next(struct Lex *x) +static char next_raw(struct Lex *x) { paw_Env *P = x->P; if (x->nchunk == 0) { @@ -91,11 +53,29 @@ static char next(struct Lex *x) --x->nchunk; ++x->chunk; } else { - x->c = (char)TK_END; + x->c = cast(TK_END, char); } return x->c; } +static void increment_line(struct Lex *x) +{ + paw_assert(ISNEWLINE(x->c)); + if (x->line == INT_MAX) { + pawX_error(x, "too many lines in module"); + } + ++x->line; +} + +static char next(struct Lex *x) +{ + char c = next_raw(x); + if (ISNEWLINE(c)) { + increment_line(x); + } + return c; +} + static void save(struct Lex *x, char c) { ParseMemory *pm = x->pm; @@ -103,9 +83,6 @@ static void save(struct Lex *x, char c) pm->scratch.data[pm->scratch.size++] = c; } -#define save_and_next(x) (save(x, (x)->c), next(x)) 
-#define is_eof(x) ((uint8_t)(x)->c == TK_END) - static paw_Bool test_next(struct Lex *x, char c) { if (x->c == c) { @@ -124,6 +101,39 @@ static paw_Bool test_next2(struct Lex *x, const char *c2) return PAW_FALSE; } +static struct Token make_token(TokenKind kind) +{ + return (struct Token){ + .kind = kind, + }; +} + +static struct Token make_int(struct Lex *x) +{ + const Value v = x->P->top.p[-1]; + Token t = {.value = v, .kind = TK_INTEGER}; + pawC_stkdec(x->P, 1); + return t; +} + +static struct Token make_float(struct Lex *x) +{ + const Value v = x->P->top.p[-1]; + pawC_stkdec(x->P, 1); + return (Token){.value = v, .kind = TK_FLOAT}; +} + +static struct Token make_string(struct Lex *x, TokenKind kind) +{ + ParseMemory *pm = x->pm; + struct CharVec *cv = &pm->scratch; + struct Token t = make_token(kind); + String *s = pawX_scan_string(x, cv->data, cast_size(cv->size)); + v_set_object(&t.value, s); + cv->size = 0; + return t; +} + static struct Token consume_name(struct Lex *x) { save_and_next(x); @@ -132,8 +142,9 @@ static struct Token consume_name(struct Lex *x) } struct Token t = make_string(x, TK_NAME); const String *s = v_string(t.value); - t.kind = s->flag > 0 ? 
(TokenKind)s->flag : t.kind; - if (s->length > PAW_NAME_MAX) { + if (s->flag > 0) { + t.kind = cast(s->flag, TokenKind); + } else if (s->length > PAW_NAME_MAX) { pawX_error(x, "name (%I chars) is too long", paw_cast_int(s->length)); } @@ -205,21 +216,6 @@ static int get_codepoint(struct Lex *x) HEXVAL(c[3]); // } -static void increment_line(struct Lex *x) -{ - paw_assert(ISNEWLINE(x->c)); - const char first = x->c; - if (x->line == INT_MAX) { - pawX_error(x, "too many lines in module"); - } - ++x->line; - - next(x); - if (ISNEWLINE(x->c) && x->c != first) { - next(x); - } -} - static struct Token consume_string(struct Lex *x) { const char quote = x->c; @@ -311,8 +307,8 @@ static struct Token consume_string(struct Lex *x) } else if (test_next(x, quote)) { return make_string(x, TK_STRING); } else if (ISNEWLINE(x->c)) { - save(x, x->c); // newlines allowed in string literals - increment_line(x); + // unescaped newlines allowed in string literals + save_and_next(x); } else if (consume_utf8(x)) { lex_error(x); } @@ -366,13 +362,10 @@ static void skip_block_comment(struct Lex *x) for (;;) { if (test_next(x, '*') && test_next(x, '/')) { break; - } else if (ISNEWLINE(x->c)) { - increment_line(x); } else if (is_eof(x)) { pawX_error(x, "missing end of block comment"); - } else { - next(x); } + next(x); } } @@ -383,126 +376,146 @@ static void skip_line_comment(struct Lex *x) } } -static Token advance(struct Lex *x) +static void skip_whitespace(Lex *x) { -#define T(kind) make_token(kind) - for (;;) { - x->pm->scratch.size = 0; - if (ISDIGIT(x->c)) { - return consume_number(x); - } else if (ISNAME(x->c)) { - return consume_name(x); - } - switch (x->c) { - case '\n': - case '\r': - increment_line(x); - break; - case ' ': - case '\f': - case '\t': - case '\v': - next(x); - break; - case '\'': - case '"': - return consume_string(x); - case '=': - next(x); - if (test_next(x, '=')) { - return T(TK_EQUALS2); - } - return T('='); - case '&': - next(x); - if (test_next(x, '&')) { - 
return T(TK_AMPER2); - } - return T('&'); - case '|': - next(x); - if (test_next(x, '|')) { - return T(TK_PIPE2); - } - return T('|'); - case '-': - next(x); - if (test_next(x, '>')) { - return T(TK_ARROW); - } - return T('-'); - case '?': - next(x); - if (test_next(x, '?')) { - return T(TK_QUESTION2); - } else if (test_next(x, ':')) { - return T(TK_ELVIS); - } - return T('?'); - case ':': - next(x); - if (test_next(x, ':')) { - return T(TK_COLON2); - } - return T(':'); - case '!': - next(x); - if (test_next(x, '=')) { - return T(TK_BANG_EQ); - } - return T('!'); - case '<': - next(x); - if (test_next(x, '<')) { - return T(TK_LESS2); - } else if (test_next(x, '=')) { - return T(TK_LESS_EQ); - } - return T('<'); - case '>': - next(x); - if (test_next(x, '>')) { - return T(TK_GREATER2); - } else if (test_next(x, '=')) { - return T(TK_GREATER_EQ); - } - return T('>'); - case '.': - save_and_next(x); // may be float + while (x->c == ' ' || x->c == '\t' || + x->c == '\f' || x->c == '\v' || + (is_newline(x) && !x->add_semi)) { + next(x); + } +} + +static Token advance(Lex *x) +{ +try_again: +#define T(kind) make_token(cast(kind, TokenKind)) + skip_whitespace(x); + + // cast to avoid sign extension + Token token = T(cast(x->c, uint8_t)); + paw_Bool semi = PAW_FALSE; + x->pm->scratch.size = 0; + switch (x->c) { + case '\n': + case '\r': + paw_assert(x->add_semi); + x->add_semi = PAW_FALSE; + next(x); // make progress + return T(';'); + case '\'': + case '"': + token = consume_string(x); + semi = PAW_TRUE; + break; + case ')': + case ']': + case '}': + next(x); + semi = PAW_TRUE; + break; + case '=': + next(x); + if (test_next(x, '=')) { + token = T(TK_EQUALS2); + } + break; + case '&': + next(x); + if (test_next(x, '&')) { + token = T(TK_AMPER2); + } + break; + case '|': + next(x); + if (test_next(x, '|')) { + token = T(TK_PIPE2); + } + break; + case '-': + next(x); + if (test_next(x, '>')) { + token = T(TK_ARROW); + } + break; + case '?': + next(x); + if (test_next(x, 
'?')) { + token = T(TK_QUESTION2); + } + break; + case ':': + next(x); + if (test_next(x, ':')) { + token = T(TK_COLON2); + } + break; + case '!': + next(x); + if (test_next(x, '=')) { + token = T(TK_BANG_EQ); + } + break; + case '<': + next(x); + if (test_next(x, '<')) { + token = T(TK_LESS2); + } else if (test_next(x, '=')) { + token = T(TK_LESS_EQ); + } + break; + case '>': + next(x); + if (test_next(x, '>')) { + token = T(TK_GREATER2); + } else if (test_next(x, '=')) { + token = T(TK_GREATER_EQ); + } + break; + case '.': + save_and_next(x); // may be float + if (test_next(x, '.')) { if (test_next(x, '.')) { - if (test_next(x, '.')) { - return T(TK_DOT3); - } - lex_error(x); // '..' not allowed - } else if (ISDIGIT(x->c)) { - return consume_number(x); - } - return T('.'); - case '/': - next(x); - if (test_next(x, '/')) { - skip_line_comment(x); - break; - } else if (test_next(x, '*')) { - skip_block_comment(x); - break; + token = T(TK_DOT3); break; /* '...' is a complete token: leave the switch before the '..' error */ } - return T('/'); - default: { - // Cast to uint8_t first, so we don't get sign extension when converting - // to TokenKind. Otherwise, TK_END ends up with the wrong value. - const uint8_t c = (uint8_t)x->c; + lex_error(x); // '..' not allowed + } else if (ISDIGIT(x->c)) { + token = consume_number(x); + semi = PAW_TRUE; + } + break; + case '/': + next(x); + if (test_next(x, '/')) { // TODO: comments may need consideration wrt.
auto ';' insertion + skip_line_comment(x); + goto try_again; + } else if (test_next(x, '*')) { + skip_block_comment(x); + goto try_again; + } + break; + default: { + if (ISDIGIT(x->c)) { + token = consume_number(x); + semi = PAW_TRUE; + } else if (ISNAME(x->c)) { + token = consume_name(x); + semi = token.kind == TK_NAME || + token.kind == TK_RETURN || + token.kind == TK_BREAK || + token.kind == TK_CONTINUE; + } else { next(x); - return T(c); } } } + x->add_semi = semi; + return token; #undef T } TokenKind pawX_next(struct Lex *x) { - x->lastline = x->line; + x->last_line = x->line; const TokenKind kind = pawX_peek(x); x->t = x->t2; x->t2.kind = TK_NONE; diff --git a/src/lex.h b/src/lex.h index acf10da..8990f56 100644 --- a/src/lex.h +++ b/src/lex.h @@ -22,7 +22,6 @@ enum MultiChar { TK_DOT3, TK_QUESTION2, TK_COLON2, - TK_ELVIS, TK_LESS2, TK_GREATER2, TK_AMPER2, @@ -41,8 +40,8 @@ enum MultiChar { // Keywords (must be in this order): TK_FN, - TK_CLASS, - TK_SUPER, + TK_TYPE, + TK_STRUCT, TK_GLOBAL, TK_LET, TK_IF, @@ -67,14 +66,10 @@ typedef struct Token { typedef struct Lex { paw_Env *P; - struct ClsState *cs; - struct FnState *fs; Map *strings; String *modname; Closure *main; - struct Tree *ast; - struct ModuleType *mod; paw_Reader input; const char *chunk; @@ -88,11 +83,13 @@ typedef struct Lex { Token t; Token t2; + void *ud; + int line; - int lastline; - int fn_depth; + int last_line; + int expr_depth; - void *ud; + paw_Bool add_semi; } Lex; #define x_base_type(x, t) ((x)->mod->types[t]) diff --git a/src/lib.c b/src/lib.c index 7996577..8aaa24c 100644 --- a/src/lib.c +++ b/src/lib.c @@ -3,7 +3,7 @@ #include "array.h" #include "auxlib.h" #include "call.h" -#include "gc.h" +#include "gc_aux.h" #include "map.h" #include "mem.h" #include "os.h" @@ -15,6 +15,20 @@ #define cf_base(i) P->cf->base.p[i] +void lib_error(paw_Env *P, int error, const char *fmt, ...) 
+{ + Buffer print; + pawL_init_buffer(P, &print); + + va_list arg; + va_start(arg, fmt); + pawL_add_vfstring(P, &print, fmt, arg); + va_end(arg); + + pawL_push_result(P, &print); + pawC_throw(P, error); +} + static int get_argc(paw_Env *P) { return paw_get_count(P) - 1 /* context */; @@ -387,215 +401,198 @@ static int string_clone(paw_Env *P) // return 1; //} -static int count_types(paw_Env *P, paw_Type *ts) +#define L_MAX_SIZE 256 + +typedef struct pawL_Property { + const char *name; + paw_Type type; +} pawL_Property; + +typedef struct pawL_Signature { + paw_Type *params; + paw_Type return_; + int ngenerics; +} pawL_Signature; + +typedef struct pawL_Layout { + const char *name; + paw_Type super; + int ngenerics; + int nfields; + int nmethods; +} pawL_Layout; + +// +// struct A[T] { +// a: A[int] // ?? +// b: A[T] +// c: A // sugar for A[T] +// } +// +// A[T] = declare_struct(A, 1) +// A[int] = instantiate_struct(A, int) +// A[T] = { +// a: A[int], +// } + +typedef struct pawL_GenericCtx { + int ngenerics; +} pawL_GenericCtx; + +paw_Type pawL_new_generic_type(paw_Env *P, pawL_GenericCtx *ctx); +paw_Type pawL_new_func_type(paw_Env *P, paw_Type *pars, paw_Type return_, int ngenerics); +paw_Type pawL_new_struct_type(paw_Env *P, pawL_Layout *layout); +void pawL_new_func(paw_Env *P, paw_Function func, int nup); +void pawL_new_struct(paw_Env *P, paw_Function *methods); + +paw_Type pawL_instantiate_func(paw_Env *P, paw_Type base, paw_Type *types); +paw_Type pawL_instantiate_struct(paw_Env *P, paw_Type base, paw_Type *types); + +// Create a global symbol and bind to it the value on top of the stack +int pawL_new_global(paw_Env *P, const char *name, paw_Type type); + +paw_Type pawL_bind_method(paw_Env *P, paw_Function func, int index); + +static void create_type_vars(paw_Env *P, int ngenerics, Binder *binder) +{ + if (ngenerics > L_GENERIC_MAX) { + lib_error(P, PAW_EOVERFLOW, "too many generics"); + } + binder->types = pawM_new_vec(P, ngenerics, Type *); + binder->count = 
ngenerics; + for (int i = 0; i < ngenerics; ++i) { + Type *type = pawY_type_new(P, P->mod); + type->var.kind = TYPE_VAR; + type->var.def = 11111; // TODO: Create a global def + type->var.index = i; + type->var.depth = 0; + binder->types[i] = type; + } +} + +static Type *resolve_type(paw_Env *P, paw_Type type, Binder *generics) { - int nts = 0; - for (paw_Type *t = ts; *t >= 0; ++t) { - if (nts == ARGC_MAX) { - pawC_throw(P, 123); // TODO + if (type >= 0) { + if (type >= P->mod->ntypes) { + lib_error(P, PAW_ETYPE, "unrecognized type"); } - ++nts; + return P->mod->types[type]; + } else if (type <= l_generic(generics->count)) { // valid generics are l_generic(0) .. l_generic(count - 1) + lib_error(P, PAW_ETYPE, "invalid generic parameter"); } - return nts; + type = l_generic(type); + return generics->types[type]; } -static Type *register_native(paw_Env *P, paw_Type *argt, paw_Type ret) +static Type *register_native(paw_Env *P, paw_Type *pars, paw_Type return_, int ngenerics) { - const int nargs = count_types(P, argt); - Type **args = NULL; - if (nargs > 0) { - args = pawM_new_vec(P, nargs, Type *); - for (int i = 0; i < nargs; ++i) { - args[i] = P->mod->types[argt[i]]; + int n = 0; + Type *buffer[ARGC_MAX]; + Type *r = pawY_type_new(P, P->mod); + r->func.kind = TYPE_FUNC; + create_type_vars(P, ngenerics, &r->func.types); + + // Validate the parameter and return types.
+ for (; *pars != L_LIST_END; ++n, ++pars) { + if (n == L_PARAM_MAX) { + lib_error(P, PAW_EOVERFLOW, "too many parameters"); } - } - Type type = {0}; - type.sig.kind = TYPE_SIGNATURE; - type.sig.ret = P->mod->types[ret]; - type.sig.args = args; - type.sig.nargs = nargs; - - // register function signature - return pawY_add_type(P, P->mod, &type); + buffer[n] = resolve_type(P, *pars, &r->func.types); + } + + r->func.params.count = n; + r->func.params.types = pawM_new_vec(P, n, Type *); + memcpy(r->func.params.types, buffer, cast_size(n) * sizeof(buffer[0])); + r->func.return_ = resolve_type(P, return_, &r->func.types); + return r; } struct MethodDesc { const char *name; paw_Function func; - paw_Type *args; + paw_Type *pars; paw_Type ret; }; -#define count_list(L, pn) for (; (L) != NULL; ++(L), ++*(pn)) +#define count_list(L, n) for (; (L) != NULL; ++(L), ++(n)); -#define push_builtin_class(a, b, c, d) push_builtin_class_aux(a, b, c, d, paw_countof(d)) -static void push_builtin_class_aux(paw_Env *P, paw_Type base, const char *name, struct MethodDesc *mds, int nattrs) +// TODO: Mangle template instance names to disambiguate between instances of the same template +int pawL_new_global(paw_Env *P, const char *name, paw_Type type) { - Type t = {0}; - - // register method types and attach to class object - t.cls.attrs = pawM_new_vec(P, nattrs, NamedField); - for (int i = 0; i < nattrs; ++i) { - struct MethodDesc *md = &mds[i]; - Type *tag = register_native(P, md->args, md->ret); - String *str = pawS_new_str(P, md->name); - Native *nat = pawV_new_native(P, str, md->func); - t.cls.attrs[i].name = str; - t.cls.attrs[i].type = tag; - } - - // register class type - String *str = pawS_new_str(P, name); - Type *tag = pawY_add_type(P, P->mod, &t); - - // set global variable - const int g = pawE_new_global(P, str, tag); + String *gname = pawS_new_str(P, name); + const int g = pawE_new_global(P, gname, type); GlobalVar *var = pawE_get_global(P, g); -// v_set_object(&var->value, 
t.cls); -// var->desc.type = tag; - -// P->builtin[base] = v_class(P->top.p[-1]); -// pawC_pop(P); + var->value = P->top.p[-1]; + pawC_pop(P); + return g; } -static void attach_method(paw_Env *P, Value *pv, struct MethodDesc *m) +paw_Type pawL_new_func_type(paw_Env *P, paw_Type *pars, paw_Type return_, int ngenerics) { - Type *tag = register_native(P, m->args, m->ret); - String *str = pawS_new_str(P, m->name); - Native *nat = pawV_new_native(P, str, m->func); - v_set_object(pv, nat); + Type *type = register_native(P, pars, return_, ngenerics); + return type->hdr.def; } -void pawL_bind_method(paw_Env *P, int index, const char *name, paw_Function func, paw_Type *argt, paw_Type ret) +void pawL_new_func(paw_Env *P, paw_Function func, int nup) { - Foreign *fr = v_foreign(P->top.p[-1]); - attach_method(P, &fr->attrs[index], &(struct MethodDesc){ - .name = name, - .func = func, - .args = argt, - .ret = ret, - }); + Native *nat = pawV_new_native(P, func, nup); // TODO: take upvalues off top of stack + pawC_pusho(P, cast_object(nat)); } -void pawL_register_function(paw_Env *P, const char *name, paw_Function func, paw_Type *argt, paw_Type ret) -{ - Type *tag = register_native(P, argt, ret); - String *str = pawS_new_str(P, name); - Native *nat = pawV_new_native(P, str, func); - const int g = pawE_new_global(P, str, tag); - GlobalVar *var = pawE_get_global(P, g); - v_set_object(&var->value, nat); - var->desc.type = tag; -} +//paw_Type pawL_new_struct_type(paw_Env *P, pawL_Class *struct_) +//{ +// Type *type = pawY_new_type(P, P->mod); +// type->cls.name = pawS_new_str(P, struct_->name); +// type->cls.super = type_at(P, struct_->super); +// type->cls.nfields = register_props(P, struct_->fields, &type->cls.fields); +// type->cls.nmethods = register_props(P, struct_->methods, &type->cls.methods); +// +// Value *pv = pawC_push0(P); // anchor here +// Class *c = pawV_new_struct(P, pv); +// +// const int g = pawE_new_global(P, type->hdr.name, type); +// GlobalVar *var = 
pawE_get_global(P, g); +// v_set_object(&var->value, c); +// var->desc.type = type; +// pawC_pop(P); // pop 'c' +//} +// +//paw_Type pawL_bind_method(paw_Env *P, int g, paw_Function func, int index) +//{ +// GlobalVar *var = pawE_get_global(P, g); +// Class *struct_ = v_struct(var->value); +// Value *pmethod = pawA_get(P, struct_->methods, index); +// v_set_object(pmethod, func); +//} + +#define reset_ctx(p) ((p)->ngenerics = 0) void pawL_init(paw_Env *P) { - pawL_register_function(P, "assert", base_assert, t_list_1(PAW_TBOOL), PAW_TUNIT); - pawL_register_function(P, "print", base_print, t_list_1(PAW_TSTRING), PAW_TUNIT); -// pawL_register_function(P, "to_float", base_to_float_s, t_list_1(PAW_TSTRING), PAW_TFLOAT); -// pawL_register_function(P, "to_float", base_to_float_i, t_list_1(PAW_TINT), PAW_TFLOAT); -// pawL_register_function(P, "to_float", base_to_float_f, t_list_1(PAW_TFLOAT), PAW_TFLOAT); -// pawL_register_function(P, "to_bool", base_to_bool_s, t_list_1(PAW_TSTRING), PAW_TBOOL); -// pawL_register_function(P, "to_bool", base_to_bool_i, t_list_1(PAW_TINT), PAW_TBOOL); -// pawL_register_function(P, "to_bool", base_to_bool_f, t_list_1(PAW_TFLOAT), PAW_TBOOL); -// pawL_register_function(P, "to_int", base_to_int_s, t_list_1(PAW_TSTRING), PAW_TINT); -// pawL_register_function(P, "to_int", base_to_int_i, t_list_1(PAW_TINT), PAW_TINT); -// pawL_register_function(P, "to_int", base_to_int_f, t_list_1(PAW_TFLOAT), PAW_TINT); - -// pawL_register_function(P, "try", base_try, t_list_1(PAW_TFUNCTION), PAW_TINT); -// pawL_register_function(P, "require", base_require, t_list_1(PAW_TSTRING), PAW_TMODULE); -// pawL_register_function(P, "load", base_load, t_list_1(PAW_TSTRING), PAW_TFUNCTION); -// pawL_register_function(P, "ord", base_ord, t_list_1(PAW_TSTRING), PAW_TINT); -// pawL_register_function(P, "chr", base_chr, t_list_1(PAW_TINT), PAW_TSTRING); - -//#define PAW_TSELF 0 -// struct MethodDesc kBigIntDesc[] = { -// {"__eq", pawB_eq_bi, t_list_2(PAW_TSELF, PAW_TSELF), 
PAW_TBOOL}, -// {"__eq", pawB_eq_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TBOOL}, -// {"__eq", pawB_eq_f, t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TBOOL}, -// {"__ne", pawB_ne_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TBOOL}, -// {"__ne", pawB_ne_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TBOOL}, -// {"__ne", pawB_ne_f, t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TBOOL}, -// {"__lt", pawB_lt_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TBOOL}, -// {"__lt", pawB_lt_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TBOOL}, -// {"__lt", pawB_lt_f, t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TBOOL}, -// {"__le", pawB_le_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TBOOL}, -// {"__le", pawB_le_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TBOOL}, -// {"__le", pawB_le_f, t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TBOOL}, -// {"__gt", pawB_gt_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TBOOL}, -// {"__gt", pawB_gt_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TBOOL}, -// {"__gt", pawB_gt_f, t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TBOOL}, -// {"__ge", pawB_ge_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TBOOL}, -// {"__ge", pawB_ge_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TBOOL}, -// {"__ge", pawB_ge_f, t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TBOOL}, -// {"__add", pawB_add_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__add", pawB_add_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__add", pawB_add_f, t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TFLOAT}, -// {"__sub", pawB_sub_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__sub", pawB_sub_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__sub", pawB_sub_f, t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TFLOAT}, -// {"__mul", pawB_mul_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__mul", pawB_mul_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__mul", pawB_mul_f, t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TFLOAT}, -// {"__div", pawB_div_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__div", pawB_div_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__div", pawB_div_f, t_list_2(PAW_TSELF, 
PAW_TFLOAT), PAW_TFLOAT}, -// {"__mod", pawB_mod_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__mod", pawB_mod_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__mod", pawB_mod_f, t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TFLOAT}, -// {"__bxor", pawB_bxor_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__bxor", pawB_bxor_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__band", pawB_band_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__band", pawB_band_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__bor", pawB_bor_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__bor", pawB_bor_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__shl", pawB_shl_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__shl", pawB_shl_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__shr", pawB_shr_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__shr", pawB_shr_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__radd", pawB_radd_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__radd", pawB_radd_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__radd", pawB_radd_f, t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TFLOAT}, -// {"__rsub", pawB_rsub_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__rsub", pawB_rsub_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__rsub", pawB_rsub_f, t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TFLOAT}, -// {"__rmul", pawB_rmul_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__rmul", pawB_rmul_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__rmul", pawB_rmul_f, t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TFLOAT}, -// {"__rdiv", pawB_rdiv_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__rdiv", pawB_rdiv_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__rdiv", pawB_rdiv_f, t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TFLOAT}, -// {"__rmod", pawB_rmod_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__rmod", pawB_rmod_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__rmod", pawB_rmod_f, 
t_list_2(PAW_TSELF, PAW_TFLOAT), PAW_TFLOAT}, -// {"__rbxor", pawB_rbxor_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__rbxor", pawB_rbxor_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__rband", pawB_rband_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__rband", pawB_rband_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__rbor", pawB_rbor_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__rbor", pawB_rbor_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__rshl", pawB_rshl_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__rshl", pawB_rshl_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// {"__rshr", pawB_rshr_bi, t_list_2(PAW_TSELF, PAW_TSELF), PAW_TSELF}, -// {"__rshr", pawB_rshr_i, t_list_2(PAW_TSELF, PAW_TINT), PAW_TSELF}, -// }; -// push_builtin_class(P, "string", kStringDesc); - -// Type *string_array_tag = pawY_register_array(P, e_tag(P, PAW_TSTRING)); -// paw_Type string_array = t_type(string_array_tag); - - struct MethodDesc kStringDesc[] = { - {"starts_with", string_starts_with, t_list_1(PAW_TSTRING), PAW_TBOOL}, - {"ends_with", string_ends_with, t_list_1(PAW_TSTRING), PAW_TBOOL}, -// {"split", string_split, t_list_1(PAW_TSTRING), string_array}, -// {"join", string_join, t_list_1(string_array), PAW_TSTRING}, - }; - push_builtin_class(P, PAW_TSTRING, "string", kStringDesc); - -// struct MethodDesc kObjectDesc[] = { -// {"clone", object_clone, t_list_0(), PAW_TSELF}, -// }; -// push_builtin_class(P, "object", kObjectDesc); + pawL_GenericCtx ctx; + paw_Type type, base; + paw_Type T, T2; + + // Builtin functions: + + // fn assert(bool) + type = pawL_new_func_type(P, l_list(PAW_TBOOL), PAW_TUNIT, 0); + pawL_new_func(P, base_assert, 0); + pawL_new_global(P, "assert", type); + + // fn print(string) + type = pawL_new_func_type(P, l_list(PAW_TSTRING), PAW_TUNIT, 0); + pawL_new_func(P, base_print, 0); + pawL_new_global(P, "print", type); + + // TODO: Builtin function templates +// // fn add[T](a: T, b: T) -> T +// 
reset_ctx(&ctx); +// T = pawL_new_generic_type(P, &ctx); +// base = pawL_new_func_type(P, l_list(T, T), T, 1); +// type = pawL_instantiate_func(P, base, l_list(PAW_TINT)); +// pawL_new_func(P, base_add_i, 0); +// pawL_new_global(P, "addi_", type); // TODO: name mangling // Create a map for caching loaded libraries. P->libs = pawH_new(P); diff --git a/src/lib.h b/src/lib.h index c72f942..48b6f06 100644 --- a/src/lib.h +++ b/src/lib.h @@ -9,6 +9,9 @@ #define IOLIB_NAME "io" #define MATHLIB_NAME "math" +// From value.h +union Value; + // Load the base library void pawL_init(paw_Env *P); @@ -18,17 +21,18 @@ int pawL_check_varargc(paw_Env *P, int min, int max); // TODO: Look on disk for .paw files to load void pawL_require_lib(paw_Env *P, const char *name); -void pawL_register_function(paw_Env *P, const char *name, paw_Function func, paw_Type *argt, paw_Type ret); -void pawL_bind_method(paw_Env *P, int index, const char *name, paw_Function func, paw_Type *argt, paw_Type ret); - -#define t_list_0() (paw_Type[]){-1} -#define t_list_1(a) (paw_Type[]){a, -1} -#define t_list_2(a, b) (paw_Type[]){a, b, -1} -#define t_list_3(a, b, c) (paw_Type[]){a, b, c, -1} - // Functions for loading and compiling source code int pawL_load_file(paw_Env *P, const char *pathname); int pawL_load_nchunk(paw_Env *P, const char *name, const char *source, size_t length); int pawL_load_chunk(paw_Env *P, const char *name, const char *source); +#define L_GENERIC_MAX ARGC_MAX +#define L_PARAM_MAX ARGC_MAX +#define L_LIST_END INT_MIN +#define L_SELF (INT_MIN + 1) + +#define l_generic(i) (-(i) - 1) +#define l_list(...) (paw_Type[]){__VA_ARGS__, L_LIST_END} +#define l_list_0() (paw_Type[]){L_LIST_END} + #endif // PAW_LIB_H diff --git a/src/map.c b/src/map.c index 089c6f0..6ae3685 100644 --- a/src/map.c +++ b/src/map.c @@ -2,7 +2,7 @@ // This source code is licensed under the MIT License, which can be found in // LICENSE.md. See AUTHORS.md for a list of contributor names. 
#include "map.h" -#include "gc.h" +#include "gc_aux.h" #include "mem.h" //#include "rt.h" #include "util.h" @@ -18,7 +18,7 @@ static paw_Bool is_unoccupied(Value v) static size_t prepare_insert(Map *m, Value key) { - paw_assert(!v_is_null(key)); + paw_assert(pawH_is_occupied(key)); size_t itr = pawH_index(m, key); pawH_locate(m, key, is_unoccupied); // Search for the first vacant slot diff --git a/src/map.h b/src/map.h index 6a153e3..32a3fc1 100644 --- a/src/map.h +++ b/src/map.h @@ -4,26 +4,39 @@ #ifndef PAW_MAP_H #define PAW_MAP_H +// TODO: This code won't work properly anymore: we do not have a way to indicate that a +// given key is a 'tombstone', without limiting what values can be used (using -1 +// value, but a valid integer could easily be -1). Before, we just used 'null'. Only +// works for pointer keys right now. +// Ideas: +// (+) Just use chaining. +// (+) Reserve a single key value to represent 'null', or 'does not exist'. +// Use the value field to indicate either that the item never existed, or that +// it was erased. Problematic, as it limits the keyspace. +// (+) Use a separate array (a bitfield, really) to track which keys are nonexistent. +// (+) Create a somewhat more complicated data structure with an 'index' (see Python +// 'dict' implementation). 
+ #include "paw.h" #include "util.h" #include "value.h" -static inline paw_Bool pawH_is_vacant(Value v) +static inline paw_Bool pawH_is_vacant(Value key) { - return v.u == 0; + return key.u == 0; } -static inline paw_Bool pawH_is_erased(Value v) +static inline paw_Bool pawH_is_erased(Value key) { - return v_is_null(v); + return key.i == -1; } -static inline paw_Bool pawH_is_occupied(Value v) +static inline paw_Bool pawH_is_occupied(Value key) { - return !pawH_is_vacant(v) && !pawH_is_erased(v); + return !pawH_is_vacant(key) && !pawH_is_erased(key); } -#define pawH_index(m, k) check_exp(!v_is_null(k), pawV_hash(k) & ((m)->capacity - 1)) +#define pawH_index(m, k) check_exp(pawH_is_occupied(k), pawV_hash(k) & ((m)->capacity - 1)) // Set 'itr' to the index at which the key 'k' is located, or the first index for // which the function-like macro 'cc' evaluates to true (if 'k' is not found). @@ -80,9 +93,9 @@ static inline Value *pawH_action(paw_Env *P, Map *m, Value key, MapAction action return NULL; } if (action == MAP_ACTION_REMOVE) { - v_set_0(&m->keys[itr]); - v_set_0(&m->values[itr]); + m->keys[itr].i = -1; // tombstone --m->length; + // Return the address of the slot to indicate success. return &m->keys[itr]; } diff --git a/src/mem.c b/src/mem.c index 4c725e5..63cbe9f 100644 --- a/src/mem.c +++ b/src/mem.c @@ -2,7 +2,7 @@ // This source code is licensed under the MIT License, which can be found in // LICENSE.md. See AUTHORS.md for a list of contributor names. #include "mem.h" -#include "gc.h" +#include "gc_aux.h" #include #include #include diff --git a/src/mem.h b/src/mem.h index 7aa68a9..c3a4777 100644 --- a/src/mem.h +++ b/src/mem.h @@ -10,8 +10,7 @@ // Throw an 'out of memory' error // The error message is allocated on startup, and there is always an extra // stack slot to hold it. 
-#define pawM_error(P) ((P)->bound.p - (P)->top.p >= 1, \ - *(P)->top.p++ = (P)->mem_errmsg, \ +#define pawM_error(P) (*(P)->top.p++ = (P)->mem_errmsg, \ pawC_throw(P, PAW_EMEMORY)) #define pawM_new(P, type) pawM_new_vec(P, 1, type) diff --git a/src/opcode.h b/src/opcode.h index c941ace..1e7b42d 100644 --- a/src/opcode.h +++ b/src/opcode.h @@ -6,8 +6,6 @@ #include "paw.h" -typedef uint32_t OpCode; - #ifndef UPVALUE_MAX #define UPVALUE_MAX 64 #endif @@ -16,14 +14,18 @@ typedef uint32_t OpCode; #define LOCAL_MAX 1024 #endif -#ifndef JUMP_MAX -#define JUMP_MAX S_MAX +#ifndef ATTR_MAX +#define ATTR_MAX 4096 #endif #ifndef ARGC_MAX #define ARGC_MAX 256 #endif +#ifndef JUMP_MAX +#define JUMP_MAX S_MAX +#endif + #define decode_jump(x) ((int)(x) - JUMP_MAX) #define encode_jump_over(x) ((x) + JUMP_MAX) #define encode_jump_back(x) (JUMP_MAX - (x)) @@ -66,16 +68,19 @@ typedef uint32_t OpCode; #define get_B(v) (((v) >> B_OFFSET) & mask1(B_WIDTH, 0)) #define set_B(v, b) (*(v) = (*(v) & mask0(B_WIDTH, B_OFFSET)) | ((OpCode)(b) << B_OFFSET)) +typedef uint32_t OpCode; + // clang-format off // -// Opcode format: Each instruction is packed into a 32-bit unsigned integer +// Opcode format: Each instruction is packed into a 32-bit unsigned integer (OpCode) // // legend: -// G = global variable (requires constant string index) -// K = constants (requires 16-bit index) -// L = local variables (i.e. the stack, requires up to 26-bit index) -// Up = upvalues (requires 16-bit index) -// P = function prototypes (requires 16-bit index) +// G = global variable +// K = constants +// L = local variables +// Up = upvalues +// P = function prototypes +// C = class layouts // // NOTE: Opcode order is only important starting from OP_CALL (opcodes that have // corresponding metamethods). 
@@ -86,15 +91,15 @@ OP_PUSHUNIT,// - - () - OP_PUSHTRUE,// - - true - OP_PUSHFALSE,// - - false - OP_PUSHCONST,// U - K[u] - +OP_PUSHSTRUCT,// U - C[u] - OP_POP,// - v - - +OP_COPY,// - v v v - OP_CLOSE,// A B v_a..v_1 - if b, close stack to v_a OP_RETURN,// - f..v v closes stack to f OP_CLOSURE,// A B v_b..v_1 f captures v_u..v_1 in f = P[a] -OP_INVOKE,// A B o v_b..v_1 f(v_b..v_1) calls f = o.K[a], with receiver o -OP_INVOKESUPER,// A B o v_b..v_1 f(v_b..v_1) calls f = o.K[a], with receiver o -OP_GETSUPER,// U +OP_INVOKE,// OP_JUMP,// S - - pc += S OP_JUMPFALSEPOP,// S v - pc += S @@ -109,9 +114,10 @@ OP_SETLOCAL,// U v - L[u] = v OP_GETUPVALUE,// U - Up[u] - OP_SETUPVALUE,// U v - Up[u] = v -OP_NEWCLASS,// A B - v v = new class named K[a] +OP_NEWINSTANCE,// U - v v = new instance of class C[u] +OP_INITATTR,// TODO: call this OP_INITFIELD U i v i i.fields[u] = v OP_INHERIT,// - x y x x subclass of y -OP_NEWMETHOD,// U v f v v.K[u] = f +OP_NEWMETHOD,// U v f v v.methods[u] = f OP_NEWARRAY,// U v_u..v_1 [v_u..v_1] - OP_NEWMAP,// U v_2n..v_1 {v_2n..v_1} - @@ -121,7 +127,7 @@ OP_FORIN0,// S *-*-*-*-*-*-*-*-* see notes *-*-*-*-*-*-*-*-* OP_FORIN,// S *-*-*-*-*-*-*-*-* see notes *-*-*-*-*-*-*-*-* OP_UNOP,// A B v ops[a](v) - -OP_BINOP,// A B l r ops[x](l, r) - +OP_BINOP,// A B l r ops[a](l, r) - OP_UNMM,// A B v v.attr[a]() - OP_BINMM,// A B l r l.attr[a](r) - @@ -133,12 +139,13 @@ OP_VARARG,// A B v_u..v_1 [v_u..v_1] - OP_INIT, OP_CALL,// U f v_u..v_1 v v = f(v_u..v_1) -OP_GETATTR,// - v i v.i - -OP_SETATTR,// - v i j - v.i=j +OP_GETMETHOD,// U v v.methods[u] - +OP_GETATTR,// U v v.fields[u] - +OP_SETATTR,// U v x - v.fields[u]=x OP_GETITEM,// - v i v[i] - -OP_SETITEM,// - v i j - v[i]=j +OP_SETITEM,// - v i x - v[i]=x OP_GETSLICE,// - v i j v[i:j] - -OP_SETSLICE,// - v i j k - v[i:j]=k +OP_SETSLICE,// - v i j x - v[i:j]=x NOPCODES } Op; diff --git a/src/parse.c b/src/parse.c index 474451a..d3452fd 100644 --- a/src/parse.c +++ b/src/parse.c @@ -2,12 +2,13 @@ // This 
source code is licensed under the MIT License, which can be found in // LICENSE.md. See AUTHORS.md for a list of contributor names. #include "parse.h" +#include "ast.h" #include "auxlib.h" #include "call.h" #include "code.h" #include "check.h" #include "env.h" -#include "gc.h" +#include "gc_aux.h" #include "lex.h" #include "map.h" #include "mem.h" @@ -24,10 +25,17 @@ #include #include +#define new_expr(lex, kind) pawA_new_expr((lex)->pm->ast, kind) +#define new_decl(lex, kind) pawA_new_decl((lex)->pm->ast, kind) +#define new_stmt(lex, kind) pawA_new_stmt((lex)->pm->ast, kind) +#define new_expr_list(lex) pawA_new_expr_list((lex)->pm->ast) +#define new_decl_list(lex) pawA_new_decl_list((lex)->pm->ast) +#define new_stmt_list(lex) pawA_new_stmt_list((lex)->pm->ast) + // recursive non-terminals -static Expr *subexpr(Lex *lex, unsigned prec); -static Stmt *stmt(Lex *lex); -#define expr0(x) subexpr(x, 0) +static AstExpr *expression(Lex *lex, unsigned prec); +static AstStmt *statement(Lex *lex); +#define expression0(x) expression(x, 0) static void expected_symbol(Lex *lex, const char *want) { @@ -170,8 +178,6 @@ static InfixOp get_infixop(TokenKind kind) return INFIX_IN; case TK_QUESTION2: return INFIX_COND; - case TK_ELVIS: - return INFIX_COALESCE; case TK_EQUALS2: return INFIX_EQ; case TK_LESS2: @@ -239,124 +245,137 @@ static String *parse_name(Lex *lex) return name; } -static Expr *emit_primitive(Lex *lex, Value v, int type) +static AstExpr *new_basic_lit(Lex *lex, Value v, paw_Type code) { - PrimitiveExpr *expr = pawK_add_node(lex, EXPR_PRIMITIVE, PrimitiveExpr); - expr->t = type; - expr->v = v; - return cast_expr(expr); + AstExpr *r = new_expr(lex, EXPR_LITERAL); + r->literal.basic.t = code; + r->literal.basic.value = v; + return r; } -static Expr *emit_bool(Lex *lex, paw_Bool b) +static AstExpr *emit_unit(Lex *lex) +{ + AstExpr *r = new_expr(lex, EXPR_LITERAL); + r->literal.basic.t = PAW_TUNIT; + return r; +} + +static AstExpr *emit_bool(Lex *lex, paw_Bool b) { Value 
v; v_set_bool(&v, b); - return emit_primitive(lex, v, PAW_TBOOL); + return new_basic_lit(lex, v, PAW_TBOOL); } -#define make_link_node(suffix, T) \ - static void link_ ## suffix(T **phead, T **plast, T *next) \ +#define make_link_node(suffix, T, source, link) \ + static void link_ ## suffix(T ## List *list, T **plast, T *next) \ { \ - if (*phead == NULL) { \ - *phead = next; \ + if (list->first == NULL) { \ + list->first = next; \ } else { \ - (*plast)->next = next; \ + (*plast)->source.link = next; \ } \ *plast = next; \ + ++list->count; \ } -make_link_node(stmt, Stmt) -make_link_node(expr, Expr) +make_link_node(decl, AstDecl, hdr, next) +make_link_node(expr, AstExpr, hdr, next) +make_link_node(stmt, AstStmt, hdr, next) +make_link_node(method, AstDecl, func, sibling) -// letstmt := storage [const] name [`:` Type] `=` expr -// -//global x: int = 2 -//let const x: int = 3 -//global const x: int = 4 -// -// storage := let | global -// -// Type := TBasic | TArray | TVector | TEnum | TFunction | TClass -// -// TArray := `[` Type `;` Length `]` -// -// TVector := `[` Type `]` -// -// TODO: TGenerics should not appear in type annotation for variable -// TGenerics := `[` name {`,` name} `]` -// -// TArgs := `(` [Type {`,` Type}] `)` -// -// TFunction := `fn` [TGenerics] TArgs [ `:` Type ] -// -// TClass := name [TGenerics] -// -// TBasic := bool | int | float | string -// -static TypeDecl *type_decl(Lex *lex); +static AstExpr *type_expr(Lex *lex); -static void parse_signature(Lex *lex, TypeDecl *pdecl) +static AstExprList *parse_type_list(Lex *lex) { - pdecl->group = TYPE_SIGNATURE; - skip(lex); // '(' token + ++lex->expr_depth; + AstExprList *list = new_expr_list(lex); + AstExpr *prev; + do { + if (list->count == ARGC_MAX) { + limit_error(lex, "generic type arguments", ARGC_MAX); + } + AstExpr *type = type_expr(lex); + link_expr(list, &prev, type); + } while (test_next(lex, ',')); + --lex->expr_depth; + return list; +} - // function parameters - const int line = 
lex->lastline; - if (!test_next(lex, ')')) { - Expr **phead = &pdecl->sig.args; - Expr *last; - int nargs = 0; - do { - if (nargs == ARGC_MAX) { - limit_error(lex, "function type parameters", ARGC_MAX); - } - check(lex, TK_NAME); - TypeDecl *arg = type_decl(lex); - link_expr(phead, &last, cast_expr(arg)); - ++nargs; - } while (test_next(lex, ',')); - delim_next(lex, ')', '(', line); - pdecl->sig.nargs = nargs; +static AstExprList *maybe_type_args(Lex *lex) +{ + AstExprList *types = NULL; + const int line = lex->line; + if (test_next(lex, '[')) { + // type arguments (requires at least 1) + types = parse_type_list(lex); + delim_next(lex, ']', '[', line); + } + return types; +} + +static AstExprList *parse_param(Lex *lex, int line) +{ + if (test_next(lex, ')')) { + return new_expr_list(lex); } + AstExprList *list = parse_type_list(lex); + delim_next(lex, ')', '(', line); + return list; +} + +static void parse_signature(Lex *lex, AstExpr *pe) +{ + const int line = lex->line; + pe->func.kind = EXPR_FUNC_TYPE; + check_next(lex, '('); + + // function parameters + pe->func.params = parse_param(lex, line); // return type annotation if (test_next(lex, TK_ARROW)) { - pdecl->sig.ret = type_decl(lex); + pe->func.return_ = type_expr(lex); + } else { + pe->func.return_ = emit_unit(lex); } } -static void parse_typename(Lex *lex, TypeDecl *pdecl) +static void parse_named_type(Lex *lex, AstExpr *pe) { String *name = parse_name(lex); - if (name->flag < 0) { // found primitive type - const int code = -name->flag - 1; - pdecl->group = TYPE_PRIMITIVE; - pdecl->basic.t = code; - } else { // found class type - pdecl->named.name = name; - pdecl->group = TYPE_CLASS; - } + pe->type_name.name = name; + pe->type_name.kind = EXPR_TYPE_NAME; + // list of concrete types between '[' and ']' + pe->type_name.args = maybe_type_args(lex); } -static TypeDecl *type_decl(Lex *lex) +static AstExpr *type_expr(Lex *lex) { - TypeDecl *decl = pawK_add_node(lex, EXPR_TYPE, TypeDecl); + AstExpr *r = 
new_expr(lex, 0 /* set in parse_*() */); if (test_next(lex, TK_FN)) { - parse_signature(lex, decl); + parse_signature(lex, r); } else { - parse_typename(lex, decl); + parse_named_type(lex, r); } - return decl; + return r; +} + +static AstExpr *unit_type(Lex *lex) +{ + AstExpr *r = new_expr(lex, EXPR_TYPE_NAME); + r->type_name.name = pawE_cstr(env(lex), CSTR_UNIT); + return r; } -static TypeDecl *ret_annotation(Lex *lex) +static AstExpr *ret_annotation(Lex *lex) { - return test_next(lex, TK_ARROW) ? type_decl(lex) : NULL; + return test_next(lex, TK_ARROW) ? type_expr(lex) : unit_type(lex); } -static TypeDecl *var_annotation(Lex *lex) +static AstExpr *var_annotation(Lex *lex) { if (test_next(lex, ':')) { - TypeDecl *tn = type_decl(lex); + AstExpr *tn = type_expr(lex); if (tn == NULL) { pawX_error(lex, "invalid type annotation"); } @@ -365,64 +384,93 @@ static TypeDecl *var_annotation(Lex *lex) return NULL; // needs inference } -static Stmt *parameter_def(Lex *lex) +static AstDecl *param_decl(Lex *lex) { - ParamStmt *result = pawK_add_node(lex, STMT_PARAM, ParamStmt); - result->name = parse_name(lex); - result->tag = var_annotation(lex); - if (result->tag == NULL) { + AstDecl *r = new_decl(lex, DECL_FIELD); + r->field.name = parse_name(lex); + r->field.tag = var_annotation(lex); + if (r->field.tag == NULL) { pawX_error(lex, "expected type annotation on parameter '%s'", - result->name->text); + r->field.name->text); } - return cast_stmt(result); + return r; } -static Stmt *variable_def(Lex *lex, int line, paw_Bool global) +static AstDecl *var_decl(Lex *lex, int line, paw_Bool global) { - DefStmt *result = pawK_add_node(lex, STMT_DEF, DefStmt); - result->line = line; // line containing 'global' or 'let' - result->name = parse_name(lex); - result->tag = var_annotation(lex); + AstDecl *r = new_decl(lex, DECL_VAR); + r->var.line = line; // line containing 'global' or 'let' + r->var.name = parse_name(lex); + r->var.tag = var_annotation(lex); if (!test_next(lex, '=')) { 
pawX_error(lex, "missing initializer"); } - result->init = expr0(lex); - result->flags.global = global; + r->var.init = expression0(lex); + r->var.is_global = global; semicolon(lex); - return cast_stmt(result); + return r; +} + +static AstExprList *expr_list1(Lex *lex, const char *what) +{ + AstExprList *list = new_expr_list(lex); + AstExpr *prev; + do { + if (list->count == ARGC_MAX) { + limit_error(lex, what, ARGC_MAX); + } + link_expr(list, &prev, expression0(lex)); + } while (test_next(lex, ',')); + return list; +} + +static AstExpr *item_expr(Lex *lex) +{ + AstExpr *r = new_expr(lex, EXPR_ITEM); + r->item.name = parse_name(lex); + check_next(lex, ':'); + r->item.value = expression0(lex); + return r; } -static int call_parameters(Lex *lex, Expr **phead) +static AstExprList *item_list0(Lex *lex, const char *what) +{ + AstExprList *list = new_expr_list(lex); + AstExpr *prev; + do { + if (test(lex, '}')) { + break; + } else if (list->count== LOCAL_MAX) { + limit_error(lex, what, LOCAL_MAX); + } + AstExpr *next = item_expr(lex); + link_expr(list, &prev, next); + } while (test_next(lex, ',')); + return list; +} + +static AstExprList *arguments(Lex *lex) { const int line = lex->line; skip(lex); // '(' token - Expr *last; - int nargs = 0; - if (!test_next(lex, ')')) { - link_expr(phead, &last, expr0(lex)); - nargs = 1; - while (test_next(lex, ',')) { - if (nargs == ARGC_MAX) { - limit_error(lex, "function parameters", ARGC_MAX); - } - link_expr(phead, &last, expr0(lex)); - ++nargs; - } - delim_next(lex, ')', '(', line); + if (test_next(lex, ')')) { + return new_expr_list(lex); // empty } - return nargs; + AstExprList *list = expr_list1(lex, "function parameters"); + delim_next(lex, ')', '(', line); + return list; } -// Parse a variable name -static Expr *varexpr(Lex *lex) +// Parse an identifier +static AstExpr *name_expr(Lex *lex) { - VarExpr *result = pawK_add_node(lex, EXPR_VAR, VarExpr); - result->name = parse_name(lex); - return cast_expr(result); + AstExpr *r = 
new_expr(lex, EXPR_NAME); + r->name.name = parse_name(lex); + return r; } -Scope *pawP_add_scope(Lex *lex, SymbolTable *table) +Scope *pawP_new_scope(Lex *lex, SymbolTable *table) { if (table->nscopes == UINT16_MAX) { limit_error(lex, "scopes", UINT16_MAX); @@ -433,10 +481,19 @@ Scope *pawP_add_scope(Lex *lex, SymbolTable *table) return scope; } +void pawP_add_scope(Lex *lex, SymbolTable *table, Scope *scope) +{ + if (table->nscopes == UINT16_MAX) { + limit_error(lex, "scopes", UINT16_MAX); + } + pawM_grow(env(lex), table->scopes, table->nscopes, table->capacity); + table->scopes[table->nscopes++] = scope; +} + Symbol *pawP_add_symbol(Lex *lex, Scope *table) { pawM_grow(env(lex), table->symbols, table->nsymbols, table->capacity); - Symbol *sym = pawK_add_node(lex, EXPR_SYMBOL, Symbol); + Symbol *sym = pawA_new_symbol(lex); table->symbols[table->nsymbols++] = sym; return sym; } @@ -446,79 +503,69 @@ int pawP_find_symbol(Scope *scope, const String *name) Symbol **symbols = scope->symbols; for (int i = scope->nsymbols - 1; i >= 0; --i) { if (pawS_eq(name, symbols[i]->name)) { - return i; + if (symbols[i]->is_init) { + return i; + } } } return -1; } -// static void code_invoke(Lex *lex, Op op, ExprState *e) -//{ -// const int name = add_name(lex, e->s); -// discard(e); -// -// const int argc = call_parameters(lex); -// pawK_code_AB(lex->fs, op, name, argc); -// } -// -// static void push_special(Lex *lex, unsigned ctag) -//{ -// ExprState e; -// if (ctag == CSTR_SELF) { -// // 'self' is always in slot 0 -// init_expr(&e, EXPR_LOCAL, 0); -// } else { -// // 'super' is an upvalue -// const Value v = pawE_cstr(lex->P, ctag); -// find_var(lex, &e, v_string(v)); -// } -// discharge(&e); -// } - -////static void superexpr(Lex *lex, ExprState *e) -////{ -//// if (!lex->cls) { -//// pawX_error(lex, "'super' used outside class body"); -//// } else if (!lex->cls->has_super) { -//// pawX_error(lex, "class has no superclass"); -//// } -//// -//// skip(lex); // 'super' token -//// 
check_next(lex, '.'); -//// -//// const int name = parse_name(lex, e); -//// if (test(lex, '(')) { -//// const int argc = call_parameters(lex); -//// push_special(lex, CSTR_SELF); -//// push_special(lex, CSTR_SUPER); -//// pawK_code_AB(lex->fs, OP_INVOKESUPER, name, argc); -//// e->kind = EXPR_CALL; -//// } else { -//// push_special(lex, CSTR_SELF); -//// push_special(lex, CSTR_SUPER); -//// pawK_code_U(lex->fs, OP_GETSUPER, name); -//// e->kind = EXPR_ACCESS; -//// } -////} - -static Expr *unop_expr(Lex *lex, UnOp op) -{ - UnOpExpr *result = pawK_add_node(lex, EXPR_UNOP, UnOpExpr); +static AstExpr *unop_expr(Lex *lex, UnOp op) +{ + AstExpr *result = new_expr(lex, EXPR_UNOP); + UnOpExpr *r = &result->unop; skip(lex); // unary operator token - result->op = (UnaryOp)op; // same order - result->target = subexpr(lex, kUnOpPrecedence); - return cast_expr(result); + r->op = (UnaryOp)op; // same order + r->target = expression(lex, kUnOpPrecedence); + return result; } -static Expr *paren_expr(Lex *lex) +// Parse either a parenthsized expression or a tuple +static AstExpr *paren_expr(Lex *lex) { // Just parse and return the expression contained within the parenthesis. // There is no need for an extra node type. const int line = lex->line; + AstExpr *elems = NULL; + int nelems = 0; skip(lex); // '(' token - Expr *result = expr0(lex); - delim_next(lex, ')', '(', line); - return result; + if (!test(lex, ')')) { + ++lex->expr_depth; + elems = expression0(lex); + --lex->expr_depth; + if (test_next(lex, ')')) { + return elems; + } + } + pawX_error(lex, "TODO: tuples are not yet implemented"); + return NULL; + +// if (!test(lex, ')')) { +// elems = expression0(lex); +// if (test_next(lex, ')')) { +// // Found a normal parenthesized exprssion: "(" AstExpr ")". +// return elems; +// } +// // // Expect an n-tuple (1-tuple requires a trailing ','). 
+// // while (test_next(lex, ',')) { +// // if (nelems > UINT8_MAX) { +// // limit_error(lex, "tuple elements", UINT8_MAX); +// // } +// // elems->hdr.next = expression0(lex); +// // elems = elems->hdr.next; +// // ++nelems; +// // } +// } +// --lex->expr_depth; +// AstExpr *result = new_expr(lex, EXPR_LITERAL); +// LiteralExpr *lit = &result->literal; +// TupleLit *r = &lit->tuple; +// lit->line = line; +// r->elems = elems; +// r->nelems = nelems; +// delim_next(lex, ')', '(', line); +// return result; } static paw_Bool end_of_block(Lex *lex) @@ -527,248 +574,223 @@ static paw_Bool end_of_block(Lex *lex) test(lex, TK_END); // truncated block } -static int stmtlist(Lex *lex, Stmt **phead) +static AstStmtList *stmt_list(Lex *lex) { - Stmt *last; - int nstmts = 0; + AstStmtList *list = new_stmt_list(lex); + AstStmt *last; while (!end_of_block(lex)) { - const TokenKind tk = lex->t.kind; - Stmt *next = stmt(lex); - link_stmt(phead, &last, next); - ++nstmts; - if (tk == TK_RETURN) { - break; + AstStmt *next = statement(lex); + if (next != NULL) { + link_stmt(list, &last, next); + if (a_kind(next) == STMT_RETURN || // 'return' + a_kind(next) == STMT_LABEL) { // 'break' | 'continue' + break; // must be last statement in block + } } } - return nstmts; + return list; } -static Expr *array_expr(Lex *lex) +static AstExpr *array_expr(Lex *lex) { - ArrayExpr *result = pawK_add_node(lex, EXPR_ARRAY, ArrayExpr); - const int line = lex->line; - skip(lex); // '[' token - - int nitems = 0; - Expr *items = result->items; - do { - if (test(lex, ']')) { - break; - } else if (nitems == LOCAL_MAX) { - limit_error(lex, "array elements", LOCAL_MAX); - } - items->next = expr0(lex); - items = items->next; - ++nitems; - } while (test_next(lex, ',')); - delim_next(lex, ']', '[', line); - return cast_expr(result); +// AstExpr *result = new_expr(lex, EXPR_ARRAY); +// ArrayExpr *r = &result->array; +// const int line = lex->line; +// skip(lex); // '[' token +// +// int nitems = 0; +// AstExpr 
*items = r->items; +// do { +// if (test(lex, ']')) { +// break; +// } else if (nitems == LOCAL_MAX) { +// limit_error(lex, "array elements", LOCAL_MAX); +// } +// items->next = expression0(lex); +// items = items->next; +// ++nitems; +// } while (test_next(lex, ',')); +// delim_next(lex, ']', '[', line); +// return result; } -static Expr *map_expr(Lex *lex) +static AstExpr *map_expr(Lex *lex) { // const int line = lex->line; // skip(lex); // '{' token -// MapExpr *result = pawK_add_node(lex, EXPR_MAP, MapExpr); +// AstExpr *result = new_expr(lex, EXPR_MAP); +// MapExpr *r = &result->map; // -// NodeVec *items = &result->items; +// NodeVec *items = &r->items; // do { // if (test(lex, '}')) { // break; // } else if (items->size > LOCAL_MAX - 2) { // limit_error(lex, "map items", LOCAL_MAX); // } -// push_node(lex, items, expr0(lex)); +// push_node(lex, items, expression0(lex)); // check_next(lex, ':'); -// push_node(lex, items, expr0(lex)); +// push_node(lex, items, expression0(lex)); // } while (test_next(lex, ',')); // delim_next(lex, '}', '{', line); -// return cast_expr(result); +// return result; return NULL; } -static Expr *index_expr(Lex *lex, Expr *prefix) +static AstExpr *index_expr(Lex *lex, AstExpr *target) { - IndexExpr *result = pawK_add_node(lex, EXPR_INDEX, IndexExpr); + AstExpr *result = new_expr(lex, EXPR_INDEX); + Index *r = &result->index; const int line = lex->line; skip(lex); // '[' token - result->target = prefix; - if (test(lex, ':')) { - result->first = NULL; // TODO: Won't work: use a special range object - } else { - result->first = expr0(lex); - } - if (!test_next(lex, ':')) { - result->second = NULL; - } else if (test(lex, ']')) { - result->second = NULL; - } else { - result->second = expr0(lex); - } + r->target = target; + r->elems = expr_list1(lex, "bracketed terms"); delim_next(lex, ']', '[', line); - return cast_expr(result); -} - -static Stmt *itemstmt(Lex *lex) -{ - check_next(lex, '.'); - ItemStmt *result = pawK_add_node(lex, 
STMT_ITEM, ItemStmt); - result->name = parse_name(lex); - check_next(lex, '='); - result->value = expr0(lex); - return cast_stmt(result); + return result; } -static Expr *initexpr(Lex *lex, Expr *prefix) +// Parse a composite literal expression +static AstExpr *composite_lit(Lex *lex, AstExpr *target) { - if (prefix->kind != EXPR_VAR) { - pawX_error(lex, "expected class name"); - } - InitExpr *result = pawK_add_node(lex, EXPR_INIT, InitExpr); - result->prefix = prefix; - const int line = lex->line; + AstExpr *result = new_expr(lex, EXPR_LITERAL); + LiteralExpr *lit = &result->literal; + lit->lit_kind = LIT_COMPOSITE; + CompositeLit *r = &lit->comp; skip(lex); // '{' token - - Stmt *last; - do { - if (test(lex, '}')) { - break; - } else if (result->nattrs == LOCAL_MAX) { - limit_error(lex, "attributes", LOCAL_MAX); - } - Stmt *next = itemstmt(lex); - link_stmt(&result->attrs, &last, next); - ++result->nattrs; - } while (test_next(lex, ',')); - delim_next(lex, '}', '{', line); - return cast_expr(result); -} - -static Expr *invokeexpr(Lex *lex, int line, Expr *prefix, String *name) -{ - InvokeExpr *result = pawK_add_node(lex, EXPR_INVOKE, InvokeExpr); - result->nargs = call_parameters(lex, &result->args); - result->target = prefix; - result->line = line; - result->name = name; - return cast_expr(result); + r->items = item_list0(lex, "items"); + r->target = target; + delim_next(lex, '}', '{', lit->line); + return result; } -static Expr *accessexpr(Lex *lex, int line, Expr *prefix, String *name) +static AstExpr *selector_expr(Lex *lex, AstExpr *target) { - AccessExpr *result = pawK_add_node(lex, EXPR_ACCESS, AccessExpr); - result->line = line; - result->target = prefix; - result->name = name; - return cast_expr(result); + skip(lex); // '.' 
token + AstExpr *r = new_expr(lex, EXPR_SELECTOR); + r->selector.target = target; + r->selector.name = parse_name(lex); + return r; } -static Expr *dotexpr(Lex *lex, Expr *prefix) +static AstExpr *access_expr(Lex *lex, AstExpr *target) { - skip(lex); // '.' token - const int line = lex->lastline; - String *name = parse_name(lex); - if (test(lex, '(')) { - return invokeexpr(lex, line, prefix, name); - } else { - return accessexpr(lex, line, prefix, name); - } + skip(lex); // '::' token + AstExpr *r = new_expr(lex, EXPR_ACCESS); + r->access.target = target; + r->access.name = parse_name(lex); + return r; } -static Expr *call_expr(Lex *lex, Expr *prefix) +static AstExpr *call_expr(Lex *lex, AstExpr *target) { - CallExpr *result = pawK_add_node(lex, EXPR_CALL, CallExpr); - result->nargs = call_parameters(lex, &result->args); - result->target = prefix; - return cast_expr(result); + AstExpr *result = new_expr(lex, EXPR_CALL); + CallExpr *r = &result->call; + r->args = arguments(lex); + r->target = target; + return result; } -static Expr *chain_expr(Lex *lex, Expr *prefix) +static AstExpr *chain_expr(Lex *lex, AstExpr *target) { - ChainExpr *result = pawK_add_node(lex, EXPR_UNOP, ChainExpr); - result->target = prefix; + AstExpr *result = new_expr(lex, EXPR_UNOP); + ChainExpr *r = &result->chain; + r->target = target; skip(lex); // '?' 
token - return cast_expr(result); + return result; } -static int fn_parameters(Lex *lex, Stmt **phead) +static AstDeclList *parameters(Lex *lex) { + AstDeclList *list = new_decl_list(lex); const int line = lex->line; check_next(lex, '('); - int argc = 0; if (!test_next(lex, ')')) { - Stmt *last; + AstDecl *prev; do { - if (argc == ARGC_MAX) { + if (list->count == ARGC_MAX) { limit_error(lex, "function parameters", ARGC_MAX); } else if (!test(lex, TK_NAME)) { expected_symbol(lex, "name"); } // parse function parameter of form 'name: type' - Stmt *next = parameter_def(lex); - link_stmt(phead, &last, next); - ++argc; + AstDecl *next = param_decl(lex); + link_decl(list, &prev, next); } while (test_next(lex, ',')); delim_next(lex, ')', '(', line); } - return argc; + return list; } static Block *block(Lex *lex) { const int line = lex->line; - Block *result = pawK_add_node(lex, STMT_BLOCK, Block); + AstStmt *result = new_stmt(lex, STMT_BLOCK); + Block *r = &result->block; check_next(lex, '{'); - result->nstmts = stmtlist(lex, &result->stmts); + r->stmts = stmt_list(lex); delim_next(lex, '}', '{', line); - return result; + return r; } -static Expr *primary_expr(Lex *lex) +static AstExpr *primary_expr(Lex *lex) { switch (lex->t.kind) { case '(': return paren_expr(lex); case TK_NAME: - return varexpr(lex); + return name_expr(lex); case TK_STRING: { const Value v = lex->t.value; - skip(lex); - return emit_primitive(lex, v, PAW_TSTRING); + skip(lex); // string token + return new_basic_lit(lex, v, PAW_TSTRING); } - case '[': - return array_expr(lex); - case '{': - return map_expr(lex); - //case TK_SUPER: - // return superexpr(lex, e); + //case '[': + // return array_expr(lex); + //case '{': + // return map_expr(lex); + case TK_FN: + return type_expr(lex); default: expected_symbol(lex, "name or '('"); return NULL; // never run } } -static Expr *suffixed_expr(Lex *lex) +static AstExpr *suffixed_expr(Lex *lex) { - Expr *e = primary_expr(lex); - if (test(lex, '{')) { - e = 
initexpr(lex, e); - } + AstExpr *e = primary_expr(lex); for (;;) { // parse suffix chain switch (lex->t.kind) { + case '?': + e = chain_expr(lex, e); + break; case '(': e = call_expr(lex, e); break; case '.': - e = dotexpr(lex, e); + e = selector_expr(lex, e); + break; + case TK_COLON2: + e = access_expr(lex, e); break; case '[': e = index_expr(lex, e); break; - case '?': - e = chain_expr(lex, e); + case '{': + switch (a_kind(e)) { + case EXPR_NAME: + case EXPR_INDEX: + case EXPR_ACCESS: + if (lex->expr_depth < 0) { + return e; + } + break; + default: + return e; + } + e = composite_lit(lex, e); break; default: return e; @@ -776,9 +798,9 @@ static Expr *suffixed_expr(Lex *lex) } } -static Expr *simple_expr(Lex *lex) +static AstExpr *simple_expr(Lex *lex) { - Expr *expr; + AstExpr *expr; switch (lex->t.kind) { case TK_TRUE: expr = emit_bool(lex, PAW_TRUE); @@ -787,10 +809,10 @@ static Expr *simple_expr(Lex *lex) expr = emit_bool(lex, PAW_FALSE); break; case TK_INTEGER: - expr = emit_primitive(lex, lex->t.value, PAW_TINT); + expr = new_basic_lit(lex, lex->t.value, PAW_TINT); break; case TK_FLOAT: - expr = emit_primitive(lex, lex->t.value, PAW_TFLOAT); + expr = new_basic_lit(lex, lex->t.value, PAW_TFLOAT); break; default: return suffixed_expr(lex); @@ -799,46 +821,49 @@ static Expr *simple_expr(Lex *lex) return expr; } -static Expr *binop_expr(Lex *lex, InfixOp op, Expr *lhs) +static AstExpr *binop_expr(Lex *lex, InfixOp op, AstExpr *lhs) { skip(lex); // binary operator token - Expr *rhs = subexpr(lex, right_prec(op)); + AstExpr *rhs = expression(lex, right_prec(op)); if (rhs == NULL) { return NULL; // no more binops } - BinOpExpr *result = pawK_add_node(lex, EXPR_BINOP, BinOpExpr); - result->op = (BinaryOp)op; // same order - result->lhs = lhs; - result->rhs = rhs; - return cast_expr(result); + AstExpr *result = new_expr(lex, EXPR_BINOP); + BinOpExpr *r = &result->binop; + r->op = (BinaryOp)op; // same order + r->lhs = lhs; + r->rhs = rhs; + return result; } -static 
Expr *logical_expr(Lex *lex, Expr *lhs, paw_Bool is_and) +static AstExpr *logical_expr(Lex *lex, AstExpr *lhs, paw_Bool is_and) { skip(lex); // '&&' or '||' token - Expr *rhs = subexpr(lex, right_prec(INFIX_AND)); + AstExpr *rhs = expression(lex, right_prec(INFIX_AND)); if (rhs == NULL) { return NULL; // no more binops } - LogicalExpr *result = pawK_add_node(lex, EXPR_LOGICAL, LogicalExpr); - result->is_and = is_and; - result->lhs = lhs; - result->rhs = rhs; - return cast_expr(result); + AstExpr *result = new_expr(lex, EXPR_LOGICAL); + LogicalExpr *r = &result->logical; + r->is_and = is_and; + r->lhs = lhs; + r->rhs = rhs; + return result; } -static Expr *cond_expr(Lex *lex, Expr *lhs) +static AstExpr *cond_expr(Lex *lex, AstExpr *lhs) { skip(lex); // '??' token - CondExpr *result = pawK_add_node(lex, EXPR_COND, CondExpr); - result->cond = lhs; - result->lhs = subexpr(lex, right_prec(INFIX_COND)); + AstExpr *result = new_expr(lex, EXPR_COND); + CondExpr *r = &result->cond; + r->cond = lhs; + r->lhs = expression(lex, right_prec(INFIX_COND)); check_next(lex, TK_COLON2); - result->rhs = expr0(lex); - return cast_expr(result); + r->rhs = expression0(lex); + return result; } -static Expr *infix_expr(Lex *lex, Expr *lhs, unsigned op) +static AstExpr *infix_expr(Lex *lex, AstExpr *lhs, unsigned op) { switch (op) { case INFIX_AND: @@ -851,10 +876,10 @@ static Expr *infix_expr(Lex *lex, Expr *lhs, unsigned op) } } -static Expr *subexpr(Lex *lex, unsigned prec) +static AstExpr *subexpr(Lex *lex, unsigned prec) { unsigned op = get_unop(lex->t.kind); - Expr *expr = op == NOT_UNOP + AstExpr *expr = op == NOT_UNOP ? 
simple_expr(lex) : unop_expr(lex, op); @@ -866,82 +891,89 @@ static Expr *subexpr(Lex *lex, unsigned prec) return expr; } -static Stmt *if_stmt(Lex *lex) +// TODO +static AstExpr *expression(Lex *lex, unsigned prec) +{ + return subexpr(lex, prec); +} + +static AstExpr *basic_expr(Lex *lex) +{ + const int prev_depth = lex->expr_depth; + lex->expr_depth = -1; + AstExpr *expr = subexpr(lex, 0); + lex->expr_depth = prev_depth; + return expr; +} + +static AstStmt *if_stmt(Lex *lex) { skip(lex); // 'if' token - IfElseStmt *result = pawK_add_node(lex, STMT_IFELSE, IfElseStmt); - result->cond = expr0(lex); // conditional - result->then_arm = cast_stmt(block(lex)); // 'then' block + AstStmt *result = new_stmt(lex, STMT_IF); + IfStmt *r = &result->if_; + r->cond = basic_expr(lex); // conditional + r->then_arm = cast_stmt(block(lex)); // 'then' block if (test_next(lex, TK_ELSE)) { if (test(lex, TK_IF)) { // Put the rest of the chain in the else branch. This transformation looks // like 'if a {} else if b {} else {}' -> 'if a {} else {if b {} else {}}'. 
- result->else_arm = if_stmt(lex); + r->else_arm = if_stmt(lex); } else { - result->else_arm = cast_stmt(block(lex)); + r->else_arm = cast_stmt(block(lex)); } } - return cast_stmt(result); + return result; } -static Stmt *exprstmt(Lex *lex) +static AstStmt *expr_stmt(Lex *lex) { - ExprStmt *result = pawK_add_node(lex, STMT_EXPR, ExprStmt); - result->lhs = suffixed_expr(lex); + AstStmt *result = new_stmt(lex, STMT_EXPR); + AstExprStmt *r = &result->expr; + r->lhs = suffixed_expr(lex); if (test_next(lex, '=')) { - result->rhs = expr0(lex); + r->rhs = expression0(lex); } semicolon(lex); - return cast_stmt(result); -} - -static Expr *emit_literal(Lex *lex, const char *name, Expr *expr, paw_Type t) -{ - LiteralExpr *result = pawK_add_node(lex, EXPR_LITERAL, LiteralExpr); - result->label = name; - result->expr = expr; - result->t = t; - return cast_expr(result); + return result; } -static Expr *literal_expr(Lex *lex, const char *name, paw_Type t) +static AstStmt *fornum(Lex *lex, String *ivar) { - return emit_literal(lex, name, expr0(lex), t); -} + AstStmt *result = new_stmt(lex, STMT_FORNUM); + ForStmt *r = &result->for_; + ForNum *fornum = &r->fornum; + r->name = ivar; -static Stmt *fornum(Lex *lex, String *ivar) -{ - ForStmt *result = pawK_add_node(lex, STMT_FORNUM, ForStmt); - ForNum *fornum = &result->fornum; - result->name = ivar; // Parse the loop bounds ('begin', 'end', and 'step' expressions). 
- fornum->begin = literal_expr(lex, "(for begin)", PAW_TINT); + fornum->begin = basic_expr(lex); check_next(lex, ','); - fornum->end = literal_expr(lex, "(for end)", PAW_TINT); + fornum->end = basic_expr(lex); if (test_next(lex, ',')) { - fornum->step = literal_expr(lex, "(for step)", PAW_TINT); + fornum->step = basic_expr(lex); } else { Value v; v_set_int(&v, 1); // step defaults to 1 - Expr *step = emit_primitive(lex, v, PAW_TINT); - fornum->step = emit_literal(lex, "(for step)", step, PAW_TINT); + fornum->step = new_basic_lit(lex, v, PAW_TINT); } - result->block = block(lex); - return cast_stmt(result); + + r->block = block(lex); + return result; } -static Stmt *forin(Lex *lex, String *ivar) +static AstStmt *forin(Lex *lex, String *ivar) { - ForStmt *result = pawK_add_node(lex, STMT_FORIN, ForStmt); - ForIn *forin = &result->forin; - forin->target = expr0(lex); - result->name = ivar; - result->block = block(lex); - return cast_stmt(result); + AstStmt *result = new_stmt(lex, STMT_FORIN); + ForStmt *r = &result->for_; + ForIn *forin = &r->forin; + + forin->target = basic_expr(lex); + r->name = ivar; + r->block = block(lex); + return result; } -static Stmt *for_stmt(Lex *lex) +static AstStmt *for_stmt(Lex *lex) { skip(lex); // 'for' token String *ivar = parse_name(lex); // loop variable @@ -950,176 +982,241 @@ static Stmt *for_stmt(Lex *lex) } else if (!test_next(lex, TK_IN)) { expected_symbol(lex, "'=' or 'in'"); // no return } - return forin(lex, ivar); + AstStmt *stmt = forin(lex, ivar); + return stmt; } -static Stmt *while_stmt(Lex *lex) +static AstStmt *while_stmt(Lex *lex) { - WhileStmt *result = pawK_add_node(lex, STMT_WHILE, WhileStmt); + AstStmt *result = new_stmt(lex, STMT_WHILE); + WhileStmt *r = &result->while_; skip(lex); // 'while' token - result->cond = expr0(lex); - result->block = block(lex); - return cast_stmt(result); + + r->cond = basic_expr(lex); + r->block = block(lex); + return result; } -static Stmt *dowhile_stmt(Lex *lex) +static AstStmt 
*dowhile_stmt(Lex *lex) { - WhileStmt *result = pawK_add_node(lex, STMT_DOWHILE, WhileStmt); skip(lex); // 'do' token - result->block = block(lex); + AstStmt *r = new_stmt(lex, STMT_DOWHILE); + r->while_.block = block(lex); check_next(lex, TK_WHILE); - result->cond = expr0(lex); - return cast_stmt(result); + r->while_.cond = basic_expr(lex); + return r; } -static Stmt *return_stmt(Lex *lex) +static AstStmt *return_stmt(Lex *lex) { - ReturnStmt *result = pawK_add_node(lex, STMT_RETURN, ReturnStmt); + AstStmt *result = new_stmt(lex, STMT_RETURN); + ReturnStmt *r = &result->return_; skip(lex); // 'return' token if (end_of_block(lex) || test(lex, ';')) { - result->expr = NULL; + r->expr = NULL; } else { - result->expr = expr0(lex); + r->expr = expression0(lex); } - // NOTE: The construct 'return [expr] [`;`]' must be followed by the - // end of the block. This is necessary because the semicolon is + // NOTE: The construct 'return [expr] [`;`]' must be followed by the TODO: auto semicolon insertion fixes this + // end of the block. This is necessary because the semicolon is could relax restriction, make it easier to 'prototype' in paw // optional, and we cannot easily tell if it was intended to go // before or after the '[expr]' part. 
semicolon(lex); - return cast_stmt(result); + return result; } -static Stmt *labelstmt(Lex *lex, LabelKind kind) +static AstStmt *label_stmt(Lex *lex, LabelKind kind) { - LabelStmt *result = pawK_add_node(lex, STMT_LABEL, LabelStmt); - result->label = kind; + AstStmt *result = new_stmt(lex, STMT_LABEL); + LabelStmt *r = &result->label; + r->label = kind; skip(lex); // 'break' or 'continue' token semicolon(lex); - return cast_stmt(result); + return result; } -static void function(Lex *lex, String *name, Function *pfn) +static AstDeclList *maybe_type_param(Lex *lex) { - pfn->nargs = fn_parameters(lex, &pfn->args); - pfn->ret = ret_annotation(lex); - pfn->name = name; - pfn->body = block(lex); + const int line = lex->line; + if (!test_next(lex, '[')) { + return NULL; + } else if (test_next(lex, ']')) { + pawX_error(lex, "empty generic parameters"); + } + + AstDecl *prev; + AstDeclList *list = new_decl_list(lex); + ++lex->expr_depth; + do { + if (list->count == ARGC_MAX) { + limit_error(lex, "generic type parameters", ARGC_MAX); + } + AstDecl *r = new_decl(lex, DECL_GENERIC); + r->generic.name = parse_name(lex); + link_decl(list, &prev, r); + } while (test_next(lex, ',')); + delim_next(lex, ']', '[', line); + --lex->expr_depth; + return list; } -static Stmt *fn_stmt(Lex *lex, int line, paw_Bool global) +static AstDecl *function(Lex *lex, String *name, FuncKind kind) +{ + AstDecl *d = new_decl(lex, DECL_FUNC); + d->func.name = name; + d->func.fn_kind = kind; + d->func.generics = maybe_type_param(lex); + d->func.params = parameters(lex); + d->func.return_ = ret_annotation(lex); + d->func.is_poly = d->func.generics != NULL; + d->func.body = block(lex); + return d; +} + +static AstDecl *func_decl(Lex *lex, int line, paw_Bool global) { skip(lex); // 'fn' token - FnStmt *result = pawK_add_node(lex, STMT_FN, FnStmt); String *name = parse_name(lex); - result->flags.kind = FN_FUNCTION; - result->flags.global = global; - result->line = line; - function(lex, name, &result->fn); - 
return cast_stmt(result); + AstDecl *r = function(lex, name, FUNC_FUNCTION); + r->func.is_global = global; + r->func.line = line; + return r; } -static Stmt *attr_def(Lex *lex, String *name) +static AstDecl *field_decl(Lex *lex, String *name) { - const int line = lex->lastline; - AttrStmt *result = pawK_add_node(lex, STMT_ATTR, AttrStmt); - result->line = line; // line containing 'global' or 'let' - result->name = name; - result->tag = var_annotation(lex); + AstDecl *r = new_decl(lex, DECL_FIELD); + r->field.name = name; + r->field.tag = var_annotation(lex); semicolon(lex); - return cast_stmt(result); + return r; } -static Stmt *method_def(Lex *lex, String *name) +static AstDecl *method_decl(Lex *lex, String *name) { - const int line = lex->lastline; - AttrStmt *result = pawK_add_node(lex, STMT_ATTR, AttrStmt); - result->name = name; - result->line = line; - result->is_fn = PAW_TRUE; - function(lex, name, &result->fn); - return cast_stmt(result); + AstDecl *r = function(lex, name, FUNC_METHOD); + semicolon(lex); + return r; } -static Stmt *attrstmt(Lex *lex) +static AstDecl *attr_decl(Lex *lex) { String *name = v_string(lex->t.value); skip(lex); // name token - if (test(lex, ':') || test(lex, '=')) { - return attr_def(lex, name); + if (test(lex, ':')) { + return field_decl(lex, name); } else { - return method_def(lex, name); + return method_decl(lex, name); } } -static int class_body(Lex *lex, Stmt **phead) +static void struct_body(Lex *lex, StructDecl *struct_) { - Stmt *last; + AstDecl *last_field; + AstDecl *last_method; const int line = lex->line; check_next(lex, '{'); - int nattrs = 0; + struct_->fields = new_decl_list(lex); + struct_->methods = new_decl_list(lex); while (!test(lex, '}')) { check(lex, TK_NAME); - if (nattrs > LOCAL_MAX) { - limit_error(lex, "attributes", LOCAL_MAX); + AstDecl *next = attr_decl(lex); + if (a_kind(next) == DECL_FUNC) { + if (struct_->methods->count == LOCAL_MAX) { + limit_error(lex, "methods", LOCAL_MAX); + } + // use the method 
chain, 'next' link used for template instances + link_method(struct_->methods, &last_method, next); + } else if (struct_->fields->count == LOCAL_MAX) { + limit_error(lex, "fields", LOCAL_MAX); + } else { + link_decl(struct_->fields, &last_field, next); } - Stmt *next = attrstmt(lex); - link_stmt(phead, &last, next); - ++nattrs; } delim_next(lex, '}', '{', line); - return nattrs; } -static Stmt *class_stmt(Lex *lex, paw_Bool global) +static AstDecl *struct_decl(Lex *lex, paw_Bool global) { - skip(lex); // 'class' token + skip(lex); // 'struct' token + AstDecl *r = new_decl(lex, DECL_STRUCT); + r->struct_.is_global = global; + r->struct_.name = parse_name(lex); + r->struct_.generics = maybe_type_param(lex); + r->struct_.is_poly = r->struct_.generics != NULL; + struct_body(lex, &r->struct_); + semicolon(lex); + return r; +} - ClassStmt *s = pawK_add_node(lex, STMT_CLASS, ClassStmt); - s->flags.global = global; - s->name = parse_name(lex); - if (test_next(lex, ':')) { - // push superclass - s->super = varexpr(lex); - } - s->nattrs = class_body(lex, &s->attrs); +static AstDecl *type_decl(Lex *lex) +{ + AstDecl *r = new_decl(lex, DECL_TYPE); + skip(lex); // 'type' token + + r->type.name = parse_name(lex); + r->type.generics = maybe_type_param(lex); + + check_next(lex, '='); + + // 'type_expr()' parses function signatures, which are not allowed + // on the RHS of a type expression. This should be caught during + // type checking, since we also need to make sure the RHS is not + // referring to an uninstantiated template. 
+ r->type.rhs = type_expr(lex); semicolon(lex); - return cast_stmt(s); + return r; } -static Stmt *global_stmt(Lex *lex) +static AstDecl *global_decl(Lex *lex) { - const int line = lex->lastline; + const int line = lex->line; skip(lex); // 'global' token if (test(lex, TK_FN)) { skip(lex); // 'fn' token - return fn_stmt(lex, line, PAW_TRUE); - } else if (test(lex, TK_CLASS)) { - return class_stmt(lex, PAW_TRUE); + return func_decl(lex, line, PAW_TRUE); + } else if (test(lex, TK_STRUCT)) { + return struct_decl(lex, PAW_TRUE); } else { - return variable_def(lex, line, PAW_TRUE); + return var_decl(lex, line, PAW_TRUE); } } -static Stmt *stmt(Lex *lex) +static AstDecl *decl(Lex *lex) +{ + switch (lex->t.kind) { + case TK_FN: + return func_decl(lex, lex->line, PAW_FALSE); + case TK_STRUCT: + return struct_decl(lex, PAW_FALSE); + case TK_TYPE: + return type_decl(lex); + case TK_LET: + skip(lex); // 'let' token + return var_decl(lex, lex->line, PAW_FALSE); + default: + paw_assert(lex->t.kind == TK_GLOBAL); + return global_decl(lex); + } +} + +static AstStmt *decl_stmt(Lex *lex) +{ + AstStmt *r = new_stmt(lex, STMT_DECL); + r->decl.decl = decl(lex); + return r; +} + +static AstStmt *statement(Lex *lex) { -try_again: switch (lex->t.kind) { case ';': // empty statement skip(lex); // ';' token - goto try_again; + return NULL; case '{': return cast_stmt(block(lex)); - case TK_FN: - return fn_stmt(lex, lex->lastline, PAW_FALSE); - case TK_CLASS: - return class_stmt(lex, PAW_FALSE); - case TK_LET: - skip(lex); // 'let' token - return variable_def(lex, lex->lastline, PAW_FALSE); - case TK_GLOBAL: - return global_stmt(lex); case TK_IF: return if_stmt(lex); case TK_FOR: @@ -1131,20 +1228,27 @@ static Stmt *stmt(Lex *lex) case TK_RETURN: return return_stmt(lex); case TK_BREAK: - return labelstmt(lex, LBREAK); + return label_stmt(lex, LBREAK); case TK_CONTINUE: - return labelstmt(lex, LCONTINUE); + return label_stmt(lex, LCONTINUE); + case TK_FN: + case TK_STRUCT: + case TK_LET: + 
case TK_GLOBAL: + case TK_TYPE: + return decl_stmt(lex); default: - return exprstmt(lex); + return expr_stmt(lex); } } -// All paw language keywords (must be in this order, the same order as the -// keyword variants in the TokenKind enum in lex.h) +// All paw language keywords +// +// ORDER TokenKind static const char *kKeywords[] = { "fn", - "class", - "super", + "type", + "struct", "global", "let", "if", @@ -1160,11 +1264,12 @@ static const char *kKeywords[] = { "false", }; -static String *new_fixed_string(paw_Env *P, const char *s) + +static String *basic_type_name(paw_Env *P, const char *name, paw_Type type) { - String *str = pawS_new_str(P, s); - pawG_fix_object(P, cast_object(str)); - return str; + String *s = pawS_new_fixed(P, name); + s->flag = -type - 1; // flag < 0 to distinguish from keywords + return s; } void pawP_init(paw_Env *P) @@ -1173,19 +1278,22 @@ void pawP_init(paw_Env *P) // collected. Also added to the lexer string map. for (size_t i = 0; i < paw_countof(kKeywords); ++i) { const char *kw = kKeywords[i]; - String *str = new_fixed_string(P, kw); + String *str = pawS_new_fixed(P, kw); str->flag = i + FIRST_KEYWORD; } for (Metamethod mm = 0; mm < NMETAMETHODS; ++mm) { const char *name = pawT_name(mm); - String *str = new_fixed_string(P, name); + String *str = pawS_new_fixed(P, name); v_set_object(&P->meta_keys[mm], str); } - v_set_object(&P->str_cache[CSTR_SELF], new_fixed_string(P, "self")); - v_set_object(&P->str_cache[CSTR_INIT], pawS_new_str(P, "__init")); - v_set_object(&P->str_cache[CSTR_SUPER], pawS_new_str(P, "super")); - v_set_object(&P->str_cache[CSTR_TRUE], pawS_new_str(P, "true")); - v_set_object(&P->str_cache[CSTR_FALSE], pawS_new_str(P, "false")); + P->str_cache[CSTR_SELF] = pawS_new_str(P, "self"); + P->str_cache[CSTR_TRUE] = pawS_new_str(P, "true"); + P->str_cache[CSTR_FALSE] = pawS_new_str(P, "false"); + P->str_cache[CSTR_UNIT] = basic_type_name(P, "()", PAW_TUNIT); + P->str_cache[CSTR_BOOL] = basic_type_name(P, "bool", 
PAW_TBOOL); + P->str_cache[CSTR_INT] = basic_type_name(P, "int", PAW_TINT); + P->str_cache[CSTR_FLOAT] = basic_type_name(P, "float", PAW_TFLOAT); + P->str_cache[CSTR_STRING] = basic_type_name(P, "string", PAW_TSTRING); } static void skip_hashbang(Lex *lex) @@ -1199,218 +1307,28 @@ static void skip_hashbang(Lex *lex) } } -static Tree *parse_module(Lex *lex) +static Ast *parse_module(Lex *lex) { - Tree *ast = lex->ast; + Ast *ast = lex->pm->ast; skip_hashbang(lex); - ast->nstmts = stmtlist(lex, &ast->stmts); + ast->stmts = stmt_list(lex); check(lex, TK_END); return ast; } -static void dump_expr(paw_Env *P, Expr *expr, int indent); - -static void dump_symbols(paw_Env *P, Scope *st, int indent) -{ - for (int i = 0; i < st->nsymbols; ++i) { - for (int i = 0; i < indent; ++i) { - printf(" "); - } - const Symbol *s = st->symbols[i]; - printf("symbol %s: type = %s\n", s->name->text, pawY_name(y_id(s->type))); - } -} - -static void dump_stmt(paw_Env *P, Stmt *stmt, int indent) -{ - for (int i = 0; i < indent; ++i) { - printf(" "); - } - if (stmt == NULL) { - puts("NULL\n"); - return; - } - switch (stmt->kind) { - case STMT_EXPR: - printf("exprstmt\n"); - dump_expr(P, cast_expr(cast_to(stmt, ExprStmt)->lhs), indent + 1); - if (cast_to(stmt, ExprStmt)->rhs) { - dump_expr(P, cast_to(stmt, ExprStmt)->rhs, indent + 1); - } - break; - case STMT_BLOCK: { - Block *bn = cast_to(stmt, Block); - printf("block\n"); - dump_symbols(P, bn->scope, indent + 1); - for (Stmt *stmt = bn->stmts; stmt; stmt = stmt->next) { - dump_stmt(P, cast_stmt(stmt), indent + 1); - } - break; - } - case STMT_IFELSE: - printf("ifelse\n"); - break; - case STMT_FORIN: { - ForStmt *s = cast_to(stmt, ForStmt); - printf("forin\n"); - dump_symbols(P, s->scope, indent + 1); - dump_expr(P, s->forin.target, indent + 1); - dump_stmt(P, cast_stmt(s->block), indent + 1); - break; - } - case STMT_FORNUM: { - ForStmt *s = cast_to(stmt, ForStmt); - printf("fornum\n"); - dump_symbols(P, s->scope, indent + 1); - dump_expr(P, 
s->fornum.begin, indent + 1); - dump_expr(P, s->fornum.end, indent + 1); - dump_expr(P, s->fornum.step, indent + 1); - dump_stmt(P, cast_stmt(s->block), indent + 1); - break; - } - case STMT_WHILE: { - WhileStmt *s = cast_to(stmt, WhileStmt); - printf("while\n"); - dump_expr(P, s->cond, indent + 1); - dump_stmt(P, cast_stmt(s->block), indent + 1); - break; - } - case STMT_DOWHILE: { - WhileStmt *s = cast_to(stmt, WhileStmt); - printf("dowhile\n"); - dump_stmt(P, cast_stmt(s->block), indent + 1); - dump_expr(P, s->cond, indent + 1); - break; - } - case STMT_FN: { - FnStmt *fn = cast_to(stmt, FnStmt); - - printf("fn (%s) %s: %d\n", fn->flags.global ? "global" : "local", fn->fn.name->text, fn->fn.ret ? fn->fn.ret->type->hdr.id : -1); - for (Stmt *arg = fn->fn.args; arg; arg = arg->next) { - dump_stmt(P, cast_stmt(arg), indent + 1); // list of DefStmt - } - dump_stmt(P, cast_stmt(fn->fn.body), indent + 1); - break; - } - case STMT_PARAM: { - ParamStmt *in = cast_to(stmt, ParamStmt); - printf("%s: %d\n", in->name->text, in->tag ? in->tag->type->hdr.id : 42); - break; - } - case STMT_DEF: { - DefStmt *in = cast_to(stmt, DefStmt); - printf("%s %s: %d\n", in->flags.global ? "global" : "let", in->name->text, in->tag ? 
in->tag->type->hdr.id : 42); - dump_expr(P, in->init, indent + 1); - break; - } - case STMT_RETURN: { - ReturnStmt *bn = cast_to(stmt, ReturnStmt); - printf("return\n"); - dump_expr(P, bn->expr, indent + 1); - break; - } - case STMT_ATTR: { - AttrStmt *s = cast_to(stmt, AttrStmt); - printf("attr '%s'", s->name->text); - if (s->is_fn) { - printf("\n"); - for (Stmt *arg = s->fn.args; arg; arg = arg->next) { - dump_stmt(P, cast_stmt(arg), indent + 1); - } - dump_stmt(P, cast_stmt(s->fn.body), indent + 1); - } else { - printf("%d\n", s->tag->type->hdr.id); - } - break; - } - case STMT_CLASS: { - ClassStmt *s = cast_to(stmt, ClassStmt); - printf("class '%s'\n", s->name->text); - for (Stmt *attr = s->attrs; attr; attr = attr->next) { - dump_stmt(P, cast_stmt(attr), indent + 1); - } - break; - } - default: - printf("?\n"); - break; - } -} - -static void dump_expr(paw_Env *P, Expr *expr, int indent) -{ - for (int i = 0; i < indent; ++i) { - printf(" "); - } - if (expr == NULL) { - puts("NULL\n"); - return; - } - if (expr->type) { - printf("type (%s): ", pawY_name((int)expr->type->hdr.id)); - } else { - printf("NULL "); - } - switch (expr->kind) { - case EXPR_PRIMITIVE: { - Buffer buf; - pawL_init_buffer(P, &buf); - PrimitiveExpr *ln = cast_to(expr, PrimitiveExpr); - pawC_pushv(P, ln->v); - pawL_add_value(P, &buf, P->mod->types[ln->t]->hdr.id); - pawL_add_char(P, &buf, '\0'); - puts(buf.data); - pawL_discard_result(P, &buf); - break; - } - case EXPR_UNOP: - printf("unop %d\n", cast_to(expr, UnOpExpr)->op); - dump_expr(P, cast_to(expr, UnOpExpr)->target, indent + 1); - break; - case EXPR_BINOP: - printf("binop %d\n", cast_to(expr, BinOpExpr)->op); - dump_expr(P, cast_to(expr, BinOpExpr)->lhs, indent + 1); - dump_expr(P, cast_to(expr, BinOpExpr)->rhs, indent + 1); - break; - case EXPR_CALL: - printf("call\n"); - break; - case EXPR_COND: - printf("cond\n"); - dump_expr(P, cast_to(expr, CondExpr)->cond, indent + 1); - dump_expr(P, cast_to(expr, CondExpr)->lhs, indent + 1); - 
dump_expr(P, cast_to(expr, CondExpr)->rhs, indent + 1); - break; - case EXPR_VAR: { - printf("var '%s' (%p)\n", cast_to(expr, VarExpr)->name->text, (void*)expr); - break; - } - default: - printf("?\n"); - break; - } -} - -#include -static void dump_ast(Lex *lex, Stmt *root) -{ - for (; root; root = root->next) { - dump_stmt(lex->P, root, 0); - } -} - Closure *pawP_parse(paw_Env *P, paw_Reader input, ParseMemory *pm, const char *name, void *ud) { // Initialize the lexical state. Lex lex = { - .mod = P->mod, .pm = pm, .ud = ud, .P = P, }; - pm->st.globals = pawM_new(P, Scope); + pm->unifier.lex = &lex; + pm->ast = pawA_new_ast(&lex); + pm->symbols.globals = pawM_new(P, Scope); pawX_set_source(&lex, input); - lex.ast = pawK_new_ast(P); + // TODO: AST needs to go on the stack in a Foreign object. We will leak the // AST if an error is thrown between now and when it is freed below. // Use a flag stored somewhere in the object to disambiguate between @@ -1446,17 +1364,14 @@ Closure *pawP_parse(paw_Env *P, paw_Reader input, ParseMemory *pm, const char *n // Compile the module. parse_module(&lex); // pass 1 (source -> AST) - p_check_types(&lex); // pass 2 (AST -> checked AST) + p_check_types(&lex); // pass 2 (AST -> graph) - dump_ast(&lex, lex.ast->stmts); // TODO: remove + pawA_dump_stmt(stdout, lex.pm->ast->stmts->first); // TODO: remove - p_generate_code(&lex); // pass 2 (checked AST -> bytecode) + p_generate_code(&lex); // pass 3 (graph -> bytecode) // Pop the lexer map. The strings it contains should be anchored elsewhere. // Leave the main closure on top of the stack. pawC_stkdec(P, 1); - - // cleanup - pawK_free_ast(P, lex.ast); return lex.main; } diff --git a/src/parse.h b/src/parse.h index 30a215b..6165487 100644 --- a/src/parse.h +++ b/src/parse.h @@ -2,14 +2,20 @@ // This source code is licensed under the MIT License, which can be found in // LICENSE.md. See AUTHORS.md for a list of contributor names. 
// -// Compilation phases: +// parse.h: compiler entrypoint // -// Pass | Target | Purpose -// ------|--------|----------------------------- -// 1 | code | build ast, register symbols -// 2 | tree | check types -// 3 | tree | generate code +// The compiler converts source code into bytecode that can be run in paw's +// virtual machine. It works in 3 passes: // +// Pass | Input | Output | Purpose +// ------|-------------|-----------|--------------------------- +// 1 | source code | AST | build AST +// 2 | AST | typed AST | build symtab, unify types +// 3 | typed AST | bytecode | generate code +// +// TODO: rename some of these files: parse.* should maybe be called compile.*, and +// it would be nice to have a separate AST module. + #ifndef PAW_PARSE_H #define PAW_PARSE_H @@ -24,7 +30,17 @@ #define limit_error(x, what, limit) \ pawX_error(x, "too many %s (limit is %d)", what, limit) -typedef enum LabeKind { +// TODO: Use this to keep track of dynamic memory +typedef union DeferredAlloc DeferredAlloc; + +typedef enum DeferredKind { + DEFER_SCOPE, +} DeferredKind; + +#define DEFERRED_HEADER DeferredAlloc *prev_alloc; \ + DeferredKind alloc_kind + +typedef enum LabelKind { LBREAK, LCONTINUE, } LabelKind; @@ -45,6 +61,7 @@ typedef struct LabelList { // Represents a single lexical scope typedef struct Scope { + DEFERRED_HEADER; struct Symbol **symbols; int nsymbols; int capacity; @@ -58,41 +75,97 @@ typedef struct SymbolTable { Scope **scopes; int nscopes; int capacity; - - Type base_types[PAW_NTYPES]; } SymbolTable; #define last_scope(t) check_exp((t)->size > 0, (t)->data[(t)->size - 1]) -Scope *pawP_add_scope(Lex *lex, SymbolTable *table); +Scope *pawP_new_scope(Lex *lex, SymbolTable *table); +void pawP_add_scope(Lex *lex, SymbolTable *table, Scope *scope); struct Symbol *pawP_add_symbol(Lex *lex, Scope *table); int pawP_find_symbol(Scope *scope, const String *name); -typedef struct ClsState { - struct ClsState *outer; -} ClsState; +typedef struct Type Type; // type for 
unifier +typedef struct Unifier Unifier; // unification context +typedef struct UniTable UniTable; // unification table + +typedef Type *(*Unify)(Unifier *, Type *, Type *); + +struct Unifier { + UniTable *table; + Unify unify; + Lex *lex; + int depth; +}; + +#define p_is_bound(U, t) check_exp(y_is_type_var(t) && \ + (t)->var.depth <= (U)->depth, \ + (t)->var.depth == (U)->depth) + +// Apply substitutions to a type TODO: use the other one +Type *pawP_normalize(Unifier *U, Type *a); + +Type *pawP_normalize_(UniTable *table, Type *a); + +// Impose the constraint that type variables 'a' and 'b' are equal +void pawP_unify(Unifier *U, Type *a, Type *b); + +// Create a new type variable +// 'type' must be a GenericType. +void pawP_new_type_var(Unifier *U, Type *type); + +// Generics context handling +void pawP_unifier_enter(Unifier *U, UniTable *table); +UniTable *pawP_unifier_leave(Unifier *U); +void pawP_unifier_replace(Unifier *U, UniTable *table); + +// TODO: Don't leak unification tables! 
Being lazy right now + +typedef struct GenericState { + struct GenericState *outer; +} GenericState; -typedef struct BlkState { - struct BlkState *outer; - struct Block *bk; // AST representation +typedef struct StructState { + struct StructState *outer; + struct StructDecl *struct_; + struct AstDecl *method; + struct AstDecl *field; + int imethod; + int ifield; +} StructState; + +typedef struct BlockState { + struct BlockState *outer; uint8_t is_loop; + int isymbol; int level; int label0; -} BlkState; +} BlockState; -typedef enum FnKind { - FN_MODULE, - FN_FUNCTION, - FN_METHOD, - FN_INIT, -} FnKind; +typedef struct LocalSlot { + struct Symbol *symbol; + int index; +} LocalSlot; -typedef struct FnState { - struct FnState *outer; // enclosing function +typedef struct LocalStack { + LocalSlot *slots; + int nslots; + int capacity; +} LocalStack; + +typedef enum FuncKind { + FUNC_MODULE, + FUNC_FUNCTION, + FUNC_METHOD, +} FuncKind; + +// TODO: Need to keep track of scopes that get removed from the symbol table and placed in 'scopes' field. +// Either use GC, or link in a 'defer' list. 
+typedef struct FuncState { + struct FuncState *outer; // enclosing function + struct FuncType *type; // function signature + struct Generator *G; // codegen state SymbolTable scopes; // local scopes - Scope locals; // local variables - FunctionType *sig; // function signature - BlkState *bs; // current block - Lex *lex; // lexical state + LocalStack locals; // local variables + BlockState *bs; // current block Proto *proto; // prototype being built String *name; // name of the function int id; // index in caller's prototype list @@ -101,24 +174,61 @@ typedef struct FnState { int nup; // number of upvalues int nk; // number of constants int nproto; // number of nested functions - int nclasses; // number of nested classes + int nstructs; // number of nested structs int nlines; // number of source lines int pc; // number of instructions - FnKind kind; // type of function -} FnState; + FuncKind kind; // type of function +} FuncState; + +// Unifies structures that require dynamic memory +union DeferredAlloc { + Scope scope; +}; + +#define fn_has_self(kind) (kind >= FUNC_METHOD) -#define fn_has_self(kind) (kind >= FN_METHOD) +#define MAX_BINDERS 512 +// Keeps track of dynamic memory used by the compiler typedef struct ParseMemory { // Buffer for accumulating strings - struct Scratch { + struct CharVec { char *data; int size; int alloc; } scratch; - SymbolTable st; - LabelList ll; + struct { + Binder data[MAX_BINDERS]; + int size; + } temp; + + struct { + FuncSig *data; + int size; + int alloc; + } sigs; + + // Operand stack, for linearizing chains of expressions. 
+ struct { + struct IrOperand **data; + int size; + int alloc; + } opers; + + struct { + struct AstDecl **data; + int size; + int alloc; + } decls; + + struct Ast *ast; + struct Ir *ir; + + DeferredAlloc *defer; + Unifier unifier; + SymbolTable symbols; + LabelList labels; } ParseMemory; void pawP_init(paw_Env *P); diff --git a/src/paw.h b/src/paw.h index 8973c2d..3bd7bae 100644 --- a/src/paw.h +++ b/src/paw.h @@ -82,7 +82,7 @@ int paw_call(paw_Env *P, int argc); #define PAW_TARRAY 6 #define PAW_TENUM 7 #define PAW_TFUNCTION 8 -#define PAW_TCLASS 9 +#define PAW_TSTRUCT 9 #define PAW_TFOREIGN 10 #define PAW_TMODULE 11 #define PAW_NTYPES 12 @@ -187,9 +187,12 @@ void paw_pop(paw_Env *P, int n); // Return the number of values in the current stack frame int paw_get_count(paw_Env *P); +int paw_find_global(paw_Env *P, const char *name); +int paw_find_attr(paw_Env *P, int index, const char *name); + void paw_get_upvalue(paw_Env *P, int ifn, int index); -void paw_get_global(paw_Env *P, const char *name); -void paw_get_attr(paw_Env *P, int index, const char *s); +void paw_get_global(paw_Env *P, int index); +void paw_get_attr(paw_Env *P, int index, int iattr); void paw_get_item(paw_Env *P, int index); void paw_get_itemi(paw_Env *P, int index, paw_Int i); diff --git a/src/rt.c b/src/rt.c index 84f5e3a..e84b8b4 100644 --- a/src/rt.c +++ b/src/rt.c @@ -7,7 +7,7 @@ #include "auxlib.h" #include "call.h" #include "env.h" -#include "gc.h" +#include "gc_aux.h" #include "lex.h" #include "lib.h" #include "map.h" @@ -482,7 +482,7 @@ void pawR_setattr(paw_Env *P, int index) const Value obj = *vm_peek(1); Instance *ins = v_instance(obj); - ins->attrs[index] = val; + ins->attrs[1 + index] = val; vm_pop(2); } @@ -510,14 +510,14 @@ void pawR_init(paw_Env *P) v_set_object(&P->mem_errmsg, errmsg); } -//static CallFrame *super_invoke(paw_Env *P, Class *super, Value name, int argc) +//static CallFrame *super_invoke(paw_Env *P, Struct *super, Value name, int argc) //{ // Value *base = 
vm_peek(argc); // const Value *method = pawH_get(P, super->attr, name); // if (!method) { // pawR_attr_error(P, name); // } -// // The receiver (subclass) instance + parameters are on top of the stack. +// // The receiver (substruct) instance + parameters are on top of the stack. // return pawC_precall(P, base, *method, argc); //} // @@ -525,12 +525,12 @@ static CallFrame *invoke(paw_Env *P, int index, int argc) { Value *base = vm_peek(argc); Instance *ins = v_instance(*base); - Value method = ins->attrs[index]; - *base = method; // replace object with callable + Struct *struct_ = v_struct(ins->attrs[0]); + Value method = struct_->methods->begin[index]; return pawC_precall(P, base, method.o, argc); } -//static void inherit(paw_Env *P, Class *cls, const Class *super) +//static void inherit(paw_Env *P, Struct *cls, const Struct *super) //{ // // 'copy-down' inheritance // pawH_extend(P, cls->attr, super->attr); @@ -967,7 +967,7 @@ void pawR_getattr(paw_Env *P, int index) const Value obj = *vm_peek(0); Instance *ins = v_instance(obj); - *vm_peek(0) = ins->attrs[index]; + *vm_peek(0) = ins->attrs[1 + index]; } static void getitem_list(paw_Env *P, Value obj, Value key) @@ -1021,16 +1021,16 @@ int pawR_getitem(paw_Env *P, int ttarget, int tindex) static void cannonicalize_slice(size_t len, Value begin, Value end, paw_Int *bout, paw_Int *eout, paw_Int *nout) { // TODO: broken now... int value cannot be null - const paw_Int ibegin = v_is_null(begin) - ? 0 // null acts like 0 - : pawA_abs_index(v_int(begin), len); - const paw_Int iend = v_is_null(end) - ? paw_cast_int(len) // null acts like #a - : pawA_abs_index(v_int(end), len); - // clamp to sequence bounds - *bout = paw_min(paw_max(ibegin, 0), paw_cast_int(len)); - *eout = paw_min(paw_max(iend, 0), paw_cast_int(len)); - *nout = paw_max(0, *eout - *bout); +// const paw_Int ibegin = v_is_null(begin) +// ? 0 // null acts like 0 +// : pawA_abs_index(v_int(begin), len); +// const paw_Int iend = v_is_null(end) +// ? 
paw_cast_int(len) // null acts like #a +// : pawA_abs_index(v_int(end), len); +// // clamp to sequence bounds +// *bout = paw_min(paw_max(ibegin, 0), paw_cast_int(len)); +// *eout = paw_min(paw_max(iend, 0), paw_cast_int(len)); +// *nout = paw_max(0, *eout - *bout); } void pawR_getslice(paw_Env *P, int ttarget) @@ -1152,20 +1152,25 @@ static paw_Bool should_jump_false(paw_Env *P) #include "debug.h" void pawR_execute(paw_Env *P, CallFrame *cf) { + Closure *fn; const OpCode *pc; const Value *K; - Closure *fn; + Struct **C; + Type **T; top: pc = cf->pc; fn = cf->fn; K = fn->p->k; + C = fn->p->c; + T = P->mod->types; for (;;) { const OpCode opcode = *pc++; - printf("n = %d, %s\n",paw_get_count(P),paw_opcode_name(get_OP(opcode))); - // paw_dump_stack(P); + printf("n = %d, ",(int)(P->top.p-P->stack.p)); + paw_dump_opcode(opcode); + // paw_dump_stack(P); vm_switch(get_OP(opcode)) { @@ -1194,6 +1199,11 @@ void pawR_execute(paw_Env *P, CallFrame *cf) vm_pushv(K[get_U(opcode)]); } + vm_case(PUSHSTRUCT) : + { + vm_pusho(C[get_U(opcode)]); + } + vm_case(UNOP) : { vm_protect(); @@ -1235,78 +1245,36 @@ void pawR_execute(paw_Env *P, CallFrame *cf) pawR_cast_float(P, get_U(opcode)); } - vm_case(NEWCLASS) : + vm_case(NEWMETHOD) : { - vm_protect(); - const paw_Type t = get_U(opcode); - Type *type = P->mod->types[t]; - Class *cls = pawV_new_class(P, type); + Struct *cls = v_struct(*vm_peek(1)); + Value method = *vm_peek(0); + const int u = get_U(opcode); + cls->methods->begin[u] = method; + vm_pop(1); + } - const int nattrs = type->cls.nattrs; - for (int i = 0; i < nattrs; ++i) { - cls->attrs[nattrs - i - 1] = *vm_peek(i); - } - vm_pop(nattrs); - vm_pusho(cls); + vm_case(NEWINSTANCE) : + { + vm_protect(); + const int nfields = get_U(opcode); + Value *pv = vm_peek(0); + Struct *struct_ = v_struct(*pv); + Instance *ins = pawV_new_instance(P, 1 + nfields); + v_set_object(ins->attrs, struct_); + v_set_object(pv, ins); // replace Struct check_gc(P); } - vm_case(INIT) : + vm_case(INITATTR) 
: { vm_protect(); - const int nattrs = get_U(opcode); - Instance *ins = pawV_new_instance(P, nattrs); - for (int i = 0; i < nattrs; ++i) { - ins->attrs[nattrs - i - 1] = *vm_peek(i); - } - vm_pop(nattrs); - - Class *cls = v_class(*vm_peek(0)); - for (int i = 0; i < nattrs; ++i) { - if (ins->attrs[i].u != 0) { - ins->attrs[i] = cls->attrs[i]; - } - } - v_set_object(vm_peek(0), ins); + const int u = get_U(opcode); + Instance *ins = v_instance(*vm_peek(1)); + ins->attrs[1 + u] = *vm_peek(0); + vm_pop(1); } -// vm_case(GETSUPER) : -// { -// vm_protect(); -// // Attributes on 'super' can only refer to methods, not data fields. -// const Value parent = *vm_peek(0); -// const Value self = *vm_peek(1); -// const Value name = K[get_U(opcode)]; -// vm_pop(1); // pop 'parent' -// -// Class *super = v_class(parent); -// Value *value = pawH_get(P, super->attr, name); -// if (!value) { -// pawR_attr_error(P, name); -// } -// -// Value *pv = vm_push0(); -// Method *mtd = pawV_new_method(P, self, *value); -// v_set_method(pv, mtd); -// } -// -// vm_case(INVOKESUPER) : -// { -// vm_protect(); -// const Value parent = *vm_peek(0); -// const Value name = K[get_A(opcode)]; -// const int argc = get_B(opcode); -// vm_pop(1); // pop 'parent' -// vm_save(); -// -// Class *super = v_class(parent); -// CallFrame *callee = super_invoke(P, super, name, argc); -// if (callee) { -// cf = callee; -// goto top; -// } -// } -// vm_case(GETLOCAL) : { const Value local = cf->base.p[get_U(opcode)]; @@ -1345,6 +1313,15 @@ void pawR_execute(paw_Env *P, CallFrame *cf) pawR_write_global(P, u); } + vm_case(GETMETHOD) : + { + vm_protect(); + const int u = get_U(opcode); + Instance *ins = v_instance(*vm_peek(0)); + Struct *struct_ = v_struct(ins->attrs[0]); + *vm_peek(0) = struct_->methods->begin[u]; + } + vm_case(GETATTR) : { vm_protect(); diff --git a/src/str.c b/src/str.c index 7a241e4..d36f17e 100644 --- a/src/str.c +++ b/src/str.c @@ -3,7 +3,7 @@ // LICENSE.md. 
See AUTHORS.md for a list of contributor names. #include "str.h" #include "auxlib.h" -#include "gc.h" +#include "gc_aux.h" #include "mem.h" #define st_index(st, h) ((h) & (st->capacity - 1)) diff --git a/src/type.c b/src/type.c index 8e6de22..c944fba 100644 --- a/src/type.c +++ b/src/type.c @@ -6,257 +6,69 @@ #include "mem.h" #include "util.h" -// Helpers for cannonicalizing types. -// Subtypes of arguments to match_types() must be cannonicalized via prior -// invocations of pawY_add_type(). The helpers will not recur into subtypes, -// since there are likely cycles in the type graph. - -static paw_Bool match_types(const Type *a, const Type *b); - -static paw_Bool match_args(Type *const *a, Type *const *b, int nargs) +static void add_basic_type(paw_Env *P, paw_Type code) { - for (int i = 0; i < nargs; ++i) { - if (!pawY_is_same(a[i], b[i])) { - return PAW_FALSE; - } - } - return PAW_TRUE; + Type *r = pawY_type_new(P, P->mod); + r->hdr.kind = TYPE_BASIC; + r->hdr.def = code; } -static paw_Bool match_fields(const NamedField *a, const NamedField *b, int nattrs) +void pawY_init(paw_Env *P) { - for (int i = 0; i < nattrs; ++i) { - if (a[i].flags != b[i].flags || - !pawS_eq(a[i].name, b[i].name) || - !pawY_is_same(a[i].type, b[i].type)) { - return PAW_FALSE; - } - } - return PAW_TRUE; -} + P->mod = pawY_module_new(P); -static paw_Bool match_signature(const FunctionType *a, const FunctionType *b) -{ - return a->flags == b->flags && - pawY_is_same(a->ret, b->ret) && - a->nargs == b->nargs && - match_args(a->args, b->args, a->nargs); + add_basic_type(P, PAW_TUNIT); + add_basic_type(P, PAW_TBOOL); + add_basic_type(P, PAW_TINT); + add_basic_type(P, PAW_TFLOAT); + add_basic_type(P, PAW_TSTRING); } -static paw_Bool match_class(const ClassType *a, const ClassType *b) +void pawY_uninit(paw_Env *P) { - return a->flags == b->flags && - pawS_eq(a->name, b->name) && - pawY_is_same(a->super, b->super) && - a->nattrs == b->nattrs && - match_fields(a->attrs, b->attrs, a->nattrs); + 
pawY_module_free(P, P->mod); } -static paw_Bool match_types(const Type *a, const Type *b) +Module *pawY_module_new(paw_Env *P) { - if (a == NULL) { - return b == NULL; - } else if (b == NULL) { - return a == NULL; - } else if (y_kind(a) != y_kind(b)) { - return PAW_FALSE; - } - switch (y_kind(a)) { - case TYPE_SIGNATURE: - return match_signature(&a->sig, &b->sig); - case TYPE_CLASS: - return match_class(&a->cls, &b->cls); - default: - return y_id(a) == y_id(b); - } + return pawM_new(P, Module); } -//paw_Bool pawY_is_similar(Type *type, Type *tag) -//{ -// paw_assert(type && tag); -// switch (t_type(tag)) { -// case PAW_TBOOL: -// case PAW_TINT: -// case PAW_TFLOAT: -// return type->code == PAW_TBOOL || -// type->code == PAW_TINT || -// type->code == PAW_TFLOAT; -// default: -// // types are internalized -// return type == tag; -// } -//} -// -//int pawY_common(Type *a, Type *b, Type **out) -//{ -// if (!pawY_is_similar(a, b)) { -// return -1; -// } -// switch (a->code) { -// case PAW_TBOOL: -// *out = b; -// break; -// case PAW_TINT: -// // float takes precedence over int -// *out = b->code == PAW_TFLOAT ? b : a; -// break; -// default: -// // 'a' is float or other (a == b for other types) -// *out = a; -// } -// return 0; -//} -// -//Type *pawY_unwrap(paw_Env *P, Type *t) -//{ -// Type *inner = NULL; -// switch (t_base(t)) { -// case PAW_TSTRING: -// inner = e_string(P); -// break; -// case PAW_TARRAY: -// inner = t->a.elem; -// break; -// case PAW_TMAP: -// inner = t->m.value; -// break; -// case PAW_TCLASS: -// // TODO: Lookup return value of __getitem attr, if it exists. 
need type info for 'first' and 'second' to determine overload -// paw_assert(0); -// break; -// default: -// paw_assert(0); -//// pawY_error(P, "expected container or instance type"); -// } -// return inner; -//} -// -//static paw_Bool same_vecs(Type **va, int na, Type **vb, int nb) -//{ -// if (na != nb) { -// return PAW_FALSE; -// } -// for (int i = 0; i < na; ++i) { -// if (!pawY_is_same(va[i], vb[i])) { -// return PAW_FALSE; -// } -// } -// return PAW_TRUE; -//} -// -//static paw_Bool same_attrs(Attribute *va, int na, Attribute *vb, int nb) -//{ -// if (na != nb) { -// return PAW_FALSE; -// } -// for (int i = 0; i < na; ++i) { -// if (!pawS_eq(va[i].name, vb[i].name) || -// !pawY_is_same(va[i].attr, vb[i].attr)) { -// return PAW_FALSE; -// } -// } -// return PAW_TRUE; -//} -// -//// Return PAW_TRUE if type tags 'a' and 'b' are the same, PAW_FALSE -//// otherwise -//// Helper for internalizing compound types: either 'a' or 'b' is a -//// copy of the Type struct in automatic memory, and the other is -//// internalized. Sub types (parameters, attributes, etc.) are already -//// internalized. 
-//static paw_Bool same_tags(Type *a, Type *b) -//{ -// if (t_base(a) != t_base(b)) { -// return PAW_FALSE; -// } -// -// switch (t_kind(a)) { -// case PAW_TFUNCTION: -// if (pawY_is_same(a->f.ret, b->f.ret) && -// same_vecs(a->f.param, a->f.nparam, b->f.param, b->f.nparam)) { -// return PAW_TRUE; -// } -// break; -// case PAW_TCLASS: -// if (pawS_eq(a->c.name, b->c.name) && -// same_attrs(a->c.attrs, a->c.nattrs, b->c.attrs, b->c.nattrs)) { -// return PAW_TRUE; -// } -// break; -// default: -// break; -// } -// return PAW_FALSE; -//} -// -//static Type *find_compound_type(paw_Env *P, Type *type) -//{ -// struct TypeVec *tv = &P->tv; -// for (int i = 0; i < tv->size; ++i) { -// Type *t = tv->data[i]; -// if (same_tags(t, type)) { -// return t; -// } -// } -// return NULL; -//} - -static void set_base_type(paw_Env *P, const char *name, int id, TypeKind kind) +static void free_type(paw_Env *P, Type *type) { - String *str = pawS_new_fixed(P, name); - str->flag = -id - 1; // encode type index - Type *t = pawY_new_type(P); - P->mod->types[id] = t; - t->hdr.kind = kind; - t->hdr.id = id; - ++P->mod->ntypes; -} - -void pawY_init(paw_Env *P) -{ - P->mod = pawM_new(P, ModuleType); - P->mod->types = pawM_new_vec(P, PAW_NTYPES, Type *); - P->mod->capacity = PAW_NTYPES; - - set_base_type(P, "unit", PAW_TUNIT, TYPE_PRIMITIVE); - set_base_type(P, "bool", PAW_TBOOL, TYPE_PRIMITIVE); - set_base_type(P, "int", PAW_TINT, TYPE_PRIMITIVE); - set_base_type(P, "float", PAW_TFLOAT, TYPE_PRIMITIVE); - set_base_type(P, "string", PAW_TSTRING, TYPE_PRIMITIVE); - set_base_type(P, "array", PAW_TARRAY, 0); - set_base_type(P, "enum", PAW_TENUM, 0); - set_base_type(P, "tuple", PAW_TTUPLE, 0); - set_base_type(P, "function", PAW_TFUNCTION, TYPE_SIGNATURE); - set_base_type(P, "class", PAW_TCLASS, TYPE_CLASS); - set_base_type(P, "foreign", PAW_TFOREIGN, TYPE_CLASS); - set_base_type(P, "module", PAW_TMODULE, 0); - - paw_assert(P->mod->ntypes == PAW_NTYPES); -} + switch (y_kind(type)) { + case 
TYPE_FUNC: + pawM_free_vec(P, type->func.params.types, type->func.params.count); + pawM_free_vec(P, type->func.types.types, type->func.types.count); + break; + case TYPE_ADT: + pawM_free_vec(P, type->adt.types.types, type->adt.types.count); + break; + case TYPE_MODULE: + pawY_module_free(P, &type->mod); + break; + default: + break; -void pawY_uninit(paw_Env *P) -{ -// pawM_free_vec(P, P->tv.data, P->tv.alloc); + } + pawM_free(P, type); } -Type *pawY_new_type(paw_Env *P) +void pawY_module_free(paw_Env *P, Module *mod) { - return pawM_new(P, Type); + for (int i = 0; i < mod->ntypes; ++i) { + free_type(P, mod->types[i]); + } + pawM_free_vec(P, mod->types, mod->capacity); } -Type *pawY_add_type(paw_Env *P, ModuleType *mod, const Type *type) +Type *pawY_type_new(paw_Env *P, Module *mod) { - for (int i = 0; i < mod->ntypes; ++i) { - if (match_types(type, mod->types[i])) { - return mod->types[i]; - } - } pawM_grow(P, mod->types, mod->ntypes, mod->capacity); - const int index = mod->ntypes++; - Type **ptarget = &mod->types[index]; - *ptarget = pawM_new(P, Type); // cannonical version - **ptarget = *type; // copy dummy data - (*ptarget)->hdr.id = index; + const int code = mod->ntypes++; + Type **ptarget = &mod->types[code]; + *ptarget = pawM_new(P, Type); + (*ptarget)->hdr.def = code; // TODO: Not correct... return *ptarget; } diff --git a/src/type.h b/src/type.h index 3dc90ab..a2eaa3c 100644 --- a/src/type.h +++ b/src/type.h @@ -1,125 +1,110 @@ // Copyright (c) 2024, The paw Authors. All rights reserved. // This source code is licensed under the MIT License, which can be found in // LICENSE.md. See AUTHORS.md for a list of contributor names. +// +// type.h: Type system for paw +// +// Structures in this file are created during the second pass, used for type +// checking, and then stored for RTTI purposes. Each type is represented by a +// unique Type structure in its containing Module. 
#ifndef PAW_TYPE_H #define PAW_TYPE_H +#include "opcode.h" #include "paw.h" +#include "str.h" typedef struct Type Type; - -typedef enum TypeKind { - TYPE_PRIMITIVE, - TYPE_SIGNATURE, - TYPE_CLASS, - TYPE_MODULE, +typedef struct GenericBound GenericBound; +typedef uint16_t DefId; + +typedef enum TypeKind { // type->... + TYPE_BASIC, // hdr + TYPE_VAR, // var + TYPE_ADT, // adt + TYPE_FUNC, // func + TYPE_MODULE, // mod } TypeKind; #define TYPE_HEADER \ - Type *next; \ - TypeKind kind: 8; \ - uint8_t flags; \ - uint16_t id - + DefId def; \ + TypeKind kind: 8 typedef struct TypeHeader { TYPE_HEADER; } TypeHeader; -#define FIELD_IS_METHOD 1 - -typedef struct NamedField { - struct String *name; // field name - Type *type; // field type - uint8_t flags; // FIELD_* flags -} NamedField; - -typedef struct ClassType { +typedef struct TypeVar { + TYPE_HEADER; + String *name; + int depth; + int index; +} TypeVar; + +typedef struct Binder { + Type **types; + int count; +} Binder; + +// Represents a function signature +// Note that the type variables for a function signature do not participate in +// unification (they are not part of the function type). 
+typedef struct FuncSig { TYPE_HEADER; // common initial sequence - struct String *name; // name of class - Type *super; // type of 'super' - Type *self; // type of 'self' - NamedField *attrs; // attributes - int nattrs; // number of attributes -} ClassType; - -typedef struct FunctionType { + DefId base; // base template or 'def' + Type *return_; // return type + Binder params; // parameter types + Binder types; // type variables +} FuncSig; + +// Represents a structure or enumeration type +typedef struct Adt { TYPE_HEADER; // common initial sequence - struct String *name; // name of function - Type *ret; // return type - Type **args; // argument types - int nargs; // number of arguments -} FunctionType; - -typedef struct ModuleType { + DefId target; // location of base ADT definition + Binder types; // type variables +} Adt; + +// Represents the type of a Paw module +// Note that basic types ('int', 'float', etc.) are created only once, at the +// start of the root module's type Binder. Included modules reference these +// Type objects from the root. 
+typedef struct Module { TYPE_HEADER; // common initial sequence - struct ModuleType *includes; // included modules - FunctionType *functions; // global function types - ClassType *classes; // global class types - Type **types; // vector of unique types - int ntypes; // number of unique types - int capacity; // type vector capacity -} ModuleType; + struct Module *includes; // included modules + Type **types; + int ntypes; + int capacity; +} Module; struct Type { union { TypeHeader hdr; - ClassType cls; - FunctionType sig; - ModuleType mod; + Adt adt; + TypeVar var; + FuncSig func; + Module mod; }; }; -#define cast_type(x) ((Type *)(x)) -Type *pawY_add_type(paw_Env *P, ModuleType *mod, const Type *type); - -#define y_id(t) ((t)->hdr.id) +#define y_cast(x) ((Type *)(x)) +#define y_code(t) ((t)->hdr.def) #define y_kind(t) ((t)->hdr.kind) -#define y_cast(t, T) ((T *)&(t)->hdr) -#define y_is_unit(t) (y_id(t) == PAW_TUNIT) -#define y_is_bool(t) (y_id(t) == PAW_TBOOL) -#define y_is_int(t) (y_id(t) == PAW_TINT) -#define y_is_float(t) (y_id(t) == PAW_TFLOAT) -#define y_is_scalar(t) (y_id(t) < PAW_TSTRING) -#define y_is_string(t) (y_id(t) == PAW_TSTRING) -#define y_is_array(t) (y_id(t) == PAW_TARRAY) -#define y_is_tuple(t) (y_id(t) == PAW_TTUPLE) -#define y_is_primitive(t) (y_id(t) <= PAW_TSTRING) -#define y_is_foreign(t) (y_id(t) == PAW_TFOREIGN) +#define y_is_unit(t) (y_is_basic(t) && y_code(t) == PAW_TUNIT) +#define y_is_bool(t) (y_is_basic(t) && y_code(t) == PAW_TBOOL) +#define y_is_int(t) (y_is_basic(t) && y_code(t) == PAW_TINT) +#define y_is_float(t) (y_is_basic(t) && y_code(t) == PAW_TFLOAT) +#define y_is_string(t) (y_is_basic(t) && y_code(t) == PAW_TSTRING) -#define y_is_class(t) (y_kind(t) == TYPE_CLASS) -#define y_is_function(t) (y_kind(t) == TYPE_SIGNATURE) +#define y_is_basic(t) (y_kind(t) == TYPE_BASIC) +#define y_is_adt(t) (y_kind(t) == TYPE_ADT) +#define y_is_func(t) (y_kind(t) == TYPE_FUNC) +#define y_is_type_var(t) (y_kind(t) == TYPE_VAR) #define 
y_is_module(t) (y_kind(t) == TYPE_MODULE) -static inline const char *pawY_name(int type) -{ - switch (type) { - case PAW_TBOOL: - return "boolean"; - case PAW_TINT: - return "integer"; - case PAW_TFLOAT: - return "float"; - case PAW_TSTRING: - return "string"; - case PAW_TFUNCTION: - return "function"; - case PAW_TCLASS: - return "class"; - case PAW_TFOREIGN: - return "foreign"; - case PAW_TMODULE: - return "module"; - default: - return "?"; - } -} - -#define pawY_is_same(a, b) ((a) == (b)) - void pawY_init(paw_Env *P); void pawY_uninit(paw_Env *P); - -Type *pawY_new_type(paw_Env *P); +Module *pawY_module_new(paw_Env *P); +void pawY_module_free(paw_Env *P, Module *mod); +Type *pawY_type_new(paw_Env *P, Module *mod); #endif // PAW_TYPE_H diff --git a/src/unify.c b/src/unify.c new file mode 100644 index 0000000..9b411e0 --- /dev/null +++ b/src/unify.c @@ -0,0 +1,298 @@ +// Copyright (c) 2024, The paw Authors. All rights reserved. +// This source code is licensed under the MIT License, which can be found in +// LICENSE.md. See AUTHORS.md for a list of contributor names. +// +// unify.c: type unification module + +#include "code.h" +#include "parse.h" +#include "mem.h" + +#define PAW_DEBUG_UNIFY 1 + +#ifdef PAW_DEBUG_UNIFY +# define debug_log(what, ...) log_unification(what, __VA_ARGS__) +#else +# define debug_log(what, ...) 
+#endif + +#define unpack_var(v) \ + (TypeVar){ \ + .type = (v)->type, \ + .resolved = (v)->resolved, \ + } +#define pack_type(t) \ + (TypeVar){ \ + .type = (t), \ + .resolved = PAW_TRUE, \ + } + +typedef struct UniVar { + struct UniVar *parent; + Type *type; + int rank; + int depth; + paw_Bool resolved: 1; +} UniVar; + +typedef struct UniTable { + struct UniTable *outer; + struct UniVar **vars; // vector of type variables + int nvars; // number of type variables + int capacity; // capacity of vector +} UniTable; + +static void dump_type(FILE *out, const Type *type) +{ + switch (y_kind(type)) { + case TYPE_BASIC: + switch (type->hdr.def) { + case PAW_TUNIT: + fprintf(out, "()"); + break; + case PAW_TBOOL: + fprintf(out, "bool"); + break; + case PAW_TINT: + fprintf(out, "int"); + break; + case PAW_TFLOAT: + fprintf(out, "float"); + break; + default: + paw_assert(type->hdr.def == PAW_TSTRING); + fprintf(out, "string"); + } + break; + case TYPE_FUNC: + fprintf(out, "fn("); + for (int i = 0; i < type->func.params.count; ++i) { + dump_type(out, type->func.params.types[i]); + if (i < type->func.params.count - 1) { + fprintf(out, ", "); + } + } + fprintf(out, ") -> "); + dump_type(out, type->func.return_); + break; + case TYPE_ADT: + fprintf(out, "%d", type->adt.target); // TODO: Print the name + if (type->adt.types.count > 0) { + fprintf(out, "["); + const Binder *binder = &type->adt.types; + for (int i = 0; i < binder->count; ++i) { + dump_type(out, binder->types[i]); + if (i < binder->count - 1) { + fprintf(out, ", "); + } + } + fprintf(out, "]"); + } + break; + default: + paw_assert(y_is_type_var(type)); + fprintf(out, "?%s", type->var.name->text); + } +} + +static void log_unification(const char *what, Type *a, Type *b) +{ + paw_assert(a && b); + printf("%s: ", what); + dump_type(stdout, a); + fprintf(stdout, " = "); + dump_type(stdout, b); + fprintf(stdout, "\n"); +} + +static UniVar *find_root(UniVar *uvar) +{ + UniVar *up = uvar->parent; + if (up != uvar) { + up = 
uvar->parent = find_root(up); + } + return up; +} + +static void link_roots(UniVar *a, UniVar *b) +{ + if (a->rank < b->rank) { + a->parent = b; + } else { + b->parent = a; + a->rank += a->rank == b->rank; + } +} + +static void unify_var_type(UniVar *uvar, Type *type) +{ + debug_log("unify_var_type", uvar->type, type); + + uvar->type = type; +} + +static void unify_var_var(UniVar *a, UniVar *b) +{ + a = find_root(a); + b = find_root(b); + + debug_log("unify_var_var", a->type, b->type); + + if (a != b) { + link_roots(a, b); + } +} + +// Check if a type is resolved, that is, if it is a concrete type or a free +// type variable +#define is_resolved(U, v) (!y_is_type_var(v) || !p_is_bound(U, v)) + +Type *pawP_normalize(Unifier *U, Type *type) +{ + if (!is_resolved(U, type)) { + const int index = type->var.index; + UniVar *uvar = U->table->vars[index]; + uvar = find_root(uvar); // normalize + return uvar->type; + } + return type; +} + +static void unify_binders(Unifier *U, Binder *a, Binder *b) +{ + if (a->count != b->count) { + pawX_error(U->lex, "arity mismatch"); + } + for (int i = 0; i < a->count; ++i) { + pawP_unify(U, a->types[i], b->types[i]); + } +} + +static void unify_adt(Unifier *U, Adt *a, Type *b) +{ + if (y_is_type_var(b)) { + return; // ignore free type variables + } else if (!y_is_adt(b)) { + pawX_error(U->lex, "expected struct or enum type"); + } else if (a->target != b->adt.target) { + pawX_error(U->lex, "data types are incompatible"); + } + unify_binders(U, &a->types, &b->adt.types); +} + +static void unify_func_sig(Unifier *U, FuncSig *a, Type *b) +{ + if (!y_is_func(b)) { + pawX_error(U->lex, "expected function type"); + } + // NOTE: 'types' field not unified (not part of function signature) + unify_binders(U, &a->params, &b->func.params); + pawP_unify(U, a->return_, b->func.return_); +} + +static Type *unify_basic(Unifier *U, Type *a, Type *b) +{ + // basic types are cannonicalized + if (a != b) { + pawX_error(U->lex, "basic types are 
incompatible"); + } + return a; +} + +static void unify_types(Unifier *U, Type *a, Type *b) +{ + debug_log("unify_types", a, b); + if (y_is_type_var(a) || y_is_type_var(b)) { + // Don't worry about free generics, they will be type checked during + // instantiation of their containing template. + } else if (y_is_func(a)) { + unify_func_sig(U, &a->func, b); + } else if (y_is_adt(a)) { + unify_adt(U, &a->adt, b); + } else { + unify_basic(U, a, b); + } +} + +// TODO: Indicate failure rather than throw errors inside, let the caller throw, for better error messages +void pawP_unify(Unifier *U, Type *a, Type *b) +{ + UniTable *ut = U->table; + + // Types may have already been unified. Make sure to always use the + // cannonical type. + a = pawP_normalize(U, a); + b = pawP_normalize(U, b); + if (!is_resolved(U, a)) { + UniVar *va = ut->vars[a->var.index]; + if (!is_resolved(U, b)) { + UniVar *vb = ut->vars[b->var.index]; + unify_var_var(va, vb); + } else { + unify_var_type(va, b); + } + } else if (!is_resolved(U, b)) { + UniVar *vb = ut->vars[b->var.index]; + unify_var_type(vb, a); + } else { + // Both types are known: make sure they are compatible. This is the + // only time pawP_unify can encounter an error. 
+ unify_types(U, a, b); + } +} + +void pawP_new_type_var(Unifier *U, Type *type) +{ + debug_log("new_type_var", type, type); + + paw_Env *P = env(U->lex); + UniTable *table = U->table; + + // add a new set to the forest + pawM_grow(P, table->vars, table->nvars, table->capacity); + UniVar *uvar = pawM_new(P, UniVar); + const int index = table->nvars++; + table->vars[index] = uvar; + + // set contains only 'type' + uvar->parent = uvar; + uvar->type = type; + + type->var.depth = U->depth; + type->var.index = index; +} + +void pawP_unifier_enter(Unifier *U, UniTable *table) +{ + paw_Env *P = env(U->lex); + if (table == NULL) { + table = pawM_new(P, UniTable); + } + table->outer = U->table; + U->table = table; + ++U->depth; +} + +static void free_uni_table(Unifier *U, UniTable *table) +{ + paw_Env *P = env(U->lex); + for (int i = 0; i < table->nvars; ++i) { + pawM_free(P, table->vars[i]); + } + pawM_free(P, table); +} + +void pawP_unifier_replace(Unifier *U, UniTable *table) +{ + table->outer = U->table->outer; + free_uni_table(U, U->table); + U->table = table; +} + +UniTable *pawP_unifier_leave(Unifier *U) +{ + UniTable *table = U->table; + U->table = U->table->outer; + --U->depth; + return table; +} diff --git a/src/value.c b/src/value.c index b8bed37..6efe963 100644 --- a/src/value.c +++ b/src/value.c @@ -5,7 +5,7 @@ #include "array.h" #include "bigint.h" -#include "gc.h" +#include "gc_aux.h" #include "map.h" #include "mem.h" #include "str.h" @@ -50,15 +50,20 @@ const char *pawV_to_string(paw_Env *P, Value v, paw_Type type, size_t *nout) case PAW_TSTRING: pawC_pushv(P, v); // copy break; - case PAW_TBOOL: - pawC_pushv(P, pawE_cstr(P, v_true(v) ? CSTR_TRUE : CSTR_FALSE)); - break; case PAW_TINT: int_to_string(P, v_int(v)); break; case PAW_TFLOAT: float_to_string(P, v_float(v)); break; + case PAW_TBOOL: { + Value v; + v_set_object(&v, pawE_cstr(P, v_true(v) + ? 
CSTR_TRUE + : CSTR_FALSE)); + pawC_pushv(P, v); + break; + } default: return NULL; } @@ -88,8 +93,8 @@ const char *pawV_name(ValueKind kind) return "array"; case VMAP: return "map"; - case VCLASS: - return "class"; + case VSTRUCT: + return "struct"; case VINSTANCE: return "instance"; case VMETHOD: @@ -167,6 +172,19 @@ void pawV_unlink_upvalue(UpValue *u) } } +Array_ *pawV_new_array(paw_Env *P, int nelems) +{ + Array_ *arr = pawM_new_flex(P, Array_, cast_size(nelems), + sizeof(arr->elems[0])); + pawG_add_object(P, cast_object(arr), VARRAY); + return arr; +} + +void pawV_free_array(paw_Env *P, Array_ *arr, int nelems) +{ + pawM_free_flex(P, arr, nelems, sizeof(arr->elems[0])); +} + Closure *pawV_new_closure(paw_Env *P, int nup) { // Tack on enough space to store 'nup' pointers to UpValue. @@ -182,36 +200,42 @@ void pawV_free_closure(paw_Env *P, Closure *f) pawM_free_flex(P, f, f->nup, sizeof(f->up[0])); } -Class *pawV_new_class(paw_Env *P, Type *type) +Struct *pawV_new_struct(paw_Env *P, Value *pv) +{ + Struct *struct_ = pawM_new(P, Struct); + v_set_object(pv, struct_); // anchor + struct_->methods = pawA_new(P); + pawG_add_object(P, cast_object(struct_), VSTRUCT); + return struct_; +} + +void pawV_free_struct(paw_Env *P, Struct *struct_) { - Class *cls = pawM_new_flex(P, Class, cast_size(type->cls.nattrs), - sizeof(cls->attrs[0])); - pawG_add_object(P, cast_object(cls), VCLASS); - return cls; + pawM_free(P, struct_); } -Instance *pawV_new_instance(paw_Env *P, int nattrs) +Instance *pawV_new_instance(paw_Env *P, int nfields) { - Instance *ins = pawM_new_flex(P, Instance, cast_size(nattrs), + Instance *ins = pawM_new_flex(P, Instance, cast_size(nfields), sizeof(ins->attrs[0])); pawG_add_object(P, cast_object(ins), VINSTANCE); return ins; } -void pawV_free_instance(paw_Env *P, Instance *ins, Type *type) +void pawV_free_instance(paw_Env *P, Instance *ins, int nfields) { - pawM_free_flex(P, ins, cast_size(type->cls.nattrs), sizeof(ins->attrs[0])); + pawM_free_flex(P, 
ins, cast_size(nfields), sizeof(ins->attrs[0])); } Value *pawV_find_attr(Value *attrs, String *name, Type *type) { - const ClassType *cls = &type->cls; - for (int i = 0; i < cls->nattrs; ++i) { - NamedField *a = &cls->attrs[i]; - if (pawS_eq(a->name, name)) { - return &attrs[i]; - } - } +// const CompositeType *cls = &type->cls; +// for (int i = 0; i < cls->nattrs; ++i) { +// NamedField *a = &cls->attrs[i]; +// if (pawS_eq(a->name, name)) { +// return &attrs[i]; +// } +// } return NULL; } @@ -220,22 +244,6 @@ static void clear_attrs(Value *pv, int nattrs) memset(pv, 0, cast_size(nattrs) * sizeof(*pv)); } -//ClassType *pawV_new_class(paw_Env *P) -//{ -// Class *c = pawM_new(P, Class); -// pawG_add_object(P, cast_object(c), VCLASS); -// c->fields = pawM_new(P, Map); -// c->methods = pawM_new(P, Map); -// return c; -//} -// -//void pawV_free_class(paw_Env *P, Class *c) -//{ -// pawH_free(P, c->fields); -// pawH_free(P, c->methods); -// pawM_free(P, c); -//} - Method *pawV_new_method(paw_Env *P, Value self, Value call) { Method *mtd = pawM_new(P, Method); @@ -250,12 +258,13 @@ void pawV_free_method(paw_Env *P, Method *m) pawM_free(P, m); } -Native *pawV_new_native(paw_Env *P, String *name, paw_Function call) +Native *pawV_new_native(paw_Env *P, paw_Function func, int nup) { - Native *nat = pawM_new(P, Native); + // TODO: nup > UINT16_MAX, check it or assert? 
+ Native *nat = pawM_new_flex(P, Native, nup, sizeof(nat->up[0])); pawG_add_object(P, cast_object(nat), VNATIVE); - nat->name = name; - nat->call = call; + nat->func = func; + nat->nup = nup; return nat; } @@ -264,13 +273,13 @@ void pawV_free_native(paw_Env *P, Native *nat) pawM_free(P, nat); } -Foreign *pawV_push_foreign(paw_Env *P, size_t size, int nattrs) +Foreign *pawV_push_foreign(paw_Env *P, size_t size, int nfields) { if (size > PAW_SIZE_MAX) { pawM_error(P); } Value *pv = pawC_push0(P); - Foreign *ud = pawM_new_flex(P, Foreign, nattrs, sizeof(ud->attrs[0])); + Foreign *ud = pawM_new_flex(P, Foreign, nfields, sizeof(ud->attrs[0])); pawG_add_object(P, cast_object(ud), VFOREIGN); v_set_object(pv, ud); // anchor ud->size = size; @@ -278,14 +287,14 @@ Foreign *pawV_push_foreign(paw_Env *P, size_t size, int nattrs) // Allocate space to hold 'size' bytes of foreign data. ud->data = pawM_new_vec(P, size, char); } - clear_attrs(ud->attrs, nattrs); + clear_attrs(ud->attrs, nfields); return ud; } -void pawV_free_foreign(paw_Env *P, Foreign *ud, Type *type) +void pawV_free_foreign(paw_Env *P, Foreign *ud, int nfields) { - pawM_free_vec(P, (char *)ud->data, ud->size); - pawM_free_flex(P, ud, cast_size(type->cls.nattrs), sizeof(ud->attrs[0])); + pawM_free_vec(P, (char *)ud->data, ud->size); // TODO + pawM_free_flex(P, ud, cast_size(nfields), sizeof(ud->attrs[0])); } paw_Bool pawV_truthy(Value v, paw_Type type) @@ -303,7 +312,7 @@ paw_Bool pawV_truthy(Value v, paw_Type type) // case PAW_TMAP: // return pawH_length(v_map(v)) > 0; default: - return !v_is_null(v); + return PAW_FALSE; } } diff --git a/src/value.h b/src/value.h index 04420eb..331dd2f 100644 --- a/src/value.h +++ b/src/value.h @@ -5,22 +5,19 @@ #define PAW_VALUE_H #include "paw.h" -#include "type.h" #include "util.h" // Initializer for iterator state variables #define PAW_ITER_INIT -1 #define f_is_nan(v) ((v).f != (v).f) -#define v_is_null(v) ((v).u == 0) #define v_true(v) ((v).u != 0) #define v_false(v) 
(!v_true(v)) #define v_int(v) ((v).i) #define v_float(v) ((v).f) -#define v_check(v) check_exp(!v_is_null(v), v) -#define v_object(v) (v_check(v).o) +#define v_object(v) ((v).o) #define v_native(v) (o_native(v_object(v))) #define v_proto(v) (o_proto(v_object(v))) #define v_closure(v) (o_closure(v_object(v))) @@ -28,7 +25,7 @@ #define v_string(v) (o_string(v_object(v))) #define v_text(v) (v_string(v)->text) #define v_instance(v) (o_instance(v_object(v))) -#define v_class(v) (o_class(v_object(v))) +#define v_struct(v) (o_struct(v_object(v))) #define v_method(v) (o_method(v_object(v))) #define v_foreign(v) (o_foreign(v_object(v))) @@ -45,7 +42,7 @@ #define o_is_closure(o) (o_kind(o) == VCLOSURE) #define o_is_upvalue(o) (o_kind(o) == VUPVALUE) #define o_is_instance(o) (o_kind(o) == VINSTANCE) -#define o_is_class(o) (o_kind(o) == VCLASS) +#define o_is_struct(o) (o_kind(o) == VSTRUCT) #define o_is_method(o) (o_kind(o) == VMETHOD) #define o_is_foreign(o) (o_kind(o) == VFOREIGN) @@ -55,7 +52,7 @@ #define o_closure(o) check_exp(o_is_closure(o), (Closure *)(o)) #define o_upvalue(o) check_exp(o_is_upvalue(o), (UpValue *)(o)) #define o_instance(o) check_exp(o_is_instance(o), (Instance *)(o)) -#define o_class(o) check_exp(o_is_class(o), (Class *)(o)) +#define o_struct(o) check_exp(o_is_struct(o), (Struct *)(o)) #define o_method(o) check_exp(o_is_method(o), (Method *)(o)) #define o_foreign(o) check_exp(o_is_foreign(o), (Foreign *)(o)) @@ -75,6 +72,7 @@ typedef union Value { paw_Int i; paw_Float f; Object *o; + void *p; } Value; typedef Value *StackPtr; @@ -96,7 +94,7 @@ typedef enum ValueKind { VSTRING, VARRAY, VMAP, - VCLASS, + VSTRUCT, VINSTANCE, VMETHOD, VFOREIGN, @@ -129,7 +127,7 @@ static inline int pawV_type(ValueKind vt) case VMETHOD: return PAW_TFUNCTION; case VINSTANCE: - return PAW_TCLASS; + return PAW_TSTRUCT; case VFOREIGN: return PAW_TFOREIGN; default: @@ -186,8 +184,8 @@ typedef struct String { const char *pawV_to_string(paw_Env *P, Value v, paw_Type type, size_t 
*nout); typedef struct VarDesc { - Type *type; String *name; + paw_Type code; } VarDesc; typedef struct Proto { @@ -217,9 +215,8 @@ typedef struct Proto { int line; } *lines; - struct Class *classes; - Value *k; // constants + struct Struct **c; // nested structs struct Proto **p; // nested functions int nup; // number of upvalues int nlines; // number of lines @@ -227,7 +224,7 @@ typedef struct Proto { int nk; // number of constants int argc; // number of fixed parameters int nproto; // number of nested functions - int nclasses; + int nc; // number of nested structs } Proto; Proto *pawV_new_proto(paw_Env *P); @@ -266,13 +263,21 @@ void pawV_free_closure(paw_Env *P, Closure *c); typedef struct Native { GC_HEADER; - paw_Function call; - String *name; + uint16_t nup; + paw_Function func; + UpValue *up[]; } Native; -Native *pawV_new_native(paw_Env *P, String *name, paw_Function call); +Native *pawV_new_native(paw_Env *P, paw_Function func, int nup); -typedef struct Array { +typedef struct Array_ { + GC_HEADER; + Value elems[]; +} Array_; + +Array_ *pawV_new_array(paw_Env *P, int nelems); + +typedef struct Array { // TODO: Call this Vector GC_HEADER; Value *begin; Value *end; @@ -287,22 +292,27 @@ typedef struct Map { size_t capacity; } Map; -typedef struct Class { +typedef struct Struct { GC_HEADER; // common members for GC - Value attrs[]; // fixed array of attributes -} Class; + paw_Type type; // index in module type list + VarDesc *field_info; // RTTI for fields + VarDesc *method_info; // RTTI for methods + Array *methods; // functions with 'self' (Array[Closure]) +} Struct; -Class *pawV_new_class(paw_Env *P, Type *type); +Struct *pawV_new_struct(paw_Env *P, Value *pv); +void pawV_free_struct(paw_Env *P, Struct *struct_); -// Instance of a class +// Instance of a struct typedef struct Instance { GC_HEADER; // common members for GC Value attrs[]; // fixed array of attributes + //Value fields[]; // data fields, inc. 
superstruct } Instance; -Instance *pawV_new_instance(paw_Env *P, int nattrs); -void pawV_free_instance(paw_Env *P, Instance *ins, Type *type); -Value *pawV_find_attr(Value *attrs, String *name, Type *type); +Instance *pawV_new_instance(paw_Env *P, int nfields); +void pawV_free_instance(paw_Env *P, Instance *ins, int nfields); +//Value *pawV_find_attr(Value *attrs, String *name, Type *type); // Method bound to an instance typedef struct Method { @@ -314,15 +324,19 @@ typedef struct Method { Method *pawV_new_method(paw_Env *P, Value self, Value call); void pawV_free_method(paw_Env *P, Method *); +#define BOX_PARSE_MAP 1 +#define BOX_PARSE_BUFFER 2 + typedef struct Foreign { GC_HEADER; + uint8_t flags; void *data; size_t size; Value attrs[]; // fixed array of attributes } Foreign; -Foreign *pawV_push_foreign(paw_Env *P, size_t size, int nattrs); -void pawV_free_foreign(paw_Env *P, Foreign *ud, Type *type); +Foreign *pawV_push_foreign(paw_Env *P, size_t size, int nfields); +void pawV_free_foreign(paw_Env *P, Foreign *ud, int nfields); const char *pawV_name(ValueKind type); diff --git a/test/scripts/basic.paw b/test/scripts/basic.paw index ca43f06..00d5cb9 100644 --- a/test/scripts/basic.paw +++ b/test/scripts/basic.paw @@ -17,42 +17,39 @@ { // Semicolons are not necessary in most cases. - let x: int x = 1 + 2 + 3 assert(x == 6) + let x: int = 0 x = 1 + 2 + 3 assert(x == 6) x = 1 - 2 - 3 x = 4 + 5 + 6 assert(x == 15) } { - // Parenthesis are required to call an anonymous function immediately. This - // is also a situation where semicolons are needed to separate statements. - // (it looks like a chain of 'suffixed' expressions, i.e. 
'x[10](1, 2, 3)') - (fn() {})(); - (fn(a: int) {})(1); - (fn(a: int, b: int) {})(1, 2); - (fn(a: int, b: int, c: int) {})(1, 2, 3); - - (fn(a: int, - b: int, - c: int) {}) (1, - 2, - 3) - - fn wrapper(n: int, f: fn(int): int) { - return f(n) - } - assert(1 == wrapper(1, fn(n: int) {return n})) - assert(2 == wrapper(2, fn(n: int) {return fn(): int {return n}})()) - assert(3 == wrapper(3, fn(n: int) {return fn(): fn(): int {return fn(): int {return n}}})()()) - - // Semicolons are not needed if we are assigning to or initializing a variable. - let x = (fn(): int {return 42})() - let x = (fn(a: int): int {return a})(1) - x = (fn(a: int, b: int): int {return a + b})(1, 2) - x = (fn(a: int, b: int, c: int): int {return a + b + c})(1, 2, 3) - assert(x != 0) + fn wrapper(n: int, f: fn(int) -> int) -> int { + return f(n) + } + fn f1(n: int) -> int { + return n + } + fn f2(n: int) -> int { + fn f(n: int) -> int { + return n + } + return f(n) + } + fn f3(n: int) -> int { + fn f(n: int) -> fn() -> int { + fn f() -> int { + return n + } + return f + } + return f(n)() + } + assert(1 == wrapper(1, f1)) + assert(2 == wrapper(2, f2)) + assert(3 == wrapper(3, f3)) } { - let x: int + let x: int = 0 {;;;};;;{};;; {; x = 1; assert(x == 1)} } @@ -63,22 +60,22 @@ fn f() {return} f() - fn f(): int {let x: int; return x} + fn f() -> int {let x: int = 0; return x} assert(f() == 0) - fn f(x: int): int {return x} - assert(f(0) == 0) + fn f(x: int) -> int {return x} + assert(f(1) == 1) - fn f(x: int, y: int): int {return x} + fn f(x: int, y: int) -> int {return x} assert(f(0, 1) == 0) - fn f(x: int, y: int): int {return y} + fn f(x: int, y: int) -> int {return y} assert(f(0, 1) == 1) - fn f(x: int, y: int, z: int): int {return x} + fn f(x: int, y: int, z: int) -> int {return x} assert(f(0, 1, 2) == 0) - fn f(x: int, y: int, z: int): int {return y} + fn f(x: int, y: int, z: int) -> int {return y} assert(f(0, 1, 2) == 1) - fn f(x: int, y: int, z: int): int {return z} + fn f(x: int, y: int, z: 
int) -> int {return z} assert(f(0, 1, 2) == 2) } // @@ -121,37 +118,30 @@ // assert(a[0].ins.x == 2) // assert(a[0].ins.a[0] == 3) //} -// -//{ -// fn f() { -// { -// // Return statement must be the last statement in the block. -// return 42 -// } -// return 24 // Unreachable -// } -// assert(f() == 42) -//} -// -//// String literals: both types of quotes work, and strings can span -//// multiple lines. -//{ -// assert('abc' == "abc"); -// assert('abc\n def\n ghi\n\n' == 'abc -// def -// ghi -// -//') -// -//} -// -//{ -// assert(42 == load('return 42')()) -// assert(42 == load('return fn() {return 42}')()()) -// let test = load('let v = 42; return fn() {return v;}') -// assert(test()() == 42) -//} -// + +{ + fn f() -> int { + { + // Return statement must be the last statement in the block. + return 42 + } + return 24 // Unreachable + } + assert(f() == 42) +} + +// String literals: both types of quotes are okay, and strings can span +// multiple lines. +{ + assert('abc' == "abc"); + assert('abc\n def\n ghi\n\n' == 'abc + def + ghi + +') + +} + //// Array and map are both 'prefixexpr' constructs, meaning they can appear to the //// left of '[]' (__*item) or '.' (__*attr). See grammer.md for details. //{ @@ -160,104 +150,21 @@ // let m = {0: 0, 1: 1, 2: 2}[1] // assert(m == 1) //} -// -//{ -// let s = '' -// for a = 0,10 { -// s = s ++ 'a' -// for b = 0,10 { -// s = s ++ 'b' -// for c = 0,10 { -// s = s ++ 'c' -// } -// } -// } -//} -// -//// Test null chaining operator: -//{ -// fn test(arg) { -// let v = arg? -// return 'nonnull' -// } -// assert(test(null) == null) -// assert(test('nonnull') == 'nonnull') -// -// class Class { -// __init() { -// self.isnull = null -// self.nonnull = self -// } -// __neg() { -// return null -// } -// } -// fn test() { -// let c = Class() -// let x = c.nonnull? -// return 'nonnull' -// } -// assert(test() == 'nonnull') -// -// fn test() { -// let c = Class() -// return c.nonnull?.isnull?.nonnull? 
-// } -// assert(test() == null) -// -// fn test() { -// // '?' is evaluated before any unary prefix operators. -// // Note that 'c.__neg()' returns null here, but the '?' is -// // applied to 'c' before '__neg' gets called. -// let c = Class() -// let x = -c? -// return 'nonnull' -// } -// assert(test() == 'nonnull') -// -// fn test(a, b) { -// return a? + b? -// } -// assert(null == test(null, null)) -// assert(null == test(1, null)) -// assert(null == test(null, 2)) -// assert(3 == test(1, 2)) -// -// fn test(obj) { -// for o in obj? { -// return 42 -// } -// } -// assert(42 == test([1, 2, 3])) -// assert(null == test(null)) -// -// fn test(first, second, third) { -// let v = first?[second?]?[third?]? -// return 123 -// } -// assert(123 == test([[0]], 0, 0)) -// assert(null == test(null, 0, 0)) // first == null -// assert(null == test([null], 0, 0)) // first[0] == null -// assert(null == test([[null]], 0, 0)) // first[0][0] == null -// assert(null == test([[0]], null, 0)) // second == null -// assert(null == test([[0]], 0, null)) // third == null -// -// class Class { -// __init() { -// // Note that 'self' is not an actual keyword: it only has special meaning -// // in a member function. It is a local variable in this context. -// self.self = self -// self.null_ = null -// } -// } -// fn test() { -// let c = Class() -// let x = c.self?.self?.null_? 
-// return 'nonnull' -// } -// assert(test() == null) -//} -// + +{ + let s = '' + for a = 0, 10 { + s = s + 'a' + for b = 0, 10 { + s = s + 'b' + for c = 0, 10 { + s = s + 'c' + } + } + } +} + +// TODO: Consider short-circuiting behavior //{ // fn test(a, b) { // return a || b @@ -290,25 +197,4 @@ // assert(!(true && (false || false))) // assert(!((2 == 3) || (4 < 0) && (1 == 1))) //} -// -//{ -// let f = fn() { -// return 0 -// } -// assert(f() == 0) -// -// let f = fn(a) { -// return a -// } -// assert(f(1) == 1) -// -// let f = fn(a, b) { -// return a + b -// } -// assert(f(1, 2) == 3) -// -// let f = fn(a, b, c) { -// return a + b + c -// } -// assert(f(1, 2, 3) == 6) -//} + diff --git a/test/scripts/block.paw b/test/scripts/block.paw index 70567b8..ba5e21e 100644 --- a/test/scripts/block.paw +++ b/test/scripts/block.paw @@ -22,7 +22,7 @@ } { - fn test(n: int): string { + fn test(n: int) -> string { if n == 0 { return 'zero' } else if n == 1 { diff --git a/test/scripts/closure.paw b/test/scripts/closure.paw index 53227f9..5b5eee3 100644 --- a/test/scripts/closure.paw +++ b/test/scripts/closure.paw @@ -1,4 +1,4 @@ --- closure.paw +// closure.paw { let f @@ -18,7 +18,7 @@ } assert(f()[0] == 'abc') assert(f()[1] == 123) - assert(#f() == 6) -- 3 calls == 6 pushes + assert(#f() == 6) // 3 calls == 6 pushes } { @@ -45,7 +45,7 @@ return a } f = closure - } -- Close a + } // Close a { let a = [] fn closure() { @@ -53,7 +53,7 @@ return a } g = closure - } -- Close a + } // Close a } assert(#f() == 1) assert(#g() == 1) @@ -132,7 +132,7 @@ assert(f(30) == 832040) } --- Test vararg functions: +// Test vararg functions: { fn test(n) { return fn(...) 
{ @@ -165,7 +165,7 @@ let temp = argv[0]; return a + temp + argv[-1] } - assert(2 == test(0, 1)) -- 0 + 1 + 1 + assert(2 == test(0, 1)) // 0 + 1 + 1 assert(21 == test(1, 10)) assert(102 == test(2, 1, 99)) assert(503 == test(3, 200, 100, 300)) @@ -174,7 +174,7 @@ let temp = argv[0]; return a + b + temp + argv[-1] } - assert(3 == test(0, 1, 1)) -- 0 + 1 + 1 + assert(3 == test(0, 1, 1)) // 0 + 1 + 1 assert(23 == test(1, 2, 10)) assert(105 == test(2, 3, 1, 99)) assert(507 == test(3, 4, 200, 100, 300)) diff --git a/test/scripts/loop.paw b/test/scripts/loop.paw index 37371db..63a4b63 100644 --- a/test/scripts/loop.paw +++ b/test/scripts/loop.paw @@ -1,447 +1,471 @@ --- loop.paw +// loop.paw --- Make sure loop code cleans the stack up when finished -{ - for i = 1, 8 { - for j = 1, i {} - } - fn test(): int {return 123} - assert(test() == 123) -} - --- Numeric for loop: -{ - fn test(start: int, end: int, step: int): int { - let n = 0 - for i = start, end, step { - n = n + 1 - } - return n - } - assert(test(0, 0, 1) == 0) - assert(test(0, 0, -1) == 0) - assert(test(0, -1, 1) == 0) - assert(test(0, 1, -1) == 0) - assert(test(0, 0, 10) == 0) - assert(test(0, 0, -10) == 0) - assert(test(0, -1, 10) == 0) - assert(test(0, 1, -10) == 0) - - assert(test(-1, 1, 1) == 2) - assert(test(-10, 10, 1) == 20) - assert(test(1, -1, -1) == 2) - assert(test(10, -10, -1) == 20) - assert(test(-1, 1, 2) == 1) - assert(test(-10, 10, 2) == 10) - assert(test(1, -1, -2) == 1) - assert(test(10, -10, -2) == 10) - assert(test(-1, 1, 4) == 1) - assert(test(-10, 10, 4) == 5) - assert(test(1, -1, -4) == 1) - assert(test(10, -10, -4) == 5) - - -- Loop bounds are copied onto the stack when the loop starts. Changing them - -- during the loop has no effect. 
- let n = 0 - let N = 10 - for i = 0, N { - n = n + 1 - N = 0 - } - assert(n == 10) - - fn test(n: int): int { - let f - for i = 0, 100 { - if i == n { - f = fn() { - return i - } - } - } - return f - } - assert(test(0)() == 0) - assert(test(10)() == 10) - assert(test(50)() == 50) - assert(test(100) == null) -} - --- Iterator for loop: -{ - let i = 1 - for e in [1, 2, 3] { - assert(e == i) - i = i + 1 - } - - let i = 0 - for k in {1: 1, 2: 2, 3: 3} { - i = i + 1 - } - assert(i == 3) - - let a = [1, 2, 3] - let b: [int] - for e in a { - b.push(e) - a.pop() - } - assert(#b == 2) - assert(b[0] == 1) - assert(b[1] == 2) -} - --- While loop: -{ - fn test(n: int): int { - let count = 0 - while count < n { - count = count + 1 - } - return count - } - assert(test(0) == 0) - assert(test(1) == 1) - assert(test(10) == 10) -} - --- Do-while loop: -{ - let i = 0 - do { - i = i + 1 - } while i < 10 - - assert(i == 10) - - do { - i = 42 - } while 0 - - assert(i == 42) -} - --- 'break' and 'continue' statements: -{ - let n - for i = 0,3 { - if i == 2 { - break - } - n = i - } - assert(n == 1) - - let n - for i = 0,3 { - if i == 2 { - continue - } - n = i - } - assert(n == 1) - - -- Capture loop variable 'i' as an upvalue. 
- fn test(n: int): int { - let f - for i = 0,100 { - if i == n { - let u = i - f = fn() { - return n + i + u - } - break - } - } -- 'i' closed here when i == n, popped otherwise - return f - } - assert(test(0)() == 0) - assert(test(10)() == 30) - assert(test(50)() == 150) - assert(test(100) == null) - - let i = 0 - do { - i = i + 1 - break - } while true - - assert(i == 1) - - let i = 0 - do { - if i == 10 { - break - } - i = i + 1 - } while true - - assert(i == 10) - - let i = 0 - do { - i = i + 1 - continue - } while i < 10 - - assert(i == 10) - - let i = 0 - let n = 0 - do { - i = i + 1 - if i & 1 { - continue - } - n = n + 1 - } while i < 10 - - assert(n == 5) - - let i = 0 - let n = 0 - do { - i = i + 1 - if i & 1 { - continue - } else if i == 10 { - break - } - n = n + 1 - } while true - - assert(n == 4) -} - -{ - let n = 0 - let N = 10 - for i = 0,N { - n = n + 1 - N = 0 - } - assert(n == 10) -} - -{ - let N = 2 - - let a = [] - for i = 0,N { - for j in [0] { - a.push(i + j) - } - } - assert(#a == 2) - assert(a[0] == 0) - assert(a[1] == 1) - - let a = [] - for i = 0, N { - let j = 0 - while j < N { - a.push(i + j) - j = j + 1 - } - } - assert(#a == 4) - assert(a[0] == 0) - assert(a[1] == 1) - assert(a[2] == 1) - assert(a[3] == 2) - - let a = [] - for i = 0,N { - for j in [0] { - let k = 0 - while k < N { - a.push(i + j + k) - k = k + 1 - } - } - } - assert(#a == 4) - assert(a[0] == 0) - assert(a[1] == 1) - assert(a[2] == 1) - assert(a[3] == 2) -} - -{ - let n = 0 - for i = 0,8 { - n = n + 1 - break - } - assert(n == 1) -} - -{ - fn test(n: int): int { - let count = 0 - for i = 0,100 { - if i == n { - break - } - count = count + 1 - continue - } - return count - } - assert(test(-1) == 100) - assert(test(0) == 0) - assert(test(25) == 25) - assert(test(50) == 50) -} - -{ - let i - - -- 'i' shadowed by loop variable. 
- for i = 0,8 {} - assert(i == null) -} - -{ - -- Invalid operations must be run to throw an exception - if false && 1 / 0 { - let x = 1 // 0 - let y = 1 % 0 - } -} - -{ - let I = [0, 1, 2, 3] - for i = 0,4 { - assert(i == I[i]) - } - - let n = 0 - let I = [2, 3] - for i = 2,4 { - assert(i == I[n]) - n = n + 1 - } - - let n = 0 - let I = [1, 3] - for i = 1,4,2 { - assert(i == I[n]) - n = n + 1 - } -} - -{ - let n = 0 - for i = 0,8 { - n = n + 1 - break - } - assert(n == 1) -} - -{ - let n = 0 - for i = 0,8 { - n = n + 1 - continue - - -- Unreachable - assert(false) - } - assert(n == 8) -} - -{ - fn test(n: int): int { - let f - for i = 0,100 { - if i == n {{{ -- <-- NOTE: Nested blocks here - let a = [i] - fn check() { - -- Capture 'a' as an upvalue. - let m = a[0] - a[0] = a[0] + 1 - -- Equivalent to '3 * n' on the first call. Increases by 1 - -- each time. - return i + n + m - } - f = check - break - }}} - } - return f - } - assert(test(-1) == null) - - let t = test(10) - assert(t() == 30) - assert(t() == 31) - assert(t() == 32) -} - -{ - fn test(b: int): int { - let n = 0 - let f - for i = 0,8 { - let _0 - let _1 - { - let _2 - - n = n + 1 - let m = n - f = fn() { - return m + n - } - -- Need to emit instructions to close 'm' at the break and continue - -- statements. 
- if b { - break - } else { - continue - } - let _3 - } - let _4 - let _5 - } - return f() - } - assert(test(true) == 2) - assert(test(false) == 16) -} - -{ - fn test(n: int): int { - let f - for _ = 0, 10 { - if n == 0 { - let _0 = 0 - f = fn(): int {return n + _0} - break - } else if n == 1 { - if n == 1 { - let _1 = 1 - f = fn(): int {return n + _1} - break - } - } else if n == 2 { - if n == 2 { - if n == 2 { - let _2 = 2 - f = fn(): int {return n + _2} - break - } - } - } - } - return f() - } - assert(test(0) == 0) - assert(test(1) == 2) - assert(test(2) == 4) +for i = 0, 8 { + let a = -1 + let b = -1 } +//// Make sure loop code cleans the stack up when finished +//{ +// for i = 1, 8 { +// for j = 1, i {} +// } +// fn test() -> int {return 123} +// assert(test() == 123) +//} +// +//// Numeric for loop: +//{ +// fn test(start: int, end: int, step: int) -> int { +// let n = 0 +// for i = start, end, step { +// n = n + 1 +// } +// return n +// } +// assert(test(0, 0, 1) == 0) +// assert(test(0, 0, -1) == 0) +// assert(test(0, -1, 1) == 0) +// assert(test(0, 1, -1) == 0) +// assert(test(0, 0, 10) == 0) +// assert(test(0, 0, -10) == 0) +// assert(test(0, -1, 10) == 0) +// assert(test(0, 1, -10) == 0) +// +// assert(test(-1, 1, 1) == 2) +// assert(test(-10, 10, 1) == 20) +// assert(test(1, -1, -1) == 2) +// assert(test(10, -10, -1) == 20) +// assert(test(-1, 1, 2) == 1) +// assert(test(-10, 10, 2) == 10) +// assert(test(1, -1, -2) == 1) +// assert(test(10, -10, -2) == 10) +// assert(test(-1, 1, 4) == 1) +// assert(test(-10, 10, 4) == 5) +// assert(test(1, -1, -4) == 1) +// assert(test(10, -10, -4) == 5) +// +// // Loop bounds are copied onto the stack when the loop starts. Changing them +// // during the loop has no effect. 
+// let n = 0 +// let N = 10 +// for i = 0, N { +// n = n + 1 +// N = 0 +// } +// assert(n == 10) +// +// fn test(n: int) -> fn() -> int { +// fn default() -> int { +// return -1 +// } +// let f = default +// for i = 0, 100 { +// if i == n { +// // Capture 'i', the loop variable, when it is equal to 'n'. It should +// // retain this value in the closure, since 'i' is closed over at the +// // end of the loop iteration. +// fn closure() -> int { +// return i +// } +// f = closure +// } +// } +// return f +// } +// assert(test(0)() == 0) +// assert(test(10)() == 10) +// assert(test(50)() == 50) +// assert(test(100)() == -1) +//} +// +//// TODO: Get this working, once data structures are working again +////// Iterator for loop: +////{ +//// let i = 1 +//// for e in [1, 2, 3] { +//// assert(e == i) +//// i = i + 1 +//// } +//// +//// let i = 0 +//// for k in {1: 1, 2: 2, 3: 3} { +//// i = i + 1 +//// } +//// assert(i == 3) +//// +//// let a = [1, 2, 3] +//// let b: [int] +//// for e in a { +//// b.push(e) +//// a.pop() +//// } +//// assert(#b == 2) +//// assert(b[0] == 1) +//// assert(b[1] == 2) +////} +// +//// While loop: +//{ +// fn test(n: int) -> int { +// let count = 0 +// while count < n { +// count = count + 1 +// } +// return count +// } +// assert(test(0) == 0) +// assert(test(1) == 1) +// assert(test(10) == 10) +//} +// +//// Do-while loop: +//{ +// let i = 0 +// do { +// i = i + 1 +// } while i < 10 +// +// assert(i == 10) +// +// do { +// i = 42 +// } while false +// +// assert(i == 42) +//} +// +//// 'break' and 'continue' statements: +//{ +// let n = -1 +// for i = 0,3 { +// if i == 2 { +// break +// } +// n = i +// } +// assert(n == 1) +// +// let n = -1 +// for i = 0,3 { +// if i == 2 { +// continue +// } +// n = i +// } +// assert(n == 1) +// +// // Capture loop variable 'i' as an upvalue. 
+// fn test(n: int) -> fn() -> int { +// fn default() -> int { +// return -1 +// } +// let f = default +// for i = 0,100 { +// if i == n { +// let u = i +// fn closure() -> int { +// return n + i + u +// } +// f = closure +// break +// } +// } // 'i' closed here when i == n, popped otherwise +// return f +// } +// assert(test(0)() == 0) +// assert(test(10)() == 30) +// assert(test(50)() == 150) +// assert(test(100)() == -1) +// +// let i = 0 +// do { +// i = i + 1 +// break +// } while true +// +// assert(i == 1) +// +// let i = 0 +// do { +// if i == 10 { +// break +// } +// i = i + 1 +// } while true +// +// assert(i == 10) +// +// let i = 0 +// do { +// i = i + 1 +// continue +// } while i < 10 +// +// assert(i == 10) +// +// let i = 0 +// let n = 0 +// do { +// i = i + 1 +// if i & 1 != 0 { +// continue +// } +// n = n + 1 +// } while i < 10 +// +// assert(n == 5) +// +// let i = 0 +// let n = 0 +// do { +// i = i + 1 +// if i & 1 != 0 { +// continue +// } else if i == 10 { +// break +// } +// n = n + 1 +// } while true +// +// assert(n == 4) +//} +// +//{ +// let n = 0 +// let N = 10 +// for i = 0, N { +// n = n + 1 +// N = 0 +// } +// assert(n == 10) +//} +// +////{ +//// let N = 2 +//// +//// let a = [] +//// for i = 0,N { +//// for j in [0] { +//// a.push(i + j) +//// } +//// } +//// assert(#a == 2) +//// assert(a[0] == 0) +//// assert(a[1] == 1) +//// +//// let a = [] +//// for i = 0, N { +//// let j = 0 +//// while j < N { +//// a.push(i + j) +//// j = j + 1 +//// } +//// } +//// assert(#a == 4) +//// assert(a[0] == 0) +//// assert(a[1] == 1) +//// assert(a[2] == 1) +//// assert(a[3] == 2) +//// +//// let a = [] +//// for i = 0,N { +//// for j in [0] { +//// let k = 0 +//// while k < N { +//// a.push(i + j + k) +//// k = k + 1 +//// } +//// } +//// } +//// assert(#a == 4) +//// assert(a[0] == 0) +//// assert(a[1] == 1) +//// assert(a[2] == 1) +//// assert(a[3] == 2) +////} +// +//{ +// let n = 0 +// for i = 0,8 { +// n = n + 1 +// break +// } +// assert(n 
== 1) +//} +// +//{ +// fn test(n: int) -> int { +// let count = 0 +// for i = 0,100 { +// if i == n { +// break +// } +// count = count + 1 +// continue +// } +// return count +// } +// assert(test(-1) == 100) +// assert(test(0) == 0) +// assert(test(25) == 25) +// assert(test(50) == 50) +//} +// +//{ +// let i = -1 +// +// // 'i' shadowed by loop variable. +// for i = 0,8 {} +// assert(i == -1) +//} +// +////{ +//// // Invalid operations must be run to throw an exception +//// if false && 1 / 0 { +//// let x = 1 / 0 +//// let y = 1 % 0 +//// } +////} +// +////{ +//// let I = [0, 1, 2, 3] +//// for i = 0,4 { +//// assert(i == I[i]) +//// } +//// +//// let n = 0 +//// let I = [2, 3] +//// for i = 2,4 { +//// assert(i == I[n]) +//// n = n + 1 +//// } +//// +//// let n = 0 +//// let I = [1, 3] +//// for i = 1,4,2 { +//// assert(i == I[n]) +//// n = n + 1 +//// } +////} +// +//{ +// let n = 0 +// for i = 0, 8 { +// n = n + 1 +// break +// } +// assert(n == 1) +//} +// +//// TODO: This is a syntax error (stops compilation), check in test_error.c +////{ +//// let n = 0 +//// for i = 0, 8 { +//// n = n + 1 +//// continue +//// +//// // unreachable +//// assert(false) +//// } +//// assert(n == 8) +////} +// +////{ +//// fn test(n: int) -> int { +//// let f +//// for i = 0, 100 { +//// if i == n {{{ // <-- NOTE: Nested blocks here +//// let a = [i] +//// fn check() { +//// // Capture 'a' as an upvalue. +//// let m = a[0] +//// a[0] = a[0] + 1 +//// // Equivalent to '3 * n' on the first call. Increases by 1 +//// // each time. 
+//// return i + n + m +//// } +//// f = check +//// break +//// }}} +//// } +//// return f +//// } +//// assert(test(-1) == null) +//// +//// let t = test(10) +//// assert(t() == 30) +//// assert(t() == 31) +//// assert(t() == 32) +////} +// +//{ +// fn test(b: bool) -> int { +// fn hack() -> int {return -1} // TODO: Option[T] type, then this is not necessary +// let f = hack +// let n = 0 +// for i = 0,8 { +// let _0 = -1 +// let _1 = -1 +// { +// let _2 = -1 +// +// n = n + 1 +// let m = n +// fn closure() -> int { +// return m + n +// } +// f = closure +// // Need to emit instructions to close 'm' at the break and continue +// // statements. +// if b { +// break +// } else { +// continue +// } +// let _3 = -1 +// } +// let _4 = -1 +// let _5 = -1 +// } +// return f() +// } +// assert(test(true) == 2) +// assert(test(false) == 16) +//} +// +//{ +// fn test(n: int) -> int { +// fn hack() -> int {return -1} // TODO: Option[T] type, then this is not necessary +// let f = hack +// for _ = 0, 10 { +// if n == 0 { +// let _0 = 0 +// fn closure() -> int {return n + _0} +// f = closure +// break +// } else if n == 1 { +// if n == 1 { +// let _1 = 1 +// fn closure() -> int {return n + _1} +// f = closure +// break +// } +// } else if n == 2 { +// if n == 2 { +// if n == 2 { +// let _2 = 2 +// fn closure() -> int {return n + _2} +// f = closure +// break +// } +// } +// } +// } +// return f() +// } +// assert(test(0) == 0) +// assert(test(1) == 2) +// assert(test(2) == 4) +//} +// diff --git a/test/scripts/string.paw b/test/scripts/string.paw index 6a8d65d..c5681a0 100644 --- a/test/scripts/string.paw +++ b/test/scripts/string.paw @@ -8,25 +8,6 @@ assert('str'.clone() == 'str') } -{ - assert('abc' * -1 == '') - assert('abc' * 0 == '') - assert('abc' * 1 == 'abc') - assert('abc' * 2 == 'abcabc') - assert('abc' * 3 == 'abcabcabc') - assert(-1 * 'abc' == '') - assert(0 * 'abc' == '') - assert(1 * 'abc' == 'abc') - assert(2 * 'abc' == 'abcabc') - assert(3 * 'abc' == 
'abcabcabc') - - assert('' + '' == '') - assert('' + 'abc' == 'abc') - assert('a' + 'bc' == 'abc') - assert('ab' + 'c' == 'abc') - assert('abc' + '' == 'abc') -} - { let str = 'abcdef' assert(str.starts_with('abcdef')) @@ -49,36 +30,36 @@ assert(!str.ends_with('df')) } -{ - let s = 'abc' - assert(s[:] == 'abc') - assert(s[:null] == 'abc') - assert(s[null:] == 'abc') - assert(s[null:null] == 'abc') - assert(s[0:#s] == 'abc') - - assert(s[:-1] == 'ab') - assert(s[:#s-1] == 'ab') - assert(s[:-2] == 'a') - assert(s[:#s-2] == 'a') - assert(s[:-3] == '') - assert(s[:#s-3] == '') - - assert(s[1:] == 'bc') - assert(s[-2:] == 'bc') - assert(s[2:] == 'c') - assert(s[-1:] == 'c') - assert(s[3:] == '') - assert(s[0:0] == '') - - -- clamped - assert(s[4:] == '') - assert(s[:-4] == '') -} +//{ +// let s = 'abc' +// assert(s[:] == 'abc') +// assert(s[:null] == 'abc') +// assert(s[null:] == 'abc') +// assert(s[null:null] == 'abc') +// assert(s[0:#s] == 'abc') +// +// assert(s[:-1] == 'ab') +// assert(s[:#s-1] == 'ab') +// assert(s[:-2] == 'a') +// assert(s[:#s-2] == 'a') +// assert(s[:-3] == '') +// assert(s[:#s-3] == '') +// +// assert(s[1:] == 'bc') +// assert(s[-2:] == 'bc') +// assert(s[2:] == 'c') +// assert(s[-1:] == 'c') +// assert(s[3:] == '') +// assert(s[0:0] == '') +// +// -- clamped +// assert(s[4:] == '') +// assert(s[:-4] == '') +//} -- String find: { - fn check(s, sub, n) { + fn check(s: string, sub: string, n: int) { assert(n == s.find(sub)) } check('abc', 'a', 0) @@ -87,23 +68,23 @@ check('abc', 'd', -1) } --- String split/join: -{ - fn check(s, sep, parts) { - let a = s.split(sep) - for i = 0, #a { - assert(a[i] == parts[i]) - } - assert(#a == #parts) - let result = sep.join(a) - assert(result == s); - } - check('abc', 'a', ['', 'bc']) - check('abc', 'b', ['a', 'c']) - check('abc', 'c', ['ab', '']) - check('abc', 'd', ['abc']) - - let a = ',a,,b,,,c,,,,d,,,,,e,,,,,,'.split(',') - assert(''.join(a) == 'abcde') -} +//-- String split/join: +//{ +// fn check(s, sep, 
parts) { +// let a = s.split(sep) +// for i = 0, #a { +// assert(a[i] == parts[i]) +// } +// assert(#a == #parts) +// let result = sep.join(a) +// assert(result == s); +// } +// check('abc', 'a', ['', 'bc']) +// check('abc', 'b', ['a', 'c']) +// check('abc', 'c', ['ab', '']) +// check('abc', 'd', ['abc']) +// +// let a = ',a,,b,,,c,,,,d,,,,,e,,,,,,'.split(',') +// assert(''.join(a) == 'abcde') +//} diff --git a/test/scripts/types.paw b/test/scripts/types.paw index 7f7798d..e736f9d 100644 --- a/test/scripts/types.paw +++ b/test/scripts/types.paw @@ -1,27 +1,38 @@ // types.paw -//class A { -// a: int -// b: float // -// c() { -// print('A.c()\n') +//// TODO: Basic type conversions +////let i = int(42) +////assert(i == 42) +////let f = float(i) +////assert(f == 42.0) +////let b = bool(i) +////assert(b == true) + +//// type system is 'nominal', so there is no structural data included for structures +//// the type of a structure (Adt) requires a reference back to its declaration so this info can be located +//struct A[T] { +// f() -> A[T] { +// return A[T]{} // } +// // g() -> A[int] { +// // return A[int]{} +// // } +// // h() -> A[float] { +// // return A[float]{} +// // } //} -// -//let a: A = A { -// .a = 1, -// .b = 2, -//} +//let a = A[int]{} +//let b = a.f() -fn recursion(n: int) -> int { - if n > 2 { - print('recursion!\n') - return recursion(n - 1) - } - return 0 +fn outer[A, B, C](a: A, b: B, c: C) { + let a2 = a + let b2 = b + let c2 = c } -recursion(15) +outer[int, int, int](1, 2, 3) +outer[float, int, int](1.0, 2, 3) + //*********************************************** // primitives @@ -43,6 +54,25 @@ recursion(15) assert(s == S) assert(b == B) assert(f == F) + + // rebind different types + let a = 123 + assert(a == 123) + let a = 456.0 + assert(a == 456.0) + let a = 'abc' + assert(a == 'abc') + { + // shadow different type + let a = true + } + assert(a == 'abc') +} +{ + let x = 100 + let x = x + let x = x + 23 + assert(x == 123) } 
//*********************************************** @@ -77,47 +107,842 @@ recursion(15) let f = fii_i f(3, 4) + + // Recursive calls use the function object in local slot 0. Type must + // be set before the function is called. + fn recursion(n: int) -> int { + if n > 0 { + return recursion(n - 1) + } + return -1 + } + + recursion(10) + + let r = recursion + r(20) + + let r: fn(int) -> int = recursion + r(30) +} + +//*********************************************** +// structures +//*********************************************** +{ + struct A {a: int} + let a = A{a: 123} + {} + let a = a.a + assert(a == 123) + + // A 'CompositeLit' can start a suffix chain ('.' must be on the + // same line as the '}' due to auto ';' insertion) + struct B {a: A} + let b = B{a: A {a: 123}}.a.a + assert(b == 123) } +{ + struct A {} + let a = A{} + + let a: A = A {} + + struct A {a: int} + let a = A{a: 123} + + // ';' can go after the type, to separate attributes visually + struct A {a: int; b: string} + // initializer order doesn't matter + let a = A{a: 123, b: 'abc'} + let a = A{b: 'def', a: 456} +} + +//*********************************************** +// methods +//*********************************************** +{ + struct A {a() {}} + let a = A {} + a.a() +} +{ + struct A { + a: bool + b(x: bool) -> bool { + let a: bool = self.a + return x == a + } + c: int + d(x: int, y: float) -> bool { + assert(!self.b(false)) + let c = self.c + let e = self.e + return x == c && y == e + } + e: float + } + let a = A { + e: 456.0, + a: true, + c: 123, + } + assert(a.b(a.a)) + assert(a.d(a.c, a.e)) + + struct A { + f() -> A { + return self + } + g(a: A) -> A { + return a + } + } + let a = A {} + a.g(a.f()) + + struct A { + // not a method: just a function object field + func: fn(int) -> int + } + fn func(i: int) -> int { + return i + } + let a = A {func: func} + assert(123 == a.func(123)) +} +{ + struct A { + value: string + prepend(prefix: string) -> A { + self.value = prefix + self.value + return 
self + } + append(suffix: string) -> A { + self.value = self.value + suffix + return self + } + } + + let a = A{value: 'test: '} + let a = a.prepend('paw_') + + a.append('result') + assert(a.value == 'paw_test: result') +} +//// TODO: Doesn't work, see TODO in codegen.c (in start_call()) +//{ +// // Separate a method from its receiver and call it later. Struct +// // instances keep a pointer to the Struct object containing the +// // methods. Use the type of 'f' (essentially 'f(A, int) -> int') +// // to remember the method index. When generating 'f(...)', output +// // an OP_INVOKE instead of an OP_CALL with the remembered index. +// struct A { +// f(i: int) -> int { +// return i +// } +// } +// let a = A {} +// let f = a.f +// assert(f(123) == 123) +//} + +//*********************************************** +// function templates +//*********************************************** +{ + fn a[X]() {} + fn b[X](x: X) {} + fn c[X](x: X) -> X {return x} + + // explicit instantiation: + a[int]() + a[float]() + a[string]() + b[int](1) + b[float](2.0) + b[string]('3') + assert(123 == c[int](123)) + assert(1.0 == c[float](1.0)) + assert('abc' == c[string]('abc')) + // implicit instantiation: + b(1) + b(2.0) + b('3') + assert(123 == c(123)) + assert(1.0 == c(1.0)) + assert('abc' == c('abc')) +} +{ + struct A[T] {} + fn f[T](a: A[T]) {} + + f[bool](A[bool]{}) + let a = A[bool]{} + f[bool](a) + f(A[bool]{}) + f(a) + + f[A[int]](A[A[int]]{}) + let a = A[A[int]]{} + f[A[int]](a) + f(A[A[int]]{}) + f(a) + + f[A[A[float]]](A[A[A[float]]]{}) + let a = A[A[A[float]]]{} + f[A[A[float]]](a) + f(A[A[A[float]]]{}) + f(a) + + fn f[T](a: A[A[T]]) {} + f(A[A[string]]{}) + f(A[A[A[float]]]{}) + + fn func[T, T2](t: T, f: fn(T) -> T2) -> T2 { + return f(t) + } + fn f(i: int) -> float { + return 1.0 + } + let r = func[int, float](1, f) + assert(r == 1.0) + + let r = func(1, f) + assert(r == 1.0) +} +//{ +// fn id[T](t: T) -> T { +// return t +// } +// assert(id[bool](false) == false) +// 
assert(id(1) == 1) +// assert(id(id[float])(2.0) == 2.0) +// assert(id(id[string])('3') == '3') +//} +//{ +// fn times2[X](x: X) -> X { +// return x + x +// } +// assert(-2 == times2[int](-1)) +// assert(2 == times2[int](1)) +// assert(-2.0 == times2[float](-1.0)) +// assert(2.0 == times2[float](1.0)) +// +// assert(-2 == times2(-1)) +// assert(2 == times2(1)) +// assert(-2.0 == times2(-1.0)) +// assert(2.0 == times2(1.0)) +//} +//{ +// // x == y && X == Y must be true +// fn same[X, Y](x: X, y: Y) { +// fn inner[A, B](a: A, b: B) -> A { +// return a + b +// } +// let a = inner[X, Y](x, x) +// let b = inner[X, Y](y, y) +// let c = inner[X, Y](x, y) +// let d = inner[X, Y](y, x) +// assert(a == b) +// assert(b == c) +// assert(c == d) +// let a = inner(x, x) +// let b = inner(y, y) +// let c = inner(x, y) +// let d = inner(y, x) +// assert(a == b) +// assert(b == c) +// assert(c == d) +// } +// same[int, int](1, 1) +// same(1, 1) +// same[float, float](1.0, 1.0) +// same(1.0, 1.0) +//} +//{ +// fn add[T](a: T, b: T) -> T { +// return a + b +// } +// assert(3 == add( 1, 2)) +// assert(7.0 == add(3.0, 4.0)) +// assert('56' == add('5', '6')) +//} +////{ +//// fn outer[Func, Arg](func: Func, arg: Arg) { +//// func(arg) +//// } +//// let result = 0 +//// fn inner[T](t: T) { +//// assert(t == result) +//// } +//// +//// result = 123 +//// outer(inner[int], 123) +//// +//// let func = outer[fn(int), int] +//// result = 456 +//// func(inner[int], 456) +//// +//// let func: fn(fn(int), int) = outer[fn(int), int] +//// result = 789 +//// func(inner[int], 789) +////} +//{ +// fn outer[A, B](a: A, b: B) { +// let u = b // upvalue for 'inner' +// // 'B' in 'inner' shadows 'B' from outer +// fn inner[B, C, D](a: A, b: B, c: C, d: D) { +// assert(a == c) +// assert(b == d) +// assert(b == u) +// } +// inner[B, A, B](a, b, a, b) +// inner(a, b, a, b) +// } +// outer(1, '2, 3') +// outer(1.2, 3) +//} +//{ +// fn test() -> fn() { +// fn upvalue[T](t: T) -> T { +// return t +// } +// 
fn closure() { +// assert(upvalue(1) == 1) +// assert(upvalue(2.0) == 2.0) +// assert(upvalue('3') == '3') +// let u1 = upvalue[int] +// let u2 = upvalue[float] +// let u3 = upvalue[string] +// assert(u1(1) == 1) +// assert(u2(2.0) == 2.0) +// assert(u3('3') == '3') +// } +// return closure +// } +// let closure = test() +// closure() +//} +//{ +// let u = 42 +// fn func[T](v: int) { +// assert(v == u) +// } +// func[bool](42) +// { +// // Function template instances should be placed adjacent to one +// // another on the stack, right where the template is declared, +// // so that they capture the same upvalues. +// let u = 0 +// func[bool](42) +// func[int](42) +// } +//} +// ////*********************************************** -//// classes +//// structure templates ////*********************************************** //{ -//// class A {} -//// let a: A = A() +// struct A[X] {} +// struct B[X] {x: X} +// struct C[X, Y] {x: X; y: Y} +// +// let a: A[int] = A[int] {} +// let b: B[int] = B[int] {x: 1} +// let c: C[int, float] = C[int, float] {x: 1, y: 2.0} +// +// let a = A[int] {} +// let b = B[int] {x: 2} +// let c = C[int, float] {x: 2, y: 3.0} +// +// struct A[T] { +// func: fn(T) -> T +// } +// fn func(i: int) -> int { +// return i +// } +// let a = A[int] {func: func} +// assert(123 == a.func(123)) +//} +//{ +// struct B[X] {x: X} +// let b: B[int] = B[int] {x: 1} +// let b = B[int] {x: 1} // (1) +//} +//{ +// struct A[T] { +// a(t: T) -> T { +// return self.t + t +// } +// b(t: T) -> T { +// return self.a(t) + 1 +// } +// t: T +// } +// let a = A[int] {t: 40} +// assert(42 == a.b(1)) +//} +//{ +// struct A[T] { +// t: T +// } +// let a = A[int] {t: 42} +// let b = A[A[int]] {t: a} +// let c = A[A[A[int]]] {t: b} +// let d = A[A[A[A[int]]]] {t: c} +// let e = d.t +// let f = e.t +// let g = f.t +// let h = g.t +// assert(h == 42) +// +//// TODO: This should not be allowed: requires indirection, which is not yet possible +//// struct B[T] { +//// b: B[T] +//// } 
+// +// struct A[A] { +// a: A +// } +// let a = A[int] {a: 123} +// assert(a.a == 123) +//} +//{ +// struct A[T] { +// v: T +// f() -> T { +// return self.v +// } +// } +// let a = A[string]{v: 'abc'} +// assert(a.v == 'abc') +// +// a.v = a.v + 'def' +// assert(a.f() == 'abcdef') +//} +//{ +// struct A[T] { +// value: T +// prepend(prefix: T) -> A[T] { +// self.value = prefix + self.value +// return self +// } +// append(suffix: T) -> A[T] { +// self.value = self.value + suffix +// return self +// } +// } +// let a = A[string]{value: 'test: '} +// let a = a.prepend('paw_') +// a.append('result') +// assert(a.value == 'paw_test: result') +//} +//{ +// struct A[T, T2] { +// v: T +// f() -> T2 { +// // ?T2 = int +// return 42 +// } +// } +// fn func[T, T2](a: A[T, T2]) -> T2 { +// return a.f() +// } +// let a = A[float, int]{v: 12.3} +// let r = func(a) +// assert(r == 42) //} // ////*********************************************** -//// arrays +//// type aliases ////*********************************************** //{ -//// // empty arrays -//// let a1: [int] -//// let a2: [[int]] -//// let a3: [[[int]]] -//// assert(#a1 == 0) -//// assert(#a2 == 0) -//// assert(#a3 == 0) -//// -//// let a1: [int] = [1] -//// let a2: [[int]] = [[1], [2]] -//// let a3: [[[int]]] = [[[1]], [[2]], [[3]]] -//// -//// let a1 = [1] -//// let a2 = [[1], [2]] -//// let a3 = [[[1]], [[2]], [[3]]] -//// -//// let a = [[[], []], [], [[3]]] -//// -//// let a: [[int]] = [] -//// let a: [[int]] = [] +// type Int = int +// let i: Int = 123 +// struct A[T] {t: T} +// struct A2[T1, T2] {t1: T1; t2: T2} +// +// type Struct = A[int] +// let a: Struct = A[int]{t: 1} +// let a: A[int] = Struct{t: 2} +// let a: Struct = Struct{t: 3} +// let a = Struct{t: 4} +// assert(a.t == 4) +// +// type Struct = A[Int] +// let a: Struct = A[Int]{t: 1} +// let a: A[Int] = Struct{t: 2} +// let a: Struct = Struct{t: 3} +// let a = Struct{t: 4} +// assert(a.t == 4) +// +// type Struct[T] = A[T] +// let a = Struct[Int]{t: 
4} +// assert(a.t == 4) +// +// type Struct[T1, T2] = A[T1] +// let a = Struct[Int, string]{t: 4} +// assert(a.t == 4) +// +// type Struct[T1, T2] = A[T2] +// let a = Struct[Int, string]{t: '4'} +// assert(a.t == '4') +// +// type Struct[T1, T2] = A2[T1, T2] +// let a2 = Struct[Int, string]{t1: 4, t2: '4'} +// assert(a2.t1 == 4) +// assert(a2.t2 == '4') //} +//{ +// struct S1 {} +// struct S2[T] {} // -//// TODO: move to class.paw -////{ -//// class A { -//// a: A -//// } // a: 0 -//// let a = A() // a: 1 -//// a.a = a // a: 2 -////} // a: 1 +// fn func[T](t: T) -> T { +// type T = T // essentially a NOOP +// let t: T = t +// return t +// } +// func(1) +// func(2.0) +// func('3') +// func(S1{}) +// func(S2[int]{}) +// func(S2[S1]{}) +// func(S2[S2[S1]]{}) +// +// fn func[T](t: T) -> T { +// type A[X] = T // X ignored +// type B[X] = X // B[X] == X +// let a: A[T] = t +// let b: B[T] = a +// assert(t == a) +// assert(a == b) +// return b +// } +// func(1) +// func(2.0) +// func('3') +// func(S1{}) +// func(S2[int]{}) +// func(S2[S1]{}) +// func(S2[S2[S1]]{}) +// +// fn func[T](t: T) -> T { +// type A[X] = X +// let t: A[T] = t +// return t +// } +// func(1) +// func(2.0) +// func('3') +// func(S1{}) +// func(S2[int]{}) +// func(S2[S1]{}) +// func(S2[S2[S1]]{}) +//} +////// +////////*********************************************** +//////// method templates +////////*********************************************** +//////{ +////// struct A { +////// func[T](t: T) -> T { +////// return t +////// } +////// } +////// let a = A {} +////// a.func[int](123) +////// a.func(123) +////// a.func[float](1.0) +////// a.func(1.0) +////// a.func[string]('abc') +////// a.func('abc') +////// a.func[A](a) +////// a.func(a) +//////} +//////{ +////// // call other methods from a method template instance +////// struct A { +////// func[T](t: T) -> T { +////// let c = self.concrete(t) +////// let t = self.template(t) +////// return (c + t) / 2 +////// } +////// concrete(i: int) -> int { 
+////// return i +////// } +////// template[T](t: T) -> T { +////// return t +////// } +////// } +////// let a = A {} +////// assert(1 == a.func[int](1)) +////// assert(2 == a.func(2)) +//////// let func = a.func[int] +//////// assert(3 == func(3)) +//////} +////// +////////*********************************************** +//////// struct + method templates +////////*********************************************** +//////{ +////// struct A[S] { +////// s: S +////// get[T](t: T) -> T { +////// return self.s + t +////// } +////// } +////// let a1 = A[int] {s: 1} +////// a1.get(1) +//////} +//////{ +////// struct A[S1, S2] { +////// x: S1 +////// y: S2 +////// get_1[T](t: T) -> T { +////// return t + self.x +////// } +////// get_2[T](t: T) -> T { +////// return t + self.y +////// } +////// } +////// let a1 = A[int, float] {x: 1, y: 2.3} +////// a1.get_1(123) +////// a1.get_2(1.0) +////// let a2 = A[float, int] {x: 1.2, y: 3} +////// a2.get_1(1.0) +////// a2.get_2(123) +//////} +//////{ +////// // S1 == S2 && T1 == T2 must be true +////// struct A[S1, T1] { +////// f[S2, T2](s2: S2, t2: T2) { +////// assert(self.s1 == s2) +////// assert(self.t1 == t2) +////// } +////// s1: S1 +////// t1: T1 +////// } +////// +////// fn func[S1, T1, S2, T2](s1: S1, t1: T1, s2: S2, t2: T2) { +////// let a = A[S1, T1] {s1: s1, t1: t1} +////// a.f[S2, T2](s2, t2) +////// a.f(s2, t2) +////// } +////// +////// func(1, 2.0, 1, 2.0) +////// func[int, float, int, float](1, 2.0, 1, 2.0) +////// func(true, '2', true, '2') +////// func[bool, string, bool, string](true, '2', true, '2') +//////} +////// + +//struct Test[A, B, C, D, E, F] {} +//fn test[A, B, C, D, E, F](a: A, b: B, c: C, d: D, e: E, f: F) { +// let t = Test[A, B, C, D, E, F]{} +//} + +//fn func[A, B, C](a: A, b: B, c: C) { +// a = 123 +// b = a +// c = b +// c = 321 +//} + +//fn outer[A, B, C](a: A, b: B, c: C) { +// fn inner[X](x: X) -> X { +// return x +// } +// let aa = inner[A](123) // ?A = int +// let bb = inner[float](b) // ?B = 
int +// let cc = inner[C](a) // ?A = ?C +//} + +//fn func[A, B](a: A, b: B) { +// a = 1 +// a = b +// b = 1.0 +//} + +//fn outer[A](a: A) { +// fn inner[B](b: B) { +// b = a +// b = 1 +// } +// inner(a) +//} +//outer(1) + +//fn func[A, B](a: A, b: B) { +// a = 1 +// b = a +// b = 1 +//} + +//fn func[A, B, C, D, E, F](a: A, b: B, c: C, d: D, e: E, f: F) { +// // create 2 trees, X and Y ('->' represent parent pointers, so ?E and +// // ?F are the roots of the 2 trees, respectively): +// // X: ?A -> ?B -> ?C -> ?E +// // Y: ?D -> ?F +// a = b +// b = c +// c = e +// d = f +// +// // link the roots of X and Y: ?F -> ?E +// a = d +// +// // NOOPs, since all type variables are already related +// f = a +// e = b +// d = c +//} +// +//fn func[A, B, C, D, E, F](a: A, b: B, c: C, d: D, e: E, f: F) { +// a = b +// a = c +// a = d +// a = e +// a = f +//} + +// +//fn func[T](t: T) {} +// +//fn func[T](t: T) { +// t = t +//} +// +//fn func[T](t: T) { +// t = 1 +// t = 2 +// t = 3 +//} +// +//fn func[T](t: T) { +// t = 1.0 +// t = 2.0 +// t = 3.0 +//} +// +//fn func[A, B](a: A, b: B) { +// a = b +//} +// +//fn func[A, B](a: A, b: B) { +// a = b +// b = 1 +//} +// +//fn func[A, B, C](a: A, b: B, c: C) { +// a = b +// b = c +// c = 1 +//} +// +// + +//{ +// // no constraints on 'T' +// fn func[T](t: T) { +// t = t +// } +// func(1) +// func(2.0) +// func('3') +// func[int](1) +// func[float](2.0) +// func[string]('3') +// +// // T = int +// // T = int +// fn func[T](t: T) { +// t = t + 1 +// } +// func(1) +// func[int](1) +// +// // T1 = T2 +// fn func[T1, T2](t1: T1, t2: T2) { +// let t = t1 + t2 +// } +// func(1, 2) +// func(3.0, 4.0) +// func('5', '6') +// func[int, int](1, 2) +// func[float, float](3.0, 4.0) +// func[string, string]('5', '6') +//} +//{ +// // X == Y +// fn outer[X](x: X) { +// fn inner[Y](y: Y) { +// x = y +// } +// inner(x) +// } +// outer(1) +// outer(2.0) +// outer('3') +//} +//{ +// struct A[T] { +// t: T +// } +// +// // T = int +// fn func[T](a: A[T]) { 
+// let t = a.t + 1 +// } +// +// // T = int +// fn func[T](a: A[T]) { +// let a2: A[int] = a +// } +// +// // T1 = T2 +// fn func[T1, T2](a1: A[T1], a2: A[T2]) { +// a1 = a2 +// } +//} + +//struct A[T] { +// t: T +//} +// +//fn func[T](a: A[T]) -> T { +// return a.t +//} +//let a = A[int] {t: 1} +//assert(func[int](a) == 1) +//assert(func(a) == 1) +//assert(func(A[int]{t: 2}) == 2) +// +//fn func[T](a: A[A[T]]) -> T { +// return a.t.t +//} +////let a = A[int] {t: 1} +////let a = A[A[int]] {t: a} +////assert(func[int](a) == 1) +////assert(func(a) == 1) +////assert(func(A[A[int]]{t: A[int]{t: 2}}) == 2) +// +// TODO: Hoist toplevel functions so that mutual recursion works? Or allow forward decls? +//fn r1[X, Y, Z]() { +// r2[X, Y, Z]() +//} +//fn r2[X, Y, Z]() { +// r1[Y, Z, X]() +//} +//r1[bool, int, float]() +// +//// fn r1[bool, int, float]() -> () +//// fn r2[bool, int, float]() -> () +//// fn r1[int, float, bool]() -> () +//// fn r2[int, float, bool]() -> () +//// fn r1[float, bool, int]() -> () +//// fn r2[float, bool, int]() -> () +//// fn r1[bool, int, float]() -> () + diff --git a/test/test.c b/test/test.c index 578d9f6..5e70473 100644 --- a/test/test.c +++ b/test/test.c @@ -13,6 +13,10 @@ #include #include #include + +#include + +#if 0 #define TEST_FIND_LEAK // Define TEST_FIND_LEAK to have the program print out the addresses and // sizes of leaked blocks. Watchpoints can be used to figure out exactly @@ -113,6 +117,7 @@ static void report_nonzero_blocks(struct TestAlloc *a) abort(); } } +#endif // 0 static void trash_memory(void *ptr, size_t n) { @@ -125,8 +130,8 @@ static void trash_memory(void *ptr, size_t n) static void *safe_realloc(struct TestAlloc *a, void *ptr, size_t size0, size_t size) { check(a->nbytes >= size0); - register_block(a, size0, size); - void *ptr2 = size ? malloc(size) : NULL; +// register_block(a, size0, size); + void *ptr2 = size ? 
GC_MALLOC(size) : NULL; check(!size || ptr2); // assume success if (ptr2) { if (ptr) { @@ -156,7 +161,7 @@ static void *safe_realloc(struct TestAlloc *a, void *ptr, size_t size0, size_t s trash_memory(ptr, size0); } a->nbytes += size - size0; - free(ptr); + GC_FREE(ptr); return ptr2; } @@ -219,11 +224,11 @@ void test_close(paw_Env *P, struct TestAlloc *a) { paw_close(P); - if (a->nbytes) { - fprintf(stderr, "error: leaked %zu bytes\n", a->nbytes); - report_nonzero_blocks(a); - abort(); - } +// if (a->nbytes) { +// fprintf(stderr, "error: leaked %zu bytes\n", a->nbytes); +// report_nonzero_blocks(a); +// abort(); +// } } static void check_ok(paw_Env *P, int status) diff --git a/test/test_error.c b/test/test_error.c index 51a0a3a..fabeb6e 100644 --- a/test/test_error.c +++ b/test/test_error.c @@ -36,12 +36,9 @@ static void test_case(int expect, const char *name, const char *text) static void test_name_error(void) { - test_case(PAW_ENAME, "use_before_def_global", "let x = x"); - test_case(PAW_ENAME, "use_before_def_local", "{let x = x}"); - - test_case(PAW_ENAME, "undef_global", "x = 1"); - test_case(PAW_ENAME, "undef_local", "{x = 1}"); - test_case(PAW_ENAME, "undef_upvalue", "(fn() {x = 1})()"); + test_case(PAW_ENAME, "use_before_def_local", "let x = x"); + test_case(PAW_ENAME, "undef_local", "x = 1"); + test_case(PAW_ENAME, "undef_upvalue", "fn f() {x = 1} f()"); } static _Bool has_type(TypeSet ts, int kind) @@ -52,92 +49,117 @@ static _Bool has_type(TypeSet ts, int kind) static const char *get_literal(int kind) { switch (kind) { - case PAW_TNULL: - return "null"; - case PAW_TINTEGER: + case PAW_TUNIT: + return "()"; + case PAW_TINT: return "123"; case PAW_TFLOAT: return "1.0"; - case PAW_TBOOLEAN: + case PAW_TBOOL: return "true"; case PAW_TSTRING: return "'abc'"; - case PAW_TARRAY: - return "[]"; - case PAW_TMAP: - return "{}"; default: check(0); return NULL; } } -static void check_unop_type_error(const char *op, TypeSet ts) +static void 
check_unop_type_error(const char *op, paw_Type k) +{ + char name_buf[256] = {0}; + snprintf(name_buf, sizeof(name_buf), "unop_type_error('%s', %s)", + op, get_literal(k)); + + char text_buf[256] = {0}; + snprintf(text_buf, sizeof(text_buf), "let x = %s%s", + op, get_literal(k)); + + test_case(PAW_ETYPE, name_buf, text_buf); +} + +static void check_unification_errors(void) { - for (int k = PAW_TNULL; k <= PAW_TUSERDATA; ++k) { - if (has_type(ts, k)) { + for (int k = PAW_TUNIT; k <= PAW_TSTRING; ++k) { + for (int k2 = PAW_TUNIT; k2 <= PAW_TSTRING; ++k2) { + if (k == k2) { + continue; + } char name_buf[256] = {0}; - snprintf(name_buf, sizeof(name_buf), "unop_type_error('%s', %s)", - op, get_literal(k)); + snprintf(name_buf, sizeof(name_buf), "unification_error(%s, %s)", + get_literal(k), get_literal(k2)); char text_buf[256] = {0}; - snprintf(text_buf, sizeof(text_buf), "let x = %s%s", - op, get_literal(k)); + snprintf(text_buf, sizeof(text_buf), "let x = %s; let y = %s; x = y", + get_literal(k), get_literal(k2)); test_case(PAW_ETYPE, name_buf, text_buf); } } } -static void check_binop_type_error_(const char *op, TypeSet ts, TypeSet ts2) +static void check_binop_type_error(const char *op, paw_Type k, paw_Type k2) { - for (int k = PAW_TNULL; k <= PAW_TUSERDATA; ++k) { - for (int k2 = PAW_TNULL; k2 <= PAW_TUSERDATA; ++k2) { - if (has_type(ts, k) && has_type(ts2, k2)) { - char name_buf[256] = {0}; - snprintf(name_buf, sizeof(name_buf), "binop_type_error('%s', %s, %s)", - op, get_literal(k), get_literal(k2)); - - char text_buf[256] = {0}; - snprintf(text_buf, sizeof(text_buf), "let x = %s %s %s", - get_literal(k), op, get_literal(k2)); - - test_case(PAW_ETYPE, name_buf, text_buf); - } - } - } + char name_buf[256] = {0}; + snprintf(name_buf, sizeof(name_buf), "binop_type_error('%s', %s, %s)", + op, get_literal(k), get_literal(k2)); + + char text_buf[256] = {0}; + snprintf(text_buf, sizeof(text_buf), "let x = %s %s %s", + get_literal(k), op, get_literal(k2)); + + 
test_case(PAW_ETYPE, name_buf, text_buf); } -static void check_binop_type_error(const char *op, TypeSet ts, TypeSet ts2) +static void check_binop_type_errors(const char *op, paw_Type *types) { - check_binop_type_error_(op, ts, ts2); - check_binop_type_error_(op, ts2, ts); + for (int k = PAW_TUNIT; k <= PAW_TSTRING; ++k) { + for (int k2 = PAW_TUNIT; k2 <= PAW_TSTRING; ++k2) { + for (int i = 0, t; (t = types[i]) >= 0; ++i) { /* rescan the whole -1-terminated list for every pair; 'types' itself must not be advanced */ + if (k == t && k2 == t) { + goto next_round; + } + } + check_binop_type_error(op, k, k2); +next_round: /* combination of types is valid, skip check */; + } + } } -#define NULL_ (1 << PAW_TNULL) -#define CONTAINER (1 << PAW_TARRAY | 1 << PAW_TMAP) -#define NON_ARITHMETIC (1 << PAW_TNULL | 1 << PAW_TSTRING | 1 << PAW_TARRAY | 1 << PAW_TMAP) -#define ARITHMETIC (1 << PAW_TINTEGER | 1 << PAW_TFLOAT) -#define FLOATING_POINT (1 << PAW_TFLOAT) - static void test_type_error(void) { - // Unary operators - check_unop_type_error("-", NON_ARITHMETIC); - check_unop_type_error("~", NON_ARITHMETIC | FLOATING_POINT); - - // Binary operators. The left-hand side might be a valid type for the operator, but the right-hand
- check_binop_type_error("+", NON_ARITHMETIC | ARITHMETIC, NON_ARITHMETIC); - check_binop_type_error("-", NON_ARITHMETIC | ARITHMETIC, NON_ARITHMETIC); - check_binop_type_error("*", NON_ARITHMETIC | ARITHMETIC, NON_ARITHMETIC); - check_binop_type_error("%", NON_ARITHMETIC | ARITHMETIC, NON_ARITHMETIC); - check_binop_type_error("/", NON_ARITHMETIC | ARITHMETIC, NON_ARITHMETIC); - check_binop_type_error("//", NON_ARITHMETIC | ARITHMETIC, NON_ARITHMETIC); - check_binop_type_error("++", NON_ARITHMETIC | ARITHMETIC, ARITHMETIC | CONTAINER | NULL_); - check_binop_type_error("&", NON_ARITHMETIC | ARITHMETIC, NON_ARITHMETIC | FLOATING_POINT); - check_binop_type_error("|", NON_ARITHMETIC | ARITHMETIC, NON_ARITHMETIC | FLOATING_POINT); - check_binop_type_error("^", NON_ARITHMETIC | ARITHMETIC, NON_ARITHMETIC | FLOATING_POINT); + check_unification_errors(); + + check_unop_type_error("#", PAW_TUNIT); + check_unop_type_error("#", PAW_TBOOL); + check_unop_type_error("#", PAW_TINT); + check_unop_type_error("#", PAW_TFLOAT); + check_unop_type_error("#", PAW_TSTRING); + check_unop_type_error("!", PAW_TUNIT); + check_unop_type_error("-", PAW_TUNIT); + check_unop_type_error("-", PAW_TBOOL); + check_unop_type_error("-", PAW_TSTRING); + check_unop_type_error("~", PAW_TUNIT); + check_unop_type_error("~", PAW_TBOOL); + check_unop_type_error("~", PAW_TFLOAT); + check_unop_type_error("~", PAW_TSTRING); + +#define mklist(...) 
(paw_Type[]){__VA_ARGS__, -1} +#define mklist0() (paw_Type[]){-1} + check_binop_type_errors("+", mklist(PAW_TINT, PAW_TFLOAT, PAW_TSTRING)); + check_binop_type_errors("-", mklist(PAW_TINT, PAW_TFLOAT)); + check_binop_type_errors("*", mklist(PAW_TINT, PAW_TFLOAT)); + check_binop_type_errors("%", mklist(PAW_TINT, PAW_TFLOAT)); + check_binop_type_errors("/", mklist(PAW_TINT, PAW_TFLOAT)); + check_binop_type_errors("&", mklist(PAW_TINT)); + check_binop_type_errors("|", mklist(PAW_TINT)); + check_binop_type_errors("^", mklist(PAW_TINT)); + check_binop_type_errors("<", mklist(PAW_TINT, PAW_TFLOAT, PAW_TSTRING)); + check_binop_type_errors(">", mklist(PAW_TINT, PAW_TFLOAT, PAW_TSTRING)); + check_binop_type_errors("<=", mklist(PAW_TINT, PAW_TFLOAT, PAW_TSTRING)); + check_binop_type_errors(">=", mklist(PAW_TINT, PAW_TFLOAT, PAW_TSTRING)); + check_binop_type_errors("==", mklist(PAW_TBOOL, PAW_TINT, PAW_TFLOAT, PAW_TSTRING)); + check_binop_type_errors("!=", mklist(PAW_TBOOL, PAW_TINT, PAW_TFLOAT, PAW_TSTRING)); } static void too_many_constants(void) @@ -171,9 +193,7 @@ static void too_many_instructions(void) for (int i = 0; i < (1 << 16); ++i) { pawL_add_fstring(P, &buf, "%d + ", i); } - // Adding 'null' to an integer causes a type error, but we should get - // a syntax error during compilation, before the 'null' is parsed. 
- pawL_add_string(P, &buf, "null}"); + pawL_add_string(P, &buf, "0 }"); pawL_push_result(P, &buf); const char *source = paw_string(P, -1); @@ -189,7 +209,7 @@ static void too_many_locals(void) pawL_init_buffer(P, &buf); pawL_add_string(P, &buf, "{\n"); for (int i = 0; i < (1 << 16) + 1; ++i) { - pawL_add_string(P, &buf, "let x\n"); + pawL_add_string(P, &buf, "let x = 0\n"); } pawL_add_string(P, &buf, "}"); pawL_push_result(P, &buf); @@ -240,12 +260,12 @@ static void test_syntax_error(void) { test_case(PAW_ESYNTAX, "overflow_integer", "-9223372036854775808"); test_case(PAW_ESYNTAX, "stmt_after_return", "fn f() {return; f()}"); - test_case(PAW_ESYNTAX, "missing_right_paren", "fn f(a, b, c {return [a + b + c]}"); - test_case(PAW_ESYNTAX, "missing_left_paren", "fn fa, b, c) {return [a + b + c]}"); - test_case(PAW_ESYNTAX, "missing_right_curly", "fn f(a, b, c) {return [a + b + c]"); - test_case(PAW_ESYNTAX, "missing_left_curly", "fn f(a, b, c) return [a + b + c]}"); - test_case(PAW_ESYNTAX, "missing_right_bracket", "fn f(a, b, c) {return [a + b + c}"); - test_case(PAW_ESYNTAX, "missing_left_bracket", "fn f(a, b, c) {return a + b + c]}"); + test_case(PAW_ESYNTAX, "missing_right_paren", "fn f(a: int, b: int, c: int -> int {return (a + b + c)}"); + test_case(PAW_ESYNTAX, "missing_left_paren", "fn fa: int, b: int, c: int) -> int {return (a + b + c)}"); + test_case(PAW_ESYNTAX, "missing_right_curly", "fn f(a: int, b: int, c: int) -> int {return (a + b + c)"); + test_case(PAW_ESYNTAX, "missing_left_curly", "fn f(a: int, b: int, c: int) -> int return (a + b + c)}"); + test_case(PAW_ESYNTAX, "missing_right_bracket", "fn f[A, B, C() {}"); + test_case(PAW_ESYNTAX, "missing_left_bracket", "fn fA, B, C]() {}"); // The following tests are generated, since they require a lot of text. 
too_many_locals(); @@ -255,13 +275,14 @@ static void test_syntax_error(void) too_far_to_loop(); } +#if 0 #define codeline(s) s "\n" static void test_line_numbers(void) { // cause errors by dividing by 0 const char *code = - /* 1 */ codeline("return fn(a, b, c, d, e, f, g, h, i, j, k, l) {") + /* 1 */ codeline("return fn(a: int, b: int, c: int, d: int, e: int, f: int, g: int, h: int, i: int, j: int, k: int, l: int) {") /* 2 */ codeline(" let x = 1/a") /* 3 */ codeline(" let func") /* 4 */ codeline(" {") @@ -321,13 +342,14 @@ static void test_line_numbers(void) } test_close(P, &s_alloc); } +#endif // 0 int main(void) { test_name_error(); test_syntax_error(); test_type_error(); - test_line_numbers(); + //test_line_numbers(); test_case(PAW_ESYNTAX, "missing_left_paren", "fn fa, b, c) {return [a + b + c]}"); test_case(PAW_ESYNTAX, "missing_right_paren", "fn f(a, b, c {return [a + b + c]}"); diff --git a/test/test_rt.c b/test/test_rt.c index 05eba15..3616e3f 100644 --- a/test/test_rt.c +++ b/test/test_rt.c @@ -8,18 +8,18 @@ static void script(const char *name) int main(void) { + script("basic"); script("types"); script("block"); + script("loop"); return 0; // TODO - script("basic"); + script("string"); script("operator"); script("integer"); script("float"); - script("loop"); script("closure"); script("array"); script("map"); - script("string"); script("class"); script("error"); script("misc");