diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml index 1b86d045e..652aaef6b 100644 --- a/.github/workflows/create-release.yml +++ b/.github/workflows/create-release.yml @@ -71,6 +71,7 @@ jobs: workflow: build-ton-macos-13-x86-64-portable.yml path: artifacts workflow_conclusion: success + branch: master skip_unpack: false - name: Download and unzip arm64 artifacts diff --git a/.github/workflows/create-tolk-release.yml b/.github/workflows/create-tolk-release.yml index 370f0d791..fb8438a12 100644 --- a/.github/workflows/create-tolk-release.yml +++ b/.github/workflows/create-tolk-release.yml @@ -151,3 +151,4 @@ jobs: file: artifacts/ton-wasm.zip asset_name: ton-wasm.zip tag: ${{ inputs.tag }} + diff --git a/crypto/smartcont/tolk-stdlib/common.tolk b/crypto/smartcont/tolk-stdlib/common.tolk index dec12e233..46068a206 100644 --- a/crypto/smartcont/tolk-stdlib/common.tolk +++ b/crypto/smartcont/tolk-stdlib/common.tolk @@ -1,7 +1,7 @@ // Standard library for Tolk (LGPL licence). // It contains common functions that are available out of the box, the user doesn't have to import anything. // More specific functions are required to be imported explicitly, like "@stdlib/tvm-dicts". -tolk 0.6 +tolk 0.7 /** Tuple manipulation primitives. @@ -17,17 +17,17 @@ fun createEmptyTuple(): tuple /// Appends a value to tuple, resulting in `Tuple t' = (x1, ..., xn, value)`. /// If its size exceeds 255, throws a type check exception. @pure -fun tuplePush(mutate self: tuple, value: X): void +fun tuplePush(mutate self: tuple, value: T): void asm "TPUSH"; /// Returns the first element of a non-empty tuple. @pure -fun tupleFirst(t: tuple): X +fun tupleFirst(t: tuple): T asm "FIRST"; /// Returns the [`index`]-th element of a tuple. @pure -fun tupleAt(t: tuple, index: int): X +fun tupleAt(t: tuple, index: int): T builtin; /// Returns the size of a tuple (elements count in it). 
@@ -37,7 +37,7 @@ fun tupleSize(t: tuple): int /// Returns the last element of a non-empty tuple. @pure -fun tupleLast(t: tuple): int +fun tupleLast(t: tuple): T asm "LAST"; @@ -205,7 +205,7 @@ fun stringHash(s: slice): int /// That is, if [hash] is computed as the hash of some data, these data are hashed twice, /// the second hashing occurring inside `CHKSIGNS`. @pure -fun isSignatureValid(hash: int, signature: slice, publicKey: int): int +fun isSignatureValid(hash: int, signature: slice, publicKey: int): bool asm "CHKSIGNU"; /// Checks whether [signature] is a valid Ed25519-signature of the data portion of `slice data` using `publicKey`, @@ -214,7 +214,7 @@ fun isSignatureValid(hash: int, signature: slice, publicKey: int): int /// The verification of Ed25519 signatures is the standard one, /// with sha256 used to reduce [data] to the 256-bit number that is actually signed. @pure -fun isSliceSignatureValid(data: slice, signature: slice, publicKey: int): int +fun isSliceSignatureValid(data: slice, signature: slice, publicKey: int): bool asm "CHKSIGNS"; /// Generates a new pseudo-random unsigned 256-bit integer x. @@ -259,14 +259,14 @@ fun randomizeByLogicalTime(): void /// otherwise the computation is aborted before visiting the `(maxCells + 1)`-st cell and /// a zero flag is returned to indicate failure. If [c] is `null`, returns `x = y = z = 0`. @pure -fun calculateCellSize(c: cell, maxCells: int): (int, int, int, int) +fun calculateCellSize(c: cell, maxCells: int): (int, int, int, bool) asm "CDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; /// Similar to [calculateCellSize], but accepting a `slice` [s] instead of a `cell`. /// The returned value of `x` does not take into account the cell that contains the `slice` [s] itself; /// however, the data bits and the cell references of [s] are accounted for in `y` and `z`. 
@pure -fun calculateSliceSize(s: slice, maxCells: int): (int, int, int, int) +fun calculateSliceSize(s: slice, maxCells: int): (int, int, int, bool) asm "SDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; /// A non-quiet version of [calculateCellSize] that throws a cell overflow exception (`8`) on failure. @@ -306,11 +306,11 @@ fun getBuilderDepth(b: builder): int */ /// Dump a variable [x] to the debug log. -fun debugPrint(x: X): void +fun debugPrint(x: T): void builtin; /// Dump a string [x] to the debug log. -fun debugPrintString(x: X): void +fun debugPrintString(x: T): void builtin; /// Dumps the stack (at most the top 255 values) and shows the total stack depth. @@ -382,7 +382,7 @@ fun loadCoins(mutate self: slice): int /// Loads bool (-1 or 0) from a slice @pure -fun loadBool(mutate self: slice): int +fun loadBool(mutate self: slice): bool asm( -> 1 0) "1 LDI"; /// Shifts a slice pointer to [len] bits forward, mutating the slice. @@ -482,7 +482,7 @@ fun storeCoins(mutate self: builder, x: int): self /// Stores bool (-1 or 0) into a builder. /// Attention: true value is `-1`, not 1! If you pass `1` here, TVM will throw an exception. @pure -fun storeBool(mutate self: builder, x: int): self +fun storeBool(mutate self: builder, x: bool): self asm(x self) "1 STI"; /// Stores dictionary (represented by TVM `cell` or `null`) into a builder. @@ -529,22 +529,22 @@ fun getRemainingBitsAndRefsCount(self: slice): (int, int) /// Checks whether a slice is empty (i.e., contains no bits of data and no cell references). @pure -fun isEndOfSlice(self: slice): int +fun isEndOfSlice(self: slice): bool asm "SEMPTY"; /// Checks whether a slice has no bits of data. @pure -fun isEndOfSliceBits(self: slice): int +fun isEndOfSliceBits(self: slice): bool asm "SDEMPTY"; /// Checks whether a slice has no references. @pure -fun isEndOfSliceRefs(self: slice): int +fun isEndOfSliceRefs(self: slice): bool asm "SREMPTY"; /// Checks whether data parts of two slices coinside. 
@pure -fun isSliceBitsEqual(self: slice, b: slice): int +fun isSliceBitsEqual(self: slice, b: slice): bool asm "SDEQ"; /// Returns the number of cell references already stored in a builder. @@ -621,10 +621,10 @@ fun parseStandardAddress(s: slice): (int, int) fun createAddressNone(): slice asm "b{00} PUSHSLICE"; -/// Returns if a slice pointer contains an empty address (`-1` for true, `0` for false, as always). +/// Returns if a slice pointer contains an empty address. /// In other words, a slice starts with two `0` bits (TL addr_none$00). @pure -fun addressIsNone(s: slice): int +fun addressIsNone(s: slice): bool asm "2 PLDU" "0 EQINT"; @@ -677,8 +677,8 @@ fun loadMessageFlags(mutate self: slice): int /// Having msgFlags (4 bits), check that a message is bounced. /// Effectively, it's `msgFlags & 1` (the lowest bit present). @pure -fun isMessageBounced(msgFlags: int): int - asm "1 PUSHINT" "AND"; +fun isMessageBounced(msgFlags: int): bool + asm "2 PUSHINT" "MODR"; /// Skip 0xFFFFFFFF prefix (when a message is bounced). @pure diff --git a/crypto/smartcont/tolk-stdlib/gas-payments.tolk b/crypto/smartcont/tolk-stdlib/gas-payments.tolk index 1dc6f3f89..83893354d 100644 --- a/crypto/smartcont/tolk-stdlib/gas-payments.tolk +++ b/crypto/smartcont/tolk-stdlib/gas-payments.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.6 +tolk 0.7 /** Gas and payment related primitives. @@ -61,3 +61,9 @@ fun calculateOriginalMessageFee(workchain: int, incomingFwdFee: int): int /// If it has no debt, `0` is returned. fun getMyStorageDuePayment(): int asm "DUEPAYMENT"; + +/// Returns the amount of nanotoncoins charged for storage. 
+/// (during the storage phase preceding the current computation phase) +@pure +fun getMyStoragePaidPayment(): int + asm "STORAGEFEES"; diff --git a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk index f7a721918..429f0cbfd 100644 --- a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk +++ b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.6 +tolk 0.7 /** Lisp-style lists are nested 2-elements tuples: `(1, (2, (3, null)))` represents list `[1, 2, 3]`. diff --git a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk index 9fba24d90..a47fe5426 100644 --- a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk +++ b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.6 +tolk 0.7 /** Dictionaries are represented as `cell` data type (cells can store anything, dicts in particular). @@ -19,20 +19,20 @@ fun createEmptyDict(): cell /// Checks whether a dictionary is empty. 
@pure -fun dictIsEmpty(self: cell): int +fun dictIsEmpty(self: cell): bool asm "DICTEMPTY"; @pure -fun iDictGet(self: cell, keyLen: int, key: int): (slice, int) +fun iDictGet(self: cell, keyLen: int, key: int): (slice, bool) asm(key self keyLen) "DICTIGET" "NULLSWAPIFNOT"; @pure -fun uDictGet(self: cell, keyLen: int, key: int): (slice, int) +fun uDictGet(self: cell, keyLen: int, key: int): (slice, bool) asm(key self keyLen) "DICTUGET" "NULLSWAPIFNOT"; @pure -fun sDictGet(self: cell, keyLen: int, key: slice): (slice, int) +fun sDictGet(self: cell, keyLen: int, key: slice): (slice, bool) asm(key self keyLen) "DICTGET" "NULLSWAPIFNOT"; @@ -63,33 +63,33 @@ fun sDictSetRef(mutate self: cell, keyLen: int, key: slice, value: cell): void @pure -fun iDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): int +fun iDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTIADD"; @pure -fun uDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): int +fun uDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTUADD"; @pure -fun iDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): int +fun iDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTIREPLACE"; @pure -fun uDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): int +fun uDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): bool asm(value key self keyLen) "DICTUREPLACE"; @pure -fun iDictGetRef(self: cell, keyLen: int, key: int): (cell, int) +fun iDictGetRef(self: cell, keyLen: int, key: int): (cell, bool) asm(key self keyLen) "DICTIGETREF" "NULLSWAPIFNOT"; @pure -fun uDictGetRef(self: cell, keyLen: int, key: int): (cell, int) +fun uDictGetRef(self: cell, keyLen: int, key: int): (cell, bool) asm(key self keyLen) "DICTUGETREF" "NULLSWAPIFNOT"; @pure -fun 
sDictGetRef(self: cell, keyLen: int, key: slice): (cell, int) +fun sDictGetRef(self: cell, keyLen: int, key: slice): (cell, bool) asm(key self keyLen) "DICTGETREF" "NULLSWAPIFNOT"; @@ -107,28 +107,28 @@ fun sDictGetRefOrNull(self: cell, keyLen: int, key: slice): cell @pure -fun iDictDelete(mutate self: cell, keyLen: int, key: int): int +fun iDictDelete(mutate self: cell, keyLen: int, key: int): bool asm(key self keyLen) "DICTIDEL"; @pure -fun uDictDelete(mutate self: cell, keyLen: int, key: int): int +fun uDictDelete(mutate self: cell, keyLen: int, key: int): bool asm(key self keyLen) "DICTUDEL"; @pure -fun sDictDelete(mutate self: cell, keyLen: int, key: slice): int +fun sDictDelete(mutate self: cell, keyLen: int, key: slice): bool asm(key self keyLen) "DICTDEL"; @pure -fun iDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, int) +fun iDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, bool) asm(value key self keyLen) "DICTISETGET" "NULLSWAPIFNOT"; @pure -fun uDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, int) +fun uDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, bool) asm(value key self keyLen) "DICTUSETGET" "NULLSWAPIFNOT"; @pure -fun sDictSetAndGet(mutate self: cell, keyLen: int, key: slice, value: slice): (slice, int) +fun sDictSetAndGet(mutate self: cell, keyLen: int, key: slice, value: slice): (slice, bool) asm(value key self keyLen) "DICTSETGET" "NULLSWAPIFNOT"; @@ -142,15 +142,15 @@ fun uDictSetAndGetRefOrNull(mutate self: cell, keyLen: int, key: int, value: cel @pure -fun iDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, int) +fun iDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, bool) asm(key self keyLen) "DICTIDELGET" "NULLSWAPIFNOT"; @pure -fun uDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, int) +fun uDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, bool) 
asm(key self keyLen) "DICTUDELGET" "NULLSWAPIFNOT"; @pure -fun sDictDeleteAndGet(mutate self: cell, keyLen: int, key: slice): (slice, int) +fun sDictDeleteAndGet(mutate self: cell, keyLen: int, key: slice): (slice, bool) asm(key self keyLen) "DICTDELGET" "NULLSWAPIFNOT"; @@ -168,129 +168,129 @@ fun sDictSetBuilder(mutate self: cell, keyLen: int, key: slice, value: builder): @pure -fun iDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): int +fun iDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): bool asm(value key self keyLen) "DICTIADDB"; @pure -fun uDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): int +fun uDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): bool asm(value key self keyLen) "DICTUADDB"; @pure -fun iDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): int +fun iDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): bool asm(value key self keyLen) "DICTIREPLACEB"; @pure -fun uDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): int +fun uDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): bool asm(value key self keyLen) "DICTUREPLACEB"; @pure -fun iDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, int) +fun iDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, bool) asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; @pure -fun uDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, int) +fun uDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, bool) asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; @pure -fun sDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (slice, slice, int) +fun sDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (slice, slice, bool) asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; @pure -fun 
iDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, int) +fun iDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, bool) asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; @pure -fun uDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, int) +fun uDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, bool) asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; @pure -fun sDictDeleteLastAndGet(mutate self: cell, keyLen: int): (slice, slice, int) +fun sDictDeleteLastAndGet(mutate self: cell, keyLen: int): (slice, slice, bool) asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; @pure -fun iDictGetFirst(self: cell, keyLen: int): (int, slice, int) +fun iDictGetFirst(self: cell, keyLen: int): (int, slice, bool) asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; @pure -fun uDictGetFirst(self: cell, keyLen: int): (int, slice, int) +fun uDictGetFirst(self: cell, keyLen: int): (int, slice, bool) asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; @pure -fun sDictGetFirst(self: cell, keyLen: int): (slice, slice, int) +fun sDictGetFirst(self: cell, keyLen: int): (slice, slice, bool) asm (-> 1 0 2) "DICTMIN" "NULLSWAPIFNOT2"; @pure -fun iDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, int) +fun iDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, bool) asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; @pure -fun uDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, int) +fun uDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, bool) asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; @pure -fun sDictGetFirstAsRef(self: cell, keyLen: int): (slice, cell, int) +fun sDictGetFirstAsRef(self: cell, keyLen: int): (slice, cell, bool) asm (-> 1 0 2) "DICTMINREF" "NULLSWAPIFNOT2"; @pure -fun iDictGetLast(self: cell, keyLen: int): (int, slice, int) +fun iDictGetLast(self: cell, keyLen: int): (int, slice, bool) asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; @pure -fun uDictGetLast(self: cell, keyLen: int): (int, slice, int) +fun uDictGetLast(self: cell, 
keyLen: int): (int, slice, bool) asm (-> 1 0 2) "DICTUMAX" "NULLSWAPIFNOT2"; @pure -fun sDictGetLast(self: cell, keyLen: int): (slice, slice, int) +fun sDictGetLast(self: cell, keyLen: int): (slice, slice, bool) asm (-> 1 0 2) "DICTMAX" "NULLSWAPIFNOT2"; @pure -fun iDictGetLastAsRef(self: cell, keyLen: int): (int, cell, int) +fun iDictGetLastAsRef(self: cell, keyLen: int): (int, cell, bool) asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; @pure -fun uDictGetLastAsRef(self: cell, keyLen: int): (int, cell, int) +fun uDictGetLastAsRef(self: cell, keyLen: int): (int, cell, bool) asm (-> 1 0 2) "DICTUMAXREF" "NULLSWAPIFNOT2"; @pure -fun sDictGetLastAsRef(self: cell, keyLen: int): (slice, cell, int) +fun sDictGetLastAsRef(self: cell, keyLen: int): (slice, cell, bool) asm (-> 1 0 2) "DICTMAXREF" "NULLSWAPIFNOT2"; @pure -fun iDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, int) +fun iDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; @pure -fun uDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, int) +fun uDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; @pure -fun iDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) +fun iDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; @pure -fun uDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) +fun uDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; @pure -fun iDictGetPrev(self: cell, keyLen: int, pivot: int): (int, slice, int) +fun iDictGetPrev(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; @pure -fun uDictGetPrev(self: cell, keyLen: int, 
pivot: int): (int, slice, int) +fun uDictGetPrev(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; @pure -fun iDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) +fun iDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; @pure -fun uDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) +fun uDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, bool) asm(pivot self keyLen -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; @@ -299,13 +299,13 @@ fun uDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) */ @pure -fun prefixDictGet(self: cell, keyLen: int, key: slice): (slice, slice, slice, int) +fun prefixDictGet(self: cell, keyLen: int, key: slice): (slice, slice, slice, bool) asm(key self keyLen) "PFXDICTGETQ" "NULLSWAPIFNOT2"; @pure -fun prefixDictSet(mutate self: cell, keyLen: int, key: slice, value: slice): int +fun prefixDictSet(mutate self: cell, keyLen: int, key: slice, value: slice): bool asm(value key self keyLen) "PFXDICTSET"; @pure -fun prefixDictDelete(mutate self: cell, keyLen: int, key: slice): int +fun prefixDictDelete(mutate self: cell, keyLen: int, key: slice): bool asm(key self keyLen) "PFXDICTDEL"; diff --git a/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk index 91b35f2bd..ef7c2afe9 100644 --- a/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk +++ b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk @@ -1,5 +1,5 @@ // A part of standard library for Tolk -tolk 0.6 +tolk 0.7 /// Usually `c3` has a continuation initialized by the whole code of the contract. It is used for function calls. /// The primitive returns the current value of `c3`. 
diff --git a/tolk-tester/tests/a10.tolk b/tolk-tester/tests/a10.tolk index d46397c6a..7301f1d50 100644 --- a/tolk-tester/tests/a10.tolk +++ b/tolk-tester/tests/a10.tolk @@ -35,7 +35,7 @@ fun test88(x: int) { } @method_id(89) -fun test89(last: int) { +fun test89(last: int): (int, int, int, int) { var t: tuple = createEmptyTuple(); t.tuplePush(1); t.tuplePush(2); diff --git a/tolk-tester/tests/a6.tolk b/tolk-tester/tests/a6.tolk index 7f2c39461..32fd3364c 100644 --- a/tolk-tester/tests/a6.tolk +++ b/tolk-tester/tests/a6.tolk @@ -9,6 +9,7 @@ fun calc_phi(): int { repeat (70) { n*=10; }; var p= 1; var `q`=1; + _=`q`; do { (p,q)=(q,p+q); } while (q <= n); //;; @@ -27,7 +28,7 @@ fun calc_sqrt2(): int { return mulDivRound(p, n, q); } -fun calc_root(m: auto): auto { +fun calc_root(m: int) { var base: int=1; repeat(70) { base *= 10; } var (a, b, c) = (1,0,-m); diff --git a/tolk-tester/tests/a6_5.tolk b/tolk-tester/tests/a6_5.tolk index 8b300c0c9..43fd59c5a 100644 --- a/tolk-tester/tests/a6_5.tolk +++ b/tolk-tester/tests/a6_5.tolk @@ -1,5 +1,5 @@ @deprecated -fun twice(f: auto, x: auto): auto { +fun twice(f: int -> int, x: int) { return f (f (x)); } diff --git a/tolk-tester/tests/allow_post_modification.tolk b/tolk-tester/tests/allow_post_modification.tolk index 5cfa2f3d8..e374f62b3 100644 --- a/tolk-tester/tests/allow_post_modification.tolk +++ b/tolk-tester/tests/allow_post_modification.tolk @@ -2,85 +2,112 @@ fun unsafe_tuple(x: X): tuple asm "NOP"; fun inc(x: int, y: int): (int, int) { - return (x + y, y * 10); + return (x + y, y * 10); } fun `~inc`(mutate self: int, y: int): int { - val (newX, newY) = inc(self, y); - self = newX; - return newY; + val (newX, newY) = inc(self, y); + self = newX; + return newY; } +fun eq(v: X): X { return v; } +fun eq2(v: (int, int)) { return v; } +fun mul2(mutate dest: int, v: int): int { dest = v*2; return dest; } +fun multens(mutate self: (int, int), v: (int, int)): (int, int) { var (f, s) = self; var (m1, m2) = v; self = (f*m1, s*m2); 
return self; } + @method_id(11) fun test_return(x: int): (int, int, int, int, int, int, int) { - return (x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); + return (x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); } @method_id(12) fun test_assign(x: int): (int, int, int, int, int, int, int) { - var (x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int) = (x, x.`~inc`(x / 20), x, x=x*2, x, x+=1, x); - return (x1, x2, x3, x4, x5, x6, x7); + var (x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int) = (x, x.`~inc`(x / 20), x, x=x*2, x, x+=1, x); + return (x1, x2, x3, x4, x5, x6, x7); } @method_id(13) fun test_tuple(x: int): tuple { - var t: tuple = unsafe_tuple([x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]); - return t; + var t: tuple = unsafe_tuple([x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]); + return t; } @method_id(14) fun test_tuple_assign(x: int): (int, int, int, int, int, int, int) { - var [x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int] = [x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]; - return (x1, x2, x3, x4, x5, x6, x7); + var [x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int] = [x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]; + return (x1, x2, x3, x4, x5, x6, x7); } fun foo1(x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int): (int, int, int, int, int, int, int) { - return (x1, x2, x3, x4, x5, x6, x7); + return (x1, x2, x3, x4, x5, x6, x7); } @method_id(15) fun test_call_1(x: int): (int, int, int, int, int, int, int) { - return foo1(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); + return foo1(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); } fun foo2(x1: int, x2: int, x3456: (int, int, int, int), x7: int): (int, int, int, int, int, int, int) { - var (x3: int, x4: int, x5: int, x6: int) = x3456; - return (x1, x2, x3, x4, x5, x6, x7); + var (x3: int, x4: int, x5: int, x6: int) = x3456; + return (x1, x2, x3, x4, x5, x6, x7); } @method_id(16) fun test_call_2(x: int): (int, int, int, 
int, int, int, int) { - return foo2(x, x.`~inc`(x / 20), (x, x = x * 2, x, x += 1), x); + return foo2(x, x.`~inc`(x / 20), (x, x = x * 2, x, x += 1), x); } fun asm_func(x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int): (int, int, int, int, int, int, int) -asm - (x4 x5 x6 x7 x1 x2 x3->0 1 2 3 4 5 6) "NOP"; + asm (x4 x5 x6 x7 x1 x2 x3->0 1 2 3 4 5 6) "NOP"; @method_id(17) fun test_call_asm_old(x: int): (int, int, int, int, int, int, int) { - return asm_func(x, x += 1, x, x, x.`~inc`(x / 20), x, x = x * 2); + return asm_func(x, x += 1, x, x, x.`~inc`(x / 20), x, x = x * 2); } @method_id(18) fun test_call_asm_new(x: int): (int, int, int, int, int, int, int) { - return asm_func(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); + return asm_func(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); } global xx: int; @method_id(19) -fun test_global(x: int): (int, int, int, int, int, int, int) { - xx = x; - return (xx, xx.`~inc`(xx / 20), xx, xx = xx * 2, xx, xx += 1, xx); +fun test_global(x: int) { + xx = x; + return (x, xx, xx.`~inc`(xx / 20), eq(xx += (x *= 0)), xx = xx * 2, xx, xx += 1, xx, x); } @method_id(20) fun test_if_else(x: int): (int, int, int, int, int) { - if (x > 10) { - return (x.`~inc`(8), x + 1, x = 1, x <<= 3, x); - } else { - xx = 9; - return (x, x.`~inc`(-4), x.`~inc`(-1), x >= 1, x = x + xx); - } + if (x > 10) { + return (x.`~inc`(8), x + 1, x = 1, x <<= 3, x); + } else { + xx = 9; + return (x, x.`~inc`(-4), x.`~inc`(-1), (x >= 1) as int, x = x + xx); + } +} + +@method_id(21) +fun test_assign_with_inner(x: int) { + return (x, x += 10, [(x, x += 20, eq(x -= 50), x)], eq2((x, x *= eq(x /= 2)))); +} + +@method_id(22) +fun test_assign_with_mutate(x: int) { + return (x, mul2(mutate x, x += 5), x.`~inc`(mul2(mutate x, x)), x); +} + +@method_id(23) +fun test_assign_tensor(x: (int, int)) { + var fs = (0, 0); + return (x, x = (20, 30), fs = x.multens((1, 2)), fs.multens(multens(mutate x, (-1, -1))), x, fs); +} + +global fs: (int, int); 
+@method_id(24) +fun test_assign_tensor_global(x: (int, int)) { + fs = (0, 0); + return (x, x = (20, 30), fs = x.multens((1, 2)), fs.multens(multens(mutate x, (-1, -1))), x, fs); } fun main() { @@ -96,9 +123,13 @@ fun main() { @testcase | 16 | 100 | 100 50 105 210 210 211 211 @testcase | 17 | 100 | 101 50 106 212 100 101 101 @testcase | 18 | 100 | 210 210 211 211 100 50 105 -@testcase | 19 | 100 | 100 50 105 210 210 211 211 +@testcase | 19 | 100 | 100 100 50 105 210 210 211 211 0 @testcase | 20 | 80 | 80 89 1 8 8 @testcase | 20 | 9 | 9 -40 -10 -1 13 +@testcase | 21 | 100 | 100 110 [ 110 130 80 80 ] 80 3200 +@testcase | 22 | 100 | 100 210 4200 630 +@testcase | 23 | 1 1 | 1 1 20 30 20 60 -400 -3600 -20 -60 -400 -3600 +@testcase | 24 | 1 1 | 1 1 20 30 20 60 -400 -3600 -20 -60 -400 -3600 @fif_codegen """ @@ -107,5 +138,5 @@ fun main() { inc CALLDICT // self newY }> """ -@code_hash 97139400653362069936987769894397430077752335662822462908581556703209313861576 +@code_hash 7627024945492125068389905298530400936797031708759561372406088054030801992712 */ diff --git a/tolk-tester/tests/assignment-tests.tolk b/tolk-tester/tests/assignment-tests.tolk new file mode 100644 index 000000000..89de8cf44 --- /dev/null +++ b/tolk-tester/tests/assignment-tests.tolk @@ -0,0 +1,28 @@ +fun extractFromTypedTuple(params: [int]) { + var [payload: int] = params; + return payload + 10; +} + +@method_id(101) +fun test101(x: int) { + var params = [x]; + return extractFromTypedTuple(params); +} + +fun autoInferIntNull(x: int) { + if (x > 10) { return null; } + return x; +} + +fun main(value: int) { + var (x: int, y) = (autoInferIntNull(value), autoInferIntNull(value * 2)); + if (x == null && y == null) { return null; } + return x == null || y == null ? 
-1 : x + y; +} + +/** +@testcase | 0 | 3 | 9 +@testcase | 0 | 6 | -1 +@testcase | 0 | 11 | (null) +@testcase | 101 | 78 | 88 +*/ diff --git a/tolk-tester/tests/bit-operators.tolk b/tolk-tester/tests/bit-operators.tolk index 049406af9..4cb8e1ba1 100644 --- a/tolk-tester/tests/bit-operators.tolk +++ b/tolk-tester/tests/bit-operators.tolk @@ -1,20 +1,20 @@ -fun lshift(): int { +fun lshift(): bool { return (1 << 0) == 1; } -fun rshift(): int { +fun rshift(): bool { return (1 >> 0) == 1; } -fun lshift_var(i: int): int { +fun lshift_var(i: int): bool { return (1 << i) == 1; } -fun rshift_var(i: int): int { +fun rshift_var(i: int): bool { return (1 >> i) == 1; } -fun main(x: int): int { +fun main(x: int): bool { if (x == 0) { return lshift(); } else if (x == 1) { @@ -31,12 +31,71 @@ fun main(x: int): int { } @method_id(11) -fun is_claimed(index: int): int { +fun is_claimed(index: int): bool { var claim_bit_index: int = index % 256; var mask: int = 1 << claim_bit_index; return (255 & mask) == mask; } +@method_id(12) +fun bit_not(i: int, b: bool): (int, bool, bool, bool, int, bool) { + var i2 = ~i; + var b2 = !b; + var (i3: int, b3: bool) = (i2, b2); + return (i3, b3, !i, !b, ~~~i, !!!b); +} + +@method_id(13) +fun boolWithBitwiseConst() { + var found = true; + return (found & false, found | true, found ^ true, found & found); +} + +global g14: int; +fun getBool() { return (g14 += 1) > 2; } + +@method_id(14) +fun boolWithBitwise(b: bool) { + g14 = 0; + return (b & getBool(), !b & getBool(), b | getBool(), !b | getBool(), b ^ getBool(), !b & getBool(), g14); +} + +@method_id(15) +fun boolWithBitwiseSet(b1: bool, b2: bool) { + b1 &= b2; + b2 |= true; + b1 |= b1 == false; + b2 ^= (b1 ^= b2); + return (b1, b2); +} + +@method_id(16) +fun testDoUntilCodegen(i: bool, n: int) { + var cnt = 0; + do { cnt += 1; } while (i); + do { cnt += 1; } while (!!i); + do { cnt += 1; } while (n); + return (cnt, !i, !n); +} + +@method_id(17) +fun testConstNegateCodegen() { + return (!0, !1, !true, 
!false, !!true, !!false); +} + +@method_id(18) +fun testBoolNegateOptimized(x: bool) { + return (x, !x, !!x, !!!x, !!!!true); +} + +fun eqX(x: bool) { return x; } + +@method_id(19) +fun testBoolCompareOptimized(x: bool) { + return (x == true, x != true, eqX(x) == false, eqX(x) != false, !!(x == !false)); +} + + /** method_id | in | out @@ -50,4 +109,96 @@ fun is_claimed(index: int): int { @testcase | 11 | 1 | -1 @testcase | 11 | 256 | -1 @testcase | 11 | 8 | 0 +@testcase | 12 | 0 0 | -1 -1 -1 -1 -1 -1 +@testcase | 12 | -1 -1 | 0 0 0 0 0 0 +@testcase | 12 | 7 0 | -8 -1 0 -1 -8 -1 +@testcase | 14 | -1 | 0 0 -1 -1 0 0 6 +@testcase | 14 | 0 | 0 0 -1 -1 -1 -1 6 +@testcase | 15 | -1 -1 | 0 -1 +@testcase | 15 | -1 0 | 0 -1 +@testcase | 16 | 0 0 | 3 -1 -1 +@testcase | 17 | | -1 0 0 -1 -1 0 +@testcase | 18 | 0 | 0 -1 0 -1 -1 +@testcase | 18 | -1 | -1 0 -1 0 -1 +@testcase | 19 | 0 | 0 -1 -1 0 0 +@testcase | 19 | -1 | -1 0 0 -1 -1 + +@fif_codegen +""" + boolWithBitwiseConst PROC:<{ + // + 0 PUSHINT // _3 + -1 PUSHINT // _3 _5 + 0 PUSHINT // _3 _5 _7 + -1 PUSHINT // _3 _5 _7 _8 + }> +""" + +@fif_codegen +""" + testDoUntilCodegen PROC:<{ + // i n + 0 PUSHINT // i n cnt=0 + UNTIL:<{ + INC // i n cnt + s2 PUSH // i n cnt i + NOT // i n cnt _6 + }> // i n cnt + UNTIL:<{ + INC // i n cnt + s2 PUSH // i n cnt i + NOT // i n cnt _9 + }> // i n cnt + UNTIL:<{ + INC // i n cnt + OVER // i n cnt n + 0 EQINT // i n cnt _12 + }> // i n cnt + s0 s2 XCHG // cnt n i + NOT // cnt n _13 + SWAP // cnt _13 n + 0 EQINT // cnt _13 _14 + }> +""" + +@fif_codegen +""" + testConstNegateCodegen PROC:<{ + // + TRUE // _0 + FALSE // _0 _1 + FALSE // _0 _1 _2 + TRUE // _0 _1 _2 _3 + TRUE // _0 _1 _2 _3 _4 + FALSE // _0 _1 _2 _3 _4 _5 + }> +""" + +@fif_codegen +""" + testBoolNegateOptimized PROC:<{ + // x + DUP // x x + NOT // x _1 + OVER // x _1 x + NOT // x _1 _2 + s2 s(-1) PUXC + TRUE // x _1 x _2 _3 + }> +""" + +@fif_codegen +""" + testBoolCompareOptimized PROC:<{ + // x + DUP // x x + NOT // x _1 + 
OVER // x _1 x + eqX CALLDICT // x _1 _2 + NOT // x _1 _3 + s2 PUSH // x _1 _3 x + eqX CALLDICT // x _1 _3 _4 + s3 PUSH // x _1 _3 _4 x + }> +""" */ diff --git a/tolk-tester/tests/c2.tolk b/tolk-tester/tests/c2.tolk index ec8d32da4..257aba5b8 100644 --- a/tolk-tester/tests/c2.tolk +++ b/tolk-tester/tests/c2.tolk @@ -1,20 +1,21 @@ global op: (int, int) -> int; -fun check_assoc(a: int, b: int, c: int): int { +fun check_assoc(a: int, b: int, c: int): bool { return op(op(a, b), c) == op(a, op(b, c)); } -fun unnamed_args(_: int, _: slice, _: auto): auto { +fun unnamed_args(_: int, _: slice, _: int) { return true; } -fun main(x: int, y: int, z: int): int { +fun main(x: int, y: int, z: int): bool { op = `_+_`; + if (0) { return null; } return check_assoc(x, y, z); } @method_id(101) -fun test101(x: int, z: int): auto { +fun test101(x: int, z: int) { return unnamed_args(x, "asdf", z); } diff --git a/tolk-tester/tests/c2_1.tolk b/tolk-tester/tests/c2_1.tolk index 4e52b9eeb..ef1e589ad 100644 --- a/tolk-tester/tests/c2_1.tolk +++ b/tolk-tester/tests/c2_1.tolk @@ -1,8 +1,8 @@ -fun check_assoc(op: auto, a: int, b: int, c: int) { +fun check_assoc(op: (int, int) -> int, a: int, b: int, c: int) { return op(op(a, b), c) == op(a, op(b, c)); } -fun main(x: int, y: int, z: int): int { +fun main(x: int, y: int, z: int): bool { return check_assoc(`_+_`, x, y, z); } diff --git a/tolk-tester/tests/cells-slices.tolk b/tolk-tester/tests/cells-slices.tolk index e1d28b8b1..6f316f2e6 100644 --- a/tolk-tester/tests/cells-slices.tolk +++ b/tolk-tester/tests/cells-slices.tolk @@ -162,8 +162,8 @@ fun test13() { } @method_id(114) -fun test110(x: int) { - var s = beginCell().storeBool(x < 0).storeBool(0).storeBool(x).endCell().beginParse(); +fun test110(x: bool) { + var s = beginCell().storeBool(x == true).storeBool(false).storeBool(x).endCell().beginParse(); return (s.loadBool(), s.loadBool(), s.loadBool()); } @@ -179,15 +179,15 @@ fun test111() { if (s.addressIsNone()) { s.skipBits(2); } - if 
(s.loadBool() == 0) { - assert(s.loadBool() == 0) throw 444; + if (s.loadBool() == false) { + assert(!s.loadBool()) throw 444; s.skipBouncedPrefix(); } var op2 = s.loadMessageOp(); var q2 = s.loadMessageQueryId(); s.skipBits(64); s.assertEndOfSlice(); - assert(isMessageBounced(0x001)) throw 444; + assert(isMessageBounced(0x001) && !isMessageBounced(0x002)) throw 444; return (op1, q1, op2, q2); } @@ -216,15 +216,15 @@ Note, that since 'compute-asm-ltr' became on be default, chaining methods codege """ test6 PROC:<{ // - NEWC // _1 - 1 PUSHINT // _1 _2=1 - SWAP // _2=1 _1 + NEWC // _0 + 1 PUSHINT // _0 _1=1 + SWAP // _1=1 _0 32 STU // _0 - 2 PUSHINT // _0 _6=2 - SWAP // _6=2 _0 + 2 PUSHINT // _0 _5=2 + SWAP // _5=2 _0 32 STU // _0 - 3 PUSHINT // _0 _10=3 - SWAP // _10=3 _0 + 3 PUSHINT // _0 _9=3 + SWAP // _9=3 _0 32 STU // _0 }> """ diff --git a/tolk-tester/tests/codegen_check_demo.tolk b/tolk-tester/tests/codegen_check_demo.tolk index 02379540c..e40f03779 100644 --- a/tolk-tester/tests/codegen_check_demo.tolk +++ b/tolk-tester/tests/codegen_check_demo.tolk @@ -1,7 +1,7 @@ @method_id(101) fun test1(): int { - var x = false; - if (x == true) { + var x: int = false as int; + if (x == true as int) { x= 100500; } return x; @@ -35,7 +35,7 @@ Below, I just give examples of @fif_codegen tag: """ main PROC:<{ // s - 17 PUSHINT // s _3=17 + 17 PUSHINT // s _1=17 OVER // s z=17 t WHILE:<{ ... 
diff --git a/tolk-tester/tests/generics-1.tolk b/tolk-tester/tests/generics-1.tolk new file mode 100644 index 000000000..0d872cc19 --- /dev/null +++ b/tolk-tester/tests/generics-1.tolk @@ -0,0 +1,150 @@ +fun eq1(value: X): X { return value; } +fun eq2(value: X) { return value; } +fun eq3(value: X): X { var cp: [X] = [eq1(value)]; var ((([v: X]))) = cp; return v; } +fun eq4(value: X) { return eq1(value); } + +@method_id(101) +fun test101(x: int) { + var (a, b, c) = (x, (x,x), [x,x]); + return (eq1(a), eq1(b), eq1(c), eq2(a), eq2(b), eq2(c), eq3(a), eq4(b), eq3(createEmptyTuple())); +} + +fun getTwo(): X { return 2 as X; } + +fun takeInt(a: int) { return a; } + +@method_id(102) +fun test102(): (int, int, int, [(int, int)]) { + var a: int = getTwo(); + var _: int = getTwo(); + var b = getTwo() as int; + var c: int = 1 ? getTwo() : getTwo(); + var c redef = getTwo(); + return (eq1(a), eq2(b), takeInt(getTwo()), [(getTwo(), getTwo())]); +} + +@method_id(103) +fun test103(first: int): (int, int, int) { + var t = createEmptyTuple(); + var cs = beginCell().storeInt(100, 32).endCell().beginParse(); + t.tuplePush(first); + t.tuplePush(2); + t.tuplePush(cs); + cs = t.tupleAt(2); + cs = t.tupleAt(2) as slice; + return (t.tupleAt(0), cs.loadInt(32), t.tupleAt(2).loadInt(32)); +} + +fun manyEq(a: T1, b: T2, c: T3): [T1, T2, T3] { + return [a, b, c]; +} + +@method_id(104) +fun test104(f: int) { + return ( + manyEq(1 ? 1 : 1, f ? 0 : null, !f ? getTwo() as int : null), + manyEq((f ? 
null as int : eq2(2), beginCell().storeBool(true).endCell().beginParse().loadBool()), 0, eq4(f)) + ); +} + +fun calcSum(x: X, y: X) { return x + y; } + +@method_id(105) +fun test105() { + if (0) { calcSum(((0)), null); } + return (calcSum(1, 2)); +} + +fun calcYPlus1(value: Y) { return value + 1; } +fun calcLoad32(cs: slice) { return cs.loadInt(32); } +fun calcTensorPlus1(tens: (int, int)) { var (f, s) = tens; return (f + 1, s + 1); } +fun calcTensorMul2(tens: (int, int)) { var (f, s) = tens; return (f * 2, s * 2); } +fun cellToSlice(c: cell) { return c.beginParse(); } +fun abstractTransform(xToY: (X) -> Y, yToR: (((Y))) -> R, initialX: X): R { + var y = xToY(initialX); + return yToR(y); +} + +@method_id(106) +fun test106() { + var c = beginCell().storeInt(106, 32).endCell(); + return [ + abstractTransform(cellToSlice, calcLoad32, c), + abstractTransform(calcYPlus1, calcYPlus1, 0), + abstractTransform(calcTensorPlus1, calcTensorMul2, (2, 2)) + ]; +} + +fun callTupleFirst(t: X): Y { return t.tupleFirst(); } +fun callTuplePush(mutate self: T, v1: V, v2: V): self { self.tuplePush(v1); tuplePush(mutate self, v2); return self; } +fun getTupleLastInt(t: tuple) { return t.tupleLast(); } +fun getTupleSize(t: tuple) { return t.tupleSize(); } +fun callAnyFn(f: (TObj) -> TResult, arg: TObj) { return f(arg); } +fun callAnyFn2(f: TCallback, arg: tuple) { return f(arg); } + +global t107: tuple; + +@method_id(107) +fun test107() { + t107 = createEmptyTuple(); + callTuplePush(mutate t107, 1, 2); + t107.callTuplePush(3, 4).callTuplePush(5, 6); + var first: int = t107.callTupleFirst(); + return ( + callAnyFn(getTupleSize, t107), + callAnyFn2(getTupleSize, t107), + first, + callTupleFirst(t107) as int, + callAnyFn(getTupleLastInt, t107), + callAnyFn2(getTupleLastInt, t107) + ); +} + +global g108: int; + +fun inc108(by: int) { g108 += by; } +fun getInc108() { return inc108; } +fun returnResult(f: () -> RetT): RetT { return f(); } +fun applyAndReturn(f: () -> (ArgT) -> RetT, arg: 
ArgT): () -> ArgT -> RetT { + f()(arg); + return f; +} + +@method_id(108) +fun test108() { + g108 = 0; + getInc108()(1); + returnResult<(int) -> void>(getInc108)(2); + applyAndReturn(getInc108, 10)()(10); + returnResult(getInc108)(2); + applyAndReturn(getInc108, 10)()(10); + return g108; +} + +fun main(x: int): (int, [[int, int]]) { + try { if(x) { throw (1, x); } } + catch (excNo, arg) { return (arg as int, [[eq2(arg as int), getTwo()]]); } + return (0, [[x, 1]]); +} + +/** +@testcase | 0 | 1 | 1 [ [ 1 2 ] ] +@testcase | 101 | 0 | 0 0 0 [ 0 0 ] 0 0 0 [ 0 0 ] 0 0 0 [] +@testcase | 102 | | 2 2 2 [ 2 2 ] +@testcase | 103 | 0 | 0 100 100 +@testcase | 104 | 0 | [ 1 (null) 2 ] [ 2 -1 0 0 ] +@testcase | 105 | | 3 +@testcase | 106 | | [ 106 2 6 6 ] +@testcase | 107 | | 6 6 1 1 6 6 +@testcase | 108 | | 45 + +@fif_codegen DECLPROC eq1 +@fif_codegen DECLPROC eq1 +@fif_codegen DECLPROC eq1<(int,int)> +@fif_codegen DECLPROC eq1<[int,int]> +@fif_codegen DECLPROC getTwo + +@fif_codegen_avoid DECLPROC eq1 +@fif_codegen_avoid DECLPROC eq2 +@fif_codegen_avoid DECLPROC eq3 + */ diff --git a/tolk-tester/tests/imports/use-dicts.tolk b/tolk-tester/tests/imports/use-dicts.tolk index 26a9a9ccd..c9d5dcfea 100644 --- a/tolk-tester/tests/imports/use-dicts.tolk +++ b/tolk-tester/tests/imports/use-dicts.tolk @@ -11,7 +11,7 @@ fun prepareDict_3_30_4_40_5_x(valueAt5: int): cell { fun lookupIdxByValue(idict32: cell, value: int): int { var cur_key = -1; do { - var (cur_key redef, cs: slice, found: int) = idict32.iDictGetNext(32, cur_key); + var (cur_key redef, cs: slice, found: bool) = idict32.iDictGetNext(32, cur_key); // one-line condition (via &) doesn't work, since right side is calculated immediately if (found) { if (cs.loadInt(32) == value) { diff --git a/tolk-tester/tests/invalid-call-1.tolk b/tolk-tester/tests/invalid-call-1.tolk index 1c32422ee..3542f5809 100644 --- a/tolk-tester/tests/invalid-call-1.tolk +++ b/tolk-tester/tests/invalid-call-1.tolk @@ -1,9 +1,10 @@ -fun main() { - return 
true(); +const asdf = 1; + +fun main(x: int) { + return x.asdf(); } /** @compilation_should_fail -The message is weird now, but later I'll rework error messages anyway. -@stderr cannot apply expression of type int to an expression of type (): cannot unify type () -> ??3 with int +@stderr calling a non-function */ diff --git a/tolk-tester/tests/invalid-call-5.tolk b/tolk-tester/tests/invalid-call-5.tolk index 89ab026a9..32905cd77 100644 --- a/tolk-tester/tests/invalid-call-5.tolk +++ b/tolk-tester/tests/invalid-call-5.tolk @@ -8,6 +8,6 @@ fun main() { /** @compilation_should_fail -@stderr rvalue expected +@stderr `_` can't be used as a value; it's a placeholder for a left side of assignment @stderr inc(_) */ diff --git a/tolk-tester/tests/invalid-call-9.tolk b/tolk-tester/tests/invalid-call-9.tolk new file mode 100644 index 000000000..87eb61e84 --- /dev/null +++ b/tolk-tester/tests/invalid-call-9.tolk @@ -0,0 +1,10 @@ +fun getOne() { return 1; } + +fun main() { + return getOne(); +} + +/** +@compilation_should_fail +@stderr calling a not generic function with generic T + */ diff --git a/tolk-tester/tests/invalid-const-1.tolk b/tolk-tester/tests/invalid-const-1.tolk new file mode 100644 index 000000000..10e8303ad --- /dev/null +++ b/tolk-tester/tests/invalid-const-1.tolk @@ -0,0 +1,8 @@ +fun main() { + return 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999; +} + +/** +@compilation_should_fail +@stderr invalid integer constant + */ diff --git a/tolk-tester/tests/invalid-declaration-11.tolk b/tolk-tester/tests/invalid-declaration-11.tolk new file mode 100644 index 000000000..75ebb450b --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-11.tolk @@ -0,0 +1,13 @@ +// this function is declared incorrectly, +// since it should return 2 values onto a stack (1 for returned slice, 1 for mutated int) +// but contains not 2 numbers in asm ret_order +fun loadAddress2(mutate self: int): slice + asm( -> 1 0 2) 
"LDMSGADDR"; + +fun main(){} + +/** +@compilation_should_fail +@stderr ret_order (after ->) expected to contain 2 numbers +@stderr asm( -> 1 0 2) + */ diff --git a/tolk-tester/tests/invalid-declaration-12.tolk b/tolk-tester/tests/invalid-declaration-12.tolk new file mode 100644 index 000000000..25ae9de60 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-12.tolk @@ -0,0 +1,16 @@ +fun proxy(x: int) { + return factorial(x); +} + +fun factorial(x: int) { + if (x <= 0) { + return 1; + } + return x * proxy(x-1); +} + +/** +@compilation_should_fail +@stderr could not infer return type of `factorial`, because it appears in a recursive call chain +@stderr fun factorial + */ diff --git a/tolk-tester/tests/invalid-declaration-13.tolk b/tolk-tester/tests/invalid-declaration-13.tolk new file mode 100644 index 000000000..758a4f21d --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-13.tolk @@ -0,0 +1,7 @@ +const c: slice = 123 + 456; + +/** +@compilation_should_fail +@stderr expression type does not match declared type +@stderr const c + */ diff --git a/tolk-tester/tests/invalid-generics-1.tolk b/tolk-tester/tests/invalid-generics-1.tolk new file mode 100644 index 000000000..c8ff7fec3 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-1.tolk @@ -0,0 +1,10 @@ +fun f(v: int, x: X) {} + +fun failCantDeduceWithoutArgument() { + return f(1); +} + +/** +@compilation_should_fail +@stderr can not deduce X for generic function `f` + */ diff --git a/tolk-tester/tests/invalid-generics-10.tolk b/tolk-tester/tests/invalid-generics-10.tolk new file mode 100644 index 000000000..c7f72bf4d --- /dev/null +++ b/tolk-tester/tests/invalid-generics-10.tolk @@ -0,0 +1,9 @@ +fun invalidReferencingGenericMethodWithoutGeneric() { + var t = createEmptyTuple(); + var cb = t.tupleLast; +} + +/** +@compilation_should_fail +@stderr can not use a generic function `tupleLast` as non-call + */ diff --git a/tolk-tester/tests/invalid-generics-11.tolk b/tolk-tester/tests/invalid-generics-11.tolk 
new file mode 100644 index 000000000..a399bc917 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-11.tolk @@ -0,0 +1,11 @@ +global gVar: int; + +fun main() { + var x = gVar; + return x; +} + +/** +@compilation_should_fail +@stderr generic T not expected here + */ diff --git a/tolk-tester/tests/invalid-generics-2.tolk b/tolk-tester/tests/invalid-generics-2.tolk new file mode 100644 index 000000000..155944338 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-2.tolk @@ -0,0 +1,10 @@ +fun f(v: int, x: T) {} + +fun failCantDeduceWithPlainNull() { + return f(0, null); +} + +/** +@compilation_should_fail +@stderr can not deduce T for generic function `f` + */ diff --git a/tolk-tester/tests/invalid-generics-3.tolk b/tolk-tester/tests/invalid-generics-3.tolk new file mode 100644 index 000000000..72b7df0ec --- /dev/null +++ b/tolk-tester/tests/invalid-generics-3.tolk @@ -0,0 +1,11 @@ +fun f(x: T, y: T) {} + +fun failIncompatibleTypesForT() { + return f(32, ""); +} + +/** +@compilation_should_fail +@stderr T is both int and slice for generic function `f` +@stderr f(32 + */ diff --git a/tolk-tester/tests/invalid-generics-4.tolk b/tolk-tester/tests/invalid-generics-4.tolk new file mode 100644 index 000000000..07472ba37 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-4.tolk @@ -0,0 +1,10 @@ +fun f(x: T): void asm "NOP"; + +fun failInstantiatingAsmFunctionWithNon1Slot() { + f((1, 2)); +} + +/** +@compilation_should_fail +@stderr can not call `f` with T=(int, int), because it occupies 2 stack slots in TVM, not 1 + */ diff --git a/tolk-tester/tests/invalid-generics-5.tolk b/tolk-tester/tests/invalid-generics-5.tolk new file mode 100644 index 000000000..4d4f29674 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-5.tolk @@ -0,0 +1,10 @@ +fun f(x: T): void asm "NOP"; + +fun failUsingGenericFunctionPartially() { + var cb = f; +} + +/** +@compilation_should_fail +@stderr can not use a generic function `f` as non-call + */ diff --git 
a/tolk-tester/tests/invalid-generics-6.tolk b/tolk-tester/tests/invalid-generics-6.tolk new file mode 100644 index 000000000..73e6403fd --- /dev/null +++ b/tolk-tester/tests/invalid-generics-6.tolk @@ -0,0 +1,10 @@ +fun eq(t: X) { return t; } + +fun failUsingGenericFunctionPartially() { + var cb = createEmptyTuple().eq().eq().tuplePush; +} + +/** +@compilation_should_fail +@stderr can not use a generic function `tuplePush` as non-call + */ diff --git a/tolk-tester/tests/invalid-generics-7.tolk b/tolk-tester/tests/invalid-generics-7.tolk new file mode 100644 index 000000000..b51bb82cd --- /dev/null +++ b/tolk-tester/tests/invalid-generics-7.tolk @@ -0,0 +1,18 @@ +fun failOnInstantiation(a: slice) { + var b: slice = foo(a); +} + +fun bar(value: X) : X { + return 1; +} +fun foo(value: X) : X { + return bar(value); +} + +/** +@compilation_should_fail +@stderr while instantiating generic function `foo` +@stderr while instantiating generic function `bar` +@stderr can not convert type `int` to return type `slice` +@stderr return 1 + */ diff --git a/tolk-tester/tests/invalid-generics-8.tolk b/tolk-tester/tests/invalid-generics-8.tolk new file mode 100644 index 000000000..d2c24e532 --- /dev/null +++ b/tolk-tester/tests/invalid-generics-8.tolk @@ -0,0 +1,11 @@ +fun withT1T2(a: (T1, T2)) {} + +fun wrongTCountPassed() { + withT1T2((5, "")); +} + +/** +@compilation_should_fail +@stderr wrong count of generic T: expected 2, got 1 +@stderr + */ diff --git a/tolk-tester/tests/invalid-generics-9.tolk b/tolk-tester/tests/invalid-generics-9.tolk new file mode 100644 index 000000000..73fd6f87c --- /dev/null +++ b/tolk-tester/tests/invalid-generics-9.tolk @@ -0,0 +1,8 @@ +fun invalidProvidingGenericTsToNotGeneric() { + beginCell(); +} + +/** +@compilation_should_fail +@stderr calling a not generic function with generic T + */ diff --git a/tolk-tester/tests/invalid-mutate-1.tolk b/tolk-tester/tests/invalid-mutate-1.tolk index 237940fc9..280d1e998 100644 --- 
a/tolk-tester/tests/invalid-mutate-1.tolk +++ b/tolk-tester/tests/invalid-mutate-1.tolk @@ -7,5 +7,5 @@ fun cantAssignToVal() { /** @compilation_should_fail -@stderr modifying an immutable variable `x` +@stderr modifying immutable variable `x` */ diff --git a/tolk-tester/tests/invalid-mutate-11.tolk b/tolk-tester/tests/invalid-mutate-11.tolk index 9f2c2601e..dfc69851c 100644 --- a/tolk-tester/tests/invalid-mutate-11.tolk +++ b/tolk-tester/tests/invalid-mutate-11.tolk @@ -4,5 +4,5 @@ fun load32(self: slice): int { /** @compilation_should_fail -@stderr modifying `self` (call a mutating method), which is immutable by default +@stderr modifying `self`, which is immutable by default */ diff --git a/tolk-tester/tests/invalid-mutate-16.tolk b/tolk-tester/tests/invalid-mutate-16.tolk new file mode 100644 index 000000000..9da6e2534 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-16.tolk @@ -0,0 +1,9 @@ +fun cantCallMutatingFunctionWithAssignmentLValue() { + var t: tuple = createEmptyTuple(); + (t = createEmptyTuple()).tuplePush(1); +} + +/** +@compilation_should_fail +@stderr assignment can not be used as lvalue + */ diff --git a/tolk-tester/tests/invalid-mutate-17.tolk b/tolk-tester/tests/invalid-mutate-17.tolk new file mode 100644 index 000000000..9327f07d8 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-17.tolk @@ -0,0 +1,13 @@ +@pure +fun tupleMut(mutate self: tuple): int + asm "TLEN"; + +fun main() { + var t = createEmptyTuple(); + return [[t.tupleMut]]; +} + +/** +@compilation_should_fail +@stderr saving `tupleMut` into a variable is impossible, since it has `mutate` parameters + */ diff --git a/tolk-tester/tests/invalid-mutate-2.tolk b/tolk-tester/tests/invalid-mutate-2.tolk index 7501fdaf5..71afe7300 100644 --- a/tolk-tester/tests/invalid-mutate-2.tolk +++ b/tolk-tester/tests/invalid-mutate-2.tolk @@ -6,5 +6,5 @@ fun cantAssignToVal() { /** @compilation_should_fail -@stderr modifying an immutable variable `x` +@stderr modifying immutable variable `x` */ 
diff --git a/tolk-tester/tests/invalid-mutate-3.tolk b/tolk-tester/tests/invalid-mutate-3.tolk index c49973f71..d556c9ed9 100644 --- a/tolk-tester/tests/invalid-mutate-3.tolk +++ b/tolk-tester/tests/invalid-mutate-3.tolk @@ -7,5 +7,5 @@ fun cantAssignToConst() { /** @compilation_should_fail -@stderr modifying an immutable variable `op_increase` +@stderr modifying immutable constant */ diff --git a/tolk-tester/tests/invalid-mutate-4.tolk b/tolk-tester/tests/invalid-mutate-4.tolk index f25a707cb..5f2c111d5 100644 --- a/tolk-tester/tests/invalid-mutate-4.tolk +++ b/tolk-tester/tests/invalid-mutate-4.tolk @@ -10,5 +10,5 @@ fun cantPassToMutatingFunction() { /** @compilation_should_fail -@stderr modifying an immutable variable `myVal` +@stderr modifying immutable variable `myVal` */ diff --git a/tolk-tester/tests/invalid-mutate-5.tolk b/tolk-tester/tests/invalid-mutate-5.tolk index fd8d11924..2b282cf0d 100644 --- a/tolk-tester/tests/invalid-mutate-5.tolk +++ b/tolk-tester/tests/invalid-mutate-5.tolk @@ -9,6 +9,6 @@ fun cantCallMutatingMethod(c: cell) { /** @compilation_should_fail -@stderr modifying an immutable variable `s` (call a mutating method) +@stderr modifying immutable variable `s` @stderr s.loadUint */ diff --git a/tolk-tester/tests/invalid-mutate-6.tolk b/tolk-tester/tests/invalid-mutate-6.tolk index bb577ae47..749d9cab2 100644 --- a/tolk-tester/tests/invalid-mutate-6.tolk +++ b/tolk-tester/tests/invalid-mutate-6.tolk @@ -11,6 +11,6 @@ fun cantCallMutatingFunctionWithImmutable() { /** @compilation_should_fail -@stderr modifying an immutable variable `op_increase` (call a mutating function) +@stderr modifying immutable constant @stderr inc(mutate op_increase) */ diff --git a/tolk-tester/tests/invalid-mutate-7.tolk b/tolk-tester/tests/invalid-mutate-7.tolk index 5b6b6afe4..de3bce454 100644 --- a/tolk-tester/tests/invalid-mutate-7.tolk +++ b/tolk-tester/tests/invalid-mutate-7.tolk @@ -10,6 +10,6 @@ fun cantCallMutatingFunctionWithRvalue() { /** 
@compilation_should_fail -@stderr lvalue expected (call a mutating function) +@stderr literal can not be used as lvalue @stderr incBoth(mutate x, mutate 30) */ diff --git a/tolk-tester/tests/invalid-mutate-8.tolk b/tolk-tester/tests/invalid-mutate-8.tolk index 0dd7c5687..9b14e28f7 100644 --- a/tolk-tester/tests/invalid-mutate-8.tolk +++ b/tolk-tester/tests/invalid-mutate-8.tolk @@ -6,5 +6,5 @@ fun cantRedefImmutable() { /** @compilation_should_fail -@stderr modifying an immutable variable `x` (left side of assignment) +@stderr `redef` for immutable variable */ diff --git a/tolk-tester/tests/invalid-mutate-9.tolk b/tolk-tester/tests/invalid-mutate-9.tolk index 7e79052e4..3489a2889 100644 --- a/tolk-tester/tests/invalid-mutate-9.tolk +++ b/tolk-tester/tests/invalid-mutate-9.tolk @@ -4,6 +4,6 @@ fun increment(self: int) { /** @compilation_should_fail -@stderr modifying `self` (left side of assignment), which is immutable by default +@stderr modifying `self`, which is immutable by default @stderr probably, you want to declare `mutate self` */ diff --git a/tolk-tester/tests/invalid-nopar-4.tolk b/tolk-tester/tests/invalid-nopar-4.tolk index 6e833f995..033c483ec 100644 --- a/tolk-tester/tests/invalid-nopar-4.tolk +++ b/tolk-tester/tests/invalid-nopar-4.tolk @@ -4,5 +4,5 @@ fun load_u32(cs: slice): (slice, int) { /** @compilation_should_fail -@stderr expected `(`, got `32` +@stderr expected `;`, got `32` */ diff --git a/tolk-tester/tests/invalid-pure-1.tolk b/tolk-tester/tests/invalid-pure-1.tolk index 5baa32922..4f0e9142a 100644 --- a/tolk-tester/tests/invalid-pure-1.tolk +++ b/tolk-tester/tests/invalid-pure-1.tolk @@ -4,7 +4,7 @@ fun f_pure(): int { return f_impure(); } -fun f_impure(): int {} +fun f_impure(): int { return 0; } fun main(): int { return f_pure(); diff --git a/tolk-tester/tests/invalid-pure-3.tolk b/tolk-tester/tests/invalid-pure-3.tolk index f64b81ce7..31d4f0213 100644 --- a/tolk-tester/tests/invalid-pure-3.tolk +++ 
b/tolk-tester/tests/invalid-pure-3.tolk @@ -2,6 +2,7 @@ fun validate_input(input: cell): (int, int) { var (x, y, z, correct) = calculateCellSize(input, 10); assert(correct) throw 102; + return (x, y); } @pure diff --git a/tolk-tester/tests/invalid-redefinition-6.tolk b/tolk-tester/tests/invalid-redefinition-6.tolk new file mode 100644 index 000000000..e6b087c64 --- /dev/null +++ b/tolk-tester/tests/invalid-redefinition-6.tolk @@ -0,0 +1,10 @@ +const s1 = "asdf"; + +fun main() { + var s1 redef = "d"; +} + +/** +@compilation_should_fail +@stderr `redef` for unknown variable + */ diff --git a/tolk-tester/tests/invalid-self-4.tolk b/tolk-tester/tests/invalid-self-4.tolk index f4856a465..0be6b9e4d 100644 --- a/tolk-tester/tests/invalid-self-4.tolk +++ b/tolk-tester/tests/invalid-self-4.tolk @@ -4,6 +4,6 @@ fun cantReturnNothingFromSelf(mutate self: int): self { /** @compilation_should_fail -@stderr missing return; forgot `return self`? +@stderr missing return @stderr } */ diff --git a/tolk-tester/tests/invalid-syntax-3.tolk b/tolk-tester/tests/invalid-syntax-3.tolk index 26ce82ac5..259ea7958 100644 --- a/tolk-tester/tests/invalid-syntax-3.tolk +++ b/tolk-tester/tests/invalid-syntax-3.tolk @@ -4,5 +4,5 @@ fun main(x: int) { /** @compilation_should_fail -@stderr null is not a function: use `null`, not `null()` +@stderr calling a non-function */ diff --git a/tolk-tester/tests/invalid.tolk b/tolk-tester/tests/invalid-syntax-5.tolk similarity index 100% rename from tolk-tester/tests/invalid.tolk rename to tolk-tester/tests/invalid-syntax-5.tolk diff --git a/tolk-tester/tests/invalid-syntax-6.tolk b/tolk-tester/tests/invalid-syntax-6.tolk new file mode 100644 index 000000000..12e026459 --- /dev/null +++ b/tolk-tester/tests/invalid-syntax-6.tolk @@ -0,0 +1,9 @@ +fun main() { + var a = 1; + (a += 1) += 2; +} + +/** +@compilation_should_fail +@stderr assignment can not be used as lvalue +*/ diff --git a/tolk-tester/tests/invalid-syntax-7.tolk 
b/tolk-tester/tests/invalid-syntax-7.tolk new file mode 100644 index 000000000..9f63ac104 --- /dev/null +++ b/tolk-tester/tests/invalid-syntax-7.tolk @@ -0,0 +1,9 @@ +fun main() { + var x = 1; + x += (var y = 2); +} + +/** +@compilation_should_fail +@stderr expected , got `var` +*/ diff --git a/tolk-tester/tests/invalid-typing-1.tolk b/tolk-tester/tests/invalid-typing-1.tolk index a0fe296d8..0089bd62f 100644 --- a/tolk-tester/tests/invalid-typing-1.tolk +++ b/tolk-tester/tests/invalid-typing-1.tolk @@ -6,5 +6,5 @@ fun main() { /** @compilation_should_fail @stderr .tolk:2 -@stderr expected , got `scli` +@stderr unknown type name `scli` */ diff --git a/tolk-tester/tests/invalid-typing-10.tolk b/tolk-tester/tests/invalid-typing-10.tolk new file mode 100644 index 000000000..8c1df4a26 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-10.tolk @@ -0,0 +1,8 @@ +fun failMathOnBoolean(c: cell) { + return (null == c) * 10; +} + +/** +@compilation_should_fail +@stderr can not apply operator `*` to `bool` and `int` + */ diff --git a/tolk-tester/tests/invalid-typing-11.tolk b/tolk-tester/tests/invalid-typing-11.tolk new file mode 100644 index 000000000..d6aa09c30 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-11.tolk @@ -0,0 +1,11 @@ +fun failBitwiseNotOnBool() { + var eq = 1 == 0; + if (~eq) { + return 0; + } +} + +/** +@compilation_should_fail +@stderr can not apply operator `~` to `bool` + */ diff --git a/tolk-tester/tests/invalid-typing-12.tolk b/tolk-tester/tests/invalid-typing-12.tolk new file mode 100644 index 000000000..3a5b1fe28 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-12.tolk @@ -0,0 +1,10 @@ +fun failAssignNullToTensor() { + var ab = (1, 2); + ab = null; + return ab; +} + +/** +@compilation_should_fail +@stderr can not assign `null` to variable of type `(int, int)` + */ diff --git a/tolk-tester/tests/invalid-typing-2.tolk b/tolk-tester/tests/invalid-typing-2.tolk index d7c6745f5..052596e4c 100644 --- a/tolk-tester/tests/invalid-typing-2.tolk +++ 
b/tolk-tester/tests/invalid-typing-2.tolk @@ -1,9 +1,9 @@ fun main() { - var tri: (int, bool) = (10, false); + var tri: (int, int) = (10, false); return; } /** @compilation_should_fail -@stderr bool type is not supported yet +@stderr can not assign `(int, bool)` to variable of type `(int, int)` */ diff --git a/tolk-tester/tests/invalid-typing-3.tolk b/tolk-tester/tests/invalid-typing-3.tolk index fb4b0bc51..ac019a421 100644 --- a/tolk-tester/tests/invalid-typing-3.tolk +++ b/tolk-tester/tests/invalid-typing-3.tolk @@ -15,5 +15,5 @@ fun cantMixDifferentThis() { /** @compilation_should_fail -@stderr cannot apply function appendBuilder : builder -> (builder, ()) to arguments of type int: cannot unify type int with builder +@stderr can not call method for `builder` with object of type `int` */ diff --git a/tolk-tester/tests/invalid-typing-4.tolk b/tolk-tester/tests/invalid-typing-4.tolk index 0e6553690..1ee71290c 100644 --- a/tolk-tester/tests/invalid-typing-4.tolk +++ b/tolk-tester/tests/invalid-typing-4.tolk @@ -7,8 +7,6 @@ fun cantCallNotChainedMethodsInAChain(x: int) { } /** -The error is very weird, but nevertheless, the type system prevents of doing such errors. - @compilation_should_fail -@stderr cannot apply function incNotChained : int -> (int, ()) to arguments of type (): cannot unify type () with int +@stderr can not call method for `int` with object of type `void` */ diff --git a/tolk-tester/tests/invalid-typing-5.tolk b/tolk-tester/tests/invalid-typing-5.tolk index ba3450de2..9d8cd480d 100644 --- a/tolk-tester/tests/invalid-typing-5.tolk +++ b/tolk-tester/tests/invalid-typing-5.tolk @@ -7,8 +7,7 @@ fun failWhenReturnANotChainedValue(x: int): int { } /** -The error is very weird, but nevertheless, the type system prevents of doing such errors. 
- @compilation_should_fail -@stderr previous function return type int cannot be unified with return statement expression type (): cannot unify type () with int +@stderr x.incNotChained() +@stderr can not convert type `void` to return type `int` */ diff --git a/tolk-tester/tests/invalid-typing-6.tolk b/tolk-tester/tests/invalid-typing-6.tolk new file mode 100644 index 000000000..f2e99c7dd --- /dev/null +++ b/tolk-tester/tests/invalid-typing-6.tolk @@ -0,0 +1,8 @@ +fun failWhenTernaryConditionNotInt(cs: slice) { + return cs ? 1 : 0; +} + +/** +@compilation_should_fail +@stderr can not use `slice` as a boolean condition + */ diff --git a/tolk-tester/tests/invalid-typing-7.tolk b/tolk-tester/tests/invalid-typing-7.tolk new file mode 100644 index 000000000..c192a05b8 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-7.tolk @@ -0,0 +1,9 @@ +fun failAssignPlainNullToVariable() { + var x = null; +} + +/** +@compilation_should_fail +@stderr can not infer type of `x`, it's always null +@stderr specify its type with `x: ` or use `null as ` + */ diff --git a/tolk-tester/tests/invalid-typing-8.tolk b/tolk-tester/tests/invalid-typing-8.tolk new file mode 100644 index 000000000..d696e1320 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-8.tolk @@ -0,0 +1,8 @@ +fun failExplicitCastIncompatible(c: cell) { + return c as slice; +} + +/** +@compilation_should_fail +@stderr type `cell` can not be cast to `slice` + */ diff --git a/tolk-tester/tests/invalid-typing-9.tolk b/tolk-tester/tests/invalid-typing-9.tolk new file mode 100644 index 000000000..a0d5ee04e --- /dev/null +++ b/tolk-tester/tests/invalid-typing-9.tolk @@ -0,0 +1,13 @@ +fun getTupleLastGetter(): tuple -> X { + return tupleLast; +} + +fun failTypeMismatch() { + var t = createEmptyTuple(); + var c: cell = getTupleLastGetter()(t); +} + +/** +@compilation_should_fail +@stderr can not assign `int` to variable of type `cell` + */ diff --git a/tolk-tester/tests/logical-operators.tolk 
b/tolk-tester/tests/logical-operators.tolk index e9774f3f4..fb437bb34 100644 --- a/tolk-tester/tests/logical-operators.tolk +++ b/tolk-tester/tests/logical-operators.tolk @@ -1,14 +1,14 @@ import "imports/use-dicts.tolk" fun simpleAllConst() { - return (!0, !!0 & !false, !!!0, !1, !!1, !-1, !!-1, (!5 == 0) == !0, !0 == true); + return (!0, !!0 & !false, !!!0, !1, !!1, !-1, !!-1, (!5 as int == 0) == !0, !0 == true); } fun compileTimeEval1(x: int) { // todo now compiler doesn't understand that bool can't be equal to number other than 0/-1 // (but understands that it can't be positive) // that's why for now, the last condition is evaluated at runtime - return (!x, !x > 10, !x < 10, !!x == 5, !x == -10); + return (!x, !x as int > 10, (!x as int) < 10, !!x as int == 5, !x as int == -10); } @method_id(101) @@ -23,13 +23,13 @@ fun withAndOr(x: int, y: int, z: int) { var return_at_end = -1; if (!x & !y) { if (!z & !y) { return 10; } - else if (z | !!y) { return_at_end = 20; } + else if ((z != 0) | !!y) { return_at_end = 20; } } else if (!!x & !!y & !z) { if (!z & (x > 10)) { return_at_end = 30; } if ((x != 11) & !z) { return 40; } return_at_end = 50; } else { - return_at_end = !x ? !y : !z | 1; + return_at_end = !x ? 
!y as int : (!z as int) | 1; } return return_at_end; } @@ -54,7 +54,8 @@ fun testDict(last: int) { @method_id(105) fun testNotNull(x: int) { - return [x == null, null == x, !(x == null), null == null, +(null != null)]; + // return [x == null, null == x, !(x == null), null == null, +(null != null)]; + return [x == null, null == x, !(x == null)]; } @method_id(106) @@ -123,6 +124,31 @@ fun testLogicalOps2(first: int) { return (s.getRemainingBitsCount(), sum); } +@method_id(112) +fun mixLogicalIntsAndBools(first: int, cond: bool) { + return ( + (first && cond) || (!first && cond), + ((first & -1) & cond as int) == ((first && true) && cond) as int, + 7 && cond, + first || cond || !cond || alwaysThrows(), + cond || first || !first || alwaysThrows() + ); +} + +@method_id(113) +fun testConvertIfToIfnot(x: bool) { + assert(!!(x == false), 100); + assert(!x, 100); + if (x == !!false) { + return 1; + } + if (!!(x != !false)) { + return 1; + } + assert(!!x, 100); + return -4; +} + fun main() { } @@ -144,8 +170,8 @@ fun main() { @testcase | 104 | 50 | 3 5 -1 @testcase | 104 | 100 | 3 5 5 @testcase | 104 | 0 | 3 -1 5 -@testcase | 105 | 0 | [ 0 0 -1 -1 0 ] -@testcase | 105 | null | [ -1 -1 0 -1 0 ] +@testcase | 105 | 0 | [ 0 0 -1 ] +@testcase | 105 | null | [ -1 -1 0 ] @testcase | 106 | | [ 0 0 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ 0 -1 ] @testcase | 107 | | [ -1 -1 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ -1 0 ] @testcase | 108 | 1 2 | -1 @@ -159,18 +185,21 @@ fun main() { @testcase | 110 | 500 | -1 -1 0 -1 -1 3 @testcase | 111 | 0 | 32 4 @testcase | 111 | -1 | 0 8 +@testcase | 112 | 5 0 | 0 -1 0 -1 -1 +@testcase | 112 | 0 -1 | -1 -1 -1 -1 -1 +@testcase | 113 | 0 | 1 @fif_codegen """ simpleAllConst PROC:<{ // - -1 PUSHINT - 0 PUSHINT - -1 PUSHINT - 0 PUSHINT - -1 PUSHINT + TRUE 0 PUSHINT - -1 PUSHINT + TRUE + FALSE + TRUE + FALSE + TRUE TRUE TRUE }> @@ -292,4 +321,27 @@ These are moments of future optimizations. For now, it's more than enough. 
}> """ +@fif_codegen +""" + testConvertIfToIfnot PROC:<{ + // x + DUP // x x + 100 THROWIF + DUP // x x + 100 THROWIF + DUP // x x + IFNOTJMP:<{ // x + DROP // + 1 PUSHINT // _7=1 + }> // x + DUP // x x + IFNOTJMP:<{ // x + DROP // + 1 PUSHINT // _8=1 + }> // x + 100 THROWIFNOT + -4 PUSHINT // _12=-4 + }> +""" + */ diff --git a/tolk-tester/tests/mutate-methods.tolk b/tolk-tester/tests/mutate-methods.tolk index b9184ca9a..816e4c8d2 100644 --- a/tolk-tester/tests/mutate-methods.tolk +++ b/tolk-tester/tests/mutate-methods.tolk @@ -118,12 +118,19 @@ fun updateTwoItems(mutate self: (int, int), byValue: int) { self = (first + byValue, second + byValue); } +global t107_1: int; +global t107_2: int; + @method_id(107) fun testMutableTensor() { var t = (40, 50); t.updateTwoItems(10); updateTwoItems(mutate t, 10); - return t; + t107_1 = 1; + t107_2 = 2; + (t107_1, t107_2).updateTwoItems(10); + updateTwoItems(mutate (t107_1, t107_2), 10); + return (t, t107_1, t107_2); } @pure @@ -147,7 +154,7 @@ fun getSumOfNumbersInCell(c: cell): int { @method_id(110) fun testStoreChaining() { - var b = beginCell().storeUint(1, 32).storeUint(2, 32).storeUint(3, 32); + var b = ((beginCell()).storeUint(1, 32)).storeUint(2, 32).storeUint(3, 32); b.storeUint(4, 32); b.myStoreUint(5, 32).storeUint(6, 32); storeUint(mutate b, 7, 32); @@ -191,7 +198,7 @@ fun testStoreAndMutateBoth() { b.myStoreU32_and_mutate_x(mutate x); var cs: slice = b.endCell().beginParse(); - var (n1,n2,n3,n4,n5) = (cs.loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32)); + var (n1,n2,n3,n4,n5) = (cs.loadUint(32),((cs)).loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32)); assert(n5 == x) throw 100; return [n1,n2,n3,n4,n5]; @@ -278,7 +285,7 @@ fun main(){} @testcase | 104 | | 1 2 3 @testcase | 105 | | 5 5 110 @testcase | 106 | | 160 110 -@testcase | 107 | | 60 70 +@testcase | 107 | | 60 70 21 22 @testcase | 110 | | 320 @testcase | 111 | | 55 55 @testcase | 112 | | [ 1 13 3 23 33 ] @@ -300,7 
+307,7 @@ fun main(){} ... incrementTwoInPlace CALLDICT // x y sum1 -ROT - 10 PUSHINT // sum1 x y _9=10 + 10 PUSHINT // sum1 x y _8=10 incrementTwoInPlace CALLDICT // sum1 x y sum2 s1 s3 s0 XCHG3 // x y sum1 sum2 }> @@ -310,8 +317,8 @@ fun main(){} """ load_next PROC:<{ // cs - 32 LDI // _1 cs - SWAP // cs _1 + 32 LDI // _3 cs + SWAP // cs _3 }> """ @@ -319,7 +326,7 @@ fun main(){} """ testStoreUintPureUnusedResult PROC:<{ // - 0 PUSHINT // _12=0 + 0 PUSHINT // _11=0 }> """ @@ -330,7 +337,7 @@ fun main(){} NEWC // b STIX // _2 DROP // - 0 PUSHINT // _12=0 + 0 PUSHINT // _11=0 }> """ diff --git a/tolk-tester/tests/no-spaces.tolk b/tolk-tester/tests/no-spaces.tolk index 0d4c3b678..da7338989 100644 --- a/tolk-tester/tests/no-spaces.tolk +++ b/tolk-tester/tests/no-spaces.tolk @@ -22,7 +22,7 @@ global `some()var`:int; return `a`*-1*-(1)*---(1)*+just10()+-`just10`()*m1*-m1+-eq(m1)----0x1; } -@method_id(112) fun `bitwise~ops`(flags:int):[int,int] { +@method_id(112) fun `bitwise~ops`(flags:int):[bool,bool] { return[ (just10()-3==just10()-(4)--1)|((2==2)&(eq(eq(10)) -3==just10()--13)), ((flags&0xFF)!=0) diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index cdfe5acf9..9ace99956 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -7,12 +7,14 @@ fun test1() { numbers = listPrepend(2, numbers); numbers = listPrepend(3, numbers); numbers = listPrepend(4, numbers); - var (h, numbers redef) = listSplit(numbers); + var (h: int, numbers redef) = listSplit(numbers); h += listGetHead(numbers); + _ = null; + (_, _) = (null, null); var t = createEmptyTuple(); do { - var num = numbers.listNext(); + var num: int = numbers.listNext(); t.tuplePush(num); } while (numbers != null); @@ -44,7 +46,7 @@ fun test3(x: int) { } fun getUntypedNull() { - var untyped = null; + var untyped: null = null; if (true) { return untyped; } @@ -52,8 +54,8 @@ fun getUntypedNull() { } @method_id(104) -fun test4() { - var (_, (_, 
untyped)) = (3, (createEmptyTuple, null)); +fun test4(): null { + var (_, (_, untyped: null)) = (3, (createEmptyTuple, null)); if (true) { return untyped; } @@ -62,21 +64,16 @@ fun test4() { @method_id(105) fun test5() { - var n = getUntypedNull(); + var n: slice = getUntypedNull(); return !(null == n) ? n.loadInt(32) : 100; } -@method_id(106) -fun test6(x: int) { - return x > null; // this compiles (for now), but fails at runtime -} - @method_id(107) fun test7() { var b = beginCell().storeMaybeRef(null); var s = b.endCell().beginParse(); var c = s.loadMaybeRef(); - return (null == c) * 10 + (b != null); + return (null == c) as int * 10 + (b != null) as int; } fun main() { @@ -132,27 +129,18 @@ fun main() { }> """ -@fif_codegen -""" - test6 PROC:<{ - // x - PUSHNULL // x _1 - GREATER // _2 - }> -""" - @fif_codegen """ test7 PROC:<{ ... - LDOPTREF // b _20 _19 + LDOPTREF // b _18 _17 DROP // b c - ISNULL // b _13 - 10 MULCONST // b _15 - SWAP // _15 b - ISNULL // _15 _16 - 0 EQINT // _15 _17 - ADD // _18 + ISNULL // b _11 + 10 MULCONST // b _13 + SWAP // _13 b + ISNULL // _13 _14 + NOT // _13 _15 + ADD // _16 }> """ */ diff --git a/tolk-tester/tests/op_priority.tolk b/tolk-tester/tests/op-priority.tolk similarity index 73% rename from tolk-tester/tests/op_priority.tolk rename to tolk-tester/tests/op-priority.tolk index e4f97b759..8a57b3940 100644 --- a/tolk-tester/tests/op_priority.tolk +++ b/tolk-tester/tests/op-priority.tolk @@ -1,4 +1,4 @@ -fun justTrue(): int { return true; } +fun justTrue(): bool { return true; } fun unary_minus_1(a: int, b: int, c: int): int{return -(a+b) *c;} fun unary_minus_2(a: int, b: int, c: int): int{return(-(a+b))*c;} @@ -6,17 +6,17 @@ fun unary_minus_3(a: int, b: int, c: int): int{return-((a+b) *c);} @method_id(101) -fun test1(x: int, y: int, z: int): int { +fun test1(x: int, y: int, z: int): bool { return (x > 0) & (y > 0) & (z > 0); } @method_id(102) -fun test2(x: int, y: int, z: int): int { - return x > (0 & (y > 0) & (z > 0)); +fun 
test2(x: int, y: int, z: int): bool { + return x > (0 & (y > 0) as int & (z > 0) as int); } @method_id(103) -fun test3(x: int, y: int, z: int): int { +fun test3(x: int, y: int, z: int): bool { if ((x < 0) | (y < 0)) { return z < 0; } @@ -24,29 +24,29 @@ fun test3(x: int, y: int, z: int): int { } @method_id(104) -fun test4(x: int, y: int, mode: int): int { +fun test4(x: int, y: int, mode: int): bool { if (mode == 1) { return (x == 10) | (y == 20); } if (mode == 2) { return (x == 10) | (y == 20); } else { - return x == (10 | (y == 20)); + return x == (10 | (y == 20) as int); } } @method_id(105) -fun test5(status: int): int { - return justTrue() & (status == 1) & ((justTrue() & status) == 1); +fun test5(status: int): bool { + return justTrue() & (status == 1) & ((justTrue() as int & status) == 1); } @method_id(106) -fun test6(a: int, b: int, c: int): int { +fun test6(a: int, b: int, c: int): bool { return (unary_minus_1(a,b,c) == unary_minus_2(a,b,c)) & (unary_minus_1(a,b,c) == unary_minus_3(a,b,c)); } @method_id(107) fun test7(b: int): int { - var a = b == 3 ? 3 : b == 4 ? 4 : (b == 5) & 1 ? 5 : 100; + var a = b == 3 ? 3 : b == 4 ? 4 : (b == 5) & true ? 
5 : 100; return a; } @@ -56,14 +56,14 @@ fun test8(b: int): int { return a; } -fun `_ 0, 3 & (3 > 0), 3 & (`_<_`(3, 0)), - 3 & `_ 0, 3 & (3 > 0) as int, 3 & (`_<_`(3, 0)), + 3 & `_ int) { return used_as_noncall2; } +fun receiveGetter(): () -> int { return used_as_noncall2; } @pure fun usedButOptimizedOut(x: int): int { return x + 2; } diff --git a/tolk-tester/tests/self-keyword.tolk b/tolk-tester/tests/self-keyword.tolk index a339e7d01..b0567696d 100644 --- a/tolk-tester/tests/self-keyword.tolk +++ b/tolk-tester/tests/self-keyword.tolk @@ -158,6 +158,44 @@ fun testNotMutatingChainableSelfMutateAnother(initial: int) { return (arg, c108, c109, x); } +fun pickG110(mutate self: int, mutate pushTo: tuple): self { + self += 10; + pushTo.tuplePush(c110); + return self; +} + +global tup110: tuple; +global c110: int; + +@method_id(110) +fun testMutateGlobalsLValue(init: int) { + c110 = init; + tup110 = createEmptyTuple(); + c110.incChained().incChained().pickG110(mutate tup110).incChained().pickG110(mutate tup110).incChained(); + return (c110, tup110); +} + +fun myTuplePush(mutate self: tuple, value: T): self { + self.tuplePush(value); + return self; +} + +fun myTupleAt(self: tuple, idx: int): T { + return self.tupleAt(idx); +} + +global tup111: tuple; + +@method_id(111) +fun testForallFunctionsWithSelf(): (int, int, tuple) { + var t = createEmptyTuple(); + tup111 = createEmptyTuple(); + t.myTuplePush(10); + tup111.myTuplePush(1).myTuplePush(2).myTuplePush(3); + return (t.myTupleAt(0), tup111.myTupleAt(tup111.tupleSize() - 1), tup111); +} + + fun main() { } @@ -179,6 +217,8 @@ fun main() { } @testcase | 109 | 200 | 200 3 1 2 @testcase | 109 | 100 | 100 0 0 1 @testcase | 109 | 102 | 102 2 1 2 +@testcase | 110 | 0 | 24 [ 2 13 ] +@testcase | 111 | | 10 3 [ 1 2 3 ] @fif_codegen """ diff --git a/tolk-tester/tests/test-math.tolk b/tolk-tester/tests/test-math.tolk index 893035fde..6b147c776 100644 --- a/tolk-tester/tests/test-math.tolk +++ b/tolk-tester/tests/test-math.tolk @@ 
-218,7 +218,7 @@ fun fixed248_log2_const(): int { @pure @inline fun Pi_const_f254(): int { - var (c: auto, _) = Pi_xconst_f254(); + var (c, _) = Pi_xconst_f254(); return c; } @@ -661,7 +661,7 @@ fun fixed248_pow(x: int, y: int): int { return 1 << 248; // x^0 = 1 } if (x <= 0) { - var bad: int = (x | y) < 0; + var bad: int = ((x | y) < 0) as int; return 0 >> bad; // 0^y = 0 if x=0 and y>=0; "out of range" exception otherwise } var (l, s) = log2_aux_f256(x); @@ -677,7 +677,7 @@ fun fixed248_pow(x: int, y: int): int { // now log_2(x^y) = y*log_2(x) = q + ll, ss integer, ll fixed257, -1/2<=ll<1/2 var sq: int = q + 248; if (sq <= 0) { - return -(sq == 0); // underflow + return -((sq == 0) as int); // underflow } y = expm1_f257(mulrshiftr256(ll, log2_const_f256())); return (y ~>> (9 - q)) - (-1 << sq); @@ -986,7 +986,7 @@ fun tset(mutate self: tuple, idx: int, value: X): void // fixed256 acos_prepare_slow(fixed255 x); @inline fun acos_prepare_slow_f255(x: int): int { - x -= (x == 0); + x -= (x == 0) as int; var t: int = 1; repeat (255) { t = t * sign(x) * 2 + 1; // decode Gray code (sign(x_0), sign(x_1), ...) 
@@ -1019,7 +1019,8 @@ fun test_nrand(n: int): tuple { repeat (n) { var x: int = fixed248_nrand(); var bucket: int = (abs(x) >> 243); // 255 buckets starting from x=0, each 1/32 wide - t.tset(bucket, t.tupleAt(bucket) + 1); + var at_bucket: int = t.tupleAt(bucket); + t.tset(bucket, at_bucket + 1); } return t; } diff --git a/tolk-tester/tests/try-func.tolk b/tolk-tester/tests/try-func.tolk index 7963a8500..dfd72e9e9 100644 --- a/tolk-tester/tests/try-func.tolk +++ b/tolk-tester/tests/try-func.tolk @@ -1,7 +1,3 @@ -fun unsafeGetInt(any: X): int - asm "NOP"; - -@method_id(11) fun foo(x: int): int { try { if (x == 7) { @@ -14,7 +10,6 @@ fun foo(x: int): int { } @inline -@method_id(12) fun foo_inline(x: int): int { try { assert(!(x == 7)) throw 44; @@ -25,36 +20,34 @@ fun foo_inline(x: int): int { } @inline_ref -@method_id(13) fun foo_inlineref(x: int): int { try { if (x == 7) { throw (44, 2); } return x; } catch (_, arg) { - return unsafeGetInt(arg); + return arg as int; } } -@method_id(1) +@method_id(101) fun test(x: int, y: int, z: int): int { y = foo(y); return x * 100 + y * 10 + z; } -@method_id(2) +@method_id(102) fun test_inline(x: int, y: int, z: int): int { y = foo_inline(y); return x * 100 + y * 10 + z; } -@method_id(3) +@method_id(103) fun test_inlineref(x: int, y: int, z: int): int { y = foo_inlineref(y); return x * 100 + y * 10 + z; } @inline -@method_id(14) fun foo_inline_big( x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int, x8: int, x9: int, x10: int, x11: int, x12: int, x13: int, x14: int, x15: int, x16: int, x17: int, x18: int, x19: int, x20: int @@ -69,7 +62,7 @@ fun foo_inline_big( } } -@method_id(4) +@method_id(104) fun test_inline_big(x: int, y: int, z: int): int { y = foo_inline_big( y, y + 1, y + 2, y + 3, y + 4, y + 5, y + 6, y + 7, y + 8, y + 9, @@ -77,7 +70,6 @@ fun test_inline_big(x: int, y: int, z: int): int { return x * 1000000 + y * 1000 + z; } -@method_id(15) fun foo_big( x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, 
x7: int, x8: int, x9: int, x10: int, x11: int, x12: int, x13: int, x14: int, x15: int, x16: int, x17: int, x18: int, x19: int, x20: int @@ -88,11 +80,11 @@ fun foo_big( } return x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14 + x15 + x16 + x17 + x18 + x19 + x20; } catch (code, arg) { - return unsafeGetInt(arg); + return arg as int; } } -@method_id(5) +@method_id(105) fun test_big(x: int, y: int, z: int): int { y = foo_big( y, y + 1, y + 2, y + 3, y + 4, y + 5, y + 6, y + 7, y + 8, y + 9, @@ -100,7 +92,7 @@ fun test_big(x: int, y: int, z: int): int { return x * 1000000 + y * 1000 + z; } -@method_id(16) +@method_id(106) fun test_catch_into_same(x: int): int { var code = x; try { @@ -112,7 +104,7 @@ fun test_catch_into_same(x: int): int { } -@method_id(17) +@method_id(107) fun test_catch_into_same_2(x: int): int { var code = x; try { @@ -124,28 +116,77 @@ fun test_catch_into_same_2(x: int): int { return code; } +global after046: int; + +// this bug existed in FunC and is fixed in v0.4.6 +fun bug_046_internal(op: int) { + if (op == 1) { + return; + } else if (op == 2) { + return; + } else { + throw 1; + } +} + +fun bug_046_called() { + after046 = 0; + try { + bug_046_internal(1337); + after046 = 1; // shouldn't be called + } catch(n) { + return; + } + return; +} + +@method_id(108) +fun bug_046_entrypoint() { + bug_046_called(); + return after046; +} + +global g_reg: int; + +@method_id(109) +fun test109(): (int, int) { + var l_reg = 10; + g_reg = 10; + try { + // note, that regardless of assignment, an exception RESTORES them to previous (to 10) + // it's very unexpected, but is considered to be a TVM feature, not a bug + g_reg = 999; + l_reg = 999; + bug_046_internal(999); // throws + } catch { + } + // returns (10,10) because of an exception, see a comment above + return (g_reg, l_reg); +} + fun main() { } /** - method_id | in | out -@testcase | 1 | 1 2 3 | 123 -@testcase | 1 | 3 8 9 | 389 -@testcase | 1 | 3 7 9 | 329 -@testcase | 2 | 1 2 3 | 
123 -@testcase | 2 | 3 8 9 | 389 -@testcase | 2 | 3 7 9 | 329 -@testcase | 3 | 1 2 3 | 123 -@testcase | 3 | 3 8 9 | 389 -@testcase | 3 | 3 7 9 | 329 -@testcase | 4 | 4 8 9 | 4350009 -@testcase | 4 | 4 7 9 | 4001009 -@testcase | 5 | 4 8 9 | 4350009 -@testcase | 5 | 4 7 9 | 4001009 -@testcase | 16 | 5 | 5 -@testcase | 16 | 20 | 44 -@testcase | 17 | 5 | 5 -@testcase | 17 | 20 | 20 - -@code_hash 73240939343624734070640372352271282883450660826541545137654364443860257436623 + method_id | in | out +@testcase | 101 | 1 2 3 | 123 +@testcase | 101 | 3 8 9 | 389 +@testcase | 101 | 3 7 9 | 329 +@testcase | 102 | 1 2 3 | 123 +@testcase | 102 | 3 8 9 | 389 +@testcase | 102 | 3 7 9 | 329 +@testcase | 103 | 1 2 3 | 123 +@testcase | 103 | 3 8 9 | 389 +@testcase | 103 | 3 7 9 | 329 +@testcase | 104 | 4 8 9 | 4350009 +@testcase | 104 | 4 7 9 | 4001009 +@testcase | 105 | 4 8 9 | 4350009 +@testcase | 105 | 4 7 9 | 4001009 +@testcase | 106 | 5 | 5 +@testcase | 106 | 20 | 44 +@testcase | 107 | 5 | 5 +@testcase | 107 | 20 | 20 +@testcase | 108 | | 0 + +@code_hash 39307974281105539319288356721945232226028429128341177951717392648324358675585 */ diff --git a/tolk-tester/tests/unbalanced_ret_loops.tolk b/tolk-tester/tests/unbalanced_ret_loops.tolk index 9b59339d8..292b48dac 100644 --- a/tolk-tester/tests/unbalanced_ret_loops.tolk +++ b/tolk-tester/tests/unbalanced_ret_loops.tolk @@ -38,7 +38,7 @@ fun foo_until(x: int): int { } @method_id(4) -fun test4(x: int): (int, int) { +fun test4(x: int): (int, bool) { var s = 0; var reached = false; do { diff --git a/tolk-tester/tests/unreachable-1.tolk b/tolk-tester/tests/unreachable-1.tolk new file mode 100644 index 000000000..5b3cb1b01 --- /dev/null +++ b/tolk-tester/tests/unreachable-1.tolk @@ -0,0 +1,14 @@ +fun main(x: int) { + if (x) { + x = 10;;;;; + return x;;; + x = 20; + } + return -1; +} + +/** +@testcase | 0 | 1 | 10 +@stderr warning: unreachable code +@stderr x = 20; + */ diff --git a/tolk-tester/tests/unreachable-2.tolk 
b/tolk-tester/tests/unreachable-2.tolk new file mode 100644 index 000000000..aeadd8c64 --- /dev/null +++ b/tolk-tester/tests/unreachable-2.tolk @@ -0,0 +1,22 @@ +fun main(x: int) { + if (x) { + if (x > 10) { + return 1; // throw 1; + } else if (true) { + return -1; + } else { + return 2; // throw 2; + } + } else { + {{return 1;} + x = 30;} + } + assert(false, 10); +} + +/** +@testcase | 0 | 1 | -1 +@stderr warning: unreachable code +@stderr assert(false, 10) +@stderr x = 30 + */ diff --git a/tolk-tester/tests/var-apply.tolk b/tolk-tester/tests/var-apply.tolk index 9bee862ac..a0918c181 100644 --- a/tolk-tester/tests/var-apply.tolk +++ b/tolk-tester/tests/var-apply.tolk @@ -15,8 +15,127 @@ fun testVarApply1() { return (s.loadInt(32), s.loadInt(32)); } +@inline +fun my_throw_always() { + throw 1000; +} + +@inline +fun get_raiser() { + return my_throw_always; +} + +@method_id(102) +fun testVarApplyWithoutSavingResult() { + try { + var raiser = get_raiser(); + raiser(); // `some_var()` is always impure, the compiler has no considerations about its runtime value + return 0; + } catch (code) { + return code; + } +} + +@inline +fun sum(a: int, b: int) { + assert(a + b < 24, 1000); + return a + b; +} + +@inline +fun mul(a: int, b: int) { + assert(a * b < 24, 1001); + return a * b; +} + +fun demo_handler(op: int, query_id: int, a: int, b: int): int { + if (op == 0xF2) { + val func = query_id % 2 == 0 ? sum : mul; + val result = func(a, b); + return 0; // result not used, we test that func is nevertheless called + } + if (op == 0xF4) { + val func = query_id % 2 == 0 ? 
sum : mul; + val result = func(a, b); + return result; + } + return -1; +} + +@method_id(103) +fun testVarApplyInTernary() { + var t: tuple = createEmptyTuple(); + try { + t.tuplePush(demo_handler(0xF2, 122, 100, 200)); + } catch(code) { + t.tuplePush(code); + } + try { + t.tuplePush(demo_handler(0xF4, 122, 100, 200)); + } catch(code) { + t.tuplePush(code); + } + try { + t.tuplePush(demo_handler(0xF2, 122, 10, 10)); + } catch(code) { + t.tuplePush(code); + } + try { + t.tuplePush(demo_handler(0xF2, 123, 10, 10)); + } catch(code) { + t.tuplePush(code); + } + return t; +} + +fun always_throw2(x: int) { + throw 239 + x; +} + +global global_f: int -> void; + +@method_id(104) +fun testGlobalVarApply() { + try { + global_f = always_throw2; + global_f(1); + return 0; + } catch (code) { + return code; + } +} + +@method_id(105) +fun testVarApply2() { + var creator = createEmptyTuple; + var t = creator(); + t.tuplePush(1); + var sizer = t.tupleSize; + return sizer(t); +} + +fun getTupleLastGetter(): (tuple) -> X { + return tupleLast; +} + +@method_id(106) +fun testVarApply3() { + var t = createEmptyTuple(); + t.tuplePush(1); + t.tuplePush([2]); + var getIntAt = t.tupleAt; + var getTupleFirstInt = createEmptyTuple().tupleFirst; + var getTupleLastTuple = getTupleLastGetter(); + return (getIntAt(t, 0), getTupleFirstInt(t), getTupleLastTuple(t), getTupleLastGetter()(t)); +} + fun main() {} /** @testcase | 101 | | 1 2 +@testcase | 102 | | 1000 +@testcase | 103 | | [ 1000 1000 0 1001 ] +@testcase | 104 | | 240 +@testcase | 105 | | 1 +@testcase | 106 | | 1 1 [ 2 ] [ 2 ] */ diff --git a/tolk-tester/tests/w6.tolk b/tolk-tester/tests/w6.tolk index 2f8956440..489ffa8cc 100644 --- a/tolk-tester/tests/w6.tolk +++ b/tolk-tester/tests/w6.tolk @@ -6,7 +6,7 @@ fun main(x: int): int { if (i > 5) { return 1; } - var f: int = (i * i == 64); + var f: bool = (i * i == 64); } while (!f); return -1; } diff --git a/tolk-tester/tests/w7.tolk b/tolk-tester/tests/w7.tolk index 85081fbb3..3d68c775b 
100644 --- a/tolk-tester/tests/w7.tolk +++ b/tolk-tester/tests/w7.tolk @@ -4,7 +4,7 @@ fun test(y: int): int { if (y > 0) { return 1; } - return x > 0; + return x > 0 ? -1 : 0; } @method_id(2) diff --git a/tolk-tester/tolk-tester.js b/tolk-tester/tolk-tester.js index 2a3eb776a..c7e710214 100644 --- a/tolk-tester/tolk-tester.js +++ b/tolk-tester/tolk-tester.js @@ -347,11 +347,11 @@ class TolkTestFile { if (exit_code === 0 && this.compilation_should_fail) throw new TolkCompilationSucceededError("compilation succeeded, but it should have failed") - if (exit_code !== 0 && this.compilation_should_fail) { - for (let should_include of this.stderr_includes) - should_include.check(stderr) + for (let should_include of this.stderr_includes) // @stderr is used to check errors and warnings + should_include.check(stderr) + + if (exit_code !== 0 && this.compilation_should_fail) return - } if (exit_code !== 0 && !this.compilation_should_fail) throw new TolkCompilationFailedError(`tolk exit_code = ${exit_code}`, stderr) diff --git a/tolk-tester/tolk-tester.py b/tolk-tester/tolk-tester.py index 261ab4962..0b3c774ca 100644 --- a/tolk-tester/tolk-tester.py +++ b/tolk-tester/tolk-tester.py @@ -327,9 +327,10 @@ def run_and_check(self): if exit_code == 0 and self.compilation_should_fail: raise TolkCompilationSucceededError("compilation succeeded, but it should have failed") + for should_include in self.stderr_includes: # @stderr is used to check errors and warnings + should_include.check(stderr) + if exit_code != 0 and self.compilation_should_fail: - for should_include in self.stderr_includes: - should_include.check(stderr) return if exit_code != 0 and not self.compilation_should_fail: diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index d2decea71..9d7200249 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -7,14 +7,24 @@ set(TOLK_SOURCE compiler-state.cpp ast.cpp ast-from-tokens.cpp + constant-evaluator.cpp pipe-discover-parse-sources.cpp pipe-register-symbols.cpp + 
pipe-resolve-identifiers.cpp + pipe-calc-rvalue-lvalue.cpp + pipe-detect-unreachable.cpp + pipe-infer-types-and-calls.cpp + pipe-refine-lvalue-for-mutate.cpp + pipe-check-rvalue-lvalue.cpp + pipe-check-pure-impure.cpp + pipe-constant-folding.cpp + pipe-optimize-boolean-expr.cpp pipe-ast-to-legacy.cpp pipe-find-unused-symbols.cpp pipe-generate-fif-output.cpp - unify-types.cpp + type-system.cpp + generics-helpers.cpp abscode.cpp - gen-abscode.cpp analyzer.cpp asmops.cpp builtins.cpp diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index c1add6839..7bcb0f84f 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -16,6 +16,7 @@ */ #include "tolk.h" #include "compiler-state.h" +#include "type-system.h" namespace tolk { @@ -25,21 +26,10 @@ namespace tolk { * */ -TmpVar::TmpVar(var_idx_t _idx, TypeExpr* _type, sym_idx_t sym_idx, SrcLocation loc) - : v_type(_type), idx(_idx), sym_idx(sym_idx), coord(0), where(loc) { - if (!_type) { - v_type = TypeExpr::new_hole(); - } -} - -void TmpVar::set_location(SrcLocation loc) { - where = loc; -} - void TmpVar::dump(std::ostream& os) const { show(os); os << " : " << v_type << " (width "; - v_type->show_width(os); + os << v_type->calc_width_on_stack(); os << ")"; if (coord > 0) { os << " = _" << (coord >> 8) << '.' 
<< (coord & 255); @@ -55,8 +45,8 @@ void TmpVar::dump(std::ostream& os) const { } void TmpVar::show(std::ostream& os, int omit_idx) const { - if (!is_unnamed()) { - os << G.symbols.get_name(sym_idx); + if (v_sym) { + os << v_sym->name; if (omit_idx >= 2) { return; } @@ -149,10 +139,6 @@ void VarDescr::set_const(std::string value) { val = _Const; } -void VarDescr::set_const_nan() { - set_const(td::make_refint()); -} - void VarDescr::operator|=(const VarDescr& y) { val &= y.val; if (is_int_const() && y.is_int_const() && cmp(int_const, y.int_const) != 0) { @@ -273,7 +259,7 @@ void Op::show(std::ostream& os, const std::vector& vars, std::string pfx case _Call: os << pfx << dis << "CALL: "; show_var_list(os, left, vars); - os << " := " << (fun_ref ? fun_ref->name() : "(null)") << " "; + os << " := " << (f_sym ? f_sym->name : "(null)") << " "; if ((mode & 4) && args.size() == right.size()) { show_var_list(os, args, vars); } else { @@ -332,11 +318,11 @@ void Op::show(std::ostream& os, const std::vector& vars, std::string pfx case _GlobVar: os << pfx << dis << "GLOBVAR "; show_var_list(os, left, vars); - os << " := " << (fun_ref ? fun_ref->name() : "(null)") << std::endl; + os << " := " << (g_sym ? g_sym->name : "(null)") << std::endl; break; case _SetGlob: os << pfx << dis << "SETGLOB "; - os << (fun_ref ? fun_ref->name() : "(null)") << " := "; + os << (g_sym ? 
g_sym->name : "(null)") << " := "; show_var_list(os, right, vars); os << std::endl; break; @@ -458,22 +444,22 @@ void CodeBlob::print(std::ostream& os, int flags) const { os << "-------- END ---------\n\n"; } -var_idx_t CodeBlob::create_var(TypeExpr* var_type, var_idx_t sym_idx, SrcLocation location) { - vars.emplace_back(var_cnt, var_type, sym_idx, location); +var_idx_t CodeBlob::create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation location) { + vars.emplace_back(var_cnt, var_type, v_sym, location); return var_cnt++; } -bool CodeBlob::import_params(FormalArgList arg_list) { +bool CodeBlob::import_params(FormalArgList&& arg_list) { if (var_cnt || in_var_cnt) { return false; } std::vector list; for (const auto& par : arg_list) { - TypeExpr* arg_type; - SymDef* arg_sym; + TypePtr arg_type; + const LocalVarData* arg_sym; SrcLocation arg_loc; std::tie(arg_type, arg_sym, arg_loc) = par; - list.push_back(create_var(arg_type, arg_sym ? arg_sym->sym_idx : 0, arg_loc)); + list.push_back(create_var(arg_type, arg_sym, arg_loc)); } emplace_back(loc, Op::_Import, list); in_var_cnt = var_cnt; diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index 719df9b7d..8539afdd2 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -16,6 +16,7 @@ */ #include "tolk.h" #include "compiler-state.h" +#include "type-system.h" namespace tolk { @@ -25,38 +26,30 @@ namespace tolk { * */ -void CodeBlob::simplify_var_types() { - for (TmpVar& var : vars) { - TypeExpr::remove_indirect(var.v_type); - var.v_type->recompute_width(); - } -} - int CodeBlob::split_vars(bool strict) { int n = var_cnt, changes = 0; for (int j = 0; j < var_cnt; j++) { TmpVar& var = vars[j]; - if (strict && var.v_type->minw != var.v_type->maxw) { + int width_j = var.v_type->calc_width_on_stack(); + if (strict && width_j < 0) { throw ParseError{var.where, "variable does not have fixed width, cannot manipulate it"}; } - std::vector comp_types; - int k = var.v_type->extract_components(comp_types); - 
tolk_assert(k <= 254 && n <= 0x7fff00); - tolk_assert((unsigned)k == comp_types.size()); - if (k != 1) { - var.coord = ~((n << 8) + k); - for (int i = 0; i < k; i++) { - auto v = create_var(comp_types[i], vars[j].sym_idx, vars[j].where); - tolk_assert(v == n + i); - tolk_assert(vars[v].idx == v); - vars[v].coord = ((int)j << 8) + i + 1; - } - n += k; - ++changes; - } else if (strict && var.v_type->minw != 1) { - throw ParseError{var.where, - "cannot work with variable or variable component of width greater than one"}; + if (width_j == 1) { + continue; } + std::vector comp_types; + var.v_type->extract_components(comp_types); + tolk_assert(width_j <= 254 && n <= 0x7fff00); + tolk_assert((unsigned)width_j == comp_types.size()); + var.coord = ~((n << 8) + width_j); + for (int i = 0; i < width_j; i++) { + auto v = create_var(comp_types[i], vars[j].v_sym, vars[j].where); + tolk_assert(v == n + i); + tolk_assert(vars[v].idx == v); + vars[v].coord = ((int)j << 8) + i + 1; + } + n += width_j; + ++changes; } if (!changes) { return 0; @@ -687,7 +680,7 @@ void CodeBlob::fwd_analyze() { tolk_assert(ops && ops->cl == Op::_Import); for (var_idx_t i : ops->left) { values += i; - if (vars[i].v_type->is_int()) { + if (vars[i].v_type == TypeDataInt::create()) { values[i]->val |= VarDescr::_Int; } } @@ -732,15 +725,18 @@ VarDescrList Op::fwd_analyze(VarDescrList values) { } case _Call: { prepare_args(values); - auto func = dynamic_cast(fun_ref->value); - if (func) { + if (!f_sym->is_code_function()) { std::vector res; res.reserve(left.size()); for (var_idx_t i : left) { res.emplace_back(i); } AsmOpList tmp; - func->compile(tmp, res, args, where); // abstract interpretation of res := f (args) + if (f_sym->is_asm_function()) { + std::get(f_sym->body)->compile(tmp); // abstract interpretation of res := f (args) + } else { + std::get(f_sym->body)->compile(tmp, res, args, where); + } int j = 0; for (var_idx_t i : left) { values.add_newval(i).set_value(res[j++]); @@ -878,27 +874,10 @@ bool 
Op::set_noreturn(bool flag) { return flag; } -void Op::set_impure(const CodeBlob &code) { - // todo calling this function with `code` is a bad design (flags are assigned after Op is constructed) - // later it's better to check this somewhere in code.emplace_back() - if (code.flags & CodeBlob::_ForbidImpure) { - throw ParseError(where, "an impure operation in a pure function"); - } +void Op::set_impure_flag() { flags |= _Impure; } -void Op::set_impure(const CodeBlob &code, bool flag) { - if (flag) { - if (code.flags & CodeBlob::_ForbidImpure) { - throw ParseError(where, "an impure operation in a pure function"); - } - flags |= _Impure; - } else { - flags &= ~_Impure; - } -} - - bool Op::mark_noreturn() { switch (cl) { case _Nop: diff --git a/tolk/asmops.cpp b/tolk/asmops.cpp index 8db75091b..547922dad 100644 --- a/tolk/asmops.cpp +++ b/tolk/asmops.cpp @@ -52,10 +52,10 @@ std::ostream& operator<<(std::ostream& os, AsmOp::SReg stack_reg) { } } -AsmOp AsmOp::Const(int arg, std::string push_op, td::RefInt256 origin) { +AsmOp AsmOp::Const(int arg, const std::string& push_op) { std::ostringstream os; os << arg << ' ' << push_op; - return AsmOp::Const(os.str(), origin); + return AsmOp::Const(os.str()); } AsmOp AsmOp::make_stk2(int a, int b, const char* str, int delta) { @@ -161,36 +161,36 @@ AsmOp AsmOp::UnTuple(int a) { return AsmOp::Custom(os.str(), 1, a); } -AsmOp AsmOp::IntConst(td::RefInt256 x) { +AsmOp AsmOp::IntConst(const td::RefInt256& x) { if (x->signed_fits_bits(8)) { - return AsmOp::Const(dec_string(x) + " PUSHINT", x); + return AsmOp::Const(dec_string(x) + " PUSHINT"); } if (!x->is_valid()) { - return AsmOp::Const("PUSHNAN", x); + return AsmOp::Const("PUSHNAN"); } int k = is_pos_pow2(x); if (k >= 0) { - return AsmOp::Const(k, "PUSHPOW2", x); + return AsmOp::Const(k, "PUSHPOW2"); } k = is_pos_pow2(x + 1); if (k >= 0) { - return AsmOp::Const(k, "PUSHPOW2DEC", x); + return AsmOp::Const(k, "PUSHPOW2DEC"); } k = is_pos_pow2(-x); if (k >= 0) { - return 
AsmOp::Const(k, "PUSHNEGPOW2", x); + return AsmOp::Const(k, "PUSHNEGPOW2"); } if (!x->mod_pow2_short(23)) { - return AsmOp::Const(dec_string(x) + " PUSHINTX", x); + return AsmOp::Const(dec_string(x) + " PUSHINTX"); } - return AsmOp::Const(dec_string(x) + " PUSHINT", x); + return AsmOp::Const(dec_string(x) + " PUSHINT"); } AsmOp AsmOp::BoolConst(bool f) { return AsmOp::Const(f ? "TRUE" : "FALSE"); } -AsmOp AsmOp::Parse(std::string custom_op) { +AsmOp AsmOp::Parse(const std::string& custom_op) { if (custom_op == "NOP") { return AsmOp::Nop(); } else if (custom_op == "SWAP") { diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp index 1a1d199ec..58592011e 100644 --- a/tolk/ast-from-tokens.cpp +++ b/tolk/ast-from-tokens.cpp @@ -16,8 +16,8 @@ */ #include "ast-from-tokens.h" #include "ast.h" +#include "type-system.h" #include "platform-utils.h" -#include "type-expr.h" #include "tolk-version.h" /* @@ -75,7 +75,7 @@ static void fire_error_mix_and_or_no_parenthesis(SrcLocation loc, std::string_vi // the only way to suppress this error for the programmer is to use parenthesis // (how do we detect presence of parenthesis? 
simple: (0!=1) is ast_parenthesized_expr{ast_binary_operator}, // that's why if rhs->type == ast_binary_operator, it's not surrounded by parenthesis) -static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operator_name, AnyV lhs, AnyV rhs) { +static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operator_name, AnyExprV lhs, AnyExprV rhs) { // handle "flags & 0xFF != 0" (rhs = "0xFF != 0") if (rhs->type == ast_binary_operator && is_comparison_binary_op(rhs->as()->tok)) { fire_error_lower_precedence(loc, operator_name, rhs->as()->operator_name); @@ -90,7 +90,7 @@ static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operat // similar to above, but detect potentially invalid usage of && and || // since anyway, using parenthesis when both && and || occur in the same expression, // && and || have equal operator precedence in Tolk -static void diagnose_and_or_precedence(SrcLocation loc, AnyV lhs, TokenType rhs_tok, std::string_view rhs_operator_name) { +static void diagnose_and_or_precedence(SrcLocation loc, AnyExprV lhs, TokenType rhs_tok, std::string_view rhs_operator_name) { if (auto lhs_op = lhs->try_as()) { // handle "arg1 & arg2 | arg3" (lhs = "arg1 & arg2") if (is_bitwise_binary_op(lhs_op->tok) && is_bitwise_binary_op(rhs_tok) && lhs_op->tok != rhs_tok) { @@ -105,7 +105,7 @@ static void diagnose_and_or_precedence(SrcLocation loc, AnyV lhs, TokenType rhs_ } // diagnose "a << 8 + 1" (equivalent to "a << 9", probably unexpected) -static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bitshift_operator_name, AnyV rhs) { +static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bitshift_operator_name, AnyExprV rhs) { if (rhs->type == ast_binary_operator && is_add_or_sub_binary_op(rhs->as()->tok)) { fire_error_lower_precedence(loc, bitshift_operator_name, rhs->as()->operator_name); } @@ -122,7 +122,7 @@ static void fire_error_FunC_style_var_declaration(Lexer& lex) { } // 
replace (a == null) and similar to isNull(a) (call of a built-in function) -static AnyV maybe_replace_eq_null_with_isNull_call(V v) { +static AnyExprV maybe_replace_eq_null_with_isNull_call(V v) { bool has_null = v->get_lhs()->type == ast_null_keyword || v->get_rhs()->type == ast_null_keyword; bool replace = has_null && (v->tok == tok_eq || v->tok == tok_neq); if (!replace) { @@ -130,9 +130,10 @@ static AnyV maybe_replace_eq_null_with_isNull_call(V v) { } auto v_ident = createV(v->loc, "__isNull"); // built-in function - AnyV v_null = v->get_lhs()->type == ast_null_keyword ? v->get_rhs() : v->get_lhs(); - AnyV v_arg = createV(v->loc, v_null, false); - AnyV v_isNull = createV(v->loc, v_ident, createV(v->loc, {v_arg})); + auto v_ref = createV(v->loc, v_ident, nullptr); + AnyExprV v_null = v->get_lhs()->type == ast_null_keyword ? v->get_rhs() : v->get_lhs(); + AnyExprV v_arg = createV(v->loc, v_null, false); + AnyExprV v_isNull = createV(v->loc, v_ref, createV(v->loc, {v_arg})); if (v->tok == tok_neq) { v_isNull = createV(v->loc, "!", tok_logical_not, v_isNull); } @@ -146,98 +147,14 @@ static AnyV maybe_replace_eq_null_with_isNull_call(V v) { * */ -// TE ::= TA | TA -> TE -// TA ::= int | ... 
| cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ] -static TypeExpr* parse_type(Lexer& lex, V genericsT_list); -static TypeExpr* parse_type1(Lexer& lex, V genericsT_list) { - switch (lex.tok()) { - case tok_int: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Int); - case tok_cell: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Cell); - case tok_slice: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Slice); - case tok_builder: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Builder); - case tok_continuation: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Continutaion); - case tok_tuple: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Tuple); - case tok_auto: - lex.next(); - return TypeExpr::new_hole(); - case tok_void: - lex.next(); - return TypeExpr::new_tensor({}); - case tok_bool: - lex.error("bool type is not supported yet"); - case tok_self: - lex.error("`self` type can be used only as a return type of a function (enforcing it to be chainable)"); - case tok_identifier: - if (int idx = genericsT_list ? 
genericsT_list->lookup_idx(lex.cur_str()) : -1; idx != -1) { - lex.next(); - return genericsT_list->get_item(idx)->created_type; - } - break; - case tok_oppar: { - lex.next(); - if (lex.tok() == tok_clpar) { - lex.next(); - return TypeExpr::new_unit(); - } - std::vector sub{1, parse_type(lex, genericsT_list)}; - while (lex.tok() == tok_comma) { - lex.next(); - sub.push_back(parse_type(lex, genericsT_list)); - } - lex.expect(tok_clpar, "`)`"); - return TypeExpr::new_tensor(std::move(sub)); - } - case tok_opbracket: { - lex.next(); - if (lex.tok() == tok_clbracket) { - lex.next(); - return TypeExpr::new_tuple({}); - } - std::vector sub{1, parse_type(lex, genericsT_list)}; - while (lex.tok() == tok_comma) { - lex.next(); - sub.push_back(parse_type(lex, genericsT_list)); - } - lex.expect(tok_clbracket, "`]`"); - return TypeExpr::new_tuple(std::move(sub)); - } - default: - break; - } - lex.unexpected(""); -} +AnyExprV parse_expr(Lexer& lex); -static TypeExpr* parse_type(Lexer& lex, V genericsT_list) { - TypeExpr* res = parse_type1(lex, genericsT_list); - if (lex.tok() == tok_arrow) { - lex.next(); - TypeExpr* to = parse_type(lex, genericsT_list); - return TypeExpr::new_map(res, to); - } - return res; -} - -AnyV parse_expr(Lexer& lex); - -static AnyV parse_parameter(Lexer& lex, V genericsT_list, bool is_first) { +static AnyV parse_parameter(Lexer& lex, bool is_first) { SrcLocation loc = lex.cur_location(); // optional keyword `mutate` meaning that a function will mutate a passed argument (like passed by reference) bool declared_as_mutate = false; - bool is_param_self = false; if (lex.tok() == tok_mutate) { lex.next(); declared_as_mutate = true; @@ -252,24 +169,16 @@ static AnyV parse_parameter(Lexer& lex, V genericsT_list, bo lex.error("`self` can only be the first parameter"); } param_name = "self"; - is_param_self = true; } else if (lex.tok() != tok_underscore) { lex.unexpected("parameter name"); } - auto v_ident = createV(lex.cur_location(), param_name); lex.next(); - 
// parameter type after colon, also mandatory (even explicit ":auto") + // parameter type after colon are mandatory lex.expect(tok_colon, "`: `"); - TypeExpr* param_type = parse_type(lex, genericsT_list); - if (declared_as_mutate && !param_type->has_fixed_width()) { - throw ParseError(loc, "`mutate` parameter must be strictly typed"); - } - if (is_param_self && !param_type->has_fixed_width()) { - throw ParseError(loc, "`self` parameter must be strictly typed"); - } + TypePtr param_type = parse_type_from_tokens(lex); - return createV(loc, v_ident, param_type, declared_as_mutate); + return createV(loc, param_name, param_type, declared_as_mutate); } static AnyV parse_global_var_declaration(Lexer& lex, const std::vector>& annotations) { @@ -282,7 +191,7 @@ static AnyV parse_global_var_declaration(Lexer& lex, const std::vector(lex.cur_location(), lex.cur_str()); lex.next(); lex.expect(tok_colon, "`:`"); - TypeExpr* declared_type = parse_type(lex, nullptr); + TypePtr declared_type = parse_type_from_tokens(lex); if (lex.tok() == tok_comma) { lex.error("multiple declarations are not allowed, split globals on separate lines"); } @@ -302,21 +211,13 @@ static AnyV parse_constant_declaration(Lexer& lex, const std::vector(lex.cur_location(), lex.cur_str()); lex.next(); - TypeExpr *declared_type = nullptr; + TypePtr declared_type = nullptr; if (lex.tok() == tok_colon) { lex.next(); - if (lex.tok() == tok_int) { - declared_type = TypeExpr::new_atomic(TypeExpr::_Int); - lex.next(); - } else if (lex.tok() == tok_slice) { - declared_type = TypeExpr::new_atomic(TypeExpr::_Slice); - lex.next(); - } else { - lex.error("a constant can be int or slice only"); - } + declared_type = parse_type_from_tokens(lex); } lex.expect(tok_assign, "`=`"); - AnyV init_value = parse_expr(lex); + AnyExprV init_value = parse_expr(lex); if (lex.tok() == tok_comma) { lex.error("multiple declarations are not allowed, split constants on separate lines"); } @@ -325,15 +226,15 @@ static AnyV 
parse_constant_declaration(Lexer& lex, const std::vector parse_parameter_list(Lexer& lex, V genericsT_list) { +static V parse_parameter_list(Lexer& lex) { SrcLocation loc = lex.cur_location(); std::vector params; lex.expect(tok_oppar, "parameter list"); if (lex.tok() != tok_clpar) { - params.push_back(parse_parameter(lex, genericsT_list, true)); + params.push_back(parse_parameter(lex, true)); while (lex.tok() == tok_comma) { lex.next(); - params.push_back(parse_parameter(lex, genericsT_list, false)); + params.push_back(parse_parameter(lex, false)); } } lex.expect(tok_clpar, "`)`"); @@ -341,7 +242,7 @@ static V parse_parameter_list(Lexer& lex, V(loc, expr, passed_as_mutate); } static V parse_argument_list(Lexer& lex) { SrcLocation loc = lex.cur_location(); - std::vector args; + std::vector args; lex.expect(tok_oppar, "`(`"); if (lex.tok() != tok_clpar) { args.push_back(parse_argument(lex)); @@ -370,8 +271,28 @@ static V parse_argument_list(Lexer& lex) { return createV(loc, std::move(args)); } +static V parse_maybe_instantiationTs_after_identifier(Lexer& lex) { + lex.check(tok_lt, "`<`"); + Lexer::SavedPositionForLookahead backup = lex.save_parsing_position(); + try { + SrcLocation loc = lex.cur_location(); + lex.next(); + std::vector instantiationTs; + instantiationTs.push_back(createV(lex.cur_location(), parse_type_from_tokens(lex))); + while (lex.tok() == tok_comma) { + lex.next(); + instantiationTs.push_back(createV(lex.cur_location(), parse_type_from_tokens(lex))); + } + lex.expect(tok_gt, "`>`"); + return createV(loc, std::move(instantiationTs)); + } catch (const ParseError&) { + lex.restore_position(backup); + return nullptr; + } +} + // parse (expr) / [expr] / identifier / number -static AnyV parse_expr100(Lexer& lex) { +static AnyExprV parse_expr100(Lexer& lex) { SrcLocation loc = lex.cur_location(); switch (lex.tok()) { case tok_oppar: { @@ -380,12 +301,12 @@ static AnyV parse_expr100(Lexer& lex) { lex.next(); return createV(loc, {}); } - AnyV first = 
parse_expr(lex); + AnyExprV first = parse_expr(lex); if (lex.tok() == tok_clpar) { lex.next(); - return createV(loc, first); + return createV(loc, first); } - std::vector items(1, first); + std::vector items(1, first); while (lex.tok() == tok_comma) { lex.next(); items.emplace_back(parse_expr(lex)); @@ -397,20 +318,24 @@ static AnyV parse_expr100(Lexer& lex) { lex.next(); if (lex.tok() == tok_clbracket) { lex.next(); - return createV(loc, {}); + return createV(loc, {}); } - std::vector items(1, parse_expr(lex)); + std::vector items(1, parse_expr(lex)); while (lex.tok() == tok_comma) { lex.next(); items.emplace_back(parse_expr(lex)); } lex.expect(tok_clbracket, "`]`"); - return createV(loc, std::move(items)); + return createV(loc, std::move(items)); } case tok_int_const: { - std::string_view int_val = lex.cur_str(); + std::string_view orig_str = lex.cur_str(); + td::RefInt256 intval = td::string_to_int256(static_cast(orig_str)); + if (intval.is_null() || !intval->signed_fits_bits(257)) { + lex.error("invalid integer constant"); + } lex.next(); - return createV(loc, int_val); + return createV(loc, std::move(intval), orig_str); } case tok_string_const: { std::string_view str_val = lex.cur_str(); @@ -440,12 +365,17 @@ static AnyV parse_expr100(Lexer& lex) { } case tok_self: { lex.next(); - return createV(loc); + auto v_ident = createV(loc, "self"); + return createV(loc, v_ident, nullptr); } case tok_identifier: { - std::string_view str_val = lex.cur_str(); + auto v_ident = createV(loc, lex.cur_str()); + V v_instantiationTs = nullptr; lex.next(); - return createV(loc, str_val); + if (lex.tok() == tok_lt) { + v_instantiationTs = parse_maybe_instantiationTs_after_identifier(lex); + } + return createV(loc, v_ident, v_instantiationTs); } default: { // show a proper error for `int i` (FunC-style declarations) @@ -458,51 +388,74 @@ static AnyV parse_expr100(Lexer& lex) { } } -// parse E(args) -static AnyV parse_expr90(Lexer& lex) { - AnyV res = parse_expr100(lex); - if 
(lex.tok() == tok_oppar) { - return createV(res->loc, res, parse_argument_list(lex)); +// parse E(...) (left-to-right) +static AnyExprV parse_expr90(Lexer& lex) { + AnyExprV res = parse_expr100(lex); + while (lex.tok() == tok_oppar) { + res = createV(res->loc, res, parse_argument_list(lex)); } return res; } -// parse E.method(...) (left-to-right) -static AnyV parse_expr80(Lexer& lex) { - AnyV lhs = parse_expr90(lex); +// parse E.field and E.method(...) (left-to-right) +static AnyExprV parse_expr80(Lexer& lex) { + AnyExprV lhs = parse_expr90(lex); while (lex.tok() == tok_dot) { SrcLocation loc = lex.cur_location(); lex.next(); - lex.check(tok_identifier, "method name"); - std::string_view method_name = lex.cur_str(); - lex.next(); - lhs = createV(loc, method_name, lhs, parse_argument_list(lex)); + V v_ident = nullptr; + V v_instantiationTs = nullptr; + if (lex.tok() == tok_identifier) { + v_ident = createV(lex.cur_location(), lex.cur_str()); + lex.next(); + if (lex.tok() == tok_lt) { + v_instantiationTs = parse_maybe_instantiationTs_after_identifier(lex); + } + } else { + lex.unexpected("method name"); + } + lhs = createV(loc, lhs, v_ident, v_instantiationTs); + while (lex.tok() == tok_oppar) { + lhs = createV(lex.cur_location(), lhs, parse_argument_list(lex)); + } } return lhs; } // parse ! 
~ - + E (unary) -static AnyV parse_expr75(Lexer& lex) { +static AnyExprV parse_expr75(Lexer& lex) { TokenType t = lex.tok(); if (t == tok_logical_not || t == tok_bitwise_not || t == tok_minus || t == tok_plus) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr75(lex); + AnyExprV rhs = parse_expr75(lex); return createV(loc, operator_name, t, rhs); } return parse_expr80(lex); } +// parse E as +static AnyExprV parse_expr40(Lexer& lex) { + AnyExprV lhs = parse_expr75(lex); + if (lex.tok() == tok_as) { + SrcLocation loc = lex.cur_location(); + lex.next(); + TypePtr cast_to_type = parse_type_from_tokens(lex); + lhs = createV(loc, lhs, cast_to_type); + } + return lhs; +} + // parse E * / % ^/ ~/ E (left-to-right) -static AnyV parse_expr30(Lexer& lex) { - AnyV lhs = parse_expr75(lex); +static AnyExprV parse_expr30(Lexer& lex) { + AnyExprV lhs = parse_expr40(lex); TokenType t = lex.tok(); while (t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr75(lex); + AnyExprV rhs = parse_expr40(lex); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); } @@ -510,14 +463,14 @@ static AnyV parse_expr30(Lexer& lex) { } // parse E + - E (left-to-right) -static AnyV parse_expr20(Lexer& lex) { - AnyV lhs = parse_expr30(lex); +static AnyExprV parse_expr20(Lexer& lex) { + AnyExprV lhs = parse_expr30(lex); TokenType t = lex.tok(); while (t == tok_minus || t == tok_plus) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr30(lex); + AnyExprV rhs = parse_expr30(lex); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); } @@ -525,14 +478,14 @@ static AnyV parse_expr20(Lexer& lex) { } // parse E << >> ~>> ^>> E (left-to-right) -static AnyV parse_expr17(Lexer& lex) { - AnyV lhs = 
parse_expr20(lex); +static AnyExprV parse_expr17(Lexer& lex) { + AnyExprV lhs = parse_expr20(lex); TokenType t = lex.tok(); while (t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr20(lex); + AnyExprV rhs = parse_expr20(lex); diagnose_addition_in_bitshift(loc, operator_name, rhs); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); @@ -541,14 +494,14 @@ static AnyV parse_expr17(Lexer& lex) { } // parse E == < > <= >= != <=> E (left-to-right) -static AnyV parse_expr15(Lexer& lex) { - AnyV lhs = parse_expr17(lex); +static AnyExprV parse_expr15(Lexer& lex) { + AnyExprV lhs = parse_expr17(lex); TokenType t = lex.tok(); if (t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr17(lex); + AnyExprV rhs = parse_expr17(lex); lhs = createV(loc, operator_name, t, lhs, rhs); if (t == tok_eq || t == tok_neq) { lhs = maybe_replace_eq_null_with_isNull_call(lhs->as()); @@ -558,14 +511,14 @@ static AnyV parse_expr15(Lexer& lex) { } // parse E & | ^ E (left-to-right) -static AnyV parse_expr14(Lexer& lex) { - AnyV lhs = parse_expr15(lex); +static AnyExprV parse_expr14(Lexer& lex) { + AnyExprV lhs = parse_expr15(lex); TokenType t = lex.tok(); while (t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr15(lex); + AnyExprV rhs = parse_expr15(lex); diagnose_bitwise_precedence(loc, operator_name, lhs, rhs); diagnose_and_or_precedence(loc, lhs, t, operator_name); lhs = createV(loc, operator_name, t, lhs, rhs); @@ -575,14 +528,14 @@ static AnyV parse_expr14(Lexer& lex) { } // parse E && || E (left-to-right) -static 
AnyV parse_expr13(Lexer& lex) { - AnyV lhs = parse_expr14(lex); +static AnyExprV parse_expr13(Lexer& lex) { + AnyExprV lhs = parse_expr14(lex); TokenType t = lex.tok(); while (t == tok_logical_and || t == tok_logical_or) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV rhs = parse_expr14(lex); + AnyExprV rhs = parse_expr14(lex); diagnose_and_or_precedence(loc, lhs, t, operator_name); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); @@ -591,46 +544,51 @@ static AnyV parse_expr13(Lexer& lex) { } // parse E = += -= E and E ? E : E (right-to-left) -static AnyV parse_expr10(Lexer& lex) { - AnyV lhs = parse_expr13(lex); +static AnyExprV parse_expr10(Lexer& lex) { + AnyExprV lhs = parse_expr13(lex); TokenType t = lex.tok(); + if (t == tok_assign) { + SrcLocation loc = lex.cur_location(); + lex.next(); + AnyExprV rhs = parse_expr10(lex); + return createV(loc, lhs, rhs); + } if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || - t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor || - t == tok_assign) { + t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { SrcLocation loc = lex.cur_location(); - std::string_view operator_name = lex.cur_str(); + std::string_view operator_name = lex.cur_str().substr(0, lex.cur_str().size() - 1); // "+" for += lex.next(); - AnyV rhs = parse_expr10(lex); - return createV(loc, operator_name, t, lhs, rhs); + AnyExprV rhs = parse_expr10(lex); + return createV(loc, operator_name, t, lhs, rhs); } if (t == tok_question) { SrcLocation loc = lex.cur_location(); lex.next(); - AnyV when_true = parse_expr10(lex); + AnyExprV when_true = parse_expr10(lex); lex.expect(tok_colon, "`:`"); - AnyV when_false = parse_expr10(lex); + AnyExprV when_false = parse_expr10(lex); return createV(loc, lhs, when_true, when_false); } return lhs; } 
-AnyV parse_expr(Lexer& lex) { +AnyExprV parse_expr(Lexer& lex) { return parse_expr10(lex); } AnyV parse_statement(Lexer& lex); -static AnyV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { +static AnyExprV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { SrcLocation loc = lex.cur_location(); if (lex.tok() == tok_oppar) { lex.next(); - AnyV first = parse_var_declaration_lhs(lex, is_immutable); + AnyExprV first = parse_var_declaration_lhs(lex, is_immutable); if (lex.tok() == tok_clpar) { lex.next(); - return createV(loc, first); + return first; } - std::vector args(1, first); + std::vector args(1, first); while (lex.tok() == tok_comma) { lex.next(); args.push_back(parse_var_declaration_lhs(lex, is_immutable)); @@ -640,57 +598,57 @@ static AnyV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { } if (lex.tok() == tok_opbracket) { lex.next(); - std::vector args(1, parse_var_declaration_lhs(lex, is_immutable)); + std::vector args(1, parse_var_declaration_lhs(lex, is_immutable)); while (lex.tok() == tok_comma) { lex.next(); args.push_back(parse_var_declaration_lhs(lex, is_immutable)); } lex.expect(tok_clbracket, "`]`"); - return createV(loc, std::move(args)); + return createV(loc, std::move(args)); } if (lex.tok() == tok_identifier) { auto v_ident = createV(loc, lex.cur_str()); - TypeExpr* declared_type = nullptr; + TypePtr declared_type = nullptr; bool marked_as_redef = false; lex.next(); if (lex.tok() == tok_colon) { lex.next(); - declared_type = parse_type(lex, nullptr); + declared_type = parse_type_from_tokens(lex); } else if (lex.tok() == tok_redef) { lex.next(); marked_as_redef = true; } - return createV(loc, v_ident, declared_type, is_immutable, marked_as_redef); + return createV(loc, v_ident, declared_type, is_immutable, marked_as_redef); } if (lex.tok() == tok_underscore) { - TypeExpr* declared_type = nullptr; + TypePtr declared_type = nullptr; lex.next(); if (lex.tok() == tok_colon) { lex.next(); - declared_type = parse_type(lex, 
nullptr); + declared_type = parse_type_from_tokens(lex); } - return createV(loc, createV(loc), declared_type, true, false); + return createV(loc, createV(loc, ""), declared_type, true, false); } lex.unexpected("variable name"); } -static AnyV parse_local_vars_declaration(Lexer& lex) { +static AnyV parse_local_vars_declaration_assignment(Lexer& lex) { SrcLocation loc = lex.cur_location(); bool is_immutable = lex.tok() == tok_val; lex.next(); - AnyV lhs = parse_var_declaration_lhs(lex, is_immutable); + AnyExprV lhs = createV(loc, parse_var_declaration_lhs(lex, is_immutable)); if (lex.tok() != tok_assign) { lex.error("variables declaration must be followed by assignment: `var xxx = ...`"); } lex.next(); - AnyV assigned_val = parse_expr(lex); + AnyExprV rhs = parse_expr(lex); if (lex.tok() == tok_comma) { lex.error("multiple declarations are not allowed, split variables on separate lines"); } lex.expect(tok_semicolon, "`;`"); - return createV(loc, lhs, assigned_val); + return createV(loc, lhs, rhs); } static V parse_sequence(Lexer& lex) { @@ -708,32 +666,27 @@ static V parse_sequence(Lexer& lex) { static AnyV parse_return_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_return, "`return`"); - AnyV child = lex.tok() == tok_semicolon // `return;` actually means `return ();` (which is void) - ? createV(lex.cur_location(), {}) + AnyExprV child = lex.tok() == tok_semicolon // `return;` actually means "nothing" (inferred as void) + ? 
createV(lex.cur_location()) : parse_expr(lex); lex.expect(tok_semicolon, "`;`"); return createV(loc, child); } -static AnyV parse_if_statement(Lexer& lex, bool is_ifnot) { +static AnyV parse_if_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_if, "`if`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); + AnyExprV cond = parse_expr(lex); lex.expect(tok_clpar, "`)`"); - // replace if(!expr) with ifnot(expr) (this should be done later, but for now, let this be right at parsing time) - if (auto v_not = cond->try_as(); v_not && v_not->tok == tok_logical_not) { - is_ifnot = !is_ifnot; - cond = v_not->get_rhs(); - } V if_body = parse_sequence(lex); V else_body = nullptr; if (lex.tok() == tok_else) { // else if(e) { } or else { } lex.next(); if (lex.tok() == tok_if) { - AnyV v_inner_if = parse_if_statement(lex, false); + AnyV v_inner_if = parse_if_statement(lex); else_body = createV(v_inner_if->loc, lex.cur_location(), {v_inner_if}); } else { else_body = parse_sequence(lex); @@ -741,14 +694,14 @@ static AnyV parse_if_statement(Lexer& lex, bool is_ifnot) { } else { // no 'else', create empty block else_body = createV(lex.cur_location(), lex.cur_location(), {}); } - return createV(loc, is_ifnot, cond, if_body, else_body); + return createV(loc, false, cond, if_body, else_body); } static AnyV parse_repeat_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_repeat, "`repeat`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); + AnyExprV cond = parse_expr(lex); lex.expect(tok_clpar, "`)`"); V body = parse_sequence(lex); return createV(loc, cond, body); @@ -758,7 +711,7 @@ static AnyV parse_while_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_while, "`while`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); + AnyExprV cond = parse_expr(lex); lex.expect(tok_clpar, "`)`"); V body = parse_sequence(lex); return createV(loc, cond, body); @@ -770,31 +723,38 @@ 
static AnyV parse_do_while_statement(Lexer& lex) { V body = parse_sequence(lex); lex.expect(tok_while, "`while`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); + AnyExprV cond = parse_expr(lex); lex.expect(tok_clpar, "`)`"); lex.expect(tok_semicolon, "`;`"); return createV(loc, body, cond); } -static AnyV parse_catch_variable(Lexer& lex) { +static AnyExprV parse_catch_variable(Lexer& lex) { SrcLocation loc = lex.cur_location(); if (lex.tok() == tok_identifier) { std::string_view var_name = lex.cur_str(); lex.next(); - return createV(loc, var_name); + auto v_ident = createV(loc, var_name); + return createV(loc, v_ident, nullptr); } if (lex.tok() == tok_underscore) { lex.next(); - return createV(loc); + auto v_ident = createV(loc, ""); + return createV(loc, v_ident, nullptr); } lex.unexpected("identifier"); } +static AnyExprV create_catch_underscore_variable(const Lexer& lex) { + auto v_ident = createV(lex.cur_location(), ""); + return createV(lex.cur_location(), v_ident, nullptr); +} + static AnyV parse_throw_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_throw, "`throw`"); - AnyV thrown_code, thrown_arg; + AnyExprV thrown_code, thrown_arg; if (lex.tok() == tok_oppar) { // throw (code) or throw (code, arg) lex.next(); thrown_code = parse_expr(lex); @@ -802,12 +762,12 @@ static AnyV parse_throw_statement(Lexer& lex) { lex.next(); thrown_arg = parse_expr(lex); } else { - thrown_arg = createV(loc); + thrown_arg = createV(loc); } lex.expect(tok_clpar, "`)`"); } else { // throw code thrown_code = parse_expr(lex); - thrown_arg = createV(loc); + thrown_arg = createV(loc); } lex.expect(tok_semicolon, "`;`"); @@ -819,8 +779,8 @@ static AnyV parse_assert_statement(Lexer& lex) { lex.expect(tok_assert, "`assert`"); lex.expect(tok_oppar, "`(`"); - AnyV cond = parse_expr(lex); - AnyV thrown_code; + AnyExprV cond = parse_expr(lex); + AnyExprV thrown_code; if (lex.tok() == tok_comma) { // assert(cond, code) lex.next(); thrown_code = 
parse_expr(lex); @@ -840,7 +800,7 @@ static AnyV parse_try_catch_statement(Lexer& lex) { lex.expect(tok_try, "`try`"); V try_body = parse_sequence(lex); - std::vector catch_args; + std::vector catch_args; lex.expect(tok_catch, "`catch`"); SrcLocation catch_loc = lex.cur_location(); if (lex.tok() == tok_oppar) { @@ -850,12 +810,12 @@ static AnyV parse_try_catch_statement(Lexer& lex) { lex.next(); catch_args.push_back(parse_catch_variable(lex)); } else { // catch (excNo) -> catch (excNo, _) - catch_args.push_back(createV(catch_loc)); + catch_args.push_back(create_catch_underscore_variable(lex)); } lex.expect(tok_clpar, "`)`"); } else { // catch -> catch (_, _) - catch_args.push_back(createV(catch_loc)); - catch_args.push_back(createV(catch_loc)); + catch_args.push_back(create_catch_underscore_variable(lex)); + catch_args.push_back(create_catch_underscore_variable(lex)); } V catch_expr = createV(catch_loc, std::move(catch_args)); @@ -865,15 +825,15 @@ static AnyV parse_try_catch_statement(Lexer& lex) { AnyV parse_statement(Lexer& lex) { switch (lex.tok()) { - case tok_var: - case tok_val: - return parse_local_vars_declaration(lex); + case tok_var: // `var x = 0` is technically an expression, but can not appear in "any place", + case tok_val: // only as a separate declaration + return parse_local_vars_declaration_assignment(lex); case tok_opbrace: return parse_sequence(lex); case tok_return: return parse_return_statement(lex); case tok_if: - return parse_if_statement(lex, false); + return parse_if_statement(lex); case tok_repeat: return parse_repeat_statement(lex); case tok_do: @@ -889,13 +849,13 @@ AnyV parse_statement(Lexer& lex) { case tok_semicolon: { SrcLocation loc = lex.cur_location(); lex.next(); - return createV(loc); + return createV(loc); } case tok_break: case tok_continue: lex.error("break/continue from loops are not supported yet"); default: { - AnyV expr = parse_expr(lex); + AnyExprV expr = parse_expr(lex); lex.expect(tok_semicolon, "`;`"); return expr; 
} @@ -949,12 +909,10 @@ static AnyV parse_genericsT_list(Lexer& lex) { SrcLocation loc = lex.cur_location(); std::vector genericsT_items; lex.expect(tok_lt, "`<`"); - int idx = 0; while (true) { lex.check(tok_identifier, "T"); std::string_view nameT = lex.cur_str(); - TypeExpr* type = TypeExpr::new_var(idx++); - genericsT_items.emplace_back(createV(lex.cur_location(), type, nameT)); + genericsT_items.emplace_back(createV(lex.cur_location(), nameT)); lex.next(); if (lex.tok() != tok_comma) { break; @@ -976,7 +934,7 @@ static V parse_annotation(Lexer& lex) { if (lex.tok() == tok_oppar) { SrcLocation loc_args = lex.cur_location(); lex.next(); - std::vector args; + std::vector args; args.push_back(parse_expr(lex)); while (lex.tok() == tok_comma) { lex.next(); @@ -1037,11 +995,11 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vectoras(); } - V v_param_list = parse_parameter_list(lex, genericsT_list)->as(); - bool accepts_self = !v_param_list->empty() && v_param_list->get_param(0)->get_identifier()->name == "self"; + V v_param_list = parse_parameter_list(lex)->as(); + bool accepts_self = !v_param_list->empty() && v_param_list->get_param(0)->param_name == "self"; int n_mutate_params = v_param_list->get_mutate_params_count(); - TypeExpr* ret_type = nullptr; + TypePtr ret_type = nullptr; bool returns_self = false; if (lex.tok() == tok_colon) { // : (if absent, it means "auto infer", not void) lex.next(); @@ -1051,9 +1009,9 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector ret_tensor_items; - ret_tensor_items.reserve(1 + n_mutate_params); - for (AnyV v_param : v_param_list->get_params()) { - if (v_param->as()->declared_as_mutate) { - ret_tensor_items.emplace_back(v_param->as()->param_type); - } - } - ret_tensor_items.emplace_back(ret_type ? 
ret_type : TypeExpr::new_hole()); - ret_type = TypeExpr::new_tensor(std::move(ret_tensor_items)); - } - AnyV v_body = nullptr; if (lex.tok() == tok_builtin) { - v_body = createV(lex.cur_location()); + v_body = createV(lex.cur_location()); lex.next(); lex.expect(tok_semicolon, "`;`"); } else if (lex.tok() == tok_opbrace) { @@ -1093,32 +1039,43 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector(loc, v_ident, v_param_list, v_body); - f_declaration->ret_type = ret_type ? ret_type : TypeExpr::new_hole(); - f_declaration->is_entrypoint = is_entrypoint; - f_declaration->genericsT_list = genericsT_list; - f_declaration->marked_as_get_method = is_get_method; - f_declaration->marked_as_builtin = v_body->type == ast_empty; - f_declaration->accepts_self = accepts_self; - f_declaration->returns_self = returns_self; + int flags = 0; + if (is_entrypoint) { + flags |= FunctionData::flagIsEntrypoint; + } + if (is_get_method) { + flags |= FunctionData::flagGetMethod; + } + if (accepts_self) { + flags |= FunctionData::flagAcceptsSelf; + } + if (returns_self) { + flags |= FunctionData::flagReturnsSelf; + } + td::RefInt256 method_id; for (auto v_annotation : annotations) { switch (v_annotation->kind) { case AnnotationKind::inline_simple: - f_declaration->marked_as_inline = true; + flags |= FunctionData::flagInline; break; case AnnotationKind::inline_ref: - f_declaration->marked_as_inline_ref = true; + flags |= FunctionData::flagInlineRef; break; case AnnotationKind::pure: - f_declaration->marked_as_pure = true; + flags |= FunctionData::flagMarkedAsPure; break; - case AnnotationKind::method_id: + case AnnotationKind::method_id: { if (is_get_method || genericsT_list || is_entrypoint || n_mutate_params || accepts_self) { v_annotation->error("@method_id can be specified only for regular functions"); } - f_declaration->method_id = v_annotation->get_arg()->get_item(0)->as(); + auto v_int = v_annotation->get_arg()->get_item(0)->as(); + if (v_int->intval.is_null() || 
!v_int->intval->signed_fits_bits(32)) { + v_int->error("invalid integer constant"); + } + method_id = v_int->intval; break; + } case AnnotationKind::deprecated: // no special handling break; @@ -1128,7 +1085,7 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector(loc, v_ident, v_param_list, v_body, ret_type, genericsT_list, std::move(method_id), flags); } static AnyV parse_tolk_required_version(Lexer& lex) { @@ -1142,10 +1099,10 @@ static AnyV parse_tolk_required_version(Lexer& lex) { loc.show_warning("the contract is written in Tolk v" + semver + ", but you use Tolk compiler v" + TOLK_VERSION + "; probably, it will lead to compilation errors or hash changes"); } - return createV(loc, tok_eq, semver); // semicolon is not necessary + return createV(loc, semver); // semicolon is not necessary } -static AnyV parse_import_statement(Lexer& lex) { +static AnyV parse_import_directive(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_import, "`import`"); lex.check(tok_string_const, "source file name"); @@ -1155,7 +1112,7 @@ static AnyV parse_import_statement(Lexer& lex) { } auto v_str = createV(lex.cur_location(), rel_filename, 0); lex.next(); - return createV(loc, v_str); // semicolon is not necessary + return createV(loc, v_str); // semicolon is not necessary } // the main (exported) function @@ -1176,7 +1133,7 @@ AnyV parse_src_file_to_ast(const SrcFile* file) { if (!annotations.empty()) { lex.unexpected("declaration after @annotations"); } - toplevel_declarations.push_back(parse_import_statement(lex)); + toplevel_declarations.push_back(parse_import_directive(lex)); break; case tok_semicolon: if (!annotations.empty()) { diff --git a/tolk/ast-from-tokens.h b/tolk/ast-from-tokens.h index 5f380c569..39574f9c2 100644 --- a/tolk/ast-from-tokens.h +++ b/tolk/ast-from-tokens.h @@ -16,12 +16,10 @@ */ #pragma once -#include "src-file.h" +#include "fwd-declarations.h" namespace tolk { -struct ASTNodeBase; - -const ASTNodeBase* 
parse_src_file_to_ast(const SrcFile* file); +AnyV parse_src_file_to_ast(const SrcFile* file); } // namespace tolk diff --git a/tolk/ast-replacer.h b/tolk/ast-replacer.h index 478994e8b..c8350747c 100644 --- a/tolk/ast-replacer.h +++ b/tolk/ast-replacer.h @@ -35,25 +35,39 @@ namespace tolk { class ASTReplacer { protected: - GNU_ATTRIBUTE_ALWAYS_INLINE static AnyV replace_children(const ASTNodeLeaf* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE static AnyExprV replace_children(const ASTExprLeaf* v) { return v; } - GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeUnary* v) { - auto* v_mutable = const_cast(v); + GNU_ATTRIBUTE_ALWAYS_INLINE AnyExprV replace_children(const ASTExprUnary* v) { + auto* v_mutable = const_cast(v); v_mutable->child = replace(v_mutable->child); return v_mutable; } - GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeBinary* v) { - auto* v_mutable = const_cast(v); + GNU_ATTRIBUTE_ALWAYS_INLINE AnyExprV replace_children(const ASTExprBinary* v) { + auto* v_mutable = const_cast(v); v_mutable->lhs = replace(v->lhs); v_mutable->rhs = replace(v->rhs); return v_mutable; } - GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeVararg* v) { - auto* v_mutable = const_cast(v); + GNU_ATTRIBUTE_ALWAYS_INLINE AnyExprV replace_children(const ASTExprVararg* v) { + auto* v_mutable = const_cast(v); + for (AnyExprV& child : v_mutable->children) { + child = replace(child); + } + return v_mutable; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTStatementUnary* v) { + auto* v_mutable = const_cast(v); + v_mutable->child = replace(v_mutable->child); + return v_mutable; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTStatementVararg* v) { + auto* v_mutable = const_cast(v); for (AnyV& child : v_mutable->children) { child = replace(child); } @@ -64,97 +78,120 @@ class ASTReplacer { virtual ~ASTReplacer() = default; virtual AnyV replace(AnyV v) = 0; + virtual AnyExprV replace(AnyExprV v) = 0; }; class 
ASTReplacerInFunctionBody : public ASTReplacer { protected: using parent = ASTReplacerInFunctionBody; - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - - AnyV replace(AnyV v) final { + // expressions + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { 
return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + virtual AnyExprV replace(V v) { return replace_children(v); } + // statements + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + + AnyExprV replace(AnyExprV v) final { switch (v->type) { - case ast_empty: return replace(v->as()); - case ast_parenthesized_expr: return replace(v->as()); + case ast_empty_expression: return 
replace(v->as()); + case ast_parenthesized_expression: return replace(v->as()); case ast_tensor: return replace(v->as()); - case ast_tensor_square: return replace(v->as()); - case ast_identifier: return replace(v->as()); + case ast_typed_tuple: return replace(v->as()); + case ast_reference: return replace(v->as()); + case ast_local_var_lhs: return replace(v->as()); + case ast_local_vars_declaration: return replace(v->as()); case ast_int_const: return replace(v->as()); case ast_string_const: return replace(v->as()); case ast_bool_const: return replace(v->as()); case ast_null_keyword: return replace(v->as()); - case ast_self_keyword: return replace(v->as()); + case ast_argument: return replace(v->as()); + case ast_argument_list: return replace(v->as()); + case ast_dot_access: return replace(v->as()); case ast_function_call: return replace(v->as()); - case ast_dot_method_call: return replace(v->as()); case ast_underscore: return replace(v->as()); + case ast_assign: return replace(v->as()); + case ast_set_assign: return replace(v->as()); case ast_unary_operator: return replace(v->as()); case ast_binary_operator: return replace(v->as()); case ast_ternary_operator: return replace(v->as()); - case ast_return_statement: return replace(v->as()); + case ast_cast_as_operator: return replace(v->as()); + default: + throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::replace"); + } + } + + AnyV replace(AnyV v) final { + switch (v->type) { + case ast_empty_statement: return replace(v->as()); case ast_sequence: return replace(v->as()); + case ast_return_statement: return replace(v->as()); + case ast_if_statement: return replace(v->as()); case ast_repeat_statement: return replace(v->as()); case ast_while_statement: return replace(v->as()); case ast_do_while_statement: return replace(v->as()); case ast_throw_statement: return replace(v->as()); case ast_assert_statement: return replace(v->as()); case ast_try_catch_statement: return replace(v->as()); - case ast_if_statement: 
return replace(v->as()); - case ast_local_var: return replace(v->as()); - case ast_local_vars_declaration: return replace(v->as()); - case ast_asm_body: return replace(v->as()); - default: - throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::visit"); +#ifdef TOLK_DEBUG + case ast_asm_body: + throw UnexpectedASTNodeType(v, "ASTReplacer::replace"); +#endif + default: { + // be very careful, don't forget to handle all statements (not expressions) above! + AnyExprV as_expr = reinterpret_cast(v); + return replace(as_expr); + } } } public: - void start_replacing_in_function(V v) { - replace(v->get_body()); + virtual bool should_visit_function(const FunctionData* fun_ref) = 0; + + void start_replacing_in_function(const FunctionData* fun_ref, V v_function) { + replace(v_function->get_body()); } }; -class ASTReplacerAllFunctionsInFile : public ASTReplacerInFunctionBody { -protected: - using parent = ASTReplacerAllFunctionsInFile; - virtual bool should_enter_function(V v) = 0; +const std::vector& get_all_not_builtin_functions(); -public: - void start_replacing_in_file(V v_file) { - for (AnyV v : v_file->get_toplevel_declarations()) { - if (auto v_function = v->try_as()) { - if (should_enter_function(v_function)) { - replace(v_function->get_body()); - } - } +template +void replace_ast_of_all_functions() { + BodyReplacerT visitor; + for (const FunctionData* fun_ref : get_all_not_builtin_functions()) { + if (visitor.should_visit_function(fun_ref)) { + visitor.start_replacing_in_function(fun_ref, fun_ref->ast_root->as()); } } -}; +} } // namespace tolk diff --git a/tolk/ast-replicator.h b/tolk/ast-replicator.h new file mode 100644 index 000000000..02198adb8 --- /dev/null +++ b/tolk/ast-replicator.h @@ -0,0 +1,255 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "ast.h" +#include "platform-utils.h" + +namespace tolk { + +class ASTReplicator { +protected: + virtual AnyV clone(AnyV v) = 0; + virtual AnyExprV clone(AnyExprV v) = 0; + virtual TypePtr clone(TypePtr) = 0; + +public: + virtual ~ASTReplicator() = default; +}; + +class ASTReplicatorFunction : public ASTReplicator { +protected: + using parent = ASTReplicatorFunction; + + std::vector clone(const std::vector& items) { + std::vector result; + result.reserve(items.size()); + for (AnyV item : items) { + result.push_back(clone(item)); + } + return result; + } + + std::vector clone(const std::vector& items) { + std::vector result; + result.reserve(items.size()); + for (AnyExprV item : items) { + result.push_back(clone(item)); + } + return result; + } + + // expressions + + virtual V clone(V v) { + return createV(v->loc); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_items())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_items())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_identifier()), v->has_instantiationTs() ? 
clone(v->get_instantiationTs()) : nullptr); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_identifier()), clone(v->declared_type), v->is_immutable, v->marked_as_redef); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr())); + } + virtual V clone(V v) { + return createV(v->loc, v->intval, v->orig_str); + } + virtual V clone(V v) { + return createV(v->loc, v->str_val, v->modifier); + } + virtual V clone(V v) { + return createV(v->loc, v->bool_val); + } + virtual V clone(V v) { + return createV(v->loc); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr()), v->passed_as_mutate); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_arguments())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_obj()), clone(v->get_identifier()), v->has_instantiationTs() ? clone(v->get_instantiationTs()) : nullptr); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_callee()), clone(v->get_arg_list())); + } + virtual V clone(V v) { + return createV(v->loc); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_lhs()), clone(v->get_rhs())); + } + virtual V clone(V v) { + return createV(v->loc, v->operator_name, v->tok, clone(v->get_lhs()), clone(v->get_rhs())); + } + virtual V clone(V v) { + return createV(v->loc, v->operator_name, v->tok, clone(v->get_rhs())); + } + virtual V clone(V v) { + return createV(v->loc, v->operator_name, v->tok, clone(v->get_lhs()), clone(v->get_rhs())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_cond()), clone(v->get_when_true()), clone(v->get_when_false())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_expr()), clone(v->cast_to_type)); + } + + // statements + + virtual V clone(V v) { + return createV(v->loc); + } + virtual V clone(V v) { + return createV(v->loc, v->loc_end, clone(v->get_items())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_return_value())); + } + 
virtual V clone(V v) { + return createV(v->loc, v->is_ifnot, clone(v->get_cond()), clone(v->get_if_body()), clone(v->get_else_body())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_cond()), clone(v->get_body())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_cond()), clone(v->get_body())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_body()), clone(v->get_cond())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_thrown_code()), clone(v->get_thrown_arg())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_cond()), clone(v->get_thrown_code())); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_try_body()), clone(v->get_catch_expr()), clone(v->get_catch_body())); + } + virtual V clone(V v) { + return createV(v->loc, v->arg_order, v->ret_order, clone(v->get_asm_commands())); + } + + // other + + virtual V clone(V v) { + return createV(v->loc, v->name); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->substituted_type)); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_items())); + } + virtual V clone(V v) { + return createV(v->loc, v->param_name, clone(v->declared_type), v->declared_as_mutate); + } + virtual V clone(V v) { + return createV(v->loc, clone(v->get_params())); + } + + AnyExprV clone(AnyExprV v) final { + switch (v->type) { + case ast_empty_expression: return clone(v->as()); + case ast_parenthesized_expression: return clone(v->as()); + case ast_tensor: return clone(v->as()); + case ast_typed_tuple: return clone(v->as()); + case ast_reference: return clone(v->as()); + case ast_local_var_lhs: return clone(v->as()); + case ast_local_vars_declaration: return clone(v->as()); + case ast_int_const: return clone(v->as()); + case ast_string_const: return clone(v->as()); + case ast_bool_const: return clone(v->as()); + case ast_null_keyword: return clone(v->as()); + case ast_argument: return clone(v->as()); + case 
ast_argument_list: return clone(v->as()); + case ast_dot_access: return clone(v->as()); + case ast_function_call: return clone(v->as()); + case ast_underscore: return clone(v->as()); + case ast_assign: return clone(v->as()); + case ast_set_assign: return clone(v->as()); + case ast_unary_operator: return clone(v->as()); + case ast_binary_operator: return clone(v->as()); + case ast_ternary_operator: return clone(v->as()); + case ast_cast_as_operator: return clone(v->as()); + default: + throw UnexpectedASTNodeType(v, "ASTReplicatorFunction::clone"); + } + } + + AnyV clone(AnyV v) final { + switch (v->type) { + case ast_empty_statement: return clone(v->as()); + case ast_sequence: return clone(v->as()); + case ast_return_statement: return clone(v->as()); + case ast_if_statement: return clone(v->as()); + case ast_repeat_statement: return clone(v->as()); + case ast_while_statement: return clone(v->as()); + case ast_do_while_statement: return clone(v->as()); + case ast_throw_statement: return clone(v->as()); + case ast_assert_statement: return clone(v->as()); + case ast_try_catch_statement: return clone(v->as()); + case ast_asm_body: return clone(v->as()); + // other AST nodes that can be children of ast nodes of function body + case ast_identifier: return clone(v->as()); + case ast_instantiationT_item: return clone(v->as()); + case ast_instantiationT_list: return clone(v->as()); + case ast_parameter: return clone(v->as()); + case ast_parameter_list: return clone(v->as()); + + default: { + // be very careful, don't forget to handle all statements/other (not expressions) above! 
+ AnyExprV as_expr = reinterpret_cast(v); + return clone(as_expr); + } + } + } + + TypePtr clone(TypePtr t) override { + return t; + } + + public: + virtual V clone_function_body(V v_function) { + return createV( + v_function->loc, + clone(v_function->get_identifier()), + clone(v_function->get_param_list()), + clone(v_function->get_body()->as()), + clone(v_function->declared_return_type), + v_function->genericsT_list, + v_function->method_id, + v_function->flags + ); + } +}; + +} // namespace tolk diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h index 759873b04..4ec72cddc 100644 --- a/tolk/ast-stringifier.h +++ b/tolk/ast-stringifier.h @@ -20,6 +20,7 @@ #include "ast.h" #include "ast-visitor.h" +#include "type-system.h" #include /* @@ -31,46 +32,55 @@ namespace tolk { class ASTStringifier final : public ASTVisitor { constexpr static std::pair name_pairs[] = { - {ast_empty, "ast_empty"}, - {ast_parenthesized_expr, "ast_parenthesized_expr"}, - {ast_tensor, "ast_tensor"}, - {ast_tensor_square, "ast_tensor_square"}, {ast_identifier, "ast_identifier"}, + // expressions + {ast_empty_expression, "ast_empty_expression"}, + {ast_parenthesized_expression, "ast_parenthesized_expression"}, + {ast_tensor, "ast_tensor"}, + {ast_typed_tuple, "ast_typed_tuple"}, + {ast_reference, "ast_reference"}, + {ast_local_var_lhs, "ast_local_var_lhs"}, + {ast_local_vars_declaration, "ast_local_vars_declaration"}, {ast_int_const, "ast_int_const"}, {ast_string_const, "ast_string_const"}, {ast_bool_const, "ast_bool_const"}, {ast_null_keyword, "ast_null_keyword"}, - {ast_self_keyword, "ast_self_keyword"}, {ast_argument, "ast_argument"}, {ast_argument_list, "ast_argument_list"}, + {ast_dot_access, "ast_dot_access"}, {ast_function_call, "ast_function_call"}, - {ast_dot_method_call, "ast_dot_method_call"}, - {ast_global_var_declaration, "ast_global_var_declaration"}, - {ast_constant_declaration, "ast_constant_declaration"}, {ast_underscore, "ast_underscore"}, + {ast_assign, 
"ast_assign"}, + {ast_set_assign, "ast_set_assign"}, {ast_unary_operator, "ast_unary_operator"}, {ast_binary_operator, "ast_binary_operator"}, {ast_ternary_operator, "ast_ternary_operator"}, - {ast_return_statement, "ast_return_statement"}, + {ast_cast_as_operator, "ast_cast_as_operator"}, + // statements + {ast_empty_statement, "ast_empty_statement"}, {ast_sequence, "ast_sequence"}, + {ast_return_statement, "ast_return_statement"}, + {ast_if_statement, "ast_if_statement"}, {ast_repeat_statement, "ast_repeat_statement"}, {ast_while_statement, "ast_while_statement"}, {ast_do_while_statement, "ast_do_while_statement"}, {ast_throw_statement, "ast_throw_statement"}, {ast_assert_statement, "ast_assert_statement"}, {ast_try_catch_statement, "ast_try_catch_statement"}, - {ast_if_statement, "ast_if_statement"}, + {ast_asm_body, "ast_asm_body"}, + // other {ast_genericsT_item, "ast_genericsT_item"}, {ast_genericsT_list, "ast_genericsT_list"}, + {ast_instantiationT_item, "ast_instantiationT_item"}, + {ast_instantiationT_list, "ast_instantiationT_list"}, {ast_parameter, "ast_parameter"}, {ast_parameter_list, "ast_parameter_list"}, - {ast_asm_body, "ast_asm_body"}, {ast_annotation, "ast_annotation"}, {ast_function_declaration, "ast_function_declaration"}, - {ast_local_var, "ast_local_var"}, - {ast_local_vars_declaration, "ast_local_vars_declaration"}, + {ast_global_var_declaration, "ast_global_var_declaration"}, + {ast_constant_declaration, "ast_constant_declaration"}, {ast_tolk_required_version, "ast_tolk_required_version"}, - {ast_import_statement, "ast_import_statement"}, + {ast_import_directive, "ast_import_directive"}, {ast_tolk_file, "ast_tolk_file"}, }; @@ -114,62 +124,94 @@ class ASTStringifier final : public ASTVisitor { switch (v->type) { case ast_identifier: return static_cast(v->as()->name); + case ast_reference: { + std::string result(v->as()->get_name()); + if (v->as()->has_instantiationTs()) { + result += specific_str(v->as()->get_instantiationTs()); + } + 
return result; + } case ast_int_const: - return static_cast(v->as()->int_val); + return static_cast(v->as()->orig_str); case ast_string_const: if (char modifier = v->as()->modifier) { return "\"" + static_cast(v->as()->str_val) + "\"" + std::string(1, modifier); } else { return "\"" + static_cast(v->as()->str_val) + "\""; } + case ast_bool_const: + return v->as()->bool_val ? "true" : "false"; + case ast_dot_access: { + std::string result = "." + static_cast(v->as()->get_field_name()); + if (v->as()->has_instantiationTs()) { + result += specific_str(v->as()->get_instantiationTs()); + } + return result; + } case ast_function_call: { - if (auto v_lhs = v->as()->get_called_f()->try_as()) { - return static_cast(v_lhs->name) + "()"; + std::string inner = specific_str(v->as()->get_callee()); + if (int n_args = v->as()->get_num_args()) { + return inner + "(..." + std::to_string(n_args) + ")"; } - return {}; + return inner + "()"; } - case ast_dot_method_call: - return static_cast(v->as()->method_name); case ast_global_var_declaration: return static_cast(v->as()->get_identifier()->name); case ast_constant_declaration: return static_cast(v->as()->get_identifier()->name); + case ast_assign: + return "="; + case ast_set_assign: + return static_cast(v->as()->operator_name) + "="; case ast_unary_operator: return static_cast(v->as()->operator_name); case ast_binary_operator: return static_cast(v->as()->operator_name); + case ast_cast_as_operator: + return v->as()->cast_to_type->as_human_readable(); case ast_sequence: return "↓" + std::to_string(v->as()->get_items().size()); + case ast_instantiationT_item: + return v->as()->substituted_type->as_human_readable(); case ast_if_statement: return v->as()->is_ifnot ? 
"ifnot" : ""; case ast_annotation: return annotation_kinds[static_cast(v->as()->kind)].second; case ast_parameter: { std::ostringstream os; - os << v->as()->param_type; - return static_cast(v->as()->get_identifier()->name) + ": " + os.str(); + os << v->as()->declared_type; + return static_cast(v->as()->param_name) + ": " + os.str(); } case ast_function_declaration: { std::string param_names; for (int i = 0; i < v->as()->get_num_params(); i++) { if (!param_names.empty()) param_names += ","; - param_names += v->as()->get_param(i)->get_identifier()->name; + param_names += v->as()->get_param(i)->param_name; } return "fun " + static_cast(v->as()->get_identifier()->name) + "(" + param_names + ")"; } - case ast_local_var: { + case ast_local_var_lhs: { std::ostringstream os; - os << v->as()->declared_type; - if (auto v_ident = v->as()->get_identifier()->try_as()) { - return static_cast(v_ident->name) + ":" + os.str(); + os << (v->as()->inferred_type ? v->as()->inferred_type : v->as()->declared_type); + if (v->as()->get_name().empty()) { + return "_: " + os.str(); + } + return static_cast(v->as()->get_name()) + ":" + os.str(); + } + case ast_instantiationT_list: { + std::string result = "<"; + for (AnyV item : v->as()->get_items()) { + if (result.size() > 1) + result += ","; + result += item->as()->substituted_type->as_human_readable(); } - return "_: " + os.str(); + return result + ">"; } case ast_tolk_required_version: return static_cast(v->as()->semver); - case ast_import_statement: - return static_cast(v->as()->get_file_leaf()->str_val); + case ast_import_directive: + return static_cast(v->as()->get_file_leaf()->str_val); case ast_tolk_file: return v->as()->file->rel_filename; default: @@ -202,46 +244,55 @@ class ASTStringifier final : public ASTVisitor { void visit(AnyV v) override { switch (v->type) { - case ast_empty: return handle_vertex(v->as()); - case ast_parenthesized_expr: return handle_vertex(v->as()); - case ast_tensor: return handle_vertex(v->as()); - case 
ast_tensor_square: return handle_vertex(v->as()); case ast_identifier: return handle_vertex(v->as()); + // expressions + case ast_empty_expression: return handle_vertex(v->as()); + case ast_parenthesized_expression: return handle_vertex(v->as()); + case ast_tensor: return handle_vertex(v->as()); + case ast_typed_tuple: return handle_vertex(v->as()); + case ast_reference: return handle_vertex(v->as()); + case ast_local_var_lhs: return handle_vertex(v->as()); + case ast_local_vars_declaration: return handle_vertex(v->as()); case ast_int_const: return handle_vertex(v->as()); case ast_string_const: return handle_vertex(v->as()); case ast_bool_const: return handle_vertex(v->as()); case ast_null_keyword: return handle_vertex(v->as()); - case ast_self_keyword: return handle_vertex(v->as()); case ast_argument: return handle_vertex(v->as()); case ast_argument_list: return handle_vertex(v->as()); + case ast_dot_access: return handle_vertex(v->as()); case ast_function_call: return handle_vertex(v->as()); - case ast_dot_method_call: return handle_vertex(v->as()); - case ast_global_var_declaration: return handle_vertex(v->as()); - case ast_constant_declaration: return handle_vertex(v->as()); case ast_underscore: return handle_vertex(v->as()); + case ast_assign: return handle_vertex(v->as()); + case ast_set_assign: return handle_vertex(v->as()); case ast_unary_operator: return handle_vertex(v->as()); case ast_binary_operator: return handle_vertex(v->as()); case ast_ternary_operator: return handle_vertex(v->as()); - case ast_return_statement: return handle_vertex(v->as()); + case ast_cast_as_operator: return handle_vertex(v->as()); + // statements + case ast_empty_statement: return handle_vertex(v->as()); case ast_sequence: return handle_vertex(v->as()); + case ast_return_statement: return handle_vertex(v->as()); + case ast_if_statement: return handle_vertex(v->as()); case ast_repeat_statement: return handle_vertex(v->as()); case ast_while_statement: return 
handle_vertex(v->as()); case ast_do_while_statement: return handle_vertex(v->as()); case ast_throw_statement: return handle_vertex(v->as()); case ast_assert_statement: return handle_vertex(v->as()); case ast_try_catch_statement: return handle_vertex(v->as()); - case ast_if_statement: return handle_vertex(v->as()); + case ast_asm_body: return handle_vertex(v->as()); + // other case ast_genericsT_item: return handle_vertex(v->as()); case ast_genericsT_list: return handle_vertex(v->as()); + case ast_instantiationT_item: return handle_vertex(v->as()); + case ast_instantiationT_list: return handle_vertex(v->as()); case ast_parameter: return handle_vertex(v->as()); case ast_parameter_list: return handle_vertex(v->as()); - case ast_asm_body: return handle_vertex(v->as()); case ast_annotation: return handle_vertex(v->as()); case ast_function_declaration: return handle_vertex(v->as()); - case ast_local_var: return handle_vertex(v->as()); - case ast_local_vars_declaration: return handle_vertex(v->as()); + case ast_global_var_declaration: return handle_vertex(v->as()); + case ast_constant_declaration: return handle_vertex(v->as()); case ast_tolk_required_version: return handle_vertex(v->as()); - case ast_import_statement: return handle_vertex(v->as()); + case ast_import_directive: return handle_vertex(v->as()); case ast_tolk_file: return handle_vertex(v->as()); default: throw UnexpectedASTNodeType(v, "ASTStringifier::visit"); diff --git a/tolk/ast-visitor.h b/tolk/ast-visitor.h index d0a7bfaf6..a54cb13be 100644 --- a/tolk/ast-visitor.h +++ b/tolk/ast-visitor.h @@ -37,20 +37,40 @@ namespace tolk { class ASTVisitor { protected: - GNU_ATTRIBUTE_ALWAYS_INLINE static void visit_children(const ASTNodeLeaf* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE static void visit_children(const ASTExprLeaf* v) { static_cast(v); } - GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeUnary* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTExprUnary* v) { visit(v->child); } - 
GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeBinary* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTExprBinary* v) { visit(v->lhs); visit(v->rhs); } - GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeVararg* v) { + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTExprVararg* v) { + for (AnyExprV child : v->children) { + visit(child); + } + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTStatementUnary* v) { + visit(v->child); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTStatementVararg* v) { + for (AnyV child : v->children) { + visit(child); + } + } + + GNU_ATTRIBUTE_ALWAYS_INLINE static void visit_children(const ASTOtherLeaf* v) { + static_cast(v); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTOtherVararg* v) { for (AnyV child : v->children) { visit(child); } @@ -66,90 +86,105 @@ class ASTVisitorFunctionBody : public ASTVisitor { protected: using parent = ASTVisitorFunctionBody; - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } + // expressions + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return 
visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + // statements + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } void visit(AnyV v) final { switch (v->type) { - case ast_empty: return visit(v->as()); - case ast_parenthesized_expr: return visit(v->as()); + // expressions + case ast_empty_expression: return visit(v->as()); + case ast_parenthesized_expression: return visit(v->as()); case ast_tensor: return visit(v->as()); - case ast_tensor_square: return visit(v->as()); - case ast_identifier: return visit(v->as()); + case ast_typed_tuple: return visit(v->as()); + case ast_reference: return visit(v->as()); + case ast_local_var_lhs: return visit(v->as()); + case ast_local_vars_declaration: 
return visit(v->as()); case ast_int_const: return visit(v->as()); case ast_string_const: return visit(v->as()); case ast_bool_const: return visit(v->as()); case ast_null_keyword: return visit(v->as()); - case ast_self_keyword: return visit(v->as()); + case ast_argument: return visit(v->as()); + case ast_argument_list: return visit(v->as()); + case ast_dot_access: return visit(v->as()); case ast_function_call: return visit(v->as()); - case ast_dot_method_call: return visit(v->as()); case ast_underscore: return visit(v->as()); + case ast_assign: return visit(v->as()); + case ast_set_assign: return visit(v->as()); case ast_unary_operator: return visit(v->as()); case ast_binary_operator: return visit(v->as()); case ast_ternary_operator: return visit(v->as()); - case ast_return_statement: return visit(v->as()); + case ast_cast_as_operator: return visit(v->as()); + // statements + case ast_empty_statement: return visit(v->as()); case ast_sequence: return visit(v->as()); + case ast_return_statement: return visit(v->as()); + case ast_if_statement: return visit(v->as()); case ast_repeat_statement: return visit(v->as()); case ast_while_statement: return visit(v->as()); case ast_do_while_statement: return visit(v->as()); case ast_throw_statement: return visit(v->as()); case ast_assert_statement: return visit(v->as()); case ast_try_catch_statement: return visit(v->as()); - case ast_if_statement: return visit(v->as()); - case ast_local_var: return visit(v->as()); - case ast_local_vars_declaration: return visit(v->as()); - case ast_asm_body: return visit(v->as()); +#ifdef TOLK_DEBUG + case ast_asm_body: + throw UnexpectedASTNodeType(v, "ASTVisitor; forgot to filter out asm functions in should_visit_function()?"); +#endif default: throw UnexpectedASTNodeType(v, "ASTVisitorFunctionBody::visit"); } } public: - void start_visiting_function(V v_function) { + virtual bool should_visit_function(const FunctionData* fun_ref) = 0; + + virtual void start_visiting_function(const 
FunctionData* fun_ref, V v_function) { visit(v_function->get_body()); } }; -class ASTVisitorAllFunctionsInFile : public ASTVisitorFunctionBody { -protected: - using parent = ASTVisitorAllFunctionsInFile; - virtual bool should_enter_function(V v) = 0; +const std::vector& get_all_not_builtin_functions(); -public: - void start_visiting_file(V v_file) { - for (AnyV v : v_file->get_toplevel_declarations()) { - if (auto v_func = v->try_as()) { - if (should_enter_function(v_func)) { - visit(v_func->get_body()); - } - } +template +void visit_ast_of_all_functions() { + BodyVisitorT visitor; + for (const FunctionData* fun_ref : get_all_not_builtin_functions()) { + if (visitor.should_visit_function(fun_ref)) { + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); } } -}; +} } // namespace tolk diff --git a/tolk/ast.cpp b/tolk/ast.cpp index b1af51005..092260ffc 100644 --- a/tolk/ast.cpp +++ b/tolk/ast.cpp @@ -15,8 +15,9 @@ along with TON Blockchain Library. If not, see . */ #include "ast.h" +#ifdef TOLK_DEBUG #include "ast-stringifier.h" -#include +#endif namespace tolk { @@ -79,7 +80,7 @@ int Vertex::lookup_idx(std::string_view nameT) const { int Vertex::lookup_idx(std::string_view param_name) const { for (size_t idx = 0; idx < children.size(); ++idx) { - if (children[idx] && children[idx]->as()->get_identifier()->name == param_name) { + if (children[idx] && children[idx]->as()->param_name == param_name) { return static_cast(idx); } } @@ -96,8 +97,100 @@ int Vertex::get_mutate_params_count() const { return n; } -void Vertex::mutate_set_src_file(const SrcFile* file) const { - const_cast(this)->file = file; +// --------------------------------------------------------- +// "assign" methods +// +// From the user's point of view, all AST vertices are constant, fields are public, but can't be modified. +// The only way to modify a field is to call "mutate()" and then use these "assign_*" methods. 
+// Therefore, there is a guarantee, that all AST mutations are done via these methods, +// easily searched by usages, and there is no another way to modify any other field. + +void ASTNodeExpressionBase::assign_inferred_type(TypePtr type) { + this->inferred_type = type; +} + +void ASTNodeExpressionBase::assign_rvalue_true() { + this->is_rvalue = true; +} + +void ASTNodeExpressionBase::assign_lvalue_true() { + this->is_lvalue = true; +} + +void Vertex::assign_sym(const Symbol* sym) { + this->sym = sym; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_maybe = fun_ref; +} + +void Vertex::assign_resolved_type(TypePtr cast_to_type) { + this->cast_to_type = cast_to_type; +} + +void Vertex::assign_var_ref(const GlobalVarData* var_ref) { + this->var_ref = var_ref; +} + +void Vertex::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + +void Vertex::assign_const_ref(const GlobalConstData* const_ref) { + this->const_ref = const_ref; +} + +void Vertex::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + +void Vertex::assign_resolved_type(TypePtr substituted_type) { + this->substituted_type = substituted_type; +} + +void Vertex::assign_param_ref(const LocalVarData* param_ref) { + this->param_ref = param_ref; +} + +void Vertex::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_ref = fun_ref; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_ref = fun_ref; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_ref = fun_ref; +} + +void Vertex::assign_target(const DotTarget& target) { + this->target = target; +} + +void Vertex::assign_fun_ref(const FunctionData* fun_ref) { + this->fun_ref = fun_ref; +} + +void Vertex::assign_resolved_type(TypePtr declared_return_type) { + this->declared_return_type = 
declared_return_type; +} + +void Vertex::assign_var_ref(const LocalVarData* var_ref) { + this->var_ref = var_ref; +} + +void Vertex::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + +void Vertex::assign_src_file(const SrcFile* file) { + this->file = file; } } // namespace tolk diff --git a/tolk/ast.h b/tolk/ast.h index fd2b27cbf..b90507e7e 100644 --- a/tolk/ast.h +++ b/tolk/ast.h @@ -17,10 +17,11 @@ #pragma once #include +#include "fwd-declarations.h" #include "platform-utils.h" #include "src-file.h" -#include "type-expr.h" #include "lexer.h" +#include "symtable.h" /* * Here we introduce AST representation of Tolk source code. @@ -32,14 +33,18 @@ * * From the user's point of view, all AST vertices are constant. All API is based on constancy. * Even though fields of vertex structs are public, they can't be modified, since vertices are accepted by const ref. - * Generally, there are two ways of accepting a vertex: + * Generally, there are three ways of accepting a vertex: * * AnyV (= const ASTNodeBase*) * the only you can do with this vertex is to see v->type (ASTNodeType) and to cast via v->as() + * * AnyExprV (= const ASTNodeExpressionBase*) + * in contains expression-specific properties (lvalue/rvalue, inferred type) * * V (= const Vertex*) * a specific type of vertex, you can use its fields and methods * There is one way of creating a vertex: * * createV(...constructor_args) (= new Vertex(...)) * vertices are currently created on a heap, without any custom memory arena, just allocated and never deleted + * The only way to modify a field is to use "mutate()" method (drops constancy, the only point of mutation) + * and then to call "assign_*" method, like "assign_sym", "assign_src_file", etc. 
* * Having AnyV and knowing its node_type, a call * v->as() @@ -59,46 +64,55 @@ namespace tolk { enum ASTNodeType { - ast_empty, - ast_parenthesized_expr, - ast_tensor, - ast_tensor_square, ast_identifier, + // expressions + ast_empty_expression, + ast_parenthesized_expression, + ast_tensor, + ast_typed_tuple, + ast_reference, + ast_local_var_lhs, + ast_local_vars_declaration, ast_int_const, ast_string_const, ast_bool_const, ast_null_keyword, - ast_self_keyword, ast_argument, ast_argument_list, + ast_dot_access, ast_function_call, - ast_dot_method_call, - ast_global_var_declaration, - ast_constant_declaration, ast_underscore, + ast_assign, + ast_set_assign, ast_unary_operator, ast_binary_operator, ast_ternary_operator, - ast_return_statement, + ast_cast_as_operator, + // statements + ast_empty_statement, ast_sequence, + ast_return_statement, + ast_if_statement, ast_repeat_statement, ast_while_statement, ast_do_while_statement, ast_throw_statement, ast_assert_statement, ast_try_catch_statement, - ast_if_statement, + ast_asm_body, + // other ast_genericsT_item, ast_genericsT_list, + ast_instantiationT_item, + ast_instantiationT_list, ast_parameter, ast_parameter_list, - ast_asm_body, ast_annotation, ast_function_declaration, - ast_local_var, - ast_local_vars_declaration, + ast_global_var_declaration, + ast_constant_declaration, ast_tolk_required_version, - ast_import_statement, + ast_import_directive, ast_tolk_file, }; @@ -111,10 +125,6 @@ enum class AnnotationKind { unknown, }; -struct ASTNodeBase; - -using AnyV = const ASTNodeBase*; - template struct Vertex; @@ -141,6 +151,7 @@ struct ASTNodeBase { const SrcLocation loc; ASTNodeBase(ASTNodeType type, SrcLocation loc) : type(type), loc(loc) {} + ASTNodeBase(const ASTNodeBase&) = delete; template V as() const { @@ -157,7 +168,7 @@ struct ASTNodeBase { return type == node_type ? 
static_cast>(this) : nullptr; } - #ifdef TOLK_DEBUG +#ifdef TOLK_DEBUG std::string to_debug_string() const { return to_debug_string(false); } std::string to_debug_string(bool colored) const; void debug_print() const; @@ -167,46 +178,123 @@ struct ASTNodeBase { void error(const std::string& err_msg) const; }; -struct ASTNodeLeaf : ASTNodeBase { +struct ASTNodeExpressionBase : ASTNodeBase { + friend class ASTDuplicatorFunction; + + TypePtr inferred_type = nullptr; + bool is_rvalue: 1 = false; + bool is_lvalue: 1 = false; + + ASTNodeExpressionBase* mutate() const { return const_cast(this); } + void assign_inferred_type(TypePtr type); + void assign_rvalue_true(); + void assign_lvalue_true(); + + ASTNodeExpressionBase(ASTNodeType type, SrcLocation loc) : ASTNodeBase(type, loc) {} +}; + +struct ASTNodeStatementBase : ASTNodeBase { + ASTNodeStatementBase(ASTNodeType type, SrcLocation loc) : ASTNodeBase(type, loc) {} +}; + +struct ASTExprLeaf : ASTNodeExpressionBase { friend class ASTVisitor; friend class ASTReplacer; protected: - ASTNodeLeaf(ASTNodeType type, SrcLocation loc) - : ASTNodeBase(type, loc) {} + ASTExprLeaf(ASTNodeType type, SrcLocation loc) + : ASTNodeExpressionBase(type, loc) {} +}; + +struct ASTExprUnary : ASTNodeExpressionBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + AnyExprV child; + + ASTExprUnary(ASTNodeType type, SrcLocation loc, AnyExprV child) + : ASTNodeExpressionBase(type, loc), child(child) {} +}; + +struct ASTExprBinary : ASTNodeExpressionBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + AnyExprV lhs; + AnyExprV rhs; + + ASTExprBinary(ASTNodeType type, SrcLocation loc, AnyExprV lhs, AnyExprV rhs) + : ASTNodeExpressionBase(type, loc), lhs(lhs), rhs(rhs) {} +}; + +struct ASTExprVararg : ASTNodeExpressionBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + std::vector children; + + AnyExprV child(int i) const { return children.at(i); } + + ASTExprVararg(ASTNodeType 
type, SrcLocation loc, std::vector children) + : ASTNodeExpressionBase(type, loc), children(std::move(children)) {} + +public: + int size() const { return static_cast(children.size()); } + bool empty() const { return children.empty(); } }; -struct ASTNodeUnary : ASTNodeBase { +struct ASTStatementUnary : ASTNodeStatementBase { friend class ASTVisitor; friend class ASTReplacer; protected: AnyV child; - ASTNodeUnary(ASTNodeType type, SrcLocation loc, AnyV child) - : ASTNodeBase(type, loc), child(child) {} + AnyExprV child_as_expr() const { return reinterpret_cast(child); } + + ASTStatementUnary(ASTNodeType type, SrcLocation loc, AnyV child) + : ASTNodeStatementBase(type, loc), child(child) {} }; -struct ASTNodeBinary : ASTNodeBase { +struct ASTStatementVararg : ASTNodeStatementBase { friend class ASTVisitor; friend class ASTReplacer; protected: - AnyV lhs; - AnyV rhs; + std::vector children; + + AnyExprV child_as_expr(int i) const { return reinterpret_cast(children.at(i)); } + + ASTStatementVararg(ASTNodeType type, SrcLocation loc, std::vector children) + : ASTNodeStatementBase(type, loc), children(std::move(children)) {} - ASTNodeBinary(ASTNodeType type, SrcLocation loc, AnyV lhs, AnyV rhs) - : ASTNodeBase(type, loc), lhs(lhs), rhs(rhs) {} +public: + int size() const { return static_cast(children.size()); } + bool empty() const { return children.empty(); } }; -struct ASTNodeVararg : ASTNodeBase { +struct ASTOtherLeaf : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + ASTOtherLeaf(ASTNodeType type, SrcLocation loc) + : ASTNodeBase(type, loc) {} +}; + +struct ASTOtherVararg : ASTNodeBase { friend class ASTVisitor; friend class ASTReplacer; protected: std::vector children; - ASTNodeVararg(ASTNodeType type, SrcLocation loc, std::vector children) + AnyExprV child_as_expr(int i) const { return reinterpret_cast(children.at(i)); } + + ASTOtherVararg(ASTNodeType type, SrcLocation loc, std::vector children) : ASTNodeBase(type, loc), 
children(std::move(children)) {} public: @@ -214,312 +302,615 @@ struct ASTNodeVararg : ASTNodeBase { bool empty() const { return children.empty(); } }; + +template<> +// ast_identifier is "a name" in AST structure +// it's NOT a standalone expression, it's "implementation details" of other AST vertices +// example: `var x = 5` then "x" is identifier (inside local var declaration) +// example: `global g: int` then "g" is identifier +// example: `someF` is a reference, which contains identifier +// example: `someF` is a reference which contains identifier and generics instantiation +// example: `fun f()` then "f" is identifier, "" is a generics declaration +struct Vertex final : ASTOtherLeaf { + std::string_view name; // empty for underscore + + Vertex(SrcLocation loc, std::string_view name) + : ASTOtherLeaf(ast_identifier, loc) + , name(name) {} +}; + + +// // --------------------------------------------------------- +// expressions +// + template<> -struct Vertex final : ASTNodeLeaf { +// ast_empty_expression is "nothing" in context of expression, it has "unknown" type +// example: `throw 123;` then "throw arg" is empty expression (opposed to `throw (123, arg)`) +struct Vertex final : ASTExprLeaf { explicit Vertex(SrcLocation loc) - : ASTNodeLeaf(ast_empty, loc) {} + : ASTExprLeaf(ast_empty_expression, loc) {} }; + template<> -struct Vertex final : ASTNodeUnary { - AnyV get_expr() const { return child; } +// ast_parenthesized_expression is something surrounded embraced by (parenthesis) +// example: `(1)`, `((f()))` (two nested) +struct Vertex final : ASTExprUnary { + AnyExprV get_expr() const { return child; } - Vertex(SrcLocation loc, AnyV expr) - : ASTNodeUnary(ast_parenthesized_expr, loc, expr) {} + Vertex(SrcLocation loc, AnyExprV expr) + : ASTExprUnary(ast_parenthesized_expression, loc, expr) {} }; template<> -struct Vertex final : ASTNodeVararg { - const std::vector& get_items() const { return children; } - AnyV get_item(int i) const { return children.at(i); 
} +// ast_tensor is a set of expressions embraced by (parenthesis) +// in most languages, it's called "tuple", but in TVM, "tuple" is a TVM primitive, that's why "tensor" +// example: `(1, 2)`, `(1, (2, 3))` (nested), `()` (empty tensor) +// note, that `(1)` is not a tensor, it's a parenthesized expression +// a tensor of N elements occupies N slots on a stack (opposed to TVM tuple primitive, 1 slot) +struct Vertex final : ASTExprVararg { + const std::vector& get_items() const { return children; } + AnyExprV get_item(int i) const { return child(i); } + + Vertex(SrcLocation loc, std::vector items) + : ASTExprVararg(ast_tensor, loc, std::move(items)) {} +}; - Vertex(SrcLocation loc, std::vector items) - : ASTNodeVararg(ast_tensor, loc, std::move(items)) {} +template<> +// ast_typed_tuple is a set of expressions in [square brackets] +// in TVM, it's a TVM tuple, that occupies 1 slot, but the compiler knows its "typed structure" +// example: `[1, x]`, `[[0]]` (nested) +// typed tuples can be assigned to N variables, like `[one, _, three] = [1,2,3]` +struct Vertex final : ASTExprVararg { + const std::vector& get_items() const { return children; } + AnyExprV get_item(int i) const { return child(i); } + + Vertex(SrcLocation loc, std::vector items) + : ASTExprVararg(ast_typed_tuple, loc, std::move(items)) {} }; template<> -struct Vertex final : ASTNodeVararg { - const std::vector& get_items() const { return children; } - AnyV get_item(int i) const { return children.at(i); } +// ast_reference is "something that references a symbol" +// examples: `x` / `someF` / `someF` +// it's a leaf expression from traversing point of view, but actually, has children (not expressions) +// note, that both `someF()` and `someF()` are function calls, where a callee is just a reference +struct Vertex final : ASTExprLeaf { +private: + V identifier; // its name, `x` / `someF` + V instantiationTs; // not null if ``, otherwise nullptr - Vertex(SrcLocation loc, std::vector items) - : 
ASTNodeVararg(ast_tensor_square, loc, std::move(items)) {} +public: + const Symbol* sym = nullptr; // filled on resolve or type inferring; points to local / global / function / constant + + auto get_identifier() const { return identifier; } + bool has_instantiationTs() const { return instantiationTs != nullptr; } + auto get_instantiationTs() const { return instantiationTs; } + std::string_view get_name() const { return identifier->name; } + + Vertex* mutate() const { return const_cast(this); } + void assign_sym(const Symbol* sym); + + Vertex(SrcLocation loc, V name_identifier, V instantiationTs) + : ASTExprLeaf(ast_reference, loc) + , identifier(name_identifier), instantiationTs(instantiationTs) {} }; template<> -struct Vertex final : ASTNodeLeaf { - std::string_view name; +// ast_local_var_lhs is one variable inside `var` declaration +// example: `var x = 0;` then "x" is local var lhs +// example: `val (x: int, [y redef], _) = rhs` then "x" and "y" and "_" are +// it's a leaf from expression's point of view, though technically has an "identifier" child +struct Vertex final : ASTExprLeaf { +private: + V identifier; - Vertex(SrcLocation loc, std::string_view name) - : ASTNodeLeaf(ast_identifier, loc), name(name) {} +public: + const LocalVarData* var_ref = nullptr; // filled on resolve identifiers; for `redef` points to declared above; for underscore, name is empty + TypePtr declared_type; // not null for `var x: int = rhs`, otherwise nullptr + bool is_immutable; // declared via 'val', not 'var' + bool marked_as_redef; // var (existing_var redef, new_var: int) = ... 
+ + V get_identifier() const { return identifier; } + std::string_view get_name() const { return identifier->name; } // empty for underscore + + Vertex* mutate() const { return const_cast(this); } + void assign_var_ref(const LocalVarData* var_ref); + void assign_resolved_type(TypePtr declared_type); + + Vertex(SrcLocation loc, V identifier, TypePtr declared_type, bool is_immutable, bool marked_as_redef) + : ASTExprLeaf(ast_local_var_lhs, loc) + , identifier(identifier), declared_type(declared_type), is_immutable(is_immutable), marked_as_redef(marked_as_redef) {} }; template<> -struct Vertex final : ASTNodeLeaf { - std::string_view int_val; +// ast_local_vars_declaration is an expression declaring local variables on the left side of assignment +// examples: see above +// for `var (x, [y])` its expr is "tensor (local var, typed tuple (local var))" +// for assignment `var x = 5`, this node is `var x`, lhs of assignment +struct Vertex final : ASTExprUnary { + AnyExprV get_expr() const { return child; } // ast_local_var_lhs / ast_tensor / ast_typed_tuple + + Vertex(SrcLocation loc, AnyExprV expr) + : ASTExprUnary(ast_local_vars_declaration, loc, expr) {} +}; - Vertex(SrcLocation loc, std::string_view int_val) - : ASTNodeLeaf(ast_int_const, loc), int_val(int_val) {} +template<> +// ast_int_const is an integer literal +// examples: `0` / `0xFF` +// note, that `-1` is unary minus of `1` int const +struct Vertex final : ASTExprLeaf { + td::RefInt256 intval; // parsed value, 255 for "0xFF" + std::string_view orig_str; // original "0xFF"; empty for nodes generated by compiler (e.g. 
in constant folding) + + Vertex(SrcLocation loc, td::RefInt256 intval, std::string_view orig_str) + : ASTExprLeaf(ast_int_const, loc) + , intval(std::move(intval)) + , orig_str(orig_str) {} }; template<> -struct Vertex final : ASTNodeLeaf { +// ast_string_const is a string literal in double quotes or """ when multiline +// examples: "asdf" / "Ef8zMz..."a / "to_calc_crc32_from"c +// an optional modifier specifies how a string is parsed (probably, like an integer) +// note, that TVM doesn't have strings, it has only slices, so "hello" has type slice +struct Vertex final : ASTExprLeaf { std::string_view str_val; char modifier; + bool is_bitslice() const { + char m = modifier; + return m == 0 || m == 's' || m == 'a'; + } + bool is_intval() const { + char m = modifier; + return m == 'u' || m == 'h' || m == 'H' || m == 'c'; + } + Vertex(SrcLocation loc, std::string_view str_val, char modifier) - : ASTNodeLeaf(ast_string_const, loc), str_val(str_val), modifier(modifier) {} + : ASTExprLeaf(ast_string_const, loc) + , str_val(str_val), modifier(modifier) {} }; template<> -struct Vertex final : ASTNodeLeaf { +// ast_bool_const is either `true` or `false` +struct Vertex final : ASTExprLeaf { bool bool_val; Vertex(SrcLocation loc, bool bool_val) - : ASTNodeLeaf(ast_bool_const, loc), bool_val(bool_val) {} + : ASTExprLeaf(ast_bool_const, loc) + , bool_val(bool_val) {} }; template<> -struct Vertex final : ASTNodeLeaf { +// ast_null_keyword is the `null` literal +// it should be handled with care; for instance, `null` takes special place in the type system +struct Vertex final : ASTExprLeaf { explicit Vertex(SrcLocation loc) - : ASTNodeLeaf(ast_null_keyword, loc) {} + : ASTExprLeaf(ast_null_keyword, loc) {} }; template<> -struct Vertex final : ASTNodeLeaf { - explicit Vertex(SrcLocation loc) - : ASTNodeLeaf(ast_self_keyword, loc) {} +// ast_argument is an element of an argument list of a function/method call +// example: `f(1, x)` has 2 arguments, `t.tupleFirst()` has no arguments 
(though `t` is passed as `self`) +// example: `f(mutate arg)` has 1 argument with `passed_as_mutate` flag +// (without `mutate` keyword, the entity "argument" could be replaced just by "any expression") +struct Vertex final : ASTExprUnary { + bool passed_as_mutate; + + AnyExprV get_expr() const { return child; } + + Vertex(SrcLocation loc, AnyExprV expr, bool passed_as_mutate) + : ASTExprUnary(ast_argument, loc, expr) + , passed_as_mutate(passed_as_mutate) {} }; template<> -struct Vertex final : ASTNodeUnary { - bool passed_as_mutate; // when called `f(mutate arg)`, not `f(arg)` - - AnyV get_expr() const { return child; } +// ast_argument_list contains N arguments of a function/method call +struct Vertex final : ASTExprVararg { + const std::vector& get_arguments() const { return children; } + auto get_arg(int i) const { return child(i)->as(); } - explicit Vertex(SrcLocation loc, AnyV expr, bool passed_as_mutate) - : ASTNodeUnary(ast_argument, loc, expr), passed_as_mutate(passed_as_mutate) {} + Vertex(SrcLocation loc, std::vector arguments) + : ASTExprVararg(ast_argument_list, loc, std::move(arguments)) {} }; template<> -struct Vertex final : ASTNodeVararg { - const std::vector& get_arguments() const { return children; } - auto get_arg(int i) const { return children.at(i)->as(); } +// ast_dot_access is "object before dot, identifier + optional after dot" +// examples: `tensorVar.0` / `obj.field` / `getObj().method` / `t.tupleFirst` +// from traversing point of view, it's an unary expression: only obj is expression, field name is not +// note, that `obj.method()` is a function call with "dot access `obj.method`" callee +struct Vertex final : ASTExprUnary { +private: + V identifier; // `0` / `field` / `method` + V instantiationTs; // not null if ``, otherwise nullptr + +public: + + typedef const FunctionData* DotTarget; // for `t.tupleAt` target is `tupleAt` global function + DotTarget target = nullptr; // filled at type inferring + + AnyExprV get_obj() const { return 
child; } + auto get_identifier() const { return identifier; } + bool has_instantiationTs() const { return instantiationTs != nullptr; } + auto get_instantiationTs() const { return instantiationTs; } + std::string_view get_field_name() const { return identifier->name; } - explicit Vertex(SrcLocation loc, std::vector arguments) - : ASTNodeVararg(ast_argument_list, loc, std::move(arguments)) {} + Vertex* mutate() const { return const_cast(this); } + void assign_target(const DotTarget& target); + + Vertex(SrcLocation loc, AnyExprV obj, V identifier, V instantiationTs) + : ASTExprUnary(ast_dot_access, loc, obj) + , identifier(identifier), instantiationTs(instantiationTs) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_called_f() const { return lhs; } +// ast_function_call is "calling some lhs with parenthesis", lhs is arbitrary expression (callee) +// example: `globalF()` then callee is reference +// example: `globalF()` then callee is reference (with instantiation Ts filled) +// example: `local_var()` then callee is reference (points to local var, filled at resolve identifiers) +// example: `getF()()` then callee is another func call (which type is TypeDataFunCallable) +// example: `obj.method()` then callee is dot access (resolved while type inferring) +struct Vertex final : ASTExprBinary { + const FunctionData* fun_maybe = nullptr; // filled while type inferring for `globalF()` / `obj.f()`; remains nullptr for `local_var()` / `getF()()` + + AnyExprV get_callee() const { return lhs; } + bool is_dot_call() const { return lhs->type == ast_dot_access; } + AnyExprV get_dot_obj() const { return lhs->as()->get_obj(); } auto get_arg_list() const { return rhs->as(); } int get_num_args() const { return rhs->as()->size(); } auto get_arg(int i) const { return rhs->as()->get_arg(i); } - Vertex(SrcLocation loc, AnyV lhs_f, V arguments) - : ASTNodeBinary(ast_function_call, loc, lhs_f, arguments) {} -}; + Vertex* mutate() const { return const_cast(this); } + void 
assign_fun_ref(const FunctionData* fun_ref); -template<> -struct Vertex final : ASTNodeBinary { - std::string_view method_name; - - AnyV get_obj() const { return lhs; } - auto get_arg_list() const { return rhs->as(); } - - Vertex(SrcLocation loc, std::string_view method_name, AnyV lhs, V arguments) - : ASTNodeBinary(ast_dot_method_call, loc, lhs, arguments), method_name(method_name) {} + Vertex(SrcLocation loc, AnyExprV lhs_f, V arguments) + : ASTExprBinary(ast_function_call, loc, lhs_f, arguments) {} }; template<> -struct Vertex final : ASTNodeUnary { - TypeExpr* declared_type; // may be nullptr - - auto get_identifier() const { return child->as(); } - - Vertex(SrcLocation loc, V name_identifier, TypeExpr* declared_type) - : ASTNodeUnary(ast_global_var_declaration, loc, name_identifier), declared_type(declared_type) {} +// ast_underscore represents `_` symbol used for left side of assignment +// example: `(cs, _) = cs.loadAndReturn()` +// though it's the only correct usage, using _ as rvalue like `var x = _;` is correct from AST point of view +// note, that for declaration `var _ = 1` underscore is a regular local var declared (with empty name) +// but for `_ = 1` (not declaration) it's underscore; it's because `var _:int` is also correct +struct Vertex final : ASTExprLeaf { + explicit Vertex(SrcLocation loc) + : ASTExprLeaf(ast_underscore, loc) {} }; template<> -struct Vertex final : ASTNodeBinary { - TypeExpr* declared_type; // may be nullptr - - auto get_identifier() const { return lhs->as(); } - AnyV get_init_value() const { return rhs; } - - Vertex(SrcLocation loc, V name_identifier, TypeExpr* declared_type, AnyV init_value) - : ASTNodeBinary(ast_constant_declaration, loc, name_identifier, init_value), declared_type(declared_type) {} +// ast_assign represents assignment "lhs = rhs" +// examples: `a = 4` / `var a = 4` / `(cs, b, mode) = rhs` / `f() = g()` +// note, that `a = 4` lhs is ast_reference, `var a = 4` lhs is ast_local_vars_declaration +struct Vertex 
final : ASTExprBinary { + AnyExprV get_lhs() const { return lhs; } + AnyExprV get_rhs() const { return rhs; } + + explicit Vertex(SrcLocation loc, AnyExprV lhs, AnyExprV rhs) + : ASTExprBinary(ast_assign, loc, lhs, rhs) {} }; template<> -struct Vertex final : ASTNodeLeaf { - explicit Vertex(SrcLocation loc) - : ASTNodeLeaf(ast_underscore, loc) {} +// ast_set_assign represents assignment-and-set operation "lhs = rhs" +// examples: `a += 4` / `b <<= c` +struct Vertex final : ASTExprBinary { + const FunctionData* fun_ref = nullptr; // filled at type inferring, points to `_+_` built-in for += + std::string_view operator_name; // without equal sign, "+" for operator += + TokenType tok; // tok_set_* + + AnyExprV get_lhs() const { return lhs; } + AnyExprV get_rhs() const { return rhs; } + + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); + + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV lhs, AnyExprV rhs) + : ASTExprBinary(ast_set_assign, loc, lhs, rhs) + , operator_name(operator_name), tok(tok) {} }; template<> -struct Vertex final : ASTNodeUnary { +// ast_unary_operator is "some operator over one expression" +// examples: `-1` / `~found` +struct Vertex final : ASTExprUnary { + const FunctionData* fun_ref = nullptr; // filled at type inferring, points to some built-in function std::string_view operator_name; TokenType tok; - AnyV get_rhs() const { return child; } + AnyExprV get_rhs() const { return child; } - Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyV rhs) - : ASTNodeUnary(ast_unary_operator, loc, rhs), operator_name(operator_name), tok(tok) {} + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); + + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV rhs) + : ASTExprUnary(ast_unary_operator, loc, rhs) + , operator_name(operator_name), tok(tok) {} }; template<> -struct 
Vertex final : ASTNodeBinary { +// ast_binary_operator is "some operator over two expressions" +// examples: `a + b` / `x & true` / `(a, b) << g()` +// note, that `a = b` is NOT a binary operator, it's ast_assign, also `a += b`, it's ast_set_assign +struct Vertex final : ASTExprBinary { + const FunctionData* fun_ref = nullptr; // filled at type inferring, points to some built-in function std::string_view operator_name; TokenType tok; - AnyV get_lhs() const { return lhs; } - AnyV get_rhs() const { return rhs; } + AnyExprV get_lhs() const { return lhs; } + AnyExprV get_rhs() const { return rhs; } + + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); - Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyV lhs, AnyV rhs) - : ASTNodeBinary(ast_binary_operator, loc, lhs, rhs), operator_name(operator_name), tok(tok) {} + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyExprV lhs, AnyExprV rhs) + : ASTExprBinary(ast_binary_operator, loc, lhs, rhs) + , operator_name(operator_name), tok(tok) {} }; template<> -struct Vertex final : ASTNodeVararg { - AnyV get_cond() const { return children.at(0); } - AnyV get_when_true() const { return children.at(1); } - AnyV get_when_false() const { return children.at(2); } - - Vertex(SrcLocation loc, AnyV cond, AnyV when_true, AnyV when_false) - : ASTNodeVararg(ast_ternary_operator, loc, {cond, when_true, when_false}) {} +// ast_ternary_operator is a traditional ternary construction +// example: `cond ? 
a : b` +struct Vertex final : ASTExprVararg { + AnyExprV get_cond() const { return child(0); } + AnyExprV get_when_true() const { return child(1); } + AnyExprV get_when_false() const { return child(2); } + + Vertex(SrcLocation loc, AnyExprV cond, AnyExprV when_true, AnyExprV when_false) + : ASTExprVararg(ast_ternary_operator, loc, {cond, when_true, when_false}) {} }; template<> -struct Vertex : ASTNodeUnary { - AnyV get_return_value() const { return child; } +// ast_cast_as_operator is explicit casting with "as" keyword +// examples: `arg as int` / `null as cell` / `t.tupleAt(2) as slice` +struct Vertex final : ASTExprUnary { + AnyExprV get_expr() const { return child; } + + TypePtr cast_to_type; + + Vertex* mutate() const { return const_cast(this); } + void assign_resolved_type(TypePtr cast_to_type); + + Vertex(SrcLocation loc, AnyExprV expr, TypePtr cast_to_type) + : ASTExprUnary(ast_cast_as_operator, loc, expr) + , cast_to_type(cast_to_type) {} +}; + - Vertex(SrcLocation loc, AnyV child) - : ASTNodeUnary(ast_return_statement, loc, child) {} +// +// --------------------------------------------------------- +// statements +// + + +template<> +// ast_empty_statement is very similar to "empty sequence" but has a special treatment +// example: `;` (just semicolon) +// example: body of `builtin` function is empty statement (not a zero sequence) +struct Vertex final : ASTStatementVararg { + explicit Vertex(SrcLocation loc) + : ASTStatementVararg(ast_empty_statement, loc, {}) {} }; template<> -struct Vertex final : ASTNodeVararg { +// ast_sequence is "some sequence of statements" +// example: function body is a sequence +// example: do while body is a sequence +struct Vertex final : ASTStatementVararg { SrcLocation loc_end; const std::vector& get_items() const { return children; } AnyV get_item(int i) const { return children.at(i); } Vertex(SrcLocation loc, SrcLocation loc_end, std::vector items) - : ASTNodeVararg(ast_sequence, loc, std::move(items)), loc_end(loc_end) 
{} + : ASTStatementVararg(ast_sequence, loc, std::move(items)) + , loc_end(loc_end) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_cond() const { return lhs; } - auto get_body() const { return rhs->as(); } - - Vertex(SrcLocation loc, AnyV cond, V body) - : ASTNodeBinary(ast_repeat_statement, loc, cond, body) {} +// ast_return_statement is "return something from a function" +// examples: `return a` / `return any_expr()()` / `return;` +// note, that for `return;` (without a value, meaning "void"), in AST, it's stored as empty expression +struct Vertex : ASTStatementUnary { + AnyExprV get_return_value() const { return child_as_expr(); } + bool has_return_value() const { return child->type != ast_empty_expression; } + + Vertex(SrcLocation loc, AnyExprV child) + : ASTStatementUnary(ast_return_statement, loc, child) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_cond() const { return lhs; } - auto get_body() const { return rhs->as(); } +// ast_if_statement is a traditional if statement, probably followed by an else branch +// examples: `if (cond) { ... } else { ... }` / `if (cond) { ... 
}` +// when else branch is missing, it's stored as empty statement +// for "else if", it's just "if statement" inside a sequence of else branch +struct Vertex final : ASTStatementVararg { + bool is_ifnot; // if(!cond), to generate more optimal fift code + + AnyExprV get_cond() const { return child_as_expr(0); } + auto get_if_body() const { return children.at(1)->as(); } + auto get_else_body() const { return children.at(2)->as(); } // always exists (when else omitted, it's empty) - Vertex(SrcLocation loc, AnyV cond, V body) - : ASTNodeBinary(ast_while_statement, loc, cond, body) {} + Vertex(SrcLocation loc, bool is_ifnot, AnyExprV cond, V if_body, V else_body) + : ASTStatementVararg(ast_if_statement, loc, {cond, if_body, else_body}) + , is_ifnot(is_ifnot) {} }; template<> -struct Vertex final : ASTNodeBinary { - auto get_body() const { return lhs->as(); } - AnyV get_cond() const { return rhs; } - - Vertex(SrcLocation loc, V body, AnyV cond) - : ASTNodeBinary(ast_do_while_statement, loc, body, cond) {} +// ast_repeat_statement is "repeat something N times" +// example: `repeat (10) { ... }` +struct Vertex final : ASTStatementVararg { + AnyExprV get_cond() const { return child_as_expr(0); } + auto get_body() const { return children.at(1)->as(); } + + Vertex(SrcLocation loc, AnyExprV cond, V body) + : ASTStatementVararg(ast_repeat_statement, loc, {cond, body}) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_thrown_code() const { return lhs; } - AnyV get_thrown_arg() const { return rhs; } // may be ast_empty - bool has_thrown_arg() const { return rhs->type != ast_empty; } +// ast_while_statement is a standard "while" loop +// example: `while (x > 0) { ... 
}` +struct Vertex final : ASTStatementVararg { + AnyExprV get_cond() const { return child_as_expr(0); } + auto get_body() const { return children.at(1)->as(); } + + Vertex(SrcLocation loc, AnyExprV cond, V body) + : ASTStatementVararg(ast_while_statement, loc, {cond, body}) {} +}; - Vertex(SrcLocation loc, AnyV thrown_code, AnyV thrown_arg) - : ASTNodeBinary(ast_throw_statement, loc, thrown_code, thrown_arg) {} +template<> +// ast_do_while_statement is a standard "do while" loop +// example: `do { ... } while (x > 0);` +struct Vertex final : ASTStatementVararg { + auto get_body() const { return children.at(0)->as(); } + AnyExprV get_cond() const { return child_as_expr(1); } + + Vertex(SrcLocation loc, V body, AnyExprV cond) + : ASTStatementVararg(ast_do_while_statement, loc, {body, cond}) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_cond() const { return lhs; } - AnyV get_thrown_code() const { return rhs; } +// ast_throw_statement is throwing an exception, it accepts excNo and optional arg +// examples: `throw 10` / `throw (ERR_LOW_BALANCE)` / `throw (1001, incomingAddr)` +// when thrown arg is missing, it's stored as empty expression +struct Vertex final : ASTStatementVararg { + AnyExprV get_thrown_code() const { return child_as_expr(0); } + bool has_thrown_arg() const { return child_as_expr(1)->type != ast_empty_expression; } + AnyExprV get_thrown_arg() const { return child_as_expr(1); } + + Vertex(SrcLocation loc, AnyExprV thrown_code, AnyExprV thrown_arg) + : ASTStatementVararg(ast_throw_statement, loc, {thrown_code, thrown_arg}) {} +}; - Vertex(SrcLocation loc, AnyV cond, AnyV thrown_code) - : ASTNodeBinary(ast_assert_statement, loc, cond, thrown_code) {} +template<> +// ast_assert_statement is "assert that cond is true, otherwise throw an exception" +// examples: `assert (balance > 0, ERR_ZERO_BALANCE)` / `assert (balance > 0) throw (ERR_ZERO_BALANCE)` +struct Vertex final : ASTStatementVararg { + AnyExprV get_cond() const { return 
child_as_expr(0); } + AnyExprV get_thrown_code() const { return child_as_expr(1); } + + Vertex(SrcLocation loc, AnyExprV cond, AnyExprV thrown_code) + : ASTStatementVararg(ast_assert_statement, loc, {cond, thrown_code}) {} }; template<> -struct Vertex final : ASTNodeVararg { +// ast_try_catch_statement is a standard try catch (finally block doesn't exist) +// example: `try { ... } catch (excNo) { ... }` +// there are two formal "arguments" of catch: excNo and arg, but both can be omitted +// when omitted, they are stored as underscores, so len of a catch tensor is always 2 +struct Vertex final : ASTStatementVararg { auto get_try_body() const { return children.at(0)->as(); } auto get_catch_expr() const { return children.at(1)->as(); } // (excNo, arg), always len 2 auto get_catch_body() const { return children.at(2)->as(); } Vertex(SrcLocation loc, V try_body, V catch_expr, V catch_body) - : ASTNodeVararg(ast_try_catch_statement, loc, {try_body, catch_expr, catch_body}) {} + : ASTStatementVararg(ast_try_catch_statement, loc, {try_body, catch_expr, catch_body}) {} }; template<> -struct Vertex final : ASTNodeVararg { - bool is_ifnot; // if(!cond), to generate more optimal fift code +// ast_asm_body is a body of `asm` function — a set of strings, and optionally stack order manipulations +// example: `fun skipMessageOp... asm "32 PUSHINT" "SDSKIPFIRST";` +// user can specify "arg order"; example: `fun store(self: builder, op: int) asm (op self)` then [1, 0] +// user can specify "ret order"; example: `fun modDiv... 
asm(-> 1 0) "DIVMOD";` then [1, 0] +struct Vertex final : ASTStatementVararg { + std::vector arg_order; + std::vector ret_order; - AnyV get_cond() const { return children.at(0); } - auto get_if_body() const { return children.at(1)->as(); } - auto get_else_body() const { return children.at(2)->as(); } // always exists (when else omitted, it's empty) + const std::vector& get_asm_commands() const { return children; } // ast_string_const[] - Vertex(SrcLocation loc, bool is_ifnot, AnyV cond, V if_body, V else_body) - : ASTNodeVararg(ast_if_statement, loc, {cond, if_body, else_body}), is_ifnot(is_ifnot) {} + Vertex(SrcLocation loc, std::vector arg_order, std::vector ret_order, std::vector asm_commands) + : ASTStatementVararg(ast_asm_body, loc, std::move(asm_commands)) + , arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {} }; + +// +// --------------------------------------------------------- +// other +// + + template<> -struct Vertex final : ASTNodeLeaf { - TypeExpr* created_type; // used to keep same pointer, since TypeExpr::new_var(i) always allocates +// ast_genericsT_item is generics T at declaration +// example: `fun f` has a list of 2 generic Ts +struct Vertex final : ASTOtherLeaf { std::string_view nameT; - Vertex(SrcLocation loc, TypeExpr* created_type, std::string_view nameT) - : ASTNodeLeaf(ast_genericsT_item, loc), created_type(created_type), nameT(nameT) {} + Vertex(SrcLocation loc, std::string_view nameT) + : ASTOtherLeaf(ast_genericsT_item, loc) + , nameT(nameT) {} }; template<> -struct Vertex final : ASTNodeVararg { +// ast_genericsT_list is a container for generics T at declaration +// example: see above +struct Vertex final : ASTOtherVararg { std::vector get_items() const { return children; } auto get_item(int i) const { return children.at(i)->as(); } Vertex(SrcLocation loc, std::vector genericsT_items) - : ASTNodeVararg(ast_genericsT_list, loc, std::move(genericsT_items)) {} + : ASTOtherVararg(ast_genericsT_list, loc, 
std::move(genericsT_items)) {} int lookup_idx(std::string_view nameT) const; }; + template<> -struct Vertex final : ASTNodeUnary { - TypeExpr* param_type; - bool declared_as_mutate; // declared as `mutate param_name` +// ast_instantiationT_item is manual substitution of generic T used in code, mostly for func calls +// examples: `g()` / `t.tupleFirst()` / `f<(int, slice), builder>()` +struct Vertex final : ASTOtherLeaf { + TypePtr substituted_type; - auto get_identifier() const { return child->as(); } // for underscore, name is empty - bool is_underscore() const { return child->as()->name.empty(); } + Vertex* mutate() const { return const_cast(this); } + void assign_resolved_type(TypePtr substituted_type); - Vertex(SrcLocation loc, V name_identifier, TypeExpr* param_type, bool declared_as_mutate) - : ASTNodeUnary(ast_parameter, loc, name_identifier), param_type(param_type), declared_as_mutate(declared_as_mutate) {} + Vertex(SrcLocation loc, TypePtr substituted_type) + : ASTOtherLeaf(ast_instantiationT_item, loc) + , substituted_type(substituted_type) {} }; template<> -struct Vertex final : ASTNodeVararg { +// ast_instantiationT_list is a container for generic T substitutions used in code +// examples: see above +struct Vertex final : ASTOtherVararg { + std::vector get_items() const { return children; } + auto get_item(int i) const { return children.at(i)->as(); } + + Vertex(SrcLocation loc, std::vector instantiationTs) + : ASTOtherVararg(ast_instantiationT_list, loc, std::move(instantiationTs)) {} +}; + +template<> +// ast_parameter is a parameter of a function in its declaration +// example: `fun f(a: int, mutate b: slice)` has 2 parameters +struct Vertex final : ASTOtherLeaf { + const LocalVarData* param_ref = nullptr; // filled on resolve identifiers + std::string_view param_name; + TypePtr declared_type; + bool declared_as_mutate; // declared as `mutate param_name` + + bool is_underscore() const { return param_name.empty(); } + + Vertex* mutate() const { return 
const_cast(this); } + void assign_param_ref(const LocalVarData* param_ref); + void assign_resolved_type(TypePtr declared_type); + + Vertex(SrcLocation loc, std::string_view param_name, TypePtr declared_type, bool declared_as_mutate) + : ASTOtherLeaf(ast_parameter, loc) + , param_name(param_name), declared_type(declared_type), declared_as_mutate(declared_as_mutate) {} +}; + +template<> +// ast_parameter_list is a container of parameters +// example: see above +struct Vertex final : ASTOtherVararg { const std::vector& get_params() const { return children; } auto get_param(int i) const { return children.at(i)->as(); } Vertex(SrcLocation loc, std::vector params) - : ASTNodeVararg(ast_parameter_list, loc, std::move(params)) {} + : ASTOtherVararg(ast_parameter_list, loc, std::move(params)) {} int lookup_idx(std::string_view param_name) const; int get_mutate_params_count() const; @@ -527,106 +918,132 @@ struct Vertex final : ASTNodeVararg { }; template<> -struct Vertex final : ASTNodeVararg { - std::vector arg_order; - std::vector ret_order; - - const std::vector& get_asm_commands() const { return children; } // ast_string_const[] - - Vertex(SrcLocation loc, std::vector arg_order, std::vector ret_order, std::vector asm_commands) - : ASTNodeVararg(ast_asm_body, loc, std::move(asm_commands)), arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {} -}; - -template<> -struct Vertex final : ASTNodeUnary { +// ast_annotation is @annotation above a declaration +// example: `@pure fun ...` +struct Vertex final : ASTOtherVararg { AnnotationKind kind; - auto get_arg() const { return child->as(); } + auto get_arg() const { return children.at(0)->as(); } static AnnotationKind parse_kind(std::string_view name); Vertex(SrcLocation loc, AnnotationKind kind, V arg_probably_empty) - : ASTNodeUnary(ast_annotation, loc, arg_probably_empty), kind(kind) {} + : ASTOtherVararg(ast_annotation, loc, {arg_probably_empty}) + , kind(kind) {} }; template<> -struct Vertex final : 
ASTNodeUnary { - TypeExpr* declared_type; - bool is_immutable; // declared via 'val', not 'var' - bool marked_as_redef; // var (existing_var redef, new_var: int) = ... +// ast_function_declaration is declaring a function/method +// methods are still global functions, just accepting "self" first parameter +// example: `fun f() { ... }` +// functions can be generic, `fun f(params) { ... }` +// their body is either sequence (regular code function), or `asm`, or `builtin` +struct Vertex final : ASTOtherVararg { + auto get_identifier() const { return children.at(0)->as(); } + int get_num_params() const { return children.at(1)->as()->size(); } + auto get_param_list() const { return children.at(1)->as(); } + auto get_param(int i) const { return children.at(1)->as()->get_param(i); } + AnyV get_body() const { return children.at(2); } // ast_sequence / ast_asm_body + + const FunctionData* fun_ref = nullptr; // filled after register + TypePtr declared_return_type; // filled at ast parsing; if unspecified (nullptr), means "auto infer" + V genericsT_list; // for non-generics it's nullptr + td::RefInt256 method_id; // specified via @method_id annotation + int flags; // from enum in FunctionData + + bool is_asm_function() const { return children.at(2)->type == ast_asm_body; } + bool is_code_function() const { return children.at(2)->type == ast_sequence; } + bool is_builtin_function() const { return children.at(2)->type == ast_empty_statement; } - AnyV get_identifier() const { return child; } // ast_identifier / ast_underscore + Vertex* mutate() const { return const_cast(this); } + void assign_fun_ref(const FunctionData* fun_ref); + void assign_resolved_type(TypePtr declared_return_type); - Vertex(SrcLocation loc, AnyV name_identifier, TypeExpr* declared_type, bool is_immutable, bool marked_as_redef) - : ASTNodeUnary(ast_local_var, loc, name_identifier), declared_type(declared_type), is_immutable(is_immutable), marked_as_redef(marked_as_redef) {} + Vertex(SrcLocation loc, V 
name_identifier, V parameters, AnyV body, TypePtr declared_return_type, V genericsT_list, td::RefInt256 method_id, int flags) + : ASTOtherVararg(ast_function_declaration, loc, {name_identifier, parameters, body}) + , declared_return_type(declared_return_type), genericsT_list(genericsT_list), method_id(std::move(method_id)), flags(flags) {} }; template<> -struct Vertex final : ASTNodeBinary { - AnyV get_lhs() const { return lhs; } // ast_local_var / ast_tensor / ast_tensor_square - AnyV get_assigned_val() const { return rhs; } +// ast_global_var_declaration is declaring a global var, outside a function +// example: `global g: int;` +// note, that globals don't have default values, since there is no single "entrypoint" for a contract +struct Vertex final : ASTOtherVararg { + const GlobalVarData* var_ref = nullptr; // filled after register + TypePtr declared_type; // filled always, typing globals is mandatory + + auto get_identifier() const { return children.at(0)->as(); } - Vertex(SrcLocation loc, AnyV lhs, AnyV assigned_val) - : ASTNodeBinary(ast_local_vars_declaration, loc, lhs, assigned_val) {} + Vertex* mutate() const { return const_cast(this); } + void assign_var_ref(const GlobalVarData* var_ref); + void assign_resolved_type(TypePtr declared_type); + + Vertex(SrcLocation loc, V name_identifier, TypePtr declared_type) + : ASTOtherVararg(ast_global_var_declaration, loc, {name_identifier}) + , declared_type(declared_type) {} }; template<> -struct Vertex final : ASTNodeVararg { - auto get_identifier() const { return children.at(0)->as(); } - int get_num_params() const { return children.at(1)->as()->size(); } - auto get_param_list() const { return children.at(1)->as(); } - auto get_param(int i) const { return children.at(1)->as()->get_param(i); } - AnyV get_body() const { return children.at(2); } // ast_sequence / ast_asm_body +// ast_constant_declaration is declaring a global constant, outside a function +// example: `const op = 0x123;` +struct Vertex final : 
ASTOtherVararg { + const GlobalConstData* const_ref = nullptr; // filled after register + TypePtr declared_type; // not null for `const op: int = ...` - TypeExpr* ret_type = nullptr; - V genericsT_list = nullptr; - bool is_entrypoint = false; - bool marked_as_pure = false; - bool marked_as_builtin = false; - bool marked_as_get_method = false; - bool marked_as_inline = false; - bool marked_as_inline_ref = false; - bool accepts_self = false; - bool returns_self = false; - V method_id = nullptr; + auto get_identifier() const { return children.at(0)->as(); } + AnyExprV get_init_value() const { return child_as_expr(1); } - bool is_asm_function() const { return children.at(2)->type == ast_asm_body; } + Vertex* mutate() const { return const_cast(this); } + void assign_const_ref(const GlobalConstData* const_ref); + void assign_resolved_type(TypePtr declared_type); - Vertex(SrcLocation loc, V name_identifier, V parameters, AnyV body) - : ASTNodeVararg(ast_function_declaration, loc, {name_identifier, parameters, body}) {} + Vertex(SrcLocation loc, V name_identifier, TypePtr declared_type, AnyExprV init_value) + : ASTOtherVararg(ast_constant_declaration, loc, {name_identifier, init_value}) + , declared_type(declared_type) {} }; template<> -struct Vertex final : ASTNodeLeaf { - TokenType cmp_tok; +// ast_tolk_required_version is a preamble fixating compiler's version at the top of the file +// example: `tolk 0.6` +// when compiler version mismatches, it means, that another compiler was earlier for that sources, a warning is emitted +struct Vertex final : ASTOtherLeaf { std::string_view semver; - Vertex(SrcLocation loc, TokenType cmp_tok, std::string_view semver) - : ASTNodeLeaf(ast_tolk_required_version, loc), cmp_tok(cmp_tok), semver(semver) {} + Vertex(SrcLocation loc, std::string_view semver) + : ASTOtherLeaf(ast_tolk_required_version, loc) + , semver(semver) {} }; template<> -struct Vertex final : ASTNodeUnary { - const SrcFile* file = nullptr; // assigned after includes 
have been resolved +// ast_import_directive is an import at the top of the file +// examples: `import "another.tolk"` / `import "@stdlib/tvm-dicts"` +struct Vertex final : ASTOtherVararg { + const SrcFile* file = nullptr; // assigned after imports have been resolved, just after parsing a file to ast - auto get_file_leaf() const { return child->as(); } + auto get_file_leaf() const { return children.at(0)->as(); } - std::string get_file_name() const { return static_cast(child->as()->str_val); } + std::string get_file_name() const { return static_cast(children.at(0)->as()->str_val); } - void mutate_set_src_file(const SrcFile* file) const; + Vertex* mutate() const { return const_cast(this); } + void assign_src_file(const SrcFile* file); Vertex(SrcLocation loc, V file_name) - : ASTNodeUnary(ast_import_statement, loc, file_name) {} + : ASTOtherVararg(ast_import_directive, loc, {file_name}) {} }; template<> -struct Vertex final : ASTNodeVararg { +// ast_tolk_file represents a whole parsed input .tolk file +// with functions, constants, etc. 
+// particularly, it contains imports that lead to loading other files +// a whole program consists of multiple parsed files, each of them has a parsed ast tree (stdlib is also parsed) +struct Vertex final : ASTOtherVararg { const SrcFile* const file; const std::vector& get_toplevel_declarations() const { return children; } Vertex(const SrcFile* file, std::vector toplevel_declarations) - : ASTNodeVararg(ast_tolk_file, SrcLocation(file), std::move(toplevel_declarations)), file(file) {} + : ASTOtherVararg(ast_tolk_file, SrcLocation(file), std::move(toplevel_declarations)) + , file(file) {} }; } // namespace tolk diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index d18cfa644..d704ec4d3 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -16,86 +16,57 @@ */ #include "tolk.h" #include "compiler-state.h" +#include "type-system.h" +#include "generics-helpers.h" namespace tolk { using namespace std::literals::string_literals; -/* - * - * SYMBOL VALUES - * - */ - -SymDef* define_builtin_func_impl(const std::string& name, SymValAsmFunc* func_val) { - sym_idx_t name_idx = G.symbols.lookup_add(name); - SymDef* def = define_global_symbol(name_idx); - tolk_assert(!def->value); - - def->value = func_val; -#ifdef TOLK_DEBUG - def->value->sym_name = name; -#endif - return def; -} - -// given func_type = `(slice, int) -> slice` and func flags, create SymDef for parameters +// given func_type = `(slice, int) -> slice` and func flags, create SymLocalVarOrParameter // currently (see at the bottom) parameters of built-in functions are unnamed: // built-in functions are created using a resulting type -static std::vector define_builtin_parameters(const TypeExpr* func_type, int func_flags) { +static std::vector define_builtin_parameters(const std::vector& params_types, int func_flags) { // `loadInt()`, `storeInt()`: they accept `self` and mutate it; no other options available in built-ins for now - bool is_mutate_self = func_flags & SymValFunc::flagHasMutateParams; - // func_type 
a map (params_type -> ret_type), probably surrounded by forall (internal representation of ) - TypeExpr* params_type = func_type->constr == TypeExpr::te_ForAll ? func_type->args[0]->args[0] : func_type->args[0]; - std::vector parameters; - - if (params_type->constr == TypeExpr::te_Tensor) { // multiple parameters: it's a tensor - parameters.reserve(params_type->args.size()); - for (int i = 0; i < static_cast(params_type->args.size()); ++i) { - SymDef* sym_def = define_parameter(i, {}); - SymValVariable* sym_val = new SymValVariable(i, params_type->args[i]); - if (i == 0 && is_mutate_self) { - sym_val->flags |= SymValVariable::flagMutateParameter; - } - sym_def->value = sym_val; - parameters.emplace_back(sym_def); - } - } else { // single parameter - SymDef* sym_def = define_parameter(0, {}); - SymValVariable* sym_val = new SymValVariable(0, params_type); - if (is_mutate_self) { - sym_val->flags |= SymValVariable::flagMutateParameter; - } - sym_def->value = sym_val; - parameters.emplace_back(sym_def); + bool is_mutate_self = func_flags & FunctionData::flagHasMutateParams; + std::vector parameters; + parameters.reserve(params_types.size()); + + for (int i = 0; i < static_cast(params_types.size()); ++i) { + LocalVarData p_sym("", {}, params_types[i], (i == 0 && is_mutate_self) * LocalVarData::flagMutateParameter, i); + parameters.push_back(std::move(p_sym)); } return parameters; } -static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags) { - return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, func, flags | SymValFunc::flagBuiltinFunction)); +static void define_builtin_func(const std::string& name, const std::vector& params_types, TypePtr return_type, const GenericsDeclaration* genericTs, const simple_compile_func_t& func, int flags) { + auto* f_sym = new FunctionData(name, {}, return_type, define_builtin_parameters(params_types, flags), 
flags, genericTs, nullptr, new FunctionBodyBuiltin(func), nullptr); + G.symtable.add_function(f_sym); } -static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const AsmOp& macro, int flags) { - return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, make_simple_compile(macro), flags | SymValFunc::flagBuiltinFunction)); +static void define_builtin_func(const std::string& name, const std::vector& params_types, TypePtr return_type, const GenericsDeclaration* genericTs, const AsmOp& macro, int flags) { + auto* f_sym = new FunctionData(name, {}, return_type, define_builtin_parameters(params_types, flags), flags, genericTs, nullptr, new FunctionBodyBuiltin(make_simple_compile(macro)), nullptr); + G.symtable.add_function(f_sym); } -static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags, - std::initializer_list arg_order, std::initializer_list ret_order) { - return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, func, flags | SymValFunc::flagBuiltinFunction, arg_order, ret_order)); +static void define_builtin_func(const std::string& name, const std::vector& params_types, TypePtr return_type, const GenericsDeclaration* genericTs, const simple_compile_func_t& func, int flags, + std::initializer_list arg_order, std::initializer_list ret_order) { + auto* f_sym = new FunctionData(name, {}, return_type, define_builtin_parameters(params_types, flags), flags, genericTs, nullptr, new FunctionBodyBuiltin(func), nullptr); + f_sym->arg_order = arg_order; + f_sym->ret_order = ret_order; + G.symtable.add_function(f_sym); } -bool SymValAsmFunc::compile(AsmOpList& dest, std::vector& out, std::vector& in, - SrcLocation where) const { - if (simple_compile) { - return dest.append(simple_compile(out, in, where)); - } else if (ext_compile) { - return ext_compile(dest, out, in); - } else 
{ - return false; - } +void FunctionBodyBuiltin::compile(AsmOpList& dest, std::vector& out, std::vector& in, + SrcLocation where) const { + dest.append(simple_compile(out, in, where)); } +void FunctionBodyAsm::compile(AsmOpList& dest) const { + dest.append(ops); +} + + /* * * DEFINE BUILT-IN FUNCTIONS @@ -504,7 +475,7 @@ AsmOp compile_unary_plus(std::vector& res, std::vector& args return AsmOp::Nop(); } -AsmOp compile_logical_not(std::vector& res, std::vector& args, SrcLocation where) { +AsmOp compile_logical_not(std::vector& res, std::vector& args, SrcLocation where, bool for_int_arg) { tolk_assert(res.size() == 1 && args.size() == 1); VarDescr &r = res[0], &x = args[0]; if (x.is_int_const()) { @@ -513,7 +484,9 @@ AsmOp compile_logical_not(std::vector& res, std::vector& arg return push_const(r.int_const); } r.val = VarDescr::ValBool; - return exec_op("0 EQINT", 1); + // for integers, `!var` is `var != 0` + // for booleans, `!var` can be shortened to `~var` (works the same for 0/-1 but consumes less) + return for_int_arg ? exec_op("0 EQINT", 1) : exec_op("NOT", 1); } AsmOp compile_bitwise_and(std::vector& res, std::vector& args, SrcLocation where) { @@ -1076,7 +1049,7 @@ AsmOp compile_fetch_slice(std::vector& res, std::vector& arg return exec_op(fetch ? 
"LDSLICEX" : "PLDSLICEX", 2, 1 + (unsigned)fetch); } -// fun at(t: tuple, index: int): X asm "INDEXVAR"; +// fun tupleAt(t: tuple, index: int): X asm "INDEXVAR"; AsmOp compile_tuple_at(std::vector& res, std::vector& args, SrcLocation) { tolk_assert(args.size() == 2 && res.size() == 1); auto& y = args[1]; @@ -1087,7 +1060,7 @@ AsmOp compile_tuple_at(std::vector& res, std::vector& args, return exec_op("INDEXVAR", 2, 1); } -// fun __isNull(X arg): int +// fun __isNull(X arg): bool AsmOp compile_is_null(std::vector& res, std::vector& args, SrcLocation) { tolk_assert(args.size() == 1 && res.size() == 1); res[0].val = VarDescr::ValBool; @@ -1098,143 +1071,188 @@ AsmOp compile_is_null(std::vector& res, std::vector& args, S void define_builtins() { using namespace std::placeholders; - TypeExpr* Unit = TypeExpr::new_unit(); - TypeExpr* Int = TypeExpr::new_atomic(TypeExpr::_Int); - TypeExpr* Slice = TypeExpr::new_atomic(TypeExpr::_Slice); - TypeExpr* Builder = TypeExpr::new_atomic(TypeExpr::_Builder); - TypeExpr* Tuple = TypeExpr::new_atomic(TypeExpr::_Tuple); - TypeExpr* Int2 = TypeExpr::new_tensor({Int, Int}); - TypeExpr* Int3 = TypeExpr::new_tensor({Int, Int, Int}); - TypeExpr* TupleInt = TypeExpr::new_tensor({Tuple, Int}); - TypeExpr* SliceInt = TypeExpr::new_tensor({Slice, Int}); - TypeExpr* X = TypeExpr::new_var(0); - TypeExpr* arith_bin_op = TypeExpr::new_map(Int2, Int); - TypeExpr* arith_un_op = TypeExpr::new_map(Int, Int); - TypeExpr* impure_un_op = TypeExpr::new_map(Int, Unit); - TypeExpr* fetch_int_op_mutate = TypeExpr::new_map(SliceInt, SliceInt); - TypeExpr* prefetch_int_op = TypeExpr::new_map(SliceInt, Int); - TypeExpr* store_int_mutate = TypeExpr::new_map(TypeExpr::new_tensor({Builder, Int, Int}), TypeExpr::new_tensor({Builder, Unit})); - TypeExpr* fetch_slice_op_mutate = TypeExpr::new_map(SliceInt, TypeExpr::new_tensor({Slice, Slice})); - TypeExpr* prefetch_slice_op = TypeExpr::new_map(SliceInt, Slice); - TypeExpr* throw_arg_op = TypeExpr::new_forall({X}, 
TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit)); - - define_builtin_func("_+_", arith_bin_op, compile_add, - SymValFunc::flagMarkedAsPure); - define_builtin_func("_-_", arith_bin_op, compile_sub, - SymValFunc::flagMarkedAsPure); - define_builtin_func("-_", arith_un_op, compile_unary_minus, - SymValFunc::flagMarkedAsPure); - define_builtin_func("+_", arith_un_op, compile_unary_plus, - SymValFunc::flagMarkedAsPure); - define_builtin_func("_*_", arith_bin_op, compile_mul, - SymValFunc::flagMarkedAsPure); - define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0), - SymValFunc::flagMarkedAsPure); - define_builtin_func("_^/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("_%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("_<<_", arith_bin_op, compile_lshift, - SymValFunc::flagMarkedAsPure); - define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0), - SymValFunc::flagMarkedAsPure); - define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("!_", arith_un_op, compile_logical_not, - SymValFunc::flagMarkedAsPure); - define_builtin_func("~_", arith_un_op, compile_bitwise_not, - SymValFunc::flagMarkedAsPure); - define_builtin_func("_&_", arith_bin_op, compile_bitwise_and, - SymValFunc::flagMarkedAsPure); - define_builtin_func("_|_", arith_bin_op, compile_bitwise_or, - SymValFunc::flagMarkedAsPure); - define_builtin_func("_^_", arith_bin_op, compile_bitwise_xor, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_+=_", arith_bin_op, compile_add, - 
SymValFunc::flagMarkedAsPure); - define_builtin_func("^_-=_", arith_bin_op, compile_sub, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_*=_", arith_bin_op, compile_mul, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_<<=_", arith_bin_op, compile_lshift, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_&=_", arith_bin_op, compile_bitwise_and, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_|=_", arith_bin_op, compile_bitwise_or, - SymValFunc::flagMarkedAsPure); - define_builtin_func("^_^=_", arith_bin_op, compile_bitwise_xor, - SymValFunc::flagMarkedAsPure); - define_builtin_func("_==_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 2), - SymValFunc::flagMarkedAsPure); - define_builtin_func("_!=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 5), - SymValFunc::flagMarkedAsPure); - define_builtin_func("_<_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 4), - SymValFunc::flagMarkedAsPure); - define_builtin_func("_>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6), - SymValFunc::flagMarkedAsPure); - define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 3), - SymValFunc::flagMarkedAsPure); - define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7), - SymValFunc::flagMarkedAsPure); - define_builtin_func("mulDivFloor", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("mulDivRound", TypeExpr::new_map(Int3, Int), 
std::bind(compile_muldiv, _1, _2, _3, 0), - SymValFunc::flagMarkedAsPure); - define_builtin_func("mulDivCeil", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1), - SymValFunc::flagMarkedAsPure); - define_builtin_func("mulDivMod", TypeExpr::new_map(Int3, Int2), AsmOp::Custom("MULDIVMOD", 3, 2), - SymValFunc::flagMarkedAsPure); - define_builtin_func("__true", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true), - SymValFunc::flagMarkedAsPure); - define_builtin_func("__false", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false), - SymValFunc::flagMarkedAsPure); - define_builtin_func("__null", TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_unit(), X)), AsmOp::Const("PUSHNULL"), - SymValFunc::flagMarkedAsPure); - define_builtin_func("__isNull", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null, - SymValFunc::flagMarkedAsPure); - define_builtin_func("__throw", impure_un_op, compile_throw, + TypePtr Unit = TypeDataVoid::create(); + TypePtr Int = TypeDataInt::create(); + TypePtr Bool = TypeDataBool::create(); + TypePtr Slice = TypeDataSlice::create(); + TypePtr Builder = TypeDataBuilder::create(); + TypePtr Tuple = TypeDataTuple::create(); + + std::vector itemsT; + itemsT.emplace_back("T"); + TypePtr typeT = TypeDataGenericT::create("T"); + const GenericsDeclaration* declGenericT = new GenericsDeclaration(std::move(itemsT)); + + std::vector ParamsInt1 = {Int}; + std::vector ParamsInt2 = {Int, Int}; + std::vector ParamsInt3 = {Int, Int, Int}; + std::vector ParamsSliceInt = {Slice, Int}; + + // builtin operators + // they are internally stored as functions, because at IR level, there is no difference + // between calling `userAdd(a,b)` and `_+_(a,b)` + // since they are registered in a global symtable, technically, they can even be referenced from Tolk code, + // though it's a "hidden feature" 
and won't work well for overloads (`==` for int and bool, for example) + + // unary operators + define_builtin_func("-_", ParamsInt1, Int, nullptr, + compile_unary_minus, + FunctionData::flagMarkedAsPure); + define_builtin_func("+_", ParamsInt1, Int, nullptr, + compile_unary_plus, + FunctionData::flagMarkedAsPure); + define_builtin_func("!_", ParamsInt1, Bool, nullptr, + std::bind(compile_logical_not, _1, _2, _3, true), + FunctionData::flagMarkedAsPure); + define_builtin_func("!b_", {Bool}, Bool, nullptr, // "overloaded" separate version for bool + std::bind(compile_logical_not, _1, _2, _3, false), + FunctionData::flagMarkedAsPure); + define_builtin_func("~_", ParamsInt1, Int, nullptr, + compile_bitwise_not, + FunctionData::flagMarkedAsPure); + + // binary operators + define_builtin_func("_+_", ParamsInt2, Int, nullptr, + compile_add, + FunctionData::flagMarkedAsPure); + define_builtin_func("_-_", ParamsInt2, Int, nullptr, + compile_sub, + FunctionData::flagMarkedAsPure); + define_builtin_func("_*_", ParamsInt2, Int, nullptr, + compile_mul, + FunctionData::flagMarkedAsPure); + define_builtin_func("_/_", ParamsInt2, Int, nullptr, + std::bind(compile_div, _1, _2, _3, -1), + FunctionData::flagMarkedAsPure); + define_builtin_func("_~/_", ParamsInt2, Int, nullptr, + std::bind(compile_div, _1, _2, _3, 0), + FunctionData::flagMarkedAsPure); + define_builtin_func("_^/_", ParamsInt2, Int, nullptr, + std::bind(compile_div, _1, _2, _3, 1), + FunctionData::flagMarkedAsPure); + define_builtin_func("_%_", ParamsInt2, Int, nullptr, + std::bind(compile_mod, _1, _2, _3, -1), + FunctionData::flagMarkedAsPure); + define_builtin_func("_<<_", ParamsInt2, Int, nullptr, + compile_lshift, + FunctionData::flagMarkedAsPure); + define_builtin_func("_>>_", ParamsInt2, Int, nullptr, + std::bind(compile_rshift, _1, _2, _3, -1), + FunctionData::flagMarkedAsPure); + define_builtin_func("_~>>_", ParamsInt2, Int, nullptr, + std::bind(compile_rshift, _1, _2, _3, 0), + 
FunctionData::flagMarkedAsPure); + define_builtin_func("_^>>_", ParamsInt2, Int, nullptr, + std::bind(compile_rshift, _1, _2, _3, 1), + FunctionData::flagMarkedAsPure); + define_builtin_func("_&_", ParamsInt2, Int, nullptr, // also works for bool + compile_bitwise_and, + FunctionData::flagMarkedAsPure); + define_builtin_func("_|_", ParamsInt2, Int, nullptr, // also works for bool + compile_bitwise_or, + FunctionData::flagMarkedAsPure); + define_builtin_func("_^_", ParamsInt2, Int, nullptr, // also works for bool + compile_bitwise_xor, + FunctionData::flagMarkedAsPure); + define_builtin_func("_==_", ParamsInt2, Int, nullptr, // also works for bool + std::bind(compile_cmp_int, _1, _2, 2), + FunctionData::flagMarkedAsPure); + define_builtin_func("_!=_", ParamsInt2, Int, nullptr, // also works for bool + std::bind(compile_cmp_int, _1, _2, 5), + FunctionData::flagMarkedAsPure); + define_builtin_func("_<_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 4), + FunctionData::flagMarkedAsPure); + define_builtin_func("_>_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 1), + FunctionData::flagMarkedAsPure); + define_builtin_func("_<=_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 6), + FunctionData::flagMarkedAsPure); + define_builtin_func("_>=_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 3), + FunctionData::flagMarkedAsPure); + define_builtin_func("_<=>_", ParamsInt2, Int, nullptr, + std::bind(compile_cmp_int, _1, _2, 7), + FunctionData::flagMarkedAsPure); + + // special function used for internal compilation of some lexical constructs + // for example, `throw 123;` is actually calling `__throw(123)` + define_builtin_func("__true", {}, Bool, nullptr, /* AsmOp::Const("TRUE") */ + std::bind(compile_bool_const, _1, _2, true), + FunctionData::flagMarkedAsPure); + define_builtin_func("__false", {}, Bool, nullptr, /* AsmOp::Const("FALSE") */ + std::bind(compile_bool_const, _1, _2, false), + 
FunctionData::flagMarkedAsPure); + define_builtin_func("__null", {}, typeT, declGenericT, + AsmOp::Const("PUSHNULL"), + FunctionData::flagMarkedAsPure); + define_builtin_func("__isNull", {typeT}, Bool, declGenericT, + compile_is_null, + FunctionData::flagMarkedAsPure); + define_builtin_func("__throw", ParamsInt1, Unit, nullptr, + compile_throw, 0); - define_builtin_func("__throw_arg", throw_arg_op, compile_throw_arg, + define_builtin_func("__throw_arg", {typeT, Int}, Unit, declGenericT, + compile_throw_arg, 0); - define_builtin_func("__throw_if_unless", TypeExpr::new_map(Int3, Unit), compile_throw_if_unless, + define_builtin_func("__throw_if_unless", ParamsInt3, Unit, nullptr, + compile_throw_if_unless, 0); - define_builtin_func("loadInt", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, true), - SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); - define_builtin_func("loadUint", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, false), - SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); - define_builtin_func("loadBits", fetch_slice_op_mutate, std::bind(compile_fetch_slice, _1, _2, true), - SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); - define_builtin_func("preloadInt", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, true), - SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); - define_builtin_func("preloadUint", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, false), - SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); - define_builtin_func("preloadBits", prefetch_slice_op, std::bind(compile_fetch_slice, _1, _2, false), - SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); - define_builtin_func("storeInt", store_int_mutate, std::bind(compile_store_int, _1, _2, true), - SymValFunc::flagMarkedAsPure | 
SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf | SymValFunc::flagReturnsSelf, {1, 0, 2}, {}); - define_builtin_func("storeUint", store_int_mutate, std::bind(compile_store_int, _1, _2, false), - SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf | SymValFunc::flagReturnsSelf, {1, 0, 2}, {}); - define_builtin_func("tupleAt", TypeExpr::new_forall({X}, TypeExpr::new_map(TupleInt, X)), compile_tuple_at, - SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); - define_builtin_func("debugPrint", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)), + + // functions from stdlib marked as `builtin`, implemented at compiler level for optimizations + // (for example, `loadInt(1)` is `1 LDI`, but `loadInt(n)` for non-constant requires it be on a stack and `LDIX`) + define_builtin_func("mulDivFloor", ParamsInt3, Int, nullptr, + std::bind(compile_muldiv, _1, _2, _3, -1), + FunctionData::flagMarkedAsPure); + define_builtin_func("mulDivRound", ParamsInt3, Int, nullptr, + std::bind(compile_muldiv, _1, _2, _3, 0), + FunctionData::flagMarkedAsPure); + define_builtin_func("mulDivCeil", ParamsInt3, Int, nullptr, + std::bind(compile_muldiv, _1, _2, _3, 1), + FunctionData::flagMarkedAsPure); + define_builtin_func("mulDivMod", ParamsInt3, TypeDataTensor::create({Int, Int}), nullptr, + AsmOp::Custom("MULDIVMOD", 3, 2), + FunctionData::flagMarkedAsPure); + define_builtin_func("loadInt", ParamsSliceInt, Int, nullptr, + std::bind(compile_fetch_int, _1, _2, true, true), + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, + {}, {1, 0}); + define_builtin_func("loadUint", ParamsSliceInt, Int, nullptr, + std::bind(compile_fetch_int, _1, _2, true, false), + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, + {}, {1, 0}); + define_builtin_func("loadBits", ParamsSliceInt, Slice, nullptr, + std::bind(compile_fetch_slice, _1, _2, true), + 
FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf, + {}, {1, 0}); + define_builtin_func("preloadInt", ParamsSliceInt, Int, nullptr, + std::bind(compile_fetch_int, _1, _2, false, true), + FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); + define_builtin_func("preloadUint", ParamsSliceInt, Int, nullptr, + std::bind(compile_fetch_int, _1, _2, false, false), + FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); + define_builtin_func("preloadBits", ParamsSliceInt, Slice, nullptr, + std::bind(compile_fetch_slice, _1, _2, false), + FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); + define_builtin_func("storeInt", {Builder, Int, Int}, Unit, nullptr, + std::bind(compile_store_int, _1, _2, true), + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf | FunctionData::flagReturnsSelf, + {1, 0, 2}, {}); + define_builtin_func("storeUint", {Builder, Int, Int}, Unit, nullptr, + std::bind(compile_store_int, _1, _2, false), + FunctionData::flagMarkedAsPure | FunctionData::flagHasMutateParams | FunctionData::flagAcceptsSelf | FunctionData::flagReturnsSelf, + {1, 0, 2}, {}); + define_builtin_func("tupleAt", {Tuple, Int}, typeT, declGenericT, + compile_tuple_at, + FunctionData::flagMarkedAsPure | FunctionData::flagAcceptsSelf); + define_builtin_func("debugPrint", {typeT}, Unit, declGenericT, AsmOp::Custom("s0 DUMP DROP", 1, 1), 0); - define_builtin_func("debugPrintString", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)), + define_builtin_func("debugPrintString", {typeT}, Unit, declGenericT, AsmOp::Custom("STRDUMP DROP", 1, 1), 0); - define_builtin_func("debugDumpStack", TypeExpr::new_map(Unit, Unit), + define_builtin_func("debugDumpStack", {}, Unit, nullptr, AsmOp::Custom("DUMPSTK", 0, 0), 0); } diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index 9a90a3ed9..3830f7ae5 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -16,6 +16,7 @@ */ 
#include "tolk.h" #include "compiler-state.h" +#include "type-system.h" namespace tolk { @@ -314,7 +315,7 @@ bool Op::generate_code_step(Stack& stack) { return true; } case _GlobVar: - if (dynamic_cast(fun_ref->value)) { + if (g_sym) { bool used = false; for (auto i : left) { auto p = next->var_info[i]; @@ -325,8 +326,7 @@ bool Op::generate_code_step(Stack& stack) { if (!used || disabled()) { return true; } - std::string name = G.symbols.get_name(fun_ref->sym_idx); - stack.o << AsmOp::Custom(name + " GETGLOB", 0, 1); + stack.o << AsmOp::Custom(g_sym->name + " GETGLOB", 0, 1); if (left.size() != 1) { tolk_assert(left.size() <= 15); stack.o << AsmOp::UnTuple((int)left.size()); @@ -343,25 +343,28 @@ bool Op::generate_code_step(Stack& stack) { } stack.o << "CONT:<{"; stack.o.indent(); - auto func = dynamic_cast(fun_ref->value); - if (func) { + if (f_sym->is_asm_function() || f_sym->is_builtin_function()) { // TODO: create and compile a true lambda instead of this (so that arg_order and ret_order would work correctly) std::vector args0, res; - TypeExpr::remove_indirect(func->sym_type); - tolk_assert(func->get_type()->is_map()); - auto wr = func->get_type()->args.at(0)->get_width(); - auto wl = func->get_type()->args.at(1)->get_width(); - tolk_assert(wl >= 0 && wr >= 0); - for (int i = 0; i < wl; i++) { + int w_arg = 0; + for (const LocalVarData& param : f_sym->parameters) { + w_arg += param.declared_type->calc_width_on_stack(); + } + int w_ret = f_sym->inferred_return_type->calc_width_on_stack(); + tolk_assert(w_ret >= 0 && w_arg >= 0); + for (int i = 0; i < w_ret; i++) { res.emplace_back(0); } - for (int i = 0; i < wr; i++) { + for (int i = 0; i < w_arg; i++) { args0.emplace_back(0); } - func->compile(stack.o, res, args0, where); // compile res := f (args0) + if (f_sym->is_asm_function()) { + std::get(f_sym->body)->compile(stack.o); // compile res := f (args0) + } else { + std::get(f_sym->body)->compile(stack.o, res, args0, where); // compile res := f (args0) + } } 
else { - std::string name = G.symbols.get_name(fun_ref->sym_idx); - stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size()); + stack.o << AsmOp::Custom(f_sym->name + " CALLDICT", (int)right.size(), (int)left.size()); } stack.o.undent(); stack.o << "}>"; @@ -438,10 +441,9 @@ bool Op::generate_code_step(Stack& stack) { if (disabled()) { return true; } - // fun_ref can be nullptr for Op::_CallInd (invoke a variable, not a function) - SymValFunc* func = (fun_ref ? dynamic_cast(fun_ref->value) : nullptr); - auto arg_order = (func ? func->get_arg_order() : nullptr); - auto ret_order = (func ? func->get_ret_order() : nullptr); + // f_sym can be nullptr for Op::_CallInd (invoke a variable, not a function) + const std::vector* arg_order = f_sym ? f_sym->get_arg_order() : nullptr; + const std::vector* ret_order = f_sym ? f_sym->get_ret_order() : nullptr; tolk_assert(!arg_order || arg_order->size() == right.size()); tolk_assert(!ret_order || ret_order->size() == left.size()); std::vector right1; @@ -455,14 +457,12 @@ bool Op::generate_code_step(Stack& stack) { right1.push_back(arg.idx); } } - } else if (arg_order) { - for (int i = 0; i < (int)right.size(); i++) { - right1.push_back(right.at(arg_order->at(i))); - } } else { + tolk_assert(!arg_order); right1 = right; } std::vector last; + last.reserve(right1.size()); for (var_idx_t x : right1) { last.push_back(var_info[x] && var_info[x]->is_last()); } @@ -488,23 +488,25 @@ bool Op::generate_code_step(Stack& stack) { }; if (cl == _CallInd) { exec_callxargs((int)right.size() - 1, (int)left.size()); - } else if (auto asm_fv = dynamic_cast(fun_ref->value)) { + } else if (!f_sym->is_code_function()) { std::vector res; res.reserve(left.size()); for (var_idx_t i : left) { res.emplace_back(i); } - asm_fv->compile(stack.o, res, args, where); // compile res := f (args) + if (f_sym->is_asm_function()) { + std::get(f_sym->body)->compile(stack.o); // compile res := f (args) + } else { + 
std::get(f_sym->body)->compile(stack.o, res, args, where); // compile res := f (args) + } } else { - auto fv = dynamic_cast(fun_ref->value); - std::string name = G.symbols.get_name(fun_ref->sym_idx); - if (fv->is_inline() || fv->is_inline_ref()) { - stack.o << AsmOp::Custom(name + " INLINECALLDICT", (int)right.size(), (int)left.size()); - } else if (fv->code && fv->code->require_callxargs) { - stack.o << AsmOp::Custom(name + (" PREPAREDICT"), 0, 2); + if (f_sym->is_inline() || f_sym->is_inline_ref()) { + stack.o << AsmOp::Custom(f_sym->name + " INLINECALLDICT", (int)right.size(), (int)left.size()); + } else if (f_sym->is_code_function() && std::get(f_sym->body)->code->require_callxargs) { + stack.o << AsmOp::Custom(f_sym->name + (" PREPAREDICT"), 0, 2); exec_callxargs((int)right.size() + 1, (int)left.size()); } else { - stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size()); + stack.o << AsmOp::Custom(f_sym->name + " CALLDICT", (int)right.size(), (int)left.size()); } } stack.s.resize(k); @@ -515,7 +517,7 @@ bool Op::generate_code_step(Stack& stack) { return true; } case _SetGlob: { - tolk_assert(fun_ref && dynamic_cast(fun_ref->value)); + tolk_assert(g_sym); std::vector last; for (var_idx_t x : right) { last.push_back(var_info[x] && var_info[x]->is_last()); @@ -534,8 +536,7 @@ bool Op::generate_code_step(Stack& stack) { stack.o << AsmOp::Tuple((int)right.size()); } if (!right.empty()) { - std::string name = G.symbols.get_name(fun_ref->sym_idx); - stack.o << AsmOp::Custom(name + " SETGLOB", 1, 0); + stack.o << AsmOp::Custom(g_sym->name + " SETGLOB", 1, 0); } stack.s.resize(k); return true; @@ -826,6 +827,8 @@ bool Op::generate_code_step(Stack& stack) { catch_stack.push_new_var(left[1]); stack.rearrange_top(catch_vars, catch_last); stack.opt_show(); + stack.o << "c1 PUSH"; + stack.o << "c3 PUSH"; stack.o << "c4 PUSH"; stack.o << "c5 PUSH"; stack.o << "c7 PUSH"; @@ -842,6 +845,8 @@ bool Op::generate_code_step(Stack& stack) { stack.o << "c7 
SETCONT"; stack.o << "c5 SETCONT"; stack.o << "c4 SETCONT"; + stack.o << "c3 SETCONT"; + stack.o << "c1 SETCONT"; for (size_t begin = catch_vars.size(), end = begin; end > 0; end = begin) { begin = end >= block_size ? end - block_size : 0; stack.o << std::to_string(end - begin) + " PUSHINT"; diff --git a/tolk/compiler-state.cpp b/tolk/compiler-state.cpp index fb70022fa..66fad844f 100644 --- a/tolk/compiler-state.cpp +++ b/tolk/compiler-state.cpp @@ -27,6 +27,19 @@ void ExperimentalOption::mark_deprecated(const char* deprecated_from_v, const ch this->deprecated_reason = deprecated_reason; } +std::string_view PersistentHeapAllocator::copy_string_to_persistent_memory(std::string_view str_in_tmp_memory) { + size_t len = str_in_tmp_memory.size(); + char* allocated = new char[len]; + memcpy(allocated, str_in_tmp_memory.data(), str_in_tmp_memory.size()); + auto new_chunk = std::make_unique(allocated, std::move(head)); + head = std::move(new_chunk); + return {head->allocated, len}; +} + +void PersistentHeapAllocator::clear() { + head = nullptr; +} + void CompilerSettings::enable_experimental_option(std::string_view name) { ExperimentalOption* to_enable = nullptr; @@ -53,4 +66,8 @@ void CompilerSettings::parse_experimental_options_cmd_arg(const std::string& cmd } } +const std::vector& get_all_not_builtin_functions() { + return G.all_functions; +} + } // namespace tolk diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index aec1945e2..d33eec81d 100644 --- a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -19,6 +19,7 @@ #include "src-file.h" #include "symtable.h" #include "td/utils/Status.h" +#include #include #include @@ -64,6 +65,26 @@ struct CompilerSettings { void parse_experimental_options_cmd_arg(const std::string& cmd_arg); }; +// AST nodes contain std::string_view referencing to contents of .tolk files (kept in memory after reading). 
+// It's more than enough, except a situation when we create new AST nodes inside the compiler +// and want some "persistent place" for std::string_view to point to. +// This class copies strings to heap, so that they remain valid after closing scope. +class PersistentHeapAllocator { + struct ChunkInHeap { + const char* allocated; + std::unique_ptr next; + + ChunkInHeap(const char* allocated, std::unique_ptr&& next) + : allocated(allocated), next(std::move(next)) {} + }; + + std::unique_ptr head = nullptr; + +public: + std::string_view copy_string_to_persistent_memory(std::string_view str_in_tmp_memory); + void clear(); +}; + // CompilerState contains a mutable state that is changed while the compilation is going on. // It's a "global state" of all compilation. // Historically, in FunC, this global state was spread along many global C++ variables. @@ -71,14 +92,13 @@ struct CompilerSettings { struct CompilerState { CompilerSettings settings; - SymTable symbols; - int scope_level = 0; - SymDef* sym_def[SymTable::SIZE_PRIME + 1]{}; - SymDef* global_sym_def[SymTable::SIZE_PRIME + 1]{}; - std::vector> symbol_stack; - std::vector scope_opened_at; + GlobalSymbolTable symtable; + PersistentHeapAllocator persistent_mem; - std::vector all_code_functions, all_global_vars, all_get_methods, all_constants; + std::vector all_functions; // all user-defined (not built-in) functions, with generic instantiations + std::vector all_get_methods; + std::vector all_global_vars; + std::vector all_constants; AllRegisteredSrcFiles all_src_files; bool is_verbosity(int gt_eq) const { return settings.verbosity >= gt_eq; } diff --git a/tolk/constant-evaluator.cpp b/tolk/constant-evaluator.cpp new file mode 100644 index 000000000..9ad273812 --- /dev/null +++ b/tolk/constant-evaluator.cpp @@ -0,0 +1,317 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "constant-evaluator.h" +#include "ast.h" +#include "tolk.h" +#include "openssl/digest.hpp" +#include "crypto/common/util.h" +#include "td/utils/crypto.h" +#include "ton/ton-types.h" + +namespace tolk { + +// parse address like "EQCRDM9h4k3UJdOePPuyX40mCgA4vxge5Dc5vjBR8djbEKC5" +// based on unpack_std_smc_addr() from block.cpp +// (which is not included to avoid linking with ton_crypto) +static bool parse_friendly_address(const char packed[48], ton::WorkchainId& workchain, ton::StdSmcAddress& addr) { + unsigned char buffer[36]; + if (!td::buff_base64_decode(td::MutableSlice{buffer, 36}, td::Slice{packed, 48}, true)) { + return false; + } + td::uint16 crc = td::crc16(td::Slice{buffer, 34}); + if (buffer[34] != (crc >> 8) || buffer[35] != (crc & 0xff) || (buffer[0] & 0x3f) != 0x11) { + return false; + } + workchain = static_cast(buffer[1]); + std::memcpy(addr.data(), buffer + 2, 32); + return true; +} + +// parse address like "0:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8" +// based on StdAddress::parse_addr() from block.cpp +// (which is not included to avoid linking with ton_crypto) +static bool parse_raw_address(const std::string& acc_string, int& workchain, ton::StdSmcAddress& addr) { + size_t pos = acc_string.find(':'); + if (pos != std::string::npos) { + td::Result r_wc = 
td::to_integer_safe(acc_string.substr(0, pos)); + if (r_wc.is_error()) { + return false; + } + workchain = r_wc.move_as_ok(); + pos++; + } else { + pos = 0; + } + if (acc_string.size() != pos + 64) { + return false; + } + + for (int i = 0; i < 64; ++i) { // loop through each hex digit + char c = acc_string[pos + i]; + int x; + if (c >= '0' && c <= '9') { + x = c - '0'; + } else if (c >= 'a' && c <= 'z') { + x = c - 'a' + 10; + } else if (c >= 'A' && c <= 'Z') { + x = c - 'A' + 10; + } else { + return false; + } + + if ((i & 1) == 0) { + addr.data()[i >> 1] = static_cast((addr.data()[i >> 1] & 0x0F) | (x << 4)); + } else { + addr.data()[i >> 1] = static_cast((addr.data()[i >> 1] & 0xF0) | x); + } + } + return true; +} + + +static std::string parse_vertex_string_const_as_slice(V v) { + std::string str = static_cast(v->str_val); + switch (v->modifier) { + case 0: { + return td::hex_encode(str); + } + case 's': { + unsigned char buff[128]; + long bits = td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); + if (bits < 0) { + v->error("invalid hex bitstring constant '" + str + "'"); + } + return str; + } + case 'a': { // MsgAddress + ton::WorkchainId workchain; + ton::StdSmcAddress addr; + bool correct = (str.size() == 48 && parse_friendly_address(str.data(), workchain, addr)) || + (str.size() != 48 && parse_raw_address(str, workchain, addr)); + if (!correct) { + v->error("invalid standard address '" + str + "'"); + } + if (workchain < -128 || workchain >= 128) { + v->error("anycast addresses not supported"); + } + + unsigned char data[3 + 8 + 256]; // addr_std$10 anycast:(Maybe Anycast) workchain_id:int8 address:bits256 = MsgAddressInt; + td::bitstring::bits_store_long_top(data, 0, static_cast(4) << (64 - 3), 3); + td::bitstring::bits_store_long_top(data, 3, static_cast(workchain) << (64 - 8), 8); + td::bitstring::bits_memcpy(data, 3 + 8, addr.bits().ptr, 0, ton::StdSmcAddress::size()); + return td::BitSlice{data, 
sizeof(data)}.to_hex(); + } + default: + tolk_assert(false); + } +} + +static td::RefInt256 parse_vertex_string_const_as_int(V v) { + std::string str = static_cast(v->str_val); + switch (v->modifier) { + case 'u': { + td::RefInt256 intval = td::hex_string_to_int256(td::hex_encode(str)); + if (str.empty()) { + v->error("empty integer ascii-constant"); + } + if (intval.is_null()) { + v->error("too long integer ascii-constant"); + } + return intval; + } + case 'h': + case 'H': { + unsigned char hash[32]; + digest::hash_str(hash, str.data(), str.size()); + return td::bits_to_refint(hash, (v->modifier == 'h') ? 32 : 256, false); + } + case 'c': { + return td::make_refint(td::crc32(td::Slice{str})); + } + default: + tolk_assert(false); + } +} + + +struct ConstantEvaluator { + static bool is_overflow(const td::RefInt256& intval) { + return intval.is_null() || !intval->signed_fits_bits(257); + } + + static ConstantValue handle_unary_operator(V v, const ConstantValue& rhs) { + if (!rhs.is_int()) { + v->error("invalid operator, expecting integer"); + } + td::RefInt256 intval = std::get(rhs.value); + + switch (v->tok) { + case tok_minus: + intval = -intval; + break; + case tok_plus: + break; + case tok_bitwise_not: + intval = ~intval; + break; + case tok_logical_not: + intval = td::make_refint(intval == 0 ? 
-1 : 0); + break; + default: + v->error("not a constant expression"); + } + + if (is_overflow(intval)) { + v->error("integer overflow"); + } + return ConstantValue::from_int(std::move(intval)); + } + + static ConstantValue handle_binary_operator(V v, const ConstantValue& lhs, const ConstantValue& rhs) { + if (!lhs.is_int() || !rhs.is_int()) { + v->error("invalid operator, expecting integer"); + } + td::RefInt256 lhs_intval = std::get(lhs.value); + td::RefInt256 rhs_intval = std::get(rhs.value); + td::RefInt256 intval; + + switch (v->tok) { + case tok_minus: + intval = lhs_intval - rhs_intval; + break; + case tok_plus: + intval = lhs_intval + rhs_intval; + break; + case tok_mul: + intval = lhs_intval * rhs_intval; + break; + case tok_div: + intval = lhs_intval / rhs_intval; + break; + case tok_mod: + intval = lhs_intval % rhs_intval; + break; + case tok_lshift: + intval = lhs_intval << static_cast(rhs_intval->to_long()); + break; + case tok_rshift: + intval = lhs_intval >> static_cast(rhs_intval->to_long()); + break; + case tok_bitwise_and: + intval = lhs_intval & rhs_intval; + break; + case tok_bitwise_or: + intval = lhs_intval | rhs_intval; + break; + case tok_bitwise_xor: + intval = lhs_intval ^ rhs_intval; + break; + case tok_eq: + intval = td::make_refint(lhs_intval == rhs_intval ? -1 : 0); + break; + case tok_lt: + intval = td::make_refint(lhs_intval < rhs_intval ? -1 : 0); + break; + case tok_gt: + intval = td::make_refint(lhs_intval > rhs_intval ? -1 : 0); + break; + case tok_leq: + intval = td::make_refint(lhs_intval <= rhs_intval ? -1 : 0); + break; + case tok_geq: + intval = td::make_refint(lhs_intval >= rhs_intval ? -1 : 0); + break; + case tok_neq: + intval = td::make_refint(lhs_intval != rhs_intval ? 
-1 : 0); + break; + default: + v->error("unsupported binary operator in constant expression"); + } + + if (is_overflow(intval)) { + v->error("integer overflow"); + } + return ConstantValue::from_int(std::move(intval)); + } + + static ConstantValue handle_reference(V v) { + // todo better handle "appears, directly or indirectly, in its own initializer" + std::string_view name = v->get_name(); + const Symbol* sym = lookup_global_symbol(name); + if (!sym) { + v->error("undefined symbol `" + static_cast(name) + "`"); + } + const GlobalConstData* const_ref = sym->try_as(); + if (!const_ref) { + v->error("symbol `" + static_cast(name) + "` is not a constant"); + } + if (v->has_instantiationTs()) { // SOME_CONST + v->error("constant is not a generic"); + } + return {const_ref->value}; + } + + static ConstantValue visit(AnyExprV v) { + if (auto v_int = v->try_as()) { + return ConstantValue::from_int(v_int->intval); + } + if (auto v_bool = v->try_as()) { + return ConstantValue::from_int(v_bool->bool_val ? 
-1 : 0); + } + if (auto v_unop = v->try_as()) { + return handle_unary_operator(v_unop, visit(v_unop->get_rhs())); + } + if (auto v_binop = v->try_as()) { + return handle_binary_operator(v_binop, visit(v_binop->get_lhs()), visit(v_binop->get_rhs())); + } + if (auto v_ref = v->try_as()) { + return handle_reference(v_ref); + } + if (auto v_par = v->try_as()) { + return visit(v_par->get_expr()); + } + if (v->try_as()) { + return eval_const_init_value(v); + } + v->error("not a constant expression"); + } + + static ConstantValue eval_const_init_value(AnyExprV init_value) { + // it init_value is incorrect, an exception is thrown + return visit(init_value); + } +}; + +ConstantValue eval_const_init_value(AnyExprV init_value) { + // at first, handle most simple cases, not to launch heavy computation algorithm: just a number, just a string + // just `c = 1` or `c = 0xFF` + if (auto v_int = init_value->try_as()) { + return {v_int->intval}; + } + // just `c = "strval"`, probably with modifier (address, etc.) + if (auto v_string = init_value->try_as()) { + if (v_string->is_bitslice()) { + return {parse_vertex_string_const_as_slice(v_string)}; + } else { + return {parse_vertex_string_const_as_int(v_string)}; + } + } + // something more complex, like `c = anotherC` or `c = 1 << 8` + return ConstantEvaluator::eval_const_init_value(init_value); +} + +} // namespace tolk diff --git a/tolk/constant-evaluator.h b/tolk/constant-evaluator.h new file mode 100644 index 000000000..0f99867d8 --- /dev/null +++ b/tolk/constant-evaluator.h @@ -0,0 +1,45 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "fwd-declarations.h" +#include "crypto/common/refint.h" +#include + +namespace tolk { + +struct ConstantValue { + std::variant value; + + bool is_int() const { return std::holds_alternative(value); } + bool is_slice() const { return std::holds_alternative(value); } + + td::RefInt256 as_int() const { return std::get(value); } + const std::string& as_slice() const { return std::get(value); } + + static ConstantValue from_int(int value) { + return {td::make_refint(value)}; + } + + static ConstantValue from_int(td::RefInt256 value) { + return {std::move(value)}; + } +}; + +ConstantValue eval_const_init_value(AnyExprV init_value); + +} // namespace tolk diff --git a/tolk/fwd-declarations.h b/tolk/fwd-declarations.h new file mode 100644 index 000000000..e3599f364 --- /dev/null +++ b/tolk/fwd-declarations.h @@ -0,0 +1,40 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +namespace tolk { + +struct ASTNodeBase; +struct ASTNodeExpressionBase; +struct ASTNodeStatementBase; + +using AnyV = const ASTNodeBase*; +using AnyExprV = const ASTNodeExpressionBase*; +using AnyStatementV = const ASTNodeStatementBase*; + +struct Symbol; +struct LocalVarData; +struct FunctionData; +struct GlobalVarData; +struct GlobalConstData; + +class TypeData; +using TypePtr = const TypeData*; + +struct SrcFile; + +} // namespace tolk diff --git a/tolk/gen-abscode.cpp b/tolk/gen-abscode.cpp deleted file mode 100644 index fb085ae9c..000000000 --- a/tolk/gen-abscode.cpp +++ /dev/null @@ -1,429 +0,0 @@ -/* - This file is part of TON Blockchain Library. - - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . 
-*/ -#include "tolk.h" -#include "compiler-state.h" - -using namespace std::literals::string_literals; - -namespace tolk { - -/* - * - * EXPRESSIONS - * - */ - -Expr* Expr::copy() const { - auto res = new Expr{*this}; - for (auto& arg : res->args) { - arg = arg->copy(); - } - return res; -} - -Expr::Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { - sym = lookup_symbol(name_idx); - if (!sym) { - } -} - -void Expr::deduce_type() { - if (e_type) { - return; - } - switch (cls) { - case _Apply: { - if (!sym) { - return; - } - SymValFunc* sym_val = dynamic_cast(sym->value); - if (!sym_val || !sym_val->get_type()) { - return; - } - std::vector arg_types; - arg_types.reserve(args.size()); - for (const Expr* arg : args) { - arg_types.push_back(arg->e_type); - } - TypeExpr* fun_type = TypeExpr::new_map(TypeExpr::new_tensor(arg_types), TypeExpr::new_hole()); - try { - unify(fun_type, sym_val->sym_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type " - << fun_type->args[0] << ": " << ue; - throw ParseError(here, os.str()); - } - e_type = fun_type->args[1]; - TypeExpr::remove_indirect(e_type); - return; - } - case _VarApply: { - tolk_assert(args.size() == 2); - TypeExpr* fun_type = TypeExpr::new_map(args[1]->e_type, TypeExpr::new_hole()); - try { - unify(fun_type, args[0]->e_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type - << ": " << ue; - throw ParseError(here, os.str()); - } - e_type = fun_type->args[1]; - TypeExpr::remove_indirect(e_type); - return; - } - case _GrabMutatedVars: { - tolk_assert(args.size() == 2 && args[0]->cls == _Apply && sym); - SymValFunc* called_f = dynamic_cast(sym->value); - tolk_assert(called_f->has_mutate_params()); - TypeExpr* sym_type = 
called_f->get_type(); - if (sym_type->constr == TypeExpr::te_ForAll) { - TypeExpr::remove_forall(sym_type); - } - tolk_assert(sym_type->args[1]->constr == TypeExpr::te_Tensor); - e_type = sym_type->args[1]->args[sym_type->args[1]->args.size() - 1]; - TypeExpr::remove_indirect(e_type); - return; - } - case _ReturnSelf: { - tolk_assert(args.size() == 2 && sym); - Expr* this_arg = args[1]; - e_type = this_arg->e_type; - TypeExpr::remove_indirect(e_type); - return; - } - case _Letop: { - tolk_assert(args.size() == 2); - try { - // std::cerr << "in assignment: " << args[0]->e_type << " from " << args[1]->e_type << std::endl; - unify(args[0]->e_type, args[1]->e_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type " - << args[0]->e_type << ": " << ue; - throw ParseError(here, os.str()); - } - e_type = args[0]->e_type; - TypeExpr::remove_indirect(e_type); - return; - } - case _CondExpr: { - tolk_assert(args.size() == 3); - auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(args[0]->e_type, flag_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue; - throw ParseError(here, os.str()); - } - try { - unify(args[1]->e_type, args[2]->e_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and " - << args[2]->e_type << " : " << ue; - throw ParseError(here, os.str()); - } - e_type = args[1]->e_type; - TypeExpr::remove_indirect(e_type); - return; - } - default: - throw Fatal("unexpected cls=" + std::to_string(cls) + " in Expr::deduce_type()"); - } -} - -void Expr::define_new_vars(CodeBlob& code) { - switch (cls) { - case _Tensor: - case _MkTuple: { - for (Expr* item : args) { - item->define_new_vars(code); - } - break; - } - case _Var: - 
if (val < 0) { - val = code.create_var(e_type, sym->sym_idx, here); - sym->value->idx = val; - } - break; - case _Hole: - if (val < 0) { - val = code.create_tmp_var(e_type, here); - } - break; - default: - break; - } -} - -void Expr::predefine_vars() { - switch (cls) { - case _Tensor: - case _MkTuple: { - for (Expr* item : args) { - item->predefine_vars(); - } - break; - } - case _Var: - if (!sym) { - tolk_assert(val < 0 && here.is_defined()); - sym = define_symbol(~val, false, here); - // std::cerr << "predefining variable " << symbols.get_name(~val) << std::endl; - if (!sym) { - throw ParseError{here, std::string{"redefined variable `"} + G.symbols.get_name(~val) + "`"}; - } - sym->value = new SymValVariable(-1, e_type); - if (is_immutable()) { - dynamic_cast(sym->value)->flags |= SymValVariable::flagImmutable; - } - } - break; - default: - break; - } -} - -var_idx_t Expr::new_tmp(CodeBlob& code) const { - return code.create_tmp_var(e_type, here); -} - -void add_set_globs(CodeBlob& code, std::vector>& globs, SrcLocation here) { - for (const auto& p : globs) { - auto& op = code.emplace_back(here, Op::_SetGlob, std::vector{}, std::vector{ p.second }, p.first); - op.set_impure(code); - } -} - -std::vector pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, SrcLocation here) { - if (lhs->is_mktuple()) { - if (rhs->is_mktuple()) { - return pre_compile_let(code, lhs->args.at(0), rhs->args.at(0), here); - } - auto right = rhs->pre_compile(code); - TypeExpr::remove_indirect(rhs->e_type); - auto unpacked_type = rhs->e_type->args.at(0); - std::vector tmp{code.create_tmp_var(unpacked_type, rhs->here)}; - code.emplace_back(lhs->here, Op::_UnTuple, tmp, std::move(right)); - auto tvar = new Expr{Expr::_Var, lhs->here}; - tvar->set_val(tmp[0]); - tvar->set_location(rhs->here); - tvar->e_type = unpacked_type; - pre_compile_let(code, lhs->args.at(0), tvar, here); - return tmp; - } - auto right = rhs->pre_compile(code); - std::vector> globs; - auto left = lhs->pre_compile(code, 
&globs); - for (var_idx_t v : left) { - code.on_var_modification(v, here); - } - code.emplace_back(here, Op::_Let, std::move(left), right); - add_set_globs(code, globs, here); - return right; -} - -std::vector pre_compile_tensor(const std::vector& args, CodeBlob &code, - std::vector> *lval_globs) { - const size_t n = args.size(); - if (n == 0) { // just `()` - return {}; - } - if (n == 1) { // just `(x)`: even if x is modified (e.g. `f(x=x+2)`), there are no next arguments - return args[0]->pre_compile(code, lval_globs); - } - std::vector> res_lists(n); - - struct ModifiedVar { - size_t i, j; - std::unique_ptr* cur_ops; // `LET tmp = v_ij` will be inserted before this - }; - std::vector modified_vars; - for (size_t i = 0; i < n; ++i) { - res_lists[i] = args[i]->pre_compile(code, lval_globs); - for (size_t j = 0; j < res_lists[i].size(); ++j) { - TmpVar& var = code.vars.at(res_lists[i][j]); - if (!lval_globs && !var.is_unnamed()) { - var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](SrcLocation here) mutable { - if (!done) { - done = true; - modified_vars.push_back({i, j, cur_ops}); - } - }); - } else { - var.on_modification.push_back([](SrcLocation) { - }); - } - } - } - for (const auto& list : res_lists) { - for (var_idx_t v : list) { - tolk_assert(!code.vars.at(v).on_modification.empty()); - code.vars.at(v).on_modification.pop_back(); - } - } - for (size_t idx = modified_vars.size(); idx--; ) { - const ModifiedVar &m = modified_vars[idx]; - var_idx_t orig_v = res_lists[m.i][m.j]; - var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where); - std::unique_ptr op = std::make_unique(code.vars[orig_v].where, Op::_Let); - op->left = {tmp_v}; - op->right = {orig_v}; - op->next = std::move((*m.cur_ops)); - *m.cur_ops = std::move(op); - res_lists[m.i][m.j] = tmp_v; - } - std::vector res; - for (const auto& list : res_lists) { - res.insert(res.end(), list.cbegin(), list.cend()); - } - return res; -} - 
-std::vector Expr::pre_compile(CodeBlob& code, std::vector>* lval_globs) const { - if (lval_globs && !(cls == _Tensor || cls == _Var || cls == _Hole || cls == _GlobVar)) { - std::cerr << "lvalue expression constructor is " << cls << std::endl; - throw Fatal{"cannot compile lvalue expression with unknown constructor"}; - } - switch (cls) { - case _Tensor: { - return pre_compile_tensor(args, code, lval_globs); - } - case _Apply: { - tolk_assert(sym); - std::vector res = pre_compile_tensor(args, code, lval_globs);; - auto rvect = new_tmp_vect(code); - auto& op = code.emplace_back(here, Op::_Call, rvect, res, sym); - if (flags & _IsImpure) { - op.set_impure(code); - } - return rvect; - } - case _GrabMutatedVars: { - SymValFunc* func_val = dynamic_cast(sym->value); - tolk_assert(func_val && func_val->has_mutate_params()); - tolk_assert(args.size() == 2 && args[0]->cls == _Apply && args[1]->cls == _Tensor); - auto right = args[0]->pre_compile(code); // apply (returning function result and mutated) - std::vector> local_globs; - if (!lval_globs) { - lval_globs = &local_globs; - } - auto left = args[1]->pre_compile(code, lval_globs); // mutated (lvalue) - auto rvect = new_tmp_vect(code); - left.push_back(rvect[0]); - for (var_idx_t v : left) { - code.on_var_modification(v, here); - } - code.emplace_back(here, Op::_Let, std::move(left), std::move(right)); - add_set_globs(code, local_globs, here); - return rvect; - } - case _ReturnSelf: { - tolk_assert(args.size() == 2 && sym); - Expr* this_arg = args[1]; - auto right = args[0]->pre_compile(code); - return this_arg->pre_compile(code); - } - case _Var: - case _Hole: - if (val < 0) { - throw ParseError{here, "unexpected variable definition"}; - } - return {val}; - case _VarApply: - if (args[0]->cls == _GlobFunc) { - auto res = args[1]->pre_compile(code); - auto rvect = new_tmp_vect(code); - auto& op = code.emplace_back(here, Op::_Call, rvect, std::move(res), args[0]->sym); - if (args[0]->flags & _IsImpure) { - 
op.set_impure(code); - } - return rvect; - } else { - auto res = args[1]->pre_compile(code); - auto tfunc = args[0]->pre_compile(code); - if (tfunc.size() != 1) { - throw Fatal{"stack tuple used as a function"}; - } - res.push_back(tfunc[0]); - auto rvect = new_tmp_vect(code); - code.emplace_back(here, Op::_CallInd, rvect, std::move(res)); - return rvect; - } - case _Const: { - auto rvect = new_tmp_vect(code); - code.emplace_back(here, Op::_IntConst, rvect, intval); - return rvect; - } - case _GlobFunc: - case _GlobVar: { - if (auto fun_ref = dynamic_cast(sym->value)) { - fun_ref->flags |= SymValFunc::flagUsedAsNonCall; - if (!fun_ref->arg_order.empty() || !fun_ref->ret_order.empty()) { - throw ParseError(here, "saving `" + sym->name() + "` into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack"); - } - if (fun_ref->has_mutate_params()) { - throw ParseError(here, "saving `" + sym->name() + "` into a variable is impossible, since it has `mutate` parameters and thus can only be called directly"); - } - } - auto rvect = new_tmp_vect(code); - if (lval_globs) { - lval_globs->push_back({ sym, rvect[0] }); - return rvect; - } else { - code.emplace_back(here, Op::_GlobVar, rvect, std::vector{}, sym); - return rvect; - } - } - case _Letop: { - return pre_compile_let(code, args.at(0), args.at(1), here); - } - case _MkTuple: { - auto left = new_tmp_vect(code); - auto right = args[0]->pre_compile(code); - code.emplace_back(here, Op::_Tuple, left, std::move(right)); - return left; - } - case _CondExpr: { - auto cond = args[0]->pre_compile(code); - tolk_assert(cond.size() == 1); - auto rvect = new_tmp_vect(code); - Op& if_op = code.emplace_back(here, Op::_If, cond); - code.push_set_cur(if_op.block0); - code.emplace_back(here, Op::_Let, rvect, args[1]->pre_compile(code)); - code.close_pop_cur(args[1]->here); - code.push_set_cur(if_op.block1); - code.emplace_back(here, Op::_Let, rvect, args[2]->pre_compile(code)); - 
code.close_pop_cur(args[2]->here); - return rvect; - } - case _SliceConst: { - auto rvect = new_tmp_vect(code); - code.emplace_back(here, Op::_SliceConst, rvect, strval); - return rvect; - } - default: - std::cerr << "expression constructor is " << cls << std::endl; - throw Fatal{"cannot compile expression with unknown constructor"}; - } -} - -} // namespace tolk diff --git a/tolk/generics-helpers.cpp b/tolk/generics-helpers.cpp new file mode 100644 index 000000000..3d353cc4e --- /dev/null +++ b/tolk/generics-helpers.cpp @@ -0,0 +1,272 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "generics-helpers.h" +#include "tolk.h" +#include "ast.h" +#include "ast-replicator.h" +#include "type-system.h" +#include "compiler-state.h" +#include "pipeline.h" + +namespace tolk { + +// given orig = "(int, T)" and substitutions = [slice], return "(int, slice)" +static TypePtr replace_genericT_with_deduced(TypePtr orig, const GenericsDeclaration* genericTs, const std::vector& substitutionTs) { + if (!orig || !orig->has_genericT_inside()) { + return orig; + } + tolk_assert(genericTs->size() == substitutionTs.size()); + + return orig->replace_children_custom([genericTs, substitutionTs](TypePtr child) { + if (const TypeDataGenericT* asT = child->try_as()) { + int idx = genericTs->find_nameT(asT->nameT); + if (idx == -1) { + throw Fatal("can not replace generic " + asT->nameT); + } + return substitutionTs[idx]; + } + return child; + }); +} + +// purpose: having `f(value: T)` and call `f(5)`, deduce T = int +// generally, there may be many generic Ts for declaration, and many arguments +// for every argument, `consider_next_condition()` is called +// example: `f(a: int, b: T1, c: (T1, T2))` and call `f(6, 7, (8, cs))` +// - `a` does not affect, it doesn't depend on generic Ts +// - next condition: param_type = `T1`, arg_type = `int`, deduce T1 = int +// - next condition: param_type = `(T1, T2)`, arg_type = `(int, slice)`, deduce T1 = int, T2 = slice +// for call `f(6, cs, (8, cs))` T1 will be both `slice` and `int`, fired an error +class GenericSubstitutionsDeduceForFunctionCall final { + const FunctionData* fun_ref; + std::vector substitutions; + + void provideDeducedT(const std::string& nameT, TypePtr deduced) { + if (deduced == TypeDataNullLiteral::create() || deduced->has_unknown_inside()) { + return; // just 'null' doesn't give sensible info + } + + int idx = fun_ref->genericTs->find_nameT(nameT); + if (substitutions[idx] == nullptr) { + substitutions[idx] = deduced; + } else if (substitutions[idx] != deduced) { + throw std::runtime_error(nameT + 
" is both " + substitutions[idx]->as_human_readable() + " and " + deduced->as_human_readable()); + } + } + +public: + explicit GenericSubstitutionsDeduceForFunctionCall(const FunctionData* fun_ref) + : fun_ref(fun_ref) { + substitutions.resize(fun_ref->genericTs->size()); // filled with nullptr (nothing deduced) + } + + void consider_next_condition(TypePtr param_type, TypePtr arg_type) { + if (const auto* asT = param_type->try_as()) { + // `(arg: T)` called as `f([1, 2])` => T is [int, int] + provideDeducedT(asT->nameT, arg_type); + } else if (const auto* p_tensor = param_type->try_as()) { + // `arg: (int, T)` called as `f((5, cs))` => T is slice + if (const auto* a_tensor = arg_type->try_as(); a_tensor && a_tensor->size() == p_tensor->size()) { + for (int i = 0; i < a_tensor->size(); ++i) { + consider_next_condition(p_tensor->items[i], a_tensor->items[i]); + } + } + } else if (const auto* p_tuple = param_type->try_as()) { + // `arg: [int, T]` called as `f([5, cs])` => T is slice + if (const auto* a_tuple = arg_type->try_as(); a_tuple && a_tuple->size() == p_tuple->size()) { + for (int i = 0; i < a_tuple->size(); ++i) { + consider_next_condition(p_tuple->items[i], a_tuple->items[i]); + } + } + } else if (const auto* p_callable = param_type->try_as()) { + // `arg: fun(TArg) -> TResult` called as `f(calcTupleLen)` => TArg is tuple, TResult is int + if (const auto* a_callable = arg_type->try_as(); a_callable && a_callable->params_size() == p_callable->params_size()) { + for (int i = 0; i < a_callable->params_size(); ++i) { + consider_next_condition(p_callable->params_types[i], a_callable->params_types[i]); + } + consider_next_condition(p_callable->return_type, a_callable->return_type); + } + } + } + + int get_first_not_deduced_idx() const { + for (int i = 0; i < static_cast(substitutions.size()); ++i) { + if (substitutions[i] == nullptr) { + return i; + } + } + return -1; + } + + std::vector flush() { + return {std::move(substitutions)}; + } +}; + +// clone the body 
of `f` replacing T everywhere with a substitution +// before: `fun f(v: T) { var cp: [T] = [v]; }` +// after: `fun f(v: int) { var cp: [int] = [v]; }` +// an instantiated function becomes a deep copy, all AST nodes are copied, no previous pointers left +class GenericFunctionReplicator final : public ASTReplicatorFunction { + const GenericsDeclaration* genericTs; + const std::vector& substitutionTs; + +protected: + using ASTReplicatorFunction::clone; + + TypePtr clone(TypePtr t) override { + return replace_genericT_with_deduced(t, genericTs, substitutionTs); + } + +public: + GenericFunctionReplicator(const GenericsDeclaration* genericTs, const std::vector& substitutionTs) + : genericTs(genericTs) + , substitutionTs(substitutionTs) { + } + + V clone_function_body(V v_function) override { + return createV( + v_function->loc, + clone(v_function->get_identifier()), + clone(v_function->get_param_list()), + clone(v_function->get_body()), + clone(v_function->declared_return_type), + nullptr, // a newly-created function is not generic + v_function->method_id, + v_function->flags + ); + } +}; + +std::string GenericsDeclaration::as_human_readable() const { + std::string result = "<"; + for (const GenericsItem& item : itemsT) { + if (result.size() > 1) { + result += ","; + } + result += item.nameT; + } + result += ">"; + return result; +} + +int GenericsDeclaration::find_nameT(std::string_view nameT) const { + for (int i = 0; i < static_cast(itemsT.size()); ++i) { + if (itemsT[i].nameT == nameT) { + return i; + } + } + return -1; +} + +// after creating a deep copy of `f` like `f`, its new and fresh body needs the previous pipeline to run +// for example, all local vars need to be registered as symbols, etc. 
+static void run_pipeline_for_instantiated_function(const FunctionData* inst_fun_ref) { + // these pipes are exactly the same as in tolk.cpp — all preceding (and including) type inferring + pipeline_resolve_identifiers_and_assign_symbols(inst_fun_ref); + pipeline_calculate_rvalue_lvalue(inst_fun_ref); + pipeline_detect_unreachable_statements(inst_fun_ref); + pipeline_infer_types_and_calls_and_fields(inst_fun_ref); +} + +std::string generate_instantiated_name(const std::string& orig_name, const std::vector& substitutions) { + // an instantiated function name will be "{orig_name}<{T1,T2,...}>" + std::string name = orig_name; + name += "<"; + for (TypePtr subs : substitutions) { + if (name.size() > orig_name.size() + 1) { + name += ","; + } + name += subs->as_human_readable(); + } + name.erase(std::remove(name.begin(), name.end(), ' '), name.end()); + name += ">"; + return name; +} + +td::Result> deduce_substitutionTs_on_generic_func_call(const FunctionData* called_fun, std::vector&& arg_types, TypePtr return_hint) { + try { + GenericSubstitutionsDeduceForFunctionCall deducing(called_fun); + for (const LocalVarData& param : called_fun->parameters) { + if (param.declared_type->has_genericT_inside() && param.idx < static_cast(arg_types.size())) { + deducing.consider_next_condition(param.declared_type, arg_types[param.idx]); + } + } + int idx = deducing.get_first_not_deduced_idx(); + if (idx != -1 && return_hint && called_fun->declared_return_type->has_genericT_inside()) { + deducing.consider_next_condition(called_fun->declared_return_type, return_hint); + idx = deducing.get_first_not_deduced_idx(); + } + if (idx != -1) { + return td::Status::Error(td::Slice{"can not deduce " + called_fun->genericTs->get_nameT(idx)}); + } + return deducing.flush(); + } catch (const std::runtime_error& ex) { + return td::Status::Error(td::Slice{ex.what()}); + } +} + +const FunctionData* instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, const std::string& 
inst_name, std::vector&& substitutionTs) { + tolk_assert(fun_ref->genericTs); + + // if `f` was earlier instantiated, return it + if (const auto* existing = lookup_global_symbol(inst_name)) { + const FunctionData* inst_ref = existing->try_as(); + tolk_assert(inst_ref); + return inst_ref; + } + + std::vector parameters; + parameters.reserve(fun_ref->get_num_params()); + for (const LocalVarData& orig_p : fun_ref->parameters) { + parameters.emplace_back(orig_p.name, orig_p.loc, replace_genericT_with_deduced(orig_p.declared_type, fun_ref->genericTs, substitutionTs), orig_p.flags, orig_p.idx); + } + TypePtr declared_return_type = replace_genericT_with_deduced(fun_ref->declared_return_type, fun_ref->genericTs, substitutionTs); + const GenericsInstantiation* instantiationTs = new GenericsInstantiation(loc, std::move(substitutionTs)); + + if (fun_ref->is_asm_function()) { + FunctionData* inst_ref = new FunctionData(inst_name, fun_ref->loc, declared_return_type, std::move(parameters), fun_ref->flags, nullptr, instantiationTs, new FunctionBodyAsm, fun_ref->ast_root); + inst_ref->arg_order = fun_ref->arg_order; + inst_ref->ret_order = fun_ref->ret_order; + G.symtable.add_function(inst_ref); + G.all_functions.push_back(inst_ref); + run_pipeline_for_instantiated_function(inst_ref); + return inst_ref; + } + + if (fun_ref->is_builtin_function()) { + FunctionData* inst_ref = new FunctionData(inst_name, fun_ref->loc, declared_return_type, std::move(parameters), fun_ref->flags, nullptr, instantiationTs, fun_ref->body, fun_ref->ast_root); + inst_ref->arg_order = fun_ref->arg_order; + inst_ref->ret_order = fun_ref->ret_order; + G.symtable.add_function(inst_ref); + return inst_ref; + } + + GenericFunctionReplicator replicator(fun_ref->genericTs, instantiationTs->substitutions); + V inst_root = replicator.clone_function_body(fun_ref->ast_root->as()); + + FunctionData* inst_ref = new FunctionData(inst_name, fun_ref->loc, declared_return_type, std::move(parameters), fun_ref->flags, 
nullptr, instantiationTs, new FunctionBodyCode, inst_root); + inst_ref->arg_order = fun_ref->arg_order; + inst_ref->ret_order = fun_ref->ret_order; + inst_root->mutate()->assign_fun_ref(inst_ref); + G.symtable.add_function(inst_ref); + G.all_functions.push_back(inst_ref); + run_pipeline_for_instantiated_function(inst_ref); + return inst_ref; +} + +} // namespace tolk diff --git a/tolk/generics-helpers.h b/tolk/generics-helpers.h new file mode 100644 index 000000000..2a304f55b --- /dev/null +++ b/tolk/generics-helpers.h @@ -0,0 +1,64 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +#include "src-file.h" +#include "fwd-declarations.h" +#include "td/utils/Status.h" +#include + +namespace tolk { + +// when a function is declared `f`, this "" is represented as this class +// (not at AST, but at symbol storage level) +struct GenericsDeclaration { + struct GenericsItem { + std::string_view nameT; + + explicit GenericsItem(std::string_view nameT) + : nameT(nameT) {} + }; + + explicit GenericsDeclaration(std::vector&& itemsT) + : itemsT(std::move(itemsT)) {} + + const std::vector itemsT; + + std::string as_human_readable() const; + + size_t size() const { return itemsT.size(); } + bool has_nameT(std::string_view nameT) const { return find_nameT(nameT) != -1; } + int find_nameT(std::string_view nameT) const; + std::string get_nameT(int idx) const { return static_cast(itemsT[idx].nameT); } +}; + +// when a function call is `f()`, this "" is represented as this class +struct GenericsInstantiation { + const std::vector substitutions; // for genericTs + const SrcLocation loc; // first instantiation location + + explicit GenericsInstantiation(SrcLocation loc, std::vector&& substitutions) + : substitutions(std::move(substitutions)) + , loc(loc) { + } +}; + +std::string generate_instantiated_name(const std::string& orig_name, const std::vector& substitutions); +td::Result> deduce_substitutionTs_on_generic_func_call(const FunctionData* called_fun, std::vector&& arg_types, TypePtr return_hint); +const FunctionData* instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, const std::string& inst_name, std::vector&& substitutionTs); + +} // namespace tolk diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp index 17eb4544c..7e8c8fb2d 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -15,9 +15,9 @@ along with TON Blockchain Library. If not, see . 
*/ #include "lexer.h" -#include "compiler-state.h" -#include "symtable.h" #include +#include +#include namespace tolk { @@ -328,6 +328,7 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { case 2: if (str == "do") return tok_do; if (str == "if") return tok_if; + if (str == "as") return tok_as; break; case 3: if (str == "int") return tok_int; @@ -345,7 +346,6 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (str == "null") return tok_null; if (str == "void") return tok_void; if (str == "bool") return tok_bool; - if (str == "auto") return tok_auto; if (str == "self") return tok_self; if (str == "tolk") return tok_tolk; if (str == "type") return tok_type; @@ -406,7 +406,6 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (TokenType kw_tok = maybe_keyword(str_val)) { lex->add_token(kw_tok, str_val); } else { - G.symbols.lookup_add(str_val); lex->add_token(tok_identifier, str_val); } return true; @@ -421,7 +420,7 @@ struct ChunkIdentifierInBackticks final : ChunkLexerBase { const char* str_begin = lex->c_str(); lex->skip_chars(1); while (!lex->is_eof() && lex->char_at() != '`' && lex->char_at() != '\n') { - if (std::isspace(lex->char_at())) { // probably, I'll remove this restriction after rewriting symtable and cur_sym_idx + if (std::isspace(lex->char_at())) { lex->error("an identifier can't have a space in its name (even inside backticks)"); } lex->skip_chars(1); @@ -432,7 +431,6 @@ struct ChunkIdentifierInBackticks final : ChunkLexerBase { std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1); lex->skip_chars(1); - G.symbols.lookup_add(str_val); lex->add_token(tok_identifier, str_val); return true; } @@ -580,6 +578,16 @@ void Lexer::next_special(TokenType parse_next_as, const char* str_expected) { cur_token = tokens_circularbuf[++cur_token_idx & 7]; } +Lexer::SavedPositionForLookahead Lexer::save_parsing_position() const { + return {p_next, cur_token_idx, cur_token}; +} + +void 
Lexer::restore_position(SavedPositionForLookahead saved) { + p_next = saved.p_next; + cur_token_idx = last_token_idx = saved.cur_token_idx; + cur_token = saved.cur_token; +} + void Lexer::error(const std::string& err_msg) const { throw ParseError(cur_location(), err_msg); } @@ -597,7 +605,7 @@ void lexer_init() { // Hence, it's difficult to measure Lexer performance separately. // This function can be called just to tick Lexer performance, it just scans all input files. // There is no sense to use it in production, but when refactoring and optimizing Lexer, it's useful. -void lexer_measure_performance(const AllSrcFiles& files_to_just_parse) { +void lexer_measure_performance(const AllRegisteredSrcFiles& files_to_just_parse) { for (const SrcFile* file : files_to_just_parse) { Lexer lex(file); while (!lex.is_eof()) { diff --git a/tolk/lexer.h b/tolk/lexer.h index 8a25f9526..81d579db0 100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -57,10 +57,29 @@ enum TokenType { tok_dot, tok_plus, + tok_set_plus, tok_minus, + tok_set_minus, tok_mul, + tok_set_mul, tok_div, + tok_set_div, tok_mod, + tok_set_mod, + tok_lshift, + tok_set_lshift, + tok_rshift, + tok_set_rshift, + tok_rshiftR, + tok_rshiftC, + tok_bitwise_and, + tok_set_bitwise_and, + tok_bitwise_or, + tok_set_bitwise_or, + tok_bitwise_xor, + tok_set_bitwise_xor, + tok_bitwise_not, + tok_question, tok_comma, tok_semicolon, @@ -77,32 +96,14 @@ enum TokenType { tok_logical_not, tok_logical_and, tok_logical_or, - tok_bitwise_and, - tok_bitwise_or, - tok_bitwise_xor, - tok_bitwise_not, tok_eq, tok_neq, tok_leq, tok_geq, tok_spaceship, - tok_lshift, - tok_rshift, - tok_rshiftR, - tok_rshiftC, tok_divR, tok_divC, - tok_set_plus, - tok_set_minus, - tok_set_mul, - tok_set_div, - tok_set_mod, - tok_set_lshift, - tok_set_rshift, - tok_set_bitwise_and, - tok_set_bitwise_or, - tok_set_bitwise_xor, tok_return, tok_repeat, @@ -124,9 +125,9 @@ enum TokenType { tok_builder, tok_continuation, tok_tuple, - tok_auto, tok_void, tok_arrow, + 
tok_as, tok_tolk, tok_semver, @@ -165,6 +166,12 @@ class Lexer { public: + struct SavedPositionForLookahead { + const char* p_next = nullptr; + int cur_token_idx = 0; + Token cur_token; + }; + explicit Lexer(const SrcFile* file); Lexer(const Lexer&) = delete; Lexer &operator=(const Lexer&) = delete; @@ -208,6 +215,9 @@ class Lexer { void next(); void next_special(TokenType parse_next_as, const char* str_expected); + SavedPositionForLookahead save_parsing_position() const; + void restore_position(SavedPositionForLookahead saved); + void check(TokenType next_tok, const char* str_expected) const { if (cur_token.type != next_tok) { unexpected(str_expected); // unlikely path, not inlined @@ -229,6 +239,6 @@ class Lexer { void lexer_init(); // todo #ifdef TOLK_PROFILING -void lexer_measure_performance(const AllSrcFiles& files_to_just_parse); +void lexer_measure_performance(const AllRegisteredSrcFiles& files_to_just_parse); } // namespace tolk diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 7257bfb07..d60bb8b34 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -17,1068 +17,561 @@ #include "tolk.h" #include "src-file.h" #include "ast.h" -#include "compiler-state.h" +#include "ast-visitor.h" +#include "type-system.h" #include "common/refint.h" -#include "openssl/digest.hpp" -#include "crypto/common/util.h" -#include "td/utils/crypto.h" -#include "ton/ton-types.h" +#include "constant-evaluator.h" /* - * In this module, we convert modern AST representation to legacy representation - * (global state, Expr, CodeBlob, etc.) to make the rest of compiling process remain unchanged for now. - * Since time goes, I'll gradually get rid of legacy, since most of the code analysis - * should be done at AST level. + * This pipe is the last one operating AST: it transforms AST to IR. + * IR is described as "Op" struct. 
So, here AST is transformed to Ops, and then all the rest "legacy" + * kernel (initially forked from FunC) comes into play. + * Up to this point, all types have been inferred, all validity checks have been passed, etc. + * All properties in AST nodes are assigned and can be safely used (fun_ref, etc.). + * So, if execution reaches this pass, the input is correct, and code generation should succeed. */ namespace tolk { -static int calc_sym_idx(std::string_view sym_name) { - return G.symbols.lookup(sym_name); -} - -void Expr::fire_error_rvalue_expected() const { - // generally, almost all vertices are rvalue, that's why code leading to "not rvalue" - // should be very strange, like `var x = _` - throw ParseError(here, "rvalue expected"); -} +struct LValGlobs { + std::vector> globs; -void Expr::fire_error_lvalue_expected(const std::string& details) const { - // "lvalue expected" is when a user modifies something unmodifiable - // example: `f() = 32` - // example: `loadUint(c.beginParse(), 32)` (since `loadUint()` mutates the first argument) - throw ParseError(here, "lvalue expected (" + details + ")"); -} + void add_modified_glob(const GlobalVarData* g_sym, var_idx_t local_ir_idx) { + globs.emplace_back(g_sym, local_ir_idx); + } -void Expr::fire_error_modifying_immutable(const std::string& details) const { - // "modifying immutable variable" is when a user assigns to a variable declared `val` - // example: `immutable_val = 32` - // example: `(regular_var, immutable_val) = f()` - // for better error message, try to print out variable name if possible - std::string variable_name; - if (cls == _Var || cls == _Const) { - variable_name = sym->name(); - } else if (cls == _Tensor || cls == _MkTuple) { - for (const Expr* arg : (cls == _Tensor ? 
args : args[0]->args)) { - if (arg->is_immutable() && (arg->cls == _Var || arg->cls == _Const)) { - variable_name = arg->sym->name(); - break; - } + void gen_ops_set_globs(CodeBlob& code, SrcLocation loc) const { + for (const auto& [g_sym, ir_idx] : globs) { + Op& op = code.emplace_back(loc, Op::_SetGlob, std::vector{}, std::vector{ ir_idx }, g_sym); + op.set_impure_flag(); } } +}; - if (variable_name == "self") { - throw ParseError(here, "modifying `self` (" + details + "), which is immutable by default; probably, you want to declare `mutate self`"); - } else if (!variable_name.empty()) { - throw ParseError(here, "modifying an immutable variable `" + variable_name + "` (" + details + ")"); - } else { - throw ParseError(here, "modifying an immutable variable (" + details + ")"); - } -} +std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs = nullptr); +void process_any_statement(AnyV v, CodeBlob& code); -GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN -static void fire_error_invalid_mutate_arg_passed(SrcLocation loc, const SymDef* func_sym, const SymDef* param_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { - std::string func_name = func_sym->name(); - std::string arg_str(arg_expr->type == ast_identifier ? arg_expr->as()->name : "obj"); - const SymValFunc* func_val = dynamic_cast(func_sym->value); - const SymValVariable* param_val = dynamic_cast(param_sym->value); - // case: `loadInt(cs, 32)`; suggest: `cs.loadInt(32)` - if (param_val->is_mutate_parameter() && !arg_passed_as_mutate && !called_as_method && param_val->idx == 0 && func_val->does_accept_self()) { - throw ParseError(loc, "`" + func_name + "` is a mutating method; consider calling `" + arg_str + "." 
+ func_name + "()`, not `" + func_name + "(" + arg_str + ")`"); +static std::vector> pre_compile_tensor_inner(CodeBlob& code, const std::vector& args, + LValGlobs* lval_globs) { + const int n = static_cast(args.size()); + if (n == 0) { // just `()` + return {}; } - // case: `cs.mutating_function()`; suggest: `mutating_function(mutate cs)` or make it a method - if (param_val->is_mutate_parameter() && called_as_method && param_val->idx == 0 && !func_val->does_accept_self()) { - throw ParseError(loc, "function `" + func_name + "` mutates parameter `" + param_sym->name() + "`; consider calling `" + func_name + "(mutate " + arg_str + ")`, not `" + arg_str + "." + func_name + "`(); alternatively, rename parameter to `self` to make it a method"); + if (n == 1) { // just `(x)`: even if x is modified (e.g. `f(x=x+2)`), there are no next arguments + return {pre_compile_expr(args[0], code, lval_globs)}; } - // case: `mutating_function(arg)`; suggest: `mutate arg` - if (param_val->is_mutate_parameter() && !arg_passed_as_mutate) { - throw ParseError(loc, "function `" + func_name + "` mutates parameter `" + param_sym->name() + "`; you need to specify `mutate` when passing an argument, like `mutate " + arg_str + "`"); - } - // case: `usual_function(mutate arg)` - if (!param_val->is_mutate_parameter() && arg_passed_as_mutate) { - throw ParseError(loc, "incorrect `mutate`, since `" + func_name + "` does not mutate this parameter"); - } - throw Fatal("unreachable"); -} -// parse address like "EQCRDM9h4k3UJdOePPuyX40mCgA4vxge5Dc5vjBR8djbEKC5" -// based on unpack_std_smc_addr() from block.cpp -// (which is not included to avoid linking with ton_crypto) -static bool parse_friendly_address(const char packed[48], ton::WorkchainId& workchain, ton::StdSmcAddress& addr) { - unsigned char buffer[36]; - if (!td::buff_base64_decode(td::MutableSlice{buffer, 36}, td::Slice{packed, 48}, true)) { - return false; - } - td::uint16 crc = td::crc16(td::Slice{buffer, 34}); - if (buffer[34] != (crc >> 
8) || buffer[35] != (crc & 0xff) || (buffer[0] & 0x3f) != 0x11) { - return false; - } - workchain = (td::int8)buffer[1]; - std::memcpy(addr.data(), buffer + 2, 32); - return true; -} + // the purpose is to handle such cases: `return (x, x += y, x)` + // without this, ops will be { _Call $2 = +($0_x, $1_y); _Return $0_x, $2, $0_x } - invalid + // with this, ops will be { _Let $3 = $0_x; _Call $2 = ...; _Return $3, $2, $0_x } - valid, tmp var for x + // how it works: for every arg, after transforming to ops, start tracking ir_idx inside it + // on modification attempt, create Op::_Let to a tmp var and replace old ir_idx with tmp_idx in result + struct WatchingVarList { + std::vector watched_vars; + std::vector> res_lists; -// parse address like "0:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8" -// based on StdAddress::parse_addr() from block.cpp -// (which is not included to avoid linking with ton_crypto) -static bool parse_raw_address(const std::string& acc_string, int& workchain, ton::StdSmcAddress& addr) { - size_t pos = acc_string.find(':'); - if (pos != std::string::npos) { - td::Result r_wc = td::to_integer_safe(acc_string.substr(0, pos)); - if (r_wc.is_error()) { - return false; + explicit WatchingVarList(int n_args) { + res_lists.reserve(n_args); } - workchain = r_wc.move_as_ok(); - pos++; - } else { - pos = 0; - } - if (acc_string.size() != pos + 64) { - return false; - } - for (int i = 0; i < 64; ++i) { // loop through each hex digit - char c = acc_string[pos + i]; - int x; - if (c >= '0' && c <= '9') { - x = c - '0'; - } else if (c >= 'a' && c <= 'z') { - x = c - 'a' + 10; - } else if (c >= 'A' && c <= 'Z') { - x = c - 'A' + 10; - } else { - return false; + bool is_watched(var_idx_t ir_idx) const { + return std::find(watched_vars.begin(), watched_vars.end(), ir_idx) != watched_vars.end(); } - if ((i & 1) == 0) { - addr.data()[i >> 1] = static_cast((addr.data()[i >> 1] & 0x0F) | (x << 4)); - } else { - addr.data()[i >> 1] = 
static_cast((addr.data()[i >> 1] & 0xF0) | x); + void add_and_watch_modifications(std::vector&& vars_of_ith_arg, CodeBlob& code) { + for (var_idx_t ir_idx : vars_of_ith_arg) { + if (code.vars[ir_idx].v_sym && !is_watched(ir_idx)) { + watched_vars.emplace_back(ir_idx); + code.vars[ir_idx].on_modification.emplace_back([this, &code, ir_idx](SrcLocation loc) { + on_var_modified(ir_idx, loc, code); + }); + } + } + res_lists.emplace_back(std::move(vars_of_ith_arg)); } - } - return true; -} - -static Expr* create_expr_apply(SrcLocation loc, SymDef* sym, std::vector&& args) { - Expr* apply = new Expr(Expr::_Apply, sym, std::move(args)); - apply->here = loc; - apply->flags = Expr::_IsRvalue; - apply->deduce_type(); - return apply; -} - -static Expr* create_expr_int_const(SrcLocation loc, int int_val) { - Expr* int_const = new Expr(Expr::_Const, loc); - int_const->intval = td::make_refint(int_val); - int_const->flags = Expr::_IsRvalue; - int_const->e_type = TypeExpr::new_atomic(TypeExpr::_Int); - return int_const; -} - -namespace blk_fl { -enum { end = 1, ret = 2, empty = 4 }; -typedef int val; -constexpr val init = end | empty; -void combine(val& x, const val y) { - x |= y & ret; - x &= y | ~(end | empty); -} -void combine_parallel(val& x, const val y) { - x &= y | ~(ret | empty); - x |= y & end; -} -} // namespace blk_fl - -Expr* process_expr(AnyV v, CodeBlob& code); -blk_fl::val process_statement(AnyV v, CodeBlob& code); - -static void check_global_func(SrcLocation loc, sym_idx_t func_name) { - SymDef* sym_def = lookup_symbol(func_name); - if (!sym_def) { - throw ParseError(loc, "undefined symbol `" + G.symbols.get_name(func_name) + "`"); - } -} -static void check_import_exists_when_using_sym(AnyV v_usage, const SymDef* used_sym) { - if (!v_usage->loc.is_symbol_from_same_or_builtin_file(used_sym->loc)) { - const SrcFile* declared_in = used_sym->loc.get_src_file(); - bool has_import = false; - for (const SrcFile::ImportStatement& import_stmt : 
v_usage->loc.get_src_file()->imports) { - if (import_stmt.imported_file == declared_in) { - has_import = true; + void on_var_modified(var_idx_t ir_idx, SrcLocation loc, CodeBlob& code) { + tolk_assert(is_watched(ir_idx)); + var_idx_t tmp_idx = code.create_tmp_var(code.vars[ir_idx].v_type, loc); + code.emplace_back(loc, Op::_Let, std::vector{tmp_idx}, std::vector{ir_idx}); + for (std::vector& prev_vars : res_lists) { + std::replace(prev_vars.begin(), prev_vars.end(), ir_idx, tmp_idx); } } - if (!has_import) { - v_usage->error("Using a non-imported symbol `" + used_sym->name() + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); - } - } -} -static Expr* create_new_local_variable(SrcLocation loc, std::string_view var_name, TypeExpr* var_type, bool is_immutable) { - SymDef* sym = lookup_symbol(calc_sym_idx(var_name)); - if (sym) { // creating a new variable, but something found in symtable - if (sym->level != G.scope_level) { - sym = nullptr; // declaring a new variable with the same name, but in another scope - } else { - throw ParseError(loc, "redeclaration of local variable `" + static_cast(var_name) + "`"); + std::vector> clear_and_stop_watching(CodeBlob& code) { + for (var_idx_t ir_idx : watched_vars) { + code.vars[ir_idx].on_modification.pop_back(); + } + watched_vars.clear(); + return std::move(res_lists); } + }; + + WatchingVarList watched_vars(n); + for (int arg_idx = 0; arg_idx < n; ++arg_idx) { + std::vector vars_of_ith_arg = pre_compile_expr(args[arg_idx], code, lval_globs); + watched_vars.add_and_watch_modifications(std::move(vars_of_ith_arg), code); } - Expr* x = new Expr{Expr::_Var, loc}; - x->val = ~calc_sym_idx(var_name); - x->e_type = var_type; - x->flags = Expr::_IsLvalue | (is_immutable ? 
Expr::_IsImmutable : 0); - return x; + return watched_vars.clear_and_stop_watching(code); } -static Expr* create_new_underscore_variable(SrcLocation loc, TypeExpr* var_type) { - Expr* x = new Expr{Expr::_Hole, loc}; - x->val = -1; - x->flags = Expr::_IsLvalue; - x->e_type = var_type; - return x; +static std::vector pre_compile_tensor(CodeBlob& code, const std::vector& args, + LValGlobs* lval_globs = nullptr) { + std::vector> res_lists = pre_compile_tensor_inner(code, args, lval_globs); + std::vector res; + for (const std::vector& list : res_lists) { + res.insert(res.end(), list.cbegin(), list.cend()); + } + return res; } -static Expr* process_expr(V v, CodeBlob& code) { - TokenType t = v->tok; - std::string operator_name = static_cast(v->operator_name); - - if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || - t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || - t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { - Expr* x = process_expr(v->get_lhs(), code); - x->chk_rvalue(); - if (!x->is_lvalue()) { - x->fire_error_lvalue_expected("left side of assignment"); - } - if (x->is_immutable()) { - x->fire_error_modifying_immutable("left side of assignment"); +static std::vector pre_compile_let(CodeBlob& code, AnyExprV lhs, AnyExprV rhs, SrcLocation loc) { + // [lhs] = [rhs]; since type checking is ok, it's the same as "lhs = rhs" + if (lhs->type == ast_typed_tuple && rhs->type == ast_typed_tuple) { + std::vector right = pre_compile_tensor(code, rhs->as()->get_items()); + LValGlobs globs; + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); + code.on_var_modification(left, loc); + code.emplace_back(loc, Op::_Let, std::move(left), right); + globs.gen_ops_set_globs(code, loc); + return right; + } + // [lhs] = rhs; it's un-tuple to N left vars + if (lhs->type == ast_typed_tuple) { + std::vector right = pre_compile_expr(rhs, code); + const TypeDataTypedTuple* 
inferred_tuple = rhs->inferred_type->try_as(); + std::vector types_list = inferred_tuple->items; + std::vector rvect = {code.create_tmp_var(TypeDataTensor::create(std::move(types_list)), rhs->loc)}; + code.emplace_back(lhs->loc, Op::_UnTuple, rvect, std::move(right)); + LValGlobs globs; + std::vector left = pre_compile_tensor(code, lhs->as()->get_items(), &globs); + code.on_var_modification(left, loc); + code.emplace_back(loc, Op::_Let, std::move(left), rvect); + globs.gen_ops_set_globs(code, loc); + return rvect; + } + // lhs = rhs + std::vector right = pre_compile_expr(rhs, code); + LValGlobs globs; + std::vector left = pre_compile_expr(lhs, code, &globs); + code.on_var_modification(left, loc); + code.emplace_back(loc, Op::_Let, std::move(left), right); + globs.gen_ops_set_globs(code, loc); + return right; +} + +static std::vector gen_op_call(CodeBlob& code, TypePtr ret_type, SrcLocation here, + std::vector&& args_vars, const FunctionData* fun_ref) { + std::vector rvect = {code.create_tmp_var(ret_type, here)}; + Op& op = code.emplace_back(here, Op::_Call, rvect, std::move(args_vars), fun_ref); + if (!fun_ref->is_marked_as_pure()) { + op.set_impure_flag(); + } + return rvect; +} + + +static std::vector process_symbol(SrcLocation loc, const Symbol* sym, CodeBlob& code, LValGlobs* lval_globs) { + if (const auto* glob_ref = sym->try_as()) { + std::vector rvect = {code.create_tmp_var(glob_ref->declared_type, loc)}; + if (lval_globs) { + lval_globs->add_modified_glob(glob_ref, rvect[0]); + return rvect; + } else { + code.emplace_back(loc, Op::_GlobVar, rvect, std::vector{}, glob_ref); + return rvect; } - SymDef* sym = lookup_symbol(calc_sym_idx("^_" + operator_name + "_")); - Expr* y = process_expr(v->get_rhs(), code); - y->chk_rvalue(); - Expr* z = create_expr_apply(v->loc, sym, {x, y}); - Expr* res = new Expr{Expr::_Letop, {x->copy(), z}}; - res->here = v->loc; - res->flags = x->flags | Expr::_IsRvalue; - res->deduce_type(); - return res; } - if (t == tok_assign) { - 
Expr* x = process_expr(v->get_lhs(), code); - if (!x->is_lvalue()) { - x->fire_error_lvalue_expected("left side of assignment"); - } - if (x->is_immutable()) { - x->fire_error_modifying_immutable("left side of assignment"); + if (const auto* const_ref = sym->try_as()) { + if (const_ref->is_int_const()) { + std::vector rvect = {code.create_tmp_var(TypeDataInt::create(), loc)}; + code.emplace_back(loc, Op::_IntConst, rvect, const_ref->as_int_const()); + return rvect; + } else { + std::vector rvect = {code.create_tmp_var(TypeDataSlice::create(), loc)}; + code.emplace_back(loc, Op::_SliceConst, rvect, const_ref->as_slice_const()); + return rvect; } - Expr* y = process_expr(v->get_rhs(), code); - y->chk_rvalue(); - x->predefine_vars(); - x->define_new_vars(code); - Expr* res = new Expr{Expr::_Letop, {x, y}}; - res->here = v->loc; - res->flags = x->flags | Expr::_IsRvalue; - res->deduce_type(); - return res; } - if (t == tok_minus || t == tok_plus || - t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor || - t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship || - t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR || - t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { - Expr* res = process_expr(v->get_lhs(), code); - res->chk_rvalue(); - SymDef* sym = lookup_symbol(calc_sym_idx("_" + operator_name + "_")); - Expr* x = process_expr(v->get_rhs(), code); - x->chk_rvalue(); - res = create_expr_apply(v->loc, sym, {res, x}); - return res; + if (const auto* fun_ref = sym->try_as()) { + std::vector rvect = {code.create_tmp_var(fun_ref->inferred_full_type, loc)}; + code.emplace_back(loc, Op::_GlobVar, rvect, std::vector{}, fun_ref); + return rvect; } - if (t == tok_logical_and || t == tok_logical_or) { - // do the following transformations: - // a && b -> a ? (b != 0) : 0 - // a || b -> a ? 
1 : (b != 0) - SymDef* sym_neq = lookup_symbol(calc_sym_idx("_!=_")); - Expr* lhs = process_expr(v->get_lhs(), code); - Expr* rhs = process_expr(v->get_rhs(), code); - Expr* e_neq0 = create_expr_apply(v->loc, sym_neq, {rhs, create_expr_int_const(v->loc, 0)}); - Expr* e_when_true = t == tok_logical_and ? e_neq0 : create_expr_int_const(v->loc, -1); - Expr* e_when_false = t == tok_logical_and ? create_expr_int_const(v->loc, 0) : e_neq0; - Expr* e_ternary = new Expr(Expr::_CondExpr, {lhs, e_when_true, e_when_false}); - e_ternary->here = v->loc; - e_ternary->flags = Expr::_IsRvalue; - e_ternary->deduce_type(); - return e_ternary; + if (const auto* var_ref = sym->try_as()) { + return {var_ref->idx}; } - - v->error("unsupported binary operator"); + throw Fatal("process_symbol"); } -static Expr* process_expr(V v, CodeBlob& code) { - TokenType t = v->tok; - SymDef* sym = lookup_symbol(calc_sym_idx(static_cast(v->operator_name) + "_")); - Expr* x = process_expr(v->get_rhs(), code); - x->chk_rvalue(); - - // here's an optimization to convert "-1" (tok_minus tok_int_const) to a const -1, not to Expr::Apply(-,1) - // without this, everything still works, but Tolk looses some vars/stack knowledge for now (to be fixed later) - // in FunC, it was: - // `var fst = -1;` // is constantly 1 - // `var snd = - 1;` // is Expr::Apply(-), a comment "snd=1" is lost in stack layout comments, and so on - // hence, when after grammar modification tok_minus became a true unary operator (not a part of a number), - // and thus to preserve existing behavior until compiler parts are completely rewritten, handle this case here - if (t == tok_minus && x->cls == Expr::_Const) { - x->intval = -x->intval; - if (!x->intval->signed_fits_bits(257)) { - v->error("integer overflow"); - } - return x; - } - if (t == tok_plus && x->cls == Expr::_Const) { - return x; +static std::vector process_assign(V v, CodeBlob& code) { + if (auto lhs_decl = v->get_lhs()->try_as()) { + return pre_compile_let(code, 
lhs_decl->get_expr(), v->get_rhs(), v->loc); + } else { + return pre_compile_let(code, v->get_lhs(), v->get_rhs(), v->loc); } - - return create_expr_apply(v->loc, sym, {x}); } -static Expr* process_expr(V v, CodeBlob& code) { - Expr* cond = process_expr(v->get_cond(), code); - cond->chk_rvalue(); - Expr* x = process_expr(v->get_when_true(), code); - x->chk_rvalue(); - Expr* y = process_expr(v->get_when_false(), code); - y->chk_rvalue(); - Expr* res = new Expr{Expr::_CondExpr, {cond, x, y}}; - res->here = v->loc; - res->flags = Expr::_IsRvalue; - res->deduce_type(); - return res; +static std::vector process_set_assign(V v, CodeBlob& code) { + // for "a += b", emulate "a = a + b" + // seems not beautiful, but it works; probably, this transformation should be done at AST level in advance + std::string_view calc_operator = v->operator_name; // "+" for operator += + auto v_apply = createV(v->loc, calc_operator, static_cast(v->tok - 1), v->get_lhs(), v->get_rhs()); + v_apply->assign_inferred_type(v->inferred_type); + v_apply->assign_fun_ref(v->fun_ref); + return pre_compile_let(code, v->get_lhs(), v_apply, v->loc); } -static Expr* process_function_arguments(SymDef* func_sym, V v, Expr* lhs_of_dot_call, CodeBlob& code) { - SymValFunc* func_val = dynamic_cast(func_sym->value); - int delta_self = lhs_of_dot_call ? 
1 : 0; - int n_arguments = static_cast(v->get_arguments().size()) + delta_self; - int n_parameters = static_cast(func_val->parameters.size()); - - // Tolk doesn't have optional parameters currently, so just compare counts - if (n_parameters < n_arguments) { - v->error("too many arguments in call to `" + func_sym->name() + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); - } - if (n_arguments < n_parameters) { - v->error("too few arguments in call to `" + func_sym->name() + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); - } - - std::vector apply_args; - apply_args.reserve(n_arguments); - if (lhs_of_dot_call) { - apply_args.push_back(lhs_of_dot_call); - } - for (int i = delta_self; i < n_arguments; ++i) { - auto v_arg = v->get_arg(i - delta_self); - if (SymDef* param_sym = func_val->parameters[i]) { // can be null (for underscore parameter) - SymValVariable* param_val = dynamic_cast(param_sym->value); - if (param_val->is_mutate_parameter() != v_arg->passed_as_mutate) { - fire_error_invalid_mutate_arg_passed(v_arg->loc, func_sym, param_sym, false, v_arg->passed_as_mutate, v_arg->get_expr()); - } - } - - Expr* arg = process_expr(v_arg->get_expr(), code); - arg->chk_rvalue(); - apply_args.push_back(arg); - } - - Expr* apply = new Expr{Expr::_Apply, func_sym, std::move(apply_args)}; - apply->flags = Expr::_IsRvalue | (!func_val->is_marked_as_pure() * Expr::_IsImpure); - apply->here = v->loc; - apply->deduce_type(); - - return apply; -} +static std::vector process_binary_operator(V v, CodeBlob& code) { + TokenType t = v->tok; -static Expr* process_function_call(V v, CodeBlob& code) { - // special error for "null()" which is a FunC syntax - if (v->get_called_f()->type == ast_null_keyword) { - v->error("null is not a function: use `null`, not `null()`"); + if (v->fun_ref) { // almost all operators, fun_ref was assigned at type inferring + 
std::vector args_vars = pre_compile_tensor(code, {v->get_lhs(), v->get_rhs()}); + return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref); } - - // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` - Expr* lhs = process_expr(v->get_called_f(), code); - if (lhs->cls != Expr::_GlobFunc) { - Expr* tensor_arg = new Expr(Expr::_Tensor, v->loc); - std::vector type_list; - type_list.reserve(v->get_num_args()); + if (t == tok_logical_and || t == tok_logical_or) { + // do the following transformations: + // a && b -> a ? (b != 0) : 0 + // a || b -> a ? 1 : (b != 0) + AnyExprV v_0 = createV(v->loc, td::make_refint(0), "0"); + v_0->mutate()->assign_inferred_type(TypeDataInt::create()); + AnyExprV v_1 = createV(v->loc, td::make_refint(-1), "-1"); + v_1->mutate()->assign_inferred_type(TypeDataInt::create()); + auto v_b_ne_0 = createV(v->loc, "!=", tok_neq, v->get_rhs(), v_0); + v_b_ne_0->mutate()->assign_inferred_type(TypeDataInt::create()); + v_b_ne_0->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->as()); + std::vector cond = pre_compile_expr(v->get_lhs(), code); + tolk_assert(cond.size() == 1); + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + Op& if_op = code.emplace_back(v->loc, Op::_If, cond); + code.push_set_cur(if_op.block0); + code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? v_b_ne_0 : v_1, code)); + code.close_pop_cur(v->loc); + code.push_set_cur(if_op.block1); + code.emplace_back(v->loc, Op::_Let, rvect, pre_compile_expr(t == tok_logical_and ? 
v_0 : v_b_ne_0, code)); + code.close_pop_cur(v->loc); + return rvect; + } + + throw UnexpectedASTNodeType(v, "process_binary_operator"); +} + +static std::vector process_unary_operator(V v, CodeBlob& code) { + std::vector args_vars = pre_compile_tensor(code, {v->get_rhs()}); + return gen_op_call(code, v->inferred_type, v->loc, std::move(args_vars), v->fun_ref); +} + +static std::vector process_ternary_operator(V v, CodeBlob& code) { + std::vector cond = pre_compile_expr(v->get_cond(), code); + tolk_assert(cond.size() == 1); + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + Op& if_op = code.emplace_back(v->loc, Op::_If, cond); + code.push_set_cur(if_op.block0); + code.emplace_back(v->get_when_true()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_true(), code)); + code.close_pop_cur(v->get_when_true()->loc); + code.push_set_cur(if_op.block1); + code.emplace_back(v->get_when_false()->loc, Op::_Let, rvect, pre_compile_expr(v->get_when_false(), code)); + code.close_pop_cur(v->get_when_false()->loc); + return rvect; +} + +static std::vector process_dot_access(V v, CodeBlob& code, LValGlobs* lval_globs) { + // it's NOT a method call `t.tupleSize()` (since such cases are handled by process_function_call) + // it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, not as a call) + // currently, nothing except a global function can be a target of dot access + const FunctionData* fun_ref = v->target; + tolk_assert(fun_ref); + return process_symbol(v->loc, fun_ref, code, lval_globs); +} + +static std::vector process_function_call(V v, CodeBlob& code) { + // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` + const FunctionData* fun_ref = v->fun_maybe; + if (!fun_ref) { + std::vector args; + args.reserve(v->get_num_args()); for (int i = 0; i < v->get_num_args(); ++i) { - auto v_arg = v->get_arg(i); - if (v_arg->passed_as_mutate) { - v_arg->error("`mutate` used for non-mutate argument"); + 
args.push_back(v->get_arg(i)->get_expr()); + } + std::vector args_vars = pre_compile_tensor(code, args); + std::vector tfunc = pre_compile_expr(v->get_callee(), code); + tolk_assert(tfunc.size() == 1); + args_vars.push_back(tfunc[0]); + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + Op& op = code.emplace_back(v->loc, Op::_CallInd, rvect, std::move(args_vars)); + op.set_impure_flag(); + return rvect; + } + + int delta_self = v->is_dot_call(); + AnyExprV obj_leftmost = nullptr; + std::vector args; + args.reserve(delta_self + v->get_num_args()); + if (delta_self) { + args.push_back(v->get_dot_obj()); + obj_leftmost = v->get_dot_obj(); + while (obj_leftmost->type == ast_function_call && obj_leftmost->as()->is_dot_call() && obj_leftmost->as()->fun_maybe && obj_leftmost->as()->fun_maybe->does_return_self()) { + obj_leftmost = obj_leftmost->as()->get_dot_obj(); + } + } + for (int i = 0; i < v->get_num_args(); ++i) { + args.push_back(v->get_arg(i)->get_expr()); + } + std::vector> vars_per_arg = pre_compile_tensor_inner(code, args, nullptr); + + TypePtr op_call_type = v->inferred_type; + TypePtr real_ret_type = v->inferred_type; + if (delta_self && fun_ref->does_return_self()) { + real_ret_type = TypeDataVoid::create(); + if (!fun_ref->parameters[0].is_mutate_parameter()) { + op_call_type = TypeDataVoid::create(); + } + } + if (fun_ref->has_mutate_params()) { + std::vector types_list; + for (int i = 0; i < delta_self + v->get_num_args(); ++i) { + if (fun_ref->parameters[i].is_mutate_parameter()) { + types_list.push_back(args[i]->inferred_type); } - Expr* arg = process_expr(v_arg->get_expr(), code); - arg->chk_rvalue(); - tensor_arg->pb_arg(arg); - type_list.push_back(arg->e_type); } - tensor_arg->flags = Expr::_IsRvalue; - tensor_arg->e_type = TypeExpr::new_tensor(std::move(type_list)); - - Expr* var_apply = new Expr{Expr::_VarApply, {lhs, tensor_arg}}; - var_apply->here = v->loc; - var_apply->flags = Expr::_IsRvalue; - var_apply->deduce_type(); - 
return var_apply; + types_list.push_back(real_ret_type); + op_call_type = TypeDataTensor::create(std::move(types_list)); } - Expr* apply = process_function_arguments(lhs->sym, v->get_arg_list(), nullptr, code); - - if (dynamic_cast(apply->sym->value)->has_mutate_params()) { - const std::vector& args = apply->args; - SymValFunc* func_val = dynamic_cast(apply->sym->value); - tolk_assert(func_val->parameters.size() == args.size()); - Expr* grabbed_vars = new Expr(Expr::_Tensor, v->loc); - std::vector type_list; - for (int i = 0; i < static_cast(args.size()); ++i) { - SymDef* param_def = func_val->parameters[i]; - if (param_def && dynamic_cast(param_def->value)->is_mutate_parameter()) { - if (!args[i]->is_lvalue()) { - args[i]->fire_error_lvalue_expected("call a mutating function"); - } - if (args[i]->is_immutable()) { - args[i]->fire_error_modifying_immutable("call a mutating function"); - } - grabbed_vars->pb_arg(args[i]->copy()); - type_list.emplace_back(args[i]->e_type); - } - } - grabbed_vars->flags = Expr::_IsRvalue; - Expr* grab_mutate = new Expr(Expr::_GrabMutatedVars, apply->sym, {apply, grabbed_vars}); - grab_mutate->here = v->loc; - grab_mutate->flags = apply->flags; - grab_mutate->deduce_type(); - return grab_mutate; + std::vector args_vars; + for (const std::vector& list : vars_per_arg) { + args_vars.insert(args_vars.end(), list.cbegin(), list.cend()); } + std::vector rvect_apply = gen_op_call(code, op_call_type, v->loc, std::move(args_vars), fun_ref); - return apply; -} - -static Expr* process_dot_method_call(V v, CodeBlob& code) { - sym_idx_t name_idx = calc_sym_idx(v->method_name); - check_global_func(v->loc, name_idx); - SymDef* func_sym = lookup_symbol(name_idx); - SymValFunc* func_val = dynamic_cast(func_sym->value); - tolk_assert(func_val != nullptr); - - Expr* obj = process_expr(v->get_obj(), code); - obj->chk_rvalue(); - - if (func_val->parameters.empty()) { - v->error("`" + func_sym->name() + "` has no parameters and can not be called as 
method"); - } - if (!func_val->does_accept_self() && func_val->parameters[0] && dynamic_cast(func_val->parameters[0]->value)->is_mutate_parameter()) { - fire_error_invalid_mutate_arg_passed(v->loc, func_sym, func_val->parameters[0], true, false, v->get_obj()); - } - - Expr* apply = process_function_arguments(func_sym, v->get_arg_list(), obj, code); - - Expr* obj_lval = apply->args[0]; - if (!obj_lval->is_lvalue()) { - if (obj_lval->cls == Expr::_ReturnSelf) { - obj_lval = obj_lval->args[1]; - } else { - Expr* tmp_var = create_new_underscore_variable(v->loc, obj_lval->e_type); - tmp_var->define_new_vars(code); - Expr* assign_to_tmp_var = new Expr(Expr::_Letop, {tmp_var, obj_lval}); - assign_to_tmp_var->here = v->loc; - assign_to_tmp_var->flags = Expr::_IsRvalue; - assign_to_tmp_var->deduce_type(); - apply->args[0] = assign_to_tmp_var; - obj_lval = tmp_var; - } - } - - if (func_val->has_mutate_params()) { - tolk_assert(func_val->parameters.size() == apply->args.size()); - Expr* grabbed_vars = new Expr(Expr::_Tensor, v->loc); - std::vector type_list; - for (int i = 0; i < static_cast(apply->args.size()); ++i) { - SymDef* param_sym = func_val->parameters[i]; - if (param_sym && dynamic_cast(param_sym->value)->is_mutate_parameter()) { - Expr* ith_arg = apply->args[i]; - if (ith_arg->is_immutable()) { - ith_arg->fire_error_modifying_immutable("call a mutating method"); - } - - Expr* var_to_mutate = nullptr; - if (ith_arg->is_lvalue()) { - var_to_mutate = ith_arg->copy(); - } else if (i == 0) { - var_to_mutate = obj_lval; + if (fun_ref->has_mutate_params()) { + LValGlobs local_globs; + std::vector left; + for (int i = 0; i < delta_self + v->get_num_args(); ++i) { + if (fun_ref->parameters[i].is_mutate_parameter()) { + AnyExprV arg_i = obj_leftmost && i == 0 ? 
obj_leftmost : args[i]; + tolk_assert(arg_i->is_lvalue || i == 0); + if (arg_i->is_lvalue) { + std::vector ith_var_idx = pre_compile_expr(arg_i, code, &local_globs); + left.insert(left.end(), ith_var_idx.begin(), ith_var_idx.end()); } else { - ith_arg->fire_error_lvalue_expected("call a mutating method"); + left.insert(left.end(), vars_per_arg[0].begin(), vars_per_arg[0].end()); } - tolk_assert(var_to_mutate->is_lvalue() && !var_to_mutate->is_immutable()); - grabbed_vars->pb_arg(var_to_mutate); - type_list.emplace_back(var_to_mutate->e_type); } } - grabbed_vars->flags = Expr::_IsRvalue; - - Expr* grab_mutate = new Expr(Expr::_GrabMutatedVars, func_sym, {apply, grabbed_vars}); - grab_mutate->here = v->loc; - grab_mutate->flags = apply->flags; - grab_mutate->deduce_type(); - - apply = grab_mutate; + std::vector rvect = {code.create_tmp_var(real_ret_type, v->loc)}; + left.push_back(rvect[0]); + code.on_var_modification(left, v->loc); + code.emplace_back(v->loc, Op::_Let, std::move(left), rvect_apply); + local_globs.gen_ops_set_globs(code, v->loc); + rvect_apply = rvect; } - if (func_val->does_return_self()) { - Expr* self_arg = obj_lval; - tolk_assert(self_arg->is_lvalue()); - - Expr* return_self = new Expr(Expr::_ReturnSelf, func_sym, {apply, self_arg}); - return_self->here = v->loc; - return_self->flags = Expr::_IsRvalue; - return_self->deduce_type(); - - apply = return_self; + if (obj_leftmost && fun_ref->does_return_self()) { + if (obj_leftmost->is_lvalue) { // to handle if obj is global var, potentially re-assigned inside a chain + rvect_apply = pre_compile_expr(obj_leftmost, code); + } else { // temporary object, not lvalue, pre_compile_expr + rvect_apply = vars_per_arg[0]; + } } - return apply; + return rvect_apply; } -static Expr* process_expr(V v, CodeBlob& code) { - if (v->empty()) { - Expr* res = new Expr{Expr::_Tensor, {}}; - res->flags = Expr::_IsRvalue; - res->here = v->loc; - res->e_type = TypeExpr::new_unit(); - return res; - } - - Expr* res = 
process_expr(v->get_item(0), code); - std::vector type_list; - type_list.push_back(res->e_type); - int f = res->flags; - res = new Expr{Expr::_Tensor, {res}}; - for (int i = 1; i < v->size(); ++i) { - Expr* x = process_expr(v->get_item(i), code); - res->pb_arg(x); - f &= (x->flags | Expr::_IsImmutable); - f |= (x->flags & Expr::_IsImmutable); - type_list.push_back(x->e_type); - } - res->here = v->loc; - res->flags = f; - res->e_type = TypeExpr::new_tensor(std::move(type_list)); - return res; +static std::vector process_tensor(V v, CodeBlob& code, LValGlobs* lval_globs) { + return pre_compile_tensor(code, v->get_items(), lval_globs); } -static Expr* process_expr(V v, CodeBlob& code) { - if (v->empty()) { - Expr* res = new Expr{Expr::_Tensor, {}}; - res->flags = Expr::_IsRvalue; - res->here = v->loc; - res->e_type = TypeExpr::new_unit(); - res = new Expr{Expr::_MkTuple, {res}}; - res->flags = Expr::_IsRvalue; - res->here = v->loc; - res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); - return res; - } - - Expr* res = process_expr(v->get_item(0), code); - std::vector type_list; - type_list.push_back(res->e_type); - int f = res->flags; - res = new Expr{Expr::_Tensor, {res}}; - for (int i = 1; i < v->size(); ++i) { - Expr* x = process_expr(v->get_item(i), code); - res->pb_arg(x); - f &= (x->flags | Expr::_IsImmutable); - f |= (x->flags & Expr::_IsImmutable); - type_list.push_back(x->e_type); +static std::vector process_typed_tuple(V v, CodeBlob& code, LValGlobs* lval_globs) { + if (lval_globs) { // todo some time, make "var (a, [b,c]) = (1, [2,3])" work + v->error("[...] 
can not be used as lvalue here"); } - res->here = v->loc; - res->flags = f; - res->e_type = TypeExpr::new_tensor(std::move(type_list), false); - res = new Expr{Expr::_MkTuple, {res}}; - res->flags = f; - res->here = v->loc; - res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); - return res; + std::vector left = std::vector{code.create_tmp_var(v->inferred_type, v->loc)}; + std::vector right = pre_compile_tensor(code, v->get_items()); + code.emplace_back(v->loc, Op::_Tuple, left, std::move(right)); + return left; } -static Expr* process_expr(V v) { - Expr* res = new Expr{Expr::_Const, v->loc}; - res->flags = Expr::_IsRvalue; - res->intval = td::string_to_int256(static_cast(v->int_val)); - if (res->intval.is_null() || !res->intval->signed_fits_bits(257)) { - v->error("invalid integer constant"); - } - res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); - return res; +static std::vector process_int_const(V v, CodeBlob& code) { + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + code.emplace_back(v->loc, Op::_IntConst, rvect, v->intval); + return rvect; } -static Expr* process_expr(V v) { - std::string str = static_cast(v->str_val); - Expr* res; - switch (v->modifier) { - case 0: - case 's': - case 'a': - res = new Expr{Expr::_SliceConst, v->loc}; - res->e_type = TypeExpr::new_atomic(TypeExpr::_Slice); - break; - case 'u': - case 'h': - case 'H': - case 'c': - res = new Expr{Expr::_Const, v->loc}; - res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); - break; - default: - v->error("invalid string modifier '" + std::string(1, v->modifier) + "'"); - } - res->flags = Expr::_IsRvalue; - switch (v->modifier) { - case 0: { - res->strval = td::hex_encode(str); - break; - } - case 's': { - res->strval = str; - unsigned char buff[128]; - int bits = (int)td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); - if (bits < 0) { - v->error("invalid hex bitstring constant '" + str + "'"); - } - break; - } - 
case 'a': { // MsgAddress - int workchain; - ton::StdSmcAddress addr; - bool correct = (str.size() == 48 && parse_friendly_address(str.data(), workchain, addr)) || - (str.size() != 48 && parse_raw_address(str, workchain, addr)); - if (!correct) { - v->error("invalid standard address '" + str + "'"); - } - if (workchain < -128 || workchain >= 128) { - v->error("anycast addresses not supported"); - } - - unsigned char data[3 + 8 + 256]; // addr_std$10 anycast:(Maybe Anycast) workchain_id:int8 address:bits256 = MsgAddressInt; - td::bitstring::bits_store_long_top(data, 0, static_cast(4) << (64 - 3), 3); - td::bitstring::bits_store_long_top(data, 3, static_cast(workchain) << (64 - 8), 8); - td::bitstring::bits_memcpy(data, 3 + 8, addr.bits().ptr, 0, addr.size()); - res->strval = td::BitSlice{data, sizeof(data)}.to_hex(); - break; - } - case 'u': { - res->intval = td::hex_string_to_int256(td::hex_encode(str)); - if (str.empty()) { - v->error("empty integer ascii-constant"); - } - if (res->intval.is_null()) { - v->error("too long integer ascii-constant"); - } - break; - } - case 'h': - case 'H': { - unsigned char hash[32]; - digest::hash_str(hash, str.data(), str.size()); - res->intval = td::bits_to_refint(hash, (v->modifier == 'h') ? 32 : 256, false); - break; - } - case 'c': { - res->intval = td::make_refint(td::crc32(td::Slice{str})); - break; - } - default: - tolk_assert(false); +static std::vector process_string_const(V v, CodeBlob& code) { + ConstantValue value = eval_const_init_value(v); + std::vector rvect = {code.create_tmp_var(v->inferred_type, v->loc)}; + if (value.is_int()) { + code.emplace_back(v->loc, Op::_IntConst, rvect, value.as_int()); + } else { + code.emplace_back(v->loc, Op::_SliceConst, rvect, value.as_slice()); } - return res; + return rvect; } -static Expr* process_expr(V v) { - SymDef* builtin_sym = lookup_symbol(calc_sym_idx(v->bool_val ? 
"__true" : "__false")); - return create_expr_apply(v->loc, builtin_sym, {}); +static std::vector process_bool_const(V v, CodeBlob& code) { + const FunctionData* builtin_sym = lookup_global_symbol(v->bool_val ? "__true" : "__false")->as(); + return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym); } -static Expr* process_expr(V v) { - SymDef* builtin_sym = lookup_symbol(calc_sym_idx("__null")); - return create_expr_apply(v->loc, builtin_sym, {}); +static std::vector process_null_keyword(V v, CodeBlob& code) { + const FunctionData* builtin_sym = lookup_global_symbol("__null")->as(); + return gen_op_call(code, v->inferred_type, v->loc, {}, builtin_sym); } -static Expr* process_expr(V v, CodeBlob& code) { - if (!code.func_val->does_accept_self()) { - v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); +static std::vector process_local_var(V v, CodeBlob& code) { + if (v->marked_as_redef) { + return process_symbol(v->loc, v->var_ref, code, nullptr); } - SymDef* sym = lookup_symbol(calc_sym_idx("self")); - tolk_assert(sym); - SymValVariable* sym_val = dynamic_cast(sym->value); - Expr* res = new Expr(Expr::_Var, v->loc); - res->sym = sym; - res->val = sym_val->idx; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (sym_val->is_immutable() ? 
Expr::_IsImmutable : 0); - res->e_type = sym_val->get_type(); - return res; + + tolk_assert(v->var_ref->idx == -1); + v->var_ref->mutate()->assign_idx(code.create_var(v->inferred_type, v->var_ref, v->loc)); + return {v->var_ref->idx}; } -static Expr* process_identifier(V v) { - SymDef* sym = lookup_symbol(calc_sym_idx(v->name)); - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(v, sym); - Expr* res = new Expr{Expr::_GlobVar, v->loc}; - res->e_type = sym->value->get_type(); - res->sym = sym; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImpure; - return res; - } - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(v, sym); - auto val = dynamic_cast(sym->value); - Expr* res = nullptr; - if (val->get_kind() == SymValConst::IntConst) { - res = new Expr{Expr::_Const, v->loc}; - res->intval = val->get_int_value(); - res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); - } else if (val->get_kind() == SymValConst::SliceConst) { - res = new Expr{Expr::_SliceConst, v->loc}; - res->strval = val->get_str_value(); - res->e_type = TypeExpr::new_atomic(TypeExpr::_Slice); - } else { - v->error("invalid symbolic constant type"); - } - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImmutable; - res->sym = sym; - return res; - } - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(v, sym); - } - Expr* res = new Expr{Expr::_Var, v->loc}; - if (!sym) { - check_global_func(v->loc, calc_sym_idx(v->name)); - sym = lookup_symbol(calc_sym_idx(v->name)); - tolk_assert(sym); - } - res->sym = sym; - bool impure = false; - bool immutable = false; - if (const SymValFunc* func_val = dynamic_cast(sym->value)) { - res->e_type = func_val->get_type(); - res->cls = Expr::_GlobFunc; - impure = !func_val->is_marked_as_pure(); - } else if (const SymValVariable* var_val = dynamic_cast(sym->value)) { - tolk_assert(var_val->idx >= 0) - res->val = var_val->idx; - res->e_type = var_val->get_type(); - 
immutable = var_val->is_immutable(); - // std::cerr << "accessing variable " << lex.cur().str << " : " << res->e_type << std::endl; - } else { - v->error("undefined identifier '" + static_cast(v->name) + "'"); - } - // std::cerr << "accessing symbol " << lex.cur().str << " : " << res->e_type << (val->impure ? " (impure)" : " (pure)") << std::endl; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (impure ? Expr::_IsImpure : 0) | (immutable ? Expr::_IsImmutable : 0); - res->deduce_type(); - return res; +static std::vector process_local_vars_declaration(V, CodeBlob&) { + // it can not appear as a standalone expression + // `var ... = rhs` is handled by ast_assign + tolk_assert(false); } -Expr* process_expr(AnyV v, CodeBlob& code) { +static std::vector process_underscore(V v, CodeBlob& code) { + // when _ is used as left side of assignment, like `(cs, _) = cs.loadAndReturn()` + return {code.create_tmp_var(v->inferred_type, v->loc)}; +} + +std::vector pre_compile_expr(AnyExprV v, CodeBlob& code, LValGlobs* lval_globs) { switch (v->type) { + case ast_reference: + return process_symbol(v->loc, v->as()->sym, code, lval_globs); + case ast_assign: + return process_assign(v->as(), code); + case ast_set_assign: + return process_set_assign(v->as(), code); case ast_binary_operator: - return process_expr(v->as(), code); + return process_binary_operator(v->as(), code); case ast_unary_operator: - return process_expr(v->as(), code); + return process_unary_operator(v->as(), code); case ast_ternary_operator: - return process_expr(v->as(), code); + return process_ternary_operator(v->as(), code); + case ast_cast_as_operator: + return pre_compile_expr(v->as()->get_expr(), code, lval_globs); + case ast_dot_access: + return process_dot_access(v->as(), code, lval_globs); case ast_function_call: return process_function_call(v->as(), code); - case ast_dot_method_call: - return process_dot_method_call(v->as(), code); - case ast_parenthesized_expr: - return process_expr(v->as()->get_expr(), 
code); + case ast_parenthesized_expression: + return pre_compile_expr(v->as()->get_expr(), code, lval_globs); case ast_tensor: - return process_expr(v->as(), code); - case ast_tensor_square: - return process_expr(v->as(), code); + return process_tensor(v->as(), code, lval_globs); + case ast_typed_tuple: + return process_typed_tuple(v->as(), code, lval_globs); case ast_int_const: - return process_expr(v->as()); + return process_int_const(v->as(), code); case ast_string_const: - return process_expr(v->as()); + return process_string_const(v->as(), code); case ast_bool_const: - return process_expr(v->as()); + return process_bool_const(v->as(), code); case ast_null_keyword: - return process_expr(v->as()); - case ast_self_keyword: - return process_expr(v->as(), code); - case ast_identifier: - return process_identifier(v->as()); + return process_null_keyword(v->as(), code); + case ast_local_var_lhs: + return process_local_var(v->as(), code); + case ast_local_vars_declaration: + return process_local_vars_declaration(v->as(), code); case ast_underscore: - return create_new_underscore_variable(v->loc, TypeExpr::new_hole()); + return process_underscore(v->as(), code); default: - throw UnexpectedASTNodeType(v, "process_expr"); + throw UnexpectedASTNodeType(v, "pre_compile_expr"); } } -static Expr* process_local_vars_lhs(AnyV v, CodeBlob& code) { - switch (v->type) { - case ast_local_var: { - auto v_var = v->as(); - if (v_var->marked_as_redef) { - Expr* redef_var = process_identifier(v_var->get_identifier()->as()); - if (redef_var->is_immutable()) { - redef_var->fire_error_modifying_immutable("left side of assignment"); - } - return redef_var; - } - TypeExpr* var_type = v_var->declared_type ? 
v_var->declared_type : TypeExpr::new_hole(); - if (auto v_ident = v->as()->get_identifier()->try_as()) { - return create_new_local_variable(v->loc, v_ident->name, var_type, v_var->is_immutable); - } else { - return create_new_underscore_variable(v->loc, var_type); - } - } - case ast_parenthesized_expr: - return process_local_vars_lhs(v->as()->get_expr(), code); - case ast_tensor: { - std::vector type_list; - Expr* res = new Expr{Expr::_Tensor, v->loc}; - for (AnyV item : v->as()->get_items()) { - Expr* x = process_local_vars_lhs(item, code); - res->pb_arg(x); - res->flags |= x->flags; - type_list.push_back(x->e_type); - } - res->e_type = TypeExpr::new_tensor(std::move(type_list)); - return res; - } - case ast_tensor_square: { - std::vector type_list; - Expr* res = new Expr{Expr::_Tensor, v->loc}; - for (AnyV item : v->as()->get_items()) { - Expr* x = process_local_vars_lhs(item, code); - res->pb_arg(x); - res->flags |= x->flags; - type_list.push_back(x->e_type); - } - res->e_type = TypeExpr::new_tensor(std::move(type_list)); - res = new Expr{Expr::_MkTuple, {res}}; - res->flags = res->args.at(0)->flags; - res->here = v->loc; - res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); - return res; - } - default: - throw UnexpectedASTNodeType(v, "process_local_vars_lhs"); - } -} - -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* x = process_local_vars_lhs(v->get_lhs(), code); - Expr* y = process_expr(v->get_assigned_val(), code); - y->chk_rvalue(); - x->predefine_vars(); - x->define_new_vars(code); - Expr* res = new Expr{Expr::_Letop, {x, y}}; - res->here = v->loc; - res->flags = x->flags | Expr::_IsRvalue; - res->deduce_type(); - res->chk_rvalue(); - res->pre_compile(code); - return blk_fl::end; -} -static bool is_expr_valid_as_return_self(Expr* return_expr) { - // `return self` - if (return_expr->cls == Expr::_Var && return_expr->val == 0) { - return true; - } - if (return_expr->cls == Expr::_ReturnSelf) { - return 
is_expr_valid_as_return_self(return_expr->args[1]); - } - if (return_expr->cls == Expr::_CondExpr) { - return is_expr_valid_as_return_self(return_expr->args[1]) && is_expr_valid_as_return_self(return_expr->args[2]); +static void process_sequence(V v, CodeBlob& code) { + for (AnyV item : v->get_items()) { + process_any_statement(item, code); } - return false; } -// for mutating functions, having `return expr`, transform it to `return (modify_var1, ..., expr)` -static Expr* wrap_return_value_with_mutate_params(SrcLocation loc, CodeBlob& code, Expr* return_expr) { - Expr* tmp_var; - if (return_expr->cls != Expr::_Var) { - // `return complex_expr` - extract this into temporary variable (eval it before return) - // this is mandatory if it assigns to one of modified vars - tmp_var = create_new_underscore_variable(loc, return_expr->e_type); - tmp_var->predefine_vars(); - tmp_var->define_new_vars(code); - Expr* assign_to_tmp_var = new Expr(Expr::_Letop, {tmp_var, return_expr}); - assign_to_tmp_var->here = loc; - assign_to_tmp_var->flags = tmp_var->flags | Expr::_IsRvalue; - assign_to_tmp_var->deduce_type(); - assign_to_tmp_var->pre_compile(code); +static void process_assert_statement(V v, CodeBlob& code) { + std::vector args(3); + if (auto v_not = v->get_cond()->try_as(); v_not && v_not->tok == tok_logical_not) { + args[0] = v->get_thrown_code(); + args[1] = v->get_cond()->as()->get_rhs(); + args[2] = createV(v->loc, true); + args[2]->mutate()->assign_inferred_type(TypeDataInt::create()); } else { - tmp_var = return_expr; + args[0] = v->get_thrown_code(); + args[1] = v->get_cond(); + args[2] = createV(v->loc, false); + args[2]->mutate()->assign_inferred_type(TypeDataInt::create()); } - Expr* ret_tensor = new Expr(Expr::_Tensor, loc); - std::vector type_list; - for (SymDef* p_sym: code.func_val->parameters) { - if (p_sym && dynamic_cast(p_sym->value)->is_mutate_parameter()) { - Expr* p_expr = new Expr{Expr::_Var, p_sym->loc}; - p_expr->sym = p_sym; - p_expr->val = 
p_sym->value->idx; - p_expr->flags = Expr::_IsRvalue; - p_expr->e_type = p_sym->value->get_type(); - ret_tensor->pb_arg(p_expr); - type_list.emplace_back(p_expr->e_type); - } - } - ret_tensor->pb_arg(tmp_var); - type_list.emplace_back(tmp_var->e_type); - ret_tensor->flags = Expr::_IsRvalue; - ret_tensor->e_type = TypeExpr::new_tensor(std::move(type_list)); - return ret_tensor; + const FunctionData* builtin_sym = lookup_global_symbol("__throw_if_unless")->as(); + std::vector args_vars = pre_compile_tensor(code, args); + gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym); } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* expr = process_expr(v->get_return_value(), code); - if (code.func_val->does_return_self()) { - if (!is_expr_valid_as_return_self(expr)) { - v->error("invalid return from `self` function"); - } - Expr* var_self = new Expr(Expr::_Var, v->loc); - var_self->flags = Expr::_IsRvalue | Expr::_IsLvalue; - var_self->e_type = code.func_val->parameters[0]->value->get_type(); - Expr* assign_to_self = new Expr(Expr::_Letop, {var_self, expr}); - assign_to_self->here = v->loc; - assign_to_self->flags = Expr::_IsRvalue; - assign_to_self->deduce_type(); - assign_to_self->pre_compile(code); - Expr* empty_tensor = new Expr(Expr::_Tensor, {}); - empty_tensor->here = v->loc; - empty_tensor->flags = Expr::_IsRvalue; - empty_tensor->e_type = TypeExpr::new_tensor({}); - expr = empty_tensor; +static void process_catch_variable(AnyExprV v_catch_var, CodeBlob& code) { + if (auto v_ref = v_catch_var->try_as(); v_ref && v_ref->sym) { // not underscore + const LocalVarData* var_ref = v_ref->sym->as(); + tolk_assert(var_ref->idx == -1); + var_ref->mutate()->assign_idx(code.create_var(v_catch_var->inferred_type, var_ref, v_catch_var->loc)); } - if (code.func_val->has_mutate_params()) { - expr = wrap_return_value_with_mutate_params(v->loc, code, expr); - } - expr->chk_rvalue(); - try { - unify(expr->e_type, code.ret_type); - } 
catch (UnifyError& ue) { - std::ostringstream os; - os << "previous function return type " << code.ret_type - << " cannot be unified with return statement expression type " << expr->e_type << ": " << ue; - v->error(os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars)); - return blk_fl::ret; } -static void append_implicit_ret_stmt(SrcLocation loc_end, CodeBlob& code) { - Expr* expr = new Expr{Expr::_Tensor, {}}; - expr->flags = Expr::_IsRvalue; - expr->here = loc_end; - expr->e_type = TypeExpr::new_unit(); - if (code.func_val->does_return_self()) { - throw ParseError(loc_end, "missing return; forgot `return self`?"); - } - if (code.func_val->has_mutate_params()) { - expr = wrap_return_value_with_mutate_params(loc_end, code, expr); - } - try { - unify(expr->e_type, code.ret_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "previous function return type " << code.ret_type - << " cannot be unified with implicit end-of-block return type " << expr->e_type << ": " << ue; - throw ParseError(loc_end, os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - code.emplace_back(loc_end, Op::_Return, std::move(tmp_vars)); -} +static void process_try_catch_statement(V v, CodeBlob& code) { + code.require_callxargs = true; + Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); + code.push_set_cur(try_catch_op.block0); + process_any_statement(v->get_try_body(), code); + code.close_pop_cur(v->get_try_body()->loc_end); + code.push_set_cur(try_catch_op.block1); -static blk_fl::val process_vertex(V v, CodeBlob& code, bool no_new_scope = false) { - if (!no_new_scope) { - open_scope(v->loc); - } - blk_fl::val res = blk_fl::init; - bool warned = false; - for (AnyV item : v->get_items()) { - if (!(res & blk_fl::end) && !warned) { - item->loc.show_warning("unreachable code"); - warned = true; - } - blk_fl::combine(res, process_statement(item, code)); - } - if (!no_new_scope) { - 
close_scope(); - } - return res; + // transform catch (excNo, arg) into TVM-catch (arg, excNo), where arg is untyped and thus almost useless now + const std::vector& catch_vars = v->get_catch_expr()->get_items(); + tolk_assert(catch_vars.size() == 2); + process_catch_variable(catch_vars[0], code); + process_catch_variable(catch_vars[1], code); + try_catch_op.left = pre_compile_tensor(code, {catch_vars[1], catch_vars[0]}); + process_any_statement(v->get_catch_body(), code); + code.close_pop_cur(v->get_catch_body()->loc_end); } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* expr = process_expr(v->get_cond(), code); - expr->chk_rvalue(); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "repeat count value of type " << expr->e_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - if (tmp_vars.size() != 1) { - v->get_cond()->error("repeat count value is not a singleton"); - } +static void process_repeat_statement(V v, CodeBlob& code) { + std::vector tmp_vars = pre_compile_expr(v->get_cond(), code); Op& repeat_op = code.emplace_back(v->loc, Op::_Repeat, tmp_vars); code.push_set_cur(repeat_op.block0); - blk_fl::val res = process_vertex(v->get_body(), code); + process_any_statement(v->get_body(), code); code.close_pop_cur(v->get_body()->loc_end); - return res | blk_fl::end; } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* expr = process_expr(v->get_cond(), code); - expr->chk_rvalue(); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "while condition value of type " << expr->e_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - Op& while_op = code.emplace_back(v->loc, Op::_While); - code.push_set_cur(while_op.block0); - 
while_op.left = expr->pre_compile(code); - code.close_pop_cur(v->get_body()->loc); - if (while_op.left.size() != 1) { - v->get_cond()->error("while condition value is not a singleton"); +static void process_if_statement(V v, CodeBlob& code) { + std::vector tmp_vars = pre_compile_expr(v->get_cond(), code); + Op& if_op = code.emplace_back(v->loc, Op::_If, std::move(tmp_vars)); + code.push_set_cur(if_op.block0); + process_any_statement(v->get_if_body(), code); + code.close_pop_cur(v->get_if_body()->loc_end); + code.push_set_cur(if_op.block1); + process_any_statement(v->get_else_body(), code); + code.close_pop_cur(v->get_else_body()->loc_end); + if (v->is_ifnot) { + std::swap(if_op.block0, if_op.block1); } - code.push_set_cur(while_op.block1); - blk_fl::val res1 = process_vertex(v->get_body(), code); - code.close_pop_cur(v->get_body()->loc_end); - return res1 | blk_fl::end; } -static blk_fl::val process_vertex(V v, CodeBlob& code) { +static void process_do_while_statement(V v, CodeBlob& code) { Op& until_op = code.emplace_back(v->loc, Op::_Until); code.push_set_cur(until_op.block0); - open_scope(v->loc); - blk_fl::val res = process_vertex(v->get_body(), code, true); + process_any_statement(v->get_body(), code); // in TVM, there is only "do until", but in Tolk, we want "do while" // here we negate condition to pass it forward to legacy to Op::_Until // also, handle common situations as a hardcoded "optimization": replace (a<0) with (a>=0) and so on // todo these hardcoded conditions should be removed from this place in the future - AnyV cond = v->get_cond(); - AnyV until_cond; + AnyExprV cond = v->get_cond(); + AnyExprV until_cond; if (auto v_not = cond->try_as(); v_not && v_not->tok == tok_logical_not) { until_cond = v_not->get_rhs(); } else if (auto v_eq = cond->try_as(); v_eq && v_eq->tok == tok_eq) { @@ -1093,222 +586,127 @@ static blk_fl::val process_vertex(V v, CodeBlob& code) { until_cond = createV(cond->loc, "<", tok_lt, v_geq->get_lhs(), v_geq->get_rhs()); } 
else if (auto v_gt = cond->try_as(); v_gt && v_gt->tok == tok_gt) { until_cond = createV(cond->loc, "<=", tok_geq, v_gt->get_lhs(), v_gt->get_rhs()); + } else if (cond->inferred_type == TypeDataBool::create()) { + until_cond = createV(cond->loc, "!b", tok_logical_not, cond); } else { until_cond = createV(cond->loc, "!", tok_logical_not, cond); } - - Expr* expr = process_expr(until_cond, code); - expr->chk_rvalue(); - close_scope(); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`while` condition value of type " << expr->e_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); + until_cond->mutate()->assign_inferred_type(TypeDataInt::create()); + if (auto v_bin = until_cond->try_as(); v_bin && !v_bin->fun_ref) { + v_bin->mutate()->assign_fun_ref(lookup_global_symbol("_" + static_cast(v_bin->operator_name) + "_")->as()); + } else if (auto v_un = until_cond->try_as(); v_un && !v_un->fun_ref) { + v_un->mutate()->assign_fun_ref(lookup_global_symbol(static_cast(v_un->operator_name) + "_")->as()); } - until_op.left = expr->pre_compile(code); + + until_op.left = pre_compile_expr(until_cond, code); code.close_pop_cur(v->get_body()->loc_end); - if (until_op.left.size() != 1) { - v->get_cond()->error("`while` condition value is not a singleton"); - } - return res & ~blk_fl::empty; } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - std::vector args; - SymDef* builtin_sym; - if (v->has_thrown_arg()) { - builtin_sym = lookup_symbol(calc_sym_idx("__throw_arg")); - args.push_back(process_expr(v->get_thrown_arg(), code)); - args.push_back(process_expr(v->get_thrown_code(), code)); - } else { - builtin_sym = lookup_symbol(calc_sym_idx("__throw")); - args.push_back(process_expr(v->get_thrown_code(), code)); - } - - Expr* apply = create_expr_apply(v->loc, builtin_sym, std::move(args)); - apply->flags |= Expr::_IsImpure; - 
apply->pre_compile(code); - return blk_fl::end; +static void process_while_statement(V v, CodeBlob& code) { + Op& while_op = code.emplace_back(v->loc, Op::_While); + code.push_set_cur(while_op.block0); + while_op.left = pre_compile_expr(v->get_cond(), code); + code.close_pop_cur(v->get_body()->loc); + code.push_set_cur(while_op.block1); + process_any_statement(v->get_body(), code); + code.close_pop_cur(v->get_body()->loc_end); } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - std::vector args(3); - if (auto v_not = v->get_cond()->try_as(); v_not && v_not->tok == tok_logical_not) { - args[0] = process_expr(v->get_thrown_code(), code); - args[1] = process_expr(v->get_cond()->as()->get_rhs(), code); - args[2] = process_expr(createV(v->loc, true), code); +static void process_throw_statement(V v, CodeBlob& code) { + if (v->has_thrown_arg()) { + const FunctionData* builtin_sym = lookup_global_symbol("__throw_arg")->as(); + std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_arg(), v->get_thrown_code()}); + gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym); } else { - args[0] = process_expr(v->get_thrown_code(), code); - args[1] = process_expr(v->get_cond(), code); - args[2] = process_expr(createV(v->loc, false), code); + const FunctionData* builtin_sym = lookup_global_symbol("__throw")->as(); + std::vector args_vars = pre_compile_tensor(code, {v->get_thrown_code()}); + gen_op_call(code, TypeDataVoid::create(), v->loc, std::move(args_vars), builtin_sym); } - - SymDef* builtin_sym = lookup_symbol(calc_sym_idx("__throw_if_unless")); - Expr* apply = create_expr_apply(v->loc, builtin_sym, std::move(args)); - apply->flags |= Expr::_IsImpure; - apply->pre_compile(code); - return blk_fl::end; } -static Expr* process_catch_variable(AnyV catch_var, TypeExpr* var_type) { - if (auto v_ident = catch_var->try_as()) { - return create_new_local_variable(catch_var->loc, v_ident->name, var_type, true); +static void 
process_return_statement(V v, CodeBlob& code) { + std::vector return_vars = v->has_return_value() ? pre_compile_expr(v->get_return_value(), code) : std::vector{}; + if (code.fun_ref->does_return_self()) { + tolk_assert(return_vars.size() == 1); + return_vars = {}; } - return create_new_underscore_variable(catch_var->loc, var_type); -} - -static blk_fl::val process_vertex(V v, CodeBlob& code) { - code.require_callxargs = true; - Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); - code.push_set_cur(try_catch_op.block0); - blk_fl::val res0 = process_vertex(v->get_try_body(), code); - code.close_pop_cur(v->get_try_body()->loc_end); - code.push_set_cur(try_catch_op.block1); - open_scope(v->get_catch_expr()->loc); - - // transform catch (excNo, arg) into TVM-catch (arg, excNo), where arg is untyped and thus almost useless now - TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int)); - const std::vector& catch_items = v->get_catch_expr()->get_items(); - tolk_assert(catch_items.size() == 2); - Expr* e_catch = new Expr{Expr::_Tensor, v->get_catch_expr()->loc}; - e_catch->pb_arg(process_catch_variable(catch_items[1], tvm_error_type->args[0])); - e_catch->pb_arg(process_catch_variable(catch_items[0], tvm_error_type->args[1])); - e_catch->flags = Expr::_IsLvalue; - e_catch->e_type = tvm_error_type; - e_catch->predefine_vars(); - e_catch->define_new_vars(code); - try_catch_op.left = e_catch->pre_compile(code); - tolk_assert(try_catch_op.left.size() == 2); - - blk_fl::val res1 = process_vertex(v->get_catch_body(), code); - close_scope(); - code.close_pop_cur(v->get_catch_body()->loc_end); - blk_fl::combine_parallel(res0, res1); - return res0; + if (code.fun_ref->has_mutate_params()) { + std::vector mutated_vars; + for (const LocalVarData& p_sym: code.fun_ref->parameters) { + if (p_sym.is_mutate_parameter()) { + mutated_vars.push_back(p_sym.idx); + } + } + return_vars.insert(return_vars.begin(), mutated_vars.begin(), 
mutated_vars.end()); + } + code.emplace_back(v->loc, Op::_Return, std::move(return_vars)); } -static blk_fl::val process_vertex(V v, CodeBlob& code) { - Expr* expr = process_expr(v->get_cond(), code); - expr->chk_rvalue(); - TypeExpr* flag_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, flag_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`if` condition value of type " << expr->e_type << " is not an integer: " << ue; - v->get_cond()->error(os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - if (tmp_vars.size() != 1) { - v->get_cond()->error("condition value is not a singleton"); - } - Op& if_op = code.emplace_back(v->loc, Op::_If, tmp_vars); - code.push_set_cur(if_op.block0); - blk_fl::val res1 = process_vertex(v->get_if_body(), code); - blk_fl::val res2 = blk_fl::init; - code.close_pop_cur(v->get_if_body()->loc_end); - code.push_set_cur(if_op.block1); - res2 = process_vertex(v->get_else_body(), code); - code.close_pop_cur(v->get_else_body()->loc_end); - if (v->is_ifnot) { - std::swap(if_op.block0, if_op.block1); +static void append_implicit_return_statement(SrcLocation loc_end, CodeBlob& code) { + std::vector mutated_vars; + if (code.fun_ref->has_mutate_params()) { + for (const LocalVarData& p_sym: code.fun_ref->parameters) { + if (p_sym.is_mutate_parameter()) { + mutated_vars.push_back(p_sym.idx); + } + } } - blk_fl::combine_parallel(res1, res2); - return res1; + code.emplace_back(loc_end, Op::_Return, std::move(mutated_vars)); } -blk_fl::val process_statement(AnyV v, CodeBlob& code) { + +void process_any_statement(AnyV v, CodeBlob& code) { switch (v->type) { - case ast_local_vars_declaration: - return process_vertex(v->as(), code); - case ast_return_statement: - return process_vertex(v->as(), code); case ast_sequence: - return process_vertex(v->as(), code); - case ast_empty: - return blk_fl::init; + return process_sequence(v->as(), code); + case ast_return_statement: + return 
process_return_statement(v->as(), code); case ast_repeat_statement: - return process_vertex(v->as(), code); + return process_repeat_statement(v->as(), code); case ast_if_statement: - return process_vertex(v->as(), code); + return process_if_statement(v->as(), code); case ast_do_while_statement: - return process_vertex(v->as(), code); + return process_do_while_statement(v->as(), code); case ast_while_statement: - return process_vertex(v->as(), code); + return process_while_statement(v->as(), code); case ast_throw_statement: - return process_vertex(v->as(), code); + return process_throw_statement(v->as(), code); case ast_assert_statement: - return process_vertex(v->as(), code); + return process_assert_statement(v->as(), code); case ast_try_catch_statement: - return process_vertex(v->as(), code); - default: { - Expr* expr = process_expr(v, code); - expr->chk_rvalue(); - expr->pre_compile(code); - return blk_fl::end; - } - } -} - -static FormalArg process_vertex(V v, SymDef* param_sym) { - if (!param_sym) { - return std::make_tuple(v->param_type, nullptr, v->loc); - } - SymDef* new_sym_def = define_symbol(calc_sym_idx(v->get_identifier()->name), true, v->loc); - if (!new_sym_def || new_sym_def->value) { - v->error("redefined parameter"); + return process_try_catch_statement(v->as(), code); + case ast_empty_statement: + return; + default: + pre_compile_expr(reinterpret_cast(v), code); } - const SymValVariable* param_val = dynamic_cast(param_sym->value); - new_sym_def->value = new SymValVariable(*param_val); - return std::make_tuple(v->param_type, new_sym_def, v->loc); } -static void convert_function_body_to_CodeBlob(V v, V v_body) { - SymDef* sym_def = lookup_symbol(calc_sym_idx(v->get_identifier()->name)); - SymValCodeFunc* sym_val = dynamic_cast(sym_def->value); - tolk_assert(sym_val != nullptr); - - open_scope(v->loc); - CodeBlob* blob = new CodeBlob{static_cast(v->get_identifier()->name), v->loc, sym_val, v->ret_type}; - if (v->marked_as_pure) { - blob->flags |= 
CodeBlob::_ForbidImpure; - } +static void convert_function_body_to_CodeBlob(const FunctionData* fun_ref, FunctionBodyCode* code_body) { + auto v_body = fun_ref->ast_root->as()->get_body()->as(); + CodeBlob* blob = new CodeBlob{fun_ref->name, fun_ref->loc, fun_ref}; FormalArgList legacy_arg_list; - for (int i = 0; i < v->get_num_params(); ++i) { - legacy_arg_list.emplace_back(process_vertex(v->get_param(i), sym_val->parameters[i])); + for (const LocalVarData& param : fun_ref->parameters) { + legacy_arg_list.emplace_back(param.declared_type, ¶m, param.loc); } blob->import_params(std::move(legacy_arg_list)); - blk_fl::val res = blk_fl::init; - bool warned = false; for (AnyV item : v_body->get_items()) { - if (!(res & blk_fl::end) && !warned) { - item->loc.show_warning("unreachable code"); - warned = true; - } - blk_fl::combine(res, process_statement(item, *blob)); + process_any_statement(item, *blob); } - if (res & blk_fl::end) { - append_implicit_ret_stmt(v_body->loc_end, *blob); + if (fun_ref->is_implicit_return()) { + append_implicit_return_statement(v_body->loc_end, *blob); } blob->close_blk(v_body->loc_end); - close_scope(); - sym_val->set_code(blob); + code_body->set_code(blob); } -static void convert_asm_body_to_AsmOp(V v, V v_body) { - SymDef* sym_def = lookup_symbol(calc_sym_idx(v->get_identifier()->name)); - SymValAsmFunc* sym_val = dynamic_cast(sym_def->value); - tolk_assert(sym_val != nullptr); - - int cnt = v->get_num_params(); - int width = v->ret_type->get_width(); +static void convert_asm_body_to_AsmOp(const FunctionData* fun_ref, FunctionBodyAsm* asm_body) { + int cnt = fun_ref->get_num_params(); + int width = fun_ref->inferred_return_type->calc_width_on_stack(); std::vector asm_ops; - for (AnyV v_child : v_body->get_asm_commands()) { + for (AnyV v_child : fun_ref->ast_root->as()->get_body()->as()->get_asm_commands()) { std::string_view ops = v_child->as()->str_val; // \n\n... 
std::string op; for (char c : ops) { @@ -1332,24 +730,77 @@ static void convert_asm_body_to_AsmOp(V v, Vset_code(std::move(asm_ops)); + asm_body->set_code(std::move(asm_ops)); } +class UpdateArgRetOrderConsideringStackWidth final { +public: + static bool should_visit_function(const FunctionData* fun_ref) { + return !fun_ref->is_generic_function() && (!fun_ref->ret_order.empty() || !fun_ref->arg_order.empty()); + } -void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles& all_src_files) { - for (const SrcFile* file : all_src_files) { - tolk_assert(file->ast); + static void start_visiting_function(const FunctionData* fun_ref, V v_function) { + int total_arg_mutate_width = 0; + bool has_arg_width_not_1 = false; + for (const LocalVarData& param : fun_ref->parameters) { + int arg_width = param.declared_type->calc_width_on_stack(); + has_arg_width_not_1 |= arg_width != 1; + total_arg_mutate_width += param.is_mutate_parameter() * arg_width; + } - for (AnyV v : file->ast->as()->get_toplevel_declarations()) { - if (auto v_func = v->try_as()) { - if (v_func->is_asm_function()) { - convert_asm_body_to_AsmOp(v_func, v_func->get_body()->as()); - } else if (!v_func->marked_as_builtin) { - convert_function_body_to_CodeBlob(v_func, v_func->get_body()->as()); + // example: `fun f(a: int, b: (int, (int, int)), c: int)` with `asm (b a c)` + // current arg_order is [1 0 2] + // needs to be converted to [1 2 3 0 4] because b width is 3 + if (has_arg_width_not_1) { + int total_arg_width = 0; + std::vector cum_arg_width; + cum_arg_width.reserve(1 + fun_ref->get_num_params()); + cum_arg_width.push_back(0); + for (const LocalVarData& param : fun_ref->parameters) { + cum_arg_width.push_back(total_arg_width += param.declared_type->calc_width_on_stack()); + } + std::vector arg_order; + for (int i = 0; i < fun_ref->get_num_params(); ++i) { + int j = fun_ref->arg_order[i]; + int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; + while (c1 < c2) { + arg_order.push_back(c1++); } } + 
fun_ref->mutate()->assign_arg_order(std::move(arg_order)); + } + + // example: `fun f(mutate self: slice): slice` with `asm(-> 1 0)` + // ret_order is a shuffled range 0...N + // validate N: a function should return value and mutated arguments onto a stack + if (!fun_ref->ret_order.empty()) { + size_t expected_width = fun_ref->inferred_return_type->calc_width_on_stack() + total_arg_mutate_width; + if (expected_width != fun_ref->ret_order.size()) { + v_function->get_body()->error("ret_order (after ->) expected to contain " + std::to_string(expected_width) + " numbers"); + } + } + } +}; + +class ConvertASTToLegacyOpVisitor final { +public: + static bool should_visit_function(const FunctionData* fun_ref) { + return !fun_ref->is_generic_function(); + } + + static void start_visiting_function(const FunctionData* fun_ref, V) { + tolk_assert(fun_ref->is_type_inferring_done()); + if (fun_ref->is_code_function()) { + convert_function_body_to_CodeBlob(fun_ref, std::get(fun_ref->body)); + } else if (fun_ref->is_asm_function()) { + convert_asm_body_to_AsmOp(fun_ref, std::get(fun_ref->body)); } } +}; + +void pipeline_convert_ast_to_legacy_Expr_Op() { + visit_ast_of_all_functions(); + visit_ast_of_all_functions(); } } // namespace tolk diff --git a/tolk/pipe-calc-rvalue-lvalue.cpp b/tolk/pipe-calc-rvalue-lvalue.cpp new file mode 100644 index 000000000..041aec891 --- /dev/null +++ b/tolk/pipe-calc-rvalue-lvalue.cpp @@ -0,0 +1,217 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" + +/* + * This pipe assigns lvalue/rvalue flags for AST expressions. + * It happens after identifiers have been resolved, but before type inferring (before methods binding). + * + * Example: `a = b`, `a` is lvalue, `b` is rvalue. + * Example: `a + b`, both are rvalue. + * + * Note, that this pass only assigns, not checks. So, for `f() = 4`, expr `f()` is lvalue. + * Checking (firing this as incorrect later) is performed after type inferring, see pipe-check-rvalue-lvalue. + */ + +namespace tolk { + +enum class MarkingState { + None, + LValue, + RValue, + LValueAndRValue +}; + +class CalculateRvalueLvalueVisitor final : public ASTVisitorFunctionBody { + MarkingState cur_state = MarkingState::None; + + MarkingState enter_state(MarkingState activated) { + MarkingState saved = cur_state; + cur_state = activated; + return saved; + } + + void restore_state(MarkingState saved) { + cur_state = saved; + } + + void mark_vertex_cur_or_rvalue(AnyExprV v) const { + if (cur_state == MarkingState::LValue || cur_state == MarkingState::LValueAndRValue) { + v->mutate()->assign_lvalue_true(); + } + if (cur_state == MarkingState::RValue || cur_state == MarkingState::LValueAndRValue || cur_state == MarkingState::None) { + v->mutate()->assign_rvalue_true(); + } + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) 
override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(v->passed_as_mutate ? MarkingState::LValueAndRValue : MarkingState::RValue); + parent::visit(v); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v->get_obj()); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v); + restore_state(saved); + } + + void visit(V v) override { + // underscore is a placeholder to ignore left side of assignment: `(a, _) = get2params()` + // so, if current state is "lvalue", `_` will be marked as lvalue, and ok + // but if used incorrectly, like `f(_)` or just `_;`, it will be marked rvalue + // and will fire an error later, in pipe lvalue/rvalue check + mark_vertex_cur_or_rvalue(v); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::LValue); + parent::visit(v->get_lhs()); + enter_state(MarkingState::RValue); + parent::visit(v->get_rhs()); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::LValueAndRValue); + parent::visit(v->get_lhs()); + enter_state(MarkingState::RValue); + parent::visit(v->get_rhs()); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v); + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v); + restore_state(saved); + } + + 
void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + MarkingState saved = enter_state(MarkingState::RValue); + parent::visit(v); // both cond, when_true and when_false are rvalue, `(cond ? a : b) = 5` prohibited + restore_state(saved); + } + + void visit(V v) override { + mark_vertex_cur_or_rvalue(v); + parent::visit(v->get_expr()); // leave lvalue state unchanged, for `mutate (t.0 as int)` both `t.0 as int` and `t.0` are lvalue + } + + void visit(V v) override { + tolk_assert(cur_state == MarkingState::LValue); + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + tolk_assert(cur_state == MarkingState::LValue); + mark_vertex_cur_or_rvalue(v); + parent::visit(v); + } + + void visit(V v) override { + parent::visit(v->get_try_body()); + MarkingState saved = enter_state(MarkingState::LValue); + parent::visit(v->get_catch_expr()); + restore_state(saved); + parent::visit(v->get_catch_body()); + } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } +}; + +void pipeline_calculate_rvalue_lvalue() { + visit_ast_of_all_functions(); +} + +void pipeline_calculate_rvalue_lvalue(const FunctionData* fun_ref) { + CalculateRvalueLvalueVisitor visitor; + if (visitor.should_visit_function(fun_ref)) { + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); + } +} + +} // namespace tolk diff --git a/tolk/pipe-check-pure-impure.cpp b/tolk/pipe-check-pure-impure.cpp new file mode 100644 index 000000000..2b2e1e670 --- /dev/null +++ b/tolk/pipe-check-pure-impure.cpp @@ -0,0 +1,93 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+ + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" +#include "platform-utils.h" + +/* + * This pipe checks for impure operations inside pure functions. + * It happens after type inferring (after methods binding) since it operates fun_ref of calls. + */ + +namespace tolk { + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_impure_operation_inside_pure_function(AnyV v) { + v->error("an impure operation in a pure function"); +} + +class CheckImpureOperationsInPureFunctionVisitor final : public ASTVisitorFunctionBody { + static void fire_if_global_var(AnyExprV v) { + if (auto v_ident = v->try_as()) { + if (v_ident->sym->try_as()) { + fire_error_impure_operation_inside_pure_function(v); + } + } + } + + void visit(V v) override { + fire_if_global_var(v->get_lhs()); + parent::visit(v); + } + + void visit(V v) override { + fire_if_global_var(v->get_lhs()); + parent::visit(v); + } + + void visit(V v) override { + // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` + if (!v->fun_maybe) { + // `local_var(args)` is always impure, no considerations about what's there at runtime + fire_error_impure_operation_inside_pure_function(v); + } + + if (!v->fun_maybe->is_marked_as_pure()) { + fire_error_impure_operation_inside_pure_function(v); + } + + parent::visit(v); + } + + void visit(V v) override { + if (v->passed_as_mutate) { + fire_if_global_var(v->get_expr()); + } + + parent::visit(v); + } + + void visit(V v) override { + fire_error_impure_operation_inside_pure_function(v); + } + + void visit(V v) override { + 
fire_error_impure_operation_inside_pure_function(v); + } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function() && fun_ref->is_marked_as_pure(); + } +}; + +void pipeline_check_pure_impure_operations() { + visit_ast_of_all_functions(); +} + +} // namespace tolk diff --git a/tolk/pipe-check-rvalue-lvalue.cpp b/tolk/pipe-check-rvalue-lvalue.cpp new file mode 100644 index 000000000..038b09991 --- /dev/null +++ b/tolk/pipe-check-rvalue-lvalue.cpp @@ -0,0 +1,198 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" +#include "platform-utils.h" + +/* + * This pipe checks lvalue/rvalue for validity. + * It happens after type inferring (after methods binding) and after lvalue/rvalue are refined based on fun_ref. + * + * Example: `f() = 4`, `f()` was earlier marked as lvalue, it's incorrect. + * Example: `f(mutate 5)`, `5` was marked also, it's incorrect. 
+ */ + +namespace tolk { + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_be_used_as_lvalue(AnyV v, const std::string& details) { + // example: `f() = 32` + // example: `loadUint(c.beginParse(), 32)` (since `loadUint()` mutates the first argument) + v->error(details + " can not be used as lvalue"); +} + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_modifying_immutable_variable(AnyExprV v, const LocalVarData* var_ref) { + if (var_ref->idx == 0 && var_ref->name == "self") { + v->error("modifying `self`, which is immutable by default; probably, you want to declare `mutate self`"); + } else { + v->error("modifying immutable variable `" + var_ref->name + "`"); + } +} + +// validate a function used as rvalue, like `var cb = f` +// it's not a generic function (ensured earlier at type inferring) and has some more restrictions +static void validate_function_used_as_noncall(AnyExprV v, const FunctionData* fun_ref) { + if (!fun_ref->arg_order.empty() || !fun_ref->ret_order.empty()) { + v->error("saving `" + fun_ref->name + "` into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack"); + } + if (fun_ref->has_mutate_params()) { + v->error("saving `" + fun_ref->name + "` into a variable is impossible, since it has `mutate` parameters and thus can only be called directly"); + } +} + +class CheckRValueLvalueVisitor final : public ASTVisitorFunctionBody { + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "assignment"); + } + parent::visit(v); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "assignment"); + } + parent::visit(v); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "operator " + static_cast(v->operator_name)); + } + parent::visit(v); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, 
"operator " + static_cast(v->operator_name)); + } + parent::visit(v); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "operator ?:"); + } + parent::visit(v); + } + + void visit(V v) override { + // if `x as int` is lvalue, then `x` is also lvalue, so check that `x` is ok + parent::visit(v->get_expr()); + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "literal"); + } + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "literal"); + } + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "literal"); + } + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "literal"); + } + } + + void visit(V v) override { + // a reference to a method used as rvalue, like `var v = t.tupleAt` + if (const FunctionData* fun_ref = v->target; v->is_rvalue) { + validate_function_used_as_noncall(v, fun_ref); + } + } + + void visit(V v) override { + if (v->is_lvalue) { + fire_error_cannot_be_used_as_lvalue(v, "function call"); + } + if (!v->fun_maybe) { + parent::visit(v->get_callee()); + } + // for `f()` don't visit ast_reference `f`, to detect `f` usage as non-call, like `var cb = f` + // same for `obj.method()`, don't visit ast_reference method, visit only obj + if (v->is_dot_call()) { + parent::visit(v->get_dot_obj()); + } + + for (int i = 0; i < v->get_num_args(); ++i) { + parent::visit(v->get_arg(i)); + } + } + + void visit(V v) override { + if (v->marked_as_redef) { + tolk_assert(v->var_ref); + if (v->var_ref->is_immutable()) { + v->error("`redef` for immutable variable"); + } + } + } + + void visit(V v) override { + if (v->is_lvalue) { + tolk_assert(v->sym); + if (const auto* var_ref = v->sym->try_as(); var_ref && var_ref->is_immutable()) { + fire_error_modifying_immutable_variable(v, var_ref); + } else if (v->sym->try_as()) { + v->error("modifying 
immutable constant"); + } else if (v->sym->try_as()) { + v->error("function can't be used as lvalue"); + } + } + + // a reference to a function used as rvalue, like `var v = someFunction` + if (const FunctionData* fun_ref = v->sym->try_as(); fun_ref && v->is_rvalue) { + validate_function_used_as_noncall(v, fun_ref); + } + } + + void visit(V v) override { + if (v->is_rvalue) { + v->error("`_` can't be used as a value; it's a placeholder for a left side of assignment"); + } + } + + void visit(V v) override { + parent::visit(v->get_try_body()); + // skip catch(_,excNo), there are always vars due to grammar, lvalue/rvalue aren't set to them + parent::visit(v->get_catch_body()); + } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } +}; + +void pipeline_check_rvalue_lvalue() { + visit_ast_of_all_functions(); +} + +} // namespace tolk diff --git a/tolk/pipe-constant-folding.cpp b/tolk/pipe-constant-folding.cpp new file mode 100644 index 000000000..98996c28d --- /dev/null +++ b/tolk/pipe-constant-folding.cpp @@ -0,0 +1,91 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-replacer.h" +#include "type-system.h" + +/* + * This pipe is supposed to do constant folding, like replacing `2 + 3` with `5`. 
+ * It happens after type inferring and validity checks, one of the last ones. + * + * Currently, it just replaces `-1` (ast_unary_operator ast_int_const) with a number -1 + * and `!true` with false. + * More rich constant folding should be done some day, but even without this, IR optimizations + * (operating low-level stack variables) pretty manage to do all related optimizations. + * Constant folding in the future, done at AST level, just would slightly reduce amount of work for optimizer. + */ + +namespace tolk { + +class ConstantFoldingReplacer final : public ASTReplacerInFunctionBody { + static V create_int_const(SrcLocation loc, td::RefInt256&& intval) { + auto v_int = createV(loc, std::move(intval), {}); + v_int->assign_inferred_type(TypeDataInt::create()); + v_int->assign_rvalue_true(); + return v_int; + } + + static V create_bool_const(SrcLocation loc, bool bool_val) { + auto v_bool = createV(loc, bool_val); + v_bool->assign_inferred_type(TypeDataBool::create()); + v_bool->assign_rvalue_true(); + return v_bool; + } + + AnyExprV replace(V v) override { + parent::replace(v); + + TokenType t = v->tok; + // convert "-1" (tok_minus tok_int_const) to a const -1 + if (t == tok_minus && v->get_rhs()->type == ast_int_const) { + td::RefInt256 intval = v->get_rhs()->as()->intval; + tolk_assert(!intval.is_null()); + intval = -intval; + if (intval.is_null() || !intval->signed_fits_bits(257)) { + v->error("integer overflow"); + } + return create_int_const(v->loc, std::move(intval)); + } + // same for "+1" + if (t == tok_plus && v->get_rhs()->type == ast_int_const) { + return v->get_rhs(); + } + + // `!true` / `!false` + if (t == tok_logical_not && v->get_rhs()->type == ast_bool_const) { + return create_bool_const(v->loc, !v->get_rhs()->as()->bool_val); + } + // `!0` + if (t == tok_logical_not && v->get_rhs()->type == ast_int_const) { + return create_bool_const(v->loc, v->get_rhs()->as()->intval == 0); + } + + return v; + } + +public: + bool should_visit_function(const 
FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } +}; + +void pipeline_constant_folding() { + replace_ast_of_all_functions(); +} + +} // namespace tolk diff --git a/tolk/pipe-detect-unreachable.cpp b/tolk/pipe-detect-unreachable.cpp new file mode 100644 index 000000000..15824cf39 --- /dev/null +++ b/tolk/pipe-detect-unreachable.cpp @@ -0,0 +1,138 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" + +/* + * This pipe does two things: + * 1) detects unreachable code and prints warnings about it + * example: `fun main() { if(1){return;}else{return;} var x = 0; }` — var is unreachable + * 2) if control flow reaches end of function, store a flag to insert an implicit return + * example: `fun main() { assert(...); }` — has an implicit `return ()` statement before a brace + * + * Note, that it does not delete unreachable code, only prints warnings. + * Actual deleting is done much later (in "legacy" part), after AST is converted to Op. + * + * Note, that it's not CFG, it's just a shallow reachability detection. + * In the future, a true CFG should be introduced. For instance, in order to have nullable types, + * I'll need to implement smart casts. 
Then I'll think of a complicated granular control flow graph, + * considering data flow and exceptions (built before type inferring, of course), + * and detecting unreachable code will be a part of it. + */ + +namespace tolk { + +class UnreachableStatementsDetectVisitor final { + bool always_returns(AnyV v) { + switch (v->type) { + case ast_sequence: return always_returns(v->as()); + case ast_return_statement: return always_returns(v->as()); + case ast_throw_statement: return always_returns(v->as()); + case ast_function_call: return always_returns(v->as()); + case ast_repeat_statement: return always_returns(v->as()); + case ast_while_statement: return always_returns(v->as()); + case ast_do_while_statement: return always_returns(v->as()); + case ast_try_catch_statement: return always_returns(v->as()); + case ast_if_statement: return always_returns(v->as()); + default: + // unhandled statements (like assert) and statement expressions + return false; + } + } + + bool always_returns(V v) { + bool always = false; + for (AnyV item : v->get_items()) { + if (always && item->type != ast_empty_statement) { + item->loc.show_warning("unreachable code"); + break; + } + always |= always_returns(item); + } + return always; + } + + static bool always_returns([[maybe_unused]] V v) { + // quite obvious: `return expr` interrupts control flow + return true; + } + + static bool always_returns([[maybe_unused]] V v) { + // todo `throw excNo` currently does not interrupt control flow + // (in other words, `throw 1; something` - something is reachable) + // the reason is that internally it's transformed to a call of built-in function __throw(), + // which is a regular function, like __throw_if() or loadInt() + // to fix this later on, it should be deeper, introducing Op::_Throw for example, + // to make intermediate representations and stack optimizer also be aware that after it there is unreachable + return false; + } + + static bool always_returns([[maybe_unused]] V v) { + // neither 
annotations like @noreturn nor auto-detection of always-throwing functions also doesn't exist + // in order to do this in the future, it should be handled not only at AST/CFG level, + // but inside Op and low-level optimizer (at least if reachability detection is not moved out of there) + // see comments for `throw` above, similar to this case + return false; + } + + bool always_returns(V v) { + return always_returns(v->get_body()); + } + + bool always_returns(V v) { + return always_returns(v->get_body()); + } + + bool always_returns(V v) { + return always_returns(v->get_body()); + } + + bool always_returns(V v) { + return always_returns(v->get_try_body()) && always_returns(v->get_catch_body()); + } + + bool always_returns(V v) { + return always_returns(v->get_if_body()) && always_returns(v->get_else_body()); + } + +public: + static bool should_visit_function(const FunctionData* fun_ref) { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } + + void start_visiting_function(const FunctionData* fun_ref, V v_function) { + bool control_flow_reaches_end = !always_returns(v_function->get_body()->as()); + if (control_flow_reaches_end) { + fun_ref->mutate()->assign_is_implicit_return(); + } + } +}; + + +void pipeline_detect_unreachable_statements() { + visit_ast_of_all_functions(); +} + +void pipeline_detect_unreachable_statements(const FunctionData* fun_ref) { + UnreachableStatementsDetectVisitor visitor; + if (UnreachableStatementsDetectVisitor::should_visit_function(fun_ref)) { + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); + } +} + +} // namespace tolk diff --git a/tolk/pipe-discover-parse-sources.cpp b/tolk/pipe-discover-parse-sources.cpp index a8445ae95..d31348ba9 100644 --- a/tolk/pipe-discover-parse-sources.cpp +++ b/tolk/pipe-discover-parse-sources.cpp @@ -28,9 +28,17 @@ #include "ast-from-tokens.h" #include "compiler-state.h" +/* + * This is the starting point of compilation pipeline. 
+ * It parses Tolk files to AST, analyzes `import` statements and loads/parses imported files. + * + * When it finishes, all files have been parsed to AST, and no more files will later be added. + * If a parsing error happens (invalid syntax), an exception is thrown immediately from ast-from-tokens.cpp. + */ + namespace tolk { -AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename) { +void pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename) { G.all_src_files.locate_and_register_source_file(stdlib_filename, {}); G.all_src_files.locate_and_register_source_file(entrypoint_filename, {}); @@ -38,27 +46,25 @@ AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filena tolk_assert(!file->ast); file->ast = parse_src_file_to_ast(file); - // file->ast->debug_print(); + // if (!file->is_stdlib_file()) file->ast->debug_print(); for (AnyV v_toplevel : file->ast->as()->get_toplevel_declarations()) { - if (auto v_import = v_toplevel->try_as()) { + if (auto v_import = v_toplevel->try_as()) { std::string imported_str = v_import->get_file_name(); size_t cur_slash_pos = file->rel_filename.rfind('/'); std::string rel_filename = cur_slash_pos == std::string::npos || imported_str[0] == '@' ? 
std::move(imported_str) : file->rel_filename.substr(0, cur_slash_pos + 1) + imported_str; - SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_import->loc); - file->imports.push_back(SrcFile::ImportStatement{imported}); - v_import->mutate_set_src_file(imported); + const SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_import->loc); + file->imports.push_back(SrcFile::ImportDirective{imported}); + v_import->mutate()->assign_src_file(imported); } } } // todo #ifdef TOLK_PROFILING - // lexer_measure_performance(G.all_src_files.get_all_files()); - - return G.all_src_files.get_all_files(); + lexer_measure_performance(G.all_src_files); } } // namespace tolk diff --git a/tolk/pipe-find-unused-symbols.cpp b/tolk/pipe-find-unused-symbols.cpp index f83579f4e..29584cbf7 100644 --- a/tolk/pipe-find-unused-symbols.cpp +++ b/tolk/pipe-find-unused-symbols.cpp @@ -24,51 +24,41 @@ from all source files in the program, then also delete it here. */ #include "tolk.h" -#include "src-file.h" #include "compiler-state.h" /* - * Here we find unused symbols (global functions and variables) to strip them off codegen. - * Note, that currently it's implemented as a standalone step after AST has been transformed to legacy Expr/Op. - * The reason why it's not done on AST level is that symbol resolving is done too late. For instance, - * having `beginCell()` there is not enough information in AST whether if points to a global function - * or it's a local variable application. - * In the future, this should be done on AST level. + * This pipe finds unused symbols (global functions and variables) to strip them off codegen. + * It happens after converting AST to Op, so it does not traverse AST. + * In the future, when control flow graph is introduced, this should be done at AST level. 
*/ namespace tolk { static void mark_function_used_dfs(const std::unique_ptr& op); -static void mark_function_used(SymValCodeFunc* func_val) { - if (!func_val->code || func_val->is_really_used) { // already handled +static void mark_function_used(const FunctionData* fun_ref) { + if (!fun_ref->is_code_function() || fun_ref->is_really_used()) { // already handled return; } - func_val->is_really_used = true; - mark_function_used_dfs(func_val->code->ops); + fun_ref->mutate()->assign_is_really_used(); + mark_function_used_dfs(std::get(fun_ref->body)->code->ops); } -static void mark_global_var_used(SymValGlobVar* glob_val) { - glob_val->is_really_used = true; +static void mark_global_var_used(const GlobalVarData* glob_ref) { + glob_ref->mutate()->assign_is_really_used(); } static void mark_function_used_dfs(const std::unique_ptr& op) { if (!op) { return; } - // op->fun_ref, despite its name, may actually ref global var - // note, that for non-calls, e.g. `var a = some_fn` (Op::_Let), some_fn is Op::_GlobVar - // (in other words, fun_ref exists not only for direct Op::_Call, but for non-call references also) - if (op->fun_ref) { - if (auto* func_val = dynamic_cast(op->fun_ref->value)) { - mark_function_used(func_val); - } else if (auto* glob_val = dynamic_cast(op->fun_ref->value)) { - mark_global_var_used(glob_val); - } else if (auto* asm_val = dynamic_cast(op->fun_ref->value)) { - } else { - tolk_assert(false); - } + + if (op->f_sym) { // for Op::_Call + mark_function_used(op->f_sym); + } + if (op->g_sym) { // for Op::_GlobVar + mark_global_var_used(op->g_sym); } mark_function_used_dfs(op->next); mark_function_used_dfs(op->block0); @@ -76,11 +66,9 @@ static void mark_function_used_dfs(const std::unique_ptr& op) { } void pipeline_find_unused_symbols() { - for (SymDef* func_sym : G.all_code_functions) { - auto* func_val = dynamic_cast(func_sym->value); - std::string name = G.symbols.get_name(func_sym->sym_idx); - if (func_val->method_id.not_null() || 
func_val->is_entrypoint()) { - mark_function_used(func_val); + for (const FunctionData* fun_ref : G.all_functions) { + if (fun_ref->is_method_id_not_empty()) { // get methods, main and other entrypoints, regular functions with @method_id + mark_function_used(fun_ref); } } } diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp index 91a99f96a..9092e5647 100644 --- a/tolk/pipe-generate-fif-output.cpp +++ b/tolk/pipe-generate-fif-output.cpp @@ -1,5 +1,5 @@ /* - This file is part of TON Blockchain source code. + This file is part of TON Blockchain source code. TON Blockchain is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -30,113 +30,88 @@ namespace tolk { -bool SymValCodeFunc::does_need_codegen() const { - // when a function is declared, but not referenced from code in any way, don't generate its body - if (!is_really_used && G.settings.remove_unused_functions) { - return false; - } - // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist - if (flags & flagUsedAsNonCall) { - return true; - } - // currently, there is no inlining, all functions are codegenerated - // (but actually, unused ones are later removed by Fift) - // in the future, we may want to implement a true AST inlining for "simple" functions - return true; -} - -void SymValCodeFunc::set_code(CodeBlob* code) { +void FunctionBodyCode::set_code(CodeBlob* code) { this->code = code; } -void SymValAsmFunc::set_code(std::vector code) { - this->ext_compile = make_ext_compile(std::move(code)); +void FunctionBodyAsm::set_code(std::vector&& code) { + this->ops = std::move(code); } -static void generate_output_func(SymDef* func_sym) { - SymValCodeFunc* func_val = dynamic_cast(func_sym->value); - tolk_assert(func_val); - std::string name = G.symbols.get_name(func_sym->sym_idx); +static void generate_output_func(const FunctionData* fun_ref) { + 
tolk_assert(fun_ref->is_code_function()); if (G.is_verbosity(2)) { - std::cerr << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl; - } - if (!func_val->code) { - throw ParseError(func_sym->loc, "function `" + name + "` is just declared, not implemented"); - } else { - CodeBlob& code = *(func_val->code); - if (G.is_verbosity(3)) { - code.print(std::cerr, 9); + std::cerr << "\n\n=========================\nfunction " << fun_ref->name << " : " << fun_ref->inferred_return_type << std::endl; + } + + CodeBlob* code = std::get(fun_ref->body)->code; + if (G.is_verbosity(3)) { + code->print(std::cerr, 9); + } + code->prune_unreachable_code(); + if (G.is_verbosity(5)) { + std::cerr << "after prune_unreachable: \n"; + code->print(std::cerr, 0); + } + code->split_vars(true); + if (G.is_verbosity(5)) { + std::cerr << "after split_vars: \n"; + code->print(std::cerr, 0); + } + for (int i = 0; i < 8; i++) { + code->compute_used_code_vars(); + if (G.is_verbosity(4)) { + std::cerr << "after compute_used_vars: \n"; + code->print(std::cerr, 6); } - code.simplify_var_types(); + code->fwd_analyze(); if (G.is_verbosity(5)) { - std::cerr << "after simplify_var_types: \n"; - code.print(std::cerr, 0); + std::cerr << "after fwd_analyze: \n"; + code->print(std::cerr, 6); } - code.prune_unreachable_code(); + code->prune_unreachable_code(); if (G.is_verbosity(5)) { std::cerr << "after prune_unreachable: \n"; - code.print(std::cerr, 0); - } - code.split_vars(true); - if (G.is_verbosity(5)) { - std::cerr << "after split_vars: \n"; - code.print(std::cerr, 0); - } - for (int i = 0; i < 8; i++) { - code.compute_used_code_vars(); - if (G.is_verbosity(4)) { - std::cerr << "after compute_used_vars: \n"; - code.print(std::cerr, 6); - } - code.fwd_analyze(); - if (G.is_verbosity(5)) { - std::cerr << "after fwd_analyze: \n"; - code.print(std::cerr, 6); - } - code.prune_unreachable_code(); - if (G.is_verbosity(5)) { - std::cerr << "after prune_unreachable: 
\n"; - code.print(std::cerr, 6); - } - } - code.mark_noreturn(); - if (G.is_verbosity(3)) { - code.print(std::cerr, 15); - } - if (G.is_verbosity(2)) { - std::cerr << "\n---------- resulting code for " << name << " -------------\n"; - } - const char* modifier = ""; - if (func_val->is_inline()) { - modifier = "INLINE"; - } else if (func_val->is_inline_ref()) { - modifier = "REF"; - } - std::cout << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n"; - int mode = 0; - if (G.settings.stack_layout_comments) { - mode |= Stack::_StkCmt | Stack::_CptStkCmt; - } - if (func_val->is_inline() && code.ops->noreturn()) { - mode |= Stack::_InlineFunc; - } - if (func_val->is_inline() || func_val->is_inline_ref()) { - mode |= Stack::_InlineAny; - } - code.generate_code(std::cout, mode, 2); - std::cout << std::string(2, ' ') << "}>\n"; - if (G.is_verbosity(2)) { - std::cerr << "--------------\n"; + code->print(std::cerr, 6); } } + code->mark_noreturn(); + if (G.is_verbosity(3)) { + code->print(std::cerr, 15); + } + if (G.is_verbosity(2)) { + std::cerr << "\n---------- resulting code for " << fun_ref->name << " -------------\n"; + } + const char* modifier = ""; + if (fun_ref->is_inline()) { + modifier = "INLINE"; + } else if (fun_ref->is_inline_ref()) { + modifier = "REF"; + } + std::cout << std::string(2, ' ') << fun_ref->name << " PROC" << modifier << ":<{\n"; + int mode = 0; + if (G.settings.stack_layout_comments) { + mode |= Stack::_StkCmt | Stack::_CptStkCmt; + } + if (fun_ref->is_inline() && code->ops->noreturn()) { + mode |= Stack::_InlineFunc; + } + if (fun_ref->is_inline() || fun_ref->is_inline_ref()) { + mode |= Stack::_InlineAny; + } + code->generate_code(std::cout, mode, 2); + std::cout << std::string(2, ' ') << "}>\n"; + if (G.is_verbosity(2)) { + std::cerr << "--------------\n"; + } } -void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) { +void pipeline_generate_fif_output_to_std_cout() { std::cout << "\"Asm.fif\" include\n"; 
std::cout << "// automatically generated from "; bool need_comma = false; - for (const SrcFile* file : all_src_files) { + for (const SrcFile* file : G.all_src_files) { if (!file->is_stdlib_file()) { if (need_comma) { std::cout << ", "; @@ -149,26 +124,23 @@ void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) std::cout << "PROGRAM{\n"; bool has_main_procedure = false; - for (SymDef* func_sym : G.all_code_functions) { - SymValCodeFunc* func_val = dynamic_cast(func_sym->value); - tolk_assert(func_val); - if (!func_val->does_need_codegen()) { - if (G.is_verbosity(2)) { - std::cerr << func_sym->name() << ": code not generated, function does not need codegen\n"; + for (const FunctionData* fun_ref : G.all_functions) { + if (!fun_ref->does_need_codegen()) { + if (G.is_verbosity(2) && fun_ref->is_code_function()) { + std::cerr << fun_ref->name << ": code not generated, function does not need codegen\n"; } continue; } - std::string name = G.symbols.get_name(func_sym->sym_idx); - if (func_val->is_entrypoint() && (name == "main" || name == "onInternalMessage")) { + if (fun_ref->is_entrypoint() && (fun_ref->name == "main" || fun_ref->name == "onInternalMessage")) { has_main_procedure = true; } std::cout << std::string(2, ' '); - if (func_val->method_id.is_null()) { - std::cout << "DECLPROC " << name << "\n"; + if (fun_ref->is_method_id_not_empty()) { + std::cout << fun_ref->method_id << " DECLMETHOD " << fun_ref->name << "\n"; } else { - std::cout << func_val->method_id << " DECLMETHOD " << name << "\n"; + std::cout << "DECLPROC " << fun_ref->name << "\n"; } } @@ -176,25 +148,22 @@ void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) throw Fatal("the contract has no entrypoint; forgot `fun onInternalMessage(...)`?"); } - for (SymDef* gvar_sym : G.all_global_vars) { - auto* glob_val = dynamic_cast(gvar_sym->value); - tolk_assert(glob_val); - if (!glob_val->is_really_used && G.settings.remove_unused_functions) { + for (const 
GlobalVarData* var_ref : G.all_global_vars) { + if (!var_ref->is_really_used() && G.settings.remove_unused_functions) { if (G.is_verbosity(2)) { - std::cerr << gvar_sym->name() << ": variable not generated, it's unused\n"; + std::cerr << var_ref->name << ": variable not generated, it's unused\n"; } continue; } - std::string name = G.symbols.get_name(gvar_sym->sym_idx); - std::cout << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n"; + + std::cout << std::string(2, ' ') << "DECLGLOBVAR " << var_ref->name << "\n"; } - for (SymDef* func_sym : G.all_code_functions) { - SymValCodeFunc* func_val = dynamic_cast(func_sym->value); - if (!func_val->does_need_codegen()) { + for (const FunctionData* fun_ref : G.all_functions) { + if (!fun_ref->does_need_codegen()) { continue; } - generate_output_func(func_sym); + generate_output_func(fun_ref); } std::cout << "}END>c\n"; diff --git a/tolk/pipe-infer-types-and-calls.cpp b/tolk/pipe-infer-types-and-calls.cpp new file mode 100644 index 000000000..d8a7d41be --- /dev/null +++ b/tolk/pipe-infer-types-and-calls.cpp @@ -0,0 +1,1236 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "tolk.h" +#include "src-file.h" +#include "ast.h" +#include "ast-visitor.h" +#include "generics-helpers.h" +#include "type-system.h" + +/* + * This is a complicated and crucial part of the pipeline. It simultaneously does the following: + * * infers types of all expressions; example: `2 + 3` both are TypeDataInt, result is also + * * AND checks types for assignment, arguments passing, etc.; example: `fInt(cs)` is error passing slice to int + * * AND binds function/method calls (assigns fun_ref); example: `globalF()`, fun_ref is assigned to `globalF` (unless generic) + * * AND instantiates generic functions; example: `t.tuplePush(2)` creates `tuplePush` and assigns fun_ref to dot field + * * AND infers return type of functions if it's omitted (`fun f() { ... }` means "auto infer", not "void") + * + * It's important to do all these parts simultaneously, they can't be split or separated. + * For example, we can't bind `f(2)` earlier, because if `f` is a generic `f`, we should instantiate it, + * and in order to do it, we need to know argument types. + * For example, we can't bind `c.cellHash()` earlier, because in the future we'll have overloads (`cell.hash()` and `slice.hash()`), + * and in order to bind it, we need to know object type. + * And vice versa, to infer type of expression in the middle, we need to have inferred all expressions preceding it, + * which may also include generics, etc. + * + * About generics. They are more like "C++ templates". If `f` and `f` called from somewhere, + * there will be TWO new functions, inserted into symtable, and both will be code generated to Fift. + * Body of a generic function is NOT analyzed. Hence, `fun f(v: T) { v.method(); }` we don't know + * whether `v.method()` is a valid call until instantiate it with `f` for example. + * Same for `v + 2`, we don't know whether + operator can be applied until instantiation. + * In other words, we have a closed type system, not open. 
+ * That's why generic functions' bodies aren't traversed here (and in most following pipes). + * Instead, when an instantiated function is created, it follows all the preceding pipeline (registering symbols, etc.), + * and type inferring is done inside instantiated functions (which can recursively instantiate others, etc.). + * + * A noticeable part of inferring is "hints". + * Example: `var a: User = { id: 3, name: "" }`. To infer type of `{...}` we need to know it's `User`. This hint is taken from lhs. + * Example: `fun tupleAt<T>(t: tuple, idx: int):T`, just `t.tupleAt(2)` can't be deduced (T left unspecified), + * but for assignment with left-defined type, or a call to `fInt(t.tupleAt(2))` hint "int" helps deduce T. + * + * Unlike other pipes, inferring can dig recursively on demand. + * Example: + * fun getInt() { return 1; } + * fun main() { var i = getInt(); } + * If `main` is handled first, it should know the return type of `getInt`. It's not declared, so we need + * to launch type inferring for `getInt` and then proceed back to `main`. + * When a generic function is instantiated, type inferring inside it is also run. 
+ */ + +namespace tolk { + +static void infer_and_save_return_type_of_function(const FunctionData* fun_ref); + +static TypePtr get_or_infer_return_type(const FunctionData* fun_ref) { + if (!fun_ref->inferred_return_type) { + infer_and_save_return_type_of_function(fun_ref); + } + return fun_ref->inferred_return_type; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(TypePtr type) { + return "`" + type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(AnyExprV v_with_type) { + return "`" + v_with_type->inferred_type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(const LocalVarData& var_ref) { + return "`" + var_ref.declared_type->as_human_readable() + "`"; +} + +GNU_ATTRIBUTE_NOINLINE +static std::string to_string(const FunctionData* fun_ref) { + return "`" + fun_ref->as_human_readable() + "`"; +} + +// fire an error when `fun f(...) asm ...` is called with T=(int,int) or other non-1 width on stack +// asm functions generally can't handle it, they expect T to be a TVM primitive +// (in FunC, `forall` type just couldn't be unified with non-primitives; in Tolk, generic T is expectedly inferred) +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_calling_asm_function_with_non1_stack_width_arg(SrcLocation loc, const FunctionData* fun_ref, const std::vector& substitutions, int arg_idx) { + throw ParseError(loc, "can not call `" + fun_ref->as_human_readable() + "` with " + fun_ref->genericTs->get_nameT(arg_idx) + "=" + substitutions[arg_idx]->as_human_readable() + ", because it occupies " + std::to_string(substitutions[arg_idx]->calc_width_on_stack()) + " stack slots in TVM, not 1"); +} + +// fire an error on `var n = null` +// technically it's correct, type of `n` is TypeDataNullLiteral, but it's not what the user wanted +// so, it's better to see an error on assignment, than later, on `n` usage and types mismatch +// (most common is situation above, but generally, `var 
(x,n) = xn` where xn is a tensor with 2-nd always-null, can be) +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_assign_always_null_to_variable(SrcLocation loc, const LocalVarData* assigned_var, bool is_assigned_null_literal) { + std::string var_name = assigned_var->name; + throw ParseError(loc, "can not infer type of `" + var_name + "`, it's always null; specify its type with `" + var_name + ": `" + (is_assigned_null_literal ? " or use `null as `" : "")); +} + +// fire an error on `!cell` / `+slice` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_apply_operator(SrcLocation loc, std::string_view operator_name, AnyExprV unary_expr) { + std::string op = static_cast(operator_name); + throw ParseError(loc, "can not apply operator `" + op + "` to " + to_string(unary_expr->inferred_type)); +} + +// fire an error on `int + cell` / `slice & int` +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_cannot_apply_operator(SrcLocation loc, std::string_view operator_name, AnyExprV lhs, AnyExprV rhs) { + std::string op = static_cast(operator_name); + throw ParseError(loc, "can not apply operator `" + op + "` to " + to_string(lhs->inferred_type) + " and " + to_string(rhs->inferred_type)); +} + +// check correctness of called arguments counts and their type matching +static void check_function_arguments(const FunctionData* fun_ref, V v, AnyExprV lhs_of_dot_call) { + int delta_self = lhs_of_dot_call ? 
1 : 0; + int n_arguments = v->size() + delta_self; + int n_parameters = fun_ref->get_num_params(); + + // Tolk doesn't have optional parameters currently, so just compare counts + if (!n_parameters && lhs_of_dot_call) { + v->error("`" + fun_ref->name + "` has no parameters and can not be called as method"); + } + if (n_parameters < n_arguments) { + v->error("too many arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + if (n_arguments < n_parameters) { + v->error("too few arguments in call to `" + fun_ref->name + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + + if (lhs_of_dot_call) { + if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(lhs_of_dot_call->inferred_type)) { + lhs_of_dot_call->error("can not call method for " + to_string(fun_ref->parameters[0]) + " with object of type " + to_string(lhs_of_dot_call)); + } + } + for (int i = 0; i < v->size(); ++i) { + if (!fun_ref->parameters[i + delta_self].declared_type->can_rhs_be_assigned(v->get_arg(i)->inferred_type)) { + v->get_arg(i)->error("can not pass " + to_string(v->get_arg(i)) + " to " + to_string(fun_ref->parameters[i + delta_self])); + } + } +} + +/* + * TypeInferringUnifyStrategy unifies types from various branches to a common result (lca). + * It's used to auto infer function return type based on return statements, like in TypeScript. + * Example: `fun f() { ... return 1; ... return null; }` inferred as `int`. + * + * Besides function returns, it's also useful for ternary `return cond ? 1 : null` and `match` expression. + * If types can't be unified (a function returns int and cell, for example), `unify()` returns false, handled outside. + * BTW, don't confuse this way of inferring with Hindley-Milner, they have nothing in common. 
+ */ +class TypeInferringUnifyStrategy { + TypePtr unified_result = nullptr; + + static TypePtr calculate_type_lca(TypePtr t1, TypePtr t2) { + if (t1 == t2) { + return t1; + } + if (t1->can_rhs_be_assigned(t2)) { + return t1; + } + if (t2->can_rhs_be_assigned(t1)) { + return t2; + } + + const auto* tensor1 = t1->try_as(); + const auto* tensor2 = t2->try_as(); + if (tensor1 && tensor2 && tensor1->size() == tensor2->size()) { + std::vector types_lca; + types_lca.reserve(tensor1->size()); + for (int i = 0; i < tensor1->size(); ++i) { + TypePtr next = calculate_type_lca(tensor1->items[i], tensor2->items[i]); + if (next == nullptr) { + return nullptr; + } + types_lca.push_back(next); + } + return TypeDataTensor::create(std::move(types_lca)); + } + + const auto* tuple1 = t1->try_as(); + const auto* tuple2 = t2->try_as(); + if (tuple1 && tuple2 && tuple1->size() == tuple2->size()) { + std::vector types_lca; + types_lca.reserve(tuple1->size()); + for (int i = 0; i < tuple1->size(); ++i) { + TypePtr next = calculate_type_lca(tuple1->items[i], tuple2->items[i]); + if (next == nullptr) { + return nullptr; + } + types_lca.push_back(next); + } + return TypeDataTypedTuple::create(std::move(types_lca)); + } + + return nullptr; + } + +public: + bool unify_with(TypePtr next) { + if (unified_result == nullptr) { + unified_result = next; + return true; + } + if (unified_result == next) { + return true; + } + + TypePtr combined = calculate_type_lca(unified_result, next); + if (!combined) { + return false; + } + + unified_result = combined; + return true; + } + + bool unify_with_implicit_return_void() { + if (unified_result == nullptr) { + unified_result = TypeDataVoid::create(); + return true; + } + + return unified_result == TypeDataVoid::create(); + } + + TypePtr get_result() const { return unified_result; } +}; + +/* + * This class handles all types of AST vertices and traverses them, filling all AnyExprV::inferred_type. 
+ * Note, that it isn't derived from ASTVisitor, it has manual `switch` over all existing vertex types. + * There are two reasons for this: + * 1) when a new AST node type is introduced, I want it to fail here, not to be left un-inferred with UB at next steps + * 2) easy to maintain a hint (see comments at the top of the file) + */ +class InferCheckTypesAndCallsAndFieldsVisitor final { + const FunctionData* current_function = nullptr; + TypeInferringUnifyStrategy return_unifier; + + GNU_ATTRIBUTE_ALWAYS_INLINE + static void assign_inferred_type(AnyExprV dst, AnyExprV src) { +#ifdef TOLK_DEBUG + tolk_assert(src->inferred_type != nullptr && !src->inferred_type->has_unresolved_inside() && !src->inferred_type->has_genericT_inside()); +#endif + dst->mutate()->assign_inferred_type(src->inferred_type); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE + static void assign_inferred_type(AnyExprV dst, TypePtr inferred_type) { +#ifdef TOLK_DEBUG + tolk_assert(inferred_type != nullptr && !inferred_type->has_unresolved_inside() && !inferred_type->has_genericT_inside()); +#endif + dst->mutate()->assign_inferred_type(inferred_type); + } + + static void assign_inferred_type(const LocalVarData* local_var_or_param, TypePtr inferred_type) { +#ifdef TOLK_DEBUG + tolk_assert(inferred_type != nullptr && !inferred_type->has_unresolved_inside() && !inferred_type->has_genericT_inside()); +#endif + local_var_or_param->mutate()->assign_inferred_type(inferred_type); + } + + static void assign_inferred_type(const FunctionData* fun_ref, TypePtr inferred_return_type, TypePtr inferred_full_type) { +#ifdef TOLK_DEBUG + tolk_assert(inferred_return_type != nullptr && !inferred_return_type->has_unresolved_inside() && !inferred_return_type->has_genericT_inside()); +#endif + fun_ref->mutate()->assign_inferred_type(inferred_return_type, inferred_full_type); + } + + // traverse children in any statement + void process_any_statement(AnyV v) { + switch (v->type) { + case ast_sequence: + return 
process_sequence(v->as()); + case ast_return_statement: + return process_return_statement(v->as()); + case ast_if_statement: + return process_if_statement(v->as()); + case ast_repeat_statement: + return process_repeat_statement(v->as()); + case ast_while_statement: + return process_while_statement(v->as()); + case ast_do_while_statement: + return process_do_while_statement(v->as()); + case ast_throw_statement: + return process_throw_statement(v->as()); + case ast_assert_statement: + return process_assert_statement(v->as()); + case ast_try_catch_statement: + return process_try_catch_statement(v->as()); + case ast_empty_statement: + return; + default: + infer_any_expr(reinterpret_cast(v)); + } + } + + // assigns inferred_type for any expression (by calling assign_inferred_type) + void infer_any_expr(AnyExprV v, TypePtr hint = nullptr) { + switch (v->type) { + case ast_int_const: + return infer_int_const(v->as()); + case ast_string_const: + return infer_string_const(v->as()); + case ast_bool_const: + return infer_bool_const(v->as()); + case ast_local_vars_declaration: + return infer_local_vars_declaration(v->as()); + case ast_assign: + return infer_assignment(v->as()); + case ast_set_assign: + return infer_set_assign(v->as()); + case ast_unary_operator: + return infer_unary_operator(v->as()); + case ast_binary_operator: + return infer_binary_operator(v->as()); + case ast_ternary_operator: + return infer_ternary_operator(v->as(), hint); + case ast_cast_as_operator: + return infer_cast_as_operator(v->as()); + case ast_parenthesized_expression: + return infer_parenthesized(v->as(), hint); + case ast_reference: + return infer_reference(v->as()); + case ast_dot_access: + return infer_dot_access(v->as(), hint); + case ast_function_call: + return infer_function_call(v->as(), hint); + case ast_tensor: + return infer_tensor(v->as(), hint); + case ast_typed_tuple: + return infer_typed_tuple(v->as(), hint); + case ast_null_keyword: + return infer_null_keyword(v->as()); + case 
ast_underscore: + return infer_underscore(v->as(), hint); + case ast_empty_expression: + return infer_empty_expression(v->as()); + default: + throw UnexpectedASTNodeType(v, "infer_any_expr"); + } + } + + static bool expect_integer(AnyExprV v_inferred) { + return v_inferred->inferred_type == TypeDataInt::create(); + } + + static bool expect_boolean(AnyExprV v_inferred) { + return v_inferred->inferred_type == TypeDataBool::create(); + } + + static void infer_int_const(V v) { + assign_inferred_type(v, TypeDataInt::create()); + } + + static void infer_string_const(V v) { + if (v->is_bitslice()) { + assign_inferred_type(v, TypeDataSlice::create()); + } else { + assign_inferred_type(v, TypeDataInt::create()); + } + } + + static void infer_bool_const(V v) { + assign_inferred_type(v, TypeDataBool::create()); + } + + static void infer_local_vars_declaration(V) { + // it can not appear as a standalone expression + // `var ... = rhs` is handled by ast_assign + tolk_assert(false); + } + + void infer_assignment(V v) { + // v is assignment: `x = 5` / `var x = 5` / `var x: slice = 5` / `(cs,_) = f()` / `val (a,[b],_) = (a,t,0)` + // it's a tricky node to handle, because to infer rhs, at first we need to create hint from lhs + // and then to apply/check inferred rhs onto lhs + // about a hint: `var i: int = t.tupleAt(0)` is ok, but `var i = t.tupleAt(0)` not, since `tupleAt(t,i): T` + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + infer_any_expr(rhs, calc_hint_from_assignment_lhs(lhs)); + process_assignment_lhs_after_infer_rhs(lhs, rhs->inferred_type, rhs); + assign_inferred_type(v, lhs); + } + + // having assignment like `var (i: int, s) = rhs` (its lhs is local vars declaration), + // create a contextual infer hint for rhs, `(int, unknown)` in this case + // this hint helps to deduce generics and to resolve unknown types while inferring rhs + static TypePtr calc_hint_from_assignment_lhs(AnyExprV lhs) { + // `var ... 
= rhs` - dig into left part + if (auto lhs_decl = lhs->try_as()) { + return calc_hint_from_assignment_lhs(lhs_decl->get_expr()); + } + + // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") + if (auto lhs_var = lhs->try_as()) { + if (lhs_var->marked_as_redef) { + return lhs_var->var_ref->declared_type; + } + if (lhs_var->declared_type) { + return lhs_var->declared_type; + } + return TypeDataUnknown::create(); + } + + // `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion) + if (auto lhs_ref = lhs->try_as()) { + if (const auto* var_ref = lhs_ref->sym->try_as()) { + return var_ref->declared_type; + } + if (const auto* glob_ref = lhs_ref->sym->try_as()) { + return glob_ref->declared_type; + } + return TypeDataUnknown::create(); + } + + // `(v1, v2) = rhs` / `var (v1, v2) = rhs` + if (auto lhs_tensor = lhs->try_as()) { + std::vector sub_hints; + sub_hints.reserve(lhs_tensor->size()); + for (AnyExprV item : lhs_tensor->get_items()) { + sub_hints.push_back(calc_hint_from_assignment_lhs(item)); + } + return TypeDataTensor::create(std::move(sub_hints)); + } + + // `[v1, v2] = rhs` / `var [v1, v2] = rhs` + if (auto lhs_tuple = lhs->try_as()) { + std::vector sub_hints; + sub_hints.reserve(lhs_tuple->size()); + for (AnyExprV item : lhs_tuple->get_items()) { + sub_hints.push_back(calc_hint_from_assignment_lhs(item)); + } + return TypeDataTypedTuple::create(std::move(sub_hints)); + } + + return TypeDataUnknown::create(); + } + + // handle (and dig recursively) into `var lhs = rhs` + // examples: `var z = 5`, `var (x, [y]) = (2, [3])`, `var (x, [y]) = xy` + // while recursing, keep track of rhs if lhs and rhs have common shape (5 for z, 2 for x, [3] for [y], 3 for y) + // (so that on type mismatch, point to corresponding rhs, example: `var (x, y:slice) = (1, 2)` point to 2 + void process_assignment_lhs_after_infer_rhs(AnyExprV lhs, TypePtr rhs_type, AnyExprV corresponding_maybe_rhs) { + AnyExprV err_loc = 
corresponding_maybe_rhs ? corresponding_maybe_rhs : lhs; + + // `var ... = rhs` - dig into left part + if (auto lhs_decl = lhs->try_as()) { + process_assignment_lhs_after_infer_rhs(lhs_decl->get_expr(), rhs_type, corresponding_maybe_rhs); + assign_inferred_type(lhs, lhs_decl->get_expr()->inferred_type); + return; + } + + // inside `var v: int = rhs` / `var _ = rhs` / `var v redef = rhs` (lhs is "v" / "_" / "v") + if (auto lhs_var = lhs->try_as()) { + TypePtr declared_type = lhs_var->declared_type; // `var v: int = rhs` (otherwise, nullptr) + if (lhs_var->marked_as_redef) { + tolk_assert(lhs_var->var_ref && lhs_var->var_ref->declared_type); + declared_type = lhs_var->var_ref->declared_type; + } + if (declared_type) { + if (!declared_type->can_rhs_be_assigned(rhs_type)) { + err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(declared_type)); + } + assign_inferred_type(lhs, declared_type); + } else { + if (rhs_type == TypeDataNullLiteral::create()) { + fire_error_assign_always_null_to_variable(err_loc->loc, lhs_var->var_ref->try_as(), corresponding_maybe_rhs && corresponding_maybe_rhs->type == ast_null_keyword); + } + assign_inferred_type(lhs, rhs_type); + assign_inferred_type(lhs_var->var_ref, lhs_var->inferred_type); + } + return; + } + + // `v = rhs` / `(c1, c2) = rhs` (lhs is "v" / "_" / "c1" / "c2" after recursion) + if (lhs->try_as()) { + infer_any_expr(lhs); + if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { + err_loc->error("can not assign " + to_string(rhs_type) + " to variable of type " + to_string(lhs)); + } + return; + } + + // `(v1, v2) = rhs` / `var (v1, v2) = rhs` (rhs may be `(1,2)` or `tensorVar` or `someF()`, doesn't matter) + // dig recursively into v1 and v2 with corresponding rhs i-th item of a tensor + if (auto lhs_tensor = lhs->try_as()) { + const TypeDataTensor* rhs_type_tensor = rhs_type->try_as(); + if (!rhs_type_tensor) { + err_loc->error("can not assign " + to_string(rhs_type) + " to a 
tensor"); + } + if (lhs_tensor->size() != rhs_type_tensor->size()) { + err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); + } + V rhs_tensor_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + std::vector types_list; + types_list.reserve(lhs_tensor->size()); + for (int i = 0; i < lhs_tensor->size(); ++i) { + process_assignment_lhs_after_infer_rhs(lhs_tensor->get_item(i), rhs_type_tensor->items[i], rhs_tensor_maybe ? rhs_tensor_maybe->get_item(i) : nullptr); + types_list.push_back(lhs_tensor->get_item(i)->inferred_type); + } + assign_inferred_type(lhs, TypeDataTensor::create(std::move(types_list))); + return; + } + + // `[v1, v2] = rhs` / `var [v1, v2] = rhs` (rhs may be `[1,2]` or `tupleVar` or `someF()`, doesn't matter) + // dig recursively into v1 and v2 with corresponding rhs i-th item of a tuple + if (auto lhs_tuple = lhs->try_as()) { + const TypeDataTypedTuple* rhs_type_tuple = rhs_type->try_as(); + if (!rhs_type_tuple) { + err_loc->error("can not assign " + to_string(rhs_type) + " to a tuple"); + } + if (lhs_tuple->size() != rhs_type_tuple->size()) { + err_loc->error("can not assign " + to_string(rhs_type) + ", sizes mismatch"); + } + V rhs_tuple_maybe = corresponding_maybe_rhs ? corresponding_maybe_rhs->try_as() : nullptr; + std::vector types_list; + types_list.reserve(lhs_tuple->size()); + for (int i = 0; i < lhs_tuple->size(); ++i) { + process_assignment_lhs_after_infer_rhs(lhs_tuple->get_item(i), rhs_type_tuple->items[i], rhs_tuple_maybe ? 
rhs_tuple_maybe->get_item(i) : nullptr); + types_list.push_back(lhs_tuple->get_item(i)->inferred_type); + } + assign_inferred_type(lhs, TypeDataTypedTuple::create(std::move(types_list))); + return; + } + + // `_ = rhs` + if (lhs->type == ast_underscore) { + assign_inferred_type(lhs, TypeDataUnknown::create()); + return; + } + + // here is something strange and unhandled, like `f() = rhs` + // it will fail on later compilation steps (like rvalue/lvalue checks), but type inferring should pass + infer_any_expr(lhs, rhs_type); + if (!lhs->inferred_type->can_rhs_be_assigned(rhs_type)) { + err_loc->error("can not assign " + to_string(rhs_type) + " to " + to_string(lhs)); + } + } + + void infer_set_assign(V v) { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + infer_any_expr(lhs); + infer_any_expr(rhs, lhs->inferred_type); + + // almost all operators implementation is hardcoded by built-in functions `_+_` and similar + std::string_view builtin_func = v->operator_name; // "+" for operator += + + switch (v->tok) { + // &= |= ^= are "overloaded" both for integers and booleans, (int &= bool) is NOT allowed + case tok_set_bitwise_and: + case tok_set_bitwise_or: + case tok_set_bitwise_xor: { + bool both_int = expect_integer(lhs) && expect_integer(rhs); + bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); + if (!both_int && !both_bool) { + fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); + } + break; + } + // others are mathematical: += *= ... 
+ default: + if (!expect_integer(lhs) || !expect_integer(rhs)) { + fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); + } + } + + assign_inferred_type(v, lhs); + if (!builtin_func.empty()) { + const FunctionData* builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->as(); + tolk_assert(builtin_sym); + v->mutate()->assign_fun_ref(builtin_sym); + } + } + + void infer_unary_operator(V v) { + AnyExprV rhs = v->get_rhs(); + infer_any_expr(rhs); + + // all operators implementation is hardcoded by built-in functions `~_` and similar + std::string_view builtin_func = v->operator_name; + + switch (v->tok) { + case tok_minus: + case tok_plus: + case tok_bitwise_not: + if (!expect_integer(rhs)) { + fire_error_cannot_apply_operator(v->loc, v->operator_name, rhs); + } + assign_inferred_type(v, TypeDataInt::create()); + break; + case tok_logical_not: + if (expect_boolean(rhs)) { + builtin_func = "!b"; // "overloaded" for bool + } else if (!expect_integer(rhs)) { + fire_error_cannot_apply_operator(v->loc, v->operator_name, rhs); + } + assign_inferred_type(v, TypeDataBool::create()); + break; + default: + tolk_assert(false); + } + + if (!builtin_func.empty()) { + const FunctionData* builtin_sym = lookup_global_symbol(static_cast(builtin_func) + "_")->as(); + tolk_assert(builtin_sym); + v->mutate()->assign_fun_ref(builtin_sym); + } + } + + void infer_binary_operator(V v) { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + infer_any_expr(lhs); + infer_any_expr(rhs); + + // almost all operators implementation is hardcoded by built-in functions `_+_` and similar + std::string_view builtin_func = v->operator_name; + + switch (v->tok) { + // == != can compare both integers and booleans, (int == bool) is NOT allowed + case tok_eq: + case tok_neq: { + bool both_int = expect_integer(lhs) && expect_integer(rhs); + bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); + if (!both_int && !both_bool) { + if (lhs->inferred_type == 
rhs->inferred_type) { // compare slice with slice + v->error("type " + to_string(lhs) + " can not be compared with `== !=`"); + } else { + fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); + } + } + assign_inferred_type(v, TypeDataBool::create()); + break; + } + // < > can compare only integers + case tok_lt: + case tok_gt: + case tok_leq: + case tok_geq: + case tok_spaceship: { + if (!expect_integer(lhs) || !expect_integer(rhs)) { + fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); + } + assign_inferred_type(v, TypeDataBool::create()); + break; + } + // & | ^ are "overloaded" both for integers and booleans, (int & bool) is NOT allowed + case tok_bitwise_and: + case tok_bitwise_or: + case tok_bitwise_xor: { + bool both_int = expect_integer(lhs) && expect_integer(rhs); + bool both_bool = expect_boolean(lhs) && expect_boolean(rhs); + if (!both_int && !both_bool) { + fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); + } + assign_inferred_type(v, rhs); // (int & int) is int, (bool & bool) is bool + break; + } + // && || can work with integers and booleans, (int && bool) is allowed + case tok_logical_and: + case tok_logical_or: { + bool lhs_ok = expect_integer(lhs) || expect_boolean(lhs); + bool rhs_ok = expect_integer(rhs) || expect_boolean(rhs); + if (!lhs_ok || !rhs_ok) { + fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); + } + assign_inferred_type(v, TypeDataBool::create()); + builtin_func = {}; // no built-in functions, logical operators are expressed as IFs at IR level + break; + } + // others are mathematical: + * ... 
+ default: + if (!expect_integer(lhs) || !expect_integer(rhs)) { + fire_error_cannot_apply_operator(v->loc, v->operator_name, lhs, rhs); + } + assign_inferred_type(v, TypeDataInt::create()); + } + + if (!builtin_func.empty()) { + const FunctionData* builtin_sym = lookup_global_symbol("_" + static_cast(builtin_func) + "_")->as(); + tolk_assert(builtin_sym); + v->mutate()->assign_fun_ref(builtin_sym); + } + } + + void infer_ternary_operator(V v, TypePtr hint) { + AnyExprV cond = v->get_cond(); + infer_any_expr(cond); + if (!expect_integer(cond) && !expect_boolean(cond)) { + cond->error("can not use " + to_string(cond) + " as a boolean condition"); + } + infer_any_expr(v->get_when_true(), hint); + infer_any_expr(v->get_when_false(), hint); + + TypeInferringUnifyStrategy tern_type; + tern_type.unify_with(v->get_when_true()->inferred_type); + if (!tern_type.unify_with(v->get_when_false()->inferred_type)) { + v->error("types of ternary branches are incompatible"); + } + assign_inferred_type(v, tern_type.get_result()); + } + + void infer_cast_as_operator(V v) { + // for `expr as `, use this type for hint, so that `t.tupleAt(0) as int` is ok + infer_any_expr(v->get_expr(), v->cast_to_type); + if (!v->get_expr()->inferred_type->can_be_casted_with_as_operator(v->cast_to_type)) { + v->error("type " + to_string(v->get_expr()) + " can not be cast to " + to_string(v->cast_to_type)); + } + assign_inferred_type(v, v->cast_to_type); + } + + void infer_parenthesized(V v, TypePtr hint) { + infer_any_expr(v->get_expr(), hint); + assign_inferred_type(v, v->get_expr()); + } + + static void infer_reference(V v) { + if (const auto* var_ref = v->sym->try_as()) { + assign_inferred_type(v, var_ref->declared_type); + + } else if (const auto* const_ref = v->sym->try_as()) { + assign_inferred_type(v, const_ref->is_int_const() ? 
TypeDataInt::create() : TypeDataSlice::create()); + + } else if (const auto* glob_ref = v->sym->try_as()) { + assign_inferred_type(v, glob_ref->declared_type); + + } else if (const auto* fun_ref = v->sym->try_as()) { + // it's `globalF` / `globalF` - references to functions used as non-call + V v_instantiationTs = v->get_instantiationTs(); + + if (fun_ref->is_generic_function() && !v_instantiationTs) { + // `genericFn` is invalid as non-call, can't be used without + v->error("can not use a generic function " + to_string(fun_ref) + " as non-call"); + + } else if (fun_ref->is_generic_function()) { + // `genericFn` is valid, it's a reference to instantiation + std::vector substitutions = collect_fun_generic_substitutions_from_manually_specified(v->loc, fun_ref, v_instantiationTs); + fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, std::move(substitutions)); + v->mutate()->assign_sym(fun_ref); + + } else if (UNLIKELY(v_instantiationTs != nullptr)) { + // non-generic function referenced like `return beginCell;` + v_instantiationTs->error("not generic function used with generic T"); + } + + fun_ref->mutate()->assign_is_used_as_noncall(); + get_or_infer_return_type(fun_ref); + assign_inferred_type(v, fun_ref->inferred_full_type); + return; + + } else { + tolk_assert(false); + } + + // for non-functions: `local_var` and similar not allowed + if (UNLIKELY(v->has_instantiationTs())) { + v->get_instantiationTs()->error("generic T not expected here"); + } + } + + // given `genericF` / `t.tupleFirst` (the user manually specified instantiation Ts), + // validate and collect them + // returns: [int, slice] / [cell] + static std::vector collect_fun_generic_substitutions_from_manually_specified(SrcLocation loc, const FunctionData* fun_ref, V instantiationT_list) { + if (fun_ref->genericTs->size() != instantiationT_list->get_items().size()) { + throw ParseError(loc, "wrong count of generic T: expected " + std::to_string(fun_ref->genericTs->size()) + ", got " + 
std::to_string(instantiationT_list->size())); + } + + std::vector substitutions; + substitutions.reserve(instantiationT_list->size()); + for (int i = 0; i < instantiationT_list->size(); ++i) { + substitutions.push_back(instantiationT_list->get_item(i)->substituted_type); + } + + return substitutions; + } + + // when generic Ts have been collected from user-specified or deduced from arguments, + // instantiate a generic function + // example: was `t.tuplePush(2)`, deduced , instantiate `tuplePush` + // example: was `t.tuplePush(2)`, read , instantiate `tuplePush` (will later fail type check) + // example: was `var cb = t.tupleFirst;` (used as reference, as non-call), instantiate `tupleFirst` + // returns fun_ref to instantiated function + static const FunctionData* check_and_instantiate_generic_function(SrcLocation loc, const FunctionData* fun_ref, std::vector&& substitutionTs) { + // T for asm function must be a TVM primitive (width 1), otherwise, asm would act incorrectly + if (fun_ref->is_asm_function() || fun_ref->is_builtin_function()) { + for (int i = 0; i < static_cast(substitutionTs.size()); ++i) { + if (substitutionTs[i]->calc_width_on_stack() != 1) { + fire_error_calling_asm_function_with_non1_stack_width_arg(loc, fun_ref, substitutionTs, i); + } + } + } + + std::string inst_name = generate_instantiated_name(fun_ref->name, substitutionTs); + try { + // make deep clone of `f` with substitutionTs + // (if `f` was already instantiated, it will be immediately returned from a symbol table) + return instantiate_generic_function(loc, fun_ref, inst_name, std::move(substitutionTs)); + } catch (const ParseError& ex) { + throw ParseError(ex.where, "while instantiating generic function `" + inst_name + "` at " + loc.to_string() + ": " + ex.message); + } + } + + void infer_dot_access(V v, TypePtr hint) { + // it's NOT a method call `t.tupleSize()` (since such cases are handled by infer_function_call) + // it's `t.0`, `getUser().id`, and `t.tupleSize` (as a reference, 
not as a call) + infer_any_expr(v->get_obj()); + // our goal is to fill v->target knowing type of obj + V v_ident = v->get_identifier(); // field/method name vertex + V v_instantiationTs = v->get_instantiationTs(); + std::string_view field_name = v_ident->name; + + // for now, Tolk doesn't have structures, properties, and object-scoped methods + // so, only `t.tupleSize` is allowed, look up a global function + const Symbol* sym = lookup_global_symbol(field_name); + if (!sym) { + v_ident->error("undefined symbol `" + static_cast(field_name) + "`"); + } + const FunctionData* fun_ref = sym->try_as(); + if (!fun_ref) { + v_ident->error("referencing a non-function"); + } + + // `t.tupleSize` is ok, `cs.tupleSize` not + if (!fun_ref->parameters[0].declared_type->can_rhs_be_assigned(v->get_obj()->inferred_type)) { + v_ident->error("referencing a method for " + to_string(fun_ref->parameters[0]) + " with an object of type " + to_string(v->get_obj())); + } + + if (fun_ref->is_generic_function() && !v_instantiationTs) { + // `genericFn` and `t.tupleAt` are invalid as non-call, they can't be used without + v->error("can not use a generic function " + to_string(fun_ref) + " as non-call"); + + } else if (fun_ref->is_generic_function()) { + // `t.tupleAt` is valid, it's a reference to instantiation + std::vector substitutions = collect_fun_generic_substitutions_from_manually_specified(v->loc, fun_ref, v_instantiationTs); + fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, std::move(substitutions)); + + } else if (UNLIKELY(v_instantiationTs != nullptr)) { + // non-generic method referenced like `var cb = c.cellHash;` + v_instantiationTs->error("not generic function used with generic T"); + } + + fun_ref->mutate()->assign_is_used_as_noncall(); + v->mutate()->assign_target(fun_ref); + get_or_infer_return_type(fun_ref); + assign_inferred_type(v, fun_ref->inferred_full_type); // type of `t.tupleSize` is TypeDataFunCallable + } + + void infer_function_call(V v, TypePtr 
hint) { + AnyExprV callee = v->get_callee(); + + // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` + int delta_self = 0; + AnyExprV dot_obj = nullptr; + const FunctionData* fun_ref = nullptr; + V v_instantiationTs = nullptr; + + if (auto v_ref = callee->try_as()) { + // `globalF()` / `globalF()` / `local_var()` / `SOME_CONST()` + fun_ref = v_ref->sym->try_as(); // not null for `globalF` + v_instantiationTs = v_ref->get_instantiationTs(); // present for `globalF()` + + } else if (auto v_dot = callee->try_as()) { + // `obj.someMethod()` / `obj.someMethod()` / `getF().someMethod()` / `obj.SOME_CONST()` + delta_self = 1; + dot_obj = v_dot->get_obj(); + v_instantiationTs = v_dot->get_instantiationTs(); // present for `obj.someMethod()` + infer_any_expr(dot_obj); + + // for now, Tolk doesn't have object-scoped methods, so method resolving doesn't depend on obj type + // (in other words, `globalFunction(a)` = `a.globalFunction()`) + std::string_view method_name = v_dot->get_field_name(); + const Symbol* sym = lookup_global_symbol(method_name); + if (!sym) { + v_dot->get_identifier()->error("undefined symbol `" + static_cast(method_name) + "`"); + } + fun_ref = sym->try_as(); + if (!fun_ref) { + v_dot->get_identifier()->error("calling a non-function"); + } + + } else { + // `getF()()` / `5()` + // fun_ref remains nullptr + } + + // infer argument types, looking at fun_ref's parameters as hints + for (int i = 0; i < v->get_num_args(); ++i) { + TypePtr param_type = fun_ref && i < fun_ref->get_num_params() - delta_self ? fun_ref->parameters[delta_self + i].declared_type : nullptr; + auto arg_i = v->get_arg(i); + infer_any_expr(arg_i->get_expr(), param_type && !param_type->has_genericT_inside() ? 
param_type : nullptr); + assign_inferred_type(arg_i, arg_i->get_expr()); + } + + // handle `local_var()` / `getF()()` / `5()` / `SOME_CONST()` / `obj.method()()()` + if (!fun_ref) { + // treat callee like a usual expression, which must have "callable" inferred type + infer_any_expr(callee); + const TypeDataFunCallable* f_callable = callee->inferred_type->try_as(); + if (!f_callable) { // `5()` / `SOME_CONST()` / `null()` + v->error("calling a non-function"); + } + // check arguments count and their types + if (v->get_num_args() != static_cast(f_callable->params_types.size())) { + v->error("expected " + std::to_string(f_callable->params_types.size()) + " arguments, got " + std::to_string(v->get_arg_list()->size())); + } + for (int i = 0; i < v->get_num_args(); ++i) { + if (!f_callable->params_types[i]->can_rhs_be_assigned(v->get_arg(i)->inferred_type)) { + v->get_arg(i)->error("can not pass " + to_string(v->get_arg(i)) + " to " + to_string(f_callable->params_types[i])); + } + } + v->mutate()->assign_fun_ref(nullptr); // no fun_ref to a global function + assign_inferred_type(v, f_callable->return_type); + return; + } + + // so, we have a call `f(args)` or `obj.f(args)`, f is a global function (fun_ref) (code / asm / builtin) + // if it's a generic function `f`, we need to instantiate it, like `f` + // same for generic methods `t.tupleAt`, need to achieve `t.tupleAt` + + if (fun_ref->is_generic_function() && v_instantiationTs) { + // if Ts are specified by a user like `f(args)` / `t.tupleAt()`, take them + std::vector substitutions = collect_fun_generic_substitutions_from_manually_specified(v->loc, fun_ref, v_instantiationTs); + fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, std::move(substitutions)); + + } else if (fun_ref->is_generic_function()) { + // if `f` called like `f(args)`, deduce T from arg types + std::vector arg_types; + arg_types.reserve(delta_self + v->get_num_args()); + if (dot_obj) { + arg_types.push_back(dot_obj->inferred_type); + 
} + for (int i = 0; i < v->get_num_args(); ++i) { + arg_types.push_back(v->get_arg(i)->inferred_type); + } + + td::Result> deduced = deduce_substitutionTs_on_generic_func_call(fun_ref, std::move(arg_types), hint); + if (deduced.is_error()) { + v->error(deduced.error().message().str() + " for generic function " + to_string(fun_ref)); + } + fun_ref = check_and_instantiate_generic_function(v->loc, fun_ref, deduced.move_as_ok()); + + } else if (UNLIKELY(v_instantiationTs != nullptr)) { + // non-generic function/method called with type arguments, like `c.cellHash()` / `beginCell()` + v_instantiationTs->error("calling a not generic function with generic T"); + } + + v->mutate()->assign_fun_ref(fun_ref); + // since for `t.tupleAt()`, infer_dot_access() not called for callee = "t.tupleAt", assign its target here + if (v->is_dot_call()) { + v->get_callee()->as()->mutate()->assign_target(fun_ref); + v->get_callee()->as()->mutate()->assign_inferred_type(fun_ref->inferred_full_type); + } + // check arguments count and their types + check_function_arguments(fun_ref, v->get_arg_list(), dot_obj); + // get return type either from user-specified declaration or infer here on demand traversing its body + get_or_infer_return_type(fun_ref); + TypePtr inferred_type = dot_obj && fun_ref->does_return_self() ? dot_obj->inferred_type : fun_ref->inferred_return_type; + assign_inferred_type(v, inferred_type); + assign_inferred_type(callee, fun_ref->inferred_full_type); + // note, that mutate params don't affect typing, they are handled when converting to IR + } + + void infer_tensor(V v, TypePtr hint) { + const TypeDataTensor* tensor_hint = hint ? hint->try_as() : nullptr; + std::vector types_list; + types_list.reserve(v->get_items().size()); + for (int i = 0; i < v->size(); ++i) { + AnyExprV item = v->get_item(i); + infer_any_expr(item, tensor_hint && i < tensor_hint->size() ? 
tensor_hint->items[i] : nullptr); + types_list.emplace_back(item->inferred_type); + } + assign_inferred_type(v, TypeDataTensor::create(std::move(types_list))); + } + + void infer_typed_tuple(V v, TypePtr hint) { + const TypeDataTypedTuple* tuple_hint = hint ? hint->try_as() : nullptr; + std::vector types_list; + types_list.reserve(v->get_items().size()); + for (int i = 0; i < v->size(); ++i) { + AnyExprV item = v->get_item(i); + infer_any_expr(item, tuple_hint && i < tuple_hint->size() ? tuple_hint->items[i] : nullptr); + types_list.emplace_back(item->inferred_type); + } + assign_inferred_type(v, TypeDataTypedTuple::create(std::move(types_list))); + } + + static void infer_null_keyword(V v) { + assign_inferred_type(v, TypeDataNullLiteral::create()); + } + + static void infer_underscore(V v, TypePtr hint) { + // if execution is here, underscore is either used as lhs of assignment, or incorrectly, like `f(_)` + // more precise is to always set unknown here, but for incorrect usages, instead of an error + // "can not pass unknown to X" would better be an error it can't be used as a value, at later steps + assign_inferred_type(v, hint ? hint : TypeDataUnknown::create()); + } + + static void infer_empty_expression(V v) { + assign_inferred_type(v, TypeDataUnknown::create()); + } + + void process_sequence(V v) { + for (AnyV item : v->get_items()) { + process_any_statement(item); + } + } + + static bool is_expr_valid_as_return_self(AnyExprV return_expr) { + // `return self` + if (return_expr->type == ast_reference && return_expr->as()->get_name() == "self") { + return true; + } + // `return self.someMethod()` + if (auto v_call = return_expr->try_as(); v_call && v_call->is_dot_call()) { + return v_call->fun_maybe && v_call->fun_maybe->does_return_self() && is_expr_valid_as_return_self(v_call->get_dot_obj()); + } + // `return cond ? ... 
: ...` + if (auto v_ternary = return_expr->try_as()) { + return is_expr_valid_as_return_self(v_ternary->get_when_true()) && is_expr_valid_as_return_self(v_ternary->get_when_false()); + } + return false; + } + + void process_return_statement(V v) { + if (v->has_return_value()) { + infer_any_expr(v->get_return_value(), current_function->declared_return_type); + } else { + assign_inferred_type(v->get_return_value(), TypeDataVoid::create()); + } + if (current_function->does_return_self()) { + return_unifier.unify_with(current_function->parameters[0].declared_type); + if (!is_expr_valid_as_return_self(v->get_return_value())) { + v->error("invalid return from `self` function"); + } + return; + } + + TypePtr expr_type = v->get_return_value()->inferred_type; + if (current_function->declared_return_type) { + if (!current_function->declared_return_type->can_rhs_be_assigned(expr_type)) { + v->get_return_value()->error("can not convert type " + to_string(expr_type) + " to return type " + to_string(current_function->declared_return_type)); + } + } else { + if (!return_unifier.unify_with(expr_type)) { + v->get_return_value()->error("can not unify type " + to_string(expr_type) + " with previous return type " + to_string(return_unifier.get_result())); + } + } + } + + void process_if_statement(V v) { + AnyExprV cond = v->get_cond(); + infer_any_expr(cond); + if (!expect_integer(cond) && !expect_boolean(cond)) { + cond->error("can not use " + to_string(cond) + " as a boolean condition"); + } + process_any_statement(v->get_if_body()); + process_any_statement(v->get_else_body()); + } + + void process_repeat_statement(V v) { + AnyExprV cond = v->get_cond(); + infer_any_expr(cond); + if (!expect_integer(cond)) { + cond->error("condition of `repeat` must be an integer, got " + to_string(cond)); + } + process_any_statement(v->get_body()); + } + + void process_while_statement(V v) { + AnyExprV cond = v->get_cond(); + infer_any_expr(cond); + if (!expect_integer(cond) && 
!expect_boolean(cond)) {
+      cond->error("can not use " + to_string(cond) + " as a boolean condition");
+    }
+    process_any_statement(v->get_body());
+  }
+
+  void process_do_while_statement(V v) {  // the body is visited first, then the condition is inferred and checked
+    process_any_statement(v->get_body());
+    AnyExprV cond = v->get_cond();
+    infer_any_expr(cond);
+    if (!expect_integer(cond) && !expect_boolean(cond)) {  // a condition may be either int or bool
+      cond->error("can not use " + to_string(cond) + " as a boolean condition");
+    }
+  }
+
+  void process_throw_statement(V v) {  // excNo must be int; the thrown arg, when present, must occupy 1 stack slot
+    infer_any_expr(v->get_thrown_code());
+    if (!expect_integer(v->get_thrown_code())) {
+      v->get_thrown_code()->error("excNo of `throw` must be an integer, got " + to_string(v->get_thrown_code()));
+    }
+    infer_any_expr(v->get_thrown_arg());  // inferred unconditionally; the width check below applies only if an arg is present
+    if (v->has_thrown_arg() && v->get_thrown_arg()->inferred_type->calc_width_on_stack() != 1) {
+      v->get_thrown_arg()->error("can not throw " + to_string(v->get_thrown_arg()) + ", exception arg must occupy exactly 1 stack slot");
+    }
+  }
+
+  void process_assert_statement(V v) {  // the condition must be int or bool; the thrown excNo must be int
+    AnyExprV cond = v->get_cond();
+    infer_any_expr(cond);
+    if (!expect_integer(cond) && !expect_boolean(cond)) {
+      cond->error("can not use " + to_string(cond) + " as a boolean condition");
+    }
+    infer_any_expr(v->get_thrown_code());
+    if (!expect_integer(v->get_thrown_code())) {
+      v->get_thrown_code()->error("thrown excNo of `assert` must be an integer, got " + to_string(v->get_thrown_code()));  // blame the excNo expression (as `throw` does), not the condition
+    }
+  }
+
+  static void process_catch_variable(AnyExprV catch_var, TypePtr catch_var_type) {  // types the catch expression and, when it refers to a symbol, that symbol too
+    if (auto v_ref = catch_var->try_as(); v_ref && v_ref->sym) {  // not underscore
+      assign_inferred_type(v_ref->sym->as(), catch_var_type);
+    }
+    assign_inferred_type(catch_var, catch_var_type);
+  }
+
+  void process_try_catch_statement(V v) {
+    process_any_statement(v->get_try_body());
+
+    // `catch` has exactly 2 variables: excNo and arg (when missing, they are implicit underscores)
+    // `arg` is a curious thing, it can be any TVM primitive, so assign unknown to it
+    // hence, using `fInt(arg)` (int from parameter is a hint) 
or `arg as slice` works well + // it's not truly correct, because `arg as (int,int)` also compiles, but can never happen, but let it be user responsibility + tolk_assert(v->get_catch_expr()->size() == 2); + std::vector types_list = {TypeDataInt::create(), TypeDataUnknown::create()}; + process_catch_variable(v->get_catch_expr()->get_item(0), types_list[0]); + process_catch_variable(v->get_catch_expr()->get_item(1), types_list[1]); + assign_inferred_type(v->get_catch_expr(), TypeDataTensor::create(std::move(types_list))); + + process_any_statement(v->get_catch_body()); + } + +public: + static void assign_fun_full_type(const FunctionData* fun_ref, TypePtr inferred_return_type) { + // calculate function full type `fun(params) -> ret_type` + std::vector params_types; + params_types.reserve(fun_ref->get_num_params()); + for (const LocalVarData& param : fun_ref->parameters) { + params_types.push_back(param.declared_type); + } + assign_inferred_type(fun_ref, inferred_return_type, TypeDataFunCallable::create(std::move(params_types), inferred_return_type)); + } + + void start_visiting_function(const FunctionData* fun_ref, V v_function) { + if (fun_ref->is_code_function()) { + current_function = fun_ref; + process_any_statement(v_function->get_body()); + current_function = nullptr; + + if (fun_ref->is_implicit_return()) { + bool is_ok_with_void = fun_ref->declared_return_type + ? fun_ref->declared_return_type->can_rhs_be_assigned(TypeDataVoid::create()) + : return_unifier.unify_with_implicit_return_void(); + if (!is_ok_with_void || fun_ref->does_return_self()) { + throw ParseError(v_function->get_body()->as()->loc_end, "missing return"); + } + } + } else { + // asm functions should be strictly typed, this was checked earlier + tolk_assert(fun_ref->declared_return_type); + } + + TypePtr inferred_return_type = fun_ref->declared_return_type ? 
fun_ref->declared_return_type : return_unifier.get_result(); + assign_fun_full_type(fun_ref, inferred_return_type); + fun_ref->mutate()->assign_is_type_inferring_done(); + } +}; + +class LaunchInferTypesAndMethodsOnce final { +public: + static bool should_visit_function(const FunctionData* fun_ref) { + // since inferring can be requested on demand, prevent second execution from a regular pipeline launcher + return !fun_ref->is_type_inferring_done() && !fun_ref->is_generic_function(); + } + + static void start_visiting_function(const FunctionData* fun_ref, V v_function) { + InferCheckTypesAndCallsAndFieldsVisitor visitor; + visitor.start_visiting_function(fun_ref, v_function); + } +}; + +// infer return type "on demand" +// example: `fun f() { return g(); } fun g() { ... }` +// when analyzing `f()`, we need to infer what fun_ref=g returns +// (if `g` is generic, it was already instantiated, so fun_ref=g is here) +static void infer_and_save_return_type_of_function(const FunctionData* fun_ref) { + static std::vector called_stack; + + tolk_assert(!fun_ref->is_generic_function() && !fun_ref->is_type_inferring_done()); + // if `g` has return type declared, like `fun g(): int { ... 
}`, don't traverse its body + if (fun_ref->declared_return_type) { + InferCheckTypesAndCallsAndFieldsVisitor::assign_fun_full_type(fun_ref, fun_ref->declared_return_type); + return; + } + + // prevent recursion of untyped functions, like `fun f() { return g(); } fun g() { return f(); }` + bool contains = std::find(called_stack.begin(), called_stack.end(), fun_ref) != called_stack.end(); + if (contains) { + fun_ref->ast_root->error("could not infer return type of " + to_string(fun_ref) + ", because it appears in a recursive call chain; specify `: ` manually"); + } + + // dig into g's body; it's safe, since the compiler is single-threaded + // on finish, fun_ref->inferred_return_type is filled, and won't be called anymore + called_stack.push_back(fun_ref); + InferCheckTypesAndCallsAndFieldsVisitor visitor; + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); + called_stack.pop_back(); +} + +void pipeline_infer_types_and_calls_and_fields() { + visit_ast_of_all_functions(); +} + +void pipeline_infer_types_and_calls_and_fields(const FunctionData* fun_ref) { + InferCheckTypesAndCallsAndFieldsVisitor visitor; + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); +} + +} // namespace tolk diff --git a/tolk/pipe-optimize-boolean-expr.cpp b/tolk/pipe-optimize-boolean-expr.cpp new file mode 100644 index 000000000..037502566 --- /dev/null +++ b/tolk/pipe-optimize-boolean-expr.cpp @@ -0,0 +1,172 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-replacer.h" +#include "type-system.h" + +/* + * This pipe does some optimizations related to booleans. + * It happens after type inferring, when we know types of all expressions. + * + * Example: `boolVar == true` -> `boolVar`. + * Example: `!!boolVar` -> `boolVar`. + * Also, it unwraps parentheses inside if conditions and similar: `assert(((x)), 404)` -> `assert(x, 404)` + * + * todo some day, replace && || with & | when it's safe (currently, && always produces IFs in Fift) + * It's tricky to determine whether replacing is safe. + * For example, safe: `a > 0 && a < 10` / `a != 3 && a != 5` + * For example, unsafe: `cached && calc()` / `a > 0 && log(a)` / `b != 0 && a / b > 1` / `i >= 0 && arr[idx]` / `f != null && close(f)` + */ + +namespace tolk { + +static AnyExprV unwrap_parenthesis(AnyExprV v) { + while (v->type == ast_parenthesized_expression) { + v = v->as()->get_expr(); + } + return v; +} + +struct OptimizerBooleanExpressionsReplacer final : ASTReplacerInFunctionBody { + static V create_int_const(SrcLocation loc, td::RefInt256&& intval) { + auto v_int = createV(loc, std::move(intval), {}); + v_int->assign_inferred_type(TypeDataInt::create()); + v_int->assign_rvalue_true(); + return v_int; + } + + static V create_bool_const(SrcLocation loc, bool bool_val) { + auto v_bool = createV(loc, bool_val); + // a bool literal is typed as bool (consistent with create_logical_not_for_bool below), so later comparisons against TypeDataBool match it + v_bool->assign_inferred_type(TypeDataBool::create()); + v_bool->assign_rvalue_true(); + return v_bool; + } + + static V create_logical_not_for_bool(SrcLocation loc, AnyExprV rhs) { + auto v_not = createV(loc, "!", tok_logical_not, rhs); + v_not->assign_inferred_type(TypeDataBool::create()); + v_not->assign_rvalue_true(); + v_not->assign_fun_ref(lookup_global_symbol("!b_")->as()); + return v_not; + } + +protected: + + AnyExprV replace(V v) override { + parent::replace(v); + + if (v->tok == 
tok_logical_not) { + if (auto inner_not = v->get_rhs()->try_as(); inner_not && inner_not->tok == tok_logical_not) { + AnyExprV cond_not_not = inner_not->get_rhs(); + // `!!boolVar` => `boolVar` + if (cond_not_not->inferred_type == TypeDataBool::create()) { + return cond_not_not; + } + // `!!intVar` => `intVar != 0` + if (cond_not_not->inferred_type == TypeDataInt::create()) { + auto v_zero = create_int_const(v->loc, td::make_refint(0)); + auto v_neq = createV(v->loc, "!=", tok_neq, cond_not_not, v_zero); + v_neq->mutate()->assign_rvalue_true(); + v_neq->mutate()->assign_inferred_type(TypeDataBool::create()); + v_neq->mutate()->assign_fun_ref(lookup_global_symbol("_!=_")->as()); + return v_neq; + } + } + if (auto inner_bool = v->get_rhs()->try_as()) { + // `!true` / `!false` + return create_bool_const(v->loc, !inner_bool->bool_val); + } + } + + return v; + } + + AnyExprV replace(V v) override { + parent::replace(v); + + if (v->tok == tok_eq || v->tok == tok_neq) { + AnyExprV lhs = v->get_lhs(); + AnyExprV rhs = v->get_rhs(); + if (lhs->inferred_type == TypeDataBool::create() && rhs->type == ast_bool_const) { + // `boolVar == true` / `boolVar != false` + if (rhs->as()->bool_val ^ (v->tok == tok_neq)) { + return lhs; + } + // `boolVar != true` / `boolVar == false` + return create_logical_not_for_bool(v->loc, lhs); + } + } + + return v; + } + + AnyV replace(V v) override { + parent::replace(v); + if (v->get_cond()->type == ast_parenthesized_expression) { + v = createV(v->loc, v->is_ifnot, unwrap_parenthesis(v->get_cond()), v->get_if_body(), v->get_else_body()); + } + + // `if (!x)` -> ifnot(x) + while (auto v_cond_unary = v->get_cond()->try_as()) { + if (v_cond_unary->tok != tok_logical_not) { + break; + } + v = createV(v->loc, !v->is_ifnot, v_cond_unary->get_rhs(), v->get_if_body(), v->get_else_body()); + } + + return v; + } + + AnyV replace(V v) override { + parent::replace(v); + + if (v->get_cond()->type == ast_parenthesized_expression) { + v = createV(v->loc, 
unwrap_parenthesis(v->get_cond()), v->get_body()); + } + return v; + } + + AnyV replace(V v) override { + parent::replace(v); + + if (v->get_cond()->type == ast_parenthesized_expression) { + v = createV(v->loc, v->get_body(), unwrap_parenthesis(v->get_cond())); + } + return v; + } + + AnyV replace(V v) override { + parent::replace(v); + + if (v->get_cond()->type == ast_parenthesized_expression) { + v = createV(v->loc, unwrap_parenthesis(v->get_cond()), v->get_thrown_code()); + } + return v; + } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } +}; + +void pipeline_optimize_boolean_expressions() { + replace_ast_of_all_functions(); +} + +} // namespace tolk diff --git a/tolk/pipe-refine-lvalue-for-mutate.cpp b/tolk/pipe-refine-lvalue-for-mutate.cpp new file mode 100644 index 000000000..45dd3a94f --- /dev/null +++ b/tolk/pipe-refine-lvalue-for-mutate.cpp @@ -0,0 +1,126 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-visitor.h" + +/* + * This pipe refines rvalue/lvalue and checks `mutate` arguments validity. + * It happens after type inferring (after methods binding), because it uses fun_ref of calls. 
+ * + * Example: `a.increment().increment()`, the first `a.increment()` becomes lvalue (assume that increment mutates self). + * Example: `increment(a)` is invalid, should be `increment(mutate a)`. + * + * Note, that explicitly specifying `mutate` for arguments, like `increment(mutate a)` is on purpose. + * If we wished `increment(a)` to be valid (to work and mutate `a`, like passing by ref), it would also be done here, + * refining `a` to be lvalue. But to avoid unexpected mutations, `mutate` keyword for an argument is required. + * So, for mutated arguments, instead of setting lvalue, we check its presence. + */ + +namespace tolk { + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_invalid_mutate_arg_passed(AnyExprV v, const FunctionData* fun_ref, const LocalVarData& p_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { + std::string arg_str(arg_expr->type == ast_reference ? arg_expr->as()->get_name() : "obj"); + + // case: `loadInt(cs, 32)`; suggest: `cs.loadInt(32)` + if (p_sym.is_mutate_parameter() && !arg_passed_as_mutate && !called_as_method && p_sym.idx == 0 && fun_ref->does_accept_self()) { + v->error("`" + fun_ref->name + "` is a mutating method; consider calling `" + arg_str + "." + fun_ref->name + "()`, not `" + fun_ref->name + "(" + arg_str + ")`"); + } + // case: `cs.mutating_function()`; suggest: `mutating_function(mutate cs)` or make it a method + if (p_sym.is_mutate_parameter() && called_as_method && p_sym.idx == 0 && !fun_ref->does_accept_self()) { + v->error("function `" + fun_ref->name + "` mutates parameter `" + p_sym.name + "`; consider calling `" + fun_ref->name + "(mutate " + arg_str + ")`, not `" + arg_str + "." 
+ fun_ref->name + "`(); alternatively, rename parameter to `self` to make it a method"); + } + // case: `mutating_function(arg)`; suggest: `mutate arg` + if (p_sym.is_mutate_parameter() && !arg_passed_as_mutate) { + v->error("function `" + fun_ref->name + "` mutates parameter `" + p_sym.name + "`; you need to specify `mutate` when passing an argument, like `mutate " + arg_str + "`"); + } + // case: `usual_function(mutate arg)` + if (!p_sym.is_mutate_parameter() && arg_passed_as_mutate) { + v->error("incorrect `mutate`, since `" + fun_ref->name + "` does not mutate this parameter"); + } + throw Fatal("unreachable"); +} + + +class RefineLvalueForMutateArgumentsVisitor final : public ASTVisitorFunctionBody { + void visit(V v) override { + // v is `globalF(args)` / `globalF(args)` / `obj.method(args)` / `local_var(args)` / `getF()(args)` + const FunctionData* fun_ref = v->fun_maybe; + if (!fun_ref) { + parent::visit(v); + for (int i = 0; i < v->get_num_args(); ++i) { + auto v_arg = v->get_arg(i); + if (v_arg->passed_as_mutate) { + v_arg->error("`mutate` used for non-mutate argument"); + } + } + return; + } + + int delta_self = v->is_dot_call(); + tolk_assert(fun_ref->get_num_params() == delta_self + v->get_num_args()); + + if (v->is_dot_call()) { + if (fun_ref->does_mutate_self()) { + // for `b.storeInt()`, `b` should become lvalue, since `storeInt` is a method mutating self + // but: `beginCell().storeInt()`, then `beginCell()` is not lvalue + // (it will be extracted as tmp var when transforming AST to IR) + AnyExprV leftmost_obj = v->get_dot_obj(); + while (true) { + if (auto as_par = leftmost_obj->try_as()) { + leftmost_obj = as_par->get_expr(); + } else if (auto as_cast = leftmost_obj->try_as()) { + leftmost_obj = as_cast->get_expr(); + } else { + break; + } + } + bool will_be_extracted_as_tmp_var = leftmost_obj->type == ast_function_call; + if (!will_be_extracted_as_tmp_var) { + leftmost_obj->mutate()->assign_lvalue_true(); + 
v->get_dot_obj()->mutate()->assign_lvalue_true(); + } + } + + if (!fun_ref->does_accept_self() && fun_ref->parameters[0].is_mutate_parameter()) { + fire_error_invalid_mutate_arg_passed(v, fun_ref, fun_ref->parameters[0], true, false, v->get_dot_obj()); + } + } + + for (int i = 0; i < v->get_num_args(); ++i) { + const LocalVarData& p_sym = fun_ref->parameters[delta_self + i]; + auto arg_i = v->get_arg(i); + if (p_sym.is_mutate_parameter() != arg_i->passed_as_mutate) { + fire_error_invalid_mutate_arg_passed(arg_i, fun_ref, p_sym, false, arg_i->passed_as_mutate, arg_i->get_expr()); + } + parent::visit(arg_i); + } + parent::visit(v->get_callee()); + } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + return fun_ref->is_code_function() && !fun_ref->is_generic_function(); + } +}; + +void pipeline_refine_lvalue_for_mutate_arguments() { + visit_ast_of_all_functions(); +} + +} // namespace tolk diff --git a/tolk/pipe-register-symbols.cpp b/tolk/pipe-register-symbols.cpp index 569d434aa..2dae0d233 100644 --- a/tolk/pipe-register-symbols.cpp +++ b/tolk/pipe-register-symbols.cpp @@ -13,340 +13,214 @@ You should have received a copy of the GNU General Public License along with TON Blockchain. If not, see . - - In addition, as a special exception, the copyright holders give permission - to link the code of portions of this program with the OpenSSL library. - You must obey the GNU General Public License in all respects for all - of the code used other than OpenSSL. If you modify file(s) with this - exception, you may extend this exception to your version of the file(s), - but you are not obligated to do so. If you do not wish to do so, delete this - exception statement from your version. If you delete this exception statement - from all source files in the program, then also delete it here. 
*/ #include "tolk.h" #include "platform-utils.h" #include "src-file.h" #include "ast.h" #include "compiler-state.h" +#include "constant-evaluator.h" +#include "generics-helpers.h" #include "td/utils/crypto.h" +#include "type-system.h" #include -namespace tolk { - -Expr* process_expr(AnyV v, CodeBlob& code); - -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_redefinition_of_symbol(V v_ident, SymDef* existing) { - if (existing->loc.is_stdlib()) { - v_ident->error("redefinition of a symbol from stdlib"); - } else if (existing->loc.is_defined()) { - v_ident->error("redefinition of symbol, previous was at: " + existing->loc.to_string()); - } else { - v_ident->error("redefinition of built-in symbol"); - } -} +/* + * This pipe registers global symbols: functions, constants, global vars, etc. + * It happens just after all files have been parsed to AST. + * + * "Registering" means adding symbols to a global symbol table. + * After this pass, any global symbol can be looked up. + * Note, that local variables are not analyzed here, it's a later step. + * Before digging into locals, we need a global symtable to be filled, exactly done here. 
+ */ -static int calc_sym_idx(std::string_view sym_name) { - return G.symbols.lookup_add(sym_name); -} +namespace tolk { -static td::RefInt256 calculate_method_id_for_entrypoint(std::string_view func_name) { +static int calculate_method_id_for_entrypoint(std::string_view func_name) { if (func_name == "main" || func_name == "onInternalMessage") { - return td::make_refint(0); + return 0; } if (func_name == "onExternalMessage") { - return td::make_refint(-1); + return -1; } if (func_name == "onRunTickTock") { - return td::make_refint(-2); + return -2; } if (func_name == "onSplitPrepare") { - return td::make_refint(-3); + return -3; } if (func_name == "onSplitInstall") { - return td::make_refint(-4); + return -4; } tolk_assert(false); } -static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name) { +static int calculate_method_id_by_func_name(std::string_view func_name) { unsigned int crc = td::crc16(static_cast(func_name)); - return td::make_refint((crc & 0xffff) | 0x10000); + return static_cast(crc & 0xffff) | 0x10000; } -static void calc_arg_ret_order_of_asm_function(V v_body, V param_list, TypeExpr* ret_type, - std::vector& arg_order, std::vector& ret_order) { - int cnt = param_list->size(); - int width = ret_type->get_width(); - if (width < 0 || width > 16) { - v_body->error("return type of an assembler built-in function must have a well-defined fixed width"); +static void validate_arg_ret_order_of_asm_function(V v_body, int n_params, TypePtr ret_type) { + if (!ret_type) { + v_body->error("asm function must declare return type (before asm instructions)"); } - if (cnt > 16) { - v_body->error("assembler built-in function must have at most 16 arguments"); - } - std::vector cum_arg_width; - cum_arg_width.push_back(0); - int tot_width = 0; - for (int i = 0; i < cnt; ++i) { - V v_param = param_list->get_param(i); - int arg_width = v_param->param_type->get_width(); - if (arg_width < 0 || arg_width > 16) { - v_param->error("parameters of an assembler 
built-in function must have a well-defined fixed width"); - } - cum_arg_width.push_back(tot_width += arg_width); + if (n_params > 16) { + v_body->error("asm function can have at most 16 parameters"); } + + // asm(param1 ... paramN), param names were previously mapped into indices if (!v_body->arg_order.empty()) { - if (static_cast(v_body->arg_order.size()) != cnt) { + if (static_cast(v_body->arg_order.size()) != n_params) { v_body->error("arg_order of asm function must specify all parameters"); } - std::vector visited(cnt, false); - for (int i = 0; i < cnt; ++i) { - int j = v_body->arg_order[i]; + std::vector visited(v_body->arg_order.size(), false); + for (int j : v_body->arg_order) { if (visited[j]) { v_body->error("arg_order of asm function contains duplicates"); } visited[j] = true; - int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; - while (c1 < c2) { - arg_order.push_back(c1++); - } } - tolk_assert(arg_order.size() == (unsigned)tot_width); } + + // asm(-> 0 2 1 3), check for a shuffled range 0...N + // correctness of N (actual return width onto a stack) will be checked after type inferring and generics instantiation if (!v_body->ret_order.empty()) { - if (static_cast(v_body->ret_order.size()) != width) { - v_body->error("ret_order of this asm function expected to be width = " + std::to_string(width)); - } - std::vector visited(width, false); - for (int i = 0; i < width; ++i) { - int j = v_body->ret_order[i]; - if (j < 0 || j >= width || visited[j]) { - v_body->error("ret_order contains invalid integer, not in range 0 .. width-1"); + std::vector visited(v_body->ret_order.size(), false); + for (int j : v_body->ret_order) { + if (j < 0 || j >= static_cast(v_body->ret_order.size()) || visited[j]) { + v_body->error("ret_order contains invalid integer, not in range 0 .. 
N"); } visited[j] = true; } - ret_order = v_body->ret_order; } } -static void register_constant(V v) { - AnyV init_value = v->get_init_value(); - SymDef* sym_def = define_global_symbol(calc_sym_idx(v->get_identifier()->name), v->loc); - if (sym_def->value) { - fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); - } +static const GenericsDeclaration* construct_genericTs(V v_list) { + std::vector itemsT; + itemsT.reserve(v_list->size()); - // todo currently, constant value calculation is dirty and roughly: init_value is evaluated to fif code - // and waited to be a single expression - // although it works, of course it should be later rewritten using AST calculations, as well as lots of other parts - CodeBlob code("tmp", v->loc, nullptr, nullptr); - Expr* x = process_expr(init_value, code); - if (!x->is_rvalue()) { - v->get_init_value()->error("expression is not strictly Rvalue"); - } - if (v->declared_type && !v->declared_type->equals_to(x->e_type)) { - v->error("expression type does not match declared type"); - } - SymValConst* sym_val = nullptr; - if (x->cls == Expr::_Const) { // Integer constant - sym_val = new SymValConst(static_cast(G.all_constants.size()), x->intval); - } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) - sym_val = new SymValConst(static_cast(G.all_constants.size()), x->strval); - } else if (x->cls == Expr::_Apply) { // even "1 + 2" is Expr::_Apply (it applies `_+_`) - code.emplace_back(v->loc, Op::_Import, std::vector()); - auto tmp_vars = x->pre_compile(code); - code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars)); - code.emplace_back(v->loc, Op::_Nop); - // It is REQUIRED to execute "optimizations" as in tolk.cpp - code.simplify_var_types(); - code.prune_unreachable_code(); - code.split_vars(true); - for (int i = 0; i < 16; i++) { - code.compute_used_code_vars(); - code.fwd_analyze(); - code.prune_unreachable_code(); - } - code.mark_noreturn(); - AsmOpList out_list(0, &code.vars); - 
code.generate_code(out_list); - if (out_list.list_.size() != 1) { - init_value->error("precompiled expression must result in single operation"); + for (int i = 0; i < v_list->size(); ++i) { + auto v_item = v_list->get_item(i); + auto it_existing = std::find_if(itemsT.begin(), itemsT.end(), [v_item](const GenericsDeclaration::GenericsItem& prev) { + return prev.nameT == v_item->nameT; + }); + if (it_existing != itemsT.end()) { + v_item->error("duplicate generic parameter `" + static_cast(v_item->nameT) + "`"); } - auto op = out_list.list_[0]; - if (!op.is_const()) { - init_value->error("precompiled expression must result in compilation time constant"); - } - if (op.origin.is_null() || !op.origin->is_valid()) { - init_value->error("precompiled expression did not result in a valid integer constant"); + itemsT.emplace_back(v_item->nameT); + } + + return new GenericsDeclaration(std::move(itemsT)); +} + +static void register_constant(V v) { + ConstantValue init_value = eval_const_init_value(v->get_init_value()); + GlobalConstData* c_sym = new GlobalConstData(static_cast(v->get_identifier()->name), v->loc, v->declared_type, std::move(init_value)); + + if (v->declared_type) { + bool ok = (c_sym->is_int_const() && (v->declared_type == TypeDataInt::create())) + || (c_sym->is_slice_const() && (v->declared_type == TypeDataSlice::create())); + if (!ok) { + v->error("expression type does not match declared type"); } - sym_val = new SymValConst(static_cast(G.all_constants.size()), op.origin); - } else { - init_value->error("integer or slice literal or constant expected"); } - sym_def->value = sym_val; -#ifdef TOLK_DEBUG - sym_def->value->sym_name = v->get_identifier()->name; -#endif - G.all_constants.push_back(sym_def); + G.symtable.add_global_const(c_sym); + G.all_constants.push_back(c_sym); + v->mutate()->assign_const_ref(c_sym); } static void register_global_var(V v) { - SymDef* sym_def = define_global_symbol(calc_sym_idx(v->get_identifier()->name), v->loc); - if 
(sym_def->value) { - fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); - } + GlobalVarData* g_sym = new GlobalVarData(static_cast(v->get_identifier()->name), v->loc, v->declared_type); - sym_def->value = new SymValGlobVar(static_cast(G.all_global_vars.size()), v->declared_type); -#ifdef TOLK_DEBUG - sym_def->value->sym_name = v->get_identifier()->name; -#endif - G.all_global_vars.push_back(sym_def); + G.symtable.add_global_var(g_sym); + G.all_global_vars.push_back(g_sym); + v->mutate()->assign_var_ref(g_sym); } -static SymDef* register_parameter(V v, int idx) { +static LocalVarData register_parameter(V v, int idx) { if (v->is_underscore()) { - return nullptr; - } - SymDef* sym_def = define_parameter(calc_sym_idx(v->get_identifier()->name), v->loc); - if (sym_def->value) { - // todo always false now, how to detect similar parameter names? (remember about underscore) - v->error("redefined parameter"); + return {"", v->loc, v->declared_type, 0, idx}; } - SymValVariable* sym_val = new SymValVariable(idx, v->param_type); + int flags = 0; if (v->declared_as_mutate) { - sym_val->flags |= SymValVariable::flagMutateParameter; + flags |= LocalVarData::flagMutateParameter; } - if (!v->declared_as_mutate && idx == 0 && v->get_identifier()->name == "self") { - sym_val->flags |= SymValVariable::flagImmutable; + if (!v->declared_as_mutate && idx == 0 && v->param_name == "self") { + flags |= LocalVarData::flagImmutable; } - sym_def->value = sym_val; -#ifdef TOLK_DEBUG - sym_def->value->sym_name = v->get_identifier()->name; -#endif - return sym_def; + return LocalVarData(static_cast(v->param_name), v->loc, v->declared_type, flags, idx); } static void register_function(V v) { std::string_view func_name = v->get_identifier()->name; - // calculate TypeExpr of a function: it's a map (params -> ret), probably surrounded by forall - TypeExpr* params_tensor_type = nullptr; + // calculate TypeData of a function + std::vector arg_types; + std::vector parameters; int n_params 
= v->get_num_params(); int n_mutate_params = 0; - std::vector parameters_syms; - if (n_params) { - std::vector param_tensor_items; - param_tensor_items.reserve(n_params); - parameters_syms.reserve(n_params); - for (int i = 0; i < n_params; ++i) { - auto v_param = v->get_param(i); - n_mutate_params += static_cast(v_param->declared_as_mutate); - param_tensor_items.emplace_back(v_param->param_type); - parameters_syms.emplace_back(register_parameter(v_param, i)); - } - params_tensor_type = TypeExpr::new_tensor(std::move(param_tensor_items)); - } else { - params_tensor_type = TypeExpr::new_unit(); + arg_types.reserve(n_params); + parameters.reserve(n_params); + for (int i = 0; i < n_params; ++i) { + auto v_param = v->get_param(i); + arg_types.emplace_back(v_param->declared_type); + parameters.emplace_back(register_parameter(v_param, i)); + n_mutate_params += static_cast(v_param->declared_as_mutate); } - TypeExpr* function_type = TypeExpr::new_map(params_tensor_type, v->ret_type); + const GenericsDeclaration* genericTs = nullptr; if (v->genericsT_list) { - std::vector type_vars; - type_vars.reserve(v->genericsT_list->size()); - for (int idx = 0; idx < v->genericsT_list->size(); ++idx) { - type_vars.emplace_back(v->genericsT_list->get_item(idx)->created_type); - } - function_type = TypeExpr::new_forall(std::move(type_vars), function_type); + genericTs = construct_genericTs(v->genericsT_list); } - if (v->marked_as_builtin) { - const SymDef* builtin_func = lookup_symbol(G.symbols.lookup(func_name)); - const SymValFunc* func_val = builtin_func ? dynamic_cast(builtin_func->value) : nullptr; - if (!func_val || !func_val->is_builtin()) { + if (v->is_builtin_function()) { + const Symbol* builtin_func = lookup_global_symbol(func_name); + const FunctionData* fun_ref = builtin_func ? 
builtin_func->as() : nullptr; + if (!fun_ref || !fun_ref->is_builtin_function()) { v->error("`builtin` used for non-builtin function"); } -#ifdef TOLK_DEBUG - // in release, we don't need this check, since `builtin` is used only in stdlib, which is our responsibility - if (!func_val->sym_type->equals_to(function_type) || func_val->is_marked_as_pure() != v->marked_as_pure) { - v->error("declaration for `builtin` function doesn't match an actual one"); - } -#endif + v->mutate()->assign_fun_ref(fun_ref); return; } - SymDef* sym_def = define_global_symbol(calc_sym_idx(func_name), v->loc); - if (sym_def->value) { - fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); - } - if (G.is_verbosity(1)) { - std::cerr << "fun " << func_name << " : " << function_type << std::endl; - } - if (v->marked_as_pure && v->ret_type->get_width() == 0) { - v->error("a pure function should return something, otherwise it will be optimized out anyway"); + if (G.is_verbosity(1) && v->is_code_function()) { + std::cerr << "fun " << func_name << " : " << v->declared_return_type << std::endl; } - SymValFunc* sym_val = nullptr; - if (const auto* v_seq = v->get_body()->try_as()) { - sym_val = new SymValCodeFunc(std::move(parameters_syms), static_cast(G.all_code_functions.size()), function_type); - } else if (const auto* v_asm = v->get_body()->try_as()) { - std::vector arg_order, ret_order; - calc_arg_ret_order_of_asm_function(v_asm, v->get_param_list(), v->ret_type, arg_order, ret_order); - sym_val = new SymValAsmFunc(std::move(parameters_syms), function_type, std::move(arg_order), std::move(ret_order), 0); - } else { - v->error("Unexpected function body statement"); + FunctionBody f_body = v->get_body()->type == ast_sequence ? 
static_cast(new FunctionBodyCode) : static_cast(new FunctionBodyAsm); + FunctionData* f_sym = new FunctionData(static_cast(func_name), v->loc, v->declared_return_type, std::move(parameters), 0, genericTs, nullptr, f_body, v); + + if (const auto* v_asm = v->get_body()->try_as()) { + validate_arg_ret_order_of_asm_function(v_asm, v->get_num_params(), v->declared_return_type); + f_sym->arg_order = v_asm->arg_order; + f_sym->ret_order = v_asm->ret_order; } - if (v->method_id) { - sym_val->method_id = td::string_to_int256(static_cast(v->method_id->int_val)); - if (sym_val->method_id.is_null()) { - v->method_id->error("invalid integer constant"); - } - } else if (v->marked_as_get_method) { - sym_val->method_id = calculate_method_id_by_func_name(func_name); - for (const SymDef* other : G.all_get_methods) { - if (!td::cmp(dynamic_cast(other->value)->method_id, sym_val->method_id)) { - v->error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" << static_cast(func_name) << "` produce the same hash. Consider renaming one of these functions."); + if (v->method_id.not_null()) { + f_sym->method_id = static_cast(v->method_id->to_long()); + } else if (v->flags & FunctionData::flagGetMethod) { + f_sym->method_id = calculate_method_id_by_func_name(func_name); + for (const FunctionData* other : G.all_get_methods) { + if (other->method_id == f_sym->method_id) { + v->error(PSTRING() << "GET methods hash collision: `" << other->name << "` and `" << f_sym->name << "` produce the same hash. 
Consider renaming one of these functions."); } } - } else if (v->is_entrypoint) { - sym_val->method_id = calculate_method_id_for_entrypoint(func_name); - } - if (v->marked_as_pure) { - sym_val->flags |= SymValFunc::flagMarkedAsPure; - } - if (v->marked_as_inline) { - sym_val->flags |= SymValFunc::flagInline; - } - if (v->marked_as_inline_ref) { - sym_val->flags |= SymValFunc::flagInlineRef; - } - if (v->marked_as_get_method) { - sym_val->flags |= SymValFunc::flagGetMethod; - } - if (v->is_entrypoint) { - sym_val->flags |= SymValFunc::flagIsEntrypoint; + } else if (v->flags & FunctionData::flagIsEntrypoint) { + f_sym->method_id = calculate_method_id_for_entrypoint(func_name); } + f_sym->flags |= v->flags; if (n_mutate_params) { - sym_val->flags |= SymValFunc::flagHasMutateParams; - } - if (v->accepts_self) { - sym_val->flags |= SymValFunc::flagAcceptsSelf; - } - if (v->returns_self) { - sym_val->flags |= SymValFunc::flagReturnsSelf; + f_sym->flags |= FunctionData::flagHasMutateParams; } - sym_def->value = sym_val; -#ifdef TOLK_DEBUG - sym_def->value->sym_name = func_name; -#endif - if (dynamic_cast(sym_val)) { - G.all_code_functions.push_back(sym_def); - } - if (sym_val->is_get_method()) { - G.all_get_methods.push_back(sym_def); + G.symtable.add_function(f_sym); + G.all_functions.push_back(f_sym); + if (f_sym->is_get_method()) { + G.all_get_methods.push_back(f_sym); } + v->mutate()->assign_fun_ref(f_sym); } static void iterate_through_file_symbols(const SrcFile* file) { @@ -358,10 +232,10 @@ static void iterate_through_file_symbols(const SrcFile* file) { for (AnyV v : file->ast->as()->get_toplevel_declarations()) { switch (v->type) { - case ast_import_statement: + case ast_import_directive: // on `import "another-file.tolk"`, register symbols from that file at first // (for instance, it can calculate constants, which are used in init_val of constants in current file below import) - iterate_through_file_symbols(v->as()->file); + 
iterate_through_file_symbols(v->as()->file); break; case ast_constant_declaration: @@ -379,8 +253,8 @@ static void iterate_through_file_symbols(const SrcFile* file) { } } -void pipeline_register_global_symbols(const AllSrcFiles& all_src_files) { - for (const SrcFile* file : all_src_files) { +void pipeline_register_global_symbols() { + for (const SrcFile* file : G.all_src_files) { iterate_through_file_symbols(file); } } diff --git a/tolk/pipe-resolve-identifiers.cpp b/tolk/pipe-resolve-identifiers.cpp new file mode 100644 index 000000000..03b23c3c1 --- /dev/null +++ b/tolk/pipe-resolve-identifiers.cpp @@ -0,0 +1,347 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "platform-utils.h" +#include "compiler-state.h" +#include "src-file.h" +#include "generics-helpers.h" +#include "ast.h" +#include "ast-visitor.h" +#include "type-system.h" +#include + +/* + * This pipe resolves identifiers (local variables and types) in all functions bodies. + * It happens before type inferring, but after all global symbols are registered. + * It means, that for any symbol `x` we can look up whether it's a global name or not. + * + * About resolving variables. + * Example: `var x = 10; x = 20;` both `x` point to one LocalVarData. 
+ * Example: `x = 20` undefined symbol `x` is also here (unless it's a global) + * Variables scoping and redeclaration are also here. + * Note, that `x` is stored as `ast_reference (ast_identifier "x")`. More formally, "references" are resolved. + * "Reference" in AST, besides the identifier, stores optional generics instantiation. `x` is grammar-valid. + * + * About resolving types. At the moment of parsing, `int`, `cell` and other predefined are parsed as TypeDataInt, etc. + * All the others are stored as TypeDataUnresolved, to be resolved here, after global symtable is filled. + * Example: `var x: T = 0` unresolved "T" is replaced by TypeDataGenericT inside `f`. + * Example: `f()` unresolved "MyAlias" is replaced by TypeDataAlias inside the reference. + * Example: `fun f(): KKK` unresolved "KKK" fires an error "unknown type name". + * When structures and type aliases are implemented, their resolving will also be done here. + * See finalize_type_data(). + * + * Note, that functions/methods binding is NOT here. + * In other words, for ast_function_call `beginCell()` and `t.tupleAt(0)`, their fun_ref is NOT filled here. + * Functions/methods binding is done later, simultaneously with type inferring and generics instantiation. + * For instance, to call a generic function `t.tuplePush(1)`, we need types of `t` and `1` to be inferred, + * as well as `tuplePush` to be instantiated, and fun_ref to point at that exact instantiations. 
+ * + * As a result of this step, + * * every V::sym is filled, pointing either to a local var/parameter, or to a global symbol + * (exceptional for function calls and methods, their references are bound later) + * * all TypeData in all symbols is ready for analyzing, TypeDataUnresolved won't occur later in pipeline + */ + +namespace tolk { + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_undefined_symbol(V v) { + if (v->name == "self") { + v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); + } else { + v->error("undefined symbol `" + static_cast(v->name) + "`"); + } +} + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_unknown_type_name(SrcLocation loc, const std::string &text) { + throw ParseError(loc, "unknown type name `" + text + "`"); +} + +static void check_import_exists_when_using_sym(AnyV v_usage, const Symbol* used_sym) { + SrcLocation sym_loc = used_sym->loc; + if (!v_usage->loc.is_symbol_from_same_or_builtin_file(sym_loc)) { + const SrcFile* declared_in = sym_loc.get_src_file(); + bool has_import = false; + for (const SrcFile::ImportDirective& import : v_usage->loc.get_src_file()->imports) { + if (import.imported_file == declared_in) { + has_import = true; + } + } + if (!has_import) { + v_usage->error("Using a non-imported symbol `" + used_sym->name + "`. 
Forgot to import \"" + declared_in->rel_filename + "\"?"); + } + } +} + +struct NameAndScopeResolver { + std::vector> scopes; + + static uint64_t key_hash(std::string_view name_key) { + return std::hash{}(name_key); + } + + void open_scope([[maybe_unused]] SrcLocation loc) { + // std::cerr << "open_scope " << scopes.size() + 1 << " at " << loc << std::endl; + scopes.emplace_back(); + } + + void close_scope([[maybe_unused]] SrcLocation loc) { + // std::cerr << "close_scope " << scopes.size() << " at " << loc << std::endl; + if (UNLIKELY(scopes.empty())) { + throw Fatal{"cannot close the outer scope"}; + } + scopes.pop_back(); + } + + const Symbol* lookup_symbol(std::string_view name) const { + uint64_t key = key_hash(name); + for (auto it = scopes.rbegin(); it != scopes.rend(); ++it) { // NOLINT(*-loop-convert) + const auto& scope = *it; + if (auto it_sym = scope.find(key); it_sym != scope.end()) { + return it_sym->second; + } + } + return G.symtable.lookup(name); + } + + void add_local_var(const LocalVarData* v_sym) { + if (UNLIKELY(scopes.empty())) { + throw Fatal("unexpected scope_level = 0"); + } + if (v_sym->name.empty()) { // underscore + return; + } + + uint64_t key = key_hash(v_sym->name); + const auto& [_, inserted] = scopes.rbegin()->emplace(key, v_sym); + if (UNLIKELY(!inserted)) { + throw ParseError(v_sym->loc, "redeclaration of local variable `" + v_sym->name + "`"); + } + } +}; + +struct TypeDataResolver { + GNU_ATTRIBUTE_NOINLINE + static TypePtr resolve_identifiers_in_type_data(TypePtr type_data, const GenericsDeclaration* genericTs) { + return type_data->replace_children_custom([genericTs](TypePtr child) { + if (const TypeDataUnresolved* un = child->try_as()) { + if (genericTs && genericTs->has_nameT(un->text)) { + std::string nameT = un->text; + return TypeDataGenericT::create(std::move(nameT)); + } + if (un->text == "auto") { + throw ParseError(un->loc, "`auto` type does not exist; just omit a type for local variable (will be inferred from 
assignment); parameters should always be typed"); + } + if (un->text == "self") { + throw ParseError(un->loc, "`self` type can be used only as a return type of a function (enforcing it to be chainable)"); + } + fire_error_unknown_type_name(un->loc, un->text); + } + return child; + }); + } +}; + +static TypePtr finalize_type_data(TypePtr type_data, const GenericsDeclaration* genericTs) { + if (!type_data || !type_data->has_unresolved_inside()) { + return type_data; + } + return TypeDataResolver::resolve_identifiers_in_type_data(type_data, genericTs); +} + + +class AssignSymInsideFunctionVisitor final : public ASTVisitorFunctionBody { + // more correctly this field shouldn't be static, but currently there is no need to make it a part of state + static NameAndScopeResolver current_scope; + static const FunctionData* current_function; + + static const LocalVarData* create_local_var_sym(std::string_view name, SrcLocation loc, TypePtr declared_type, bool immutable) { + LocalVarData* v_sym = new LocalVarData(static_cast(name), loc, declared_type, immutable * LocalVarData::flagImmutable, -1); + current_scope.add_local_var(v_sym); + return v_sym; + } + + static void process_catch_variable(AnyExprV catch_var) { + if (auto v_ref = catch_var->try_as()) { + const LocalVarData* var_ref = create_local_var_sym(v_ref->get_name(), catch_var->loc, nullptr, true); + v_ref->mutate()->assign_sym(var_ref); + } + } + +protected: + void visit(V v) override { + if (v->marked_as_redef) { + const Symbol* sym = current_scope.lookup_symbol(v->get_name()); + if (sym == nullptr) { + v->error("`redef` for unknown variable"); + } + const LocalVarData* var_ref = sym->try_as(); + if (!var_ref) { + v->error("`redef` for unknown variable"); + } + v->mutate()->assign_var_ref(var_ref); + } else { + TypePtr declared_type = finalize_type_data(v->declared_type, current_function->genericTs); + const LocalVarData* var_ref = create_local_var_sym(v->get_name(), v->loc, declared_type, v->is_immutable); + 
v->mutate()->assign_resolved_type(declared_type); + v->mutate()->assign_var_ref(var_ref); + } + } + + void visit(V v) override { + parent::visit(v->get_rhs()); // in this order, so that `var x = x` is invalid, "x" on the right unknown + parent::visit(v->get_lhs()); + } + + void visit(V v) override { + const Symbol* sym = current_scope.lookup_symbol(v->get_name()); + if (!sym) { + fire_error_undefined_symbol(v->get_identifier()); + } + v->mutate()->assign_sym(sym); + + // for global functions, global vars and constants, `import` must exist + if (!sym->try_as()) { + check_import_exists_when_using_sym(v, sym); + } + + // for `f` / `f`, resolve "MyAlias" and "T" + // (for function call `f()`, this v (ast_reference `f`) is callee) + if (auto v_instantiationTs = v->get_instantiationTs()) { + for (int i = 0; i < v_instantiationTs->size(); ++i) { + TypePtr substituted_type = finalize_type_data(v_instantiationTs->get_item(i)->substituted_type, current_function->genericTs); + v_instantiationTs->get_item(i)->mutate()->assign_resolved_type(substituted_type); + } + } + } + + void visit(V v) override { + // for `t.tupleAt` / `obj.method`, resolve "MyAlias" and "T" + // (for function call `t.tupleAt()`, this v (ast_dot_access `t.tupleAt`) is callee) + if (auto v_instantiationTs = v->get_instantiationTs()) { + for (int i = 0; i < v_instantiationTs->size(); ++i) { + TypePtr substituted_type = finalize_type_data(v_instantiationTs->get_item(i)->substituted_type, current_function->genericTs); + v_instantiationTs->get_item(i)->mutate()->assign_resolved_type(substituted_type); + } + } + parent::visit(v->get_obj()); + } + + void visit(V v) override { + TypePtr cast_to_type = finalize_type_data(v->cast_to_type, current_function->genericTs); + v->mutate()->assign_resolved_type(cast_to_type); + parent::visit(v->get_expr()); + } + + void visit(V v) override { + if (v->empty()) { + return; + } + current_scope.open_scope(v->loc); + parent::visit(v); + current_scope.close_scope(v->loc_end); + } 
+ + void visit(V v) override { + current_scope.open_scope(v->loc); + parent::visit(v->get_body()); + parent::visit(v->get_cond()); // in 'while' condition it's ok to use variables declared inside do + current_scope.close_scope(v->get_body()->loc_end); + } + + void visit(V v) override { + visit(v->get_try_body()); + current_scope.open_scope(v->get_catch_body()->loc); + const std::vector& catch_items = v->get_catch_expr()->get_items(); + tolk_assert(catch_items.size() == 2); + process_catch_variable(catch_items[1]); + process_catch_variable(catch_items[0]); + parent::visit(v->get_catch_body()); + current_scope.close_scope(v->get_catch_body()->loc_end); + } + +public: + bool should_visit_function(const FunctionData* fun_ref) override { + // this pipe is done just after parsing + // visit both asm and code functions, resolve identifiers in parameter/return types everywhere + // for generic functions, unresolved "T" will be replaced by TypeDataGenericT + return true; + } + + void start_visiting_function(const FunctionData* fun_ref, V v) override { + current_function = fun_ref; + + for (int i = 0; i < v->get_num_params(); ++i) { + const LocalVarData& param_var = fun_ref->parameters[i]; + TypePtr declared_type = finalize_type_data(param_var.declared_type, fun_ref->genericTs); + v->get_param(i)->mutate()->assign_param_ref(¶m_var); + v->get_param(i)->mutate()->assign_resolved_type(declared_type); + param_var.mutate()->assign_resolved_type(declared_type); + } + TypePtr return_type = finalize_type_data(fun_ref->declared_return_type, fun_ref->genericTs); + v->mutate()->assign_resolved_type(return_type); + fun_ref->mutate()->assign_resolved_type(return_type); + + if (fun_ref->is_code_function()) { + auto v_seq = v->get_body()->as(); + current_scope.open_scope(v->loc); + for (int i = 0; i < v->get_num_params(); ++i) { + current_scope.add_local_var(&fun_ref->parameters[i]); + } + parent::visit(v_seq); + current_scope.close_scope(v_seq->loc_end); + 
tolk_assert(current_scope.scopes.empty()); + } + + current_function = nullptr; + } +}; + +NameAndScopeResolver AssignSymInsideFunctionVisitor::current_scope; +const FunctionData* AssignSymInsideFunctionVisitor::current_function = nullptr; + +void pipeline_resolve_identifiers_and_assign_symbols() { + AssignSymInsideFunctionVisitor visitor; + for (const SrcFile* file : G.all_src_files) { + for (AnyV v : file->ast->as()->get_toplevel_declarations()) { + if (auto v_func = v->try_as()) { + tolk_assert(v_func->fun_ref); + visitor.start_visiting_function(v_func->fun_ref, v_func); + + } else if (auto v_global = v->try_as()) { + TypePtr declared_type = finalize_type_data(v_global->var_ref->declared_type, nullptr); + v_global->mutate()->assign_resolved_type(declared_type); + v_global->var_ref->mutate()->assign_resolved_type(declared_type); + + } else if (auto v_const = v->try_as(); v_const && v_const->declared_type) { + TypePtr declared_type = finalize_type_data(v_const->const_ref->declared_type, nullptr); + v_const->mutate()->assign_resolved_type(declared_type); + v_const->const_ref->mutate()->assign_resolved_type(declared_type); + } + } + } +} + +void pipeline_resolve_identifiers_and_assign_symbols(const FunctionData* fun_ref) { + AssignSymInsideFunctionVisitor visitor; + if (visitor.should_visit_function(fun_ref)) { + visitor.start_visiting_function(fun_ref, fun_ref->ast_root->as()); + } +} + +} // namespace tolk diff --git a/tolk/pipeline.h b/tolk/pipeline.h index fdfd2b996..6aec2b5e8 100644 --- a/tolk/pipeline.h +++ b/tolk/pipeline.h @@ -25,17 +25,34 @@ */ #pragma once -#include "src-file.h" +#include "fwd-declarations.h" #include namespace tolk { -AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename); +void pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename); -void pipeline_register_global_symbols(const AllSrcFiles&); -void 
pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles&); +void pipeline_register_global_symbols(); +void pipeline_resolve_identifiers_and_assign_symbols(); +void pipeline_calculate_rvalue_lvalue(); +void pipeline_detect_unreachable_statements(); +void pipeline_infer_types_and_calls_and_fields(); +void pipeline_refine_lvalue_for_mutate_arguments(); +void pipeline_check_rvalue_lvalue(); +void pipeline_check_pure_impure_operations(); +void pipeline_constant_folding(); +void pipeline_optimize_boolean_expressions(); +void pipeline_convert_ast_to_legacy_Expr_Op(); void pipeline_find_unused_symbols(); -void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles&); +void pipeline_generate_fif_output_to_std_cout(); + +// these pipes also can be called per-function individually +// they are called for instantiated generics functions, when `f` is deeply cloned as `f` +void pipeline_resolve_identifiers_and_assign_symbols(const FunctionData*); +void pipeline_calculate_rvalue_lvalue(const FunctionData*); +void pipeline_detect_unreachable_statements(const FunctionData*); +void pipeline_infer_types_and_calls_and_fields(const FunctionData*); + } // namespace tolk diff --git a/tolk/platform-utils.h b/tolk/platform-utils.h index 7b16226e7..5ab01220e 100644 --- a/tolk/platform-utils.h +++ b/tolk/platform-utils.h @@ -27,11 +27,15 @@ #if __GNUC__ #define GNU_ATTRIBUTE_COLD [[gnu::cold]] +#define GNU_ATTRIBUTE_FLATTEN [[gnu::flatten]] #define GNU_ATTRIBUTE_NORETURN [[gnu::noreturn]] +#define GNU_ATTRIBUTE_NOINLINE [[gnu::noinline]] #define GNU_ATTRIBUTE_ALWAYS_INLINE [[gnu::always_inline]] #else #define GNU_ATTRIBUTE_COLD +#define GNU_ATTRIBUTE_FLATTEN #define GNU_ATTRIBUTE_NORETURN [[noreturn]] +#define GNU_ATTRIBUTE_NOINLINE [[noinline]] #define GNU_ATTRIBUTE_ALWAYS_INLINE #endif diff --git a/tolk/src-file.cpp b/tolk/src-file.cpp index e5533f697..52ac38213 100644 --- a/tolk/src-file.cpp +++ b/tolk/src-file.cpp @@ -23,8 +23,8 @@ namespace tolk { static_assert(sizeof(SrcLocation) 
== 8); -SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const { - for (SrcFile* file : all_src_files) { +const SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const { + for (const SrcFile* file : all_src_files) { if (file->file_id == file_id) { return file; } @@ -32,8 +32,8 @@ SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const { return nullptr; } -SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const { - for (SrcFile* file : all_src_files) { +const SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const { + for (const SrcFile* file : all_src_files) { if (file->abs_filename == abs_filename) { return file; } @@ -41,7 +41,7 @@ SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const return nullptr; } -SrcFile* AllRegisteredSrcFiles::locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from) { +const SrcFile* AllRegisteredSrcFiles::locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from) { td::Result path = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::Realpath, rel_filename.c_str()); if (path.is_error()) { if (included_from.is_defined()) { @@ -51,7 +51,7 @@ SrcFile* AllRegisteredSrcFiles::locate_and_register_source_file(const std::strin } std::string abs_filename = path.move_as_ok(); - if (SrcFile* file = find_file(abs_filename)) { + if (const SrcFile* file = find_file(abs_filename)) { return file; } @@ -75,16 +75,7 @@ SrcFile* AllRegisteredSrcFiles::get_next_unparsed_file() { if (last_parsed_file_id >= last_registered_file_id) { return nullptr; } - return all_src_files[++last_parsed_file_id]; -} - -AllSrcFiles AllRegisteredSrcFiles::get_all_files() const { - AllSrcFiles src_files_immutable; - src_files_immutable.reserve(all_src_files.size()); - for (const SrcFile* file : all_src_files) { - src_files_immutable.push_back(file); - } - return src_files_immutable; + return 
const_cast(all_src_files[++last_parsed_file_id]); } bool SrcFile::is_stdlib_file() const { diff --git a/tolk/src-file.h b/tolk/src-file.h index 815dccbed..0c82bf180 100644 --- a/tolk/src-file.h +++ b/tolk/src-file.h @@ -18,11 +18,10 @@ #include #include +#include "fwd-declarations.h" namespace tolk { -struct ASTNodeBase; - struct SrcFile { struct SrcPosition { int offset; @@ -31,7 +30,7 @@ struct SrcFile { std::string_view line_str; }; - struct ImportStatement { + struct ImportDirective { const SrcFile* imported_file; }; @@ -39,8 +38,8 @@ struct SrcFile { std::string rel_filename; // relative to cwd std::string abs_filename; // absolute from root std::string text; // file contents loaded into memory, every Token::str_val points inside it - const ASTNodeBase* ast = nullptr; // when a file has been parsed, its ast_tolk_file is kept here - std::vector imports; // to check strictness (can't use a symbol without importing its file) + AnyV ast = nullptr; // when a file has been parsed, its ast_tolk_file is kept here + std::vector imports; // to check strictness (can't use a symbol without importing its file) SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text) : file_id(file_id) @@ -96,21 +95,20 @@ class SrcLocation { std::ostream& operator<<(std::ostream& os, SrcLocation loc); -using AllSrcFiles = std::vector; - class AllRegisteredSrcFiles { - std::vector all_src_files; + std::vector all_src_files; int last_registered_file_id = -1; int last_parsed_file_id = -1; public: - SrcFile *find_file(int file_id) const; - SrcFile* find_file(const std::string& abs_filename) const; + const SrcFile* find_file(int file_id) const; + const SrcFile* find_file(const std::string& abs_filename) const; - SrcFile* locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from); + const SrcFile* locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from); SrcFile* get_next_unparsed_file(); - 
AllSrcFiles get_all_files() const; + auto begin() const { return all_src_files.begin(); } + auto end() const { return all_src_files.end(); } }; struct Fatal final : std::exception { diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index abaeb0846..918fdab33 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -16,154 +16,136 @@ */ #include "symtable.h" #include "compiler-state.h" -#include -#include +#include "platform-utils.h" +#include "generics-helpers.h" namespace tolk { - -std::string Symbol::unknown_symbol_name(sym_idx_t i) { - if (!i) { - return "_"; - } else { - std::ostringstream os; - os << "SYM#" << i; - return os.str(); +std::string FunctionData::as_human_readable() const { + if (!genericTs) { + return name; // if it's generic instantiation like `f`, its name is "f", not "f" } + return name + genericTs->as_human_readable(); } -sym_idx_t SymTable::gen_lookup(std::string_view str, int mode, sym_idx_t idx) { - unsigned long long h1 = 1, h2 = 1; - for (char c : str) { - h1 = ((h1 * 239) + (unsigned char)(c)) % SIZE_PRIME; - h2 = ((h2 * 17) + (unsigned char)(c)) % (SIZE_PRIME - 1); +bool FunctionData::does_need_codegen() const { + // when a function is declared, but not referenced from code in any way, don't generate its body + if (!is_really_used() && G.settings.remove_unused_functions) { + return false; + } + // functions with asm body don't need code generation + // (even if used as non-call: `var a = beginCell;` inserts TVM continuation inline) + if (is_asm_function() || is_builtin_function()) { + return false; + } + // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist + if (is_used_as_noncall()) { + return true; } - ++h2; - ++h1; - while (true) { - if (sym[h1]) { - if (sym[h1]->str == str) { - return (mode & 2) ? 
not_found : sym_idx_t(h1); - } - h1 += h2; - if (h1 > SIZE_PRIME) { - h1 -= SIZE_PRIME; - } - } else { - if (!(mode & 1)) { - return not_found; - } - if (def_sym >= ((long long)SIZE_PRIME * 3) / 4) { - throw SymTableOverflow{def_sym}; - } - sym[h1] = std::make_unique(static_cast(str), idx <= 0 ? sym_idx_t(h1) : -idx); - ++def_sym; - return sym_idx_t(h1); - } + // generic functions also don't need code generation, only generic instantiations do + if (is_generic_function()) { + return false; } + // currently, there is no inlining, all functions are codegenerated + // (but actually, unused ones are later removed by Fift) + // in the future, we may want to implement a true AST inlining for "simple" functions + return true; } -std::string SymDef::name() const { - return G.symbols.get_name(sym_idx); +void FunctionData::assign_resolved_type(TypePtr declared_return_type) { + this->declared_return_type = declared_return_type; } -void open_scope(SrcLocation loc) { - ++G.scope_level; - G.scope_opened_at.push_back(loc); +void FunctionData::assign_inferred_type(TypePtr inferred_return_type, TypePtr inferred_full_type) { + this->inferred_return_type = inferred_return_type; + this->inferred_full_type = inferred_full_type; } -void close_scope() { - if (!G.scope_level) { - throw Fatal{"cannot close the outer scope"}; - } - while (!G.symbol_stack.empty() && G.symbol_stack.back().first == G.scope_level) { - SymDef old_def = G.symbol_stack.back().second; - auto idx = old_def.sym_idx; - G.symbol_stack.pop_back(); - SymDef* cur_def = G.sym_def[idx]; - assert(cur_def); - assert(cur_def->level == G.scope_level && cur_def->sym_idx == idx); - //std::cerr << "restoring local symbol `" << old_def.name << "` of level " << scope_level << " to its previous level " << old_def.level << std::endl; - if (cur_def->value) { - //std::cerr << "deleting value of symbol " << old_def.name << ":" << old_def.level << " at " << (const void*) it->second.value << std::endl; - delete cur_def->value; - } - if 
(!old_def.level && !old_def.value) { - delete cur_def; // ??? keep the definition always? - G.sym_def[idx] = nullptr; - } else { - cur_def->value = old_def.value; - cur_def->level = old_def.level; - } - old_def.value = nullptr; - } - --G.scope_level; - G.scope_opened_at.pop_back(); +void FunctionData::assign_is_used_as_noncall() { + this->flags |= flagUsedAsNonCall; } -SymDef* lookup_symbol(sym_idx_t idx) { - if (!idx) { - return nullptr; - } - if (G.sym_def[idx]) { - return G.sym_def[idx]; - } - if (G.global_sym_def[idx]) { - return G.global_sym_def[idx]; - } - return nullptr; +void FunctionData::assign_is_implicit_return() { + this->flags |= flagImplicitReturn; } -SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc) { - if (SymDef* found = G.global_sym_def[name_idx]) { - return found; // found->value is filled; it means, that a symbol is redefined - } +void FunctionData::assign_is_type_inferring_done() { + this->flags |= flagTypeInferringDone; +} - SymDef* registered = G.global_sym_def[name_idx] = new SymDef(0, name_idx, loc); -#ifdef TOLK_DEBUG - registered->sym_name = registered->name(); -#endif - return registered; // registered->value is nullptr; it means, it's just created +void FunctionData::assign_is_really_used() { + this->flags |= flagReallyUsed; } -SymDef* define_parameter(sym_idx_t name_idx, SrcLocation loc) { - // note, that parameters (defined at function declaration) are not inserted into symtable - // their SymDef is registered to be inserted into SymValFunc::parameters - // (and later ->value is filled with SymValVariable) +void FunctionData::assign_arg_order(std::vector&& arg_order) { + this->arg_order = std::move(arg_order); +} - SymDef* registered = new SymDef(0, name_idx, loc); +void GlobalVarData::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + +void GlobalVarData::assign_is_really_used() { + this->flags |= flagReallyUsed; +} + +void GlobalConstData::assign_resolved_type(TypePtr 
declared_type) { + this->declared_type = declared_type; +} + +void LocalVarData::assign_idx(int idx) { + this->idx = idx; +} + +void LocalVarData::assign_resolved_type(TypePtr declared_type) { + this->declared_type = declared_type; +} + +void LocalVarData::assign_inferred_type(TypePtr inferred_type) { #ifdef TOLK_DEBUG - registered->sym_name = registered->name(); + assert(this->declared_type == nullptr); // called when type declaration omitted, inferred from assigned value #endif - return registered; + this->declared_type = inferred_type; } -SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) { - if (!name_idx) { - return nullptr; +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_redefinition_of_symbol(SrcLocation loc, const Symbol* previous) { + SrcLocation prev_loc = previous->loc; + if (prev_loc.is_stdlib()) { + throw ParseError(loc, "redefinition of a symbol from stdlib"); } - if (!G.scope_level) { - throw Fatal("unexpected scope_level = 0"); + if (prev_loc.is_defined()) { + throw ParseError(loc, "redefinition of symbol, previous was at: " + prev_loc.to_string()); } - auto found = G.sym_def[name_idx]; - if (found) { - if (found->level < G.scope_level) { - G.symbol_stack.emplace_back(G.scope_level, *found); - found->level = G.scope_level; - } else if (found->value && force_new) { - return nullptr; - } - found->value = nullptr; - found->loc = loc; - return found; + throw ParseError(loc, "redefinition of built-in symbol"); +} + +void GlobalSymbolTable::add_function(const FunctionData* f_sym) { + auto key = key_hash(f_sym->name); + auto [it, inserted] = entries.emplace(key, f_sym); + if (!inserted) { + fire_error_redefinition_of_symbol(f_sym->loc, it->second); } - found = G.sym_def[name_idx] = new SymDef(G.scope_level, name_idx, loc); - G.symbol_stack.emplace_back(G.scope_level, SymDef{0, name_idx, loc}); -#ifdef TOLK_DEBUG - found->sym_name = found->name(); - G.symbol_stack.back().second.sym_name = found->name(); -#endif - 
return found; +} + +void GlobalSymbolTable::add_global_var(const GlobalVarData* g_sym) { + auto key = key_hash(g_sym->name); + auto [it, inserted] = entries.emplace(key, g_sym); + if (!inserted) { + fire_error_redefinition_of_symbol(g_sym->loc, it->second); + } +} + +void GlobalSymbolTable::add_global_const(const GlobalConstData* c_sym) { + auto key = key_hash(c_sym->name); + auto [it, inserted] = entries.emplace(key, c_sym); + if (!inserted) { + fire_error_redefinition_of_symbol(c_sym->loc, it->second); + } +} + +const Symbol* lookup_global_symbol(std::string_view name) { + return G.symtable.lookup(name); } } // namespace tolk diff --git a/tolk/symtable.h b/tolk/symtable.h index 69e2eaa8e..3cda24edf 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -17,98 +17,225 @@ #pragma once #include "src-file.h" -#include "type-expr.h" -#include -#include +#include "fwd-declarations.h" +#include "constant-evaluator.h" +#include "crypto/common/refint.h" +#include +#include +#include namespace tolk { -typedef int var_idx_t; -typedef int sym_idx_t; +struct Symbol { + std::string name; + SrcLocation loc; -enum class SymValKind { _Var, _Func, _GlobVar, _Const }; + Symbol(std::string name, SrcLocation loc) + : name(std::move(name)) + , loc(loc) { + } -struct SymValBase { - SymValKind kind; - int idx; - TypeExpr* sym_type; + virtual ~Symbol() = default; + + template + const T* as() const { #ifdef TOLK_DEBUG - std::string sym_name; // seeing symbol name in debugger makes it much easier to delve into Tolk sources + assert(dynamic_cast(this) != nullptr); #endif - - SymValBase(SymValKind kind, int idx, TypeExpr* sym_type) : kind(kind), idx(idx), sym_type(sym_type) { + return dynamic_cast(this); } - virtual ~SymValBase() = default; - TypeExpr* get_type() const { - return sym_type; + template + const T* try_as() const { + return dynamic_cast(this); } }; +struct LocalVarData final : Symbol { + enum { + flagMutateParameter = 1, // parameter was declared with `mutate` keyword + 
flagImmutable = 2, // variable was declared via `val` (not `var`) + }; -struct Symbol { - std::string str; - sym_idx_t idx; + TypePtr declared_type; // either at declaration `var x:int`, or if omitted, from assigned value `var x=2` + int flags; + int idx; - Symbol(std::string str, sym_idx_t idx) : str(std::move(str)), idx(idx) {} + LocalVarData(std::string name, SrcLocation loc, TypePtr declared_type, int flags, int idx) + : Symbol(std::move(name), loc) + , declared_type(declared_type) + , flags(flags) + , idx(idx) { + } - static std::string unknown_symbol_name(sym_idx_t i); -}; + bool is_immutable() const { return flags & flagImmutable; } + bool is_mutate_parameter() const { return flags & flagMutateParameter; } -class SymTable { -public: - static constexpr int SIZE_PRIME = 100003; + LocalVarData* mutate() const { return const_cast(this); } + void assign_idx(int idx); + void assign_resolved_type(TypePtr declared_type); + void assign_inferred_type(TypePtr inferred_type); +}; -private: - sym_idx_t def_sym{0}; - std::unique_ptr sym[SIZE_PRIME + 1]; - sym_idx_t gen_lookup(std::string_view str, int mode = 0, sym_idx_t idx = 0); +struct FunctionBodyCode; +struct FunctionBodyAsm; +struct FunctionBodyBuiltin; +struct GenericsDeclaration; +struct GenericsInstantiation; + +typedef std::variant< + FunctionBodyCode*, + FunctionBodyAsm*, + FunctionBodyBuiltin* +> FunctionBody; + +struct FunctionData final : Symbol { + static constexpr int EMPTY_METHOD_ID = -10; + + enum { + flagInline = 1, // marked `@inline` + flagInlineRef = 2, // marked `@inline_ref` + flagTypeInferringDone = 4, // type inferring step of function's body (all AST nodes assigning v->inferred_type) is done + flagUsedAsNonCall = 8, // used not only as `f()`, but as a 1-st class function (assigned to var, pushed to tuple, etc.) 
+ flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out + flagImplicitReturn = 32, // control flow reaches end of function, so it needs implicit return at the end + flagGetMethod = 64, // was declared via `get func(): T`, method_id is auto-assigned + flagIsEntrypoint = 128, // it's `main` / `onExternalMessage` / etc. + flagHasMutateParams = 256, // has parameters declared as `mutate` + flagAcceptsSelf = 512, // is a member function (has `self` first parameter) + flagReturnsSelf = 1024, // return type is `self` (returns the mutated 1st argument), calls can be chainable + flagReallyUsed = 2048, // calculated via dfs from used functions; declared but unused functions are not codegenerated + }; + + int method_id = EMPTY_METHOD_ID; + int flags; + + std::vector parameters; + std::vector arg_order, ret_order; + TypePtr declared_return_type; // may be nullptr, meaning "auto infer" + TypePtr inferred_return_type = nullptr; // assigned on type inferring + TypePtr inferred_full_type = nullptr; // assigned on type inferring, it's TypeDataFunCallable(params -> return) + + const GenericsDeclaration* genericTs; + const GenericsInstantiation* instantiationTs; + FunctionBody body; + AnyV ast_root; // V for user-defined (not builtin) + + FunctionData(std::string name, SrcLocation loc, TypePtr declared_return_type, std::vector parameters, int initial_flags, const GenericsDeclaration* genericTs, const GenericsInstantiation* instantiationTs, FunctionBody body, AnyV ast_root) + : Symbol(std::move(name), loc) + , flags(initial_flags) + , parameters(std::move(parameters)) + , declared_return_type(declared_return_type) + , genericTs(genericTs) + , instantiationTs(instantiationTs) + , body(body) + , ast_root(ast_root) { + } -public: + std::string as_human_readable() const; - static constexpr sym_idx_t not_found = 0; - sym_idx_t lookup(std::string_view str) { - return gen_lookup(str, 0); - } - sym_idx_t 
lookup_add(std::string_view str) { - return gen_lookup(str, 1); + const std::vector* get_arg_order() const { + return arg_order.empty() ? nullptr : &arg_order; } - Symbol* operator[](sym_idx_t i) const { - return sym[i].get(); - } - std::string get_name(sym_idx_t i) const { - return sym[i] ? sym[i]->str : Symbol::unknown_symbol_name(i); + const std::vector* get_ret_order() const { + return ret_order.empty() ? nullptr : &ret_order; } + + int get_num_params() const { return static_cast(parameters.size()); } + const LocalVarData& get_param(int idx) const { return parameters[idx]; } + + bool is_code_function() const { return std::holds_alternative(body); } + bool is_asm_function() const { return std::holds_alternative(body); } + bool is_builtin_function() const { return ast_root == nullptr; } + + bool is_generic_function() const { return genericTs != nullptr; } + bool is_instantiation_of_generic_function() const { return instantiationTs != nullptr; } + + bool is_inline() const { return flags & flagInline; } + bool is_inline_ref() const { return flags & flagInlineRef; } + bool is_type_inferring_done() const { return flags & flagTypeInferringDone; } + bool is_used_as_noncall() const { return flags & flagUsedAsNonCall; } + bool is_marked_as_pure() const { return flags & flagMarkedAsPure; } + bool is_implicit_return() const { return flags & flagImplicitReturn; } + bool is_get_method() const { return flags & flagGetMethod; } + bool is_method_id_not_empty() const { return method_id != EMPTY_METHOD_ID; } + bool is_entrypoint() const { return flags & flagIsEntrypoint; } + bool has_mutate_params() const { return flags & flagHasMutateParams; } + bool does_accept_self() const { return flags & flagAcceptsSelf; } + bool does_return_self() const { return flags & flagReturnsSelf; } + bool does_mutate_self() const { return (flags & flagAcceptsSelf) && parameters[0].is_mutate_parameter(); } + bool is_really_used() const { return flags & flagReallyUsed; } + + bool does_need_codegen() 
const; + + FunctionData* mutate() const { return const_cast(this); } + void assign_resolved_type(TypePtr declared_return_type); + void assign_inferred_type(TypePtr inferred_return_type, TypePtr inferred_full_type); + void assign_is_used_as_noncall(); + void assign_is_implicit_return(); + void assign_is_type_inferring_done(); + void assign_is_really_used(); + void assign_arg_order(std::vector&& arg_order); }; -struct SymTableOverflow { - int sym_def; - explicit SymTableOverflow(int x) : sym_def(x) { +struct GlobalVarData final : Symbol { + enum { + flagReallyUsed = 1, // calculated via dfs from used functions; unused globals are not codegenerated + }; + + TypePtr declared_type; // always exists, declaring globals without type is prohibited + int flags = 0; + + GlobalVarData(std::string name, SrcLocation loc, TypePtr declared_type) + : Symbol(std::move(name), loc) + , declared_type(declared_type) { } + + bool is_really_used() const { return flags & flagReallyUsed; } + + GlobalVarData* mutate() const { return const_cast(this); } + void assign_resolved_type(TypePtr declared_type); + void assign_is_really_used(); }; +struct GlobalConstData final : Symbol { + ConstantValue value; + TypePtr declared_type; // may be nullptr -struct SymDef { - int level; - sym_idx_t sym_idx; - SymValBase* value; - SrcLocation loc; -#ifdef TOLK_DEBUG - std::string sym_name; -#endif - SymDef(int lvl, sym_idx_t idx, SrcLocation _loc, SymValBase* val = nullptr) - : level(lvl), sym_idx(idx), value(val), loc(_loc) { + GlobalConstData(std::string name, SrcLocation loc, TypePtr declared_type, ConstantValue&& value) + : Symbol(std::move(name), loc) + , value(std::move(value)) + , declared_type(declared_type) { } - std::string name() const; + + bool is_int_const() const { return value.is_int(); } + bool is_slice_const() const { return value.is_slice(); } + + td::RefInt256 as_int_const() const { return value.as_int(); } + const std::string& as_slice_const() const { return value.as_slice(); } + + 
GlobalConstData* mutate() const { return const_cast(this); } + void assign_resolved_type(TypePtr declared_type); }; +class GlobalSymbolTable { + std::unordered_map entries; + + static uint64_t key_hash(std::string_view name_key) { + return std::hash{}(name_key); + } + +public: + void add_function(const FunctionData* f_sym); + void add_global_var(const GlobalVarData* g_sym); + void add_global_const(const GlobalConstData* c_sym); -void open_scope(SrcLocation loc); -void close_scope(); -SymDef* lookup_symbol(sym_idx_t idx); + const Symbol* lookup(std::string_view name) const { + const auto it = entries.find(key_hash(name)); + return it == entries.end() ? nullptr : it->second; + } +}; -SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc = {}); -SymDef* define_parameter(sym_idx_t name_idx, SrcLocation loc); -SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc); +const Symbol* lookup_global_symbol(std::string_view name); } // namespace tolk diff --git a/tolk/tolk-version.h b/tolk/tolk-version.h index 6e5b764ca..7eaf55a7a 100644 --- a/tolk/tolk-version.h +++ b/tolk/tolk-version.h @@ -18,6 +18,6 @@ namespace tolk { -constexpr const char* TOLK_VERSION = "0.6.0"; +constexpr const char* TOLK_VERSION = "0.7.0"; } // namespace tolk diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index 9268cc62d..cc867c521 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -28,6 +28,7 @@ #include "compiler-state.h" #include "lexer.h" #include "ast.h" +#include "type-system.h" namespace tolk { @@ -45,19 +46,29 @@ void on_assertion_failed(const char *description, const char *file_name, int lin } int tolk_proceed(const std::string &entrypoint_filename) { + type_system_init(); define_builtins(); lexer_init(); // on any error, an exception is thrown, and the message is printed out below // (currently, only a single error can be printed) try { - AllSrcFiles all_files = pipeline_discover_and_parse_sources("@stdlib/common.tolk", entrypoint_filename); + 
pipeline_discover_and_parse_sources("@stdlib/common.tolk", entrypoint_filename); - pipeline_register_global_symbols(all_files); - pipeline_convert_ast_to_legacy_Expr_Op(all_files); + pipeline_register_global_symbols(); + pipeline_resolve_identifiers_and_assign_symbols(); + pipeline_calculate_rvalue_lvalue(); + pipeline_detect_unreachable_statements(); + pipeline_infer_types_and_calls_and_fields(); + pipeline_refine_lvalue_for_mutate_arguments(); + pipeline_check_rvalue_lvalue(); + pipeline_check_pure_impure_operations(); + pipeline_constant_folding(); + pipeline_optimize_boolean_expressions(); + pipeline_convert_ast_to_legacy_Expr_Op(); pipeline_find_unused_symbols(); - pipeline_generate_fif_output_to_std_cout(all_files); + pipeline_generate_fif_output_to_std_cout(); return 0; } catch (Fatal& fatal) { @@ -66,11 +77,6 @@ int tolk_proceed(const std::string &entrypoint_filename) { } catch (ParseError& error) { std::cerr << error << std::endl; return 2; - } catch (UnifyError& unif_err) { - std::cerr << "fatal: "; - unif_err.print_message(std::cerr); - std::cerr << std::endl; - return 2; } catch (UnexpectedASTNodeType& error) { std::cerr << "fatal: " << error.what() << std::endl; std::cerr << "It's a compiler bug, please report to developers" << std::endl; diff --git a/tolk/tolk.h b/tolk/tolk.h index 971ca35dd..5ec4d3e08 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -18,10 +18,10 @@ #include "platform-utils.h" #include "src-file.h" -#include "type-expr.h" #include "symtable.h" #include "crypto/common/refint.h" #include "td/utils/Status.h" +#include #include #include #include @@ -34,52 +34,33 @@ namespace tolk { GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN void on_assertion_failed(const char *description, const char *file_name, int line_number); -/* - * - * TYPE EXPRESSIONS - * - */ - -struct UnifyError : std::exception { - TypeExpr* te1; - TypeExpr* te2; - std::string msg; - - UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), 
msg(std::move(_msg)) { - } - - void print_message(std::ostream& os) const; - const char* what() const noexcept override { - return msg.c_str(); - } -}; - -std::ostream& operator<<(std::ostream& os, const UnifyError& ue); - -void unify(TypeExpr*& te1, TypeExpr*& te2); - /* * * ABSTRACT CODE * */ -using const_idx_t = int; +typedef int var_idx_t; +typedef int const_idx_t; struct TmpVar { - TypeExpr* v_type; + TypePtr v_type; var_idx_t idx; - sym_idx_t sym_idx; + const LocalVarData* v_sym; // points to var defined in code; nullptr for implicitly created tmp vars int coord; SrcLocation where; std::vector> on_modification; - TmpVar(var_idx_t _idx, TypeExpr* _type, sym_idx_t sym_idx, SrcLocation loc); - bool is_unnamed() const { return sym_idx == 0; } + TmpVar(var_idx_t _idx, TypePtr type, const LocalVarData* v_sym, SrcLocation loc) + : v_type(type) + , idx(_idx) + , v_sym(v_sym) + , coord(0) + , where(loc) { + } void show(std::ostream& os, int omit_idx = 0) const; void dump(std::ostream& os) const; - void set_location(SrcLocation loc); }; struct VarDescr { @@ -171,7 +152,6 @@ struct VarDescr { void set_const(long long value); void set_const(td::RefInt256 value); void set_const(std::string value); - void set_const_nan(); void operator+=(const VarDescr& y) { flags &= y.flags; } @@ -303,7 +283,8 @@ struct Op { enum { _Disabled = 1, _NoReturn = 4, _Impure = 24 }; int flags; std::unique_ptr next; - SymDef* fun_ref; // despite its name, it may actually ref global var; applicable not only to Op::_Call, but for other kinds also + const FunctionData* f_sym = nullptr; + const GlobalVarData* g_sym = nullptr; SrcLocation where; VarDescrList var_info; std::vector args; @@ -311,27 +292,41 @@ struct Op { std::unique_ptr block0, block1; td::RefInt256 int_const; std::string str_const; - Op(SrcLocation _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) { + Op(SrcLocation _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), f_sym(nullptr), 
where(_where) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left) - : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left) { + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(_left) { } Op(SrcLocation _where, OpKind _cl, std::vector&& _left) - : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(std::move(_left)) { + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(std::move(_left)) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left, td::RefInt256 _const) - : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), int_const(_const) { + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(_left), int_const(_const) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left, std::string _const) - : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), str_const(_const) { + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(_left), str_const(_const) { + } + Op(SrcLocation _where, OpKind _cl, const std::vector& _left, const std::vector& _right) + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(_left), right(_right) { + } + Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right) + : cl(_cl), flags(0), f_sym(nullptr), where(_where), left(std::move(_left)), right(std::move(_right)) { } Op(SrcLocation _where, OpKind _cl, const std::vector& _left, const std::vector& _right, - SymDef* _fun = nullptr) - : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(_left), right(_right) { + const FunctionData* _fun) + : cl(_cl), flags(0), f_sym(_fun), where(_where), left(_left), right(_right) { } Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right, - SymDef* _fun = nullptr) - : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) { + const FunctionData* _fun) + : cl(_cl), flags(0), f_sym(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) { + } + Op(SrcLocation _where, 
OpKind _cl, const std::vector& _left, const std::vector& _right, + const GlobalVarData* _gvar) + : cl(_cl), flags(0), g_sym(_gvar), where(_where), left(_left), right(_right) { + } + Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right, + const GlobalVarData* _gvar) + : cl(_cl), flags(0), g_sym(_gvar), where(_where), left(std::move(_left)), right(std::move(_right)) { } bool disabled() const { return flags & _Disabled; } @@ -343,8 +338,7 @@ struct Op { bool set_noreturn(bool flag); bool impure() const { return flags & _Impure; } - void set_impure(const CodeBlob &code); - void set_impure(const CodeBlob &code, bool flag); + void set_impure_flag(); void show(std::ostream& os, const std::vector& vars, std::string pfx = "", int mode = 0) const; void show_var_list(std::ostream& os, const std::vector& idx_list, const std::vector& vars) const; @@ -391,247 +385,16 @@ inline ListIterator end(const Op* op_list) { return ListIterator{}; } -typedef std::tuple FormalArg; +typedef std::tuple FormalArg; typedef std::vector FormalArgList; struct AsmOpList; -/* - * - * SYMBOL VALUES - * - */ - -struct SymValVariable : SymValBase { - enum SymValFlag { - flagMutateParameter = 1, // parameter was declared with `mutate` keyword - flagImmutable = 2, // variable was declared via `val` (not `var`) - }; - int flags{0}; - - ~SymValVariable() override = default; - SymValVariable(int val, TypeExpr* sym_type) - : SymValBase(SymValKind::_Var, val, sym_type) {} - - bool is_function_parameter() const { - return idx >= 0; - } - bool is_mutate_parameter() const { - return flags & flagMutateParameter; - } - bool is_local_var() const { - return idx == -1; - } - bool is_immutable() const { - return flags & flagImmutable; - } -}; - -struct SymValFunc : SymValBase { - enum SymValFlag { - flagInline = 1, // marked `@inline` - flagInlineRef = 2, // marked `@inline_ref` - flagUsedAsNonCall = 8, // used not only as `f()`, but as a 1-st class function (assigned to var, pushed to tuple, 
etc.) - flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out - flagBuiltinFunction = 32, // was created via `define_builtin_func()`, not from source code - flagGetMethod = 64, // was declared via `get func(): T`, method_id is auto-assigned - flagIsEntrypoint = 128, // it's `main` / `onExternalMessage` / etc. - flagHasMutateParams = 256, // has parameters declared as `mutate` - flagAcceptsSelf = 512, // is a member function (has `self` first parameter) - flagReturnsSelf = 1024, // return type is `self` (returns the mutated 1st argument), calls can be chainable - }; - - td::RefInt256 method_id; // todo why int256? it's small - int flags{0}; - std::vector parameters; // [i]-th may be nullptr for underscore; if not, its val is SymValVariable - std::vector arg_order, ret_order; - - ~SymValFunc() override = default; - SymValFunc(std::vector parameters, int val, TypeExpr* sym_type, int flags) - : SymValBase(SymValKind::_Func, val, sym_type), flags(flags), parameters(std::move(parameters)) { - } - SymValFunc(std::vector parameters, int val, TypeExpr* sym_type, int flags, std::initializer_list arg_order, std::initializer_list ret_order) - : SymValBase(SymValKind::_Func, val, sym_type), flags(flags), parameters(std::move(parameters)), arg_order(arg_order), ret_order(ret_order) { - } - - const std::vector* get_arg_order() const { - return arg_order.empty() ? nullptr : &arg_order; - } - const std::vector* get_ret_order() const { - return ret_order.empty() ? 
nullptr : &ret_order; - } - - bool is_inline() const { - return flags & flagInline; - } - bool is_inline_ref() const { - return flags & flagInlineRef; - } - bool is_marked_as_pure() const { - return flags & flagMarkedAsPure; - } - bool is_builtin() const { - return flags & flagBuiltinFunction; - } - bool is_get_method() const { - return flags & flagGetMethod; - } - bool is_entrypoint() const { - return flags & flagIsEntrypoint; - } - bool has_mutate_params() const { - return flags & flagHasMutateParams; - } - bool does_accept_self() const { - return flags & flagAcceptsSelf; - } - bool does_return_self() const { - return flags & flagReturnsSelf; - } -}; - -struct SymValCodeFunc : SymValFunc { - CodeBlob* code; - bool is_really_used{false}; // calculated via dfs; unused functions are not codegenerated - ~SymValCodeFunc() override = default; - SymValCodeFunc(std::vector parameters, int val, TypeExpr* _ft) - : SymValFunc(std::move(parameters), val, _ft, 0), code(nullptr) { - } - bool does_need_codegen() const; +struct FunctionBodyCode { + CodeBlob* code = nullptr; void set_code(CodeBlob* code); }; -struct SymValGlobVar : SymValBase { - bool is_really_used{false}; // calculated via dfs from used functions; unused globals are not codegenerated - - SymValGlobVar(int val, TypeExpr* gvtype) - : SymValBase(SymValKind::_GlobVar, val, gvtype) { - } - ~SymValGlobVar() override = default; -}; - -struct SymValConst : SymValBase { - enum ConstKind { IntConst, SliceConst }; - - td::RefInt256 intval; - std::string strval; - ConstKind kind; - - SymValConst(int idx, td::RefInt256 value) - : SymValBase(SymValKind::_Const, idx, TypeExpr::new_atomic(TypeExpr::_Int)), intval(std::move(value)), kind(IntConst) { - } - SymValConst(int idx, std::string value) - : SymValBase(SymValKind::_Const, idx, TypeExpr::new_atomic(TypeExpr::_Slice)), strval(std::move(value)), kind(SliceConst) { - } - ~SymValConst() override = default; - td::RefInt256 get_int_value() const { - return intval; - } - 
std::string get_str_value() const { - return strval; - } - ConstKind get_kind() const { - return kind; - } -}; - - -/* - * - * EXPRESSIONS - * - */ - -struct Expr { - enum ExprCls { - _Apply, - _VarApply, - _GrabMutatedVars, - _ReturnSelf, - _MkTuple, - _Tensor, - _Const, - _Var, - _GlobFunc, - _GlobVar, - _Letop, - _Hole, - _CondExpr, - _SliceConst, - }; - ExprCls cls; - int val{0}; - enum { _IsRvalue = 2, _IsLvalue = 4, _IsImmutable = 8, _IsImpure = 32 }; - int flags{0}; - SrcLocation here; - td::RefInt256 intval; - std::string strval; - SymDef* sym{nullptr}; - TypeExpr* e_type{nullptr}; - std::vector args; - Expr(ExprCls c, SrcLocation loc) : cls(c), here(loc) { - } - Expr(ExprCls c, std::vector _args) : cls(c), args(std::move(_args)) { - } - Expr(ExprCls c, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { - } - Expr(ExprCls c, SymDef* _sym, std::initializer_list _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { - } - Expr(ExprCls c, SymDef* _sym, std::vector _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { - } - Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list _arglist); - ~Expr() { - for (auto& arg_ptr : args) { - delete arg_ptr; - } - } - Expr* copy() const; - void pb_arg(Expr* expr) { - args.push_back(expr); - } - void set_val(int _val) { - val = _val; - } - bool is_rvalue() const { - return flags & _IsRvalue; - } - bool is_lvalue() const { - return flags & _IsLvalue; - } - bool is_immutable() const { - return flags & _IsImmutable; - } - bool is_mktuple() const { - return cls == _MkTuple; - } - void chk_rvalue() const { - if (!is_rvalue()) { - fire_error_rvalue_expected(); - } - } - void deduce_type(); - void set_location(SrcLocation loc) { - here = loc; - } - SrcLocation get_location() const { - return here; - } - void define_new_vars(CodeBlob& code); - void predefine_vars(); - std::vector pre_compile(CodeBlob& code, std::vector>* lval_globs = nullptr) const; - var_idx_t new_tmp(CodeBlob& code) const; - 
std::vector new_tmp_vect(CodeBlob& code) const { - return {new_tmp(code)}; - } - - GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN - void fire_error_rvalue_expected() const; - GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN - void fire_error_lvalue_expected(const std::string& details) const; - GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN - void fire_error_modifying_immutable(const std::string& details) const; -}; - /* * * GENERATE CODE @@ -651,7 +414,6 @@ struct AsmOp { int a, b; bool gconst{false}; std::string op; - td::RefInt256 origin; struct SReg { int idx; SReg(int _idx) : idx(_idx) { @@ -671,9 +433,6 @@ struct AsmOp { AsmOp(Type _t, int _a, int _b, std::string _op) : t(_t), a(_a), b(_b), op(std::move(_op)) { compute_gconst(); } - AsmOp(Type _t, int _a, int _b, std::string _op, td::RefInt256 x) : t(_t), a(_a), b(_b), op(std::move(_op)), origin(x) { - compute_gconst(); - } void out(std::ostream& os) const; void out_indent_nl(std::ostream& os, bool no_nl = false) const; std::string to_string() const; @@ -786,20 +545,20 @@ struct AsmOp { static AsmOp BlkReverse(int a, int b); static AsmOp make_stk2(int a, int b, const char* str, int delta); static AsmOp make_stk3(int a, int b, int c, const char* str, int delta); - static AsmOp IntConst(td::RefInt256 value); + static AsmOp IntConst(const td::RefInt256& x); static AsmOp BoolConst(bool f); - static AsmOp Const(std::string push_op, td::RefInt256 origin = {}) { - return AsmOp(a_const, 0, 1, std::move(push_op), origin); + static AsmOp Const(std::string push_op) { + return AsmOp(a_const, 0, 1, std::move(push_op)); } - static AsmOp Const(int arg, std::string push_op, td::RefInt256 origin = {}); - static AsmOp Comment(std::string comment) { + static AsmOp Const(int arg, const std::string& push_op); + static AsmOp Comment(const std::string& comment) { return AsmOp(a_none, std::string{"// "} + comment); } - static AsmOp Custom(std::string custom_op) { + static AsmOp Custom(const std::string& custom_op) { return AsmOp(a_custom, 255, 255, 
custom_op); } - static AsmOp Parse(std::string custom_op); - static AsmOp Custom(std::string custom_op, int args, int retv = 1) { + static AsmOp Parse(const std::string& custom_op); + static AsmOp Custom(const std::string& custom_op, int args, int retv = 1) { return AsmOp(a_custom, args, retv, custom_op); } static AsmOp Parse(std::string custom_op, int args, int retv = 1); @@ -813,6 +572,7 @@ inline std::ostream& operator<<(std::ostream& os, const AsmOp& op) { } std::ostream& operator<<(std::ostream& os, AsmOp::SReg stack_reg); +std::ostream& operator<<(std::ostream& os, TypePtr type_data); struct AsmOpList { std::vector list_; @@ -887,18 +647,6 @@ inline std::ostream& operator<<(std::ostream& os, const AsmOpList& op_list) { return os; } -class IndentGuard { - AsmOpList& aol_; - - public: - IndentGuard(AsmOpList& aol) : aol_(aol) { - aol.indent(); - } - ~IndentGuard() { - aol_.undent(); - } -}; - struct AsmOpCons { std::unique_ptr car; std::unique_ptr cdr; @@ -1321,71 +1069,56 @@ struct Stack { */ typedef std::function&, std::vector&, SrcLocation)> simple_compile_func_t; -typedef std::function&, std::vector&)> compile_func_t; inline simple_compile_func_t make_simple_compile(AsmOp op) { return [op](std::vector& out, std::vector& in, SrcLocation) -> AsmOp { return op; }; } -inline compile_func_t make_ext_compile(std::vector&& ops) { - return [ops = std::move(ops)](AsmOpList& dest, std::vector& out, std::vector& in)->bool { - return dest.append(ops); - }; -} +struct FunctionBodyBuiltin { + simple_compile_func_t simple_compile; -inline compile_func_t make_ext_compile(AsmOp op) { - return - [op](AsmOpList& dest, std::vector& out, std::vector& in) -> bool { return dest.append(op); }; -} + explicit FunctionBodyBuiltin(simple_compile_func_t compile) + : simple_compile(std::move(compile)) {} -struct SymValAsmFunc : SymValFunc { - simple_compile_func_t simple_compile; - compile_func_t ext_compile; - ~SymValAsmFunc() override = default; - SymValAsmFunc(std::vector parameters, 
TypeExpr* ft, std::vector&& arg_order, std::vector&& ret_order, int flags) - : SymValFunc(std::move(parameters), -1, ft, flags) { - this->arg_order = std::move(arg_order); - this->ret_order = std::move(ret_order); - } - SymValAsmFunc(std::vector parameters, TypeExpr* ft, simple_compile_func_t _compile, int flags) - : SymValFunc(std::move(parameters), -1, ft, flags), simple_compile(std::move(_compile)) { - } - SymValAsmFunc(std::vector parameters, TypeExpr* ft, simple_compile_func_t _compile, int flags, - std::initializer_list arg_order, std::initializer_list ret_order) - : SymValFunc(std::move(parameters), -1, ft, flags, arg_order, ret_order), simple_compile(std::move(_compile)) { - } - void set_code(std::vector code); - bool compile(AsmOpList& dest, std::vector& out, std::vector& in, SrcLocation where) const; + void compile(AsmOpList& dest, std::vector& out, std::vector& in, SrcLocation where) const; +}; + +struct FunctionBodyAsm { + std::vector ops; + + void set_code(std::vector&& code); + void compile(AsmOpList& dest) const; }; struct CodeBlob { - enum { _ForbidImpure = 4 }; int var_cnt, in_var_cnt; - TypeExpr* ret_type; - const SymValCodeFunc* func_val; + const FunctionData* fun_ref; std::string name; SrcLocation loc; std::vector vars; std::unique_ptr ops; std::unique_ptr* cur_ops; - std::vector debug_ttt; +#ifdef TOLK_DEBUG + std::vector _vector_of_ops; // to see it in debugger instead of nested pointers +#endif std::stack*> cur_ops_stack; - int flags = 0; bool require_callxargs = false; - CodeBlob(std::string name, SrcLocation loc, const SymValCodeFunc* func_val, TypeExpr* ret_type) - : var_cnt(0), in_var_cnt(0), ret_type(ret_type), func_val(func_val), name(std::move(name)), loc(loc), cur_ops(&ops) { + CodeBlob(std::string name, SrcLocation loc, const FunctionData* fun_ref) + : var_cnt(0), in_var_cnt(0), fun_ref(fun_ref), name(std::move(name)), loc(loc), cur_ops(&ops) { } template Op& emplace_back(Args&&... 
args) { Op& res = *(*cur_ops = std::make_unique(args...)); cur_ops = &(res.next); - debug_ttt.push_back(&res); +#ifdef TOLK_DEBUG + _vector_of_ops.push_back(&res); +#endif return res; } - bool import_params(FormalArgList arg_list); - var_idx_t create_var(TypeExpr* var_type, var_idx_t sym_idx, SrcLocation loc); - var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) { - return create_var(var_type, 0, loc); + bool import_params(FormalArgList&& arg_list); + var_idx_t create_var(TypePtr var_type, const LocalVarData* v_sym, SrcLocation loc); + var_idx_t create_tmp_var(TypePtr var_type, SrcLocation loc) { + return create_var(var_type, nullptr, loc); } int split_vars(bool strict = false); bool compute_used_code_vars(); @@ -1406,16 +1139,17 @@ struct CodeBlob { close_blk(location); pop_cur(); } - void simplify_var_types(); void prune_unreachable_code(); void fwd_analyze(); void mark_noreturn(); void generate_code(AsmOpList& out_list, int mode = 0); void generate_code(std::ostream& os, int mode = 0, int indent = 0); - void on_var_modification(var_idx_t idx, SrcLocation here) const { - for (auto& f : vars.at(idx).on_modification) { - f(here); + void on_var_modification(const std::vector& left_lval_indices, SrcLocation here) const { + for (var_idx_t ir_idx : left_lval_indices) { + for (auto& f : vars.at(ir_idx).on_modification) { + f(here); + } } } }; diff --git a/tolk/type-expr.h b/tolk/type-expr.h deleted file mode 100644 index 21a35a8e3..000000000 --- a/tolk/type-expr.h +++ /dev/null @@ -1,131 +0,0 @@ -#pragma once - -#include -#include - -namespace tolk { - -struct TypeExpr { - enum Kind { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll }; - enum AtomicType { _Int, _Cell, _Slice, _Builder, _Continutaion, _Tuple }; - Kind constr; - int value; - int minw, maxw; - static constexpr int w_inf = 1023; - std::vector args; - bool was_forall_var = false; - - explicit TypeExpr(Kind _constr, int _val = 0) : constr(_constr), value(_val), 
minw(0), maxw(w_inf) { - } - TypeExpr(Kind _constr, int _val, int width) : constr(_constr), value(_val), minw(width), maxw(width) { - } - TypeExpr(Kind _constr, std::vector list) - : constr(_constr), value((int)list.size()), args(std::move(list)) { - compute_width(); - } - TypeExpr(Kind _constr, std::initializer_list list) - : constr(_constr), value((int)list.size()), args(std::move(list)) { - compute_width(); - } - TypeExpr(Kind _constr, TypeExpr* elem0) : constr(_constr), value(1), args{elem0} { - compute_width(); - } - TypeExpr(Kind _constr, TypeExpr* elem0, std::vector list) - : constr(_constr), value((int)list.size() + 1), args{elem0} { - args.insert(args.end(), list.begin(), list.end()); - compute_width(); - } - TypeExpr(Kind _constr, TypeExpr* elem0, std::initializer_list list) - : constr(_constr), value((int)list.size() + 1), args{elem0} { - args.insert(args.end(), list.begin(), list.end()); - compute_width(); - } - - bool is_atomic() const { - return constr == te_Atomic; - } - bool is_atomic(int v) const { - return constr == te_Atomic && value == v; - } - bool is_int() const { - return is_atomic(_Int); - } - bool is_var() const { - return constr == te_Var; - } - bool is_map() const { - return constr == te_Map; - } - bool is_tuple() const { - return constr == te_Tuple; - } - bool has_fixed_width() const { - return minw == maxw; - } - int get_width() const { - return has_fixed_width() ? 
minw : -1; - } - void compute_width(); - bool recompute_width(); - void show_width(std::ostream& os); - std::ostream& print(std::ostream& os, int prio = 0) const; - void replace_with(TypeExpr* te2); - int extract_components(std::vector& comp_list); - bool equals_to(const TypeExpr* rhs) const; - bool has_unknown_inside() const; - static int holes, type_vars; - static TypeExpr* new_hole() { - return new TypeExpr{te_Unknown, ++holes}; - } - static TypeExpr* new_hole(int width) { - return new TypeExpr{te_Unknown, ++holes, width}; - } - static TypeExpr* new_unit() { - return new TypeExpr{te_Tensor, 0, 0}; - } - static TypeExpr* new_atomic(int value) { - return new TypeExpr{te_Atomic, value, 1}; - } - static TypeExpr* new_map(TypeExpr* from, TypeExpr* to); - static TypeExpr* new_func() { - return new_map(new_hole(), new_hole()); - } - static TypeExpr* new_tensor(std::vector list, bool red = true) { - return red && list.size() == 1 ? list[0] : new TypeExpr{te_Tensor, std::move(list)}; - } - static TypeExpr* new_tensor(std::initializer_list list) { - return new TypeExpr{te_Tensor, std::move(list)}; - } - static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2) { - return new_tensor({te1, te2}); - } - static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2, TypeExpr* te3) { - return new_tensor({te1, te2, te3}); - } - static TypeExpr* new_tuple(TypeExpr* arg0) { - return new TypeExpr{te_Tuple, arg0}; - } - static TypeExpr* new_tuple(std::vector list, bool red = false) { - return new_tuple(new_tensor(std::move(list), red)); - } - static TypeExpr* new_tuple(std::initializer_list list) { - return new_tuple(new_tensor(list)); - } - static TypeExpr* new_var() { - return new TypeExpr{te_Var, --type_vars, 1}; - } - static TypeExpr* new_var(int idx) { - return new TypeExpr{te_Var, idx, 1}; - } - static TypeExpr* new_forall(std::vector list, TypeExpr* body) { - return new TypeExpr{te_ForAll, body, std::move(list)}; - } - - static bool remove_indirect(TypeExpr*& te, TypeExpr* 
forbidden = nullptr); - static std::vector remove_forall(TypeExpr*& te); - static bool remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars); -}; - -std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr); - -} // namespace tolk diff --git a/tolk/type-system.cpp b/tolk/type-system.cpp new file mode 100644 index 000000000..b21bd0eeb --- /dev/null +++ b/tolk/type-system.cpp @@ -0,0 +1,702 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "type-system.h" +#include "lexer.h" +#include "platform-utils.h" +#include "compiler-state.h" +#include + +namespace tolk { + +/* + * This class stores a big hashtable [hash => TypeData] + * Every non-trivial TypeData*::create() method at first looks here, and allocates an object only if not found. + * That's why all allocated TypeData objects are unique, storing unique type_id. 
+ */ +class TypeDataTypeIdCalculation { + uint64_t cur_hash; + int children_flags_mask = 0; + + static std::unordered_map all_unique_occurred_types; + +public: + explicit TypeDataTypeIdCalculation(uint64_t initial_arbitrary_unique_number) + : cur_hash(initial_arbitrary_unique_number) {} + + void feed_hash(uint64_t val) { + cur_hash = cur_hash * 56235515617499ULL + val; + } + + void feed_string(const std::string& s) { + feed_hash(std::hash{}(s)); + } + + void feed_child(TypePtr inner) { + feed_hash(inner->type_id); + children_flags_mask |= inner->flags; + } + + uint64_t type_id() const { + return cur_hash; + } + + int children_flags() const { + return children_flags_mask; + } + + GNU_ATTRIBUTE_FLATTEN + TypePtr get_existing() const { + auto it = all_unique_occurred_types.find(cur_hash); + return it != all_unique_occurred_types.end() ? it->second : nullptr; + } + + GNU_ATTRIBUTE_NOINLINE + TypePtr register_unique(TypePtr newly_created) const { +#ifdef TOLK_DEBUG + assert(newly_created->type_id == cur_hash); +#endif + all_unique_occurred_types[cur_hash] = newly_created; + return newly_created; + } +}; + +std::unordered_map TypeDataTypeIdCalculation::all_unique_occurred_types; +TypePtr TypeDataInt::singleton; +TypePtr TypeDataBool::singleton; +TypePtr TypeDataCell::singleton; +TypePtr TypeDataSlice::singleton; +TypePtr TypeDataBuilder::singleton; +TypePtr TypeDataTuple::singleton; +TypePtr TypeDataContinuation::singleton; +TypePtr TypeDataNullLiteral::singleton; +TypePtr TypeDataUnknown::singleton; +TypePtr TypeDataVoid::singleton; + +void type_system_init() { + TypeDataInt::singleton = new TypeDataInt; + TypeDataBool::singleton = new TypeDataBool; + TypeDataCell::singleton = new TypeDataCell; + TypeDataSlice::singleton = new TypeDataSlice; + TypeDataBuilder::singleton = new TypeDataBuilder; + TypeDataTuple::singleton = new TypeDataTuple; + TypeDataContinuation::singleton = new TypeDataContinuation; + TypeDataNullLiteral::singleton = new TypeDataNullLiteral; + 
TypeDataUnknown::singleton = new TypeDataUnknown; + TypeDataVoid::singleton = new TypeDataVoid; +} + + +// -------------------------------------------- +// create() +// +// all constructors of TypeData classes are private, only TypeData*::create() is allowed +// each non-trivial create() method calculates hash (type_id) +// and creates an object only if it isn't found in a global hashtable +// + +TypePtr TypeDataFunCallable::create(std::vector&& params_types, TypePtr return_type) { + TypeDataTypeIdCalculation hash(3184039965511020991ULL); + for (TypePtr param : params_types) { + hash.feed_child(param); + hash.feed_hash(767721); + } + hash.feed_child(return_type); + hash.feed_hash(767722); + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataFunCallable(hash.type_id(), hash.children_flags(), std::move(params_types), return_type)); +} + +TypePtr TypeDataGenericT::create(std::string&& nameT) { + TypeDataTypeIdCalculation hash(9145033724911680012ULL); + hash.feed_string(nameT); + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataGenericT(hash.type_id(), std::move(nameT))); +} + +TypePtr TypeDataTensor::create(std::vector&& items) { + TypeDataTypeIdCalculation hash(3159238551239480381ULL); + for (TypePtr item : items) { + hash.feed_child(item); + hash.feed_hash(819613); + } + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataTensor(hash.type_id(), hash.children_flags(), std::move(items))); +} + +TypePtr TypeDataTypedTuple::create(std::vector&& items) { + TypeDataTypeIdCalculation hash(9189266157349499320ULL); + for (TypePtr item : items) { + hash.feed_child(item); + hash.feed_hash(735911); + } + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataTypedTuple(hash.type_id(), hash.children_flags(), std::move(items))); +} + 
+TypePtr TypeDataUnresolved::create(std::string&& text, SrcLocation loc) { + TypeDataTypeIdCalculation hash(3680147223540048162ULL); + hash.feed_string(text); + // hash.feed_hash(*reinterpret_cast(&loc)); + + if (TypePtr existing = hash.get_existing()) { + return existing; + } + return hash.register_unique(new TypeDataUnresolved(hash.type_id(), std::move(text), loc)); +} + + +// -------------------------------------------- +// as_human_readable() +// +// is used only for error messages and debugging, therefore no optimizations for simplicity +// only non-trivial implementations are here; trivial are defined in .h file +// + +std::string TypeDataFunCallable::as_human_readable() const { + std::string result = "("; + for (TypePtr param : params_types) { + if (result.size() > 1) { + result += ", "; + } + result += param->as_human_readable(); + } + result += ") -> "; + result += return_type->as_human_readable(); + return result; +} + +std::string TypeDataTensor::as_human_readable() const { + std::string result = "("; + for (TypePtr item : items) { + if (result.size() > 1) { + result += ", "; + } + result += item->as_human_readable(); + } + result += ')'; + return result; +} + +std::string TypeDataTypedTuple::as_human_readable() const { + std::string result = "["; + for (TypePtr item : items) { + if (result.size() > 1) { + result += ", "; + } + result += item->as_human_readable(); + } + result += ']'; + return result; +} + + +// -------------------------------------------- +// traverse() +// +// invokes a callback for TypeData itself and all its children +// only non-trivial implementations are here; by default (no children), `callback(this)` is executed +// + +void TypeDataFunCallable::traverse(const TraverserCallbackT& callback) const { + callback(this); + for (TypePtr param : params_types) { + param->traverse(callback); + } + return_type->traverse(callback); +} + +void TypeDataTensor::traverse(const TraverserCallbackT& callback) const { + callback(this); + for 
(TypePtr item : items) { + item->traverse(callback); + } +} + +void TypeDataTypedTuple::traverse(const TraverserCallbackT& callback) const { + callback(this); + for (TypePtr item : items) { + item->traverse(callback); + } +} + + +// -------------------------------------------- +// replace_children_custom() +// +// returns new TypeData with children replaced by a custom callback +// used to replace generic T on generics expansion — to convert `f` to `f` +// only non-trivial implementations are here; by default (no children), `return callback(this)` is executed +// + +TypePtr TypeDataFunCallable::replace_children_custom(const ReplacerCallbackT& callback) const { + std::vector mapped; + mapped.reserve(params_types.size()); + for (TypePtr param : params_types) { + mapped.push_back(param->replace_children_custom(callback)); + } + return callback(create(std::move(mapped), return_type->replace_children_custom(callback))); +} + +TypePtr TypeDataTensor::replace_children_custom(const ReplacerCallbackT& callback) const { + std::vector mapped; + mapped.reserve(items.size()); + for (TypePtr item : items) { + mapped.push_back(item->replace_children_custom(callback)); + } + return callback(create(std::move(mapped))); +} + +TypePtr TypeDataTypedTuple::replace_children_custom(const ReplacerCallbackT& callback) const { + std::vector mapped; + mapped.reserve(items.size()); + for (TypePtr item : items) { + mapped.push_back(item->replace_children_custom(callback)); + } + return callback(create(std::move(mapped))); +} + + +// -------------------------------------------- +// calc_width_on_stack() +// +// returns the number of stack slots occupied by a variable of this type +// only non-trivial implementations are here; by default (most types) occupy 1 stack slot +// + +int TypeDataGenericT::calc_width_on_stack() const { + // this function is invoked only in functions with generics already instantiated + assert(false); + return -999999; +} + +int TypeDataTensor::calc_width_on_stack() 
const { + int sum = 0; + for (TypePtr item : items) { + sum += item->calc_width_on_stack(); + } + return sum; +} + +int TypeDataUnresolved::calc_width_on_stack() const { + // since early pipeline stages, no unresolved types left + assert(false); + return -999999; +} + +int TypeDataVoid::calc_width_on_stack() const { + return 0; +} + + +// -------------------------------------------- +// can_rhs_be_assigned() +// +// on `var lhs: = rhs`, having inferred rhs_type, check that it can be assigned without any casts +// the same goes for passing arguments, returning values, etc. — where the "receiver" (lhs) checks "applier" (rhs) +// for now, `null` can be assigned to any TVM primitive, but later we'll have T? types and null safety +// + +bool TypeDataInt::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataBool::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataCell::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataSlice::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataBuilder::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataTuple::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataContinuation::can_rhs_be_assigned(TypePtr rhs) const { + if (rhs == this) { + return true; + } + if (rhs == 
TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataNullLiteral::can_rhs_be_assigned(TypePtr rhs) const { + return rhs == this; +} + +bool TypeDataFunCallable::can_rhs_be_assigned(TypePtr rhs) const { + return rhs == this; +} + +bool TypeDataGenericT::can_rhs_be_assigned(TypePtr rhs) const { + assert(false); + return false; +} + +bool TypeDataTensor::can_rhs_be_assigned(TypePtr rhs) const { + if (const auto* as_tensor = rhs->try_as(); as_tensor && as_tensor->size() == size()) { + for (int i = 0; i < size(); ++i) { + if (!items[i]->can_rhs_be_assigned(as_tensor->items[i])) { + return false; + } + } + return true; + } + // note, that tensors can not accept null + return false; +} + +bool TypeDataTypedTuple::can_rhs_be_assigned(TypePtr rhs) const { + if (const auto* as_tuple = rhs->try_as(); as_tuple && as_tuple->size() == size()) { + for (int i = 0; i < size(); ++i) { + if (!items[i]->can_rhs_be_assigned(as_tuple->items[i])) { + return false; + } + } + return true; + } + if (rhs == TypeDataNullLiteral::create()) { + return true; + } + return false; +} + +bool TypeDataUnknown::can_rhs_be_assigned(TypePtr rhs) const { + return true; +} + +bool TypeDataUnresolved::can_rhs_be_assigned(TypePtr rhs) const { + assert(false); + return false; +} + +bool TypeDataVoid::can_rhs_be_assigned(TypePtr rhs) const { + return rhs == this; +} + + +// -------------------------------------------- +// can_be_casted_with_as_operator() +// +// on `expr as `, check whether casting is applicable +// note, that it's not auto-casts `var lhs: = rhs`, it's an expression `rhs as ` +// + +bool TypeDataInt::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataBool::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this || cast_to == TypeDataInt::create(); +} + +bool TypeDataCell::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool 
TypeDataSlice::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataBuilder::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataTuple::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataContinuation::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + +bool TypeDataNullLiteral::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this + || cast_to == TypeDataInt::create() || cast_to == TypeDataBool::create() || cast_to == TypeDataCell::create() || cast_to == TypeDataSlice::create() + || cast_to == TypeDataBuilder::create() || cast_to == TypeDataContinuation::create() || cast_to == TypeDataTuple::create() + || cast_to->try_as(); +} + +bool TypeDataFunCallable::can_be_casted_with_as_operator(TypePtr cast_to) const { + return this == cast_to; +} + +bool TypeDataGenericT::can_be_casted_with_as_operator(TypePtr cast_to) const { + return true; +} + +bool TypeDataTensor::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_tensor = cast_to->try_as(); to_tensor && to_tensor->size() == size()) { + for (int i = 0; i < size(); ++i) { + if (!items[i]->can_be_casted_with_as_operator(to_tensor->items[i])) { + return false; + } + } + return true; + } + return false; +} + +bool TypeDataTypedTuple::can_be_casted_with_as_operator(TypePtr cast_to) const { + if (const auto* to_tuple = cast_to->try_as(); to_tuple && to_tuple->size() == size()) { + for (int i = 0; i < size(); ++i) { + if (!items[i]->can_be_casted_with_as_operator(to_tuple->items[i])) { + return false; + } + } + return true; + } + return false; +} + +bool TypeDataUnknown::can_be_casted_with_as_operator(TypePtr cast_to) const { + // 'unknown' can be cast to any type + // (though it's not valid for exception arguments when casting them to non-1 stack width, + // but to ensure it, we need a special 
type "unknown TVM primitive", which is overwhelming I think) + return true; +} + +bool TypeDataUnresolved::can_be_casted_with_as_operator(TypePtr cast_to) const { + return false; +} + +bool TypeDataVoid::can_be_casted_with_as_operator(TypePtr cast_to) const { + return cast_to == this; +} + + +// -------------------------------------------- +// extract_components() +// +// used in code generation (transforming Ops to other Ops) +// to be removed in the future +// + +void TypeDataGenericT::extract_components(std::vector& comp_types) const { + assert(false); +} + +void TypeDataTensor::extract_components(std::vector& comp_types) const { + for (TypePtr item : items) { + item->extract_components(comp_types); + } +} + +void TypeDataUnresolved::extract_components(std::vector& comp_types) const { + assert(false); +} + +void TypeDataVoid::extract_components(std::vector& comp_types) const { +} + + +// -------------------------------------------- +// parsing type from tokens +// +// here we implement parsing types (mostly after colon) to TypeData +// example: `var v: int` is TypeDataInt +// example: `var v: (builder, [cell])` is TypeDataTensor(TypeDataBuilder, TypeDataTypedTuple(TypeDataCell)) +// example: `fun f(): ()` is TypeDataTensor() (an empty one) +// +// note, that unrecognized type names (MyEnum, MyStruct, T) are parsed as TypeDataUnresolved, +// and later, when all files are parsed and all symbols registered, such identifiers are resolved +// example: `fun f(v: T)` at first v is TypeDataUnresolved("T"), later becomes TypeDataGenericT +// see finalize_type_data() +// +// note, that `self` does not name a type, it can appear only as a return value of a function (parsed specially) +// when `self` appears as a type, it's parsed as TypeDataUnresolved, and later an error is emitted +// + +static TypePtr parse_type_expression(Lexer& lex); + +std::vector parse_nested_type_list(Lexer& lex, TokenType tok_op, const char* s_op, TokenType tok_cl, const char* s_cl) { + 
lex.expect(tok_op, s_op); + std::vector sub_types; + while (true) { + if (lex.tok() == tok_cl) { // empty lists allowed + lex.next(); + break; + } + + sub_types.emplace_back(parse_type_expression(lex)); + if (lex.tok() == tok_comma) { + lex.next(); + } else if (lex.tok() != tok_cl) { + lex.unexpected(s_cl); + } + } + return sub_types; +} + +std::vector parse_nested_type_list_in_parenthesis(Lexer& lex) { + return parse_nested_type_list(lex, tok_oppar, "`(`", tok_clpar, "`)` or `,`"); +} + +static TypePtr parse_simple_type(Lexer& lex) { + switch (lex.tok()) { + case tok_int: + lex.next(); + return TypeDataInt::create(); + case tok_bool: + lex.next(); + return TypeDataBool::create(); + case tok_cell: + lex.next(); + return TypeDataCell::create(); + case tok_builder: + lex.next(); + return TypeDataBuilder::create(); + case tok_slice: + lex.next(); + return TypeDataSlice::create(); + case tok_tuple: + lex.next(); + return TypeDataTuple::create(); + case tok_continuation: + lex.next(); + return TypeDataContinuation::create(); + case tok_null: + lex.next(); + return TypeDataNullLiteral::create(); + case tok_void: + lex.next(); + return TypeDataVoid::create(); + case tok_self: + case tok_identifier: { + SrcLocation loc = lex.cur_location(); + std::string text = static_cast(lex.cur_str()); + lex.next(); + return TypeDataUnresolved::create(std::move(text), loc); + } + case tok_oppar: { + std::vector items = parse_nested_type_list_in_parenthesis(lex); + if (items.size() == 1) { + return items.front(); + } + return TypeDataTensor::create(std::move(items)); + } + case tok_opbracket: { + std::vector items = parse_nested_type_list(lex, tok_opbracket, "`[`", tok_clbracket, "`]` or `,`"); + return TypeDataTypedTuple::create(std::move(items)); + } + case tok_fun: { + lex.next(); + std::vector params_types = parse_nested_type_list_in_parenthesis(lex); + lex.expect(tok_arrow, "`->`"); + } + default: + lex.unexpected(""); + } +} + +static TypePtr parse_type_nullable(Lexer& lex) { + 
TypePtr result = parse_simple_type(lex); + + if (lex.tok() == tok_question) { + lex.error("nullable types are not supported yet"); + } + + return result; +} + +static TypePtr parse_type_expression(Lexer& lex) { + TypePtr result = parse_type_nullable(lex); + + if (lex.tok() == tok_arrow) { // `int -> int`, `(cell, slice) -> void` + lex.next(); + TypePtr return_type = parse_type_expression(lex); + std::vector params_types = {result}; + if (const auto* as_tensor = result->try_as()) { + params_types = as_tensor->items; + } + return TypeDataFunCallable::create(std::move(params_types), return_type); + } + + if (lex.tok() != tok_bitwise_or) { + return result; + } + + lex.error("union types are not supported yet"); +} + +TypePtr parse_type_from_tokens(Lexer& lex) { + return parse_type_expression(lex); +} + +std::ostream& operator<<(std::ostream& os, TypePtr type_data) { + return os << (type_data ? type_data->as_human_readable() : "(nullptr-type)"); +} + +} // namespace tolk diff --git a/tolk/type-system.h b/tolk/type-system.h new file mode 100644 index 000000000..13c0e4b09 --- /dev/null +++ b/tolk/type-system.h @@ -0,0 +1,423 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +#include "src-file.h" +#include +#include +#include + +namespace tolk { + +/* + * TypeData is both a user-given and an inferred type representation. + * `int`, `cell`, `T`, `(int, [tuple])` are instances of TypeData. + * Every unique TypeData is created only once, so for example TypeDataTensor::create(int, int) + * returns one and the same pointer always. This "uniqueness" is called type_id, calculated before creation. + * + * In Tolk code, types after colon `var v: (int, T)` are parsed to TypeData. + * See parse_type_from_tokens(). + * So, AST nodes which can have declared types (local/global variables and others) store a pointer to TypeData. + * + * Type inferring also creates TypeData for inferred expressions. All AST expression nodes have inferred_type. + * For example, `1 + 2`, both operands are TypeDataInt, its result is also TypeDataInt. + * Type checking also uses TypeData. For example, `var i: slice = 1 + 2`, at first rhs (TypeDataInt) is inferred, + * then lhs (TypeDataSlice from declaration) is checked whether rhs can be assigned. + * See can_rhs_be_assigned(). + * + * Note, that while initial parsing Tolk files to AST, known types (`int`, `cell`, etc.) are created as-is, + * but user-defined types (`T`, `MyStruct`, `MyAlias`) are saved as TypeDataUnresolved. + * After all symbols have been registered, resolving identifiers step is executed, where particularly + * all TypeDataUnresolved instances are converted to a resolved type. At inferring, no unresolved remain. + * For instance, `fun f(v: T)`, at first "T" of `v` is unresolved, and then converted to TypeDataGenericT. 
+ */ +class TypeData { + // all unique types have unique type_id; it's used both for allocating memory once and for tagged unions + const uint64_t type_id; + // bits of flag_mask, to store often-used properties and return them without tree traversing + const int flags; + + friend class TypeDataTypeIdCalculation; + +protected: + enum flag_mask { + flag_contains_unknown_inside = 1 << 1, + flag_contains_genericT_inside = 1 << 2, + flag_contains_unresolved_inside = 1 << 3, + }; + + explicit TypeData(uint64_t type_id, int flags_with_children) + : type_id(type_id) + , flags(flags_with_children) { + } + +public: + virtual ~TypeData() = default; + + template + const Derived* try_as() const { + return dynamic_cast(this); + } + + uint64_t get_type_id() const { return type_id; } + + bool has_unknown_inside() const { return flags & flag_contains_unknown_inside; } + bool has_genericT_inside() const { return flags & flag_contains_genericT_inside; } + bool has_unresolved_inside() const { return flags & flag_contains_unresolved_inside; } + + using TraverserCallbackT = std::function; + using ReplacerCallbackT = std::function; + + virtual std::string as_human_readable() const = 0; + virtual bool can_rhs_be_assigned(TypePtr rhs) const = 0; + virtual bool can_be_casted_with_as_operator(TypePtr cast_to) const = 0; + + virtual void traverse(const TraverserCallbackT& callback) const { + callback(this); + } + + virtual TypePtr replace_children_custom(const ReplacerCallbackT& callback) const { + return callback(this); + } + + virtual int calc_width_on_stack() const { + return 1; + } + + virtual void extract_components(std::vector& comp_types) const { + comp_types.push_back(this); + } +}; + +/* + * `int` is TypeDataInt, representation of TVM int. 
+ */ +class TypeDataInt final : public TypeData { + TypeDataInt() : TypeData(1ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "int"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `bool` is TypeDataBool. TVM has no bool, only integers. Under the hood, -1 is true, 0 is false. + * From the type system point of view, int and bool are different, not-autocastable types. + */ +class TypeDataBool final : public TypeData { + TypeDataBool() : TypeData(2ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "bool"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `cell` is TypeDataCell, representation of TVM cell. + */ +class TypeDataCell final : public TypeData { + TypeDataCell() : TypeData(3ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "cell"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `slice` is TypeDataSlice, representation of TVM slice. 
+ */ +class TypeDataSlice final : public TypeData { + TypeDataSlice() : TypeData(4ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "slice"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `builder` is TypeDataBuilder, representation of TVM builder. + */ +class TypeDataBuilder final : public TypeData { + TypeDataBuilder() : TypeData(5ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "builder"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `tuple` is TypeDataTuple, representation of TVM tuple. + * Note, that it's UNTYPED tuple. It occupies 1 stack slot in TVM. Its elements are any TVM values at runtime, + * so getting its element results in TypeDataUnknown (which must be assigned/cast explicitly). + */ +class TypeDataTuple final : public TypeData { + TypeDataTuple() : TypeData(6ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "tuple"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `continuation` is TypeDataContinuation, representation of TVM continuation. + * It's like "untyped callable", not compatible with other types. 
+ */ +class TypeDataContinuation final : public TypeData { + TypeDataContinuation() : TypeData(7ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "continuation"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `null` has TypeDataNullLiteral type. + * Currently, it can be assigned to int/slice/etc., but later Tolk will have T? types and null safety. + * Note, that `var i = null`, though valid (i would be constant null), fires an "always-null" compilation error + * (it's much better for user to see an error here than when he passes this variable somewhere). + */ +class TypeDataNullLiteral final : public TypeData { + TypeDataNullLiteral() : TypeData(8ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "null"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * `fun(int, int) -> void` is TypeDataFunCallable, think of it as a typed continuation. + * A type of function `fun f(x: int) { return x; }` is actually `fun(int) -> int`. + * So, when assigning it to a variable `var cb = f`, this variable also has this type. 
+ */ +class TypeDataFunCallable final : public TypeData { + TypeDataFunCallable(uint64_t type_id, int children_flags, std::vector&& params_types, TypePtr return_type) + : TypeData(type_id, children_flags) + , params_types(std::move(params_types)) + , return_type(return_type) {} + +public: + const std::vector params_types; + const TypePtr return_type; + + static TypePtr create(std::vector&& params_types, TypePtr return_type); + + int params_size() const { return static_cast(params_types.size()); } + + std::string as_human_readable() const override; + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + void traverse(const TraverserCallbackT& callback) const override; + TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override; +}; + +/* + * `T` inside generic functions is TypeDataGenericT. + * Example: `fun f(a: X, b: Y): [X, Y]` (here X and Y are). + * On instantiation like `f(1,"")`, a new function `f` is created with type `fun(int,slice)->[int,slice]`. + */ +class TypeDataGenericT final : public TypeData { + TypeDataGenericT(uint64_t type_id, std::string&& nameT) + : TypeData(type_id, flag_contains_genericT_inside) + , nameT(std::move(nameT)) {} + +public: + const std::string nameT; + + static TypePtr create(std::string&& nameT); + + std::string as_human_readable() const override { return nameT; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + int calc_width_on_stack() const override; + void extract_components(std::vector& comp_types) const override; +}; + +/* + * `(int, slice)` is TypeDataTensor of 2 elements. Tensor of N elements occupies N stack slots. + * Of course, there may be nested tensors, like `(int, (int, slice), cell)`. + * Arguments, variables, globals, return values, etc. can be tensors. + * A tensor can be empty. 
+ */ +class TypeDataTensor final : public TypeData { + TypeDataTensor(uint64_t type_id, int children_flags, std::vector&& items) + : TypeData(type_id, children_flags) + , items(std::move(items)) {} + +public: + const std::vector items; + + static TypePtr create(std::vector&& items); + + int size() const { return static_cast(items.size()); } + + std::string as_human_readable() const override; + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + void traverse(const TraverserCallbackT& callback) const override; + TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override; + int calc_width_on_stack() const override; + void extract_components(std::vector& comp_types) const override; +}; + +/* + * `[int, slice]` is TypeDataTypedTuple, a TVM 'tuple' under the hood, contained in 1 stack slot. + * Unlike TypeDataTuple (untyped tuples), it has a predefined inner structure and can be assigned as + * `var [i, cs] = [0, ""]` (where i and cs become two separate variables on a stack, int and slice). + */ +class TypeDataTypedTuple final : public TypeData { + TypeDataTypedTuple(uint64_t type_id, int children_flags, std::vector&& items) + : TypeData(type_id, children_flags) + , items(std::move(items)) {} + +public: + const std::vector items; + + static TypePtr create(std::vector&& items); + + int size() const { return static_cast(items.size()); } + + std::string as_human_readable() const override; + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + void traverse(const TraverserCallbackT& callback) const override; + TypePtr replace_children_custom(const ReplacerCallbackT& callback) const override; +}; + +/* + * `unknown` is a special type, which can appear in corner cases. + * The type of exception argument (which can hold any TVM value at runtime) is unknown. + * The type of `_` used as rvalue is unknown. 
+ * The only thing available to do with unknown is to cast it: `catch (excNo, arg) { var i = arg as int; }` + */ +class TypeDataUnknown final : public TypeData { + TypeDataUnknown() : TypeData(20ULL, flag_contains_unknown_inside) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "unknown"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; +}; + +/* + * "Unresolved" is not actually a type — it's an intermediate state between parsing and resolving. + * At parsing to AST, unrecognized type names (MyEnum, MyStruct, T) are parsed as TypeDataUnresolved, + * and after all source files parsed and global symbols registered, they are replaced by actual ones. + * Example: `fun f(v: T)` at first v is TypeDataUnresolved("T"), later becomes TypeDataGenericT. + */ +class TypeDataUnresolved final : public TypeData { + TypeDataUnresolved(uint64_t type_id, std::string&& text, SrcLocation loc) + : TypeData(type_id, flag_contains_unresolved_inside) + , text(std::move(text)) + , loc(loc) {} + +public: + const std::string text; + const SrcLocation loc; + + static TypePtr create(std::string&& text, SrcLocation loc); + + std::string as_human_readable() const override { return text + "*"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + int calc_width_on_stack() const override; + void extract_components(std::vector& comp_types) const override; +}; + +/* + * `void` is TypeDataVoid. + * From the type system point of view, `void` functions return nothing. + * Empty tensor is not compatible with void, although at IR level they are similar, 0 stack slots. 
+ */ +class TypeDataVoid final : public TypeData { + TypeDataVoid() : TypeData(10ULL, 0) {} + + static TypePtr singleton; + friend void type_system_init(); + +public: + static TypePtr create() { return singleton; } + + std::string as_human_readable() const override { return "void"; } + bool can_rhs_be_assigned(TypePtr rhs) const override; + bool can_be_casted_with_as_operator(TypePtr cast_to) const override; + int calc_width_on_stack() const override; + void extract_components(std::vector& comp_types) const override; +}; + + +// -------------------------------------------- + + +class Lexer; +TypePtr parse_type_from_tokens(Lexer& lex); + +void type_system_init(); + +} // namespace tolk diff --git a/tolk/unify-types.cpp b/tolk/unify-types.cpp deleted file mode 100644 index cee71942b..000000000 --- a/tolk/unify-types.cpp +++ /dev/null @@ -1,454 +0,0 @@ -/* - This file is part of TON Blockchain Library. - - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . 
-*/ -#include "tolk.h" - -namespace tolk { - -/* - * - * TYPE EXPRESSIONS - * - */ - -int TypeExpr::holes = 0, TypeExpr::type_vars = 0; // not thread safe, but it is ok for now - -void TypeExpr::compute_width() { - switch (constr) { - case te_Atomic: - case te_Map: - minw = maxw = 1; - break; - case te_Tensor: - minw = maxw = 0; - for (TypeExpr* arg : args) { - minw += arg->minw; - maxw += arg->maxw; - } - if (minw > w_inf) { - minw = w_inf; - } - if (maxw > w_inf) { - maxw = w_inf; - } - break; - case te_Tuple: - minw = maxw = 1; - for (TypeExpr* arg : args) { - arg->compute_width(); - } - break; - case te_Indirect: - minw = args[0]->minw; - maxw = args[0]->maxw; - break; - default: - minw = 0; - maxw = w_inf; - break; - } -} - -bool TypeExpr::recompute_width() { - switch (constr) { - case te_Tensor: - case te_Indirect: { - int min = 0, max = 0; - for (TypeExpr* arg : args) { - min += arg->minw; - max += arg->maxw; - } - if (min > maxw || max < minw) { - return false; - } - if (min > w_inf) { - min = w_inf; - } - if (max > w_inf) { - max = w_inf; - } - if (minw < min) { - minw = min; - } - if (maxw > max) { - maxw = max; - } - return true; - } - case te_Tuple: { - for (TypeExpr* arg : args) { - if (arg->minw > 1 || arg->maxw < 1 || arg->minw > arg->maxw) { - return false; - } - } - return true; - } - default: - return false; - } -} - -int TypeExpr::extract_components(std::vector& comp_list) { - if (constr != te_Indirect && constr != te_Tensor) { - comp_list.push_back(this); - return 1; - } - int res = 0; - for (TypeExpr* arg : args) { - res += arg->extract_components(comp_list); - } - return res; -} - -bool TypeExpr::equals_to(const TypeExpr *rhs) const { - const TypeExpr *l = this; - const TypeExpr *r = rhs; - while (l->constr == te_Indirect) - l = l->args[0]; - while (r->constr == te_Indirect) - r = r->args[0]; - - bool eq = l->constr == r->constr && l->value == r->value && - l->minw == r->minw && l->maxw == r->maxw && - l->was_forall_var == r->was_forall_var && 
- l->args.size() == r->args.size(); - if (!eq) - return false; - - for (int i = 0; i < static_cast(l->args.size()); ++i) { - if (!l->args[i]->equals_to(r->args[i])) - return false; - } - return true; -} - -bool TypeExpr::has_unknown_inside() const { - if (constr == te_Unknown) - return true; - - for (const TypeExpr* inner : args) { - if (inner->has_unknown_inside()) - return true; - } - return false; -} - -TypeExpr* TypeExpr::new_map(TypeExpr* from, TypeExpr* to) { - return new TypeExpr{te_Map, std::vector{from, to}}; -} - -void TypeExpr::replace_with(TypeExpr* te2) { - if (te2 == this) { - return; - } - constr = te_Indirect; - value = 0; - minw = te2->minw; - maxw = te2->maxw; - args.clear(); - args.push_back(te2); -} - -bool TypeExpr::remove_indirect(TypeExpr*& te, TypeExpr* forbidden) { - tolk_assert(te); - while (te->constr == te_Indirect) { - te = te->args[0]; - } - if (te->constr == te_Unknown) { - return te != forbidden; - } - bool res = true; - for (auto& x : te->args) { - res &= remove_indirect(x, forbidden); - } - return res; -} - -std::vector TypeExpr::remove_forall(TypeExpr*& te) { - tolk_assert(te && te->constr == te_ForAll); - tolk_assert(te->args.size() >= 1); - std::vector new_vars; - for (std::size_t i = 1; i < te->args.size(); i++) { - new_vars.push_back(new_hole(1)); - } - TypeExpr* te2 = te; - // std::cerr << "removing universal quantifier in " << te << std::endl; - te = te->args[0]; - remove_forall_in(te, te2, new_vars); - // std::cerr << "-> " << te << std::endl; - return new_vars; -} - -bool TypeExpr::remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars) { - tolk_assert(te); - tolk_assert(te2 && te2->constr == te_ForAll); - if (te->constr == te_Var) { - for (std::size_t i = 0; i < new_vars.size(); i++) { - if (te == te2->args[i + 1]) { - te = new_vars[i]; - return true; - } - } - return false; - } - if (te->constr == te_ForAll) { - return false; - } - if (te->args.empty()) { - return false; - } - auto te1 = new 
TypeExpr(*te); - bool res = false; - for (auto& arg : te1->args) { - res |= remove_forall_in(arg, te2, new_vars); - } - if (res) { - te = te1; - } else { - delete te1; - } - return res; -} - -void TypeExpr::show_width(std::ostream& os) { - os << minw; - if (maxw != minw) { - os << ".."; - if (maxw < w_inf) { - os << maxw; - } - } -} - -std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr) { - if (!type_expr) { - return os << "(null-type-ptr)"; - } - return type_expr->print(os); -} - -std::ostream& TypeExpr::print(std::ostream& os, int lex_level) const { - switch (constr) { - case te_Unknown: - return os << "??" << value; - case te_Var: - if (value >= -26 && value < 0) { - return os << "_" << (char)(91 + value); - } else if (value >= 0 && value < 26) { - return os << (char)(65 + value); - } else { - return os << "TVAR" << value; - } - case te_Indirect: - return os << args[0]; - case te_Atomic: { - switch (value) { - case _Int: - return os << "int"; - case _Cell: - return os << "cell"; - case _Slice: - return os << "slice"; - case _Builder: - return os << "builder"; - case _Continutaion: - return os << "cont"; - case _Tuple: - return os << "tuple"; - default: - return os << "atomic-type-" << value; - } - } - case te_Tensor: { - if (lex_level > -127) { - os << "("; - } - auto c = args.size(); - if (c) { - for (const auto& x : args) { - x->print(os); - if (--c) { - os << ", "; - } - } - } - if (lex_level > -127) { - os << ")"; - } - return os; - } - case te_Tuple: { - os << "["; - auto c = args.size(); - if (c == 1 && args[0]->constr == te_Tensor) { - args[0]->print(os, -127); - } else if (c) { - for (const auto& x : args) { - x->print(os); - if (--c) { - os << ", "; - } - } - } - return os << "]"; - } - case te_Map: { - tolk_assert(args.size() == 2); - if (lex_level > 0) { - os << "("; - } - args[0]->print(os, 1); - os << " -> "; - args[1]->print(os); - if (lex_level > 0) { - os << ")"; - } - return os; - } - case te_ForAll: { - tolk_assert(args.size() >= 
1); - if (lex_level > 0) { - os << '('; - } - os << "Forall "; - for (std::size_t i = 1; i < args.size(); i++) { - os << (i > 1 ? ' ' : '('); - args[i]->print(os); - } - os << ") "; - args[0]->print(os); - if (lex_level > 0) { - os << ')'; - } - return os; - } - default: - return os << "unknown-type-expr-" << constr; - } -} - -void UnifyError::print_message(std::ostream& os) const { - os << "cannot unify type " << te1 << " with " << te2; - if (!msg.empty()) { - os << ": " << msg; - } -} - -std::ostream& operator<<(std::ostream& os, const UnifyError& ue) { - ue.print_message(os); - return os; -} - -void check_width_compat(TypeExpr* te1, TypeExpr* te2) { - if (te1->minw > te2->maxw || te2->minw > te1->maxw) { - std::ostringstream os{"cannot unify types of widths ", std::ios_base::ate}; - te1->show_width(os); - os << " and "; - te2->show_width(os); - throw UnifyError{te1, te2, os.str()}; - } -} - -void check_update_widths(TypeExpr* te1, TypeExpr* te2) { - check_width_compat(te1, te2); - te1->minw = te2->minw = std::max(te1->minw, te2->minw); - te1->maxw = te2->maxw = std::min(te1->maxw, te2->maxw); - tolk_assert(te1->minw <= te1->maxw); -} - -void unify(TypeExpr*& te1, TypeExpr*& te2) { - tolk_assert(te1 && te2); - // std::cerr << "unify( " << te1 << " , " << te2 << " )\n"; - while (te1->constr == TypeExpr::te_Indirect) { - te1 = te1->args[0]; - } - while (te2->constr == TypeExpr::te_Indirect) { - te2 = te2->args[0]; - } - if (te1 == te2) { - return; - } - if (te1->constr == TypeExpr::te_ForAll) { - TypeExpr* te = te1; - std::vector new_vars = TypeExpr::remove_forall(te); - for (TypeExpr* t : new_vars) { - t->was_forall_var = true; - } - unify(te, te2); - for (TypeExpr* t : new_vars) { - t->was_forall_var = false; - } - return; - } - if (te2->constr == TypeExpr::te_ForAll) { - TypeExpr* te = te2; - std::vector new_vars = TypeExpr::remove_forall(te); - for (TypeExpr* t : new_vars) { - t->was_forall_var = true; - } - unify(te1, te); - for (TypeExpr* t : new_vars) { - 
t->was_forall_var = false; - } - return; - } - if (te1->was_forall_var && te2->constr == TypeExpr::te_Tensor) { - throw UnifyError{te1, te2, "cannot unify generic type and tensor"}; - } - if (te2->was_forall_var && te1->constr == TypeExpr::te_Tensor) { - throw UnifyError{te2, te1, "cannot unify generic type and tensor"}; - } - if (te1->constr == TypeExpr::te_Unknown) { - if (te2->constr == TypeExpr::te_Unknown) { - tolk_assert(te1->value != te2->value); - } - if (!TypeExpr::remove_indirect(te2, te1)) { - throw UnifyError{te1, te2, "type unification results in an infinite cyclic type"}; - } - check_update_widths(te1, te2); - te1->replace_with(te2); - te1 = te2; - return; - } - if (te2->constr == TypeExpr::te_Unknown) { - if (!TypeExpr::remove_indirect(te1, te2)) { - throw UnifyError{te2, te1, "type unification results in an infinite cyclic type"}; - } - check_update_widths(te2, te1); - te2->replace_with(te1); - te2 = te1; - return; - } - if (te1->constr != te2->constr || te1->value != te2->value || te1->args.size() != te2->args.size()) { - throw UnifyError{te1, te2}; - } - for (std::size_t i = 0; i < te1->args.size(); i++) { - unify(te1->args[i], te2->args[i]); - } - if (te1->constr == TypeExpr::te_Tensor) { - if (!te1->recompute_width()) { - throw UnifyError{te1, te2, "type unification incompatible with known width of first type"}; - } - if (!te2->recompute_width()) { - throw UnifyError{te2, te1, "type unification incompatible with known width of first type"}; - } - check_update_widths(te1, te2); - } - te1->replace_with(te2); - te1 = te2; -} - -} // namespace tolk