From 82648ebd6a8f595155562944e15635b9166ad3c3 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Thu, 31 Oct 2024 10:51:07 +0400 Subject: [PATCH 01/12] [Tolk] Initial commit of TOLK Language: fork all sources from FunC The Tolk Language will be positioned as "next-generation FunC". It's literally a fork of a FunC compiler, introducing familiar syntax similar to TypeScript, but leaving all low-level optimizations untouched. Note, that FunC sources are partially stored in the parser/ folder (shared with TL/B). In Tolk, nothing is shared. Everything from parser/ is copied into tolk/ folder. --- .github/script/amd64-20.04.Dockerfile | 2 +- .github/script/amd64-22.04.Dockerfile | 2 +- .github/script/arm64-20.04.Dockerfile | 2 +- .github/script/arm64-22.04.Dockerfile | 2 +- .github/workflows/create-release.yml | 32 + .github/workflows/ton-arm64-macos.yml | 1 + .github/workflows/ton-x86-64-linux.yml | 1 + .github/workflows/ton-x86-64-macos.yml | 1 + .gitignore | 2 + CMakeLists.txt | 1 + assembly/native/build-macos-portable.sh | 6 +- assembly/native/build-macos-shared.sh | 6 +- assembly/native/build-ubuntu-portable.sh | 7 +- assembly/native/build-ubuntu-shared.sh | 7 +- assembly/native/build-windows-2019.bat | 5 +- assembly/native/build-windows.bat | 5 +- assembly/nix/build-linux-arm64-nix.sh | 1 + assembly/nix/build-linux-x86-64-nix.sh | 1 + assembly/nix/build-macos-nix.sh | 1 + crypto/smartcont/mathlib.tolk | 937 +++++++++++ crypto/smartcont/stdlib.tolk | 638 ++++++++ tolk/CMakeLists.txt | 51 + tolk/abscode.cpp | 526 +++++++ tolk/analyzer.cpp | 916 +++++++++++ tolk/asmops.cpp | 372 +++++ tolk/builtins.cpp | 1231 +++++++++++++++ tolk/codegen.cpp | 910 +++++++++++ tolk/gen-abscode.cpp | 449 ++++++ tolk/keywords.cpp | 126 ++ tolk/lexer.cpp | 335 ++++ tolk/lexer.h | 113 ++ tolk/optimize.cpp | 652 ++++++++ tolk/parse-tolk.cpp | 1809 ++++++++++++++++++++++ tolk/srcread.cpp | 228 +++ tolk/srcread.h | 162 ++ tolk/stack-transform.cpp | 1054 +++++++++++++ tolk/symtable.cpp | 179 +++ 
tolk/symtable.h | 175 +++ tolk/tolk-main.cpp | 122 ++ tolk/tolk-wasm.cpp | 148 ++ tolk/tolk.cpp | 260 ++++ tolk/tolk.h | 1785 +++++++++++++++++++++ tolk/unify-types.cpp | 429 +++++ 43 files changed, 13674 insertions(+), 18 deletions(-) create mode 100644 crypto/smartcont/mathlib.tolk create mode 100644 crypto/smartcont/stdlib.tolk create mode 100644 tolk/CMakeLists.txt create mode 100644 tolk/abscode.cpp create mode 100644 tolk/analyzer.cpp create mode 100644 tolk/asmops.cpp create mode 100644 tolk/builtins.cpp create mode 100644 tolk/codegen.cpp create mode 100644 tolk/gen-abscode.cpp create mode 100644 tolk/keywords.cpp create mode 100644 tolk/lexer.cpp create mode 100644 tolk/lexer.h create mode 100644 tolk/optimize.cpp create mode 100644 tolk/parse-tolk.cpp create mode 100644 tolk/srcread.cpp create mode 100644 tolk/srcread.h create mode 100644 tolk/stack-transform.cpp create mode 100644 tolk/symtable.cpp create mode 100644 tolk/symtable.h create mode 100644 tolk/tolk-main.cpp create mode 100644 tolk/tolk-wasm.cpp create mode 100644 tolk/tolk.cpp create mode 100644 tolk/tolk.h create mode 100644 tolk/unify-types.cpp diff --git a/.github/script/amd64-20.04.Dockerfile b/.github/script/amd64-20.04.Dockerfile index 40d980e5e..1ec89ebd7 100644 --- a/.github/script/amd64-20.04.Dockerfile +++ b/.github/script/amd64-20.04.Dockerfile @@ -17,4 +17,4 @@ ENV CC clang ENV CXX clang++ ENV CCACHE_DISABLE 1 RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= -DCMAKE_CXX_FLAGS="-mavx2" .. 
-RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func validator-engine validator-engine-console create-state generate-random-id create-hardfork dht-server lite-client \ No newline at end of file +RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id create-hardfork dht-server lite-client diff --git a/.github/script/amd64-22.04.Dockerfile b/.github/script/amd64-22.04.Dockerfile index 44c9c40b7..6134d1673 100644 --- a/.github/script/amd64-22.04.Dockerfile +++ b/.github/script/amd64-22.04.Dockerfile @@ -17,4 +17,4 @@ ENV CC clang ENV CXX clang++ ENV CCACHE_DISABLE 1 RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= -DCMAKE_CXX_FLAGS="-mavx2" .. -RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func validator-engine validator-engine-console create-state generate-random-id create-hardfork dht-server lite-client \ No newline at end of file +RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id create-hardfork dht-server lite-client diff --git a/.github/script/arm64-20.04.Dockerfile b/.github/script/arm64-20.04.Dockerfile index 1f57dc401..5e3505345 100644 --- a/.github/script/arm64-20.04.Dockerfile +++ b/.github/script/arm64-20.04.Dockerfile @@ -17,4 +17,4 @@ ENV CC clang ENV CXX clang++ ENV CCACHE_DISABLE 1 RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= .. 
-RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func validator-engine validator-engine-console create-state generate-random-id dht-server lite-client \ No newline at end of file +RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id dht-server lite-client diff --git a/.github/script/arm64-22.04.Dockerfile b/.github/script/arm64-22.04.Dockerfile index 2b595839f..f9805849d 100644 --- a/.github/script/arm64-22.04.Dockerfile +++ b/.github/script/arm64-22.04.Dockerfile @@ -17,4 +17,4 @@ ENV CC clang ENV CXX clang++ ENV CCACHE_DISABLE 1 RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= .. -RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func validator-engine validator-engine-console create-state generate-random-id dht-server lite-client \ No newline at end of file +RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id dht-server lite-client diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml index 263bd9a43..ca08357c2 100644 --- a/.github/workflows/create-release.yml +++ b/.github/workflows/create-release.yml @@ -167,6 +167,14 @@ jobs: asset_name: func.exe tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Windows 2019 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-win-binaries/tolk.exe + asset_name: tolk.exe + tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Windows 2019 single artifact - lite-client uses: svenstaro/upload-release-action@v2 with: @@ -257,6 +265,14 @@ jobs: asset_name: func-mac-x86-64 tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Mac x86-64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + 
repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-x86_64-macos-binaries/tolk + asset_name: tolk-mac-x86-64 + tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Mac x86-64 single artifact - lite-client uses: svenstaro/upload-release-action@v2 with: @@ -348,6 +364,14 @@ jobs: asset_name: func-mac-arm64 tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Mac arm64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-macos-binaries/tolk + asset_name: tolk-mac-arm64 + tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Mac arm64 single artifact - lite-client uses: svenstaro/upload-release-action@v2 with: @@ -438,6 +462,14 @@ jobs: asset_name: func-linux-x86_64 tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Linux x86-64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-x86_64-linux-binaries/tolk + asset_name: tolk-linux-x86_64 + tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Linux x86-64 single artifact - lite-client uses: svenstaro/upload-release-action@v2 with: diff --git a/.github/workflows/ton-arm64-macos.yml b/.github/workflows/ton-arm64-macos.yml index 9e8302e80..affe22456 100644 --- a/.github/workflows/ton-arm64-macos.yml +++ b/.github/workflows/ton-arm64-macos.yml @@ -29,6 +29,7 @@ jobs: artifacts/lite-client -V artifacts/fift -V artifacts/func -V + artifacts/tolk -v - name: Upload artifacts uses: actions/upload-artifact@master diff --git a/.github/workflows/ton-x86-64-linux.yml b/.github/workflows/ton-x86-64-linux.yml index abbe1cca4..b7ef9684e 100644 --- a/.github/workflows/ton-x86-64-linux.yml +++ b/.github/workflows/ton-x86-64-linux.yml @@ -33,6 +33,7 @@ jobs: artifacts/lite-client -V artifacts/fift -V artifacts/func -V + artifacts/tolk -v - name: Upload artifacts uses: actions/upload-artifact@master diff --git a/.github/workflows/ton-x86-64-macos.yml 
b/.github/workflows/ton-x86-64-macos.yml index 8c71f34a1..1890dc344 100644 --- a/.github/workflows/ton-x86-64-macos.yml +++ b/.github/workflows/ton-x86-64-macos.yml @@ -29,6 +29,7 @@ jobs: artifacts/lite-client -V artifacts/fift -V artifacts/func -V + artifacts/tolk -v - name: Upload artifacts uses: actions/upload-artifact@master diff --git a/.gitignore b/.gitignore index 536918ab3..9b94834b8 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,8 @@ test/regression-tests.cache/ **/*build*/ .idea .vscode +.DS_Store +dev/ zlib/ libsodium/ libmicrohttpd-0.9.77-w32-bin/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 573bc3a32..885fcef7f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -413,6 +413,7 @@ add_subdirectory(adnl) add_subdirectory(crypto) add_subdirectory(lite-client) add_subdirectory(emulator) +add_subdirectory(tolk) #BEGIN tonlib add_subdirectory(tonlib) diff --git a/assembly/native/build-macos-portable.sh b/assembly/native/build-macos-portable.sh index 0e1003b56..af82b2c01 100644 --- a/assembly/native/build-macos-portable.sh +++ b/assembly/native/build-macos-portable.sh @@ -153,7 +153,7 @@ test $? -eq 0 || { echo "Can't configure ton"; exit 1; } if [ "$with_tests" = true ]; then ninja storage-daemon storage-daemon-cli blockchain-explorer \ - tonlib tonlibjson tonlib-cli validator-engine func fift \ + tonlib tonlibjson tonlib-cli validator-engine func tolk fift \ lite-client pow-miner validator-engine-console generate-random-id json2tlo dht-server \ http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork tlbc emulator \ test-ed25519 test-ed25519-crypto test-bigint test-vm test-fift test-cells test-smartcont \ @@ -162,7 +162,7 @@ if [ "$with_tests" = true ]; then test $? 
-eq 0 || { echo "Can't compile ton"; exit 1; } else ninja storage-daemon storage-daemon-cli blockchain-explorer \ - tonlib tonlibjson tonlib-cli validator-engine func fift \ + tonlib tonlibjson tonlib-cli validator-engine func tolk fift \ lite-client pow-miner validator-engine-console generate-random-id json2tlo dht-server \ http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork tlbc emulator test $? -eq 0 || { echo "Can't compile ton"; exit 1; } @@ -173,6 +173,7 @@ strip -s storage/storage-daemon/storage-daemon-cli strip -s blockchain-explorer/blockchain-explorer strip -s crypto/fift strip -s crypto/func +strip -s tolk/tolk strip -s crypto/create-state strip -s crypto/tlbc strip -s validator-engine-console/validator-engine-console @@ -197,6 +198,7 @@ if [ "$with_artifacts" = true ]; then cp build/blockchain-explorer/blockchain-explorer artifacts/ cp build/crypto/fift artifacts/ cp build/crypto/func artifacts/ + cp build/tolk/tolk artifacts/ cp build/crypto/create-state artifacts/ cp build/crypto/tlbc artifacts/ cp build/validator-engine-console/validator-engine-console artifacts/ diff --git a/assembly/native/build-macos-shared.sh b/assembly/native/build-macos-shared.sh index 7fdcfb941..8a7399aa9 100644 --- a/assembly/native/build-macos-shared.sh +++ b/assembly/native/build-macos-shared.sh @@ -81,7 +81,7 @@ test $? -eq 0 || { echo "Can't configure ton"; exit 1; } if [ "$with_tests" = true ]; then ninja storage-daemon storage-daemon-cli blockchain-explorer \ - tonlib tonlibjson tonlib-cli validator-engine func fift \ + tonlib tonlibjson tonlib-cli validator-engine func tolk fift \ lite-client pow-miner validator-engine-console generate-random-id json2tlo dht-server \ http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork tlbc emulator \ test-ed25519 test-ed25519-crypto test-bigint test-vm test-fift test-cells test-smartcont \ @@ -90,7 +90,7 @@ if [ "$with_tests" = true ]; then test $? 
-eq 0 || { echo "Can't compile ton"; exit 1; } else ninja storage-daemon storage-daemon-cli blockchain-explorer \ - tonlib tonlibjson tonlib-cli validator-engine func fift \ + tonlib tonlibjson tonlib-cli validator-engine func tolk fift \ lite-client pow-miner validator-engine-console generate-random-id json2tlo dht-server \ http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork tlbc emulator test $? -eq 0 || { echo "Can't compile ton"; exit 1; } @@ -102,6 +102,7 @@ strip -s storage/storage-daemon/storage-daemon-cli strip -s blockchain-explorer/blockchain-explorer strip -s crypto/fift strip -s crypto/func +strip -s tolk/tolk strip -s crypto/create-state strip -s crypto/tlbc strip -s validator-engine-console/validator-engine-console @@ -126,6 +127,7 @@ if [ "$with_artifacts" = true ]; then cp build/blockchain-explorer/blockchain-explorer artifacts/ cp build/crypto/fift artifacts/ cp build/crypto/func artifacts/ + cp build/tolk/tolk artifacts/ cp build/crypto/create-state artifacts/ cp build/crypto/tlbc artifacts/ cp build/validator-engine-console/validator-engine-console artifacts/ diff --git a/assembly/native/build-ubuntu-portable.sh b/assembly/native/build-ubuntu-portable.sh index 73ae59264..8ae977e0b 100644 --- a/assembly/native/build-ubuntu-portable.sh +++ b/assembly/native/build-ubuntu-portable.sh @@ -144,7 +144,7 @@ cmake -GNinja .. \ test $? 
-eq 0 || { echo "Can't configure ton"; exit 1; } if [ "$with_tests" = true ]; then -ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ +ninja storage-daemon storage-daemon-cli fift func tolk tonlib tonlibjson tonlib-cli \ validator-engine lite-client pow-miner validator-engine-console blockchain-explorer \ generate-random-id json2tlo dht-server http-proxy rldp-http-proxy \ adnl-proxy create-state emulator test-ed25519 test-ed25519-crypto test-bigint \ @@ -153,7 +153,7 @@ ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ test-fec test-tddb test-db test-validator-session-state test-emulator test $? -eq 0 || { echo "Can't compile ton"; exit 1; } else -ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ +ninja storage-daemon storage-daemon-cli fift func tolk tonlib tonlibjson tonlib-cli \ validator-engine lite-client pow-miner validator-engine-console blockchain-explorer \ generate-random-id json2tlo dht-server http-proxy rldp-http-proxy \ adnl-proxy create-state emulator @@ -166,6 +166,7 @@ strip -s storage/storage-daemon/storage-daemon \ crypto/fift \ crypto/tlbc \ crypto/func \ + tolk/tolk \ crypto/create-state \ validator-engine-console/validator-engine-console \ tonlib/tonlib-cli \ @@ -195,7 +196,7 @@ if [ "$with_artifacts" = true ]; then mkdir artifacts mv build/tonlib/libtonlibjson.so.0.5 build/tonlib/libtonlibjson.so cp build/storage/storage-daemon/storage-daemon build/storage/storage-daemon/storage-daemon-cli \ - build/crypto/fift build/crypto/tlbc build/crypto/func build/crypto/create-state build/blockchain-explorer/blockchain-explorer \ + build/crypto/fift build/crypto/tlbc build/crypto/func build/tolk/tolk build/crypto/create-state build/blockchain-explorer/blockchain-explorer \ build/validator-engine-console/validator-engine-console build/tonlib/tonlib-cli \ build/tonlib/libtonlibjson.so build/http/http-proxy build/rldp-http-proxy/rldp-http-proxy \ 
build/dht-server/dht-server build/lite-client/lite-client build/validator-engine/validator-engine \ diff --git a/assembly/native/build-ubuntu-shared.sh b/assembly/native/build-ubuntu-shared.sh index 00b9aa9b4..6b1841cdf 100644 --- a/assembly/native/build-ubuntu-shared.sh +++ b/assembly/native/build-ubuntu-shared.sh @@ -52,7 +52,7 @@ cmake -GNinja -DTON_USE_JEMALLOC=ON .. \ test $? -eq 0 || { echo "Can't configure ton"; exit 1; } if [ "$with_tests" = true ]; then -ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ +ninja storage-daemon storage-daemon-cli fift func tolk tonlib tonlibjson tonlib-cli \ validator-engine lite-client pow-miner validator-engine-console blockchain-explorer \ generate-random-id json2tlo dht-server http-proxy rldp-http-proxy \ adnl-proxy create-state emulator test-ed25519 test-ed25519-crypto test-bigint \ @@ -61,7 +61,7 @@ ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ test-fec test-tddb test-db test-validator-session-state test-emulator test $? 
-eq 0 || { echo "Can't compile ton"; exit 1; } else -ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ +ninja storage-daemon storage-daemon-cli fift func tolk tonlib tonlibjson tonlib-cli \ validator-engine lite-client pow-miner validator-engine-console blockchain-explorer \ generate-random-id json2tlo dht-server http-proxy rldp-http-proxy \ adnl-proxy create-state emulator @@ -74,6 +74,7 @@ strip -s storage/storage-daemon/storage-daemon \ crypto/fift \ crypto/tlbc \ crypto/func \ + tolk/tolk \ crypto/create-state \ validator-engine-console/validator-engine-console \ tonlib/tonlib-cli \ @@ -105,7 +106,7 @@ if [ "$with_artifacts" = true ]; then mkdir artifacts mv build/tonlib/libtonlibjson.so.0.5 build/tonlib/libtonlibjson.so cp build/storage/storage-daemon/storage-daemon build/storage/storage-daemon/storage-daemon-cli \ - build/crypto/fift build/crypto/tlbc build/crypto/func build/crypto/create-state build/blockchain-explorer/blockchain-explorer \ + build/crypto/fift build/crypto/tlbc build/crypto/func build/tolk/tolk build/crypto/create-state build/blockchain-explorer/blockchain-explorer \ build/validator-engine-console/validator-engine-console build/tonlib/tonlib-cli \ build/tonlib/libtonlibjson.so build/http/http-proxy build/rldp-http-proxy/rldp-http-proxy \ build/dht-server/dht-server build/lite-client/lite-client build/validator-engine/validator-engine \ diff --git a/assembly/native/build-windows-2019.bat b/assembly/native/build-windows-2019.bat index f728b88f8..fdfb6bcf6 100644 --- a/assembly/native/build-windows-2019.bat +++ b/assembly/native/build-windows-2019.bat @@ -155,7 +155,7 @@ IF %errorlevel% NEQ 0 ( ) IF "%1"=="-t" ( -ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tonlib tonlibjson ^ +ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tolk tonlib tonlibjson ^ tonlib-cli validator-engine lite-client pow-miner validator-engine-console generate-random-id ^ json2tlo dht-server 
http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork emulator ^ test-ed25519 test-ed25519-crypto test-bigint test-vm test-fift test-cells test-smartcont test-net ^ @@ -166,7 +166,7 @@ IF %errorlevel% NEQ 0 ( exit /b %errorlevel% ) ) else ( -ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tonlib tonlibjson ^ +ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tolk tonlib tonlibjson ^ tonlib-cli validator-engine lite-client pow-miner validator-engine-console generate-random-id ^ json2tlo dht-server http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork emulator IF %errorlevel% NEQ 0 ( @@ -204,6 +204,7 @@ build\blockchain-explorer\blockchain-explorer.exe ^ build\crypto\fift.exe ^ build\crypto\tlbc.exe ^ build\crypto\func.exe ^ +build\tolk\tolk.exe ^ build\crypto\create-state.exe ^ build\validator-engine-console\validator-engine-console.exe ^ build\tonlib\tonlib-cli.exe ^ diff --git a/assembly/native/build-windows.bat b/assembly/native/build-windows.bat index aa0fd69ad..e1ce9e473 100644 --- a/assembly/native/build-windows.bat +++ b/assembly/native/build-windows.bat @@ -156,7 +156,7 @@ IF %errorlevel% NEQ 0 ( ) IF "%1"=="-t" ( -ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tonlib tonlibjson ^ +ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tolk tonlib tonlibjson ^ tonlib-cli validator-engine lite-client pow-miner validator-engine-console generate-random-id ^ json2tlo dht-server http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork emulator ^ test-ed25519 test-ed25519-crypto test-bigint test-vm test-fift test-cells test-smartcont test-net ^ @@ -167,7 +167,7 @@ IF %errorlevel% NEQ 0 ( exit /b %errorlevel% ) ) else ( -ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tonlib tonlibjson ^ +ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tolk tonlib tonlibjson ^ tonlib-cli validator-engine lite-client 
pow-miner validator-engine-console generate-random-id ^ json2tlo dht-server http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork emulator IF %errorlevel% NEQ 0 ( @@ -205,6 +205,7 @@ build\blockchain-explorer\blockchain-explorer.exe ^ build\crypto\fift.exe ^ build\crypto\tlbc.exe ^ build\crypto\func.exe ^ +build\tolk\tolk.exe ^ build\crypto\create-state.exe ^ build\validator-engine-console\validator-engine-console.exe ^ build\tonlib\tonlib-cli.exe ^ diff --git a/assembly/nix/build-linux-arm64-nix.sh b/assembly/nix/build-linux-arm64-nix.sh index 2c7df521f..565b1d253 100644 --- a/assembly/nix/build-linux-arm64-nix.sh +++ b/assembly/nix/build-linux-arm64-nix.sh @@ -43,6 +43,7 @@ sudo strip -s storage-daemon \ fift \ tlbc \ func \ + tolk \ create-state \ validator-engine-console \ tonlib-cli \ diff --git a/assembly/nix/build-linux-x86-64-nix.sh b/assembly/nix/build-linux-x86-64-nix.sh index ae478ec2d..e6a3aef07 100644 --- a/assembly/nix/build-linux-x86-64-nix.sh +++ b/assembly/nix/build-linux-x86-64-nix.sh @@ -43,6 +43,7 @@ sudo strip -s storage-daemon \ fift \ tlbc \ func \ + tolk \ create-state \ validator-engine-console \ tonlib-cli \ diff --git a/assembly/nix/build-macos-nix.sh b/assembly/nix/build-macos-nix.sh index c92eddb28..0ada59a4a 100644 --- a/assembly/nix/build-macos-nix.sh +++ b/assembly/nix/build-macos-nix.sh @@ -43,6 +43,7 @@ sudo strip -xSX storage-daemon \ fift \ tlbc \ func \ + tolk \ create-state \ validator-engine-console \ tonlib-cli \ diff --git a/crypto/smartcont/mathlib.tolk b/crypto/smartcont/mathlib.tolk new file mode 100644 index 000000000..d4fea6095 --- /dev/null +++ b/crypto/smartcont/mathlib.tolk @@ -0,0 +1,937 @@ +{- + - + - Tolk fixed-point mathematical library + - (initially copied from mathlib.fc) + - + -} + +{- + This file is part of TON Tolk Standard Library. 
+ + Tolk Standard Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Tolk Standard Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + +-} + +{---------------- HIGH-LEVEL FUNCTION DECLARATIONS -----------------} +{- + Most functions declared here work either with integers or with fixed-point numbers of type `fixed248`. + `fixedNNN` informally denotes an alias for type `int` used to represent fixed-point numbers with scale 2^NNN. + Prefix `fixedNNN::` is prepended to the names of high-level functions that accept arguments and return values of type `fixedNNN`. +-} + +{- function declarations have been commented out, otherwise they are not inlined by the current Tolk compiler + +;; nearest integer to sqrt(a*b) for non-negative integers or fixed-point numbers a and b +int geom_mean(int a, int b) inline_ref; +;; integer square root +int sqrt(int a) inline; +;; fixed-point square root +;; fixed248 sqrt(fixed248 x) +int fixed248::sqrt(int x) inline; + +int fixed248::sqr(int x) inline; +const int fixed248::One; + +;; log(2) as fixed248 +int fixed248::log2_const() inline; +;; Pi as fixed248 +int fixed248::Pi_const() inline; + +;; fixed248 exp(fixed248 x) +int fixed248::exp(int x) inline_ref; +;; fixed248 exp2(fixed248 x) +int fixed248::exp2(int x) inline_ref; + +;; fixed248 log(fixed248 x) +int fixed248::log(int x) inline_ref; +;; fixed248 log2(fixed248 x) +int fixed248::log2(int x) inline; + +;; fixed248 pow(fixed248 x, fixed248 y) +int fixed248::pow(int x, int y) inline_ref; + +;; (fixed248, fixed248) sincos(fixed248 x); +(int, int) fixed248::sincos(int x) inline_ref; +;; fixed248 
sin(fixed248 x); +int fixed248::sin(int x) inline; +;; fixed248 cos(fixed248 x); +int fixed248::cos(int x) inline; +;; fixed248 tan(fixed248 x); +int fixed248::tan(int x) inline_ref; +;; fixed248 cot(fixed248 x); +int fixed248::cot(int x) inline_ref; + + +;; fixed248 asin(fixed248 x); +int fixed248::asin(int x) inline; +;; fixed248 acos(fixed248 x); +int fixed248::acos(int x) inline; +;; fixed248 atan(fixed248 x); +int fixed248::atan(int x) inline_ref; +;; fixed248 acot(fixed248 x); +int fixed248::acot(int x) inline_ref; + +;; random number uniformly distributed in [0..1) +;; fixed248 random(); +int fixed248::random() impure inline; +;; random number with standard normal distribution (2100 gas on average) +;; fixed248 nrand(); +int fixed248::nrand() impure inline; +;; generates a random number approximately distributed according to the standard normal distribution (1200 gas) +;; (fails chi-squared test, but it is shorter and faster than fixed248::nrand()) +;; fixed248 nrand_fast(); +int fixed248::nrand_fast() impure inline; + +-} ;; end (declarations) + +{-------------------- INTERMEDIATE FUNCTIONS -----------------------} + +{- + Intermediate functions are used in the implementations of high-level `fixedNNN::...` functions. + If necessary, they can be used to define additional high-level functions for other fixed-point types, such as fixed128, outside this library. They can also be used in a hypothetical floating-point Tolk library. + For these reasons, the declarations of these functions are collected here. +-} + +{- function declarations have been commented out, otherwise they are not inlined by the current Tolk compiler + +;; fixed258 tanh(fixed258 x, int steps); +int tanh_f258(int x, int n); + +;; computes exp(x)-1 for |x| <= log(2)/2. 
+;; fixed257 expm1(fixed257 x); +int expm1_f257(int x); + +;; computes (sin(x+xe),-cos(x+xe)) for |x| <= Pi/4, xe very small +;; this function is very accurate, error less than 0.7 ulp (consumes ~ 5500 gas) +;; (fixed256, fixed256) sincosn(fixed256 x, fixed259 xe) +(int, int) sincosn_f256(int x, int xe); + +;; compute (sin(x),1-cos(x)) in fixed256 for |x| < 16*atan(1/16) = 0.9987 +;; (fixed256, fixed257) sincosm1_f256(fixed256 x); +;; slightly less accurate than sincosn_f256() (error up to 3/2^256), but faster (~ 4k gas) and shorter +(int, int) sincosm1_f256(int x); + +;; compute (p, q) such that p/q = tan(x) for |x|<2*atan(1/2)=1899/2048=0.927 +;; (int, int) tan_aux(fixed256 x); +(int, int) tan_aux_f256(int x); + +;; returns (y, s) such that log(x) = y/2^256 + s*log(2) for positive integer x +;; this function is very precise (error less than 0.6 ulp) and consumes < 7k gas +;; (fixed256, int) log_aux_f256(int x); +(int, int) log_aux_f256(int x); + +;; returns (y, s) such that log2(x) = y/2^256 + s for positive integer x +;; this function is very precise (error less than 0.6 ulp) and consumes < 7k gas +;; (fixed256, int) log2_aux_f256(int x); +(int, int) log2_aux_f256(int x); + +;; compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 +;; this function is reasonably accurate (error < 7 ulp with ulp = 2^-261), but it consumes >7k gas +;; this is sufficient for most purposes +;; (int, fixed261) atan_aux(fixed256 x) +(int, int) atan_aux_f256(int x); + +;; fixed255 atan(fixed255 x); +int atan_f255(int x); + +;; for -1 <= x < 1 only +;; fixed256 atan_small(fixed256 x); +int atan_f256_small(int x); + +;; fixed255 asin(fixed255 x); +int asin_f255(int x); + +;; fixed254 acos(fixed255 x); +int acos_f255(int x); + +;; generates normally distributed pseudo-random number +;; fixed252 nrand(); +int nrand_f252(int x); + +;; a faster and shorter variant of nrand_f252() that fails chi-squared test +;; (should suffice for most purposes) +;; fixed252 nrand_fast(); +int 
nrand_fast_f252(int x); + +-} ;; end (declarations) + +{---------------- MISSING OPERATIONS AND BUILT-INS -----------------} + +int sgn(int x) asm "SGN"; + +;; compute floor(log2(x))+1 +int log2_floor_p1(int x) asm "UBITSIZE"; + +int mulrshiftr(int x, int y, int s) asm "MULRSHIFTR"; +int mulrshiftr256(int x, int y) asm "256 MULRSHIFTR#"; +(int, int) mulrshift256mod(int x, int y) asm "256 MULRSHIFT#MOD"; +(int, int) mulrshiftr256mod(int x, int y) asm "256 MULRSHIFTR#MOD"; +(int, int) mulrshiftr255mod(int x, int y) asm "255 MULRSHIFTR#MOD"; +(int, int) mulrshiftr248mod(int x, int y) asm "248 MULRSHIFTR#MOD"; +(int, int) mulrshiftr5mod(int x, int y) asm "5 MULRSHIFTR#MOD"; +(int, int) mulrshiftr6mod(int x, int y) asm "6 MULRSHIFTR#MOD"; +(int, int) mulrshiftr7mod(int x, int y) asm "7 MULRSHIFTR#MOD"; + +int lshift256divr(int x, int y) asm "256 LSHIFT#DIVR"; +(int, int) lshift256divmodr(int x, int y) asm "256 LSHIFT#DIVMODR"; +(int, int) lshift255divmodr(int x, int y) asm "255 LSHIFT#DIVMODR"; +(int, int) lshift2divmodr(int x, int y) asm "2 LSHIFT#DIVMODR"; +(int, int) lshift7divmodr(int x, int y) asm "7 LSHIFT#DIVMODR"; +(int, int) lshiftdivmodr(int x, int y, int s) asm "LSHIFTDIVMODR"; + +(int, int) rshiftr256mod(int x) asm "256 RSHIFTR#MOD"; +(int, int) rshiftr248mod(int x) asm "248 RSHIFTR#MOD"; +(int, int) rshiftr4mod(int x) asm "4 RSHIFTR#MOD"; +(int, int) rshift3mod(int x) asm "3 RSHIFT#MOD"; + +;; computes y - x (Tolk compiler does not try to use this by itself) +int sub_rev(int x, int y) asm "SUBR"; + +int nan() asm "PUSHNAN"; +int is_nan(int x) asm "ISNAN"; + +{------------------------ SQUARE ROOTS ----------------------------} + +;; computes sqrt(a*b) exactly rounded to the nearest integer +;; for all 0 <= a, b <= 2^256-1 +;; may be used with b=1 or b=scale of fixed-point numbers +int geom_mean(int a, int b) inline_ref { + ifnot (min(a, b)) { + return 0; + } + int s = log2_floor_p1(a); ;; throws out of range error if a < 0 or b < 0 + int t = 
log2_floor_p1(b); + ;; NB: (a-b)/2+b == (a+b)/2, but without overflow for large a and b + int x = (s == t ? (a - b) / 2 + b : 1 << ((s + t) / 2)); + do { + ;; if always used with b=2^const, may be optimized to "const LSHIFTDIVC#" + ;; it is important to use `muldivc` here, not `muldiv` or `muldivr` + int q = (muldivc(a, b, x) - x) / 2; + x += q; + } until (q == 0); + return x; +} + +;; integer square root, computes round(sqrt(a)) for all a>=0. +;; note: `inline` is better than `inline_ref` for such simple functions +int sqrt(int a) inline { + return geom_mean(a, 1); +} + +;; version for fixed248 = fixed-point numbers with scale 2^248 +;; fixed248 sqrt(fixed248 x) +int fixed248::sqrt(int x) inline { + return geom_mean(x, 1 << 248); +} + +;; fixed255 sqrt(fixed255 x) +int fixed255::sqrt(int x) inline { + return geom_mean(x, 1 << 255); +} + +;; fixed248 sqr(fixed248 x); +int fixed248::sqr(int x) inline { + return muldivr(x, x, 1 << 248); +} + +;; fixed255 sqr(fixed255 x); +int fixed255::sqr(int x) inline { + return muldivr(x, x, 1 << 255); +} + +const int fixed248::One = (1 << 248); +const int fixed255::One = (1 << 255); + +{-------------------- USEFUL CONSTANTS --------------------} + +;; store huge constants in inline_ref functions for reuse +;; (y,z) where y=round(log(2)*2^256), z=round((log(2)*2^256-y)*2^128) +;; then log(2) = y/2^256 + z/2^384 +(int, int) log2_xconst_f256() inline_ref { + return (80260960185991308862233904206310070533990667611589946606122867505419956976172, -32272921378999278490133606779486332143); +} + +;; (y,z) where Pi = y/2^254 + z/2^382 +(int, int) Pi_xconst_f254() inline_ref { + return (90942894222941581070058735694432465663348344332098107489693037779484723616546, 108051869516004014909778934258921521947); +} + +;; atan(1/16) as fixed260 +int Atan1_16_f260() inline_ref { + return 115641670674223639132965820642403718536242645001775371762318060545014644837101; ;; true value is ...101.0089... 
+} + +;; atan(1/8) as fixed259 +int Atan1_8_f259() inline_ref { + return 115194597005316551477397594802136977648153890007566736408151129975021336532841; ;; correction -0.1687... +} + +;; atan(1/32) as fixed261 +int Atan1_32_f261() inline_ref { + return 115754418570128574501879331591757054405465733718902755858991306434399246026247; ;; correction 0.395... +} + +;; inline is better than inline_ref for such very small functions +int log2_const_f256() inline { + (int c, _) = log2_xconst_f256(); + return c; +} + +int fixed248::log2_const() inline { + return log2_const_f256() ~>> 8; +} + +int Pi_const_f254() inline { + (int c, _) = Pi_xconst_f254(); + return c; +} + +int fixed248::Pi_const() inline { + return Pi_const_f254() ~>> 6; +} + +{--------------- HYPERBOLIC TANGENT AND EXPONENT -------------------} + +;; hyperbolic tangent of small x via n+2 terms of Lambert's continued fraction +;; n=17: good for |x| < log(2)/4 = 0.173 +;; fixed258 tanh_f258(fixed258 x, int n) +int tanh_f258(int x, int n) inline_ref { + int x2 = muldivr(x, x, 1 << 255); ;; x^2 as fixed261 + int c = int a = (2 * n + 5) << 250; ;; a=2n+5 as fixed250 + int Two = (1 << 251); ;; 2. as fixed250 + repeat (n) { + a = (c -= Two) + muldivr(x2, 1 << 239, a); ;; a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 + } + a = (touch(3) << 254) + muldivr(x2, 1 << 243, a); ;; a := 3+x^2/a as fixed254 + ;; y = x/(1+a') = x - x*a'/(1+a') = x - x*x^2/(a+x^2) where a' = x^2/a + return x - (muldivr(x, x2, a + (x2 ~>> 7)) ~>> 7); +} + +;; fixed257 expm1_f257(fixed257 x) +;; computes exp(x)-1 for small x via 19 terms of Lambert's continued fraction for tanh(x/2) +;; good for |x| < log(2)/2 = 0.347 (n=17); consumes ~3500 gas +int expm1_f257(int x) inline_ref { + ;; (almost) compute tanh(x/2) first; x/2 as fixed258 = x as fixed257 + int x2 = muldivr(x, x, 1 << 255); ;; x^2 as fixed261 + int Two = (1 << 251); ;; 2. 
as fixed250 + int c = int a = touch(39) << 250; ;; a=2n+5 as fixed250 + repeat (17) { + a = (c -= Two) + muldivr(x2, 1 << 239, a); ;; a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 + } + a = (touch(3) << 254) + muldivr(x2, 1 << 243, a); ;; a := 3+x^2/a as fixed254 + ;; now tanh(x/2) = x/(1+a') where a'=x^2/a ; apply exp(x)-1=2*tanh(x/2)/(1-tanh(x/2)) + int t = (x ~>> 4) - a; ;; t:=x-a as fixed254 + return x - muldivr(x2, t / 2, a + mulrshiftr256(x, t) ~/ 4) ~/ 4; ;; x - x^2 * (x-a) / (a + x*(x-a)) +} + +;; expm1_f257() may be used to implement specific fixed-point exponentials +;; example: +;; fixed248 exp(fixed248 x) +int fixed248::exp(int x) inline_ref { + var (l2c, l2d) = log2_xconst_f256(); + ;; divide x by log(2) and convert to fixed257 + ;; (int q, x) = muldivmodr(x, 256, l2c); ;; unfortunately, no such built-in + (int q, x) = lshiftdivmodr(x, l2c, 8); + x = 2 * x - muldivr(q, l2d, 1 << 127); + int y = expm1_f257(x); + ;; result is (1 + y) * (2^q) --> ((1 << 257) + y) >> (9 - q) + return (y ~>> (9 - q)) - (-1 << (248 + q)); + ;; note that (y ~>> (9 - q)) + (1 << (248 + q)) leads to overflow when q=8 +} + +;; compute 2^x in fixed248 +;; fixed248 exp2(fixed248 x) +int fixed248::exp2(int x) inline_ref { + ;; (int q, x) = divmodr(x, 1 << 248); ;; no such built-in + (int q, x) = rshiftr248mod(x); + x = muldivr(x, log2_const_f256(), 1 << 247); + int y = expm1_f257(x); + return (y ~>> (9 - q)) - (-1 << (248 + q)); +} + +{--------------------- TRIGONOMETRIC FUNCTIONS -----------------------} + +;; fixed260 tan(fixed260 x); +;; computes tan(x) for small |x|> 10)) ~>> 9); +} + +;; fixed260 tan(fixed260 x); +int tan_f260(int x) inline_ref { + return tan_f260_inlined(x); +} + +;; fixed258 tan(fixed258 x); +;; computes tan(x) for small |x|> 6)) ~>> 5); +} + +;; fixed258 tan(fixed258 x); +int tan_f258(int x) inline_ref { + return tan_f258_inlined(x); +} + +;; (fixed259, fixed263) sincosm1(fixed259 x) +;; computes (sin(x), 1-cos(x)) for small |x|<2*atan(1/16) +(int, int) 
sincosm1_f259_inlined(int x) inline { + int t = tan_f260_inlined(x); ;; t=tan(x/2) as fixed260 + int tt = mulrshiftr256(t, t); ;; t^2 as fixed264 + int y = tt ~/ 512 + (1 << 255); ;; 1+t^2 as fixed255 + ;; 2*t/(1+t^2) as fixed259 and 2*t^2/(1+t^2) as fixed263 + ;; return (muldivr(t, 1 << 255, y), muldivr(tt, 1 << 255, y)); + return (t - muldivr(t / 2, tt, y) ~/ 256, tt - muldivr(tt / 2, tt, y) ~/ 256); +} + +(int, int) sincosm1_f259(int x) inline_ref { + return sincosm1_f259_inlined(x); +} + +;; computes (sin(x+xe),-cos(x+xe)) for |x| <= Pi/4, xe very small +;; this function is very accurate, error less than 0.7 ulp (consumes ~ 5500 gas) +;; (fixed256, fixed256) sincosn(fixed256 x, fixed259 xe) +(int, int) sincosn_f256(int x, int xe) inline_ref { + ;; var (q, x1) = muldivmodr(x, 8, Atan1_8_f259()); ;; no muldivmodr() builtin + var (q, x1) = lshift2divmodr(abs(x), Atan1_8_f259()); ;; reduce mod theta where theta=2*atan(1/8) + var (si, co) = sincosm1_f259(x1 * 2 + xe); + var (a, b, c) = (-1, 0, 1); + repeat (q) { ;; (a+b*I) *= (8+I)^2 = 63+16*I + (a, b, c) = (63 * a - 16 * b, 16 * a + 63 * b, 65 * c); + } + ;; now a/c = cos(q*theta), b/c = sin(q*theta) exactly(!) 
+ ;; compute (a+b*I)*(1-co+si*I)/c + ;; (b, a) = (lshift256divr(b, c), lshift256divr(a, c)); + (b, int br) = lshift256divmodr(b, c); br = muldivr(br, 128, c); + (a, int ar) = lshift256divmodr(a, c); ar = muldivr(ar, 128, c); + return (sgn(x) * (((mulrshiftr256(b, co) - br) ~/ 16 - mulrshiftr256(a, si)) ~/ 8 - b), + a - ((mulrshiftr256(a, co) - ar) ~/ 16 + mulrshiftr256(b, si)) ~/ 8); +} + +;; compute (sin(x),1-cos(x)) in fixed256 for |x| < 16*atan(1/16) = 0.9987 +;; (fixed256, fixed257) sincosm1_f256(fixed256 x); +;; slightly less accurate than sincosn_f256() (error up to 3/2^256), but faster (~ 4k gas) and shorter +(int, int) sincosm1_f256(int x) inline_ref { + var (si, co) = sincosm1_f259_inlined(x); ;; compute (sin,1-cos)(x/8) in (fixed259,fixed263) + int r = 7; + repeat (r / 2) { + ;; 1-cos(2*x) = 2*sin(x)^2, sin(2*x) = 2*sin(x)*cos(x) + (co, si) = (mulrshiftr256(si, si), si - (mulrshiftr256(si, co) ~>> r)); + r -= 2; + } + return (si, co); +} + +;; compute (p, q) such that p/q = tan(x) for |x|<2*atan(1/2)=1899/2048=0.927 +;; (int, int) tan_aux(fixed256 x); +(int, int) tan_aux_f256(int x) inline_ref { + int t = tan_f258_inlined(x); ;; t=tan(x/4) as fixed258 + ;; t:=2*t/(1-t^2)=2*(t-t^3/(t^2-1)) + int tt = mulrshiftr256(t, t); ;; t^2 as fixed260 + t = muldivr(t, tt, tt ~/ 16 + (-1 << 256)) ~/ 16 - t; ;; now t=-tan(x/2) as fixed259 + return (t, mulrshiftr256(t, t) ~/ 4 + (-1 << 256)); ;; return (2*t, t^2-1) as fixed256 +} + +;; sincosm1_f256() and sincosn_f256() may be used to implement trigonometric functions for different fixed-point types +;; example: +;; (fixed248, fixed248) sincos(fixed248 x); +(int, int) fixed248::sincos(int x) inline_ref { + var (Pic, Pid) = Pi_xconst_f254(); + ;; (int q, x) = muldivmodr(x, 128, Pic); ;; no muldivmodr() builtin + (int q, x) = lshift7divmodr(x, Pic); ;; reduce mod Pi/2 + x = 2 * x - muldivr(q, Pid, 1 << 127); + (int si, int co) = sincosm1_f256(x); ;; doesn't make sense to use more accurate sincosn_f256() + co = (1 << 248) - 
(co ~>> 9); + si ~>>= 8; + repeat (q & 3) { + (si, co) = (co, - si); + } + return (si, co); +} + +;; fixed248 sin(fixed248 x); +;; inline is better than inline_ref for such simple functions +int fixed248::sin(int x) inline { + (int si, _) = fixed248::sincos(x); + return si; +} + +;; fixed248 cos(fixed248 x); +int fixed248::cos(int x) inline { + (_, int co) = fixed248::sincos(x); + return co; +} + +;; similarly, tan_aux_f256() may be used to implement tan() and cot() for specific fixed-point formats +;; fixed248 tan(fixed248 x); +;; not very accurate when |tan(x)| is very large (difficult to do better without floating-point numbers) +;; however, the relative accuracy is approximately 2^-247 in all cases, which is good enough for arguments given up to 2^-249 +int fixed248::tan(int x) inline_ref { + var (Pic, Pid) = Pi_xconst_f254(); + ;; (int q, x) = muldivmodr(x, 128, Pic); ;; no muldivmodr() builtin + (int q, x) = lshift7divmodr(x, Pic); ;; reduce mod Pi/2 + x = 2 * x - muldivr(q, Pid, 1 << 127); + var (a, b) = tan_aux_f256(x); ;; now a/b = tan(x') + if (q & 1) { + (a, b) = (b, - a); + } + return muldivr(a, 1 << 248, b); ;; either -b/a or a/b as fixed248 +} + +;; fixed248 cot(fixed248 x); +int fixed248::cot(int x) inline_ref { + var (Pic, Pid) = Pi_xconst_f254(); + (int q, x) = lshift7divmodr(x, Pic); ;; reduce mod Pi/2 + x = 2 * x - muldivr(q, Pid, 1 << 127); + var (b, a) = tan_aux_f256(x); ;; now b/a = tan(x') + if (q & 1) { + (a, b) = (b, - a); + } + return muldivr(a, 1 << 248, b); ;; either -b/a or a/b as fixed248 +} + +{----------------- INVERSE HYPERBOLIC TANGENT AND LOGARITHMS -----------------} + +;; inverse hyperbolic tangent of small x, evaluated by means of n terms of the continued fraction +;; valid for |x| < 2^-2.5 ~ 0.18 if n=37 (slightly less accurate with n=36) +;; |x| < 1/8 if n=32; |x| < 2^-3.5 if n=28; |x| < 1/16 if n=25 +;; |x| < 2^-4.5 if n=23; |x| < 1/32 if n=21; |x| < 1/64 if n=18 +;; fixed258 atanh(fixed258 x); +int atanh_f258(int x, int n) 
inline_ref { + int x2 = mulrshiftr256(x, x); ;; x^2 as fixed260 + int One = (1 << 254); + int a = One ~/ n + (1 << 255); ;; a := 2 + 1/n as fixed254 + repeat (n - 1) { + ;; a := 1 + (1 - x^2 / a)(1 + 1/n) as fixed254 + int t = One - muldivr(x2, 1 << 248, a); ;; t := 1 - x^2 / a + a = muldivr(t, n, (int n1 = n - 1)) + One; + n = n1; + } + ;; x / (1 - x^2 / a) = x / (1 - d) = x + x * d / (1 - d) for d = x^2 / a + ;; int d = muldivr(x2, 1 << 255, a - (x2 ~>> 6)); ;; d/(1-d) = x^2/(a-x^2) as fixed261 + ;; return x + (mulrshiftr256(x, d) ~>> 5); + return x + muldivr(x, x2 / 2, a - x2 ~/ 64) ~/ 32; +} + +;; number of terms n should be chosen as for atanh_f258() +;; fixed261 atanh(fixed261 x); +int atanh_f261_inlined(int x, int n) inline { + int x2 = mulrshiftr256(x, x); ;; x^2 as fixed266 + int One = (1 << 254); + int a = One ~/ n + (1 << 255); ;; a := 2 + 1/n as fixed254 + repeat (n - 1) { + ;; a := 1 + (1 - x^2 / a)(1 + 1/n) as fixed254 + int t = One - muldivr(x2, 1 << 242, a); ;; t := 1 - x^2 / a + a = muldivr(t, n, (int n1 = n - 1)) + One; + n = n1; + } + ;; x / (1 - x^2 / a) = x / (1 - d) = x + x * d / (1 - d) for d = x^2 / a + ;; int d = muldivr(x2, 1 << 255, a - (x2 ~>> 12)); ;; d/(1-d) = x^2/(a-x^2) as fixed267 + ;; return x + (mulrshiftr256(x, d) ~>> 11); + return x + muldivr(x, x2, a - x2 ~/ 4096) ~/ 4096; +} + +;; fixed261 atanh(fixed261 x); +int atanh_f261(int x, int n) inline_ref { + return atanh_f261_inlined(x, n); +} + +;; returns (y, s) such that log(x) = y/2^257 + s*log(2) for positive integer x +;; (fixed257, int) log_aux(int x) +(int, int) log_aux_f257(int x) inline_ref { + int s = log2_floor_p1(x); + x <<= 256 - s; + int t = touch(-1 << 256); + if ((x >> 249) <= 90) { + ;; t~touch(); + t >>= 1; + s -= 1; + } + x += t; + int 2x = 2 * x; + int y = lshift256divr(2x, (x >> 1) - t); + ;; y = 2x - (mulrshiftr256(2x, y) ~>> 2); ;; this line could improve precision on very rare occasions + return (atanh_f258(y, 36), s); +} + +;; computes 33^m for small m +int 
pow33(int m) inline { + int t = 1; + repeat (m) { t *= 33; } + return t; +} + +;; computes 33^m for small 0<=m<=22 +;; slightly faster than pow33() +int pow33b(int m) inline { + (int mh, int ml) = m /% 5; + int t = 1; + repeat (ml) { t *= 33; } + repeat (mh) { t *= 33 * 33 * 33 * 33 * 33; } + return t; +} + +;; returns (s, q, y) such that log(x) = s*log(2) + q*log(33/32) + y/2^260 for positive integer x +;; (int, int, fixed260) log_auxx_f260(int x); +(int, int, int) log_auxx_f260(int x) inline_ref { + int s = log2_floor_p1(x) - 1; + x <<= 255 - s; ;; rescale to 1 <= x < 2 as fixed255 + int t = touch(2873) << 244; ;; ~ (33/32)^11 ~ sqrt(2) as fixed255 + int x1 = (x - t) >> 1; + int q = muldivr(x1, 65, x1 + t) + 11; ;; crude approximation to round(log(x)/log(33/32)) + ;; t = 1; repeat (q) { t *= 33; } ;; t:=33^q, 0<=q<=22 + t = pow33b(q); + t <<= (51 - q) * 5; ;; t:=(33/32)^q as fixed255, nearest power of 33/32 to x + x -= t; + int y = lshift256divr(x << 4, (x >> 1) + t); ;; y = (x-t)/(x+t) as fixed261 + y = atanh_f261(y, 18); ;; atanh((x-t)/(x+t)) as fixed261, or log(x/t) as fixed260 + return (s, q, y); +} + +;; returns (y, s) such that log(x) = y/2^256 + s*log(2) for positive integer x +;; this function is very precise (error less than 0.6 ulp) and consumes < 7k gas +;; (fixed256, int) log_aux_f256(int x); +(int, int) log_aux_f256(int x) inline_ref { + var (s, q, y) = log_auxx_f260(x); + var (yh, yl) = rshiftr4mod(y); ;; y ~/% 16 , but Tolk does not optimize this to RSHIFTR#MOD + ;; int Log33_32 = 3563114646320977386603103333812068872452913448227778071188132859183498739150; ;; log(33/32) as fixed256 + ;; int Log33_32_l = -3769; ;; log(33/32) = Log33_32 / 2^256 + Log33_32_l / 2^269 + yh += (yl * 512 + q * -3769) ~>> 13; ;; compensation, may be removed if slightly worse accuracy is acceptable + int Log33_32 = 3563114646320977386603103333812068872452913448227778071188132859183498739150; ;; log(33/32) as fixed256 + return (yh + q * Log33_32, s); +} + +;; returns (y, s) 
such that log2(x) = y/2^256 + s for positive integer x
+;; this function is very precise (error less than 0.6 ulp) and consumes < 7k gas
+;; (fixed256, int) log2_aux_f256(int x);
+(int, int) log2_aux_f256(int x) inline_ref {
+  var (s, q, y) = log_auxx_f260(x);
+  y = lshift256divr(y, log2_const_f256()) ~>> 4;  ;; y/log(2) as fixed256
+  int Log33_32 = 5140487830366106860412008603913034462883915832139695448455767612111363481357;  ;; log_2(33/32) as fixed256
+  ;; Log33_32/2^256 happens to be a very precise approximation to log_2(33/32), no compensation required
+  return (y + q * Log33_32, s);
+}
+
+;; functions log_aux_f256() and log2_aux_f256() may be used to implement specific fixed-point instances of log() and log2()
+
+;; fixed248 log(fixed248 x)
+int fixed248::log(int x) inline_ref {
+  var (y, s) = log_aux_f256(x);
+  return muldivr(s - 248, log2_const_f256(), 1 << 8) + (y ~>> 8);
+  ;; return muldivr(s - 248, 80260960185991308862233904206310070533990667611589946606122867505419956976172, 1 << 8) + (y ~>> 8);
+}
+
+;; fixed248 log2(fixed248 x)
+int fixed248::log2(int x) inline {
+  var (y, s) = log2_aux_f256(x);
+  return ((s - 248) << 248) + (y ~>> 8);
+}
+
+;; computes x^y as exp(y*log(x)), x >= 0
+;; fixed248 pow(fixed248 x, fixed248 y);
+int fixed248::pow(int x, int y) inline_ref {
+  ifnot (y) {
+    return 1 << 248;  ;; x^0 = 1
+  }
+  if (x <= 0) {
+    int bad = (x | y) < 0;
+    return 0 >> bad;  ;; 0^y = 0 if x=0 and y>=0; "out of range" exception otherwise
+  }
+  var (l, s) = log2_aux_f256(x);
+  s -= 248;  ;; log_2(x) = s+l, l is fixed256, 0<=l<1
+  ;; compute (s+l)*y = q+ll
+  var (q1, r1) = mulrshiftr248mod(s, y);  ;; muldivmodr(s, y, 1 << 248)
+  var (q2, r2) = mulrshift256mod(l, y);
+  r2 >>= 247;
+  var (q3, r3) = rshiftr248mod(q2);  ;; divmodr(q2, 1 << 248);
+  var (q, ll) = rshiftr248mod(r1 + r3);
+  ll = 512 * ll + r2;
+  q += q1 + q3;
+  ;; now log_2(x^y) = y*log_2(x) = q + ll, q integer, ll fixed257, -1/2<=ll<1/2
+  int sq = q + 248;
+  if (sq <= 0) {
+    return - (sq == 0);  ;; underflow
+  }
+  int y = expm1_f257(mulrshiftr256(ll, log2_const_f256()));
+  return (y ~>> (9 - q)) - (-1 << sq);
+}
+
+{--------------------- INVERSE TRIGONOMETRIC FUNCTIONS -------------------}
+
+;; number of terms n should be chosen as for atanh_f258()
+;; fixed259 atan(fixed259 x);
+int atan_f259(int x, int n) inline_ref {
+  int x2 = mulrshiftr256(x, x);  ;; x^2 as fixed262
+  int One = (1 << 254);
+  int a = One ~/ n + (1 << 255);  ;; a := 2 + 1/n as fixed254
+  repeat (n - 1) {
+    ;; a := 1 + (1 + x^2 / a)(1 + 1/n) as fixed254
+    int t = One + muldivr(x2, 1 << 246, a);  ;; t := 1 + x^2 / a
+    a = muldivr(t, n, (int n1 = n - 1)) + One;
+    n = n1;
+  }
+  ;; x / (1 + x^2 / a) = x / (1 + d) = x - x * d / (1 + d) = x - x * x^2/(a+x^2) for d = x^2 / a
+  return x - muldivr(x, x2, a + x2 ~/ 256) ~/ 256;
+}
+
+;; number of terms n should be chosen as for atanh_f261()
+;; fixed261 atan(fixed261 x);
+int atan_f261_inlined(int x, int n) inline {
+  int x2 = mulrshiftr256(x, x);  ;; x^2 as fixed266
+  int One = (1 << 254);
+  int a = One ~/ n + (1 << 255);  ;; a := 2 + 1/n as fixed254
+  repeat (n - 1) {
+    ;; a := 1 + (1 + x^2 / a)(1 + 1/n) as fixed254
+    int t = One + muldivr(x2, 1 << 242, a);  ;; t := 1 + x^2 / a
+    a = muldivr(t, n, (int n1 = n - 1)) + One;
+    n = n1;
+  }
+  ;; x / (1 + x^2 / a) = x / (1 + d) = x - x * d / (1 + d) = x - x * x^2/(a+x^2) for d = x^2 / a
+  return x - muldivr(x, x2, a + x2 ~/ 4096) ~/ 4096;
+}
+
+;; fixed261 atan(fixed261 x);
+int atan_f261(int x, int n) inline_ref {
+  return atan_f261_inlined(x, n);
+}
+
+;; computes (q,a,b) such that q is approximately atan(x)/atan(1/32) and a+b*I=(1+I/32)^q as fixed255
+;; then b/a=tan(q*atan(1/32)) exactly, and (a,b) is almost a unit vector pointing in the direction of (1,x)
+;; must have |x|<1.1, x is fixed24
+;; (int, fixed255, fixed255) atan_aux_prereduce(fixed24 x);
+(int, int, int) atan_aux_prereduce(int x) inline_ref {
+  int xu = abs(x);
+  int tc = 7214596;  ;; tan(13*theta) as fixed24 where theta=atan(1/32)
+  int t1 = muldivr(xu - tc, 1 << 88, xu * tc + (1 << 48));  ;; tan(x') as fixed64 where x'=atan(x)-13*theta
+  ;; t1/(3+t1^2) * 3073/32 = x'/3 * 3072/32 = x' / (96/3072) = x' / theta
+  int q = muldivr(t1 * 3073, 1 << 59, t1 * t1 + (touch(3) << 128)) + 13;  ;; approximately round(atan(x)/theta), 0<=q<=25
+  var (pa, pb) = (33226912, 5232641);  ;; (32+I)^5
+  var (qh, ql) = q /% 5;
+  var (a, b) = (1 << (5 * (51 - q)), 0);  ;; (1/32^q, 0) as fixed255
+  repeat (ql) {  ;; a+b*I *= 32+I
+    (a, b) = (sub_rev(touch(b), 32 * a), a + 32 * b);  ;; same as (32 * a - b, 32 * b + a), but more efficient
+  }
+  repeat (qh) {  ;; a+b*I *= (32+I)^5 = pa + pb*I
+    (a, b) = (a * pa - b * pb, a * pb + b * pa);
+  }
+  int xs = sgn(x);
+  return (xs * q, a, xs * b);
+}
+
+;; compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1
+;; this function is reasonably accurate (error < 7 ulp with ulp = 2^-261), but it consumes >7k gas
+;; this is sufficient for most purposes
+;; (int, fixed261) atan_aux(fixed256 x)
+(int, int) atan_aux_f256(int x) inline_ref {
+  var (q, a, b) = atan_aux_prereduce(x ~>> 232);  ;; convert x to fixed24
+  ;; now b/a = tan(q*atan(1/32)) exactly, where q is near atan(x)/atan(1/32); so b/a is near x
+  ;; compute y = u/v = (a*x-b)/(a+b*x) as fixed261 ; then |y|<0.0167 = 1.07/64 and atan(x)=atan(y)+q*atan(1/32)
+  var (u, ul) = mulrshiftr256mod(a, x);
+  u = (ul ~>> 250) + ((u - b) << 6);  ;; |u| < 1/32, convert fixed255 -> fixed261
+  int v = a + mulrshiftr256(b, x);  ;; v is scalar product of (a,b) and (1,x), it is approximately in [1..sqrt(2)] as fixed255
+  int y = muldivr(u, 1 << 255, v);  ;; y = u/v as fixed261
+  int z = atan_f261_inlined(y, 18);  ;; z = atan(x)-q*atan(1/32)
+  return (q, z);
+}
+
+;; compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1
+;; this function is very accurate (error < 2 ulp), but it consumes >7k gas
+;; in most cases, faster function atan_aux_f256() should be used
+;; (int, fixed261) atan_auxx(fixed256 x)
+(int, int) atan_auxx_f256(int x) inline_ref {
+  var (q, a, b) = atan_aux_prereduce(x ~>> 232);  ;; convert x to fixed24
+  ;; now b/a = tan(q*atan(1/32)) exactly, where q is near atan(x)/atan(1/32); so b/a is near x
+  ;; compute y = (a*x-b)/(a+b*x) as fixed261 ; then |y|<0.0167 = 1.07/64 and atan(x)=atan(y)+q*atan(1/32)
+  ;; use sort of double precision arithmetic for this
+  var (u, ul) = mulrshiftr256mod(a, x);
+  ul /= 2;
+  u -= b;  ;; |u| < 1/32 as fixed255
+  var (v, vl) = mulrshiftr256mod(b, x);
+  vl /= 2;
+  v += a;  ;; v is scalar product of (a,b) and (1,x), it is approximately in [1..sqrt(2)] as fixed255
+  ;; y = (u + ul*eps) / (v + vl*eps) = u/v + (ul - vl * u/v)/v * eps where eps=1/2^255
+  var (y, r) = lshift255divmodr(u, v);  ;; y = u/v as fixed255
+  int yl = muldivr(ul + r, 1 << 255, v) - muldivr(vl, y, v);  ;; y/2^255 + yl/2^510 represent u/v
+  y = (yl ~>> 249) + (y << 6);  ;; convert y to fixed261
+  int z = atan_f261_inlined(y, 18);  ;; z = atan(x)-q*atan(1/32)
+  return (q, z);
+}
+
+;; consumes ~ 8k gas
+;; fixed255 atan(fixed255 x);
+int atan_f255(int x) inline_ref {
+  int s = (x ~>> 256);
+  touch(x);
+  if (s) {
+    x = lshift256divr(-1 << 255, x);  ;; x:=-1/x as fixed256
+  } else {
+    x *= 2;  ;; convert to fixed256
+  }
+  var (q, z) = atan_aux_f256(x);
+  ;; now atan(x) = z + q*atan(1/32) + s*(Pi/2), z is fixed261
+  var (Pi_h, Pi_l) = Pi_xconst_f254();  ;; Pi/2 as fixed255 + fixed383
+  var (qh, ql) = mulrshiftr6mod (q, Atan1_32_f261());
+  return qh + s * Pi_h + (z + ql + muldivr(s, Pi_l, 1 << 122)) ~/ 64;
+}
+
+;; computes atan(x) for -1 <= x < 1 only
+;; fixed256 atan_small(fixed256 x);
+int atan_f256_small(int x) inline_ref {
+  var (q, z) = atan_aux_f256(x);
+  ;; now atan(x) = z + q*atan(1/32), z is fixed261
+  var (qh, ql) = mulrshiftr5mod (q, Atan1_32_f261());
+  return qh + (z + ql) ~/ 32;
+}
+
+;; fixed255 asin(fixed255 x);
+int asin_f255(int x) inline_ref {
+  int a = fixed255::One - fixed255::sqr(x);  ;; a:=1-x^2
+  ifnot (a) {
+    return sgn(x) * Pi_const_f254();  ;; Pi/2 or -Pi/2
+  }
+  int y = fixed255::sqrt(a);  ;; sqrt(1-x^2)
+  int t = - lshift256divr(x, (-1 << 255) - y);  ;; t = x/(1+sqrt(1-x^2)) avoiding overflow
+  return atan_f256_small(t);  ;; asin(x)=2*atan(t)
+}
+
+;; fixed254 acos(fixed255 x);
+int acos_f255(int x) inline_ref {
+  int Pi = Pi_const_f254();
+  if (x == (-1 << 255)) {
+    return Pi;  ;; acos(-1) = Pi
+  }
+  Pi /= 2;
+  int y = fixed255::sqrt(fixed255::One - fixed255::sqr(x));  ;; sqrt(1-x^2)
+  int t = lshift256divr(x, (-1 << 255) - y);  ;; t = -x/(1+sqrt(1-x^2)) avoiding overflow
+  return Pi + atan_f256_small(t) ~/ 2;  ;; acos(x)=Pi/2 + 2*atan(t)
+}
+
+;; consumes ~ 10k gas
+;; fixed248 asin(fixed248 x)
+int fixed248::asin(int x) inline {
+  return asin_f255(x << 7) ~>> 7;
+}
+
+;; consumes ~ 10k gas
+;; fixed248 acos(fixed248 x)
+int fixed248::acos(int x) inline {
+  return acos_f255(x << 7) ~>> 6;
+}
+
+;; consumes ~ 7500 gas
+;; fixed248 atan(fixed248 x);
+int fixed248::atan(int x) inline_ref {
+  int s = (x ~>> 249);
+  touch(x);
+  if (s) {
+    s = sgn(s);
+    x = lshift256divr(-1 << 248, x);  ;; x:=-1/x as fixed256
+  } else {
+    x <<= 8;  ;; convert to fixed256
+  }
+  var (q, z) = atan_aux_f256(x);
+  ;; now atan(x) = z + q*atan(1/32) + s*(Pi/2), z is fixed261
+  return (z ~/ 64 + s * Pi_const_f254() + muldivr(q, Atan1_32_f261(), 64)) ~/ 128;  ;; compute in fixed255, then convert
+}
+
+;; fixed248 acot(fixed248 x);
+int fixed248::acot(int x) inline_ref {
+  int s = (x ~>> 249);
+  touch(x);
+  if (s) {
+    x = lshift256divr(-1 << 248, x);  ;; x:=-1/x as fixed256
+    s = 0;
+  } else {
+    x <<= 8;  ;; convert to fixed256
+    s = sgn(x);
+  }
+  var (q, z) = atan_aux_f256(x);
+  ;; now acot(x) = - z - q*atan(1/32) + s*(Pi/2), z is fixed261
+  return (s * Pi_const_f254() - z ~/ 64 - muldivr(q, Atan1_32_f261(), 64)) ~/ 128;  ;; compute in fixed255, then convert
+}
+
+{--------------------- PSEUDO-RANDOM NUMBERS -------------------}
+
+;; random number with standard normal distribution N(0,1)
+;; generated by Kinderman--Monahan ratio method modified by J.Leva
+;; spends ~ 
2k..3k gas on average
+;; fixed252 nrand();
+int nrand_f252() impure inline_ref {
+  var (x, s, t, A, B, r0) = (nan(), touch(29483) << 236, touch(-3167) << 239, 12845, 16693, 9043);
+  ;; 4/sqrt(e*Pi) = 1.369 loop iterations on average
+  do {
+    var (u, v) = (random() / 16 + 1, muldivr(random() - (1 << 255), 7027, 1 << 16));  ;; fixed252; 7027=ceil(sqrt(8/e)*2^12)
+    int va = abs(v);
+    var (u1, v1) = (u - s, va - t);  ;; (u - 29483/2^16, abs(v) + 3167/2^13) as fixed252
+    ;; Q := u1^2 + v1 * (A*v1 - B*u1) as fixed252 where A=12845/2^16, B=16693/2^16
+    int Q = muldivr(u1, u1, 1 << 252) + muldivr(v1, muldivr(v1, A, 1 << 16) - muldivr(u1, B, 1 << 16), 1 << 252);
+    ;; must have 9043 / 2^15 < Q < 9125 / 2^15, otherwise accept if smaller, reject if larger
+    int Qd = (Q >> 237) - r0;
+    if ((Qd < 9125 - 9043) & (va / u < 16)) {
+      x = muldivr(v, 1 << 252, u);  ;; x:=v/u as fixed252; reject immediately if |v/u| >= 16
+      if (Qd >= 0) {  ;; immediately accept if Qd < 0
+        ;; rarely taken branch - 0.012 times per call on average
+        ;; check condition v^2 < -4*u^2*log(u), or equivalent condition u < exp(-x^2/4) for x=v/u
+        int xx = mulrshiftr256(x, x) ~/ 4;  ;; x^2/4 as fixed248
+        int ex = fixed248::exp(- xx) * 16;  ;; exp(-x^2/4) as fixed252
+        if (u > ex) {
+          x = nan();  ;; condition false, reject
+        }
+      }
+    }
+  } until (~ is_nan(x));
+  return x;
+}
+
+;; generates a random number approximately distributed according to the standard normal distribution
+;; much faster than nrand_f252(), should be suitable for most purposes when only several random numbers are needed
+;; fixed252 nrand_fast();
+int nrand_fast_f252() impure inline_ref {
+  int t = touch(-3) << 253;  ;; -6. as fixed252
+  repeat (12) {
+    t += random() / 16;  ;; add together 12 uniformly random numbers
+  }
+  return t;
+}
+
+;; random number uniformly distributed in [0..1)
+;; fixed248 random();
+int fixed248::random() impure inline {
+  return random() >> 8;
+}
+
+;; random number with standard normal distribution
+;; fixed248 nrand();
+int fixed248::nrand() impure inline {
+  return nrand_f252() ~>> 4;
+}
+
+;; generates a random number approximately distributed according to the standard normal distribution
+;; fixed248 nrand_fast();
+int fixed248::nrand_fast() impure inline {
+  return nrand_fast_f252() ~>> 4;
+}
diff --git a/crypto/smartcont/stdlib.tolk b/crypto/smartcont/stdlib.tolk
new file mode 100644
index 000000000..344d90319
--- /dev/null
+++ b/crypto/smartcont/stdlib.tolk
@@ -0,0 +1,638 @@
+;; Standard library for Tolk
+;; (initially copied from stdlib.fc)
+;;
+
+{-
+  This file is part of TON Tolk Standard Library.
+
+  Tolk Standard Library is free software: you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation, either version 2 of the License, or
+  (at your option) any later version.
+
+  Tolk Standard Library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU Lesser General Public License for more details.
+
+-}
+
+{-
+  # Tuple manipulation primitives
+  The names and the types are mostly self-explaining.
+
+  Note that currently values of atomic type `tuple` can't be cast to composite tuple type (e.g. `[int, cell]`)
+  and vice versa.
+-}
+
+{-
+  # Lisp-style lists
+
+  Lists can be represented as nested 2-element tuples.
+  Empty list is conventionally represented as TVM `null` value (it can be obtained by calling [null()]).
+  For example, tuple `(1, (2, (3, null)))` represents list `[1, 2, 3]`. Elements of a list can be of different types.
+-}
+
+;;; Adds an element to the beginning of lisp-style list.
+forall X -> tuple cons(X head, tuple tail) asm "CONS";
+
+;;; Extracts the head and the tail of lisp-style list.
+forall X -> (X, tuple) uncons(tuple list) asm "UNCONS";
+
+;;; Extracts the tail and the head of lisp-style list (same `UNCONS` as [uncons], with the two results swapped).
+forall X -> (tuple, X) list_next(tuple list) asm( -> 1 0) "UNCONS";
+
+;;; Returns the head of lisp-style list.
+forall X -> X car(tuple list) asm "CAR";
+
+;;; Returns the tail of lisp-style list.
+tuple cdr(tuple list) asm "CDR";
+
+;;; Creates tuple with zero elements.
+tuple empty_tuple() asm "NIL";
+
+;;; Appends [value] to a `Tuple t = (x1, ..., xn)`, but only if the resulting `Tuple t' = (x1, ..., xn, value)`
+;;; is of length at most 255. Otherwise throws a type check exception. `~tpush` is the same primitive in modifying form.
+forall X -> tuple tpush(tuple t, X value) asm "TPUSH";
+forall X -> (tuple, ()) ~tpush(tuple t, X value) asm "TPUSH";
+
+;;; Creates a tuple of length one with given argument as element.
+forall X -> [X] single(X x) asm "SINGLE";
+
+;;; Unpacks a tuple of length one.
+forall X -> X unsingle([X] t) asm "UNSINGLE";
+
+;;; Creates a tuple of length two with given arguments as elements.
+forall X, Y -> [X, Y] pair(X x, Y y) asm "PAIR";
+
+;;; Unpacks a tuple of length two.
+forall X, Y -> (X, Y) unpair([X, Y] t) asm "UNPAIR";
+
+;;; Creates a tuple of length three with given arguments as elements.
+forall X, Y, Z -> [X, Y, Z] triple(X x, Y y, Z z) asm "TRIPLE";
+
+;;; Unpacks a tuple of length three.
+forall X, Y, Z -> (X, Y, Z) untriple([X, Y, Z] t) asm "UNTRIPLE";
+
+;;; Creates a tuple of length four with given arguments as elements.
+forall X, Y, Z, W -> [X, Y, Z, W] tuple4(X x, Y y, Z z, W w) asm "4 TUPLE";
+
+;;; Unpacks a tuple of length four.
+forall X, Y, Z, W -> (X, Y, Z, W) untuple4([X, Y, Z, W] t) asm "4 UNTUPLE";
+
+;;; Returns the first element of a tuple (with unknown element types).
+forall X -> X first(tuple t) asm "FIRST";
+
+;;; Returns the second element of a tuple (with unknown element types).
+forall X -> X second(tuple t) asm "SECOND";
+
+;;; Returns the third element of a tuple (with unknown element types).
+forall X -> X third(tuple t) asm "THIRD";
+
+;;; Returns the fourth element of a tuple (with unknown element types); `3 INDEX` uses a zero-based index.
+forall X -> X fourth(tuple t) asm "3 INDEX";
+
+;;; Returns the first element of a pair tuple.
+forall X, Y -> X pair_first([X, Y] p) asm "FIRST";
+
+;;; Returns the second element of a pair tuple.
+forall X, Y -> Y pair_second([X, Y] p) asm "SECOND";
+
+;;; Returns the first element of a triple tuple.
+forall X, Y, Z -> X triple_first([X, Y, Z] p) asm "FIRST";
+
+;;; Returns the second element of a triple tuple.
+forall X, Y, Z -> Y triple_second([X, Y, Z] p) asm "SECOND";
+
+;;; Returns the third element of a triple tuple.
+forall X, Y, Z -> Z triple_third([X, Y, Z] p) asm "THIRD";
+
+
+;;; Pushes a null element (cast to the given type).
+;;; By the TVM type `Null` Tolk represents absence of a value of some atomic type.
+;;; So `null` can actually have any atomic type.
+forall X -> X null() asm "PUSHNULL";
+
+;;; Moves a variable [x] to the top of the stack.
+forall X -> (X, ()) ~impure_touch(X x) impure asm "NOP";
+
+
+
+;;; Returns the current Unix time as an Integer.
+int now() asm "NOW";
+
+;;; Returns the internal address of the current smart contract as a Slice with a `MsgAddressInt`.
+;;; If necessary, it can be parsed further using primitives such as [parse_std_addr].
+slice my_address() asm "MYADDR";
+
+;;; Returns the balance of the smart contract as a tuple consisting of an int
+;;; (balance in nanotoncoins) and a `cell`
+;;; (a dictionary with 32-bit keys representing the balance of "extra currencies")
+;;; at the start of Computation Phase.
+;;; Note that RAW primitives such as [send_raw_message] do not update this field.
+[int, cell] get_balance() asm "BALANCE";
+
+;;; Returns the logical time of the current transaction.
+int cur_lt() asm "LTIME";
+
+;;; Returns the starting logical time of the current block.
+int block_lt() asm "BLOCKLT";
+
+;;; Computes the representation hash of a `cell` [c] and returns it as a 256-bit unsigned integer `x`.
+;;; Useful for signing and checking signatures of arbitrary entities represented by a tree of cells.
+int cell_hash(cell c) asm "HASHCU";
+
+;;; Computes the hash of a `slice s` and returns it as a 256-bit unsigned integer `x`.
+;;; The result is the same as if an ordinary cell containing only data and references from `s` had been created
+;;; and its hash computed by [cell_hash].
+int slice_hash(slice s) asm "HASHSU";
+
+;;; Computes sha256 of the data bits of `slice` [s]. If the bit length of `s` is not divisible by eight,
+;;; throws a cell underflow exception. The hash value is returned as a 256-bit unsigned integer `x`.
+int string_hash(slice s) asm "SHA256U";
+
+{-
+  # Signature checks
+-}
+
+;;; Checks the Ed25519-`signature` of a `hash` (a 256-bit unsigned integer, usually computed as the hash of some data)
+;;; using [public_key] (also represented by a 256-bit unsigned integer).
+;;; The signature must contain at least 512 data bits; only the first 512 bits are used.
+;;; The result is `-1` if the signature is valid, `0` otherwise.
+;;; Note that `CHKSIGNU` creates a 256-bit slice with the hash and calls `CHKSIGNS`.
+;;; That is, if [hash] is computed as the hash of some data, these data are hashed twice,
+;;; the second hashing occurring inside `CHKSIGNS`.
+int check_signature(int hash, slice signature, int public_key) asm "CHKSIGNU";
+
+;;; Checks whether [signature] is a valid Ed25519-signature of the data portion of `slice data` using `public_key`,
+;;; similarly to [check_signature].
+;;; If the bit length of [data] is not divisible by eight, throws a cell underflow exception.
+;;; The verification of Ed25519 signatures is the standard one,
+;;; with sha256 used to reduce [data] to the 256-bit number that is actually signed.
+int check_data_signature(slice data, slice signature, int public_key) asm "CHKSIGNS";
+
+{---
+  # Computation of boc size
+  The primitives below may be useful for computing storage fees of user-provided data.
+-}
+
+;;; Returns `(x, y, z)` or throws a cell overflow exception (`8`) on failure.
+;;; Recursively computes the count of distinct cells `x`, data bits `y`, and cell references `z`
+;;; in the DAG rooted at `cell` [c], effectively returning the total storage used by this DAG taking into account
+;;; the identification of equal cells.
+;;; The values of `x`, `y`, and `z` are computed by a depth-first traversal of this DAG,
+;;; with a hash table of visited cell hashes used to prevent visits of already-visited cells.
+;;; The total count of visited cells `x` cannot exceed non-negative [max_cells];
+;;; otherwise the computation is aborted before visiting the `(max_cells + 1)`-st cell and
+;;; the exception is thrown. If [c] is `null`, returns `x = y = z = 0`.
+(int, int, int) compute_data_size(cell c, int max_cells) impure asm "CDATASIZE";
+
+;;; Similar to [compute_data_size], but accepting a `slice` [s] instead of a `cell`.
+;;; The returned value of `x` does not take into account the cell that contains the `slice` [s] itself;
+;;; however, the data bits and the cell references of [s] are accounted for in `y` and `z`.
+(int, int, int) slice_compute_data_size(slice s, int max_cells) impure asm "SDATASIZE";
+
+;;; A quiet version of [compute_data_size]: returns `(x, y, z, -1)`, or `(null, null, null, 0)` instead of throwing on failure.
+(int, int, int, int) compute_data_size?(cell c, int max_cells) asm "CDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT";
+
+;;; A quiet version of [slice_compute_data_size]: returns `(x, y, z, -1)`, or `(null, null, null, 0)` instead of throwing on failure.
+(int, int, int, int) slice_compute_data_size?(slice s, int max_cells) asm "SDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT";
+
+;;; Throws an exception with exit_code excno if cond is not 0 (commented out since it is implemented in the compiler)
+;; () throw_if(int excno, int cond) impure asm "THROWARGIF";
+
+{--
+  # Debug primitives
+  Only works for local TVM execution with debug level verbosity
+-}
+;;; Dumps the stack (at most the top 255 values) and shows the total stack depth.
+() dump_stack() impure asm "DUMPSTK";
+
+{-
+  # Persistent storage save and load
+-}
+
+;;; Returns the persistent contract storage cell. It can be parsed or modified with slice and builder primitives later.
+cell get_data() asm "c4 PUSH";
+
+;;; Sets `cell` [c] as persistent contract data. You can update persistent contract storage with this primitive.
+() set_data(cell c) impure asm "c4 POP";
+
+{-
+  # Continuation primitives
+-}
+;;; Usually `c3` has a continuation initialized by the whole code of the contract. It is used for function calls.
+;;; The primitive returns the current value of `c3`.
+cont get_c3() impure asm "c3 PUSH";
+
+;;; Updates the current value of `c3`. Usually, it is used for updating smart contract code in run-time.
+;;; Note that after execution of this primitive the current code
+;;; (and the stack of recursive function calls) won't change,
+;;; but any other function call will use a function from the new code.
+() set_c3(cont c) impure asm "c3 POP";
+
+;;; Transforms a `slice` [s] into a simple ordinary continuation `c`, with `c.code = s` and an empty stack and savelist.
+cont bless(slice s) impure asm "BLESS";
+
+{---
+  # Gas related primitives
+-}
+
+;;; Sets current gas limit `gl` to its maximal allowed value `gm`, and resets the gas credit `gc` to zero,
+;;; decreasing the value of `gr` by `gc` in the process.
+;;; In other words, the current smart contract agrees to buy some gas to finish the current transaction.
+;;; This action is required to process external messages, which bring no value (hence no gas) with themselves. +;;; +;;; For more details check [accept_message effects](https://ton.org/docs/#/smart-contracts/accept). +() accept_message() impure asm "ACCEPT"; + +;;; Sets current gas limit `gl` to the minimum of limit and `gm`, and resets the gas credit `gc` to zero. +;;; If the gas consumed so far (including the present instruction) exceeds the resulting value of `gl`, +;;; an (unhandled) out of gas exception is thrown before setting new gas limits. +;;; Notice that [set_gas_limit] with an argument `limit ≥ 2^63 − 1` is equivalent to [accept_message]. +() set_gas_limit(int limit) impure asm "SETGASLIMIT"; + +;;; Commits the current state of registers `c4` (“persistent data”) and `c5` (“actions”) +;;; so that the current execution is considered “successful” with the saved values even if an exception +;;; in Computation Phase is thrown later. +() commit() impure asm "COMMIT"; + +;;; Not implemented +;;() buy_gas(int gram) impure asm "BUYGAS"; + +;;; Computes the amount of gas that can be bought for `amount` nanoTONs, +;;; and sets `gl` accordingly in the same way as [set_gas_limit]. +() buy_gas(int amount) impure asm "BUYGAS"; + +;;; Computes the minimum of two integers [x] and [y]. +int min(int x, int y) asm "MIN"; + +;;; Computes the maximum of two integers [x] and [y]. +int max(int x, int y) asm "MAX"; + +;;; Sorts two integers. +(int, int) minmax(int x, int y) asm "MINMAX"; + +;;; Computes the absolute value of an integer [x]. +int abs(int x) asm "ABS"; + +{- + # Slice primitives + + It is said that a primitive _loads_ some data, + if it returns the data and the remainder of the slice + (so it can also be used as modifying method). + + It is said that a primitive _preloads_ some data, if it returns only the data + (it can be used as non-modifying method). + + Unless otherwise stated, loading and preloading primitives read the data from a prefix of the slice. 
+-} + + +;;; Converts a `cell` [c] into a `slice`. Notice that [c] must be either an ordinary cell, +;;; or an exotic cell (see [TVM.pdf](https://ton-blockchain.github.io/docs/tvm.pdf), 3.1.2) +;;; which is automatically loaded to yield an ordinary cell `c'`, converted into a `slice` afterwards. +slice begin_parse(cell c) asm "CTOS"; + +;;; Checks if [s] is empty. If not, throws an exception. +() end_parse(slice s) impure asm "ENDS"; + +;;; Loads the first reference from the slice. +(slice, cell) load_ref(slice s) asm( -> 1 0) "LDREF"; + +;;; Preloads the first reference from the slice. +cell preload_ref(slice s) asm "PLDREF"; + + {- Functions below are commented out because they are implemented at the compiler level for optimisation -} + +;;; Loads a signed [len]-bit integer from a slice [s]. +;; (slice, int) ~load_int(slice s, int len) asm(s len -> 1 0) "LDIX"; + +;;; Loads an unsigned [len]-bit integer from a slice [s]. +;; (slice, int) ~load_uint(slice s, int len) asm( -> 1 0) "LDUX"; + +;;; Preloads a signed [len]-bit integer from a slice [s]. +;; int preload_int(slice s, int len) asm "PLDIX"; + +;;; Preloads an unsigned [len]-bit integer from a slice [s]. +;; int preload_uint(slice s, int len) asm "PLDUX"; + +;;; Loads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate `slice s''`. +;; (slice, slice) load_bits(slice s, int len) asm(s len -> 1 0) "LDSLICEX"; + +;;; Preloads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate `slice s''`. +;; slice preload_bits(slice s, int len) asm "PLDSLICEX"; + +;;; Loads serialized amount of TonCoins (any unsigned integer up to `2^120 - 1`). +(slice, int) load_grams(slice s) asm( -> 1 0) "LDGRAMS"; +(slice, int) load_coins(slice s) asm( -> 1 0) "LDGRAMS"; + +;;; Returns all but the first `0 ≤ len ≤ 1023` bits of `slice` [s]. +slice skip_bits(slice s, int len) asm "SDSKIPFIRST"; +(slice, ()) ~skip_bits(slice s, int len) asm "SDSKIPFIRST"; + +;;; Returns the first `0 ≤ len ≤ 1023` bits of `slice` [s].
+slice first_bits(slice s, int len) asm "SDCUTFIRST"; + +;;; Returns all but the last `0 ≤ len ≤ 1023` bits of `slice` [s]. +slice skip_last_bits(slice s, int len) asm "SDSKIPLAST"; +(slice, ()) ~skip_last_bits(slice s, int len) asm "SDSKIPLAST"; + +;;; Returns the last `0 ≤ len ≤ 1023` bits of `slice` [s]. +slice slice_last(slice s, int len) asm "SDCUTLAST"; + +;;; Loads a dictionary `D` (HashMapE) from `slice` [s]. +;;; (returns `null` if `nothing` constructor is used). +(slice, cell) load_dict(slice s) asm( -> 1 0) "LDDICT"; + +;;; Preloads a dictionary `D` from `slice` [s]. +cell preload_dict(slice s) asm "PLDDICT"; + +;;; Loads a dictionary as [load_dict], but returns only the remainder of the slice. +slice skip_dict(slice s) asm "SKIPDICT"; + +;;; Loads (Maybe ^Cell) from `slice` [s]. +;;; In other words loads 1 bit and if it is true +;;; loads first ref and return it with slice remainder +;;; otherwise returns `null` and slice remainder +(slice, cell) load_maybe_ref(slice s) asm( -> 1 0) "LDOPTREF"; + +;;; Preloads (Maybe ^Cell) from `slice` [s]. +cell preload_maybe_ref(slice s) asm "PLDOPTREF"; + + +;;; Returns the depth of `cell` [c]. +;;; If [c] has no references, then return `0`; +;;; otherwise the returned value is one plus the maximum of depths of cells referred to from [c]. +;;; If [c] is a `null` instead of a cell, returns zero. +int cell_depth(cell c) asm "CDEPTH"; + + +{- + # Slice size primitives +-} + +;;; Returns the number of references in `slice` [s]. +int slice_refs(slice s) asm "SREFS"; + +;;; Returns the number of data bits in `slice` [s]. +int slice_bits(slice s) asm "SBITS"; + +;;; Returns both the number of data bits and the number of references in `slice` [s]. +(int, int) slice_bits_refs(slice s) asm "SBITREFS"; + +;;; Checks whether a `slice` [s] is empty (i.e., contains no bits of data and no cell references). +int slice_empty?(slice s) asm "SEMPTY"; + +;;; Checks whether `slice` [s] has no bits of data. 
+int slice_data_empty?(slice s) asm "SDEMPTY"; + +;;; Checks whether `slice` [s] has no references. +int slice_refs_empty?(slice s) asm "SREMPTY"; + +;;; Returns the depth of `slice` [s]. +;;; If [s] has no references, then returns `0`; +;;; otherwise the returned value is one plus the maximum of depths of cells referred to from [s]. +int slice_depth(slice s) asm "SDEPTH"; + +{- + # Builder size primitives +-} + +;;; Returns the number of cell references already stored in `builder` [b]. +int builder_refs(builder b) asm "BREFS"; + +;;; Returns the number of data bits already stored in `builder` [b]. +int builder_bits(builder b) asm "BBITS"; + +;;; Returns the depth of `builder` [b]. +;;; If no cell references are stored in [b], then returns 0; +;;; otherwise the returned value is one plus the maximum of depths of cells referred to from [b]. +int builder_depth(builder b) asm "BDEPTH"; + +{- + # Builder primitives + It is said that a primitive _stores_ a value `x` into a builder `b` + if it returns a modified version of the builder `b'` with the value `x` stored at the end of it. + It can be used as non-modifying method. + + All the primitives below first check whether there is enough space in the `builder`, + and only then check the range of the value being serialized. +-} + +;;; Creates a new empty `builder`. +builder begin_cell() asm "NEWC"; + +;;; Converts a `builder` into an ordinary `cell`. +cell end_cell(builder b) asm "ENDC"; + +;;; Stores a reference to `cell` [c] into `builder` [b]. +builder store_ref(builder b, cell c) asm(c b) "STREF"; + +;;; Stores an unsigned [len]-bit integer `x` into `b` for `0 ≤ len ≤ 256`. +;; builder store_uint(builder b, int x, int len) asm(x b len) "STUX"; + +;;; Stores a signed [len]-bit integer `x` into `b` for `0 ≤ len ≤ 257`.
+;; builder store_int(builder b, int x, int len) asm(x b len) "STIX"; + + +;;; Stores `slice` [s] into `builder` [b] +builder store_slice(builder b, slice s) asm "STSLICER"; + +;;; Stores (serializes) an integer [x] in the range `0..2^120 − 1` into `builder` [b]. +;;; The serialization of [x] consists of a 4-bit unsigned big-endian integer `l`, +;;; which is the smallest integer `l ≥ 0`, such that `x < 2^8l`, +;;; followed by an `8l`-bit unsigned big-endian representation of [x]. +;;; If [x] does not belong to the supported range, a range check exception is thrown. +;;; +;;; Store amounts of TonCoins to the builder as VarUInteger 16 +builder store_grams(builder b, int x) asm "STGRAMS"; +builder store_coins(builder b, int x) asm "STGRAMS"; + +;;; Stores dictionary `D` represented by `cell` [c] or `null` into `builder` [b]. +;;; In other words, stores a `1`-bit and a reference to [c] if [c] is not `null` and `0`-bit otherwise. +builder store_dict(builder b, cell c) asm(c b) "STDICT"; + +;;; Stores (Maybe ^Cell) to builder: +;;; if cell is null store 1 zero bit +;;; otherwise store 1 true bit and ref to cell +builder store_maybe_ref(builder b, cell c) asm(c b) "STOPTREF"; + + +{- + # Address manipulation primitives + The address manipulation primitives listed below serialize and deserialize values according to the following TL-B scheme: + ```TL-B + addr_none$00 = MsgAddressExt; + addr_extern$01 len:(## 8) external_address:(bits len) + = MsgAddressExt; + anycast_info$_ depth:(#<= 30) { depth >= 1 } + rewrite_pfx:(bits depth) = Anycast; + addr_std$10 anycast:(Maybe Anycast) + workchain_id:int8 address:bits256 = MsgAddressInt; + addr_var$11 anycast:(Maybe Anycast) addr_len:(## 9) + workchain_id:int32 address:(bits addr_len) = MsgAddressInt; + _ _:MsgAddressInt = MsgAddress; + _ _:MsgAddressExt = MsgAddress; + + int_msg_info$0 ihr_disabled:Bool bounce:Bool bounced:Bool + src:MsgAddress dest:MsgAddressInt + value:CurrencyCollection ihr_fee:Grams fwd_fee:Grams + 
created_lt:uint64 created_at:uint32 = CommonMsgInfoRelaxed; + ext_out_msg_info$11 src:MsgAddress dest:MsgAddressExt + created_lt:uint64 created_at:uint32 = CommonMsgInfoRelaxed; + ``` + A deserialized `MsgAddress` is represented by a tuple `t` as follows: + + - `addr_none` is represented by `t = (0)`, + i.e., a tuple containing exactly one integer equal to zero. + - `addr_extern` is represented by `t = (1, s)`, + where slice `s` contains the field `external_address`. In other words, + `t` is a pair (a tuple consisting of two entries), containing an integer equal to one and slice `s`. + - `addr_std` is represented by `t = (2, u, x, s)`, + where `u` is either a `null` (if `anycast` is absent) or a slice `s'` containing `rewrite_pfx` (if anycast is present). + Next, integer `x` is the `workchain_id`, and slice `s` contains the address. + - `addr_var` is represented by `t = (3, u, x, s)`, + where `u`, `x`, and `s` have the same meaning as for `addr_std`. +-} + +;;; Loads from slice [s] the only prefix that is a valid `MsgAddress`, +;;; and returns both this prefix `s'` and the remainder `s''` of [s] as slices. +(slice, slice) load_msg_addr(slice s) asm( -> 1 0) "LDMSGADDR"; + +;;; Decomposes slice [s] containing a valid `MsgAddress` into a `tuple t` with separate fields of this `MsgAddress`. +;;; If [s] is not a valid `MsgAddress`, a cell deserialization exception is thrown. +tuple parse_addr(slice s) asm "PARSEMSGADDR"; + +;;; Parses slice [s] containing a valid `MsgAddressInt` (usually a `msg_addr_std`), +;;; applies rewriting from the anycast (if present) to the same-length prefix of the address, +;;; and returns both the workchain and the 256-bit address as integers. +;;; If the address is not 256-bit, or if [s] is not a valid serialization of `MsgAddressInt`, +;;; throws a cell deserialization exception.
+(int, int) parse_std_addr(slice s) asm "REWRITESTDADDR"; + +;;; A variant of [parse_std_addr] that returns the (rewritten) address as a slice [s], +;;; even if it is not exactly 256 bit long (represented by a `msg_addr_var`). +(int, slice) parse_var_addr(slice s) asm "REWRITEVARADDR"; + +{- + # Dictionary primitives +-} + + +;;; Sets the value associated with [key_len]-bit key signed index in dictionary [dict] to [value] (cell), +;;; and returns the resulting dictionary. +cell idict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTISETREF"; +(cell, ()) ~idict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTISETREF"; + +;;; Sets the value associated with [key_len]-bit key unsigned index in dictionary [dict] to [value] (cell), +;;; and returns the resulting dictionary. +cell udict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUSETREF"; +(cell, ()) ~udict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUSETREF"; + +cell idict_get_ref(cell dict, int key_len, int index) asm(index dict key_len) "DICTIGETOPTREF"; +(cell, int) idict_get_ref?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIGETREF" "NULLSWAPIFNOT"; +(cell, int) udict_get_ref?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUGETREF" "NULLSWAPIFNOT"; +(cell, cell) idict_set_get_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTISETGETOPTREF"; +(cell, cell) udict_set_get_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUSETGETOPTREF"; +(cell, int) idict_delete?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIDEL"; +(cell, int) udict_delete?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUDEL"; +(slice, int) idict_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIGET" "NULLSWAPIFNOT"; 
+(slice, int) udict_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUGET" "NULLSWAPIFNOT"; +(cell, slice, int) idict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; +(cell, slice, int) udict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; +(cell, (slice, int)) ~idict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; +(cell, (slice, int)) ~udict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; +cell udict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUSET"; +(cell, ()) ~udict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUSET"; +cell idict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTISET"; +(cell, ()) ~idict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTISET"; +cell dict_set(cell dict, int key_len, slice index, slice value) asm(value index dict key_len) "DICTSET"; +(cell, ()) ~dict_set(cell dict, int key_len, slice index, slice value) asm(value index dict key_len) "DICTSET"; +(cell, int) udict_add?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUADD"; +(cell, int) udict_replace?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUREPLACE"; +(cell, int) idict_add?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTIADD"; +(cell, int) idict_replace?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTIREPLACE"; +cell udict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUSETB"; +(cell, ()) ~udict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUSETB"; 
+cell idict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTISETB"; +(cell, ()) ~idict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTISETB"; +cell dict_set_builder(cell dict, int key_len, slice index, builder value) asm(value index dict key_len) "DICTSETB"; +(cell, ()) ~dict_set_builder(cell dict, int key_len, slice index, builder value) asm(value index dict key_len) "DICTSETB"; +(cell, int) udict_add_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUADDB"; +(cell, int) udict_replace_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUREPLACEB"; +(cell, int) idict_add_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTIADDB"; +(cell, int) idict_replace_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTIREPLACEB"; +(cell, int, slice, int) udict_delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~udict::delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; +(cell, int, slice, int) idict_delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~idict::delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; +(cell, slice, slice, int) dict_delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; +(cell, (slice, slice, int)) ~dict::delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; +(cell, int, slice, int) udict_delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~udict::delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; +(cell, int, slice, int) 
idict_delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~idict::delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; +(cell, slice, slice, int) dict_delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; +(cell, (slice, slice, int)) ~dict::delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_min?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_max?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMAX" "NULLSWAPIFNOT2"; +(int, cell, int) udict_get_min_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; +(int, cell, int) udict_get_max_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMAXREF" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_min?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_max?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; +(int, cell, int) idict_get_min_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; +(int, cell, int) idict_get_max_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_next?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_nexteq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_prev?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_preveq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_next?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; +(int, slice, 
int) idict_get_nexteq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_prev?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_preveq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; + +;;; Creates an empty dictionary, which is actually a null value. Equivalent to PUSHNULL +cell new_dict() asm "NEWDICT"; +;;; Checks whether a dictionary is empty. Equivalent to cell_null?. +int dict_empty?(cell c) asm "DICTEMPTY"; + + +{- Prefix dictionary primitives -} +(slice, slice, slice, int) pfxdict_get?(cell dict, int key_len, slice key) asm(key dict key_len) "PFXDICTGETQ" "NULLSWAPIFNOT2"; +(cell, int) pfxdict_set?(cell dict, int key_len, slice key, slice value) asm(value key dict key_len) "PFXDICTSET"; +(cell, int) pfxdict_delete?(cell dict, int key_len, slice key) asm(key dict key_len) "PFXDICTDEL"; + +;;; Returns the value of the global configuration parameter with integer index `i` as a `cell` or `null` value. +cell config_param(int x) asm "CONFIGOPTPARAM"; +;;; Checks whether c is a null. Note, that Tolk also has polymorphic null? built-in. +int cell_null?(cell c) asm "ISNULL"; + +;;; Creates an output action which would reserve exactly amount nanotoncoins (if mode = 0), at most amount nanotoncoins (if mode = 2), or all but amount nanotoncoins (if mode = 1 or mode = 3), from the remaining balance of the account. It is roughly equivalent to creating an outbound message carrying amount nanotoncoins (or b − amount nanotoncoins, where b is the remaining balance) to oneself, so that the subsequent output actions would not be able to spend more money than the remainder. Bit +2 in mode means that the external action does not fail if the specified amount cannot be reserved; instead, all remaining balance is reserved. 
Bit +8 in mode means `amount <- -amount` before performing any further actions. Bit +4 in mode means that amount is increased by the original balance of the current account (before the compute phase), including all extra currencies, before performing any other checks and actions. Currently, amount must be a non-negative integer, and mode must be in the range 0..15. +() raw_reserve(int amount, int mode) impure asm "RAWRESERVE"; +;;; Similar to raw_reserve, but also accepts a dictionary extra_amount (represented by a cell or null) with extra currencies. In this way currencies other than TonCoin can be reserved. +() raw_reserve_extra(int amount, cell extra_amount, int mode) impure asm "RAWRESERVEX"; +;;; Sends a raw message contained in msg, which should contain a correctly serialized object Message X, with the only exception that the source address is allowed to have dummy value addr_none (to be automatically replaced with the current smart contract address), and ihr_fee, fwd_fee, created_lt and created_at fields can have arbitrary values (to be rewritten with correct values during the action phase of the current transaction). Integer parameter mode contains the flags. Currently mode = 0 is used for ordinary messages; mode = 128 is used for messages that are to carry all the remaining balance of the current smart contract (instead of the value originally indicated in the message); mode = 64 is used for messages that carry all the remaining value of the inbound message in addition to the value initially indicated in the new message (if bit 0 is not set, the gas fees are deducted from this amount); mode' = mode + 1 means that the sender wants to pay transfer fees separately; mode' = mode + 2 means that any errors arising while processing this message during the action phase should be ignored. Finally, mode' = mode + 32 means that the current account must be destroyed if its resulting balance is zero. This flag is usually employed together with +128. 
+() send_raw_message(cell msg, int mode) impure asm "SENDRAWMSG"; +;;; Creates an output action that would change this smart contract code to that given by cell new_code. Notice that this change will take effect only after the successful termination of the current run of the smart contract +() set_code(cell new_code) impure asm "SETCODE"; + +;;; Generates a new pseudo-random unsigned 256-bit integer x. The algorithm is as follows: if r is the old value of the random seed, considered as a 32-byte array (by constructing the big-endian representation of an unsigned 256-bit integer), then its sha512(r) is computed; the first 32 bytes of this hash are stored as the new value r' of the random seed, and the remaining 32 bytes are returned as the next random value x. +int random() impure asm "RANDU256"; +;;; Generates a new pseudo-random integer z in the range 0..range−1 (or range..−1, if range < 0). More precisely, an unsigned random value x is generated as in random; then z := x * range / 2^256 is computed. +int rand(int range) impure asm "RAND"; +;;; Returns the current random seed as an unsigned 256-bit Integer. +int get_seed() impure asm "RANDSEED"; +;;; Sets the random seed to unsigned 256-bit seed. +() set_seed(int) impure asm "SETRAND"; +;;; Mixes unsigned 256-bit integer x into the random seed r by setting the random seed to sha256 of the concatenation of two 32-byte strings: the first with the big-endian representation of the old seed r, and the second with the big-endian representation of x. +() randomize(int x) impure asm "ADDRAND"; +;;; Equivalent to randomize(cur_lt());. 
+() randomize_lt() impure asm "LTIME" "ADDRAND";
+
+;;; Checks whether the data parts of two slices coincide
+int equal_slice_bits (slice a, slice b) asm "SDEQ";
+
+;;; Concatenates two builders
+builder store_builder(builder to, builder from) asm "STBR";
diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt
new file mode 100644
index 000000000..54aaf8d21
--- /dev/null
+++ b/tolk/CMakeLists.txt
@@ -0,0 +1,51 @@
+cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
+
+set(TOLK_SOURCE
+    srcread.cpp
+    lexer.cpp
+    symtable.cpp
+    keywords.cpp
+    unify-types.cpp
+    parse-tolk.cpp
+    abscode.cpp
+    gen-abscode.cpp
+    analyzer.cpp
+    asmops.cpp
+    builtins.cpp
+    stack-transform.cpp
+    optimize.cpp
+    codegen.cpp
+    tolk.cpp
+)
+
+add_executable(tolk tolk-main.cpp ${TOLK_SOURCE})
+target_include_directories(tolk PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>)
+target_link_libraries(tolk PUBLIC git ton_crypto)  # todo replace with ton_crypto_core in the future
+if (WINGETOPT_FOUND)
+    target_link_libraries_system(tolk wingetopt)
+endif ()
+
+if (USE_EMSCRIPTEN)
+    add_executable(tolkfiftlib tolk-wasm.cpp ${TOLK_SOURCE})
+    target_include_directories(tolkfiftlib PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>)
+    target_link_libraries(tolkfiftlib PUBLIC fift-lib git)
+    target_link_options(tolkfiftlib PRIVATE
+        -sEXPORTED_RUNTIME_METHODS=FS,ccall,cwrap,UTF8ToString,stringToUTF8,lengthBytesUTF8,addFunction,removeFunction,setValue
+        -sEXPORTED_FUNCTIONS=_tolk_compile,_version,_malloc,_free,_setThrew
+        -sEXPORT_NAME=CompilerModule
+        -sERROR_ON_UNDEFINED_SYMBOLS=0
+        -sFILESYSTEM=1 -lnodefs.js
+        -Oz
+        -sIGNORE_MISSING_MAIN=1
+        -sAUTO_NATIVE_LIBRARIES=0
+        -sMODULARIZE=1
+        -sTOTAL_MEMORY=33554432
+        -sALLOW_MEMORY_GROWTH=1
+        -sALLOW_TABLE_GROWTH=1
+        --embed-file ${CMAKE_CURRENT_SOURCE_DIR}/../crypto/fift/lib@/fiftlib
+        -fexceptions
+    )
+    target_compile_options(tolkfiftlib PRIVATE -fexceptions -fno-stack-protector)
+endif ()
+
+install(TARGETS tolk RUNTIME DESTINATION bin)
diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp
new file mode 100644
index 000000000..7dd64bd07
---
/dev/null +++ b/tolk/abscode.cpp @@ -0,0 +1,526 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * ABSTRACT CODE + * + */ + +TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, const SrcLocation* loc) + : v_type(_type), idx(_idx), cls(_cls), coord(0) { + if (sym) { + name = sym->sym_idx; + sym->value->idx = _idx; + } + if (loc) { + where = std::make_unique(*loc); + } + if (!_type) { + v_type = TypeExpr::new_hole(); + } + if (cls == _Named) { + undefined = true; + } +} + +void TmpVar::set_location(const SrcLocation& loc) { + if (where) { + *where = loc; + } else { + where = std::make_unique(loc); + } +} + +void TmpVar::dump(std::ostream& os) const { + show(os); + os << " : " << v_type << " (width "; + v_type->show_width(os); + os << ")"; + if (coord > 0) { + os << " = _" << (coord >> 8) << '.' 
<< (coord & 255); + } else if (coord < 0) { + int n = (~coord >> 8), k = (~coord & 0xff); + if (k) { + os << " = (_" << n << ".._" << (n + k - 1) << ")"; + } else { + os << " = ()"; + } + } + os << std::endl; +} + +void TmpVar::show(std::ostream& os, int omit_idx) const { + if (cls & _Named) { + os << symbols.get_name(name); + if (omit_idx && (omit_idx >= 2 || (cls & _UniqueName))) { + return; + } + } + os << '_' << idx; +} + +std::ostream& operator<<(std::ostream& os, const TmpVar& var) { + var.show(os); + return os; +} + +void VarDescr::show_value(std::ostream& os) const { + if (val & _Int) { + os << 'i'; + } + if (val & _Const) { + os << 'c'; + } + if (val & _Zero) { + os << '0'; + } + if (val & _NonZero) { + os << '!'; + } + if (val & _Pos) { + os << '>'; + } + if (val & _Neg) { + os << '<'; + } + if (val & _Bool) { + os << 'B'; + } + if (val & _Bit) { + os << 'b'; + } + if (val & _Even) { + os << 'E'; + } + if (val & _Odd) { + os << 'O'; + } + if (val & _Finite) { + os << 'f'; + } + if (val & _Nan) { + os << 'N'; + } + if (int_const.not_null()) { + os << '=' << int_const; + } +} + +void VarDescr::show(std::ostream& os, const char* name) const { + if (flags & _Last) { + os << '*'; + } + if (flags & _Unused) { + os << '?'; + } + if (name) { + os << name; + } + os << '_' << idx; + show_value(os); +} + +void VarDescr::set_const(long long value) { + return set_const(td::make_refint(value)); +} + +void VarDescr::set_const(td::RefInt256 value) { + int_const = std::move(value); + if (!int_const->signed_fits_bits(257)) { + int_const.write().invalidate(); + } + val = _Const | _Int; + int s = sgn(int_const); + if (s < -1) { + val |= _Nan | _NonZero; + } else if (s < 0) { + val |= _NonZero | _Neg | _Finite; + if (*int_const == -1) { + val |= _Bool; + } + } else if (s > 0) { + val |= _NonZero | _Pos | _Finite; + } else if (!s) { + //if (*int_const == 1) { + // val |= _Bit; + //} + val |= _Zero | _Neg | _Pos | _Finite | _Bool | _Bit; + } + if (val & _Finite) { + val |= 
int_const->get_bit(0) ? _Odd : _Even; + } +} + +void VarDescr::set_const(std::string value) { /* marks this descriptor as a (non-integer) constant holding `value`; `value` is a by-value sink parameter, so it is moved into str_const rather than copied a second time, mirroring the RefInt256 overload */ + str_const = std::move(value); + val = _Const; +} + +void VarDescr::set_const_nan() { + set_const(td::make_refint()); +} + +void VarDescr::operator|=(const VarDescr& y) { + val &= y.val; + if (is_int_const() && y.is_int_const() && cmp(int_const, y.int_const) != 0) { + val &= ~_Const; + } + if (!(val & _Const)) { + int_const.clear(); + } +} + +void VarDescr::operator&=(const VarDescr& y) { + val |= y.val; + if (y.int_const.not_null() && int_const.is_null()) { + int_const = y.int_const; + } +} + +void VarDescr::set_value(const VarDescr& y) { + val = y.val; + int_const = y.int_const; +} + +void VarDescr::set_value(VarDescr&& y) { + val = y.val; + int_const = std::move(y.int_const); +} + +void VarDescr::clear_value() { + val = 0; + int_const.clear(); +} + +void VarDescrList::show(std::ostream& os) const { + if (unreachable) { + os << " "; + } + os << "["; + for (const auto& v : list) { + os << ' ' << v; + } + os << " ]\n"; +} + +void Op::flags_set_clear(int set, int clear) { + flags = (flags | set) & ~clear; + for (auto& op : block0) { + op.flags_set_clear(set, clear); + } + for (auto& op : block1) { + op.flags_set_clear(set, clear); + } +} +void Op::split_vars(const std::vector& vars) { + split_var_list(left, vars); + split_var_list(right, vars); + for (auto& op : block0) { + op.split_vars(vars); + } + for (auto& op : block1) { + op.split_vars(vars); + } +} + +void Op::split_var_list(std::vector& var_list, const std::vector& vars) { + int new_size = 0, changes = 0; + for (var_idx_t v : var_list) { + int c = vars.at(v).coord; + if (c < 0) { + ++changes; + new_size += (~c & 0xff); + } else { + ++new_size; + } + } + if (!changes) { + return; + } + std::vector new_var_list; + new_var_list.reserve(new_size); + for (var_idx_t v : var_list) { + int c = vars.at(v).coord; + if (c < 0) { + int n = (~c >> 8), k = (~c & 0xff); + while (k-- > 0) { + new_var_list.push_back(n++); + } + } else { +
new_var_list.push_back(v); + } + } + var_list = std::move(new_var_list); +} + +void Op::show(std::ostream& os, const std::vector& vars, std::string pfx, int mode) const { + if (mode & 2) { + os << pfx << " ["; + for (const auto& v : var_info.list) { + os << ' '; + if (v.flags & VarDescr::_Last) { + os << '*'; + } + if (v.flags & VarDescr::_Unused) { + os << '?'; + } + os << vars[v.idx]; + if (mode & 4) { + os << ':'; + v.show_value(os); + } + } + os << " ]\n"; + } + std::string dis = disabled() ? " " : ""; + if (noreturn()) { + dis += " "; + } + if (!is_pure()) { + dis += " "; + } + switch (cl) { + case _Undef: + os << pfx << dis << "???\n"; + break; + case _Nop: + os << pfx << dis << "NOP\n"; + break; + case _Call: + os << pfx << dis << "CALL: "; + show_var_list(os, left, vars); + os << " := " << (fun_ref ? fun_ref->name() : "(null)") << " "; + if ((mode & 4) && args.size() == right.size()) { + show_var_list(os, args, vars); + } else { + show_var_list(os, right, vars); + } + os << std::endl; + break; + case _CallInd: + os << pfx << dis << "CALLIND: "; + show_var_list(os, left, vars); + os << " := EXEC "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _Let: + os << pfx << dis << "LET "; + show_var_list(os, left, vars); + os << " := "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _Tuple: + os << pfx << dis << "MKTUPLE "; + show_var_list(os, left, vars); + os << " := "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _UnTuple: + os << pfx << dis << "UNTUPLE "; + show_var_list(os, left, vars); + os << " := "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _IntConst: + os << pfx << dis << "CONST "; + show_var_list(os, left, vars); + os << " := " << int_const << std::endl; + break; + case _SliceConst: + os << pfx << dis << "SCONST "; + show_var_list(os, left, vars); + os << " := " << str_const << std::endl; + break; + case _Import: + os << pfx << dis << "IMPORT "; + 
show_var_list(os, left, vars); + os << std::endl; + break; + case _Return: + os << pfx << dis << "RETURN "; + show_var_list(os, left, vars); + os << std::endl; + break; + case _GlobVar: + os << pfx << dis << "GLOBVAR "; + show_var_list(os, left, vars); + os << " := " << (fun_ref ? fun_ref->name() : "(null)") << std::endl; + break; + case _SetGlob: + os << pfx << dis << "SETGLOB "; + os << (fun_ref ? fun_ref->name() : "(null)") << " := "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _Repeat: + os << pfx << dis << "REPEAT "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << std::endl; + break; + case _If: + os << pfx << dis << "IF "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << " ELSE "; + show_block(os, block1.get(), vars, pfx, mode); + os << std::endl; + break; + case _While: + os << pfx << dis << "WHILE "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << " DO "; + show_block(os, block1.get(), vars, pfx, mode); + os << std::endl; + break; + case _Until: + os << pfx << dis << "UNTIL "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << std::endl; + break; + case _Again: + os << pfx << dis << "AGAIN "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << std::endl; + break; + default: + os << pfx << dis << " "; + show_var_list(os, left, vars); + os << " -- "; + show_var_list(os, right, vars); + os << std::endl; + break; + } +} + +void Op::show_var_list(std::ostream& os, const std::vector& idx_list, + const std::vector& vars) const { + if (!idx_list.size()) { + os << "()"; + } else if (idx_list.size() == 1) { + os << vars.at(idx_list[0]); + } else { + os << "(" << vars.at(idx_list[0]); + for (std::size_t i = 1; i < idx_list.size(); i++) { + os << "," << vars.at(idx_list[i]); + } + 
os << ")"; + } +} + +void Op::show_var_list(std::ostream& os, const std::vector& list, const std::vector& vars) const { + auto n = list.size(); + if (!n) { + os << "()"; + } else { + os << "( "; + for (std::size_t i = 0; i < list.size(); i++) { + if (i) { + os << ", "; + } + if (list[i].is_unused()) { + os << '?'; + } + os << vars.at(list[i].idx) << ':'; + list[i].show_value(os); + } + os << " )"; + } +} + +void Op::show_block(std::ostream& os, const Op* block, const std::vector& vars, std::string pfx, int mode) { + os << "{" << std::endl; + std::string pfx2 = pfx + " "; + for (const Op& op : block) { + op.show(os, vars, pfx2, mode); + } + os << pfx << "}"; +} + +void CodeBlob::flags_set_clear(int set, int clear) { + for (auto& op : ops) { + op.flags_set_clear(set, clear); + } +} + +std::ostream& operator<<(std::ostream& os, const CodeBlob& code) { + code.print(os); + return os; +} + +// flags: +1 = show variable definition locations; +2 = show vars after each op; +4 = show var abstract value info after each op; +8 = show all variables at start +void CodeBlob::print(std::ostream& os, int flags) const { + os << "CODE BLOB: " << var_cnt << " variables, " << in_var_cnt << " input\n"; + if ((flags & 8) != 0) { + for (const auto& var : vars) { + var.dump(os); + if (var.where && (flags & 1) != 0) { + var.where->show(os); + os << " defined here:\n"; + var.where->show_context(os); + } + } + } + os << "------- BEGIN --------\n"; + for (const auto& op : ops) { + op.show(os, vars, "", flags); + } + os << "-------- END ---------\n\n"; +} + +var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, const SrcLocation* location) { + vars.emplace_back(var_cnt, cls, var_type, sym, location); + if (sym) { + sym->value->idx = var_cnt; + } + return var_cnt++; +} + +bool CodeBlob::import_params(FormalArgList arg_list) { + if (var_cnt || in_var_cnt || op_cnt) { + return false; + } + std::vector list; + for (const auto& par : arg_list) { + TypeExpr* arg_type; + SymDef* 
arg_sym; + SrcLocation arg_loc; + std::tie(arg_type, arg_sym, arg_loc) = par; + list.push_back(create_var(arg_sym ? (TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, &arg_loc)); + } + emplace_back(loc, Op::_Import, list); + in_var_cnt = var_cnt; + return true; +} + +} // namespace tolk diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp new file mode 100644 index 000000000..ea41a103c --- /dev/null +++ b/tolk/analyzer.cpp @@ -0,0 +1,916 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * ANALYZE AND PREPROCESS ABSTRACT CODE + * + */ + +void CodeBlob::simplify_var_types() { + for (TmpVar& var : vars) { + TypeExpr::remove_indirect(var.v_type); + var.v_type->recompute_width(); + } +} + +int CodeBlob::split_vars(bool strict) { + int n = var_cnt, changes = 0; + for (int j = 0; j < var_cnt; j++) { + TmpVar& var = vars[j]; + if (strict && var.v_type->minw != var.v_type->maxw) { + throw ParseError{var.where.get(), "variable does not have fixed width, cannot manipulate it"}; + } + std::vector comp_types; + int k = var.v_type->extract_components(comp_types); + tolk_assert(k <= 254 && n <= 0x7fff00); + tolk_assert((unsigned)k == comp_types.size()); + if (k != 1) { + var.coord = ~((n << 8) + k); + for (int i = 0; i < k; i++) { + auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where.get()); + tolk_assert(v == n + i); + tolk_assert(vars[v].idx == v); + vars[v].name = vars[j].name; + vars[v].coord = ((int)j << 8) + i + 1; + } + n += k; + ++changes; + } else if (strict && var.v_type->minw != 1) { + throw ParseError{var.where.get(), + "cannot work with variable or variable component of width greater than one"}; + } + } + if (!changes) { + return 0; + } + for (auto& op : ops) { + op.split_vars(vars); + } + return changes; +} + +bool CodeBlob::compute_used_code_vars() { + VarDescrList empty_var_info; + return compute_used_code_vars(ops, empty_var_info, true); +} + +bool CodeBlob::compute_used_code_vars(std::unique_ptr& ops_ptr, const VarDescrList& var_info, bool edit) const { + tolk_assert(ops_ptr); + if (!ops_ptr->next) { + tolk_assert(ops_ptr->cl == Op::_Nop); + return ops_ptr->set_var_info(var_info); + } + // here and below, bitwise | (not logical ||) are used to execute both left and right parts + return static_cast(compute_used_code_vars(ops_ptr->next, var_info, edit)) | + static_cast(ops_ptr->compute_used_vars(*this, edit)); +} + +bool operator==(const VarDescrList& x, const VarDescrList& y) { 
+ if (x.size() != y.size()) { + return false; + } + for (std::size_t i = 0; i < x.size(); i++) { + if (x.list[i].idx != y.list[i].idx || x.list[i].flags != y.list[i].flags) { + return false; + } + } + return true; +} + +bool same_values(const VarDescr& x, const VarDescr& y) { + if (x.val != y.val || x.int_const.is_null() != y.int_const.is_null()) { + return false; + } + if (x.int_const.not_null() && cmp(x.int_const, y.int_const) != 0) { + return false; + } + return true; +} + +bool same_values(const VarDescrList& x, const VarDescrList& y) { + if (x.size() != y.size()) { + return false; + } + for (std::size_t i = 0; i < x.size(); i++) { + if (x.list[i].idx != y.list[i].idx || !same_values(x.list[i], y.list[i])) { + return false; + } + } + return true; +} + +bool Op::set_var_info(const VarDescrList& new_var_info) { + if (var_info == new_var_info) { + return false; + } + var_info = new_var_info; + return true; +} + +bool Op::set_var_info(VarDescrList&& new_var_info) { + if (var_info == new_var_info) { + return false; + } + var_info = std::move(new_var_info); + return true; +} + +bool Op::set_var_info_except(const VarDescrList& new_var_info, const std::vector& var_list) { /* stores new_var_info with the variables of var_list removed; returns whatever set_var_info() returns */ + if (!var_list.size()) { + return set_var_info(new_var_info); + } + VarDescrList tmp_info{new_var_info}; + tmp_info -= var_list; + return set_var_info(std::move(tmp_info)); /* tmp_info is a local copy: hand it to the rvalue overload instead of copying it again */ +} + +bool Op::set_var_info_except(VarDescrList&& new_var_info, const std::vector& var_list) { + if (var_list.size()) { + new_var_info -= var_list; + } + return set_var_info(std::move(new_var_info)); +} +std::vector sort_unique_vars(const std::vector& var_list) { /* returns a sorted copy of var_list with duplicate indices removed */ + std::vector vars{var_list}; + std::sort(vars.begin(), vars.end()); + vars.erase(std::unique(vars.begin(), vars.end()), vars.end()); + return vars; +} + +VarDescr* VarDescrList::operator[](var_idx_t idx) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + return it != list.end() && it->idx == idx ? 
&*it : nullptr; +} + +const VarDescr* VarDescrList::operator[](var_idx_t idx) const { + auto it = std::lower_bound(list.begin(), list.end(), idx); + return it != list.end() && it->idx == idx ? &*it : nullptr; +} + +std::size_t VarDescrList::count(const std::vector idx_list) const { + std::size_t res = 0; + for (var_idx_t idx : idx_list) { + if (operator[](idx)) { + ++res; + } + } + return res; +} + +std::size_t VarDescrList::count_used(const std::vector idx_list) const { + std::size_t res = 0; + for (var_idx_t idx : idx_list) { + auto v = operator[](idx); + if (v && !v->is_unused()) { + ++res; + } + } + return res; +} + +VarDescrList& VarDescrList::operator-=(var_idx_t idx) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + if (it != list.end() && it->idx == idx) { + list.erase(it); + } + return *this; +} + +VarDescrList& VarDescrList::operator-=(const std::vector& idx_list) { + for (var_idx_t idx : idx_list) { + *this -= idx; + } + return *this; +} + +VarDescrList& VarDescrList::add_var(var_idx_t idx, bool unused) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + if (it == list.end() || it->idx != idx) { + list.emplace(it, idx, VarDescr::_Last | (unused ? 
VarDescr::_Unused : 0)); + } else if (it->is_unused() && !unused) { + it->clear_unused(); + } + return *this; +} + +VarDescrList& VarDescrList::add_vars(const std::vector& idx_list, bool unused) { + for (var_idx_t idx : idx_list) { + add_var(idx, unused); + } + return *this; +} + +VarDescr& VarDescrList::add(var_idx_t idx) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + if (it == list.end() || it->idx != idx) { + it = list.emplace(it, idx); + } + return *it; +} + +VarDescr& VarDescrList::add_newval(var_idx_t idx) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + if (it == list.end() || it->idx != idx) { + return *list.emplace(it, idx); + } else { + it->clear_value(); + return *it; + } +} + +VarDescrList& VarDescrList::clear_last() { + for (auto& var : list) { + if (var.flags & VarDescr::_Last) { + var.flags &= ~VarDescr::_Last; + } + } + return *this; +} + +VarDescrList VarDescrList::operator+(const VarDescrList& y) const { + VarDescrList res; + auto it1 = list.cbegin(); + auto it2 = y.list.cbegin(); + while (it1 != list.cend() && it2 != y.list.cend()) { + if (it1->idx < it2->idx) { + res.list.push_back(*it1++); + } else if (it1->idx > it2->idx) { + res.list.push_back(*it2++); + } else { + res.list.push_back(*it1++); + res.list.back() += *it2++; + } + } + while (it1 != list.cend()) { + res.list.push_back(*it1++); + } + while (it2 != y.list.cend()) { + res.list.push_back(*it2++); + } + return res; +} + +VarDescrList& VarDescrList::operator+=(const VarDescrList& y) { + return *this = *this + y; +} + +VarDescrList VarDescrList::operator|(const VarDescrList& y) const { + if (y.unreachable) { + return *this; + } + if (unreachable) { + return y; + } + VarDescrList res; + auto it1 = list.cbegin(); + auto it2 = y.list.cbegin(); + while (it1 != list.cend() && it2 != y.list.cend()) { + if (it1->idx < it2->idx) { + it1++; + } else if (it1->idx > it2->idx) { + it2++; + } else { + res.list.push_back(*it1++); + res.list.back() |= *it2++; + } + } + 
return res; +} + +VarDescrList& VarDescrList::operator|=(const VarDescrList& y) { + if (y.unreachable) { + return *this; + } else { + return *this = *this | y; + } +} + +VarDescrList& VarDescrList::operator&=(const VarDescrList& values) { + for (const VarDescr& vd : values.list) { + VarDescr* item = operator[](vd.idx); + if (item) { + *item &= vd; + } + } + unreachable |= values.unreachable; + return *this; +} + +VarDescrList& VarDescrList::import_values(const VarDescrList& values) { + if (values.unreachable) { + set_unreachable(); + } else + for (auto& vd : list) { + auto new_vd = values[vd.idx]; + if (new_vd) { + vd.set_value(*new_vd); + } else { + vd.clear_value(); + } + } + return *this; +} + +bool Op::std_compute_used_vars(bool disabled) { + // left = OP right + // var_info := (var_info - left) + right + VarDescrList new_var_info{next->var_info}; + new_var_info -= left; + new_var_info.clear_last(); + if (args.size() == right.size() && !disabled) { + for (const VarDescr& arg : args) { + new_var_info.add_var(arg.idx, arg.is_unused()); + } + } else { + new_var_info.add_vars(right, disabled); + } + return set_var_info(std::move(new_var_info)); +} + +bool Op::compute_used_vars(const CodeBlob& code, bool edit) { + tolk_assert(next); + const VarDescrList& next_var_info = next->var_info; + if (cl == _Nop) { + return set_var_info_except(next_var_info, left); + } + switch (cl) { + case _IntConst: + case _SliceConst: + case _GlobVar: + case _Call: + case _CallInd: + case _Tuple: + case _UnTuple: { + // left = EXEC right; + if (!next_var_info.count_used(left) && is_pure()) { + // all variables in `left` are not needed + if (edit) { + disable(); + } + return std_compute_used_vars(true); + } + return std_compute_used_vars(); + } + case _SetGlob: { + // GLOB = right + if (right.empty() && edit) { + disable(); + } + return std_compute_used_vars(right.empty()); + } + case _Let: { + // left = right + std::size_t cnt = next_var_info.count_used(left); + tolk_assert(left.size() == 
right.size()); + auto l_it = left.cbegin(), r_it = right.cbegin(); + VarDescrList new_var_info{next_var_info}; + new_var_info -= left; + new_var_info.clear_last(); + std::vector new_left, new_right; + for (; l_it < left.cend(); ++l_it, ++r_it) { + if (std::find(l_it + 1, left.cend(), *l_it) == left.cend()) { + auto p = next_var_info[*l_it]; + new_var_info.add_var(*r_it, edit && (!p || p->is_unused())); + new_left.push_back(*l_it); + new_right.push_back(*r_it); + } + } + if (new_left.size() < left.size()) { + left = std::move(new_left); + right = std::move(new_right); + } + if (!cnt && edit) { + // all variables in `left` are not needed + disable(); + } + return set_var_info(std::move(new_var_info)); + } + case _Return: { + // return left + if (var_info.count(left) == left.size()) { + return false; + } + std::vector unique_vars = sort_unique_vars(left); + var_info.list.clear(); + for (var_idx_t i : unique_vars) { + var_info.list.emplace_back(i, VarDescr::_Last); + } + return true; + } + case _Import: { + // import left + std::vector unique_vars = sort_unique_vars(left); + var_info.list.clear(); + for (var_idx_t i : unique_vars) { + var_info.list.emplace_back(i, next_var_info[i] ? 0 : VarDescr::_Last); + } + return true; + } + case _If: { + // if (left) then block0 else block1 + // VarDescrList nx_var_info = next_var_info; + // nx_var_info.clear_last(); + code.compute_used_code_vars(block0, next_var_info, edit); + VarDescrList merge_info; + if (block1) { + code.compute_used_code_vars(block1, next_var_info, edit); + merge_info = block0->var_info + block1->var_info; + } else { + merge_info = block0->var_info + next_var_info; + } + merge_info.clear_last(); + merge_info += left; + return set_var_info(std::move(merge_info)); + } + case _While: { + // while (block0 || left) block1; + // ... 
block0 left { block1 block0 left } next + VarDescrList new_var_info{next_var_info}; + bool changes = false; + do { + VarDescrList after_cond{new_var_info}; + after_cond += left; + code.compute_used_code_vars(block0, after_cond, changes); + code.compute_used_code_vars(block1, block0->var_info, changes); + std::size_t n = new_var_info.size(); + new_var_info += block1->var_info; + new_var_info.clear_last(); + if (changes) { + break; + } + changes = (new_var_info.size() == n); + } while (changes <= edit); + new_var_info += left; + code.compute_used_code_vars(block0, new_var_info, edit); + return set_var_info(block0->var_info); + } + case _Until: { + // until (block0 || left); + // .. { block0 left } block0 left next + VarDescrList after_cond_first{next_var_info}; + after_cond_first += left; + code.compute_used_code_vars(block0, after_cond_first, false); + VarDescrList new_var_info{block0->var_info}; + bool changes = false; + do { + VarDescrList after_cond{new_var_info}; + after_cond += next_var_info; + after_cond += left; + code.compute_used_code_vars(block0, after_cond, changes); + std::size_t n = new_var_info.size(); + new_var_info += block0->var_info; + new_var_info.clear_last(); + if (changes) { + break; + } + changes = (new_var_info.size() == n); + } while (changes <= edit); + return set_var_info(std::move(new_var_info) + next_var_info); + } + case _Repeat: { + // repeat (left) block0 + // left { block0 } next + VarDescrList new_var_info{next_var_info}; + bool changes = false; + do { + code.compute_used_code_vars(block0, new_var_info, changes); + std::size_t n = new_var_info.size(); + new_var_info += block0->var_info; + new_var_info.clear_last(); + if (changes) { + break; + } + changes = (new_var_info.size() == n); + } while (changes <= edit); + tolk_assert(left.size() == 1); + bool last = new_var_info.count_used(left) == 0; + new_var_info += left; + if (last) { + new_var_info[left[0]]->flags |= VarDescr::_Last; + } + return set_var_info(std::move(new_var_info)); 
+ } + case _Again: { + // for(;;) block0 + // { block0 } + VarDescrList new_var_info; + bool changes = false; + do { + code.compute_used_code_vars(block0, new_var_info, changes); + std::size_t n = new_var_info.size(); + new_var_info += block0->var_info; + new_var_info.clear_last(); + if (changes) { + break; + } + changes = (new_var_info.size() == n); + } while (changes <= edit); + return set_var_info(std::move(new_var_info)); + } + case _TryCatch: { + code.compute_used_code_vars(block0, next_var_info, edit); + code.compute_used_code_vars(block1, next_var_info, edit); + VarDescrList merge_info = block0->var_info + block1->var_info + next_var_info; + merge_info -= left; + merge_info.clear_last(); + return set_var_info(std::move(merge_info)); + } + default: + std::cerr << "fatal: unknown operation in compute_used_vars()\n"; + throw ParseError{where, "unknown operation"}; + } +} + +bool prune_unreachable(std::unique_ptr& ops) { + if (!ops) { + return true; + } + Op& op = *ops; + if (op.cl == Op::_Nop) { + if (op.next) { + ops = std::move(op.next); + return prune_unreachable(ops); + } + return true; + } + bool reach; + switch (op.cl) { + case Op::_IntConst: + case Op::_SliceConst: + case Op::_GlobVar: + case Op::_SetGlob: + case Op::_Call: + case Op::_CallInd: + case Op::_Tuple: + case Op::_UnTuple: + case Op::_Import: + reach = true; + break; + case Op::_Let: { + reach = true; + break; + } + case Op::_Return: + reach = false; + break; + case Op::_If: { + // if left then block0 else block1; ... 
+ VarDescr* c_var = op.var_info[op.left[0]]; + if (c_var && c_var->always_true()) { + op.block0->last().next = std::move(op.next); + ops = std::move(op.block0); + return prune_unreachable(ops); + } else if (c_var && c_var->always_false()) { + op.block1->last().next = std::move(op.next); + ops = std::move(op.block1); + return prune_unreachable(ops); + } else { + reach = static_cast(prune_unreachable(op.block0)) | static_cast(prune_unreachable(op.block1)); + } + break; + } + case Op::_While: { + // while (block0 || left) block1; + if (!prune_unreachable(op.block0)) { + // computation of block0 never returns + ops = std::move(op.block0); + return prune_unreachable(ops); + } + VarDescr* c_var = op.block0->last().var_info[op.left[0]]; + if (c_var && c_var->always_false()) { + // block1 never executed + op.block0->last().next = std::move(op.next); + ops = std::move(op.block0); + return prune_unreachable(ops); + } else if (c_var && c_var->always_true()) { + if (!prune_unreachable(op.block1)) { + // block1 never returns + op.block0->last().next = std::move(op.block1); + ops = std::move(op.block0); + return false; + } + // infinite loop + op.cl = Op::_Again; + op.block0->last().next = std::move(op.block1); + op.left.clear(); + reach = false; + } else { + if (!prune_unreachable(op.block1)) { + // block1 never returns, while equivalent to block0 ; if left then block1 else next + op.cl = Op::_If; + std::unique_ptr new_op = std::move(op.block0); + op.block0 = std::move(op.block1); + op.block1 = std::make_unique(op.next->where, Op::_Nop); + new_op->last().next = std::move(ops); + ops = std::move(new_op); + } + reach = true; // block1 may be never executed + } + break; + } + case Op::_Repeat: { + // repeat (left) block0 + VarDescr* c_var = op.var_info[op.left[0]]; + if (c_var && c_var->always_nonpos()) { + // loop never executed + ops = std::move(op.next); + return prune_unreachable(ops); + } + if (c_var && c_var->always_pos()) { + if (!prune_unreachable(op.block0)) { + // block0 
executed at least once, and it never returns + // replace code with block0 + ops = std::move(op.block0); + return false; + } + } else { + prune_unreachable(op.block0); + } + reach = true; + break; + } + case Op::_Until: + case Op::_Again: { + // do block0 until left; ... + if (!prune_unreachable(op.block0)) { + // block0 never returns, replace loop by block0 + ops = std::move(op.block0); + return false; + } + reach = (op.cl != Op::_Again); + break; + } + case Op::_TryCatch: { + reach = static_cast(prune_unreachable(op.block0)) | static_cast(prune_unreachable(op.block1)); + break; + } + default: + std::cerr << "fatal: unknown operation \n"; + throw ParseError{op.where, "unknown operation in prune_unreachable()"}; + } + if (reach) { + return prune_unreachable(op.next); + } else { + while (op.next->next) { + op.next = std::move(op.next->next); + } + return false; + } +} + +void CodeBlob::prune_unreachable_code() { + if (prune_unreachable(ops)) { + throw ParseError{loc, "control reaches end of function"}; + } +} + +void CodeBlob::fwd_analyze() { + VarDescrList values; + tolk_assert(ops && ops->cl == Op::_Import); + for (var_idx_t i : ops->left) { + values += i; + if (vars[i].v_type->is_int()) { + values[i]->val |= VarDescr::_Int; + } + } + ops->fwd_analyze(values); +} + +void Op::prepare_args(VarDescrList values) { + if (args.size() != right.size()) { + args.clear(); + for (var_idx_t i : right) { + args.emplace_back(i); + } + } + for (std::size_t i = 0; i < right.size(); i++) { + const VarDescr* val = values[right[i]]; + if (val) { + args[i].set_value(*val); + // args[i].clear_unused(); + } else { + args[i].clear_value(); + } + args[i].clear_unused(); + } +} + +VarDescrList Op::fwd_analyze(VarDescrList values) { + var_info.import_values(values); + switch (cl) { + case _Nop: + case _Import: + break; + case _Return: + values.set_unreachable(); + break; + case _IntConst: { + values.add_newval(left[0]).set_const(int_const); + break; + } + case _SliceConst: { + 
values.add_newval(left[0]).set_const(str_const); + break; + } + case _Call: { + prepare_args(values); + auto func = dynamic_cast(fun_ref->value); + if (func) { + std::vector res; + res.reserve(left.size()); + for (var_idx_t i : left) { + res.emplace_back(i); + } + AsmOpList tmp; + func->compile(tmp, res, args, where); // abstract interpretation of res := f (args) + int j = 0; + for (var_idx_t i : left) { + values.add_newval(i).set_value(res[j++]); + } + } else { + for (var_idx_t i : left) { + values.add_newval(i); + } + } + break; + } + case _Tuple: + case _UnTuple: + case _GlobVar: + case _CallInd: { + for (var_idx_t i : left) { + values.add_newval(i); + } + break; + } + case _SetGlob: + break; + case _Let: { + std::vector old_val; + tolk_assert(left.size() == right.size()); + for (std::size_t i = 0; i < right.size(); i++) { + const VarDescr* ov = values[right[i]]; + if (!ov && verbosity >= 5) { + std::cerr << "FATAL: error in assignment at right component #" << i << " (no value for _" << right[i] << ")" + << std::endl; + for (auto x : left) { + std::cerr << '_' << x << " "; + } + std::cerr << "= "; + for (auto x : right) { + std::cerr << '_' << x << " "; + } + std::cerr << std::endl; + } + // tolk_assert(ov); + if (ov) { + old_val.push_back(*ov); + } else { + old_val.emplace_back(); + } + } + for (std::size_t i = 0; i < left.size(); i++) { + values.add_newval(left[i]).set_value(std::move(old_val[i])); + } + break; + } + case _If: { + VarDescrList val1 = block0->fwd_analyze(values); + VarDescrList val2 = block1 ? 
block1->fwd_analyze(std::move(values)) : std::move(values); + values = val1 | val2; + break; + } + case _Repeat: { + bool atl1 = (values[left[0]] && values[left[0]]->always_pos()); + VarDescrList next_values = block0->fwd_analyze(values); + while (true) { + VarDescrList new_values = values | next_values; + if (same_values(new_values, values)) { + break; + } + values = std::move(new_values); + next_values = block0->fwd_analyze(values); + } + if (atl1) { + values = std::move(next_values); + } + break; + } + case _While: { + auto values0 = values; + values = block0->fwd_analyze(values); + if (values[left[0]] && values[left[0]]->always_false()) { + // block1 never executed + block1->fwd_analyze(values); + break; + } + while (true) { + VarDescrList next_values = values | block0->fwd_analyze(values0 | block1->fwd_analyze(values)); + if (same_values(next_values, values)) { + break; + } + values = std::move(next_values); + } + break; + } + case _Until: + case _Again: { + while (true) { + VarDescrList next_values = values | block0->fwd_analyze(values); + if (same_values(next_values, values)) { + break; + } + values = std::move(next_values); + } + values = block0->fwd_analyze(values); + break; + } + case _TryCatch: { + VarDescrList val1 = block0->fwd_analyze(values); + VarDescrList val2 = block1->fwd_analyze(std::move(values)); + values = val1 | val2; + break; + } + default: + std::cerr << "fatal: unknown operation \n"; + throw ParseError{where, "unknown operation in fwd_analyze()"}; + } + if (next) { + return next->fwd_analyze(std::move(values)); + } else { + return values; + } +} + +bool Op::set_noreturn(bool nr) { + if (nr) { + flags |= _NoReturn; + } else { + flags &= ~_NoReturn; + } + return nr; +} + +bool Op::mark_noreturn() { + switch (cl) { + case _Nop: + if (!next) { + return set_noreturn(false); + } + // fallthrough + case _Import: + case _IntConst: + case _SliceConst: + case _Let: + case _Tuple: + case _UnTuple: + case _SetGlob: + case _GlobVar: + case _CallInd: + 
case _Call: + return set_noreturn(next->mark_noreturn()); + case _Return: + return set_noreturn(true); + case _If: + case _TryCatch: + return set_noreturn((static_cast(block0->mark_noreturn()) & static_cast(block1 && block1->mark_noreturn())) | static_cast(next->mark_noreturn())); + case _Again: + block0->mark_noreturn(); + return set_noreturn(true); + case _Until: + return set_noreturn(static_cast(block0->mark_noreturn()) | static_cast(next->mark_noreturn())); + case _While: + block1->mark_noreturn(); + return set_noreturn(static_cast(block0->mark_noreturn()) | static_cast(next->mark_noreturn())); + case _Repeat: + block0->mark_noreturn(); + return set_noreturn(next->mark_noreturn()); + default: + std::cerr << "fatal: unknown operation \n"; + throw ParseError{where, "unknown operation in mark_noreturn()"}; + } +} + +void CodeBlob::mark_noreturn() { + ops->mark_noreturn(); +} + +} // namespace tolk diff --git a/tolk/asmops.cpp b/tolk/asmops.cpp new file mode 100644 index 000000000..cbe268f26 --- /dev/null +++ b/tolk/asmops.cpp @@ -0,0 +1,372 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "tolk.h" +#include + +namespace tolk { + +/* + * + * ASM-OP LIST FUNCTIONS + * + */ + +int is_pos_pow2(td::RefInt256 x) { + if (sgn(x) > 0 && !sgn(x & (x - 1))) { + return x->bit_size(false) - 1; + } else { + return -1; + } +} + +int is_neg_pow2(td::RefInt256 x) { + return sgn(x) < 0 ? is_pos_pow2(-x) : 0; +} + +std::ostream& operator<<(std::ostream& os, AsmOp::SReg stack_reg) { + int i = stack_reg.idx; + if (i >= 0) { + if (i < 16) { + return os << 's' << i; + } else { + return os << i << " s()"; + } + } else if (i >= -2) { + return os << "s(" << i << ')'; + } else { + return os << i << " s()"; + } +} + +AsmOp AsmOp::Const(int arg, std::string push_op, td::RefInt256 origin) { + std::ostringstream os; + os << arg << ' ' << push_op; + return AsmOp::Const(os.str(), origin); +} + +AsmOp AsmOp::make_stk2(int a, int b, const char* str, int delta) { + std::ostringstream os; + os << SReg(a) << ' ' << SReg(b) << ' ' << str; + int c = std::max(a, b) + 1; + return AsmOp::Custom(os.str(), c, c + delta); +} + +AsmOp AsmOp::make_stk3(int a, int b, int c, const char* str, int delta) { + std::ostringstream os; + os << SReg(a) << ' ' << SReg(b) << ' ' << SReg(c) << ' ' << str; + int m = std::max(a, std::max(b, c)) + 1; + return AsmOp::Custom(os.str(), m, m + delta); +} + +AsmOp AsmOp::BlkSwap(int a, int b) { + std::ostringstream os; + if (a == 1 && b == 1) { + return AsmOp::Xchg(0, 1); + } else if (a == 1) { + if (b == 2) { + os << "ROT"; + } else { + os << b << " ROLL"; + } + } else if (b == 1) { + if (a == 2) { + os << "-ROT"; + } else { + os << a << " -ROLL"; + } + } else { + os << a << " " << b << " BLKSWAP"; + } + return AsmOp::Custom(os.str(), a + b, a + b); +} + +AsmOp AsmOp::BlkPush(int a, int b) { + std::ostringstream os; + if (a == 1) { + return AsmOp::Push(b); + } else if (a == 2 && b == 1) { + os << "2DUP"; + } else { + os << a << " " << b << " BLKPUSH"; + } + return AsmOp::Custom(os.str(), b + 1, a + b + 1); +} + +AsmOp AsmOp::BlkDrop(int a) { + 
std::ostringstream os; + if (a == 1) { + return AsmOp::Pop(); + } else if (a == 2) { + os << "2DROP"; + } else { + os << a << " BLKDROP"; + } + return AsmOp::Custom(os.str(), a, 0); +} + +AsmOp AsmOp::BlkDrop2(int a, int b) { + if (!b) { + return BlkDrop(a); + } + std::ostringstream os; + os << a << " " << b << " BLKDROP2"; + return AsmOp::Custom(os.str(), a + b, b); +} + +AsmOp AsmOp::BlkReverse(int a, int b) { + std::ostringstream os; + os << a << " " << b << " REVERSE"; + return AsmOp::Custom(os.str(), a + b, a + b); +} + +AsmOp AsmOp::Tuple(int a) { + switch (a) { + case 1: + return AsmOp::Custom("SINGLE", 1, 1); + case 2: + return AsmOp::Custom("PAIR", 2, 1); + case 3: + return AsmOp::Custom("TRIPLE", 3, 1); + } + std::ostringstream os; + os << a << " TUPLE"; + return AsmOp::Custom(os.str(), a, 1); +} + +AsmOp AsmOp::UnTuple(int a) { + switch (a) { + case 1: + return AsmOp::Custom("UNSINGLE", 1, 1); + case 2: + return AsmOp::Custom("UNPAIR", 1, 2); + case 3: + return AsmOp::Custom("UNTRIPLE", 1, 3); + } + std::ostringstream os; + os << a << " UNTUPLE"; + return AsmOp::Custom(os.str(), 1, a); +} + +AsmOp AsmOp::IntConst(td::RefInt256 x) { + if (x->signed_fits_bits(8)) { + return AsmOp::Const(dec_string(x) + " PUSHINT", x); + } + if (!x->is_valid()) { + return AsmOp::Const("PUSHNAN", x); + } + int k = is_pos_pow2(x); + if (k >= 0) { + return AsmOp::Const(k, "PUSHPOW2", x); + } + k = is_pos_pow2(x + 1); + if (k >= 0) { + return AsmOp::Const(k, "PUSHPOW2DEC", x); + } + k = is_pos_pow2(-x); + if (k >= 0) { + return AsmOp::Const(k, "PUSHNEGPOW2", x); + } + if (!x->mod_pow2_short(23)) { + return AsmOp::Const(dec_string(x) + " PUSHINTX", x); + } + return AsmOp::Const(dec_string(x) + " PUSHINT", x); +} + +AsmOp AsmOp::BoolConst(bool f) { + return AsmOp::Const(f ? 
"TRUE" : "FALSE"); +} + +AsmOp AsmOp::Parse(std::string custom_op) { + if (custom_op == "NOP") { + return AsmOp::Nop(); + } else if (custom_op == "SWAP") { + return AsmOp::Xchg(1); + } else if (custom_op == "DROP") { + return AsmOp::Pop(0); + } else if (custom_op == "NIP") { + return AsmOp::Pop(1); + } else if (custom_op == "DUP") { + return AsmOp::Push(0); + } else if (custom_op == "OVER") { + return AsmOp::Push(1); + } else { + return AsmOp::Custom(custom_op); + } +} + +AsmOp AsmOp::Parse(std::string custom_op, int args, int retv) { + auto res = Parse(custom_op); + if (res.is_custom()) { + res.a = args; + res.b = retv; + } + return res; +} + +void AsmOp::out(std::ostream& os) const { + if (!op.empty()) { + os << op; + return; + } + switch (t) { + case a_none: + break; + case a_xchg: + if (!a && !(b & -2)) { + os << (b ? "SWAP" : "NOP"); + break; + } + os << SReg(a) << ' ' << SReg(b) << " XCHG"; + break; + case a_push: + if (!(a & -2)) { + os << (a ? "OVER" : "DUP"); + break; + } + os << SReg(a) << " PUSH"; + break; + case a_pop: + if (!(a & -2)) { + os << (a ? 
"NIP" : "DROP"); + break; + } + os << SReg(a) << " POP"; + break; + default: + throw Fatal{"unknown assembler operation"}; + } +} + +void AsmOp::out_indent_nl(std::ostream& os, bool no_eol) const { + for (int i = 0; i < indent; i++) { + os << " "; + } + out(os); + if (!no_eol) { + os << std::endl; + } +} + +std::string AsmOp::to_string() const { + if (!op.empty()) { + return op; + } else { + std::ostringstream os; + out(os); + return os.str(); + } +} + +bool AsmOpList::append(const std::vector& ops) { + for (const auto& op : ops) { + if (!append(op)) { + return false; + } + } + return true; +} + +const_idx_t AsmOpList::register_const(Const new_const) { + if (new_const.is_null()) { + return not_const; + } + unsigned idx; + for (idx = 0; idx < constants_.size(); idx++) { + if (!td::cmp(new_const, constants_[idx])) { + return idx; + } + } + constants_.push_back(std::move(new_const)); + return (const_idx_t)idx; +} + +Const AsmOpList::get_const(const_idx_t idx) { + if ((unsigned)idx < constants_.size()) { + return constants_[idx]; + } else { + return {}; + } +} + +void AsmOpList::show_var(std::ostream& os, var_idx_t idx) const { + if (!var_names_ || (unsigned)idx >= var_names_->size()) { + os << '_' << idx; + } else { + var_names_->at(idx).show(os, 2); + } +} + +void AsmOpList::show_var_ext(std::ostream& os, std::pair idx_pair) const { + auto i = idx_pair.first; + auto j = idx_pair.second; + if (!var_names_ || (unsigned)i >= var_names_->size()) { + os << '_' << i; + } else { + var_names_->at(i).show(os, 2); + } + if ((unsigned)j < constants_.size() && constants_[j].not_null()) { + os << '=' << constants_[j]; + } +} + +void AsmOpList::out(std::ostream& os, int mode) const { + if (!(mode & 2)) { + for (const auto& op : list_) { + op.out_indent_nl(os); + } + } else { + std::size_t n = list_.size(); + for (std::size_t i = 0; i < n; i++) { + const auto& op = list_[i]; + if (!op.is_comment() && i + 1 < n && list_[i + 1].is_comment()) { + op.out_indent_nl(os, true); + os << 
'\t'; + do { + i++; + } while (i + 1 < n && list_[i + 1].is_comment()); + list_[i].out(os); + os << std::endl; + } else { + op.out_indent_nl(os, false); + } + } + } +} + +bool apply_op(StackTransform& trans, const AsmOp& op) { + if (!trans.is_valid()) { + return false; + } + switch (op.t) { + case AsmOp::a_none: + return true; + case AsmOp::a_xchg: + return trans.apply_xchg(op.a, op.b, true); + case AsmOp::a_push: + return trans.apply_push(op.a); + case AsmOp::a_pop: + return trans.apply_pop(op.a); + case AsmOp::a_const: + return !op.a && op.b == 1 && trans.apply_push_newconst(); + case AsmOp::a_custom: + return op.is_gconst() && trans.apply_push_newconst(); + default: + return false; + } +} + +} // namespace tolk diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp new file mode 100644 index 000000000..16ebd2596 --- /dev/null +++ b/tolk/builtins.cpp @@ -0,0 +1,1231 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "tolk.h" + +namespace tolk { +using namespace std::literals::string_literals; + +/* + * + * SYMBOL VALUES + * + */ + +int glob_func_cnt, undef_func_cnt, glob_var_cnt, const_cnt; +std::vector glob_func, glob_vars; +std::set prohibited_var_names; + +SymDef* predefine_builtin_func(std::string name, TypeExpr* func_type) { + if (name.back() == '_') { + prohibited_var_names.insert(name); + } + sym_idx_t name_idx = symbols.lookup(name, 1); + if (symbols.is_keyword(name_idx)) { + std::cerr << "fatal: global function `" << name << "` already defined as a keyword" << std::endl; + } + SymDef* def = define_global_symbol(name_idx, true); + if (!def) { + std::cerr << "fatal: global function `" << name << "` already defined" << std::endl; + std::exit(1); + } + return def; +} + +template +SymDef* define_builtin_func(std::string name, TypeExpr* func_type, const T& func, bool impure = false) { + SymDef* def = predefine_builtin_func(name, func_type); + def->value = new SymValAsmFunc{func_type, func, impure}; + return def; +} + +template +SymDef* define_builtin_func(std::string name, TypeExpr* func_type, const T& func, std::initializer_list arg_order, + std::initializer_list ret_order = {}, bool impure = false) { + SymDef* def = predefine_builtin_func(name, func_type); + def->value = new SymValAsmFunc{func_type, func, arg_order, ret_order, impure}; + return def; +} + +SymDef* define_builtin_func(std::string name, TypeExpr* func_type, const AsmOp& macro, + std::initializer_list arg_order, std::initializer_list ret_order = {}, + bool impure = false) { + SymDef* def = predefine_builtin_func(name, func_type); + def->value = new SymValAsmFunc{func_type, make_simple_compile(macro), arg_order, ret_order, impure}; + return def; +} + +SymDef* force_autoapply(SymDef* def) { + if (def) { + auto val = dynamic_cast(def->value); + if (val) { + val->auto_apply = true; + } + } + return def; +} + +template +SymDef* define_builtin_const(std::string name, TypeExpr* const_type, Args&&... 
args) { + return force_autoapply( + define_builtin_func(name, TypeExpr::new_map(TypeExpr::new_unit(), const_type), std::forward(args)...)); +} + +bool SymValAsmFunc::compile(AsmOpList& dest, std::vector& out, std::vector& in, + const SrcLocation& where) const { + if (simple_compile) { + return dest.append(simple_compile(out, in, where)); + } else if (ext_compile) { + return ext_compile(dest, out, in); + } else { + return false; + } +} + +/* + * + * DEFINE BUILT-IN FUNCTIONS + * + */ + +int emulate_negate(int a) { + int f = VarDescr::_Pos | VarDescr::_Neg; + if ((a & f) && (~a & f)) { + a ^= f; + } + f = VarDescr::_Bit | VarDescr::_Bool; + if ((a & f) && (~a & f)) { + a ^= f; + } + return a; +} + +int emulate_add(int a, int b) { + if (b & VarDescr::_Zero) { + return a; + } else if (a & VarDescr::_Zero) { + return b; + } + int u = a & b, v = a | b; + int r = VarDescr::_Int; + int t = u & (VarDescr::_Pos | VarDescr::_Neg); + if (v & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + // non-quiet addition always returns finite results! 
+ r |= t | VarDescr::_Finite; + if (t) { + r |= v & VarDescr::_NonZero; + } + r |= v & VarDescr::_Nan; + if (u & (VarDescr::_Odd | VarDescr::_Even)) { + r |= VarDescr::_Even; + } else if (!(~v & (VarDescr::_Odd | VarDescr::_Even))) { + r |= VarDescr::_Odd | VarDescr::_NonZero; + } + return r; +} + +int emulate_sub(int a, int b) { + return emulate_add(a, emulate_negate(b)); +} + +int emulate_mul(int a, int b) { + if ((b & (VarDescr::_NonZero | VarDescr::_Bit)) == (VarDescr::_NonZero | VarDescr::_Bit)) { + return a; + } else if ((a & (VarDescr::_NonZero | VarDescr::_Bit)) == (VarDescr::_NonZero | VarDescr::_Bit)) { + return b; + } + int u = a & b, v = a | b; + int r = VarDescr::_Int; + if (v & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + // non-quiet multiplication always yields finite results, if any + r |= VarDescr::_Finite; + if (v & VarDescr::_Zero) { + // non-quiet multiplication + // the result is zero, if any result at all + return VarDescr::ConstZero; + } + if (u & (VarDescr::_Pos | VarDescr::_Neg)) { + r |= VarDescr::_Pos; + } else if (!(~v & (VarDescr::_Pos | VarDescr::_Neg))) { + r |= VarDescr::_Neg; + } + if (u & (VarDescr::_Bit | VarDescr::_Bool)) { + r |= VarDescr::_Bit; + } else if (!(~v & (VarDescr::_Bit | VarDescr::_Bool))) { + r |= VarDescr::_Bool; + } + r |= v & VarDescr::_Even; + r |= u & (VarDescr::_Odd | VarDescr::_NonZero); + return r; +} + +int emulate_and(int a, int b) { + int both = a & b, any = a | b; + int r = VarDescr::_Int; + if (any & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + r |= VarDescr::_Finite; + if (any & VarDescr::_Zero) { + return VarDescr::ConstZero; + } + r |= both & (VarDescr::_Even | VarDescr::_Odd); + r |= both & (VarDescr::_Bit | VarDescr::_Bool); + if (both & VarDescr::_Odd) { + r |= VarDescr::_NonZero; + } + return r; +} + +int emulate_or(int a, int b) { + if (b & VarDescr::_Zero) { + return a; + } else if (a & VarDescr::_Zero) { + return b; + } + int both = a & b, any = a | b; + int r = 
VarDescr::_Int; + if (any & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + r |= VarDescr::_Finite; + r |= any & VarDescr::_NonZero; + r |= any & VarDescr::_Odd; + r |= both & VarDescr::_Even; + return r; +} + +int emulate_xor(int a, int b) { + if (b & VarDescr::_Zero) { + return a; + } else if (a & VarDescr::_Zero) { + return b; + } + int both = a & b, any = a | b; + int r = VarDescr::_Int; + if (any & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + r |= VarDescr::_Finite; + r |= both & VarDescr::_Even; + if (both & VarDescr::_Odd) { + r |= VarDescr::_Even; + } + return r; +} + +int emulate_not(int a) { + if ((a & VarDescr::ConstZero) == VarDescr::ConstZero) { + return VarDescr::ConstTrue; + } + if ((a & VarDescr::ConstTrue) == VarDescr::ConstTrue) { + return VarDescr::ConstZero; + } + int a2 = a; + int f = VarDescr::_Even | VarDescr::_Odd; + if ((a2 & f) && (~a2 & f)) { + a2 ^= f; + } + a2 &= ~(VarDescr::_Zero | VarDescr::_NonZero | VarDescr::_Bit | VarDescr::_Pos | VarDescr::_Neg); + if ((a & VarDescr::_Neg) && (a & VarDescr::_NonZero)) { + a2 |= VarDescr::_Pos; + } + if (a & VarDescr::_Pos) { + a2 |= VarDescr::_Neg; + } + return a2; +} + +int emulate_lshift(int a, int b) { + if (((a | b) & VarDescr::_Nan) || !(~b & (VarDescr::_Neg | VarDescr::_NonZero))) { + return VarDescr::_Int | VarDescr::_Nan; + } + if (b & VarDescr::_Zero) { + return a; + } + int t = ((b & VarDescr::_NonZero) ? 
VarDescr::_Even : 0); + t |= b & VarDescr::_Finite; + return emulate_mul(a, VarDescr::_Int | VarDescr::_Pos | VarDescr::_NonZero | VarDescr::_Even | t); +} + +int emulate_div(int a, int b) { + if ((b & (VarDescr::_NonZero | VarDescr::_Bit)) == (VarDescr::_NonZero | VarDescr::_Bit)) { + return a; + } else if ((b & (VarDescr::_NonZero | VarDescr::_Bool)) == (VarDescr::_NonZero | VarDescr::_Bool)) { + return emulate_negate(a); + } + if (b & VarDescr::_Zero) { + return VarDescr::_Int | VarDescr::_Nan; + } + int u = a & b, v = a | b; + int r = VarDescr::_Int; + if (v & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + // non-quiet division always yields finite results, if any + r |= VarDescr::_Finite; + if (a & VarDescr::_Zero) { + // non-quiet division + // the result is zero, if any result at all + return VarDescr::ConstZero; + } + if (u & (VarDescr::_Pos | VarDescr::_Neg)) { + r |= VarDescr::_Pos; + } else if (!(~v & (VarDescr::_Pos | VarDescr::_Neg))) { + r |= VarDescr::_Neg; + } + if (u & (VarDescr::_Bit | VarDescr::_Bool)) { + r |= VarDescr::_Bit; + } else if (!(~v & (VarDescr::_Bit | VarDescr::_Bool))) { + r |= VarDescr::_Bool; + } + return r; +} + +int emulate_rshift(int a, int b) { + if (((a | b) & VarDescr::_Nan) || !(~b & (VarDescr::_Neg | VarDescr::_NonZero))) { + return VarDescr::_Int | VarDescr::_Nan; + } + if (b & VarDescr::_Zero) { + return a; + } + int t = ((b & VarDescr::_NonZero) ? 
VarDescr::_Even : 0); + t |= b & VarDescr::_Finite; + return emulate_div(a, VarDescr::_Int | VarDescr::_Pos | VarDescr::_NonZero | VarDescr::_Even | t); +} + +int emulate_mod(int a, int b, int round_mode = -1) { + if ((b & (VarDescr::_NonZero | VarDescr::_Bit)) == (VarDescr::_NonZero | VarDescr::_Bit)) { + return VarDescr::ConstZero; + } else if ((b & (VarDescr::_NonZero | VarDescr::_Bool)) == (VarDescr::_NonZero | VarDescr::_Bool)) { + return VarDescr::ConstZero; + } + if (b & VarDescr::_Zero) { + return VarDescr::_Int | VarDescr::_Nan; + } + int r = VarDescr::_Int; + if ((a | b) & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + // non-quiet division always yields finite results, if any + r |= VarDescr::_Finite; + if (a & VarDescr::_Zero) { + // non-quiet division + // the result is zero, if any result at all + return VarDescr::ConstZero; + } + if (round_mode < 0) { + r |= b & (VarDescr::_Pos | VarDescr::_Neg); + } else if (round_mode > 0) { + r |= emulate_negate(b) & (VarDescr::_Pos | VarDescr::_Neg); + } + if (a & (VarDescr::_Bit | VarDescr::_Bool)) { + if (r & VarDescr::_Pos) { + r |= VarDescr::_Bit; + } + if (r & VarDescr::_Neg) { + r |= VarDescr::_Bool; + } + } + if (b & VarDescr::_Even) { + r |= a & (VarDescr::_Even | VarDescr::_Odd); + } + return r; +} + +bool VarDescr::always_less(const VarDescr& other) const { + if (is_int_const() && other.is_int_const()) { + return int_const < other.int_const; + } + return (always_nonpos() && other.always_pos()) || (always_neg() && other.always_nonneg()); +} + +bool VarDescr::always_leq(const VarDescr& other) const { + if (is_int_const() && other.is_int_const()) { + return int_const <= other.int_const; + } + return always_nonpos() && other.always_nonneg(); +} + +bool VarDescr::always_greater(const VarDescr& other) const { + return other.always_less(*this); +} + +bool VarDescr::always_geq(const VarDescr& other) const { + return other.always_leq(*this); +} + +bool VarDescr::always_equal(const VarDescr& other) const { 
+ return is_int_const() && other.is_int_const() && *int_const == *other.int_const; +} + +bool VarDescr::always_neq(const VarDescr& other) const { + if (is_int_const() && other.is_int_const()) { + return *int_const != *other.int_const; + } + return always_greater(other) || always_less(other) || (always_even() && other.always_odd()) || + (always_odd() && other.always_even()); +} + +AsmOp exec_op(std::string op) { + return AsmOp::Custom(op); +} + +AsmOp exec_op(std::string op, int args, int retv = 1) { + return AsmOp::Custom(op, args, retv); +} + +AsmOp exec_arg_op(std::string op, long long arg) { + std::ostringstream os; + os << arg << ' ' << op; + return AsmOp::Custom(os.str()); +} + +AsmOp exec_arg_op(std::string op, long long arg, int args, int retv) { + std::ostringstream os; + os << arg << ' ' << op; + return AsmOp::Custom(os.str(), args, retv); +} + +AsmOp exec_arg_op(std::string op, td::RefInt256 arg) { + std::ostringstream os; + os << arg << ' ' << op; + return AsmOp::Custom(os.str()); +} + +AsmOp exec_arg_op(std::string op, td::RefInt256 arg, int args, int retv) { + std::ostringstream os; + os << arg << ' ' << op; + return AsmOp::Custom(os.str(), args, retv); +} + +AsmOp exec_arg2_op(std::string op, long long imm1, long long imm2, int args, int retv) { + std::ostringstream os; + os << imm1 << ' ' << imm2 << ' ' << op; + return AsmOp::Custom(os.str(), args, retv); +} + +AsmOp push_const(td::RefInt256 x) { + return AsmOp::IntConst(std::move(x)); +} + +AsmOp compile_add(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const + y.int_const); + if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_add(x.val, y.val); + if (y.is_int_const() && 
y.int_const->signed_fits_bits(8)) { + y.unused(); + if (y.always_zero()) { + return AsmOp::Nop(); + } + if (*y.int_const == 1) { + return exec_op("INC", 1); + } + if (*y.int_const == -1) { + return exec_op("DEC", 1); + } + return exec_arg_op("ADDCONST", y.int_const, 1); + } + if (x.is_int_const() && x.int_const->signed_fits_bits(8)) { + x.unused(); + if (x.always_zero()) { + return AsmOp::Nop(); + } + if (*x.int_const == 1) { + return exec_op("INC", 1); + } + if (*x.int_const == -1) { + return exec_op("DEC", 1); + } + return exec_arg_op("ADDCONST", x.int_const, 1); + } + return exec_op("ADD", 2); +} + +AsmOp compile_sub(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const - y.int_const); + if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_sub(x.val, y.val); + if (y.is_int_const() && (-y.int_const)->signed_fits_bits(8)) { + y.unused(); + if (y.always_zero()) { + return {}; + } + if (*y.int_const == 1) { + return exec_op("DEC", 1); + } + if (*y.int_const == -1) { + return exec_op("INC", 1); + } + return exec_arg_op("ADDCONST", -y.int_const, 1); + } + if (x.always_zero()) { + x.unused(); + return exec_op("NEGATE", 1); + } + return exec_op("SUB", 2); +} + +AsmOp compile_negate(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 1); + VarDescr &r = res[0], &x = args[0]; + if (x.is_int_const()) { + r.set_const(-x.int_const); + if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + return push_const(r.int_const); + } + r.val = emulate_negate(x.val); + return exec_op("NEGATE", 1); +} + +AsmOp compile_and(std::vector& res, std::vector& args, const SrcLocation& where) { + 
tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const & y.int_const); + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_and(x.val, y.val); + return exec_op("AND", 2); +} + +AsmOp compile_or(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const | y.int_const); + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_or(x.val, y.val); + return exec_op("OR", 2); +} + +AsmOp compile_xor(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const ^ y.int_const); + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_xor(x.val, y.val); + return exec_op("XOR", 2); +} + +AsmOp compile_not(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 1); + VarDescr &r = res[0], &x = args[0]; + if (x.is_int_const()) { + r.set_const(~x.int_const); + x.unused(); + return push_const(r.int_const); + } + r.val = emulate_not(x.val); + return exec_op("NOT", 1); +} + +AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocation& where) { + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const * y.int_const); + if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_mul(x.val, y.val); + if (y.is_int_const()) { + int k = is_pos_pow2(y.int_const); + if (y.int_const->signed_fits_bits(8) && k < 0) { + y.unused(); + if 
(y.always_zero() && x.always_finite()) { + // dubious optimization: NaN * 0 = ? + r.set_const(y.int_const); + x.unused(); + return push_const(r.int_const); + } + if (*y.int_const == 1 && x.always_finite()) { + return AsmOp::Nop(); + } + if (*y.int_const == -1) { + return exec_op("NEGATE", 1); + } + return exec_arg_op("MULCONST", y.int_const, 1); + } + if (k > 0) { + y.unused(); + return exec_arg_op("LSHIFT#", k, 1); + } + if (k == 0) { + y.unused(); + return AsmOp::Nop(); + } + } + if (x.is_int_const()) { + int k = is_pos_pow2(x.int_const); + if (x.int_const->signed_fits_bits(8) && k < 0) { + x.unused(); + if (x.always_zero() && y.always_finite()) { + // dubious optimization: NaN * 0 = ? + r.set_const(x.int_const); + y.unused(); + return push_const(r.int_const); + } + if (*x.int_const == 1 && y.always_finite()) { + return AsmOp::Nop(); + } + if (*x.int_const == -1) { + return exec_op("NEGATE", 1); + } + return exec_arg_op("MULCONST", x.int_const, 1); + } + if (k > 0) { + x.unused(); + return exec_arg_op("LSHIFT#", k, 1); + } + if (k == 0) { + x.unused(); + return AsmOp::Nop(); + } + } + return exec_op("MUL", 2); +} + +AsmOp compile_mul(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 2); + return compile_mul_internal(res[0], args[0], args[1], where); +} + +AsmOp compile_lshift(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (y.is_int_const()) { + auto yv = y.int_const->to_long(); + if (yv < 0 || yv > 256) { + throw ParseError(where, "lshift argument is out of range"); + } else if (x.is_int_const()) { + r.set_const(x.int_const << (int)yv); + if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + } + r.val = emulate_lshift(x.val, y.val); + if (y.is_int_const()) { + int k = 
(int)(y.int_const->to_long()); + if (!k /* && x.always_finite() */) { + // dubious optimization: what if x=NaN ? + y.unused(); + return AsmOp::Nop(); + } + y.unused(); + return exec_arg_op("LSHIFT#", k, 1); + } + if (x.is_int_const()) { + auto xv = x.int_const->to_long(); + if (xv == 1) { + x.unused(); + return exec_op("POW2", 1); + } + if (xv == -1) { + x.unused(); + return exec_op("-1 PUSHINT SWAP LSHIFT", 1); + } + } + return exec_op("LSHIFT", 2); +} + +AsmOp compile_rshift(std::vector& res, std::vector& args, const SrcLocation& where, + int round_mode) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (y.is_int_const()) { + auto yv = y.int_const->to_long(); + if (yv < 0 || yv > 256) { + throw ParseError(where, "rshift argument is out of range"); + } else if (x.is_int_const()) { + r.set_const(td::rshift(x.int_const, (int)yv, round_mode)); + x.unused(); + y.unused(); + return push_const(r.int_const); + } + } + r.val = emulate_rshift(x.val, y.val); + std::string rshift = (round_mode < 0 ? "RSHIFT" : (round_mode ? "RSHIFTC" : "RSHIFTR")); + if (y.is_int_const()) { + int k = (int)(y.int_const->to_long()); + if (!k /* && x.always_finite() */) { + // dubious optimization: what if x=NaN ? + y.unused(); + return AsmOp::Nop(); + } + y.unused(); + return exec_arg_op(rshift + "#", k, 1); + } + return exec_op(rshift, 2); +} + +AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocation& where, int round_mode) { + if (x.is_int_const() && y.is_int_const()) { + r.set_const(div(x.int_const, y.int_const, round_mode)); + if (!r.int_const->is_valid()) { + throw ParseError(where, *y.int_const == 0 ? 
"division by zero" : "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_div(x.val, y.val); + if (y.is_int_const()) { + if (*y.int_const == 0) { + throw ParseError(where, "division by zero"); + } + if (*y.int_const == 1 && x.always_finite()) { + y.unused(); + return AsmOp::Nop(); + } + if (*y.int_const == -1) { + y.unused(); + return exec_op("NEGATE", 1); + } + int k = is_pos_pow2(y.int_const); + if (k > 0) { + y.unused(); + std::string op = "RSHIFT"; + if (round_mode >= 0) { + op += (round_mode > 0 ? 'C' : 'R'); + } + return exec_arg_op(op + '#', k, 1); + } + } + std::string op = "DIV"; + if (round_mode >= 0) { + op += (round_mode > 0 ? 'C' : 'R'); + } + return exec_op(op, 2); +} + +AsmOp compile_div(std::vector& res, std::vector& args, const SrcLocation& where, int round_mode) { + tolk_assert(res.size() == 1 && args.size() == 2); + return compile_div_internal(res[0], args[0], args[1], where, round_mode); +} + +AsmOp compile_mod(std::vector& res, std::vector& args, const SrcLocation& where, + int round_mode) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(mod(x.int_const, y.int_const, round_mode)); + if (!r.int_const->is_valid()) { + throw ParseError(where, *y.int_const == 0 ? "division by zero" : "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_mod(x.val, y.val); + if (y.is_int_const()) { + if (*y.int_const == 0) { + throw ParseError(where, "division by zero"); + } + if ((*y.int_const == 1 || *y.int_const == -1) && x.always_finite()) { + x.unused(); + y.unused(); + r.set_const(td::zero_refint()); + return push_const(r.int_const); + } + int k = is_pos_pow2(y.int_const); + if (k > 0) { + y.unused(); + std::string op = "MODPOW2"; + if (round_mode >= 0) { + op += (round_mode > 0 ? 
'C' : 'R'); + } + return exec_arg_op(op + '#', k, 1); + } + } + std::string op = "MOD"; + if (round_mode >= 0) { + op += (round_mode > 0 ? 'C' : 'R'); + } + return exec_op(op, 2); +} + +AsmOp compile_muldiv(std::vector& res, std::vector& args, const SrcLocation& where, + int round_mode) { + tolk_assert(res.size() == 1 && args.size() == 3); + VarDescr &r = res[0], &x = args[0], &y = args[1], &z = args[2]; + if (x.is_int_const() && y.is_int_const() && z.is_int_const()) { + r.set_const(muldiv(x.int_const, y.int_const, z.int_const, round_mode)); + if (!r.int_const->is_valid()) { + throw ParseError(where, *z.int_const == 0 ? "division by zero" : "integer overflow"); + } + x.unused(); + y.unused(); + z.unused(); + return push_const(r.int_const); + } + if (x.always_zero() || y.always_zero()) { + // dubious optimization for z=0... + x.unused(); + y.unused(); + z.unused(); + r.set_const(td::make_refint(0)); + return push_const(r.int_const); + } + char c = (round_mode < 0) ? 0 : (round_mode > 0 ? 
'C' : 'R'); + r.val = emulate_div(emulate_mul(x.val, y.val), z.val); + if (z.is_int_const()) { + if (*z.int_const == 0) { + throw ParseError(where, "division by zero"); + } + if (*z.int_const == 1) { + z.unused(); + return compile_mul_internal(r, x, y, where); + } + } + if (y.is_int_const() && *y.int_const == 1) { + y.unused(); + return compile_div_internal(r, x, z, where, round_mode); + } + if (x.is_int_const() && *x.int_const == 1) { + x.unused(); + return compile_div_internal(r, y, z, where, round_mode); + } + if (z.is_int_const()) { + int k = is_pos_pow2(z.int_const); + if (k > 0) { + z.unused(); + std::string op = "MULRSHIFT"; + if (c) { + op += c; + } + return exec_arg_op(op + '#', k, 2); + } + } + if (y.is_int_const()) { + int k = is_pos_pow2(y.int_const); + if (k > 0) { + y.unused(); + std::string op = "LSHIFT#DIV"; + if (c) { + op += c; + } + return exec_arg_op(op, k, 2); + } + } + if (x.is_int_const()) { + int k = is_pos_pow2(x.int_const); + if (k > 0) { + x.unused(); + std::string op = "LSHIFT#DIV"; + if (c) { + op += c; + } + return exec_arg_op(op, k, 2); + } + } + std::string op = "MULDIV"; + if (c) { + op += c; + } + return exec_op(op, 3); +} + +int compute_compare(td::RefInt256 x, td::RefInt256 y, int mode) { + int s = td::cmp(x, y); + if (mode == 7) { + return s; + } else { + return -((mode >> (1 - s)) & 1); + } +} + +// return value: +// 4 -> constant 1 +// 2 -> constant 0 +// 1 -> constant -1 +// 3 -> 0 or -1 +int compute_compare(const VarDescr& x, const VarDescr& y, int mode) { + switch (mode) { + case 1: // > + return x.always_greater(y) ? 1 : (x.always_leq(y) ? 2 : 3); + case 2: // = + return x.always_equal(y) ? 1 : (x.always_neq(y) ? 2 : 3); + case 3: // >= + return x.always_geq(y) ? 1 : (x.always_less(y) ? 2 : 3); + case 4: // < + return x.always_less(y) ? 1 : (x.always_geq(y) ? 2 : 3); + case 5: // <> + return x.always_neq(y) ? 1 : (x.always_equal(y) ? 2 : 3); + case 6: // <= + return x.always_leq(y) ? 1 : (x.always_greater(y) ? 
2 : 3); + case 7: // <=> + return x.always_less(y) + ? 1 + : (x.always_equal(y) + ? 2 + : (x.always_greater(y) + ? 4 + : (x.always_leq(y) ? 3 : (x.always_geq(y) ? 6 : (x.always_neq(y) ? 5 : 7))))); + default: + return 7; + } +} + +AsmOp compile_cmp_int(std::vector& res, std::vector& args, int mode) { + tolk_assert(mode >= 1 && mode <= 7); + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + int v = compute_compare(x.int_const, y.int_const, mode); + r.set_const(v); + x.unused(); + y.unused(); + return mode == 7 ? push_const(r.int_const) : AsmOp::BoolConst(v != 0); + } + int v = compute_compare(x, y, mode); + // std::cerr << "compute_compare(" << x << ", " << y << ", " << mode << ") = " << v << std::endl; + tolk_assert(v); + if (!(v & (v - 1))) { + r.set_const(v - (v >> 2) - 2); + x.unused(); + y.unused(); + return mode == 7 ? push_const(r.int_const) : AsmOp::BoolConst(v & 1); + } + r.val = ~0; + if (v & 1) { + r.val &= VarDescr::ConstTrue; + } + if (v & 2) { + r.val &= VarDescr::ConstZero; + } + if (v & 4) { + r.val &= VarDescr::ConstOne; + } + // std::cerr << "result: " << r << std::endl; + static const char* cmp_int_names[] = {"", "GTINT", "EQINT", "GTINT", "LESSINT", "NEQINT", "LESSINT"}; + static const char* cmp_names[] = {"", "GREATER", "EQUAL", "GEQ", "LESS", "NEQ", "LEQ", "CMP"}; + static int cmp_int_delta[] = {0, 0, 0, -1, 0, 0, 1}; + if (mode != 7) { + if (y.is_int_const() && y.int_const >= -128 && y.int_const <= 127) { + y.unused(); + return exec_arg_op(cmp_int_names[mode], y.int_const + cmp_int_delta[mode], 1); + } + if (x.is_int_const() && x.int_const >= -128 && x.int_const <= 127) { + x.unused(); + mode = ((mode & 4) >> 2) | (mode & 2) | ((mode & 1) << 2); + return exec_arg_op(cmp_int_names[mode], x.int_const + cmp_int_delta[mode], 1); + } + } + return exec_op(cmp_names[mode], 2); +} + +AsmOp compile_throw(std::vector& res, std::vector& args, const 
SrcLocation&) { + tolk_assert(res.empty() && args.size() == 1); + VarDescr& x = args[0]; + if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { + x.unused(); + return exec_arg_op("THROW", x.int_const, 0, 0); + } else { + return exec_op("THROWANY", 1, 0); + } +} + +AsmOp compile_cond_throw(std::vector& res, std::vector& args, bool mode) { + tolk_assert(res.empty() && args.size() == 2); + VarDescr &x = args[0], &y = args[1]; + std::string suff = (mode ? "IF" : "IFNOT"); + bool skip_cond = false; + if (y.always_true() || y.always_false()) { + y.unused(); + skip_cond = true; + if (y.always_true() != mode) { + x.unused(); + return AsmOp::Nop(); + } + } + if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { + x.unused(); + return skip_cond ? exec_arg_op("THROW", x.int_const, 0, 0) : exec_arg_op("THROW"s + suff, x.int_const, 1, 0); + } else { + return skip_cond ? exec_op("THROWANY", 1, 0) : exec_op("THROWANY"s + suff, 2, 0); + } +} + +AsmOp compile_throw_arg(std::vector& res, std::vector& args, const SrcLocation&) { + tolk_assert(res.empty() && args.size() == 2); + VarDescr &x = args[1]; + if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { + x.unused(); + return exec_arg_op("THROWARG", x.int_const, 1, 0); + } else { + return exec_op("THROWARGANY", 2, 0); + } +} + +AsmOp compile_cond_throw_arg(std::vector& res, std::vector& args, bool mode) { + tolk_assert(res.empty() && args.size() == 3); + VarDescr &x = args[1], &y = args[2]; + std::string suff = (mode ? "IF" : "IFNOT"); + bool skip_cond = false; + if (y.always_true() || y.always_false()) { + y.unused(); + skip_cond = true; + if (y.always_true() != mode) { + x.unused(); + return AsmOp::Nop(); + } + } + if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { + x.unused(); + return skip_cond ? exec_arg_op("THROWARG", x.int_const, 1, 0) : exec_arg_op("THROWARG"s + suff, x.int_const, 2, 0); + } else { + return skip_cond ? 
exec_op("THROWARGANY", 2, 0) : exec_op("THROWARGANY"s + suff, 3, 0); + } +} + +AsmOp compile_bool_const(std::vector& res, std::vector& args, bool val) { + tolk_assert(res.size() == 1 && args.empty()); + VarDescr& r = res[0]; + r.set_const(val ? -1 : 0); + return AsmOp::Const(val ? "TRUE" : "FALSE"); +} + +// (slice, int) load_int(slice s, int len) asm(s len -> 1 0) "LDIX"; +// (slice, int) load_uint(slice s, int len) asm( -> 1 0) "LDUX"; +// int preload_int(slice s, int len) asm "PLDIX"; +// int preload_uint(slice s, int len) asm "PLDUX"; +AsmOp compile_fetch_int(std::vector& res, std::vector& args, bool fetch, bool sgnd) { + tolk_assert(args.size() == 2 && res.size() == 1 + (unsigned)fetch); + auto &y = args[1], &r = res.back(); + r.val = (sgnd ? VarDescr::FiniteInt : VarDescr::FiniteUInt); + int v = -1; + if (y.is_int_const() && y.int_const >= 0 && y.int_const <= 256) { + v = (int)y.int_const->to_long(); + if (!v) { + r.val = VarDescr::ConstZero; + } + if (v == 1) { + r.val = (sgnd ? VarDescr::ValBool : VarDescr::ValBit); + } + if (v > 0) { + y.unused(); + return exec_arg_op((fetch ? "LD"s : "PLD"s) + (sgnd ? 'I' : 'U'), v, 1, 1 + (unsigned)fetch); + } + } + return exec_op((fetch ? "LD"s : "PLD"s) + (sgnd ? "IX" : "UX"), 2, 1 + (unsigned)fetch); +} + +// builder store_uint(builder b, int x, int len) asm(x b len) "STUX"; +// builder store_int(builder b, int x, int len) asm(x b len) "STIX"; +AsmOp compile_store_int(std::vector& res, std::vector& args, bool sgnd) { + tolk_assert(args.size() == 3 && res.size() == 1); + auto& z = args[2]; + if (z.is_int_const() && z.int_const > 0 && z.int_const <= 256) { + z.unused(); + return exec_arg_op("ST"s + (sgnd ? 'I' : 'U'), z.int_const, 2, 1); + } + return exec_op("ST"s + (sgnd ? 
"IX" : "UX"), 3, 1); +} + +AsmOp compile_fetch_slice(std::vector& res, std::vector& args, bool fetch) { + tolk_assert(args.size() == 2 && res.size() == 1 + (unsigned)fetch); + auto& y = args[1]; + int v = -1; + if (y.is_int_const() && y.int_const > 0 && y.int_const <= 256) { + v = (int)y.int_const->to_long(); + if (v > 0) { + y.unused(); + return exec_arg_op(fetch ? "LDSLICE" : "PLDSLICE", v, 1, 1 + (unsigned)fetch); + } + } + return exec_op(fetch ? "LDSLICEX" : "PLDSLICEX", 2, 1 + (unsigned)fetch); +} + +// _at(tuple t, int index) asm "INDEXVAR"; +AsmOp compile_tuple_at(std::vector& res, std::vector& args, const SrcLocation&) { + tolk_assert(args.size() == 2 && res.size() == 1); + auto& y = args[1]; + if (y.is_int_const() && y.int_const >= 0 && y.int_const < 16) { + y.unused(); + return exec_arg_op("INDEX", y.int_const, 1, 1); + } + return exec_op("INDEXVAR", 2, 1); +} + +// int null?(X arg) +AsmOp compile_is_null(std::vector& res, std::vector& args, const SrcLocation&) { + tolk_assert(args.size() == 1 && res.size() == 1); + auto &x = args[0], &r = res[0]; + if (x.always_null() || x.always_not_null()) { + x.unused(); + r.set_const(x.always_null() ? 
// Registers the built-in operators, constants and functions of the language.
// Every entry pairs a name with a type expression and either a compile callback
// or a fixed AsmOp. Callbacks that need an extra mode parameter (rounding mode,
// comparison mode, signedness, ...) are adapted with std::bind.
// NOTE(review): the optional trailing arguments of define_builtin_func (`true`,
// and the brace lists such as `{}, {1, 0}` / `{1, 0, 2}`) presumably are an
// "impure" flag and argument / result orderings -- confirm against its declaration.
void define_builtins() {
  using namespace std::placeholders;
  // Basic types and type variables used to build the signatures below
  // (Cell, Y, Z and XY are not referenced by any registration in this function).
  auto Unit = TypeExpr::new_unit();
  auto Int = TypeExpr::new_atomic(_Int);
  auto Cell = TypeExpr::new_atomic(_Cell);
  auto Slice = TypeExpr::new_atomic(_Slice);
  auto Builder = TypeExpr::new_atomic(_Builder);
  // auto Null = TypeExpr::new_atomic(_Null);
  auto Tuple = TypeExpr::new_atomic(_Tuple);
  auto Int2 = TypeExpr::new_tensor({Int, Int});
  auto Int3 = TypeExpr::new_tensor({Int, Int, Int});
  auto TupleInt = TypeExpr::new_tensor({Tuple, Int});
  auto SliceInt = TypeExpr::new_tensor({Slice, Int});
  auto X = TypeExpr::new_var();
  auto Y = TypeExpr::new_var();
  auto Z = TypeExpr::new_var();
  auto XY = TypeExpr::new_tensor({X, Y});
  // Function signatures shared by several builtins
  auto arith_bin_op = TypeExpr::new_map(Int2, Int);
  auto arith_un_op = TypeExpr::new_map(Int, Int);
  auto impure_bin_op = TypeExpr::new_map(Int2, Unit);
  auto impure_un_op = TypeExpr::new_map(Int, Unit);
  auto fetch_int_op = TypeExpr::new_map(SliceInt, SliceInt);
  auto prefetch_int_op = TypeExpr::new_map(SliceInt, Int);
  auto store_int_op = TypeExpr::new_map(TypeExpr::new_tensor({Builder, Int, Int}), Builder);
  auto store_int_method =
      TypeExpr::new_map(TypeExpr::new_tensor({Builder, Int, Int}), TypeExpr::new_tensor({Builder, Unit}));
  auto fetch_slice_op = TypeExpr::new_map(SliceInt, TypeExpr::new_tensor({Slice, Slice}));
  auto prefetch_slice_op = TypeExpr::new_map(SliceInt, Slice);
  //auto arith_null_op = TypeExpr::new_map(TypeExpr::new_unit(), Int);
  auto throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit));
  auto cond_throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int, Int}), Unit));
  // Arithmetic operators; the last bound argument of div / mod / rshift is the rounding mode (-1, 0 or 1)
  define_builtin_func("_+_", arith_bin_op, compile_add);
  define_builtin_func("_-_", arith_bin_op, compile_sub);
  define_builtin_func("-_", arith_un_op, compile_negate);
  define_builtin_func("_*_", arith_bin_op, compile_mul);
  define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1));
  define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0));
  define_builtin_func("_^/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1));
  define_builtin_func("_%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1));
  define_builtin_func("_~%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, 0));
  define_builtin_func("_^%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, 1));
  define_builtin_func("_/%_", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2));
  define_builtin_func("divmod", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2));
  define_builtin_func("~divmod", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2));
  define_builtin_func("moddiv", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2), {}, {1, 0});
  define_builtin_func("~moddiv", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2), {}, {1, 0});
  define_builtin_func("_<<_", arith_bin_op, compile_lshift);
  define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1));
  define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0));
  define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1));
  define_builtin_func("_&_", arith_bin_op, compile_and);
  define_builtin_func("_|_", arith_bin_op, compile_or);
  define_builtin_func("_^_", arith_bin_op, compile_xor);
  define_builtin_func("~_", arith_un_op, compile_not);
  // "^_<op>=_" names are registered with the same handlers as the plain operators
  define_builtin_func("^_+=_", arith_bin_op, compile_add);
  define_builtin_func("^_-=_", arith_bin_op, compile_sub);
  define_builtin_func("^_*=_", arith_bin_op, compile_mul);
  define_builtin_func("^_/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1));
  define_builtin_func("^_~/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0));
  define_builtin_func("^_^/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1));
  define_builtin_func("^_%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1));
  define_builtin_func("^_~%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, 0));
  define_builtin_func("^_^%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, 1));
  define_builtin_func("^_<<=_", arith_bin_op, compile_lshift);
  define_builtin_func("^_>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1));
  define_builtin_func("^_~>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0));
  define_builtin_func("^_^>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1));
  define_builtin_func("^_&=_", arith_bin_op, compile_and);
  define_builtin_func("^_|=_", arith_bin_op, compile_or);
  define_builtin_func("^_^=_", arith_bin_op, compile_xor);
  define_builtin_func("muldiv", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1));
  define_builtin_func("muldivr", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0));
  define_builtin_func("muldivc", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1));
  define_builtin_func("muldivmod", TypeExpr::new_map(Int3, Int2), AsmOp::Custom("MULDIVMOD", 3, 2));
  // Comparisons; the last bound argument is the comparison mode passed to compile_cmp_int
  define_builtin_func("_==_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 2));
  define_builtin_func("_!=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 5));
  define_builtin_func("_<_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 4));
  define_builtin_func("_>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 1));
  define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6));
  define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 3));
  define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7));
  // Built-in constants
  define_builtin_const("true", Int, /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true));
  define_builtin_const("false", Int, /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false));
  // define_builtin_func("null", Null, AsmOp::Const("PUSHNULL"));
  define_builtin_const("nil", Tuple, AsmOp::Const("PUSHNULL"));
  define_builtin_const("Nil", Tuple, AsmOp::Const("NIL"));
  define_builtin_func("null?", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null);
  // Exceptions
  define_builtin_func("throw", impure_un_op, compile_throw, true);
  define_builtin_func("throw_if", impure_bin_op, std::bind(compile_cond_throw, _1, _2, true), true);
  define_builtin_func("throw_unless", impure_bin_op, std::bind(compile_cond_throw, _1, _2, false), true);
  define_builtin_func("throw_arg", throw_arg_op, compile_throw_arg, true);
  define_builtin_func("throw_arg_if", cond_throw_arg_op, std::bind(compile_cond_throw_arg, _1, _2, true), true);
  define_builtin_func("throw_arg_unless", cond_throw_arg_op, std::bind(compile_cond_throw_arg, _1, _2, false), true);
  // Slice reading and builder writing; the bound flags are (fetch, sgnd) for compile_fetch_int,
  // sgnd for compile_store_int and fetch for compile_fetch_slice
  define_builtin_func("load_int", fetch_int_op, std::bind(compile_fetch_int, _1, _2, true, true), {}, {1, 0});
  define_builtin_func("load_uint", fetch_int_op, std::bind(compile_fetch_int, _1, _2, true, false), {}, {1, 0});
  define_builtin_func("preload_int", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, true));
  define_builtin_func("preload_uint", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, false));
  define_builtin_func("store_int", store_int_op, std::bind(compile_store_int, _1, _2, true), {1, 0, 2});
  define_builtin_func("store_uint", store_int_op, std::bind(compile_store_int, _1, _2, false), {1, 0, 2});
  define_builtin_func("~store_int", store_int_method, std::bind(compile_store_int, _1, _2, true), {1, 0, 2});
  define_builtin_func("~store_uint", store_int_method, std::bind(compile_store_int, _1, _2, false), {1, 0, 2});
  define_builtin_func("load_bits", fetch_slice_op, std::bind(compile_fetch_slice, _1, _2, true), {}, {1, 0});
  define_builtin_func("preload_bits", prefetch_slice_op, std::bind(compile_fetch_slice, _1, _2, false));
  // Tuples and miscellaneous helpers
  define_builtin_func("at", TypeExpr::new_forall({X}, TypeExpr::new_map(TupleInt, X)), compile_tuple_at);
  define_builtin_func("touch", TypeExpr::new_forall({X}, TypeExpr::new_map(X, X)), AsmOp::Nop());
  define_builtin_func("~touch", TypeExpr::new_forall({X}, TypeExpr::new_map(X, TypeExpr::new_tensor({X, Unit}))),
                      AsmOp::Nop());
  define_builtin_func("~dump", TypeExpr::new_forall({X}, TypeExpr::new_map(X, TypeExpr::new_tensor({X, Unit}))),
                      AsmOp::Custom("s0 DUMP", 1, 1), true);
  define_builtin_func("~strdump", TypeExpr::new_forall({X}, TypeExpr::new_map(X, TypeExpr::new_tensor({X, Unit}))),
                      AsmOp::Custom("STRDUMP", 1, 1), true);
}
+*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * GENERATE TVM STACK CODE + * + */ + +StackLayout Stack::vars() const { + StackLayout res; + res.reserve(s.size()); + for (auto x : s) { + res.push_back(x.first); + } + return res; +} + +int Stack::find(var_idx_t var, int from) const { + for (int i = from; i < depth(); i++) { + if (at(i).first == var) { + return i; + } + } + return -1; +} + +// finds var in [from .. to) +int Stack::find(var_idx_t var, int from, int to) const { + for (int i = from; i < depth() && i < to; i++) { + if (at(i).first == var) { + return i; + } + } + return -1; +} + +// finds var outside [from .. to) +int Stack::find_outside(var_idx_t var, int from, int to) const { + from = std::max(from, 0); + if (from >= to) { + return find(var); + } else { + int t = find(var, 0, from); + return t >= 0 ? t : find(var, to); + } +} + +int Stack::find_const(const_idx_t cst, int from) const { + for (int i = from; i < depth(); i++) { + if (at(i).second == cst) { + return i; + } + } + return -1; +} + +void Stack::forget_const() { + for (auto& vc : s) { + if (vc.second != not_const) { + vc.second = not_const; + } + } +} + +void Stack::issue_pop(int i) { + validate(i); + if (output_enabled()) { + o << AsmOp::Pop(i); + } + at(i) = get(0); + s.pop_back(); + modified(); +} + +void Stack::issue_push(int i) { + validate(i); + if (output_enabled()) { + o << AsmOp::Push(i); + } + s.push_back(get(i)); + modified(); +} + +void Stack::issue_xchg(int i, int j) { + validate(i); + validate(j); + if (i != j && get(i) != get(j)) { + if (output_enabled()) { + o << AsmOp::Xchg(i, j); + } + std::swap(at(i), at(j)); + modified(); + } +} + +int Stack::drop_vars_except(const VarDescrList& var_info, int excl_var) { + int dropped = 0, changes; + do { + changes = 0; + int n = depth(); + for (int i = 0; i < n; i++) { + var_idx_t idx = at(i).first; + if (((!var_info[idx] || var_info[idx]->is_unused()) && idx != excl_var) || find(idx, 0, i - 1) >= 0) { + // unneeded + issue_pop(i); + 
changes = 1; + break; + } + } + dropped += changes; + } while (changes); + return dropped; +} + +void Stack::show(int flags) { + std::ostringstream os; + for (auto i : s) { + os << ' '; + o.show_var_ext(os, i); + } + o << AsmOp::Comment(os.str()); + mode |= _Shown; +} + +void Stack::forget_var(var_idx_t idx) { + for (auto& x : s) { + if (x.first == idx) { + x = std::make_pair(_Garbage, not_const); + modified(); + } + } +} + +void Stack::push_new_var(var_idx_t idx) { + forget_var(idx); + s.emplace_back(idx, not_const); + modified(); +} + +void Stack::push_new_const(var_idx_t idx, const_idx_t cidx) { + forget_var(idx); + s.emplace_back(idx, cidx); + modified(); +} + +void Stack::assign_var(var_idx_t new_idx, var_idx_t old_idx) { + int i = find(old_idx); + tolk_assert(i >= 0 && "variable not found in stack"); + if (new_idx != old_idx) { + at(i).first = new_idx; + modified(); + } +} + +void Stack::do_copy_var(var_idx_t new_idx, var_idx_t old_idx) { + int i = find(old_idx); + tolk_assert(i >= 0 && "variable not found in stack"); + if (find(old_idx, i + 1) < 0) { + issue_push(i); + tolk_assert(at(0).first == old_idx); + } + assign_var(new_idx, old_idx); +} + +void Stack::enforce_state(const StackLayout& req_stack) { + int k = (int)req_stack.size(); + for (int i = 0; i < k; i++) { + var_idx_t x = req_stack[i]; + if (i < depth() && s[i].first == x) { + continue; + } + while (depth() > 0 && std::find(req_stack.cbegin(), req_stack.cend(), get(0).first) == req_stack.cend()) { + // current TOS entry is unused in req_stack, drop it + issue_pop(0); + } + int j = find(x); + if (j >= depth() - i) { + issue_push(j); + j = 0; + } + issue_xchg(j, depth() - i - 1); + tolk_assert(s[i].first == x); + } + while (depth() > k) { + issue_pop(0); + } + tolk_assert(depth() == k); + for (int i = 0; i < k; i++) { + tolk_assert(s[i].first == req_stack[i]); + } +} + +void Stack::merge_const(const Stack& req_stack) { + tolk_assert(s.size() == req_stack.s.size()); + for (std::size_t i = 0; i < 
s.size(); i++) { + tolk_assert(s[i].first == req_stack.s[i].first); + if (s[i].second != req_stack.s[i].second) { + s[i].second = not_const; + } + } +} + +void Stack::merge_state(const Stack& req_stack) { + enforce_state(req_stack.vars()); + merge_const(req_stack); +} + +void Stack::rearrange_top(const StackLayout& top, std::vector last) { + while (last.size() < top.size()) { + last.push_back(false); + } + int k = (int)top.size(); + for (int i = 0; i < k; i++) { + for (int j = i + 1; j < k; j++) { + if (top[i] == top[j]) { + last[i] = false; + break; + } + } + } + int ss = 0; + for (int i = 0; i < k; i++) { + if (last[i]) { + ++ss; + } + } + for (int i = 0; i < k; i++) { + var_idx_t x = top[i]; + // find s(j) containing x with j not in [ss, ss+i) + int j = find_outside(x, ss, ss + i); + if (last[i]) { + // rearrange x to be at s(ss-1) + issue_xchg(--ss, j); + tolk_assert(get(ss).first == x); + } else { + // create a new copy of x + issue_push(j); + issue_xchg(0, ss); + tolk_assert(get(ss).first == x); + } + } + tolk_assert(!ss); +} + +void Stack::rearrange_top(var_idx_t top, bool last) { + int i = find(top); + if (last) { + issue_xchg(0, i); + } else { + issue_push(i); + } + tolk_assert(get(0).first == top); +} + +bool Op::generate_code_step(Stack& stack) { + stack.opt_show(); + stack.drop_vars_except(var_info); + stack.opt_show(); + bool inline_func = stack.mode & Stack::_InlineFunc; + switch (cl) { + case _Nop: + case _Import: + return true; + case _Return: { + stack.enforce_state(left); + if (stack.o.retalt_ && (stack.mode & Stack::_NeedRetAlt)) { + stack.o << "RETALT"; + } + stack.opt_show(); + return false; + } + case _IntConst: { + auto p = next->var_info[left[0]]; + if (!p || p->is_unused()) { + return true; + } + auto cidx = stack.o.register_const(int_const); + int i = stack.find_const(cidx); + if (i < 0) { + stack.o << push_const(int_const); + stack.push_new_const(left[0], cidx); + } else { + tolk_assert(stack.at(i).second == cidx); + 
stack.do_copy_var(left[0], stack[i]); + } + return true; + } + case _SliceConst: { + auto p = next->var_info[left[0]]; + if (!p || p->is_unused()) { + return true; + } + stack.o << AsmOp::Const("x{" + str_const + "} PUSHSLICE"); + stack.push_new_var(left[0]); + return true; + } + case _GlobVar: + if (dynamic_cast(fun_ref->value)) { + bool used = false; + for (auto i : left) { + auto p = next->var_info[i]; + if (p && !p->is_unused()) { + used = true; + } + } + if (!used || disabled()) { + return true; + } + std::string name = symbols.get_name(fun_ref->sym_idx); + stack.o << AsmOp::Custom(name + " GETGLOB", 0, 1); + if (left.size() != 1) { + tolk_assert(left.size() <= 15); + stack.o << AsmOp::UnTuple((int)left.size()); + } + for (auto i : left) { + stack.push_new_var(i); + } + return true; + } else { + tolk_assert(left.size() == 1); + auto p = next->var_info[left[0]]; + if (!p || p->is_unused() || disabled()) { + return true; + } + stack.o << "CONT:<{"; + stack.o.indent(); + auto func = dynamic_cast(fun_ref->value); + if (func) { + // TODO: create and compile a true lambda instead of this (so that arg_order and ret_order would work correctly) + std::vector args0, res; + TypeExpr::remove_indirect(func->sym_type); + tolk_assert(func->get_type()->is_map()); + auto wr = func->get_type()->args.at(0)->get_width(); + auto wl = func->get_type()->args.at(1)->get_width(); + tolk_assert(wl >= 0 && wr >= 0); + for (int i = 0; i < wl; i++) { + res.emplace_back(0); + } + for (int i = 0; i < wr; i++) { + args0.emplace_back(0); + } + func->compile(stack.o, res, args0, where); // compile res := f (args0) + } else { + std::string name = symbols.get_name(fun_ref->sym_idx); + stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size()); + } + stack.o.undent(); + stack.o << "}>"; + stack.push_new_var(left.at(0)); + return true; + } + case _Let: { + tolk_assert(left.size() == right.size()); + int i = 0; + std::vector active; + active.reserve(left.size()); + for 
(std::size_t k = 0; k < left.size(); k++) { + var_idx_t y = left[k]; // "y" = "x" + auto p = next->var_info[y]; + active.push_back(p && !p->is_unused()); + } + for (std::size_t k = 0; k < left.size(); k++) { + if (!active[k]) { + continue; + } + var_idx_t x = right[k]; // "y" = "x" + bool is_last = true; + for (std::size_t l = k + 1; l < right.size(); l++) { + if (right[l] == x && active[l]) { + is_last = false; + } + } + if (is_last) { + auto info = var_info[x]; + is_last = (info && info->is_last()); + } + if (is_last) { + stack.assign_var(--i, x); + } else { + stack.do_copy_var(--i, x); + } + } + i = 0; + for (std::size_t k = 0; k < left.size(); k++) { + if (active[k]) { + stack.assign_var(left[k], --i); + } + } + return true; + } + case _Tuple: + case _UnTuple: { + if (disabled()) { + return true; + } + std::vector last; + for (var_idx_t x : right) { + last.push_back(var_info[x] && var_info[x]->is_last()); + } + stack.rearrange_top(right, std::move(last)); + stack.opt_show(); + int k = (int)stack.depth() - (int)right.size(); + tolk_assert(k >= 0); + if (cl == _Tuple) { + stack.o << AsmOp::Tuple((int)right.size()); + tolk_assert(left.size() == 1); + } else { + stack.o << AsmOp::UnTuple((int)left.size()); + tolk_assert(right.size() == 1); + } + stack.s.resize(k); + for (int i = 0; i < (int)left.size(); i++) { + stack.push_new_var(left.at(i)); + } + return true; + } + case _Call: + case _CallInd: { + if (disabled()) { + return true; + } + SymValFunc* func = (fun_ref ? dynamic_cast(fun_ref->value) : nullptr); + auto arg_order = (func ? func->get_arg_order() : nullptr); + auto ret_order = (func ? func->get_ret_order() : nullptr); + tolk_assert(!arg_order || arg_order->size() == right.size()); + tolk_assert(!ret_order || ret_order->size() == left.size()); + std::vector right1; + if (args.size()) { + tolk_assert(args.size() == right.size()); + for (int i = 0; i < (int)right.size(); i++) { + int j = arg_order ? 
arg_order->at(i) : i; + const VarDescr& arg = args.at(j); + if (!arg.is_unused()) { + tolk_assert(var_info[arg.idx] && !var_info[arg.idx]->is_unused()); + right1.push_back(arg.idx); + } + } + } else if (arg_order) { + for (int i = 0; i < (int)right.size(); i++) { + right1.push_back(right.at(arg_order->at(i))); + } + } else { + right1 = right; + } + std::vector last; + for (var_idx_t x : right1) { + last.push_back(var_info[x] && var_info[x]->is_last()); + } + stack.rearrange_top(right1, std::move(last)); + stack.opt_show(); + int k = (int)stack.depth() - (int)right1.size(); + tolk_assert(k >= 0); + for (int i = 0; i < (int)right1.size(); i++) { + if (stack.s[k + i].first != right1[i]) { + std::cerr << stack.o; + } + tolk_assert(stack.s[k + i].first == right1[i]); + } + auto exec_callxargs = [&](int args, int ret) { + if (args <= 15 && ret <= 15) { + stack.o << exec_arg2_op("CALLXARGS", args, ret, args + 1, ret); + } else { + tolk_assert(args <= 254 && ret <= 254); + stack.o << AsmOp::Const(PSTRING() << args << " PUSHINT"); + stack.o << AsmOp::Const(PSTRING() << ret << " PUSHINT"); + stack.o << AsmOp::Custom("CALLXVARARGS", args + 3, ret); + } + }; + if (cl == _CallInd) { + exec_callxargs((int)right.size() - 1, (int)left.size()); + } else { + auto func = dynamic_cast(fun_ref->value); + if (func) { + std::vector res; + res.reserve(left.size()); + for (var_idx_t i : left) { + res.emplace_back(i); + } + func->compile(stack.o, res, args, where); // compile res := f (args) + } else { + auto fv = dynamic_cast(fun_ref->value); + std::string name = symbols.get_name(fun_ref->sym_idx); + bool is_inline = (fv && (fv->flags & 3)); + if (is_inline) { + stack.o << AsmOp::Custom(name + " INLINECALLDICT", (int)right.size(), (int)left.size()); + } else if (fv && fv->code && fv->code->require_callxargs) { + stack.o << AsmOp::Custom(name + (" PREPAREDICT"), 0, 2); + exec_callxargs((int)right.size() + 1, (int)left.size()); + } else { + stack.o << AsmOp::Custom(name + " CALLDICT", 
(int)right.size(), (int)left.size()); + } + } + } + stack.s.resize(k); + for (int i = 0; i < (int)left.size(); i++) { + int j = ret_order ? ret_order->at(i) : i; + stack.push_new_var(left.at(j)); + } + return true; + } + case _SetGlob: { + tolk_assert(fun_ref && dynamic_cast(fun_ref->value)); + std::vector last; + for (var_idx_t x : right) { + last.push_back(var_info[x] && var_info[x]->is_last()); + } + stack.rearrange_top(right, std::move(last)); + stack.opt_show(); + int k = (int)stack.depth() - (int)right.size(); + tolk_assert(k >= 0); + for (int i = 0; i < (int)right.size(); i++) { + if (stack.s[k + i].first != right[i]) { + std::cerr << stack.o; + } + tolk_assert(stack.s[k + i].first == right[i]); + } + if (right.size() > 1) { + stack.o << AsmOp::Tuple((int)right.size()); + } + if (!right.empty()) { + std::string name = symbols.get_name(fun_ref->sym_idx); + stack.o << AsmOp::Custom(name + " SETGLOB", 1, 0); + } + stack.s.resize(k); + return true; + } + case _If: { + if (block0->is_empty() && block1->is_empty()) { + return true; + } + if (!next->noreturn() && (block0->noreturn() != block1->noreturn())) { + stack.o.retalt_ = true; + } + var_idx_t x = left[0]; + stack.rearrange_top(x, var_info[x] && var_info[x]->is_last()); + tolk_assert(stack[0] == x); + stack.opt_show(); + stack.s.pop_back(); + stack.modified(); + if (inline_func && (block0->noreturn() || block1->noreturn())) { + bool is0 = block0->noreturn(); + Op* block_noreturn = is0 ? block0.get() : block1.get(); + Op* block_other = is0 ? block1.get() : block0.get(); + stack.mode &= ~Stack::_InlineFunc; + stack.o << (is0 ? 
"IF:<{" : "IFNOT:<{"); + stack.o.indent(); + Stack stack_copy{stack}; + block_noreturn->generate_code_all(stack_copy); + stack.o.undent(); + stack.o << "}>ELSE<{"; + stack.o.indent(); + block_other->generate_code_all(stack); + if (!block_other->noreturn()) { + next->generate_code_all(stack); + } + stack.o.undent(); + stack.o << "}>"; + return false; + } + if (block1->is_empty() || block0->is_empty()) { + bool is0 = block1->is_empty(); + Op* block = is0 ? block0.get() : block1.get(); + // if (left) block0; ... + // if (!left) block1; ... + if (block->noreturn()) { + stack.o << (is0 ? "IFJMP:<{" : "IFNOTJMP:<{"); + stack.o.indent(); + Stack stack_copy{stack}; + stack_copy.mode &= ~Stack::_InlineFunc; + stack_copy.mode |= next->noreturn() ? 0 : Stack::_NeedRetAlt; + block->generate_code_all(stack_copy); + stack.o.undent(); + stack.o << "}>"; + return true; + } + stack.o << (is0 ? "IF:<{" : "IFNOT:<{"); + stack.o.indent(); + Stack stack_copy{stack}, stack_target{stack}; + stack_target.disable_output(); + stack_target.drop_vars_except(next->var_info); + stack_copy.mode &= ~Stack::_InlineFunc; + block->generate_code_all(stack_copy); + stack_copy.drop_vars_except(var_info); + stack_copy.opt_show(); + if ((is0 && stack_copy == stack) || (!is0 && stack_copy.vars() == stack.vars())) { + stack.o.undent(); + stack.o << "}>"; + if (!is0) { + stack.merge_const(stack_copy); + } + return true; + } + // stack_copy.drop_vars_except(next->var_info); + stack_copy.enforce_state(stack_target.vars()); + stack_copy.opt_show(); + if (stack_copy.vars() == stack.vars()) { + stack.o.undent(); + stack.o << "}>"; + stack.merge_const(stack_copy); + return true; + } + stack.o.undent(); + stack.o << "}>ELSE<{"; + stack.o.indent(); + stack.merge_state(stack_copy); + stack.opt_show(); + stack.o.undent(); + stack.o << "}>"; + return true; + } + if (block0->noreturn() || block1->noreturn()) { + bool is0 = block0->noreturn(); + Op* block_noreturn = is0 ? 
block0.get() : block1.get(); + Op* block_other = is0 ? block1.get() : block0.get(); + stack.o << (is0 ? "IFJMP:<{" : "IFNOTJMP:<{"); + stack.o.indent(); + Stack stack_copy{stack}; + stack_copy.mode &= ~Stack::_InlineFunc; + stack_copy.mode |= (block_other->noreturn() || next->noreturn()) ? 0 : Stack::_NeedRetAlt; + block_noreturn->generate_code_all(stack_copy); + stack.o.undent(); + stack.o << "}>"; + block_other->generate_code_all(stack); + return !block_other->noreturn(); + } + stack.o << "IF:<{"; + stack.o.indent(); + Stack stack_copy{stack}; + stack_copy.mode &= ~Stack::_InlineFunc; + block0->generate_code_all(stack_copy); + stack_copy.drop_vars_except(next->var_info); + stack_copy.opt_show(); + stack.o.undent(); + stack.o << "}>ELSE<{"; + stack.o.indent(); + stack.mode &= ~Stack::_InlineFunc; + block1->generate_code_all(stack); + stack.merge_state(stack_copy); + stack.opt_show(); + stack.o.undent(); + stack.o << "}>"; + return true; + } + case _Repeat: { + var_idx_t x = left[0]; + //stack.drop_vars_except(block0->var_info, x); + stack.rearrange_top(x, var_info[x] && var_info[x]->is_last()); + tolk_assert(stack[0] == x); + stack.opt_show(); + stack.s.pop_back(); + stack.modified(); + if (block0->noreturn()) { + stack.o.retalt_ = true; + } + if (true || !next->is_empty()) { + stack.o << "REPEAT:<{"; + stack.o.indent(); + stack.forget_const(); + if (block0->noreturn()) { + Stack stack_copy{stack}; + StackLayout layout1 = stack.vars(); + stack_copy.mode &= ~Stack::_InlineFunc; + stack_copy.mode |= Stack::_NeedRetAlt; + block0->generate_code_all(stack_copy); + } else { + StackLayout layout1 = stack.vars(); + stack.mode &= ~Stack::_InlineFunc; + stack.mode |= Stack::_NeedRetAlt; + block0->generate_code_all(stack); + stack.enforce_state(std::move(layout1)); + stack.opt_show(); + } + stack.o.undent(); + stack.o << "}>"; + return true; + } else { + stack.o << "REPEATEND"; + stack.forget_const(); + StackLayout layout1 = stack.vars(); + block0->generate_code_all(stack); 
+ stack.enforce_state(std::move(layout1)); + stack.opt_show(); + return false; + } + } + case _Again: { + stack.drop_vars_except(block0->var_info); + stack.opt_show(); + if (block0->noreturn()) { + stack.o.retalt_ = true; + } + if (!next->is_empty() || inline_func) { + stack.o << "AGAIN:<{"; + stack.o.indent(); + stack.forget_const(); + StackLayout layout1 = stack.vars(); + stack.mode &= ~Stack::_InlineFunc; + stack.mode |= Stack::_NeedRetAlt; + block0->generate_code_all(stack); + stack.enforce_state(std::move(layout1)); + stack.opt_show(); + stack.o.undent(); + stack.o << "}>"; + return true; + } else { + stack.o << "AGAINEND"; + stack.forget_const(); + StackLayout layout1 = stack.vars(); + block0->generate_code_all(stack); + stack.enforce_state(std::move(layout1)); + stack.opt_show(); + return false; + } + } + case _Until: { + // stack.drop_vars_except(block0->var_info); + // stack.opt_show(); + if (block0->noreturn()) { + stack.o.retalt_ = true; + } + if (true || !next->is_empty()) { + stack.o << "UNTIL:<{"; + stack.o.indent(); + stack.forget_const(); + auto layout1 = stack.vars(); + stack.mode &= ~Stack::_InlineFunc; + stack.mode |= Stack::_NeedRetAlt; + block0->generate_code_all(stack); + layout1.push_back(left[0]); + stack.enforce_state(std::move(layout1)); + stack.opt_show(); + stack.o.undent(); + stack.o << "}>"; + stack.s.pop_back(); + stack.modified(); + return true; + } else { + stack.o << "UNTILEND"; + stack.forget_const(); + StackLayout layout1 = stack.vars(); + block0->generate_code_all(stack); + layout1.push_back(left[0]); + stack.enforce_state(std::move(layout1)); + stack.opt_show(); + return false; + } + } + case _While: { + // while (block0 | left) block1; ...next + var_idx_t x = left[0]; + stack.drop_vars_except(block0->var_info); + stack.opt_show(); + StackLayout layout1 = stack.vars(); + bool next_empty = false && next->is_empty(); + if (block0->noreturn()) { + stack.o.retalt_ = true; + } + stack.o << "WHILE:<{"; + stack.o.indent(); + 
stack.forget_const(); + stack.mode &= ~Stack::_InlineFunc; + stack.mode |= Stack::_NeedRetAlt; + block0->generate_code_all(stack); + stack.rearrange_top(x, !next->var_info[x] && !block1->var_info[x]); + stack.opt_show(); + stack.s.pop_back(); + stack.modified(); + stack.o.undent(); + Stack stack_copy{stack}; + stack.o << (next_empty ? "}>DO:" : "}>DO<{"); + if (!next_empty) { + stack.o.indent(); + } + stack_copy.opt_show(); + block1->generate_code_all(stack_copy); + stack_copy.enforce_state(std::move(layout1)); + stack_copy.opt_show(); + if (!next_empty) { + stack.o.undent(); + stack.o << "}>"; + return true; + } else { + return false; + } + } + case _TryCatch: { + if (block0->is_empty() && block1->is_empty()) { + return true; + } + if (block0->noreturn() || block1->noreturn()) { + stack.o.retalt_ = true; + } + Stack catch_stack{stack.o}; + std::vector catch_vars; + std::vector catch_last; + for (const VarDescr& var : block1->var_info.list) { + if (stack.find(var.idx) >= 0) { + catch_vars.push_back(var.idx); + catch_last.push_back(!block0->var_info[var.idx]); + } + } + const size_t block_size = 255; + for (size_t begin = catch_vars.size(), end = begin; end > 0; end = begin) { + begin = end >= block_size ? 
end - block_size : 0; + for (size_t i = begin; i < end; ++i) { + catch_stack.push_new_var(catch_vars[i]); + } + } + catch_stack.push_new_var(left[0]); + catch_stack.push_new_var(left[1]); + stack.rearrange_top(catch_vars, catch_last); + stack.opt_show(); + stack.o << "c4 PUSH"; + stack.o << "c5 PUSH"; + stack.o << "c7 PUSH"; + stack.o << "<{"; + stack.o.indent(); + if (block1->noreturn()) { + catch_stack.mode |= Stack::_NeedRetAlt; + } + block1->generate_code_all(catch_stack); + catch_stack.drop_vars_except(next->var_info); + catch_stack.opt_show(); + stack.o.undent(); + stack.o << "}>CONT"; + stack.o << "c7 SETCONT"; + stack.o << "c5 SETCONT"; + stack.o << "c4 SETCONT"; + for (size_t begin = catch_vars.size(), end = begin; end > 0; end = begin) { + begin = end >= block_size ? end - block_size : 0; + stack.o << std::to_string(end - begin) + " PUSHINT"; + stack.o << "-1 PUSHINT"; + stack.o << "SETCONTVARARGS"; + } + stack.s.erase(stack.s.end() - catch_vars.size(), stack.s.end()); + stack.modified(); + stack.o << "<{"; + stack.o.indent(); + if (block0->noreturn()) { + stack.mode |= Stack::_NeedRetAlt; + } + block0->generate_code_all(stack); + if (block0->noreturn()) { + stack.s = std::move(catch_stack.s); + } else if (!block1->noreturn()) { + stack.merge_state(catch_stack); + } + stack.opt_show(); + stack.o.undent(); + stack.o << "}>CONT"; + stack.o << "c1 PUSH"; + stack.o << "COMPOSALT"; + stack.o << "SWAP"; + stack.o << "TRY"; + return true; + } + default: + std::cerr << "fatal: unknown operation \n"; + throw ParseError{where, "unknown operation in generate_code()"}; + } +} + +void Op::generate_code_all(Stack& stack) { + int saved_mode = stack.mode; + auto cont = generate_code_step(stack); + stack.mode = (stack.mode & ~Stack::_ModeSave) | (saved_mode & Stack::_ModeSave); + if (cont && next) { + next->generate_code_all(stack); + } +} + +void CodeBlob::generate_code(AsmOpList& out, int mode) { + Stack stack{out, mode}; + tolk_assert(ops && ops->cl == Op::_Import); + 
auto args = (int)ops->left.size(); + for (var_idx_t x : ops->left) { + stack.push_new_var(x); + } + ops->generate_code_all(stack); + stack.apply_wrappers(require_callxargs && (mode & Stack::_InlineAny) ? args : -1); + if (!(mode & Stack::_DisableOpt)) { + optimize_code(out); + } +} + +void CodeBlob::generate_code(std::ostream& os, int mode, int indent) { + AsmOpList out_list(indent, &vars); + generate_code(out_list, mode); + out_list.out(os, mode); +} + +} // namespace tolk diff --git a/tolk/gen-abscode.cpp b/tolk/gen-abscode.cpp new file mode 100644 index 000000000..bfce6f0ce --- /dev/null +++ b/tolk/gen-abscode.cpp @@ -0,0 +1,449 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include +#include "tolk.h" + +using namespace std::literals::string_literals; + +namespace tolk { + +/* + * + * EXPRESSIONS + * + */ +/* Deep copy: clones this node, then replaces each argument pointer with a recursive copy. The result is allocated with new. */ +Expr* Expr::copy() const { + auto res = new Expr{*this}; + for (auto& arg : res->args) { + arg = arg->copy(); + } + return res; +} +/* Builds a node of class c and binds sym to the result of lookup_symbol(name_idx). A failed lookup is currently ignored: the if below has an empty body. */ +Expr::Expr(int c, sym_idx_t name_idx, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { + sym = lookup_symbol(name_idx); + if (!sym) { + } +} +/* chk_rvalue, chk_lvalue and chk_type raise an error through lem.error_at when this expression is not an rvalue, an lvalue or a type expression respectively. */ +void Expr::chk_rvalue(const Lexem& lem) const { + if (!is_rvalue()) { + lem.error_at("rvalue expected before `", "`"); + } +} + +void Expr::chk_lvalue(const Lexem& lem) const { + if (!is_lvalue()) { + lem.error_at("lvalue expected before `", "`"); + } +} + +void Expr::chk_type(const Lexem& lem) const { + if (!is_type()) { + lem.error_at("type expression expected before `", "`"); + } +} +/* Infers e_type by unification and returns true once it is set (immediately if it was set already). Handles _Apply, _VarApply, _Letop, _LetFirst and _CondExpr; any other class, or an _Apply whose symbol is missing or untyped, returns false. A UnifyError is reported through lem.error with a descriptive message. */ +bool Expr::deduce_type(const Lexem& lem) { + if (e_type) { + return true; + } + switch (cls) { + case _Apply: { + if (!sym) { + return false; + } + SymVal* sym_val = dynamic_cast(sym->value); + if (!sym_val || !sym_val->get_type()) { + return false; + } + std::vector arg_types; + for (const auto& arg : args) { + arg_types.push_back(arg->e_type); + } + TypeExpr* fun_type = TypeExpr::new_map(TypeExpr::new_tensor(arg_types), TypeExpr::new_hole()); + try { + unify(fun_type, sym_val->sym_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type " + << fun_type->args[0] << ": " << ue; + lem.error(os.str()); + } + e_type = fun_type->args[1]; + TypeExpr::remove_indirect(e_type); + return true; + } + case _VarApply: { + tolk_assert(args.size() == 2); + TypeExpr* fun_type = TypeExpr::new_map(args[1]->e_type, TypeExpr::new_hole()); + try { + unify(fun_type, args[0]->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type + << ": " <<
ue; + lem.error(os.str()); + } + e_type = fun_type->args[1]; + TypeExpr::remove_indirect(e_type); + return true; + } + case _Letop: { + tolk_assert(args.size() == 2); + try { + // std::cerr << "in assignment: " << args[0]->e_type << " from " << args[1]->e_type << std::endl; + unify(args[0]->e_type, args[1]->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type " + << args[0]->e_type << ": " << ue; + lem.error(os.str()); + } + e_type = args[0]->e_type; + TypeExpr::remove_indirect(e_type); + return true; + } + case _LetFirst: { + tolk_assert(args.size() == 2); + TypeExpr* rhs_type = TypeExpr::new_tensor({args[0]->e_type, TypeExpr::new_hole()}); + try { + // std::cerr << "in implicit assignment of a modifying method: " << rhs_type << " and " << args[1]->e_type << std::endl; + unify(rhs_type, args[1]->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot implicitly assign an expression of type " << args[1]->e_type + << " to a variable or pattern of type " << rhs_type << " in modifying method `" << symbols.get_name(val) + << "` : " << ue; + lem.error(os.str()); + } + e_type = rhs_type->args[1]; + TypeExpr::remove_indirect(e_type); + // std::cerr << "result type is " << e_type << std::endl; + return true; + } + case _CondExpr: { + tolk_assert(args.size() == 3); + auto flag_type = TypeExpr::new_atomic(_Int); + try { + unify(args[0]->e_type, flag_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue; + lem.error(os.str()); + } + try { + unify(args[1]->e_type, args[2]->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and " + << args[2]->e_type << " : " << ue; + lem.error(os.str()); + } + e_type = args[1]->e_type; +
TypeExpr::remove_indirect(e_type); + return true; + } + } + return false; +} +/* Creates code variables for this pattern. _Tensor, _MkTuple and _TypeApply recurse into their arguments; a _Var with negative val gets a named variable and counts as 1; a _Hole with negative val gets a temporary and is not counted. Returns the count. */ +int Expr::define_new_vars(CodeBlob& code) { + switch (cls) { + case _Tensor: + case _MkTuple: + case _TypeApply: { + int res = 0; + for (const auto& x : args) { + res += x->define_new_vars(code); + } + return res; + } + case _Var: + if (val < 0) { + val = code.create_var(TmpVar::_Named, e_type, sym, &here); + return 1; + } + break; + case _Hole: + if (val < 0) { + val = code.create_var(TmpVar::_Tmp, e_type, nullptr, &here); + } + break; + } + return 0; +} +/* Declares a symbol for every _Var in this pattern that has no sym yet and returns how many were declared. The name index is read as ~val (val must be negative here). Throws ParseError for a name found in prohibited_var_names or when define_symbol returns null. */ +int Expr::predefine_vars() { + switch (cls) { + case _Tensor: + case _MkTuple: + case _TypeApply: { + int res = 0; + for (const auto& x : args) { + res += x->predefine_vars(); + } + return res; + } + case _Var: + if (!sym) { + tolk_assert(val < 0 && here.defined()); + if (prohibited_var_names.count(symbols.get_name(~val))) { + throw ParseError{ + here, PSTRING() << "symbol `" << symbols.get_name(~val) << "` cannot be redefined as a variable"}; + } + sym = define_symbol(~val, false, here); + // std::cerr << "predefining variable " << symbols.get_name(~val) << std::endl; + if (!sym) { + throw ParseError{here, std::string{"redefined variable `"} + symbols.get_name(~val) + "`"}; + } + sym->value = new SymVal{SymVal::_Var, -1, e_type}; + return 1; + } + break; + } + return 0; +} +/* Creates a temporary variable with the type of this expression, passing its location. */ +var_idx_t Expr::new_tmp(CodeBlob& code) const { + return code.create_tmp_var(e_type, &here); +} +/* Appends one _SetGlob op, flagged _Impure, for each pair in globs: p.second is passed as the single right-hand variable and p.first as the trailing argument of emplace_back. */ +void add_set_globs(CodeBlob& code, std::vector>& globs, const SrcLocation& here) { + for (const auto& p : globs) { + auto& op = code.emplace_back(here, Op::_SetGlob, std::vector{}, std::vector{ p.second }, p.first); + op.flags |= Op::_Impure; + } +} +/* Compiles the assignment lhs = rhs and returns the variables that hold the assigned value. _TypeApply wrappers are peeled off both sides first. A tuple pattern on the left is either matched against the inner expression of a tuple on the right, or the right side is compiled, unpacked with _UnTuple into a temporary, and that temporary is assigned recursively. NOTE(review): the helper node tvar is allocated with new and never released in this function. */ +std::vector Expr::pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, const SrcLocation& here) { + while (lhs->is_type_apply()) { + lhs = lhs->args.at(0); + } + while (rhs->is_type_apply()) { + rhs = rhs->args.at(0); + } + if (lhs->is_mktuple()) { + if (rhs->is_mktuple()) { + return pre_compile_let(code,
lhs->args.at(0), rhs->args.at(0), here); + } + auto right = rhs->pre_compile(code); + TypeExpr::remove_indirect(rhs->e_type); + auto unpacked_type = rhs->e_type->args.at(0); + std::vector tmp{code.create_tmp_var(unpacked_type, &rhs->here)}; + code.emplace_back(lhs->here, Op::_UnTuple, tmp, std::move(right)); + auto tvar = new Expr{_Var}; + tvar->set_val(tmp[0]); + tvar->set_location(rhs->here); + tvar->e_type = unpacked_type; + pre_compile_let(code, lhs->args.at(0), tvar, here); + return tmp; + } + auto right = rhs->pre_compile(code); + std::vector> globs; + auto left = lhs->pre_compile(code, &globs); + for (var_idx_t v : left) { + code.on_var_modification(v, here); + } + code.emplace_back(here, Op::_Let, std::move(left), right); + add_set_globs(code, globs, here); + return right; +} +/* Compiles args in the order given by arg_order (0..n-1 when arg_order is empty) and returns their variables concatenated in argument order. Every variable produced along the way gets a temporary on_modification hook that is popped again before returning. Without _AllowPostModification the hook throws ParseError if the variable is modified later in the same expression. With it, a named variable (when lval_globs is null) gets a disabled _Let op reserved at this point; if the variable is modified, that op is enabled to copy the old value into a fresh temporary, which is then returned in its place. */ +std::vector pre_compile_tensor(const std::vector args, CodeBlob &code, + std::vector> *lval_globs, + std::vector arg_order) { + if (arg_order.empty()) { + arg_order.resize(args.size()); + std::iota(arg_order.begin(), arg_order.end(), 0); + } + tolk_assert(args.size() == arg_order.size()); + std::vector> res_lists(args.size()); + + struct ModifiedVar { + size_t i, j; + Op* op; + }; + auto modified_vars = std::make_shared>(); + for (size_t i : arg_order) { + res_lists[i] = args[i]->pre_compile(code, lval_globs); + for (size_t j = 0; j < res_lists[i].size(); ++j) { + TmpVar& var = code.vars.at(res_lists[i][j]); + if (code.flags & CodeBlob::_AllowPostModification) { + if (!lval_globs && (var.cls & TmpVar::_Named)) { + Op *op = &code.emplace_back(nullptr, Op::_Let, std::vector(), std::vector()); + op->flags |= Op::_Disabled; + var.on_modification.push_back([modified_vars, i, j, op, done = false](const SrcLocation &here) mutable { + if (!done) { + done = true; + modified_vars->push_back({i, j, op}); + } + }); + } else { + var.on_modification.push_back([](const SrcLocation &) { + }); + } + } else { + var.on_modification.push_back([name = var.to_string()](const SrcLocation &here) { + throw
ParseError{here, PSTRING() << "Modifying local variable " << name + << " after using it in the same expression"}; + }); + } + } + } + for (const auto& list : res_lists) { + for (var_idx_t v : list) { + tolk_assert(!code.vars.at(v).on_modification.empty()); + code.vars.at(v).on_modification.pop_back(); + } + } + for (const ModifiedVar &m : *modified_vars) { + var_idx_t& v = res_lists[m.i][m.j]; + var_idx_t v2 = code.create_tmp_var(code.vars[v].v_type, code.vars[v].where.get()); + m.op->left = {v2}; + m.op->right = {v}; + m.op->flags &= ~Op::_Disabled; + v = v2; + } + std::vector res; + for (const auto& list : res_lists) { + res.insert(res.end(), list.cbegin(), list.cend()); + } + return res; +} +/* Lowers this expression into ops appended to code and returns the variables holding its value. A non-null lval_globs means the expression is compiled as an assignment target: only _Tensor, _Var, _Hole, _TypeApply and _GlobVar are accepted (anything else throws Fatal), and a global is recorded in lval_globs instead of being loaded with a _GlobVar op. */ +std::vector Expr::pre_compile(CodeBlob& code, std::vector>* lval_globs) const { + if (lval_globs && !(cls == _Tensor || cls == _Var || cls == _Hole || cls == _TypeApply || cls == _GlobVar)) { + std::cerr << "lvalue expression constructor is " << cls << std::endl; + throw Fatal{"cannot compile lvalue expression with unknown constructor"}; + } + switch (cls) { + case _Tensor: { + return pre_compile_tensor(args, code, lval_globs, {}); + } + case _Apply: { + tolk_assert(sym); + auto func = dynamic_cast(sym->value); + std::vector res; + if (func && func->arg_order.size() == args.size() && !(code.flags & CodeBlob::_ComputeAsmLtr)) { + //std::cerr << "!!!
reordering " << args.size() << " arguments of " << sym->name() << std::endl; + res = pre_compile_tensor(args, code, lval_globs, func->arg_order); + } else { + res = pre_compile_tensor(args, code, lval_globs, {}); + } + auto rvect = new_tmp_vect(code); + auto& op = code.emplace_back(here, Op::_Call, rvect, std::move(res), sym); + if (flags & _IsImpure) { + op.flags |= Op::_Impure; + } + return rvect; + } + case _TypeApply: + return args[0]->pre_compile(code, lval_globs); + case _Var: + case _Hole: + if (val < 0) { + throw ParseError{here, "unexpected variable definition"}; + } + return {val}; + case _VarApply: + if (args[0]->cls == _Glob) { + auto res = args[1]->pre_compile(code); + auto rvect = new_tmp_vect(code); + auto& op = code.emplace_back(here, Op::_Call, rvect, std::move(res), args[0]->sym); + if (args[0]->flags & _IsImpure) { + op.flags |= Op::_Impure; + } + return rvect; + } else { + auto res = args[1]->pre_compile(code); + auto tfunc = args[0]->pre_compile(code); + if (tfunc.size() != 1) { + throw Fatal{"stack tuple used as a function"}; + } + res.push_back(tfunc[0]); + auto rvect = new_tmp_vect(code); + code.emplace_back(here, Op::_CallInd, rvect, std::move(res)); + return rvect; + } + case _Const: { + auto rvect = new_tmp_vect(code); + code.emplace_back(here, Op::_IntConst, rvect, intval); + return rvect; + } + case _Glob: + case _GlobVar: { + auto rvect = new_tmp_vect(code); + if (lval_globs) { + lval_globs->push_back({ sym, rvect[0] }); + return rvect; + } else { + code.emplace_back(here, Op::_GlobVar, rvect, std::vector{}, sym); + return rvect; + } + } + case _Letop: { + return pre_compile_let(code, args.at(0), args.at(1), here); + } + case _LetFirst: { + auto rvect = new_tmp_vect(code); + auto right = args[1]->pre_compile(code); + std::vector> local_globs; + if (!lval_globs) { + lval_globs = &local_globs; + } + auto left = args[0]->pre_compile(code, lval_globs); + left.push_back(rvect[0]); + for (var_idx_t v : left) { + code.on_var_modification(v,
here); + } + code.emplace_back(here, Op::_Let, std::move(left), std::move(right)); + add_set_globs(code, local_globs, here); + return rvect; + } + case _MkTuple: { + auto left = new_tmp_vect(code); + auto right = args[0]->pre_compile(code); + code.emplace_back(here, Op::_Tuple, left, std::move(right)); + return left; + } + case _CondExpr: { + auto cond = args[0]->pre_compile(code); + tolk_assert(cond.size() == 1); + auto rvect = new_tmp_vect(code); + Op& if_op = code.emplace_back(here, Op::_If, cond); + code.push_set_cur(if_op.block0); + code.emplace_back(here, Op::_Let, rvect, args[1]->pre_compile(code)); + code.close_pop_cur(args[1]->here); + code.push_set_cur(if_op.block1); + code.emplace_back(here, Op::_Let, rvect, args[2]->pre_compile(code)); + code.close_pop_cur(args[2]->here); + return rvect; + } + case _SliceConst: { + auto rvect = new_tmp_vect(code); + code.emplace_back(here, Op::_SliceConst, rvect, strval); + return rvect; + } + default: + std::cerr << "expression constructor is " << cls << std::endl; + throw Fatal{"cannot compile expression with unknown constructor"}; + } +} + +} // namespace tolk diff --git a/tolk/keywords.cpp b/tolk/keywords.cpp new file mode 100644 index 000000000..db193debe --- /dev/null +++ b/tolk/keywords.cpp @@ -0,0 +1,126 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see .
+*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * KEYWORD DEFINITION + * + */ +/* Registers the fixed vocabulary of the language in the global symbols table: single-character tokens through add_kw_char, then every multi-character operator, keyword and directive through add_keyword together with its Keyword id. */ +void define_keywords() { /* single-character tokens */ + symbols.add_kw_char('+') + .add_kw_char('-') + .add_kw_char('*') + .add_kw_char('/') + .add_kw_char('%') + .add_kw_char('?') + .add_kw_char(':') + .add_kw_char(',') + .add_kw_char(';') + .add_kw_char('(') + .add_kw_char(')') + .add_kw_char('[') + .add_kw_char(']') + .add_kw_char('{') + .add_kw_char('}') + .add_kw_char('=') + .add_kw_char('_') + .add_kw_char('<') + .add_kw_char('>') + .add_kw_char('&') + .add_kw_char('|') + .add_kw_char('^') + .add_kw_char('~'); + /* multi-character operators: comparisons, shifts, division and modulo variants, and the operator-assignment forms */ + symbols.add_keyword("==", Keyword::_Eq) + .add_keyword("!=", Keyword::_Neq) + .add_keyword("<=", Keyword::_Leq) + .add_keyword(">=", Keyword::_Geq) + .add_keyword("<=>", Keyword::_Spaceship) + .add_keyword("<<", Keyword::_Lshift) + .add_keyword(">>", Keyword::_Rshift) + .add_keyword("~>>", Keyword::_RshiftR) + .add_keyword("^>>", Keyword::_RshiftC) + .add_keyword("~/", Keyword::_DivR) + .add_keyword("^/", Keyword::_DivC) + .add_keyword("~%", Keyword::_ModR) + .add_keyword("^%", Keyword::_ModC) + .add_keyword("/%", Keyword::_DivMod) + .add_keyword("+=", Keyword::_PlusLet) + .add_keyword("-=", Keyword::_MinusLet) + .add_keyword("*=", Keyword::_TimesLet) + .add_keyword("/=", Keyword::_DivLet) + .add_keyword("~/=", Keyword::_DivRLet) + .add_keyword("^/=", Keyword::_DivCLet) + .add_keyword("%=", Keyword::_ModLet) + .add_keyword("~%=", Keyword::_ModRLet) + .add_keyword("^%=", Keyword::_ModCLet) + .add_keyword("<<=", Keyword::_LshiftLet) + .add_keyword(">>=", Keyword::_RshiftLet) + .add_keyword("~>>=", Keyword::_RshiftRLet) + .add_keyword("^>>=", Keyword::_RshiftCLet) + .add_keyword("&=", Keyword::_AndLet) + .add_keyword("|=", Keyword::_OrLet) + .add_keyword("^=", Keyword::_XorLet); + /* statement and control-flow keywords */ + symbols.add_keyword("return", Keyword::_Return) + .add_keyword("var", Keyword::_Var) + .add_keyword("repeat", Keyword::_Repeat) + .add_keyword("do", Keyword::_Do) + .add_keyword("while", Keyword::_While) +
.add_keyword("until", Keyword::_Until) + .add_keyword("try", Keyword::_Try) + .add_keyword("catch", Keyword::_Catch) + .add_keyword("if", Keyword::_If) + .add_keyword("ifnot", Keyword::_Ifnot) + .add_keyword("then", Keyword::_Then) + .add_keyword("else", Keyword::_Else) + .add_keyword("elseif", Keyword::_Elseif) + .add_keyword("elseifnot", Keyword::_Elseifnot); + /* type names and type-level syntax */ + symbols.add_keyword("int", Keyword::_Int) + .add_keyword("cell", Keyword::_Cell) + .add_keyword("slice", Keyword::_Slice) + .add_keyword("builder", Keyword::_Builder) + .add_keyword("cont", Keyword::_Cont) + .add_keyword("tuple", Keyword::_Tuple) + .add_keyword("type", Keyword::_Type) + .add_keyword("->", Keyword::_Mapsto) + .add_keyword("forall", Keyword::_Forall); + /* declaration specifiers and modifiers */ + symbols.add_keyword("extern", Keyword::_Extern) + .add_keyword("global", Keyword::_Global) + .add_keyword("asm", Keyword::_Asm) + .add_keyword("impure", Keyword::_Impure) + .add_keyword("inline", Keyword::_Inline) + .add_keyword("inline_ref", Keyword::_InlineRef) + .add_keyword("auto_apply", Keyword::_AutoApply) + .add_keyword("method_id", Keyword::_MethodId) + .add_keyword("operator", Keyword::_Operator) + .add_keyword("infix", Keyword::_Infix) + .add_keyword("infixl", Keyword::_Infixl) + .add_keyword("infixr", Keyword::_Infixr) + .add_keyword("const", Keyword::_Const); + /* hash-prefixed directives */ + symbols.add_keyword("#pragma", Keyword::_PragmaHashtag) + .add_keyword("#include", Keyword::_IncludeHashtag); +} + +} // namespace tolk diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp new file mode 100644 index 000000000..f0838f5a6 --- /dev/null +++ b/tolk/lexer.cpp @@ -0,0 +1,335 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version.
+ + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "lexer.h" +#include "symtable.h" +#include +#include + +namespace tolk { + +/* + * + * LEXER + * + */ +/* Returns a printable name for lexem type idx: fixed words for Eof, Ident, Number, String and Special, the quoted keyword text when symbols.get_keyword(idx) is non-null, and otherwise a fallback built with an ostringstream. NOTE(review): the fallback statement looks truncated in this copy (the brace opened after os is never closed); confirm against the upstream file. */ +std::string Lexem::lexem_name_str(int idx) { + if (idx == Eof) { + return "end of file"; + } else if (idx == Ident) { + return "identifier"; + } else if (idx == Number) { + return "number"; + } else if (idx == String) { + return "string"; + } else if (idx == Special) { + return "special"; + } else if (symbols.get_keyword(idx)) { + return "`" + symbols.get_keyword(idx)->str + "`"; + } else { + std::ostringstream os{""; + return os.str(); + } +} +/* Describes this lexem for messages: identifiers show their symbol name, strings show their text in double quotes, everything else uses lexem_name_str(tp). */ +std::string Lexem::name_str() const { + if (tp == Ident) { + return std::string{"identifier `"} + symbols.get_name(val) + "`"; + } else if (tp == String) { + return std::string{"string \""} + str + '"'; + } else { + return lexem_name_str(tp); + } +} +/* True when str is an optional leading minus followed by at least one digit. Digits are decimal, or hexadecimal in either case after a lowercase 0x prefix. Empty input, a lone minus and a bare 0x are rejected. */ +bool is_number(std::string str) { + auto st = str.begin(), en = str.end(); + if (st == en) { + return false; + } + if (*st == '-') { + st++; + } + bool hex = false; + if (st + 1 < en && *st == '0' && st[1] == 'x') { + st += 2; + hex = true; + } + if (st == en) { + return false; + } + while (st < en) { + int c = *st; + if (c >= '0' && c <= '9') { + ++st; + continue; + } + if (!hex) { + return false; + } + c |= 0x20; + if (c < 'a' || c > 'f') { + return false; + } + ++st; + } + return true; +} +/* Resolves tp when it is still Unknown and returns it; any other tp is returned untouched. A string found by symbols.lookup gets val set to the table index and tp set to -idx of that entry when idx is negative, else Ident; a string accepted by is_number becomes Number. Unknown and Ident are both 0 in the Lexem enum, so every case that leaves tp at 0 falls into the last branch, which sets tp to Ident and val to symbols.lookup(str, 1). */ +int Lexem::classify() { + if (tp != Unknown) { + return tp; + } + sym_idx_t i = symbols.lookup(str); + if (i) { + assert(str == symbols[i]->str); + str = symbols[i]->str; + sym_idx_t idx = symbols[i]->idx; + tp = (idx < 0 ?
-idx : Ident); + val = i; + } else if (is_number(str)) { + tp = Number; + } else { + tp = 0; + } + if (tp == Unknown) { + tp = Ident; + val = symbols.lookup(str, 1); + } + return tp; +} +/* Overwrites str, loc, tp and val, then returns the result of classify(). */ +int Lexem::set(std::string _str, const SrcLocation& _loc, int _tp, int _val) { + str = _str; + loc = _loc; + tp = _tp; + val = _val; + return classify(); +} +/* Sets up the character classes and comment delimiters. active_chars is a space-separated list of groups: the class starts at cc::active, is decremented at every space, and is reset to cc::allow_repeat when it reaches zero; only characters below 0x80 are recorded. Characters of quote_chars in the range above space up to 0x7f are marked cc::quote_char. When init is true the first lexem is read immediately. */ +Lexer::Lexer(SourceReader& _src, bool init, std::string active_chars, std::string eol_cmts, std::string open_cmts, + std::string close_cmts, std::string quote_chars, std::string multiline_quote) + : src(_src), eof(false), lexem("", src.here(), Lexem::Undefined), peek_lexem("", {}, Lexem::Undefined), + multiline_quote(std::move(multiline_quote)) { + std::memset(char_class, 0, sizeof(char_class)); + unsigned char activity = cc::active; + for (char c : active_chars) { + if (c == ' ') { + if (!--activity) { + activity = cc::allow_repeat; + } + } else if ((unsigned)c < 0x80) { + char_class[(unsigned)c] |= activity; + } + } + set_spec(eol_cmt, eol_cmts); + set_spec(cmt_op, open_cmts); + set_spec(cmt_cl, close_cmts); + for (int c : quote_chars) { + if (c > ' ' && c <= 0x7f) { + char_class[(unsigned)c] |= cc::quote_char; + } + } + if (init) { + next(); + } +} +/* Parses a space-separated delimiter list into arr. A one-character word is stored in arr[0], a two-character word in arr[1] and arr[2], longer words are skipped. All three slots start at -0x100. */ +void Lexer::set_spec(std::array& arr, std::string setup) { + arr[0] = arr[1] = arr[2] = -0x100; + std::size_t n = setup.size(), i; + for (i = 0; i < n; i++) { + if (setup[i] == ' ') { + continue; + } + if (i == n - 1 || setup[i + 1] == ' ') { + arr[0] = setup[i]; + } else if (i == n - 2 || (i < n - 2 && setup[i + 2] == ' ')) { + arr[1] = setup[i]; + arr[2] = setup[++i]; + } else { + while (i < n && setup[i] != ' ') { + i++; + } + } + } +} +/* True when the characters starting at begin, without running past end, spell out multiline_quote. Always false when multiline_quote is empty. */ +bool Lexer::is_multiline_quote(const char* begin, const char* end) { + if (multiline_quote.empty()) { + return false; + } + for (const char& c : multiline_quote) { + if (begin == end || *begin != c) { + return false; + } + ++begin; + } + return true; +} +/* Throws ParseError at the current lexem unless its type is exp_tp, then advances with next(). A non-null msg replaces the default type name in the error text. */ +void Lexer::expect(int exp_tp, const char* msg) { + if (tp() != exp_tp) { +
throw ParseError{lexem.loc, (msg ? std::string{msg} : Lexem::lexem_name_str(exp_tp)) + " expected instead of " + + cur().name_str()}; + } + next(); +} +/* Reads the next lexem into lexem and returns it. A lexem cached by peek() is returned first. Otherwise comments are skipped (comm is a bit stack: each open comment shifts in 0 for the one-character opener and 1 for the two-character opener, and a closer must match the low bit), then one of three forms is read: a multiline-quoted string, a quoted string or back-quoted token on a single line, or a plain token delimited by space, tab and the left-active and right-active character classes. A letter directly after a string is stored in val. NOTE(review): the quoted branch reads the character at end even when the scan stopped at get_end_ptr(); this assumes that position is readable - confirm with SourceReader. */ +const Lexem& Lexer::next() { + if (peek_lexem.valid()) { + lexem = std::move(peek_lexem); + peek_lexem.clear({}, Lexem::Undefined); + eof = (lexem.tp == Lexem::Eof); + return lexem; + } + if (eof) { + return lexem.clear(src.here(), Lexem::Eof); + } + long long comm = 1; + while (!src.seek_eof()) { + int cc = src.cur_char(), nc = src.next_char(); + if (cc == eol_cmt[0] || (cc == eol_cmt[1] && nc == eol_cmt[2])) { + src.load_line(); + } else if (cc == cmt_op[1] && nc == cmt_op[2]) { + src.advance(2); + comm = comm * 2 + 1; + } else if (cc == cmt_op[0]) { + src.advance(1); + comm *= 2; + } else if (comm == 1) { + break; + } else if (cc == cmt_cl[1] && nc == cmt_cl[2]) { + if (!(comm & 1)) { + src.error(std::string{"a `"} + (char)cmt_op[0] + "` comment closed by `" + (char)cmt_cl[1] + (char)cmt_cl[2] + + "`"); + } + comm >>= 1; + src.advance(2); + } else if (cc == cmt_cl[0]) { + if (!(comm & 1)) { + src.error(std::string{"a `"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment closed by `" + (char)cmt_cl[0] + + "`"); + } + comm >>= 1; + src.advance(1); + } else { + src.advance(1); + } + if (comm < 0) { + src.error("too many nested comments"); + } + } + if (src.seek_eof()) { + eof = true; + if (comm > 1) { + if (comm & 1) { + src.error(std::string{"`"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment extends past end of file"); + } else { + src.error(std::string{"`"} + (char)cmt_op[0] + "` comment extends past end of file"); + } + } + return lexem.clear(src.here(), Lexem::Eof); + } + if (is_multiline_quote(src.get_ptr(), src.get_end_ptr())) { + src.advance(multiline_quote.size()); + const char* end = nullptr; + SrcLocation here = src.here(); + std::string body; + while (!src.is_eof()) { + if (src.is_eoln()) { + body.push_back('\n'); + src.load_line(); + continue; + } + if (is_multiline_quote(src.get_ptr(),
src.get_end_ptr())) { + end = src.get_ptr(); + src.advance(multiline_quote.size()); + break; + } + body.push_back(src.cur_char()); + src.advance(1); + } + if (!end) { + src.error("string extends past end of file"); + } + lexem.set(body, here, Lexem::String); + int c = src.cur_char(); + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { + lexem.val = c; + src.advance(1); + } + return lexem; + } + int c = src.cur_char(); + const char* end = src.get_ptr(); + if (is_quote_char(c) || c == '`') { + int qc = c; + ++end; + while (end < src.get_end_ptr() && *end != qc) { + ++end; + } + if (*end != qc) { + src.error(qc == '`' ? "a `back-quoted` token extends past end of line" : "string extends past end of line"); + } + lexem.set(std::string{src.get_ptr() + 1, end}, src.here(), qc == '`' ? Lexem::Unknown : Lexem::String); + src.set_ptr(end + 1); + c = src.cur_char(); + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { + lexem.val = c; + src.set_ptr(end + 2); + } + // std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl; + return lexem; + } + int len = 0, pc = -0x100; + while (end < src.get_end_ptr()) { + c = *end; + bool repeated = (c == pc && is_repeatable(c)); + if (c == ' ' || c == 9 || (len && is_left_active(c) && !repeated)) { + break; + } + ++len; + ++end; + if (is_right_active(c) && !repeated) { + break; + } + pc = c; + } + lexem.set(std::string{src.get_ptr(), end}, src.here()); + src.set_ptr(end); + // std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl; + return lexem; +} +/* Looks one lexem ahead: saves the current lexem, calls next(), stores the result in peek_lexem and restores the current lexem. A cached peek_lexem is returned as is. At eof it instead clears the current lexem to Eof and returns it. */ +const Lexem& Lexer::peek() { + if (peek_lexem.valid()) { + return peek_lexem; + } + if (eof) { + return lexem.clear(src.here(), Lexem::Eof); + } + Lexem keep = std::move(lexem); + next(); + peek_lexem = std::move(lexem); + lexem = std::move(keep); + eof = false; + return peek_lexem; +} + +} // namespace tolk diff --git a/tolk/lexer.h b/tolk/lexer.h new file mode 100644 index 000000000..79d869068 --- /dev/null +++ b/tolk/lexer.h @@ -0,0 +1,113 @@ +/* + This file is
part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once +#include "srcread.h" +#include +#include +#include + +namespace tolk { + +/* + * + * LEXER + * + */ +/* One token: type tp, auxiliary value val, text str and source location loc. Unknown and Ident share the value 0. The constructor and set() both run classify(). */ +struct Lexem { + enum { Undefined = -2, Eof = -1, Unknown = 0, Ident = 0, Number = 1, Special = 2, String = 3 }; + int tp; + int val; + std::string str; + SrcLocation loc; + int classify(); + Lexem(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0) + : tp(_tp), val(_val), str(_str), loc(_loc) { + classify(); + } + int set(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0); + Lexem& clear(const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0) { + tp = _tp; + val = _val; + loc = _loc; + str = ""; + return *this; + } + bool valid() const { + return tp != Undefined; + } + std::string name_str() const; + void error(std::string _str) const { + throw ParseError{loc, _str}; + } + void error_at(std::string str1, std::string str2) const { + error(str1 + str + str2); + } + + static std::string lexem_name_str(int idx); +}; +/* Tokenizer over a SourceReader with one lexem of lookahead (peek_lexem) and a 128-entry character class table. */ +class Lexer { + SourceReader& src; + bool eof; + Lexem lexem, peek_lexem; + unsigned char char_class[128]; + std::array eol_cmt, cmt_op, cmt_cl; + std::string multiline_quote; + enum cc { left_active = 2, right_active = 1, active = 3, allow_repeat = 4, quote_char = 8 }; + + public: +
bool eof_found() const { + return eof; + } + Lexer(SourceReader& _src, bool init = false, std::string active_chars = ";,() ~.", std::string eol_cmts = ";;", + std::string open_cmts = "{-", std::string close_cmts = "-}", std::string quote_chars = "\"", + std::string multiline_quote = "\"\"\""); + const Lexem& next(); + const Lexem& cur() const { + return lexem; + } + const Lexem& peek(); + int tp() const { + return lexem.tp; + } + void expect(int exp_tp, const char* msg = 0); + int classify_char(unsigned c) const { + return c < 0x80 ? char_class[c] : 0; + } + bool is_active(int c) const { + return (classify_char(c) & cc::active) == cc::active; + } + bool is_left_active(int c) const { + return (classify_char(c) & cc::left_active); + } + bool is_right_active(int c) const { + return (classify_char(c) & cc::right_active); + } + bool is_repeatable(int c) const { + return (classify_char(c) & cc::allow_repeat); + } + bool is_quote_char(int c) const { + return (classify_char(c) & cc::quote_char); + } + + private: + void set_spec(std::array& arr, std::string setup); + bool is_multiline_quote(const char* begin, const char* end); +}; + +} // namespace tolk diff --git a/tolk/optimize.cpp b/tolk/optimize.cpp new file mode 100644 index 000000000..64087032d --- /dev/null +++ b/tolk/optimize.cpp @@ -0,0 +1,652 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * PEEPHOLE OPTIMIZER + * + */ + +void Optimizer::set_code(AsmOpConsList code) { + code_ = std::move(code); + unpack(); +} + +void Optimizer::unpack() { + int i = 0, j = 0; + for (AsmOpCons *p = code_.get(); p && i < n; p = p->cdr.get(), ++j) { + if (p->car->is_very_custom()) { + break; + } + if (p->car->is_comment()) { + continue; + } + op_cons_[i] = p; + op_[i] = std::move(p->car); + offs_[i] = j; + ++i; + } + l_ = i; + indent_ = (i ? op_[0]->indent : 0); +} + +void Optimizer::pack() { + for (int i = 0; i < l_; i++) { + op_cons_[i]->car = std::move(op_[i]); + op_cons_[i] = nullptr; + } + l_ = 0; +} + +void Optimizer::apply() { + if (!p_ && !q_) { + return; + } + tolk_assert(p_ > 0 && p_ <= l_ && q_ >= 0 && q_ <= n && l_ <= n); + for (int i = p_; i < l_; i++) { + tolk_assert(op_[i]); + op_cons_[i]->car = std::move(op_[i]); + op_cons_[i] = nullptr; + } + for (int c = offs_[p_ - 1]; c >= 0; --c) { + code_ = std::move(code_->cdr); + } + for (int j = q_ - 1; j >= 0; j--) { + tolk_assert(oq_[j]); + oq_[j]->indent = indent_; + code_ = AsmOpCons::cons(std::move(oq_[j]), std::move(code_)); + } + l_ = 0; +} + +AsmOpConsList Optimizer::extract_code() { + pack(); + return std::move(code_); +} + +void Optimizer::show_head() const { + if (!debug_) { + return; + } + std::cerr << "optimizing"; + for (int i = 0; i < l_; i++) { + if (op_[i]) { + std::cerr << ' ' << *op_[i] << ' '; + } else { + std::cerr << " (null) "; + } + } + std::cerr << std::endl; +} + +void Optimizer::show_left() const { + if (!debug_) { + return; + } + std::cerr << "// *** rewriting"; + for (int i = 0; i < p_; i++) { + if (op_[i]) { + std::cerr << ' ' << *op_[i] << ' '; + } else { + std::cerr << " (null) "; + } + } +} + +void Optimizer::show_right() const { + if (!debug_) { + return; + } + std::cerr << "->"; + for (int i = 0; i < 
q_; i++) { + if (oq_[i]) { + std::cerr << ' ' << *oq_[i] << ' '; + } else { + std::cerr << " (null) "; + } + } + std::cerr << std::endl; +} + +bool Optimizer::say(std::string str) const { + if (debug_) { + std::cerr << str << std::endl; + } + return true; +} + +bool Optimizer::find_const_op(int* op_idx, int cst) { + for (int i = 0; i < l2_; i++) { + if (op_[i]->is_gconst() && tr_[i].get(0) == cst) { + *op_idx = i; + return true; + } + } + return false; +} + +bool Optimizer::is_push_const(int* i, int* c) const { + return pb_ >= 3 && pb_ <= l2_ && tr_[pb_ - 1].is_push_const(i, c); +} + +// PUSHCONST c ; PUSH s(i+1) ; SWAP -> PUSH s(i) ; PUSHCONST c +bool Optimizer::rewrite_push_const(int i, int c) { + p_ = pb_; + q_ = 2; + int idx = -1; + if (!(p_ >= 2 && find_const_op(&idx, c) && idx < p_)) { + return false; + } + show_left(); + oq_[1] = std::move(op_[idx]); + oq_[0] = std::move(op_[!idx]); + *oq_[0] = AsmOp::Push(i); + show_right(); + return true; +} + +bool Optimizer::is_const_rot(int* c) const { + return pb_ >= 3 && pb_ <= l2_ && tr_[pb_ - 1].is_const_rot(c); +} + +bool Optimizer::rewrite_const_rot(int c) { + p_ = pb_; + q_ = 2; + int idx = -1; + if (!(p_ >= 2 && find_const_op(&idx, c) && idx < p_)) { + return false; + } + show_left(); + oq_[0] = std::move(op_[idx]); + oq_[1] = std::move(op_[!idx]); + *oq_[1] = AsmOp::Custom("ROT", 3, 3); + show_right(); + return true; +} + +bool Optimizer::is_const_pop(int* c, int* i) const { + return pb_ >= 3 && pb_ <= l2_ && tr_[pb_ - 1].is_const_pop(c, i); +} + +bool Optimizer::rewrite_const_pop(int c, int i) { + p_ = pb_; + q_ = 2; + int idx = -1; + if (!(p_ >= 2 && find_const_op(&idx, c) && idx < p_)) { + return false; + } + show_left(); + oq_[0] = std::move(op_[idx]); + oq_[1] = std::move(op_[!idx]); + *oq_[1] = AsmOp::Pop(i); + show_right(); + return true; +} + +bool Optimizer::is_const_push_xchgs() { + if (!(pb_ >= 2 && pb_ <= l2_ && op_[0]->is_gconst())) { + return false; + } + StackTransform t; + int pos = 0, i; + for 
(i = 1; i < pb_; i++) { + int a, b; + if (op_[i]->is_xchg(&a, &b)) { + if (pos == a) { + pos = b; + } else if (pos == b) { + pos = a; + } else { + t.apply_xchg(a - (a > pos), b - (b > pos)); + } + } else if (op_[i]->is_push(&a)) { + if (pos == a) { + return false; + } + t.apply_push(a - (a > pos)); + ++pos; + } else { + return false; + } + } + if (pos) { + return false; + } + t.apply_push_newconst(); + if (t <= tr_[i - 1]) { + p_ = i; + return true; + } else { + return false; + } +} + +bool Optimizer::rewrite_const_push_xchgs() { + if (!p_) { + return false; + } + show_left(); + auto c_op = std::move(op_[0]); + tolk_assert(c_op->is_gconst()); + StackTransform t; + q_ = 0; + int pos = 0; + for (int i = 1; i < p_; i++) { + int a, b; + if (op_[i]->is_xchg(&a, &b)) { + if (a == pos) { + pos = b; + } else if (b == pos) { + pos = a; + } else { + oq_[q_] = std::move(op_[i]); + if (a > pos) { + oq_[q_]->a = a - 1; + } + if (b > pos) { + oq_[q_]->b = b - 1; + } + tolk_assert(apply_op(t, *oq_[q_])); + ++q_; + } + } else { + tolk_assert(op_[i]->is_push(&a)); + tolk_assert(a != pos); + oq_[q_] = std::move(op_[i]); + if (a > pos) { + oq_[q_]->a = a - 1; + } + tolk_assert(apply_op(t, *oq_[q_])); + ++q_; + ++pos; + } + } + tolk_assert(!pos); + t.apply_push_newconst(); + tolk_assert(t <= tr_[p_ - 1]); + oq_[q_++] = std::move(c_op); + show_right(); + return true; +} + +bool Optimizer::rewrite(int p, AsmOp&& new_op) { + tolk_assert(p > 0 && p <= l_); + p_ = p; + q_ = 1; + show_left(); + oq_[0] = std::move(op_[0]); + *oq_[0] = new_op; + show_right(); + return true; +} + +bool Optimizer::rewrite(int p, AsmOp&& new_op1, AsmOp&& new_op2) { + tolk_assert(p > 1 && p <= l_); + p_ = p; + q_ = 2; + show_left(); + oq_[0] = std::move(op_[0]); + *oq_[0] = new_op1; + oq_[1] = std::move(op_[1]); + *oq_[1] = new_op2; + show_right(); + return true; +} + +bool Optimizer::rewrite(int p, AsmOp&& new_op1, AsmOp&& new_op2, AsmOp&& new_op3) { + tolk_assert(p > 2 && p <= l_); + p_ = p; + q_ = 3; + 
show_left(); + oq_[0] = std::move(op_[0]); + *oq_[0] = new_op1; + oq_[1] = std::move(op_[1]); + *oq_[1] = new_op2; + oq_[2] = std::move(op_[2]); + *oq_[2] = new_op3; + show_right(); + return true; +} + +bool Optimizer::rewrite_nop() { + tolk_assert(p_ > 0 && p_ <= l_); + q_ = 0; + show_left(); + show_right(); + return true; +} + +bool Optimizer::is_pred(const std::function& pred, int min_p) { + min_p = std::max(min_p, pb_); + for (int p = l2_; p >= min_p; p--) { + if (pred(tr_[p - 1])) { + p_ = p; + return true; + } + } + return false; +} + +bool Optimizer::is_same_as(const StackTransform& trans, int min_p) { + return is_pred([&trans](const auto& t) { return t >= trans; }, min_p); +} + +// s1 s3 XCHG ; s0 s2 XCHG -> 2SWAP +bool Optimizer::is_2swap() { + static const StackTransform t_2swap{2, 3, 0, 1, 4}; + return is_same_as(t_2swap); +} + +// s3 PUSH ; s3 PUSH -> 2OVER +bool Optimizer::is_2over() { + static const StackTransform t_2over{2, 3, 0}; + return is_same_as(t_2over); +} + +bool Optimizer::is_2dup() { + static const StackTransform t_2dup{0, 1, 0}; + return is_same_as(t_2dup); +} + +bool Optimizer::is_tuck() { + static const StackTransform t_tuck{0, 1, 0, 2}; + return is_same_as(t_tuck); +} + +bool Optimizer::is_2drop() { + static const StackTransform t_2drop{2}; + return is_same_as(t_2drop); +} + +bool Optimizer::is_rot() { + return is_pred([](const auto& t) { return t.is_rot(); }); +} + +bool Optimizer::is_rotrev() { + return is_pred([](const auto& t) { return t.is_rotrev(); }); +} + +bool Optimizer::is_nop() { + return is_pred([](const auto& t) { return t.is_id(); }, 1); +} + +bool Optimizer::is_xchg(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_xchg(i, j) && ((*i < 16 && *j < 16) || (!*i && *j < 256)); }); +} + +bool Optimizer::is_xchg_xchg(int* i, int* j, int* k, int* l) { + return is_pred([i, j, k, l](const auto& t) { + return t.is_xchg_xchg(i, j, k, l) && (*i < 2 && *j < (*i ? 16 : 256) && *k < 2 && *l < (*k ? 
16 : 256)); + }) && + (!(p_ == 2 && op_[0]->is_xchg(*i, *j) && op_[1]->is_xchg(*k, *l))); +} + +bool Optimizer::is_push(int* i) { + return is_pred([i](const auto& t) { return t.is_push(i) && *i < 256; }); +} + +bool Optimizer::is_pop(int* i) { + return is_pred([i](const auto& t) { return t.is_pop(i) && *i < 256; }); +} + +bool Optimizer::is_pop_pop(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_pop_pop(i, j) && *i < 256 && *j < 256; }, 3); +} + +bool Optimizer::is_push_rot(int* i) { + return is_pred([i](const auto& t) { return t.is_push_rot(i) && *i < 16; }, 3); +} + +bool Optimizer::is_push_rotrev(int* i) { + return is_pred([i](const auto& t) { return t.is_push_rotrev(i) && *i < 16; }, 3); +} + +bool Optimizer::is_push_xchg(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_push_xchg(i, j, k) && *i < 16 && *j < 16 && *k < 16; }) && + !(p_ == 2 && op_[0]->is_push() && op_[1]->is_xchg()); +} + +bool Optimizer::is_xchg2(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_xchg2(i, j) && *i < 16 && *j < 16; }); +} + +bool Optimizer::is_xcpu(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_xcpu(i, j) && *i < 16 && *j < 16; }); +} + +bool Optimizer::is_puxc(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_puxc(i, j) && *i < 16 && *j < 15; }); +} + +bool Optimizer::is_push2(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_push2(i, j) && *i < 16 && *j < 16; }); +} + +bool Optimizer::is_xchg3(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_xchg3(i, j, k) && *i < 16 && *j < 16 && *k < 16; }); +} + +bool Optimizer::is_xc2pu(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_xc2pu(i, j, k) && *i < 16 && *j < 16 && *k < 16; }); +} + +bool Optimizer::is_xcpuxc(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_xcpuxc(i, j, k) && *i < 16 && *j < 16 
&& *k < 15; }); +} + +bool Optimizer::is_xcpu2(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_xcpu2(i, j, k) && *i < 16 && *j < 16 && *k < 16; }); +} + +bool Optimizer::is_puxc2(int* i, int* j, int* k) { + return is_pred( + [i, j, k](const auto& t) { return t.is_puxc2(i, j, k) && *i < 16 && *j < 15 && *k < 15 && *j + *k != -1; }); +} + +bool Optimizer::is_puxcpu(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_puxcpu(i, j, k) && *i < 16 && *j < 15 && *k < 15; }); +} + +bool Optimizer::is_pu2xc(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_pu2xc(i, j, k) && *i < 16 && *j < 15 && *k < 14; }); +} + +bool Optimizer::is_push3(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_push3(i, j, k) && *i < 16 && *j < 16 && *k < 16; }); +} + +bool Optimizer::is_blkswap(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_blkswap(i, j) && *i > 0 && *j > 0 && *i <= 16 && *j <= 16; }); +} + +bool Optimizer::is_blkpush(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_blkpush(i, j) && *i > 0 && *i < 16 && *j < 16; }); +} + +bool Optimizer::is_blkdrop(int* i) { + return is_pred([i](const auto& t) { return t.is_blkdrop(i) && *i > 0 && *i < 16; }); +} + +bool Optimizer::is_blkdrop2(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_blkdrop2(i, j) && *i > 0 && *i < 16 && *j > 0 && *j < 16; }); +} + +bool Optimizer::is_reverse(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_reverse(i, j) && *i >= 2 && *i <= 17 && *j < 16; }); +} + +bool Optimizer::is_nip_seq(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_nip_seq(i, j) && *i >= 3 && *i <= 15; }); +} + +bool Optimizer::is_pop_blkdrop(int* i, int* k) { + return is_pred([i, k](const auto& t) { return t.is_pop_blkdrop(i, k) && *i >= *k && *k >= 2 && *k <= 15; }, 3); +} + +bool 
Optimizer::is_2pop_blkdrop(int* i, int* j, int* k) { + return is_pred( + [i, j, k](const auto& t) { return t.is_2pop_blkdrop(i, j, k) && *i >= *k && *j >= *k && *k >= 2 && *k <= 15; }, + 3); +} + +bool Optimizer::compute_stack_transforms() { + StackTransform trans; + for (int i = 0; i < l_; i++) { + if (!apply_op(trans, *op_[i])) { + l2_ = i; + return true; + } + tr_[i] = trans; + } + l2_ = l_; + return true; +} + +bool Optimizer::show_stack_transforms() const { + show_head(); + // slow version + /* + StackTransform trans2; + std::cerr << "id = " << trans2 << std::endl; + for (int i = 0; i < l_; i++) { + StackTransform op; + if (!apply_op(op, *op_[i])) { + std::cerr << "* (" << *op_[i] << " = invalid)\n"; + break; + } + trans2 *= op; + std::cerr << "* " << *op_[i] << " = " << op << " -> " << trans2 << std::endl; + } + */ + // fast version + StackTransform trans; + for (int i = 0; i < l_; i++) { + std::cerr << trans << std::endl << *op_[i] << " -> "; + if (!apply_op(trans, *op_[i])) { + std::cerr << " " << std::endl; + return true; + } + } + std::cerr << trans << std::endl; + return true; +} + +bool Optimizer::find_at_least(int pb) { + p_ = q_ = 0; + pb_ = pb; + // show_stack_transforms(); + int i, j, k, l, c; + return (is_push_const(&i, &c) && rewrite_push_const(i, c)) || (is_nop() && rewrite_nop()) || + (!(mode_ & 1) && is_const_rot(&c) && rewrite_const_rot(c)) || + (is_const_push_xchgs() && rewrite_const_push_xchgs()) || (is_const_pop(&c, &i) && rewrite_const_pop(c, i)) || + (is_xchg(&i, &j) && rewrite(AsmOp::Xchg(i, j))) || (is_push(&i) && rewrite(AsmOp::Push(i))) || + (is_pop(&i) && rewrite(AsmOp::Pop(i))) || (is_pop_pop(&i, &j) && rewrite(AsmOp::Pop(i), AsmOp::Pop(j))) || + (is_xchg_xchg(&i, &j, &k, &l) && rewrite(AsmOp::Xchg(i, j), AsmOp::Xchg(k, l))) || + (!(mode_ & 1) && + ((is_rot() && rewrite(AsmOp::Custom("ROT", 3, 3))) || (is_rotrev() && rewrite(AsmOp::Custom("-ROT", 3, 3))) || + (is_2dup() && rewrite(AsmOp::Custom("2DUP", 2, 4))) || + (is_2swap() && 
rewrite(AsmOp::Custom("2SWAP", 2, 4))) || + (is_2over() && rewrite(AsmOp::Custom("2OVER", 2, 4))) || + (is_tuck() && rewrite(AsmOp::Custom("TUCK", 2, 3))) || + (is_2drop() && rewrite(AsmOp::Custom("2DROP", 2, 0))) || (is_xchg2(&i, &j) && rewrite(AsmOp::Xchg2(i, j))) || + (is_xcpu(&i, &j) && rewrite(AsmOp::XcPu(i, j))) || (is_puxc(&i, &j) && rewrite(AsmOp::PuXc(i, j))) || + (is_push2(&i, &j) && rewrite(AsmOp::Push2(i, j))) || (is_blkswap(&i, &j) && rewrite(AsmOp::BlkSwap(i, j))) || + (is_blkpush(&i, &j) && rewrite(AsmOp::BlkPush(i, j))) || (is_blkdrop(&i) && rewrite(AsmOp::BlkDrop(i))) || + (is_push_rot(&i) && rewrite(AsmOp::Push(i), AsmOp::Custom("ROT"))) || + (is_push_rotrev(&i) && rewrite(AsmOp::Push(i), AsmOp::Custom("-ROT"))) || + (is_push_xchg(&i, &j, &k) && rewrite(AsmOp::Push(i), AsmOp::Xchg(j, k))) || + (is_reverse(&i, &j) && rewrite(AsmOp::BlkReverse(i, j))) || + (is_blkdrop2(&i, &j) && rewrite(AsmOp::BlkDrop2(i, j))) || + (is_nip_seq(&i, &j) && rewrite(AsmOp::Xchg(i, j), AsmOp::BlkDrop(i))) || + (is_pop_blkdrop(&i, &k) && rewrite(AsmOp::Pop(i), AsmOp::BlkDrop(k))) || + (is_2pop_blkdrop(&i, &j, &k) && (k >= 3 && k <= 13 && i != j + 1 && i <= 15 && j <= 14 + ? 
rewrite(AsmOp::Xchg2(j + 1, i), AsmOp::BlkDrop(k + 2)) + : rewrite(AsmOp::Pop(i), AsmOp::Pop(j), AsmOp::BlkDrop(k)))) || + (is_xchg3(&i, &j, &k) && rewrite(AsmOp::Xchg3(i, j, k))) || + (is_xc2pu(&i, &j, &k) && rewrite(AsmOp::Xc2Pu(i, j, k))) || + (is_xcpuxc(&i, &j, &k) && rewrite(AsmOp::XcPuXc(i, j, k))) || + (is_xcpu2(&i, &j, &k) && rewrite(AsmOp::XcPu2(i, j, k))) || + (is_puxc2(&i, &j, &k) && rewrite(AsmOp::PuXc2(i, j, k))) || + (is_puxcpu(&i, &j, &k) && rewrite(AsmOp::PuXcPu(i, j, k))) || + (is_pu2xc(&i, &j, &k) && rewrite(AsmOp::Pu2Xc(i, j, k))) || + (is_push3(&i, &j, &k) && rewrite(AsmOp::Push3(i, j, k))))); +} + +bool Optimizer::find() { + if (!compute_stack_transforms()) { + return false; + } + for (int pb = l_; pb > 0; --pb) { + if (find_at_least(pb)) { + return true; + } + } + return false; +} + +bool Optimizer::optimize() { + bool f = false; + while (find()) { + f = true; + apply(); + unpack(); + } + return f; +} + +AsmOpConsList optimize_code_head(AsmOpConsList op_list, int mode) { + Optimizer opt(std::move(op_list), op_rewrite_comments, mode); + opt.optimize(); + return opt.extract_code(); +} + +AsmOpConsList optimize_code(AsmOpConsList op_list, int mode) { + std::vector> v; + while (op_list) { + if (!op_list->car->is_comment()) { + op_list = optimize_code_head(std::move(op_list), mode); + } + if (op_list) { + v.push_back(std::move(op_list->car)); + op_list = std::move(op_list->cdr); + } + } + for (auto it = v.rbegin(); it < v.rend(); ++it) { + op_list = AsmOpCons::cons(std::move(*it), std::move(op_list)); + } + return std::move(op_list); +} + +void optimize_code(AsmOpList& ops) { + AsmOpConsList op_list; + for (auto it = ops.list_.rbegin(); it < ops.list_.rend(); ++it) { + op_list = AsmOpCons::cons(std::make_unique(std::move(*it)), std::move(op_list)); + } + for (int mode : {1, 1, 1, 1, 0, 0, 0, 0}) { + op_list = optimize_code(std::move(op_list), mode); + } + ops.list_.clear(); + while (op_list) { + ops.list_.push_back(std::move(*(op_list->car))); + 
op_list = std::move(op_list->cdr); + } +} + +} // namespace tolk diff --git a/tolk/parse-tolk.cpp b/tolk/parse-tolk.cpp new file mode 100644 index 000000000..7fffb15ab --- /dev/null +++ b/tolk/parse-tolk.cpp @@ -0,0 +1,1809 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "td/utils/crypto.h" +#include "common/refint.h" +#include "openssl/digest.hpp" +#include "block/block.h" +#include "block-parse.h" + +namespace tolk { +using namespace std::literals::string_literals; + +int compute_symbol_subclass(std::string str) { + if (str.size() < 2) { + return IdSc::undef; + } else if (str[0] == '.') { + return IdSc::dotid; + } else if (str[0] == '~') { + return IdSc::tildeid; + } else { + return IdSc::undef; + } +} + +inline bool is_dot_ident(sym_idx_t idx) { + return symbols.get_subclass(idx) == IdSc::dotid; +} + +inline bool is_tilde_ident(sym_idx_t idx) { + return symbols.get_subclass(idx) == IdSc::tildeid; +} + +inline bool is_special_ident(sym_idx_t idx) { + return symbols.get_subclass(idx) != IdSc::undef; +} + +/* + * + * PARSE SOURCE + * + */ + +// TE ::= TA | TA -> TE +// TA ::= int | ... 
| cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ] +TypeExpr* parse_type(Lexer& lex); + +TypeExpr* parse_type1(Lexer& lex) { + switch (lex.tp()) { + case _Int: + lex.next(); + return TypeExpr::new_atomic(_Int); + case _Cell: + lex.next(); + return TypeExpr::new_atomic(_Cell); + case _Slice: + lex.next(); + return TypeExpr::new_atomic(_Slice); + case _Builder: + lex.next(); + return TypeExpr::new_atomic(_Builder); + case _Cont: + lex.next(); + return TypeExpr::new_atomic(_Cont); + case _Tuple: + lex.next(); + return TypeExpr::new_atomic(_Tuple); + case _Var: + case '_': + lex.next(); + return TypeExpr::new_hole(); + case _Ident: { + auto sym = lookup_symbol(lex.cur().val); + if (sym && dynamic_cast(sym->value)) { + auto val = dynamic_cast(sym->value); + lex.next(); + return val->get_type(); + } + lex.cur().error_at("`", "` is not a type identifier"); + } + } + int c; + if (lex.tp() == '[') { + lex.next(); + c = ']'; + } else { + lex.expect('('); + c = ')'; + } + if (lex.tp() == c) { + lex.next(); + return c == ')' ? TypeExpr::new_unit() : TypeExpr::new_tuple({}); + } + auto t1 = parse_type(lex); + if (lex.tp() == ')') { + lex.expect(c); + return t1; + } + std::vector tlist{1, t1}; + while (lex.tp() == ',') { + lex.next(); + tlist.push_back(parse_type(lex)); + } + lex.expect(c); + return c == ')' ? 
TypeExpr::new_tensor(std::move(tlist)) : TypeExpr::new_tuple(std::move(tlist)); +} + +TypeExpr* parse_type(Lexer& lex) { + auto res = parse_type1(lex); + if (lex.tp() == _Mapsto) { + lex.next(); + auto to = parse_type(lex); + return TypeExpr::new_map(res, to); + } else { + return res; + } +} + +FormalArg parse_formal_arg(Lexer& lex, int fa_idx) { + TypeExpr* arg_type = 0; + SrcLocation loc = lex.cur().loc; + if (lex.tp() == '_') { + lex.next(); + if (lex.tp() == ',' || lex.tp() == ')') { + return std::make_tuple(TypeExpr::new_hole(), (SymDef*)nullptr, loc); + } + arg_type = TypeExpr::new_hole(); + loc = lex.cur().loc; + } else if (lex.tp() != _Ident) { + arg_type = parse_type(lex); + } else { + auto sym = lookup_symbol(lex.cur().val); + if (sym && dynamic_cast(sym->value)) { + auto val = dynamic_cast(sym->value); + lex.next(); + arg_type = val->get_type(); + } else { + arg_type = TypeExpr::new_hole(); + } + } + if (lex.tp() == '_' || lex.tp() == ',' || lex.tp() == ')') { + if (lex.tp() == '_') { + loc = lex.cur().loc; + lex.next(); + } + return std::make_tuple(arg_type, (SymDef*)nullptr, loc); + } + if (lex.tp() != _Ident) { + lex.expect(_Ident, "formal parameter name"); + } + loc = lex.cur().loc; + if (prohibited_var_names.count(symbols.get_name(lex.cur().val))) { + throw ParseError{ + loc, PSTRING() << "symbol `" << symbols.get_name(lex.cur().val) << "` cannot be redefined as a variable"}; + } + SymDef* new_sym_def = define_symbol(lex.cur().val, true, loc); + if (!new_sym_def) { + lex.cur().error_at("cannot define symbol `", "`"); + } + if (new_sym_def->value) { + lex.cur().error_at("redefined formal parameter `", "`"); + } + new_sym_def->value = new SymVal{SymVal::_Param, fa_idx, arg_type}; + lex.next(); + return std::make_tuple(arg_type, new_sym_def, loc); +} + +void parse_global_var_decl(Lexer& lex) { + TypeExpr* var_type = 0; + SrcLocation loc = lex.cur().loc; + if (lex.tp() == '_') { + lex.next(); + var_type = TypeExpr::new_hole(); + loc = lex.cur().loc; + } 
else if (lex.tp() != _Ident) { + var_type = parse_type(lex); + } else { + auto sym = lookup_symbol(lex.cur().val); + if (sym && dynamic_cast(sym->value)) { + auto val = dynamic_cast(sym->value); + lex.next(); + var_type = val->get_type(); + } else { + var_type = TypeExpr::new_hole(); + } + } + if (lex.tp() != _Ident) { + lex.expect(_Ident, "global variable name"); + } + loc = lex.cur().loc; + SymDef* sym_def = define_global_symbol(lex.cur().val, false, loc); + if (!sym_def) { + lex.cur().error_at("cannot define global symbol `", "`"); + } + if (sym_def->value) { + auto val = dynamic_cast(sym_def->value); + if (!val) { + lex.cur().error_at("symbol `", "` cannot be redefined as a global variable"); + } + try { + unify(var_type, val->sym_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot unify new type " << var_type << " of global variable `" << sym_def->name() + << "` with its previous type " << val->sym_type << ": " << ue; + lex.cur().error(os.str()); + } + } else { + sym_def->value = new SymValGlobVar{glob_var_cnt++, var_type}; + glob_vars.push_back(sym_def); + } + lex.next(); +} + +extern int const_cnt; +Expr* parse_expr(Lexer& lex, CodeBlob& code, bool nv = false); + +void parse_const_decl(Lexer& lex) { + SrcLocation loc = lex.cur().loc; + int wanted_type = Expr::_None; + if (lex.tp() == _Int) { + wanted_type = Expr::_Const; + lex.next(); + } else if (lex.tp() == _Slice) { + wanted_type = Expr::_SliceConst; + lex.next(); + } + if (lex.tp() != _Ident) { + lex.expect(_Ident, "constant name"); + } + loc = lex.cur().loc; + SymDef* sym_def = define_global_symbol(lex.cur().val, false, loc); + if (!sym_def) { + lex.cur().error_at("cannot define global symbol `", "`"); + } + Lexem ident = lex.cur(); + lex.next(); + if (lex.tp() != '=') { + lex.cur().error_at("expected = instead of ", ""); + } + lex.next(); + CodeBlob code; + if (pragma_allow_post_modification.enabled()) { + code.flags |= CodeBlob::_AllowPostModification; + } + if 
(pragma_compute_asm_ltr.enabled()) { + code.flags |= CodeBlob::_ComputeAsmLtr; + } + // Handles processing and resolution of literals and consts + auto x = parse_expr(lex, code, false); // also does lex.next() ! + if (x->flags != Expr::_IsRvalue) { + lex.cur().error("expression is not strictly Rvalue"); + } + if ((wanted_type == Expr::_Const) && (x->cls == Expr::_Apply)) + wanted_type = Expr::_None; // Apply is additionally checked to result in an integer + if ((wanted_type != Expr::_None) && (x->cls != wanted_type)) { + lex.cur().error("expression type does not match wanted type"); + } + SymValConst* new_value = nullptr; + if (x->cls == Expr::_Const) { // Integer constant + new_value = new SymValConst{const_cnt++, x->intval}; + } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) + new_value = new SymValConst{const_cnt++, x->strval}; + } else if (x->cls == Expr::_Apply) { + code.emplace_back(loc, Op::_Import, std::vector()); + auto tmp_vars = x->pre_compile(code); + code.emplace_back(loc, Op::_Return, std::move(tmp_vars)); + code.emplace_back(loc, Op::_Nop); // This is neccessary to prevent SIGSEGV! 
+ // It is REQUIRED to execute "optimizations" as in tolk.cpp + code.simplify_var_types(); + code.prune_unreachable_code(); + code.split_vars(true); + for (int i = 0; i < 16; i++) { + code.compute_used_code_vars(); + code.fwd_analyze(); + code.prune_unreachable_code(); + } + code.mark_noreturn(); + AsmOpList out_list(0, &code.vars); + code.generate_code(out_list); + if (out_list.list_.size() != 1) { + lex.cur().error("precompiled expression must result in single operation"); + } + auto op = out_list.list_[0]; + if (!op.is_const()) { + lex.cur().error("precompiled expression must result in compilation time constant"); + } + if (op.origin.is_null() || !op.origin->is_valid()) { + lex.cur().error("precompiled expression did not result in a valid integer constant"); + } + new_value = new SymValConst{const_cnt++, op.origin}; + } else { + lex.cur().error("integer or slice literal or constant expected"); + } + if (sym_def->value) { + SymValConst* old_value = dynamic_cast(sym_def->value); + Keyword new_type = new_value->get_type(); + if (!old_value || old_value->get_type() != new_type || + (new_type == _Int && *old_value->get_int_value() != *new_value->get_int_value()) || + (new_type == _Slice && old_value->get_str_value() != new_value->get_str_value())) { + ident.error_at("global symbol `", "` already exists"); + } + } + sym_def->value = new_value; +} + +FormalArgList parse_formal_args(Lexer& lex) { + FormalArgList args; + lex.expect('(', "formal argument list"); + if (lex.tp() == ')') { + lex.next(); + return args; + } + int fa_idx = 0; + args.push_back(parse_formal_arg(lex, fa_idx++)); + while (lex.tp() == ',') { + lex.next(); + args.push_back(parse_formal_arg(lex, fa_idx++)); + } + lex.expect(')'); + return args; +} + +void parse_const_decls(Lexer& lex) { + lex.expect(_Const); + while (true) { + parse_const_decl(lex); + if (lex.tp() != ',') { + break; + } + lex.expect(','); + } + lex.expect(';'); +} + +TypeExpr* extract_total_arg_type(const FormalArgList& arg_list) { + 
if (arg_list.empty()) { + return TypeExpr::new_unit(); + } + if (arg_list.size() == 1) { + return std::get<0>(arg_list[0]); + } + std::vector type_list; + for (auto& x : arg_list) { + type_list.push_back(std::get<0>(x)); + } + return TypeExpr::new_tensor(std::move(type_list)); +} + +void parse_global_var_decls(Lexer& lex) { + lex.expect(_Global); + while (true) { + parse_global_var_decl(lex); + if (lex.tp() != ',') { + break; + } + lex.expect(','); + } + lex.expect(';'); +} + +SymValCodeFunc* make_new_glob_func(SymDef* func_sym, TypeExpr* func_type, bool impure = false) { + SymValCodeFunc* res = new SymValCodeFunc{glob_func_cnt, func_type, impure}; + func_sym->value = res; + glob_func.push_back(func_sym); + glob_func_cnt++; + return res; +} + +bool check_global_func(const Lexem& cur, sym_idx_t func_name = 0) { + if (!func_name) { + func_name = cur.val; + } + SymDef* def = lookup_symbol(func_name); + if (!def) { + cur.loc.show_error(std::string{"undefined function `"} + symbols.get_name(func_name) + + "`, defining a global function of unknown type"); + def = define_global_symbol(func_name, 0, cur.loc); + tolk_assert(def && "cannot define global function"); + ++undef_func_cnt; + make_new_glob_func(def, TypeExpr::new_func()); // was: ... 
::new_func() + return true; + } + SymVal* val = dynamic_cast(def->value); + if (!val) { + cur.error(std::string{"symbol `"} + symbols.get_name(func_name) + "` has no value and no type"); + return false; + } else if (!val->get_type()) { + cur.error(std::string{"symbol `"} + symbols.get_name(func_name) + "` has no type, possibly not a function"); + return false; + } else { + return true; + } +} + +Expr* make_func_apply(Expr* fun, Expr* x) { + Expr* res; + if (fun->cls == Expr::_Glob) { + if (x->cls == Expr::_Tensor) { + res = new Expr{Expr::_Apply, fun->sym, x->args}; + } else { + res = new Expr{Expr::_Apply, fun->sym, {x}}; + } + res->flags = Expr::_IsRvalue | (fun->flags & Expr::_IsImpure); + } else { + res = new Expr{Expr::_VarApply, {fun, x}}; + res->flags = Expr::_IsRvalue; + } + return res; +} + +// parse ( E { , E } ) | () | [ E { , E } ] | [] | id | num | _ +Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { + if (lex.tp() == '(' || lex.tp() == '[') { + bool tf = (lex.tp() == '['); + int clbr = (tf ? 
']' : ')'); + SrcLocation loc{lex.cur().loc}; + lex.next(); + if (lex.tp() == clbr) { + lex.next(); + Expr* res = new Expr{Expr::_Tensor, {}}; + res->flags = Expr::_IsRvalue; + res->here = loc; + res->e_type = TypeExpr::new_unit(); + if (tf) { + res = new Expr{Expr::_MkTuple, {res}}; + res->flags = Expr::_IsRvalue; + res->here = loc; + res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); + } + return res; + } + Expr* res = parse_expr(lex, code, nv); + if (lex.tp() == ')') { + lex.expect(clbr); + return res; + } + std::vector type_list; + type_list.push_back(res->e_type); + int f = res->flags; + res = new Expr{Expr::_Tensor, {res}}; + while (lex.tp() == ',') { + lex.next(); + auto x = parse_expr(lex, code, nv); + res->pb_arg(x); + if ((f ^ x->flags) & Expr::_IsType) { + lex.cur().error("mixing type and non-type expressions inside the same tuple"); + } + f &= x->flags; + type_list.push_back(x->e_type); + } + res->here = loc; + res->flags = f; + res->e_type = TypeExpr::new_tensor(std::move(type_list), !tf); + if (tf) { + res = new Expr{Expr::_MkTuple, {res}}; + res->flags = f; + res->here = loc; + res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); + } + lex.expect(clbr); + return res; + } + int t = lex.tp(); + if (t == Lexem::Number) { + Expr* res = new Expr{Expr::_Const, lex.cur().loc}; + res->flags = Expr::_IsRvalue; + res->intval = td::string_to_int256(lex.cur().str); + if (res->intval.is_null() || !res->intval->signed_fits_bits(257)) { + lex.cur().error_at("invalid integer constant `", "`"); + } + res->e_type = TypeExpr::new_atomic(_Int); + lex.next(); + return res; + } + if (t == Lexem::String) { + std::string str = lex.cur().str; + int str_type = lex.cur().val; + Expr* res; + switch (str_type) { + case 0: + case 's': + case 'a': + { + res = new Expr{Expr::_SliceConst, lex.cur().loc}; + res->e_type = TypeExpr::new_atomic(_Slice); + break; + } + case 'u': + case 'h': + case 'H': + case 'c': + { + res = new Expr{Expr::_Const, lex.cur().loc}; + 
res->e_type = TypeExpr::new_atomic(_Int); + break; + } + default: + { + res = new Expr{Expr::_Const, lex.cur().loc}; + res->e_type = TypeExpr::new_atomic(_Int); + lex.cur().error("invalid string type `" + std::string(1, static_cast(str_type)) + "`"); + return res; + } + } + res->flags = Expr::_IsRvalue; + switch (str_type) { + case 0: { + res->strval = td::hex_encode(str); + break; + } + case 's': { + res->strval = str; + unsigned char buff[128]; + int bits = (int)td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); + if (bits < 0) { + lex.cur().error_at("Invalid hex bitstring constant `", "`"); + } + break; + } + case 'a': { // MsgAddressInt + // todo rewrite stdaddress parsing (if done, CMake dep "ton_crypto" can be replaced with "ton_crypto_core") + block::StdAddress a; + if (a.parse_addr(str)) { + res->strval = block::tlb::MsgAddressInt().pack_std_address(a)->as_bitslice().to_hex(); + } else { + lex.cur().error_at("invalid standard address `", "`"); + } + break; + } + case 'u': { + res->intval = td::hex_string_to_int256(td::hex_encode(str)); + if (!str.size()) { + lex.cur().error("empty integer ascii-constant"); + } + if (res->intval.is_null()) { + lex.cur().error_at("too long integer ascii-constant `", "`"); + } + break; + } + case 'h': + case 'H': + { + unsigned char hash[32]; + digest::hash_str(hash, str.data(), str.size()); + res->intval = td::bits_to_refint(hash, (str_type == 'h') ? 
32 : 256, false); + break; + } + case 'c': + { + res->intval = td::make_refint(td::crc32(td::Slice{str})); + break; + } + } + lex.next(); + return res; + } + if (t == '_') { + Expr* res = new Expr{Expr::_Hole, lex.cur().loc}; + res->val = -1; + res->flags = (Expr::_IsLvalue | Expr::_IsHole | Expr::_IsNewVar); + res->e_type = TypeExpr::new_hole(); + lex.next(); + return res; + } + if (t == _Var) { + Expr* res = new Expr{Expr::_Type, lex.cur().loc}; + res->flags = Expr::_IsType; + res->e_type = TypeExpr::new_hole(); + lex.next(); + return res; + } + if (t == _Int || t == _Cell || t == _Slice || t == _Builder || t == _Cont || t == _Type || t == _Tuple) { + Expr* res = new Expr{Expr::_Type, lex.cur().loc}; + res->flags = Expr::_IsType; + res->e_type = TypeExpr::new_atomic(t); + lex.next(); + return res; + } + if (t == _Ident) { + auto sym = lookup_symbol(lex.cur().val); + if (sym && dynamic_cast(sym->value)) { + auto val = dynamic_cast(sym->value); + Expr* res = new Expr{Expr::_Type, lex.cur().loc}; + res->flags = Expr::_IsType; + res->e_type = val->get_type(); + lex.next(); + return res; + } + if (sym && dynamic_cast(sym->value)) { + auto val = dynamic_cast(sym->value); + Expr* res = new Expr{Expr::_GlobVar, lex.cur().loc}; + res->e_type = val->get_type(); + res->sym = sym; + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImpure; + lex.next(); + return res; + } + if (sym && dynamic_cast(sym->value)) { + auto val = dynamic_cast(sym->value); + Expr* res = new Expr{Expr::_None, lex.cur().loc}; + res->flags = Expr::_IsRvalue; + if (val->type == _Int) { + res->cls = Expr::_Const; + res->intval = val->get_int_value(); + } + else if (val->type == _Slice) { + res->cls = Expr::_SliceConst; + res->strval = val->get_str_value(); + } + else { + lex.cur().error("Invalid symbolic constant type"); + } + res->e_type = TypeExpr::new_atomic(val->type); + lex.next(); + return res; + } + bool auto_apply = false; + Expr* res = new Expr{Expr::_Var, lex.cur().loc}; + if (nv) { + 
res->val = ~lex.cur().val; + res->e_type = TypeExpr::new_hole(); + res->flags = Expr::_IsLvalue | Expr::_IsNewVar; + // std::cerr << "defined new variable " << lex.cur().str << " : " << res->e_type << std::endl; + } else { + if (!sym) { + check_global_func(lex.cur()); + sym = lookup_symbol(lex.cur().val); + } + res->sym = sym; + SymVal* val = nullptr; + if (sym) { + val = dynamic_cast(sym->value); + } + if (!val) { + lex.cur().error_at("undefined identifier `", "`"); + } else if (val->type == SymVal::_Func) { + res->e_type = val->get_type(); + res->cls = Expr::_Glob; + auto_apply = val->auto_apply; + } else if (val->idx < 0) { + lex.cur().error_at("accessing variable `", "` being defined"); + } else { + res->val = val->idx; + res->e_type = val->get_type(); + // std::cerr << "accessing variable " << lex.cur().str << " : " << res->e_type << std::endl; + } + // std::cerr << "accessing symbol " << lex.cur().str << " : " << res->e_type << (val->impure ? " (impure)" : " (pure)") << std::endl; + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (val->impure ? Expr::_IsImpure : 0); + } + if (auto_apply) { + int impure = res->flags & Expr::_IsImpure; + delete res; + res = new Expr{Expr::_Apply, sym, {}}; + res->flags = Expr::_IsRvalue | impure; + } + res->deduce_type(lex.cur()); + lex.next(); + return res; + } + lex.expect(Lexem::Ident); + return nullptr; +} + +// parse E { E } +Expr* parse_expr90(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res = parse_expr100(lex, code, nv); + while (lex.tp() == '(' || lex.tp() == '[' || (lex.tp() == _Ident && !is_special_ident(lex.cur().val))) { + if (res->is_type()) { + Expr* x = parse_expr100(lex, code, true); + x->chk_lvalue(lex.cur()); // chk_lrvalue() ? 
+ TypeExpr* tp = res->e_type; + delete res; + res = new Expr{Expr::_TypeApply, {x}}; + res->e_type = tp; + res->here = lex.cur().loc; + try { + unify(res->e_type, x->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot transform expression of type " << x->e_type << " to explicitly requested type " << res->e_type + << ": " << ue; + lex.cur().error(os.str()); + } + res->flags = x->flags; + } else { + Expr* x = parse_expr100(lex, code, false); + x->chk_rvalue(lex.cur()); + res = make_func_apply(res, x); + res->here = lex.cur().loc; + res->deduce_type(lex.cur()); + } + } + return res; +} + +// parse E { .method E | ~method E } +Expr* parse_expr80(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res = parse_expr90(lex, code, nv); + while (lex.tp() == _Ident && is_special_ident(lex.cur().val)) { + auto modify = is_tilde_ident(lex.cur().val); + auto obj = res; + if (modify) { + obj->chk_lvalue(lex.cur()); + } else { + obj->chk_rvalue(lex.cur()); + } + auto loc = lex.cur().loc; + auto name = lex.cur().val; + auto sym = lookup_symbol(name); + if (!sym || !dynamic_cast(sym->value)) { + auto name1 = symbols.lookup(lex.cur().str.substr(1)); + if (name1) { + auto sym1 = lookup_symbol(name1); + if (sym1 && dynamic_cast(sym1->value)) { + name = name1; + sym = sym1; + } + } + } + check_global_func(lex.cur(), name); + if (verbosity >= 2) { + std::cerr << "using symbol `" << symbols.get_name(name) << "` for method call of " << lex.cur().str << std::endl; + } + sym = lookup_symbol(name); + SymValFunc* val = sym ? 
dynamic_cast(sym->value) : nullptr; + if (!val) { + lex.cur().error_at("undefined method identifier `", "`"); + } + lex.next(); + auto x = parse_expr100(lex, code, false); + x->chk_rvalue(lex.cur()); + if (x->cls == Expr::_Tensor) { + res = new Expr{Expr::_Apply, name, {obj}}; + res->args.insert(res->args.end(), x->args.begin(), x->args.end()); + } else { + res = new Expr{Expr::_Apply, name, {obj, x}}; + } + res->here = loc; + res->flags = Expr::_IsRvalue | (val->impure ? Expr::_IsImpure : 0); + res->deduce_type(lex.cur()); + if (modify) { + auto tmp = res; + res = new Expr{Expr::_LetFirst, {obj->copy(), tmp}}; + res->here = loc; + res->flags = tmp->flags; + res->set_val(name); + res->deduce_type(lex.cur()); + } + } + return res; +} + +// parse [ ~ ] E +Expr* parse_expr75(Lexer& lex, CodeBlob& code, bool nv) { + if (lex.tp() == '~') { + sym_idx_t name = symbols.lookup_add("~_"); + check_global_func(lex.cur(), name); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto x = parse_expr80(lex, code, false); + x->chk_rvalue(lex.cur()); + auto res = new Expr{Expr::_Apply, name, {x}}; + res->here = loc; + res->set_val('~'); + res->flags = Expr::_IsRvalue; + res->deduce_type(lex.cur()); + return res; + } else { + return parse_expr80(lex, code, nv); + } +} + +// parse E { (* | / | % | /% ) E } +Expr* parse_expr30(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res = parse_expr75(lex, code, nv); + while (lex.tp() == '*' || lex.tp() == '/' || lex.tp() == '%' || lex.tp() == _DivMod || lex.tp() == _DivC || + lex.tp() == _DivR || lex.tp() == _ModC || lex.tp() == _ModR || lex.tp() == '&') { + res->chk_rvalue(lex.cur()); + int t = lex.tp(); + sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); + SrcLocation loc{lex.cur().loc}; + check_global_func(lex.cur(), name); + lex.next(); + auto x = parse_expr75(lex, code, false); + x->chk_rvalue(lex.cur()); + res = new Expr{Expr::_Apply, name, {res, x}}; + res->here = loc; + res->set_val(t); + res->flags = 
Expr::_IsRvalue; + res->deduce_type(lex.cur()); + } + return res; +} + +// parse [-] E { (+ | - | `|` | ^) E } +Expr* parse_expr20(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res; + int t = lex.tp(); + if (t == '-') { + sym_idx_t name = symbols.lookup_add("-_"); + check_global_func(lex.cur(), name); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto x = parse_expr30(lex, code, false); + x->chk_rvalue(lex.cur()); + res = new Expr{Expr::_Apply, name, {x}}; + res->here = loc; + res->set_val(t); + res->flags = Expr::_IsRvalue; + res->deduce_type(lex.cur()); + } else { + res = parse_expr30(lex, code, nv); + } + while (lex.tp() == '-' || lex.tp() == '+' || lex.tp() == '|' || lex.tp() == '^') { + res->chk_rvalue(lex.cur()); + t = lex.tp(); + sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); + check_global_func(lex.cur(), name); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto x = parse_expr30(lex, code, false); + x->chk_rvalue(lex.cur()); + res = new Expr{Expr::_Apply, name, {res, x}}; + res->here = loc; + res->set_val(t); + res->flags = Expr::_IsRvalue; + res->deduce_type(lex.cur()); + } + return res; +} + +// parse E { ( << | >> | >>~ | >>^ ) E } +Expr* parse_expr17(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res = parse_expr20(lex, code, nv); + while (lex.tp() == _Lshift || lex.tp() == _Rshift || lex.tp() == _RshiftC || lex.tp() == _RshiftR) { + res->chk_rvalue(lex.cur()); + int t = lex.tp(); + sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); + check_global_func(lex.cur(), name); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto x = parse_expr20(lex, code, false); + x->chk_rvalue(lex.cur()); + res = new Expr{Expr::_Apply, name, {res, x}}; + res->here = loc; + res->set_val(t); + res->flags = Expr::_IsRvalue; + res->deduce_type(lex.cur()); + } + return res; +} + +// parse E [ (== | < | > | <= | >= | != | <=> ) E ] +Expr* parse_expr15(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res = 
parse_expr17(lex, code, nv); + if (lex.tp() == _Eq || lex.tp() == '<' || lex.tp() == '>' || lex.tp() == _Leq || lex.tp() == _Geq || + lex.tp() == _Neq || lex.tp() == _Spaceship) { + res->chk_rvalue(lex.cur()); + int t = lex.tp(); + sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); + check_global_func(lex.cur(), name); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto x = parse_expr17(lex, code, false); + x->chk_rvalue(lex.cur()); + res = new Expr{Expr::_Apply, name, {res, x}}; + res->here = loc; + res->set_val(t); + res->flags = Expr::_IsRvalue; + res->deduce_type(lex.cur()); + } + return res; +} + +// parse E [ ? E : E ] +Expr* parse_expr13(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res = parse_expr15(lex, code, nv); + if (lex.tp() == '?') { + res->chk_rvalue(lex.cur()); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto x = parse_expr(lex, code, false); + x->chk_rvalue(lex.cur()); + lex.expect(':'); + auto y = parse_expr13(lex, code, false); + y->chk_rvalue(lex.cur()); + res = new Expr{Expr::_CondExpr, {res, x, y}}; + res->here = loc; + res->flags = Expr::_IsRvalue; + res->deduce_type(lex.cur()); + } + return res; +} + +// parse LE1 (= | += | -= | ... 
) E2 +Expr* parse_expr10(Lexer& lex, CodeBlob& code, bool nv) { + auto x = parse_expr13(lex, code, nv); + int t = lex.tp(); + if (t == _PlusLet || t == _MinusLet || t == _TimesLet || t == _DivLet || t == _DivRLet || t == _DivCLet || + t == _ModLet || t == _ModCLet || t == _ModRLet || t == _LshiftLet || t == _RshiftLet || t == _RshiftCLet || + t == _RshiftRLet || t == _AndLet || t == _OrLet || t == _XorLet) { + x->chk_lvalue(lex.cur()); + x->chk_rvalue(lex.cur()); + sym_idx_t name = symbols.lookup_add(std::string{"^_"} + lex.cur().str + "_"); + check_global_func(lex.cur(), name); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto y = parse_expr10(lex, code, false); + y->chk_rvalue(lex.cur()); + Expr* z = new Expr{Expr::_Apply, name, {x, y}}; + z->here = loc; + z->set_val(t); + z->flags = Expr::_IsRvalue; + z->deduce_type(lex.cur()); + Expr* res = new Expr{Expr::_Letop, {x->copy(), z}}; + res->here = loc; + res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue; + res->set_val(t); + res->deduce_type(lex.cur()); + return res; + } else if (t == '=') { + x->chk_lvalue(lex.cur()); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto y = parse_expr10(lex, code, false); + y->chk_rvalue(lex.cur()); + x->predefine_vars(); + x->define_new_vars(code); + Expr* res = new Expr{Expr::_Letop, {x, y}}; + res->here = loc; + res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue; + res->set_val(t); + res->deduce_type(lex.cur()); + return res; + } else { + return x; + } +} + +Expr* parse_expr(Lexer& lex, CodeBlob& code, bool nv) { + return parse_expr10(lex, code, nv); +} + +namespace blk_fl { +enum { end = 1, ret = 2, empty = 4 }; +typedef int val; +constexpr val init = end | empty; +void combine(val& x, const val y) { + x |= y & ret; + x &= y | ~(end | empty); +} +void combine_parallel(val& x, const val y) { + x &= y | ~(ret | empty); + x |= y & end; +} +} // namespace blk_fl + +blk_fl::val parse_return_stmt(Lexer& lex, CodeBlob& code) { + auto expr = parse_expr(lex, 
code); + expr->chk_rvalue(lex.cur()); + try { + // std::cerr << "in return: "; + unify(expr->e_type, code.ret_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous function return type " << code.ret_type + << " cannot be unified with return statement expression type " << expr->e_type << ": " << ue; + lex.cur().error(os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + code.emplace_back(lex.cur().loc, Op::_Return, std::move(tmp_vars)); + lex.expect(';'); + return blk_fl::ret; +} + +blk_fl::val parse_implicit_ret_stmt(Lexer& lex, CodeBlob& code) { + auto ret_type = TypeExpr::new_unit(); + try { + // std::cerr << "in implicit return: "; + unify(ret_type, code.ret_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous function return type " << code.ret_type + << " cannot be unified with implicit end-of-block return type " << ret_type << ": " << ue; + lex.cur().error(os.str()); + } + code.emplace_back(lex.cur().loc, Op::_Return); + return blk_fl::ret; +} + +blk_fl::val parse_stmt(Lexer& lex, CodeBlob& code); + +blk_fl::val parse_block_stmt(Lexer& lex, CodeBlob& code, bool no_new_scope = false) { + lex.expect('{'); + if (!no_new_scope) { + open_scope(lex); + } + blk_fl::val res = blk_fl::init; + bool warned = false; + while (lex.tp() != '}') { + if (!(res & blk_fl::end) && !warned) { + lex.cur().loc.show_warning("unreachable code"); + warned = true; + } + blk_fl::combine(res, parse_stmt(lex, code)); + } + if (!no_new_scope) { + close_scope(lex); + } + lex.expect('}'); + return res; +} + +blk_fl::val parse_repeat_stmt(Lexer& lex, CodeBlob& code) { + SrcLocation loc{lex.cur().loc}; + lex.expect(_Repeat); + auto expr = parse_expr(lex, code); + expr->chk_rvalue(lex.cur()); + auto cnt_type = TypeExpr::new_atomic(_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "repeat count value of type " << expr->e_type << " is not an integer: " << ue; + 
lex.cur().error(os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + if (tmp_vars.size() != 1) { + lex.cur().error("repeat count value is not a singleton"); + } + Op& repeat_op = code.emplace_back(loc, Op::_Repeat, tmp_vars); + code.push_set_cur(repeat_op.block0); + blk_fl::val res = parse_block_stmt(lex, code); + code.close_pop_cur(lex.cur().loc); + return res | blk_fl::end; +} + +blk_fl::val parse_while_stmt(Lexer& lex, CodeBlob& code) { + SrcLocation loc{lex.cur().loc}; + lex.expect(_While); + auto expr = parse_expr(lex, code); + expr->chk_rvalue(lex.cur()); + auto cnt_type = TypeExpr::new_atomic(_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "while condition value of type " << expr->e_type << " is not an integer: " << ue; + lex.cur().error(os.str()); + } + Op& while_op = code.emplace_back(loc, Op::_While); + code.push_set_cur(while_op.block0); + while_op.left = expr->pre_compile(code); + code.close_pop_cur(lex.cur().loc); + if (while_op.left.size() != 1) { + lex.cur().error("while condition value is not a singleton"); + } + code.push_set_cur(while_op.block1); + blk_fl::val res1 = parse_block_stmt(lex, code); + code.close_pop_cur(lex.cur().loc); + return res1 | blk_fl::end; +} + +blk_fl::val parse_do_stmt(Lexer& lex, CodeBlob& code) { + Op& while_op = code.emplace_back(lex.cur().loc, Op::_Until); + lex.expect(_Do); + code.push_set_cur(while_op.block0); + open_scope(lex); + blk_fl::val res = parse_block_stmt(lex, code, true); + lex.expect(_Until); + auto expr = parse_expr(lex, code); + expr->chk_rvalue(lex.cur()); + close_scope(lex); + auto cnt_type = TypeExpr::new_atomic(_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`until` condition value of type " << expr->e_type << " is not an integer: " << ue; + lex.cur().error(os.str()); + } + while_op.left = expr->pre_compile(code); + code.close_pop_cur(lex.cur().loc); + if 
(while_op.left.size() != 1) { + lex.cur().error("`until` condition value is not a singleton"); + } + return res & ~blk_fl::empty; +} + +blk_fl::val parse_try_catch_stmt(Lexer& lex, CodeBlob& code) { + code.require_callxargs = true; + lex.expect(_Try); + Op& try_catch_op = code.emplace_back(lex.cur().loc, Op::_TryCatch); + code.push_set_cur(try_catch_op.block0); + blk_fl::val res0 = parse_block_stmt(lex, code); + code.close_pop_cur(lex.cur().loc); + lex.expect(_Catch); + code.push_set_cur(try_catch_op.block1); + open_scope(lex); + Expr* expr = parse_expr(lex, code, true); + expr->chk_lvalue(lex.cur()); + TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(_Int)); + try { + unify(expr->e_type, tvm_error_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`catch` arguments have incorrect type " << expr->e_type << ": " << ue; + lex.cur().error(os.str()); + } + expr->predefine_vars(); + expr->define_new_vars(code); + try_catch_op.left = expr->pre_compile(code); + tolk_assert(try_catch_op.left.size() == 2 || try_catch_op.left.size() == 1); + blk_fl::val res1 = parse_block_stmt(lex, code); + close_scope(lex); + code.close_pop_cur(lex.cur().loc); + blk_fl::combine_parallel(res0, res1); + return res0; +} + +blk_fl::val parse_if_stmt(Lexer& lex, CodeBlob& code, int first_lex = _If) { + SrcLocation loc{lex.cur().loc}; + lex.expect(first_lex); + auto expr = parse_expr(lex, code); + expr->chk_rvalue(lex.cur()); + auto flag_type = TypeExpr::new_atomic(_Int); + try { + unify(expr->e_type, flag_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`if` condition value of type " << expr->e_type << " is not an integer: " << ue; + lex.cur().error(os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + if (tmp_vars.size() != 1) { + lex.cur().error("condition value is not a singleton"); + } + Op& if_op = code.emplace_back(loc, Op::_If, tmp_vars); + code.push_set_cur(if_op.block0); + blk_fl::val res1 = 
parse_block_stmt(lex, code); + blk_fl::val res2 = blk_fl::init; + code.close_pop_cur(lex.cur().loc); + if (lex.tp() == _Else) { + lex.expect(_Else); + code.push_set_cur(if_op.block1); + res2 = parse_block_stmt(lex, code); + code.close_pop_cur(lex.cur().loc); + } else if (lex.tp() == _Elseif || lex.tp() == _Elseifnot) { + code.push_set_cur(if_op.block1); + res2 = parse_if_stmt(lex, code, lex.tp()); + code.close_pop_cur(lex.cur().loc); + } else { + if_op.block1 = std::make_unique(lex.cur().loc, Op::_Nop); + } + if (first_lex == _Ifnot || first_lex == _Elseifnot) { + std::swap(if_op.block0, if_op.block1); + } + blk_fl::combine_parallel(res1, res2); + return res1; +} + +blk_fl::val parse_stmt(Lexer& lex, CodeBlob& code) { + switch (lex.tp()) { + case _Return: { + lex.next(); + return parse_return_stmt(lex, code); + } + case '{': { + return parse_block_stmt(lex, code); + } + case ';': { + lex.next(); + return blk_fl::init; + } + case _Repeat: + return parse_repeat_stmt(lex, code); + case _If: + case _Ifnot: + return parse_if_stmt(lex, code, lex.tp()); + case _Do: + return parse_do_stmt(lex, code); + case _While: + return parse_while_stmt(lex, code); + case _Try: + return parse_try_catch_stmt(lex, code); + default: { + auto expr = parse_expr(lex, code); + expr->chk_rvalue(lex.cur()); + expr->pre_compile(code); + lex.expect(';'); + return blk_fl::end; + } + } +} + +CodeBlob* parse_func_body(Lexer& lex, FormalArgList arg_list, TypeExpr* ret_type) { + lex.expect('{'); + CodeBlob* blob = new CodeBlob{ret_type}; + if (pragma_allow_post_modification.enabled()) { + blob->flags |= CodeBlob::_AllowPostModification; + } + if (pragma_compute_asm_ltr.enabled()) { + blob->flags |= CodeBlob::_ComputeAsmLtr; + } + blob->import_params(std::move(arg_list)); + blk_fl::val res = blk_fl::init; + bool warned = false; + while (lex.tp() != '}') { + if (!(res & blk_fl::end) && !warned) { + lex.cur().loc.show_warning("unreachable code"); + warned = true; + } + blk_fl::combine(res, 
parse_stmt(lex, *blob)); + } + if (res & blk_fl::end) { + parse_implicit_ret_stmt(lex, *blob); + } + blob->close_blk(lex.cur().loc); + lex.expect('}'); + return blob; +} + +SymValAsmFunc* parse_asm_func_body(Lexer& lex, TypeExpr* func_type, const FormalArgList& arg_list, TypeExpr* ret_type, + bool impure = false) { + auto loc = lex.cur().loc; + lex.expect(_Asm); + int cnt = (int)arg_list.size(); + int width = ret_type->get_width(); + if (width < 0 || width > 16) { + throw ParseError{loc, "return type of an assembler built-in function must have a well-defined fixed width"}; + } + if (arg_list.size() > 16) { + throw ParseError{loc, "assembler built-in function must have at most 16 arguments"}; + } + std::vector cum_arg_width; + cum_arg_width.push_back(0); + int tot_width = 0; + for (auto& arg : arg_list) { + int arg_width = std::get(arg)->get_width(); + if (arg_width < 0 || arg_width > 16) { + throw ParseError{std::get(arg), + "parameters of an assembler built-in function must have a well-defined fixed width"}; + } + cum_arg_width.push_back(tot_width += arg_width); + } + std::vector asm_ops; + std::vector arg_order, ret_order; + if (lex.tp() == '(') { + lex.expect('('); + if (lex.tp() != _Mapsto) { + std::vector visited(cnt, false); + for (int i = 0; i < cnt; i++) { + if (lex.tp() != _Ident) { + lex.expect(_Ident); + } + auto sym = lookup_symbol(lex.cur().val); + int j; + for (j = 0; j < cnt; j++) { + if (std::get(arg_list[j]) == sym) { + break; + } + } + if (j == cnt) { + lex.cur().error("formal argument name expected"); + } + if (visited[j]) { + lex.cur().error("formal argument listed twice"); + } + visited[j] = true; + int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; + while (c1 < c2) { + arg_order.push_back(c1++); + } + lex.next(); + } + tolk_assert(arg_order.size() == (unsigned)tot_width); + } + if (lex.tp() == _Mapsto) { + lex.expect(_Mapsto); + std::vector visited(width, false); + for (int i = 0; i < width; i++) { + if (lex.tp() != Lexem::Number || 
lex.cur().str.size() > 3) { + lex.expect(Lexem::Number); + } + int j = atoi(lex.cur().str.c_str()); + if (j < 0 || j >= width || visited[j]) { + lex.cur().error("expected integer return value index 0 .. width-1"); + } + visited[j] = true; + ret_order.push_back(j); + lex.next(); + } + } + lex.expect(')'); + } + while (lex.tp() == _String) { + std::string ops = lex.cur().str; // \n\n... + std::string op; + for (const char& c : ops) { + if (c == '\n') { + if (!op.empty()) { + asm_ops.push_back(AsmOp::Parse(op, cnt, width)); + if (asm_ops.back().is_custom()) { + cnt = width; + } + op.clear(); + } + } else { + op.push_back(c); + } + } + if (!op.empty()) { + asm_ops.push_back(AsmOp::Parse(op, cnt, width)); + if (asm_ops.back().is_custom()) { + cnt = width; + } + } + lex.next(); + } + if (asm_ops.empty()) { + throw ParseError{lex.cur().loc, "string with assembler instruction expected"}; + } + lex.expect(';'); + std::string crc_s; + for (const AsmOp& asm_op : asm_ops) { + crc_s += asm_op.op; + } + crc_s.push_back(impure); + for (const int& x : arg_order) { + crc_s += std::string((const char*) (&x), (const char*) (&x + 1)); + } + for (const int& x : ret_order) { + crc_s += std::string((const char*) (&x), (const char*) (&x + 1)); + } + auto res = new SymValAsmFunc{func_type, asm_ops, impure}; + res->arg_order = std::move(arg_order); + res->ret_order = std::move(ret_order); + res->crc = td::crc64(crc_s); + return res; +} + +std::vector parse_type_var_list(Lexer& lex) { + std::vector res; + lex.expect(_Forall); + int idx = 0; + while (true) { + if (lex.tp() == _Type) { + lex.next(); + } + if (lex.tp() != _Ident) { + throw ParseError{lex.cur().loc, "free type identifier expected"}; + } + auto loc = lex.cur().loc; + if (prohibited_var_names.count(symbols.get_name(lex.cur().val))) { + throw ParseError{loc, PSTRING() << "symbol `" << symbols.get_name(lex.cur().val) + << "` cannot be redefined as a variable"}; + } + SymDef* new_sym_def = define_symbol(lex.cur().val, true, loc); + 
if (!new_sym_def || new_sym_def->value) { + lex.cur().error_at("redefined type variable `", "`"); + } + auto var = TypeExpr::new_var(idx); + new_sym_def->value = new SymValType{SymVal::_Typename, idx++, var}; + res.push_back(var); + lex.next(); + if (lex.tp() != ',') { + break; + } + lex.next(); + } + lex.expect(_Mapsto); + return res; +} + +void type_var_usage(TypeExpr* expr, const std::vector& typevars, std::vector& used) { + if (expr->constr != TypeExpr::te_Var) { + for (auto arg : expr->args) { + type_var_usage(arg, typevars, used); + } + return; + } + for (std::size_t i = 0; i < typevars.size(); i++) { + if (typevars[i] == expr) { + used.at(i) = true; + return; + } + } + return; +} + +TypeExpr* compute_type_closure(TypeExpr* expr, const std::vector& typevars) { + if (typevars.empty()) { + return expr; + } + std::vector used(typevars.size(), false); + type_var_usage(expr, typevars, used); + std::vector used_vars; + for (std::size_t i = 0; i < typevars.size(); i++) { + if (used.at(i)) { + used_vars.push_back(typevars[i]); + } + } + if (!used_vars.empty()) { + expr = TypeExpr::new_forall(std::move(used_vars), expr); + } + return expr; +} + +void parse_func_def(Lexer& lex) { + SrcLocation loc{lex.cur().loc}; + open_scope(lex); + std::vector type_vars; + if (lex.tp() == _Forall) { + type_vars = parse_type_var_list(lex); + } + auto ret_type = parse_type(lex); + if (lex.tp() != _Ident) { + throw ParseError{lex.cur().loc, "function name identifier expected"}; + } + Lexem func_name = lex.cur(); + lex.next(); + FormalArgList arg_list = parse_formal_args(lex); + bool impure = (lex.tp() == _Impure); + if (impure) { + lex.next(); + } + int f = 0; + if (lex.tp() == _Inline || lex.tp() == _InlineRef) { + f = (lex.tp() == _Inline) ? 
1 : 2; + lex.next(); + } + td::RefInt256 method_id; + std::string method_name; + if (lex.tp() == _MethodId) { + lex.next(); + if (lex.tp() == '(') { + lex.expect('('); + if (lex.tp() == Lexem::String) { + method_name = lex.cur().str; + } else if (lex.tp() == Lexem::Number) { + method_name = lex.cur().str; + method_id = td::string_to_int256(method_name); + if (method_id.is_null()) { + lex.cur().error_at("invalid integer constant `", "`"); + } + } else { + throw ParseError{lex.cur().loc, "integer or string method identifier expected"}; + } + lex.next(); + lex.expect(')'); + } else { + method_name = func_name.str; + } + if (method_id.is_null()) { + unsigned crc = td::crc16(method_name); + method_id = td::make_refint((crc & 0xffff) | 0x10000); + } + } + if (lex.tp() != ';' && lex.tp() != '{' && lex.tp() != _Asm) { + lex.expect('{', "function body block expected"); + } + TypeExpr* func_type = TypeExpr::new_map(extract_total_arg_type(arg_list), ret_type); + func_type = compute_type_closure(func_type, type_vars); + if (verbosity >= 1) { + std::cerr << "function " << func_name.str << " : " << func_type << std::endl; + } + SymDef* func_sym = define_global_symbol(func_name.val, 0, loc); + tolk_assert(func_sym); + SymValFunc* func_sym_val = dynamic_cast(func_sym->value); + if (func_sym->value) { + if (func_sym->value->type != SymVal::_Func || !func_sym_val) { + lex.cur().error("was not defined as a function before"); + } + try { + unify(func_sym_val->sym_type, func_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous type of function " << func_name.str << " : " << func_sym_val->sym_type + << " cannot be unified with new type " << func_type << ": " << ue; + lex.cur().error(os.str()); + } + } + if (lex.tp() == ';') { + make_new_glob_func(func_sym, func_type, impure); + lex.next(); + } else if (lex.tp() == '{') { + if (dynamic_cast(func_sym_val)) { + lex.cur().error("function `"s + func_name.str + "` has been already defined as an assembler built-in"); 
+ } + SymValCodeFunc* func_sym_code; + if (func_sym_val) { + func_sym_code = dynamic_cast(func_sym_val); + if (!func_sym_code) { + lex.cur().error("function `"s + func_name.str + "` has been already defined in an yet-unknown way"); + } + } else { + func_sym_code = make_new_glob_func(func_sym, func_type, impure); + } + if (func_sym_code->code) { + lex.cur().error("redefinition of function `"s + func_name.str + "`"); + } + CodeBlob* code = parse_func_body(lex, arg_list, ret_type); + code->name = func_name.str; + code->loc = loc; + // code->print(std::cerr); // !!!DEBUG!!! + func_sym_code->code = code; + } else { + Lexem asm_lexem = lex.cur(); + SymValAsmFunc* asm_func = parse_asm_func_body(lex, func_type, arg_list, ret_type, impure); + if (func_sym_val) { + if (dynamic_cast(func_sym_val)) { + asm_lexem.error("function `"s + func_name.str + "` was already declared as an ordinary function"); + } + SymValAsmFunc* asm_func_old = dynamic_cast(func_sym_val); + if (asm_func_old) { + if (asm_func->crc != asm_func_old->crc) { + asm_lexem.error("redefinition of built-in assembler function `"s + func_name.str + "`"); + } + } else { + asm_lexem.error("redefinition of previously (somehow) defined function `"s + func_name.str + "`"); + } + } + func_sym->value = asm_func; + } + if (method_id.not_null()) { + auto val = dynamic_cast(func_sym->value); + if (!val) { + lex.cur().error("cannot set method id for unknown function `"s + func_name.str + "`"); + } + if (val->method_id.is_null()) { + val->method_id = std::move(method_id); + } else if (td::cmp(val->method_id, method_id) != 0) { + lex.cur().error("integer method identifier for `"s + func_name.str + "` changed from " + + val->method_id->to_dec_string() + " to a different value " + method_id->to_dec_string()); + } + } + if (f) { + auto val = dynamic_cast(func_sym->value); + if (!val) { + lex.cur().error("cannot set unknown function `"s + func_name.str + "` as an inline"); + } + if (!(val->flags & 3)) { + val->flags = 
(short)(val->flags | f); + } else if ((val->flags & 3) != f) { + lex.cur().error("inline mode for `"s + func_name.str + "` changed with respect to a previous declaration"); + } + } + if (verbosity >= 1) { + std::cerr << "new type of function " << func_name.str << " : " << func_type << std::endl; + } + close_scope(lex); +} + +std::string tolk_ver_test = tolk_version; + +void parse_pragma(Lexer& lex) { + auto pragma = lex.cur(); + lex.next(); + if (lex.tp() != _Ident) { + lex.expect(_Ident, "pragma name expected"); + } + auto pragma_name = lex.cur().str; + lex.next(); + if (!pragma_name.compare("version") || !pragma_name.compare("not-version")) { + bool negate = !pragma_name.compare("not-version"); + char op = '='; bool eq = false; + int sem_ver[3] = {0, 0, 0}; + char segs = 1; + auto stoi = [&](const std::string& s) { + auto R = td::to_integer_safe(s); + if (R.is_error()) { + lex.cur().error("invalid semver format"); + } + return R.move_as_ok(); + }; + if (lex.tp() == _Number) { + sem_ver[0] = stoi(lex.cur().str); + } else if (lex.tp() == _Ident) { + auto id1 = lex.cur().str; + char ch1 = id1[0]; + if ((ch1 == '>') || (ch1 == '<') || (ch1 == '=') || (ch1 == '^')) { + op = ch1; + } else { + lex.cur().error("unexpected comparator operation"); + } + if (id1.length() < 2) { + lex.cur().error("expected number after comparator"); + } + if (id1[1] == '=') { + eq = true; + if (id1.length() < 3) { + lex.cur().error("expected number after comparator"); + } + sem_ver[0] = stoi(id1.substr(2)); + } else { + sem_ver[0] = stoi(id1.substr(1)); + } + } else { + lex.cur().error("expected semver with optional comparator"); + } + lex.next(); + if (lex.tp() != ';') { + if (lex.tp() != _Ident || lex.cur().str[0] != '.') { + lex.cur().error("invalid semver format"); + } + sem_ver[1] = stoi(lex.cur().str.substr(1)); + segs = 2; + lex.next(); + } + if (lex.tp() != ';') { + if (lex.tp() != _Ident || lex.cur().str[0] != '.') { + lex.cur().error("invalid semver format"); + } + sem_ver[2] = 
stoi(lex.cur().str.substr(1)); + segs = 3; + lex.next(); + } + // End reading semver from source code + int tolk_ver[3] = {0, 0, 0}; + std::istringstream iss(tolk_ver_test); + std::string s; + for (int idx = 0; idx < 3; idx++) { + std::getline(iss, s, '.'); + tolk_ver[idx] = stoi(s); + } + // End parsing embedded semver + std::string semver_expr; + if (negate) { + semver_expr += '!'; + } + semver_expr += op; + if (eq) { + semver_expr += '='; + } + for (int idx = 0; idx < 3; idx++) { + semver_expr += std::to_string(sem_ver[idx]); + if (idx < 2) + semver_expr += '.'; + } + bool match = true; + switch (op) { + case '=': + if ((tolk_ver[0] != sem_ver[0]) || + (tolk_ver[1] != sem_ver[1]) || + (tolk_ver[2] != sem_ver[2])) { + match = false; + } + break; + case '>': + if ( ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] < sem_ver[2])) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] < sem_ver[1])) || + ((tolk_ver[0] < sem_ver[0])) ) { + match = false; + } + break; + case '<': + if ( ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] > sem_ver[2])) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] > sem_ver[1])) || + ((tolk_ver[0] > sem_ver[0])) ) { + match = false; + } + break; + case '^': + if ( ((segs == 3) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] != sem_ver[1]) || (tolk_ver[2] < sem_ver[2]))) + || ((segs == 2) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] < sem_ver[1]))) + || ((segs == 1) && ((tolk_ver[0] < sem_ver[0]))) ) { + match = false; + } + break; + } + if ((match && negate) || (!match && !negate)) { + pragma.error(std::string("Tolk version ") + tolk_ver_test + " does not satisfy condition " + semver_expr); + } + } else if (!pragma_name.compare("test-version-set")) { + if (lex.tp() != _String) { 
+ lex.cur().error("version string expected"); + } + tolk_ver_test = lex.cur().str; + lex.next(); + } else if (pragma_name == pragma_allow_post_modification.name()) { + pragma_allow_post_modification.enable(lex.cur().loc); + } else if (pragma_name == pragma_compute_asm_ltr.name()) { + pragma_compute_asm_ltr.enable(lex.cur().loc); + } else { + lex.cur().error(std::string{"unknown pragma `"} + pragma_name + "`"); + } + lex.expect(';'); +} + +std::vector source_fdescr; + +std::map source_files; +std::stack inclusion_locations; + +void parse_include(Lexer& lex, const FileDescr* fdescr) { + auto include = lex.cur(); + lex.expect(_IncludeHashtag); + if (lex.tp() != _String) { + lex.expect(_String, "source file name"); + } + std::string val = lex.cur().str; + std::string parent_dir = fdescr->filename; + if (parent_dir.rfind('/') != std::string::npos) { + val = parent_dir.substr(0, parent_dir.rfind('/') + 1) + val; + } + lex.next(); + lex.expect(';'); + if (!parse_source_file(val.c_str(), include, false)) { + include.error(std::string{"failed parsing included file `"} + val + "`"); + } +} + +bool parse_source(std::istream* is, FileDescr* fdescr) { + SourceReader reader{is, fdescr}; + Lexer lex{reader, true, ";,()[] ~."}; + while (lex.tp() != _Eof) { + if (lex.tp() == _PragmaHashtag) { + parse_pragma(lex); + } else if (lex.tp() == _IncludeHashtag) { + parse_include(lex, fdescr); + } else if (lex.tp() == _Global) { + parse_global_var_decls(lex); + } else if (lex.tp() == _Const) { + parse_const_decls(lex); + } else { + parse_func_def(lex); + } + } + return true; +} + +bool parse_source_file(const char* filename, Lexem lex, bool is_main) { + if (!filename || !*filename) { + auto msg = "source file name is an empty string"; + if (lex.tp) { + lex.error(msg); + } else { + throw Fatal{msg}; + } + } + + auto path_res = read_callback(ReadCallback::Kind::Realpath, filename); + if (path_res.is_error()) { + auto error = path_res.move_as_error(); + lex.error(error.message().c_str()); + 
return false; + } + std::string real_filename = path_res.move_as_ok(); + auto it = source_files.find(real_filename); + if (it != source_files.end()) { + it->second->is_main |= is_main; + if (verbosity >= 2) { + if (lex.tp) { + lex.loc.show_warning(std::string{"skipping file "} + real_filename + " because it was already included"); + } else { + std::cerr << "warning: skipping file " << real_filename << " because it was already included" << std::endl; + } + } + return true; + } + if (lex.tp) { // included + generated_from += std::string{"incl:"}; + } + generated_from += std::string{"`"} + filename + "` "; + FileDescr* cur_source = new FileDescr{filename}; + source_files[real_filename] = cur_source; + cur_source->is_main = is_main; + source_fdescr.push_back(cur_source); + auto file_res = read_callback(ReadCallback::Kind::ReadFile, filename); + if (file_res.is_error()) { + auto msg = file_res.move_as_error().message().str(); + if (lex.tp) { + lex.error(msg); + } else { + throw Fatal{msg}; + } + } + auto file_str = file_res.move_as_ok(); + std::stringstream ss{file_str}; + inclusion_locations.push(lex.loc); + bool res = parse_source(&ss, cur_source); + inclusion_locations.pop(); + return res; +} + +bool parse_source_stdin() { + FileDescr* cur_source = new FileDescr{"stdin", true}; + cur_source->is_main = true; + source_fdescr.push_back(cur_source); + return parse_source(&std::cin, cur_source); +} + +} // namespace tolk diff --git a/tolk/srcread.cpp b/tolk/srcread.cpp new file mode 100644 index 000000000..c71f498d6 --- /dev/null +++ b/tolk/srcread.cpp @@ -0,0 +1,228 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "srcread.h" +#include + +namespace tolk { + +/* + * + * SOURCE FILE READER + * + */ + +std::ostream& operator<<(std::ostream& os, const FileDescr* fdescr) { + return os << (fdescr ? (fdescr->is_stdin ? "stdin" : fdescr->filename) : "unknown-location"); +} + +std::ostream& operator<<(std::ostream& os, const Fatal& fatal) { + return os << fatal.get_msg(); +} + +const char* FileDescr::convert_offset(long offset, long* line_no, long* line_pos, long* line_size) const { + long lno = 0, lpos = -1, lsize = 0; + const char* lstart = nullptr; + if (offset >= 0 && offset < (long)text.size()) { + auto it = std::upper_bound(line_offs.begin(), line_offs.end(), offset); + lno = it - line_offs.begin(); + if (lno && it != line_offs.end()) { + lsize = it[0] - it[-1]; + lpos = offset - it[-1]; + lstart = text.data() + it[-1]; + } + } else { + lno = (long)line_offs.size(); + } + if (line_no) { + *line_no = lno; + } + if (line_pos) { + *line_pos = lpos; + } + if (line_size) { + *line_size = lsize; + } + return lstart; +} + +const char* FileDescr::push_line(std::string new_line) { + if (line_offs.empty()) { + line_offs.push_back(0); + } + std::size_t cur_size = text.size(); + text += new_line; + text += '\0'; + line_offs.push_back((long)text.size()); + return text.data() + cur_size; +} + +void SrcLocation::show(std::ostream& os) const { + os << fdescr; + long line_no, line_pos; + if (fdescr && convert_pos(&line_no, &line_pos)) { + os << ':' << line_no; + if (line_pos >= 0) { + os << ':' << (line_pos + 1); + } + } +} + +bool SrcLocation::show_context(std::ostream& os) const { + long line_no, 
line_pos, line_size; + if (!fdescr || !convert_pos(&line_no, &line_pos, &line_size)) { + return false; + } + bool skip_left = (line_pos > 200), skip_right = (line_pos + 200u < line_size); + const char* here = fdescr->text.data() + char_offs; + const char* base = here - line_pos; + const char* start = skip_left ? here - 100 : base; + const char* end = skip_right ? here + 100 : base + line_size; + os << " "; + if (skip_left) { + os << "... "; + } + for (const char* ptr = start; ptr < end; ptr++) { + os << (char)*ptr; + } + if (skip_right) { + os << " ..."; + } + os << std::endl; + os << " "; + if (skip_left) { + os << "... "; + } + for (const char* ptr = start; ptr < here; ptr++) { + char c = *ptr; + os << (c == 9 || c == 10 ? c : ' '); + } + os << '^' << std::endl; + return true; +} + +std::ostream& operator<<(std::ostream& os, const SrcLocation& loc) { + loc.show(os); + return os; +} + +void SrcLocation::show_gen_error(std::ostream& os, std::string message, std::string err_type) const { + show(os); + if (!err_type.empty()) { + os << ": " << err_type; + } + os << ": " << message << std::endl; + show_context(os); +} + +std::ostream& operator<<(std::ostream& os, const Error& error) { + error.show(os); + return os; +} + +void ParseError::show(std::ostream& os) const { + os << where << ": error: " << message << std::endl; + where.show_context(os); +} + +SourceReader::SourceReader(std::istream* _is, FileDescr* _fdescr) + : ifs(_is), fdescr(_fdescr), loc(_fdescr), eof(false), cur_line_len(0), start(0), cur(0), end(0) { + load_line(); +} + +void SourceReader::set_eof() { + if (!eof) { + eof = true; + start = cur = end = 0; + } +} + +int SourceReader::skip_spc() { + if (!cur) { + return 0; + } + const char* ptr = cur; + int res = 0; + while (*ptr == ' ' || *ptr == 9) { + ++ptr; + ++res; + } + set_ptr(ptr); + return res; +} + +bool SourceReader::seek_eof() { + while (seek_eoln()) { + if (!load_line()) { + return true; + } + } + return false; +} + +const char* 
SourceReader::set_ptr(const char* ptr) { + if (ptr != cur) { + if (ptr < cur || ptr > end) { + error("parsing position went outside of line"); + } + loc.char_offs += ptr - cur; + cur = ptr; + } + return ptr; +} + +bool SourceReader::load_line() { + if (eof) { + return false; + } + loc.set_eof(); + if (ifs->eof()) { + set_eof(); + return false; + } + std::getline(*ifs, cur_line); + if (ifs->fail()) { + set_eof(); + if (!ifs->eof()) { + error("cannot read line from source stream"); + } + return false; + } + std::size_t len = cur_line.size(); + if (len > 0xffffff) { + set_eof(); + error("line too long"); + return false; + } + if (len && cur_line.back() == '\r') { + // CP/M line breaks support + cur_line.pop_back(); + --len; + } + cur_line_len = (int)len; + if (fdescr) { + cur = start = fdescr->push_line(std::move(cur_line)); + end = start + len; + loc.char_offs = (std::size_t)(cur - fdescr->text.data()); + cur_line.clear(); + } else { + cur = start = cur_line.c_str(); + end = start + cur_line_len; + } + return true; +} + +} // namespace tolk diff --git a/tolk/srcread.h b/tolk/srcread.h new file mode 100644 index 000000000..3731a5ca4 --- /dev/null +++ b/tolk/srcread.h @@ -0,0 +1,162 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +#include +#include +#include + +namespace tolk { + +/* + * + * SOURCE FILE READER + * + */ + +struct FileDescr { + std::string filename; + std::string text; + std::vector line_offs; + bool is_stdin; + bool is_main = false; + FileDescr(std::string _fname, bool _stdin = false) : filename(std::move(_fname)), is_stdin(_stdin) { + } + const char* push_line(std::string new_line); + const char* convert_offset(long offset, long* line_no, long* line_pos, long* line_size = nullptr) const; +}; + +struct Fatal { + std::string message; + Fatal(std::string _msg) : message(std::move(_msg)) { + } + std::string get_msg() const { + return message; + } +}; + +std::ostream& operator<<(std::ostream& os, const Fatal& fatal); + +struct SrcLocation { + const FileDescr* fdescr; + long char_offs; + SrcLocation() : fdescr(nullptr), char_offs(-1) { + } + SrcLocation(const FileDescr* _fdescr, long offs = -1) : fdescr(_fdescr), char_offs(-1) { + } + bool defined() const { + return fdescr; + } + bool eof() const { + return char_offs == -1; + } + void set_eof() { + char_offs = -1; + } + const char* convert_pos(long* line_no, long* line_pos, long* line_size = nullptr) const { + return defined() ? 
fdescr->convert_offset(char_offs, line_no, line_pos, line_size) : nullptr; + } + void show(std::ostream& os) const; + bool show_context(std::ostream& os) const; + void show_gen_error(std::ostream& os, std::string message, std::string err_type = "") const; + void show_note(std::string err_msg) const { + show_gen_error(std::cerr, err_msg, "note"); + } + void show_warning(std::string err_msg) const { + show_gen_error(std::cerr, err_msg, "warning"); + } + void show_error(std::string err_msg) const { + show_gen_error(std::cerr, err_msg, "error"); + } +}; + +std::ostream& operator<<(std::ostream& os, const SrcLocation& loc); + +struct Error { + virtual ~Error() = default; + virtual void show(std::ostream& os) const = 0; +}; + +std::ostream& operator<<(std::ostream& os, const Error& error); + +struct ParseError : Error { + SrcLocation where; + std::string message; + ParseError(const SrcLocation& _where, std::string _msg) : where(_where), message(_msg) { + } + ParseError(const SrcLocation* _where, std::string _msg) : message(_msg) { + if (_where) { + where = *_where; + } + } + ~ParseError() override = default; + void show(std::ostream& os) const override; +}; + +class SourceReader { + std::istream* ifs; + FileDescr* fdescr; + SrcLocation loc; + bool eof; + std::string cur_line; + int cur_line_len; + void set_eof(); + const char *start, *cur, *end; + + public: + SourceReader(std::istream* _is, FileDescr* _fdescr); + bool load_line(); + bool is_eof() const { + return eof; + } + int is_eoln() const { + return cur == end; + } + int skip_spc(); + bool seek_eoln() { + skip_spc(); + return is_eoln(); + } + bool seek_eof(); + const char* cur_line_cstr() const { + return cur_line.c_str(); + } + const SrcLocation& here() const { + return loc; + } + char cur_char() const { + return *cur; + } + char next_char() const { + return cur[1]; + } + const char* get_ptr() const { + return cur; + } + const char* get_end_ptr() const { + return end; + } + const char* set_ptr(const char* ptr); + 
void advance(int n) { + set_ptr(get_ptr() + n); + } + void error(std::string err_msg) { + throw ParseError{loc, err_msg}; + } +}; + +} // namespace tolk diff --git a/tolk/stack-transform.cpp b/tolk/stack-transform.cpp new file mode 100644 index 000000000..fe5735e5c --- /dev/null +++ b/tolk/stack-transform.cpp @@ -0,0 +1,1054 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * GENERIC STACK TRANSFORMATIONS + * + */ + +StackTransform::StackTransform(std::initializer_list list) { + *this = list; +} + +StackTransform &StackTransform::operator=(std::initializer_list list) { + if (list.size() > 255) { + invalidate(); + return *this; + } + set_id(); + if (!list.size()) { + return *this; + } + int m = (int)list.size(); + d = list.begin()[m - 1] - (m - 1); + if (d >= 128 || d < -128) { + invalidate(); + return *this; + } + for (int i = 0; i < m - 1; i++) { + int x = d + i; + int y = list.begin()[i]; + if (y != x) { + if (x != (short)x || y != (short)y || n == max_n) { + invalidate(); + return *this; + } + dp = std::max(dp, std::max(x, y) + 1); + A[n++] = std::make_pair((short)x, (short)y); + } + } + return *this; +} + +bool StackTransform::assign(const StackTransform &other) { + if (!other.is_valid() || (unsigned)other.n > max_n) { + return invalidate(); + } + d = other.d; + n = other.n; + dp = other.dp; + c = other.c; + invalid = false; + for (int i = 0; i < n; i++) { + A[i] = other.A[i]; + } + return true; +} + +int StackTransform::get(int x) const { + if (!is_valid()) { + return -1; + } + if (x <= c_start) { + return x - c; + } + x += d; + int i; + for (i = 0; i < n && A[i].first < x; i++) { + } + if (i < n && A[i].first == x) { + return A[i].second; + } else { + return x; + } +} + +bool StackTransform::set(int x, int y, bool relaxed) { + if (!is_valid()) { + return false; + } + if (x < 0) { + return (relaxed && y == x + d) || invalidate(); + } + if (!relaxed) { + touch(x); + } + x += d; + int i; + for (i = 0; i < n && A[i].first < x; i++) { + } + if (i < n && A[i].first == x) { + if (x != y) { + if (y != (short)y) { + return invalidate(); + } + A[i].second = (short)y; + } else { + --n; + for (; i < n; i++) { + A[i] = A[i + 1]; + } + } + } else { + if (x != y) { + if (x != (short)x || y != (short)y || n == max_n) { + return invalidate(); + } + for (int j = n++; j > i; j--) { + A[j] = 
A[j - 1]; + } + A[i].first = (short)x; + A[i].second = (short)y; + touch(x - d); + touch(y); + } + } + return true; +} + +// f(x') = x' + d for all x' >= x ? +bool StackTransform::is_trivial_after(int x) const { + return is_valid() && (!n || A[n - 1].first < x + d); +} + +// card f^{-1}(y) +int StackTransform::preimage_count(int y) const { + if (!is_valid()) { + return -1; + } + int count = (y >= d); + for (const auto &pair : A) { + if (pair.second == y) { + ++count; + } else if (pair.first == y) { + --count; + } + } + return count; +} + +// f^{-1}(y) +std::vector StackTransform::preimage(int y) const { + if (!is_valid()) { + return {}; + } + std::vector res; + bool f = (y >= d); + for (const auto &pair : A) { + if (pair.first > y && f) { + res.push_back(y - d); + f = false; + } + if (pair.first == y) { + f = false; + } else if (pair.second == y) { + res.push_back(pair.first - d); + } + } + return res; +} + +// is f:N->N bijective ? +bool StackTransform::is_permutation() const { + if (!is_valid() || d) { + return false; + } + tolk_assert(n <= max_n); + std::array X, Y; + for (int i = 0; i < n; i++) { + X[i] = A[i].first; + Y[i] = A[i].second; + if (Y[i] < 0) { + return false; + } + } + std::sort(Y.begin(), Y.begin() + n); + for (int i = 0; i < n; i++) { + if (X[i] != Y[i]) { + return false; + } + } + return true; +} + +bool StackTransform::remove_negative() { + int s = 0; + while (s < n && A[s].first < d) { + ++s; + } + if (s) { + n -= s; + for (int i = 0; i < n; i++) { + A[i] = A[i + s]; + } + } + return true; +} + +int StackTransform::try_load(int &i, int offs) const { + return i < n ? 
A[i++].first + offs : inf_x; +} + +bool StackTransform::try_store(int x, int y) { + if (x == y || x < d) { + return true; + } + if (n == max_n || x != (short)x || y != (short)y) { + return invalidate(); + } + A[n].first = (short)x; + A[n++].second = (short)y; + return true; +} + +// c := a * b +bool StackTransform::compose(const StackTransform &a, const StackTransform &b, StackTransform &c) { + if (!a.is_valid() || !b.is_valid()) { + return c.invalidate(); + } + c.d = a.d + b.d; + c.n = 0; + c.dp = std::max(a.dp, b.dp + a.d); + c.c = a.c + b.c; + c.invalid = false; + int i = 0, j = 0; + int x1 = a.try_load(i); + int x2 = b.try_load(j, a.d); + while (true) { + if (x1 < x2) { + int y = a.A[i - 1].second; + if (!c.try_store(x1, y)) { + return false; + } + x1 = a.try_load(i); + } else if (x2 < inf_x) { + if (x1 == x2) { + x1 = a.try_load(i); + } + int y = b.A[j - 1].second; + if (!c.try_store(x2, a(y))) { + return false; + } + x2 = b.try_load(j, a.d); + } else { + return true; + } + } +} + +// this = this * other +bool StackTransform::apply(const StackTransform &other) { + StackTransform res; + if (!compose(*this, other, res)) { + return invalidate(); + } + return assign(res); +} + +// this = other * this +bool StackTransform::preapply(const StackTransform &other) { + StackTransform res; + if (!compose(other, *this, res)) { + return invalidate(); + } + return assign(res); +} + +StackTransform StackTransform::operator*(const StackTransform &b) const & { + StackTransform res; + compose(*this, b, res); + return res; +} + +// this = this * other +StackTransform &StackTransform::operator*=(const StackTransform &other) { + StackTransform res; + (compose(*this, other, res) && assign(res)) || invalidate(); + return *this; +} + +bool StackTransform::apply_xchg(int i, int j, bool relaxed) { + if (!is_valid() || i < 0 || j < 0) { + return invalidate(); + } + if (i == j) { + return relaxed || touch(i); + } + int u = touch_get(i), v = touch_get(j); + return set(i, v) && set(j, u); 
+} + +bool StackTransform::apply_push(int i) { + if (!is_valid() || i < 0) { + return invalidate(); + } + int u = touch_get(i); + return shift(-1) && set(0, u); +} + +bool StackTransform::apply_push_newconst() { + if (!is_valid()) { + return false; + } + return shift(-1) && set(0, c_start - c++); +} + +bool StackTransform::apply_pop(int i) { + if (!is_valid() || i < 0) { + return invalidate(); + } + if (!i) { + return touch(0) && shift(1); + } else { + return set(i, get(0)) && shift(1); + } +} + +bool StackTransform::apply_blkpop(int k) { + if (!is_valid() || k < 0) { + return invalidate(); + } + return !k || (touch(k - 1) && shift(k)); +} + +bool StackTransform::equal(const StackTransform &other, bool relaxed) const { + if (!is_valid() || !other.is_valid()) { + return false; + } + if (!(n == other.n && d == other.d)) { + return false; + } + for (int i = 0; i < n; i++) { + if (A[i] != other.A[i]) { + return false; + } + } + return relaxed || dp == other.dp; +} + +StackTransform StackTransform::Xchg(int i, int j, bool relaxed) { + StackTransform t; + t.apply_xchg(i, j, relaxed); + return t; +} + +StackTransform StackTransform::Push(int i) { + StackTransform t; + t.apply_push(i); + return t; +} + +StackTransform StackTransform::Pop(int i) { + StackTransform t; + t.apply_pop(i); + return t; +} + +bool StackTransform::is_xchg(int i, int j) const { + if (i == j) { + return is_id(); + } + return is_valid() && !d && n == 2 && i >= 0 && j >= 0 && get(i) == j && get(j) == i; +} + +bool StackTransform::is_xchg(int *i, int *j) const { + if (!is_valid() || d || n > 2 || !dp) { + return false; + } + if (!n) { + *i = *j = 0; + return true; + } + if (n != 2) { + return false; + } + int a = A[0].first, b = A[1].first; + if (A[0].second != b || A[1].second != a) { + return false; + } + *i = std::min(a, b); + *j = std::max(a, b); + return true; +} + +bool StackTransform::is_xchg_xchg(int i, int j, int k, int l) const { + if (is_valid() && !d && n <= 4 && (i | j | k | l) >= 0) { + 
StackTransform t; + return t.apply_xchg(i, j) && t.apply_xchg(k, l) && t <= *this; + } else { + return false; + } +} + +bool StackTransform::is_xchg_xchg(int *i, int *j, int *k, int *l) const { + if (!is_valid() || d || n > 4 || !dp || !is_permutation()) { + return false; + } + if (!n) { + *i = *j = *k = *l = 0; + return true; + } + if (n <= 2) { + *k = *l = 0; + return is_xchg(i, j); + } + if (n == 3) { + // rotation: a -> b -> c -> a + int a = A[0].first; + int b = A[0].second; + int s = (b == A[2].first ? 2 : 1); + int c = A[s].second; + if (b != A[s].first || c != A[3 - s].first || a != A[3 - s].second) { + return false; + } + // implement as XCHG s(a),s(c) ; XCHG s(a),s(b) + *i = *k = a; + *j = c; + *l = b; + return is_xchg_xchg(*i, *j, *k, *l); + } + *i = A[0].first; + *j = A[0].second; + if (get(*j) != *i) { + return false; + } + for (int s = 1; s < 4; s++) { + if (A[s].first != *j) { + *k = A[s].first; + *l = A[s].second; + return get(*l) == *k && is_xchg_xchg(*i, *j, *k, *l); + } + } + return false; +} + +bool StackTransform::is_push(int i) const { + return is_valid() && d == -1 && n == 1 && A[0].first == -1 && A[0].second == i; +} + +bool StackTransform::is_push(int *i) const { + if (is_valid() && d == -1 && n == 1 && A[0].first == -1 && A[0].second >= 0) { + *i = A[0].second; + return true; + } else { + return false; + } +} + +// 1 2 3 4 .. = pop0 +// 0 2 3 4 .. = pop1 +// 1 0 3 4 .. = pop2 +// 1 2 0 4 .. = pop3 +// POP s(i) : 1 2 ... i-1 0 i+1 ... ; d=1, n=1, {(i,0)} +bool StackTransform::is_pop(int i) const { + if (!is_valid() || d != 1 || n > 1 || i < 0) { + return false; + } + if (!i) { + return !n; + } + return n == 1 && A[0].first == i && !A[0].second; +} + +bool StackTransform::is_pop(int *i) const { + if (!is_valid() || d != 1 || n > 1) { + return false; + } + if (!n) { + *i = 0; + return true; + } + if (n == 1 && !A[0].second) { + *i = A[0].first; + return true; + } + return false; +} + +// POP s(i) ; POP s(j) : 2 ... i-1 0 i+1 ... j 1 j+2 ... 
; d=2, n=2, {(i,0),(j+1,1)} if i <> j+1 +bool StackTransform::is_pop_pop(int i, int j) const { + if (is_valid() && d == 2 && n <= 2 && i >= 0 && j >= 0) { + StackTransform t; + return t.apply_pop(i) && t.apply_pop(j) && t <= *this; + } else { + return false; + } +} + +bool StackTransform::is_pop_pop(int *i, int *j) const { + if (!is_valid() || d != 2 || n > 2) { + return false; + } + if (!n) { + *i = *j = 0; // 2DROP + } else if (n == 2) { + *i = A[0].first - A[0].second; + *j = A[1].first - A[1].second; + if (A[0].second > A[1].second) { + std::swap(*i, *j); + } + } else if (!A[0].second) { + *i = A[0].first; + *j = 0; + } else { + *i = 0; + *j = A[0].first - 1; + } + return is_pop_pop(*i, *j); +} + +const StackTransform StackTransform::rot{2, 0, 1, 3}; +const StackTransform StackTransform::rot_rev{1, 2, 0, 3}; + +bool StackTransform::is_rot() const { + return equal(rot, true); +} + +bool StackTransform::is_rotrev() const { + return equal(rot_rev, true); +} + +// PUSH i ; ROT == 1 i 0 2 3 +bool StackTransform::is_push_rot(int i) const { + return is_valid() && d == -1 && i >= 0 && is_trivial_after(3) && get(0) == 1 && get(1) == i && get(2) == 0; +} + +bool StackTransform::is_push_rot(int *i) const { + return is_valid() && (*i = get(1)) >= 0 && is_push_rot(*i); +} + +// PUSH i ; -ROT == 0 1 i 2 3 +bool StackTransform::is_push_rotrev(int i) const { + return is_valid() && d == -1 && i >= 0 && is_trivial_after(3) && get(0) == 0 && get(1) == 1 && get(2) == i; +} + +bool StackTransform::is_push_rotrev(int *i) const { + return is_valid() && (*i = get(2)) >= 0 && is_push_rotrev(*i); +} + +// PUSH s(i) ; XCHG s(j),s(k) --> i 0 1 .. i .. +// PUSH s(i) ; XCHG s(0),s(k) --> k-1 0 1 .. k-2 i k .. 
// Pattern recognizers, part 1.
// Each instruction sequence comes as a pair of overloads:
//   * the by-value overload checks whether this transformation matches the sequence with the
//     given operands, by replaying the sequence on a fresh StackTransform `t` and comparing
//     with `t <= *this` (operator<= is declared outside this chunk - presumably "t is
//     compatible with *this"; confirm in tolk.h);
//   * the by-pointer overload derives candidate operands from get()/A and then delegates to
//     the by-value overload. The out-parameters are written before that final check, so they
//     may hold candidate values even when false is returned.

// PUSH s(i) ; XCHG s(j),s(k)
bool StackTransform::is_push_xchg(int i, int j, int k) const {
  StackTransform t;
  return is_valid() && d == -1 && n <= 3 && t.apply_push(i) && t.apply_xchg(j, k) && t <= *this;
}

// Derives (i, j, k) for PUSH s(i) ; XCHG s(j),s(k) from the number of entries n (1, 2 or 3).
bool StackTransform::is_push_xchg(int *i, int *j, int *k) const {
  if (!(is_valid() && d == -1 && n <= 3 && n > 0)) {
    return false;
  }
  int s = get(0);
  if (s < 0) {
    return false;
  }
  *i = s;
  *j = 0;
  if (n == 1) {
    *k = 0;
  } else if (n == 2) {
    *k = s + 1;
    *i = get(s + 1);
  } else {
    *j = A[1].first + 1;
    *k = A[2].first + 1;
  }
  return is_push_xchg(*i, *j, *k);
}

// XCHG s1,s(i) ; XCHG s0,s(j)
bool StackTransform::is_xchg2(int i, int j) const {
  StackTransform t;
  return is_valid() && !d && t.apply_xchg(1, i) && t.apply_xchg(0, j) && t <= *this;
}

// Candidates are i = get(1), j = get(0), adjusted for the degenerate cases commented below.
// A transformation without entries (n == 0) is accepted immediately.
bool StackTransform::is_xchg2(int *i, int *j) const {
  if (!is_valid() || d || n > 4 || n == 1 || dp < 2) {
    return false;
  }
  *i = get(1);
  *j = get(0);
  if (!n) {
    return true;
  }
  if (*i < 0 || *j < 0) {
    return false;
  }
  if (n == 2 && !*i) {
    *j = *i;  // XCHG s0,s1 = XCHG2 s0,s0
  } else if (n == 3 && *i) {
    // XCHG2 s(i),s(i) = XCHG s1,s(i) ; XCHG s0,s(i) : 0->1, 1->i
    *j = *i;
  }  // XCHG2 s0,s(i) = XCHG s0,s1 ; XCHG s0,s(i) : 0->i, 1->0
  return is_xchg2(*i, *j);
}

// XCHG s0,s(i) ; PUSH s(j) = PUSH s(j') ; XCHG s1,s(i+1)
// j'=j if j!=0, j!=i
// j'=0 if j=i
// j'=i if j=0
bool StackTransform::is_xcpu(int i, int j) const {
  StackTransform t;
  return is_valid() && d == -1 && t.apply_xchg(0, i) && t.apply_push(j) && t <= *this;
}

// Candidates are i = get(1), j = get(0); j is then remapped between 0 and i as described above.
bool StackTransform::is_xcpu(int *i, int *j) const {
  if (!is_valid() || d != -1 || n > 3 || dp < 1) {
    return false;
  }
  *i = get(1);
  *j = get(0);
  if (!*j) {
    *j = *i;
  } else if (*j == *i) {
    *j = 0;
  }
  return is_xcpu(*i, *j);
}

// PUSH s(i) ; XCHG s0, s1 ; XCHG s0, s(j+1)
bool StackTransform::is_puxc(int i, int j) const {
  StackTransform t;
  return is_valid() && d == -1 && t.apply_push(i) && t.apply_xchg(0, 1) && t.apply_xchg(0, j + 1) && t <= *this;
}

// j > 0 : 0 -> j, 1 -> i
// j = 0 : 0 -> i, 1 -> 0  ( PUSH s(i) )
// j = -1 : 0 -> 0, 1 -> i  ( PUSH s(i) ; XCHG s0, s1 )
bool StackTransform::is_puxc(int *i, int *j) const {
  if (!is_valid() || d != -1 || n > 3) {
    return false;
  }
  *i = get(1);
  *j = get(0);
  if (!*i && is_push(*j)) {
    // plain PUSH: the roles of the two candidates are exchanged
    std::swap(*i, *j);
    return is_puxc(*i, *j);
  }
  if (!*j) {
    // get(0) == 0 is encoded as j = -1 (see the table above)
    --*j;
  }
  return is_puxc(*i, *j);
}

// PUSH s(i) ; PUSH s(j+1)
bool StackTransform::is_push2(int i, int j) const {
  StackTransform t;
  return is_valid() && d == -2 && t.apply_push(i) && t.apply_push(j + 1) && t <= *this;
}

// Candidates are i = get(1), j = get(0).
bool StackTransform::is_push2(int *i, int *j) const {
  if (!is_valid() || d != -2 || n > 2) {
    return false;
  }
  *i = get(1);
  *j = get(0);
  return is_push2(*i, *j);
}

// XCHG s2,s(i) ; XCHG s1,s(j) ; XCHG s0,s(k)
// Tries i = get(2), get(1), get(0) in that order; for each, pre-composes XCHG s2,s(i)
// with this transformation and asks is_xchg2() for the remaining two operands.
bool StackTransform::is_xchg3(int *i, int *j, int *k) const {
  if (!is_valid() || d || dp < 3 || !is_permutation()) {
    return false;
  }
  for (int s = 2; s >= 0; s--) {
    *i = get(s);
    StackTransform t = Xchg(2, *i) * *this;
    if (t.is_xchg2(j, k)) {
      return true;
    }
  }
  return false;
}

// XCHG s1,s(i) ; XCHG s0,s(j) ; PUSH s(k)
// Tries i = get(2), get(1); the rest is delegated to is_xcpu() on Xchg(1, i) * this.
bool StackTransform::is_xc2pu(int *i, int *j, int *k) const {
  if (!is_valid() || d != -1 || dp < 2) {
    return false;
  }
  for (int s = 2; s >= 1; s--) {
    *i = get(s);
    StackTransform t = Xchg(1, *i) * *this;
    if (t.is_xcpu(j, k)) {
      return true;
    }
  }
  return false;
}

// XCHG s1,s(i) ; PUSH s(j) ; XCHG s0,s1 ; XCHG s0,s(k+1)
// Tries i = get(2), get(1), get(0); the rest is delegated to is_puxc() on Xchg(1, i) * this.
bool StackTransform::is_xcpuxc(int *i, int *j, int *k) const {
  if (!is_valid() || d != -1 || dp < 2) {
    return false;
  }
  for (int s = 2; s >= 0; s--) {
    *i = get(s);
    StackTransform t = Xchg(1, *i) * *this;
    if (t.is_puxc(j, k)) {
      return true;
    }
  }
  return false;
}

// XCHG s0,s(i) ; PUSH s(j) ; PUSH s(k+1)
// i is taken from get(2); the two pushes are recognized by is_push2() on Xchg(0, i) * this.
bool StackTransform::is_xcpu2(int *i, int *j, int *k) const {
  if (!is_valid() || d != -2 || dp < 1) {
    return false;
  }
  *i = get(2);
  StackTransform t = Xchg(0, *i) * *this;
  return t.is_push2(j, k);
}

// PUSH s(i) ; XCHG s0,s2 ; XCHG s1,s(j+1) ; XCHG s0,s(k+1)
// 0 -> i or 1 -> i or 2 -> i ; i has two preimages
// 0 -> k if k >= 2, k != j
// 1 -> j=k if j = k >= 2
// 1 -> j if j >= 2, k != 0
// 0 -> j if j >= 2, k = 0
// => i in {f(0), f(1), f(2)} ; j in {-1, 0, 1, f(0), f(1)} ; k in {-1, 0, 1, f(0), f(1)}
bool StackTransform::is_puxc2(int *i, int *j, int *k) const {
  if (!is_valid() || d != -1 || dp < 2) {
    return false;
  }
  // brute force over the candidate sets listed above; each triple is verified by the by-value overload
  for (int s = 2; s >= 0; s--) {
    *i = get(s);
    if (preimage_count(*i) != 2) {
      continue;
    }
    for (int u = -1; u <= 3; u++) {
      *j = (u >= 2 ? get(u - 2) : u);
      for (int v = -1; v <= 3; v++) {
        *k = (v >= 2 ? get(v - 2) : v);
        if (is_puxc2(*i, *j, *k)) {
          return true;
        }
      }
    }
  }
  return false;
}

// PUSH s(i) ; XCHG s0,s2 ; XCHG s1,s(j+1) ; XCHG s0,s(k+1)
bool StackTransform::is_puxc2(int i, int j, int k) const {
  StackTransform t;
  return is_valid() && d == -1 && dp >= 2          // basic checks
         && t.apply_push(i) && t.apply_xchg(0, 2)  // PUSH s(i) ; XCHG s0,s2
         && t.apply_xchg(1, j + 1)                 // XCHG s1,s(j+1)
         && t.apply_xchg(0, k + 1) && t <= *this;  // XCHG s0,s(k+1)
}

// PUSH s(i) ; XCHG s0,s1 ; XCHG s0,s(j+1) ; PUSH s(k+1)
// Works on a copy `t`: after appending a pop, (i, j) are recognized by is_puxc(); k is then
// taken from the first preimage (in t) of this->get(0), and the result is verified by
// re-applying the push to t.
bool StackTransform::is_puxcpu(int *i, int *j, int *k) const {
  if (!is_valid() || d != -2 || dp < 1) {
    return false;
  }
  StackTransform t = *this;
  if (t.apply_pop() && t.is_puxc(i, j)) {
    int y = get(0);
    auto v = t.preimage(y);
    if (!v.empty()) {
      *k = v[0] - 1;
      t.apply_push(*k + 1);
      return t <= *this;
    }
  }
  return false;
}

// PUSH s(i) ; XCHG s0,s1 ; PUSH s(j+1) ; XCHG s0,s1 ; XCHG s0,s(k+2)
// 2 -> i; 1 -> j (if j >= 1, k != -1), 1 -> i (if j = 0, k != -1), 1 -> 0 (if j = -1, k != -1)
// 0 -> k (if k >= 1), 0 -> i (if k = 0), 0 -> j (if k = -1, j >= 1)
bool StackTransform::is_pu2xc(int *i, int *j, int *k) const {
  if (!is_valid() || d != -2 || dp < 1) {
    return false;
  }
  *i = get(2);
  // brute force over the few possible (j, k); each pair is verified by the by-value overload
  for (int v = -2; v <= 1; v++) {
    *k = (v <= 0 ? v : get(0));  // one of -2, -1, 0, get(0)
    for (int u = -1; u <= 1; u++) {
      *j = (u <= 0 ? u : get(v != -1));  // one of -1, 0, get(0), get(1)
      if (is_pu2xc(*i, *j, *k)) {
        return true;
      }
    }
  }
  return false;
}

// PUSH s(i) ; XCHG s0,s1 ; PUSH s(j+1) ; XCHG s0,s1 ; XCHG s0,s(k+2)
bool StackTransform::is_pu2xc(int i, int j, int k) const {
  StackTransform t;
  return is_valid() && d == -2 && dp >= 1              // basic checks
         && t.apply_push(i) && t.apply_xchg(0, 1)      // PUSH s(i) ; XCHG s0,s1
         && t.apply_push(j + 1) && t.apply_xchg(0, 1)  // PUSH s(j+1) ; XCHG s0,s1
         && t.apply_xchg(0, k + 2) && t <= *this;      // XCHG s0,s(k+2)
}

// PUSH s(i) ; PUSH s(j+1) ; PUSH s(k+2)
bool StackTransform::is_push3(int i, int j, int k) const {
  StackTransform t;
  return is_valid() && d == -3 && t.apply_push(i) && t.apply_push(j + 1) && t.apply_push(k + 2) && t <= *this;
}

// Candidates are i = get(2), j = get(1), k = get(0).
bool StackTransform::is_push3(int *i, int *j, int *k) const {
  if (!is_valid() || d != -3 || n > 3) {
    return false;
  }
  *i = get(2);
  *j = get(1);
  *k = get(0);
  return is_push3(*i, *j, *k);
}

// Block swap recognizer: j is taken from get(0), i from the unique preimage of 0;
// both must be positive. Verified by the by-value overload below.
bool StackTransform::is_blkswap(int *i, int *j) const {
  if (!is_valid() || d || !is_permutation()) {
    return false;
  }
  *j = get(0);
  if (*j <= 0) {
    return false;
  }
  auto v = preimage(0);
  if (v.size() != 1) {
    return false;
  }
  *i = v[0];
  return *i > 0 && is_blkswap(*i, *j);
}

// True when get(s) == s + j for s in [0, i), get(s + i) == s for s in [0, j),
// and the transformation is trivial from position i + j on.
bool StackTransform::is_blkswap(int i, int j) const {
  if (!is_valid() || d || i <= 0 || j <= 0 || dp < i + j || !is_trivial_after(i + j)) {
    return false;
  }
  for (int s = 0; s < i; s++) {
    if (get(s) != s + j) {
      return false;
    }
  }
  for (int s = 0; s < j; s++) {
    if (get(s + i) != s) {
      return false;
    }
  }
  return true;
}

// equivalent to i times DROP
bool StackTransform::is_blkdrop(int *i) const {
  if (is_valid() && d > 0 && !n) {
    *i = d;
    return true;
  }
  return false;
}

// 0 1 .. j-1 j+i j+i+1 ...
+bool StackTransform::is_blkdrop2(int i, int j) const { + if (!is_valid() || d != i || i <= 0 || j < 0 || dp < i + j || n != j || !is_trivial_after(j)) { + return false; + } + for (int s = 0; s < j; s++) { + if (get(s) != s) { + return false; + } + } + return true; +} + +bool StackTransform::is_blkdrop2(int *i, int *j) const { + if (is_valid() && is_blkdrop2(d, n)) { + *i = d; + *j = n; + return true; + } + return false; +} + +// equivalent to i times PUSH s(j) +bool StackTransform::is_blkpush(int *i, int *j) const { + if (!is_valid() || d >= 0) { + return false; + } + *i = -d; + *j = get(*i - 1); + return is_blkpush(*i, *j); +} + +bool StackTransform::is_blkpush(int i, int j) const { + if (!is_valid() || d >= 0 || d != -i || j < 0 || dp < i + j || !is_trivial_after(i)) { + return false; + } + StackTransform t; + for (int s = 0; s < i; s++) { + if (!t.apply_push(j)) { + return false; + } + } + return t <= *this; +} + +bool StackTransform::is_reverse(int *i, int *j) const { + if (!is_valid() || d || !is_permutation() || n < 2) { + return false; + } + *j = A[0].first; + *i = A[n - 1].first - A[0].first + 1; + return is_reverse(*i, *j); +} + +bool StackTransform::is_reverse(int i, int j) const { + if (!is_valid() || d || !is_trivial_after(i + j) || n < 2 || A[0].first != j || A[n - 1].first != j + i - 1) { + return false; + } + for (int s = 0; s < i; s++) { + if (get(j + s) != j + i - 1 - s) { + return false; + } + } + return true; +} + +// 0 i+1 i+2 ... == i*NIP +// j i+1 i+2 ... 
== XCHG s(i),s(j) ; BLKDROP i +bool StackTransform::is_nip_seq(int i, int j) const { + return is_valid() && d == i && i > j && j >= 0 && n == 1 && A[0].first == i && A[0].second == j; +} + +bool StackTransform::is_nip_seq(int *i) const { + *i = d; + return is_nip_seq(*i); +} + +bool StackTransform::is_nip_seq(int *i, int *j) const { + if (is_valid() && n > 0) { + *i = d; + *j = A[0].second; + return is_nip_seq(*i, *j); + } else { + return false; + } +} + +// POP s(i); BLKDROP k (usually for i >= k >= 0) +bool StackTransform::is_pop_blkdrop(int i, int k) const { + StackTransform t; + return is_valid() && d == k + 1 && t.apply_pop(i) && t.apply_blkpop(k) && t <= *this; +} + +// POP s(i); BLKDROP k == XCHG s0,s(i); BLKDROP k+1 for i >= k >= 0 +// k+1 k+2 .. i-1 0 i+1 .. +bool StackTransform::is_pop_blkdrop(int *i, int *k) const { + if (is_valid() && n == 1 && d > 0 && !A[0].second) { + *k = d - 1; + *i = A[0].first; + return is_pop_blkdrop(*i, *k); + } else { + return false; + } +} + +// POP s(i); POP s(j); BLKDROP k (usually for i<>j >= k >= 0) +bool StackTransform::is_2pop_blkdrop(int i, int j, int k) const { + StackTransform t; + return is_valid() && d == k + 2 && t.apply_pop(i) && t.apply_pop(j) && t.apply_blkpop(k) && t <= *this; +} + +// POP s(i); POP s(j); BLKDROP k == XCHG s0,s(i); XCHG s1,s(j+1); BLKDROP k+2 (usually for i<>j >= k >= 2) +// k+2 k+3 .. i-1 0 i+1 ... j 1 j+2 ... 
+bool StackTransform::is_2pop_blkdrop(int *i, int *j, int *k) const { + if (is_valid() && n == 2 && d >= 2 && A[0].second + A[1].second == 1) { + *k = d - 2; + int t = (A[0].second > 0); + *i = A[t].first; + *j = A[1 - t].first - 1; + return is_2pop_blkdrop(*i, *j, *k); + } else { + return false; + } +} + +// PUSHCONST c ; ROT == 1 -1000 0 2 3 +bool StackTransform::is_const_rot(int c) const { + return is_valid() && d == -1 && is_trivial_after(3) && get(0) == 1 && c <= c_start && get(1) == c && get(2) == 0; +} + +bool StackTransform::is_const_rot(int *c) const { + return is_valid() && (*c = get(1)) <= c_start && is_const_rot(*c); +} + +// PUSHCONST c ; POP s(i) == 0 1 .. i-1 -1000 i+1 ... +bool StackTransform::is_const_pop(int c, int i) const { + return is_valid() && !d && n == 1 && i > 0 && c <= c_start && get(i - 1) == c; +} + +bool StackTransform::is_const_pop(int *c, int *i) const { + if (is_valid() && !d && n == 1 && A[0].second <= c_start) { + *i = A[0].first + 1; + *c = A[0].second; + return is_const_pop(*c, *i); + } else { + return false; + } +} + +// PUSH i ; PUSHCONST c == c i 0 1 2 ... +bool StackTransform::is_push_const(int i, int c) const { + return is_valid() && d == -2 && c <= c_start && i >= 0 && is_trivial_after(2) && get(0) == c && get(1) == i; +} + +bool StackTransform::is_push_const(int *i, int *c) const { + return is_valid() && d == -2 && n == 2 && is_push_const(*i = get(1), *c = get(0)); +} + +void StackTransform::show(std::ostream &os, int mode) const { + if (!is_valid()) { + os << ""; + return; + } + int mi = 0, ma = 0; + if (n > 0 && A[0].first < d) { + mi = A[0].first - d; + } + if (n > 0) { + ma = std::max(ma, A[n - 1].first - d + 1); + } + ma = std::max(ma + 1, dp - d); + os << '{'; + if (dp == d) { + os << '|'; + } + for (int i = mi; i < ma; i++) { + os << get(i) << (i == -1 ? '?' : (i == dp - d - 1 ? 
'|' : ' ')); + } + os << get(ma) << "..}"; +} + +} // namespace tolk diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp new file mode 100644 index 000000000..ea2a1f916 --- /dev/null +++ b/tolk/symtable.cpp @@ -0,0 +1,179 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "symtable.h" +#include +#include + +namespace tolk { + +/* + * + * SYMBOL VALUES (DECLARED) + * + */ + +int scope_level; + +SymTable<100003> symbols; + +SymDef* sym_def[symbols.hprime + 1]; +SymDef* global_sym_def[symbols.hprime + 1]; +std::vector> symbol_stack; +std::vector scope_opened_at; + +std::string Symbol::unknown_symbol_name(sym_idx_t i) { + if (!i) { + return "_"; + } else { + std::ostringstream os; + os << "SYM#" << i; + return os.str(); + } +} + +sym_idx_t SymTableBase::gen_lookup(std::string str, int mode, sym_idx_t idx) { + unsigned long long h1 = 1, h2 = 1; + for (char c : str) { + h1 = ((h1 * 239) + (unsigned char)(c)) % p; + h2 = ((h2 * 17) + (unsigned char)(c)) % (p - 1); + } + ++h2; + ++h1; + while (true) { + if (sym_table[h1]) { + if (sym_table[h1]->str == str) { + return (mode & 2) ? 
not_found : sym_idx_t(h1); + } + h1 += h2; + if (h1 > p) { + h1 -= p; + } + } else { + if (!(mode & 1)) { + return not_found; + } + if (def_sym >= ((long long)p * 3) / 4) { + throw SymTableOverflow{def_sym}; + } + sym_table[h1] = std::make_unique(str, idx <= 0 ? sym_idx_t(h1) : -idx); + ++def_sym; + return sym_idx_t(h1); + } + } +} + +SymTableBase& SymTableBase::add_keyword(std::string str, sym_idx_t idx) { + if (idx <= 0) { + idx = ++def_kw; + } + sym_idx_t res = gen_lookup(str, -1, idx); + if (!res) { + throw SymTableKwRedef{str}; + } + if (idx < max_kw_idx) { + keywords[idx] = res; + } + return *this; +} + +void open_scope(Lexer& lex) { + ++scope_level; + scope_opened_at.push_back(lex.cur().loc); +} + +void close_scope(Lexer& lex) { + if (!scope_level) { + throw Fatal{"cannot close the outer scope"}; + } + while (!symbol_stack.empty() && symbol_stack.back().first == scope_level) { + SymDef old_def = symbol_stack.back().second; + auto idx = old_def.sym_idx; + symbol_stack.pop_back(); + SymDef* cur_def = sym_def[idx]; + assert(cur_def); + assert(cur_def->level == scope_level && cur_def->sym_idx == idx); + //std::cerr << "restoring local symbol `" << old_def.name << "` of level " << scope_level << " to its previous level " << old_def.level << std::endl; + if (cur_def->value) { + //std::cerr << "deleting value of symbol " << old_def.name << ":" << old_def.level << " at " << (const void*) it->second.value << std::endl; + delete cur_def->value; + } + if (!old_def.level && !old_def.value) { + delete cur_def; // ??? keep the definition always? 
+ sym_def[idx] = nullptr; + } else { + cur_def->value = std::move(old_def.value); + cur_def->level = old_def.level; + } + old_def.value = nullptr; + } + --scope_level; + scope_opened_at.pop_back(); +} + +SymDef* lookup_symbol(sym_idx_t idx, int flags) { + if (!idx) { + return nullptr; + } + if ((flags & 1) && sym_def[idx]) { + return sym_def[idx]; + } + if ((flags & 2) && global_sym_def[idx]) { + return global_sym_def[idx]; + } + return nullptr; +} + +SymDef* lookup_symbol(std::string name, int flags) { + return lookup_symbol(symbols.lookup(name), flags); +} + +SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) { + if (!name_idx) { + return nullptr; + } + auto found = global_sym_def[name_idx]; + if (found) { + return force_new && found->value ? nullptr : found; + } + return global_sym_def[name_idx] = new SymDef(0, name_idx, loc); +} + +SymDef* define_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) { + if (!name_idx) { + return nullptr; + } + if (!scope_level) { + return define_global_symbol(name_idx, force_new, loc); + } + auto found = sym_def[name_idx]; + if (found) { + if (found->level < scope_level) { + symbol_stack.push_back(std::make_pair(scope_level, *found)); + found->level = scope_level; + } else if (found->value && force_new) { + return nullptr; + } + found->value = 0; + found->loc = loc; + return found; + } + found = sym_def[name_idx] = new SymDef(scope_level, name_idx, loc); + symbol_stack.push_back(std::make_pair(scope_level, SymDef{0, name_idx})); + return found; +} + +} // namespace tolk diff --git a/tolk/symtable.h b/tolk/symtable.h new file mode 100644 index 000000000..c0a0912a8 --- /dev/null +++ b/tolk/symtable.h @@ -0,0 +1,175 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once +#include "srcread.h" +#include "lexer.h" +#include + +namespace tolk { + +/* + * + * SYMBOL VALUES (DECLARED) + * + */ + +typedef int var_idx_t; + +struct SymValBase { + enum { _Param, _Var, _Func, _Typename, _GlobVar, _Const }; + int type; + int idx; + SymValBase(int _type, int _idx) : type(_type), idx(_idx) { + } + virtual ~SymValBase() = default; +}; + +/* + * + * SYMBOL TABLE + * + */ + +// defined outside this module (by the end user) +int compute_symbol_subclass(std::string str); // return 0 if unneeded + +typedef int sym_idx_t; + +struct Symbol { + std::string str; + sym_idx_t idx; + int subclass; + Symbol(std::string _str, sym_idx_t _idx, int _sc) : str(_str), idx(_idx), subclass(_sc) { + } + Symbol(std::string _str, sym_idx_t _idx) : str(_str), idx(_idx) { + subclass = compute_symbol_subclass(std::move(_str)); + } + static std::string unknown_symbol_name(sym_idx_t i); +}; + +class SymTableBase { + unsigned p; + std::unique_ptr* sym_table; + sym_idx_t def_kw, def_sym; + static constexpr int max_kw_idx = 10000; + sym_idx_t keywords[max_kw_idx]; + + public: + SymTableBase(unsigned p_, std::unique_ptr* sym_table_) + : p(p_), sym_table(sym_table_), def_kw(0x100), def_sym(0) { + std::memset(keywords, 0, sizeof(keywords)); + } + static constexpr sym_idx_t not_found = 0; + SymTableBase& 
add_keyword(std::string str, sym_idx_t idx = 0); + SymTableBase& add_kw_char(char c) { + return add_keyword(std::string{c}, c); + } + sym_idx_t lookup(std::string str, int mode = 0) { + return gen_lookup(str, mode); + } + sym_idx_t lookup_add(std::string str) { + return gen_lookup(str, 1); + } + Symbol* operator[](sym_idx_t i) const { + return sym_table[i].get(); + } + bool is_keyword(sym_idx_t i) const { + return sym_table[i] && sym_table[i]->idx < 0; + } + std::string get_name(sym_idx_t i) const { + return sym_table[i] ? sym_table[i]->str : Symbol::unknown_symbol_name(i); + } + int get_subclass(sym_idx_t i) const { + return sym_table[i] ? sym_table[i]->subclass : 0; + } + Symbol* get_keyword(int i) const { + return ((unsigned)i < (unsigned)max_kw_idx) ? sym_table[keywords[i]].get() : nullptr; + } + + protected: + sym_idx_t gen_lookup(std::string str, int mode = 0, sym_idx_t idx = 0); +}; + +template +class SymTable : public SymTableBase { + public: + static constexpr int hprime = pp; + static int size() { + return pp + 1; + } + + private: + std::unique_ptr sym[pp + 1]; + + public: + SymTable() : SymTableBase(pp, sym) { + } + SymTable& add_keyword(std::string str, sym_idx_t idx = 0) { + SymTableBase::add_keyword(str, idx); + return *this; + } + SymTable& add_kw_char(char c) { + return add_keyword(std::string{c}, c); + } +}; + +struct SymTableOverflow { + int sym_def; + SymTableOverflow(int x) : sym_def(x) { + } +}; + +struct SymTableKwRedef { + std::string kw; + SymTableKwRedef(std::string _kw) : kw(_kw) { + } +}; + +extern SymTable<100003> symbols; + +extern int scope_level; + +struct SymDef { + int level; + sym_idx_t sym_idx; + SymValBase* value; + SrcLocation loc; + SymDef(int lvl, sym_idx_t idx, const SrcLocation& _loc = {}, SymValBase* val = 0) + : level(lvl), sym_idx(idx), value(val), loc(_loc) { + } + bool has_name() const { + return sym_idx; + } + std::string name() const { + return symbols.get_name(sym_idx); + } +}; + +extern SymDef* 
sym_def[symbols.hprime + 1]; +extern SymDef* global_sym_def[symbols.hprime + 1]; +extern std::vector> symbol_stack; +extern std::vector scope_opened_at; + +void open_scope(Lexer& lex); +void close_scope(Lexer& lex); +SymDef* lookup_symbol(sym_idx_t idx, int flags = 3); +SymDef* lookup_symbol(std::string name, int flags = 3); + +SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, const SrcLocation& loc = {}); +SymDef* define_symbol(sym_idx_t name_idx, bool force_new = false, const SrcLocation& loc = {}); + +} // namespace tolk diff --git a/tolk/tolk-main.cpp b/tolk/tolk-main.cpp new file mode 100644 index 000000000..e4b6ebdb6 --- /dev/null +++ b/tolk/tolk-main.cpp @@ -0,0 +1,122 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. 
+*/ +#include "tolk.h" +#include +#include +#include "git.h" + +void usage(const char* progname) { + std::cerr + << "usage: " << progname + << " [-vIAPSR][-O][-i][-o][-W] { ...}\n" + "\tGenerates Fift TVM assembler code from a Tolk source\n" + "-I\tEnables interactive mode (parse stdin)\n" + "-o\tWrites generated code into specified file instead of stdout\n" + "-v\tIncreases verbosity level (extra information output into stderr)\n" + "-i\tSets indentation for the output code (in two-space units)\n" + "-A\tPrefix code with `\"Asm.fif\" include` preamble\n" + "-O\tSets optimization level (2 by default)\n" + "-P\tEnvelope code into PROGRAM{ ... }END>c\n" + "-S\tInclude stack layout comments in the output code\n" + "-R\tInclude operation rewrite comments in the output code\n" + "-W\tInclude Fift code to serialize and save generated code into specified BoC file. Enables " + "-A and -P.\n" + "\t-s\tOutput semantic version of Tolk and exit\n" + "\t-V\tShow Tolk build information\n"; + std::exit(2); +} + +int main(int argc, char* const argv[]) { + int i; + std::string output_filename; + while ((i = getopt(argc, argv, "Ahi:Io:O:PRsSvW:V")) != -1) { + switch (i) { + case 'A': + tolk::asm_preamble = true; + break; + case 'I': + tolk::interactive = true; + break; + case 'i': + tolk::indent = std::max(0, atoi(optarg)); + break; + case 'o': + output_filename = optarg; + break; + case 'O': + tolk::opt_level = std::max(0, atoi(optarg)); + break; + case 'P': + tolk::program_envelope = true; + break; + case 'R': + tolk::op_rewrite_comments = true; + break; + case 'S': + tolk::stack_layout_comments = true; + break; + case 'v': + ++tolk::verbosity; + break; + case 'W': + tolk::boc_output_filename = optarg; + tolk::asm_preamble = tolk::program_envelope = true; + break; + case 's': + std::cout << tolk::tolk_version << "\n"; + std::exit(0); + case 'V': + std::cout << "Tolk semantic version: v" << tolk::tolk_version << "\n"; + std::cout << "Build information: [ Commit: " << 
GitMetadata::CommitSHA1() << ", Date: " << GitMetadata::CommitDate() << "]\n"; + std::exit(0); + case 'h': + default: + usage(argv[0]); + } + } + + std::ostream *outs = &std::cout; + + std::unique_ptr fs; + if (!output_filename.empty()) { + fs = std::make_unique(output_filename, std::fstream::trunc | std::fstream::out); + if (!fs->is_open()) { + std::cerr << "failed to create output file " << output_filename << '\n'; + return 2; + } + outs = fs.get(); + } + + std::vector sources; + + while (optind < argc) { + sources.push_back(std::string(argv[optind++])); + } + + tolk::read_callback = tolk::fs_read_callback; + + return tolk::tolk_proceed(sources, *outs, std::cerr); +} diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp new file mode 100644 index 000000000..a7ca37b42 --- /dev/null +++ b/tolk/tolk-wasm.cpp @@ -0,0 +1,148 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. 
If you delete this exception statement + from all source files in the program, then also delete it here. +*/ +#include "tolk.h" +#include "git.h" +#include "td/utils/JsonBuilder.h" +#include "fift/utils.h" +#include "td/utils/base64.h" +#include "td/utils/Status.h" +#include +#include + +td::Result compile_internal(char *config_json) { + TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json))) + auto &obj = input_json.get_object(); + + TRY_RESULT(opt_level, td::get_json_object_int_field(obj, "optLevel", false)); + TRY_RESULT(sources_obj, td::get_json_object_field(obj, "sources", td::JsonValue::Type::Array, false)); + + auto &sources_arr = sources_obj.get_array(); + + std::vector sources; + + for (auto &item : sources_arr) { + sources.push_back(item.get_string().str()); + } + + tolk::opt_level = std::max(0, opt_level); + tolk::program_envelope = true; + tolk::verbosity = 0; + tolk::indent = 1; + + std::ostringstream outs, errs; + auto compile_res = tolk::tolk_proceed(sources, outs, errs); + + if (compile_res != 0) { + return td::Status::Error(std::string("Tolk compilation error: ") + errs.str()); + } + + TRY_RESULT(code_cell, fift::compile_asm(outs.str(), "/fiftlib/", false)); + TRY_RESULT(boc, vm::std_boc_serialize(code_cell)); + + td::JsonBuilder result_json; + auto result_obj = result_json.enter_object(); + result_obj("status", "ok"); + result_obj("codeBoc", td::base64_encode(boc)); + result_obj("fiftCode", outs.str()); + result_obj("codeHashHex", code_cell->get_hash().to_hex()); + result_obj.leave(); + + outs.clear(); + errs.clear(); + + return result_json.string_builder().as_cslice().str(); +} + +/// Callback used to retrieve additional source files or data. +/// +/// @param _kind The kind of callback (a string). +/// @param _data The data for the callback (a string). +/// @param o_contents A pointer to the contents of the file, if found. Allocated via malloc(). +/// @param o_error A pointer to an error message, if there is one. 
Allocated via malloc(). +/// +/// The callback implementor must use malloc() to allocate storage for +/// contents or error. The callback implementor must use free() to free +/// said storage after tolk_compile returns. +/// +/// If the callback is not supported, *o_contents and *o_error must be set to NULL. +typedef void (*CStyleReadFileCallback)(char const* _kind, char const* _data, char** o_contents, char** o_error); + +tolk::ReadCallback::Callback wrapReadCallback(CStyleReadFileCallback _readCallback) +{ + tolk::ReadCallback::Callback readCallback; + if (_readCallback) { + readCallback = [=](tolk::ReadCallback::Kind _kind, char const* _data) -> td::Result { + char* contents_c = nullptr; + char* error_c = nullptr; + _readCallback(tolk::ReadCallback::kindString(_kind).data(), _data, &contents_c, &error_c); + if (!contents_c && !error_c) { + return td::Status::Error("Callback not supported"); + } + if (contents_c) { + return contents_c; + } + return td::Status::Error(std::string(error_c)); + }; + } + return readCallback; +} + +extern "C" { + +const char* version() { + auto version_json = td::JsonBuilder(); + auto obj = version_json.enter_object(); + obj("tolkVersion", tolk::tolk_version); + obj("tolkFiftLibCommitHash", GitMetadata::CommitSHA1()); + obj("tolkFiftLibCommitDate", GitMetadata::CommitDate()); + obj.leave(); + return strdup(version_json.string_builder().as_cslice().c_str()); +} + +const char *tolk_compile(char *config_json, CStyleReadFileCallback callback) { + if (callback) { + tolk::read_callback = wrapReadCallback(callback); + } else { + tolk::read_callback = tolk::fs_read_callback; + } + + auto res = compile_internal(config_json); + + if (res.is_error()) { + auto result = res.move_as_error(); + auto error_res = td::JsonBuilder(); + auto error_o = error_res.enter_object(); + error_o("status", "error"); + error_o("message", result.message().str()); + error_o.leave(); + return strdup(error_res.string_builder().as_cslice().c_str()); + } + + auto 
res_string = res.move_as_ok(); + + return strdup(res_string.c_str()); +} +} diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp new file mode 100644 index 000000000..eb15155a5 --- /dev/null +++ b/tolk/tolk.cpp @@ -0,0 +1,260 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. 
+*/ +#include "tolk.h" +#include "srcread.h" +#include "lexer.h" +#include +#include "git.h" +#include +#include "td/utils/port/path.h" + +namespace tolk { + +int verbosity, indent, opt_level = 2; +bool stack_layout_comments, op_rewrite_comments, program_envelope, asm_preamble; +bool interactive = false; +GlobalPragma pragma_allow_post_modification{"allow-post-modification"}; +GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"}; +std::string generated_from, boc_output_filename; +ReadCallback::Callback read_callback; + +td::Result fs_read_callback(ReadCallback::Kind kind, const char* query) { + switch (kind) { + case ReadCallback::Kind::ReadFile: { + std::ifstream ifs{query}; + if (ifs.fail()) { + auto msg = std::string{"cannot open source file `"} + query + "`"; + return td::Status::Error(msg); + } + std::stringstream ss; + ss << ifs.rdbuf(); + return ss.str(); + } + case ReadCallback::Kind::Realpath: { + return td::realpath(td::CSlice(query)); + } + default: { + return td::Status::Error("Unknown query kind"); + } + } +} + +/* + * + * OUTPUT CODE GENERATOR + * + */ + +void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &errs) { + SymValCodeFunc* func_val = dynamic_cast(func_sym->value); + tolk_assert(func_val); + std::string name = symbols.get_name(func_sym->sym_idx); + if (verbosity >= 2) { + errs << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl; + } + if (!func_val->code) { + errs << "( function `" << name << "` undefined )\n"; + throw ParseError(func_sym->loc, name); + } else { + CodeBlob& code = *(func_val->code); + if (verbosity >= 3) { + code.print(errs, 9); + } + code.simplify_var_types(); + if (verbosity >= 5) { + errs << "after simplify_var_types: \n"; + code.print(errs, 0); + } + code.prune_unreachable_code(); + if (verbosity >= 5) { + errs << "after prune_unreachable: \n"; + code.print(errs, 0); + } + code.split_vars(true); + if (verbosity >= 5) { + errs << "after split_vars: 
\n"; + code.print(errs, 0); + } + for (int i = 0; i < 8; i++) { + code.compute_used_code_vars(); + if (verbosity >= 4) { + errs << "after compute_used_vars: \n"; + code.print(errs, 6); + } + code.fwd_analyze(); + if (verbosity >= 5) { + errs << "after fwd_analyze: \n"; + code.print(errs, 6); + } + code.prune_unreachable_code(); + if (verbosity >= 5) { + errs << "after prune_unreachable: \n"; + code.print(errs, 6); + } + } + code.mark_noreturn(); + if (verbosity >= 3) { + code.print(errs, 15); + } + if (verbosity >= 2) { + errs << "\n---------- resulting code for " << name << " -------------\n"; + } + bool inline_func = (func_val->flags & 1); + bool inline_ref = (func_val->flags & 2); + const char* modifier = ""; + if (inline_func) { + modifier = "INLINE"; + } else if (inline_ref) { + modifier = "REF"; + } + outs << std::string(indent * 2, ' ') << name << " PROC" << modifier << ":<{\n"; + int mode = 0; + if (stack_layout_comments) { + mode |= Stack::_StkCmt | Stack::_CptStkCmt; + } + if (opt_level < 2) { + mode |= Stack::_DisableOpt; + } + auto fv = dynamic_cast(func_sym->value); + // Flags: 1 - inline, 2 - inline_ref + if (fv && (fv->flags & 1) && code.ops->noreturn()) { + mode |= Stack::_InlineFunc; + } + if (fv && (fv->flags & 3)) { + mode |= Stack::_InlineAny; + } + code.generate_code(outs, mode, indent + 1); + outs << std::string(indent * 2, ' ') << "}>\n"; + if (verbosity >= 2) { + errs << "--------------\n"; + } + } +} + +int generate_output(std::ostream &outs, std::ostream &errs) { + if (asm_preamble) { + outs << "\"Asm.fif\" include\n"; + } + outs << "// automatically generated from " << generated_from << std::endl; + if (program_envelope) { + outs << "PROGRAM{\n"; + } + for (SymDef* func_sym : glob_func) { + SymValCodeFunc* func_val = dynamic_cast(func_sym->value); + tolk_assert(func_val); + std::string name = symbols.get_name(func_sym->sym_idx); + outs << std::string(indent * 2, ' '); + if (func_val->method_id.is_null()) { + outs << "DECLPROC " << name << 
"\n"; + } else { + outs << func_val->method_id << " DECLMETHOD " << name << "\n"; + } + } + for (SymDef* gvar_sym : glob_vars) { + tolk_assert(dynamic_cast(gvar_sym->value)); + std::string name = symbols.get_name(gvar_sym->sym_idx); + outs << std::string(indent * 2, ' ') << "DECLGLOBVAR " << name << "\n"; + } + int errors = 0; + for (SymDef* func_sym : glob_func) { + try { + generate_output_func(func_sym, outs, errs); + } catch (Error& err) { + errs << "cannot generate code for function `" << symbols.get_name(func_sym->sym_idx) << "`:\n" + << err << std::endl; + ++errors; + } + } + if (program_envelope) { + outs << "}END>c\n"; + } + if (!boc_output_filename.empty()) { + outs << "2 boc+>B \"" << boc_output_filename << "\" B>file\n"; + } + return errors; +} + +void output_inclusion_stack(std::ostream &errs) { + while (!inclusion_locations.empty()) { + SrcLocation loc = inclusion_locations.top(); + inclusion_locations.pop(); + if (loc.fdescr) { + errs << "note: included from "; + loc.show(errs); + errs << std::endl; + } + } +} + + +int tolk_proceed(const std::vector &sources, std::ostream &outs, std::ostream &errs) { + if (program_envelope && !indent) { + indent = 1; + } + + define_keywords(); + define_builtins(); + + int ok = 0, proc = 0; + try { + for (auto src : sources) { + ok += parse_source_file(src.c_str(), {}, true); + proc++; + } + if (interactive) { + generated_from += "stdin "; + ok += parse_source_stdin(); + proc++; + } + if (ok < proc) { + throw Fatal{"output code generation omitted because of errors"}; + } + if (!proc) { + throw Fatal{"no source files, no output"}; + } + pragma_allow_post_modification.check_enable_in_libs(); + pragma_compute_asm_ltr.check_enable_in_libs(); + return generate_output(outs, errs); + } catch (Fatal& fatal) { + errs << "fatal: " << fatal << std::endl; + output_inclusion_stack(errs); + return 2; + } catch (Error& error) { + errs << error << std::endl; + output_inclusion_stack(errs); + return 2; + } catch (UnifyError& unif_err) 
{ + errs << "fatal: "; + unif_err.print_message(errs); + errs << std::endl; + output_inclusion_stack(errs); + return 2; + } + + return 0; +} + +} // namespace tolk diff --git a/tolk/tolk.h b/tolk/tolk.h new file mode 100644 index 000000000..664410992 --- /dev/null +++ b/tolk/tolk.h @@ -0,0 +1,1785 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include "common/refcnt.hpp" +#include "common/bigint.hpp" +#include "common/refint.h" +#include "srcread.h" +#include "lexer.h" +#include "symtable.h" +#include "td/utils/Status.h" + +#define tolk_assert(expr) \ + (bool(expr) ? 
void(0) \ + : throw Fatal(PSTRING() << "Assertion failed at " << __FILE__ << ":" << __LINE__ << ": " << #expr)) + +namespace tolk { + +extern int verbosity; +extern bool op_rewrite_comments; +extern std::string generated_from; + +constexpr int optimize_depth = 20; + +const std::string tolk_version{"0.4.5"}; +/* Keyword and token codes. _Eof is -1, the generic classes (_Ident, _Number, _Special, _String) start at 0, and the named keywords are numbered consecutively from _Return = 0x80. */ +enum Keyword { + _Eof = -1, + _Ident = 0, + _Number, + _Special, + _String, + _Return = 0x80, + _Var, + _Repeat, + _Do, + _While, + _Until, + _Try, + _Catch, + _If, + _Ifnot, + _Then, + _Else, + _Elseif, + _Elseifnot, + _Eq, + _Neq, + _Leq, + _Geq, + _Spaceship, + _Lshift, + _Rshift, + _RshiftR, + _RshiftC, + _DivR, + _DivC, + _ModR, + _ModC, + _DivMod, + _PlusLet, + _MinusLet, + _TimesLet, + _DivLet, + _DivRLet, + _DivCLet, + _ModLet, + _ModRLet, + _ModCLet, + _LshiftLet, + _RshiftLet, + _RshiftRLet, + _RshiftCLet, + _AndLet, + _OrLet, + _XorLet, + _Int, + _Cell, + _Slice, + _Builder, + _Cont, + _Tuple, + _Type, + _Mapsto, + _Forall, + _Asm, + _Impure, + _Global, + _Extern, + _Inline, + _InlineRef, + _AutoApply, + _MethodId, + _Operator, + _Infix, + _Infixl, + _Infixr, + _Const, + _PragmaHashtag, + _IncludeHashtag +}; + +void define_keywords(); +/* Identifier subclass tag: wraps an int holding undef, dotid or tildeid and converts implicitly from and to int. The conversion operator is const-qualified so that const IdSc values can be read as well. */ +class IdSc { + int cls; + + public: + enum { undef = 0, dotid = 1, tildeid = 2 }; + IdSc(int _cls = undef) : cls(_cls) { + } + operator int() const { + return cls; + } +}; + +// symbol subclass: +// 1 = begins with .
(a const method) +// 2 = begins with ~ (a non-const method) +// 0 = else + +/* + * + * TYPE EXPRESSIONS + * + */ + +struct TypeExpr { + enum te_type { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_Type, te_ForAll } constr; + enum { + _Int = Keyword::_Int, + _Cell = Keyword::_Cell, + _Slice = Keyword::_Slice, + _Builder = Keyword::_Builder, + _Cont = Keyword::_Cont, + _Tuple = Keyword::_Tuple, + _Type = Keyword::_Type + }; + int value; + int minw, maxw; + static constexpr int w_inf = 1023; + std::vector args; + bool was_forall_var = false; + TypeExpr(te_type _constr, int _val = 0) : constr(_constr), value(_val), minw(0), maxw(w_inf) { + } + TypeExpr(te_type _constr, int _val, int width) : constr(_constr), value(_val), minw(width), maxw(width) { + } + TypeExpr(te_type _constr, std::vector list) + : constr(_constr), value((int)list.size()), args(std::move(list)) { + compute_width(); + } + TypeExpr(te_type _constr, std::initializer_list list) + : constr(_constr), value((int)list.size()), args(std::move(list)) { + compute_width(); + } + TypeExpr(te_type _constr, TypeExpr* elem0) : constr(_constr), value(1), args{elem0} { + compute_width(); + } + TypeExpr(te_type _constr, TypeExpr* elem0, std::vector list) + : constr(_constr), value((int)list.size() + 1), args{elem0} { + args.insert(args.end(), list.begin(), list.end()); + compute_width(); + } + TypeExpr(te_type _constr, TypeExpr* elem0, std::initializer_list list) + : constr(_constr), value((int)list.size() + 1), args{elem0} { + args.insert(args.end(), list.begin(), list.end()); + compute_width(); + } + bool is_atomic() const { + return constr == te_Atomic; + } + bool is_atomic(int v) const { + return constr == te_Atomic && value == v; + } + bool is_int() const { + return is_atomic(_Int); + } + bool is_var() const { + return constr == te_Var; + } + bool is_map() const { + return constr == te_Map; + } + bool is_tuple() const { + return constr == te_Tuple; + } + bool has_fixed_width() const { 
+ return minw == maxw; + } + int get_width() const { + return has_fixed_width() ? minw : -1; + } + void compute_width(); + bool recompute_width(); + void show_width(std::ostream& os); + std::ostream& print(std::ostream& os, int prio = 0); + void replace_with(TypeExpr* te2); + int extract_components(std::vector& comp_list); + static int holes, type_vars; + static TypeExpr* new_hole() { + return new TypeExpr{te_Unknown, ++holes}; + } + static TypeExpr* new_hole(int width) { + return new TypeExpr{te_Unknown, ++holes, width}; + } + static TypeExpr* new_unit() { + return new TypeExpr{te_Tensor, 0, 0}; + } + static TypeExpr* new_atomic(int value) { + return new TypeExpr{te_Atomic, value, 1}; + } + static TypeExpr* new_map(TypeExpr* from, TypeExpr* to); + static TypeExpr* new_func() { + return new_map(new_hole(), new_hole()); + } + static TypeExpr* new_tensor(std::vector list, bool red = true) { + return red && list.size() == 1 ? list[0] : new TypeExpr{te_Tensor, std::move(list)}; + } + static TypeExpr* new_tensor(std::initializer_list list) { + return new TypeExpr{te_Tensor, std::move(list)}; + } + static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2) { + return new_tensor({te1, te2}); + } + static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2, TypeExpr* te3) { + return new_tensor({te1, te2, te3}); + } + static TypeExpr* new_tuple(TypeExpr* arg0) { + return new TypeExpr{te_Tuple, arg0}; + } + static TypeExpr* new_tuple(std::vector list, bool red = false) { + return new_tuple(new_tensor(std::move(list), red)); + } + static TypeExpr* new_tuple(std::initializer_list list) { + return new_tuple(new_tensor(std::move(list))); + } + static TypeExpr* new_var() { + return new TypeExpr{te_Var, --type_vars, 1}; + } + static TypeExpr* new_var(int idx) { + return new TypeExpr{te_Var, idx, 1}; + } + static TypeExpr* new_forall(std::vector list, TypeExpr* body) { + return new TypeExpr{te_ForAll, body, std::move(list)}; + } + static TypeExpr* new_forall(std::initializer_list 
list, TypeExpr* body) { + return new TypeExpr{te_ForAll, body, std::move(list)}; + } + static bool remove_indirect(TypeExpr*& te, TypeExpr* forbidden = nullptr); + static std::vector remove_forall(TypeExpr*& te); + static bool remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars); +}; + +std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr); + +struct UnifyError { + TypeExpr* te1; + TypeExpr* te2; + std::string msg; + UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), msg(_msg) { + } + void print_message(std::ostream& os) const; + std::string message() const; +}; + +std::ostream& operator<<(std::ostream& os, const UnifyError& ue); + +void unify(TypeExpr*& te1, TypeExpr*& te2); + +// extern int TypeExpr::holes; + +/* + * + * ABSTRACT CODE + * + */ + +using const_idx_t = int; + +struct TmpVar { + TypeExpr* v_type; + var_idx_t idx; + enum { _In = 1, _Named = 2, _Tmp = 4, _UniqueName = 0x20 }; + int cls; + sym_idx_t name; + int coord; + std::unique_ptr where; + std::vector> on_modification; + bool undefined = false; + TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type = 0, SymDef* sym = 0, const SrcLocation* loc = 0); + void show(std::ostream& os, int omit_idx = 0) const; + void dump(std::ostream& os) const; + void set_location(const SrcLocation& loc); + std::string to_string() const { + std::ostringstream s; + show(s, 2); + return s.str(); + } +}; + +struct VarDescr { + var_idx_t idx; + enum { _Last = 1, _Unused = 2 }; + int flags; + enum { + _Const = 16, + _Int = 32, + _Zero = 64, + _NonZero = 128, + _Pos = 256, + _Neg = 512, + _Bool = 1024, + _Bit = 2048, + _Finite = 4096, + _Nan = 8192, + _Even = 16384, + _Odd = 32768, + _Null = (1 << 16), + _NotNull = (1 << 17) + }; + static constexpr int ConstZero = _Int | _Zero | _Pos | _Neg | _Bool | _Bit | _Finite | _Even | _NotNull; + static constexpr int ConstOne = _Int | _NonZero | _Pos | _Bit | _Finite | _Odd | _NotNull; + static constexpr int ConstTrue = 
_Int | _NonZero | _Neg | _Bool | _Finite | _Odd | _NotNull; + static constexpr int ValBit = ConstZero & ConstOne; + static constexpr int ValBool = ConstZero & ConstTrue; + static constexpr int FiniteInt = _Int | _Finite | _NotNull; + static constexpr int FiniteUInt = FiniteInt | _Pos; + int val; + td::RefInt256 int_const; + std::string str_const; + + VarDescr(var_idx_t _idx = -1, int _flags = 0, int _val = 0) : idx(_idx), flags(_flags), val(_val) { + } + bool operator<(var_idx_t other_idx) const { + return idx < other_idx; + } + bool is_unused() const { + return flags & _Unused; + } + bool is_last() const { + return flags & _Last; + } + bool always_true() const { + return val & _NonZero; + } + bool always_false() const { + return val & _Zero; + } + bool always_nonzero() const { + return val & _NonZero; + } + bool always_zero() const { + return val & _Zero; + } + bool always_even() const { + return val & _Even; + } + bool always_odd() const { + return val & _Odd; + } + bool always_null() const { + return val & _Null; + } + bool always_not_null() const { + return val & _NotNull; + } + bool is_const() const { + return val & _Const; + } + bool is_int_const() const { + return (val & (_Int | _Const)) == (_Int | _Const) && int_const.not_null(); + } + bool always_nonpos() const { + return val & _Neg; + } + bool always_nonneg() const { + return val & _Pos; + } + bool always_pos() const { + return (val & (_Pos | _NonZero)) == (_Pos | _NonZero); + } + bool always_neg() const { + return (val & (_Neg | _NonZero)) == (_Neg | _NonZero); + } + bool always_finite() const { + return val & _Finite; + } + bool always_less(const VarDescr& other) const; + bool always_leq(const VarDescr& other) const; + bool always_greater(const VarDescr& other) const; + bool always_geq(const VarDescr& other) const; + bool always_equal(const VarDescr& other) const; + bool always_neq(const VarDescr& other) const; + void unused() { + flags |= _Unused; + } + void clear_unused() { + flags &= ~_Unused; + } + 
void set_const(long long value); + void set_const(td::RefInt256 value); + void set_const(std::string value); + void set_const_nan(); + void operator+=(const VarDescr& y) { /* despite the += spelling this intersects: only flag bits set in both operands survive */ + flags &= y.flags; + } + void operator|=(const VarDescr& y); + void operator&=(const VarDescr& y); + void set_value(const VarDescr& y); + void set_value(VarDescr&& y); + void set_value(const VarDescr* y) { /* null pointer is accepted and means "leave unchanged" */ + if (y) { + set_value(*y); + } + } + void clear_value(); + void show_value(std::ostream& os) const; + void show(std::ostream& os, const char* var_name = nullptr) const; +}; +/* Stream inserter for VarDescr: calls vd.show(os) with the default var_name and returns os. */ +inline std::ostream& operator<<(std::ostream& os, const VarDescr& vd) { + vd.show(os); + return os; +} +/* A list of VarDescr entries together with an unreachable flag (false by default). The += overloads taking an index or an index list forward to add_var / add_vars. */ +struct VarDescrList { + std::vector list; + bool unreachable{false}; + VarDescrList() : list() { + } + VarDescrList(const std::vector& _list) : list(_list) { + } + VarDescrList(std::vector&& _list) : list(std::move(_list)) { + } + std::size_t size() const { + return list.size(); + } + VarDescr* operator[](var_idx_t idx); + const VarDescr* operator[](var_idx_t idx) const; + VarDescrList operator+(const VarDescrList& y) const; + VarDescrList& operator+=(const VarDescrList& y); + VarDescrList& clear_last(); + VarDescrList& operator+=(var_idx_t idx) { + return add_var(idx); + } + VarDescrList& operator+=(const std::vector& idx_list) { + return add_vars(idx_list); + } + VarDescrList& add_var(var_idx_t idx, bool unused = false); + VarDescrList& add_vars(const std::vector& idx_list, bool unused = false); + VarDescrList& operator-=(const std::vector& idx_list); + VarDescrList& operator-=(var_idx_t idx); + std::size_t count(const std::vector idx_list) const; + std::size_t count_used(const std::vector idx_list) const; + VarDescr& add(var_idx_t idx); + VarDescr& add_newval(var_idx_t idx); + VarDescrList& operator&=(const VarDescrList& values); + VarDescrList& import_values(const VarDescrList& values); + VarDescrList operator|(const VarDescrList& y) const; + VarDescrList& operator|=(const VarDescrList& values); + void
show(std::ostream& os) const; + void set_unreachable() { /* drops all entries and raises the unreachable flag */ + list.clear(); + unreachable = true; + } +}; +/* Stream inserter for VarDescrList: calls values.show(os) and returns os. */ +inline std::ostream& operator<<(std::ostream& os, const VarDescrList& values) { + values.show(os); + return os; +} + +struct CodeBlob; +/* Forward iterator over a singly linked chain of T nodes. Advancing follows ptr->next.get(), so T must expose a smart-pointer member named next. A default-constructed iterator holds nullptr; the begin/end helpers in this header return exactly that value as the end marker. Incrementing an iterator that already holds nullptr dereferences a null pointer. */ +template +class ListIterator { + T* ptr; + + public: + ListIterator() : ptr(nullptr) { + } + ListIterator(T* _ptr) : ptr(_ptr) { + } + ListIterator& operator++() { + ptr = ptr->next.get(); + return *this; + } + ListIterator operator++(int) { /* post-increment: returns an iterator to the node visited before advancing */ + T* z = ptr; + ptr = ptr->next.get(); + return ListIterator{z}; + } + T& operator*() const { + return *ptr; + } + T* operator->() const { + return ptr; + } + bool operator==(const ListIterator& y) const { + return ptr == y.ptr; + } + bool operator!=(const ListIterator& y) const { + return ptr != y.ptr; + } +}; + +struct Stack; +/* One node of an operation list: cl holds the operation kind from the first enum, flags holds bits from the second enum, and next owns the following node. */ +struct Op { + enum { + _Undef, + _Nop, + _Call, + _CallInd, + _Let, + _IntConst, + _GlobVar, + _SetGlob, + _Import, + _Return, + _Tuple, + _UnTuple, + _If, + _While, + _Until, + _Repeat, + _Again, + _TryCatch, + _SliceConst + }; + int cl; + enum { _Disabled = 1, _Reachable = 2, _NoReturn = 4, _ImpureR = 8, _ImpureW = 16, _Impure = 24 }; /* _Impure equals _ImpureR | _ImpureW */ + int flags; + std::unique_ptr next; + SymDef* fun_ref; + SrcLocation where; + VarDescrList var_info; + std::vector args; + std::vector left, right; + std::unique_ptr block0, block1; + td::RefInt256 int_const; + std::string str_const; + Op(const SrcLocation& _where = {}, int _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) { + } + Op(const SrcLocation& _where, int _cl, const std::vector& _left) + : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left) { + } + Op(const SrcLocation& _where, int _cl, std::vector&& _left) + : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(std::move(_left)) { + } + Op(const SrcLocation& _where, int _cl, const std::vector& _left, td::RefInt256 _const) + : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), int_const(_const) { + } + Op(const
SrcLocation& _where, int _cl, const std::vector& _left, std::string _const) + : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), str_const(_const) { + } + Op(const SrcLocation& _where, int _cl, const std::vector& _left, const std::vector& _right, + SymDef* _fun = nullptr) + : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(_left), right(_right) { + } + Op(const SrcLocation& _where, int _cl, std::vector&& _left, std::vector&& _right, + SymDef* _fun = nullptr) + : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) { + } + bool disabled() const { + return flags & _Disabled; + } + bool enabled() const { + return !disabled(); + } + void disable() { + flags |= _Disabled; + } + bool unreachable() { + return !(flags & _Reachable); + } + void flags_set_clear(int set, int clear); + void show(std::ostream& os, const std::vector& vars, std::string pfx = "", int mode = 0) const; + void show_var_list(std::ostream& os, const std::vector& idx_list, const std::vector& vars) const; + void show_var_list(std::ostream& os, const std::vector& list, const std::vector& vars) const; + static void show_block(std::ostream& os, const Op* block, const std::vector& vars, std::string pfx = "", + int mode = 0); + void split_vars(const std::vector& vars); + static void split_var_list(std::vector& var_list, const std::vector& vars); + bool compute_used_vars(const CodeBlob& code, bool edit); + bool std_compute_used_vars(bool disabled = false); + bool set_var_info(const VarDescrList& new_var_info); + bool set_var_info(VarDescrList&& new_var_info); + bool set_var_info_except(const VarDescrList& new_var_info, const std::vector& var_list); + bool set_var_info_except(VarDescrList&& new_var_info, const std::vector& var_list); + void prepare_args(VarDescrList values); + VarDescrList fwd_analyze(VarDescrList values); + bool set_noreturn(bool nr); + bool mark_noreturn(); + bool noreturn() const { + return flags & _NoReturn; + } + bool 
is_empty() const { + return cl == _Nop && !next; + } + bool is_pure() const { + return !(flags & _Impure); + } + bool generate_code_step(Stack& stack); + void generate_code_all(Stack& stack); + Op& last() { /* recursive walk along next to the final node of the chain */ + return next ? next->last() : *this; + } + const Op& last() const { + return next ? next->last() : *this; + } + ListIterator begin() { + return ListIterator{this}; + } + ListIterator end() const { + return ListIterator{}; + } + ListIterator cbegin() { + return ListIterator{this}; + } + ListIterator cend() const { + return ListIterator{}; + } +}; +/* Free begin/end/cbegin/cend helpers: each begin variant wraps the first node (taken from a unique_ptr or a raw pointer) in a ListIterator; each end variant ignores its argument and returns the default, null iterator. */ +inline ListIterator begin(const std::unique_ptr& op_list) { + return ListIterator{op_list.get()}; +} + +inline ListIterator end(const std::unique_ptr& op_list) { + return ListIterator{}; +} + +inline ListIterator cbegin(const Op* op_list) { + return ListIterator{op_list}; +} + +inline ListIterator cend(const Op* op_list) { + return ListIterator{}; +} + +inline ListIterator begin(const Op* op_list) { + return ListIterator{op_list}; +} + +inline ListIterator end(const Op* op_list) { + return ListIterator{}; +} + +inline ListIterator begin(Op* op_list) { + return ListIterator{op_list}; +} + +inline ListIterator end(Op* op_list) { + return ListIterator{}; +} + +typedef std::tuple FormalArg; +typedef std::vector FormalArgList; + +struct AsmOpList; +/* Code container for one function body: the variable table (vars), the owned operation chain (ops), and cur_ops, which points at the slot where the next operation will be stored (initially &ops). cur_ops_stack saves outer insertion points. */ +struct CodeBlob { + enum { _AllowPostModification = 1, _ComputeAsmLtr = 2 }; + int var_cnt, in_var_cnt, op_cnt; + TypeExpr* ret_type; + std::string name; + SrcLocation loc; + std::vector vars; + std::unique_ptr ops; + std::unique_ptr* cur_ops; + std::stack*> cur_ops_stack; + int flags = 0; + bool require_callxargs = false; + CodeBlob(TypeExpr* ret = nullptr) : var_cnt(0), in_var_cnt(0), op_cnt(0), ret_type(ret), cur_ops(&ops) { + } + template + Op& emplace_back(const Args&... 
args) { + Op& res = *(*cur_ops = std::make_unique(args...)); + cur_ops = &(res.next); + return res; + } + bool import_params(FormalArgList arg_list); + var_idx_t create_var(int cls, TypeExpr* var_type = 0, SymDef* sym = 0, const SrcLocation* loc = 0); + var_idx_t create_tmp_var(TypeExpr* var_type = 0, const SrcLocation* loc = 0) { + return create_var(TmpVar::_Tmp, var_type, nullptr, loc); + } + int split_vars(bool strict = false); + bool compute_used_code_vars(); + bool compute_used_code_vars(std::unique_ptr& ops, const VarDescrList& var_info, bool edit) const; + void print(std::ostream& os, int flags = 0) const; + void push_set_cur(std::unique_ptr& new_cur_ops) { + cur_ops_stack.push(cur_ops); + cur_ops = &new_cur_ops; + } + void close_blk(const SrcLocation& location) { + *cur_ops = std::make_unique(location, Op::_Nop); + } + void pop_cur() { + cur_ops = cur_ops_stack.top(); + cur_ops_stack.pop(); + } + void close_pop_cur(const SrcLocation& location) { + close_blk(location); + pop_cur(); + } + void simplify_var_types(); + void flags_set_clear(int set, int clear); + void prune_unreachable_code(); + void fwd_analyze(); + void mark_noreturn(); + void generate_code(AsmOpList& out_list, int mode = 0); + void generate_code(std::ostream& os, int mode = 0, int indent = 0); + + void on_var_modification(var_idx_t idx, const SrcLocation& here) const { + for (auto& f : vars.at(idx).on_modification) { + f(here); + } + } +}; + +/* + * + * SYMBOL VALUES + * + */ + +struct SymVal : SymValBase { + TypeExpr* sym_type; + td::RefInt256 method_id; + bool impure; + bool auto_apply{false}; + short flags; // +1 = inline, +2 = inline_ref + SymVal(int _type, int _idx, TypeExpr* _stype = nullptr, bool _impure = false) + : SymValBase(_type, _idx), sym_type(_stype), impure(_impure), flags(0) { + } + ~SymVal() override = default; + TypeExpr* get_type() const { + return sym_type; + } + virtual const std::vector* get_arg_order() const { + return nullptr; + } + virtual const std::vector* 
get_ret_order() const { + return nullptr; + } +}; +/* Function symbol value: a SymVal constructed with kind _Func that may carry explicit argument and return orderings. The order getters return nullptr when the corresponding list is empty, otherwise a pointer to the member vector. */ +struct SymValFunc : SymVal { + std::vector arg_order, ret_order; + ~SymValFunc() override = default; + SymValFunc(int val, TypeExpr* _ft, bool _impure = false) : SymVal(_Func, val, _ft, _impure) { + } + SymValFunc(int val, TypeExpr* _ft, std::initializer_list _arg_order, std::initializer_list _ret_order = {}, + bool _impure = false) + : SymVal(_Func, val, _ft, _impure), arg_order(_arg_order), ret_order(_ret_order) { + } + + const std::vector* get_arg_order() const override { + return arg_order.empty() ? nullptr : &arg_order; + } + const std::vector* get_ret_order() const override { + return ret_order.empty() ? nullptr : &ret_order; + } +}; +/* SymValFunc that additionally points at a CodeBlob; the pointer starts as nullptr and is a raw pointer (ownership is not expressed here). */ +struct SymValCodeFunc : SymValFunc { + CodeBlob* code; + ~SymValCodeFunc() override = default; + SymValCodeFunc(int val, TypeExpr* _ft, bool _impure = false) : SymValFunc(val, _ft, _impure), code(nullptr) { + } +}; +/* Symbol value that stores a TypeExpr pointer; derives directly from SymValBase. */ +struct SymValType : SymValBase { + TypeExpr* sym_type; + SymValType(int _type, int _idx, TypeExpr* _stype = nullptr) : SymValBase(_type, _idx), sym_type(_stype) { + } + ~SymValType() override = default; + TypeExpr* get_type() const { + return sym_type; + } +}; +/* Global-variable symbol value (kind _GlobVar): stores the variable type and an out_idx that defaults to 0. */ +struct SymValGlobVar : SymValBase { + TypeExpr* sym_type; + int out_idx{0}; + SymValGlobVar(int val, TypeExpr* gvtype, int oidx = 0) + : SymValBase(_GlobVar, val), sym_type(gvtype), out_idx(oidx) { + } + ~SymValGlobVar() override = default; + TypeExpr* get_type() const { + return sym_type; + } +}; +/* Constant symbol value (kind _Const): the integer constructor sets type to _Int, the string constructor sets it to _Slice. */ +struct SymValConst : SymValBase { + td::RefInt256 intval; + std::string strval; + Keyword type; + SymValConst(int idx, td::RefInt256 value) + : SymValBase(_Const, idx), intval(value) { + type = _Int; + } + SymValConst(int idx, std::string value) + : SymValBase(_Const, idx), strval(value) { + type = _Slice; + } + ~SymValConst() override = default; + td::RefInt256 get_int_value() const { + return intval; + } + std::string get_str_value() const { + return strval; + } + Keyword get_type() const { + return
type; + } +}; + +extern int glob_func_cnt, undef_func_cnt, glob_var_cnt; +extern std::vector glob_func, glob_vars; +extern std::set prohibited_var_names; + +/* + * + * PARSE SOURCE + * + */ +/* Groups the Kind enum, the kindString helper and the Callback alias; copy construction and copy assignment are deleted. */ +class ReadCallback { +public: + /// Noncopyable. + ReadCallback(ReadCallback const&) = delete; + ReadCallback& operator=(ReadCallback const&) = delete; + + enum class Kind + { + ReadFile, + Realpath + }; + /* Maps Kind::ReadFile to "source" and Kind::Realpath to "realpath". */ + static std::string kindString(Kind _kind) + { + switch (_kind) + { + case Kind::ReadFile: + return "source"; + case Kind::Realpath: + return "realpath"; + default: + throw ""; /* both enumerators are handled above; any other value throws an empty C string, which handlers for std::exception will not catch */ // todo ? + } + } + + /// File reading or generic query callback. + using Callback = std::function(ReadCallback::Kind, const char*)>; +}; + +// defined in parse-tolk.cpp +bool parse_source(std::istream* is, const FileDescr* fdescr); +bool parse_source_file(const char* filename, Lexem lex = {}, bool is_main = false); +bool parse_source_stdin(); + +extern std::stack inclusion_locations; + +/* + * + * EXPRESSIONS + * + */ +/* Expression node: cls selects the node kind from the first enum, flags carries the _Is* bits from the second, and args holds pointers to child expressions. */ +struct Expr { + enum { + _None, + _Apply, + _VarApply, + _TypeApply, + _MkTuple, + _Tensor, + _Const, + _Var, + _Glob, + _GlobVar, + _Letop, + _LetFirst, + _Hole, + _Type, + _CondExpr, + _SliceConst + }; + int cls; + int val{0}; + enum { _IsType = 1, _IsRvalue = 2, _IsLvalue = 4, _IsHole = 8, _IsNewVar = 16, _IsImpure = 32 }; + int flags{0}; + SrcLocation here; + td::RefInt256 intval; + std::string strval; + SymDef* sym{nullptr}; + TypeExpr* e_type{nullptr}; + std::vector args; + Expr(int c = _None) : cls(c) { + } + Expr(int c, const SrcLocation& loc) : cls(c), here(loc) { + } + Expr(int c, std::vector _args) : cls(c), args(std::move(_args)) { + } + Expr(int c, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { + } + Expr(int c, SymDef* _sym, std::initializer_list _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { + } + Expr(int c, SymDef* _sym, std::vector _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { + } + Expr(int c, sym_idx_t name_idx, 
std::initializer_list _arglist); + ~Expr() { + for (auto& arg_ptr : args) { + delete arg_ptr; + } + } + Expr* copy() const; + void pb_arg(Expr* expr) { + args.push_back(expr); + } + void set_val(int _val) { + val = _val; + } + bool is_rvalue() const { + return flags & _IsRvalue; + } + bool is_lvalue() const { + return flags & _IsLvalue; + } + bool is_type() const { + return flags & _IsType; + } + bool is_type_apply() const { + return cls == _TypeApply; + } + bool is_mktuple() const { + return cls == _MkTuple; + } + void chk_rvalue(const Lexem& lem) const; + void chk_lvalue(const Lexem& lem) const; + void chk_type(const Lexem& lem) const; + bool deduce_type(const Lexem& lem); + void set_location(const SrcLocation& loc) { + here = loc; + } + const SrcLocation& get_location() const { + return here; + } + int define_new_vars(CodeBlob& code); + int predefine_vars(); + std::vector pre_compile(CodeBlob& code, std::vector>* lval_globs = nullptr) const; + static std::vector pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, const SrcLocation& here); + var_idx_t new_tmp(CodeBlob& code) const; + std::vector new_tmp_vect(CodeBlob& code) const { + return {new_tmp(code)}; + } +}; + +/* + * + * GENERATE CODE + * + */ + +typedef std::vector StackLayout; +typedef std::pair var_const_idx_t; +typedef std::vector StackLayoutExt; +constexpr const_idx_t not_const = -1; +using Const = td::RefInt256; + +struct AsmOp { + enum Type { a_none, a_xchg, a_push, a_pop, a_const, a_custom, a_magic }; + int t{a_none}; + int indent{0}; + int a, b, c; + bool gconst{false}; + std::string op; + td::RefInt256 origin; + struct SReg { + int idx; + SReg(int _idx) : idx(_idx) { + } + }; + AsmOp() = default; + AsmOp(int _t) : t(_t) { + } + AsmOp(int _t, std::string _op) : t(_t), op(std::move(_op)) { + } + AsmOp(int _t, int _a) : t(_t), a(_a) { + } + AsmOp(int _t, int _a, std::string _op) : t(_t), a(_a), op(std::move(_op)) { + } + AsmOp(int _t, int _a, int _b) : t(_t), a(_a), b(_b) { + } + AsmOp(int _t, 
int _a, int _b, std::string _op) : t(_t), a(_a), b(_b), op(std::move(_op)) { + compute_gconst(); + } + AsmOp(int _t, int _a, int _b, std::string _op, td::RefInt256 x) : t(_t), a(_a), b(_b), op(std::move(_op)), origin(x) { + compute_gconst(); + } + AsmOp(int _t, int _a, int _b, int _c) : t(_t), a(_a), b(_b), c(_c) { + } + AsmOp(int _t, int _a, int _b, int _c, std::string _op) : t(_t), a(_a), b(_b), c(_c), op(std::move(_op)) { + } + void out(std::ostream& os) const; + void out_indent_nl(std::ostream& os, bool no_nl = false) const; + std::string to_string() const; + void compute_gconst() { + gconst = (is_custom() && (op == "PUSHNULL" || op == "NEWC" || op == "NEWB" || op == "TRUE" || op == "FALSE" || op == "NOW")); + } + bool is_nop() const { + return t == a_none && op.empty(); + } + bool is_comment() const { + return t == a_none && !op.empty(); + } + bool is_custom() const { + return t == a_custom; + } + bool is_very_custom() const { + return is_custom() && a >= 255; + } + bool is_push() const { + return t == a_push; + } + bool is_push(int x) const { + return is_push() && a == x; + } + bool is_push(int* x) const { + *x = a; + return is_push(); + } + bool is_pop() const { + return t == a_pop; + } + bool is_pop(int x) const { + return is_pop() && a == x; + } + bool is_xchg() const { + return t == a_xchg; + } + bool is_xchg(int x, int y) const { + return is_xchg() && b == y && a == x; + } + bool is_xchg(int* x, int* y) const { + *x = a; + *y = b; + return is_xchg(); + } + bool is_xchg_short() const { + return is_xchg() && (a <= 1 || b <= 1); + } + bool is_swap() const { + return is_xchg(0, 1); + } + bool is_const() const { + return t == a_const && !a && b == 1; + } + bool is_gconst() const { + return !a && b == 1 && (t == a_const || gconst); + } + static AsmOp Nop() { + return AsmOp(a_none); + } + static AsmOp Xchg(int a, int b = 0) { + return a == b ? AsmOp(a_none) : (a < b ? 
AsmOp(a_xchg, a, b) : AsmOp(a_xchg, b, a)); + } + static AsmOp Push(int a) { + return AsmOp(a_push, a); + } + static AsmOp Pop(int a = 0) { + return AsmOp(a_pop, a); + } + static AsmOp Xchg2(int a, int b) { + return make_stk2(a, b, "XCHG2", 0); + } + static AsmOp XcPu(int a, int b) { + return make_stk2(a, b, "XCPU", 1); + } + static AsmOp PuXc(int a, int b) { + return make_stk2(a, b, "PUXC", 1); + } + static AsmOp Push2(int a, int b) { + return make_stk2(a, b, "PUSH2", 2); + } + static AsmOp Xchg3(int a, int b, int c) { + return make_stk3(a, b, c, "XCHG3", 0); + } + static AsmOp Xc2Pu(int a, int b, int c) { + return make_stk3(a, b, c, "XC2PU", 1); + } + static AsmOp XcPuXc(int a, int b, int c) { + return make_stk3(a, b, c, "XCPUXC", 1); + } + static AsmOp XcPu2(int a, int b, int c) { + return make_stk3(a, b, c, "XCPU2", 3); + } + static AsmOp PuXc2(int a, int b, int c) { + return make_stk3(a, b, c, "PUXC2", 3); + } + static AsmOp PuXcPu(int a, int b, int c) { + return make_stk3(a, b, c, "PUXCPU", 3); + } + static AsmOp Pu2Xc(int a, int b, int c) { + return make_stk3(a, b, c, "PU2XC", 3); + } + static AsmOp Push3(int a, int b, int c) { + return make_stk3(a, b, c, "PUSH3", 3); + } + static AsmOp BlkSwap(int a, int b); + static AsmOp BlkPush(int a, int b); + static AsmOp BlkDrop(int a); + static AsmOp BlkDrop2(int a, int b); + static AsmOp BlkReverse(int a, int b); + static AsmOp make_stk2(int a, int b, const char* str, int delta); + static AsmOp make_stk3(int a, int b, int c, const char* str, int delta); + static AsmOp IntConst(td::RefInt256 value); + static AsmOp BoolConst(bool f); + static AsmOp Const(std::string push_op, td::RefInt256 origin = {}) { + return AsmOp(a_const, 0, 1, std::move(push_op), origin); + } + static AsmOp Const(int arg, std::string push_op, td::RefInt256 origin = {}); + static AsmOp Comment(std::string comment) { + return AsmOp(a_none, std::string{"// "} + comment); + } + static AsmOp Custom(std::string custom_op) { + return AsmOp(a_custom, 
255, 255, custom_op); + } + static AsmOp Parse(std::string custom_op); + static AsmOp Custom(std::string custom_op, int args, int retv = 1) { + return AsmOp(a_custom, args, retv, custom_op); + } + static AsmOp Parse(std::string custom_op, int args, int retv = 1); + static AsmOp Tuple(int a); + static AsmOp UnTuple(int a); +}; + +inline std::ostream& operator<<(std::ostream& os, const AsmOp& op) { + op.out(os); + return os; +} + +std::ostream& operator<<(std::ostream& os, AsmOp::SReg stack_reg); + +struct AsmOpList { + std::vector list_; + int indent_{0}; + const std::vector* var_names_{nullptr}; + std::vector constants_; + bool retalt_{false}; + void out(std::ostream& os, int mode = 0) const; + AsmOpList(int indent = 0, const std::vector* var_names = nullptr) : indent_(indent), var_names_(var_names) { + } + template + AsmOpList& add(Args&&... args) { + append(AsmOp(std::forward(args)...)); + adjust_last(); + return *this; + } + bool append(const AsmOp& op) { + list_.push_back(op); + adjust_last(); + return true; + } + bool append(const std::vector& ops); + bool append(std::initializer_list ops) { + return append(std::vector(std::move(ops))); + } + AsmOpList& operator<<(const AsmOp& op) { + return add(op); + } + AsmOpList& operator<<(AsmOp&& op) { + return add(std::move(op)); + } + AsmOpList& operator<<(std::string str) { + return add(AsmOp::Type::a_custom, 255, 255, str); + } + const_idx_t register_const(Const new_const); + Const get_const(const_idx_t idx); + void show_var(std::ostream& os, var_idx_t idx) const; + void show_var_ext(std::ostream& os, std::pair idx_pair) const; + void adjust_last() { + if (list_.back().is_nop()) { + list_.pop_back(); + } else { + list_.back().indent = indent_; + } + } + void indent() { + ++indent_; + } + void undent() { + --indent_; + } + void set_indent(int new_indent) { + indent_ = new_indent; + } + void insert(size_t pos, std::string str) { + insert(pos, AsmOp(AsmOp::a_custom, 255, 255, str)); + } + void insert(size_t pos, const 
AsmOp& op) { + auto ip = list_.begin() + pos; + ip = list_.insert(ip, op); + ip->indent = (ip == list_.begin()) ? indent_ : (ip - 1)->indent; + } + void indent_all() { + for (auto &op : list_) { + ++op.indent; + } + } +}; + +inline std::ostream& operator<<(std::ostream& os, const AsmOpList& op_list) { + op_list.out(os); + return os; +} + +class IndentGuard { + AsmOpList& aol_; + + public: + IndentGuard(AsmOpList& aol) : aol_(aol) { + aol.indent(); + } + ~IndentGuard() { + aol_.undent(); + } +}; + +struct AsmOpCons { + std::unique_ptr car; + std::unique_ptr cdr; + AsmOpCons(std::unique_ptr head, std::unique_ptr tail) : car(std::move(head)), cdr(std::move(tail)) { + } + static std::unique_ptr cons(std::unique_ptr head, std::unique_ptr tail) { + return std::make_unique(std::move(head), std::move(tail)); + } +}; + +using AsmOpConsList = std::unique_ptr; + +int is_pos_pow2(td::RefInt256 x); +int is_neg_pow2(td::RefInt256 x); + +/* + * + * STACK TRANSFORMS + * + */ + +/* +A stack transform is a map f:N={0,1,...} -> N, such that f(x) = x + d_f for almost all x:N and for a fixed d_f:N. +They form a monoid under composition: (fg)(x)=f(g(x)). +They act on stacks S on the right: Sf=S', such that S'[n]=S[f(n)]. + +A stack transform f is determined by d_f and the finite set A of all pairs (x,y), such that x>=d_f, f(x-d_f) = y and y<>x. They are listed in increasing order by x. +*/ +struct StackTransform { + enum { max_n = 16, inf_x = 0x7fffffff, c_start = -1000 }; + int d{0}, n{0}, dp{0}, c{0}; + bool invalid{false}; + std::array, max_n> A; + StackTransform() = default; + // list of f(0),f(1),...,f(s); assumes next values are f(s)+1,f(s)+2,... 
+ StackTransform(std::initializer_list list); + StackTransform& operator=(std::initializer_list list); + bool assign(const StackTransform& other); + static StackTransform id() { + return {}; + } + bool invalidate() { + invalid = true; + return false; + } + bool is_valid() const { + return !invalid; + } + bool set_id() { + d = n = dp = c = 0; + invalid = false; + return true; + } + bool shift(int offs) { // post-composes with x -> x + offs + d += offs; + return offs <= 0 || remove_negative(); + } + bool remove_negative(); + bool touch(int i) { + dp = std::max(dp, i + d + 1); + return true; + } + bool is_permutation() const; // is f:N->N bijective ? + bool is_trivial_after(int x) const; // f(x') = x' + d for all x' >= x + int preimage_count(int y) const; // card f^{-1}(y) + std::vector preimage(int y) const; + bool apply_xchg(int i, int j, bool relaxed = false); + bool apply_push(int i); + bool apply_pop(int i = 0); + bool apply_push_newconst(); + bool apply_blkpop(int k); + bool apply(const StackTransform& other); // this = this * other + bool preapply(const StackTransform& other); // this = other * this + // c := a * b + static bool compose(const StackTransform& a, const StackTransform& b, StackTransform& c); + StackTransform& operator*=(const StackTransform& other); + StackTransform operator*(const StackTransform& b) const &; + bool equal(const StackTransform& other, bool relaxed = false) const; + bool almost_equal(const StackTransform& other) const { + return equal(other, true); + } + bool operator==(const StackTransform& other) const { + return dp == other.dp && almost_equal(other); + } + bool operator<=(const StackTransform& other) const { + return dp <= other.dp && almost_equal(other); + } + bool operator>=(const StackTransform& other) const { + return dp >= other.dp && almost_equal(other); + } + int get(int i) const; + int touch_get(int i, bool relaxed = false) { + if (!relaxed) { + touch(i); + } + return get(i); + } + bool set(int i, int v, bool relaxed = 
false); + int operator()(int i) const { + return get(i); + } + class Pos { + StackTransform& t_; + int p_; + + public: + Pos(StackTransform& t, int p) : t_(t), p_(p) { + } + Pos& operator=(const Pos& other) = delete; + operator int() const { + return t_.get(p_); + } + const Pos& operator=(int v) const { + t_.set(p_, v); + return *this; + } + }; + Pos operator[](int i) { + return Pos(*this, i); + } + static const StackTransform rot; + static const StackTransform rot_rev; + bool is_id() const { + return is_valid() && !d && !n; + } + bool is_xchg(int i, int j) const; + bool is_xchg(int* i, int* j) const; + bool is_xchg_xchg(int i, int j, int k, int l) const; + bool is_xchg_xchg(int* i, int* j, int* k, int* l) const; + bool is_push(int i) const; + bool is_push(int* i) const; + bool is_pop(int i) const; + bool is_pop(int* i) const; + bool is_pop_pop(int i, int j) const; + bool is_pop_pop(int* i, int* j) const; + bool is_rot() const; + bool is_rotrev() const; + bool is_push_rot(int i) const; + bool is_push_rot(int* i) const; + bool is_push_rotrev(int i) const; + bool is_push_rotrev(int* i) const; + bool is_push_xchg(int i, int j, int k) const; + bool is_push_xchg(int* i, int* j, int* k) const; + bool is_xchg2(int i, int j) const; + bool is_xchg2(int* i, int* j) const; + bool is_xcpu(int i, int j) const; + bool is_xcpu(int* i, int* j) const; + bool is_puxc(int i, int j) const; + bool is_puxc(int* i, int* j) const; + bool is_push2(int i, int j) const; + bool is_push2(int* i, int* j) const; + bool is_xchg3(int* i, int* j, int* k) const; + bool is_xc2pu(int* i, int* j, int* k) const; + bool is_xcpuxc(int* i, int* j, int* k) const; + bool is_xcpu2(int* i, int* j, int* k) const; + bool is_puxc2(int i, int j, int k) const; + bool is_puxc2(int* i, int* j, int* k) const; + bool is_puxcpu(int* i, int* j, int* k) const; + bool is_pu2xc(int i, int j, int k) const; + bool is_pu2xc(int* i, int* j, int* k) const; + bool is_push3(int i, int j, int k) const; + bool is_push3(int* i, int* 
j, int* k) const; + bool is_blkswap(int i, int j) const; + bool is_blkswap(int* i, int* j) const; + bool is_blkpush(int i, int j) const; + bool is_blkpush(int* i, int* j) const; + bool is_blkdrop(int* i) const; + bool is_blkdrop2(int i, int j) const; + bool is_blkdrop2(int* i, int* j) const; + bool is_reverse(int i, int j) const; + bool is_reverse(int* i, int* j) const; + bool is_nip_seq(int i, int j = 0) const; + bool is_nip_seq(int* i) const; + bool is_nip_seq(int* i, int* j) const; + bool is_pop_blkdrop(int i, int k) const; + bool is_pop_blkdrop(int* i, int* k) const; + bool is_2pop_blkdrop(int i, int j, int k) const; + bool is_2pop_blkdrop(int* i, int* j, int* k) const; + bool is_const_rot(int c) const; + bool is_const_rot(int* c) const; + bool is_const_pop(int c, int i) const; + bool is_const_pop(int* c, int* i) const; + bool is_push_const(int i, int c) const; + bool is_push_const(int* i, int* c) const; + + void show(std::ostream& os, int mode = 0) const; + + static StackTransform Xchg(int i, int j, bool relaxed = false); + static StackTransform Push(int i); + static StackTransform Pop(int i); + + private: + int try_load(int& i, int offs = 0) const; // returns A[i++].first + offs or inf_x + bool try_store(int x, int y); // appends (x,y) to A +}; + +//extern const StackTransform StackTransform::rot, StackTransform::rot_rev; + +inline std::ostream& operator<<(std::ostream& os, const StackTransform& trans) { + trans.show(os); + return os; +} + +bool apply_op(StackTransform& trans, const AsmOp& op); + +/* + * + * STACK OPERATION OPTIMIZER + * + */ + +struct Optimizer { + enum { n = optimize_depth }; + AsmOpConsList code_; + int l_{0}, l2_{0}, p_, pb_, q_, indent_; + bool debug_{false}; + std::unique_ptr op_[n], oq_[n]; + AsmOpCons* op_cons_[n]; + int offs_[n]; + StackTransform tr_[n]; + int mode_{0}; + Optimizer() { + } + Optimizer(bool debug, int mode = 0) : debug_(debug), mode_(mode) { + } + Optimizer(AsmOpConsList code, bool debug = false, int mode = 0) : 
Optimizer(debug, mode) { + set_code(std::move(code)); + } + void set_code(AsmOpConsList code_); + void unpack(); + void pack(); + void apply(); + bool find_at_least(int pb); + bool find(); + bool optimize(); + bool compute_stack_transforms(); + bool say(std::string str) const; + bool show_stack_transforms() const; + void show_head() const; + void show_left() const; + void show_right() const; + bool find_const_op(int* op_idx, int cst); + bool is_push_const(int* i, int* c) const; + bool rewrite_push_const(int i, int c); + bool is_const_push_xchgs(); + bool rewrite_const_push_xchgs(); + bool is_const_rot(int* c) const; + bool rewrite_const_rot(int c); + bool is_const_pop(int* c, int* i) const; + bool rewrite_const_pop(int c, int i); + bool rewrite(int p, AsmOp&& new_op); + bool rewrite(int p, AsmOp&& new_op1, AsmOp&& new_op2); + bool rewrite(int p, AsmOp&& new_op1, AsmOp&& new_op2, AsmOp&& new_op3); + bool rewrite(AsmOp&& new_op) { + return rewrite(p_, std::move(new_op)); + } + bool rewrite(AsmOp&& new_op1, AsmOp&& new_op2) { + return rewrite(p_, std::move(new_op1), std::move(new_op2)); + } + bool rewrite(AsmOp&& new_op1, AsmOp&& new_op2, AsmOp&& new_op3) { + return rewrite(p_, std::move(new_op1), std::move(new_op2), std::move(new_op3)); + } + bool rewrite_nop(); + bool is_pred(const std::function& pred, int min_p = 2); + bool is_same_as(const StackTransform& trans, int min_p = 2); + bool is_rot(); + bool is_rotrev(); + bool is_tuck(); + bool is_2dup(); + bool is_2drop(); + bool is_2swap(); + bool is_2over(); + bool is_xchg(int* i, int* j); + bool is_xchg_xchg(int* i, int* j, int* k, int* l); + bool is_push(int* i); + bool is_pop(int* i); + bool is_pop_pop(int* i, int* j); + bool is_nop(); + bool is_push_rot(int* i); + bool is_push_rotrev(int* i); + bool is_push_xchg(int* i, int* j, int* k); + bool is_xchg2(int* i, int* j); + bool is_xcpu(int* i, int* j); + bool is_puxc(int* i, int* j); + bool is_push2(int* i, int* j); + bool is_xchg3(int* i, int* j, int* k); + bool 
is_xc2pu(int* i, int* j, int* k); + bool is_xcpuxc(int* i, int* j, int* k); + bool is_xcpu2(int* i, int* j, int* k); + bool is_puxc2(int* i, int* j, int* k); + bool is_puxcpu(int* i, int* j, int* k); + bool is_pu2xc(int* i, int* j, int* k); + bool is_push3(int* i, int* j, int* k); + bool is_blkswap(int* i, int* j); + bool is_blkpush(int* i, int* j); + bool is_blkdrop(int* i); + bool is_blkdrop2(int* i, int* j); + bool is_reverse(int* i, int* j); + bool is_nip_seq(int* i, int* j); + bool is_pop_blkdrop(int* i, int* k); + bool is_2pop_blkdrop(int* i, int* j, int* k); + AsmOpConsList extract_code(); +}; + +AsmOpConsList optimize_code_head(AsmOpConsList op_list, int mode = 0); +AsmOpConsList optimize_code(AsmOpConsList op_list, int mode); +void optimize_code(AsmOpList& ops); + +struct Stack { + StackLayoutExt s; + AsmOpList& o; + enum { + _StkCmt = 1, _CptStkCmt = 2, _DisableOpt = 4, _DisableOut = 128, _Shown = 256, + _InlineFunc = 512, _NeedRetAlt = 1024, _InlineAny = 2048, + _ModeSave = _InlineFunc | _NeedRetAlt | _InlineAny, + _Garbage = -0x10000 + }; + int mode; + Stack(AsmOpList& _o, int _mode = 0) : o(_o), mode(_mode) { + } + Stack(AsmOpList& _o, const StackLayoutExt& _s, int _mode = 0) : s(_s), o(_o), mode(_mode) { + } + Stack(AsmOpList& _o, StackLayoutExt&& _s, int _mode = 0) : s(std::move(_s)), o(_o), mode(_mode) { + } + int depth() const { + return (int)s.size(); + } + var_idx_t operator[](int i) const { + validate(i); + return s[depth() - i - 1].first; + } + var_const_idx_t& at(int i) { + validate(i); + return s[depth() - i - 1]; + } + var_const_idx_t at(int i) const { + validate(i); + return s[depth() - i - 1]; + } + var_const_idx_t get(int i) const { + return at(i); + } + bool output_disabled() const { + return mode & _DisableOut; + } + bool output_enabled() const { + return !output_disabled(); + } + void disable_output() { + mode |= _DisableOut; + } + StackLayout vars() const; + int find(var_idx_t var, int from = 0) const; + int find(var_idx_t var, int 
from, int to) const; + int find_const(const_idx_t cst, int from = 0) const; + int find_outside(var_idx_t var, int from, int to) const; + void forget_const(); + void validate(int i) const { + if (i > 255) { + throw Fatal{"Too deep stack"}; + } + tolk_assert(i >= 0 && i < depth() && "invalid stack reference"); + } + void modified() { + mode &= ~_Shown; + } + void issue_pop(int i); + void issue_push(int i); + void issue_xchg(int i, int j); + int drop_vars_except(const VarDescrList& var_info, int excl_var = 0x80000000); + void forget_var(var_idx_t idx); + void push_new_var(var_idx_t idx); + void push_new_const(var_idx_t idx, const_idx_t cidx); + void assign_var(var_idx_t new_idx, var_idx_t old_idx); + void do_copy_var(var_idx_t new_idx, var_idx_t old_idx); + void enforce_state(const StackLayout& req_stack); + void rearrange_top(const StackLayout& top, std::vector last); + void rearrange_top(var_idx_t top, bool last); + void merge_const(const Stack& req_stack); + void merge_state(const Stack& req_stack); + void show(int _mode); + void show() { + show(mode); + } + void opt_show() { + if ((mode & (_StkCmt | _Shown)) == _StkCmt) { + show(mode); + } + } + bool operator==(const Stack& y) const & { + return s == y.s; + } + void apply_wrappers(int callxargs_count) { + bool is_inline = mode & _InlineFunc; + if (o.retalt_) { + o.insert(0, "SAMEALTSAVE"); + o.insert(0, "c2 SAVE"); + } + if (callxargs_count != -1 || (is_inline && o.retalt_)) { + o.indent_all(); + o.insert(0, "CONT:<{"); + o << "}>"; + if (callxargs_count != -1) { + if (callxargs_count <= 15) { + o << AsmOp::Custom(PSTRING() << callxargs_count << " -1 CALLXARGS"); + } else { + tolk_assert(callxargs_count <= 254); + o << AsmOp::Custom(PSTRING() << callxargs_count << " PUSHINT -1 PUSHINT CALLXVARARGS"); + } + } else { + o << "EXECUTE"; + } + } + } +}; + +/* + * + * SPECIFIC SYMBOL VALUES, + * BUILT-IN FUNCTIONS AND OPERATIONS + * + */ + +typedef std::function&, std::vector&, const SrcLocation)> simple_compile_func_t; 
+typedef std::function&, std::vector&)> compile_func_t; + +inline simple_compile_func_t make_simple_compile(AsmOp op) { + return [op](std::vector& out, std::vector& in, const SrcLocation&) -> AsmOp { return op; }; +} + +inline compile_func_t make_ext_compile(std::vector ops) { + return [ops = std::move(ops)](AsmOpList & dest, std::vector & out, std::vector & in)->bool { + return dest.append(ops); + }; +} + +inline compile_func_t make_ext_compile(AsmOp op) { + return + [op](AsmOpList& dest, std::vector& out, std::vector& in) -> bool { return dest.append(op); }; +} + +struct SymValAsmFunc : SymValFunc { + simple_compile_func_t simple_compile; + compile_func_t ext_compile; + td::uint64 crc; + ~SymValAsmFunc() override = default; + SymValAsmFunc(TypeExpr* ft, const AsmOp& _macro, bool impure = false) + : SymValFunc(-1, ft, impure), simple_compile(make_simple_compile(_macro)) { + } + SymValAsmFunc(TypeExpr* ft, std::vector _macro, bool impure = false) + : SymValFunc(-1, ft, impure), ext_compile(make_ext_compile(std::move(_macro))) { + } + SymValAsmFunc(TypeExpr* ft, simple_compile_func_t _compile, bool impure = false) + : SymValFunc(-1, ft, impure), simple_compile(std::move(_compile)) { + } + SymValAsmFunc(TypeExpr* ft, compile_func_t _compile, bool impure = false) + : SymValFunc(-1, ft, impure), ext_compile(std::move(_compile)) { + } + SymValAsmFunc(TypeExpr* ft, simple_compile_func_t _compile, std::initializer_list arg_order, + std::initializer_list ret_order = {}, bool impure = false) + : SymValFunc(-1, ft, arg_order, ret_order, impure), simple_compile(std::move(_compile)) { + } + SymValAsmFunc(TypeExpr* ft, compile_func_t _compile, std::initializer_list arg_order, + std::initializer_list ret_order = {}, bool impure = false) + : SymValFunc(-1, ft, arg_order, ret_order, impure), ext_compile(std::move(_compile)) { + } + bool compile(AsmOpList& dest, std::vector& out, std::vector& in, const SrcLocation& where) const; +}; + +// defined in builtins.cpp +AsmOp 
exec_arg_op(std::string op, long long arg); +AsmOp exec_arg_op(std::string op, long long arg, int args, int retv = 1); +AsmOp exec_arg_op(std::string op, td::RefInt256 arg); +AsmOp exec_arg_op(std::string op, td::RefInt256 arg, int args, int retv = 1); +AsmOp exec_arg2_op(std::string op, long long imm1, long long imm2, int args, int retv = 1); +AsmOp push_const(td::RefInt256 x); + +void define_builtins(); + + +extern int verbosity, indent, opt_level; +extern bool stack_layout_comments, op_rewrite_comments, program_envelope, asm_preamble, interactive; +extern std::string generated_from, boc_output_filename; +extern ReadCallback::Callback read_callback; + +td::Result fs_read_callback(ReadCallback::Kind kind, const char* query); + +class GlobalPragma { + public: + explicit GlobalPragma(std::string name) : name_(std::move(name)) { + } + const std::string& name() const { + return name_; + } + bool enabled() const { + return enabled_; + } + void enable(SrcLocation loc) { + enabled_ = true; + locs_.push_back(std::move(loc)); + } + void check_enable_in_libs() { + if (locs_.empty()) { + return; + } + for (const SrcLocation& loc : locs_) { + if (loc.fdescr->is_main) { + return; + } + } + locs_[0].show_warning(PSTRING() << "#pragma " << name_ + << " is enabled in included libraries, it may change the behavior of your code. " + << "Add this #pragma to the main source file to suppress this warning."); + } + + private: + std::string name_; + bool enabled_ = false; + std::vector locs_; +}; +extern GlobalPragma pragma_allow_post_modification, pragma_compute_asm_ltr; + +/* + * + * OUTPUT CODE GENERATOR + * + */ + +int tolk_proceed(const std::vector &sources, std::ostream &outs, std::ostream &errs); + +} // namespace tolk + + diff --git a/tolk/unify-types.cpp b/tolk/unify-types.cpp new file mode 100644 index 000000000..4e28dc834 --- /dev/null +++ b/tolk/unify-types.cpp @@ -0,0 +1,429 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * TYPE EXPRESSIONS + * + */ + +int TypeExpr::holes = 0, TypeExpr::type_vars = 0; // not thread safe, but it is ok for now + +void TypeExpr::compute_width() { + switch (constr) { + case te_Atomic: + case te_Map: + minw = maxw = 1; + break; + case te_Tensor: + minw = maxw = 0; + for (TypeExpr* arg : args) { + minw += arg->minw; + maxw += arg->maxw; + } + if (minw > w_inf) { + minw = w_inf; + } + if (maxw > w_inf) { + maxw = w_inf; + } + break; + case te_Tuple: + minw = maxw = 1; + for (TypeExpr* arg : args) { + arg->compute_width(); + } + break; + case te_Indirect: + minw = args[0]->minw; + maxw = args[0]->maxw; + break; + default: + minw = 0; + maxw = w_inf; + break; + } +} + +bool TypeExpr::recompute_width() { + switch (constr) { + case te_Tensor: + case te_Indirect: { + int min = 0, max = 0; + for (TypeExpr* arg : args) { + min += arg->minw; + max += arg->maxw; + } + if (min > maxw || max < minw) { + return false; + } + if (min > w_inf) { + min = w_inf; + } + if (max > w_inf) { + max = w_inf; + } + if (minw < min) { + minw = min; + } + if (maxw > max) { + maxw = max; + } + return true; + } + case te_Tuple: { + for (TypeExpr* arg : args) { + if (arg->minw > 1 || arg->maxw < 1 || arg->minw > arg->maxw) { + return false; + } + } + return true; + } + default: + return 
false; + } +} + +int TypeExpr::extract_components(std::vector& comp_list) { + if (constr != te_Indirect && constr != te_Tensor) { + comp_list.push_back(this); + return 1; + } + int res = 0; + for (TypeExpr* arg : args) { + res += arg->extract_components(comp_list); + } + return res; +} + +TypeExpr* TypeExpr::new_map(TypeExpr* from, TypeExpr* to) { + return new TypeExpr{te_Map, std::vector{from, to}}; +} + +void TypeExpr::replace_with(TypeExpr* te2) { + if (te2 == this) { + return; + } + constr = te_Indirect; + value = 0; + minw = te2->minw; + maxw = te2->maxw; + args.clear(); + args.push_back(te2); +} + +bool TypeExpr::remove_indirect(TypeExpr*& te, TypeExpr* forbidden) { + tolk_assert(te); + while (te->constr == te_Indirect) { + te = te->args[0]; + } + if (te->constr == te_Unknown) { + return te != forbidden; + } + bool res = true; + for (auto& x : te->args) { + res &= remove_indirect(x, forbidden); + } + return res; +} + +std::vector TypeExpr::remove_forall(TypeExpr*& te) { + tolk_assert(te && te->constr == te_ForAll); + tolk_assert(te->args.size() >= 1); + std::vector new_vars; + for (std::size_t i = 1; i < te->args.size(); i++) { + new_vars.push_back(new_hole(1)); + } + TypeExpr* te2 = te; + // std::cerr << "removing universal quantifier in " << te << std::endl; + te = te->args[0]; + remove_forall_in(te, te2, new_vars); + // std::cerr << "-> " << te << std::endl; + return new_vars; +} + +bool TypeExpr::remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars) { + tolk_assert(te); + tolk_assert(te2 && te2->constr == te_ForAll); + if (te->constr == te_Var) { + for (std::size_t i = 0; i < new_vars.size(); i++) { + if (te == te2->args[i + 1]) { + te = new_vars[i]; + return true; + } + } + return false; + } + if (te->constr == te_ForAll) { + return false; + } + if (te->args.empty()) { + return false; + } + auto te1 = new TypeExpr(*te); + bool res = false; + for (auto& arg : te1->args) { + res |= remove_forall_in(arg, te2, new_vars); + } + if (res) 
{ + te = te1; + } else { + delete te1; + } + return res; +} + +void TypeExpr::show_width(std::ostream& os) { + os << minw; + if (maxw != minw) { + os << ".."; + if (maxw < w_inf) { + os << maxw; + } + } +} + +std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr) { + if (!type_expr) { + return os << "(null-type-ptr)"; + } + return type_expr->print(os); +} + +std::ostream& TypeExpr::print(std::ostream& os, int lex_level) { + switch (constr) { + case te_Unknown: + return os << "??" << value; + case te_Var: + if (value >= -26 && value < 0) { + return os << "_" << (char)(91 + value); + } else if (value >= 0 && value < 26) { + return os << (char)(65 + value); + } else { + return os << "TVAR" << value; + } + case te_Indirect: + return os << args[0]; + case te_Atomic: { + switch (value) { + case _Int: + return os << "int"; + case _Cell: + return os << "cell"; + case _Slice: + return os << "slice"; + case _Builder: + return os << "builder"; + case _Cont: + return os << "cont"; + case _Tuple: + return os << "tuple"; + case _Type: + return os << "type"; + default: + return os << "atomic-type-" << value; + } + } + case te_Tensor: { + if (lex_level > -127) { + os << "("; + } + auto c = args.size(); + if (c) { + for (const auto& x : args) { + x->print(os); + if (--c) { + os << ", "; + } + } + } + if (lex_level > -127) { + os << ")"; + } + return os; + } + case te_Tuple: { + os << "["; + auto c = args.size(); + if (c == 1 && args[0]->constr == te_Tensor) { + args[0]->print(os, -127); + } else if (c) { + for (const auto& x : args) { + x->print(os); + if (--c) { + os << ", "; + } + } + } + return os << "]"; + } + case te_Map: { + tolk_assert(args.size() == 2); + if (lex_level > 0) { + os << "("; + } + args[0]->print(os, 1); + os << " -> "; + args[1]->print(os); + if (lex_level > 0) { + os << ")"; + } + return os; + } + case te_ForAll: { + tolk_assert(args.size() >= 1); + if (lex_level > 0) { + os << '('; + } + os << "Forall "; + for (std::size_t i = 1; i < args.size(); 
i++) { + os << (i > 1 ? ' ' : '('); + args[i]->print(os); + } + os << ") "; + args[0]->print(os); + if (lex_level > 0) { + os << ')'; + } + return os; + } + default: + return os << "unknown-type-expr-" << constr; + } +} + +void UnifyError::print_message(std::ostream& os) const { + os << "cannot unify type " << te1 << " with " << te2; + if (!msg.empty()) { + os << ": " << msg; + } +} + +std::ostream& operator<<(std::ostream& os, const UnifyError& ue) { + ue.print_message(os); + return os; +} + +std::string UnifyError::message() const { + std::ostringstream os; + print_message(os); + return os.str(); +} + +void check_width_compat(TypeExpr* te1, TypeExpr* te2) { + if (te1->minw > te2->maxw || te2->minw > te1->maxw) { + std::ostringstream os{"cannot unify types of widths ", std::ios_base::ate}; + te1->show_width(os); + os << " and "; + te2->show_width(os); + throw UnifyError{te1, te2, os.str()}; + } +} + +void check_update_widths(TypeExpr* te1, TypeExpr* te2) { + check_width_compat(te1, te2); + te1->minw = te2->minw = std::max(te1->minw, te2->minw); + te1->maxw = te2->maxw = std::min(te1->maxw, te2->maxw); + tolk_assert(te1->minw <= te1->maxw); +} + +void unify(TypeExpr*& te1, TypeExpr*& te2) { + tolk_assert(te1 && te2); + // std::cerr << "unify( " << te1 << " , " << te2 << " )\n"; + while (te1->constr == TypeExpr::te_Indirect) { + te1 = te1->args[0]; + } + while (te2->constr == TypeExpr::te_Indirect) { + te2 = te2->args[0]; + } + if (te1 == te2) { + return; + } + if (te1->constr == TypeExpr::te_ForAll) { + TypeExpr* te = te1; + std::vector new_vars = TypeExpr::remove_forall(te); + for (TypeExpr* t : new_vars) { + t->was_forall_var = true; + } + unify(te, te2); + for (TypeExpr* t : new_vars) { + t->was_forall_var = false; + } + return; + } + if (te2->constr == TypeExpr::te_ForAll) { + TypeExpr* te = te2; + std::vector new_vars = TypeExpr::remove_forall(te); + for (TypeExpr* t : new_vars) { + t->was_forall_var = true; + } + unify(te1, te); + for (TypeExpr* t : new_vars) 
{ + t->was_forall_var = false; + } + return; + } + if (te1->was_forall_var && te2->constr == TypeExpr::te_Tensor) { + throw UnifyError{te1, te2, "cannot unify generic type and tensor"}; + } + if (te2->was_forall_var && te1->constr == TypeExpr::te_Tensor) { + throw UnifyError{te2, te1, "cannot unify generic type and tensor"}; + } + if (te1->constr == TypeExpr::te_Unknown) { + if (te2->constr == TypeExpr::te_Unknown) { + tolk_assert(te1->value != te2->value); + } + if (!TypeExpr::remove_indirect(te2, te1)) { + throw UnifyError{te1, te2, "type unification results in an infinite cyclic type"}; + } + check_update_widths(te1, te2); + te1->replace_with(te2); + te1 = te2; + return; + } + if (te2->constr == TypeExpr::te_Unknown) { + if (!TypeExpr::remove_indirect(te1, te2)) { + throw UnifyError{te2, te1, "type unification results in an infinite cyclic type"}; + } + check_update_widths(te2, te1); + te2->replace_with(te1); + te2 = te1; + return; + } + if (te1->constr != te2->constr || te1->value != te2->value || te1->args.size() != te2->args.size()) { + throw UnifyError{te1, te2}; + } + for (std::size_t i = 0; i < te1->args.size(); i++) { + unify(te1->args[i], te2->args[i]); + } + if (te1->constr == TypeExpr::te_Tensor) { + if (!te1->recompute_width()) { + throw UnifyError{te1, te2, "type unification incompatible with known width of first type"}; + } + if (!te2->recompute_width()) { + throw UnifyError{te2, te1, "type unification incompatible with known width of first type"}; + } + check_update_widths(te1, te2); + } + te1->replace_with(te2); + te1 = te2; +} + +} // namespace tolk From ebbab54cdad8c5e11f6b810781f63ff88dfd9b62 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Thu, 31 Oct 2024 10:54:05 +0400 Subject: [PATCH 02/12] [Tolk] Tolk v0.5.0 as FunC v0.5.0 could have been like All changes from PR "FunC v0.5.0": https://github.com/ton-blockchain/ton/pull/1026 Instead of developing FunC, we decided to fork it. 
BTW, the first Tolk release will be v0.6, a metaphor of FunC v0.5 that missed a chance to occur. --- crypto/smartcont/mathlib.tolk | 16 +- crypto/smartcont/stdlib.tolk | 939 ++++++++++++++++++---------------- lite-client/lite-client.cpp | 2 +- tolk/CMakeLists.txt | 4 + tolk/abscode.cpp | 17 +- tolk/analyzer.cpp | 49 +- tolk/asmops.cpp | 3 + tolk/builtins.cpp | 52 +- tolk/codegen.cpp | 36 +- tolk/gen-abscode.cpp | 112 ++-- tolk/keywords.cpp | 3 + tolk/lexer.cpp | 61 ++- tolk/lexer.h | 13 +- tolk/parse-tolk.cpp | 413 ++++++++++++--- tolk/symtable.cpp | 10 +- tolk/symtable.h | 3 + tolk/tolk-wasm.cpp | 1 + tolk/tolk.cpp | 155 +++++- tolk/tolk.h | 242 ++++----- tolk/unify-types.cpp | 35 +- tonlib/tonlib/tonlib-cli.cpp | 2 +- 21 files changed, 1362 insertions(+), 806 deletions(-) diff --git a/crypto/smartcont/mathlib.tolk b/crypto/smartcont/mathlib.tolk index d4fea6095..6a5b2d1b7 100644 --- a/crypto/smartcont/mathlib.tolk +++ b/crypto/smartcont/mathlib.tolk @@ -81,14 +81,14 @@ int fixed248::acot(int x) inline_ref; ;; random number uniformly distributed in [0..1) ;; fixed248 random(); -int fixed248::random() impure inline; +int fixed248::random() inline; ;; random number with standard normal distribution (2100 gas on average) ;; fixed248 nrand(); -int fixed248::nrand() impure inline; +int fixed248::nrand() inline; ;; generates a random number approximately distributed according to the standard normal distribution (1200 gas) ;; (fails chi-squared test, but it is shorter and faster than fixed248::nrand()) ;; fixed248 nrand_fast(); -int fixed248::nrand_fast() impure inline; +int fixed248::nrand_fast() inline; -} ;; end (declarations) @@ -880,7 +880,7 @@ int fixed248::acot(int x) inline_ref { ;; generated by Kinderman--Monahan ratio method modified by J.Leva ;; spends ~ 2k..3k gas on average ;; fixed252 nrand(); -int nrand_f252() impure inline_ref { +int nrand_f252() inline_ref { var (x, s, t, A, B, r0) = (nan(), touch(29483) << 236, touch(-3167) << 239, 12845, 16693, 9043); 
;; 4/sqrt(e*Pi) = 1.369 loop iterations on average do { @@ -910,7 +910,7 @@ int nrand_f252() impure inline_ref { ;; generates a random number approximately distributed according to the standard normal distribution ;; much faster than nrand_f252(), should be suitable for most purposes when only several random numbers are needed ;; fixed252 nrand_fast(); -int nrand_fast_f252() impure inline_ref { +int nrand_fast_f252() inline_ref { int t = touch(-3) << 253; ;; -6. as fixed252 repeat (12) { t += random() / 16; ;; add together 12 uniformly random numbers @@ -920,18 +920,18 @@ int nrand_fast_f252() impure inline_ref { ;; random number uniformly distributed in [0..1) ;; fixed248 random(); -int fixed248::random() impure inline { +int fixed248::random() inline { return random() >> 8; } ;; random number with standard normal distribution ;; fixed248 nrand(); -int fixed248::nrand() impure inline { +int fixed248::nrand() inline { return nrand_f252() ~>> 4; } ;; generates a random number approximately distributed according to the standard normal distribution ;; fixed248 nrand_fast(); -int fixed248::nrand_fast() impure inline { +int fixed248::nrand_fast() inline { return nrand_fast_f252() ~>> 4; } diff --git a/crypto/smartcont/stdlib.tolk b/crypto/smartcont/stdlib.tolk index 344d90319..8545601bc 100644 --- a/crypto/smartcont/stdlib.tolk +++ b/crypto/smartcont/stdlib.tolk @@ -1,8 +1,8 @@ -;; Standard library for Tolk -;; (initially copied from stdlib.fc) -;; +// Standard library for Tolk +// (initially copied from stdlib.fc) +// -{- +/* This file is part of TON Tolk Standard Library. Tolk Standard Library is free software: you can redistribute it and/or modify @@ -15,268 +15,324 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. --} +*/ -{- +/* # Tuple manipulation primitives The names and the types are mostly self-explaining. 
Note that currently values of atomic type `tuple` can't be cast to composite tuple type (e.g. `[int, cell]`) and vise versa. --} +*/ -{- +/*** # Lisp-style lists Lists can be represented as nested 2-elements tuples. Empty list is conventionally represented as TVM `null` value (it can be obtained by calling [null()]). For example, tuple `(1, (2, (3, null)))` represents list `[1, 2, 3]`. Elements of a list can be of different types. --} +*/ -;;; Adds an element to the beginning of lisp-style list. -forall X -> tuple cons(X head, tuple tail) asm "CONS"; +/// Adds an element to the beginning of lisp-style list. +forall X -> tuple cons(X head, tuple tail) pure asm "CONS"; -;;; Extracts the head and the tail of lisp-style list. -forall X -> (X, tuple) uncons(tuple list) asm "UNCONS"; +/// Extracts the head and the tail of lisp-style list. +forall X -> (X, tuple) uncons(tuple list) pure asm "UNCONS"; -;;; Extracts the tail and the head of lisp-style list. -forall X -> (tuple, X) list_next(tuple list) asm( -> 1 0) "UNCONS"; +/// Extracts the tail and the head of lisp-style list. +forall X -> (tuple, X) list_next(tuple list) pure asm( -> 1 0) "UNCONS"; -;;; Returns the head of lisp-style list. -forall X -> X car(tuple list) asm "CAR"; +/// Returns the head of lisp-style list. +forall X -> X car(tuple list) pure asm "CAR"; -;;; Returns the tail of lisp-style list. -tuple cdr(tuple list) asm "CDR"; +/// Returns the tail of lisp-style list. +tuple cdr(tuple list) pure asm "CDR"; -;;; Creates tuple with zero elements. -tuple empty_tuple() asm "NIL"; +/// Creates tuple with zero elements. +tuple empty_tuple() pure asm "NIL"; -;;; Appends a value `x` to a `Tuple t = (x1, ..., xn)`, but only if the resulting `Tuple t' = (x1, ..., xn, x)` -;;; is of length at most 255. Otherwise throws a type check exception. 
-forall X -> tuple tpush(tuple t, X value) asm "TPUSH"; -forall X -> (tuple, ()) ~tpush(tuple t, X value) asm "TPUSH"; +/// Appends a value `x` to a `Tuple t = (x1, ..., xn)`, but only if the resulting `Tuple t' = (x1, ..., xn, x)` +/// is of length at most 255. Otherwise throws a type check exception. +forall X -> tuple tpush(tuple t, X value) pure asm "TPUSH"; +forall X -> (tuple, ()) ~tpush(tuple t, X value) pure asm "TPUSH"; -;;; Creates a tuple of length one with given argument as element. -forall X -> [X] single(X x) asm "SINGLE"; +/// Creates a tuple of length one with given argument as element. +forall X -> [X] single(X x) pure asm "SINGLE"; -;;; Unpacks a tuple of length one -forall X -> X unsingle([X] t) asm "UNSINGLE"; +/// Unpacks a tuple of length one +forall X -> X unsingle([X] t) pure asm "UNSINGLE"; -;;; Creates a tuple of length two with given arguments as elements. -forall X, Y -> [X, Y] pair(X x, Y y) asm "PAIR"; +/// Creates a tuple of length two with given arguments as elements. +forall X, Y -> [X, Y] pair(X x, Y y) pure asm "PAIR"; -;;; Unpacks a tuple of length two -forall X, Y -> (X, Y) unpair([X, Y] t) asm "UNPAIR"; +/// Unpacks a tuple of length two +forall X, Y -> (X, Y) unpair([X, Y] t) pure asm "UNPAIR"; -;;; Creates a tuple of length three with given arguments as elements. -forall X, Y, Z -> [X, Y, Z] triple(X x, Y y, Z z) asm "TRIPLE"; +/// Creates a tuple of length three with given arguments as elements. +forall X, Y, Z -> [X, Y, Z] triple(X x, Y y, Z z) pure asm "TRIPLE"; -;;; Unpacks a tuple of length three -forall X, Y, Z -> (X, Y, Z) untriple([X, Y, Z] t) asm "UNTRIPLE"; +/// Unpacks a tuple of length three +forall X, Y, Z -> (X, Y, Z) untriple([X, Y, Z] t) pure asm "UNTRIPLE"; -;;; Creates a tuple of length four with given arguments as elements. -forall X, Y, Z, W -> [X, Y, Z, W] tuple4(X x, Y y, Z z, W w) asm "4 TUPLE"; +/// Creates a tuple of length four with given arguments as elements. 
+forall X, Y, Z, W -> [X, Y, Z, W] tuple4(X x, Y y, Z z, W w) pure asm "4 TUPLE"; -;;; Unpacks a tuple of length four -forall X, Y, Z, W -> (X, Y, Z, W) untuple4([X, Y, Z, W] t) asm "4 UNTUPLE"; +/// Unpacks a tuple of length four +forall X, Y, Z, W -> (X, Y, Z, W) untuple4([X, Y, Z, W] t) pure asm "4 UNTUPLE"; -;;; Returns the first element of a tuple (with unknown element types). -forall X -> X first(tuple t) asm "FIRST"; +/// Returns the first element of a tuple (with unknown element types). +forall X -> X first(tuple t) pure asm "FIRST"; -;;; Returns the second element of a tuple (with unknown element types). -forall X -> X second(tuple t) asm "SECOND"; +/// Returns the second element of a tuple (with unknown element types). +forall X -> X second(tuple t) pure asm "SECOND"; -;;; Returns the third element of a tuple (with unknown element types). -forall X -> X third(tuple t) asm "THIRD"; +/// Returns the third element of a tuple (with unknown element types). +forall X -> X third(tuple t) pure asm "THIRD"; -;;; Returns the fourth element of a tuple (with unknown element types). -forall X -> X fourth(tuple t) asm "3 INDEX"; +/// Returns the fourth element of a tuple (with unknown element types). +forall X -> X fourth(tuple t) pure asm "3 INDEX"; -;;; Returns the first element of a pair tuple. -forall X, Y -> X pair_first([X, Y] p) asm "FIRST"; +/// Returns the [`index`]-th element of tuple [`t`]. +forall X -> X at(tuple t, int index) pure builtin; -;;; Returns the second element of a pair tuple. -forall X, Y -> Y pair_second([X, Y] p) asm "SECOND"; +/// Returns the first element of a pair tuple. +forall X, Y -> X pair_first([X, Y] p) pure asm "FIRST"; -;;; Returns the first element of a triple tuple. -forall X, Y, Z -> X triple_first([X, Y, Z] p) asm "FIRST"; +/// Returns the second element of a pair tuple. +forall X, Y -> Y pair_second([X, Y] p) pure asm "SECOND"; -;;; Returns the second element of a triple tuple. 
-forall X, Y, Z -> Y triple_second([X, Y, Z] p) asm "SECOND"; +/// Returns the first element of a triple tuple. +forall X, Y, Z -> X triple_first([X, Y, Z] p) pure asm "FIRST"; -;;; Returns the third element of a triple tuple. -forall X, Y, Z -> Z triple_third([X, Y, Z] p) asm "THIRD"; +/// Returns the second element of a triple tuple. +forall X, Y, Z -> Y triple_second([X, Y, Z] p) pure asm "SECOND"; +/// Returns the third element of a triple tuple. +forall X, Y, Z -> Z triple_third([X, Y, Z] p) pure asm "THIRD"; -;;; Push null element (casted to given type) -;;; By the TVM type `Null` Tolk represents absence of a value of some atomic type. -;;; So `null` can actually have any atomic type. -forall X -> X null() asm "PUSHNULL"; -;;; Moves a variable [x] to the top of the stack -forall X -> (X, ()) ~impure_touch(X x) impure asm "NOP"; +/// Push null element (casted to given type) +/// By the TVM type `Null` Tolk represents absence of a value of some atomic type. +/// So `null` can actually have any atomic type. +forall X -> X null() pure asm "PUSHNULL"; +/// Checks whether the argument is null. +forall X -> int null?(X x) pure builtin; +/// Moves a variable [x] to the top of the stack. +forall X -> X touch(X x) pure builtin; -;;; Returns the current Unix time as an Integer -int now() asm "NOW"; +/// Moves a variable [x] to the top of the stack. +forall X -> (X, ()) ~touch(X x) pure builtin; -;;; Returns the internal address of the current smart contract as a Slice with a `MsgAddressInt`. -;;; If necessary, it can be parsed further using primitives such as [parse_std_addr]. -slice my_address() asm "MYADDR"; +/// Mark a variable as used, such that the code which produced it won't be deleted even if it is not impure. 
+forall X -> (X, ()) ~impure_touch(X x) asm "NOP"; -;;; Returns the balance of the smart contract as a tuple consisting of an int -;;; (balance in nanotoncoins) and a `cell` -;;; (a dictionary with 32-bit keys representing the balance of "extra currencies") -;;; at the start of Computation Phase. -;;; Note that RAW primitives such as [send_raw_message] do not update this field. -[int, cell] get_balance() asm "BALANCE"; -;;; Returns the logical time of the current transaction. -int cur_lt() asm "LTIME"; -;;; Returns the starting logical time of the current block. -int block_lt() asm "BLOCKLT"; +/// Returns the current Unix time as an Integer +int now() pure asm "NOW"; -;;; Computes the representation hash of a `cell` [c] and returns it as a 256-bit unsigned integer `x`. -;;; Useful for signing and checking signatures of arbitrary entities represented by a tree of cells. -int cell_hash(cell c) asm "HASHCU"; +/// Returns the internal address of the current smart contract as a Slice with a `MsgAddressInt`. +/// If necessary, it can be parsed further using primitives such as [parse_std_addr]. +slice my_address() pure asm "MYADDR"; -;;; Computes the hash of a `slice s` and returns it as a 256-bit unsigned integer `x`. -;;; The result is the same as if an ordinary cell containing only data and references from `s` had been created -;;; and its hash computed by [cell_hash]. -int slice_hash(slice s) asm "HASHSU"; +/// Returns the balance of the smart contract as a tuple consisting of an int +/// (balance in nanotoncoins) and a `cell` +/// (a dictionary with 32-bit keys representing the balance of "extra currencies") +/// at the start of Computation Phase. +/// Note that RAW primitives such as [send_raw_message] do not update this field. +[int, cell] get_balance() pure asm "BALANCE"; -;;; Computes sha256 of the data bits of `slice` [s]. If the bit length of `s` is not divisible by eight, -;;; throws a cell underflow exception. 
The hash value is returned as a 256-bit unsigned integer `x`. -int string_hash(slice s) asm "SHA256U"; +/// Returns the logical time of the current transaction. +int cur_lt() pure asm "LTIME"; -{- +/// Returns the starting logical time of the current block. +int block_lt() pure asm "BLOCKLT"; + +/// Computes the representation hash of a `cell` [c] and returns it as a 256-bit unsigned integer `x`. +/// Useful for signing and checking signatures of arbitrary entities represented by a tree of cells. +int cell_hash(cell c) pure asm "HASHCU"; + +/// Computes the hash of a `slice s` and returns it as a 256-bit unsigned integer `x`. +/// The result is the same as if an ordinary cell containing only data and references from `s` had been created +/// and its hash computed by [cell_hash]. +int slice_hash(slice s) pure asm "HASHSU"; + +/// Computes sha256 of the data bits of `slice` [s]. If the bit length of `s` is not divisible by eight, +/// throws a cell underflow exception. The hash value is returned as a 256-bit unsigned integer `x`. +int string_hash(slice s) pure asm "SHA256U"; + +/*** # Signature checks --} - -;;; Checks the Ed25519-`signature` of a `hash` (a 256-bit unsigned integer, usually computed as the hash of some data) -;;; using [public_key] (also represented by a 256-bit unsigned integer). -;;; The signature must contain at least 512 data bits; only the first 512 bits are used. -;;; The result is `−1` if the signature is valid, `0` otherwise. -;;; Note that `CHKSIGNU` creates a 256-bit slice with the hash and calls `CHKSIGNS`. -;;; That is, if [hash] is computed as the hash of some data, these data are hashed twice, -;;; the second hashing occurring inside `CHKSIGNS`. -int check_signature(int hash, slice signature, int public_key) asm "CHKSIGNU"; - -;;; Checks whether [signature] is a valid Ed25519-signature of the data portion of `slice data` using `public_key`, -;;; similarly to [check_signature]. 
-;;; If the bit length of [data] is not divisible by eight, throws a cell underflow exception. -;;; The verification of Ed25519 signatures is the standard one, -;;; with sha256 used to reduce [data] to the 256-bit number that is actually signed. -int check_data_signature(slice data, slice signature, int public_key) asm "CHKSIGNS"; - -{--- +*/ + +/// Checks the Ed25519-`signature` of a `hash` (a 256-bit unsigned integer, usually computed as the hash of some data) +/// using [public_key] (also represented by a 256-bit unsigned integer). +/// The signature must contain at least 512 data bits; only the first 512 bits are used. +/// The result is `−1` if the signature is valid, `0` otherwise. +/// Note that `CHKSIGNU` creates a 256-bit slice with the hash and calls `CHKSIGNS`. +/// That is, if [hash] is computed as the hash of some data, these data are hashed twice, +/// the second hashing occurring inside `CHKSIGNS`. +int check_signature(int hash, slice signature, int public_key) pure asm "CHKSIGNU"; + +/// Checks whether [signature] is a valid Ed25519-signature of the data portion of `slice data` using `public_key`, +/// similarly to [check_signature]. +/// If the bit length of [data] is not divisible by eight, throws a cell underflow exception. +/// The verification of Ed25519 signatures is the standard one, +/// with sha256 used to reduce [data] to the 256-bit number that is actually signed. +int check_data_signature(slice data, slice signature, int public_key) pure asm "CHKSIGNS"; + +/*** # Computation of boc size The primitives below may be useful for computing storage fees of user-provided data. --} - -;;; Returns `(x, y, z, -1)` or `(null, null, null, 0)`. -;;; Recursively computes the count of distinct cells `x`, data bits `y`, and cell references `z` -;;; in the DAG rooted at `cell` [c], effectively returning the total storage used by this DAG taking into account -;;; the identification of equal cells. 
-;;; The values of `x`, `y`, and `z` are computed by a depth-first traversal of this DAG, -;;; with a hash table of visited cell hashes used to prevent visits of already-visited cells. -;;; The total count of visited cells `x` cannot exceed non-negative [max_cells]; -;;; otherwise the computation is aborted before visiting the `(max_cells + 1)`-st cell and -;;; a zero flag is returned to indicate failure. If [c] is `null`, returns `x = y = z = 0`. -(int, int, int) compute_data_size(cell c, int max_cells) impure asm "CDATASIZE"; - -;;; Similar to [compute_data_size?], but accepting a `slice` [s] instead of a `cell`. -;;; The returned value of `x` does not take into account the cell that contains the `slice` [s] itself; -;;; however, the data bits and the cell references of [s] are accounted for in `y` and `z`. -(int, int, int) slice_compute_data_size(slice s, int max_cells) impure asm "SDATASIZE"; - -;;; A non-quiet version of [compute_data_size?] that throws a cell overflow exception (`8`) on failure. -(int, int, int, int) compute_data_size?(cell c, int max_cells) asm "CDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; - -;;; A non-quiet version of [slice_compute_data_size?] that throws a cell overflow exception (8) on failure. -(int, int, int, int) slice_compute_data_size?(cell c, int max_cells) asm "SDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; - -;;; Throws an exception with exit_code excno if cond is not 0 (commented since implemented in compilator) -;; () throw_if(int excno, int cond) impure asm "THROWARGIF"; - -{-- +*/ + +/// A non-quiet version of [compute_data_size?] that throws a cell overflow exception (`8`) on failure. +(int, int, int) compute_data_size(cell c, int max_cells) asm "CDATASIZE"; + +/// A non-quiet version of [slice_compute_data_size?] that throws a cell overflow exception (`8`) on failure. +(int, int, int) slice_compute_data_size(slice s, int max_cells) asm "SDATASIZE"; + +/// Returns `(x, y, z, -1)` or `(null, null, null, 0)`. 
+/// Recursively computes the count of distinct cells `x`, data bits `y`, and cell references `z` +/// in the DAG rooted at `cell` [c], effectively returning the total storage used by this DAG taking into account +/// the identification of equal cells. +/// The values of `x`, `y`, and `z` are computed by a depth-first traversal of this DAG, +/// with a hash table of visited cell hashes used to prevent visits of already-visited cells. +/// The total count of visited cells `x` cannot exceed non-negative [max_cells]; +/// otherwise the computation is aborted before visiting the `(max_cells + 1)`-st cell and +/// a zero flag is returned to indicate failure. If [c] is `null`, returns `x = y = z = 0`. +(int, int, int, int) compute_data_size?(cell c, int max_cells) pure asm "CDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; + +/// Similar to [compute_data_size?], but accepting a `slice` [s] instead of a `cell`. +/// The returned value of `x` does not take into account the cell that contains the `slice` [s] itself; +/// however, the data bits and the cell references of [s] are accounted for in `y` and `z`. +(int, int, int, int) slice_compute_data_size?(slice s, int max_cells) pure asm "SDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; + +/// Throws exception [`excno`] with parameter zero. +/// In other words, it transfers control to the continuation in `c2`, +/// pushing `0` and [`excno`] into it's stack, and discarding the old stack altogether. +() throw(int excno) builtin; + +/// Throws exception [`excno`] with parameter zero only if [`cond`] != `0`. +() throw_if(int excno, int cond) builtin; + +/// Throws exception [`excno`] with parameter zero only if [`cond`] == `0`. +() throw_unless(int excno, int cond) builtin; + +/// Throws exception [`excno`] with parameter [`x`], +/// by copying [`x`] and [`excno`] into the stack of `c2` and transferring control to `c2`. 
+forall X -> () throw_arg(X x, int excno) builtin; + +/// Throws exception [`excno`] with parameter [`x`] only if [`cond`] != `0`. +forall X -> () throw_arg_if(X x, int excno, int cond) builtin; + +/// Throws exception [`excno`] with parameter [`x`] only if [`cond`] == `0`. +forall X -> () throw_arg_unless(X x, int excno, int cond) builtin; + +/*** # Debug primitives Only works for local TVM execution with debug level verbosity --} -;;; Dumps the stack (at most the top 255 values) and shows the total stack depth. -() dump_stack() impure asm "DUMPSTK"; +*/ + +/// Dump a variable [x] to the debug log. +forall X -> (X, ()) ~dump(X x) builtin; + +/// Dump a string [x] to the debug log. +forall X -> (X, ()) ~strdump(X x) builtin; -{- +/// Dumps the stack (at most the top 255 values) and shows the total stack depth. +() dump_stack() asm "DUMPSTK"; + +/*** # Persistent storage save and load --} +*/ -;;; Returns the persistent contract storage cell. It can be parsed or modified with slice and builder primitives later. -cell get_data() asm "c4 PUSH"; +/// Returns the persistent contract storage cell. It can be parsed or modified with slice and builder primitives later. +cell get_data() pure asm "c4 PUSH"; -;;; Sets `cell` [c] as persistent contract data. You can update persistent contract storage with this primitive. -() set_data(cell c) impure asm "c4 POP"; +/// Sets `cell` [c] as persistent contract data. You can update persistent contract storage with this primitive. +() set_data(cell c) asm "c4 POP"; -{- +/*** # Continuation primitives --} -;;; Usually `c3` has a continuation initialized by the whole code of the contract. It is used for function calls. -;;; The primitive returns the current value of `c3`. -cont get_c3() impure asm "c3 PUSH"; +*/ +/// Usually `c3` has a continuation initialized by the whole code of the contract. It is used for function calls. +/// The primitive returns the current value of `c3`. 
+cont get_c3() pure asm "c3 PUSH"; -;;; Updates the current value of `c3`. Usually, it is used for updating smart contract code in run-time. -;;; Note that after execution of this primitive the current code -;;; (and the stack of recursive function calls) won't change, -;;; but any other function call will use a function from the new code. -() set_c3(cont c) impure asm "c3 POP"; +/// Updates the current value of `c3`. Usually, it is used for updating smart contract code in run-time. +/// Note that after execution of this primitive the current code +/// (and the stack of recursive function calls) won't change, +/// but any other function call will use a function from the new code. +() set_c3(cont c) asm "c3 POP"; -;;; Transforms a `slice` [s] into a simple ordinary continuation `c`, with `c.code = s` and an empty stack and savelist. -cont bless(slice s) impure asm "BLESS"; +/// Transforms a `slice` [s] into a simple ordinary continuation `c`, with `c.code = s` and an empty stack and savelist. +cont bless(slice s) pure asm "BLESS"; -{--- +/*** # Gas related primitives --} +*/ + +/// Sets current gas limit `gl` to its maximal allowed value `gm`, and resets the gas credit `gc` to zero, +/// decreasing the value of `gr` by `gc` in the process. +/// In other words, the current smart contract agrees to buy some gas to finish the current transaction. +/// This action is required to process external messages, which bring no value (hence no gas) with themselves. +/// +/// For more details check [accept_message effects](https://ton.org/docs/#/smart-contracts/accept). +() accept_message() asm "ACCEPT"; + +/// Sets current gas limit `gl` to the minimum of limit and `gm`, and resets the gas credit `gc` to zero. +/// If the gas consumed so far (including the present instruction) exceeds the resulting value of `gl`, +/// an (unhandled) out of gas exception is thrown before setting new gas limits. 
+/// Notice that [set_gas_limit] with an argument `limit ≥ 2^63 − 1` is equivalent to [accept_message]. +() set_gas_limit(int limit) asm "SETGASLIMIT"; -;;; Sets current gas limit `gl` to its maximal allowed value `gm`, and resets the gas credit `gc` to zero, -;;; decreasing the value of `gr` by `gc` in the process. -;;; In other words, the current smart contract agrees to buy some gas to finish the current transaction. -;;; This action is required to process external messages, which bring no value (hence no gas) with themselves. -;;; -;;; For more details check [accept_message effects](https://ton.org/docs/#/smart-contracts/accept). -() accept_message() impure asm "ACCEPT"; +/// Commits the current state of registers `c4` (“persistent data”) and `c5` (“actions”) +/// so that the current execution is considered “successful” with the saved values even if an exception +/// in Computation Phase is thrown later. +() commit() asm "COMMIT"; -;;; Sets current gas limit `gl` to the minimum of limit and `gm`, and resets the gas credit `gc` to zero. -;;; If the gas consumed so far (including the present instruction) exceeds the resulting value of `gl`, -;;; an (unhandled) out of gas exception is thrown before setting new gas limits. -;;; Notice that [set_gas_limit] with an argument `limit ≥ 2^63 − 1` is equivalent to [accept_message]. -() set_gas_limit(int limit) impure asm "SETGASLIMIT"; +/// Not implemented +//() buy_gas(int gram) asm "BUYGAS"; -;;; Commits the current state of registers `c4` (“persistent data”) and `c5` (“actions”) -;;; so that the current execution is considered “successful” with the saved values even if an exception -;;; in Computation Phase is thrown later. -() commit() impure asm "COMMIT"; +/// Computes the amount of gas that can be bought for `amount` nanoTONs, +/// and sets `gl` accordingly in the same way as [set_gas_limit]. 
+() buy_gas(int amount) asm "BUYGAS"; -;;; Not implemented -;;() buy_gas(int gram) impure asm "BUYGAS"; +/// Computes the minimum of two integers [x] and [y]. +int min(int x, int y) pure asm "MIN"; -;;; Computes the amount of gas that can be bought for `amount` nanoTONs, -;;; and sets `gl` accordingly in the same way as [set_gas_limit]. -() buy_gas(int amount) impure asm "BUYGAS"; +/// Computes the maximum of two integers [x] and [y]. +int max(int x, int y) pure asm "MAX"; -;;; Computes the minimum of two integers [x] and [y]. -int min(int x, int y) asm "MIN"; +/// Sorts two integers. +(int, int) minmax(int x, int y) pure asm "MINMAX"; -;;; Computes the maximum of two integers [x] and [y]. -int max(int x, int y) asm "MAX"; +/// Computes the absolute value of an integer [x]. +int abs(int x) pure asm "ABS"; -;;; Sorts two integers. -(int, int) minmax(int x, int y) asm "MINMAX"; +/// Computes the quotient and remainder of [x] / [y]. Example: divmod(112,3) = (37,1) +(int, int) divmod(int x, int y) pure builtin; -;;; Computes the absolute value of an integer [x]. -int abs(int x) asm "ABS"; +/// Computes the remainder and quotient of [x] / [y]. Example: moddiv(112,3) = (1,37) +(int, int) moddiv(int x, int y) pure builtin; -{- +/// Computes multiple-then-divide: floor([x] * [y] / [z]). +/// The intermediate result is stored in a 513-bit integer to prevent precision loss. +int muldiv(int x, int y, int z) pure builtin; + +/// Similar to `muldiv`, but rounds the result: round([x] * [y] / [z]). +int muldivr(int x, int y, int z) pure builtin; + +/// Similar to `muldiv`, but ceils the result: ceil([x] * [y] / [z]). +int muldivc(int x, int y, int z) pure builtin; + +/// Computes the quotient and remainder of ([x] * [y] / [z]). 
Example: muldivmod(112,3,10) = (33,6) +(int, int) muldivmod(int x, int y, int z) pure builtin; + +/*** # Slice primitives It is said that a primitive _loads_ some data, @@ -287,131 +343,129 @@ int abs(int x) asm "ABS"; (it can be used as non-modifying method). Unless otherwise stated, loading and preloading primitives read the data from a prefix of the slice. --} - +*/ -;;; Converts a `cell` [c] into a `slice`. Notice that [c] must be either an ordinary cell, -;;; or an exotic cell (see [TVM.pdf](https://ton-blockchain.github.io/docs/tvm.pdf), 3.1.2) -;;; which is automatically loaded to yield an ordinary cell `c'`, converted into a `slice` afterwards. -slice begin_parse(cell c) asm "CTOS"; -;;; Checks if [s] is empty. If not, throws an exception. -() end_parse(slice s) impure asm "ENDS"; +/// Converts a `cell` [c] into a `slice`. Notice that [c] must be either an ordinary cell, +/// or an exotic cell (see [TVM.pdf](https://ton-blockchain.github.io/docs/tvm.pdf), 3.1.2) +/// which is automatically loaded to yield an ordinary cell `c'`, converted into a `slice` afterwards. +slice begin_parse(cell c) pure asm "CTOS"; -;;; Loads the first reference from the slice. -(slice, cell) load_ref(slice s) asm( -> 1 0) "LDREF"; +/// Checks if [s] is empty. If not, throws an exception. +() end_parse(slice s) asm "ENDS"; -;;; Preloads the first reference from the slice. -cell preload_ref(slice s) asm "PLDREF"; +/// Loads the first reference from the slice. +(slice, cell) load_ref(slice s) pure asm( -> 1 0) "LDREF"; - {- Functions below are commented because are implemented on compilator level for optimisation -} +/// Preloads the first reference from the slice. +cell preload_ref(slice s) pure asm "PLDREF"; -;;; Loads a signed [len]-bit integer from a slice [s]. -;; (slice, int) ~load_int(slice s, int len) asm(s len -> 1 0) "LDIX"; +/// Loads a signed [len]-bit integer from a slice [s]. 
+(slice, int) load_int(slice s, int len) pure builtin; -;;; Loads an unsigned [len]-bit integer from a slice [s]. -;; (slice, int) ~load_uint(slice s, int len) asm( -> 1 0) "LDUX"; +/// Loads an unsigned [len]-bit integer from a slice [s]. +(slice, int) load_uint(slice s, int len) pure builtin; -;;; Preloads a signed [len]-bit integer from a slice [s]. -;; int preload_int(slice s, int len) asm "PLDIX"; +/// Preloads a signed [len]-bit integer from a slice [s]. +int preload_int(slice s, int len) pure builtin; -;;; Preloads an unsigned [len]-bit integer from a slice [s]. -;; int preload_uint(slice s, int len) asm "PLDUX"; +/// Preloads an unsigned [len]-bit integer from a slice [s]. +int preload_uint(slice s, int len) pure builtin; -;;; Loads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate `slice s''`. -;; (slice, slice) load_bits(slice s, int len) asm(s len -> 1 0) "LDSLICEX"; +/// Loads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate slice `s''`. +(slice, slice) load_bits(slice s, int len) pure builtin; -;;; Preloads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate `slice s''`. -;; slice preload_bits(slice s, int len) asm "PLDSLICEX"; +/// Preloads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate slice `s''`. +slice preload_bits(slice s, int len) pure builtin; -;;; Loads serialized amount of TonCoins (any unsigned integer up to `2^120 - 1`). -(slice, int) load_grams(slice s) asm( -> 1 0) "LDGRAMS"; -(slice, int) load_coins(slice s) asm( -> 1 0) "LDGRAMS"; +/// Loads serialized amount of TonCoins (any unsigned integer up to `2^120 - 1`). +(slice, int) load_grams(slice s) pure asm( -> 1 0) "LDGRAMS"; +(slice, int) load_coins(slice s) pure asm( -> 1 0) "LDGRAMS"; -;;; Returns all but the first `0 ≤ len ≤ 1023` bits of `slice` [s]. -slice skip_bits(slice s, int len) asm "SDSKIPFIRST"; -(slice, ()) ~skip_bits(slice s, int len) asm "SDSKIPFIRST"; +/// Returns all but the first `0 ≤ len ≤ 1023` bits of `slice` [s]. 
+slice skip_bits(slice s, int len) pure asm "SDSKIPFIRST"; +(slice, ()) ~skip_bits(slice s, int len) pure asm "SDSKIPFIRST"; -;;; Returns the first `0 ≤ len ≤ 1023` bits of `slice` [s]. -slice first_bits(slice s, int len) asm "SDCUTFIRST"; +/// Returns the first `0 ≤ len ≤ 1023` bits of `slice` [s]. +slice first_bits(slice s, int len) pure asm "SDCUTFIRST"; -;;; Returns all but the last `0 ≤ len ≤ 1023` bits of `slice` [s]. -slice skip_last_bits(slice s, int len) asm "SDSKIPLAST"; -(slice, ()) ~skip_last_bits(slice s, int len) asm "SDSKIPLAST"; +/// Returns all but the last `0 ≤ len ≤ 1023` bits of `slice` [s]. +slice skip_last_bits(slice s, int len) pure asm "SDSKIPLAST"; +(slice, ()) ~skip_last_bits(slice s, int len) pure asm "SDSKIPLAST"; -;;; Returns the last `0 ≤ len ≤ 1023` bits of `slice` [s]. -slice slice_last(slice s, int len) asm "SDCUTLAST"; +/// Returns the last `0 ≤ len ≤ 1023` bits of `slice` [s]. +slice slice_last(slice s, int len) pure asm "SDCUTLAST"; -;;; Loads a dictionary `D` (HashMapE) from `slice` [s]. -;;; (returns `null` if `nothing` constructor is used). -(slice, cell) load_dict(slice s) asm( -> 1 0) "LDDICT"; +/// Loads a dictionary `D` (HashMapE) from `slice` [s]. +/// (returns `null` if `nothing` constructor is used). +(slice, cell) load_dict(slice s) pure asm( -> 1 0) "LDDICT"; -;;; Preloads a dictionary `D` from `slice` [s]. -cell preload_dict(slice s) asm "PLDDICT"; +/// Preloads a dictionary `D` from `slice` [s]. +cell preload_dict(slice s) pure asm "PLDDICT"; -;;; Loads a dictionary as [load_dict], but returns only the remainder of the slice. -slice skip_dict(slice s) asm "SKIPDICT"; +/// Loads a dictionary as [load_dict], but returns only the remainder of the slice. +slice skip_dict(slice s) pure asm "SKIPDICT"; -;;; Loads (Maybe ^Cell) from `slice` [s]. 
-;;; In other words loads 1 bit and if it is true -;;; loads first ref and return it with slice remainder -;;; otherwise returns `null` and slice remainder -(slice, cell) load_maybe_ref(slice s) asm( -> 1 0) "LDOPTREF"; +/// Loads (Maybe ^Cell) from `slice` [s]. +/// In other words, loads 1 bit and, if it is true, +/// loads the first ref and returns it with the slice remainder; +/// otherwise returns `null` and the slice remainder. +(slice, cell) load_maybe_ref(slice s) pure asm( -> 1 0) "LDOPTREF"; -;;; Preloads (Maybe ^Cell) from `slice` [s]. -cell preload_maybe_ref(slice s) asm "PLDOPTREF"; +/// Preloads (Maybe ^Cell) from `slice` [s]. +cell preload_maybe_ref(slice s) pure asm "PLDOPTREF"; -;;; Returns the depth of `cell` [c]. -;;; If [c] has no references, then return `0`; -;;; otherwise the returned value is one plus the maximum of depths of cells referred to from [c]. -;;; If [c] is a `null` instead of a cell, returns zero. -int cell_depth(cell c) asm "CDEPTH"; +/// Returns the depth of `cell` [c]. +/// If [c] has no references, then returns `0`; +/// otherwise the returned value is one plus the maximum of depths of cells referred to from [c]. +/// If [c] is a `null` instead of a cell, returns zero. +int cell_depth(cell c) pure asm "CDEPTH"; -{- +/*** # Slice size primitives --} +*/ -;;; Returns the number of references in `slice` [s]. -int slice_refs(slice s) asm "SREFS"; +/// Returns the number of references in `slice` [s]. +int slice_refs(slice s) pure asm "SREFS"; -;;; Returns the number of data bits in `slice` [s]. -int slice_bits(slice s) asm "SBITS"; +/// Returns the number of data bits in `slice` [s]. +int slice_bits(slice s) pure asm "SBITS"; -;;; Returns both the number of data bits and the number of references in `slice` [s]. -(int, int) slice_bits_refs(slice s) asm "SBITREFS"; +/// Returns both the number of data bits and the number of references in `slice` [s]. 
+(int, int) slice_bits_refs(slice s) pure asm "SBITREFS"; -;;; Checks whether a `slice` [s] is empty (i.e., contains no bits of data and no cell references). -int slice_empty?(slice s) asm "SEMPTY"; +/// Checks whether a `slice` [s] is empty (i.e., contains no bits of data and no cell references). +int slice_empty?(slice s) pure asm "SEMPTY"; -;;; Checks whether `slice` [s] has no bits of data. -int slice_data_empty?(slice s) asm "SDEMPTY"; +/// Checks whether `slice` [s] has no bits of data. +int slice_data_empty?(slice s) pure asm "SDEMPTY"; -;;; Checks whether `slice` [s] has no references. -int slice_refs_empty?(slice s) asm "SREMPTY"; +/// Checks whether `slice` [s] has no references. +int slice_refs_empty?(slice s) pure asm "SREMPTY"; -;;; Returns the depth of `slice` [s]. -;;; If [s] has no references, then returns `0`; -;;; otherwise the returned value is one plus the maximum of depths of cells referred to from [s]. -int slice_depth(slice s) asm "SDEPTH"; +/// Returns the depth of `slice` [s]. +/// If [s] has no references, then returns `0`; +/// otherwise the returned value is one plus the maximum of depths of cells referred to from [s]. +int slice_depth(slice s) pure asm "SDEPTH"; -{- +/*** # Builder size primitives --} +*/ -;;; Returns the number of cell references already stored in `builder` [b] -int builder_refs(builder b) asm "BREFS"; +/// Returns the number of cell references already stored in `builder` [b] +int builder_refs(builder b) pure asm "BREFS"; -;;; Returns the number of data bits already stored in `builder` [b]. -int builder_bits(builder b) asm "BBITS"; +/// Returns the number of data bits already stored in `builder` [b]. +int builder_bits(builder b) pure asm "BBITS"; -;;; Returns the depth of `builder` [b]. -;;; If no cell references are stored in [b], then returns 0; -;;; otherwise the returned value is one plus the maximum of depths of cells referred to from [b]. 
-int builder_depth(builder b) asm "BDEPTH"; +/// Returns the depth of `builder` [b]. +/// If no cell references are stored in [b], then returns 0; +/// otherwise the returned value is one plus the maximum of depths of cells referred to from [b]. +int builder_depth(builder b) pure asm "BDEPTH"; -{- +/*** # Builder primitives It is said that a primitive _stores_ a value `x` into a builder `b` if it returns a modified version of the builder `b'` with the value `x` stored at the end of it. @@ -419,48 +473,47 @@ int builder_depth(builder b) asm "BDEPTH"; All the primitives below first check whether there is enough space in the `builder`, and only then check the range of the value being serialized. --} - -;;; Creates a new empty `builder`. -builder begin_cell() asm "NEWC"; +*/ -;;; Converts a `builder` into an ordinary `cell`. -cell end_cell(builder b) asm "ENDC"; +/// Creates a new empty `builder`. +builder begin_cell() pure asm "NEWC"; -;;; Stores a reference to `cell` [c] into `builder` [b]. -builder store_ref(builder b, cell c) asm(c b) "STREF"; +/// Converts a `builder` into an ordinary `cell`. +cell end_cell(builder b) pure asm "ENDC"; -;;; Stores an unsigned [len]-bit integer `x` into `b` for `0 ≤ len ≤ 256`. -;; builder store_uint(builder b, int x, int len) asm(x b len) "STUX"; +/// Stores a reference to `cell` [c] into `builder` [b]. +builder store_ref(builder b, cell c) pure asm(c b) "STREF"; -;;; Stores a signed [len]-bit integer `x` into `b` for` 0 ≤ len ≤ 257`. -;; builder store_int(builder b, int x, int len) asm(x b len) "STIX"; +/// Stores an unsigned [len]-bit integer `x` into `b` for `0 ≤ len ≤ 256`. +builder store_uint(builder b, int x, int len) pure builtin; +/// Stores a signed [len]-bit integer `x` into `b` for `0 ≤ len ≤ 257`. +builder store_int(builder b, int x, int len) pure builtin; -;;; Stores `slice` [s] into `builder` [b] -builder store_slice(builder b, slice s) asm "STSLICER"; +/// Stores `slice` [s] into `builder` [b]. 
+builder store_slice(builder b, slice s) pure asm "STSLICER"; -;;; Stores (serializes) an integer [x] in the range `0..2^120 − 1` into `builder` [b]. -;;; The serialization of [x] consists of a 4-bit unsigned big-endian integer `l`, -;;; which is the smallest integer `l ≥ 0`, such that `x < 2^8l`, -;;; followed by an `8l`-bit unsigned big-endian representation of [x]. -;;; If [x] does not belong to the supported range, a range check exception is thrown. -;;; -;;; Store amounts of TonCoins to the builder as VarUInteger 16 -builder store_grams(builder b, int x) asm "STGRAMS"; -builder store_coins(builder b, int x) asm "STGRAMS"; +/// Stores (serializes) an integer [x] in the range `0..2^120 − 1` into `builder` [b]. +/// The serialization of [x] consists of a 4-bit unsigned big-endian integer `l`, +/// which is the smallest integer `l ≥ 0`, such that `x < 2^(8l)`, +/// followed by an `8l`-bit unsigned big-endian representation of [x]. +/// If [x] does not belong to the supported range, a range check exception is thrown. +/// +/// Stores an amount of TonCoins into the builder as VarUInteger 16. +builder store_grams(builder b, int x) pure asm "STGRAMS"; +builder store_coins(builder b, int x) pure asm "STGRAMS"; -;;; Stores dictionary `D` represented by `cell` [c] or `null` into `builder` [b]. -;;; In other words, stores a `1`-bit and a reference to [c] if [c] is not `null` and `0`-bit otherwise. -builder store_dict(builder b, cell c) asm(c b) "STDICT"; +/// Stores dictionary `D` represented by `cell` [c] or `null` into `builder` [b]. +/// In other words, stores a `1`-bit and a reference to [c] if [c] is not `null` and `0`-bit otherwise. 
+builder store_dict(builder b, cell c) pure asm(c b) "STDICT"; -;;; Stores (Maybe ^Cell) to builder: -;;; if cell is null store 1 zero bit -;;; otherwise store 1 true bit and ref to cell -builder store_maybe_ref(builder b, cell c) asm(c b) "STOPTREF"; +/// Stores (Maybe ^Cell) to builder: +/// if cell is null store 1 zero bit +/// otherwise store 1 true bit and ref to cell +builder store_maybe_ref(builder b, cell c) pure asm(c b) "STOPTREF"; -{- +/*** # Address manipulation primitives The address manipulation primitives listed below serialize and deserialize values according to the following TL-B scheme: ```TL-B @@ -495,144 +548,144 @@ builder store_maybe_ref(builder b, cell c) asm(c b) "STOPTREF"; Next, integer `x` is the `workchain_id`, and slice `s` contains the address. - `addr_var` is represented by `t = (3, u, x, s)`, where `u`, `x`, and `s` have the same meaning as for `addr_std`. --} +*/ -;;; Loads from slice [s] the only prefix that is a valid `MsgAddress`, -;;; and returns both this prefix `s'` and the remainder `s''` of [s] as slices. -(slice, slice) load_msg_addr(slice s) asm( -> 1 0) "LDMSGADDR"; +/// Loads from slice [s] the only prefix that is a valid `MsgAddress`, +/// and returns both this prefix `s'` and the remainder `s''` of [s] as slices. +(slice, slice) load_msg_addr(slice s) pure asm( -> 1 0) "LDMSGADDR"; -;;; Decomposes slice [s] containing a valid `MsgAddress` into a `tuple t` with separate fields of this `MsgAddress`. -;;; If [s] is not a valid `MsgAddress`, a cell deserialization exception is thrown. -tuple parse_addr(slice s) asm "PARSEMSGADDR"; +/// Decomposes slice [s] containing a valid `MsgAddress` into a `tuple t` with separate fields of this `MsgAddress`. +/// If [s] is not a valid `MsgAddress`, a cell deserialization exception is thrown. 
+tuple parse_addr(slice s) pure asm "PARSEMSGADDR"; -;;; Parses slice [s] containing a valid `MsgAddressInt` (usually a `msg_addr_std`), -;;; applies rewriting from the anycast (if present) to the same-length prefix of the address, -;;; and returns both the workchain and the 256-bit address as integers. -;;; If the address is not 256-bit, or if [s] is not a valid serialization of `MsgAddressInt`, -;;; throws a cell deserialization exception. -(int, int) parse_std_addr(slice s) asm "REWRITESTDADDR"; +/// Parses slice [s] containing a valid `MsgAddressInt` (usually a `msg_addr_std`), +/// applies rewriting from the anycast (if present) to the same-length prefix of the address, +/// and returns both the workchain and the 256-bit address as integers. +/// If the address is not 256-bit, or if [s] is not a valid serialization of `MsgAddressInt`, +/// throws a cell deserialization exception. +(int, int) parse_std_addr(slice s) pure asm "REWRITESTDADDR"; -;;; A variant of [parse_std_addr] that returns the (rewritten) address as a slice [s], -;;; even if it is not exactly 256 bit long (represented by a `msg_addr_var`). -(int, slice) parse_var_addr(slice s) asm "REWRITEVARADDR"; +/// A variant of [parse_std_addr] that returns the (rewritten) address as a slice [s], +/// even if it is not exactly 256 bit long (represented by a `msg_addr_var`). +(int, slice) parse_var_addr(slice s) pure asm "REWRITEVARADDR"; -{- +/*** # Dictionary primitives --} - - -;;; Sets the value associated with [key_len]-bit key signed index in dictionary [dict] to [value] (cell), -;;; and returns the resulting dictionary. -cell idict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTISETREF"; -(cell, ()) ~idict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTISETREF"; - -;;; Sets the value associated with [key_len]-bit key unsigned index in dictionary [dict] to [value] (cell), -;;; and returns the resulting dictionary. 
-cell udict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUSETREF"; -(cell, ()) ~udict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUSETREF"; - -cell idict_get_ref(cell dict, int key_len, int index) asm(index dict key_len) "DICTIGETOPTREF"; -(cell, int) idict_get_ref?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIGETREF" "NULLSWAPIFNOT"; -(cell, int) udict_get_ref?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUGETREF" "NULLSWAPIFNOT"; -(cell, cell) idict_set_get_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTISETGETOPTREF"; -(cell, cell) udict_set_get_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUSETGETOPTREF"; -(cell, int) idict_delete?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIDEL"; -(cell, int) udict_delete?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUDEL"; -(slice, int) idict_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIGET" "NULLSWAPIFNOT"; -(slice, int) udict_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUGET" "NULLSWAPIFNOT"; -(cell, slice, int) idict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; -(cell, slice, int) udict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; -(cell, (slice, int)) ~idict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; -(cell, (slice, int)) ~udict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; -cell udict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUSET"; -(cell, ()) ~udict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUSET"; -cell 
idict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTISET"; -(cell, ()) ~idict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTISET"; -cell dict_set(cell dict, int key_len, slice index, slice value) asm(value index dict key_len) "DICTSET"; -(cell, ()) ~dict_set(cell dict, int key_len, slice index, slice value) asm(value index dict key_len) "DICTSET"; -(cell, int) udict_add?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUADD"; -(cell, int) udict_replace?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUREPLACE"; -(cell, int) idict_add?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTIADD"; -(cell, int) idict_replace?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTIREPLACE"; -cell udict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUSETB"; -(cell, ()) ~udict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUSETB"; -cell idict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTISETB"; -(cell, ()) ~idict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTISETB"; -cell dict_set_builder(cell dict, int key_len, slice index, builder value) asm(value index dict key_len) "DICTSETB"; -(cell, ()) ~dict_set_builder(cell dict, int key_len, slice index, builder value) asm(value index dict key_len) "DICTSETB"; -(cell, int) udict_add_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUADDB"; -(cell, int) udict_replace_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUREPLACEB"; -(cell, int) idict_add_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) 
"DICTIADDB"; -(cell, int) idict_replace_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTIREPLACEB"; -(cell, int, slice, int) udict_delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; -(cell, (int, slice, int)) ~udict::delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; -(cell, int, slice, int) idict_delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; -(cell, (int, slice, int)) ~idict::delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; -(cell, slice, slice, int) dict_delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; -(cell, (slice, slice, int)) ~dict::delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; -(cell, int, slice, int) udict_delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; -(cell, (int, slice, int)) ~udict::delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; -(cell, int, slice, int) idict_delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; -(cell, (int, slice, int)) ~idict::delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; -(cell, slice, slice, int) dict_delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; -(cell, (slice, slice, int)) ~dict::delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; -(int, slice, int) udict_get_min?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; -(int, slice, int) udict_get_max?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMAX" "NULLSWAPIFNOT2"; -(int, cell, int) udict_get_min_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; -(int, cell, int) udict_get_max_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMAXREF" 
"NULLSWAPIFNOT2"; -(int, slice, int) idict_get_min?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; -(int, slice, int) idict_get_max?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; -(int, cell, int) idict_get_min_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; -(int, cell, int) idict_get_max_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; -(int, slice, int) udict_get_next?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; -(int, slice, int) udict_get_nexteq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; -(int, slice, int) udict_get_prev?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; -(int, slice, int) udict_get_preveq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; -(int, slice, int) idict_get_next?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; -(int, slice, int) idict_get_nexteq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; -(int, slice, int) idict_get_prev?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; -(int, slice, int) idict_get_preveq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; - -;;; Creates an empty dictionary, which is actually a null value. Equivalent to PUSHNULL -cell new_dict() asm "NEWDICT"; -;;; Checks whether a dictionary is empty. Equivalent to cell_null?. 
-int dict_empty?(cell c) asm "DICTEMPTY"; - - -{- Prefix dictionary primitives -} -(slice, slice, slice, int) pfxdict_get?(cell dict, int key_len, slice key) asm(key dict key_len) "PFXDICTGETQ" "NULLSWAPIFNOT2"; -(cell, int) pfxdict_set?(cell dict, int key_len, slice key, slice value) asm(value key dict key_len) "PFXDICTSET"; -(cell, int) pfxdict_delete?(cell dict, int key_len, slice key) asm(key dict key_len) "PFXDICTDEL"; - -;;; Returns the value of the global configuration parameter with integer index `i` as a `cell` or `null` value. -cell config_param(int x) asm "CONFIGOPTPARAM"; -;;; Checks whether c is a null. Note, that Tolk also has polymorphic null? built-in. -int cell_null?(cell c) asm "ISNULL"; - -;;; Creates an output action which would reserve exactly amount nanotoncoins (if mode = 0), at most amount nanotoncoins (if mode = 2), or all but amount nanotoncoins (if mode = 1 or mode = 3), from the remaining balance of the account. It is roughly equivalent to creating an outbound message carrying amount nanotoncoins (or b − amount nanotoncoins, where b is the remaining balance) to oneself, so that the subsequent output actions would not be able to spend more money than the remainder. Bit +2 in mode means that the external action does not fail if the specified amount cannot be reserved; instead, all remaining balance is reserved. Bit +8 in mode means `amount <- -amount` before performing any further actions. Bit +4 in mode means that amount is increased by the original balance of the current account (before the compute phase), including all extra currencies, before performing any other checks and actions. Currently, amount must be a non-negative integer, and mode must be in the range 0..15. -() raw_reserve(int amount, int mode) impure asm "RAWRESERVE"; -;;; Similar to raw_reserve, but also accepts a dictionary extra_amount (represented by a cell or null) with extra currencies. In this way currencies other than TonCoin can be reserved. 
-() raw_reserve_extra(int amount, cell extra_amount, int mode) impure asm "RAWRESERVEX"; -;;; Sends a raw message contained in msg, which should contain a correctly serialized object Message X, with the only exception that the source address is allowed to have dummy value addr_none (to be automatically replaced with the current smart contract address), and ihr_fee, fwd_fee, created_lt and created_at fields can have arbitrary values (to be rewritten with correct values during the action phase of the current transaction). Integer parameter mode contains the flags. Currently mode = 0 is used for ordinary messages; mode = 128 is used for messages that are to carry all the remaining balance of the current smart contract (instead of the value originally indicated in the message); mode = 64 is used for messages that carry all the remaining value of the inbound message in addition to the value initially indicated in the new message (if bit 0 is not set, the gas fees are deducted from this amount); mode' = mode + 1 means that the sender wants to pay transfer fees separately; mode' = mode + 2 means that any errors arising while processing this message during the action phase should be ignored. Finally, mode' = mode + 32 means that the current account must be destroyed if its resulting balance is zero. This flag is usually employed together with +128. -() send_raw_message(cell msg, int mode) impure asm "SENDRAWMSG"; -;;; Creates an output action that would change this smart contract code to that given by cell new_code. Notice that this change will take effect only after the successful termination of the current run of the smart contract -() set_code(cell new_code) impure asm "SETCODE"; - -;;; Generates a new pseudo-random unsigned 256-bit integer x. 
The algorithm is as follows: if r is the old value of the random seed, considered as a 32-byte array (by constructing the big-endian representation of an unsigned 256-bit integer), then its sha512(r) is computed; the first 32 bytes of this hash are stored as the new value r' of the random seed, and the remaining 32 bytes are returned as the next random value x. -int random() impure asm "RANDU256"; -;;; Generates a new pseudo-random integer z in the range 0..range−1 (or range..−1, if range < 0). More precisely, an unsigned random value x is generated as in random; then z := x * range / 2^256 is computed. -int rand(int range) impure asm "RAND"; -;;; Returns the current random seed as an unsigned 256-bit Integer. -int get_seed() impure asm "RANDSEED"; -;;; Sets the random seed to unsigned 256-bit seed. -() set_seed(int) impure asm "SETRAND"; -;;; Mixes unsigned 256-bit integer x into the random seed r by setting the random seed to sha256 of the concatenation of two 32-byte strings: the first with the big-endian representation of the old seed r, and the second with the big-endian representation of x. -() randomize(int x) impure asm "ADDRAND"; -;;; Equivalent to randomize(cur_lt());. -() randomize_lt() impure asm "LTIME" "ADDRAND"; - -;;; Checks whether the data parts of two slices coinside -int equal_slice_bits (slice a, slice b) asm "SDEQ"; - -;;; Concatenates two builders -builder store_builder(builder to, builder from) asm "STBR"; +*/ + + +/// Sets the value associated with [key_len]-bit key signed index in dictionary [dict] to [value] (cell), +/// and returns the resulting dictionary. 
+cell idict_set_ref(cell dict, int key_len, int index, cell value) pure asm(value index dict key_len) "DICTISETREF"; +(cell, ()) ~idict_set_ref(cell dict, int key_len, int index, cell value) pure asm(value index dict key_len) "DICTISETREF"; + +/// Sets the value associated with [key_len]-bit key unsigned index in dictionary [dict] to [value] (cell), +/// and returns the resulting dictionary. +cell udict_set_ref(cell dict, int key_len, int index, cell value) pure asm(value index dict key_len) "DICTUSETREF"; +(cell, ()) ~udict_set_ref(cell dict, int key_len, int index, cell value) pure asm(value index dict key_len) "DICTUSETREF"; + +cell idict_get_ref(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTIGETOPTREF"; +(cell, int) idict_get_ref?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTIGETREF" "NULLSWAPIFNOT"; +(cell, int) udict_get_ref?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTUGETREF" "NULLSWAPIFNOT"; +(cell, cell) idict_set_get_ref(cell dict, int key_len, int index, cell value) pure asm(value index dict key_len) "DICTISETGETOPTREF"; +(cell, cell) udict_set_get_ref(cell dict, int key_len, int index, cell value) pure asm(value index dict key_len) "DICTUSETGETOPTREF"; +(cell, int) idict_delete?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTIDEL"; +(cell, int) udict_delete?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTUDEL"; +(slice, int) idict_get?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTIGET" "NULLSWAPIFNOT"; +(slice, int) udict_get?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTUGET" "NULLSWAPIFNOT"; +(cell, slice, int) idict_delete_get?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; +(cell, slice, int) udict_delete_get?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; +(cell, (slice, int)) 
~idict_delete_get?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; +(cell, (slice, int)) ~udict_delete_get?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; +cell udict_set(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTUSET"; +(cell, ()) ~udict_set(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTUSET"; +cell idict_set(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTISET"; +(cell, ()) ~idict_set(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTISET"; +cell dict_set(cell dict, int key_len, slice index, slice value) pure asm(value index dict key_len) "DICTSET"; +(cell, ()) ~dict_set(cell dict, int key_len, slice index, slice value) pure asm(value index dict key_len) "DICTSET"; +(cell, int) udict_add?(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTUADD"; +(cell, int) udict_replace?(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTUREPLACE"; +(cell, int) idict_add?(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTIADD"; +(cell, int) idict_replace?(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTIREPLACE"; +cell udict_set_builder(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTUSETB"; +(cell, ()) ~udict_set_builder(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTUSETB"; +cell idict_set_builder(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTISETB"; +(cell, ()) ~idict_set_builder(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTISETB"; +cell dict_set_builder(cell dict, int key_len, slice index, builder 
value) pure asm(value index dict key_len) "DICTSETB"; +(cell, ()) ~dict_set_builder(cell dict, int key_len, slice index, builder value) pure asm(value index dict key_len) "DICTSETB"; +(cell, int) udict_add_builder?(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTUADDB"; +(cell, int) udict_replace_builder?(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTUREPLACEB"; +(cell, int) idict_add_builder?(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTIADDB"; +(cell, int) idict_replace_builder?(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTIREPLACEB"; +(cell, int, slice, int) udict_delete_get_min(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~udict::delete_get_min(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; +(cell, int, slice, int) idict_delete_get_min(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~idict::delete_get_min(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; +(cell, slice, slice, int) dict_delete_get_min(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; +(cell, (slice, slice, int)) ~dict::delete_get_min(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; +(cell, int, slice, int) udict_delete_get_max(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~udict::delete_get_max(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; +(cell, int, slice, int) idict_delete_get_max(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~idict::delete_get_max(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; +(cell, slice, slice, int) 
dict_delete_get_max(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; +(cell, (slice, slice, int)) ~dict::delete_get_max(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_min?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_max?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTUMAX" "NULLSWAPIFNOT2"; +(int, cell, int) udict_get_min_ref?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; +(int, cell, int) udict_get_max_ref?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTUMAXREF" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_min?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_max?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; +(int, cell, int) idict_get_min_ref?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; +(int, cell, int) idict_get_max_ref?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_next?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_nexteq?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_prev?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_preveq?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_next?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_nexteq?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; +(int, slice, int) 
idict_get_prev?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_preveq?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; + +/// Creates an empty dictionary, which is actually a null value. Equivalent to PUSHNULL +cell new_dict() pure asm "NEWDICT"; +/// Checks whether a dictionary is empty. Equivalent to cell_null?. +int dict_empty?(cell c) pure asm "DICTEMPTY"; + + +/* Prefix dictionary primitives */ +(slice, slice, slice, int) pfxdict_get?(cell dict, int key_len, slice key) pure asm(key dict key_len) "PFXDICTGETQ" "NULLSWAPIFNOT2"; +(cell, int) pfxdict_set?(cell dict, int key_len, slice key, slice value) pure asm(value key dict key_len) "PFXDICTSET"; +(cell, int) pfxdict_delete?(cell dict, int key_len, slice key) pure asm(key dict key_len) "PFXDICTDEL"; + +/// Returns the value of the global configuration parameter with integer index `i` as a `cell` or `null` value. +cell config_param(int x) pure asm "CONFIGOPTPARAM"; +/// Checks whether c is a null. Note, that Tolk also has polymorphic null? built-in. +int cell_null?(cell c) pure asm "ISNULL"; + +/// Creates an output action which would reserve exactly amount nanotoncoins (if mode = 0), at most amount nanotoncoins (if mode = 2), or all but amount nanotoncoins (if mode = 1 or mode = 3), from the remaining balance of the account. It is roughly equivalent to creating an outbound message carrying amount nanotoncoins (or b − amount nanotoncoins, where b is the remaining balance) to oneself, so that the subsequent output actions would not be able to spend more money than the remainder. Bit +2 in mode means that the external action does not fail if the specified amount cannot be reserved; instead, all remaining balance is reserved. Bit +8 in mode means `amount <- -amount` before performing any further actions. 
Bit +4 in mode means that amount is increased by the original balance of the current account (before the compute phase), including all extra currencies, before performing any other checks and actions. Currently, amount must be a non-negative integer, and mode must be in the range 0..15. +() raw_reserve(int amount, int mode) asm "RAWRESERVE"; +/// Similar to raw_reserve, but also accepts a dictionary extra_amount (represented by a cell or null) with extra currencies. In this way currencies other than TonCoin can be reserved. +() raw_reserve_extra(int amount, cell extra_amount, int mode) asm "RAWRESERVEX"; +/// Sends a raw message contained in msg, which should contain a correctly serialized object Message X, with the only exception that the source address is allowed to have dummy value addr_none (to be automatically replaced with the current smart contract address), and ihr_fee, fwd_fee, created_lt and created_at fields can have arbitrary values (to be rewritten with correct values during the action phase of the current transaction). Integer parameter mode contains the flags. Currently mode = 0 is used for ordinary messages; mode = 128 is used for messages that are to carry all the remaining balance of the current smart contract (instead of the value originally indicated in the message); mode = 64 is used for messages that carry all the remaining value of the inbound message in addition to the value initially indicated in the new message (if bit 0 is not set, the gas fees are deducted from this amount); mode' = mode + 1 means that the sender wants to pay transfer fees separately; mode' = mode + 2 means that any errors arising while processing this message during the action phase should be ignored. Finally, mode' = mode + 32 means that the current account must be destroyed if its resulting balance is zero. This flag is usually employed together with +128. 
+() send_raw_message(cell msg, int mode) asm "SENDRAWMSG"; +/// Creates an output action that would change this smart contract code to that given by cell new_code. Notice that this change will take effect only after the successful termination of the current run of the smart contract +() set_code(cell new_code) asm "SETCODE"; + +/// Generates a new pseudo-random unsigned 256-bit integer x. The algorithm is as follows: if r is the old value of the random seed, considered as a 32-byte array (by constructing the big-endian representation of an unsigned 256-bit integer), then its sha512(r) is computed; the first 32 bytes of this hash are stored as the new value r' of the random seed, and the remaining 32 bytes are returned as the next random value x. +int random() asm "RANDU256"; +/// Generates a new pseudo-random integer z in the range 0..range−1 (or range..−1, if range < 0). More precisely, an unsigned random value x is generated as in random; then z := x * range / 2^256 is computed. +int rand(int range) asm "RAND"; +/// Returns the current random seed as an unsigned 256-bit Integer. +int get_seed() pure asm "RANDSEED"; +/// Sets the random seed to unsigned 256-bit seed. +() set_seed(int) asm "SETRAND"; +/// Mixes unsigned 256-bit integer x into the random seed r by setting the random seed to sha256 of the concatenation of two 32-byte strings: the first with the big-endian representation of the old seed r, and the second with the big-endian representation of x. +() randomize(int x) asm "ADDRAND"; +/// Equivalent to randomize(cur_lt());. 
+() randomize_lt() asm "LTIME" "ADDRAND"; + +/// Checks whether the data parts of two slices coincide +int equal_slice_bits (slice a, slice b) pure asm "SDEQ"; + +/// Concatenates two builders +builder store_builder(builder to, builder from) pure asm "STBR"; diff --git a/lite-client/lite-client.cpp b/lite-client/lite-client.cpp index 1a4201a7c..77c9a8c8b 100644 --- a/lite-client/lite-client.cpp +++ b/lite-client/lite-client.cpp @@ -926,7 +926,7 @@ bool TestNode::show_help(std::string command) { "saveaccount[code|data] []\tSaves into specified file the most recent state " "(StateInit) or just the code or data of specified account; is in " "[:] format\n" - "runmethod[full] [] ...\tRuns GET method of account " + "runmethod[full] [] ...\tRuns GET method of account " " " "with specified parameters\n" "dnsresolve [] []\tResolves a domain starting from root dns smart contract\n" diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index 54aaf8d21..8c890859d 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -24,6 +24,10 @@ target_link_libraries(tolk PUBLIC git ton_crypto) # todo replace with ton_crypt if (WINGETOPT_FOUND) target_link_libraries_system(tolk wingetopt) endif () +if (${TOLK_DEBUG}) # -DTOLK_DEBUG=1 in CMake options => #define TOLK_DEBUG (for development purposes) + message(STATUS "TOLK_DEBUG is ON") + target_compile_definitions(tolk PRIVATE TOLK_DEBUG=1) +endif() if (USE_EMSCRIPTEN) add_executable(tolkfiftlib tolk-wasm.cpp ${TOLK_SOURCE}) diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index 7dd64bd07..5833c004a 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -221,15 +221,6 @@ void VarDescrList::show(std::ostream& os) const { os << " ]\n"; } -void Op::flags_set_clear(int set, int clear) { - flags = (flags | set) & ~clear; - for (auto& op : block0) { - op.flags_set_clear(set, clear); - } - for (auto& op : block1) { - op.flags_set_clear(set, clear); - } -} void Op::split_vars(const std::vector& vars) { split_var_list(left, vars);
split_var_list(right, vars); @@ -294,7 +285,7 @@ void Op::show(std::ostream& os, const std::vector& vars, std::string pfx if (noreturn()) { dis += " "; } - if (!is_pure()) { + if (impure()) { dis += " "; } switch (cl) { @@ -467,12 +458,6 @@ void Op::show_block(std::ostream& os, const Op* block, const std::vector os << pfx << "}"; } -void CodeBlob::flags_set_clear(int set, int clear) { - for (auto& op : ops) { - op.flags_set_clear(set, clear); - } -} - std::ostream& operator<<(std::ostream& os, const CodeBlob& code) { code.print(os); return os; diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index ea41a103c..ab55a2b64 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -360,10 +360,10 @@ bool Op::compute_used_vars(const CodeBlob& code, bool edit) { case _Tuple: case _UnTuple: { // left = EXEC right; - if (!next_var_info.count_used(left) && is_pure()) { + if (!next_var_info.count_used(left) && !impure()) { // all variables in `left` are not needed if (edit) { - disable(); + set_disabled(); } return std_compute_used_vars(true); } @@ -372,7 +372,7 @@ bool Op::compute_used_vars(const CodeBlob& code, bool edit) { case _SetGlob: { // GLOB = right if (right.empty() && edit) { - disable(); + set_disabled(); } return std_compute_used_vars(right.empty()); } @@ -399,7 +399,7 @@ bool Op::compute_used_vars(const CodeBlob& code, bool edit) { } if (!cnt && edit) { // all variables in `left` are not needed - disable(); + set_disabled(); } return set_var_info(std::move(new_var_info)); } @@ -860,15 +860,45 @@ VarDescrList Op::fwd_analyze(VarDescrList values) { } } -bool Op::set_noreturn(bool nr) { - if (nr) { +void Op::set_disabled(bool flag) { + if (flag) { + flags |= _Disabled; + } else { + flags &= ~_Disabled; + } +} + + +bool Op::set_noreturn(bool flag) { + if (flag) { flags |= _NoReturn; } else { flags &= ~_NoReturn; } - return nr; + return flag; +} + +void Op::set_impure(const CodeBlob &code) { + // todo calling this function with `code` is a bad design (flags are 
assigned after Op is constructed) + // later it's better to check this somewhere in code.emplace_back() + if (code.flags & CodeBlob::_ForbidImpure) { + throw ParseError(where, "An impure operation in a pure function"); + } + flags |= _Impure; } +void Op::set_impure(const CodeBlob &code, bool flag) { + if (flag) { + if (code.flags & CodeBlob::_ForbidImpure) { + throw ParseError(where, "An impure operation in a pure function"); + } + flags |= _Impure; + } else { + flags &= ~_Impure; + } +} + + bool Op::mark_noreturn() { switch (cl) { case _Nop: @@ -888,13 +918,14 @@ bool Op::mark_noreturn() { case _Call: return set_noreturn(next->mark_noreturn()); case _Return: - return set_noreturn(true); + return set_noreturn(); case _If: case _TryCatch: + // note, that & | (not && ||) here and below is mandatory to invoke both left and right calls return set_noreturn((static_cast(block0->mark_noreturn()) & static_cast(block1 && block1->mark_noreturn())) | static_cast(next->mark_noreturn())); case _Again: block0->mark_noreturn(); - return set_noreturn(true); + return set_noreturn(); case _Until: return set_noreturn(static_cast(block0->mark_noreturn()) | static_cast(next->mark_noreturn())); case _While: diff --git a/tolk/asmops.cpp b/tolk/asmops.cpp index cbe268f26..8db75091b 100644 --- a/tolk/asmops.cpp +++ b/tolk/asmops.cpp @@ -317,6 +317,9 @@ void AsmOpList::show_var_ext(std::ostream& os, std::pair os << '_' << i; } else { var_names_->at(i).show(os, 2); + // if (!var_names_->at(i).v_type->is_int()) { + // os << '<'; var_names_->at(i).v_type->print(os); os << '>'; + // } } if ((unsigned)j < constants_.size() && constants_[j].not_null()) { os << '=' << constants_[j]; diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index 16ebd2596..6589b9fcd 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -26,10 +26,10 @@ using namespace std::literals::string_literals; */ int glob_func_cnt, undef_func_cnt, glob_var_cnt, const_cnt; -std::vector glob_func, glob_vars; +std::vector 
glob_func, glob_vars, glob_get_methods; std::set prohibited_var_names; -SymDef* predefine_builtin_func(std::string name, TypeExpr* func_type) { +SymDef* define_builtin_func_impl(const std::string& name, SymValAsmFunc* func_val) { if (name.back() == '_') { prohibited_var_names.insert(name); } @@ -42,30 +42,40 @@ SymDef* predefine_builtin_func(std::string name, TypeExpr* func_type) { std::cerr << "fatal: global function `" << name << "` already defined" << std::endl; std::exit(1); } + func_val->flags |= SymValFunc::flagBuiltinFunction; + def->value = func_val; +#ifdef TOLK_DEBUG + dynamic_cast(def->value)->name = name; +#endif return def; } -template -SymDef* define_builtin_func(std::string name, TypeExpr* func_type, const T& func, bool impure = false) { - SymDef* def = predefine_builtin_func(name, func_type); - def->value = new SymValAsmFunc{func_type, func, impure}; - return def; +SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, bool impure = false) { + return define_builtin_func_impl(name, new SymValAsmFunc{func_type, func, !impure}); +} + +SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const compile_func_t& func, bool impure = false) { + return define_builtin_func_impl(name, new SymValAsmFunc{func_type, func, !impure}); +} + +SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const AsmOp& macro, bool impure = false) { + return define_builtin_func_impl(name, new SymValAsmFunc{func_type, make_simple_compile(macro), !impure}); } -template -SymDef* define_builtin_func(std::string name, TypeExpr* func_type, const T& func, std::initializer_list arg_order, +SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, std::initializer_list arg_order, std::initializer_list ret_order = {}, bool impure = false) { - SymDef* def = predefine_builtin_func(name, func_type); - def->value = new SymValAsmFunc{func_type, func, 
arg_order, ret_order, impure}; - return def; + return define_builtin_func_impl(name, new SymValAsmFunc{func_type, func, arg_order, ret_order, !impure}); } -SymDef* define_builtin_func(std::string name, TypeExpr* func_type, const AsmOp& macro, +SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const compile_func_t& func, std::initializer_list arg_order, + std::initializer_list ret_order = {}, bool impure = false) { + return define_builtin_func_impl(name, new SymValAsmFunc{func_type, func, arg_order, ret_order, !impure}); +} + +SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const AsmOp& macro, std::initializer_list arg_order, std::initializer_list ret_order = {}, bool impure = false) { - SymDef* def = predefine_builtin_func(name, func_type); - def->value = new SymValAsmFunc{func_type, make_simple_compile(macro), arg_order, ret_order, impure}; - return def; + return define_builtin_func_impl(name, new SymValAsmFunc{func_type, make_simple_compile(macro), arg_order, ret_order, !impure}); } SymDef* force_autoapply(SymDef* def) { @@ -262,7 +272,7 @@ int emulate_lshift(int a, int b) { } int t = ((b & VarDescr::_NonZero) ? VarDescr::_Even : 0); t |= b & VarDescr::_Finite; - return emulate_mul(a, VarDescr::_Int | VarDescr::_Pos | VarDescr::_NonZero | VarDescr::_Even | t); + return emulate_mul(a, VarDescr::_Int | VarDescr::_Pos | VarDescr::_NonZero | t); } int emulate_div(int a, int b) { @@ -308,7 +318,7 @@ int emulate_rshift(int a, int b) { } int t = ((b & VarDescr::_NonZero) ? 
VarDescr::_Even : 0); t |= b & VarDescr::_Finite; - return emulate_div(a, VarDescr::_Int | VarDescr::_Pos | VarDescr::_NonZero | VarDescr::_Even | t); + return emulate_div(a, VarDescr::_Int | VarDescr::_Pos | VarDescr::_NonZero | t); } int emulate_mod(int a, int b, int round_mode = -1) { @@ -1128,9 +1138,9 @@ void define_builtins() { auto Int3 = TypeExpr::new_tensor({Int, Int, Int}); auto TupleInt = TypeExpr::new_tensor({Tuple, Int}); auto SliceInt = TypeExpr::new_tensor({Slice, Int}); - auto X = TypeExpr::new_var(); - auto Y = TypeExpr::new_var(); - auto Z = TypeExpr::new_var(); + auto X = TypeExpr::new_var(0); + auto Y = TypeExpr::new_var(1); + auto Z = TypeExpr::new_var(2); auto XY = TypeExpr::new_tensor({X, Y}); auto arith_bin_op = TypeExpr::new_map(Int2, Int); auto arith_un_op = TypeExpr::new_map(Int, Int); diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index 504d0b210..64d8fdf05 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -437,6 +437,7 @@ bool Op::generate_code_step(Stack& stack) { if (disabled()) { return true; } + // fun_ref can be nullptr for Op::_CallInd (invoke a variable, not a function) SymValFunc* func = (fun_ref ? dynamic_cast(fun_ref->value) : nullptr); auto arg_order = (func ? func->get_arg_order() : nullptr); auto ret_order = (func ? 
func->get_ret_order() : nullptr); @@ -486,27 +487,24 @@ bool Op::generate_code_step(Stack& stack) { }; if (cl == _CallInd) { exec_callxargs((int)right.size() - 1, (int)left.size()); + } else if (auto asm_fv = dynamic_cast(fun_ref->value)) { + std::vector res; + res.reserve(left.size()); + for (var_idx_t i : left) { + res.emplace_back(i); + } + asm_fv->compile(stack.o, res, args, where); // compile res := f (args) } else { - auto func = dynamic_cast(fun_ref->value); - if (func) { - std::vector res; - res.reserve(left.size()); - for (var_idx_t i : left) { - res.emplace_back(i); - } - func->compile(stack.o, res, args, where); // compile res := f (args) + auto fv = dynamic_cast(fun_ref->value); + // todo can be fv == nullptr? + std::string name = symbols.get_name(fun_ref->sym_idx); + if (fv && (fv->is_inline() || fv->is_inline_ref())) { + stack.o << AsmOp::Custom(name + " INLINECALLDICT", (int)right.size(), (int)left.size()); + } else if (fv && fv->code && fv->code->require_callxargs) { + stack.o << AsmOp::Custom(name + (" PREPAREDICT"), 0, 2); + exec_callxargs((int)right.size() + 1, (int)left.size()); } else { - auto fv = dynamic_cast(fun_ref->value); - std::string name = symbols.get_name(fun_ref->sym_idx); - bool is_inline = (fv && (fv->flags & 3)); - if (is_inline) { - stack.o << AsmOp::Custom(name + " INLINECALLDICT", (int)right.size(), (int)left.size()); - } else if (fv && fv->code && fv->code->require_callxargs) { - stack.o << AsmOp::Custom(name + (" PREPAREDICT"), 0, 2); - exec_callxargs((int)right.size() + 1, (int)left.size()); - } else { - stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size()); - } + stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size()); } } stack.s.resize(k); diff --git a/tolk/gen-abscode.cpp b/tolk/gen-abscode.cpp index bfce6f0ce..a537d99ce 100644 --- a/tolk/gen-abscode.cpp +++ b/tolk/gen-abscode.cpp @@ -35,7 +35,7 @@ Expr* Expr::copy() const { return res; } -Expr::Expr(int c, sym_idx_t 
name_idx, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { +Expr::Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { sym = lookup_symbol(name_idx); if (!sym) { } @@ -227,11 +227,11 @@ var_idx_t Expr::new_tmp(CodeBlob& code) const { void add_set_globs(CodeBlob& code, std::vector>& globs, const SrcLocation& here) { for (const auto& p : globs) { auto& op = code.emplace_back(here, Op::_SetGlob, std::vector{}, std::vector{ p.second }, p.first); - op.flags |= Op::_Impure; + op.set_impure(code); } } -std::vector Expr::pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, const SrcLocation& here) { +std::vector pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, const SrcLocation& here) { while (lhs->is_type_apply()) { lhs = lhs->args.at(0); } @@ -247,7 +247,7 @@ std::vector Expr::pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rh auto unpacked_type = rhs->e_type->args.at(0); std::vector tmp{code.create_tmp_var(unpacked_type, &rhs->here)}; code.emplace_back(lhs->here, Op::_UnTuple, tmp, std::move(right)); - auto tvar = new Expr{_Var}; + auto tvar = new Expr{Expr::_Var}; tvar->set_val(tmp[0]); tvar->set_location(rhs->here); tvar->e_type = unpacked_type; @@ -265,43 +265,35 @@ std::vector Expr::pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rh return right; } -std::vector pre_compile_tensor(const std::vector args, CodeBlob &code, - std::vector> *lval_globs, - std::vector arg_order) { - if (arg_order.empty()) { - arg_order.resize(args.size()); - std::iota(arg_order.begin(), arg_order.end(), 0); +std::vector pre_compile_tensor(const std::vector& args, CodeBlob &code, + std::vector> *lval_globs) { + const size_t n = args.size(); + if (n == 0) { // just `()` + return {}; } - tolk_assert(args.size() == arg_order.size()); - std::vector> res_lists(args.size()); + if (n == 1) { // just `(x)`: even if x is modified (e.g. 
`f(x=x+2)`), there are no next arguments + return args[0]->pre_compile(code, lval_globs); + } + std::vector> res_lists(n); struct ModifiedVar { size_t i, j; - Op* op; + std::unique_ptr* cur_ops; // `LET tmp = v_ij` will be inserted before this }; - auto modified_vars = std::make_shared>(); - for (size_t i : arg_order) { + std::vector modified_vars; + for (size_t i = 0; i < n; ++i) { res_lists[i] = args[i]->pre_compile(code, lval_globs); for (size_t j = 0; j < res_lists[i].size(); ++j) { TmpVar& var = code.vars.at(res_lists[i][j]); - if (code.flags & CodeBlob::_AllowPostModification) { - if (!lval_globs && (var.cls & TmpVar::_Named)) { - Op *op = &code.emplace_back(nullptr, Op::_Let, std::vector(), std::vector()); - op->flags |= Op::_Disabled; - var.on_modification.push_back([modified_vars, i, j, op, done = false](const SrcLocation &here) mutable { - if (!done) { - done = true; - modified_vars->push_back({i, j, op}); - } - }); - } else { - var.on_modification.push_back([](const SrcLocation &) { - }); - } + if (!lval_globs && (var.cls & TmpVar::_Named)) { + var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](const SrcLocation &here) mutable { + if (!done) { + done = true; + modified_vars.push_back({i, j, cur_ops}); + } + }); } else { - var.on_modification.push_back([name = var.to_string()](const SrcLocation &here) { - throw ParseError{here, PSTRING() << "Modifying local variable " << name - << " after using it in the same expression"}; + var.on_modification.push_back([](const SrcLocation &) { }); } } @@ -312,13 +304,16 @@ std::vector pre_compile_tensor(const std::vector args, CodeBl code.vars.at(v).on_modification.pop_back(); } } - for (const ModifiedVar &m : *modified_vars) { - var_idx_t& v = res_lists[m.i][m.j]; - var_idx_t v2 = code.create_tmp_var(code.vars[v].v_type, code.vars[v].where.get()); - m.op->left = {v2}; - m.op->right = {v}; - m.op->flags &= ~Op::_Disabled; - v = v2; + for (size_t idx = modified_vars.size(); idx--; 
) { + const ModifiedVar &m = modified_vars[idx]; + var_idx_t orig_v = res_lists[m.i][m.j]; + var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where.get()); + std::unique_ptr op = std::make_unique(*code.vars[orig_v].where, Op::_Let); + op->left = {tmp_v}; + op->right = {orig_v}; + op->next = std::move((*m.cur_ops)); + *m.cur_ops = std::move(op); + res_lists[m.i][m.j] = tmp_v; } std::vector res; for (const auto& list : res_lists) { @@ -334,22 +329,33 @@ std::vector Expr::pre_compile(CodeBlob& code, std::vector(sym->value); std::vector res; - if (func && func->arg_order.size() == args.size() && !(code.flags & CodeBlob::_ComputeAsmLtr)) { - //std::cerr << "!!! reordering " << args.size() << " arguments of " << sym->name() << std::endl; - res = pre_compile_tensor(args, code, lval_globs, func->arg_order); + SymDef* applied_sym = sym; + auto func = dynamic_cast(applied_sym->value); + // replace `beginCell()` with `begin_cell()` + if (func && func->is_just_wrapper_for_another_f()) { + // body is { Op::_Import; Op::_Call; Op::_Return; } + const std::unique_ptr& op_call = dynamic_cast(func)->code->ops->next; + applied_sym = op_call->fun_ref; + // a function may call anotherF with shuffled arguments: f(x,y) { return anotherF(y,x) } + // then op_call looks like (_1,_0), so use op_call->right for correct positions in Op::_Call below + // it's correct, since every argument has width 1 + std::vector res_inner = pre_compile_tensor(args, code, lval_globs); + res.reserve(res_inner.size()); + for (var_idx_t right_idx : op_call->right) { + res.emplace_back(res_inner[right_idx]); + } } else { - res = pre_compile_tensor(args, code, lval_globs, {}); + res = pre_compile_tensor(args, code, lval_globs); } auto rvect = new_tmp_vect(code); - auto& op = code.emplace_back(here, Op::_Call, rvect, std::move(res), sym); + auto& op = code.emplace_back(here, Op::_Call, rvect, res, applied_sym); if (flags & _IsImpure) { - op.flags |= Op::_Impure; + 
op.set_impure(code); } return rvect; } @@ -362,12 +368,12 @@ std::vector Expr::pre_compile(CodeBlob& code, std::vectorcls == _Glob) { + if (args[0]->cls == _GlobFunc) { auto res = args[1]->pre_compile(code); auto rvect = new_tmp_vect(code); auto& op = code.emplace_back(here, Op::_Call, rvect, std::move(res), args[0]->sym); if (args[0]->flags & _IsImpure) { - op.flags |= Op::_Impure; + op.set_impure(code); } return rvect; } else { @@ -386,8 +392,14 @@ std::vector Expr::pre_compile(CodeBlob& code, std::vector(sym->value)) { + fun_ref->flags |= SymValFunc::flagUsedAsNonCall; + if (!fun_ref->arg_order.empty() || !fun_ref->ret_order.empty()) { + throw ParseError(here, "Saving " + sym->name() + " into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack"); + } + } auto rvect = new_tmp_vect(code); if (lval_globs) { lval_globs->push_back({ sym, rvect[0] }); diff --git a/tolk/keywords.cpp b/tolk/keywords.cpp index db193debe..50d55c41d 100644 --- a/tolk/keywords.cpp +++ b/tolk/keywords.cpp @@ -109,10 +109,13 @@ void define_keywords() { .add_keyword("global", Keyword::_Global) .add_keyword("asm", Keyword::_Asm) .add_keyword("impure", Keyword::_Impure) + .add_keyword("pure", Keyword::_Pure) .add_keyword("inline", Keyword::_Inline) .add_keyword("inline_ref", Keyword::_InlineRef) + .add_keyword("builtin", Keyword::_Builtin) .add_keyword("auto_apply", Keyword::_AutoApply) .add_keyword("method_id", Keyword::_MethodId) + .add_keyword("get", Keyword::_Get) .add_keyword("operator", Keyword::_Operator) .add_keyword("infix", Keyword::_Infix) .add_keyword("infixl", Keyword::_Infixl) diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp index f0838f5a6..e54c70e41 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -122,8 +122,7 @@ int Lexem::set(std::string _str, const SrcLocation& _loc, int _tp, int _val) { return classify(); } -Lexer::Lexer(SourceReader& _src, bool init, std::string active_chars, std::string eol_cmts, std::string open_cmts, - 
std::string close_cmts, std::string quote_chars, std::string multiline_quote) +Lexer::Lexer(SourceReader& _src, std::string active_chars, std::string quote_chars, std::string multiline_quote) : src(_src), eof(false), lexem("", src.here(), Lexem::Undefined), peek_lexem("", {}, Lexem::Undefined), multiline_quote(std::move(multiline_quote)) { std::memset(char_class, 0, sizeof(char_class)); @@ -137,17 +136,27 @@ Lexer::Lexer(SourceReader& _src, bool init, std::string active_chars, std::strin char_class[(unsigned)c] |= activity; } } - set_spec(eol_cmt, eol_cmts); - set_spec(cmt_op, open_cmts); - set_spec(cmt_cl, close_cmts); for (int c : quote_chars) { if (c > ' ' && c <= 0x7f) { char_class[(unsigned)c] |= cc::quote_char; } } - if (init) { - next(); - } +} + +void Lexer::set_comment_tokens(const std::string &eol_cmts, const std::string &open_cmts, const std::string &close_cmts) { + set_spec(eol_cmt, eol_cmts); + set_spec(cmt_op, open_cmts); + set_spec(cmt_cl, close_cmts); +} + +void Lexer::set_comment2_tokens(const std::string &eol_cmts2, const std::string &open_cmts2, const std::string &close_cmts2) { + set_spec(eol_cmt2, eol_cmts2); + set_spec(cmt_op2, open_cmts2); + set_spec(cmt_cl2, close_cmts2); +} + +void Lexer::start_parsing() { + next(); } void Lexer::set_spec(std::array& arr, std::string setup) { @@ -202,31 +211,41 @@ const Lexem& Lexer::next() { return lexem.clear(src.here(), Lexem::Eof); } long long comm = 1; + // the code below is very complicated, because it tried to support one-symbol start/end and nesting + // in Tolk, we decided to stop supporting nesting (it was never used in practice and almost impossible for js highlighters) + // later on I'll simplify this code (more precisely, rewrite lexer from scratch) while (!src.seek_eof()) { int cc = src.cur_char(), nc = src.next_char(); - if (cc == eol_cmt[0] || (cc == eol_cmt[1] && nc == eol_cmt[2])) { - src.load_line(); - } else if (cc == cmt_op[1] && nc == cmt_op[2]) { + // note, that in practice, [0]-th 
element is -256, condition for [0]-th is always false + // todo rewrite this all in the future + if (cc == eol_cmt[0] || (cc == eol_cmt[1] && nc == eol_cmt[2]) || cc == eol_cmt2[0] || (cc == eol_cmt2[1] && nc == eol_cmt2[2])) { + if (comm == 1) { // just "//" — skip a whole line + src.load_line(); + } else { // if "//" is nested into "/*", continue reading, since "*/" may be met + src.advance(1); + } + } else if (cc == cmt_op[1] && nc == cmt_op[2] || cc == cmt_op2[1] && nc == cmt_op2[2]) { src.advance(2); comm = comm * 2 + 1; - } else if (cc == cmt_op[0]) { + } else if (cc == cmt_op[0] || cc == cmt_op2[0]) { // always false src.advance(1); comm *= 2; } else if (comm == 1) { - break; - } else if (cc == cmt_cl[1] && nc == cmt_cl[2]) { - if (!(comm & 1)) { + break; // means that we are not inside a comment + } else if (cc == cmt_cl[1] && nc == cmt_cl[2] || cc == cmt_cl2[1] && nc == cmt_cl2[2]) { + if (!(comm & 1)) { // always false src.error(std::string{"a `"} + (char)cmt_op[0] + "` comment closed by `" + (char)cmt_cl[1] + (char)cmt_cl[2] + "`"); } - comm >>= 1; + // note that {- may be closed with */, but assume it's ok (we'll get rid of {- in the future) + comm = 1; src.advance(2); - } else if (cc == cmt_cl[0]) { + } else if (cc == cmt_cl[0] || cc == cmt_cl2[0]) { // always false if (!(comm & 1)) { src.error(std::string{"a `"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment closed by `" + (char)cmt_cl[0] + "`"); } - comm >>= 1; + comm = 1; src.advance(1); } else { src.advance(1); @@ -238,11 +257,7 @@ const Lexem& Lexer::next() { if (src.seek_eof()) { eof = true; if (comm > 1) { - if (comm & 1) { - src.error(std::string{"`"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment extends past end of file"); - } else { - src.error(std::string{"`"} + (char)cmt_op[0] + "` comment extends past end of file"); - } + src.error("comment extends past end of file"); } return lexem.clear(src.here(), Lexem::Eof); } diff --git a/tolk/lexer.h b/tolk/lexer.h index 79d869068..816f7a827 
100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -66,7 +66,8 @@ class Lexer { bool eof; Lexem lexem, peek_lexem; unsigned char char_class[128]; - std::array eol_cmt, cmt_op, cmt_cl; + std::array eol_cmt, cmt_op, cmt_cl; // for ;; {- -} + std::array eol_cmt2, cmt_op2, cmt_cl2; // for // /* */ std::string multiline_quote; enum cc { left_active = 2, right_active = 1, active = 3, allow_repeat = 4, quote_char = 8 }; @@ -74,9 +75,13 @@ class Lexer { bool eof_found() const { return eof; } - Lexer(SourceReader& _src, bool init = false, std::string active_chars = ";,() ~.", std::string eol_cmts = ";;", - std::string open_cmts = "{-", std::string close_cmts = "-}", std::string quote_chars = "\"", - std::string multiline_quote = "\"\"\""); + explicit Lexer(SourceReader& _src, std::string active_chars = ";,() ~.", + std::string quote_chars = "\"", std::string multiline_quote = "\"\"\""); + + void set_comment_tokens(const std::string &eol_cmts, const std::string &open_cmts, const std::string &close_cmts); + void set_comment2_tokens(const std::string &eol_cmts2, const std::string &open_cmts2, const std::string &close_cmts2); + void start_parsing(); + const Lexem& next(); const Lexem& cur() const { return lexem; diff --git a/tolk/parse-tolk.cpp b/tolk/parse-tolk.cpp index 7fffb15ab..3cff0bb51 100644 --- a/tolk/parse-tolk.cpp +++ b/tolk/parse-tolk.cpp @@ -48,6 +48,109 @@ inline bool is_special_ident(sym_idx_t idx) { return symbols.get_subclass(idx) != IdSc::undef; } +// given Expr::_Apply (a function call / a variable call), determine whether it's <, or >, or similar +// (an expression `1 < 2` is expressed as `_<_(1,2)`, see builtins.cpp) +static bool is_comparison_binary_op(const Expr* e_apply) { + const std::string& name = e_apply->sym->name(); + const size_t len = name.size(); + if (len < 3 || len > 5 || name[0] != '_' || name[len-1] != '_') { + return false; // not "_<_" and similar + } + + char c1 = name[1]; + char c2 = name[2]; + // < > <= != == >= <=> + return (len == 3 && 
(c1 == '<' || c1 == '>')) || + (len == 4 && (c1 == '<' || c1 == '>' || c1 == '!' || c1 == '=') && c2 == '=') || + (len == 5 && (c1 == '<' && c2 == '=' && name[3] == '>')); +} + +// same as above, but to detect bitwise operators: & | ^ +// (in Tolk, they are used as logical ones due to absence of a boolean type and && || operators) +static bool is_bitwise_binary_op(const Expr* e_apply) { + const std::string& name = e_apply->sym->name(); + const size_t len = name.size(); + if (len != 3 || name[0] != '_' || name[len-1] != '_') { + return false; + } + + char c1 = name[1]; + return c1 == '&' || c1 == '|' || c1 == '^'; +} + +// same as above, but to detect addition/subtraction +static bool is_add_or_sub_binary_op(const Expr* e_apply) { + const std::string& name = e_apply->sym->name(); + const size_t len = name.size(); + if (len != 3 || name[0] != '_' || name[len-1] != '_') { + return false; + } + + char c1 = name[1]; + return c1 == '+' || c1 == '-'; +} + +static inline std::string get_builtin_operator_name(sym_idx_t sym_builtin) { + std::string underscored = symbols.get_name(sym_builtin); + return underscored.substr(1, underscored.size() - 2); +} + +// fire an error for a case "flags & 0xFF != 0" (equivalent to "flags & 1", probably unexpected) +// it would better be a warning, but we decided to make it a strict error +[[gnu::cold]] static void fire_error_lower_precedence(const SrcLocation& loc, sym_idx_t op_lower, sym_idx_t op_higher) { + std::string name_lower = get_builtin_operator_name(op_lower); + std::string name_higher = get_builtin_operator_name(op_higher); + throw ParseError(loc, name_lower + " has lower precedence than " + name_higher + + ", probably this code won't work as you expected. " + "Use parenthesis: either (... " + name_lower + " ...) to evaluate it first, or (... " + name_higher + " ...) 
to suppress this error."); +} + +// fire an error for a case "arg1 & arg2 | arg3" +[[gnu::cold]] static void fire_error_mix_bitwise_and_or(const SrcLocation& loc, sym_idx_t op1, sym_idx_t op2) { + std::string name1 = get_builtin_operator_name(op1); + std::string name2 = get_builtin_operator_name(op2); + throw ParseError(loc, "mixing " + name1 + " with " + name2 + " without parenthesis" + ", probably this code won't work as you expected. " + "Use parenthesis to emphasize operator precedence."); +} + +// diagnose when bitwise operators are used in a probably wrong way due to tricky precedence +// example: "flags & 0xFF != 0" is parsed as "flags & (0xFF != 0)", i.e. equivalent to "flags & 1", most likely it's unexpected +// the only way for the programmer to suppress this error is to use parentheses +static void diagnose_bitwise_precedence(const SrcLocation& loc, sym_idx_t bitwise_sym, const Expr* lhs, const Expr* rhs) { + // handle "0 != flags & 0xFF" (lhs = "0 != flags") + if (!lhs->is_inside_parenthesis() && + lhs->cls == Expr::_Apply && lhs->e_type->is_int() && // fast false if 100% not + is_comparison_binary_op(lhs)) { + fire_error_lower_precedence(loc, bitwise_sym, lhs->sym->sym_idx); + // there is a tiny bug: "flags & _!=_(0xFF,0)" will also suggest to wrap rhs into parenthesis + } + + // handle "flags & 0xFF != 0" (rhs = "0xFF != 0") + if (!rhs->is_inside_parenthesis() && + rhs->cls == Expr::_Apply && rhs->e_type->is_int() && + is_comparison_binary_op(rhs)) { + fire_error_lower_precedence(loc, bitwise_sym, rhs->sym->sym_idx); + } + + // handle "arg1 & arg2 | arg3" (lhs = "arg1 & arg2") + if (!lhs->is_inside_parenthesis() && + lhs->cls == Expr::_Apply && lhs->e_type->is_int() && + is_bitwise_binary_op(lhs) && + lhs->sym->sym_idx != bitwise_sym) { + fire_error_mix_bitwise_and_or(loc, lhs->sym->sym_idx, bitwise_sym); + } +} + +// diagnose "a << 8 + 1" (equivalent to "a << 9", probably unexpected) +static void diagnose_addition_in_bitshift(const SrcLocation& loc, sym_idx_t bitshift_sym, const Expr*
rhs) { + if (!rhs->is_inside_parenthesis() && + rhs->cls == Expr::_Apply && rhs->e_type->is_int() && + is_add_or_sub_binary_op(rhs)) { + fire_error_lower_precedence(loc, bitshift_sym, rhs->sym->sym_idx); + } +} + /* * * PARSE SOURCE @@ -220,6 +323,9 @@ void parse_global_var_decl(Lexer& lex) { } } else { sym_def->value = new SymValGlobVar{glob_var_cnt++, var_type}; +#ifdef TOLK_DEBUG + dynamic_cast(sym_def->value)->name = lex.cur().str; +#endif glob_vars.push_back(sym_def); } lex.next(); @@ -253,15 +359,9 @@ void parse_const_decl(Lexer& lex) { } lex.next(); CodeBlob code; - if (pragma_allow_post_modification.enabled()) { - code.flags |= CodeBlob::_AllowPostModification; - } - if (pragma_compute_asm_ltr.enabled()) { - code.flags |= CodeBlob::_ComputeAsmLtr; - } // Handles processing and resolution of literals and consts auto x = parse_expr(lex, code, false); // also does lex.next() ! - if (x->flags != Expr::_IsRvalue) { + if (!x->is_rvalue()) { lex.cur().error("expression is not strictly Rvalue"); } if ((wanted_type == Expr::_Const) && (x->cls == Expr::_Apply)) @@ -274,7 +374,7 @@ void parse_const_decl(Lexer& lex) { new_value = new SymValConst{const_cnt++, x->intval}; } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) new_value = new SymValConst{const_cnt++, x->strval}; - } else if (x->cls == Expr::_Apply) { + } else if (x->cls == Expr::_Apply) { // even "1 + 2" is Expr::_Apply (it applies `_+_`) code.emplace_back(loc, Op::_Import, std::vector()); auto tmp_vars = x->pre_compile(code); code.emplace_back(loc, Op::_Return, std::move(tmp_vars)); @@ -372,27 +472,22 @@ void parse_global_var_decls(Lexer& lex) { lex.expect(';'); } -SymValCodeFunc* make_new_glob_func(SymDef* func_sym, TypeExpr* func_type, bool impure = false) { - SymValCodeFunc* res = new SymValCodeFunc{glob_func_cnt, func_type, impure}; +SymValCodeFunc* make_new_glob_func(SymDef* func_sym, TypeExpr* func_type, bool marked_as_pure) { + SymValCodeFunc* res = new SymValCodeFunc{glob_func_cnt, 
func_type, marked_as_pure}; +#ifdef TOLK_DEBUG + res->name = func_sym->name(); +#endif func_sym->value = res; glob_func.push_back(func_sym); glob_func_cnt++; return res; } -bool check_global_func(const Lexem& cur, sym_idx_t func_name = 0) { - if (!func_name) { - func_name = cur.val; - } +bool check_global_func(const Lexem& cur, sym_idx_t func_name) { SymDef* def = lookup_symbol(func_name); if (!def) { - cur.loc.show_error(std::string{"undefined function `"} + symbols.get_name(func_name) + - "`, defining a global function of unknown type"); - def = define_global_symbol(func_name, 0, cur.loc); - tolk_assert(def && "cannot define global function"); - ++undef_func_cnt; - make_new_glob_func(def, TypeExpr::new_func()); // was: ... ::new_func() - return true; + cur.error("undefined symbol `" + symbols.get_name(func_name) + "`"); + return false; } SymVal* val = dynamic_cast(def->value); if (!val) { @@ -407,8 +502,8 @@ bool check_global_func(const Lexem& cur, sym_idx_t func_name = 0) { } Expr* make_func_apply(Expr* fun, Expr* x) { - Expr* res; - if (fun->cls == Expr::_Glob) { + Expr* res{nullptr}; + if (fun->cls == Expr::_GlobFunc) { if (x->cls == Expr::_Tensor) { res = new Expr{Expr::_Apply, fun->sym, x->args}; } else { @@ -445,6 +540,7 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { } Expr* res = parse_expr(lex, code, nv); if (lex.tp() == ')') { + res->flags |= Expr::_IsInsideParenthesis; lex.expect(clbr); return res; } @@ -571,7 +667,7 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { if (t == '_') { Expr* res = new Expr{Expr::_Hole, lex.cur().loc}; res->val = -1; - res->flags = (Expr::_IsLvalue | Expr::_IsHole | Expr::_IsNewVar); + res->flags = Expr::_IsLvalue; res->e_type = TypeExpr::new_hole(); lex.next(); return res; @@ -633,15 +729,16 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { if (nv) { res->val = ~lex.cur().val; res->e_type = TypeExpr::new_hole(); - res->flags = Expr::_IsLvalue | Expr::_IsNewVar; + res->flags = 
Expr::_IsLvalue; // std::cerr << "defined new variable " << lex.cur().str << " : " << res->e_type << std::endl; } else { if (!sym) { - check_global_func(lex.cur()); + check_global_func(lex.cur(), lex.cur().val); sym = lookup_symbol(lex.cur().val); } res->sym = sym; SymVal* val = nullptr; + bool impure = false; if (sym) { val = dynamic_cast(sym->value); } @@ -649,8 +746,9 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { lex.cur().error_at("undefined identifier `", "`"); } else if (val->type == SymVal::_Func) { res->e_type = val->get_type(); - res->cls = Expr::_Glob; + res->cls = Expr::_GlobFunc; auto_apply = val->auto_apply; + impure = !dynamic_cast(val)->is_marked_as_pure(); } else if (val->idx < 0) { lex.cur().error_at("accessing variable `", "` being defined"); } else { @@ -659,7 +757,7 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { // std::cerr << "accessing variable " << lex.cur().str << " : " << res->e_type << std::endl; } // std::cerr << "accessing symbol " << lex.cur().str << " : " << res->e_type << (val->impure ? " (impure)" : " (pure)") << std::endl; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (val->impure ? Expr::_IsImpure : 0); + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (impure ? Expr::_IsImpure : 0); } if (auto_apply) { int impure = res->flags & Expr::_IsImpure; @@ -750,7 +848,7 @@ Expr* parse_expr80(Lexer& lex, CodeBlob& code, bool nv) { res = new Expr{Expr::_Apply, name, {obj, x}}; } res->here = loc; - res->flags = Expr::_IsRvalue | (val->impure ? Expr::_IsImpure : 0); + res->flags = Expr::_IsRvalue | (val->is_marked_as_pure() ? 
0 : Expr::_IsImpure); res->deduce_type(lex.cur()); if (modify) { auto tmp = res; @@ -784,11 +882,11 @@ Expr* parse_expr75(Lexer& lex, CodeBlob& code, bool nv) { } } -// parse E { (* | / | % | /% ) E } +// parse E { (* | / | % | /% | ^/ | ~/ | ^% | ~% ) E } Expr* parse_expr30(Lexer& lex, CodeBlob& code, bool nv) { Expr* res = parse_expr75(lex, code, nv); while (lex.tp() == '*' || lex.tp() == '/' || lex.tp() == '%' || lex.tp() == _DivMod || lex.tp() == _DivC || - lex.tp() == _DivR || lex.tp() == _ModC || lex.tp() == _ModR || lex.tp() == '&') { + lex.tp() == _DivR || lex.tp() == _ModC || lex.tp() == _ModR) { res->chk_rvalue(lex.cur()); int t = lex.tp(); sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); @@ -806,7 +904,7 @@ Expr* parse_expr30(Lexer& lex, CodeBlob& code, bool nv) { return res; } -// parse [-] E { (+ | - | `|` | ^) E } +// parse [-] E { (+ | -) E } Expr* parse_expr20(Lexer& lex, CodeBlob& code, bool nv) { Expr* res; int t = lex.tp(); @@ -825,7 +923,7 @@ Expr* parse_expr20(Lexer& lex, CodeBlob& code, bool nv) { } else { res = parse_expr30(lex, code, nv); } - while (lex.tp() == '-' || lex.tp() == '+' || lex.tp() == '|' || lex.tp() == '^') { + while (lex.tp() == '-' || lex.tp() == '+') { res->chk_rvalue(lex.cur()); t = lex.tp(); sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); @@ -843,7 +941,7 @@ Expr* parse_expr20(Lexer& lex, CodeBlob& code, bool nv) { return res; } -// parse E { ( << | >> | >>~ | >>^ ) E } +// parse E { ( << | >> | ~>> | ^>> ) E } Expr* parse_expr17(Lexer& lex, CodeBlob& code, bool nv) { Expr* res = parse_expr20(lex, code, nv); while (lex.tp() == _Lshift || lex.tp() == _Rshift || lex.tp() == _RshiftC || lex.tp() == _RshiftR) { @@ -855,6 +953,7 @@ Expr* parse_expr17(Lexer& lex, CodeBlob& code, bool nv) { lex.next(); auto x = parse_expr20(lex, code, false); x->chk_rvalue(lex.cur()); + diagnose_addition_in_bitshift(loc, name, x); res = new Expr{Expr::_Apply, name, {res, x}}; res->here = 
loc; res->set_val(t); @@ -886,9 +985,33 @@ Expr* parse_expr15(Lexer& lex, CodeBlob& code, bool nv) { return res; } +// parse E { ( & | `|` | ^ ) E } +Expr* parse_expr14(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res = parse_expr15(lex, code, nv); + while (lex.tp() == '&' || lex.tp() == '|' || lex.tp() == '^') { + res->chk_rvalue(lex.cur()); + int t = lex.tp(); + sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); + check_global_func(lex.cur(), name); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto x = parse_expr15(lex, code, false); + x->chk_rvalue(lex.cur()); + // diagnose tricky bitwise precedence, like "flags & 0xFF != 0" (& has lower precedence) + diagnose_bitwise_precedence(loc, name, res, x); + + res = new Expr{Expr::_Apply, name, {res, x}}; + res->here = loc; + res->set_val(t); + res->flags = Expr::_IsRvalue; + res->deduce_type(lex.cur()); + } + return res; +} + // parse E [ ? E : E ] Expr* parse_expr13(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr15(lex, code, nv); + Expr* res = parse_expr14(lex, code, nv); if (lex.tp() == '?') { res->chk_rvalue(lex.cur()); SrcLocation loc{lex.cur().loc}; @@ -1207,14 +1330,11 @@ blk_fl::val parse_stmt(Lexer& lex, CodeBlob& code) { } } -CodeBlob* parse_func_body(Lexer& lex, FormalArgList arg_list, TypeExpr* ret_type) { +CodeBlob* parse_func_body(Lexer& lex, FormalArgList arg_list, TypeExpr* ret_type, bool marked_as_pure) { lex.expect('{'); CodeBlob* blob = new CodeBlob{ret_type}; - if (pragma_allow_post_modification.enabled()) { - blob->flags |= CodeBlob::_AllowPostModification; - } - if (pragma_compute_asm_ltr.enabled()) { - blob->flags |= CodeBlob::_ComputeAsmLtr; + if (marked_as_pure) { + blob->flags |= CodeBlob::_ForbidImpure; } blob->import_params(std::move(arg_list)); blk_fl::val res = blk_fl::init; @@ -1235,7 +1355,7 @@ CodeBlob* parse_func_body(Lexer& lex, FormalArgList arg_list, TypeExpr* ret_type } SymValAsmFunc* parse_asm_func_body(Lexer& lex, TypeExpr* 
func_type, const FormalArgList& arg_list, TypeExpr* ret_type, - bool impure = false) { + bool marked_as_pure) { auto loc = lex.cur().loc; lex.expect(_Asm); int cnt = (int)arg_list.size(); @@ -1339,14 +1459,14 @@ SymValAsmFunc* parse_asm_func_body(Lexer& lex, TypeExpr* func_type, const Formal for (const AsmOp& asm_op : asm_ops) { crc_s += asm_op.op; } - crc_s.push_back(impure); + crc_s.push_back(!marked_as_pure); for (const int& x : arg_order) { crc_s += std::string((const char*) (&x), (const char*) (&x + 1)); } for (const int& x : ret_order) { crc_s += std::string((const char*) (&x), (const char*) (&x + 1)); } - auto res = new SymValAsmFunc{func_type, asm_ops, impure}; + auto res = new SymValAsmFunc{func_type, std::move(asm_ops), marked_as_pure}; res->arg_order = std::move(arg_order); res->ret_order = std::move(ret_order); res->crc = td::crc64(crc_s); @@ -1420,12 +1540,90 @@ TypeExpr* compute_type_closure(TypeExpr* expr, const std::vector& typ return expr; } +// if a function looks like `T f(...args) { return anotherF(...args); }`, +// set a bit to flags +// then, all calls to `f(...)` will be effectively replaced with `anotherF(...)` +void detect_if_function_just_wraps_another(SymValCodeFunc* v_current, const td::RefInt256 &method_id) { + const std::string& function_name = v_current->code->name; + + // in "AST" representation, the first is Op::_Import (input arguments, even if none) + const auto& op_import = v_current->code->ops; + tolk_assert(op_import && op_import->cl == Op::_Import); + + // then Op::_Call (anotherF) + const Op* op_call = op_import->next.get(); + if (!op_call || op_call->cl != Op::_Call) + return; + tolk_assert(op_call->left.size() == 1); + + const auto& op_return = op_call->next; + if (!op_return || op_return->cl != Op::_Return || op_return->left.size() != 1) + return; + + bool indices_expected = static_cast(op_import->left.size()) == op_call->left[0] && op_call->left[0] == op_return->left[0]; + if (!indices_expected) + return; + + const 
SymDef* f_called = op_call->fun_ref; + const SymValFunc* v_called = dynamic_cast(f_called->value); + if (!v_called) + return; + + // `return` must use all arguments, e.g. `return (_0,_2,_1)`, not `return (_0,_1,_1)` + int args_used_mask = 0; + for (var_idx_t arg_idx : op_call->right) { + args_used_mask |= 1 << arg_idx; + } + if (args_used_mask != (1 << op_call->right.size()) - 1) + return; + + // detect getters (having method_id), they should not be treated as wrappers + // v_current->method_id will be assigned later; todo refactor function parsing completely, it's weird + // moreover, `recv_external()` and others are also exported, but FunC is unaware of method_id + // (it's assigned by Fift later) + // so, for now, just handle "special" function names, the same as in Asm.fif + if (!method_id.is_null()) + return; + if (function_name == "main" || function_name == "recv_internal" || function_name == "recv_external" || + function_name == "run_ticktock" || function_name == "split_prepare" || function_name == "split_install") + return; + + // all types must be strictly defined (on mismatch, a compilation error will be triggered anyway) + if (v_called->sym_type->has_unknown_inside() || v_current->sym_type->has_unknown_inside()) + return; + // avoid situations like `f(int a, (int,int) b)`, inlining will be cumbersome + if (v_current->get_arg_type()->get_width() != static_cast(op_call->right.size())) + return; + // 'return true;' (false, nil) are (surprisingly) also function calls, with auto_apply=true + if (v_called->auto_apply) + return; + // if an original is marked `pure`, and this one doesn't, it's okay; just check for inline_ref storage + if (v_current->is_inline_ref()) + return; + + // ok, f_current is a wrapper + v_current->flags |= SymValFunc::flagWrapsAnotherF; + if (verbosity >= 2) { + std::cerr << function_name << " -> " << f_called->name() << std::endl; + } +} + +static td::RefInt256 calculate_method_id_by_func_name(const std::string &func_name) { + unsigned 
int crc = td::crc16(func_name); + return td::make_refint((crc & 0xffff) | 0x10000); +} + +// todo rewrite function declaration parsing completely, it's weird void parse_func_def(Lexer& lex) { SrcLocation loc{lex.cur().loc}; open_scope(lex); std::vector type_vars; + bool is_get_method = false; if (lex.tp() == _Forall) { type_vars = parse_type_var_list(lex); + } else if (lex.tp() == _Get) { + is_get_method = true; + lex.next(); } auto ret_type = parse_type(lex); if (lex.tp() != _Ident) { @@ -1434,47 +1632,80 @@ void parse_func_def(Lexer& lex) { Lexem func_name = lex.cur(); lex.next(); FormalArgList arg_list = parse_formal_args(lex); - bool impure = (lex.tp() == _Impure); - if (impure) { + bool marked_as_pure = false; + if (lex.tp() == _Impure) { + static bool warning_shown = false; + if (!warning_shown) { + lex.cur().loc.show_warning("`impure` specifier is deprecated. All functions are impure by default, use `pure` to mark a function as pure"); + warning_shown = true; + } + lex.next(); + } else if (lex.tp() == _Pure) { + marked_as_pure = true; lex.next(); } - int f = 0; - if (lex.tp() == _Inline || lex.tp() == _InlineRef) { - f = (lex.tp() == _Inline) ? 
1 : 2; + int flags_inline = 0; + if (lex.tp() == _Inline) { + flags_inline = SymValFunc::flagInline; + lex.next(); + } else if (lex.tp() == _InlineRef) { + flags_inline = SymValFunc::flagInlineRef; lex.next(); } td::RefInt256 method_id; - std::string method_name; if (lex.tp() == _MethodId) { + if (is_get_method) { + lex.cur().error("both `get` and `method_id` are not allowed"); + } lex.next(); - if (lex.tp() == '(') { + if (lex.tp() == '(') { // method_id(N) lex.expect('('); - if (lex.tp() == Lexem::String) { - method_name = lex.cur().str; - } else if (lex.tp() == Lexem::Number) { - method_name = lex.cur().str; - method_id = td::string_to_int256(method_name); - if (method_id.is_null()) { - lex.cur().error_at("invalid integer constant `", "`"); - } - } else { - throw ParseError{lex.cur().loc, "integer or string method identifier expected"}; + method_id = td::string_to_int256(lex.cur().str); + lex.expect(Lexem::Number); + if (method_id.is_null()) { + lex.cur().error_at("invalid integer constant `", "`"); } - lex.next(); lex.expect(')'); } else { - method_name = func_name.str; - } - if (method_id.is_null()) { - unsigned crc = td::crc16(method_name); - method_id = td::make_refint((crc & 0xffff) | 0x10000); + static bool warning_shown = false; + if (!warning_shown) { + lex.cur().loc.show_warning("`method_id` specifier is deprecated, use `get` keyword.\nExample: `get int seqno() { ... }`"); + warning_shown = true; + } + method_id = calculate_method_id_by_func_name(func_name.str); } } - if (lex.tp() != ';' && lex.tp() != '{' && lex.tp() != _Asm) { - lex.expect('{', "function body block expected"); + if (is_get_method) { + tolk_assert(method_id.is_null()); + method_id = calculate_method_id_by_func_name(func_name.str); + for (const SymDef* other : glob_get_methods) { + if (!td::cmp(dynamic_cast(other->value)->method_id, method_id)) { + lex.cur().error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" + func_name.str + "` produce the same hash. 
Consider renaming one of these functions."); + } + } } TypeExpr* func_type = TypeExpr::new_map(extract_total_arg_type(arg_list), ret_type); func_type = compute_type_closure(func_type, type_vars); + if (lex.tp() == _Builtin) { + const SymDef* builtin_func = lookup_symbol(func_name.str); + const SymValFunc* func_val = builtin_func ? dynamic_cast(builtin_func->value) : nullptr; + if (!func_val || !func_val->is_builtin()) { + lex.cur().error("`builtin` used for non-builtin function"); + } +#ifdef TOLK_DEBUG + // in release, we don't need this check, since `builtin` is used only in stdlib.tolk, which is our responsibility + if (!func_val->sym_type->equals_to(func_type) || func_val->is_marked_as_pure() != marked_as_pure) { + lex.cur().error("declaration for `builtin` function doesn't match an actual one"); + } +#endif + lex.next(); + lex.expect(';'); + close_scope(lex); + return; + } + if (lex.tp() != ';' && lex.tp() != '{' && lex.tp() != _Asm) { + lex.expect('{', "function body block"); + } if (verbosity >= 1) { std::cerr << "function " << func_name.str << " : " << func_type << std::endl; } @@ -1495,7 +1726,7 @@ void parse_func_def(Lexer& lex) { } } if (lex.tp() == ';') { - make_new_glob_func(func_sym, func_type, impure); + make_new_glob_func(func_sym, func_type, marked_as_pure); lex.next(); } else if (lex.tp() == '{') { if (dynamic_cast(func_sym_val)) { @@ -1508,19 +1739,26 @@ void parse_func_def(Lexer& lex) { lex.cur().error("function `"s + func_name.str + "` has been already defined in an yet-unknown way"); } } else { - func_sym_code = make_new_glob_func(func_sym, func_type, impure); + func_sym_code = make_new_glob_func(func_sym, func_type, marked_as_pure); } if (func_sym_code->code) { lex.cur().error("redefinition of function `"s + func_name.str + "`"); } - CodeBlob* code = parse_func_body(lex, arg_list, ret_type); + if (marked_as_pure && ret_type->get_width() == 0) { + lex.cur().error("a pure function should return something, otherwise it will be optimized out 
anyway"); + } + CodeBlob* code = parse_func_body(lex, arg_list, ret_type, marked_as_pure); code->name = func_name.str; code->loc = loc; // code->print(std::cerr); // !!!DEBUG!!! func_sym_code->code = code; + detect_if_function_just_wraps_another(func_sym_code, method_id); } else { Lexem asm_lexem = lex.cur(); - SymValAsmFunc* asm_func = parse_asm_func_body(lex, func_type, arg_list, ret_type, impure); + SymValAsmFunc* asm_func = parse_asm_func_body(lex, func_type, arg_list, ret_type, marked_as_pure); +#ifdef TOLK_DEBUG + asm_func->name = func_name.str; +#endif if (func_sym_val) { if (dynamic_cast(func_sym_val)) { asm_lexem.error("function `"s + func_name.str + "` was already declared as an ordinary function"); @@ -1537,7 +1775,7 @@ void parse_func_def(Lexer& lex) { func_sym->value = asm_func; } if (method_id.not_null()) { - auto val = dynamic_cast(func_sym->value); + auto val = dynamic_cast(func_sym->value); if (!val) { lex.cur().error("cannot set method id for unknown function `"s + func_name.str + "`"); } @@ -1548,17 +1786,25 @@ void parse_func_def(Lexer& lex) { val->method_id->to_dec_string() + " to a different value " + method_id->to_dec_string()); } } - if (f) { - auto val = dynamic_cast(func_sym->value); + if (flags_inline) { + auto val = dynamic_cast(func_sym->value); if (!val) { lex.cur().error("cannot set unknown function `"s + func_name.str + "` as an inline"); } - if (!(val->flags & 3)) { - val->flags = (short)(val->flags | f); - } else if ((val->flags & 3) != f) { + if (!val->is_inline() && !val->is_inline_ref()) { + val->flags |= flags_inline; + } else if ((val->flags & (SymValFunc::flagInline | SymValFunc::flagInlineRef)) != flags_inline) { lex.cur().error("inline mode for `"s + func_name.str + "` changed with respect to a previous declaration"); } } + if (is_get_method) { + auto val = dynamic_cast(func_sym->value); + if (!val) { + lex.cur().error("cannot set unknown function `"s + func_name.str + "` as a get method"); + } + val->flags |= 
SymValFunc::flagGetMethod; + glob_get_methods.push_back(func_sym); + } if (verbosity >= 1) { std::cerr << "new type of function " << func_name.str << " : " << func_type << std::endl; } @@ -1697,6 +1943,8 @@ void parse_pragma(Lexer& lex) { pragma_allow_post_modification.enable(lex.cur().loc); } else if (pragma_name == pragma_compute_asm_ltr.name()) { pragma_compute_asm_ltr.enable(lex.cur().loc); + } else if (pragma_name == pragma_remove_unused_functions.name()) { + pragma_remove_unused_functions.enable(lex.cur().loc); } else { lex.cur().error(std::string{"unknown pragma `"} + pragma_name + "`"); } @@ -1728,7 +1976,12 @@ void parse_include(Lexer& lex, const FileDescr* fdescr) { bool parse_source(std::istream* is, FileDescr* fdescr) { SourceReader reader{is, fdescr}; - Lexer lex{reader, true, ";,()[] ~."}; + Lexer lex{reader, ";,()[] ~."}; + // previously, FunC had lisp-style comments, + // but Tolk supports traditional (slash) comments alongside (lisp-style will be deleted soon) + lex.set_comment_tokens(";;", "{-", "-}"); + lex.set_comment2_tokens("//", "/*", "*/"); + lex.start_parsing(); while (lex.tp() != _Eof) { if (lex.tp() == _PragmaHashtag) { parse_pragma(lex); diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index ea2a1f916..ee6d6aca7 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -149,7 +149,11 @@ SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, const SrcLocati if (found) { return force_new && found->value ? 
nullptr : found; } - return global_sym_def[name_idx] = new SymDef(0, name_idx, loc); + found = global_sym_def[name_idx] = new SymDef(0, name_idx, loc); +#ifdef TOLK_DEBUG + found->sym_name = found->name(); +#endif + return found; } SymDef* define_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) { @@ -173,6 +177,10 @@ SymDef* define_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc } found = sym_def[name_idx] = new SymDef(scope_level, name_idx, loc); symbol_stack.push_back(std::make_pair(scope_level, SymDef{0, name_idx})); +#ifdef TOLK_DEBUG + found->sym_name = found->name(); + symbol_stack.back().second.sym_name = found->name(); +#endif return found; } diff --git a/tolk/symtable.h b/tolk/symtable.h index c0a0912a8..68a4a1dad 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -148,6 +148,9 @@ struct SymDef { sym_idx_t sym_idx; SymValBase* value; SrcLocation loc; +#ifdef TOLK_DEBUG + std::string sym_name; +#endif SymDef(int lvl, sym_idx_t idx, const SrcLocation& _loc = {}, SymValBase* val = 0) : level(lvl), sym_idx(idx), value(val), loc(_loc) { } diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp index a7ca37b42..6ffc798e8 100644 --- a/tolk/tolk-wasm.cpp +++ b/tolk/tolk-wasm.cpp @@ -31,6 +31,7 @@ #include "td/utils/Status.h" #include #include +#include "vm/boc.h" td::Result compile_internal(char *config_json) { TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json))) diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index eb15155a5..1b8a17a1e 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -38,9 +38,74 @@ bool stack_layout_comments, op_rewrite_comments, program_envelope, asm_preamble; bool interactive = false; GlobalPragma pragma_allow_post_modification{"allow-post-modification"}; GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"}; +GlobalPragma pragma_remove_unused_functions{"remove-unused-functions"}; std::string generated_from, boc_output_filename; ReadCallback::Callback read_callback; +// returns argument type 
of a function +// note, that when a function has multiple arguments, its arg type is a tensor (no arguments — an empty tensor) +// in other words, `f(int a, int b)` and `f((int,int) ab)` are the same when we speak about types +const TypeExpr *SymValFunc::get_arg_type() const { + if (!sym_type) + return nullptr; + + tolk_assert(sym_type->constr == TypeExpr::te_Map || sym_type->constr == TypeExpr::te_ForAll); + const TypeExpr *te_map = sym_type->constr == TypeExpr::te_ForAll ? sym_type->args[0] : sym_type; + const TypeExpr *arg_type = te_map->args[0]; + + while (arg_type->constr == TypeExpr::te_Indirect) { + arg_type = arg_type->args[0]; + } + return arg_type; +} + + +bool SymValCodeFunc::does_need_codegen() const { + // when a function is declared, but not referenced from code in any way, don't generate its body + if (!is_really_used && pragma_remove_unused_functions.enabled()) { + return false; + } + // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist + if (flags & flagUsedAsNonCall) { + return true; + } + // when a function f() is just `return anotherF(...args)`, it doesn't need to be codegenerated at all, + // since all its usages are inlined + return !is_just_wrapper_for_another_f(); + // in the future, we may want to implement a true AST inlining for `inline` functions also +} + +void GlobalPragma::enable(SrcLocation loc) { + if (deprecated_from_v_) { + loc.show_warning(PSTRING() << "#pragma " << name_ << + " is deprecated since Tolk v" << deprecated_from_v_ << + ". Please, remove this line from your code."); + return; + } + + enabled_ = true; + locs_.push_back(std::move(loc)); +} + +void GlobalPragma::check_enable_in_libs() { + if (locs_.empty()) { + return; + } + for (const SrcLocation& loc : locs_) { + if (loc.fdescr->is_main) { + return; + } + } + locs_[0].show_warning(PSTRING() << "#pragma " << name_ + << " is enabled in included libraries, it may change the behavior of your code.
" + << "Add this #pragma to the main source file to suppress this warning."); +} + +void GlobalPragma::always_on_and_deprecated(const char *deprecated_from_v) { + deprecated_from_v_ = deprecated_from_v; + enabled_ = true; +} + td::Result fs_read_callback(ReadCallback::Kind kind, const char* query) { switch (kind) { case ReadCallback::Kind::ReadFile: { @@ -62,6 +127,55 @@ td::Result fs_read_callback(ReadCallback::Kind kind, const char* qu } } +void mark_function_used_dfs(const std::unique_ptr& op); + +void mark_function_used(SymValCodeFunc* func_val) { + if (!func_val->code || func_val->is_really_used) { // already handled + return; + } + + func_val->is_really_used = true; + mark_function_used_dfs(func_val->code->ops); +} + +void mark_global_var_used(SymValGlobVar* glob_val) { + glob_val->is_really_used = true; +} + +void mark_function_used_dfs(const std::unique_ptr& op) { + if (!op) { + return; + } + // op->fun_ref, despite its name, may actually ref global var + // note, that for non-calls, e.g. 
`var a = some_fn` (Op::_Let), some_fn is Op::_GlobVar + // (in other words, fun_ref exists not only for direct Op::_Call, but for non-call references also) + if (op->fun_ref) { + if (auto* func_val = dynamic_cast(op->fun_ref->value)) { + mark_function_used(func_val); + } else if (auto* glob_val = dynamic_cast(op->fun_ref->value)) { + mark_global_var_used(glob_val); + } else if (auto* asm_val = dynamic_cast(op->fun_ref->value)) { + } else { + tolk_assert(false); + } + } + mark_function_used_dfs(op->next); + mark_function_used_dfs(op->block0); + mark_function_used_dfs(op->block1); +} + +void mark_used_symbols() { + for (SymDef* func_sym : glob_func) { + auto* func_val = dynamic_cast(func_sym->value); + std::string name = symbols.get_name(func_sym->sym_idx); + if (func_val->method_id.not_null() || + name == "main" || name == "recv_internal" || name == "recv_external" || + name == "run_ticktock" || name == "split_prepare" || name == "split_install") { + mark_function_used(func_val); + } + } +} + /* * * OUTPUT CODE GENERATOR @@ -76,8 +190,7 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er errs << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl; } if (!func_val->code) { - errs << "( function `" << name << "` undefined )\n"; - throw ParseError(func_sym->loc, name); + throw ParseError(func_sym->loc, "function `" + name + "` is just declared, not implemented"); } else { CodeBlob& code = *(func_val->code); if (verbosity >= 3) { @@ -122,12 +235,10 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er if (verbosity >= 2) { errs << "\n---------- resulting code for " << name << " -------------\n"; } - bool inline_func = (func_val->flags & 1); - bool inline_ref = (func_val->flags & 2); const char* modifier = ""; - if (inline_func) { + if (func_val->is_inline()) { modifier = "INLINE"; - } else if (inline_ref) { + } else if (func_val->is_inline_ref()) { modifier = 
"REF"; } outs << std::string(indent * 2, ' ') << name << " PROC" << modifier << ":<{\n"; @@ -138,12 +249,10 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er if (opt_level < 2) { mode |= Stack::_DisableOpt; } - auto fv = dynamic_cast(func_sym->value); - // Flags: 1 - inline, 2 - inline_ref - if (fv && (fv->flags & 1) && code.ops->noreturn()) { + if (func_val->is_inline() && code.ops->noreturn()) { mode |= Stack::_InlineFunc; } - if (fv && (fv->flags & 3)) { + if (func_val->is_inline() || func_val->is_inline_ref()) { mode |= Stack::_InlineAny; } code.generate_code(outs, mode, indent + 1); @@ -162,9 +271,17 @@ int generate_output(std::ostream &outs, std::ostream &errs) { if (program_envelope) { outs << "PROGRAM{\n"; } + mark_used_symbols(); for (SymDef* func_sym : glob_func) { SymValCodeFunc* func_val = dynamic_cast(func_sym->value); tolk_assert(func_val); + if (!func_val->does_need_codegen()) { + if (verbosity >= 2) { + errs << func_sym->name() << ": code not generated, function does not need codegen\n"; + } + continue; + } + std::string name = symbols.get_name(func_sym->sym_idx); outs << std::string(indent * 2, ' '); if (func_val->method_id.is_null()) { @@ -174,12 +291,23 @@ int generate_output(std::ostream &outs, std::ostream &errs) { } } for (SymDef* gvar_sym : glob_vars) { - tolk_assert(dynamic_cast(gvar_sym->value)); + auto* glob_val = dynamic_cast(gvar_sym->value); + tolk_assert(glob_val); + if (!glob_val->is_really_used && pragma_remove_unused_functions.enabled()) { + if (verbosity >= 2) { + errs << gvar_sym->name() << ": variable not generated, it's unused\n"; + } + continue; + } std::string name = symbols.get_name(gvar_sym->sym_idx); outs << std::string(indent * 2, ' ') << "DECLGLOBVAR " << name << "\n"; } int errors = 0; for (SymDef* func_sym : glob_func) { + SymValCodeFunc* func_val = dynamic_cast(func_sym->value); + if (!func_val->does_need_codegen()) { + continue; + } try { generate_output_func(func_sym, outs, errs); } 
catch (Error& err) { @@ -217,6 +345,8 @@ int tolk_proceed(const std::vector &sources, std::ostream &outs, st define_keywords(); define_builtins(); + pragma_allow_post_modification.always_on_and_deprecated("0.5.0"); + pragma_compute_asm_ltr.always_on_and_deprecated("0.5.0"); int ok = 0, proc = 0; try { @@ -235,8 +365,7 @@ int tolk_proceed(const std::vector &sources, std::ostream &outs, st if (!proc) { throw Fatal{"no source files, no output"}; } - pragma_allow_post_modification.check_enable_in_libs(); - pragma_compute_asm_ltr.check_enable_in_libs(); + pragma_remove_unused_functions.check_enable_in_libs(); return generate_output(outs, errs); } catch (Fatal& fatal) { errs << "fatal: " << fatal << std::endl; diff --git a/tolk/tolk.h b/tolk/tolk.h index 664410992..9086620bc 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -106,12 +106,15 @@ enum Keyword { _Forall, _Asm, _Impure, + _Pure, _Global, _Extern, _Inline, _InlineRef, + _Builtin, _AutoApply, _MethodId, + _Get, _Operator, _Infix, _Infixl, @@ -147,8 +150,8 @@ class IdSc { */ struct TypeExpr { - enum te_type { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_Type, te_ForAll } constr; - enum { + enum te_type { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll } constr; + enum AtomicType { _Int = Keyword::_Int, _Cell = Keyword::_Cell, _Slice = Keyword::_Slice, @@ -214,9 +217,11 @@ struct TypeExpr { void compute_width(); bool recompute_width(); void show_width(std::ostream& os); - std::ostream& print(std::ostream& os, int prio = 0); + std::ostream& print(std::ostream& os, int prio = 0) const; void replace_with(TypeExpr* te2); int extract_components(std::vector& comp_list); + bool equals_to(const TypeExpr* rhs) const; + bool has_unknown_inside() const; static int holes, type_vars; static TypeExpr* new_hole() { return new TypeExpr{te_Unknown, ++holes}; @@ -528,7 +533,7 @@ class ListIterator { struct Stack; struct Op { - enum { + enum OpKind { _Undef, _Nop, _Call, 
@@ -547,13 +552,13 @@ struct Op { _Repeat, _Again, _TryCatch, - _SliceConst + _SliceConst, }; - int cl; - enum { _Disabled = 1, _Reachable = 2, _NoReturn = 4, _ImpureR = 8, _ImpureW = 16, _Impure = 24 }; + OpKind cl; + enum { _Disabled = 1, _NoReturn = 4, _Impure = 24 }; int flags; std::unique_ptr next; - SymDef* fun_ref; + SymDef* fun_ref; // despite its name, it may actually ref global var; applicable not only to Op::_Call, but for other kinds also SrcLocation where; VarDescrList var_info; std::vector args; @@ -561,41 +566,41 @@ struct Op { std::unique_ptr block0, block1; td::RefInt256 int_const; std::string str_const; - Op(const SrcLocation& _where = {}, int _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) { + Op(const SrcLocation& _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) { } - Op(const SrcLocation& _where, int _cl, const std::vector& _left) + Op(const SrcLocation& _where, OpKind _cl, const std::vector& _left) : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left) { } - Op(const SrcLocation& _where, int _cl, std::vector&& _left) + Op(const SrcLocation& _where, OpKind _cl, std::vector&& _left) : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(std::move(_left)) { } - Op(const SrcLocation& _where, int _cl, const std::vector& _left, td::RefInt256 _const) + Op(const SrcLocation& _where, OpKind _cl, const std::vector& _left, td::RefInt256 _const) : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), int_const(_const) { } - Op(const SrcLocation& _where, int _cl, const std::vector& _left, std::string _const) + Op(const SrcLocation& _where, OpKind _cl, const std::vector& _left, std::string _const) : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), str_const(_const) { } - Op(const SrcLocation& _where, int _cl, const std::vector& _left, const std::vector& _right, + Op(const SrcLocation& _where, OpKind _cl, const std::vector& _left, const std::vector& _right, 
SymDef* _fun = nullptr) : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(_left), right(_right) { } - Op(const SrcLocation& _where, int _cl, std::vector&& _left, std::vector&& _right, + Op(const SrcLocation& _where, OpKind _cl, std::vector&& _left, std::vector&& _right, SymDef* _fun = nullptr) : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) { } - bool disabled() const { - return flags & _Disabled; - } - bool enabled() const { - return !disabled(); - } - void disable() { - flags |= _Disabled; - } - bool unreachable() { - return !(flags & _Reachable); - } - void flags_set_clear(int set, int clear); + + bool disabled() const { return flags & _Disabled; } + void set_disabled() { flags |= _Disabled; } + void set_disabled(bool flag); + + bool noreturn() const { return flags & _NoReturn; } + bool set_noreturn() { flags |= _NoReturn; return true; } + bool set_noreturn(bool flag); + + bool impure() const { return flags & _Impure; } + void set_impure(const CodeBlob &code); + void set_impure(const CodeBlob &code, bool flag); + void show(std::ostream& os, const std::vector& vars, std::string pfx = "", int mode = 0) const; void show_var_list(std::ostream& os, const std::vector& idx_list, const std::vector& vars) const; void show_var_list(std::ostream& os, const std::vector& list, const std::vector& vars) const; @@ -611,17 +616,10 @@ struct Op { bool set_var_info_except(VarDescrList&& new_var_info, const std::vector& var_list); void prepare_args(VarDescrList values); VarDescrList fwd_analyze(VarDescrList values); - bool set_noreturn(bool nr); bool mark_noreturn(); - bool noreturn() const { - return flags & _NoReturn; - } bool is_empty() const { return cl == _Nop && !next; } - bool is_pure() const { - return !(flags & _Impure); - } bool generate_code_step(Stack& stack); void generate_code_all(Stack& stack); Op& last() { @@ -682,7 +680,7 @@ typedef std::vector FormalArgList; struct AsmOpList; struct CodeBlob { - enum { 
_AllowPostModification = 1, _ComputeAsmLtr = 2 }; + enum { _ForbidImpure = 4 }; int var_cnt, in_var_cnt, op_cnt; TypeExpr* ret_type; std::string name; @@ -726,7 +724,6 @@ struct CodeBlob { pop_cur(); } void simplify_var_types(); - void flags_set_clear(int set, int clear); void prune_unreachable_code(); void fwd_analyze(); void mark_noreturn(); @@ -748,48 +745,75 @@ struct CodeBlob { struct SymVal : SymValBase { TypeExpr* sym_type; - td::RefInt256 method_id; - bool impure; bool auto_apply{false}; - short flags; // +1 = inline, +2 = inline_ref - SymVal(int _type, int _idx, TypeExpr* _stype = nullptr, bool _impure = false) - : SymValBase(_type, _idx), sym_type(_stype), impure(_impure), flags(0) { + SymVal(int _type, int _idx, TypeExpr* _stype = nullptr) + : SymValBase(_type, _idx), sym_type(_stype) { } ~SymVal() override = default; TypeExpr* get_type() const { return sym_type; } - virtual const std::vector* get_arg_order() const { - return nullptr; - } - virtual const std::vector* get_ret_order() const { - return nullptr; - } }; struct SymValFunc : SymVal { + enum SymValFlag { + flagInline = 1, // function marked `inline` + flagInlineRef = 2, // function marked `inline_ref` + flagWrapsAnotherF = 4, // (T) thisF(...args) { return anotherF(...args); } (calls to thisF will be replaced) + flagUsedAsNonCall = 8, // used not only as `f()`, but as a 1-st class function (assigned to var, pushed to tuple, etc.) + flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out + flagBuiltinFunction = 32, // was created via `define_builtin_func()`, not from source code + flagGetMethod = 64, // was declared via `get T func()`, method_id is auto-assigned + }; + + td::RefInt256 method_id; // todo why int256? 
it's small + int flags{0}; std::vector arg_order, ret_order; +#ifdef TOLK_DEBUG + std::string name; // seeing function name in debugger makes it much easier to delve into Tolk sources +#endif ~SymValFunc() override = default; - SymValFunc(int val, TypeExpr* _ft, bool _impure = false) : SymVal(_Func, val, _ft, _impure) { - } - SymValFunc(int val, TypeExpr* _ft, std::initializer_list _arg_order, std::initializer_list _ret_order = {}, - bool _impure = false) - : SymVal(_Func, val, _ft, _impure), arg_order(_arg_order), ret_order(_ret_order) { + SymValFunc(int val, TypeExpr* _ft, bool marked_as_pure) + : SymVal(_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0) {} + SymValFunc(int val, TypeExpr* _ft, std::initializer_list _arg_order, std::initializer_list _ret_order, bool marked_as_pure) + : SymVal(_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0), arg_order(_arg_order), ret_order(_ret_order) { } - const std::vector* get_arg_order() const override { + const std::vector* get_arg_order() const { return arg_order.empty() ? nullptr : &arg_order; } - const std::vector* get_ret_order() const override { + const std::vector* get_ret_order() const { return ret_order.empty() ? 
nullptr : &ret_order; } + const TypeExpr* get_arg_type() const; + + bool is_inline() const { + return flags & flagInline; + } + bool is_inline_ref() const { + return flags & flagInlineRef; + } + bool is_just_wrapper_for_another_f() const { + return flags & flagWrapsAnotherF; + } + bool is_marked_as_pure() const { + return flags & flagMarkedAsPure; + } + bool is_builtin() const { + return flags & flagBuiltinFunction; + } + bool is_get_method() const { + return flags & flagGetMethod; + } }; struct SymValCodeFunc : SymValFunc { CodeBlob* code; + bool is_really_used{false}; // calculated via dfs; unused functions are not codegenerated ~SymValCodeFunc() override = default; - SymValCodeFunc(int val, TypeExpr* _ft, bool _impure = false) : SymValFunc(val, _ft, _impure), code(nullptr) { + SymValCodeFunc(int val, TypeExpr* _ft, bool marked_as_pure) : SymValFunc(val, _ft, marked_as_pure), code(nullptr) { } + bool does_need_codegen() const; }; struct SymValType : SymValBase { @@ -805,6 +829,10 @@ struct SymValType : SymValBase { struct SymValGlobVar : SymValBase { TypeExpr* sym_type; int out_idx{0}; + bool is_really_used{false}; // calculated via dfs from used functions; unused globals are not codegenerated +#ifdef TOLK_DEBUG + std::string name; // seeing variable name in debugger makes it much easier to delve into Tolk sources +#endif SymValGlobVar(int val, TypeExpr* gvtype, int oidx = 0) : SymValBase(_GlobVar, val), sym_type(gvtype), out_idx(oidx) { } @@ -839,7 +867,7 @@ struct SymValConst : SymValBase { }; extern int glob_func_cnt, undef_func_cnt, glob_var_cnt; -extern std::vector glob_func, glob_vars; +extern std::vector glob_func, glob_vars, glob_get_methods; extern std::set prohibited_var_names; /* @@ -891,7 +919,7 @@ extern std::stack inclusion_locations; */ struct Expr { - enum { + enum ExprCls { _None, _Apply, _VarApply, @@ -900,18 +928,18 @@ struct Expr { _Tensor, _Const, _Var, - _Glob, + _GlobFunc, _GlobVar, _Letop, _LetFirst, _Hole, _Type, _CondExpr, - _SliceConst 
+ _SliceConst, }; - int cls; + ExprCls cls; int val{0}; - enum { _IsType = 1, _IsRvalue = 2, _IsLvalue = 4, _IsHole = 8, _IsNewVar = 16, _IsImpure = 32 }; + enum { _IsType = 1, _IsRvalue = 2, _IsLvalue = 4, _IsImpure = 32, _IsInsideParenthesis = 64 }; int flags{0}; SrcLocation here; td::RefInt256 intval; @@ -919,19 +947,19 @@ struct Expr { SymDef* sym{nullptr}; TypeExpr* e_type{nullptr}; std::vector args; - Expr(int c = _None) : cls(c) { + explicit Expr(ExprCls c = _None) : cls(c) { } - Expr(int c, const SrcLocation& loc) : cls(c), here(loc) { + Expr(ExprCls c, const SrcLocation& loc) : cls(c), here(loc) { } - Expr(int c, std::vector _args) : cls(c), args(std::move(_args)) { + Expr(ExprCls c, std::vector _args) : cls(c), args(std::move(_args)) { } - Expr(int c, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { + Expr(ExprCls c, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { } - Expr(int c, SymDef* _sym, std::initializer_list _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { + Expr(ExprCls c, SymDef* _sym, std::initializer_list _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { } - Expr(int c, SymDef* _sym, std::vector _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { + Expr(ExprCls c, SymDef* _sym, std::vector _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { } - Expr(int c, sym_idx_t name_idx, std::initializer_list _arglist); + Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list _arglist); ~Expr() { for (auto& arg_ptr : args) { delete arg_ptr; @@ -953,6 +981,9 @@ struct Expr { bool is_type() const { return flags & _IsType; } + bool is_inside_parenthesis() const { + return flags & _IsInsideParenthesis; + } bool is_type_apply() const { return cls == _TypeApply; } @@ -972,7 +1003,6 @@ struct Expr { int define_new_vars(CodeBlob& code); int predefine_vars(); std::vector pre_compile(CodeBlob& code, std::vector>* lval_globs = nullptr) const; - static std::vector pre_compile_let(CodeBlob& 
code, Expr* lhs, Expr* rhs, const SrcLocation& here); var_idx_t new_tmp(CodeBlob& code) const; std::vector new_tmp_vect(CodeBlob& code) const { return {new_tmp(code)}; @@ -993,9 +1023,9 @@ using Const = td::RefInt256; struct AsmOp { enum Type { a_none, a_xchg, a_push, a_pop, a_const, a_custom, a_magic }; - int t{a_none}; + Type t{a_none}; int indent{0}; - int a, b, c; + int a, b; bool gconst{false}; std::string op; td::RefInt256 origin; @@ -1005,26 +1035,22 @@ struct AsmOp { } }; AsmOp() = default; - AsmOp(int _t) : t(_t) { + AsmOp(Type _t) : t(_t) { } - AsmOp(int _t, std::string _op) : t(_t), op(std::move(_op)) { + AsmOp(Type _t, std::string _op) : t(_t), op(std::move(_op)) { } - AsmOp(int _t, int _a) : t(_t), a(_a) { + AsmOp(Type _t, int _a) : t(_t), a(_a) { } - AsmOp(int _t, int _a, std::string _op) : t(_t), a(_a), op(std::move(_op)) { + AsmOp(Type _t, int _a, std::string _op) : t(_t), a(_a), op(std::move(_op)) { } - AsmOp(int _t, int _a, int _b) : t(_t), a(_a), b(_b) { + AsmOp(Type _t, int _a, int _b) : t(_t), a(_a), b(_b) { } - AsmOp(int _t, int _a, int _b, std::string _op) : t(_t), a(_a), b(_b), op(std::move(_op)) { + AsmOp(Type _t, int _a, int _b, std::string _op) : t(_t), a(_a), b(_b), op(std::move(_op)) { compute_gconst(); } - AsmOp(int _t, int _a, int _b, std::string _op, td::RefInt256 x) : t(_t), a(_a), b(_b), op(std::move(_op)), origin(x) { + AsmOp(Type _t, int _a, int _b, std::string _op, td::RefInt256 x) : t(_t), a(_a), b(_b), op(std::move(_op)), origin(x) { compute_gconst(); } - AsmOp(int _t, int _a, int _b, int _c) : t(_t), a(_a), b(_b), c(_c) { - } - AsmOp(int _t, int _a, int _b, int _c, std::string _op) : t(_t), a(_a), b(_b), c(_c), op(std::move(_op)) { - } void out(std::ostream& os) const; void out_indent_nl(std::ostream& os, bool no_nl = false) const; std::string to_string() const; @@ -1680,8 +1706,8 @@ inline simple_compile_func_t make_simple_compile(AsmOp op) { return [op](std::vector& out, std::vector& in, const SrcLocation&) -> AsmOp { 
return op; }; } -inline compile_func_t make_ext_compile(std::vector ops) { - return [ops = std::move(ops)](AsmOpList & dest, std::vector & out, std::vector & in)->bool { +inline compile_func_t make_ext_compile(std::vector&& ops) { + return [ops = std::move(ops)](AsmOpList& dest, std::vector& out, std::vector& in)->bool { return dest.append(ops); }; } @@ -1696,25 +1722,22 @@ struct SymValAsmFunc : SymValFunc { compile_func_t ext_compile; td::uint64 crc; ~SymValAsmFunc() override = default; - SymValAsmFunc(TypeExpr* ft, const AsmOp& _macro, bool impure = false) - : SymValFunc(-1, ft, impure), simple_compile(make_simple_compile(_macro)) { + SymValAsmFunc(TypeExpr* ft, std::vector&& _macro, bool marked_as_pure) + : SymValFunc(-1, ft, marked_as_pure), ext_compile(make_ext_compile(std::move(_macro))) { } - SymValAsmFunc(TypeExpr* ft, std::vector _macro, bool impure = false) - : SymValFunc(-1, ft, impure), ext_compile(make_ext_compile(std::move(_macro))) { + SymValAsmFunc(TypeExpr* ft, simple_compile_func_t _compile, bool marked_as_pure) + : SymValFunc(-1, ft, marked_as_pure), simple_compile(std::move(_compile)) { } - SymValAsmFunc(TypeExpr* ft, simple_compile_func_t _compile, bool impure = false) - : SymValFunc(-1, ft, impure), simple_compile(std::move(_compile)) { - } - SymValAsmFunc(TypeExpr* ft, compile_func_t _compile, bool impure = false) - : SymValFunc(-1, ft, impure), ext_compile(std::move(_compile)) { + SymValAsmFunc(TypeExpr* ft, compile_func_t _compile, bool marked_as_pure) + : SymValFunc(-1, ft, marked_as_pure), ext_compile(std::move(_compile)) { } SymValAsmFunc(TypeExpr* ft, simple_compile_func_t _compile, std::initializer_list arg_order, - std::initializer_list ret_order = {}, bool impure = false) - : SymValFunc(-1, ft, arg_order, ret_order, impure), simple_compile(std::move(_compile)) { + std::initializer_list ret_order = {}, bool marked_as_pure = false) + : SymValFunc(-1, ft, arg_order, ret_order, marked_as_pure), simple_compile(std::move(_compile)) { } 
SymValAsmFunc(TypeExpr* ft, compile_func_t _compile, std::initializer_list arg_order, - std::initializer_list ret_order = {}, bool impure = false) - : SymValFunc(-1, ft, arg_order, ret_order, impure), ext_compile(std::move(_compile)) { + std::initializer_list ret_order = {}, bool marked_as_pure = false) + : SymValFunc(-1, ft, arg_order, ret_order, marked_as_pure), ext_compile(std::move(_compile)) { } bool compile(AsmOpList& dest, std::vector& out, std::vector& in, const SrcLocation& where) const; }; @@ -1747,30 +1770,17 @@ class GlobalPragma { bool enabled() const { return enabled_; } - void enable(SrcLocation loc) { - enabled_ = true; - locs_.push_back(std::move(loc)); - } - void check_enable_in_libs() { - if (locs_.empty()) { - return; - } - for (const SrcLocation& loc : locs_) { - if (loc.fdescr->is_main) { - return; - } - } - locs_[0].show_warning(PSTRING() << "#pragma " << name_ - << " is enabled in included libraries, it may change the behavior of your code. " - << "Add this #pragma to the main source file to suppress this warning."); - } + void enable(SrcLocation loc); + void check_enable_in_libs(); + void always_on_and_deprecated(const char *deprecated_from_v); private: std::string name_; bool enabled_ = false; + const char *deprecated_from_v_ = nullptr; std::vector locs_; }; -extern GlobalPragma pragma_allow_post_modification, pragma_compute_asm_ltr; +extern GlobalPragma pragma_allow_post_modification, pragma_compute_asm_ltr, pragma_remove_unused_functions; /* * diff --git a/tolk/unify-types.cpp b/tolk/unify-types.cpp index 4e28dc834..848e454aa 100644 --- a/tolk/unify-types.cpp +++ b/tolk/unify-types.cpp @@ -113,6 +113,39 @@ int TypeExpr::extract_components(std::vector& comp_list) { return res; } +bool TypeExpr::equals_to(const TypeExpr *rhs) const { + const TypeExpr *l = this; + const TypeExpr *r = rhs; + while (l->constr == te_Indirect) + l = l->args[0]; + while (r->constr == te_Indirect) + r = r->args[0]; + + bool eq = l->constr == r->constr && l->value 
== r->value && + l->minw == r->minw && l->maxw == r->maxw && + l->was_forall_var == r->was_forall_var && + l->args.size() == r->args.size(); + if (!eq) + return false; + + for (int i = 0; i < static_cast(l->args.size()); ++i) { + if (!l->args[i]->equals_to(r->args[i])) + return false; + } + return true; +} + +bool TypeExpr::has_unknown_inside() const { + if (constr == te_Unknown) + return true; + + for (const TypeExpr* inner : args) { + if (inner->has_unknown_inside()) + return true; + } + return false; +} + TypeExpr* TypeExpr::new_map(TypeExpr* from, TypeExpr* to) { return new TypeExpr{te_Map, std::vector{from, to}}; } @@ -207,7 +240,7 @@ std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr) { return type_expr->print(os); } -std::ostream& TypeExpr::print(std::ostream& os, int lex_level) { +std::ostream& TypeExpr::print(std::ostream& os, int lex_level) const { switch (constr) { case te_Unknown: return os << "??" << value; diff --git a/tonlib/tonlib/tonlib-cli.cpp b/tonlib/tonlib/tonlib-cli.cpp index 4567478eb..8fddedd40 100644 --- a/tonlib/tonlib/tonlib-cli.cpp +++ b/tonlib/tonlib/tonlib-cli.cpp @@ -386,7 +386,7 @@ class TonlibCli : public td::actor::Actor { td::TerminalIO::out() << "sendfile \tLoad a serialized message from and send it to server\n"; td::TerminalIO::out() << "setconfig|validateconfig [] [] [] - set or validate " "lite server config\n"; - td::TerminalIO::out() << "runmethod ...\tRuns GET method of account " + td::TerminalIO::out() << "runmethod ...\tRuns GET method of account " " with specified parameters\n"; td::TerminalIO::out() << "getstate \tget state of wallet with requested key\n"; td::TerminalIO::out() << "getstatebytransaction \tget state of wallet with requested key after transaction with local time and hash (base64url)\n"; From 0bcc0b3c12631a7153b24d3883bfb12266d15d48 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Thu, 31 Oct 2024 10:57:13 +0400 Subject: [PATCH 03/12] [Fift] Fix an issue of FunC/Tolk WASM which truncated long fif 
output As it turned out, PSTRING() created a buffer of 128K. If asm_code exceeded this buffer, it was truncated. I've just dropped PSTRING() from there in favor of std::string. --- crypto/fift/utils.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/crypto/fift/utils.cpp b/crypto/fift/utils.cpp index f37766a72..01cf0eb56 100644 --- a/crypto/fift/utils.cpp +++ b/crypto/fift/utils.cpp @@ -114,7 +114,7 @@ class MemoryFileLoader : public fift::FileLoader { std::map> files_; }; -td::Result create_source_lookup(std::string main, bool need_preamble = true, bool need_asm = true, +td::Result create_source_lookup(std::string&& main, bool need_preamble = true, bool need_asm = true, bool need_ton_util = true, bool need_lisp = true, bool need_w3_code = true, bool need_fift_ext = true, bool need_disasm = true, std::string dir = "") { @@ -189,7 +189,7 @@ td::Result run_fift(fift::SourceLookup source_lookup, std::o } // namespace td::Result mem_run_fift(std::string source, std::vector args, std::string fift_dir) { std::stringstream ss; - TRY_RESULT(source_lookup, create_source_lookup(source, true, true, true, true, true, true, true, fift_dir)); + TRY_RESULT(source_lookup, create_source_lookup(std::move(source), true, true, true, true, true, true, true, fift_dir)); TRY_RESULT_ASSIGN(source_lookup, run_fift(std::move(source_lookup), &ss, true, std::move(args))); FiftOutput res; res.source_lookup = std::move(source_lookup); @@ -207,16 +207,21 @@ td::Result mem_run_fift(SourceLookup source_lookup, std::vector create_mem_source_lookup(std::string main, std::string fift_dir, bool need_preamble, bool need_asm, bool need_ton_util, bool need_lisp, bool need_w3_code) { - return create_source_lookup(main, need_preamble, need_asm, need_ton_util, need_lisp, need_w3_code, false, false, + return create_source_lookup(std::move(main), need_preamble, need_asm, need_ton_util, need_lisp, need_w3_code, false, false, fift_dir); } td::Result> compile_asm(td::Slice 
asm_code, std::string fift_dir, bool is_raw) { std::stringstream ss; - TRY_RESULT(source_lookup, - create_source_lookup(PSTRING() << "\"Asm.fif\" include\n " << (is_raw ? "<{" : "") << asm_code << "\n" - << (is_raw ? "}>c" : "") << " boc>B \"res\" B>file", - true, true, true, false, false, false, false, fift_dir)); + std::string sb; + sb.reserve(asm_code.size() + 100); + sb.append("\"Asm.fif\" include\n "); + sb.append(is_raw ? "<{" : ""); + sb.append(asm_code.data(), asm_code.size()); + sb.append(is_raw ? "}>c" : ""); + sb.append(" boc>B \"res\" B>file"); + + TRY_RESULT(source_lookup, create_source_lookup(std::move(sb), true, true, true, false, false, false, false, fift_dir)); TRY_RESULT(res, run_fift(std::move(source_lookup), &ss)); TRY_RESULT(boc, res.read_file("res")); return vm::std_boc_deserialize(std::move(boc.data)); From f0e6470d0b1d253104d098bb9c506c05e00d509b Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Thu, 31 Oct 2024 10:59:23 +0400 Subject: [PATCH 04/12] [Tolk] Rewrite lexer, spaces are not mandatory anymore A new lexer is noticeably faster and memory efficient (although splitting a file to tokens is negligible in a whole pipeline). But the purpose of rewriting lexer was not just to speed up, but to allow writing code without spaces: `2+2` is now 4, not a valid identifier as earlier. The variety of symbols allowed in identifier has greatly reduced and is now similar to other languages. SrcLocation became 8 bytes on stack everywhere. 
Command-line flags were also reworked: - the input for Tolk compiler is only a single file now, it's parsed, and parsing continues while new #include are resolved - flags like -A -P and so on are no more needed, actually --- crypto/fift/utils.cpp | 31 +- crypto/fift/utils.h | 12 +- crypto/funcfiftlib/funcfiftlib.cpp | 32 +- crypto/smartcont/mathlib.tolk | 6 +- tolk/CMakeLists.txt | 3 +- tolk/abscode.cpp | 28 +- tolk/analyzer.cpp | 6 +- tolk/builtins.cpp | 103 ++- tolk/gen-abscode.cpp | 54 +- tolk/keywords.cpp | 129 --- tolk/lexer.cpp | 841 +++++++++++++------- tolk/lexer.h | 281 +++++-- tolk/optimize.cpp | 2 +- tolk/parse-tolk.cpp | 1187 +++++++++++++--------------- tolk/platform-utils.h | 44 ++ tolk/src-file.cpp | 164 ++++ tolk/src-file.h | 120 +++ tolk/srcread.cpp | 228 ------ tolk/srcread.h | 162 ---- tolk/symtable.cpp | 66 +- tolk/symtable.h | 116 ++- tolk/tolk-main.cpp | 71 +- tolk/tolk-wasm.cpp | 77 +- tolk/tolk.cpp | 118 +-- tolk/tolk.h | 244 ++---- tolk/unify-types.cpp | 6 - 26 files changed, 2022 insertions(+), 2109 deletions(-) delete mode 100644 tolk/keywords.cpp create mode 100644 tolk/platform-utils.h create mode 100644 tolk/src-file.cpp create mode 100644 tolk/src-file.h delete mode 100644 tolk/srcread.cpp delete mode 100644 tolk/srcread.h diff --git a/crypto/fift/utils.cpp b/crypto/fift/utils.cpp index 01cf0eb56..6057b2dc0 100644 --- a/crypto/fift/utils.cpp +++ b/crypto/fift/utils.cpp @@ -211,20 +211,39 @@ td::Result create_mem_source_lookup(std::string main, std::s fift_dir); } -td::Result> compile_asm(td::Slice asm_code, std::string fift_dir, bool is_raw) { +td::Result> compile_asm(td::Slice asm_code) { std::stringstream ss; std::string sb; sb.reserve(asm_code.size() + 100); - sb.append("\"Asm.fif\" include\n "); - sb.append(is_raw ? "<{" : ""); + sb.append("\"Asm.fif\" include\n <{\n"); sb.append(asm_code.data(), asm_code.size()); - sb.append(is_raw ? 
"}>c" : ""); - sb.append(" boc>B \"res\" B>file"); + sb.append("\n}>c boc>B \"res\" B>file"); - TRY_RESULT(source_lookup, create_source_lookup(std::move(sb), true, true, true, false, false, false, false, fift_dir)); + TRY_RESULT(source_lookup, create_source_lookup(std::move(sb), true, true, true, false, false, false, false)); TRY_RESULT(res, run_fift(std::move(source_lookup), &ss)); TRY_RESULT(boc, res.read_file("res")); return vm::std_boc_deserialize(std::move(boc.data)); } +td::Result compile_asm_program(std::string&& program_code, const std::string& fift_dir) { + std::string main_fif; + main_fif.reserve(program_code.size() + 100); + main_fif.append(program_code.data(), program_code.size()); + main_fif.append(R"( dup hashB B>X $>B "hex" B>file)"); // write codeHashHex to a file + main_fif.append(R"( boc>B B>base64 $>B "boc" B>file)"); // write codeBoc64 to a file + + std::stringstream fift_output_stream; + TRY_RESULT(source_lookup, create_source_lookup(std::move(main_fif), true, true, false, false, false, false, false, fift_dir)); + TRY_RESULT(res, run_fift(std::move(source_lookup), &fift_output_stream)); + + TRY_RESULT(boc, res.read_file("boc")); + TRY_RESULT(hex, res.read_file("hex")); + + return CompiledProgramOutput{ + std::move(program_code), + std::move(boc.data), + std::move(hex.data), + }; +} + } // namespace fift diff --git a/crypto/fift/utils.h b/crypto/fift/utils.h index dd434fe01..fab92c542 100644 --- a/crypto/fift/utils.h +++ b/crypto/fift/utils.h @@ -26,11 +26,21 @@ struct FiftOutput { SourceLookup source_lookup; std::string output; }; + +// given a valid Fift code PROGRAM{ ... 
}END>c, compile_asm_program() returns this output +// now it's used primarily for wasm output (see tolk-js, for example) +struct CompiledProgramOutput { + std::string fiftCode; + std::string codeBoc64; + std::string codeHashHex; +}; + td::Result create_mem_source_lookup(std::string main, std::string fift_dir = "", bool need_preamble = true, bool need_asm = true, bool need_ton_util = true, bool need_lisp = true, bool need_w3_code = true); td::Result mem_run_fift(std::string source, std::vector args = {}, std::string fift_dir = ""); td::Result mem_run_fift(SourceLookup source_lookup, std::vector args); -td::Result> compile_asm(td::Slice asm_code, std::string fift_dir = "", bool is_raw = true); +td::Result> compile_asm(td::Slice asm_code); +td::Result compile_asm_program(std::string&& program_code, const std::string& fift_dir); } // namespace fift diff --git a/crypto/funcfiftlib/funcfiftlib.cpp b/crypto/funcfiftlib/funcfiftlib.cpp index 0bef9eac7..403c075dd 100644 --- a/crypto/funcfiftlib/funcfiftlib.cpp +++ b/crypto/funcfiftlib/funcfiftlib.cpp @@ -37,10 +37,10 @@ td::Result compile_internal(char *config_json) { TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json))) - auto &obj = input_json.get_object(); + td::JsonObject& config = input_json.get_object(); - TRY_RESULT(opt_level, td::get_json_object_int_field(obj, "optLevel", false)); - TRY_RESULT(sources_obj, td::get_json_object_field(obj, "sources", td::JsonValue::Type::Array, false)); + TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optLevel", false)); + TRY_RESULT(sources_obj, td::get_json_object_field(config, "sources", td::JsonValue::Type::Array, false)); auto &sources_arr = sources_obj.get_array(); @@ -52,29 +52,25 @@ td::Result compile_internal(char *config_json) { funC::opt_level = std::max(0, opt_level); funC::program_envelope = true; + funC::asm_preamble = true; funC::verbosity = 0; funC::indent = 1; std::ostringstream outs, errs; - auto compile_res = funC::func_proceed(sources, 
outs, errs); - - if (compile_res != 0) { - return td::Status::Error(std::string("Func compilation error: ") + errs.str()); + int funC_res = funC::func_proceed(sources, outs, errs); + if (funC_res != 0) { + return td::Status::Error("FunC compilation error: " + errs.str()); } - TRY_RESULT(code_cell, fift::compile_asm(outs.str(), "/fiftlib/", false)); - TRY_RESULT(boc, vm::std_boc_serialize(code_cell)); + TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/")); td::JsonBuilder result_json; - auto result_obj = result_json.enter_object(); - result_obj("status", "ok"); - result_obj("codeBoc", td::base64_encode(boc)); - result_obj("fiftCode", outs.str()); - result_obj("codeHashHex", code_cell->get_hash().to_hex()); - result_obj.leave(); - - outs.clear(); - errs.clear(); + auto obj = result_json.enter_object(); + obj("status", "ok"); + obj("fiftCode", std::move(fift_res.fiftCode)); + obj("codeBoc", std::move(fift_res.codeBoc64)); + obj("codeHashHex", std::move(fift_res.codeHashHex)); + obj.leave(); return result_json.string_builder().as_cslice().str(); } diff --git a/crypto/smartcont/mathlib.tolk b/crypto/smartcont/mathlib.tolk index 6a5b2d1b7..bb18f9212 100644 --- a/crypto/smartcont/mathlib.tolk +++ b/crypto/smartcont/mathlib.tolk @@ -572,9 +572,9 @@ int atanh_f261(int x, int n) inline_ref { s -= 1; } x += t; - int 2x = 2 * x; - int y = lshift256divr(2x, (x >> 1) - t); - ;; y = 2x - (mulrshiftr256(2x, y) ~>> 2); ;; this line could improve precision on very rare occasions + int `2x` = 2 * x; + int y = lshift256divr(`2x`, (x >> 1) - t); + ;; y = `2x` - (mulrshiftr256(2x, y) ~>> 2); ;; this line could improve precision on very rare occasions return (atanh_f258(y, 36), s); } diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index 8c890859d..820367043 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -1,10 +1,9 @@ cmake_minimum_required(VERSION 3.5 FATAL_ERROR) set(TOLK_SOURCE - srcread.cpp + src-file.cpp lexer.cpp symtable.cpp - keywords.cpp 
unify-types.cpp parse-tolk.cpp abscode.cpp diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index 5833c004a..8cf1f597f 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -24,29 +24,19 @@ namespace tolk { * */ -TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, const SrcLocation* loc) - : v_type(_type), idx(_idx), cls(_cls), coord(0) { +TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc) + : v_type(_type), idx(_idx), cls(_cls), coord(0), where(loc) { if (sym) { name = sym->sym_idx; sym->value->idx = _idx; } - if (loc) { - where = std::make_unique(*loc); - } if (!_type) { v_type = TypeExpr::new_hole(); } - if (cls == _Named) { - undefined = true; - } } -void TmpVar::set_location(const SrcLocation& loc) { - if (where) { - *where = loc; - } else { - where = std::make_unique(loc); - } +void TmpVar::set_location(SrcLocation loc) { + where = loc; } void TmpVar::dump(std::ostream& os) const { @@ -469,10 +459,10 @@ void CodeBlob::print(std::ostream& os, int flags) const { if ((flags & 8) != 0) { for (const auto& var : vars) { var.dump(os); - if (var.where && (flags & 1) != 0) { - var.where->show(os); + if (var.where.is_defined() && (flags & 1) != 0) { + var.where.show(os); os << " defined here:\n"; - var.where->show_context(os); + var.where.show_context(os); } } } @@ -483,7 +473,7 @@ void CodeBlob::print(std::ostream& os, int flags) const { os << "-------- END ---------\n\n"; } -var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, const SrcLocation* location) { +var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation location) { vars.emplace_back(var_cnt, cls, var_type, sym, location); if (sym) { sym->value->idx = var_cnt; @@ -501,7 +491,7 @@ bool CodeBlob::import_params(FormalArgList arg_list) { SymDef* arg_sym; SrcLocation arg_loc; std::tie(arg_type, arg_sym, arg_loc) = par; - list.push_back(create_var(arg_sym ? 
(TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, &arg_loc)); + list.push_back(create_var(arg_sym ? (TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, arg_loc)); } emplace_back(loc, Op::_Import, list); in_var_cnt = var_cnt; diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index ab55a2b64..e38ba1bb0 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -36,7 +36,7 @@ int CodeBlob::split_vars(bool strict) { for (int j = 0; j < var_cnt; j++) { TmpVar& var = vars[j]; if (strict && var.v_type->minw != var.v_type->maxw) { - throw ParseError{var.where.get(), "variable does not have fixed width, cannot manipulate it"}; + throw ParseError{var.where, "variable does not have fixed width, cannot manipulate it"}; } std::vector comp_types; int k = var.v_type->extract_components(comp_types); @@ -45,7 +45,7 @@ int CodeBlob::split_vars(bool strict) { if (k != 1) { var.coord = ~((n << 8) + k); for (int i = 0; i < k; i++) { - auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where.get()); + auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where); tolk_assert(v == n + i); tolk_assert(vars[v].idx == v); vars[v].name = vars[j].name; @@ -54,7 +54,7 @@ int CodeBlob::split_vars(bool strict) { n += k; ++changes; } else if (strict && var.v_type->minw != 1) { - throw ParseError{var.where.get(), + throw ParseError{var.where, "cannot work with variable or variable component of width greater than one"}; } } diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index 6589b9fcd..355c21df2 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -95,7 +95,7 @@ SymDef* define_builtin_const(std::string name, TypeExpr* const_type, Args&&... 
a } bool SymValAsmFunc::compile(AsmOpList& dest, std::vector& out, std::vector& in, - const SrcLocation& where) const { + SrcLocation where) const { if (simple_compile) { return dest.append(simple_compile(out, in, where)); } else if (ext_compile) { @@ -186,7 +186,7 @@ int emulate_mul(int a, int b) { return r; } -int emulate_and(int a, int b) { +int emulate_bitwise_and(int a, int b) { int both = a & b, any = a | b; int r = VarDescr::_Int; if (any & VarDescr::_Nan) { @@ -204,7 +204,7 @@ int emulate_and(int a, int b) { return r; } -int emulate_or(int a, int b) { +int emulate_bitwise_or(int a, int b) { if (b & VarDescr::_Zero) { return a; } else if (a & VarDescr::_Zero) { @@ -222,7 +222,7 @@ int emulate_or(int a, int b) { return r; } -int emulate_xor(int a, int b) { +int emulate_bitwise_xor(int a, int b) { if (b & VarDescr::_Zero) { return a; } else if (a & VarDescr::_Zero) { @@ -241,7 +241,7 @@ int emulate_xor(int a, int b) { return r; } -int emulate_not(int a) { +int emulate_bitwise_not(int a) { if ((a & VarDescr::ConstZero) == VarDescr::ConstZero) { return VarDescr::ConstTrue; } @@ -436,7 +436,7 @@ AsmOp push_const(td::RefInt256 x) { return AsmOp::IntConst(std::move(x)); } -AsmOp compile_add(std::vector& res, std::vector& args, const SrcLocation& where) { +AsmOp compile_add(std::vector& res, std::vector& args, SrcLocation where) { tolk_assert(res.size() == 1 && args.size() == 2); VarDescr &r = res[0], &x = args[0], &y = args[1]; if (x.is_int_const() && y.is_int_const()) { @@ -478,7 +478,7 @@ AsmOp compile_add(std::vector& res, std::vector& args, const return exec_op("ADD", 2); } -AsmOp compile_sub(std::vector& res, std::vector& args, const SrcLocation& where) { +AsmOp compile_sub(std::vector& res, std::vector& args, SrcLocation where) { tolk_assert(res.size() == 1 && args.size() == 2); VarDescr &r = res[0], &x = args[0], &y = args[1]; if (x.is_int_const() && y.is_int_const()) { @@ -511,7 +511,7 @@ AsmOp compile_sub(std::vector& res, std::vector& args, const return 
exec_op("SUB", 2); } -AsmOp compile_negate(std::vector& res, std::vector& args, const SrcLocation& where) { +AsmOp compile_unary_minus(std::vector& res, std::vector& args, SrcLocation where) { tolk_assert(res.size() == 1 && args.size() == 1); VarDescr &r = res[0], &x = args[0]; if (x.is_int_const()) { @@ -526,7 +526,19 @@ AsmOp compile_negate(std::vector& res, std::vector& args, co return exec_op("NEGATE", 1); } -AsmOp compile_and(std::vector& res, std::vector& args, const SrcLocation& where) { +AsmOp compile_unary_plus(std::vector& res, std::vector& args, SrcLocation where) { + tolk_assert(res.size() == 1 && args.size() == 1); + VarDescr &r = res[0], &x = args[0]; + if (x.is_int_const()) { + r.set_const(x.int_const); + x.unused(); + return push_const(r.int_const); + } + r.val = x.val; + return AsmOp::Nop(); +} + +AsmOp compile_bitwise_and(std::vector& res, std::vector& args, SrcLocation where) { tolk_assert(res.size() == 1 && args.size() == 2); VarDescr &r = res[0], &x = args[0], &y = args[1]; if (x.is_int_const() && y.is_int_const()) { @@ -535,11 +547,11 @@ AsmOp compile_and(std::vector& res, std::vector& args, const y.unused(); return push_const(r.int_const); } - r.val = emulate_and(x.val, y.val); + r.val = emulate_bitwise_and(x.val, y.val); return exec_op("AND", 2); } -AsmOp compile_or(std::vector& res, std::vector& args, const SrcLocation& where) { +AsmOp compile_bitwise_or(std::vector& res, std::vector& args, SrcLocation where) { tolk_assert(res.size() == 1 && args.size() == 2); VarDescr &r = res[0], &x = args[0], &y = args[1]; if (x.is_int_const() && y.is_int_const()) { @@ -548,11 +560,11 @@ AsmOp compile_or(std::vector& res, std::vector& args, const y.unused(); return push_const(r.int_const); } - r.val = emulate_or(x.val, y.val); + r.val = emulate_bitwise_or(x.val, y.val); return exec_op("OR", 2); } -AsmOp compile_xor(std::vector& res, std::vector& args, const SrcLocation& where) { +AsmOp compile_bitwise_xor(std::vector& res, std::vector& args, SrcLocation 
where) { tolk_assert(res.size() == 1 && args.size() == 2); VarDescr &r = res[0], &x = args[0], &y = args[1]; if (x.is_int_const() && y.is_int_const()) { @@ -561,11 +573,11 @@ AsmOp compile_xor(std::vector& res, std::vector& args, const y.unused(); return push_const(r.int_const); } - r.val = emulate_xor(x.val, y.val); + r.val = emulate_bitwise_xor(x.val, y.val); return exec_op("XOR", 2); } -AsmOp compile_not(std::vector& res, std::vector& args, const SrcLocation& where) { +AsmOp compile_bitwise_not(std::vector& res, std::vector& args, SrcLocation where) { tolk_assert(res.size() == 1 && args.size() == 1); VarDescr &r = res[0], &x = args[0]; if (x.is_int_const()) { @@ -573,11 +585,11 @@ AsmOp compile_not(std::vector& res, std::vector& args, const x.unused(); return push_const(r.int_const); } - r.val = emulate_not(x.val); + r.val = emulate_bitwise_not(x.val); return exec_op("NOT", 1); } -AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocation& where) { +AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, SrcLocation where) { if (x.is_int_const() && y.is_int_const()) { r.set_const(x.int_const * y.int_const); if (!r.int_const->is_valid()) { @@ -645,12 +657,12 @@ AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocat return exec_op("MUL", 2); } -AsmOp compile_mul(std::vector& res, std::vector& args, const SrcLocation& where) { +AsmOp compile_mul(std::vector& res, std::vector& args, SrcLocation where) { tolk_assert(res.size() == 1 && args.size() == 2); return compile_mul_internal(res[0], args[0], args[1], where); } -AsmOp compile_lshift(std::vector& res, std::vector& args, const SrcLocation& where) { +AsmOp compile_lshift(std::vector& res, std::vector& args, SrcLocation where) { tolk_assert(res.size() == 1 && args.size() == 2); VarDescr &r = res[0], &x = args[0], &y = args[1]; if (y.is_int_const()) { @@ -692,7 +704,7 @@ AsmOp compile_lshift(std::vector& res, std::vector& args, co return exec_op("LSHIFT", 
2); } -AsmOp compile_rshift(std::vector& res, std::vector& args, const SrcLocation& where, +AsmOp compile_rshift(std::vector& res, std::vector& args, SrcLocation where, int round_mode) { tolk_assert(res.size() == 1 && args.size() == 2); VarDescr &r = res[0], &x = args[0], &y = args[1]; @@ -722,7 +734,7 @@ AsmOp compile_rshift(std::vector& res, std::vector& args, co return exec_op(rshift, 2); } -AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocation& where, int round_mode) { +AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, SrcLocation where, int round_mode) { if (x.is_int_const() && y.is_int_const()) { r.set_const(div(x.int_const, y.int_const, round_mode)); if (!r.int_const->is_valid()) { @@ -762,12 +774,12 @@ AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocat return exec_op(op, 2); } -AsmOp compile_div(std::vector& res, std::vector& args, const SrcLocation& where, int round_mode) { +AsmOp compile_div(std::vector& res, std::vector& args, SrcLocation where, int round_mode) { tolk_assert(res.size() == 1 && args.size() == 2); return compile_div_internal(res[0], args[0], args[1], where, round_mode); } -AsmOp compile_mod(std::vector& res, std::vector& args, const SrcLocation& where, +AsmOp compile_mod(std::vector& res, std::vector& args, SrcLocation where, int round_mode) { tolk_assert(res.size() == 1 && args.size() == 2); VarDescr &r = res[0], &x = args[0], &y = args[1]; @@ -808,7 +820,7 @@ AsmOp compile_mod(std::vector& res, std::vector& args, const return exec_op(op, 2); } -AsmOp compile_muldiv(std::vector& res, std::vector& args, const SrcLocation& where, +AsmOp compile_muldiv(std::vector& res, std::vector& args, SrcLocation where, int round_mode) { tolk_assert(res.size() == 1 && args.size() == 3); VarDescr &r = res[0], &x = args[0], &y = args[1], &z = args[2]; @@ -978,7 +990,7 @@ AsmOp compile_cmp_int(std::vector& res, std::vector& args, i return exec_op(cmp_names[mode], 2); } -AsmOp 
compile_throw(std::vector& res, std::vector& args, const SrcLocation&) { +AsmOp compile_throw(std::vector& res, std::vector& args, SrcLocation) { tolk_assert(res.empty() && args.size() == 1); VarDescr& x = args[0]; if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { @@ -1010,7 +1022,7 @@ AsmOp compile_cond_throw(std::vector& res, std::vector& args } } -AsmOp compile_throw_arg(std::vector& res, std::vector& args, const SrcLocation&) { +AsmOp compile_throw_arg(std::vector& res, std::vector& args, SrcLocation) { tolk_assert(res.empty() && args.size() == 2); VarDescr &x = args[1]; if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { @@ -1101,7 +1113,7 @@ AsmOp compile_fetch_slice(std::vector& res, std::vector& arg } // _at(tuple t, int index) asm "INDEXVAR"; -AsmOp compile_tuple_at(std::vector& res, std::vector& args, const SrcLocation&) { +AsmOp compile_tuple_at(std::vector& res, std::vector& args, SrcLocation) { tolk_assert(args.size() == 2 && res.size() == 1); auto& y = args[1]; if (y.is_int_const() && y.int_const >= 0 && y.int_const < 16) { @@ -1112,7 +1124,7 @@ AsmOp compile_tuple_at(std::vector& res, std::vector& args, } // int null?(X arg) -AsmOp compile_is_null(std::vector& res, std::vector& args, const SrcLocation&) { +AsmOp compile_is_null(std::vector& res, std::vector& args, SrcLocation) { tolk_assert(args.size() == 1 && res.size() == 1); auto &x = args[0], &r = res[0]; if (x.always_null() || x.always_not_null()) { @@ -1128,12 +1140,12 @@ AsmOp compile_is_null(std::vector& res, std::vector& args, c void define_builtins() { using namespace std::placeholders; auto Unit = TypeExpr::new_unit(); - auto Int = TypeExpr::new_atomic(_Int); - auto Cell = TypeExpr::new_atomic(_Cell); - auto Slice = TypeExpr::new_atomic(_Slice); - auto Builder = TypeExpr::new_atomic(_Builder); - // auto Null = TypeExpr::new_atomic(_Null); - auto Tuple = TypeExpr::new_atomic(_Tuple); + auto Int = TypeExpr::new_atomic(TypeExpr::_Int); + auto Cell = 
TypeExpr::new_atomic(TypeExpr::_Cell); + auto Slice = TypeExpr::new_atomic(TypeExpr::_Slice); + auto Builder = TypeExpr::new_atomic(TypeExpr::_Builder); + // auto Null = TypeExpr::new_atomic(TypeExpr::_Null); + auto Tuple = TypeExpr::new_atomic(TypeExpr::_Tuple); auto Int2 = TypeExpr::new_tensor({Int, Int}); auto Int3 = TypeExpr::new_tensor({Int, Int, Int}); auto TupleInt = TypeExpr::new_tensor({Tuple, Int}); @@ -1156,9 +1168,16 @@ void define_builtins() { //auto arith_null_op = TypeExpr::new_map(TypeExpr::new_unit(), Int); auto throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit)); auto cond_throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int, Int}), Unit)); + + // prevent unused vars warnings (these vars are created to acquire initial id of TypeExpr::value) + static_cast(Z); + static_cast(XY); + static_cast(Cell); + define_builtin_func("_+_", arith_bin_op, compile_add); define_builtin_func("_-_", arith_bin_op, compile_sub); - define_builtin_func("-_", arith_un_op, compile_negate); + define_builtin_func("-_", arith_un_op, compile_unary_minus); + define_builtin_func("+_", arith_un_op, compile_unary_plus); define_builtin_func("_*_", arith_bin_op, compile_mul); define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1)); define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0)); @@ -1175,10 +1194,10 @@ void define_builtins() { define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1)); define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0)); define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1)); - define_builtin_func("_&_", arith_bin_op, compile_and); - define_builtin_func("_|_", arith_bin_op, compile_or); - define_builtin_func("_^_", arith_bin_op, compile_xor); - define_builtin_func("~_", arith_un_op, compile_not); + define_builtin_func("_&_", arith_bin_op, 
compile_bitwise_and); + define_builtin_func("_|_", arith_bin_op, compile_bitwise_or); + define_builtin_func("_^_", arith_bin_op, compile_bitwise_xor); + define_builtin_func("~_", arith_un_op, compile_bitwise_not); define_builtin_func("^_+=_", arith_bin_op, compile_add); define_builtin_func("^_-=_", arith_bin_op, compile_sub); define_builtin_func("^_*=_", arith_bin_op, compile_mul); @@ -1192,9 +1211,9 @@ void define_builtins() { define_builtin_func("^_>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1)); define_builtin_func("^_~>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0)); define_builtin_func("^_^>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1)); - define_builtin_func("^_&=_", arith_bin_op, compile_and); - define_builtin_func("^_|=_", arith_bin_op, compile_or); - define_builtin_func("^_^=_", arith_bin_op, compile_xor); + define_builtin_func("^_&=_", arith_bin_op, compile_bitwise_and); + define_builtin_func("^_|=_", arith_bin_op, compile_bitwise_or); + define_builtin_func("^_^=_", arith_bin_op, compile_bitwise_xor); define_builtin_func("muldiv", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1)); define_builtin_func("muldivr", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0)); define_builtin_func("muldivc", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1)); diff --git a/tolk/gen-abscode.cpp b/tolk/gen-abscode.cpp index a537d99ce..1c4afa674 100644 --- a/tolk/gen-abscode.cpp +++ b/tolk/gen-abscode.cpp @@ -41,25 +41,19 @@ Expr::Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list _arglist) } } -void Expr::chk_rvalue(const Lexem& lem) const { +void Expr::chk_rvalue(const Lexer& lex) const { if (!is_rvalue()) { - lem.error_at("rvalue expected before `", "`"); + lex.error_at("rvalue expected before `", "`"); } } -void Expr::chk_lvalue(const Lexem& lem) const { +void Expr::chk_lvalue(const Lexer& lex) const { if (!is_lvalue()) { - lem.error_at("lvalue expected 
before `", "`"); + lex.error_at("lvalue expected before `", "`"); } } -void Expr::chk_type(const Lexem& lem) const { - if (!is_type()) { - lem.error_at("type expression expected before `", "`"); - } -} - -bool Expr::deduce_type(const Lexem& lem) { +bool Expr::deduce_type(const Lexer& lex) { if (e_type) { return true; } @@ -83,7 +77,7 @@ bool Expr::deduce_type(const Lexem& lem) { std::ostringstream os; os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type " << fun_type->args[0] << ": " << ue; - lem.error(os.str()); + lex.error(os.str()); } e_type = fun_type->args[1]; TypeExpr::remove_indirect(e_type); @@ -98,7 +92,7 @@ bool Expr::deduce_type(const Lexem& lem) { std::ostringstream os; os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type << ": " << ue; - lem.error(os.str()); + lex.error(os.str()); } e_type = fun_type->args[1]; TypeExpr::remove_indirect(e_type); @@ -113,7 +107,7 @@ bool Expr::deduce_type(const Lexem& lem) { std::ostringstream os; os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type " << args[0]->e_type << ": " << ue; - lem.error(os.str()); + lex.error(os.str()); } e_type = args[0]->e_type; TypeExpr::remove_indirect(e_type); @@ -130,7 +124,7 @@ bool Expr::deduce_type(const Lexem& lem) { os << "cannot implicitly assign an expression of type " << args[1]->e_type << " to a variable or pattern of type " << rhs_type << " in modifying method `" << symbols.get_name(val) << "` : " << ue; - lem.error(os.str()); + lex.error(os.str()); } e_type = rhs_type->args[1]; TypeExpr::remove_indirect(e_type); @@ -139,13 +133,13 @@ bool Expr::deduce_type(const Lexem& lem) { } case _CondExpr: { tolk_assert(args.size() == 3); - auto flag_type = TypeExpr::new_atomic(_Int); + auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int); try { unify(args[0]->e_type, flag_type); } catch (UnifyError& ue) { std::ostringstream os; os << 
"condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue; - lem.error(os.str()); + lex.error(os.str()); } try { unify(args[1]->e_type, args[2]->e_type); @@ -153,7 +147,7 @@ bool Expr::deduce_type(const Lexem& lem) { std::ostringstream os; os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and " << args[2]->e_type << " : " << ue; - lem.error(os.str()); + lex.error(os.str()); } e_type = args[1]->e_type; TypeExpr::remove_indirect(e_type); @@ -176,13 +170,13 @@ int Expr::define_new_vars(CodeBlob& code) { } case _Var: if (val < 0) { - val = code.create_var(TmpVar::_Named, e_type, sym, &here); + val = code.create_var(TmpVar::_Named, e_type, sym, here); return 1; } break; case _Hole: if (val < 0) { - val = code.create_var(TmpVar::_Tmp, e_type, nullptr, &here); + val = code.create_var(TmpVar::_Tmp, e_type, nullptr, here); } break; } @@ -202,7 +196,7 @@ int Expr::predefine_vars() { } case _Var: if (!sym) { - tolk_assert(val < 0 && here.defined()); + tolk_assert(val < 0 && here.is_defined()); if (prohibited_var_names.count(symbols.get_name(~val))) { throw ParseError{ here, PSTRING() << "symbol `" << symbols.get_name(~val) << "` cannot be redefined as a variable"}; @@ -212,7 +206,7 @@ int Expr::predefine_vars() { if (!sym) { throw ParseError{here, std::string{"redefined variable `"} + symbols.get_name(~val) + "`"}; } - sym->value = new SymVal{SymVal::_Var, -1, e_type}; + sym->value = new SymVal{SymValKind::_Var, -1, e_type}; return 1; } break; @@ -221,17 +215,17 @@ int Expr::predefine_vars() { } var_idx_t Expr::new_tmp(CodeBlob& code) const { - return code.create_tmp_var(e_type, &here); + return code.create_tmp_var(e_type, here); } -void add_set_globs(CodeBlob& code, std::vector>& globs, const SrcLocation& here) { +void add_set_globs(CodeBlob& code, std::vector>& globs, SrcLocation here) { for (const auto& p : globs) { auto& op = code.emplace_back(here, Op::_SetGlob, std::vector{}, 
std::vector{ p.second }, p.first); op.set_impure(code); } } -std::vector pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, const SrcLocation& here) { +std::vector pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, SrcLocation here) { while (lhs->is_type_apply()) { lhs = lhs->args.at(0); } @@ -245,7 +239,7 @@ std::vector pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, con auto right = rhs->pre_compile(code); TypeExpr::remove_indirect(rhs->e_type); auto unpacked_type = rhs->e_type->args.at(0); - std::vector tmp{code.create_tmp_var(unpacked_type, &rhs->here)}; + std::vector tmp{code.create_tmp_var(unpacked_type, rhs->here)}; code.emplace_back(lhs->here, Op::_UnTuple, tmp, std::move(right)); auto tvar = new Expr{Expr::_Var}; tvar->set_val(tmp[0]); @@ -286,14 +280,14 @@ std::vector pre_compile_tensor(const std::vector& args, CodeB for (size_t j = 0; j < res_lists[i].size(); ++j) { TmpVar& var = code.vars.at(res_lists[i][j]); if (!lval_globs && (var.cls & TmpVar::_Named)) { - var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](const SrcLocation &here) mutable { + var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](SrcLocation here) mutable { if (!done) { done = true; modified_vars.push_back({i, j, cur_ops}); } }); } else { - var.on_modification.push_back([](const SrcLocation &) { + var.on_modification.push_back([](SrcLocation) { }); } } @@ -307,8 +301,8 @@ std::vector pre_compile_tensor(const std::vector& args, CodeB for (size_t idx = modified_vars.size(); idx--; ) { const ModifiedVar &m = modified_vars[idx]; var_idx_t orig_v = res_lists[m.i][m.j]; - var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where.get()); - std::unique_ptr op = std::make_unique(*code.vars[orig_v].where, Op::_Let); + var_idx_t tmp_v = code.create_tmp_var(code.vars[orig_v].v_type, code.vars[orig_v].where); + std::unique_ptr op = std::make_unique(code.vars[orig_v].where, 
Op::_Let); op->left = {tmp_v}; op->right = {orig_v}; op->next = std::move((*m.cur_ops)); diff --git a/tolk/keywords.cpp b/tolk/keywords.cpp deleted file mode 100644 index 50d55c41d..000000000 --- a/tolk/keywords.cpp +++ /dev/null @@ -1,129 +0,0 @@ -/* - This file is part of TON Blockchain Library. - - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . -*/ -#include "tolk.h" - -namespace tolk { - -/* - * - * KEYWORD DEFINITION - * - */ - -void define_keywords() { - symbols.add_kw_char('+') - .add_kw_char('-') - .add_kw_char('*') - .add_kw_char('/') - .add_kw_char('%') - .add_kw_char('?') - .add_kw_char(':') - .add_kw_char(',') - .add_kw_char(';') - .add_kw_char('(') - .add_kw_char(')') - .add_kw_char('[') - .add_kw_char(']') - .add_kw_char('{') - .add_kw_char('}') - .add_kw_char('=') - .add_kw_char('_') - .add_kw_char('<') - .add_kw_char('>') - .add_kw_char('&') - .add_kw_char('|') - .add_kw_char('^') - .add_kw_char('~'); - - symbols.add_keyword("==", Keyword::_Eq) - .add_keyword("!=", Keyword::_Neq) - .add_keyword("<=", Keyword::_Leq) - .add_keyword(">=", Keyword::_Geq) - .add_keyword("<=>", Keyword::_Spaceship) - .add_keyword("<<", Keyword::_Lshift) - .add_keyword(">>", Keyword::_Rshift) - .add_keyword("~>>", Keyword::_RshiftR) - .add_keyword("^>>", Keyword::_RshiftC) - .add_keyword("~/", Keyword::_DivR) - .add_keyword("^/", Keyword::_DivC) - .add_keyword("~%", 
Keyword::_ModR) - .add_keyword("^%", Keyword::_ModC) - .add_keyword("/%", Keyword::_DivMod) - .add_keyword("+=", Keyword::_PlusLet) - .add_keyword("-=", Keyword::_MinusLet) - .add_keyword("*=", Keyword::_TimesLet) - .add_keyword("/=", Keyword::_DivLet) - .add_keyword("~/=", Keyword::_DivRLet) - .add_keyword("^/=", Keyword::_DivCLet) - .add_keyword("%=", Keyword::_ModLet) - .add_keyword("~%=", Keyword::_ModRLet) - .add_keyword("^%=", Keyword::_ModCLet) - .add_keyword("<<=", Keyword::_LshiftLet) - .add_keyword(">>=", Keyword::_RshiftLet) - .add_keyword("~>>=", Keyword::_RshiftRLet) - .add_keyword("^>>=", Keyword::_RshiftCLet) - .add_keyword("&=", Keyword::_AndLet) - .add_keyword("|=", Keyword::_OrLet) - .add_keyword("^=", Keyword::_XorLet); - - symbols.add_keyword("return", Keyword::_Return) - .add_keyword("var", Keyword::_Var) - .add_keyword("repeat", Keyword::_Repeat) - .add_keyword("do", Keyword::_Do) - .add_keyword("while", Keyword::_While) - .add_keyword("until", Keyword::_Until) - .add_keyword("try", Keyword::_Try) - .add_keyword("catch", Keyword::_Catch) - .add_keyword("if", Keyword::_If) - .add_keyword("ifnot", Keyword::_Ifnot) - .add_keyword("then", Keyword::_Then) - .add_keyword("else", Keyword::_Else) - .add_keyword("elseif", Keyword::_Elseif) - .add_keyword("elseifnot", Keyword::_Elseifnot); - - symbols.add_keyword("int", Keyword::_Int) - .add_keyword("cell", Keyword::_Cell) - .add_keyword("slice", Keyword::_Slice) - .add_keyword("builder", Keyword::_Builder) - .add_keyword("cont", Keyword::_Cont) - .add_keyword("tuple", Keyword::_Tuple) - .add_keyword("type", Keyword::_Type) - .add_keyword("->", Keyword::_Mapsto) - .add_keyword("forall", Keyword::_Forall); - - symbols.add_keyword("extern", Keyword::_Extern) - .add_keyword("global", Keyword::_Global) - .add_keyword("asm", Keyword::_Asm) - .add_keyword("impure", Keyword::_Impure) - .add_keyword("pure", Keyword::_Pure) - .add_keyword("inline", Keyword::_Inline) - .add_keyword("inline_ref", 
Keyword::_InlineRef) - .add_keyword("builtin", Keyword::_Builtin) - .add_keyword("auto_apply", Keyword::_AutoApply) - .add_keyword("method_id", Keyword::_MethodId) - .add_keyword("get", Keyword::_Get) - .add_keyword("operator", Keyword::_Operator) - .add_keyword("infix", Keyword::_Infix) - .add_keyword("infixl", Keyword::_Infixl) - .add_keyword("infixr", Keyword::_Infixr) - .add_keyword("const", Keyword::_Const); - - symbols.add_keyword("#pragma", Keyword::_PragmaHashtag) - .add_keyword("#include", Keyword::_IncludeHashtag); -} - -} // namespace tolk diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp index e54c70e41..6d066d294 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -16,335 +16,632 @@ */ #include "lexer.h" #include "symtable.h" -#include #include namespace tolk { -/* - * - * LEXER - * - */ - -std::string Lexem::lexem_name_str(int idx) { - if (idx == Eof) { - return "end of file"; - } else if (idx == Ident) { - return "identifier"; - } else if (idx == Number) { - return "number"; - } else if (idx == String) { - return "string"; - } else if (idx == Special) { - return "special"; - } else if (symbols.get_keyword(idx)) { - return "`" + symbols.get_keyword(idx)->str + "`"; - } else { - std::ostringstream os{""; - return os.str(); - } -} +// By 'chunk' in lexer I mean a token or a list of tokens parsed simultaneously. +// E.g., when we meet "str", ChunkString is called, it emits tok_string. +// E.g., when we meet "str"x, ChunkString emits not only tok_string, but tok_string_modifier. +// E.g., when we meet //, ChunkInlineComment is called, it emits nothing (just skips a line). +// We store all valid chunks lexers in a prefix tree (LexingTrie), see below. 
+struct ChunkLexerBase { + ChunkLexerBase(const ChunkLexerBase&) = delete; + ChunkLexerBase &operator=(const ChunkLexerBase&) = delete; + ChunkLexerBase() = default; -std::string Lexem::name_str() const { - if (tp == Ident) { - return std::string{"identifier `"} + symbols.get_name(val) + "`"; - } else if (tp == String) { - return std::string{"string \""} + str + '"'; - } else { - return lexem_name_str(tp); - } + virtual bool parse(Lexer* lex) const = 0; + virtual ~ChunkLexerBase() = default; +}; + +template +static T* singleton() { + static T obj; + return &obj; } -bool is_number(std::string str) { - auto st = str.begin(), en = str.end(); - if (st == en) { - return false; - } - if (*st == '-') { - st++; - } - bool hex = false; - if (st + 1 < en && *st == '0' && st[1] == 'x') { - st += 2; - hex = true; +// LexingTrie is a prefix tree storing all available Tolk language constructs. +// It's effectively a map of a prefix to ChunkLexerBase. +class LexingTrie { + LexingTrie** next{nullptr}; // either nullptr or [256] + ChunkLexerBase* val{nullptr}; // non-null for leaves + + GNU_ATTRIBUTE_ALWAYS_INLINE void ensure_next_allocated() { + if (next == nullptr) { + next = new LexingTrie*[256]; + std::memset(next, 0, 256 * sizeof(LexingTrie*)); + } } - if (st == en) { - return false; + + GNU_ATTRIBUTE_ALWAYS_INLINE void ensure_symbol_allocated(uint8_t symbol) const { + if (next[symbol] == nullptr) { + next[symbol] = new LexingTrie; + } } - while (st < en) { - int c = *st; - if (c >= '0' && c <= '9') { - ++st; - continue; + +public: + // Maps a prefix onto a chunk lexer. + // E.g. " -> ChunkString + // E.g.
""" -> ChunkMultilineString + void add_prefix(const char* s, ChunkLexerBase* val) { + LexingTrie* cur = this; + + for (; *s; ++s) { + uint8_t symbol = static_cast(*s); + cur->ensure_next_allocated(); + cur->ensure_symbol_allocated(symbol); + cur = cur->next[symbol]; } - if (!hex) { - return false; + +#ifdef TOLK_DEBUG + assert(!cur->val); +#endif + cur->val = val; + } + + // Maps a pattern onto a chunk lexer. + // E.g. -[0-9] -> ChunkNegativeNumber + // Internally, it expands the pattern to all possible prefixes: -0, -1, etc. + // (for example, [0-9][a-z_$] gives 10*28=280 prefixes) + void add_pattern(const char* pattern, ChunkLexerBase* val) { + std::vector all_possible_trie{this}; + + for (const char* c = pattern; *c; ++c) { + std::string to_append; + if (*c == '[') { + c++; + while (*c != ']') { // assume that input is correct, no out-of-string checks + if (*(c + 1) == '-') { + char l = *c, r = *(c + 2); + for (char symbol = l; symbol <= r; ++symbol) { + to_append += symbol; + } + c += 3; + } else { + to_append += *c; + c++; + } + } + } else { + to_append += *c; + } + + std::vector next_all_possible_trie; + next_all_possible_trie.reserve(all_possible_trie.size() * to_append.size()); + for (LexingTrie* cur : all_possible_trie) { + cur->ensure_next_allocated(); + for (uint8_t symbol : to_append) { + cur->ensure_symbol_allocated(symbol); + next_all_possible_trie.emplace_back(cur->next[symbol]); + } + } + all_possible_trie = std::move(next_all_possible_trie); } - c |= 0x20; - if (c < 'a' || c > 'f') { - return false; + + for (LexingTrie* trie : all_possible_trie) { + trie->val = val; } - ++st; } - return true; -} -int Lexem::classify() { - if (tp != Unknown) { - return tp; - } - sym_idx_t i = symbols.lookup(str); - if (i) { - assert(str == symbols[i]->str); - str = symbols[i]->str; - sym_idx_t idx = symbols[i]->idx; - tp = (idx < 0 ?
-idx : Ident); - val = i; - } else if (is_number(str)) { - tp = Number; - } else { - tp = 0; - } - if (tp == Unknown) { - tp = Ident; - val = symbols.lookup(str, 1); + // Looks up a chunk lexer given a string (in practice, s points to cur position in the middle of the file). + // It returns the deepest case: pointing to ", it will return ChunkMultilineString if """, or ChunkString otherwise. + ChunkLexerBase* get_deepest(const char* s) const { + const LexingTrie* best = this; + + for (const LexingTrie* cur = this; cur && cur->next; ++s) { + cur = cur->next[static_cast(*s)]; // if s reaches \0, cur will just become nullptr, and loop will end + if (cur && cur->val) { + best = cur; + } + } + + return best->val; } - return tp; -} +}; -int Lexem::set(std::string _str, const SrcLocation& _loc, int _tp, int _val) { - str = _str; - loc = _loc; - tp = _tp; - val = _val; - return classify(); -} +// +// ---------------------------------------------------------------------- +// A list of valid parsed chunks. +// + +// An inline comment, starting from '//' +struct ChunkInlineComment final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + lex->skip_line(); + return true; + } +}; -Lexer::Lexer(SourceReader& _src, std::string active_chars, std::string quote_chars, std::string multiline_quote) - : src(_src), eof(false), lexem("", src.here(), Lexem::Undefined), peek_lexem("", {}, Lexem::Undefined), - multiline_quote(std::move(multiline_quote)) { - std::memset(char_class, 0, sizeof(char_class)); - unsigned char activity = cc::active; - for (char c : active_chars) { - if (c == ' ') { - if (!--activity) { - activity = cc::allow_repeat; +// A multiline comment, starting from '/*' +// Note, that nested comments are not supported.
+struct ChunkMultilineComment final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + while (!lex->is_eof()) { + // todo drop -} later + if ((lex->char_at() == '-' && lex->char_at(1) == '}') || (lex->char_at() == '*' && lex->char_at(1) == '/')) { + lex->skip_chars(2); + return true; } - } else if ((unsigned)c < 0x80) { - char_class[(unsigned)c] |= activity; + lex->skip_chars(1); } + return true; // it's okay if comment extends past end of file } - for (int c : quote_chars) { - if (c > ' ' && c <= 0x7f) { - char_class[(unsigned)c] |= cc::quote_char; +}; + +// A string, starting from " +// Note, that there are no escape symbols inside: the purpose of strings in Tolk just doesn't need it. +// After a closing quote, a string modifier may be present, like "Ef8zMzMzMzMzMzMzMzMzMzM0vF"a. +// If present, it emits a separate tok_string_modifier. +struct ChunkString final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + const char* str_begin = lex->c_str(); + lex->skip_chars(1); + while (!lex->is_eof() && lex->char_at() != '"' && lex->char_at() != '\n') { + lex->skip_chars(1); + } + if (lex->char_at() != '"') { + lex->error("string extends past end of line"); } - } -} -void Lexer::set_comment_tokens(const std::string &eol_cmts, const std::string &open_cmts, const std::string &close_cmts) { - set_spec(eol_cmt, eol_cmts); - set_spec(cmt_op, open_cmts); - set_spec(cmt_cl, close_cmts); -} + std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1); + lex->skip_chars(1); + lex->add_token(tok_string_const, str_val); -void Lexer::set_comment2_tokens(const std::string &eol_cmts2, const std::string &open_cmts2, const std::string &close_cmts2) { - set_spec(eol_cmt2, eol_cmts2); - set_spec(cmt_op2, open_cmts2); - set_spec(cmt_cl2, close_cmts2); -} + if (std::isalpha(lex->char_at())) { + std::string_view modifier_val(lex->c_str(), 1); + lex->skip_chars(1); + lex->add_token(tok_string_modifier, modifier_val); + } -void Lexer::start_parsing() { - 
next(); -} + return true; + } +}; -void Lexer::set_spec(std::array& arr, std::string setup) { - arr[0] = arr[1] = arr[2] = -0x100; - std::size_t n = setup.size(), i; - for (i = 0; i < n; i++) { - if (setup[i] == ' ') { - continue; - } - if (i == n - 1 || setup[i + 1] == ' ') { - arr[0] = setup[i]; - } else if (i == n - 2 || (i < n - 2 && setup[i + 2] == ' ')) { - arr[1] = setup[i]; - arr[2] = setup[++i]; - } else { - while (i < n && setup[i] != ' ') { - i++; +// A string starting from """ +// Used for multiline asm constructions. Can not have a postfix modifier. +struct ChunkMultilineString final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + const char* str_begin = lex->c_str(); + lex->skip_chars(3); + while (!lex->is_eof()) { + if (lex->char_at() == '"' && lex->char_at(1) == '"' && lex->char_at(2) == '"') { + break; } + lex->skip_chars(1); + } + if (lex->is_eof()) { + lex->error("string extends past end of file"); } - } -} -bool Lexer::is_multiline_quote(const char* begin, const char* end) { - if (multiline_quote.empty()) { - return false; + std::string_view str_val(str_begin + 3, lex->c_str() - str_begin - 3); + lex->skip_chars(3); + lex->add_token(tok_string_const, str_val); + return true; } - for (const char& c : multiline_quote) { - if (begin == end || *begin != c) { +}; + +// A number, may be a hex one. 
+struct ChunkNumber final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + const char* str_begin = lex->c_str(); + bool hex = false; + if (lex->char_at() == '0' && lex->char_at(1) == 'x') { + lex->skip_chars(2); + hex = true; + } + if (lex->is_eof()) { return false; } - ++begin; + while (!lex->is_eof()) { + char c = lex->char_at(); + if (c >= '0' && c <= '9') { + lex->skip_chars(1); + continue; + } + if (!hex) { + break; + } + c |= 0x20; + if (c < 'a' || c > 'f') { + break; + } + lex->skip_chars(1); + } + + std::string_view str_val(str_begin, lex->c_str() - str_begin); + lex->add_token(tok_int_const, str_val); + return true; } - return true; -} +}; + +// Anything starting from # is a compiler directive. +// Technically, #include and #pragma can be mapped as separate chunks, +// but storing such long strings in a trie increases its memory usage. +struct ChunkCompilerDirective final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + const char* str_begin = lex->c_str(); -void Lexer::expect(int exp_tp, const char* msg) { - if (tp() != exp_tp) { - throw ParseError{lexem.loc, (msg ? std::string{msg} : Lexem::lexem_name_str(exp_tp)) + " expected instead of " + - cur().name_str()}; + lex->skip_chars(1); + while (std::isalnum(lex->char_at())) { + lex->skip_chars(1); + } + + std::string_view str_val(str_begin, lex->c_str() - str_begin); + if (str_val == "#include") { + lex->add_token(tok_include, str_val); + return true; + } + if (str_val == "#pragma") { + lex->add_token(tok_pragma, str_val); + return true; + } + + lex->error("unknown compiler directive"); } - next(); -} +}; + +// Tokens like !=, &, etc. emit just a simple TokenType. +// Since they are stored in trie, "parsing" them is just skipping len chars. 
+struct ChunkSimpleToken final : ChunkLexerBase { + TokenType tp; + int len; -const Lexem& Lexer::next() { - if (peek_lexem.valid()) { - lexem = std::move(peek_lexem); - peek_lexem.clear({}, Lexem::Undefined); - eof = (lexem.tp == Lexem::Eof); - return lexem; + ChunkSimpleToken(TokenType tp, int len) : tp(tp), len(len) {} + + bool parse(Lexer* lex) const override { + std::string_view str_val(lex->c_str(), len); + lex->add_token(tp, str_val); + lex->skip_chars(len); + return true; } - if (eof) { - return lexem.clear(src.here(), Lexem::Eof); +}; + +// Spaces and other space-like symbols are just skipped. +struct ChunkSkipWhitespace final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + lex->skip_chars(1); + lex->skip_spaces(); + return true; } - long long comm = 1; - // the code below is very complicated, because it tried to support one-symbol start/end and nesting - // in Tolk, we decided to stop supporting nesting (it was never used in practice and almost impossible for js highlighters) - // later on I'll simplify this code (more precisely, rewrite lexer from scratch) - while (!src.seek_eof()) { - int cc = src.cur_char(), nc = src.next_char(); - // note, that in practice, [0]-th element is -256, condition for [0]-th is always false - // todo rewrite this all in the future - if (cc == eol_cmt[0] || (cc == eol_cmt[1] && nc == eol_cmt[2]) || cc == eol_cmt2[0] || (cc == eol_cmt2[1] && nc == eol_cmt2[2])) { - if (comm == 1) { // just "//" — skip a whole line - src.load_line(); - } else { // if "//" is nested into "/*", continue reading, since "*/" may be met - src.advance(1); - } - } else if (cc == cmt_op[1] && nc == cmt_op[2] || cc == cmt_op2[1] && nc == cmt_op2[2]) { - src.advance(2); - comm = comm * 2 + 1; - } else if (cc == cmt_op[0] || cc == cmt_op2[0]) { // always false - src.advance(1); - comm *= 2; - } else if (comm == 1) { - break; // means that we are not inside a comment - } else if (cc == cmt_cl[1] && nc == cmt_cl[2] || cc == cmt_cl2[1] && nc 
== cmt_cl2[2]) { - if (!(comm & 1)) { // always false - src.error(std::string{"a `"} + (char)cmt_op[0] + "` comment closed by `" + (char)cmt_cl[1] + (char)cmt_cl[2] + - "`"); - } - // note that {- may be closed with */, but assume it's ok (we'll get rid of {- in the future) - comm = 1; - src.advance(2); - } else if (cc == cmt_cl[0] || cc == cmt_cl2[0]) { // always false - if (!(comm & 1)) { - src.error(std::string{"a `"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment closed by `" + (char)cmt_cl[0] + - "`"); - } - comm = 1; - src.advance(1); - } else { - src.advance(1); +}; + +// Here we handle corner cases of grammar that are requested on demand. +// E.g., for 'pragma version >0.5.0', '0.5.0' should be parsed specially to emit tok_semver. +// See TolkLanguageGrammar::parse_next_chunk_special(). +struct ChunkSpecialParsing { + static bool parse_pragma_name(Lexer* lex) { + const char* str_begin = lex->c_str(); + while (std::isalnum(lex->char_at()) || lex->char_at() == '-') { + lex->skip_chars(1); } - if (comm < 0) { - src.error("too many nested comments"); + + std::string_view str_val(str_begin, lex->c_str() - str_begin); + if (str_val.empty()) { + return false; } + lex->add_token(tok_pragma_name, str_val); + return true; } - if (src.seek_eof()) { - eof = true; - if (comm > 1) { - src.error("comment extends past end of file"); + + static bool parse_semver(Lexer* lex) { + const char* str_begin = lex->c_str(); + while (std::isdigit(lex->char_at()) || lex->char_at() == '.') { + lex->skip_chars(1); } - return lexem.clear(src.here(), Lexem::Eof); + + std::string_view str_val(str_begin, lex->c_str() - str_begin); + if (str_val.empty()) { + return false; + } + lex->add_token(tok_semver, str_val); + return true; } - if (is_multiline_quote(src.get_ptr(), src.get_end_ptr())) { - src.advance(multiline_quote.size()); - const char* end = nullptr; - SrcLocation here = src.here(); - std::string body; - while (!src.is_eof()) { - if (src.is_eoln()) { - body.push_back('\n'); - 
src.load_line(); - continue; - } - if (is_multiline_quote(src.get_ptr(), src.get_end_ptr())) { - end = src.get_ptr(); - src.advance(multiline_quote.size()); +}; + +// Anything starting from a valid identifier beginning symbol is parsed as an identifier. +// But if a resulting string is a keyword, a corresponding token is emitted instead of tok_identifier. +struct ChunkIdentifierOrKeyword final : ChunkLexerBase { + // having parsed str up to the valid end, look up whether it's a valid keyword + // in the future, this could be a bit more effective than just comparing strings (e.g. gperf), + // but nevertheless, performance of the naive code below is reasonably good + static TokenType maybe_keyword(std::string_view str) { + switch (str.size()) { + case 1: + if (str == "~") return tok_bitwise_not; // todo attention + if (str == "_") return tok_underscore; // todo attention + break; + case 2: + if (str == "do") return tok_do; + if (str == "if") return tok_if; + break; + case 3: + if (str == "int") return tok_int; + if (str == "var") return tok_var; + if (str == "asm") return tok_asm; + if (str == "get") return tok_get; + if (str == "try") return tok_try; + break; + case 4: + if (str == "else") return tok_else; + if (str == "pure") return tok_pure; + if (str == "then") return tok_then; + if (str == "cell") return tok_cell; + if (str == "cont") return tok_cont; + if (str == "type") return tok_type; // todo unused token? 
+ break; + case 5: + if (str == "slice") return tok_slice; + if (str == "tuple") return tok_tuple; + if (str == "const") return tok_const; + if (str == "while") return tok_while; + if (str == "until") return tok_until; + if (str == "catch") return tok_catch; + if (str == "ifnot") return tok_ifnot; + break; + case 6: + if (str == "return") return tok_return; + if (str == "repeat") return tok_repeat; + if (str == "elseif") return tok_elseif; + if (str == "forall") return tok_forall; + if (str == "extern") return tok_extern; + if (str == "global") return tok_global; + if (str == "impure") return tok_impure; + if (str == "inline") return tok_inline; + break; + case 7: + if (str == "builder") return tok_builder; + if (str == "builtin") return tok_builtin; + break; + case 8: + if (str == "operator") return tok_operator; + break; + case 9: + if (str == "elseifnot") return tok_elseifnot; + if (str == "method_id") return tok_method_id; + break; + case 10: + if (str == "inline_ref") return tok_inlineref; + if (str == "auto_apply") return tok_autoapply; + break; + default: break; - } - body.push_back(src.cur_char()); - src.advance(1); } - if (!end) { - src.error("string extends past end of file"); + return tok_empty; + } + + bool parse(Lexer* lex) const override { + const char* sym_begin = lex->c_str(); + lex->skip_chars(1); + while (!lex->is_eof()) { + char c = lex->char_at(); + // the pattern of valid identifier first symbol is provided in trie, here we test for identifier middle + bool allowed_in_identifier = std::isalnum(c) || c == '_' || c == '$' || c == ':' || c == '?' || c == '!' 
|| c == '\''; + if (!allowed_in_identifier) { + break; + } + lex->skip_chars(1); } - lexem.set(body, here, Lexem::String); - int c = src.cur_char(); - if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { - lexem.val = c; - src.advance(1); + + std::string_view str_val(sym_begin, lex->c_str() - sym_begin); + if (TokenType kw_tok = maybe_keyword(str_val)) { + lex->add_token(kw_tok, str_val); + } else { + symbols.lookup_add(static_cast(str_val)); + lex->add_token(tok_identifier, str_val); } - return lexem; + return true; } - int c = src.cur_char(); - const char* end = src.get_ptr(); - if (is_quote_char(c) || c == '`') { - int qc = c; - ++end; - while (end < src.get_end_ptr() && *end != qc) { - ++end; - } - if (*end != qc) { - src.error(qc == '`' ? "a `back-quoted` token extends past end of line" : "string extends past end of line"); +}; + +// Like in Kotlin, `backticks` can be used to wrap identifiers (both in declarations/usage, both for vars/functions). +// E.g.: function `do`() { var `with spaces` = 1; } +// This could be useful to use reserved names as identifiers (in a probable codegen from TL, for example). +struct ChunkIdentifierInBackticks final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + const char* str_begin = lex->c_str(); + lex->skip_chars(1); + while (!lex->is_eof() && lex->char_at() != '`' && lex->char_at() != '\n') { + if (std::isspace(lex->char_at())) { // probably, I'll remove this restriction after rewriting symtable and cur_sym_idx + lex->error("An identifier can't have a space in its name (even inside backticks)"); + } + lex->skip_chars(1); } - lexem.set(std::string{src.get_ptr() + 1, end}, src.here(), qc == '`' ? 
Lexem::Unknown : Lexem::String); - src.set_ptr(end + 1); - c = src.cur_char(); - if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { - lexem.val = c; - src.set_ptr(end + 2); + if (lex->char_at() != '`') { + lex->error("Unclosed backtick `"); } - // std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl; - return lexem; + + std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1); + lex->skip_chars(1); + symbols.lookup_add(static_cast(str_val)); + lex->add_token(tok_identifier, str_val); + return true; } - int len = 0, pc = -0x100; - while (end < src.get_end_ptr()) { - c = *end; - bool repeated = (c == pc && is_repeatable(c)); - if (c == ' ' || c == 9 || (len && is_left_active(c) && !repeated)) { - break; +}; + +// +// ---------------------------------------------------------------------- +// Here we define a grammar of Tolk. +// All valid chunks prefixes are stored in trie. +// + +struct TolkLanguageGrammar { + static LexingTrie trie; + + static bool parse_next_chunk(Lexer* lex) { + const ChunkLexerBase* best = trie.get_deepest(lex->c_str()); + return best && best->parse(lex); + } + + static bool parse_next_chunk_special(Lexer* lex, TokenType parse_next_as) { + switch (parse_next_as) { + case tok_pragma_name: + return ChunkSpecialParsing::parse_pragma_name(lex); + case tok_semver: + return ChunkSpecialParsing::parse_semver(lex); + default: + assert(false); + return false; } - ++len; - ++end; - if (is_right_active(c) && !repeated) { - break; + } + + static void register_token(const char* str, int len, TokenType tp) { + trie.add_prefix(str, new ChunkSimpleToken(tp, len)); + } + + static void init() { + trie.add_prefix("//", singleton()); + trie.add_prefix(";;", singleton()); + trie.add_prefix("/*", singleton()); + trie.add_prefix("{-", singleton()); + trie.add_prefix(R"(")", singleton()); + trie.add_prefix(R"(""")", singleton()); + trie.add_prefix(" ", singleton()); + trie.add_prefix("\t", singleton()); + trie.add_prefix("\r", singleton()); + 
trie.add_prefix("\n", singleton()); + trie.add_prefix("#", singleton()); + + trie.add_pattern("[0-9]", singleton()); + // todo think of . ~ + trie.add_pattern("[a-zA-Z_$.~]", singleton()); + trie.add_prefix("`", singleton()); + + register_token("+", 1, tok_plus); + register_token("-", 1, tok_minus); + register_token("*", 1, tok_mul); + register_token("/", 1, tok_div); + register_token("%", 1, tok_mod); + register_token("?", 1, tok_question); + register_token(":", 1, tok_colon); + register_token(",", 1, tok_comma); + register_token(";", 1, tok_semicolon); + register_token("(", 1, tok_oppar); + register_token(")", 1, tok_clpar); + register_token("[", 1, tok_opbracket); + register_token("]", 1, tok_clbracket); + register_token("{", 1, tok_opbrace); + register_token("}", 1, tok_clbrace); + register_token("=", 1, tok_assign); + register_token("<", 1, tok_lt); + register_token(">", 1, tok_gt); + register_token("&", 1, tok_bitwise_and); + register_token("|", 1, tok_bitwise_or); + register_token("^", 1, tok_bitwise_xor); + register_token("==", 2, tok_eq); + register_token("!=", 2, tok_neq); + register_token("<=", 2, tok_leq); + register_token(">=", 2, tok_geq); + register_token("<<", 2, tok_lshift); + register_token(">>", 2, tok_rshift); + register_token("~/", 2, tok_divR); + register_token("^/", 2, tok_divC); + register_token("~%", 2, tok_modR); + register_token("^%", 2, tok_modC); + register_token("/%", 2, tok_divmod); + register_token("+=", 2, tok_set_plus); + register_token("-=", 2, tok_set_minus); + register_token("*=", 2, tok_set_mul); + register_token("/=", 2, tok_set_div); + register_token("%=", 2, tok_set_mod); + register_token("&=", 2, tok_set_bitwise_and); + register_token("|=", 2, tok_set_bitwise_or); + register_token("^=", 2, tok_set_bitwise_xor); + register_token("->", 2, tok_mapsto); + register_token("<=>", 3, tok_spaceship); + register_token("~>>", 3, tok_rshiftR); + register_token("^>>", 3, tok_rshiftC); + register_token("~/=", 3, tok_set_divR); + 
register_token("^/=", 3, tok_set_divC); + register_token("~%=", 3, tok_set_modR); + register_token("^%=", 3, tok_set_modC); + register_token("<<=", 3, tok_set_lshift); + register_token(">>=", 3, tok_set_rshift); + register_token("~>>=", 4, tok_set_rshiftR); + register_token("^>>=", 4, tok_set_rshiftC); + } +}; + +LexingTrie TolkLanguageGrammar::trie; + +// +// ---------------------------------------------------------------------- +// The Lexer class is to be used outside (by parser, which constructs AST from tokens). +// It's streaming. It means, that `next()` parses a next token on demand +// (instead of parsing all file contents to vector and iterating over it). +// Parsing on demand uses effectively less memory. +// Note, that chunks, being parsed, call `add_token()`, and a chunk may add multiple tokens at once. +// That's why a small circular buffer for tokens is used. +// `last_token_idx` actually means a number of total tokens added. +// `cur_token_idx` is a number of returned by `next()`. +// It's assumed that an input file has already been loaded, its contents is present and won't be deleted +// (`start`, `cur` and `end`, as well as every Token str_val, points inside file->text).
+// + +Lexer::Lexer(const SrcFile* file) + : file(file) + , p_start(file->text.data()) + , p_end(p_start + file->text.size()) + , p_next(p_start) + , location(file) { + next(); +} + +void Lexer::next() { + while (cur_token_idx == last_token_idx && !is_eof()) { + update_location(); + if (!TolkLanguageGrammar::parse_next_chunk(this)) { + error("Failed to parse"); } - pc = c; } - lexem.set(std::string{src.get_ptr(), end}, src.here()); - src.set_ptr(end); - // std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl; - return lexem; + if (is_eof()) { + add_token(tok_eof, file->text); + } + cur_token = tokens_circularbuf[++cur_token_idx & 7]; } -const Lexem& Lexer::peek() { - if (peek_lexem.valid()) { - return peek_lexem; +void Lexer::next_special(TokenType parse_next_as, const char* str_expected) { + assert(cur_token_idx == last_token_idx); + skip_spaces(); + update_location(); + if (!TolkLanguageGrammar::parse_next_chunk_special(this, parse_next_as)) { + error(std::string(str_expected) + " expected"); } - if (eof) { - return lexem.clear(src.here(), Lexem::Eof); + cur_token = tokens_circularbuf[++cur_token_idx & 7]; +} + +int Lexer::cur_sym_idx() const { + assert(tok() == tok_identifier); + return symbols.lookup_add(cur_str_std_string()); +} + +void Lexer::error(const std::string& err_msg) const { + throw ParseError(cur_location(), err_msg); +} + +void Lexer::error_at(const std::string& prefix, const std::string& suffix) const { + throw ParseError(cur_location(), prefix + cur_str_std_string() + suffix); +} + +void Lexer::on_expect_call_failed(const char* str_expected) const { + throw ParseError(cur_location(), std::string(str_expected) + " expected instead of `" + cur_str_std_string() + "`"); +} + +void lexer_init() { + TolkLanguageGrammar::init(); +} + +// todo #ifdef TOLK_PROFILING +// As told above, `next()` produces tokens on demand, while AST is being generated. +// Hence, it's difficult to measure Lexer performance separately. 
+// This function can be called just to tick Lexer performance, it just scans all input files. +// There is no sense to use it in production, but when refactoring and optimizing Lexer, it's useful. +void lexer_measure_performance(const std::vector& files_to_just_parse) { + for (const SrcFile* file : files_to_just_parse) { + Lexer lex(file); + while (!lex.is_eof()) { + lex.next(); + } } - Lexem keep = std::move(lexem); - next(); - peek_lexem = std::move(lexem); - lexem = std::move(keep); - eof = false; - return peek_lexem; } } // namespace tolk diff --git a/tolk/lexer.h b/tolk/lexer.h index 816f7a827..e0fa76065 100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -15,104 +15,225 @@ along with TON Blockchain Library. If not, see . */ #pragma once -#include "srcread.h" -#include -#include -#include + +#include "platform-utils.h" +#include "src-file.h" +#include namespace tolk { -/* - * - * LEXER - * - */ - -struct Lexem { - enum { Undefined = -2, Eof = -1, Unknown = 0, Ident = 0, Number = 1, Special = 2, String = 3 }; - int tp; - int val; - std::string str; - SrcLocation loc; - int classify(); - Lexem(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0) - : tp(_tp), val(_val), str(_str), loc(_loc) { - classify(); - } - int set(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0); - Lexem& clear(const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0) { - tp = _tp; - val = _val; - loc = _loc; - str = ""; - return *this; - } - bool valid() const { - return tp != Undefined; - } - std::string name_str() const; - void error(std::string _str) const { - throw ParseError{loc, _str}; - } - void error_at(std::string str1, std::string str2) const { - error(str1 + str + str2); - } +enum TokenType { + tok_empty, + + tok_int_const, + tok_string_const, + tok_string_modifier, + + tok_identifier, - static std::string lexem_name_str(int idx); + tok_plus, + tok_minus, + tok_mul, + tok_div, + tok_mod, + tok_question, + 
tok_colon, + tok_comma, + tok_semicolon, + tok_oppar, + tok_clpar, + tok_opbracket, + tok_clbracket, + tok_opbrace, + tok_clbrace, + tok_assign, + tok_underscore, + tok_lt, + tok_gt, + tok_bitwise_and, + tok_bitwise_or, + tok_bitwise_xor, + tok_bitwise_not, + tok_dot, + + tok_eq, + tok_neq, + tok_leq, + tok_geq, + tok_spaceship, + tok_lshift, + tok_rshift, + tok_rshiftR, + tok_rshiftC, + tok_divR, + tok_divC, + tok_modR, + tok_modC, + tok_divmod, + tok_set_plus, + tok_set_minus, + tok_set_mul, + tok_set_div, + tok_set_divR, + tok_set_divC, + tok_set_mod, + tok_set_modR, + tok_set_modC, + tok_set_lshift, + tok_set_rshift, + tok_set_rshiftR, + tok_set_rshiftC, + tok_set_bitwise_and, + tok_set_bitwise_or, + tok_set_bitwise_xor, + + tok_return, + tok_var, + tok_repeat, + tok_do, + tok_while, + tok_until, + tok_try, + tok_catch, + tok_if, + tok_ifnot, + tok_then, + tok_else, + tok_elseif, + tok_elseifnot, + + tok_int, + tok_cell, + tok_slice, + tok_builder, + tok_cont, + tok_tuple, + tok_type, + tok_mapsto, + tok_forall, + + tok_extern, + tok_global, + tok_asm, + tok_impure, + tok_pure, + tok_inline, + tok_inlineref, + tok_builtin, + tok_autoapply, + tok_method_id, + tok_get, + tok_operator, + tok_infix, + tok_infixl, + tok_infixr, + tok_const, + + tok_pragma, + tok_pragma_name, + tok_semver, + tok_include, + + tok_eof }; +// All tolk language is parsed into tokens. +// Lexer::next() returns a Token. +struct Token { + TokenType type = tok_empty; + std::string_view str_val; + + Token() = default; + Token(TokenType type, std::string_view str_val): type(type), str_val(str_val) {} +}; + +// Lexer::next() is a method to be used externally (while parsing tolk file to AST). +// It's streaming: `next()` parses a token on demand. +// For comments, see lexer.cpp, a comment above Lexer constructor. 
class Lexer { - SourceReader& src; - bool eof; - Lexem lexem, peek_lexem; - unsigned char char_class[128]; - std::array eol_cmt, cmt_op, cmt_cl; // for ;; {- -} - std::array eol_cmt2, cmt_op2, cmt_cl2; // for // /* */ - std::string multiline_quote; - enum cc { left_active = 2, right_active = 1, active = 3, allow_repeat = 4, quote_char = 8 }; - - public: - bool eof_found() const { - return eof; - } - explicit Lexer(SourceReader& _src, std::string active_chars = ";,() ~.", - std::string quote_chars = "\"", std::string multiline_quote = "\"\"\""); + Token tokens_circularbuf[8]{}; + int last_token_idx = -1; + int cur_token_idx = -1; + Token cur_token; // = tokens_circularbuf[cur_token_idx & 7] - void set_comment_tokens(const std::string &eol_cmts, const std::string &open_cmts, const std::string &close_cmts); - void set_comment2_tokens(const std::string &eol_cmts2, const std::string &open_cmts2, const std::string &close_cmts2); - void start_parsing(); + const SrcFile* file; + const char *p_start, *p_end, *p_next; + SrcLocation location; - const Lexem& next(); - const Lexem& cur() const { - return lexem; + void update_location() { + location.char_offset = static_cast(p_next - p_start); } - const Lexem& peek(); - int tp() const { - return lexem.tp; + + GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD + void on_expect_call_failed(const char* str_expected) const; + +public: + + explicit Lexer(const SrcFile* file); + Lexer(const Lexer&) = delete; + Lexer &operator=(const Lexer&) = delete; + + void add_token(TokenType type, std::string_view str) { + tokens_circularbuf[++last_token_idx & 7] = Token(type, str); } - void expect(int exp_tp, const char* msg = 0); - int classify_char(unsigned c) const { - return c < 0x80 ? 
char_class[c] : 0; + + void skip_spaces() { + while (std::isspace(*p_next)) { + ++p_next; + } } - bool is_active(int c) const { - return (classify_char(c) & cc::active) == cc::active; + + void skip_line() { + while (p_next < p_end && *p_next != '\n' && *p_next != '\r') { + ++p_next; + } + while (*p_next == '\n' || *p_next == '\r') { + ++p_next; + } } - bool is_left_active(int c) const { - return (classify_char(c) & cc::left_active); + + void skip_chars(int n) { + p_next += n; } - bool is_right_active(int c) const { - return (classify_char(c) & cc::right_active); + + bool is_eof() const { + return p_next >= p_end; } - bool is_repeatable(int c) const { - return (classify_char(c) & cc::allow_repeat); + + char char_at() const { return *p_next; } + char char_at(int shift) const { return *(p_next + shift); } + const char* c_str() const { return p_next; } + + TokenType tok() const { return cur_token.type; } + std::string_view cur_str() const { return cur_token.str_val; } + std::string cur_str_std_string() const { return static_cast(cur_token.str_val); } + SrcLocation cur_location() const { return location; } + int cur_sym_idx() const; + + void next(); + void next_special(TokenType parse_next_as, const char* str_expected); + + void check(TokenType next_tok, const char* str_expected) const { + if (cur_token.type != next_tok) { + on_expect_call_failed(str_expected); // unlikely path, not inlined + } } - bool is_quote_char(int c) const { - return (classify_char(c) & cc::quote_char); + void expect(TokenType next_tok, const char* str_expected) { + if (cur_token.type != next_tok) { + on_expect_call_failed(str_expected); + } + next(); } - private: - void set_spec(std::array& arr, std::string setup); - bool is_multiline_quote(const char* begin, const char* end); + GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD + void error(const std::string& err_msg) const; + GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD + void error_at(const std::string& prefix, const std::string& suffix) const; }; +void 
lexer_init(); + +// todo #ifdef TOLK_PROFILING +void lexer_measure_performance(const std::vector& files_to_just_parse); + } // namespace tolk diff --git a/tolk/optimize.cpp b/tolk/optimize.cpp index 64087032d..cf7f460f8 100644 --- a/tolk/optimize.cpp +++ b/tolk/optimize.cpp @@ -612,7 +612,7 @@ bool Optimizer::optimize() { } AsmOpConsList optimize_code_head(AsmOpConsList op_list, int mode) { - Optimizer opt(std::move(op_list), op_rewrite_comments, mode); + Optimizer opt(std::move(op_list), false, mode); opt.optimize(); return opt.extract_code(); } diff --git a/tolk/parse-tolk.cpp b/tolk/parse-tolk.cpp index 3cff0bb51..c28501d43 100644 --- a/tolk/parse-tolk.cpp +++ b/tolk/parse-tolk.cpp @@ -15,6 +15,7 @@ along with TON Blockchain Library. If not, see . */ #include "tolk.h" +#include "platform-utils.h" #include "td/utils/crypto.h" #include "common/refint.h" #include "openssl/digest.hpp" @@ -24,28 +25,16 @@ namespace tolk { using namespace std::literals::string_literals; -int compute_symbol_subclass(std::string str) { - if (str.size() < 2) { - return IdSc::undef; - } else if (str[0] == '.') { - return IdSc::dotid; - } else if (str[0] == '~') { - return IdSc::tildeid; - } else { - return IdSc::undef; - } -} - inline bool is_dot_ident(sym_idx_t idx) { - return symbols.get_subclass(idx) == IdSc::dotid; + return symbols.get_subclass(idx) == SymbolSubclass::dot_identifier; } inline bool is_tilde_ident(sym_idx_t idx) { - return symbols.get_subclass(idx) == IdSc::tildeid; + return symbols.get_subclass(idx) == SymbolSubclass::tilde_identifier; } inline bool is_special_ident(sym_idx_t idx) { - return symbols.get_subclass(idx) != IdSc::undef; + return symbols.get_subclass(idx) != SymbolSubclass::undef; } // given Expr::_Apply (a function call / a variable call), determine whether it's <, or >, or similar @@ -97,7 +86,8 @@ static inline std::string get_builtin_operator_name(sym_idx_t sym_builtin) { // fire an error for a case "flags & 0xFF != 0" (equivalent to "flags & 1", 
probably unexpected) // it would better be a warning, but we decided to make it a strict error -[[gnu::cold]] static void fire_error_lower_precedence(const SrcLocation& loc, sym_idx_t op_lower, sym_idx_t op_higher) { +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_lower_precedence(SrcLocation loc, sym_idx_t op_lower, sym_idx_t op_higher) { std::string name_lower = get_builtin_operator_name(op_lower); std::string name_higher = get_builtin_operator_name(op_higher); throw ParseError(loc, name_lower + " has lower precedence than " + name_higher + @@ -106,7 +96,8 @@ static inline std::string get_builtin_operator_name(sym_idx_t sym_builtin) { } // fire an error for a case "arg1 & arg2 | arg3" -[[gnu::cold]] static void fire_error_mix_bitwise_and_or(const SrcLocation& loc, sym_idx_t op1, sym_idx_t op2) { +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_mix_bitwise_and_or(SrcLocation loc, sym_idx_t op1, sym_idx_t op2) { std::string name1 = get_builtin_operator_name(op1); std::string name2 = get_builtin_operator_name(op2); throw ParseError(loc, "mixing " + name1 + " with " + name2 + " without parenthesis" @@ -117,7 +108,7 @@ static inline std::string get_builtin_operator_name(sym_idx_t sym_builtin) { // diagnose when bitwise operators are used in a probably wrong way due to tricky precedence // example: "flags & 0xFF != 0" is equivalent to "flags & 1", most likely it's unexpected // the only way to suppress this error for the programmer is to use parenthesis -static void diagnose_bitwise_precedence(const SrcLocation& loc, sym_idx_t bitwise_sym, const Expr* lhs, const Expr* rhs) { +static void diagnose_bitwise_precedence(SrcLocation loc, sym_idx_t bitwise_sym, const Expr* lhs, const Expr* rhs) { // handle "0 != flags & 0xFF" (lhs = "0 != flags") if (!lhs->is_inside_parenthesis() && lhs->cls == Expr::_Apply && lhs->e_type->is_int() && // fast false if 100% not @@ -143,7 +134,7 @@ static void diagnose_bitwise_precedence(const SrcLocation& 
loc, sym_idx_t bitwis } // diagnose "a << 8 + 1" (equivalent to "a << 9", probably unexpected) -static void diagnose_addition_in_bitshift(const SrcLocation& loc, sym_idx_t bitshift_sym, const Expr* rhs) { +static void diagnose_addition_in_bitshift(SrcLocation loc, sym_idx_t bitshift_sym, const Expr* rhs) { if (!rhs->is_inside_parenthesis() && rhs->cls == Expr::_Apply && rhs->e_type->is_int() && is_add_or_sub_binary_op(rhs)) { @@ -152,9 +143,9 @@ static void diagnose_addition_in_bitshift(const SrcLocation& loc, sym_idx_t bits } /* - * + * * PARSE SOURCE - * + * */ // TE ::= TA | TA -> TE @@ -162,68 +153,70 @@ static void diagnose_addition_in_bitshift(const SrcLocation& loc, sym_idx_t bits TypeExpr* parse_type(Lexer& lex); TypeExpr* parse_type1(Lexer& lex) { - switch (lex.tp()) { - case _Int: + switch (lex.tok()) { + case tok_int: lex.next(); - return TypeExpr::new_atomic(_Int); - case _Cell: + return TypeExpr::new_atomic(TypeExpr::_Int); + case tok_cell: lex.next(); - return TypeExpr::new_atomic(_Cell); - case _Slice: + return TypeExpr::new_atomic(TypeExpr::_Cell); + case tok_slice: lex.next(); - return TypeExpr::new_atomic(_Slice); - case _Builder: + return TypeExpr::new_atomic(TypeExpr::_Slice); + case tok_builder: lex.next(); - return TypeExpr::new_atomic(_Builder); - case _Cont: + return TypeExpr::new_atomic(TypeExpr::_Builder); + case tok_cont: lex.next(); - return TypeExpr::new_atomic(_Cont); - case _Tuple: + return TypeExpr::new_atomic(TypeExpr::_Cont); + case tok_tuple: lex.next(); - return TypeExpr::new_atomic(_Tuple); - case _Var: - case '_': + return TypeExpr::new_atomic(TypeExpr::_Tuple); + case tok_var: + case tok_underscore: lex.next(); return TypeExpr::new_hole(); - case _Ident: { - auto sym = lookup_symbol(lex.cur().val); + case tok_identifier: { + auto sym = lookup_symbol(lex.cur_sym_idx()); if (sym && dynamic_cast(sym->value)) { auto val = dynamic_cast(sym->value); lex.next(); return val->get_type(); } - lex.cur().error_at("`", "` is not a type 
identifier"); + lex.error_at("`", "` is not a type identifier"); } + default: + break; } - int c; - if (lex.tp() == '[') { + TokenType c; + if (lex.tok() == tok_opbracket) { lex.next(); - c = ']'; + c = tok_clbracket; } else { - lex.expect('('); - c = ')'; + lex.expect(tok_oppar, ""); + c = tok_clpar; } - if (lex.tp() == c) { + if (lex.tok() == c) { lex.next(); - return c == ')' ? TypeExpr::new_unit() : TypeExpr::new_tuple({}); + return c == tok_clpar ? TypeExpr::new_unit() : TypeExpr::new_tuple({}); } auto t1 = parse_type(lex); - if (lex.tp() == ')') { - lex.expect(c); + if (lex.tok() == tok_clpar) { + lex.expect(c, c == tok_clpar ? "')'" : "']'"); return t1; } std::vector tlist{1, t1}; - while (lex.tp() == ',') { + while (lex.tok() == tok_comma) { lex.next(); tlist.push_back(parse_type(lex)); } - lex.expect(c); - return c == ')' ? TypeExpr::new_tensor(std::move(tlist)) : TypeExpr::new_tuple(std::move(tlist)); + lex.expect(c, c == tok_clpar ? "')'" : "']'"); + return c == tok_clpar ? TypeExpr::new_tensor(std::move(tlist)) : TypeExpr::new_tuple(std::move(tlist)); } TypeExpr* parse_type(Lexer& lex) { auto res = parse_type1(lex); - if (lex.tp() == _Mapsto) { + if (lex.tok() == tok_mapsto) { lex.next(); auto to = parse_type(lex); return TypeExpr::new_map(res, to); @@ -234,18 +227,18 @@ TypeExpr* parse_type(Lexer& lex) { FormalArg parse_formal_arg(Lexer& lex, int fa_idx) { TypeExpr* arg_type = 0; - SrcLocation loc = lex.cur().loc; - if (lex.tp() == '_') { + SrcLocation loc = lex.cur_location(); + if (lex.tok() == tok_underscore) { lex.next(); - if (lex.tp() == ',' || lex.tp() == ')') { + if (lex.tok() == tok_comma || lex.tok() == tok_clpar) { return std::make_tuple(TypeExpr::new_hole(), (SymDef*)nullptr, loc); } arg_type = TypeExpr::new_hole(); - loc = lex.cur().loc; - } else if (lex.tp() != _Ident) { + loc = lex.cur_location(); + } else if (lex.tok() != tok_identifier) { arg_type = parse_type(lex); } else { - auto sym = lookup_symbol(lex.cur().val); + auto sym = 
lookup_symbol(lex.cur_sym_idx()); if (sym && dynamic_cast(sym->value)) { auto val = dynamic_cast(sym->value); lex.next(); @@ -254,44 +247,42 @@ FormalArg parse_formal_arg(Lexer& lex, int fa_idx) { arg_type = TypeExpr::new_hole(); } } - if (lex.tp() == '_' || lex.tp() == ',' || lex.tp() == ')') { - if (lex.tp() == '_') { - loc = lex.cur().loc; + if (lex.tok() == tok_underscore || lex.tok() == tok_comma || lex.tok() == tok_clpar) { + if (lex.tok() == tok_underscore) { + loc = lex.cur_location(); lex.next(); } return std::make_tuple(arg_type, (SymDef*)nullptr, loc); } - if (lex.tp() != _Ident) { - lex.expect(_Ident, "formal parameter name"); - } - loc = lex.cur().loc; - if (prohibited_var_names.count(symbols.get_name(lex.cur().val))) { + lex.check(tok_identifier, "formal parameter name"); + loc = lex.cur_location(); + if (prohibited_var_names.count(symbols.get_name(lex.cur_sym_idx()))) { throw ParseError{ - loc, PSTRING() << "symbol `" << symbols.get_name(lex.cur().val) << "` cannot be redefined as a variable"}; + loc, PSTRING() << "symbol `" << symbols.get_name(lex.cur_sym_idx()) << "` cannot be redefined as a variable"}; } - SymDef* new_sym_def = define_symbol(lex.cur().val, true, loc); + SymDef* new_sym_def = define_symbol(lex.cur_sym_idx(), true, loc); if (!new_sym_def) { - lex.cur().error_at("cannot define symbol `", "`"); + lex.error_at("cannot define symbol `", "`"); } if (new_sym_def->value) { - lex.cur().error_at("redefined formal parameter `", "`"); + lex.error_at("redefined formal parameter `", "`"); } - new_sym_def->value = new SymVal{SymVal::_Param, fa_idx, arg_type}; + new_sym_def->value = new SymVal{SymValKind::_Param, fa_idx, arg_type}; lex.next(); return std::make_tuple(arg_type, new_sym_def, loc); } void parse_global_var_decl(Lexer& lex) { TypeExpr* var_type = 0; - SrcLocation loc = lex.cur().loc; - if (lex.tp() == '_') { + SrcLocation loc = lex.cur_location(); + if (lex.tok() == tok_underscore) { lex.next(); var_type = TypeExpr::new_hole(); - loc = 
lex.cur().loc; - } else if (lex.tp() != _Ident) { + loc = lex.cur_location(); + } else if (lex.tok() != tok_identifier) { var_type = parse_type(lex); } else { - auto sym = lookup_symbol(lex.cur().val); + auto sym = lookup_symbol(lex.cur_sym_idx()); if (sym && dynamic_cast(sym->value)) { auto val = dynamic_cast(sym->value); lex.next(); @@ -300,18 +291,16 @@ void parse_global_var_decl(Lexer& lex) { var_type = TypeExpr::new_hole(); } } - if (lex.tp() != _Ident) { - lex.expect(_Ident, "global variable name"); - } - loc = lex.cur().loc; - SymDef* sym_def = define_global_symbol(lex.cur().val, false, loc); + lex.check(tok_identifier, "global variable name"); + loc = lex.cur_location(); + SymDef* sym_def = define_global_symbol(lex.cur_sym_idx(), false, loc); if (!sym_def) { - lex.cur().error_at("cannot define global symbol `", "`"); + lex.error_at("cannot define global symbol `", "`"); } if (sym_def->value) { auto val = dynamic_cast(sym_def->value); if (!val) { - lex.cur().error_at("symbol `", "` cannot be redefined as a global variable"); + lex.error_at("symbol `", "` cannot be redefined as a global variable"); } try { unify(var_type, val->sym_type); @@ -319,12 +308,12 @@ void parse_global_var_decl(Lexer& lex) { std::ostringstream os; os << "cannot unify new type " << var_type << " of global variable `" << sym_def->name() << "` with its previous type " << val->sym_type << ": " << ue; - lex.cur().error(os.str()); + lex.error(os.str()); } } else { sym_def->value = new SymValGlobVar{glob_var_cnt++, var_type}; #ifdef TOLK_DEBUG - dynamic_cast(sym_def->value)->name = lex.cur().str; + dynamic_cast(sym_def->value)->name = lex.cur_str(); #endif glob_vars.push_back(sym_def); } @@ -335,39 +324,39 @@ extern int const_cnt; Expr* parse_expr(Lexer& lex, CodeBlob& code, bool nv = false); void parse_const_decl(Lexer& lex) { - SrcLocation loc = lex.cur().loc; + SrcLocation loc = lex.cur_location(); int wanted_type = Expr::_None; - if (lex.tp() == _Int) { + if (lex.tok() == tok_int) { 
wanted_type = Expr::_Const; lex.next(); - } else if (lex.tp() == _Slice) { + } else if (lex.tok() == tok_slice) { wanted_type = Expr::_SliceConst; lex.next(); } - if (lex.tp() != _Ident) { - lex.expect(_Ident, "constant name"); - } - loc = lex.cur().loc; - SymDef* sym_def = define_global_symbol(lex.cur().val, false, loc); + lex.check(tok_identifier, "constant name"); + loc = lex.cur_location(); + SymDef* sym_def = define_global_symbol(lex.cur_sym_idx(), false, loc); if (!sym_def) { - lex.cur().error_at("cannot define global symbol `", "`"); + lex.error_at("cannot define global symbol `", "`"); + } + if (sym_def->value) { // todo below it was a check (for duplicate include?) + lex.error_at("global symbol `", "` already exists"); } - Lexem ident = lex.cur(); lex.next(); - if (lex.tp() != '=') { - lex.cur().error_at("expected = instead of ", ""); + if (lex.tok() != tok_assign) { + lex.error_at("expected = instead of ", ""); } lex.next(); CodeBlob code; // Handles processing and resolution of literals and consts auto x = parse_expr(lex, code, false); // also does lex.next() ! 
if (!x->is_rvalue()) { - lex.cur().error("expression is not strictly Rvalue"); + lex.error("expression is not strictly Rvalue"); } if ((wanted_type == Expr::_Const) && (x->cls == Expr::_Apply)) wanted_type = Expr::_None; // Apply is additionally checked to result in an integer if ((wanted_type != Expr::_None) && (x->cls != wanted_type)) { - lex.cur().error("expression type does not match wanted type"); + lex.error("expression type does not match wanted type"); } SymValConst* new_value = nullptr; if (x->cls == Expr::_Const) { // Integer constant @@ -392,58 +381,49 @@ void parse_const_decl(Lexer& lex) { AsmOpList out_list(0, &code.vars); code.generate_code(out_list); if (out_list.list_.size() != 1) { - lex.cur().error("precompiled expression must result in single operation"); + lex.error("precompiled expression must result in single operation"); } auto op = out_list.list_[0]; if (!op.is_const()) { - lex.cur().error("precompiled expression must result in compilation time constant"); + lex.error("precompiled expression must result in compilation time constant"); } if (op.origin.is_null() || !op.origin->is_valid()) { - lex.cur().error("precompiled expression did not result in a valid integer constant"); + lex.error("precompiled expression did not result in a valid integer constant"); } new_value = new SymValConst{const_cnt++, op.origin}; } else { - lex.cur().error("integer or slice literal or constant expected"); - } - if (sym_def->value) { - SymValConst* old_value = dynamic_cast(sym_def->value); - Keyword new_type = new_value->get_type(); - if (!old_value || old_value->get_type() != new_type || - (new_type == _Int && *old_value->get_int_value() != *new_value->get_int_value()) || - (new_type == _Slice && old_value->get_str_value() != new_value->get_str_value())) { - ident.error_at("global symbol `", "` already exists"); - } + lex.error("integer or slice literal or constant expected"); } sym_def->value = new_value; } FormalArgList parse_formal_args(Lexer& lex) { 
FormalArgList args; - lex.expect('(', "formal argument list"); - if (lex.tp() == ')') { + lex.expect(tok_oppar, "formal argument list"); + if (lex.tok() == tok_clpar) { lex.next(); return args; } int fa_idx = 0; args.push_back(parse_formal_arg(lex, fa_idx++)); - while (lex.tp() == ',') { + while (lex.tok() == tok_comma) { lex.next(); args.push_back(parse_formal_arg(lex, fa_idx++)); } - lex.expect(')'); + lex.expect(tok_clpar, "')'"); return args; } void parse_const_decls(Lexer& lex) { - lex.expect(_Const); + lex.expect(tok_const, "'const'"); while (true) { parse_const_decl(lex); - if (lex.tp() != ',') { + if (lex.tok() != tok_comma) { break; } - lex.expect(','); + lex.expect(tok_comma, "','"); } - lex.expect(';'); + lex.expect(tok_semicolon, "';'"); } TypeExpr* extract_total_arg_type(const FormalArgList& arg_list) { @@ -461,15 +441,15 @@ TypeExpr* extract_total_arg_type(const FormalArgList& arg_list) { } void parse_global_var_decls(Lexer& lex) { - lex.expect(_Global); + lex.expect(tok_global, "'global'"); while (true) { parse_global_var_decl(lex); - if (lex.tp() != ',') { + if (lex.tok() != tok_comma) { break; } - lex.expect(','); + lex.expect(tok_comma, "','"); } - lex.expect(';'); + lex.expect(tok_semicolon, "';'"); } SymValCodeFunc* make_new_glob_func(SymDef* func_sym, TypeExpr* func_type, bool marked_as_pure) { @@ -483,18 +463,18 @@ SymValCodeFunc* make_new_glob_func(SymDef* func_sym, TypeExpr* func_type, bool m return res; } -bool check_global_func(const Lexem& cur, sym_idx_t func_name) { +bool check_global_func(const Lexer& lex, sym_idx_t func_name) { SymDef* def = lookup_symbol(func_name); if (!def) { - cur.error("undefined symbol `" + symbols.get_name(func_name) + "`"); + lex.error("undefined symbol `" + symbols.get_name(func_name) + "`"); return false; } SymVal* val = dynamic_cast(def->value); if (!val) { - cur.error(std::string{"symbol `"} + symbols.get_name(func_name) + "` has no value and no type"); + lex.error(std::string{"symbol `"} + 
symbols.get_name(func_name) + "` has no value and no type"); return false; } else if (!val->get_type()) { - cur.error(std::string{"symbol `"} + symbols.get_name(func_name) + "` has no type, possibly not a function"); + lex.error(std::string{"symbol `"} + symbols.get_name(func_name) + "` has no type, possibly not a function"); return false; } else { return true; @@ -519,12 +499,12 @@ Expr* make_func_apply(Expr* fun, Expr* x) { // parse ( E { , E } ) | () | [ E { , E } ] | [] | id | num | _ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { - if (lex.tp() == '(' || lex.tp() == '[') { - bool tf = (lex.tp() == '['); - int clbr = (tf ? ']' : ')'); - SrcLocation loc{lex.cur().loc}; + if (lex.tok() == tok_oppar || lex.tok() == tok_opbracket) { + bool tf = (lex.tok() == tok_opbracket); + TokenType clbr = (tf ? tok_clbracket : tok_clpar); + SrcLocation loc{lex.cur_location()}; lex.next(); - if (lex.tp() == clbr) { + if (lex.tok() == clbr) { lex.next(); Expr* res = new Expr{Expr::_Tensor, {}}; res->flags = Expr::_IsRvalue; @@ -539,21 +519,21 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { return res; } Expr* res = parse_expr(lex, code, nv); - if (lex.tp() == ')') { + if (lex.tok() == tok_clpar) { + lex.expect(clbr, clbr == tok_clbracket ? 
"']'" : "')'"); res->flags |= Expr::_IsInsideParenthesis; - lex.expect(clbr); return res; } std::vector type_list; type_list.push_back(res->e_type); int f = res->flags; res = new Expr{Expr::_Tensor, {res}}; - while (lex.tp() == ',') { + while (lex.tok() == tok_comma) { lex.next(); auto x = parse_expr(lex, code, nv); res->pb_arg(x); if ((f ^ x->flags) & Expr::_IsType) { - lex.cur().error("mixing type and non-type expressions inside the same tuple"); + lex.error("mixing type and non-type expressions inside the same tuple"); } f &= x->flags; type_list.push_back(x->e_type); @@ -567,53 +547,49 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { res->here = loc; res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); } - lex.expect(clbr); + lex.expect(clbr, clbr == tok_clbracket ? "']'" : "')'"); return res; } - int t = lex.tp(); - if (t == Lexem::Number) { - Expr* res = new Expr{Expr::_Const, lex.cur().loc}; + TokenType t = lex.tok(); + if (t == tok_int_const) { + Expr* res = new Expr{Expr::_Const, lex.cur_location()}; res->flags = Expr::_IsRvalue; - res->intval = td::string_to_int256(lex.cur().str); + res->intval = td::string_to_int256(lex.cur_str_std_string()); if (res->intval.is_null() || !res->intval->signed_fits_bits(257)) { - lex.cur().error_at("invalid integer constant `", "`"); + lex.error_at("invalid integer constant `", "`"); } - res->e_type = TypeExpr::new_atomic(_Int); + res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); lex.next(); return res; } - if (t == Lexem::String) { - std::string str = lex.cur().str; - int str_type = lex.cur().val; + if (t == tok_string_const) { + std::string str = lex.cur_str_std_string(); + lex.next(); + char modifier = 0; + if (lex.tok() == tok_string_modifier) { + modifier = lex.cur_str()[0]; + lex.next(); + } Expr* res; - switch (str_type) { + switch (modifier) { case 0: case 's': case 'a': - { - res = new Expr{Expr::_SliceConst, lex.cur().loc}; - res->e_type = TypeExpr::new_atomic(_Slice); + res = new 
Expr{Expr::_SliceConst, lex.cur_location()}; + res->e_type = TypeExpr::new_atomic(TypeExpr::_Slice); break; - } case 'u': case 'h': case 'H': case 'c': - { - res = new Expr{Expr::_Const, lex.cur().loc}; - res->e_type = TypeExpr::new_atomic(_Int); + res = new Expr{Expr::_Const, lex.cur_location()}; + res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); break; - } default: - { - res = new Expr{Expr::_Const, lex.cur().loc}; - res->e_type = TypeExpr::new_atomic(_Int); - lex.cur().error("invalid string type `" + std::string(1, static_cast(str_type)) + "`"); - return res; - } + lex.error("invalid string type `" + std::string(1, modifier) + "`"); } res->flags = Expr::_IsRvalue; - switch (str_type) { + switch (modifier) { case 0: { res->strval = td::hex_encode(str); break; @@ -623,7 +599,7 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { unsigned char buff[128]; int bits = (int)td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); if (bits < 0) { - lex.cur().error_at("Invalid hex bitstring constant `", "`"); + lex.error_at("Invalid hex bitstring constant `", "`"); } break; } @@ -633,64 +609,63 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { if (a.parse_addr(str)) { res->strval = block::tlb::MsgAddressInt().pack_std_address(a)->as_bitslice().to_hex(); } else { - lex.cur().error_at("invalid standard address `", "`"); + lex.error_at("invalid standard address `", "`"); } break; } case 'u': { res->intval = td::hex_string_to_int256(td::hex_encode(str)); - if (!str.size()) { - lex.cur().error("empty integer ascii-constant"); + if (str.empty()) { + lex.error("empty integer ascii-constant"); } if (res->intval.is_null()) { - lex.cur().error_at("too long integer ascii-constant `", "`"); + lex.error_at("too long integer ascii-constant `", "`"); } break; } case 'h': - case 'H': - { + case 'H': { unsigned char hash[32]; digest::hash_str(hash, str.data(), str.size()); - res->intval = td::bits_to_refint(hash, 
(str_type == 'h') ? 32 : 256, false); + res->intval = td::bits_to_refint(hash, (modifier == 'h') ? 32 : 256, false); break; } - case 'c': - { + case 'c': { res->intval = td::make_refint(td::crc32(td::Slice{str})); break; } + default: + __builtin_unreachable(); } - lex.next(); return res; } - if (t == '_') { - Expr* res = new Expr{Expr::_Hole, lex.cur().loc}; + if (t == tok_underscore) { + Expr* res = new Expr{Expr::_Hole, lex.cur_location()}; res->val = -1; res->flags = Expr::_IsLvalue; res->e_type = TypeExpr::new_hole(); lex.next(); return res; } - if (t == _Var) { - Expr* res = new Expr{Expr::_Type, lex.cur().loc}; + if (t == tok_var) { + Expr* res = new Expr{Expr::_Type, lex.cur_location()}; res->flags = Expr::_IsType; res->e_type = TypeExpr::new_hole(); lex.next(); return res; } - if (t == _Int || t == _Cell || t == _Slice || t == _Builder || t == _Cont || t == _Type || t == _Tuple) { - Expr* res = new Expr{Expr::_Type, lex.cur().loc}; + if (t == tok_int || t == tok_cell || t == tok_slice || t == tok_builder || t == tok_cont || t == tok_type || t == tok_tuple) { + Expr* res = new Expr{Expr::_Type, lex.cur_location()}; res->flags = Expr::_IsType; res->e_type = TypeExpr::new_atomic(t); lex.next(); return res; } - if (t == _Ident) { - auto sym = lookup_symbol(lex.cur().val); + if (t == tok_identifier) { + auto sym = lookup_symbol(lex.cur_sym_idx()); if (sym && dynamic_cast(sym->value)) { auto val = dynamic_cast(sym->value); - Expr* res = new Expr{Expr::_Type, lex.cur().loc}; + Expr* res = new Expr{Expr::_Type, lex.cur_location()}; res->flags = Expr::_IsType; res->e_type = val->get_type(); lex.next(); @@ -698,7 +673,7 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { } if (sym && dynamic_cast(sym->value)) { auto val = dynamic_cast(sym->value); - Expr* res = new Expr{Expr::_GlobVar, lex.cur().loc}; + Expr* res = new Expr{Expr::_GlobVar, lex.cur_location()}; res->e_type = val->get_type(); res->sym = sym; res->flags = Expr::_IsLvalue | Expr::_IsRvalue | 
Expr::_IsImpure; @@ -707,34 +682,35 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { } if (sym && dynamic_cast(sym->value)) { auto val = dynamic_cast(sym->value); - Expr* res = new Expr{Expr::_None, lex.cur().loc}; + Expr* res = new Expr{Expr::_None, lex.cur_location()}; res->flags = Expr::_IsRvalue; - if (val->type == _Int) { + if (val->get_kind() == SymValConst::IntConst) { res->cls = Expr::_Const; res->intval = val->get_int_value(); + res->e_type = TypeExpr::new_atomic(tok_int); } - else if (val->type == _Slice) { + else if (val->get_kind() == SymValConst::SliceConst) { res->cls = Expr::_SliceConst; res->strval = val->get_str_value(); + res->e_type = TypeExpr::new_atomic(tok_slice); } else { - lex.cur().error("Invalid symbolic constant type"); + lex.error("Invalid symbolic constant type"); } - res->e_type = TypeExpr::new_atomic(val->type); lex.next(); return res; } bool auto_apply = false; - Expr* res = new Expr{Expr::_Var, lex.cur().loc}; + Expr* res = new Expr{Expr::_Var, lex.cur_location()}; if (nv) { - res->val = ~lex.cur().val; + res->val = ~lex.cur_sym_idx(); res->e_type = TypeExpr::new_hole(); res->flags = Expr::_IsLvalue; // std::cerr << "defined new variable " << lex.cur().str << " : " << res->e_type << std::endl; } else { if (!sym) { - check_global_func(lex.cur(), lex.cur().val); - sym = lookup_symbol(lex.cur().val); + check_global_func(lex, lex.cur_sym_idx()); + sym = lookup_symbol(lex.cur_sym_idx()); } res->sym = sym; SymVal* val = nullptr; @@ -743,14 +719,14 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { val = dynamic_cast(sym->value); } if (!val) { - lex.cur().error_at("undefined identifier `", "`"); - } else if (val->type == SymVal::_Func) { + lex.error_at("undefined identifier `", "`"); + } else if (val->kind == SymValKind::_Func) { res->e_type = val->get_type(); res->cls = Expr::_GlobFunc; auto_apply = val->auto_apply; impure = !dynamic_cast(val)->is_marked_as_pure(); } else if (val->idx < 0) { - 
lex.cur().error_at("accessing variable `", "` being defined"); + lex.error_at("accessing variable `", "` being defined"); } else { res->val = val->idx; res->e_type = val->get_type(); @@ -765,41 +741,41 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { res = new Expr{Expr::_Apply, sym, {}}; res->flags = Expr::_IsRvalue | impure; } - res->deduce_type(lex.cur()); + res->deduce_type(lex); lex.next(); return res; } - lex.expect(Lexem::Ident); + lex.expect(tok_identifier, "identifier"); return nullptr; } // parse E { E } Expr* parse_expr90(Lexer& lex, CodeBlob& code, bool nv) { Expr* res = parse_expr100(lex, code, nv); - while (lex.tp() == '(' || lex.tp() == '[' || (lex.tp() == _Ident && !is_special_ident(lex.cur().val))) { + while (lex.tok() == tok_oppar || lex.tok() == tok_opbracket || (lex.tok() == tok_identifier && !is_special_ident(lex.cur_sym_idx()))) { if (res->is_type()) { Expr* x = parse_expr100(lex, code, true); - x->chk_lvalue(lex.cur()); // chk_lrvalue() ? + x->chk_lvalue(lex); // chk_lrvalue() ? 
TypeExpr* tp = res->e_type; delete res; res = new Expr{Expr::_TypeApply, {x}}; res->e_type = tp; - res->here = lex.cur().loc; + res->here = lex.cur_location(); try { unify(res->e_type, x->e_type); } catch (UnifyError& ue) { std::ostringstream os; os << "cannot transform expression of type " << x->e_type << " to explicitly requested type " << res->e_type << ": " << ue; - lex.cur().error(os.str()); + lex.error(os.str()); } res->flags = x->flags; } else { Expr* x = parse_expr100(lex, code, false); - x->chk_rvalue(lex.cur()); + x->chk_rvalue(lex); res = make_func_apply(res, x); - res->here = lex.cur().loc; - res->deduce_type(lex.cur()); + res->here = lex.cur_location(); + res->deduce_type(lex); } } return res; @@ -808,19 +784,19 @@ Expr* parse_expr90(Lexer& lex, CodeBlob& code, bool nv) { // parse E { .method E | ~method E } Expr* parse_expr80(Lexer& lex, CodeBlob& code, bool nv) { Expr* res = parse_expr90(lex, code, nv); - while (lex.tp() == _Ident && is_special_ident(lex.cur().val)) { - auto modify = is_tilde_ident(lex.cur().val); + while (lex.tok() == tok_identifier && is_special_ident(lex.cur_sym_idx())) { + auto modify = is_tilde_ident(lex.cur_sym_idx()); auto obj = res; if (modify) { - obj->chk_lvalue(lex.cur()); + obj->chk_lvalue(lex); } else { - obj->chk_rvalue(lex.cur()); + obj->chk_rvalue(lex); } - auto loc = lex.cur().loc; - auto name = lex.cur().val; + SrcLocation loc = lex.cur_location(); + sym_idx_t name = lex.cur_sym_idx(); auto sym = lookup_symbol(name); if (!sym || !dynamic_cast(sym->value)) { - auto name1 = symbols.lookup(lex.cur().str.substr(1)); + auto name1 = symbols.lookup(lex.cur_str().substr(1)); if (name1) { auto sym1 = lookup_symbol(name1); if (sym1 && dynamic_cast(sym1->value)) { @@ -829,18 +805,18 @@ Expr* parse_expr80(Lexer& lex, CodeBlob& code, bool nv) { } } } - check_global_func(lex.cur(), name); + check_global_func(lex, name); if (verbosity >= 2) { - std::cerr << "using symbol `" << symbols.get_name(name) << "` for method call of " << 
lex.cur().str << std::endl; + std::cerr << "using symbol `" << symbols.get_name(name) << "` for method call of " << lex.cur_str() << std::endl; } sym = lookup_symbol(name); SymValFunc* val = sym ? dynamic_cast(sym->value) : nullptr; if (!val) { - lex.cur().error_at("undefined method identifier `", "`"); + lex.error_at("undefined method identifier `", "`"); } lex.next(); auto x = parse_expr100(lex, code, false); - x->chk_rvalue(lex.cur()); + x->chk_rvalue(lex); if (x->cls == Expr::_Tensor) { res = new Expr{Expr::_Apply, name, {obj}}; res->args.insert(res->args.end(), x->args.begin(), x->args.end()); @@ -849,33 +825,54 @@ Expr* parse_expr80(Lexer& lex, CodeBlob& code, bool nv) { } res->here = loc; res->flags = Expr::_IsRvalue | (val->is_marked_as_pure() ? 0 : Expr::_IsImpure); - res->deduce_type(lex.cur()); + res->deduce_type(lex); if (modify) { auto tmp = res; res = new Expr{Expr::_LetFirst, {obj->copy(), tmp}}; res->here = loc; res->flags = tmp->flags; res->set_val(name); - res->deduce_type(lex.cur()); + res->deduce_type(lex); } } return res; } -// parse [ ~ ] E +// parse [ ~ | - | + ] E Expr* parse_expr75(Lexer& lex, CodeBlob& code, bool nv) { - if (lex.tp() == '~') { - sym_idx_t name = symbols.lookup_add("~_"); - check_global_func(lex.cur(), name); - SrcLocation loc{lex.cur().loc}; + if (lex.tok() == tok_bitwise_not || lex.tok() == tok_minus || lex.tok() == tok_plus) { + TokenType t = lex.tok(); + sym_idx_t name = symbols.lookup_add(lex.cur_str_std_string() + "_"); + check_global_func(lex, name); + SrcLocation loc{lex.cur_location()}; lex.next(); - auto x = parse_expr80(lex, code, false); - x->chk_rvalue(lex.cur()); + auto x = parse_expr75(lex, code, false); + x->chk_rvalue(lex); + + // here's an optimization to convert "-1" (tok_minus tok_int_const) to a const -1, not to Expr::Apply(-,1) + // without this, everything still works, but Tolk loses some vars/stack knowledge for now (to be fixed later) + // in FunC, it was: + // `var fst = -1;` // is constantly 1 +
// `var snd = - 1;` // is Expr::Apply(-), a comment "snd=1" is lost in stack layout comments, and so on + // hence, when after grammar modification tok_minus became a true unary operator (not a part of a number), + // and thus to preserve existing behavior until compiler parts are completely rewritten, handle this case here + if (x->cls == Expr::_Const) { + if (t == tok_bitwise_not) { + x->intval = ~x->intval; + } else if (t == tok_minus) { + x->intval = -x->intval; + } + if (!x->intval->signed_fits_bits(257)) { + lex.error("integer overflow"); + } + return x; + } + auto res = new Expr{Expr::_Apply, name, {x}}; res->here = loc; - res->set_val('~'); + res->set_val(t); res->flags = Expr::_IsRvalue; - res->deduce_type(lex.cur()); + res->deduce_type(lex); return res; } else { return parse_expr80(lex, code, nv); @@ -885,58 +882,42 @@ Expr* parse_expr75(Lexer& lex, CodeBlob& code, bool nv) { // parse E { (* | / | % | /% | ^/ | ~/ | ^% | ~% ) E } Expr* parse_expr30(Lexer& lex, CodeBlob& code, bool nv) { Expr* res = parse_expr75(lex, code, nv); - while (lex.tp() == '*' || lex.tp() == '/' || lex.tp() == '%' || lex.tp() == _DivMod || lex.tp() == _DivC || - lex.tp() == _DivR || lex.tp() == _ModC || lex.tp() == _ModR) { - res->chk_rvalue(lex.cur()); - int t = lex.tp(); - sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); - SrcLocation loc{lex.cur().loc}; - check_global_func(lex.cur(), name); + while (lex.tok() == tok_mul || lex.tok() == tok_div || lex.tok() == tok_mod || lex.tok() == tok_divmod || lex.tok() == tok_divC || + lex.tok() == tok_divR || lex.tok() == tok_modC || lex.tok() == tok_modR) { + res->chk_rvalue(lex); + TokenType t = lex.tok(); + sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); + SrcLocation loc{lex.cur_location()}; + check_global_func(lex, name); lex.next(); auto x = parse_expr75(lex, code, false); - x->chk_rvalue(lex.cur()); + x->chk_rvalue(lex); res = new Expr{Expr::_Apply, name, {res, x}}; 
res->here = loc; res->set_val(t); res->flags = Expr::_IsRvalue; - res->deduce_type(lex.cur()); + res->deduce_type(lex); } return res; } -// parse [-] E { (+ | -) E } +// parse E { (+ | -) E } Expr* parse_expr20(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res; - int t = lex.tp(); - if (t == '-') { - sym_idx_t name = symbols.lookup_add("-_"); - check_global_func(lex.cur(), name); - SrcLocation loc{lex.cur().loc}; - lex.next(); - auto x = parse_expr30(lex, code, false); - x->chk_rvalue(lex.cur()); - res = new Expr{Expr::_Apply, name, {x}}; - res->here = loc; - res->set_val(t); - res->flags = Expr::_IsRvalue; - res->deduce_type(lex.cur()); - } else { - res = parse_expr30(lex, code, nv); - } - while (lex.tp() == '-' || lex.tp() == '+') { - res->chk_rvalue(lex.cur()); - t = lex.tp(); - sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); - check_global_func(lex.cur(), name); - SrcLocation loc{lex.cur().loc}; + Expr* res = parse_expr30(lex, code, nv); + while (lex.tok() == tok_minus || lex.tok() == tok_plus) { + res->chk_rvalue(lex); + TokenType t = lex.tok(); + sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); + check_global_func(lex, name); + SrcLocation loc{lex.cur_location()}; lex.next(); auto x = parse_expr30(lex, code, false); - x->chk_rvalue(lex.cur()); + x->chk_rvalue(lex); res = new Expr{Expr::_Apply, name, {res, x}}; res->here = loc; res->set_val(t); res->flags = Expr::_IsRvalue; - res->deduce_type(lex.cur()); + res->deduce_type(lex); } return res; } @@ -944,21 +925,21 @@ Expr* parse_expr20(Lexer& lex, CodeBlob& code, bool nv) { // parse E { ( << | >> | ~>> | ^>> ) E } Expr* parse_expr17(Lexer& lex, CodeBlob& code, bool nv) { Expr* res = parse_expr20(lex, code, nv); - while (lex.tp() == _Lshift || lex.tp() == _Rshift || lex.tp() == _RshiftC || lex.tp() == _RshiftR) { - res->chk_rvalue(lex.cur()); - int t = lex.tp(); - sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); - 
check_global_func(lex.cur(), name); - SrcLocation loc{lex.cur().loc}; + while (lex.tok() == tok_lshift || lex.tok() == tok_rshift || lex.tok() == tok_rshiftC || lex.tok() == tok_rshiftR) { + res->chk_rvalue(lex); + TokenType t = lex.tok(); + sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); + check_global_func(lex, name); + SrcLocation loc{lex.cur_location()}; lex.next(); auto x = parse_expr20(lex, code, false); - x->chk_rvalue(lex.cur()); + x->chk_rvalue(lex); diagnose_addition_in_bitshift(loc, name, x); res = new Expr{Expr::_Apply, name, {res, x}}; res->here = loc; res->set_val(t); res->flags = Expr::_IsRvalue; - res->deduce_type(lex.cur()); + res->deduce_type(lex); } return res; } @@ -966,21 +947,21 @@ Expr* parse_expr17(Lexer& lex, CodeBlob& code, bool nv) { // parse E [ (== | < | > | <= | >= | != | <=> ) E ] Expr* parse_expr15(Lexer& lex, CodeBlob& code, bool nv) { Expr* res = parse_expr17(lex, code, nv); - if (lex.tp() == _Eq || lex.tp() == '<' || lex.tp() == '>' || lex.tp() == _Leq || lex.tp() == _Geq || - lex.tp() == _Neq || lex.tp() == _Spaceship) { - res->chk_rvalue(lex.cur()); - int t = lex.tp(); - sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); - check_global_func(lex.cur(), name); - SrcLocation loc{lex.cur().loc}; + if (lex.tok() == tok_eq || lex.tok() == tok_lt || lex.tok() == tok_gt || lex.tok() == tok_leq || lex.tok() == tok_geq || + lex.tok() == tok_neq || lex.tok() == tok_spaceship) { + res->chk_rvalue(lex); + TokenType t = lex.tok(); + sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); + check_global_func(lex, name); + SrcLocation loc{lex.cur_location()}; lex.next(); auto x = parse_expr17(lex, code, false); - x->chk_rvalue(lex.cur()); + x->chk_rvalue(lex); res = new Expr{Expr::_Apply, name, {res, x}}; res->here = loc; res->set_val(t); res->flags = Expr::_IsRvalue; - res->deduce_type(lex.cur()); + res->deduce_type(lex); } return res; } @@ 
-988,15 +969,15 @@ Expr* parse_expr15(Lexer& lex, CodeBlob& code, bool nv) { // parse E { ( & | `|` | ^ ) E } Expr* parse_expr14(Lexer& lex, CodeBlob& code, bool nv) { Expr* res = parse_expr15(lex, code, nv); - while (lex.tp() == '&' || lex.tp() == '|' || lex.tp() == '^') { - res->chk_rvalue(lex.cur()); - int t = lex.tp(); - sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); - check_global_func(lex.cur(), name); - SrcLocation loc{lex.cur().loc}; + while (lex.tok() == tok_bitwise_and || lex.tok() == tok_bitwise_or || lex.tok() == tok_bitwise_xor) { + res->chk_rvalue(lex); + TokenType t = lex.tok(); + sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); + check_global_func(lex, name); + SrcLocation loc{lex.cur_location()}; lex.next(); auto x = parse_expr15(lex, code, false); - x->chk_rvalue(lex.cur()); + x->chk_rvalue(lex); // diagnose tricky bitwise precedence, like "flags & 0xFF != 0" (& has lower precedence) diagnose_bitwise_precedence(loc, name, res, x); @@ -1004,7 +985,7 @@ Expr* parse_expr14(Lexer& lex, CodeBlob& code, bool nv) { res->here = loc; res->set_val(t); res->flags = Expr::_IsRvalue; - res->deduce_type(lex.cur()); + res->deduce_type(lex); } return res; } @@ -1012,19 +993,19 @@ Expr* parse_expr14(Lexer& lex, CodeBlob& code, bool nv) { // parse E [ ? 
E : E ] Expr* parse_expr13(Lexer& lex, CodeBlob& code, bool nv) { Expr* res = parse_expr14(lex, code, nv); - if (lex.tp() == '?') { - res->chk_rvalue(lex.cur()); - SrcLocation loc{lex.cur().loc}; + if (lex.tok() == tok_question) { + res->chk_rvalue(lex); + SrcLocation loc{lex.cur_location()}; lex.next(); auto x = parse_expr(lex, code, false); - x->chk_rvalue(lex.cur()); - lex.expect(':'); + x->chk_rvalue(lex); + lex.expect(tok_colon, "':'"); auto y = parse_expr13(lex, code, false); - y->chk_rvalue(lex.cur()); + y->chk_rvalue(lex); res = new Expr{Expr::_CondExpr, {res, x, y}}; res->here = loc; res->flags = Expr::_IsRvalue; - res->deduce_type(lex.cur()); + res->deduce_type(lex); } return res; } @@ -1032,42 +1013,42 @@ Expr* parse_expr13(Lexer& lex, CodeBlob& code, bool nv) { // parse LE1 (= | += | -= | ... ) E2 Expr* parse_expr10(Lexer& lex, CodeBlob& code, bool nv) { auto x = parse_expr13(lex, code, nv); - int t = lex.tp(); - if (t == _PlusLet || t == _MinusLet || t == _TimesLet || t == _DivLet || t == _DivRLet || t == _DivCLet || - t == _ModLet || t == _ModCLet || t == _ModRLet || t == _LshiftLet || t == _RshiftLet || t == _RshiftCLet || - t == _RshiftRLet || t == _AndLet || t == _OrLet || t == _XorLet) { - x->chk_lvalue(lex.cur()); - x->chk_rvalue(lex.cur()); - sym_idx_t name = symbols.lookup_add(std::string{"^_"} + lex.cur().str + "_"); - check_global_func(lex.cur(), name); - SrcLocation loc{lex.cur().loc}; + TokenType t = lex.tok(); + if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_divR || t == tok_set_divC || + t == tok_set_mod || t == tok_set_modC || t == tok_set_modR || t == tok_set_lshift || t == tok_set_rshift || t == tok_set_rshiftC || + t == tok_set_rshiftR || t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { + x->chk_lvalue(lex); + x->chk_rvalue(lex); + sym_idx_t name = symbols.lookup_add(std::string{"^_"} + lex.cur_str_std_string() + "_"); + check_global_func(lex, 
name); + SrcLocation loc{lex.cur_location()}; lex.next(); auto y = parse_expr10(lex, code, false); - y->chk_rvalue(lex.cur()); + y->chk_rvalue(lex); Expr* z = new Expr{Expr::_Apply, name, {x, y}}; z->here = loc; z->set_val(t); z->flags = Expr::_IsRvalue; - z->deduce_type(lex.cur()); + z->deduce_type(lex); Expr* res = new Expr{Expr::_Letop, {x->copy(), z}}; res->here = loc; res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue; res->set_val(t); - res->deduce_type(lex.cur()); + res->deduce_type(lex); return res; - } else if (t == '=') { - x->chk_lvalue(lex.cur()); - SrcLocation loc{lex.cur().loc}; + } else if (t == tok_assign) { + x->chk_lvalue(lex); + SrcLocation loc{lex.cur_location()}; lex.next(); auto y = parse_expr10(lex, code, false); - y->chk_rvalue(lex.cur()); + y->chk_rvalue(lex); x->predefine_vars(); x->define_new_vars(code); Expr* res = new Expr{Expr::_Letop, {x, y}}; res->here = loc; res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue; res->set_val(t); - res->deduce_type(lex.cur()); + res->deduce_type(lex); return res; } else { return x; @@ -1094,7 +1075,7 @@ void combine_parallel(val& x, const val y) { blk_fl::val parse_return_stmt(Lexer& lex, CodeBlob& code) { auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex.cur()); + expr->chk_rvalue(lex); try { // std::cerr << "in return: "; unify(expr->e_type, code.ret_type); @@ -1102,11 +1083,11 @@ blk_fl::val parse_return_stmt(Lexer& lex, CodeBlob& code) { std::ostringstream os; os << "previous function return type " << code.ret_type << " cannot be unified with return statement expression type " << expr->e_type << ": " << ue; - lex.cur().error(os.str()); + lex.error(os.str()); } std::vector tmp_vars = expr->pre_compile(code); - code.emplace_back(lex.cur().loc, Op::_Return, std::move(tmp_vars)); - lex.expect(';'); + code.emplace_back(lex.cur_location(), Op::_Return, std::move(tmp_vars)); + lex.expect(tok_semicolon, "';'"); return blk_fl::ret; } @@ -1119,177 +1100,177 @@ blk_fl::val 
parse_implicit_ret_stmt(Lexer& lex, CodeBlob& code) { std::ostringstream os; os << "previous function return type " << code.ret_type << " cannot be unified with implicit end-of-block return type " << ret_type << ": " << ue; - lex.cur().error(os.str()); + lex.error(os.str()); } - code.emplace_back(lex.cur().loc, Op::_Return); + code.emplace_back(lex.cur_location(), Op::_Return); return blk_fl::ret; } blk_fl::val parse_stmt(Lexer& lex, CodeBlob& code); blk_fl::val parse_block_stmt(Lexer& lex, CodeBlob& code, bool no_new_scope = false) { - lex.expect('{'); + lex.expect(tok_opbrace, "'{'"); if (!no_new_scope) { - open_scope(lex); + open_scope(lex.cur_location()); } blk_fl::val res = blk_fl::init; bool warned = false; - while (lex.tp() != '}') { + while (lex.tok() != tok_clbrace) { if (!(res & blk_fl::end) && !warned) { - lex.cur().loc.show_warning("unreachable code"); + lex.cur_location().show_warning("unreachable code"); warned = true; } blk_fl::combine(res, parse_stmt(lex, code)); } if (!no_new_scope) { - close_scope(lex); + close_scope(lex.cur_location()); } - lex.expect('}'); + lex.expect(tok_clbrace, "'}'"); return res; } blk_fl::val parse_repeat_stmt(Lexer& lex, CodeBlob& code) { - SrcLocation loc{lex.cur().loc}; - lex.expect(_Repeat); + SrcLocation loc{lex.cur_location()}; + lex.expect(tok_repeat, "'repeat'"); auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex.cur()); - auto cnt_type = TypeExpr::new_atomic(_Int); + expr->chk_rvalue(lex); + auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); try { unify(expr->e_type, cnt_type); } catch (UnifyError& ue) { std::ostringstream os; os << "repeat count value of type " << expr->e_type << " is not an integer: " << ue; - lex.cur().error(os.str()); + lex.error(os.str()); } std::vector tmp_vars = expr->pre_compile(code); if (tmp_vars.size() != 1) { - lex.cur().error("repeat count value is not a singleton"); + lex.error("repeat count value is not a singleton"); } Op& repeat_op = code.emplace_back(loc, Op::_Repeat, 
tmp_vars); code.push_set_cur(repeat_op.block0); blk_fl::val res = parse_block_stmt(lex, code); - code.close_pop_cur(lex.cur().loc); + code.close_pop_cur(lex.cur_location()); return res | blk_fl::end; } blk_fl::val parse_while_stmt(Lexer& lex, CodeBlob& code) { - SrcLocation loc{lex.cur().loc}; - lex.expect(_While); + SrcLocation loc{lex.cur_location()}; + lex.expect(tok_while, "'while'"); auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex.cur()); - auto cnt_type = TypeExpr::new_atomic(_Int); + expr->chk_rvalue(lex); + auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); try { unify(expr->e_type, cnt_type); } catch (UnifyError& ue) { std::ostringstream os; os << "while condition value of type " << expr->e_type << " is not an integer: " << ue; - lex.cur().error(os.str()); + lex.error(os.str()); } Op& while_op = code.emplace_back(loc, Op::_While); code.push_set_cur(while_op.block0); while_op.left = expr->pre_compile(code); - code.close_pop_cur(lex.cur().loc); + code.close_pop_cur(lex.cur_location()); if (while_op.left.size() != 1) { - lex.cur().error("while condition value is not a singleton"); + lex.error("while condition value is not a singleton"); } code.push_set_cur(while_op.block1); blk_fl::val res1 = parse_block_stmt(lex, code); - code.close_pop_cur(lex.cur().loc); + code.close_pop_cur(lex.cur_location()); return res1 | blk_fl::end; } blk_fl::val parse_do_stmt(Lexer& lex, CodeBlob& code) { - Op& while_op = code.emplace_back(lex.cur().loc, Op::_Until); - lex.expect(_Do); + Op& while_op = code.emplace_back(lex.cur_location(), Op::_Until); + lex.expect(tok_do, "'do'"); code.push_set_cur(while_op.block0); - open_scope(lex); + open_scope(lex.cur_location()); blk_fl::val res = parse_block_stmt(lex, code, true); - lex.expect(_Until); + lex.expect(tok_until, "'until'"); auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex.cur()); - close_scope(lex); - auto cnt_type = TypeExpr::new_atomic(_Int); + expr->chk_rvalue(lex); + close_scope(lex.cur_location()); + 
auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); try { unify(expr->e_type, cnt_type); } catch (UnifyError& ue) { std::ostringstream os; os << "`until` condition value of type " << expr->e_type << " is not an integer: " << ue; - lex.cur().error(os.str()); + lex.error(os.str()); } while_op.left = expr->pre_compile(code); - code.close_pop_cur(lex.cur().loc); + code.close_pop_cur(lex.cur_location()); if (while_op.left.size() != 1) { - lex.cur().error("`until` condition value is not a singleton"); + lex.error("`until` condition value is not a singleton"); } return res & ~blk_fl::empty; } blk_fl::val parse_try_catch_stmt(Lexer& lex, CodeBlob& code) { code.require_callxargs = true; - lex.expect(_Try); - Op& try_catch_op = code.emplace_back(lex.cur().loc, Op::_TryCatch); + lex.expect(tok_try, "'try'"); + Op& try_catch_op = code.emplace_back(lex.cur_location(), Op::_TryCatch); code.push_set_cur(try_catch_op.block0); blk_fl::val res0 = parse_block_stmt(lex, code); - code.close_pop_cur(lex.cur().loc); - lex.expect(_Catch); + code.close_pop_cur(lex.cur_location()); + lex.expect(tok_catch, "'catch'"); code.push_set_cur(try_catch_op.block1); - open_scope(lex); + open_scope(lex.cur_location()); Expr* expr = parse_expr(lex, code, true); - expr->chk_lvalue(lex.cur()); - TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(_Int)); + expr->chk_lvalue(lex); + TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int)); try { unify(expr->e_type, tvm_error_type); } catch (UnifyError& ue) { std::ostringstream os; os << "`catch` arguments have incorrect type " << expr->e_type << ": " << ue; - lex.cur().error(os.str()); + lex.error(os.str()); } expr->predefine_vars(); expr->define_new_vars(code); try_catch_op.left = expr->pre_compile(code); tolk_assert(try_catch_op.left.size() == 2 || try_catch_op.left.size() == 1); blk_fl::val res1 = parse_block_stmt(lex, code); - close_scope(lex); - 
code.close_pop_cur(lex.cur().loc); + close_scope(lex.cur_location()); + code.close_pop_cur(lex.cur_location()); blk_fl::combine_parallel(res0, res1); return res0; } -blk_fl::val parse_if_stmt(Lexer& lex, CodeBlob& code, int first_lex = _If) { - SrcLocation loc{lex.cur().loc}; - lex.expect(first_lex); +blk_fl::val parse_if_stmt(Lexer& lex, CodeBlob& code, TokenType first_lex = tok_if) { + SrcLocation loc{lex.cur_location()}; + lex.next(); auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex.cur()); - auto flag_type = TypeExpr::new_atomic(_Int); + expr->chk_rvalue(lex); + auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int); try { unify(expr->e_type, flag_type); } catch (UnifyError& ue) { std::ostringstream os; os << "`if` condition value of type " << expr->e_type << " is not an integer: " << ue; - lex.cur().error(os.str()); + lex.error(os.str()); } std::vector tmp_vars = expr->pre_compile(code); if (tmp_vars.size() != 1) { - lex.cur().error("condition value is not a singleton"); + lex.error("condition value is not a singleton"); } Op& if_op = code.emplace_back(loc, Op::_If, tmp_vars); code.push_set_cur(if_op.block0); blk_fl::val res1 = parse_block_stmt(lex, code); blk_fl::val res2 = blk_fl::init; - code.close_pop_cur(lex.cur().loc); - if (lex.tp() == _Else) { - lex.expect(_Else); + code.close_pop_cur(lex.cur_location()); + if (lex.tok() == tok_else) { + lex.expect(tok_else, "'else'"); code.push_set_cur(if_op.block1); res2 = parse_block_stmt(lex, code); - code.close_pop_cur(lex.cur().loc); - } else if (lex.tp() == _Elseif || lex.tp() == _Elseifnot) { + code.close_pop_cur(lex.cur_location()); + } else if (lex.tok() == tok_elseif || lex.tok() == tok_elseifnot) { code.push_set_cur(if_op.block1); - res2 = parse_if_stmt(lex, code, lex.tp()); - code.close_pop_cur(lex.cur().loc); + res2 = parse_if_stmt(lex, code, lex.tok()); + code.close_pop_cur(lex.cur_location()); } else { - if_op.block1 = std::make_unique(lex.cur().loc, Op::_Nop); + if_op.block1 = 
std::make_unique(lex.cur_location(), Op::_Nop); } - if (first_lex == _Ifnot || first_lex == _Elseifnot) { + if (first_lex == tok_ifnot || first_lex == tok_elseifnot) { std::swap(if_op.block0, if_op.block1); } blk_fl::combine_parallel(res1, res2); @@ -1297,41 +1278,41 @@ blk_fl::val parse_if_stmt(Lexer& lex, CodeBlob& code, int first_lex = _If) { } blk_fl::val parse_stmt(Lexer& lex, CodeBlob& code) { - switch (lex.tp()) { - case _Return: { + switch (lex.tok()) { + case tok_return: { lex.next(); return parse_return_stmt(lex, code); } - case '{': { + case tok_opbrace: { return parse_block_stmt(lex, code); } - case ';': { + case tok_semicolon: { lex.next(); return blk_fl::init; } - case _Repeat: + case tok_repeat: return parse_repeat_stmt(lex, code); - case _If: - case _Ifnot: - return parse_if_stmt(lex, code, lex.tp()); - case _Do: + case tok_if: + case tok_ifnot: + return parse_if_stmt(lex, code, lex.tok()); + case tok_do: return parse_do_stmt(lex, code); - case _While: + case tok_while: return parse_while_stmt(lex, code); - case _Try: + case tok_try: return parse_try_catch_stmt(lex, code); default: { auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex.cur()); + expr->chk_rvalue(lex); expr->pre_compile(code); - lex.expect(';'); + lex.expect(tok_semicolon, "';'"); return blk_fl::end; } } } CodeBlob* parse_func_body(Lexer& lex, FormalArgList arg_list, TypeExpr* ret_type, bool marked_as_pure) { - lex.expect('{'); + lex.expect(tok_opbrace, "'{'"); CodeBlob* blob = new CodeBlob{ret_type}; if (marked_as_pure) { blob->flags |= CodeBlob::_ForbidImpure; @@ -1339,9 +1320,9 @@ CodeBlob* parse_func_body(Lexer& lex, FormalArgList arg_list, TypeExpr* ret_type blob->import_params(std::move(arg_list)); blk_fl::val res = blk_fl::init; bool warned = false; - while (lex.tp() != '}') { + while (lex.tok() != tok_clbrace) { if (!(res & blk_fl::end) && !warned) { - lex.cur().loc.show_warning("unreachable code"); + lex.cur_location().show_warning("unreachable code"); warned = true; } 
blk_fl::combine(res, parse_stmt(lex, *blob)); @@ -1349,15 +1330,15 @@ CodeBlob* parse_func_body(Lexer& lex, FormalArgList arg_list, TypeExpr* ret_type if (res & blk_fl::end) { parse_implicit_ret_stmt(lex, *blob); } - blob->close_blk(lex.cur().loc); - lex.expect('}'); + blob->close_blk(lex.cur_location()); + lex.expect(tok_clbrace, "'}'"); return blob; } SymValAsmFunc* parse_asm_func_body(Lexer& lex, TypeExpr* func_type, const FormalArgList& arg_list, TypeExpr* ret_type, bool marked_as_pure) { - auto loc = lex.cur().loc; - lex.expect(_Asm); + SrcLocation loc = lex.cur_location(); + lex.expect(tok_asm, "'asm'"); int cnt = (int)arg_list.size(); int width = ret_type->get_width(); if (width < 0 || width > 16) { @@ -1379,15 +1360,13 @@ SymValAsmFunc* parse_asm_func_body(Lexer& lex, TypeExpr* func_type, const Formal } std::vector asm_ops; std::vector arg_order, ret_order; - if (lex.tp() == '(') { - lex.expect('('); - if (lex.tp() != _Mapsto) { + if (lex.tok() == tok_oppar) { + lex.next(); + if (lex.tok() != tok_mapsto) { std::vector visited(cnt, false); for (int i = 0; i < cnt; i++) { - if (lex.tp() != _Ident) { - lex.expect(_Ident); - } - auto sym = lookup_symbol(lex.cur().val); + lex.check(tok_identifier, "identifier"); + auto sym = lookup_symbol(lex.cur_sym_idx()); int j; for (j = 0; j < cnt; j++) { if (std::get(arg_list[j]) == sym) { @@ -1395,10 +1374,10 @@ SymValAsmFunc* parse_asm_func_body(Lexer& lex, TypeExpr* func_type, const Formal } } if (j == cnt) { - lex.cur().error("formal argument name expected"); + lex.error("formal argument name expected"); } if (visited[j]) { - lex.cur().error("formal argument listed twice"); + lex.error("formal argument listed twice"); } visited[j] = true; int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; @@ -1409,29 +1388,29 @@ SymValAsmFunc* parse_asm_func_body(Lexer& lex, TypeExpr* func_type, const Formal } tolk_assert(arg_order.size() == (unsigned)tot_width); } - if (lex.tp() == _Mapsto) { - lex.expect(_Mapsto); + if (lex.tok() 
== tok_mapsto) { + lex.next(); std::vector visited(width, false); for (int i = 0; i < width; i++) { - if (lex.tp() != Lexem::Number || lex.cur().str.size() > 3) { - lex.expect(Lexem::Number); + if (lex.tok() != tok_int_const || lex.cur_str().size() > 3) { + lex.expect(tok_int_const, "number"); } - int j = atoi(lex.cur().str.c_str()); + int j = atoi(lex.cur_str_std_string().c_str()); if (j < 0 || j >= width || visited[j]) { - lex.cur().error("expected integer return value index 0 .. width-1"); + lex.error("expected integer return value index 0 .. width-1"); } visited[j] = true; ret_order.push_back(j); lex.next(); } } - lex.expect(')'); + lex.expect(tok_clpar, "')'"); } - while (lex.tp() == _String) { - std::string ops = lex.cur().str; // \n\n... + while (lex.tok() == tok_string_const) { + std::string ops = lex.cur_str_std_string(); // \n\n... std::string op; for (const char& c : ops) { - if (c == '\n') { + if (c == '\n' || c == '\r') { if (!op.empty()) { asm_ops.push_back(AsmOp::Parse(op, cnt, width)); if (asm_ops.back().is_custom()) { @@ -1452,9 +1431,9 @@ SymValAsmFunc* parse_asm_func_body(Lexer& lex, TypeExpr* func_type, const Formal lex.next(); } if (asm_ops.empty()) { - throw ParseError{lex.cur().loc, "string with assembler instruction expected"}; + lex.error("string with assembler instruction expected"); } - lex.expect(';'); + lex.expect(tok_semicolon, "';'"); std::string crc_s; for (const AsmOp& asm_op : asm_ops) { crc_s += asm_op.op; @@ -1475,34 +1454,34 @@ SymValAsmFunc* parse_asm_func_body(Lexer& lex, TypeExpr* func_type, const Formal std::vector parse_type_var_list(Lexer& lex) { std::vector res; - lex.expect(_Forall); + lex.expect(tok_forall, "'forall'"); int idx = 0; while (true) { - if (lex.tp() == _Type) { + if (lex.tok() == tok_type) { lex.next(); } - if (lex.tp() != _Ident) { - throw ParseError{lex.cur().loc, "free type identifier expected"}; + if (lex.tok() != tok_identifier) { + lex.error("free type identifier expected"); } - auto loc = 
lex.cur().loc; - if (prohibited_var_names.count(symbols.get_name(lex.cur().val))) { - throw ParseError{loc, PSTRING() << "symbol `" << symbols.get_name(lex.cur().val) + SrcLocation loc = lex.cur_location(); + if (prohibited_var_names.count(symbols.get_name(lex.cur_sym_idx()))) { + throw ParseError{loc, PSTRING() << "symbol `" << symbols.get_name(lex.cur_sym_idx()) << "` cannot be redefined as a variable"}; } - SymDef* new_sym_def = define_symbol(lex.cur().val, true, loc); + SymDef* new_sym_def = define_symbol(lex.cur_sym_idx(), true, loc); if (!new_sym_def || new_sym_def->value) { - lex.cur().error_at("redefined type variable `", "`"); + lex.error_at("redefined type variable `", "`"); } auto var = TypeExpr::new_var(idx); - new_sym_def->value = new SymValType{SymVal::_Typename, idx++, var}; + new_sym_def->value = new SymValType{SymValKind::_Typename, idx++, var}; res.push_back(var); lex.next(); - if (lex.tp() != ',') { + if (lex.tok() != tok_comma) { break; } lex.next(); } - lex.expect(_Mapsto); + lex.expect(tok_mapsto, "'->'"); return res; } @@ -1608,168 +1587,169 @@ void detect_if_function_just_wraps_another(SymValCodeFunc* v_current, const td:: } } -static td::RefInt256 calculate_method_id_by_func_name(const std::string &func_name) { - unsigned int crc = td::crc16(func_name); +static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name) { + unsigned int crc = td::crc16(static_cast(func_name)); return td::make_refint((crc & 0xffff) | 0x10000); } // todo rewrite function declaration parsing completely, it's weird void parse_func_def(Lexer& lex) { - SrcLocation loc{lex.cur().loc}; - open_scope(lex); + SrcLocation loc = lex.cur_location(); + open_scope(loc); std::vector type_vars; bool is_get_method = false; - if (lex.tp() == _Forall) { + if (lex.tok() == tok_forall) { type_vars = parse_type_var_list(lex); - } else if (lex.tp() == _Get) { + } else if (lex.tok() == tok_get) { is_get_method = true; lex.next(); } auto ret_type = parse_type(lex); - 
if (lex.tp() != _Ident) { - throw ParseError{lex.cur().loc, "function name identifier expected"}; + if (lex.tok() != tok_identifier) { + lex.error("function name identifier expected"); } - Lexem func_name = lex.cur(); + std::string func_name = lex.cur_str_std_string(); + int func_sym_idx = lex.cur_sym_idx(); lex.next(); FormalArgList arg_list = parse_formal_args(lex); bool marked_as_pure = false; - if (lex.tp() == _Impure) { + if (lex.tok() == tok_impure) { static bool warning_shown = false; if (!warning_shown) { - lex.cur().loc.show_warning("`impure` specifier is deprecated. All functions are impure by default, use `pure` to mark a function as pure"); + lex.cur_location().show_warning("`impure` specifier is deprecated. All functions are impure by default, use `pure` to mark a function as pure"); warning_shown = true; } lex.next(); - } else if (lex.tp() == _Pure) { + } else if (lex.tok() == tok_pure) { marked_as_pure = true; lex.next(); } int flags_inline = 0; - if (lex.tp() == _Inline) { + if (lex.tok() == tok_inline) { flags_inline = SymValFunc::flagInline; lex.next(); - } else if (lex.tp() == _InlineRef) { + } else if (lex.tok() == tok_inlineref) { flags_inline = SymValFunc::flagInlineRef; lex.next(); } td::RefInt256 method_id; - if (lex.tp() == _MethodId) { + if (lex.tok() == tok_method_id) { if (is_get_method) { - lex.cur().error("both `get` and `method_id` are not allowed"); + lex.error("both `get` and `method_id` are not allowed"); } lex.next(); - if (lex.tp() == '(') { // method_id(N) - lex.expect('('); - method_id = td::string_to_int256(lex.cur().str); - lex.expect(Lexem::Number); + if (lex.tok() == tok_oppar) { // method_id(N) + lex.next(); + method_id = td::string_to_int256(lex.cur_str_std_string()); + lex.expect(tok_int_const, "number"); if (method_id.is_null()) { - lex.cur().error_at("invalid integer constant `", "`"); + lex.error_at("invalid integer constant `", "`"); } - lex.expect(')'); + lex.expect(tok_clpar, "')'"); } else { static bool 
warning_shown = false; if (!warning_shown) { - lex.cur().loc.show_warning("`method_id` specifier is deprecated, use `get` keyword.\nExample: `get int seqno() { ... }`"); + lex.cur_location().show_warning("`method_id` specifier is deprecated, use `get` keyword.\nExample: `get int seqno() { ... }`"); warning_shown = true; } - method_id = calculate_method_id_by_func_name(func_name.str); + method_id = calculate_method_id_by_func_name(func_name); } } if (is_get_method) { tolk_assert(method_id.is_null()); - method_id = calculate_method_id_by_func_name(func_name.str); + method_id = calculate_method_id_by_func_name(func_name); for (const SymDef* other : glob_get_methods) { if (!td::cmp(dynamic_cast(other->value)->method_id, method_id)) { - lex.cur().error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" + func_name.str + "` produce the same hash. Consider renaming one of these functions."); + lex.error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" + func_name + "` produce the same hash. Consider renaming one of these functions."); } } } TypeExpr* func_type = TypeExpr::new_map(extract_total_arg_type(arg_list), ret_type); func_type = compute_type_closure(func_type, type_vars); - if (lex.tp() == _Builtin) { - const SymDef* builtin_func = lookup_symbol(func_name.str); + if (lex.tok() == tok_builtin) { + const SymDef* builtin_func = lookup_symbol(symbols.lookup(func_name)); const SymValFunc* func_val = builtin_func ? 
dynamic_cast(builtin_func->value) : nullptr; if (!func_val || !func_val->is_builtin()) { - lex.cur().error("`builtin` used for non-builtin function"); + lex.error("`builtin` used for non-builtin function"); } #ifdef TOLK_DEBUG // in release, we don't need this check, since `builtin` is used only in stdlib.tolk, which is our responsibility if (!func_val->sym_type->equals_to(func_type) || func_val->is_marked_as_pure() != marked_as_pure) { - lex.cur().error("declaration for `builtin` function doesn't match an actual one"); + lex.error("declaration for `builtin` function doesn't match an actual one"); } #endif lex.next(); - lex.expect(';'); - close_scope(lex); + lex.expect(tok_semicolon, "';'"); + close_scope(lex.cur_location()); return; } - if (lex.tp() != ';' && lex.tp() != '{' && lex.tp() != _Asm) { - lex.expect('{', "function body block"); + if (lex.tok() != tok_semicolon && lex.tok() != tok_opbrace && lex.tok() != tok_asm) { + lex.expect(tok_opbrace, "function body block"); } if (verbosity >= 1) { - std::cerr << "function " << func_name.str << " : " << func_type << std::endl; + std::cerr << "function " << func_name << " : " << func_type << std::endl; } - SymDef* func_sym = define_global_symbol(func_name.val, 0, loc); + SymDef* func_sym = define_global_symbol(func_sym_idx, 0, loc); tolk_assert(func_sym); SymValFunc* func_sym_val = dynamic_cast(func_sym->value); if (func_sym->value) { - if (func_sym->value->type != SymVal::_Func || !func_sym_val) { - lex.cur().error("was not defined as a function before"); + if (func_sym->value->kind != SymValKind::_Func || !func_sym_val) { + lex.error("was not defined as a function before"); } try { unify(func_sym_val->sym_type, func_type); } catch (UnifyError& ue) { std::ostringstream os; - os << "previous type of function " << func_name.str << " : " << func_sym_val->sym_type + os << "previous type of function " << func_name << " : " << func_sym_val->sym_type << " cannot be unified with new type " << func_type << ": " << ue; - 
lex.cur().error(os.str()); + lex.error(os.str()); } } - if (lex.tp() == ';') { + if (lex.tok() == tok_semicolon) { make_new_glob_func(func_sym, func_type, marked_as_pure); lex.next(); - } else if (lex.tp() == '{') { + } else if (lex.tok() == tok_opbrace) { if (dynamic_cast(func_sym_val)) { - lex.cur().error("function `"s + func_name.str + "` has been already defined as an assembler built-in"); + lex.error("function `" + func_name + "` has been already defined as an assembler built-in"); } SymValCodeFunc* func_sym_code; if (func_sym_val) { func_sym_code = dynamic_cast(func_sym_val); if (!func_sym_code) { - lex.cur().error("function `"s + func_name.str + "` has been already defined in an yet-unknown way"); + lex.error("function `" + func_name + "` has been already defined in an yet-unknown way"); } } else { func_sym_code = make_new_glob_func(func_sym, func_type, marked_as_pure); } if (func_sym_code->code) { - lex.cur().error("redefinition of function `"s + func_name.str + "`"); + lex.error("redefinition of function `"s + func_name + "`"); } if (marked_as_pure && ret_type->get_width() == 0) { - lex.cur().error("a pure function should return something, otherwise it will be optimized out anyway"); + lex.error("a pure function should return something, otherwise it will be optimized out anyway"); } CodeBlob* code = parse_func_body(lex, arg_list, ret_type, marked_as_pure); - code->name = func_name.str; + code->name = func_name; code->loc = loc; // code->print(std::cerr); // !!!DEBUG!!! 
func_sym_code->code = code; detect_if_function_just_wraps_another(func_sym_code, method_id); } else { - Lexem asm_lexem = lex.cur(); + SrcLocation asm_location = lex.cur_location(); SymValAsmFunc* asm_func = parse_asm_func_body(lex, func_type, arg_list, ret_type, marked_as_pure); #ifdef TOLK_DEBUG - asm_func->name = func_name.str; + asm_func->name = func_name; #endif if (func_sym_val) { if (dynamic_cast(func_sym_val)) { - asm_lexem.error("function `"s + func_name.str + "` was already declared as an ordinary function"); + throw ParseError(asm_location, "function `" + func_name + "` was already declared as an ordinary function"); } SymValAsmFunc* asm_func_old = dynamic_cast(func_sym_val); if (asm_func_old) { if (asm_func->crc != asm_func_old->crc) { - asm_lexem.error("redefinition of built-in assembler function `"s + func_name.str + "`"); + throw ParseError(asm_location, "redefinition of built-in assembler function `" + func_name + "`"); } } else { - asm_lexem.error("redefinition of previously (somehow) defined function `"s + func_name.str + "`"); + throw ParseError(asm_location, "redefinition of previously (somehow) defined function `" + func_name + "`"); } } func_sym->value = asm_func; @@ -1777,126 +1757,87 @@ void parse_func_def(Lexer& lex) { if (method_id.not_null()) { auto val = dynamic_cast(func_sym->value); if (!val) { - lex.cur().error("cannot set method id for unknown function `"s + func_name.str + "`"); + lex.error("cannot set method id for unknown function `" + func_name + "`"); } if (val->method_id.is_null()) { val->method_id = std::move(method_id); } else if (td::cmp(val->method_id, method_id) != 0) { - lex.cur().error("integer method identifier for `"s + func_name.str + "` changed from " + + lex.error("integer method identifier for `" + func_name + "` changed from " + val->method_id->to_dec_string() + " to a different value " + method_id->to_dec_string()); } } if (flags_inline) { auto val = dynamic_cast(func_sym->value); if (!val) { - 
lex.cur().error("cannot set unknown function `"s + func_name.str + "` as an inline"); + lex.error("cannot set unknown function `" + func_name + "` as an inline"); } if (!val->is_inline() && !val->is_inline_ref()) { val->flags |= flags_inline; } else if ((val->flags & (SymValFunc::flagInline | SymValFunc::flagInlineRef)) != flags_inline) { - lex.cur().error("inline mode for `"s + func_name.str + "` changed with respect to a previous declaration"); + lex.error("inline mode for `" + func_name + "` changed with respect to a previous declaration"); } } if (is_get_method) { auto val = dynamic_cast(func_sym->value); if (!val) { - lex.cur().error("cannot set unknown function `"s + func_name.str + "` as a get method"); + lex.error("cannot set unknown function `" + func_name + "` as a get method"); } val->flags |= SymValFunc::flagGetMethod; glob_get_methods.push_back(func_sym); } if (verbosity >= 1) { - std::cerr << "new type of function " << func_name.str << " : " << func_type << std::endl; + std::cerr << "new type of function " << func_name << " : " << func_type << std::endl; } - close_scope(lex); + close_scope(lex.cur_location()); } -std::string tolk_ver_test = tolk_version; - void parse_pragma(Lexer& lex) { - auto pragma = lex.cur(); - lex.next(); - if (lex.tp() != _Ident) { - lex.expect(_Ident, "pragma name expected"); - } - auto pragma_name = lex.cur().str; - lex.next(); - if (!pragma_name.compare("version") || !pragma_name.compare("not-version")) { - bool negate = !pragma_name.compare("not-version"); + SrcLocation loc = lex.cur_location(); + lex.next_special(tok_pragma_name, "pragma name"); + std::string_view pragma_name = lex.cur_str(); + if (pragma_name == "version") { + lex.next(); + TokenType cmp_tok = lex.tok(); char op = '='; bool eq = false; + if (cmp_tok == tok_gt || cmp_tok == tok_geq) { + op = '>'; + eq = cmp_tok == tok_geq; + } else if (cmp_tok == tok_lt || cmp_tok == tok_leq) { + op = '<'; + eq = cmp_tok == tok_leq; + } else if (cmp_tok == tok_eq) { + op = 
'='; + } else if (cmp_tok == tok_bitwise_xor) { + op = '^'; + } else { + lex.error("invalid comparison operator"); + } + lex.next_special(tok_semver, "semver"); + std::string_view pragma_value = lex.cur_str(); int sem_ver[3] = {0, 0, 0}; char segs = 1; - auto stoi = [&](const std::string& s) { - auto R = td::to_integer_safe(s); + auto stoi = [&](std::string_view s) { + auto R = td::to_integer_safe(static_cast(s)); if (R.is_error()) { - lex.cur().error("invalid semver format"); + lex.error("invalid semver format"); } return R.move_as_ok(); }; - if (lex.tp() == _Number) { - sem_ver[0] = stoi(lex.cur().str); - } else if (lex.tp() == _Ident) { - auto id1 = lex.cur().str; - char ch1 = id1[0]; - if ((ch1 == '>') || (ch1 == '<') || (ch1 == '=') || (ch1 == '^')) { - op = ch1; - } else { - lex.cur().error("unexpected comparator operation"); - } - if (id1.length() < 2) { - lex.cur().error("expected number after comparator"); - } - if (id1[1] == '=') { - eq = true; - if (id1.length() < 3) { - lex.cur().error("expected number after comparator"); - } - sem_ver[0] = stoi(id1.substr(2)); - } else { - sem_ver[0] = stoi(id1.substr(1)); - } - } else { - lex.cur().error("expected semver with optional comparator"); - } - lex.next(); - if (lex.tp() != ';') { - if (lex.tp() != _Ident || lex.cur().str[0] != '.') { - lex.cur().error("invalid semver format"); - } - sem_ver[1] = stoi(lex.cur().str.substr(1)); - segs = 2; - lex.next(); - } - if (lex.tp() != ';') { - if (lex.tp() != _Ident || lex.cur().str[0] != '.') { - lex.cur().error("invalid semver format"); - } - sem_ver[2] = stoi(lex.cur().str.substr(1)); - segs = 3; - lex.next(); + std::istringstream iss_value(static_cast(pragma_value)); + for (int idx = 0; idx < 3; idx++) { + std::string s{"0"}; + std::getline(iss_value, s, '.'); + sem_ver[idx] = stoi(s); } // End reading semver from source code int tolk_ver[3] = {0, 0, 0}; - std::istringstream iss(tolk_ver_test); - std::string s; + std::istringstream iss(tolk_version); for (int idx = 
0; idx < 3; idx++) { + std::string s; std::getline(iss, s, '.'); tolk_ver[idx] = stoi(s); } // End parsing embedded semver - std::string semver_expr; - if (negate) { - semver_expr += '!'; - } - semver_expr += op; - if (eq) { - semver_expr += '='; - } - for (int idx = 0; idx < 3; idx++) { - semver_expr += std::to_string(sem_ver[idx]); - if (idx < 2) - semver_expr += '.'; - } bool match = true; switch (op) { case '=': @@ -1929,134 +1870,94 @@ void parse_pragma(Lexer& lex) { match = false; } break; + default: + __builtin_unreachable(); } - if ((match && negate) || (!match && !negate)) { - pragma.error(std::string("Tolk version ") + tolk_ver_test + " does not satisfy condition " + semver_expr); - } - } else if (!pragma_name.compare("test-version-set")) { - if (lex.tp() != _String) { - lex.cur().error("version string expected"); + if (!match) { + throw ParseError(loc, std::string("Tolk version ") + tolk_version + " does not satisfy this condition"); } - tolk_ver_test = lex.cur().str; - lex.next(); } else if (pragma_name == pragma_allow_post_modification.name()) { - pragma_allow_post_modification.enable(lex.cur().loc); + pragma_allow_post_modification.enable(loc); } else if (pragma_name == pragma_compute_asm_ltr.name()) { - pragma_compute_asm_ltr.enable(lex.cur().loc); + pragma_compute_asm_ltr.enable(loc); } else if (pragma_name == pragma_remove_unused_functions.name()) { - pragma_remove_unused_functions.enable(lex.cur().loc); + pragma_remove_unused_functions.enable(loc); } else { - lex.cur().error(std::string{"unknown pragma `"} + pragma_name + "`"); + lex.error("unknown pragma name"); } - lex.expect(';'); + lex.next(); + lex.expect(tok_semicolon, "';'"); } -std::vector source_fdescr; +AllRegisteredSrcFiles all_src_files; +std::string stdlib_filename; -std::map source_files; -std::stack inclusion_locations; - -void parse_include(Lexer& lex, const FileDescr* fdescr) { - auto include = lex.cur(); - lex.expect(_IncludeHashtag); - if (lex.tp() != _String) { - 
lex.expect(_String, "source file name"); +void parse_include(Lexer& lex, const SrcFile* parent_file) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_include, "#include"); + if (lex.tok() != tok_string_const) { + lex.expect(tok_string_const, "source file name"); } - std::string val = lex.cur().str; - std::string parent_dir = fdescr->filename; - if (parent_dir.rfind('/') != std::string::npos) { - val = parent_dir.substr(0, parent_dir.rfind('/') + 1) + val; + std::string val = static_cast(lex.cur_str()); + std::string parent_dir = parent_file->rel_filename; + if (size_t rc = parent_dir.rfind('/'); rc != std::string::npos) { + val = parent_dir.substr(0, rc + 1) + val; } lex.next(); - lex.expect(';'); - if (!parse_source_file(val.c_str(), include, false)) { - include.error(std::string{"failed parsing included file `"} + val + "`"); + lex.expect(tok_semicolon, "';'"); + if (!parse_source_file(val.c_str(), loc)) { + lex.error(std::string{"failed parsing included file `"} + val + "`"); } } -bool parse_source(std::istream* is, FileDescr* fdescr) { - SourceReader reader{is, fdescr}; - Lexer lex{reader, ";,()[] ~."}; - // previously, FunC had lisp-style comments, - // but Tolk supports traditional (slash) comments alongside (lisp-style will be deleted soon) - lex.set_comment_tokens(";;", "{-", "-}"); - lex.set_comment2_tokens("//", "/*", "*/"); - lex.start_parsing(); - while (lex.tp() != _Eof) { - if (lex.tp() == _PragmaHashtag) { +void parse_source(const SrcFile* file) { + Lexer lex(file); + while (!lex.is_eof()) { + if (lex.tok() == tok_pragma) { parse_pragma(lex); - } else if (lex.tp() == _IncludeHashtag) { - parse_include(lex, fdescr); - } else if (lex.tp() == _Global) { + } else if (lex.tok() == tok_include) { + parse_include(lex, file); + } else if (lex.tok() == tok_global) { parse_global_var_decls(lex); - } else if (lex.tp() == _Const) { + } else if (lex.tok() == tok_const) { parse_const_decls(lex); } else { parse_func_def(lex); } } - return true; } -bool 
parse_source_file(const char* filename, Lexem lex, bool is_main) { +bool parse_source_file(const char* filename, SrcLocation loc_included_from) { + const SrcFile* included_from = loc_included_from.get_src_file(); if (!filename || !*filename) { - auto msg = "source file name is an empty string"; - if (lex.tp) { - lex.error(msg); - } else { - throw Fatal{msg}; - } + throw ParseError(loc_included_from, "source file name is an empty string"); } auto path_res = read_callback(ReadCallback::Kind::Realpath, filename); if (path_res.is_error()) { auto error = path_res.move_as_error(); - lex.error(error.message().c_str()); + throw ParseError(loc_included_from, error.message().c_str()); return false; } - std::string real_filename = path_res.move_as_ok(); - auto it = source_files.find(real_filename); - if (it != source_files.end()) { - it->second->is_main |= is_main; + std::string abs_filename = path_res.move_as_ok(); + const SrcFile* file = all_src_files.find_file(abs_filename); + if (file != nullptr) { if (verbosity >= 2) { - if (lex.tp) { - lex.loc.show_warning(std::string{"skipping file "} + real_filename + " because it was already included"); - } else { - std::cerr << "warning: skipping file " << real_filename << " because it was already included" << std::endl; - } + std::cerr << "skipping file " << abs_filename << " because it was already parsed" << '\n'; } return true; } - if (lex.tp) { // included + if (included_from) { generated_from += std::string{"incl:"}; } generated_from += std::string{"`"} + filename + "` "; - FileDescr* cur_source = new FileDescr{filename}; - source_files[real_filename] = cur_source; - cur_source->is_main = is_main; - source_fdescr.push_back(cur_source); - auto file_res = read_callback(ReadCallback::Kind::ReadFile, filename); - if (file_res.is_error()) { - auto msg = file_res.move_as_error().message().str(); - if (lex.tp) { - lex.error(msg); - } else { - throw Fatal{msg}; - } + td::Result text = read_callback(ReadCallback::Kind::ReadFile, 
abs_filename.c_str()); + if (text.is_error()) { + throw ParseError(loc_included_from, text.move_as_error().message().str()); } - auto file_str = file_res.move_as_ok(); - std::stringstream ss{file_str}; - inclusion_locations.push(lex.loc); - bool res = parse_source(&ss, cur_source); - inclusion_locations.pop(); - return res; -} - -bool parse_source_stdin() { - FileDescr* cur_source = new FileDescr{"stdin", true}; - cur_source->is_main = true; - source_fdescr.push_back(cur_source); - return parse_source(&std::cin, cur_source); + file = all_src_files.register_file(filename, abs_filename, text.move_as_ok(), included_from); + parse_source(file); + return true; } } // namespace tolk diff --git a/tolk/platform-utils.h b/tolk/platform-utils.h new file mode 100644 index 000000000..7b16226e7 --- /dev/null +++ b/tolk/platform-utils.h @@ -0,0 +1,44 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. 
If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. +*/ +#pragma once + +#if __GNUC__ +#define GNU_ATTRIBUTE_COLD [[gnu::cold]] +#define GNU_ATTRIBUTE_NORETURN [[gnu::noreturn]] +#define GNU_ATTRIBUTE_ALWAYS_INLINE [[gnu::always_inline]] +#else +#define GNU_ATTRIBUTE_COLD +#define GNU_ATTRIBUTE_NORETURN [[noreturn]] +#define GNU_ATTRIBUTE_ALWAYS_INLINE +#endif + +#if defined(__GNUC__) +#define LIKELY(x) __builtin_expect(x, true) +#define UNLIKELY(x) __builtin_expect(x, false) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif diff --git a/tolk/src-file.cpp b/tolk/src-file.cpp new file mode 100644 index 000000000..93a92e60f --- /dev/null +++ b/tolk/src-file.cpp @@ -0,0 +1,164 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "src-file.h" +#include + +namespace tolk { + +extern AllRegisteredSrcFiles all_src_files; +extern std::string stdlib_filename; + +static_assert(sizeof(SrcLocation) == 8); + +const SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const { + for (const SrcFile* file : all_src_files) { + if (file->file_id == file_id) { + return file; + } + } + return nullptr; +} + +const SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const { + for (const SrcFile* file : all_src_files) { + if (file->abs_filename == abs_filename) { + return file; + } + } + return nullptr; +} + +const SrcFile* AllRegisteredSrcFiles::register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text, const SrcFile* included_from) { + SrcFile* created = new SrcFile(++last_file_id, rel_filename, abs_filename, std::move(text), included_from); + all_src_files.push_back(created); + return created; +} + + +bool SrcFile::is_entrypoint_file() const { + return file_id == (stdlib_filename.empty() ? 0 : 1); +} + +bool SrcFile::is_offset_valid(int offset) const { + return offset >= 0 && offset < static_cast(text.size()); +} + +SrcFile::SrcPosition SrcFile::convert_offset(int offset) const { + if (!is_offset_valid(offset)) { + return SrcPosition{offset, -1, -1, "invalid offset"}; + } + + int line_idx = 0; + int char_idx = 0; + int line_offset = 0; + for (int i = 0; i < offset; ++i) { + char c = text[i]; + if (c == '\n') { + line_idx++; + char_idx = 0; + line_offset = i + 1; + } else { + char_idx++; + } + } + + size_t line_len = text.size() - line_offset; + for (int i = line_offset; i < static_cast(text.size()); ++i) { + if (text[i] == '\n') { + line_len = i - line_offset; + break; + } + } + + std::string_view line_str(text.data() + line_offset, line_len); + return SrcPosition{offset, line_idx + 1, char_idx + 1, line_str}; +} + + +std::ostream& operator<<(std::ostream& os, const SrcFile* src_file) { + return os << (src_file ? 
src_file->rel_filename : "unknown-location"); +} + +std::ostream& operator<<(std::ostream& os, const Fatal& fatal) { + return os << fatal.what(); +} + +const SrcFile* SrcLocation::get_src_file() const { + return all_src_files.find_file(file_id); +} + +void SrcLocation::show(std::ostream& os) const { + const SrcFile* src_file = get_src_file(); + os << src_file; + if (src_file && src_file->is_offset_valid(char_offset)) { + SrcFile::SrcPosition pos = src_file->convert_offset(char_offset); + os << ':' << pos.line_no << ':' << pos.char_no; + } +} + +void SrcLocation::show_context(std::ostream& os) const { + const SrcFile* src_file = get_src_file(); + if (!src_file || !src_file->is_offset_valid(char_offset)) { + return; + } + SrcFile::SrcPosition pos = src_file->convert_offset(char_offset); + os << " " << pos.line_str << "\n"; + + os << " "; + for (int i = 1; i < pos.char_no; ++i) { + os << ' '; + } + os << '^' << "\n"; +} + +std::ostream& operator<<(std::ostream& os, SrcLocation loc) { + loc.show(os); + return os; +} + +void SrcLocation::show_general_error(std::ostream& os, const std::string& message, const std::string& err_type) const { + show(os); + if (!err_type.empty()) { + os << ": " << err_type; + } + os << ": " << message << std::endl; + show_context(os); +} + +void SrcLocation::show_note(const std::string& err_msg) const { + show_general_error(std::cerr, err_msg, "note"); +} + +void SrcLocation::show_warning(const std::string& err_msg) const { + show_general_error(std::cerr, err_msg, "warning"); +} + +void SrcLocation::show_error(const std::string& err_msg) const { + show_general_error(std::cerr, err_msg, "error"); +} + +std::ostream& operator<<(std::ostream& os, const ParseError& error) { + error.show(os); + return os; +} + +void ParseError::show(std::ostream& os) const { + os << where << ": error: " << message << std::endl; + where.show_context(os); +} + +} // namespace tolk diff --git a/tolk/src-file.h b/tolk/src-file.h new file mode 100644 index 
000000000..0f76d787e --- /dev/null +++ b/tolk/src-file.h @@ -0,0 +1,120 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include +#include + +namespace tolk { + +struct SrcFile { + struct SrcPosition { + int offset; + int line_no; + int char_no; + std::string_view line_str; + }; + + int file_id; + std::string rel_filename; + std::string abs_filename; + std::string text; + const SrcFile* included_from{nullptr}; + + SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text, const SrcFile* included_from) + : file_id(file_id) + , rel_filename(std::move(rel_filename)) + , abs_filename(std::move(abs_filename)) + , text(std::move(text)) + , included_from(included_from) { } + + SrcFile(const SrcFile& other) = delete; + SrcFile &operator=(const SrcFile&) = delete; + + bool is_entrypoint_file() const; + bool is_offset_valid(int offset) const; + SrcPosition convert_offset(int offset) const; +}; + +class AllRegisteredSrcFiles { + std::vector all_src_files; + int last_file_id = -1; + +public: + const SrcFile *find_file(int file_id) const; + const SrcFile* find_file(const std::string& abs_filename) const; + const SrcFile* register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text, const SrcFile* included_from); + const std::vector& 
get_all_files() const { return all_src_files; } +}; + +struct Fatal final : std::exception { + std::string message; + + explicit Fatal(std::string _msg) : message(std::move(_msg)) { + } + const char* what() const noexcept override { + return message.c_str(); + } +}; + +std::ostream& operator<<(std::ostream& os, const Fatal& fatal); + +// SrcLocation points to a location (line, column) in some loaded .tolk source SrcFile. +// Note, that instead of storing src_file, line_no, etc., only 2 ints are stored. +// The purpose is: sizeof(SrcLocation) == 8, so it's just passed/stored without pointers/refs, just like int64_t. +// When decoding SrcLocation into human-readable format, it's converted to SrcFile::SrcPosition via offset. +class SrcLocation { + friend class Lexer; + + int file_id = -1; // file_id from AllRegisteredSrcFiles + int char_offset = -1; // offset from SrcFile::text + +public: + + SrcLocation() = default; + explicit SrcLocation(const SrcFile* src_file) : file_id(src_file->file_id) { + } + + bool is_defined() const { return file_id != -1; } + const SrcFile* get_src_file() const; + + void show(std::ostream& os) const; + void show_context(std::ostream& os) const; + + void show_general_error(std::ostream& os, const std::string& message, const std::string& err_type) const; + void show_note(const std::string& err_msg) const; + void show_warning(const std::string& err_msg) const; + void show_error(const std::string& err_msg) const; +}; + +std::ostream& operator<<(std::ostream& os, SrcLocation loc); + +struct ParseError : std::exception { + SrcLocation where; + std::string message; + ParseError(SrcLocation _where, std::string _msg) : where(_where), message(std::move(_msg)) { + } + + const char* what() const noexcept override { + return message.c_str(); + } + void show(std::ostream& os) const; +}; + +std::ostream& operator<<(std::ostream& os, const ParseError& error); + +} // namespace tolk diff --git a/tolk/srcread.cpp b/tolk/srcread.cpp deleted file mode 100644 
index c71f498d6..000000000 --- a/tolk/srcread.cpp +++ /dev/null @@ -1,228 +0,0 @@ -/* - This file is part of TON Blockchain Library. - - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . -*/ -#include "srcread.h" -#include - -namespace tolk { - -/* - * - * SOURCE FILE READER - * - */ - -std::ostream& operator<<(std::ostream& os, const FileDescr* fdescr) { - return os << (fdescr ? (fdescr->is_stdin ? 
"stdin" : fdescr->filename) : "unknown-location"); -} - -std::ostream& operator<<(std::ostream& os, const Fatal& fatal) { - return os << fatal.get_msg(); -} - -const char* FileDescr::convert_offset(long offset, long* line_no, long* line_pos, long* line_size) const { - long lno = 0, lpos = -1, lsize = 0; - const char* lstart = nullptr; - if (offset >= 0 && offset < (long)text.size()) { - auto it = std::upper_bound(line_offs.begin(), line_offs.end(), offset); - lno = it - line_offs.begin(); - if (lno && it != line_offs.end()) { - lsize = it[0] - it[-1]; - lpos = offset - it[-1]; - lstart = text.data() + it[-1]; - } - } else { - lno = (long)line_offs.size(); - } - if (line_no) { - *line_no = lno; - } - if (line_pos) { - *line_pos = lpos; - } - if (line_size) { - *line_size = lsize; - } - return lstart; -} - -const char* FileDescr::push_line(std::string new_line) { - if (line_offs.empty()) { - line_offs.push_back(0); - } - std::size_t cur_size = text.size(); - text += new_line; - text += '\0'; - line_offs.push_back((long)text.size()); - return text.data() + cur_size; -} - -void SrcLocation::show(std::ostream& os) const { - os << fdescr; - long line_no, line_pos; - if (fdescr && convert_pos(&line_no, &line_pos)) { - os << ':' << line_no; - if (line_pos >= 0) { - os << ':' << (line_pos + 1); - } - } -} - -bool SrcLocation::show_context(std::ostream& os) const { - long line_no, line_pos, line_size; - if (!fdescr || !convert_pos(&line_no, &line_pos, &line_size)) { - return false; - } - bool skip_left = (line_pos > 200), skip_right = (line_pos + 200u < line_size); - const char* here = fdescr->text.data() + char_offs; - const char* base = here - line_pos; - const char* start = skip_left ? here - 100 : base; - const char* end = skip_right ? here + 100 : base + line_size; - os << " "; - if (skip_left) { - os << "... 
"; - } - for (const char* ptr = start; ptr < end; ptr++) { - os << (char)*ptr; - } - if (skip_right) { - os << " ..."; - } - os << std::endl; - os << " "; - if (skip_left) { - os << "... "; - } - for (const char* ptr = start; ptr < here; ptr++) { - char c = *ptr; - os << (c == 9 || c == 10 ? c : ' '); - } - os << '^' << std::endl; - return true; -} - -std::ostream& operator<<(std::ostream& os, const SrcLocation& loc) { - loc.show(os); - return os; -} - -void SrcLocation::show_gen_error(std::ostream& os, std::string message, std::string err_type) const { - show(os); - if (!err_type.empty()) { - os << ": " << err_type; - } - os << ": " << message << std::endl; - show_context(os); -} - -std::ostream& operator<<(std::ostream& os, const Error& error) { - error.show(os); - return os; -} - -void ParseError::show(std::ostream& os) const { - os << where << ": error: " << message << std::endl; - where.show_context(os); -} - -SourceReader::SourceReader(std::istream* _is, FileDescr* _fdescr) - : ifs(_is), fdescr(_fdescr), loc(_fdescr), eof(false), cur_line_len(0), start(0), cur(0), end(0) { - load_line(); -} - -void SourceReader::set_eof() { - if (!eof) { - eof = true; - start = cur = end = 0; - } -} - -int SourceReader::skip_spc() { - if (!cur) { - return 0; - } - const char* ptr = cur; - int res = 0; - while (*ptr == ' ' || *ptr == 9) { - ++ptr; - ++res; - } - set_ptr(ptr); - return res; -} - -bool SourceReader::seek_eof() { - while (seek_eoln()) { - if (!load_line()) { - return true; - } - } - return false; -} - -const char* SourceReader::set_ptr(const char* ptr) { - if (ptr != cur) { - if (ptr < cur || ptr > end) { - error("parsing position went outside of line"); - } - loc.char_offs += ptr - cur; - cur = ptr; - } - return ptr; -} - -bool SourceReader::load_line() { - if (eof) { - return false; - } - loc.set_eof(); - if (ifs->eof()) { - set_eof(); - return false; - } - std::getline(*ifs, cur_line); - if (ifs->fail()) { - set_eof(); - if (!ifs->eof()) { - error("cannot read 
line from source stream"); - } - return false; - } - std::size_t len = cur_line.size(); - if (len > 0xffffff) { - set_eof(); - error("line too long"); - return false; - } - if (len && cur_line.back() == '\r') { - // CP/M line breaks support - cur_line.pop_back(); - --len; - } - cur_line_len = (int)len; - if (fdescr) { - cur = start = fdescr->push_line(std::move(cur_line)); - end = start + len; - loc.char_offs = (std::size_t)(cur - fdescr->text.data()); - cur_line.clear(); - } else { - cur = start = cur_line.c_str(); - end = start + cur_line_len; - } - return true; -} - -} // namespace tolk diff --git a/tolk/srcread.h b/tolk/srcread.h deleted file mode 100644 index 3731a5ca4..000000000 --- a/tolk/srcread.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - This file is part of TON Blockchain Library. - - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . 
-*/ -#pragma once - -#include -#include -#include - -namespace tolk { - -/* - * - * SOURCE FILE READER - * - */ - -struct FileDescr { - std::string filename; - std::string text; - std::vector line_offs; - bool is_stdin; - bool is_main = false; - FileDescr(std::string _fname, bool _stdin = false) : filename(std::move(_fname)), is_stdin(_stdin) { - } - const char* push_line(std::string new_line); - const char* convert_offset(long offset, long* line_no, long* line_pos, long* line_size = nullptr) const; -}; - -struct Fatal { - std::string message; - Fatal(std::string _msg) : message(std::move(_msg)) { - } - std::string get_msg() const { - return message; - } -}; - -std::ostream& operator<<(std::ostream& os, const Fatal& fatal); - -struct SrcLocation { - const FileDescr* fdescr; - long char_offs; - SrcLocation() : fdescr(nullptr), char_offs(-1) { - } - SrcLocation(const FileDescr* _fdescr, long offs = -1) : fdescr(_fdescr), char_offs(-1) { - } - bool defined() const { - return fdescr; - } - bool eof() const { - return char_offs == -1; - } - void set_eof() { - char_offs = -1; - } - const char* convert_pos(long* line_no, long* line_pos, long* line_size = nullptr) const { - return defined() ? 
fdescr->convert_offset(char_offs, line_no, line_pos, line_size) : nullptr; - } - void show(std::ostream& os) const; - bool show_context(std::ostream& os) const; - void show_gen_error(std::ostream& os, std::string message, std::string err_type = "") const; - void show_note(std::string err_msg) const { - show_gen_error(std::cerr, err_msg, "note"); - } - void show_warning(std::string err_msg) const { - show_gen_error(std::cerr, err_msg, "warning"); - } - void show_error(std::string err_msg) const { - show_gen_error(std::cerr, err_msg, "error"); - } -}; - -std::ostream& operator<<(std::ostream& os, const SrcLocation& loc); - -struct Error { - virtual ~Error() = default; - virtual void show(std::ostream& os) const = 0; -}; - -std::ostream& operator<<(std::ostream& os, const Error& error); - -struct ParseError : Error { - SrcLocation where; - std::string message; - ParseError(const SrcLocation& _where, std::string _msg) : where(_where), message(_msg) { - } - ParseError(const SrcLocation* _where, std::string _msg) : message(_msg) { - if (_where) { - where = *_where; - } - } - ~ParseError() override = default; - void show(std::ostream& os) const override; -}; - -class SourceReader { - std::istream* ifs; - FileDescr* fdescr; - SrcLocation loc; - bool eof; - std::string cur_line; - int cur_line_len; - void set_eof(); - const char *start, *cur, *end; - - public: - SourceReader(std::istream* _is, FileDescr* _fdescr); - bool load_line(); - bool is_eof() const { - return eof; - } - int is_eoln() const { - return cur == end; - } - int skip_spc(); - bool seek_eoln() { - skip_spc(); - return is_eoln(); - } - bool seek_eof(); - const char* cur_line_cstr() const { - return cur_line.c_str(); - } - const SrcLocation& here() const { - return loc; - } - char cur_char() const { - return *cur; - } - char next_char() const { - return cur[1]; - } - const char* get_ptr() const { - return cur; - } - const char* get_end_ptr() const { - return end; - } - const char* set_ptr(const char* ptr); - 
void advance(int n) { - set_ptr(get_ptr() + n); - } - void error(std::string err_msg) { - throw ParseError{loc, err_msg}; - } -}; - -} // namespace tolk diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index ee6d6aca7..ec409ab2b 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -28,13 +28,19 @@ namespace tolk { int scope_level; -SymTable<100003> symbols; +SymTable symbols; -SymDef* sym_def[symbols.hprime + 1]; -SymDef* global_sym_def[symbols.hprime + 1]; +SymDef* sym_def[symbols.SIZE_PRIME + 1]; +SymDef* global_sym_def[symbols.SIZE_PRIME + 1]; std::vector> symbol_stack; std::vector scope_opened_at; +Symbol::Symbol(std::string str, sym_idx_t idx) : str(std::move(str)), idx(idx) { + subclass = this->str[0] == '.' ? SymbolSubclass::dot_identifier + : this->str[0] == '~' ? SymbolSubclass::tilde_identifier + : SymbolSubclass::undef; +} + std::string Symbol::unknown_symbol_name(sym_idx_t i) { if (!i) { return "_"; @@ -45,57 +51,43 @@ std::string Symbol::unknown_symbol_name(sym_idx_t i) { } } -sym_idx_t SymTableBase::gen_lookup(std::string str, int mode, sym_idx_t idx) { +sym_idx_t SymTable::gen_lookup(std::string_view str, int mode, sym_idx_t idx) { unsigned long long h1 = 1, h2 = 1; for (char c : str) { - h1 = ((h1 * 239) + (unsigned char)(c)) % p; - h2 = ((h2 * 17) + (unsigned char)(c)) % (p - 1); + h1 = ((h1 * 239) + (unsigned char)(c)) % SIZE_PRIME; + h2 = ((h2 * 17) + (unsigned char)(c)) % (SIZE_PRIME - 1); } ++h2; ++h1; while (true) { - if (sym_table[h1]) { - if (sym_table[h1]->str == str) { + if (sym[h1]) { + if (sym[h1]->str == str) { return (mode & 2) ? not_found : sym_idx_t(h1); } h1 += h2; - if (h1 > p) { - h1 -= p; + if (h1 > SIZE_PRIME) { + h1 -= SIZE_PRIME; } } else { if (!(mode & 1)) { return not_found; } - if (def_sym >= ((long long)p * 3) / 4) { + if (def_sym >= ((long long)SIZE_PRIME * 3) / 4) { throw SymTableOverflow{def_sym}; } - sym_table[h1] = std::make_unique(str, idx <= 0 ? 
sym_idx_t(h1) : -idx); + sym[h1] = std::make_unique(static_cast(str), idx <= 0 ? sym_idx_t(h1) : -idx); ++def_sym; return sym_idx_t(h1); } } } -SymTableBase& SymTableBase::add_keyword(std::string str, sym_idx_t idx) { - if (idx <= 0) { - idx = ++def_kw; - } - sym_idx_t res = gen_lookup(str, -1, idx); - if (!res) { - throw SymTableKwRedef{str}; - } - if (idx < max_kw_idx) { - keywords[idx] = res; - } - return *this; -} - -void open_scope(Lexer& lex) { +void open_scope(SrcLocation loc) { ++scope_level; - scope_opened_at.push_back(lex.cur().loc); + scope_opened_at.push_back(loc); } -void close_scope(Lexer& lex) { +void close_scope(SrcLocation loc) { if (!scope_level) { throw Fatal{"cannot close the outer scope"}; } @@ -124,24 +116,20 @@ void close_scope(Lexer& lex) { scope_opened_at.pop_back(); } -SymDef* lookup_symbol(sym_idx_t idx, int flags) { +SymDef* lookup_symbol(sym_idx_t idx) { if (!idx) { return nullptr; } - if ((flags & 1) && sym_def[idx]) { + if (sym_def[idx]) { return sym_def[idx]; } - if ((flags & 2) && global_sym_def[idx]) { + if (global_sym_def[idx]) { return global_sym_def[idx]; } return nullptr; } -SymDef* lookup_symbol(std::string name, int flags) { - return lookup_symbol(symbols.lookup(name), flags); -} - -SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) { +SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) { if (!name_idx) { return nullptr; } @@ -156,7 +144,7 @@ SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, const SrcLocati return found; } -SymDef* define_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) { +SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) { if (!name_idx) { return nullptr; } @@ -176,7 +164,7 @@ SymDef* define_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc return found; } found = sym_def[name_idx] = new SymDef(scope_level, name_idx, loc); - 
symbol_stack.push_back(std::make_pair(scope_level, SymDef{0, name_idx})); + symbol_stack.push_back(std::make_pair(scope_level, SymDef{0, name_idx, loc})); #ifdef TOLK_DEBUG found->sym_name = found->name(); symbol_stack.back().second.sym_name = found->name(); diff --git a/tolk/symtable.h b/tolk/symtable.h index 68a4a1dad..67f949a12 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -15,8 +15,9 @@ along with TON Blockchain Library. If not, see . */ #pragma once -#include "srcread.h" -#include "lexer.h" +#include "src-file.h" +#include +#include #include namespace tolk { @@ -29,11 +30,12 @@ namespace tolk { typedef int var_idx_t; +enum class SymValKind { _Param, _Var, _Func, _Typename, _GlobVar, _Const }; + struct SymValBase { - enum { _Param, _Var, _Func, _Typename, _GlobVar, _Const }; - int type; + SymValKind kind; int idx; - SymValBase(int _type, int _idx) : type(_type), idx(_idx) { + SymValBase(SymValKind kind, int idx) : kind(kind), idx(idx) { } virtual ~SymValBase() = default; }; @@ -44,92 +46,69 @@ struct SymValBase { * */ -// defined outside this module (by the end user) -int compute_symbol_subclass(std::string str); // return 0 if unneeded +enum class SymbolSubclass { + undef = 0, + dot_identifier = 1, // begins with . 
(a const method) + tilde_identifier = 2 // begins with ~ (a non-const method) +}; typedef int sym_idx_t; struct Symbol { std::string str; sym_idx_t idx; - int subclass; - Symbol(std::string _str, sym_idx_t _idx, int _sc) : str(_str), idx(_idx), subclass(_sc) { - } - Symbol(std::string _str, sym_idx_t _idx) : str(_str), idx(_idx) { - subclass = compute_symbol_subclass(std::move(_str)); - } + SymbolSubclass subclass; + + Symbol(std::string str, sym_idx_t idx); + static std::string unknown_symbol_name(sym_idx_t i); }; -class SymTableBase { - unsigned p; - std::unique_ptr* sym_table; - sym_idx_t def_kw, def_sym; +class SymTable { +public: + static constexpr int SIZE_PRIME = 100003; + +private: + sym_idx_t def_sym{0}; + std::unique_ptr sym[SIZE_PRIME + 1]; + sym_idx_t gen_lookup(std::string_view str, int mode = 0, sym_idx_t idx = 0); + static constexpr int max_kw_idx = 10000; sym_idx_t keywords[max_kw_idx]; - public: - SymTableBase(unsigned p_, std::unique_ptr* sym_table_) - : p(p_), sym_table(sym_table_), def_kw(0x100), def_sym(0) { - std::memset(keywords, 0, sizeof(keywords)); - } +public: + static constexpr sym_idx_t not_found = 0; - SymTableBase& add_keyword(std::string str, sym_idx_t idx = 0); - SymTableBase& add_kw_char(char c) { - return add_keyword(std::string{c}, c); - } - sym_idx_t lookup(std::string str, int mode = 0) { + sym_idx_t lookup(const std::string_view& str, int mode = 0) { return gen_lookup(str, mode); } - sym_idx_t lookup_add(std::string str) { + sym_idx_t lookup_add(const std::string& str) { return gen_lookup(str, 1); } Symbol* operator[](sym_idx_t i) const { - return sym_table[i].get(); + return sym[i].get(); } bool is_keyword(sym_idx_t i) const { - return sym_table[i] && sym_table[i]->idx < 0; + return sym[i] && sym[i]->idx < 0; } std::string get_name(sym_idx_t i) const { - return sym_table[i] ? sym_table[i]->str : Symbol::unknown_symbol_name(i); + return sym[i] ? 
sym[i]->str : Symbol::unknown_symbol_name(i); } - int get_subclass(sym_idx_t i) const { - return sym_table[i] ? sym_table[i]->subclass : 0; + SymbolSubclass get_subclass(sym_idx_t i) const { + return sym[i] ? sym[i]->subclass : SymbolSubclass::undef; } Symbol* get_keyword(int i) const { - return ((unsigned)i < (unsigned)max_kw_idx) ? sym_table[keywords[i]].get() : nullptr; - } - - protected: - sym_idx_t gen_lookup(std::string str, int mode = 0, sym_idx_t idx = 0); -}; - -template -class SymTable : public SymTableBase { - public: - static constexpr int hprime = pp; - static int size() { - return pp + 1; + return ((unsigned)i < (unsigned)max_kw_idx) ? sym[keywords[i]].get() : nullptr; } - private: - std::unique_ptr sym[pp + 1]; - - public: - SymTable() : SymTableBase(pp, sym) { - } - SymTable& add_keyword(std::string str, sym_idx_t idx = 0) { - SymTableBase::add_keyword(str, idx); - return *this; - } - SymTable& add_kw_char(char c) { - return add_keyword(std::string{c}, c); + SymTable() { + std::memset(keywords, 0, sizeof(keywords)); } }; struct SymTableOverflow { int sym_def; - SymTableOverflow(int x) : sym_def(x) { + explicit SymTableOverflow(int x) : sym_def(x) { } }; @@ -139,7 +118,7 @@ struct SymTableKwRedef { } }; -extern SymTable<100003> symbols; +extern SymTable symbols; extern int scope_level; @@ -151,7 +130,7 @@ struct SymDef { #ifdef TOLK_DEBUG std::string sym_name; #endif - SymDef(int lvl, sym_idx_t idx, const SrcLocation& _loc = {}, SymValBase* val = 0) + SymDef(int lvl, sym_idx_t idx, SrcLocation _loc, SymValBase* val = nullptr) : level(lvl), sym_idx(idx), value(val), loc(_loc) { } bool has_name() const { @@ -162,17 +141,16 @@ struct SymDef { } }; -extern SymDef* sym_def[symbols.hprime + 1]; -extern SymDef* global_sym_def[symbols.hprime + 1]; +extern SymDef* sym_def[symbols.SIZE_PRIME + 1]; +extern SymDef* global_sym_def[symbols.SIZE_PRIME + 1]; extern std::vector> symbol_stack; extern std::vector scope_opened_at; -void open_scope(Lexer& lex); -void 
close_scope(Lexer& lex); -SymDef* lookup_symbol(sym_idx_t idx, int flags = 3); -SymDef* lookup_symbol(std::string name, int flags = 3); +void open_scope(SrcLocation loc); +void close_scope(SrcLocation loc); +SymDef* lookup_symbol(sym_idx_t idx); -SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, const SrcLocation& loc = {}); -SymDef* define_symbol(sym_idx_t name_idx, bool force_new = false, const SrcLocation& loc = {}); +SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, SrcLocation loc = {}); +SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc); } // namespace tolk diff --git a/tolk/tolk-main.cpp b/tolk/tolk-main.cpp index e4b6ebdb6..ce08a5529 100644 --- a/tolk/tolk-main.cpp +++ b/tolk/tolk-main.cpp @@ -30,67 +30,41 @@ void usage(const char* progname) { std::cerr - << "usage: " << progname - << " [-vIAPSR][-O][-i][-o][-W] { ...}\n" - "\tGenerates Fift TVM assembler code from a Tolk source\n" - "-I\tEnables interactive mode (parse stdin)\n" - "-o\tWrites generated code into specified file instead of stdout\n" - "-v\tIncreases verbosity level (extra information output into stderr)\n" - "-i\tSets indentation for the output code (in two-space units)\n" - "-A\tPrefix code with `\"Asm.fif\" include` preamble\n" + << "usage: " << progname << " [options] \n" + "\tGenerates Fift TVM assembler code from a .tolk file\n" + "-o\tWrites generated code into specified .fif file instead of stdout\n" + "-b\tGenerate Fift instructions to save TVM bytecode into .boc file\n" "-O\tSets optimization level (2 by default)\n" - "-P\tEnvelope code into PROGRAM{ ... }END>c\n" - "-S\tInclude stack layout comments in the output code\n" - "-R\tInclude operation rewrite comments in the output code\n" - "-W\tInclude Fift code to serialize and save generated code into specified BoC file. 
Enables " - "-A and -P.\n" - "\t-s\tOutput semantic version of Tolk and exit\n" - "\t-V\tShow Tolk build information\n"; + "-S\tDon't include stack layout comments into Fift output\n" + "-e\tIncreases verbosity level (extra output into stderr)\n" + "-v\tOutput version of Tolk and exit\n"; std::exit(2); } int main(int argc, char* const argv[]) { int i; std::string output_filename; - while ((i = getopt(argc, argv, "Ahi:Io:O:PRsSvW:V")) != -1) { + while ((i = getopt(argc, argv, "o:b:O:Sevh")) != -1) { switch (i) { - case 'A': - tolk::asm_preamble = true; - break; - case 'I': - tolk::interactive = true; - break; - case 'i': - tolk::indent = std::max(0, atoi(optarg)); - break; case 'o': output_filename = optarg; break; + case 'b': + tolk::boc_output_filename = optarg; + break; case 'O': tolk::opt_level = std::max(0, atoi(optarg)); break; - case 'P': - tolk::program_envelope = true; - break; - case 'R': - tolk::op_rewrite_comments = true; - break; case 'S': - tolk::stack_layout_comments = true; + tolk::stack_layout_comments = false; break; - case 'v': + case 'e': ++tolk::verbosity; break; - case 'W': - tolk::boc_output_filename = optarg; - tolk::asm_preamble = tolk::program_envelope = true; - break; - case 's': - std::cout << tolk::tolk_version << "\n"; - std::exit(0); - case 'V': - std::cout << "Tolk semantic version: v" << tolk::tolk_version << "\n"; - std::cout << "Build information: [ Commit: " << GitMetadata::CommitSHA1() << ", Date: " << GitMetadata::CommitDate() << "]\n"; + case 'v': + std::cout << "Tolk compiler v" << tolk::tolk_version << "\n"; + std::cout << "Build commit: " << GitMetadata::CommitSHA1() << "\n"; + std::cout << "Build date: " << GitMetadata::CommitDate() << "\n"; std::exit(0); case 'h': default: @@ -110,13 +84,14 @@ int main(int argc, char* const argv[]) { outs = fs.get(); } - std::vector sources; - - while (optind < argc) { - sources.push_back(std::string(argv[optind++])); + if (optind != argc - 1) { + std::cerr << "invalid usage: should 
specify exactly one input file.tolk"; + return 2; } + std::string entrypoint_file_name = argv[optind]; + tolk::read_callback = tolk::fs_read_callback; - return tolk::tolk_proceed(sources, *outs, std::cerr); + return tolk::tolk_proceed(entrypoint_file_name, *outs, std::cerr); } diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp index 6ffc798e8..7cf28ba3a 100644 --- a/tolk/tolk-wasm.cpp +++ b/tolk/tolk-wasm.cpp @@ -31,81 +31,58 @@ #include "td/utils/Status.h" #include #include -#include "vm/boc.h" td::Result compile_internal(char *config_json) { TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json))) - auto &obj = input_json.get_object(); + td::JsonObject& config = input_json.get_object(); - TRY_RESULT(opt_level, td::get_json_object_int_field(obj, "optLevel", false)); - TRY_RESULT(sources_obj, td::get_json_object_field(obj, "sources", td::JsonValue::Type::Array, false)); - - auto &sources_arr = sources_obj.get_array(); - - std::vector sources; - - for (auto &item : sources_arr) { - sources.push_back(item.get_string().str()); - } + TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optimizationLevel", true, 2)); + TRY_RESULT(stack_comments, td::get_json_object_bool_field(config, "withStackComments", true, false)); + TRY_RESULT(entrypoint_file_name, td::get_json_object_string_field(config, "entrypointFileName", false)); tolk::opt_level = std::max(0, opt_level); - tolk::program_envelope = true; tolk::verbosity = 0; - tolk::indent = 1; + tolk::stack_layout_comments = stack_comments; std::ostringstream outs, errs; - auto compile_res = tolk::tolk_proceed(sources, outs, errs); - - if (compile_res != 0) { - return td::Status::Error(std::string("Tolk compilation error: ") + errs.str()); + int tolk_res = tolk::tolk_proceed(entrypoint_file_name, outs, errs); + if (tolk_res != 0) { + return td::Status::Error("Tolk compilation error: " + errs.str()); } - TRY_RESULT(code_cell, fift::compile_asm(outs.str(), "/fiftlib/", false)); - TRY_RESULT(boc, 
vm::std_boc_serialize(code_cell)); + TRY_RESULT(fift_res, fift::compile_asm_program(outs.str(), "/fiftlib/")); td::JsonBuilder result_json; - auto result_obj = result_json.enter_object(); - result_obj("status", "ok"); - result_obj("codeBoc", td::base64_encode(boc)); - result_obj("fiftCode", outs.str()); - result_obj("codeHashHex", code_cell->get_hash().to_hex()); - result_obj.leave(); - - outs.clear(); - errs.clear(); + auto obj = result_json.enter_object(); + obj("status", "ok"); + obj("fiftCode", fift_res.fiftCode); + obj("codeBoc64", fift_res.codeBoc64); + obj("codeHashHex", fift_res.codeHashHex); + obj.leave(); return result_json.string_builder().as_cslice().str(); } -/// Callback used to retrieve additional source files or data. -/// -/// @param _kind The kind of callback (a string). -/// @param _data The data for the callback (a string). -/// @param o_contents A pointer to the contents of the file, if found. Allocated via malloc(). -/// @param o_error A pointer to an error message, if there is one. Allocated via malloc(). -/// -/// The callback implementor must use malloc() to allocate storage for -/// contents or error. The callback implementor must use free() to free -/// said storage after tolk_compile returns. -/// -/// If the callback is not supported, *o_contents and *o_error must be set to NULL. -typedef void (*CStyleReadFileCallback)(char const* _kind, char const* _data, char** o_contents, char** o_error); +/// Callback used to retrieve file contents from a "not file system". See tolk-js for implementation. +/// The callback must fill either destContents or destError. +/// The implementor must use malloc() for them and use free() after tolk_compile returns. 
+typedef void (*CStyleReadFileCallback)(int kind, char const* data, char** destContents, char** destError); tolk::ReadCallback::Callback wrapReadCallback(CStyleReadFileCallback _readCallback) { tolk::ReadCallback::Callback readCallback; if (_readCallback) { - readCallback = [=](tolk::ReadCallback::Kind _kind, char const* _data) -> td::Result { - char* contents_c = nullptr; - char* error_c = nullptr; - _readCallback(tolk::ReadCallback::kindString(_kind).data(), _data, &contents_c, &error_c); - if (!contents_c && !error_c) { + readCallback = [=](tolk::ReadCallback::Kind kind, char const* data) -> td::Result { + char* destContents = nullptr; + char* destError = nullptr; + _readCallback(static_cast(kind), data, &destContents, &destError); + if (!destContents && !destError) { return td::Status::Error("Callback not supported"); } - if (contents_c) { - return contents_c; + if (destContents) { + return destContents; } - return td::Status::Error(std::string(error_c)); + return td::Status::Error(std::string(destError)); }; } return readCallback; diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index 1b8a17a1e..1fce3ebf2 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -24,18 +24,17 @@ from all source files in the program, then also delete it here. */ #include "tolk.h" -#include "srcread.h" #include "lexer.h" #include #include "git.h" #include #include "td/utils/port/path.h" +#include namespace tolk { -int verbosity, indent, opt_level = 2; -bool stack_layout_comments, op_rewrite_comments, program_envelope, asm_preamble; -bool interactive = false; +int verbosity = 0, opt_level = 2; +bool stack_layout_comments = true; GlobalPragma pragma_allow_post_modification{"allow-post-modification"}; GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"}; GlobalPragma pragma_remove_unused_functions{"remove-unused-functions"}; @@ -82,23 +81,13 @@ void GlobalPragma::enable(SrcLocation loc) { ". 
Please, remove this line from your code."); return; } + if (!loc.get_src_file()->is_entrypoint_file()) { + // todo generally it's not true; rework pragmas completely + loc.show_warning(PSTRING() << "#pragma " << name_ << + " should be used in the main file only."); + } enabled_ = true; - locs_.push_back(std::move(loc)); -} - -void GlobalPragma::check_enable_in_libs() { - if (locs_.empty()) { - return; - } - for (const SrcLocation& loc : locs_) { - if (loc.fdescr->is_main) { - return; - } - } - locs_[0].show_warning(PSTRING() << "#pragma " << name_ - << " is enabled in included libraries, it may change the behavior of your code. " - << "Add this #pragma to the main source file to suppress this warning."); } void GlobalPragma::always_on_and_deprecated(const char *deprecated_from_v) { @@ -109,14 +98,19 @@ void GlobalPragma::always_on_and_deprecated(const char *deprecated_from_v) { td::Result fs_read_callback(ReadCallback::Kind kind, const char* query) { switch (kind) { case ReadCallback::Kind::ReadFile: { - std::ifstream ifs{query}; - if (ifs.fail()) { - auto msg = std::string{"cannot open source file `"} + query + "`"; - return td::Status::Error(msg); + struct stat f_stat; + int res = stat(query, &f_stat); + if (res != 0) { + return td::Status::Error(std::string{"cannot open source file: "} + query); } - std::stringstream ss; - ss << ifs.rdbuf(); - return ss.str(); + + size_t file_size = static_cast(f_stat.st_size); + std::string str; + str.resize(file_size); + FILE* f = fopen(query, "r"); + fread(str.data(), file_size, 1, f); + fclose(f); + return std::move(str); } case ReadCallback::Kind::Realpath: { return td::realpath(td::CSlice(query)); @@ -241,7 +235,7 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er } else if (func_val->is_inline_ref()) { modifier = "REF"; } - outs << std::string(indent * 2, ' ') << name << " PROC" << modifier << ":<{\n"; + outs << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n"; int mode = 0; 
if (stack_layout_comments) { mode |= Stack::_StkCmt | Stack::_CptStkCmt; @@ -255,8 +249,8 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er if (func_val->is_inline() || func_val->is_inline_ref()) { mode |= Stack::_InlineAny; } - code.generate_code(outs, mode, indent + 1); - outs << std::string(indent * 2, ' ') << "}>\n"; + code.generate_code(outs, mode, 2); + outs << std::string(2, ' ') << "}>\n"; if (verbosity >= 2) { errs << "--------------\n"; } @@ -264,13 +258,9 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er } int generate_output(std::ostream &outs, std::ostream &errs) { - if (asm_preamble) { - outs << "\"Asm.fif\" include\n"; - } + outs << "\"Asm.fif\" include\n"; outs << "// automatically generated from " << generated_from << std::endl; - if (program_envelope) { - outs << "PROGRAM{\n"; - } + outs << "PROGRAM{\n"; mark_used_symbols(); for (SymDef* func_sym : glob_func) { SymValCodeFunc* func_val = dynamic_cast(func_sym->value); @@ -283,7 +273,7 @@ int generate_output(std::ostream &outs, std::ostream &errs) { } std::string name = symbols.get_name(func_sym->sym_idx); - outs << std::string(indent * 2, ' '); + outs << std::string(2, ' '); if (func_val->method_id.is_null()) { outs << "DECLPROC " << name << "\n"; } else { @@ -300,7 +290,7 @@ int generate_output(std::ostream &outs, std::ostream &errs) { continue; } std::string name = symbols.get_name(gvar_sym->sym_idx); - outs << std::string(indent * 2, ' ') << "DECLGLOBVAR " << name << "\n"; + outs << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n"; } int errors = 0; for (SymDef* func_sym : glob_func) { @@ -310,76 +300,46 @@ int generate_output(std::ostream &outs, std::ostream &errs) { } try { generate_output_func(func_sym, outs, errs); - } catch (Error& err) { + } catch (ParseError& err) { errs << "cannot generate code for function `" << symbols.get_name(func_sym->sym_idx) << "`:\n" << err << std::endl; ++errors; } } - if 
(program_envelope) { - outs << "}END>c\n"; - } + outs << "}END>c\n"; if (!boc_output_filename.empty()) { - outs << "2 boc+>B \"" << boc_output_filename << "\" B>file\n"; + outs << "boc>B \"" << boc_output_filename << "\" B>file\n"; } return errors; } -void output_inclusion_stack(std::ostream &errs) { - while (!inclusion_locations.empty()) { - SrcLocation loc = inclusion_locations.top(); - inclusion_locations.pop(); - if (loc.fdescr) { - errs << "note: included from "; - loc.show(errs); - errs << std::endl; - } - } -} - - -int tolk_proceed(const std::vector &sources, std::ostream &outs, std::ostream &errs) { - if (program_envelope && !indent) { - indent = 1; - } - define_keywords(); +int tolk_proceed(const std::string &entrypoint_file_name, std::ostream &outs, std::ostream &errs) { define_builtins(); + lexer_init(); pragma_allow_post_modification.always_on_and_deprecated("0.5.0"); pragma_compute_asm_ltr.always_on_and_deprecated("0.5.0"); - int ok = 0, proc = 0; try { - for (auto src : sources) { - ok += parse_source_file(src.c_str(), {}, true); - proc++; - } - if (interactive) { - generated_from += "stdin "; - ok += parse_source_stdin(); - proc++; - } - if (ok < proc) { + bool ok = parse_source_file(entrypoint_file_name.c_str(), {}); + if (!ok) { throw Fatal{"output code generation omitted because of errors"}; } - if (!proc) { - throw Fatal{"no source files, no output"}; - } - pragma_remove_unused_functions.check_enable_in_libs(); + + // todo #ifdef TOLK_PROFILING + comment + // lexer_measure_performance(all_src_files.get_all_files()); + return generate_output(outs, errs); } catch (Fatal& fatal) { errs << "fatal: " << fatal << std::endl; - output_inclusion_stack(errs); return 2; - } catch (Error& error) { + } catch (ParseError& error) { errs << error << std::endl; - output_inclusion_stack(errs); return 2; } catch (UnifyError& unif_err) { errs << "fatal: "; unif_err.print_message(errs); errs << std::endl; - output_inclusion_stack(errs); return 2; } diff --git 
a/tolk/tolk.h b/tolk/tolk.h index 9086620bc..27e26f050 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -15,6 +15,7 @@ along with TON Blockchain Library. If not, see . */ #pragma once +#include #include #include #include @@ -26,7 +27,7 @@ #include "common/refcnt.hpp" #include "common/bigint.hpp" #include "common/refint.h" -#include "srcread.h" +#include "src-file.h" #include "lexer.h" #include "symtable.h" #include "td/utils/Status.h" @@ -45,104 +46,6 @@ constexpr int optimize_depth = 20; const std::string tolk_version{"0.4.5"}; -enum Keyword { - _Eof = -1, - _Ident = 0, - _Number, - _Special, - _String, - _Return = 0x80, - _Var, - _Repeat, - _Do, - _While, - _Until, - _Try, - _Catch, - _If, - _Ifnot, - _Then, - _Else, - _Elseif, - _Elseifnot, - _Eq, - _Neq, - _Leq, - _Geq, - _Spaceship, - _Lshift, - _Rshift, - _RshiftR, - _RshiftC, - _DivR, - _DivC, - _ModR, - _ModC, - _DivMod, - _PlusLet, - _MinusLet, - _TimesLet, - _DivLet, - _DivRLet, - _DivCLet, - _ModLet, - _ModRLet, - _ModCLet, - _LshiftLet, - _RshiftLet, - _RshiftRLet, - _RshiftCLet, - _AndLet, - _OrLet, - _XorLet, - _Int, - _Cell, - _Slice, - _Builder, - _Cont, - _Tuple, - _Type, - _Mapsto, - _Forall, - _Asm, - _Impure, - _Pure, - _Global, - _Extern, - _Inline, - _InlineRef, - _Builtin, - _AutoApply, - _MethodId, - _Get, - _Operator, - _Infix, - _Infixl, - _Infixr, - _Const, - _PragmaHashtag, - _IncludeHashtag -}; - -void define_keywords(); - -class IdSc { - int cls; - - public: - enum { undef = 0, dotid = 1, tildeid = 2 }; - IdSc(int _cls = undef) : cls(_cls) { - } - operator int() { - return cls; - } -}; - -// symbol subclass: -// 1 = begins with . 
(a const method) -// 2 = begins with ~ (a non-const method) -// 0 = else - /* * * TYPE EXPRESSIONS @@ -152,13 +55,13 @@ class IdSc { struct TypeExpr { enum te_type { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll } constr; enum AtomicType { - _Int = Keyword::_Int, - _Cell = Keyword::_Cell, - _Slice = Keyword::_Slice, - _Builder = Keyword::_Builder, - _Cont = Keyword::_Cont, - _Tuple = Keyword::_Tuple, - _Type = Keyword::_Type + _Int = tok_int, + _Cell = tok_cell, + _Slice = tok_slice, + _Builder = tok_builder, + _Cont = tok_cont, + _Tuple = tok_tuple, + _Type = tok_type }; int value; int minw, maxw; @@ -279,14 +182,18 @@ struct TypeExpr { std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr); -struct UnifyError { +struct UnifyError : std::exception { TypeExpr* te1; TypeExpr* te2; std::string msg; - UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), msg(_msg) { + + UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), msg(std::move(_msg)) { } + void print_message(std::ostream& os) const; - std::string message() const; + const char* what() const noexcept override { + return msg.c_str(); + } }; std::ostream& operator<<(std::ostream& os, const UnifyError& ue); @@ -310,18 +217,13 @@ struct TmpVar { int cls; sym_idx_t name; int coord; - std::unique_ptr where; - std::vector> on_modification; - bool undefined = false; - TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type = 0, SymDef* sym = 0, const SrcLocation* loc = 0); + SrcLocation where; + std::vector> on_modification; + + TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc); void show(std::ostream& os, int omit_idx = 0) const; void dump(std::ostream& os) const; - void set_location(const SrcLocation& loc); - std::string to_string() const { - std::ostringstream s; - show(s, 2); - return s.str(); - } + void set_location(SrcLocation loc); }; struct VarDescr { @@ -566,25 +468,25 
@@ struct Op { std::unique_ptr block0, block1; td::RefInt256 int_const; std::string str_const; - Op(const SrcLocation& _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) { + Op(SrcLocation _where = {}, OpKind _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) { } - Op(const SrcLocation& _where, OpKind _cl, const std::vector& _left) + Op(SrcLocation _where, OpKind _cl, const std::vector& _left) : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left) { } - Op(const SrcLocation& _where, OpKind _cl, std::vector&& _left) + Op(SrcLocation _where, OpKind _cl, std::vector&& _left) : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(std::move(_left)) { } - Op(const SrcLocation& _where, OpKind _cl, const std::vector& _left, td::RefInt256 _const) + Op(SrcLocation _where, OpKind _cl, const std::vector& _left, td::RefInt256 _const) : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), int_const(_const) { } - Op(const SrcLocation& _where, OpKind _cl, const std::vector& _left, std::string _const) + Op(SrcLocation _where, OpKind _cl, const std::vector& _left, std::string _const) : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), str_const(_const) { } - Op(const SrcLocation& _where, OpKind _cl, const std::vector& _left, const std::vector& _right, + Op(SrcLocation _where, OpKind _cl, const std::vector& _left, const std::vector& _right, SymDef* _fun = nullptr) : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(_left), right(_right) { } - Op(const SrcLocation& _where, OpKind _cl, std::vector&& _left, std::vector&& _right, + Op(SrcLocation _where, OpKind _cl, std::vector&& _left, std::vector&& _right, SymDef* _fun = nullptr) : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) { } @@ -700,8 +602,8 @@ struct CodeBlob { return res; } bool import_params(FormalArgList arg_list); - var_idx_t create_var(int cls, TypeExpr* var_type = 0, SymDef* 
sym = 0, const SrcLocation* loc = 0); - var_idx_t create_tmp_var(TypeExpr* var_type = 0, const SrcLocation* loc = 0) { + var_idx_t create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation loc); + var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) { return create_var(TmpVar::_Tmp, var_type, nullptr, loc); } int split_vars(bool strict = false); @@ -712,14 +614,14 @@ struct CodeBlob { cur_ops_stack.push(cur_ops); cur_ops = &new_cur_ops; } - void close_blk(const SrcLocation& location) { + void close_blk(SrcLocation location) { *cur_ops = std::make_unique(location, Op::_Nop); } void pop_cur() { cur_ops = cur_ops_stack.top(); cur_ops_stack.pop(); } - void close_pop_cur(const SrcLocation& location) { + void close_pop_cur(SrcLocation location) { close_blk(location); pop_cur(); } @@ -730,7 +632,7 @@ struct CodeBlob { void generate_code(AsmOpList& out_list, int mode = 0); void generate_code(std::ostream& os, int mode = 0, int indent = 0); - void on_var_modification(var_idx_t idx, const SrcLocation& here) const { + void on_var_modification(var_idx_t idx, SrcLocation here) const { for (auto& f : vars.at(idx).on_modification) { f(here); } @@ -746,8 +648,8 @@ struct CodeBlob { struct SymVal : SymValBase { TypeExpr* sym_type; bool auto_apply{false}; - SymVal(int _type, int _idx, TypeExpr* _stype = nullptr) - : SymValBase(_type, _idx), sym_type(_stype) { + SymVal(SymValKind kind, int idx, TypeExpr* sym_type = nullptr) + : SymValBase(kind, idx), sym_type(sym_type) { } ~SymVal() override = default; TypeExpr* get_type() const { @@ -774,9 +676,9 @@ struct SymValFunc : SymVal { #endif ~SymValFunc() override = default; SymValFunc(int val, TypeExpr* _ft, bool marked_as_pure) - : SymVal(_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0) {} + : SymVal(SymValKind::_Func, val, _ft), flags(marked_as_pure ? 
flagMarkedAsPure : 0) {} SymValFunc(int val, TypeExpr* _ft, std::initializer_list _arg_order, std::initializer_list _ret_order, bool marked_as_pure) - : SymVal(_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0), arg_order(_arg_order), ret_order(_ret_order) { + : SymVal(SymValKind::_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0), arg_order(_arg_order), ret_order(_ret_order) { } const std::vector* get_arg_order() const { @@ -818,7 +720,7 @@ struct SymValCodeFunc : SymValFunc { struct SymValType : SymValBase { TypeExpr* sym_type; - SymValType(int _type, int _idx, TypeExpr* _stype = nullptr) : SymValBase(_type, _idx), sym_type(_stype) { + SymValType(SymValKind kind, int idx, TypeExpr* _stype = nullptr) : SymValBase(kind, idx), sym_type(_stype) { } ~SymValType() override = default; TypeExpr* get_type() const { @@ -834,7 +736,7 @@ struct SymValGlobVar : SymValBase { std::string name; // seeing variable name in debugger makes it much easier to delve into Tolk sources #endif SymValGlobVar(int val, TypeExpr* gvtype, int oidx = 0) - : SymValBase(_GlobVar, val), sym_type(gvtype), out_idx(oidx) { + : SymValBase(SymValKind::_GlobVar, val), sym_type(gvtype), out_idx(oidx) { } ~SymValGlobVar() override = default; TypeExpr* get_type() const { @@ -843,16 +745,16 @@ struct SymValGlobVar : SymValBase { }; struct SymValConst : SymValBase { + enum ConstKind { IntConst, SliceConst }; + td::RefInt256 intval; std::string strval; - Keyword type; + ConstKind kind; SymValConst(int idx, td::RefInt256 value) - : SymValBase(_Const, idx), intval(value) { - type = _Int; + : SymValBase(SymValKind::_Const, idx), intval(value), kind(IntConst) { } SymValConst(int idx, std::string value) - : SymValBase(_Const, idx), strval(value) { - type = _Slice; + : SymValBase(SymValKind::_Const, idx), strval(value), kind(SliceConst) { } ~SymValConst() override = default; td::RefInt256 get_int_value() const { @@ -861,8 +763,8 @@ struct SymValConst : SymValBase { std::string 
get_str_value() const { return strval; } - Keyword get_type() const { - return type; + ConstKind get_kind() const { + return kind; } }; @@ -882,35 +784,21 @@ class ReadCallback { ReadCallback(ReadCallback const&) = delete; ReadCallback& operator=(ReadCallback const&) = delete; - enum class Kind - { + enum class Kind { + Realpath, ReadFile, - Realpath }; - static std::string kindString(Kind _kind) - { - switch (_kind) - { - case Kind::ReadFile: - return "source"; - case Kind::Realpath: - return "realpath"; - default: - throw ""; // todo ? - } - } - /// File reading or generic query callback. - using Callback = std::function(ReadCallback::Kind, const char*)>; + using Callback = std::function(Kind, const char*)>; }; // defined in parse-tolk.cpp -bool parse_source(std::istream* is, const FileDescr* fdescr); -bool parse_source_file(const char* filename, Lexem lex = {}, bool is_main = false); -bool parse_source_stdin(); +void parse_source(const SrcFile* file); +bool parse_source_file(const char* filename, SrcLocation loc_included_from); extern std::stack inclusion_locations; +extern AllRegisteredSrcFiles all_src_files; /* * @@ -949,7 +837,7 @@ struct Expr { std::vector args; explicit Expr(ExprCls c = _None) : cls(c) { } - Expr(ExprCls c, const SrcLocation& loc) : cls(c), here(loc) { + Expr(ExprCls c, SrcLocation loc) : cls(c), here(loc) { } Expr(ExprCls c, std::vector _args) : cls(c), args(std::move(_args)) { } @@ -990,14 +878,13 @@ struct Expr { bool is_mktuple() const { return cls == _MkTuple; } - void chk_rvalue(const Lexem& lem) const; - void chk_lvalue(const Lexem& lem) const; - void chk_type(const Lexem& lem) const; - bool deduce_type(const Lexem& lem); - void set_location(const SrcLocation& loc) { + void chk_rvalue(const Lexer& lex) const; // todo here and below: strange to pass Lexer + void chk_lvalue(const Lexer& lex) const; + bool deduce_type(const Lexer& lex); + void set_location(SrcLocation loc) { here = loc; } - const SrcLocation& get_location() const { + 
SrcLocation get_location() const { return here; } int define_new_vars(CodeBlob& code); @@ -1699,11 +1586,11 @@ struct Stack { * */ -typedef std::function&, std::vector&, const SrcLocation)> simple_compile_func_t; +typedef std::function&, std::vector&, SrcLocation)> simple_compile_func_t; typedef std::function&, std::vector&)> compile_func_t; inline simple_compile_func_t make_simple_compile(AsmOp op) { - return [op](std::vector& out, std::vector& in, const SrcLocation&) -> AsmOp { return op; }; + return [op](std::vector& out, std::vector& in, SrcLocation) -> AsmOp { return op; }; } inline compile_func_t make_ext_compile(std::vector&& ops) { @@ -1739,7 +1626,7 @@ struct SymValAsmFunc : SymValFunc { std::initializer_list ret_order = {}, bool marked_as_pure = false) : SymValFunc(-1, ft, arg_order, ret_order, marked_as_pure), ext_compile(std::move(_compile)) { } - bool compile(AsmOpList& dest, std::vector& out, std::vector& in, const SrcLocation& where) const; + bool compile(AsmOpList& dest, std::vector& out, std::vector& in, SrcLocation where) const; }; // defined in builtins.cpp @@ -1753,8 +1640,8 @@ AsmOp push_const(td::RefInt256 x); void define_builtins(); -extern int verbosity, indent, opt_level; -extern bool stack_layout_comments, op_rewrite_comments, program_envelope, asm_preamble, interactive; +extern int verbosity, opt_level; +extern bool stack_layout_comments; extern std::string generated_from, boc_output_filename; extern ReadCallback::Callback read_callback; @@ -1764,6 +1651,7 @@ class GlobalPragma { public: explicit GlobalPragma(std::string name) : name_(std::move(name)) { } + const std::string& name() const { return name_; } @@ -1771,14 +1659,12 @@ class GlobalPragma { return enabled_; } void enable(SrcLocation loc); - void check_enable_in_libs(); void always_on_and_deprecated(const char *deprecated_from_v); private: std::string name_; bool enabled_ = false; const char *deprecated_from_v_ = nullptr; - std::vector locs_; }; extern GlobalPragma 
pragma_allow_post_modification, pragma_compute_asm_ltr, pragma_remove_unused_functions; @@ -1788,7 +1674,7 @@ extern GlobalPragma pragma_allow_post_modification, pragma_compute_asm_ltr, prag * */ -int tolk_proceed(const std::vector &sources, std::ostream &outs, std::ostream &errs); +int tolk_proceed(const std::string &entrypoint_file_name, std::ostream &outs, std::ostream &errs); } // namespace tolk diff --git a/tolk/unify-types.cpp b/tolk/unify-types.cpp index 848e454aa..04de323df 100644 --- a/tolk/unify-types.cpp +++ b/tolk/unify-types.cpp @@ -354,12 +354,6 @@ std::ostream& operator<<(std::ostream& os, const UnifyError& ue) { return os; } -std::string UnifyError::message() const { - std::ostringstream os; - print_message(os); - return os.str(); -} - void check_width_compat(TypeExpr* te1, TypeExpr* te2) { if (te1->minw > te2->maxw || te2->minw > te1->maxw) { std::ostringstream os{"cannot unify types of widths ", std::ios_base::ate}; From 6c30e5a7eb237a711df49daa38bd4a4031474aa1 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Thu, 31 Oct 2024 11:02:01 +0400 Subject: [PATCH 05/12] [Tolk] Embedded stdlib.tolk, CompilerState, strict includes Several related changes: - stdlib.tolk is embedded into a distribution (deb package or tolk-js), the user won't have to download it and store as a project file; it's an important step to maintain correct language versioning - stdlib.tolk is auto-included, that's why all its functions are available out of the box - strict includes: you can't use symbol `f` from another file unless you've #include'd this file - drop all C++ global variables holding compilation state, merge them into a single struct CompilerState located at compiler-state.h; for instance, stdlib filename is also there --- crypto/smartcont/stdlib.tolk | 3 + tolk/CMakeLists.txt | 7 ++ tolk/abscode.cpp | 3 +- tolk/analyzer.cpp | 3 +- tolk/builtins.cpp | 12 +- tolk/codegen.cpp | 15 +-- tolk/compiler-state.cpp | 47 ++++++++ tolk/compiler-state.h | 93 ++++++++++++++ 
tolk/gen-abscode.cpp | 10 +- tolk/lexer.cpp | 7 +- tolk/lexer.h | 1 + tolk/optimize.cpp | 4 +- tolk/parse-tolk.cpp | 178 +++++++++++++++------------ tolk/src-file.cpp | 22 ++-- tolk/src-file.h | 62 ++++++---- tolk/symtable.cpp | 73 +++++------ tolk/symtable.h | 49 +------- tolk/tolk-main.cpp | 128 ++++++++++++++++---- tolk/tolk-wasm.cpp | 35 +++--- tolk/tolk.cpp | 228 ++++++++++++++--------------------- tolk/tolk.h | 96 +++------------ 21 files changed, 587 insertions(+), 489 deletions(-) create mode 100644 tolk/compiler-state.cpp create mode 100644 tolk/compiler-state.h diff --git a/crypto/smartcont/stdlib.tolk b/crypto/smartcont/stdlib.tolk index 8545601bc..b3dfbee06 100644 --- a/crypto/smartcont/stdlib.tolk +++ b/crypto/smartcont/stdlib.tolk @@ -1,6 +1,7 @@ // Standard library for Tolk // (initially copied from stdlib.fc) // +#pragma version >=0.5; /* This file is part of TON Tolk Standard Library. @@ -405,6 +406,7 @@ cell preload_dict(slice s) pure asm "PLDDICT"; /// Loads a dictionary as [load_dict], but returns only the remainder of the slice. slice skip_dict(slice s) pure asm "SKIPDICT"; +(slice, ()) ~skip_dict(slice s) pure asm "SKIPDICT"; /// Loads (Maybe ^Cell) from `slice` [s]. /// In other words loads 1 bit and if it is true @@ -661,6 +663,7 @@ int dict_empty?(cell c) pure asm "DICTEMPTY"; cell config_param(int x) pure asm "CONFIGOPTPARAM"; /// Checks whether c is a null. Note, that Tolk also has polymorphic null? built-in. int cell_null?(cell c) pure asm "ISNULL"; +int builder_null?(builder b) asm "ISNULL"; /// Creates an output action which would reserve exactly amount nanotoncoins (if mode = 0), at most amount nanotoncoins (if mode = 2), or all but amount nanotoncoins (if mode = 1 or mode = 3), from the remaining balance of the account. 
It is roughly equivalent to creating an outbound message carrying amount nanotoncoins (or b − amount nanotoncoins, where b is the remaining balance) to oneself, so that the subsequent output actions would not be able to spend more money than the remainder. Bit +2 in mode means that the external action does not fail if the specified amount cannot be reserved; instead, all remaining balance is reserved. Bit +8 in mode means `amount <- -amount` before performing any further actions. Bit +4 in mode means that amount is increased by the original balance of the current account (before the compute phase), including all extra currencies, before performing any other checks and actions. Currently, amount must be a non-negative integer, and mode must be in the range 0..15. () raw_reserve(int amount, int mode) asm "RAWRESERVE"; diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index 820367043..a47c76147 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -4,6 +4,7 @@ set(TOLK_SOURCE src-file.cpp lexer.cpp symtable.cpp + compiler-state.cpp unify-types.cpp parse-tolk.cpp abscode.cpp @@ -28,6 +29,12 @@ if (${TOLK_DEBUG}) # -DTOLK_DEBUG=1 in CMake options => #define TOLK_DEBUG (for target_compile_definitions(tolk PRIVATE TOLK_DEBUG=1) endif() +if (NOT USE_EMSCRIPTEN) + get_filename_component(STDLIB_TOLK_IF_BUILD_FROM_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/../crypto/smartcont/stdlib.tolk" REALPATH) + target_compile_definitions(tolk PRIVATE STDLIB_TOLK_IF_BUILD_FROM_SOURCES="${STDLIB_TOLK_IF_BUILD_FROM_SOURCES}") +endif() + + if (USE_EMSCRIPTEN) add_executable(tolkfiftlib tolk-wasm.cpp ${TOLK_SOURCE}) target_include_directories(tolkfiftlib PUBLIC $) diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index 8cf1f597f..c028a5314 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -15,6 +15,7 @@ along with TON Blockchain Library. If not, see . 
*/ #include "tolk.h" +#include "compiler-state.h" namespace tolk { @@ -59,7 +60,7 @@ void TmpVar::dump(std::ostream& os) const { void TmpVar::show(std::ostream& os, int omit_idx) const { if (cls & _Named) { - os << symbols.get_name(name); + os << G.symbols.get_name(name); if (omit_idx && (omit_idx >= 2 || (cls & _UniqueName))) { return; } diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index e38ba1bb0..91b66ae91 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -15,6 +15,7 @@ along with TON Blockchain Library. If not, see . */ #include "tolk.h" +#include "compiler-state.h" namespace tolk { @@ -768,7 +769,7 @@ VarDescrList Op::fwd_analyze(VarDescrList values) { tolk_assert(left.size() == right.size()); for (std::size_t i = 0; i < right.size(); i++) { const VarDescr* ov = values[right[i]]; - if (!ov && verbosity >= 5) { + if (!ov && G.is_verbosity(5)) { std::cerr << "FATAL: error in assignment at right component #" << i << " (no value for _" << right[i] << ")" << std::endl; for (auto x : left) { diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index 355c21df2..439228f4b 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -15,6 +15,7 @@ along with TON Blockchain Library. If not, see . 
*/ #include "tolk.h" +#include "compiler-state.h" namespace tolk { using namespace std::literals::string_literals; @@ -25,18 +26,11 @@ using namespace std::literals::string_literals; * */ -int glob_func_cnt, undef_func_cnt, glob_var_cnt, const_cnt; -std::vector glob_func, glob_vars, glob_get_methods; -std::set prohibited_var_names; - SymDef* define_builtin_func_impl(const std::string& name, SymValAsmFunc* func_val) { if (name.back() == '_') { - prohibited_var_names.insert(name); - } - sym_idx_t name_idx = symbols.lookup(name, 1); - if (symbols.is_keyword(name_idx)) { - std::cerr << "fatal: global function `" << name << "` already defined as a keyword" << std::endl; + G.prohibited_var_names.insert(name); } + sym_idx_t name_idx = G.symbols.lookup(name, 1); SymDef* def = define_global_symbol(name_idx, true); if (!def) { std::cerr << "fatal: global function `" << name << "` already defined" << std::endl; diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index 64d8fdf05..a5d432ee0 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -15,6 +15,7 @@ along with TON Blockchain Library. If not, see . 
*/ #include "tolk.h" +#include "compiler-state.h" namespace tolk { @@ -324,7 +325,7 @@ bool Op::generate_code_step(Stack& stack) { if (!used || disabled()) { return true; } - std::string name = symbols.get_name(fun_ref->sym_idx); + std::string name = G.symbols.get_name(fun_ref->sym_idx); stack.o << AsmOp::Custom(name + " GETGLOB", 0, 1); if (left.size() != 1) { tolk_assert(left.size() <= 15); @@ -359,7 +360,7 @@ bool Op::generate_code_step(Stack& stack) { } func->compile(stack.o, res, args0, where); // compile res := f (args0) } else { - std::string name = symbols.get_name(fun_ref->sym_idx); + std::string name = G.symbols.get_name(fun_ref->sym_idx); stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size()); } stack.o.undent(); @@ -497,7 +498,7 @@ bool Op::generate_code_step(Stack& stack) { } else { auto fv = dynamic_cast(fun_ref->value); // todo can be fv == nullptr? - std::string name = symbols.get_name(fun_ref->sym_idx); + std::string name = G.symbols.get_name(fun_ref->sym_idx); if (fv && (fv->is_inline() || fv->is_inline_ref())) { stack.o << AsmOp::Custom(name + " INLINECALLDICT", (int)right.size(), (int)left.size()); } else if (fv && fv->code && fv->code->require_callxargs) { @@ -534,7 +535,7 @@ bool Op::generate_code_step(Stack& stack) { stack.o << AsmOp::Tuple((int)right.size()); } if (!right.empty()) { - std::string name = symbols.get_name(fun_ref->sym_idx); + std::string name = G.symbols.get_name(fun_ref->sym_idx); stack.o << AsmOp::Custom(name + " SETGLOB", 1, 0); } stack.s.resize(k); @@ -894,14 +895,14 @@ void CodeBlob::generate_code(AsmOpList& out, int mode) { } ops->generate_code_all(stack); stack.apply_wrappers(require_callxargs && (mode & Stack::_InlineAny) ? 
args : -1); - if (!(mode & Stack::_DisableOpt)) { - optimize_code(out); - } } void CodeBlob::generate_code(std::ostream& os, int mode, int indent) { AsmOpList out_list(indent, &vars); generate_code(out_list, mode); + if (G.settings.optimization_level >= 2) { + optimize_code(out_list); + } out_list.out(os, mode); } diff --git a/tolk/compiler-state.cpp b/tolk/compiler-state.cpp new file mode 100644 index 000000000..a609d88d5 --- /dev/null +++ b/tolk/compiler-state.cpp @@ -0,0 +1,47 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "compiler-state.h" + +namespace tolk { + +std::string tolk_version{"0.5.0"}; + +CompilerState G; // the only mutable global variable in tolk internals + +void GlobalPragma::enable(SrcLocation loc) { + if (deprecated_from_v_) { + loc.show_warning(PSTRING() << "#pragma " << name_ << + " is deprecated since Tolk v" << deprecated_from_v_ << + ". 
Please, remove this line from your code."); + return; + } + if (!loc.get_src_file()->is_entrypoint_file()) { + // todo generally it's not true; rework pragmas completely + loc.show_warning(PSTRING() << "#pragma " << name_ << + " should be used in the main file only."); + } + + enabled_ = true; +} + +void GlobalPragma::always_on_and_deprecated(const char *deprecated_from_v) { + deprecated_from_v_ = deprecated_from_v; + enabled_ = true; +} + + +} // namespace tolk diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h new file mode 100644 index 000000000..d20f5e953 --- /dev/null +++ b/tolk/compiler-state.h @@ -0,0 +1,93 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +#include "src-file.h" +#include "symtable.h" +#include "td/utils/Status.h" +#include +#include + +namespace tolk { + +extern std::string tolk_version; + +class GlobalPragma { + std::string name_; + bool enabled_ = false; + const char* deprecated_from_v_ = nullptr; + +public: + explicit GlobalPragma(std::string name) : name_(std::move(name)) { } + + const std::string& name() const { return name_; } + + bool enabled() const { return enabled_; } + void enable(SrcLocation loc); + void always_on_and_deprecated(const char* deprecated_from_v); +}; + +// CompilerSettings contains settings that can be passed via cmd line or (partially) wasm envelope. +// They are filled once at start and are immutable since the compilation started. +struct CompilerSettings { + enum class FsReadCallbackKind { Realpath, ReadFile }; + + using FsReadCallback = std::function(FsReadCallbackKind, const char*)>; + + int verbosity = 0; + int optimization_level = 2; + bool stack_layout_comments = true; + + std::string entrypoint_filename; + std::string output_filename; + std::string boc_output_filename; + std::string stdlib_filename; + + FsReadCallback read_callback; +}; + +// CompilerState contains a mutable state that is changed while the compilation is going on. +// It's a "global state" of all compilation. +// Historically, in FunC, this global state was spread along many global C++ variables. +// Now, no global C++ variables except `CompilerState G` are present. 
+struct CompilerState { + CompilerSettings settings; + + SymTable symbols; + int scope_level = 0; + SymDef* sym_def[SymTable::SIZE_PRIME + 1]{}; + SymDef* global_sym_def[SymTable::SIZE_PRIME + 1]{}; + std::vector> symbol_stack; + std::vector scope_opened_at; + + AllRegisteredSrcFiles all_src_files; + + int glob_func_cnt = 0, glob_var_cnt = 0, const_cnt = 0; + std::vector glob_func, glob_vars, glob_get_methods; + std::set prohibited_var_names; + + std::string generated_from; + GlobalPragma pragma_allow_post_modification{"allow-post-modification"}; + GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"}; + GlobalPragma pragma_remove_unused_functions{"remove-unused-functions"}; + + bool is_verbosity(int gt_eq) const { return settings.verbosity >= gt_eq; } +}; + +extern CompilerState G; + +} // namespace tolk diff --git a/tolk/gen-abscode.cpp b/tolk/gen-abscode.cpp index 1c4afa674..ed9374163 100644 --- a/tolk/gen-abscode.cpp +++ b/tolk/gen-abscode.cpp @@ -14,8 +14,8 @@ You should have received a copy of the GNU Lesser General Public License along with TON Blockchain Library. If not, see . 
*/ -#include #include "tolk.h" +#include "compiler-state.h" using namespace std::literals::string_literals; @@ -122,7 +122,7 @@ bool Expr::deduce_type(const Lexer& lex) { } catch (UnifyError& ue) { std::ostringstream os; os << "cannot implicitly assign an expression of type " << args[1]->e_type - << " to a variable or pattern of type " << rhs_type << " in modifying method `" << symbols.get_name(val) + << " to a variable or pattern of type " << rhs_type << " in modifying method `" << G.symbols.get_name(val) << "` : " << ue; lex.error(os.str()); } @@ -197,14 +197,14 @@ int Expr::predefine_vars() { case _Var: if (!sym) { tolk_assert(val < 0 && here.is_defined()); - if (prohibited_var_names.count(symbols.get_name(~val))) { + if (G.prohibited_var_names.count(G.symbols.get_name(~val))) { throw ParseError{ - here, PSTRING() << "symbol `" << symbols.get_name(~val) << "` cannot be redefined as a variable"}; + here, PSTRING() << "symbol `" << G.symbols.get_name(~val) << "` cannot be redefined as a variable"}; } sym = define_symbol(~val, false, here); // std::cerr << "predefining variable " << symbols.get_name(~val) << std::endl; if (!sym) { - throw ParseError{here, std::string{"redefined variable `"} + symbols.get_name(~val) + "`"}; + throw ParseError{here, std::string{"redefined variable `"} + G.symbols.get_name(~val) + "`"}; } sym->value = new SymVal{SymValKind::_Var, -1, e_type}; return 1; diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp index 6d066d294..1a5fe5694 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -15,6 +15,7 @@ along with TON Blockchain Library. If not, see . 
*/ #include "lexer.h" +#include "compiler-state.h" #include "symtable.h" #include @@ -426,7 +427,7 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (TokenType kw_tok = maybe_keyword(str_val)) { lex->add_token(kw_tok, str_val); } else { - symbols.lookup_add(static_cast(str_val)); + G.symbols.lookup_add(static_cast(str_val)); lex->add_token(tok_identifier, str_val); } return true; @@ -452,7 +453,7 @@ struct ChunkIdentifierInBackticks final : ChunkLexerBase { std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1); lex->skip_chars(1); - symbols.lookup_add(static_cast(str_val)); + G.symbols.lookup_add(static_cast(str_val)); lex->add_token(tok_identifier, str_val); return true; } @@ -611,7 +612,7 @@ void Lexer::next_special(TokenType parse_next_as, const char* str_expected) { int Lexer::cur_sym_idx() const { assert(tok() == tok_identifier); - return symbols.lookup_add(cur_str_std_string()); + return G.symbols.lookup_add(cur_str_std_string()); } void Lexer::error(const std::string& err_msg) const { diff --git a/tolk/lexer.h b/tolk/lexer.h index e0fa76065..04fc025d0 100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -208,6 +208,7 @@ class Lexer { std::string_view cur_str() const { return cur_token.str_val; } std::string cur_str_std_string() const { return static_cast(cur_token.str_val); } SrcLocation cur_location() const { return location; } + const SrcFile* cur_file() const { return file; } int cur_sym_idx() const; void next(); diff --git a/tolk/optimize.cpp b/tolk/optimize.cpp index cf7f460f8..76d756386 100644 --- a/tolk/optimize.cpp +++ b/tolk/optimize.cpp @@ -31,7 +31,7 @@ void Optimizer::set_code(AsmOpConsList code) { void Optimizer::unpack() { int i = 0, j = 0; - for (AsmOpCons *p = code_.get(); p && i < n; p = p->cdr.get(), ++j) { + for (AsmOpCons *p = code_.get(); p && i < optimize_depth; p = p->cdr.get(), ++j) { if (p->car->is_very_custom()) { break; } @@ -59,7 +59,7 @@ void Optimizer::apply() { if (!p_ && !q_) { return; } - 
tolk_assert(p_ > 0 && p_ <= l_ && q_ >= 0 && q_ <= n && l_ <= n); + tolk_assert(p_ > 0 && p_ <= l_ && q_ >= 0 && q_ <= optimize_depth && l_ <= optimize_depth); for (int i = p_; i < l_; i++) { tolk_assert(op_[i]); op_cons_[i]->car = std::move(op_[i]); diff --git a/tolk/parse-tolk.cpp b/tolk/parse-tolk.cpp index c28501d43..0b41152d4 100644 --- a/tolk/parse-tolk.cpp +++ b/tolk/parse-tolk.cpp @@ -16,6 +16,7 @@ */ #include "tolk.h" #include "platform-utils.h" +#include "compiler-state.h" #include "td/utils/crypto.h" #include "common/refint.h" #include "openssl/digest.hpp" @@ -26,15 +27,15 @@ namespace tolk { using namespace std::literals::string_literals; inline bool is_dot_ident(sym_idx_t idx) { - return symbols.get_subclass(idx) == SymbolSubclass::dot_identifier; + return G.symbols.get_subclass(idx) == SymbolSubclass::dot_identifier; } inline bool is_tilde_ident(sym_idx_t idx) { - return symbols.get_subclass(idx) == SymbolSubclass::tilde_identifier; + return G.symbols.get_subclass(idx) == SymbolSubclass::tilde_identifier; } inline bool is_special_ident(sym_idx_t idx) { - return symbols.get_subclass(idx) != SymbolSubclass::undef; + return G.symbols.get_subclass(idx) != SymbolSubclass::undef; } // given Expr::_Apply (a function call / a variable call), determine whether it's <, or >, or similar @@ -80,7 +81,7 @@ static bool is_add_or_sub_binary_op(const Expr* e_apply) { } static inline std::string get_builtin_operator_name(sym_idx_t sym_builtin) { - std::string underscored = symbols.get_name(sym_builtin); + std::string underscored = G.symbols.get_name(sym_builtin); return underscored.substr(1, underscored.size() - 2); } @@ -256,9 +257,9 @@ FormalArg parse_formal_arg(Lexer& lex, int fa_idx) { } lex.check(tok_identifier, "formal parameter name"); loc = lex.cur_location(); - if (prohibited_var_names.count(symbols.get_name(lex.cur_sym_idx()))) { + if (G.prohibited_var_names.count(G.symbols.get_name(lex.cur_sym_idx()))) { throw ParseError{ - loc, PSTRING() << "symbol `" << 
symbols.get_name(lex.cur_sym_idx()) << "` cannot be redefined as a variable"}; + loc, PSTRING() << "symbol `" << G.symbols.get_name(lex.cur_sym_idx()) << "` cannot be redefined as a variable"}; } SymDef* new_sym_def = define_symbol(lex.cur_sym_idx(), true, loc); if (!new_sym_def) { @@ -311,16 +312,15 @@ void parse_global_var_decl(Lexer& lex) { lex.error(os.str()); } } else { - sym_def->value = new SymValGlobVar{glob_var_cnt++, var_type}; + sym_def->value = new SymValGlobVar{G.glob_var_cnt++, var_type}; #ifdef TOLK_DEBUG dynamic_cast(sym_def->value)->name = lex.cur_str(); #endif - glob_vars.push_back(sym_def); + G.glob_vars.push_back(sym_def); } lex.next(); } -extern int const_cnt; Expr* parse_expr(Lexer& lex, CodeBlob& code, bool nv = false); void parse_const_decl(Lexer& lex) { @@ -360,9 +360,9 @@ void parse_const_decl(Lexer& lex) { } SymValConst* new_value = nullptr; if (x->cls == Expr::_Const) { // Integer constant - new_value = new SymValConst{const_cnt++, x->intval}; + new_value = new SymValConst{G.const_cnt++, x->intval}; } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) - new_value = new SymValConst{const_cnt++, x->strval}; + new_value = new SymValConst{G.const_cnt++, x->strval}; } else if (x->cls == Expr::_Apply) { // even "1 + 2" is Expr::_Apply (it applies `_+_`) code.emplace_back(loc, Op::_Import, std::vector()); auto tmp_vars = x->pre_compile(code); @@ -390,7 +390,7 @@ void parse_const_decl(Lexer& lex) { if (op.origin.is_null() || !op.origin->is_valid()) { lex.error("precompiled expression did not result in a valid integer constant"); } - new_value = new SymValConst{const_cnt++, op.origin}; + new_value = new SymValConst{G.const_cnt++, op.origin}; } else { lex.error("integer or slice literal or constant expected"); } @@ -453,28 +453,28 @@ void parse_global_var_decls(Lexer& lex) { } SymValCodeFunc* make_new_glob_func(SymDef* func_sym, TypeExpr* func_type, bool marked_as_pure) { - SymValCodeFunc* res = new SymValCodeFunc{glob_func_cnt, 
func_type, marked_as_pure}; + SymValCodeFunc* res = new SymValCodeFunc{G.glob_func_cnt, func_type, marked_as_pure}; #ifdef TOLK_DEBUG res->name = func_sym->name(); #endif func_sym->value = res; - glob_func.push_back(func_sym); - glob_func_cnt++; + G.glob_func.push_back(func_sym); + G.glob_func_cnt++; return res; } bool check_global_func(const Lexer& lex, sym_idx_t func_name) { SymDef* def = lookup_symbol(func_name); if (!def) { - lex.error("undefined symbol `" + symbols.get_name(func_name) + "`"); + lex.error("undefined symbol `" + G.symbols.get_name(func_name) + "`"); return false; } SymVal* val = dynamic_cast(def->value); if (!val) { - lex.error(std::string{"symbol `"} + symbols.get_name(func_name) + "` has no value and no type"); + lex.error(std::string{"symbol `"} + G.symbols.get_name(func_name) + "` has no value and no type"); return false; } else if (!val->get_type()) { - lex.error(std::string{"symbol `"} + symbols.get_name(func_name) + "` has no type, possibly not a function"); + lex.error(std::string{"symbol `"} + G.symbols.get_name(func_name) + "` has no type, possibly not a function"); return false; } else { return true; @@ -497,6 +497,21 @@ Expr* make_func_apply(Expr* fun, Expr* x) { return res; } +void check_import_exists_when_using_sym(const Lexer& lex, const SymDef* used_sym) { + if (!lex.cur_location().is_symbol_from_same_or_builtin_file(used_sym->loc)) { + const SrcFile* declared_in = used_sym->loc.get_src_file(); + bool has_import = false; + for (const SrcFile::ImportStatement& import_stmt : lex.cur_file()->imports) { + if (import_stmt.imported_file == declared_in) { + has_import = true; + } + } + if (!has_import) { + lex.error("Using a non-imported symbol `" + used_sym->name() + "`. 
Forgot to import \"" + declared_in->rel_filename + "\"?"); + } + } +} + // parse ( E { , E } ) | () | [ E { , E } ] | [] | id | num | _ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { if (lex.tok() == tok_oppar || lex.tok() == tok_opbracket) { @@ -672,6 +687,7 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { return res; } if (sym && dynamic_cast(sym->value)) { + check_import_exists_when_using_sym(lex, sym); auto val = dynamic_cast(sym->value); Expr* res = new Expr{Expr::_GlobVar, lex.cur_location()}; res->e_type = val->get_type(); @@ -681,6 +697,7 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { return res; } if (sym && dynamic_cast(sym->value)) { + check_import_exists_when_using_sym(lex, sym); auto val = dynamic_cast(sym->value); Expr* res = new Expr{Expr::_None, lex.cur_location()}; res->flags = Expr::_IsRvalue; @@ -700,6 +717,9 @@ Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { lex.next(); return res; } + if (sym && dynamic_cast(sym->value)) { + check_import_exists_when_using_sym(lex, sym); + } bool auto_apply = false; Expr* res = new Expr{Expr::_Var, lex.cur_location()}; if (nv) { @@ -796,7 +816,7 @@ Expr* parse_expr80(Lexer& lex, CodeBlob& code, bool nv) { sym_idx_t name = lex.cur_sym_idx(); auto sym = lookup_symbol(name); if (!sym || !dynamic_cast(sym->value)) { - auto name1 = symbols.lookup(lex.cur_str().substr(1)); + auto name1 = G.symbols.lookup(lex.cur_str().substr(1)); if (name1) { auto sym1 = lookup_symbol(name1); if (sym1 && dynamic_cast(sym1->value)) { @@ -806,8 +826,8 @@ Expr* parse_expr80(Lexer& lex, CodeBlob& code, bool nv) { } } check_global_func(lex, name); - if (verbosity >= 2) { - std::cerr << "using symbol `" << symbols.get_name(name) << "` for method call of " << lex.cur_str() << std::endl; + if (G.is_verbosity(2)) { + std::cerr << "using symbol `" << G.symbols.get_name(name) << "` for method call of " << lex.cur_str() << std::endl; } sym = lookup_symbol(name); SymValFunc* val = sym ? 
dynamic_cast(sym->value) : nullptr; @@ -842,7 +862,7 @@ Expr* parse_expr80(Lexer& lex, CodeBlob& code, bool nv) { Expr* parse_expr75(Lexer& lex, CodeBlob& code, bool nv) { if (lex.tok() == tok_bitwise_not || lex.tok() == tok_minus || lex.tok() == tok_plus) { TokenType t = lex.tok(); - sym_idx_t name = symbols.lookup_add(lex.cur_str_std_string() + "_"); + sym_idx_t name = G.symbols.lookup_add(lex.cur_str_std_string() + "_"); check_global_func(lex, name); SrcLocation loc{lex.cur_location()}; lex.next(); @@ -886,7 +906,7 @@ Expr* parse_expr30(Lexer& lex, CodeBlob& code, bool nv) { lex.tok() == tok_divR || lex.tok() == tok_modC || lex.tok() == tok_modR) { res->chk_rvalue(lex); TokenType t = lex.tok(); - sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); + sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); SrcLocation loc{lex.cur_location()}; check_global_func(lex, name); lex.next(); @@ -907,7 +927,7 @@ Expr* parse_expr20(Lexer& lex, CodeBlob& code, bool nv) { while (lex.tok() == tok_minus || lex.tok() == tok_plus) { res->chk_rvalue(lex); TokenType t = lex.tok(); - sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); + sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); check_global_func(lex, name); SrcLocation loc{lex.cur_location()}; lex.next(); @@ -928,7 +948,7 @@ Expr* parse_expr17(Lexer& lex, CodeBlob& code, bool nv) { while (lex.tok() == tok_lshift || lex.tok() == tok_rshift || lex.tok() == tok_rshiftC || lex.tok() == tok_rshiftR) { res->chk_rvalue(lex); TokenType t = lex.tok(); - sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); + sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); check_global_func(lex, name); SrcLocation loc{lex.cur_location()}; lex.next(); @@ -951,7 +971,7 @@ Expr* parse_expr15(Lexer& lex, CodeBlob& code, bool nv) { 
lex.tok() == tok_neq || lex.tok() == tok_spaceship) { res->chk_rvalue(lex); TokenType t = lex.tok(); - sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); + sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); check_global_func(lex, name); SrcLocation loc{lex.cur_location()}; lex.next(); @@ -972,7 +992,7 @@ Expr* parse_expr14(Lexer& lex, CodeBlob& code, bool nv) { while (lex.tok() == tok_bitwise_and || lex.tok() == tok_bitwise_or || lex.tok() == tok_bitwise_xor) { res->chk_rvalue(lex); TokenType t = lex.tok(); - sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); + sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); check_global_func(lex, name); SrcLocation loc{lex.cur_location()}; lex.next(); @@ -1019,7 +1039,7 @@ Expr* parse_expr10(Lexer& lex, CodeBlob& code, bool nv) { t == tok_set_rshiftR || t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { x->chk_lvalue(lex); x->chk_rvalue(lex); - sym_idx_t name = symbols.lookup_add(std::string{"^_"} + lex.cur_str_std_string() + "_"); + sym_idx_t name = G.symbols.lookup_add(std::string{"^_"} + lex.cur_str_std_string() + "_"); check_global_func(lex, name); SrcLocation loc{lex.cur_location()}; lex.next(); @@ -1464,8 +1484,8 @@ std::vector parse_type_var_list(Lexer& lex) { lex.error("free type identifier expected"); } SrcLocation loc = lex.cur_location(); - if (prohibited_var_names.count(symbols.get_name(lex.cur_sym_idx()))) { - throw ParseError{loc, PSTRING() << "symbol `" << symbols.get_name(lex.cur_sym_idx()) + if (G.prohibited_var_names.count(G.symbols.get_name(lex.cur_sym_idx()))) { + throw ParseError{loc, PSTRING() << "symbol `" << G.symbols.get_name(lex.cur_sym_idx()) << "` cannot be redefined as a variable"}; } SymDef* new_sym_def = define_symbol(lex.cur_sym_idx(), true, loc); @@ -1582,7 +1602,7 @@ void 
detect_if_function_just_wraps_another(SymValCodeFunc* v_current, const td:: // ok, f_current is a wrapper v_current->flags |= SymValFunc::flagWrapsAnotherF; - if (verbosity >= 2) { + if (G.is_verbosity(2)) { std::cerr << function_name << " -> " << f_called->name() << std::endl; } } @@ -1658,7 +1678,7 @@ void parse_func_def(Lexer& lex) { if (is_get_method) { tolk_assert(method_id.is_null()); method_id = calculate_method_id_by_func_name(func_name); - for (const SymDef* other : glob_get_methods) { + for (const SymDef* other : G.glob_get_methods) { if (!td::cmp(dynamic_cast(other->value)->method_id, method_id)) { lex.error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" + func_name + "` produce the same hash. Consider renaming one of these functions."); } @@ -1667,7 +1687,7 @@ void parse_func_def(Lexer& lex) { TypeExpr* func_type = TypeExpr::new_map(extract_total_arg_type(arg_list), ret_type); func_type = compute_type_closure(func_type, type_vars); if (lex.tok() == tok_builtin) { - const SymDef* builtin_func = lookup_symbol(symbols.lookup(func_name)); + const SymDef* builtin_func = lookup_symbol(G.symbols.lookup(func_name)); const SymValFunc* func_val = builtin_func ? 
dynamic_cast(builtin_func->value) : nullptr; if (!func_val || !func_val->is_builtin()) { lex.error("`builtin` used for non-builtin function"); @@ -1686,7 +1706,7 @@ void parse_func_def(Lexer& lex) { if (lex.tok() != tok_semicolon && lex.tok() != tok_opbrace && lex.tok() != tok_asm) { lex.expect(tok_opbrace, "function body block"); } - if (verbosity >= 1) { + if (G.is_verbosity(1)) { std::cerr << "function " << func_name << " : " << func_type << std::endl; } SymDef* func_sym = define_global_symbol(func_sym_idx, 0, loc); @@ -1783,9 +1803,9 @@ void parse_func_def(Lexer& lex) { lex.error("cannot set unknown function `" + func_name + "` as a get method"); } val->flags |= SymValFunc::flagGetMethod; - glob_get_methods.push_back(func_sym); + G.glob_get_methods.push_back(func_sym); } - if (verbosity >= 1) { + if (G.is_verbosity(1)) { std::cerr << "new type of function " << func_name << " : " << func_type << std::endl; } close_scope(lex.cur_location()); @@ -1876,12 +1896,12 @@ void parse_pragma(Lexer& lex) { if (!match) { throw ParseError(loc, std::string("Tolk version ") + tolk_version + " does not satisfy this condition"); } - } else if (pragma_name == pragma_allow_post_modification.name()) { - pragma_allow_post_modification.enable(loc); - } else if (pragma_name == pragma_compute_asm_ltr.name()) { - pragma_compute_asm_ltr.enable(loc); - } else if (pragma_name == pragma_remove_unused_functions.name()) { - pragma_remove_unused_functions.enable(loc); + } else if (pragma_name == G.pragma_allow_post_modification.name()) { + G.pragma_allow_post_modification.enable(loc); + } else if (pragma_name == G.pragma_compute_asm_ltr.name()) { + G.pragma_compute_asm_ltr.enable(loc); + } else if (pragma_name == G.pragma_remove_unused_functions.name()) { + G.pragma_remove_unused_functions.enable(loc); } else { lex.error("unknown pragma name"); } @@ -1889,28 +1909,42 @@ void parse_pragma(Lexer& lex) { lex.expect(tok_semicolon, "';'"); } -AllRegisteredSrcFiles all_src_files; -std::string 
stdlib_filename; - -void parse_include(Lexer& lex, const SrcFile* parent_file) { +void parse_include(Lexer& lex, SrcFile* parent_file) { SrcLocation loc = lex.cur_location(); lex.expect(tok_include, "#include"); if (lex.tok() != tok_string_const) { lex.expect(tok_string_const, "source file name"); } - std::string val = static_cast(lex.cur_str()); - std::string parent_dir = parent_file->rel_filename; - if (size_t rc = parent_dir.rfind('/'); rc != std::string::npos) { - val = parent_dir.substr(0, rc + 1) + val; + std::string rel_filename = lex.cur_str_std_string(); + if (rel_filename.empty()) { + lex.error("imported file name is an empty string"); + } + if (size_t rc = parent_file->rel_filename.rfind('/'); rc != std::string::npos) { + rel_filename = parent_file->rel_filename.substr(0, rc + 1) + rel_filename; } lex.next(); lex.expect(tok_semicolon, "';'"); - if (!parse_source_file(val.c_str(), loc)) { - lex.error(std::string{"failed parsing included file `"} + val + "`"); + + td::Result locate_res = locate_source_file(rel_filename); + if (locate_res.is_error()) { + throw ParseError(loc, "Failed to import: " + locate_res.move_as_error().message().str()); + } + + SrcFile* imported_file = locate_res.move_as_ok(); + parent_file->imports.emplace_back(SrcFile::ImportStatement{imported_file}); + if (!imported_file->was_parsed) { + parse_source_file(imported_file); } } -void parse_source(const SrcFile* file) { +// this function either throws (on any error) or returns nothing meaning success (filling global variables) +void parse_source_file(SrcFile* file) { + if (!file->is_stdlib_file()) { + G.generated_from += file->rel_filename; + G.generated_from += ", "; + } + file->was_parsed = true; + Lexer lex(file); while (!lex.is_eof()) { if (lex.tok() == tok_pragma) { @@ -1927,37 +1961,23 @@ void parse_source(const SrcFile* file) { } } -bool parse_source_file(const char* filename, SrcLocation loc_included_from) { - const SrcFile* included_from = loc_included_from.get_src_file(); - 
if (!filename || !*filename) { - throw ParseError(loc_included_from, "source file name is an empty string"); +td::Result locate_source_file(const std::string& rel_filename) { + td::Result path = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::Realpath, rel_filename.c_str()); + if (path.is_error()) { + return path.move_as_error(); } - auto path_res = read_callback(ReadCallback::Kind::Realpath, filename); - if (path_res.is_error()) { - auto error = path_res.move_as_error(); - throw ParseError(loc_included_from, error.message().c_str()); - return false; - } - std::string abs_filename = path_res.move_as_ok(); - const SrcFile* file = all_src_files.find_file(abs_filename); - if (file != nullptr) { - if (verbosity >= 2) { - std::cerr << "skipping file " << abs_filename << " because it was already parsed" << '\n'; - } - return true; - } - if (included_from) { - generated_from += std::string{"incl:"}; + std::string abs_filename = path.move_as_ok(); + if (SrcFile* file = G.all_src_files.find_file(abs_filename)) { + return file; // file was already parsed (imported from somewhere else) } - generated_from += std::string{"`"} + filename + "` "; - td::Result text = read_callback(ReadCallback::Kind::ReadFile, abs_filename.c_str()); + + td::Result text = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::ReadFile, abs_filename.c_str()); if (text.is_error()) { - throw ParseError(loc_included_from, text.move_as_error().message().str()); + return text.move_as_error(); } - file = all_src_files.register_file(filename, abs_filename, text.move_as_ok(), included_from); - parse_source(file); - return true; + + return G.all_src_files.register_file(rel_filename, abs_filename, text.move_as_ok()); } } // namespace tolk diff --git a/tolk/src-file.cpp b/tolk/src-file.cpp index 93a92e60f..3384d3d5e 100644 --- a/tolk/src-file.cpp +++ b/tolk/src-file.cpp @@ -15,17 +15,15 @@ along with TON Blockchain Library. If not, see . 
*/ #include "src-file.h" +#include "compiler-state.h" #include namespace tolk { -extern AllRegisteredSrcFiles all_src_files; -extern std::string stdlib_filename; - static_assert(sizeof(SrcLocation) == 8); -const SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const { - for (const SrcFile* file : all_src_files) { +SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const { + for (SrcFile* file : all_src_files) { if (file->file_id == file_id) { return file; } @@ -33,8 +31,8 @@ const SrcFile* AllRegisteredSrcFiles::find_file(int file_id) const { return nullptr; } -const SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const { - for (const SrcFile* file : all_src_files) { +SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const { + for (SrcFile* file : all_src_files) { if (file->abs_filename == abs_filename) { return file; } @@ -42,17 +40,13 @@ const SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) return nullptr; } -const SrcFile* AllRegisteredSrcFiles::register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text, const SrcFile* included_from) { - SrcFile* created = new SrcFile(++last_file_id, rel_filename, abs_filename, std::move(text), included_from); +SrcFile* AllRegisteredSrcFiles::register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text) { + SrcFile* created = new SrcFile(++last_file_id, rel_filename, abs_filename, std::move(text)); all_src_files.push_back(created); return created; } -bool SrcFile::is_entrypoint_file() const { - return file_id == (stdlib_filename.empty() ? 
0 : 1); -} - bool SrcFile::is_offset_valid(int offset) const { return offset >= 0 && offset < static_cast(text.size()); } @@ -98,7 +92,7 @@ std::ostream& operator<<(std::ostream& os, const Fatal& fatal) { } const SrcFile* SrcLocation::get_src_file() const { - return all_src_files.find_file(file_id); + return G.all_src_files.find_file(file_id); } void SrcLocation::show(std::ostream& os) const { diff --git a/tolk/src-file.h b/tolk/src-file.h index 0f76d787e..56395571f 100644 --- a/tolk/src-file.h +++ b/tolk/src-file.h @@ -29,23 +29,29 @@ struct SrcFile { std::string_view line_str; }; - int file_id; - std::string rel_filename; - std::string abs_filename; - std::string text; - const SrcFile* included_from{nullptr}; + struct ImportStatement { + const SrcFile* imported_file; + }; + + int file_id; // an incremental counter through all parsed files + std::string rel_filename; // relative to cwd + std::string abs_filename; // absolute from root + std::string text; // file contents loaded into memory, Token::str_val points into it + bool was_parsed = false; // to prevent double parsing when a file is imported multiple times + std::vector imports; // to check strictness (can't use a symbol without importing its file) - SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text, const SrcFile* included_from) + SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text) : file_id(file_id) , rel_filename(std::move(rel_filename)) , abs_filename(std::move(abs_filename)) - , text(std::move(text)) - , included_from(included_from) { } + , text(std::move(text)) { } SrcFile(const SrcFile& other) = delete; SrcFile &operator=(const SrcFile&) = delete; - bool is_entrypoint_file() const; + bool is_stdlib_file() const { return file_id == 0; /* stdlib always exists, has no imports and parsed the first */ } + bool is_entrypoint_file() const { return file_id == 1; /* after stdlib, the entrypoint file is parsed */ } + bool 
is_offset_valid(int offset) const; SrcPosition convert_offset(int offset) const; }; @@ -55,24 +61,12 @@ class AllRegisteredSrcFiles { int last_file_id = -1; public: - const SrcFile *find_file(int file_id) const; - const SrcFile* find_file(const std::string& abs_filename) const; - const SrcFile* register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text, const SrcFile* included_from); + SrcFile *find_file(int file_id) const; + SrcFile* find_file(const std::string& abs_filename) const; + SrcFile* register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text); const std::vector& get_all_files() const { return all_src_files; } }; -struct Fatal final : std::exception { - std::string message; - - explicit Fatal(std::string _msg) : message(std::move(_msg)) { - } - const char* what() const noexcept override { - return message.c_str(); - } -}; - -std::ostream& operator<<(std::ostream& os, const Fatal& fatal); - // SrcLocation points to a location (line, column) in some loaded .tolk source SrcFile. // Note, that instead of storing src_file, line_no, etc., only 2 ints are stored. // The purpose is: sizeof(SrcLocation) == 8, so it's just passed/stored without pointers/refs, just like int64_t. 
@@ -80,7 +74,7 @@ std::ostream& operator<<(std::ostream& os, const Fatal& fatal); class SrcLocation { friend class Lexer; - int file_id = -1; // file_id from AllRegisteredSrcFiles + int file_id = -1; // = SrcFile::file_id (note, that get_src_file() does linear search) int char_offset = -1; // offset from SrcFile::text public: @@ -92,6 +86,12 @@ class SrcLocation { bool is_defined() const { return file_id != -1; } const SrcFile* get_src_file() const; + // similar to `this->get_src_file() == symbol->get_src_file() || symbol->get_src_file()->is_stdlib()` + // (but effectively, avoiding linear search) + bool is_symbol_from_same_or_builtin_file(SrcLocation symbol_loc) const { + return file_id == symbol_loc.file_id || symbol_loc.file_id < 1; + } + void show(std::ostream& os) const; void show_context(std::ostream& os) const; @@ -103,6 +103,18 @@ class SrcLocation { std::ostream& operator<<(std::ostream& os, SrcLocation loc); +struct Fatal final : std::exception { + std::string message; + + explicit Fatal(std::string _msg) : message(std::move(_msg)) { + } + const char* what() const noexcept override { + return message.c_str(); + } +}; + +std::ostream& operator<<(std::ostream& os, const Fatal& fatal); + struct ParseError : std::exception { SrcLocation where; std::string message; diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index ec409ab2b..552abd11b 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -15,25 +15,12 @@ along with TON Blockchain Library. If not, see . */ #include "symtable.h" +#include "compiler-state.h" #include #include namespace tolk { -/* - * - * SYMBOL VALUES (DECLARED) - * - */ - -int scope_level; - -SymTable symbols; - -SymDef* sym_def[symbols.SIZE_PRIME + 1]; -SymDef* global_sym_def[symbols.SIZE_PRIME + 1]; -std::vector> symbol_stack; -std::vector scope_opened_at; Symbol::Symbol(std::string str, sym_idx_t idx) : str(std::move(str)), idx(idx) { subclass = this->str[0] == '.' ? 
SymbolSubclass::dot_identifier @@ -82,22 +69,26 @@ sym_idx_t SymTable::gen_lookup(std::string_view str, int mode, sym_idx_t idx) { } } +std::string SymDef::name() const { + return G.symbols.get_name(sym_idx); +} + void open_scope(SrcLocation loc) { - ++scope_level; - scope_opened_at.push_back(loc); + ++G.scope_level; + G.scope_opened_at.push_back(loc); } void close_scope(SrcLocation loc) { - if (!scope_level) { + if (!G.scope_level) { throw Fatal{"cannot close the outer scope"}; } - while (!symbol_stack.empty() && symbol_stack.back().first == scope_level) { - SymDef old_def = symbol_stack.back().second; + while (!G.symbol_stack.empty() && G.symbol_stack.back().first == G.scope_level) { + SymDef old_def = G.symbol_stack.back().second; auto idx = old_def.sym_idx; - symbol_stack.pop_back(); - SymDef* cur_def = sym_def[idx]; + G.symbol_stack.pop_back(); + SymDef* cur_def = G.sym_def[idx]; assert(cur_def); - assert(cur_def->level == scope_level && cur_def->sym_idx == idx); + assert(cur_def->level == G.scope_level && cur_def->sym_idx == idx); //std::cerr << "restoring local symbol `" << old_def.name << "` of level " << scope_level << " to its previous level " << old_def.level << std::endl; if (cur_def->value) { //std::cerr << "deleting value of symbol " << old_def.name << ":" << old_def.level << " at " << (const void*) it->second.value << std::endl; @@ -105,26 +96,26 @@ void close_scope(SrcLocation loc) { } if (!old_def.level && !old_def.value) { delete cur_def; // ??? keep the definition always? 
- sym_def[idx] = nullptr; + G.sym_def[idx] = nullptr; } else { - cur_def->value = std::move(old_def.value); + cur_def->value = old_def.value; cur_def->level = old_def.level; } old_def.value = nullptr; } - --scope_level; - scope_opened_at.pop_back(); + --G.scope_level; + G.scope_opened_at.pop_back(); } SymDef* lookup_symbol(sym_idx_t idx) { if (!idx) { return nullptr; } - if (sym_def[idx]) { - return sym_def[idx]; + if (G.sym_def[idx]) { + return G.sym_def[idx]; } - if (global_sym_def[idx]) { - return global_sym_def[idx]; + if (G.global_sym_def[idx]) { + return G.global_sym_def[idx]; } return nullptr; } @@ -133,11 +124,11 @@ SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc if (!name_idx) { return nullptr; } - auto found = global_sym_def[name_idx]; + auto found = G.global_sym_def[name_idx]; if (found) { return force_new && found->value ? nullptr : found; } - found = global_sym_def[name_idx] = new SymDef(0, name_idx, loc); + found = G.global_sym_def[name_idx] = new SymDef(0, name_idx, loc); #ifdef TOLK_DEBUG found->sym_name = found->name(); #endif @@ -148,26 +139,26 @@ SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) { if (!name_idx) { return nullptr; } - if (!scope_level) { + if (!G.scope_level) { return define_global_symbol(name_idx, force_new, loc); } - auto found = sym_def[name_idx]; + auto found = G.sym_def[name_idx]; if (found) { - if (found->level < scope_level) { - symbol_stack.push_back(std::make_pair(scope_level, *found)); - found->level = scope_level; + if (found->level < G.scope_level) { + G.symbol_stack.emplace_back(G.scope_level, *found); + found->level = G.scope_level; } else if (found->value && force_new) { return nullptr; } - found->value = 0; + found->value = nullptr; found->loc = loc; return found; } - found = sym_def[name_idx] = new SymDef(scope_level, name_idx, loc); - symbol_stack.push_back(std::make_pair(scope_level, SymDef{0, name_idx, loc})); + found = G.sym_def[name_idx] = new 
SymDef(G.scope_level, name_idx, loc); + G.symbol_stack.emplace_back(G.scope_level, SymDef{0, name_idx, loc}); #ifdef TOLK_DEBUG found->sym_name = found->name(); - symbol_stack.back().second.sym_name = found->name(); + G.symbol_stack.back().second.sym_name = found->name(); #endif return found; } diff --git a/tolk/symtable.h b/tolk/symtable.h index 67f949a12..0566122a4 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -15,20 +15,15 @@ along with TON Blockchain Library. If not, see . */ #pragma once + #include "src-file.h" #include #include -#include namespace tolk { -/* - * - * SYMBOL VALUES (DECLARED) - * - */ - typedef int var_idx_t; +typedef int sym_idx_t; enum class SymValKind { _Param, _Var, _Func, _Typename, _GlobVar, _Const }; @@ -40,11 +35,6 @@ struct SymValBase { virtual ~SymValBase() = default; }; -/* - * - * SYMBOL TABLE - * - */ enum class SymbolSubclass { undef = 0, @@ -52,8 +42,6 @@ enum class SymbolSubclass { tilde_identifier = 2 // begins with ~ (a non-const method) }; -typedef int sym_idx_t; - struct Symbol { std::string str; sym_idx_t idx; @@ -73,9 +61,6 @@ class SymTable { std::unique_ptr sym[SIZE_PRIME + 1]; sym_idx_t gen_lookup(std::string_view str, int mode = 0, sym_idx_t idx = 0); - static constexpr int max_kw_idx = 10000; - sym_idx_t keywords[max_kw_idx]; - public: static constexpr sym_idx_t not_found = 0; @@ -88,22 +73,12 @@ class SymTable { Symbol* operator[](sym_idx_t i) const { return sym[i].get(); } - bool is_keyword(sym_idx_t i) const { - return sym[i] && sym[i]->idx < 0; - } std::string get_name(sym_idx_t i) const { return sym[i] ? sym[i]->str : Symbol::unknown_symbol_name(i); } SymbolSubclass get_subclass(sym_idx_t i) const { return sym[i] ? sym[i]->subclass : SymbolSubclass::undef; } - Symbol* get_keyword(int i) const { - return ((unsigned)i < (unsigned)max_kw_idx) ? 
sym[keywords[i]].get() : nullptr; - } - - SymTable() { - std::memset(keywords, 0, sizeof(keywords)); - } }; struct SymTableOverflow { @@ -112,15 +87,6 @@ struct SymTableOverflow { } }; -struct SymTableKwRedef { - std::string kw; - SymTableKwRedef(std::string _kw) : kw(_kw) { - } -}; - -extern SymTable symbols; - -extern int scope_level; struct SymDef { int level; @@ -133,18 +99,9 @@ struct SymDef { SymDef(int lvl, sym_idx_t idx, SrcLocation _loc, SymValBase* val = nullptr) : level(lvl), sym_idx(idx), value(val), loc(_loc) { } - bool has_name() const { - return sym_idx; - } - std::string name() const { - return symbols.get_name(sym_idx); - } + std::string name() const; }; -extern SymDef* sym_def[symbols.SIZE_PRIME + 1]; -extern SymDef* global_sym_def[symbols.SIZE_PRIME + 1]; -extern std::vector> symbol_stack; -extern std::vector scope_opened_at; void open_scope(SrcLocation loc); void close_scope(SrcLocation loc); diff --git a/tolk/tolk-main.cpp b/tolk/tolk-main.cpp index ce08a5529..38625534d 100644 --- a/tolk/tolk-main.cpp +++ b/tolk/tolk-main.cpp @@ -24,16 +24,23 @@ from all source files in the program, then also delete it here. 
*/ #include "tolk.h" +#include "compiler-state.h" +#include "td/utils/port/path.h" #include #include +#include +#include #include "git.h" +using namespace tolk; + void usage(const char* progname) { std::cerr << "usage: " << progname << " [options] \n" "\tGenerates Fift TVM assembler code from a .tolk file\n" "-o\tWrites generated code into specified .fif file instead of stdout\n" "-b\tGenerate Fift instructions to save TVM bytecode into .boc file\n" + "-s\tSpecify stdlib location (same as env TOLK_STDLIB; if unset, auto-discover)\n" "-O\tSets optimization level (2 by default)\n" "-S\tDon't include stack layout comments into Fift output\n" "-e\tIncreases verbosity level (extra output into stderr)\n" @@ -41,28 +48,100 @@ void usage(const char* progname) { std::exit(2); } +static std::string auto_discover_stdlib_location() { + if (const char* env_var = getenv("TOLK_STDLIB")) { + return env_var; + } + // this define is automatically set if just building this repo locally with cmake +#ifdef STDLIB_TOLK_IF_BUILD_FROM_SOURCES + return STDLIB_TOLK_IF_BUILD_FROM_SOURCES; +#endif + // this define is automatically set when compiling a linux package for distribution + // (since binaries and smartcont/ folder are installed to a predefined path) + // todo provide in cmake +#ifdef STDLIB_TOLK_IF_BUILD_TO_PACKAGE + return STDLIB_TOLK_IF_BUILD_TO_PACKAGE; +#endif + return {}; +} + +td::Result fs_read_callback(CompilerSettings::FsReadCallbackKind kind, const char* query) { + switch (kind) { + case CompilerSettings::FsReadCallbackKind::ReadFile: { + struct stat f_stat; + int res = stat(query, &f_stat); + if (res != 0 || !S_ISREG(f_stat.st_mode)) { + return td::Status::Error(std::string{"cannot open file "} + query); + } + + size_t file_size = static_cast(f_stat.st_size); + std::string str; + str.resize(file_size); + FILE* f = fopen(query, "rb"); + fread(str.data(), file_size, 1, f); + fclose(f); + return std::move(str); + } + case CompilerSettings::FsReadCallbackKind::Realpath: { + 
td::Result res_realpath = td::realpath(td::CSlice(query)); + if (res_realpath.is_error()) { + return td::Status::Error(std::string{"cannot find file "} + query); + } + return res_realpath; + } + default: { + return td::Status::Error("Unknown query kind"); + } + } +} + +class StdCoutRedirectToFile { + std::unique_ptr output_file; + std::streambuf* backup_sbuf = nullptr; + +public: + explicit StdCoutRedirectToFile(const std::string& output_filename) { + if (!output_filename.empty()) { + output_file = std::make_unique(output_filename, std::fstream::trunc | std::fstream::out); + if (output_file->is_open()) { + backup_sbuf = std::cout.rdbuf(output_file->rdbuf()); + } + } + } + + ~StdCoutRedirectToFile() { + if (backup_sbuf) { + std::cout.rdbuf(backup_sbuf); + } + } + + bool is_failed() const { return output_file && !output_file->is_open(); } +}; + int main(int argc, char* const argv[]) { int i; - std::string output_filename; - while ((i = getopt(argc, argv, "o:b:O:Sevh")) != -1) { + while ((i = getopt(argc, argv, "o:b:s:O:Sevh")) != -1) { switch (i) { case 'o': - output_filename = optarg; + G.settings.output_filename = optarg; break; case 'b': - tolk::boc_output_filename = optarg; + G.settings.boc_output_filename = optarg; + break; + case 's': + G.settings.stdlib_filename = optarg; break; case 'O': - tolk::opt_level = std::max(0, atoi(optarg)); + G.settings.optimization_level = std::max(0, atoi(optarg)); break; case 'S': - tolk::stack_layout_comments = false; + G.settings.stack_layout_comments = false; break; case 'e': - ++tolk::verbosity; + G.settings.verbosity++; break; case 'v': - std::cout << "Tolk compiler v" << tolk::tolk_version << "\n"; + std::cout << "Tolk compiler v" << tolk_version << "\n"; std::cout << "Build commit: " << GitMetadata::CommitSHA1() << "\n"; std::cout << "Build date: " << GitMetadata::CommitDate() << "\n"; std::exit(0); @@ -72,16 +151,24 @@ int main(int argc, char* const argv[]) { } } - std::ostream *outs = &std::cout; + StdCoutRedirectToFile 
redirect_cout(G.settings.output_filename); + if (redirect_cout.is_failed()) { + std::cerr << "Failed to create output file " << G.settings.output_filename << '\n'; + return 2; + } - std::unique_ptr fs; - if (!output_filename.empty()) { - fs = std::make_unique(output_filename, std::fstream::trunc | std::fstream::out); - if (!fs->is_open()) { - std::cerr << "failed to create output file " << output_filename << '\n'; - return 2; - } - outs = fs.get(); + // if stdlib wasn't specified as an option — locate it based on env + if (G.settings.stdlib_filename.empty()) { + G.settings.stdlib_filename = auto_discover_stdlib_location(); + } + if (G.settings.stdlib_filename.empty()) { + std::cerr << "Failed to discover stdlib.tolk.\n" + "Probably, you have a non-standard Tolk installation.\n" + "Please, provide env variable TOLK_STDLIB referencing to it.\n"; + return 2; + } + if (G.is_verbosity(2)) { + std::cerr << "stdlib located at " << G.settings.stdlib_filename << '\n'; } if (optind != argc - 1) { @@ -89,9 +176,8 @@ int main(int argc, char* const argv[]) { return 2; } - std::string entrypoint_file_name = argv[optind]; - - tolk::read_callback = tolk::fs_read_callback; + G.settings.entrypoint_filename = argv[optind]; + G.settings.read_callback = fs_read_callback; - return tolk::tolk_proceed(entrypoint_file_name, *outs, std::cerr); + return tolk_proceed(G.settings.entrypoint_filename); } diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp index 7cf28ba3a..5add279bc 100644 --- a/tolk/tolk-wasm.cpp +++ b/tolk/tolk-wasm.cpp @@ -24,28 +24,34 @@ from all source files in the program, then also delete it here.
*/ #include "tolk.h" +#include "compiler-state.h" #include "git.h" #include "td/utils/JsonBuilder.h" #include "fift/utils.h" -#include "td/utils/base64.h" #include "td/utils/Status.h" #include -#include + +using namespace tolk; td::Result compile_internal(char *config_json) { TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json))) td::JsonObject& config = input_json.get_object(); TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optimizationLevel", true, 2)); + TRY_RESULT(stdlib_tolk, td::get_json_object_string_field(config, "stdlibLocation", false)); TRY_RESULT(stack_comments, td::get_json_object_bool_field(config, "withStackComments", true, false)); TRY_RESULT(entrypoint_file_name, td::get_json_object_string_field(config, "entrypointFileName", false)); - tolk::opt_level = std::max(0, opt_level); - tolk::verbosity = 0; - tolk::stack_layout_comments = stack_comments; + G.settings.verbosity = 0; + G.settings.optimization_level = std::max(0, opt_level); + G.settings.stdlib_filename = stdlib_tolk; + G.settings.stack_layout_comments = stack_comments; + G.settings.entrypoint_filename = entrypoint_file_name; std::ostringstream outs, errs; - int tolk_res = tolk::tolk_proceed(entrypoint_file_name, outs, errs); + std::cout.rdbuf(outs.rdbuf()); + std::cerr.rdbuf(errs.rdbuf()); + int tolk_res = tolk::tolk_proceed(entrypoint_file_name); if (tolk_res != 0) { return td::Status::Error("Tolk compilation error: " + errs.str()); } @@ -58,6 +64,7 @@ td::Result compile_internal(char *config_json) { obj("fiftCode", fift_res.fiftCode); obj("codeBoc64", fift_res.codeBoc64); obj("codeHashHex", fift_res.codeHashHex); + obj("stderr", errs.str().c_str()); obj.leave(); return result_json.string_builder().as_cslice().str(); @@ -68,11 +75,11 @@ td::Result compile_internal(char *config_json) { /// The implementor must use malloc() for them and use free() after tolk_compile returns. 
typedef void (*CStyleReadFileCallback)(int kind, char const* data, char** destContents, char** destError); -tolk::ReadCallback::Callback wrapReadCallback(CStyleReadFileCallback _readCallback) +CompilerSettings::FsReadCallback wrapReadCallback(CStyleReadFileCallback _readCallback) { - tolk::ReadCallback::Callback readCallback; + CompilerSettings::FsReadCallback readCallback; if (_readCallback) { - readCallback = [=](tolk::ReadCallback::Kind kind, char const* data) -> td::Result { + readCallback = [=](CompilerSettings::FsReadCallbackKind kind, char const* data) -> td::Result { char* destContents = nullptr; char* destError = nullptr; _readCallback(static_cast(kind), data, &destContents, &destError); @@ -93,7 +100,7 @@ extern "C" { const char* version() { auto version_json = td::JsonBuilder(); auto obj = version_json.enter_object(); - obj("tolkVersion", tolk::tolk_version); + obj("tolkVersion", tolk_version); obj("tolkFiftLibCommitHash", GitMetadata::CommitSHA1()); obj("tolkFiftLibCommitDate", GitMetadata::CommitDate()); obj.leave(); @@ -101,13 +108,9 @@ const char* version() { } const char *tolk_compile(char *config_json, CStyleReadFileCallback callback) { - if (callback) { - tolk::read_callback = wrapReadCallback(callback); - } else { - tolk::read_callback = tolk::fs_read_callback; - } + G.settings.read_callback = wrapReadCallback(callback); - auto res = compile_internal(config_json); + td::Result res = compile_internal(config_json); if (res.is_error()) { auto result = res.move_as_error(); diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index 1fce3ebf2..066fecbdf 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -24,6 +24,7 @@ from all source files in the program, then also delete it here. 
*/ #include "tolk.h" +#include "compiler-state.h" #include "lexer.h" #include #include "git.h" @@ -33,14 +34,6 @@ namespace tolk { -int verbosity = 0, opt_level = 2; -bool stack_layout_comments = true; -GlobalPragma pragma_allow_post_modification{"allow-post-modification"}; -GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"}; -GlobalPragma pragma_remove_unused_functions{"remove-unused-functions"}; -std::string generated_from, boc_output_filename; -ReadCallback::Callback read_callback; - // returns argument type of a function // note, that when a function has multiple arguments, its arg type is a tensor (no arguments — an empty tensor) // in other words, `f(int a, int b)` and `f((int,int) ab)` is the same when we speak about types @@ -61,7 +54,7 @@ const TypeExpr *SymValFunc::get_arg_type() const { bool SymValCodeFunc::does_need_codegen() const { // when a function is declared, but not referenced from code in any way, don't generate its body - if (!is_really_used && pragma_remove_unused_functions.enabled()) { + if (!is_really_used && G.pragma_remove_unused_functions.enabled()) { return false; } // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist @@ -74,53 +67,6 @@ bool SymValCodeFunc::does_need_codegen() const { // in the future, we may want to implement a true AST inlining for `inline` functions also } -void GlobalPragma::enable(SrcLocation loc) { - if (deprecated_from_v_) { - loc.show_warning(PSTRING() << "#pragma " << name_ << - " is deprecated since Tolk v" << deprecated_from_v_ << - ". 
Please, remove this line from your code."); - return; - } - if (!loc.get_src_file()->is_entrypoint_file()) { - // todo generally it's not true; rework pragmas completely - loc.show_warning(PSTRING() << "#pragma " << name_ << - " should be used in the main file only."); - } - - enabled_ = true; -} - -void GlobalPragma::always_on_and_deprecated(const char *deprecated_from_v) { - deprecated_from_v_ = deprecated_from_v; - enabled_ = true; -} - -td::Result fs_read_callback(ReadCallback::Kind kind, const char* query) { - switch (kind) { - case ReadCallback::Kind::ReadFile: { - struct stat f_stat; - int res = stat(query, &f_stat); - if (res != 0) { - return td::Status::Error(std::string{"cannot open source file: "} + query); - } - - size_t file_size = static_cast(f_stat.st_size); - std::string str; - str.resize(file_size); - FILE* f = fopen(query, "r"); - fread(str.data(), file_size, 1, f); - fclose(f); - return std::move(str); - } - case ReadCallback::Kind::Realpath: { - return td::realpath(td::CSlice(query)); - } - default: { - return td::Status::Error("Unknown query kind"); - } - } -} - void mark_function_used_dfs(const std::unique_ptr& op); void mark_function_used(SymValCodeFunc* func_val) { @@ -159,9 +105,9 @@ void mark_function_used_dfs(const std::unique_ptr& op) { } void mark_used_symbols() { - for (SymDef* func_sym : glob_func) { + for (SymDef* func_sym : G.glob_func) { auto* func_val = dynamic_cast(func_sym->value); - std::string name = symbols.get_name(func_sym->sym_idx); + std::string name = G.symbols.get_name(func_sym->sym_idx); if (func_val->method_id.not_null() || name == "main" || name == "recv_internal" || name == "recv_external" || name == "run_ticktock" || name == "split_prepare" || name == "split_install") { @@ -176,58 +122,58 @@ void mark_used_symbols() { * */ -void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &errs) { +void generate_output_func(SymDef* func_sym) { SymValCodeFunc* func_val = dynamic_cast(func_sym->value); 
tolk_assert(func_val); - std::string name = symbols.get_name(func_sym->sym_idx); - if (verbosity >= 2) { - errs << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl; + std::string name = G.symbols.get_name(func_sym->sym_idx); + if (G.is_verbosity(2)) { + std::cerr << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl; } if (!func_val->code) { throw ParseError(func_sym->loc, "function `" + name + "` is just declared, not implemented"); } else { CodeBlob& code = *(func_val->code); - if (verbosity >= 3) { - code.print(errs, 9); + if (G.is_verbosity(3)) { + code.print(std::cerr, 9); } code.simplify_var_types(); - if (verbosity >= 5) { - errs << "after simplify_var_types: \n"; - code.print(errs, 0); + if (G.is_verbosity(5)) { + std::cerr << "after simplify_var_types: \n"; + code.print(std::cerr, 0); } code.prune_unreachable_code(); - if (verbosity >= 5) { - errs << "after prune_unreachable: \n"; - code.print(errs, 0); + if (G.is_verbosity(5)) { + std::cerr << "after prune_unreachable: \n"; + code.print(std::cerr, 0); } code.split_vars(true); - if (verbosity >= 5) { - errs << "after split_vars: \n"; - code.print(errs, 0); + if (G.is_verbosity(5)) { + std::cerr << "after split_vars: \n"; + code.print(std::cerr, 0); } for (int i = 0; i < 8; i++) { code.compute_used_code_vars(); - if (verbosity >= 4) { - errs << "after compute_used_vars: \n"; - code.print(errs, 6); + if (G.is_verbosity(4)) { + std::cerr << "after compute_used_vars: \n"; + code.print(std::cerr, 6); } code.fwd_analyze(); - if (verbosity >= 5) { - errs << "after fwd_analyze: \n"; - code.print(errs, 6); + if (G.is_verbosity(5)) { + std::cerr << "after fwd_analyze: \n"; + code.print(std::cerr, 6); } code.prune_unreachable_code(); - if (verbosity >= 5) { - errs << "after prune_unreachable: \n"; - code.print(errs, 6); + if (G.is_verbosity(5)) { + std::cerr << "after prune_unreachable: \n"; + code.print(std::cerr, 6); } } 
code.mark_noreturn(); - if (verbosity >= 3) { - code.print(errs, 15); + if (G.is_verbosity(3)) { + code.print(std::cerr, 15); } - if (verbosity >= 2) { - errs << "\n---------- resulting code for " << name << " -------------\n"; + if (G.is_verbosity(2)) { + std::cerr << "\n---------- resulting code for " << name << " -------------\n"; } const char* modifier = ""; if (func_val->is_inline()) { @@ -235,115 +181,119 @@ void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &er } else if (func_val->is_inline_ref()) { modifier = "REF"; } - outs << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n"; + std::cout << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n"; int mode = 0; - if (stack_layout_comments) { + if (G.settings.stack_layout_comments) { mode |= Stack::_StkCmt | Stack::_CptStkCmt; } - if (opt_level < 2) { - mode |= Stack::_DisableOpt; - } if (func_val->is_inline() && code.ops->noreturn()) { mode |= Stack::_InlineFunc; } if (func_val->is_inline() || func_val->is_inline_ref()) { mode |= Stack::_InlineAny; } - code.generate_code(outs, mode, 2); - outs << std::string(2, ' ') << "}>\n"; - if (verbosity >= 2) { - errs << "--------------\n"; + code.generate_code(std::cout, mode, 2); + std::cout << std::string(2, ' ') << "}>\n"; + if (G.is_verbosity(2)) { + std::cerr << "--------------\n"; } } } -int generate_output(std::ostream &outs, std::ostream &errs) { - outs << "\"Asm.fif\" include\n"; - outs << "// automatically generated from " << generated_from << std::endl; - outs << "PROGRAM{\n"; +// this function either throws or successfully prints whole program output to std::cout +void generate_output() { + std::cout << "\"Asm.fif\" include\n"; + std::cout << "// automatically generated from " << G.generated_from << std::endl; + std::cout << "PROGRAM{\n"; mark_used_symbols(); - for (SymDef* func_sym : glob_func) { + + for (SymDef* func_sym : G.glob_func) { SymValCodeFunc* func_val = dynamic_cast(func_sym->value); 
tolk_assert(func_val); if (!func_val->does_need_codegen()) { - if (verbosity >= 2) { - errs << func_sym->name() << ": code not generated, function does not need codegen\n"; + if (G.is_verbosity(2)) { + std::cerr << func_sym->name() << ": code not generated, function does not need codegen\n"; } continue; } - std::string name = symbols.get_name(func_sym->sym_idx); - outs << std::string(2, ' '); + std::string name = G.symbols.get_name(func_sym->sym_idx); + std::cout << std::string(2, ' '); if (func_val->method_id.is_null()) { - outs << "DECLPROC " << name << "\n"; + std::cout << "DECLPROC " << name << "\n"; } else { - outs << func_val->method_id << " DECLMETHOD " << name << "\n"; + std::cout << func_val->method_id << " DECLMETHOD " << name << "\n"; } } - for (SymDef* gvar_sym : glob_vars) { + + for (SymDef* gvar_sym : G.glob_vars) { auto* glob_val = dynamic_cast(gvar_sym->value); tolk_assert(glob_val); - if (!glob_val->is_really_used && pragma_remove_unused_functions.enabled()) { - if (verbosity >= 2) { - errs << gvar_sym->name() << ": variable not generated, it's unused\n"; + if (!glob_val->is_really_used && G.pragma_remove_unused_functions.enabled()) { + if (G.is_verbosity(2)) { + std::cerr << gvar_sym->name() << ": variable not generated, it's unused\n"; } continue; } - std::string name = symbols.get_name(gvar_sym->sym_idx); - outs << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n"; + std::string name = G.symbols.get_name(gvar_sym->sym_idx); + std::cout << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n"; } - int errors = 0; - for (SymDef* func_sym : glob_func) { + + for (SymDef* func_sym : G.glob_func) { SymValCodeFunc* func_val = dynamic_cast(func_sym->value); if (!func_val->does_need_codegen()) { continue; } - try { - generate_output_func(func_sym, outs, errs); - } catch (ParseError& err) { - errs << "cannot generate code for function `" << symbols.get_name(func_sym->sym_idx) << "`:\n" - << err << std::endl; - ++errors; - } + 
generate_output_func(func_sym); } - outs << "}END>c\n"; - if (!boc_output_filename.empty()) { - outs << "boc>B \"" << boc_output_filename << "\" B>file\n"; + + std::cout << "}END>c\n"; + if (!G.settings.boc_output_filename.empty()) { + std::cout << "boc>B \"" << G.settings.boc_output_filename << "\" B>file\n"; } - return errors; } -int tolk_proceed(const std::string &entrypoint_file_name, std::ostream &outs, std::ostream &errs) { +int tolk_proceed(const std::string &entrypoint_file_name) { define_builtins(); lexer_init(); - pragma_allow_post_modification.always_on_and_deprecated("0.5.0"); - pragma_compute_asm_ltr.always_on_and_deprecated("0.5.0"); + G.pragma_allow_post_modification.always_on_and_deprecated("0.5.0"); + G.pragma_compute_asm_ltr.always_on_and_deprecated("0.5.0"); try { - bool ok = parse_source_file(entrypoint_file_name.c_str(), {}); - if (!ok) { - throw Fatal{"output code generation omitted because of errors"}; + { + if (G.settings.stdlib_filename.empty()) { + throw Fatal("stdlib filename not specified"); + } + td::Result locate_res = locate_source_file(G.settings.stdlib_filename); + if (locate_res.is_error()) { + throw Fatal("Failed to locate stdlib: " + locate_res.error().message().str()); + } + parse_source_file(locate_res.move_as_ok()); + } + td::Result locate_res = locate_source_file(entrypoint_file_name); + if (locate_res.is_error()) { + throw Fatal("Failed to locate " + entrypoint_file_name + ": " + locate_res.error().message().str()); } + parse_source_file(locate_res.move_as_ok()); // todo #ifdef TOLK_PROFILING + comment // lexer_measure_performance(all_src_files.get_all_files()); - return generate_output(outs, errs); + generate_output(); + return 0; } catch (Fatal& fatal) { - errs << "fatal: " << fatal << std::endl; + std::cerr << "fatal: " << fatal << std::endl; return 2; } catch (ParseError& error) { - errs << error << std::endl; + std::cerr << error << std::endl; return 2; } catch (UnifyError& unif_err) { - errs << "fatal: "; - 
unif_err.print_message(errs); - errs << std::endl; + std::cerr << "fatal: "; + unif_err.print_message(std::cerr); + std::cerr << std::endl; return 2; } - - return 0; } } // namespace tolk diff --git a/tolk/tolk.h b/tolk/tolk.h index 27e26f050..15aeba256 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -15,22 +15,16 @@ along with TON Blockchain Library. If not, see . */ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "common/refcnt.hpp" -#include "common/bigint.hpp" -#include "common/refint.h" + #include "src-file.h" #include "lexer.h" #include "symtable.h" +#include "crypto/common/refint.h" #include "td/utils/Status.h" +#include +#include +#include +#include #define tolk_assert(expr) \ (bool(expr) ? void(0) \ @@ -38,14 +32,6 @@ namespace tolk { -extern int verbosity; -extern bool op_rewrite_comments; -extern std::string generated_from; - -constexpr int optimize_depth = 20; - -const std::string tolk_version{"0.4.5"}; - /* * * TYPE EXPRESSIONS @@ -200,8 +186,6 @@ std::ostream& operator<<(std::ostream& os, const UnifyError& ue); void unify(TypeExpr*& te1, TypeExpr*& te2); -// extern int TypeExpr::holes; - /* * * ABSTRACT CODE @@ -596,7 +580,7 @@ struct CodeBlob { CodeBlob(TypeExpr* ret = nullptr) : var_cnt(0), in_var_cnt(0), op_cnt(0), ret_type(ret), cur_ops(&ops) { } template - Op& emplace_back(const Args&... args) { + Op& emplace_back(Args&&... args) { Op& res = *(*cur_ops = std::make_unique(args...)); cur_ops = &(res.next); return res; @@ -768,9 +752,6 @@ struct SymValConst : SymValBase { } }; -extern int glob_func_cnt, undef_func_cnt, glob_var_cnt; -extern std::vector glob_func, glob_vars, glob_get_methods; -extern std::set prohibited_var_names; /* * @@ -778,27 +759,11 @@ extern std::set prohibited_var_names; * */ -class ReadCallback { -public: - /// Noncopyable. 
- ReadCallback(ReadCallback const&) = delete; - ReadCallback& operator=(ReadCallback const&) = delete; - - enum class Kind { - Realpath, - ReadFile, - }; - - /// File reading or generic query callback. - using Callback = std::function(Kind, const char*)>; -}; // defined in parse-tolk.cpp -void parse_source(const SrcFile* file); -bool parse_source_file(const char* filename, SrcLocation loc_included_from); +td::Result locate_source_file(const std::string& rel_filename); +void parse_source_file(SrcFile* file); -extern std::stack inclusion_locations; -extern AllRegisteredSrcFiles all_src_files; /* * @@ -1359,8 +1324,6 @@ struct StackTransform { bool try_store(int x, int y); // appends (x,y) to A }; -//extern const StackTransform StackTransform::rot, StackTransform::rot_rev; - inline std::ostream& operator<<(std::ostream& os, const StackTransform& trans) { trans.show(os); return os; @@ -1375,14 +1338,14 @@ bool apply_op(StackTransform& trans, const AsmOp& op); */ struct Optimizer { - enum { n = optimize_depth }; + static constexpr int optimize_depth = 20; AsmOpConsList code_; int l_{0}, l2_{0}, p_, pb_, q_, indent_; bool debug_{false}; - std::unique_ptr op_[n], oq_[n]; - AsmOpCons* op_cons_[n]; - int offs_[n]; - StackTransform tr_[n]; + std::unique_ptr op_[optimize_depth], oq_[optimize_depth]; + AsmOpCons* op_cons_[optimize_depth]; + int offs_[optimize_depth]; + StackTransform tr_[optimize_depth]; int mode_{0}; Optimizer() { } @@ -1475,7 +1438,7 @@ struct Stack { StackLayoutExt s; AsmOpList& o; enum { - _StkCmt = 1, _CptStkCmt = 2, _DisableOpt = 4, _DisableOut = 128, _Shown = 256, + _StkCmt = 1, _CptStkCmt = 2, _DisableOut = 128, _Shown = 256, _InlineFunc = 512, _NeedRetAlt = 1024, _InlineAny = 2048, _ModeSave = _InlineFunc | _NeedRetAlt | _InlineAny, _Garbage = -0x10000 @@ -1640,33 +1603,6 @@ AsmOp push_const(td::RefInt256 x); void define_builtins(); -extern int verbosity, opt_level; -extern bool stack_layout_comments; -extern std::string generated_from, 
boc_output_filename; -extern ReadCallback::Callback read_callback; - -td::Result fs_read_callback(ReadCallback::Kind kind, const char* query); - -class GlobalPragma { - public: - explicit GlobalPragma(std::string name) : name_(std::move(name)) { - } - - const std::string& name() const { - return name_; - } - bool enabled() const { - return enabled_; - } - void enable(SrcLocation loc); - void always_on_and_deprecated(const char *deprecated_from_v); - - private: - std::string name_; - bool enabled_ = false; - const char *deprecated_from_v_ = nullptr; -}; -extern GlobalPragma pragma_allow_post_modification, pragma_compute_asm_ltr, pragma_remove_unused_functions; /* * @@ -1674,7 +1610,7 @@ extern GlobalPragma pragma_allow_post_modification, pragma_compute_asm_ltr, prag * */ -int tolk_proceed(const std::string &entrypoint_file_name, std::ostream &outs, std::ostream &errs); +int tolk_proceed(const std::string &entrypoint_file_name); } // namespace tolk From 80001d1756758ccaf65d890397aeb25f7fcb2604 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Thu, 31 Oct 2024 11:03:33 +0400 Subject: [PATCH 06/12] [Tolk] Implement AST: intermediate representation of tolk files Now, the whole .tolk file can be loaded as AST tree and then converted to Expr/Op. This gives a great ability to implement AST transformations. In the future, more and more code analysis will be moved out of legacy to AST-level. 
--- tolk/CMakeLists.txt | 4 +- tolk/abscode.cpp | 14 +- tolk/analyzer.cpp | 2 +- tolk/ast-from-tokens.cpp | 877 +++++++++++++++++ tolk/ast-from-tokens.h | 27 + tolk/ast-replacer.h | 155 +++ tolk/ast-stringifier.h | 233 +++++ tolk/ast-to-legacy.cpp | 1438 +++++++++++++++++++++++++++ tolk/ast-to-legacy.h | 28 + tolk/ast-visitor.h | 199 ++++ tolk/ast.cpp | 70 ++ tolk/ast.h | 567 +++++++++++ tolk/builtins.cpp | 23 +- tolk/gen-abscode.cpp | 32 +- tolk/lexer.cpp | 19 +- tolk/lexer.h | 9 +- tolk/parse-tolk.cpp | 1983 -------------------------------------- tolk/symtable.cpp | 8 +- tolk/symtable.h | 18 +- tolk/tolk.cpp | 11 +- tolk/tolk.h | 172 +--- tolk/type-expr.h | 140 +++ tolk/unify-types.cpp | 2 - 23 files changed, 3798 insertions(+), 2233 deletions(-) create mode 100644 tolk/ast-from-tokens.cpp create mode 100644 tolk/ast-from-tokens.h create mode 100644 tolk/ast-replacer.h create mode 100644 tolk/ast-stringifier.h create mode 100644 tolk/ast-to-legacy.cpp create mode 100644 tolk/ast-to-legacy.h create mode 100644 tolk/ast-visitor.h create mode 100644 tolk/ast.cpp create mode 100644 tolk/ast.h delete mode 100644 tolk/parse-tolk.cpp create mode 100644 tolk/type-expr.h diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index a47c76147..5306354de 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -5,8 +5,10 @@ set(TOLK_SOURCE lexer.cpp symtable.cpp compiler-state.cpp + ast.cpp + ast-from-tokens.cpp + ast-to-legacy.cpp unify-types.cpp - parse-tolk.cpp abscode.cpp gen-abscode.cpp analyzer.cpp diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index c028a5314..0702b1b9d 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -25,8 +25,8 @@ namespace tolk { * */ -TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc) - : v_type(_type), idx(_idx), cls(_cls), coord(0), where(loc) { +TmpVar::TmpVar(var_idx_t _idx, bool _is_tmp_unnamed, TypeExpr* _type, SymDef* sym, SrcLocation loc) + : v_type(_type), idx(_idx), 
is_tmp_unnamed(_is_tmp_unnamed), coord(0), where(loc) { if (sym) { name = sym->sym_idx; sym->value->idx = _idx; @@ -59,9 +59,9 @@ void TmpVar::dump(std::ostream& os) const { } void TmpVar::show(std::ostream& os, int omit_idx) const { - if (cls & _Named) { + if (!is_tmp_unnamed) { os << G.symbols.get_name(name); - if (omit_idx && (omit_idx >= 2 || (cls & _UniqueName))) { + if (omit_idx >= 2) { return; } } @@ -474,8 +474,8 @@ void CodeBlob::print(std::ostream& os, int flags) const { os << "-------- END ---------\n\n"; } -var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation location) { - vars.emplace_back(var_cnt, cls, var_type, sym, location); +var_idx_t CodeBlob::create_var(bool is_tmp_unnamed, TypeExpr* var_type, SymDef* sym, SrcLocation location) { + vars.emplace_back(var_cnt, is_tmp_unnamed, var_type, sym, location); if (sym) { sym->value->idx = var_cnt; } @@ -492,7 +492,7 @@ bool CodeBlob::import_params(FormalArgList arg_list) { SymDef* arg_sym; SrcLocation arg_loc; std::tie(arg_type, arg_sym, arg_loc) = par; - list.push_back(create_var(arg_sym ? 
(TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, arg_loc)); + list.push_back(create_var(arg_sym == nullptr, arg_type, arg_sym, arg_loc)); } emplace_back(loc, Op::_Import, list); in_var_cnt = var_cnt; diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index 91b66ae91..cefa83b9c 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -46,7 +46,7 @@ int CodeBlob::split_vars(bool strict) { if (k != 1) { var.coord = ~((n << 8) + k); for (int i = 0; i < k; i++) { - auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where); + auto v = create_var(vars[j].is_tmp_unnamed, comp_types[i], 0, vars[j].where); tolk_assert(v == n + i); tolk_assert(vars[v].idx == v); vars[v].name = vars[j].name; diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp new file mode 100644 index 000000000..386576843 --- /dev/null +++ b/tolk/ast-from-tokens.cpp @@ -0,0 +1,877 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>. +*/ +#include "ast-from-tokens.h" +#include "ast.h" +#include "platform-utils.h" +#include "type-expr.h" + +/* + * Here we construct AST for a tolk file. + * While constructing, no global state is modified. + * Historically, in FunC, there was no AST: while lexing, symbols were registered, types were inferred, and so on. + * There was no way to perform any more or less semantic analysis.
+ * Implementing AST gives a giant advance for future modifications and stability. + */ + +namespace tolk { + +// given a token, determine whether it's <, or >, or similar +static bool is_comparison_binary_op(TokenType tok) { + return tok == tok_lt || tok == tok_gt || tok == tok_leq || tok == tok_geq || tok == tok_eq || tok == tok_neq || tok == tok_spaceship; +} + +// same as above, but to detect bitwise operators: & | ^ +// (in Tolk, they are used as logical ones due to absence of a boolean type and && || operators) +static bool is_bitwise_binary_op(TokenType tok) { + return tok == tok_bitwise_and || tok == tok_bitwise_or || tok == tok_bitwise_xor; +} + +// same as above, but to detect addition/subtraction +static bool is_add_or_sub_binary_op(TokenType tok) { + return tok == tok_plus || tok == tok_minus; +} + +// fire an error for a case "flags & 0xFF != 0" (equivalent to "flags & 1", probably unexpected) +// it would better be a warning, but we decided to make it a strict error +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_lower_precedence(SrcLocation loc, std::string_view op_lower, std::string_view op_higher) { + std::string name_lower = static_cast(op_lower); + std::string name_higher = static_cast(op_higher); + throw ParseError(loc, name_lower + " has lower precedence than " + name_higher + + ", probably this code won't work as you expected. " + "Use parenthesis: either (... " + name_lower + " ...) to evaluate it first, or (... " + name_higher + " ...) to suppress this error."); +} + +// fire an error for a case "arg1 & arg2 | arg3" +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_mix_bitwise_and_or(SrcLocation loc, std::string_view op1, std::string_view op2) { + std::string name1 = static_cast(op1); + std::string name2 = static_cast(op2); + throw ParseError(loc, "mixing " + name1 + " with " + name2 + " without parenthesis" + ", probably this code won't work as you expected. 
" + "Use parenthesis to emphasize operator precedence."); +} + +// diagnose when bitwise operators are used in a probably wrong way due to tricky precedence +// example: "flags & 0xFF != 0" is equivalent to "flags & 1", most likely it's unexpected +// the only way to suppress this error for the programmer is to use parenthesis +// (how do we detect presence of parenthesis? simple: (0!=1) is ast_parenthesized_expr{ast_binary_operator}, +// that's why if rhs->type == ast_binary_operator, it's not surrounded by parenthesis) +static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operator_name, AnyV lhs, AnyV rhs) { + // handle "flags & 0xFF != 0" (rhs = "0xFF != 0") + if (rhs->type == ast_binary_operator && is_comparison_binary_op(rhs->as()->tok)) { + fire_error_lower_precedence(loc, operator_name, rhs->as()->operator_name); + } + + // handle "0 != flags & 0xFF" (lhs = "0 != flags") + if (lhs->type == ast_binary_operator && is_comparison_binary_op(lhs->as()->tok)) { + fire_error_lower_precedence(loc, operator_name, lhs->as()->operator_name); + } + + // handle "arg1 & arg2 | arg3" (lhs = "arg1 & arg2") + if (lhs->type == ast_binary_operator && is_bitwise_binary_op(lhs->as()->tok) && lhs->as()->operator_name != operator_name) { + fire_error_mix_bitwise_and_or(loc, lhs->as()->operator_name, operator_name); + } +} + +// diagnose "a << 8 + 1" (equivalent to "a << 9", probably unexpected) +static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bitshift_operator_name, AnyV rhs) { + if (rhs->type == ast_binary_operator && is_add_or_sub_binary_op(rhs->as()->tok)) { + fire_error_lower_precedence(loc, bitshift_operator_name, rhs->as()->operator_name); + } +} + +/* + * + * PARSE SOURCE + * + */ + +// TE ::= TA | TA -> TE +// TA ::= int | ... 
| cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ] +TypeExpr* parse_type(Lexer& lex, V forall_list); + +TypeExpr* parse_type1(Lexer& lex, V forall_list) { + switch (lex.tok()) { + case tok_int: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Int); + case tok_cell: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Cell); + case tok_slice: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Slice); + case tok_builder: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Builder); + case tok_cont: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Cont); + case tok_tuple: + lex.next(); + return TypeExpr::new_atomic(TypeExpr::_Tuple); + case tok_var: + case tok_underscore: + lex.next(); + return TypeExpr::new_hole(); + case tok_identifier: { + if (int idx = forall_list ? forall_list->lookup_idx(lex.cur_str()) : -1; idx != -1) { + lex.next(); + return forall_list->get_item(idx)->created_type; + } + lex.error("Is not a type identifier"); + } + default: + break; + } + TokenType c; + if (lex.tok() == tok_opbracket) { + lex.next(); + c = tok_clbracket; + } else { + lex.expect(tok_oppar, ""); + c = tok_clpar; + } + if (lex.tok() == c) { + lex.next(); + return c == tok_clpar ? TypeExpr::new_unit() : TypeExpr::new_tuple({}); + } + auto t1 = parse_type(lex, forall_list); + if (lex.tok() == tok_clpar) { + lex.expect(c, c == tok_clpar ? "')'" : "']'"); + return t1; + } + std::vector tlist{1, t1}; + while (lex.tok() == tok_comma) { + lex.next(); + tlist.push_back(parse_type(lex, forall_list)); + } + lex.expect(c, c == tok_clpar ? "')'" : "']'"); + return c == tok_clpar ? 
TypeExpr::new_tensor(std::move(tlist)) : TypeExpr::new_tuple(std::move(tlist)); +} + +TypeExpr* parse_type(Lexer& lex, V forall_list) { + TypeExpr* res = parse_type1(lex, forall_list); + if (lex.tok() == tok_mapsto) { + lex.next(); + TypeExpr* to = parse_type(lex, forall_list); + return TypeExpr::new_map(res, to); + } + return res; +} + +AnyV parse_argument(Lexer& lex, V forall_list) { + TypeExpr* arg_type = nullptr; + SrcLocation loc = lex.cur_location(); + if (lex.tok() == tok_underscore) { + lex.next(); + if (lex.tok() == tok_comma || lex.tok() == tok_clpar) { + return createV(loc, "", TypeExpr::new_hole()); + } + arg_type = TypeExpr::new_hole(); + loc = lex.cur_location(); + } else if (lex.tok() != tok_identifier) { // int, cell, [X], etc. + arg_type = parse_type(lex, forall_list); + } else if (lex.tok() == tok_identifier) { + if (forall_list && forall_list->lookup_idx(lex.cur_str()) != -1) { + arg_type = parse_type(lex, forall_list); + } else { + arg_type = TypeExpr::new_hole(); + } + } else { + lex.error("Is not a type identifier"); + } + if (lex.tok() == tok_underscore || lex.tok() == tok_comma || lex.tok() == tok_clpar) { + if (lex.tok() == tok_underscore) { + loc = lex.cur_location(); + lex.next(); + } + return createV(loc, "", arg_type); + } + lex.check(tok_identifier, "parameter name"); + loc = lex.cur_location(); + std::string_view arg_name = lex.cur_str(); + lex.next(); + return createV(loc, arg_name, arg_type); +} + +AnyV parse_global_var_declaration(Lexer& lex) { + TypeExpr* declared_type = nullptr; + SrcLocation loc = lex.cur_location(); + if (lex.tok() == tok_underscore) { + lex.next(); + declared_type = TypeExpr::new_hole(); + loc = lex.cur_location(); + } else if (lex.tok() != tok_identifier) { + declared_type = parse_type(lex, nullptr); + } + lex.check(tok_identifier, "global variable name"); + std::string_view var_name = lex.cur_str(); + lex.next(); + return createV(loc, var_name, declared_type); +} + +AnyV parse_expr(Lexer& lex); + +AnyV 
parse_constant_declaration(Lexer& lex) { + TypeExpr *declared_type = nullptr; + if (lex.tok() == tok_int) { + declared_type = TypeExpr::new_atomic(TypeExpr::_Int); + lex.next(); + } else if (lex.tok() == tok_slice) { + declared_type = TypeExpr::new_atomic(TypeExpr::_Slice); + lex.next(); + } + lex.check(tok_identifier, "constant name"); + SrcLocation loc = lex.cur_location(); + std::string_view const_name = lex.cur_str(); + lex.next(); + lex.expect(tok_assign, "'='"); + AnyV init_value = parse_expr(lex); + return createV(loc, const_name, declared_type, init_value); +} + +AnyV parse_argument_list(Lexer& lex, V forall_list) { + SrcLocation loc = lex.cur_location(); + std::vector args; + lex.expect(tok_oppar, "argument list"); + if (lex.tok() != tok_clpar) { + args.push_back(parse_argument(lex, forall_list)); + while (lex.tok() == tok_comma) { + lex.next(); + args.push_back(parse_argument(lex, forall_list)); + } + } + lex.expect(tok_clpar, "')'"); + return createV(loc, std::move(args)); +} + +AnyV parse_constant_declaration_list(Lexer& lex) { + std::vector consts; + SrcLocation loc = lex.cur_location(); + lex.expect(tok_const, "'const'"); + while (true) { + consts.push_back(parse_constant_declaration(lex)); + if (lex.tok() != tok_comma) { + break; + } + lex.expect(tok_comma, "','"); + } + lex.expect(tok_semicolon, "';'"); + return createV(loc, std::move(consts)); +} + +AnyV parse_global_var_declaration_list(Lexer& lex) { + std::vector globals; + SrcLocation loc = lex.cur_location(); + lex.expect(tok_global, "'global'"); + while (true) { + globals.push_back(parse_global_var_declaration(lex)); + if (lex.tok() != tok_comma) { + break; + } + lex.expect(tok_comma, "','"); + } + lex.expect(tok_semicolon, "';'"); + return createV(loc, std::move(globals)); +} + +// parse ( E { , E } ) | () | [ E { , E } ] | [] | id | num | _ +AnyV parse_expr100(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + if (lex.tok() == tok_oppar) { + lex.next(); + if (lex.tok() == tok_clpar) { + 
lex.next(); + return createV(loc, {}); + } + AnyV res = parse_expr(lex); + if (lex.tok() == tok_clpar) { + lex.next(); + return createV(loc, res); + } + std::vector items; + bool is_type_expression = res->type == ast_type_expression; // to differ `(a,b)` and `(int,slice)` + items.emplace_back(res); + while (lex.tok() == tok_comma) { + lex.next(); + AnyV item = parse_expr(lex); + if (is_type_expression != (item->type == ast_type_expression)) { + lex.error("mixing type and non-type expressions inside the same tuple"); + } + items.emplace_back(item); + } + lex.expect(tok_clpar, "')'"); + if (is_type_expression) { + std::vector types; + types.reserve(items.size()); + for (AnyV item : items) { + types.emplace_back(item->as()->declared_type); + } + return createV(loc, TypeExpr::new_tensor(std::move(types))); + } + return createV(loc, std::move(items)); + } + if (lex.tok() == tok_opbracket) { + lex.next(); + if (lex.tok() == tok_clbracket) { + lex.next(); + return createV(loc, {}); + } + AnyV res = parse_expr(lex); + std::vector items; + bool is_type_expression = res->type == ast_type_expression; // to differ `(a,b)` and `(int,slice)` + items.emplace_back(res); + while (lex.tok() == tok_comma) { + lex.next(); + AnyV item = parse_expr(lex); + if (is_type_expression != (item->type == ast_type_expression)) { + lex.error("mixing type and non-type expressions inside the same tuple"); + } + items.emplace_back(item); + } + lex.expect(tok_clbracket, "']'"); + if (is_type_expression) { + std::vector types; + types.reserve(items.size()); + for (AnyV item : items) { + types.emplace_back(item->as()->declared_type); + } + return createV(loc, TypeExpr::new_tuple(TypeExpr::new_tensor(std::move(types)))); + } + return createV(loc, std::move(items)); + } + TokenType t = lex.tok(); + if (t == tok_int_const) { + std::string_view int_val = lex.cur_str(); + lex.next(); + return createV(loc, int_val); + } + if (t == tok_string_const) { + std::string_view str_val = lex.cur_str(); + lex.next(); 
+ char modifier = 0; + if (lex.tok() == tok_string_modifier) { + modifier = lex.cur_str()[0]; + lex.next(); + } + return createV(loc, str_val, modifier); + } + if (t == tok_underscore) { + lex.next(); + return createV(loc); + } + if (t == tok_var) { + lex.next(); + return createV(loc, TypeExpr::new_hole()); + } + if (t == tok_int || t == tok_cell || t == tok_slice || t == tok_builder || t == tok_cont || t == tok_tuple) { + lex.next(); + return createV(loc, TypeExpr::new_atomic(t)); + } + if (t == tok_true || t == tok_false) { + lex.next(); + return createV(loc, t == tok_true); + } + if (t == tok_nil) { + lex.next(); + return createV(loc); + } + if (t == tok_identifier) { + std::string_view str_val = lex.cur_str(); + lex.next(); + return createV(loc, str_val); + } + lex.expect(tok_identifier, "identifier"); + return nullptr; +} + +// parse E { E } +AnyV parse_expr90(Lexer& lex) { + AnyV res = parse_expr100(lex); + while (lex.tok() == tok_oppar || lex.tok() == tok_opbracket || (lex.tok() == tok_identifier && lex.cur_str()[0] != '.' && lex.cur_str()[0] != '~')) { + if (const auto* v_type_expr = res->try_as()) { + AnyV dest = parse_expr100(lex); + return createV(v_type_expr->loc, v_type_expr->declared_type, dest); + } else { + AnyV arg = parse_expr100(lex); + return createV(res->loc, res, arg); + } + } + return res; +} + +// parse E { .method E | ~method E } +AnyV parse_expr80(Lexer& lex) { + AnyV lhs = parse_expr90(lex); + while (lex.tok() == tok_identifier && (lex.cur_str()[0] == '.' 
|| lex.cur_str()[0] == '~')) { + std::string_view method_name = lex.cur_str(); + SrcLocation loc = lex.cur_location(); + lex.next(); + const ASTNodeBase *arg = parse_expr100(lex); + lhs = createV(loc, method_name, lhs, arg); + } + return lhs; +} + +// parse [ ~ | - | + ] E +AnyV parse_expr75(Lexer& lex) { + TokenType t = lex.tok(); + if (t == tok_bitwise_not || t == tok_minus || t == tok_plus) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr75(lex); + return createV(loc, operator_name, t, rhs); + } else { + return parse_expr80(lex); + } +} + +// parse E { (* | / | % | /% | ^/ | ~/ | ^% | ~% ) E } +AnyV parse_expr30(Lexer& lex) { + AnyV lhs = parse_expr75(lex); + TokenType t = lex.tok(); + while (t == tok_mul || t == tok_div || t == tok_mod || t == tok_divmod || t == tok_divC || + t == tok_divR || t == tok_modC || t == tok_modR) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr75(lex); + lhs = createV(loc, operator_name, t, lhs, rhs); + t = lex.tok(); + } + return lhs; +} + +// parse E { (+ | -) E } +AnyV parse_expr20(Lexer& lex) { + AnyV lhs = parse_expr30(lex); + TokenType t = lex.tok(); + while (t == tok_minus || t == tok_plus) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr30(lex); + lhs = createV(loc, operator_name, t, lhs, rhs); + t = lex.tok(); + } + return lhs; +} + +// parse E { ( << | >> | ~>> | ^>> ) E } +AnyV parse_expr17(Lexer& lex) { + AnyV lhs = parse_expr20(lex); + TokenType t = lex.tok(); + while (t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr20(lex); + diagnose_addition_in_bitshift(loc, operator_name, rhs); + lhs = createV(loc, operator_name, t, lhs, 
rhs); + t = lex.tok(); + } + return lhs; +} + +// parse E [ (== | < | > | <= | >= | != | <=> ) E ] +AnyV parse_expr15(Lexer& lex) { + AnyV lhs = parse_expr17(lex); + TokenType t = lex.tok(); + if (t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr17(lex); + lhs = createV(loc, operator_name, t, lhs, rhs); + } + return lhs; +} + +// parse E { ( & | `|` | ^ ) E } +AnyV parse_expr14(Lexer& lex) { + AnyV lhs = parse_expr15(lex); + TokenType t = lex.tok(); + while (t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr15(lex); + diagnose_bitwise_precedence(loc, operator_name, lhs, rhs); + lhs = createV(loc, operator_name, t, lhs, rhs); + t = lex.tok(); + } + return lhs; +} + +// parse E [ ? E : E ] +AnyV parse_expr13(Lexer& lex) { + AnyV res = parse_expr14(lex); + if (lex.tok() == tok_question) { + SrcLocation loc = lex.cur_location(); + lex.next(); + AnyV when_true = parse_expr(lex); + lex.expect(tok_colon, "':'"); + AnyV when_false = parse_expr13(lex); + return createV(loc, res, when_true, when_false); + } + return res; +} + +// parse LE1 (= | += | -= | ... 
) E2 +AnyV parse_expr10(Lexer& lex) { + AnyV lhs = parse_expr13(lex); + TokenType t = lex.tok(); + if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_divR || t == tok_set_divC || + t == tok_set_mod || t == tok_set_modC || t == tok_set_modR || t == tok_set_lshift || t == tok_set_rshift || t == tok_set_rshiftC || + t == tok_set_rshiftR || t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor || + t == tok_assign) { + SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); + lex.next(); + AnyV rhs = parse_expr10(lex); + return createV(loc, operator_name, t, lhs, rhs); + } + return lhs; +} + +AnyV parse_expr(Lexer& lex) { + return parse_expr10(lex); +} + +AnyV parse_return_stmt(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_return, "'return'"); + AnyV child = parse_expr(lex); + lex.expect(tok_semicolon, "';'"); + return createV(loc, child); +} + +AnyV parse_statement(Lexer& lex); + +V parse_sequence(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_opbrace, "'{'"); + std::vector items; + while (lex.tok() != tok_clbrace) { + items.push_back(parse_statement(lex)); + } + SrcLocation loc_end = lex.cur_location(); + lex.expect(tok_clbrace, "'}'"); + return createV(loc, loc_end, items); +} + +AnyV parse_repeat_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_repeat, "'repeat'"); + AnyV cond = parse_expr(lex); + V body = parse_sequence(lex); + return createV(loc, cond, body); +} + +AnyV parse_while_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_while, "'while'"); + AnyV cond = parse_expr(lex); + V body = parse_sequence(lex); + return createV(loc, cond, body); +} + +ASTNodeBase* parse_do_until_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_do, "'do'"); + V body = parse_sequence(lex); + lex.expect(tok_until, "'until'"); + AnyV 
cond = parse_expr(lex); + return createV(loc, body, cond); +} + +AnyV parse_try_catch_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_try, "'try'"); + V try_body = parse_sequence(lex); + lex.expect(tok_catch, "'catch'"); + AnyV catch_expr = parse_expr(lex); + V catch_body = parse_sequence(lex); + return createV(loc, try_body, catch_expr, catch_body); +} + +AnyV parse_if_statement(Lexer& lex, bool is_ifnot) { + SrcLocation loc = lex.cur_location(); + lex.next(); + AnyV cond = parse_expr(lex); + V if_body = parse_sequence(lex); + V else_body = nullptr; + if (lex.tok() == tok_else) { + lex.next(); + else_body = parse_sequence(lex); + } else if (lex.tok() == tok_elseif) { + AnyV v_inner_if = parse_if_statement(lex, false); + else_body = createV(v_inner_if->loc, lex.cur_location(), {v_inner_if}); + } else if (lex.tok() == tok_elseifnot) { + AnyV v_inner_if = parse_if_statement(lex, true); + else_body = createV(v_inner_if->loc, lex.cur_location(), {v_inner_if}); + } else { + else_body = createV(lex.cur_location(), lex.cur_location(), {}); + } + return createV(loc, is_ifnot, cond, if_body, else_body); +} + +AnyV parse_statement(Lexer& lex) { + switch (lex.tok()) { + case tok_return: + return parse_return_stmt(lex); + case tok_opbrace: + return parse_sequence(lex); + case tok_repeat: + return parse_repeat_statement(lex); + case tok_if: + return parse_if_statement(lex, false); + case tok_ifnot: + return parse_if_statement(lex, true); + case tok_do: + return parse_do_until_statement(lex); + case tok_while: + return parse_while_statement(lex); + case tok_try: + return parse_try_catch_statement(lex); + case tok_semicolon: { + lex.next(); + return createV; + } + default: { + AnyV expr = parse_expr(lex); + lex.expect(tok_semicolon, "';'"); + return expr; + } + } +} + +AnyV parse_func_body(Lexer& lex) { + return parse_sequence(lex); +} + +AnyV parse_asm_func_body(Lexer& lex, V arg_list) { + SrcLocation loc = lex.cur_location(); + 
lex.expect(tok_asm, "'asm'"); + size_t n_args = arg_list->size(); + if (n_args > 16) { + throw ParseError{loc, "assembler built-in function can have at most 16 arguments"}; + } + std::vector arg_order, ret_order; + if (lex.tok() == tok_oppar) { + lex.next(); + while (lex.tok() == tok_identifier || lex.tok() == tok_int_const) { + int arg_idx = arg_list->lookup_idx(lex.cur_str()); + if (arg_idx == -1) { + lex.error("argument name expected"); + } + arg_order.push_back(arg_idx); + lex.next(); + } + if (lex.tok() == tok_mapsto) { + lex.next(); + while (lex.tok() == tok_int_const) { + int ret_idx = std::atoi(static_cast(lex.cur_str()).c_str()); + ret_order.push_back(ret_idx); + lex.next(); + } + } + lex.expect(tok_clpar, "')'"); + } + std::vector asm_commands; + lex.check(tok_string_const, "\"ASM COMMAND\""); + while (lex.tok() == tok_string_const) { + std::string_view asm_command = lex.cur_str(); + asm_commands.push_back(createV(lex.cur_location(), asm_command, 0)); + lex.next(); + } + lex.expect(tok_semicolon, "';'"); + return createV(loc, std::move(arg_order), std::move(ret_order), std::move(asm_commands)); +} + +AnyV parse_forall(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + std::vector forall_items; + lex.expect(tok_forall, "'forall'"); + int idx = 0; + while (true) { + lex.check(tok_identifier, "T expected"); + std::string_view nameT = lex.cur_str(); + TypeExpr* type = TypeExpr::new_var(idx++); + forall_items.emplace_back(createV(lex.cur_location(), type, static_cast(nameT))); + lex.next(); + if (lex.tok() != tok_comma) { + break; + } + lex.next(); + } + lex.expect(tok_mapsto, "'->'"); + return createV{loc, std::move(forall_items)}; +} + +AnyV parse_function_declaration(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + V forall_list = nullptr; + bool is_get_method = false; + bool is_builtin = false; + bool marked_as_inline = false; + bool marked_as_inline_ref = false; + if (lex.tok() == tok_forall) { + forall_list = parse_forall(lex)->as(); + } else 
if (lex.tok() == tok_get) { + is_get_method = true; + lex.next(); + } + TypeExpr* ret_type = parse_type(lex, forall_list); + lex.check(tok_identifier, "function name identifier expected"); + std::string func_name = static_cast(lex.cur_str()); + lex.next(); + V arg_list = parse_argument_list(lex, forall_list)->as(); + bool marked_as_pure = false; + if (lex.tok() == tok_impure) { + static bool warning_shown = false; + if (!warning_shown) { + lex.cur_location().show_warning("`impure` specifier is deprecated. All functions are impure by default, use `pure` to mark a function as pure"); + warning_shown = true; + } + lex.next(); + } else if (lex.tok() == tok_pure) { + marked_as_pure = true; + lex.next(); + } + if (lex.tok() == tok_inline) { + marked_as_inline = true; + lex.next(); + } else if (lex.tok() == tok_inlineref) { + marked_as_inline_ref = true; + lex.next(); + } + V method_id = nullptr; + if (lex.tok() == tok_method_id) { + if (is_get_method) { + lex.error("both `get` and `method_id` are not allowed"); + } + lex.next(); + if (lex.tok() == tok_oppar) { // method_id(N) + lex.next(); + lex.check(tok_int_const, "number"); + std::string_view int_val = lex.cur_str(); + method_id = createV(lex.cur_location(), int_val); + lex.next(); + lex.expect(tok_clpar, "')'"); + } else { + static bool warning_shown = false; + if (!warning_shown) { + lex.cur_location().show_warning("`method_id` specifier is deprecated, use `get` keyword.\nExample: `get int seqno() { ... 
}`"); + warning_shown = true; + } + is_get_method = true; + } + } + + AnyV body = nullptr; + + if (lex.tok() == tok_builtin) { + is_builtin = true; + body = createV; + lex.next(); + lex.expect(tok_semicolon, "';'"); + } else if (lex.tok() == tok_semicolon) { + // todo this is just a prototype, remove this "feature" in the future + lex.next(); + body = createV; + } else if (lex.tok() == tok_opbrace) { + body = parse_func_body(lex); + } else if (lex.tok() == tok_asm) { + body = parse_asm_func_body(lex, arg_list); + } else { + lex.expect(tok_opbrace, "function body block"); + } + + auto f_declaration = createV(loc, func_name, arg_list, body); + f_declaration->ret_type = ret_type; + f_declaration->forall_list = forall_list; + f_declaration->marked_as_pure = marked_as_pure; + f_declaration->marked_as_get_method = is_get_method; + f_declaration->marked_as_builtin = is_builtin; + f_declaration->marked_as_inline = marked_as_inline; + f_declaration->marked_as_inline_ref = marked_as_inline_ref; + f_declaration->method_id = method_id; + return f_declaration; +} + +AnyV parse_pragma(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.next_special(tok_pragma_name, "pragma name"); + std::string_view pragma_name = lex.cur_str(); + if (pragma_name == "version") { + lex.next(); + TokenType cmp_tok = lex.tok(); + bool valid = cmp_tok == tok_gt || cmp_tok == tok_geq || cmp_tok == tok_lt || cmp_tok == tok_leq || cmp_tok == tok_eq || cmp_tok == tok_bitwise_xor; + if (!valid) { + lex.error("invalid comparison operator"); + } + lex.next_special(tok_semver, "semver"); + std::string_view semver = lex.cur_str(); + lex.next(); + lex.expect(tok_semicolon, "';'"); + return createV(loc, cmp_tok, semver); + } + lex.next(); + lex.expect(tok_semicolon, "';'"); + return createV(loc, pragma_name); +} + +AnyV parse_include_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_include, "#include"); + lex.check(tok_string_const, "source file name"); + std::string_view 
rel_filename = lex.cur_str(); + if (rel_filename.empty()) { + lex.error("imported file name is an empty string"); + } + lex.next(); + lex.expect(tok_semicolon, "';'"); + return createV(loc, rel_filename); +} + +// the main (exported) function +AnyV parse_src_file_to_ast(SrcFile* file) { + file->was_parsed = true; + + std::vector toplevel_declarations; + Lexer lex(file); + while (!lex.is_eof()) { + if (lex.tok() == tok_pragma) { + toplevel_declarations.push_back(parse_pragma(lex)); + } else if (lex.tok() == tok_include) { + toplevel_declarations.push_back(parse_include_statement(lex)); + } else if (lex.tok() == tok_global) { + toplevel_declarations.push_back(parse_global_var_declaration_list(lex)); + } else if (lex.tok() == tok_const) { + toplevel_declarations.push_back(parse_constant_declaration_list(lex)); + } else { + toplevel_declarations.push_back(parse_function_declaration(lex)); + } + } + return createV(file, std::move(toplevel_declarations)); +} + +} // namespace tolk diff --git a/tolk/ast-from-tokens.h b/tolk/ast-from-tokens.h new file mode 100644 index 000000000..65b82ad64 --- /dev/null +++ b/tolk/ast-from-tokens.h @@ -0,0 +1,27 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +#include "src-file.h" + +namespace tolk { + +struct ASTNodeBase; + +const ASTNodeBase* parse_src_file_to_ast(SrcFile* file); + +} // namespace tolk diff --git a/tolk/ast-replacer.h b/tolk/ast-replacer.h new file mode 100644 index 000000000..feae56166 --- /dev/null +++ b/tolk/ast-replacer.h @@ -0,0 +1,155 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "ast.h" +#include "platform-utils.h" + +/* + * A module of implementing traversing a vertex tree and replacing any vertex to another. + * For example, to replace "beginCell()" call to "begin_cell()" in a function body (in V) + * regardless of the place this call is performed, you need to iterate over all the function AST, + * to find ast_function_call(beginCell), create ast_function_call(begin_cell) instead and to replace + * a pointer inside its parent. + * Inheriting from ASTVisitor makes this task quite simple, without any boilerplate. + * + * If you need just to traverse a vertex tree without replacing vertices, + * consider another api: ast-visitor.h. 
+ */ + +namespace tolk { + +class ASTReplacer { +protected: + GNU_ATTRIBUTE_ALWAYS_INLINE static AnyV replace_children(const ASTNodeLeaf* v) { + return v; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeUnary* v) { + auto* v_mutable = const_cast(v); + v_mutable->child = replace(v_mutable->child); + return v_mutable; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeBinary* v) { + auto* v_mutable = const_cast(v); + v_mutable->lhs = replace(v->lhs); + v_mutable->rhs = replace(v->rhs); + return v_mutable; + } + + GNU_ATTRIBUTE_ALWAYS_INLINE AnyV replace_children(const ASTNodeVararg* v) { + auto* v_mutable = const_cast(v); + for (AnyV& child : v_mutable->children) { + child = replace(child); + } + return v_mutable; + } + +public: + virtual ~ASTReplacer() = default; + + virtual AnyV replace(AnyV v) = 0; +}; + +class ASTReplacerInFunctionBody : public ASTReplacer { +protected: + using parent = ASTReplacerInFunctionBody; + + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + 
virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + + AnyV replace(AnyV v) final { + switch (v->type) { + case ast_empty: return replace(v->as()); + case ast_identifier: return replace(v->as()); + case ast_int_const: return replace(v->as()); + case ast_string_const: return replace(v->as()); + case ast_bool_const: return replace(v->as()); + case ast_nil_tuple: return replace(v->as()); + case ast_function_call: return replace(v->as()); + case ast_parenthesized_expr: return replace(v->as()); + case ast_underscore: return replace(v->as()); + case ast_type_expression: return replace(v->as()); + case ast_variable_declaration: return replace(v->as()); + case ast_tensor: return replace(v->as()); + case ast_tensor_square: return replace(v->as()); + case ast_dot_tilde_call: return replace(v->as()); + case ast_unary_operator: return replace(v->as()); + case ast_binary_operator: return replace(v->as()); + case ast_ternary_operator: return replace(v->as()); + case ast_return_statement: return replace(v->as()); + case ast_sequence: return replace(v->as()); + case ast_repeat_statement: return replace(v->as()); + case ast_while_statement: return replace(v->as()); + case ast_do_until_statement: return replace(v->as()); + case ast_try_catch_statement: return replace(v->as()); + case ast_if_statement: return replace(v->as()); + case ast_asm_body: return replace(v->as()); + default: + throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::visit"); + } + } + +public: + void start_replacing_in_function(V v) { + replace(v->get_body()); + } +}; + +class 
ASTReplacerAllFunctionsInFile : public ASTReplacerInFunctionBody { +protected: + using parent = ASTReplacerAllFunctionsInFile; + + virtual bool should_enter_function(V v) = 0; + +public: + void start_replacing_in_file(V v_file) { + for (AnyV v : v_file->get_toplevel_declarations()) { + if (auto v_function = v->try_as()) { + if (should_enter_function(v_function)) { + replace(v_function->get_body()); + } + } + } + } +}; + +} // namespace tolk diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h new file mode 100644 index 000000000..399017a78 --- /dev/null +++ b/tolk/ast-stringifier.h @@ -0,0 +1,233 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#ifdef TOLK_DEBUG + +#include "ast.h" +#include "ast-visitor.h" +#include + +/* + * ASTStringifier is used to print out the whole vertex tree in a human-readable format. + * To stringify any vertex, call v->debug_print(), which uses this class. 
+ */ + +namespace tolk { + +class ASTStringifier final : public ASTVisitor { + constexpr static std::pair name_pairs[] = { + {ast_empty, "ast_empty"}, + {ast_identifier, "ast_identifier"}, + {ast_int_const, "ast_int_const"}, + {ast_string_const, "ast_string_const"}, + {ast_bool_const, "ast_bool_const"}, + {ast_nil_tuple, "ast_nil_tuple"}, + {ast_function_call, "ast_function_call"}, + {ast_parenthesized_expr, "ast_parenthesized_expr"}, + {ast_global_var_declaration, "ast_global_var_declaration"}, + {ast_global_var_declaration_list, "ast_global_var_declaration_list"}, + {ast_constant_declaration, "ast_constant_declaration"}, + {ast_constant_declaration_list, "ast_constant_declaration_list"}, + {ast_underscore, "ast_underscore"}, + {ast_type_expression, "ast_type_expression"}, + {ast_variable_declaration, "ast_variable_declaration"}, + {ast_tensor, "ast_tensor"}, + {ast_tensor_square, "ast_tensor_square"}, + {ast_dot_tilde_call, "ast_dot_tilde_call"}, + {ast_unary_operator, "ast_unary_operator"}, + {ast_binary_operator, "ast_binary_operator"}, + {ast_ternary_operator, "ast_ternary_operator"}, + {ast_return_statement, "ast_return_statement"}, + {ast_sequence, "ast_sequence"}, + {ast_repeat_statement, "ast_repeat_statement"}, + {ast_while_statement, "ast_while_statement"}, + {ast_do_until_statement, "ast_do_until_statement"}, + {ast_try_catch_statement, "ast_try_catch_statement"}, + {ast_if_statement, "ast_if_statement"}, + {ast_forall_item, "ast_forall_item"}, + {ast_forall_list, "ast_forall_list"}, + {ast_argument, "ast_argument"}, + {ast_argument_list, "ast_argument_list"}, + {ast_asm_body, "ast_asm_body"}, + {ast_function_declaration, "ast_function_declaration"}, + {ast_pragma_no_arg, "ast_pragma_no_arg"}, + {ast_pragma_version, "ast_pragma_version"}, + {ast_include_statement, "ast_include_statement"}, + {ast_tolk_file, "ast_tolk_file"}, + }; + + template + constexpr static const char* ast_node_type_to_string() { + static_assert(std::size(name_pairs) == 
ast_tolk_file + 1, "name_pairs needs to be updated"); + return name_pairs[node_type].second; + } + + int depth = 0; + std::string out; + bool colored = false; + + template + void handle_vertex(V v) { + out += std::string(depth * 2, ' '); + out += ast_node_type_to_string(); + if (std::string postfix = specific_str(v); !postfix.empty()) { + out += colored ? " \x1b[34m" : " // "; + out += postfix; + out += colored ? "\x1b[0m" : ""; + } + out += '\n'; + depth++; + visit_children(v); + depth--; + } + + static std::string specific_str(AnyV node) { + switch (node->type) { + case ast_identifier: + return static_cast(node->as()->name); + case ast_int_const: + return static_cast(node->as()->int_val); + case ast_string_const: + if (char modifier = node->as()->modifier) { + return "\"" + static_cast(node->as()->str_val) + "\"" + std::string(1, modifier); + } else { + return "\"" + static_cast(node->as()->str_val) + "\""; + } + case ast_global_var_declaration: + return static_cast(node->as()->var_name); + case ast_constant_declaration: + return static_cast(node->as()->const_name); + case ast_type_expression: { + std::ostringstream os; + os << node->as()->declared_type; + return os.str(); + } + case ast_variable_declaration: { + std::ostringstream os; + os << node->as()->declared_type; + return os.str(); + } + case ast_dot_tilde_call: + return static_cast(node->as()->method_name); + case ast_unary_operator: + return static_cast(node->as()->operator_name); + case ast_binary_operator: + return static_cast(node->as()->operator_name); + case ast_sequence: + return "↓" + std::to_string(node->as()->get_items().size()); + case ast_if_statement: + return node->as()->is_ifnot ? 
"ifnot" : ""; + case ast_argument: { + std::ostringstream os; + os << node->as()->arg_type; + return static_cast(node->as()->arg_name) + ": " + os.str(); + } + case ast_function_declaration: { + std::string arg_names; + for (int i = 0; i < node->as()->get_num_args(); i++) { + if (!arg_names.empty()) + arg_names += ","; + arg_names += node->as()->get_arg(i)->arg_name; + } + return "fun " + node->as()->name + "(" + arg_names + ")"; + } + case ast_pragma_no_arg: + return static_cast(node->as()->pragma_name); + case ast_pragma_version: + return static_cast(node->as()->semver); + case ast_include_statement: + return static_cast(node->as()->file_name); + case ast_tolk_file: + return node->as()->file->rel_filename; + default: + return {}; + } + } + +public: + explicit ASTStringifier(bool colored) : colored(colored) { + } + + std::string to_string_with_children(AnyV v) { + out.clear(); + visit(v); + return std::move(out); + } + + static std::string to_string_without_children(AnyV v) { + std::string result = ast_node_type_to_string(v->type); + if (std::string postfix = specific_str(v); !postfix.empty()) { + result += ' '; + result += specific_str(v); + } + return result; + } + + static const char* ast_node_type_to_string(ASTNodeType node_type) { + return name_pairs[node_type].second; + } + + void visit(AnyV v) override { + switch (v->type) { + case ast_empty: return handle_vertex(v->as()); + case ast_identifier: return handle_vertex(v->as()); + case ast_int_const: return handle_vertex(v->as()); + case ast_string_const: return handle_vertex(v->as()); + case ast_bool_const: return handle_vertex(v->as()); + case ast_nil_tuple: return handle_vertex(v->as()); + case ast_function_call: return handle_vertex(v->as()); + case ast_parenthesized_expr: return handle_vertex(v->as()); + case ast_global_var_declaration: return handle_vertex(v->as()); + case ast_global_var_declaration_list: return handle_vertex(v->as()); + case ast_constant_declaration: return handle_vertex(v->as()); + 
case ast_constant_declaration_list: return handle_vertex(v->as()); + case ast_underscore: return handle_vertex(v->as()); + case ast_type_expression: return handle_vertex(v->as()); + case ast_variable_declaration: return handle_vertex(v->as()); + case ast_tensor: return handle_vertex(v->as()); + case ast_tensor_square: return handle_vertex(v->as()); + case ast_dot_tilde_call: return handle_vertex(v->as()); + case ast_unary_operator: return handle_vertex(v->as()); + case ast_binary_operator: return handle_vertex(v->as()); + case ast_ternary_operator: return handle_vertex(v->as()); + case ast_return_statement: return handle_vertex(v->as()); + case ast_sequence: return handle_vertex(v->as()); + case ast_repeat_statement: return handle_vertex(v->as()); + case ast_while_statement: return handle_vertex(v->as()); + case ast_do_until_statement: return handle_vertex(v->as()); + case ast_try_catch_statement: return handle_vertex(v->as()); + case ast_if_statement: return handle_vertex(v->as()); + case ast_forall_item: return handle_vertex(v->as()); + case ast_forall_list: return handle_vertex(v->as()); + case ast_argument: return handle_vertex(v->as()); + case ast_argument_list: return handle_vertex(v->as()); + case ast_asm_body: return handle_vertex(v->as()); + case ast_function_declaration: return handle_vertex(v->as()); + case ast_pragma_no_arg: return handle_vertex(v->as()); + case ast_pragma_version: return handle_vertex(v->as()); + case ast_include_statement: return handle_vertex(v->as()); + case ast_tolk_file: return handle_vertex(v->as()); + default: + throw UnexpectedASTNodeType(v, "ASTStringifier::visit"); + } + } +}; + +} // namespace tolk + +#endif // TOLK_DEBUG diff --git a/tolk/ast-to-legacy.cpp b/tolk/ast-to-legacy.cpp new file mode 100644 index 000000000..8b0e78103 --- /dev/null +++ b/tolk/ast-to-legacy.cpp @@ -0,0 +1,1438 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see <http://www.gnu.org/licenses/>. +*/ +#include "ast-to-legacy.h" +#include "ast.h" +#include "ast-visitor.h" +#include "ast-from-tokens.h" // todo should be deleted +#include "compiler-state.h" +#include "src-file.h" +#include "tolk.h" +#include "td/utils/crypto.h" +#include "common/refint.h" +#include "openssl/digest.hpp" +#include "block/block.h" +#include "block-parse.h" + +/* + * In this module, we convert modern AST representation to legacy representation + * (global state, Expr, CodeBlob, etc.) so that the rest of the compilation process remains unchanged for now. + * As time goes on, I'll gradually get rid of the legacy parts, since most of the code analysis
+ */ + +namespace tolk { + +static int calc_sym_idx(std::string_view sym_name) { + return G.symbols.lookup_add(sym_name); +} + + +Expr* process_expr(AnyV v, CodeBlob& code, bool nv = false); + +static SymValCodeFunc* make_new_glob_func(SymDef* func_sym, TypeExpr* func_type, bool marked_as_pure) { + SymValCodeFunc* res = new SymValCodeFunc{G.glob_func_cnt, func_type, marked_as_pure}; +#ifdef TOLK_DEBUG + res->name = func_sym->name(); +#endif + func_sym->value = res; + G.glob_func.push_back(func_sym); + G.glob_func_cnt++; + return res; +} + +static bool check_global_func(SrcLocation loc, sym_idx_t func_name) { + SymDef* def = lookup_symbol(func_name); + if (!def) { + throw ParseError(loc, "undefined symbol `" + G.symbols.get_name(func_name) + "`"); + return false; + } + SymVal* val = dynamic_cast(def->value); + if (!val) { + throw ParseError(loc, "symbol `" + G.symbols.get_name(func_name) + "` has no value and no type"); + return false; + } else if (!val->get_type()) { + throw ParseError(loc, "symbol `" + G.symbols.get_name(func_name) + "` has no type, possibly not a function"); + return false; + } else { + return true; + } +} + +static Expr* make_func_apply(Expr* fun, Expr* x) { + Expr* res{nullptr}; + if (fun->cls == Expr::_GlobFunc) { + if (x->cls == Expr::_Tensor) { + res = new Expr{Expr::_Apply, fun->sym, x->args}; + } else { + res = new Expr{Expr::_Apply, fun->sym, {x}}; + } + res->flags = Expr::_IsRvalue | (fun->flags & Expr::_IsImpure); + } else { + res = new Expr{Expr::_VarApply, {fun, x}}; + res->flags = Expr::_IsRvalue; + } + return res; +} + +static void check_import_exists_when_using_sym(AnyV v_usage, const SymDef* used_sym) { + if (!v_usage->loc.is_symbol_from_same_or_builtin_file(used_sym->loc)) { + const SrcFile* declared_in = used_sym->loc.get_src_file(); + bool has_import = false; + for (const SrcFile::ImportStatement& import_stmt : v_usage->loc.get_src_file()->imports) { + if (import_stmt.imported_file == declared_in) { + has_import = true; + } + } 
+ if (!has_import) { + v_usage->error("Using a non-imported symbol `" + used_sym->name() + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); + } + } +} + +Expr* process_expr(V v, CodeBlob& code, bool nv) { + TokenType t = v->tok; + std::string operator_name = static_cast(v->operator_name); + + if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_divR || t == tok_set_divC || + t == tok_set_mod || t == tok_set_modC || t == tok_set_modR || t == tok_set_lshift || t == tok_set_rshift || t == tok_set_rshiftC || + t == tok_set_rshiftR || t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { + Expr* x = process_expr(v->get_lhs(), code, nv); + x->chk_lvalue(); + x->chk_rvalue(); + sym_idx_t name = G.symbols.lookup_add("^_" + operator_name + "_"); + check_global_func(v->loc, name); + Expr* y = process_expr(v->get_rhs(), code, false); + y->chk_rvalue(); + Expr* z = new Expr{Expr::_Apply, name, {x, y}}; + z->here = v->loc; + z->set_val(t); + z->flags = Expr::_IsRvalue; + z->deduce_type(); + Expr* res = new Expr{Expr::_Letop, {x->copy(), z}}; + res->here = v->loc; + res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue; + res->set_val(t); + res->deduce_type(); + return res; + } + if (t == tok_assign) { + Expr* x = process_expr(v->get_lhs(), code, nv); + x->chk_lvalue(); + Expr* y = process_expr(v->get_rhs(), code, false); + y->chk_rvalue(); + x->predefine_vars(); + x->define_new_vars(code); + Expr* res = new Expr{Expr::_Letop, {x, y}}; + res->here = v->loc; + res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue; + res->set_val(t); + res->deduce_type(); + return res; + } + if (t == tok_minus || t == tok_plus || + t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor || + t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship || + t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR || + t 
== tok_mul || t == tok_div || t == tok_mod || t == tok_divmod || + t == tok_divC || t == tok_divR || t == tok_modC || t == tok_modR) { + Expr* res = process_expr(v->get_lhs(), code, nv); + res->chk_rvalue(); + sym_idx_t name = G.symbols.lookup_add("_" + operator_name + "_"); + check_global_func(v->loc, name); + Expr* x = process_expr(v->get_rhs(), code, false); + x->chk_rvalue(); + res = new Expr{Expr::_Apply, name, {res, x}}; + res->here = v->loc; + res->set_val(t); + res->flags = Expr::_IsRvalue; + res->deduce_type(); + return res; + } + + v->error("unsupported binary operator"); +} + +Expr* process_expr(V v, CodeBlob& code) { + TokenType t = v->tok; + sym_idx_t name = G.symbols.lookup_add(static_cast(v->operator_name) + "_"); + check_global_func(v->loc, name); + Expr* x = process_expr(v->get_rhs(), code, false); + x->chk_rvalue(); + + // here's an optimization to convert "-1" (tok_minus tok_int_const) to a const -1, not to Expr::Apply(-,1) + // without this, everything still works, but Tolk looses some vars/stack knowledge for now (to be fixed later) + // in FunC, it was: + // `var fst = -1;` // is constantly 1 + // `var snd = - 1;` // is Expr::Apply(-), a comment "snd=1" is lost in stack layout comments, and so on + // hence, when after grammar modification tok_minus became a true unary operator (not a part of a number), + // and thus to preserve existing behavior until compiler parts are completely rewritten, handle this case here + if (x->cls == Expr::_Const) { + if (t == tok_bitwise_not) { + x->intval = ~x->intval; + } else if (t == tok_minus) { + x->intval = -x->intval; + } + if (!x->intval->signed_fits_bits(257)) { + v->error("integer overflow"); + } + return x; + } + + auto res = new Expr{Expr::_Apply, name, {x}}; + res->here = v->loc; + res->set_val(t); + res->flags = Expr::_IsRvalue; + res->deduce_type(); + return res; +} + +Expr* process_expr(V v, CodeBlob& code, bool nv) { + Expr* res = process_expr(v->get_lhs(), code, nv); + bool modify = 
v->method_name[0] == '~'; + Expr* obj = res; + if (modify) { + obj->chk_lvalue(); + } else { + obj->chk_rvalue(); + } + sym_idx_t name = calc_sym_idx(v->method_name); + const SymDef* sym = lookup_symbol(name); + if (!sym || !dynamic_cast(sym->value)) { + sym_idx_t name1 = G.symbols.lookup(v->method_name.substr(1)); + if (name1) { + const SymDef* sym1 = lookup_symbol(name1); + if (sym1 && dynamic_cast(sym1->value)) { + name = name1; + sym = sym1; + } + } + } + check_global_func(v->loc, name); + if (G.is_verbosity(2)) { + std::cerr << "using symbol `" << G.symbols.get_name(name) << "` for method call of " << v->method_name << std::endl; + } + sym = lookup_symbol(name); + SymValFunc* val = sym ? dynamic_cast(sym->value) : nullptr; + if (!val) { + v->error("undefined method call"); + } + Expr* x = process_expr(v->get_arg(), code, false); + x->chk_rvalue(); + if (x->cls == Expr::_Tensor) { + res = new Expr{Expr::_Apply, name, {obj}}; + res->args.insert(res->args.end(), x->args.begin(), x->args.end()); + } else { + res = new Expr{Expr::_Apply, name, {obj, x}}; + } + res->here = v->loc; + res->flags = Expr::_IsRvalue | (val->is_marked_as_pure() ? 
0 : Expr::_IsImpure); + res->deduce_type(); + if (modify) { + auto tmp = res; + res = new Expr{Expr::_LetFirst, {obj->copy(), tmp}}; + res->here = v->loc; + res->flags = tmp->flags; + res->set_val(name); + res->deduce_type(); + } + return res; +} + +Expr* process_expr(V v, CodeBlob& code, bool nv) { + Expr* cond = process_expr(v->get_cond(), code, nv); + cond->chk_rvalue(); + Expr* x = process_expr(v->get_when_true(), code, false); + x->chk_rvalue(); + Expr* y = process_expr(v->get_when_false(), code, false); + y->chk_rvalue(); + Expr* res = new Expr{Expr::_CondExpr, {cond, x, y}}; + res->here = v->loc; + res->flags = Expr::_IsRvalue; + res->deduce_type(); + return res; +} + +Expr* process_expr(V v, CodeBlob& code, bool nv) { + Expr* res = process_expr(v->get_called_f(), code, nv); + Expr* x = process_expr(v->get_called_arg(), code, false); + x->chk_rvalue(); + res = make_func_apply(res, x); + res->here = v->loc; + res->deduce_type(); + return res; +} + +Expr* process_expr(V v, CodeBlob& code, bool nv) { + if (v->empty()) { + Expr* res = new Expr{Expr::_Tensor, {}}; + res->flags = Expr::_IsRvalue; + res->here = v->loc; + res->e_type = TypeExpr::new_unit(); + return res; + } + + Expr* res = process_expr(v->get_item(0), code, nv); + std::vector type_list; + type_list.push_back(res->e_type); + int f = res->flags; + res = new Expr{Expr::_Tensor, {res}}; + for (int i = 1; i < v->size(); ++i) { + Expr* x = process_expr(v->get_item(i), code, nv); + res->pb_arg(x); + f &= x->flags; + type_list.push_back(x->e_type); + } + res->here = v->loc; + res->flags = f; + res->e_type = TypeExpr::new_tensor(std::move(type_list)); + return res; +} + +Expr* process_expr(V v, CodeBlob& code) { + Expr* x = process_expr(v->get_variable_or_list(), code, true); + x->chk_lvalue(); // chk_lrvalue() ? 
+ Expr* res = new Expr{Expr::_TypeApply, {x}}; + res->e_type = v->declared_type; + res->here = v->loc; + try { + unify(res->e_type, x->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot transform expression of type " << x->e_type << " to explicitly requested type " << res->e_type + << ": " << ue; + v->error(os.str()); + } + res->flags = x->flags; + return res; +} + +Expr* process_expr(V v, CodeBlob& code, bool nv) { + if (v->empty()) { + Expr* res = new Expr{Expr::_Tensor, {}}; + res->flags = Expr::_IsRvalue; + res->here = v->loc; + res->e_type = TypeExpr::new_unit(); + res = new Expr{Expr::_MkTuple, {res}}; + res->flags = Expr::_IsRvalue; + res->here = v->loc; + res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); + return res; + } + + Expr* res = process_expr(v->get_item(0), code, nv); + std::vector type_list; + type_list.push_back(res->e_type); + int f = res->flags; + res = new Expr{Expr::_Tensor, {res}}; + for (int i = 1; i < v->size(); ++i) { + Expr* x = process_expr(v->get_item(i), code, nv); + res->pb_arg(x); + f &= x->flags; + type_list.push_back(x->e_type); + } + res->here = v->loc; + res->flags = f; + res->e_type = TypeExpr::new_tensor(std::move(type_list), false); + res = new Expr{Expr::_MkTuple, {res}}; + res->flags = f; + res->here = v->loc; + res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); + return res; +} + +Expr* process_expr(V v) { + Expr* res = new Expr{Expr::_Const, v->loc}; + res->flags = Expr::_IsRvalue; + res->intval = td::string_to_int256(static_cast(v->int_val)); + if (res->intval.is_null() || !res->intval->signed_fits_bits(257)) { + v->error("invalid integer constant"); + } + res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); + return res; +} + +Expr* process_expr(V v) { + std::string str = static_cast(v->str_val); + Expr* res; + switch (v->modifier) { + case 0: + case 's': + case 'a': + res = new Expr{Expr::_SliceConst, v->loc}; + res->e_type = TypeExpr::new_atomic(TypeExpr::_Slice); + break; 
+ case 'u': + case 'h': + case 'H': + case 'c': + res = new Expr{Expr::_Const, v->loc}; + res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); + break; + default: + v->error("invalid string modifier '" + std::string(1, v->modifier) + "'"); + } + res->flags = Expr::_IsRvalue; + switch (v->modifier) { + case 0: { + res->strval = td::hex_encode(str); + break; + } + case 's': { + res->strval = str; + unsigned char buff[128]; + int bits = (int)td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); + if (bits < 0) { + v->error("Invalid hex bitstring constant '" + str + "'"); + } + break; + } + case 'a': { // MsgAddressInt + // todo rewrite stdaddress parsing (if done, CMake dep "ton_crypto" can be replaced with "ton_crypto_core") + block::StdAddress a; + if (a.parse_addr(str)) { + res->strval = block::tlb::MsgAddressInt().pack_std_address(a)->as_bitslice().to_hex(); + } else { + v->error("invalid standard address '" + str + "'"); + } + break; + } + case 'u': { + res->intval = td::hex_string_to_int256(td::hex_encode(str)); + if (str.empty()) { + v->error("empty integer ascii-constant"); + } + if (res->intval.is_null()) { + v->error("too long integer ascii-constant"); + } + break; + } + case 'h': + case 'H': { + unsigned char hash[32]; + digest::hash_str(hash, str.data(), str.size()); + res->intval = td::bits_to_refint(hash, (v->modifier == 'h') ? 32 : 256, false); + break; + } + case 'c': { + res->intval = td::make_refint(td::crc32(td::Slice{str})); + break; + } + default: + __builtin_unreachable(); + } + return res; +} + +Expr* process_expr(V v) { + SymDef* sym = lookup_symbol(calc_sym_idx(v->bool_val ? 
"true" : "false")); + tolk_assert(sym); + Expr* res = new Expr{Expr::_Apply, sym, {}}; + res->flags = Expr::_IsRvalue; + res->deduce_type(); + return res; +} + +Expr* process_expr([[maybe_unused]] V v) { + SymDef* sym = lookup_symbol(calc_sym_idx("nil")); + tolk_assert(sym); + Expr* res = new Expr{Expr::_Apply, sym, {}}; + res->flags = Expr::_IsRvalue; + res->deduce_type(); + return res; +} + +Expr* process_expr(V v, bool nv) { + SymDef* sym = lookup_symbol(calc_sym_idx(v->name)); + if (sym && dynamic_cast(sym->value)) { + check_import_exists_when_using_sym(v, sym); + auto val = dynamic_cast(sym->value); + Expr* res = new Expr{Expr::_GlobVar, v->loc}; + res->e_type = val->get_type(); + res->sym = sym; + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImpure; + return res; + } + if (sym && dynamic_cast(sym->value)) { + check_import_exists_when_using_sym(v, sym); + auto val = dynamic_cast(sym->value); + Expr* res = new Expr{Expr::_None, v->loc}; + res->flags = Expr::_IsRvalue; + if (val->get_kind() == SymValConst::IntConst) { + res->cls = Expr::_Const; + res->intval = val->get_int_value(); + res->e_type = TypeExpr::new_atomic(tok_int); + } else if (val->get_kind() == SymValConst::SliceConst) { + res->cls = Expr::_SliceConst; + res->strval = val->get_str_value(); + res->e_type = TypeExpr::new_atomic(tok_slice); + } else { + v->error("Invalid symbolic constant type"); + } + return res; + } + if (sym && dynamic_cast(sym->value)) { + check_import_exists_when_using_sym(v, sym); + } + Expr* res = new Expr{Expr::_Var, v->loc}; + if (nv) { + res->val = ~calc_sym_idx(v->name); + res->e_type = TypeExpr::new_hole(); + res->flags = Expr::_IsLvalue; + // std::cerr << "defined new variable " << lex.cur().str << " : " << res->e_type << std::endl; + } else { + if (!sym) { + check_global_func(v->loc, calc_sym_idx(v->name)); + sym = lookup_symbol(calc_sym_idx(v->name)); + } + res->sym = sym; + SymVal* val = nullptr; + bool impure = false; + if (sym) { + val = 
dynamic_cast(sym->value); + } + if (!val) { + v->error("undefined identifier '" + static_cast(v->name) + "'"); + } + if (val->kind == SymValKind::_Func) { + res->e_type = val->get_type(); + res->cls = Expr::_GlobFunc; + impure = !dynamic_cast(val)->is_marked_as_pure(); + } else { + tolk_assert(val->idx >= 0); + res->val = val->idx; + res->e_type = val->get_type(); + // std::cerr << "accessing variable " << lex.cur().str << " : " << res->e_type << std::endl; + } + // std::cerr << "accessing symbol " << lex.cur().str << " : " << res->e_type << (val->impure ? " (impure)" : " (pure)") << std::endl; + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (impure ? Expr::_IsImpure : 0); + } + res->deduce_type(); + return res; +} + +Expr* process_expr(AnyV v, CodeBlob& code, bool nv) { + switch (v->type) { + case ast_binary_operator: + return process_expr(v->as(), code, nv); + case ast_unary_operator: + return process_expr(v->as(), code); + case ast_dot_tilde_call: + return process_expr(v->as(), code, nv); + case ast_ternary_operator: + return process_expr(v->as(), code, nv); + case ast_function_call: + return process_expr(v->as(), code, nv); + case ast_parenthesized_expr: + return process_expr(v->as()->get_expr(), code, nv); + case ast_variable_declaration: + return process_expr(v->as(), code); + case ast_tensor: + return process_expr(v->as(), code, nv); + case ast_tensor_square: + return process_expr(v->as(), code, nv); + case ast_int_const: + return process_expr(v->as()); + case ast_string_const: + return process_expr(v->as()); + case ast_bool_const: + return process_expr(v->as()); + case ast_nil_tuple: + return process_expr(v->as()); + case ast_identifier: + return process_expr(v->as(), nv); + + case ast_underscore: { + Expr* res = new Expr{Expr::_Hole, v->loc}; + res->val = -1; + res->flags = Expr::_IsLvalue; + res->e_type = TypeExpr::new_hole(); + return res; + } + case ast_type_expression: { + Expr* res = new Expr{Expr::_Type, v->loc}; + res->flags = Expr::_IsType; + 
res->e_type = v->as()->declared_type; + return res; + } + default: + throw UnexpectedASTNodeType(v, "process_expr"); + } +} + +namespace blk_fl { +enum { end = 1, ret = 2, empty = 4 }; +typedef int val; +constexpr val init = end | empty; +void combine(val& x, const val y) { + x |= y & ret; + x &= y | ~(end | empty); +} +void combine_parallel(val& x, const val y) { + x &= y | ~(ret | empty); + x |= y & end; +} +} // namespace blk_fl + +blk_fl::val process_vertex(V v, CodeBlob& code) { + Expr* expr = process_expr(v->get_return_value(), code); + expr->chk_rvalue(); + try { + // std::cerr << "in return: "; + unify(expr->e_type, code.ret_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous function return type " << code.ret_type + << " cannot be unified with return statement expression type " << expr->e_type << ": " << ue; + v->error(os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars)); + return blk_fl::ret; +} + +void append_implicit_ret_stmt(V v, CodeBlob& code) { + TypeExpr* ret_type = TypeExpr::new_unit(); + try { + // std::cerr << "in implicit return: "; + unify(ret_type, code.ret_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous function return type " << code.ret_type + << " cannot be unified with implicit end-of-block return type " << ret_type << ": " << ue; + throw ParseError(v->loc_end, os.str()); + } + code.emplace_back(v->loc_end, Op::_Return); +} + +blk_fl::val process_stmt(AnyV v, CodeBlob& code); + +blk_fl::val process_vertex(V v, CodeBlob& code, bool no_new_scope = false) { + if (!no_new_scope) { + open_scope(v->loc); + } + blk_fl::val res = blk_fl::init; + bool warned = false; + for (AnyV item : v->get_items()) { + if (!(res & blk_fl::end) && !warned) { + item->loc.show_warning("unreachable code"); + warned = true; + } + blk_fl::combine(res, process_stmt(item, code)); + } + if (!no_new_scope) { + close_scope(); + } + return 
res; +} + +blk_fl::val process_vertex(V v, CodeBlob& code) { + Expr* expr = process_expr(v->get_cond(), code); + expr->chk_rvalue(); + auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "repeat count value of type " << expr->e_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + if (tmp_vars.size() != 1) { + v->get_cond()->error("repeat count value is not a singleton"); + } + Op& repeat_op = code.emplace_back(v->loc, Op::_Repeat, tmp_vars); + code.push_set_cur(repeat_op.block0); + blk_fl::val res = process_vertex(v->get_body(), code); + code.close_pop_cur(v->get_body()->loc_end); + return res | blk_fl::end; +} + +blk_fl::val process_vertex(V v, CodeBlob& code) { + Expr* expr = process_expr(v->get_cond(), code); + expr->chk_rvalue(); + auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "while condition value of type " << expr->e_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + Op& while_op = code.emplace_back(v->loc, Op::_While); + code.push_set_cur(while_op.block0); + while_op.left = expr->pre_compile(code); + code.close_pop_cur(v->get_body()->loc); + if (while_op.left.size() != 1) { + v->get_cond()->error("while condition value is not a singleton"); + } + code.push_set_cur(while_op.block1); + blk_fl::val res1 = process_vertex(v->get_body(), code); + code.close_pop_cur(v->get_body()->loc_end); + return res1 | blk_fl::end; +} + +blk_fl::val process_vertex(V v, CodeBlob& code) { + Op& until_op = code.emplace_back(v->loc, Op::_Until); + code.push_set_cur(until_op.block0); + open_scope(v->loc); + blk_fl::val res = process_vertex(v->get_body(), code, true); + Expr* expr = process_expr(v->get_cond(), code); + expr->chk_rvalue(); + close_scope(); + auto cnt_type 
= TypeExpr::new_atomic(TypeExpr::_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`until` condition value of type " << expr->e_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + until_op.left = expr->pre_compile(code); + code.close_pop_cur(v->get_body()->loc_end); + if (until_op.left.size() != 1) { + v->get_cond()->error("`until` condition value is not a singleton"); + } + return res & ~blk_fl::empty; +} + +blk_fl::val process_vertex(V v, CodeBlob& code) { + code.require_callxargs = true; + Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); + code.push_set_cur(try_catch_op.block0); + blk_fl::val res0 = process_vertex(v->get_try_body(), code); + code.close_pop_cur(v->get_try_body()->loc_end); + code.push_set_cur(try_catch_op.block1); + open_scope(v->get_catch_expr()->loc); + Expr* expr = process_expr(v->get_catch_expr(), code, true); + expr->chk_lvalue(); + TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int)); + try { + unify(expr->e_type, tvm_error_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`catch` arguments have incorrect type " << expr->e_type << ": " << ue; + v->get_catch_expr()->error(os.str()); + } + expr->predefine_vars(); + expr->define_new_vars(code); + try_catch_op.left = expr->pre_compile(code); + tolk_assert(try_catch_op.left.size() == 2 || try_catch_op.left.size() == 1); + blk_fl::val res1 = process_vertex(v->get_catch_body(), code); + close_scope(); + code.close_pop_cur(v->get_catch_body()->loc_end); + blk_fl::combine_parallel(res0, res1); + return res0; +} + +blk_fl::val process_vertex(V v, CodeBlob& code, TokenType first_lex = tok_if) { + Expr* expr = process_expr(v->get_cond(), code); + expr->chk_rvalue(); + auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int); + try { + unify(expr->e_type, flag_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`if` 
condition value of type " << expr->e_type << " is not an integer: " << ue; + v->get_cond()->error(os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + if (tmp_vars.size() != 1) { + v->get_cond()->error("condition value is not a singleton"); + } + Op& if_op = code.emplace_back(v->loc, Op::_If, tmp_vars); + code.push_set_cur(if_op.block0); + blk_fl::val res1 = process_vertex(v->get_if_body(), code); + blk_fl::val res2 = blk_fl::init; + code.close_pop_cur(v->get_if_body()->loc_end); + code.push_set_cur(if_op.block1); + res2 = process_vertex(v->get_else_body(), code); + code.close_pop_cur(v->get_else_body()->loc_end); + if (v->is_ifnot) { + std::swap(if_op.block0, if_op.block1); + } + blk_fl::combine_parallel(res1, res2); + return res1; +} + +blk_fl::val process_stmt(AnyV v, CodeBlob& code) { + switch (v->type) { + case ast_return_statement: + return process_vertex(v->as(), code); + case ast_sequence: + return process_vertex(v->as(), code); + case ast_empty: + return blk_fl::init; + case ast_repeat_statement: + return process_vertex(v->as(), code); + case ast_if_statement: + return process_vertex(v->as(), code); + case ast_do_until_statement: + return process_vertex(v->as(), code); + case ast_while_statement: + return process_vertex(v->as(), code); + case ast_try_catch_statement: + return process_vertex(v->as(), code); + default: { + auto expr = process_expr(v, code); + expr->chk_rvalue(); + expr->pre_compile(code); + return blk_fl::end; + } + } +} + +FormalArg process_vertex(V v, int fa_idx) { + if (v->arg_name.empty()) { + return std::make_tuple(v->arg_type, (SymDef*)nullptr, v->loc); + } + if (G.prohibited_var_names.count(static_cast(v->arg_name))) { + v->error("symbol `" + static_cast(v->arg_name) + "` cannot be redefined as a variable"); + } + SymDef* new_sym_def = define_symbol(calc_sym_idx(v->arg_name), true, v->loc); + if (!new_sym_def) { + v->error("cannot define symbol"); + } + if (new_sym_def->value) { + v->error("redefined argument"); + } + 
new_sym_def->value = new SymVal{SymValKind::_Param, fa_idx, v->arg_type}; + return std::make_tuple(v->arg_type, new_sym_def, v->loc); +} + +CodeBlob* process_vertex(V v_body, V arg_list, TypeExpr* ret_type, bool marked_as_pure) { + CodeBlob* blob = new CodeBlob{ret_type}; + if (marked_as_pure) { + blob->flags |= CodeBlob::_ForbidImpure; + } + FormalArgList legacy_arg_list; + for (int i = 0; i < arg_list->size(); ++i) { + legacy_arg_list.emplace_back(process_vertex(arg_list->get_arg(i), i)); + } + blob->import_params(std::move(legacy_arg_list)); + blk_fl::val res = blk_fl::init; + bool warned = false; + for (AnyV item : v_body->get_items()) { + if (!(res & blk_fl::end) && !warned) { + item->loc.show_warning("unreachable code"); + warned = true; + } + blk_fl::combine(res, process_stmt(item, *blob)); + } + if (res & blk_fl::end) { + append_implicit_ret_stmt(v_body, *blob); + } + blob->close_blk(v_body->loc_end); + return blob; +} + +SymValAsmFunc* process_vertex(V v_body, TypeExpr* func_type, V arg_list, TypeExpr* ret_type, + bool marked_as_pure) { + int cnt = arg_list->size(); + int width = ret_type->get_width(); + if (width < 0 || width > 16) { + v_body->error("return type of an assembler built-in function must have a well-defined fixed width"); + } + if (cnt > 16) { + v_body->error("assembler built-in function must have at most 16 arguments"); + } + std::vector cum_arg_width; + cum_arg_width.push_back(0); + int tot_width = 0; + for (int i = 0; i < cnt; ++i) { + V arg = arg_list->get_arg(i); + int arg_width = arg->arg_type->get_width(); + if (arg_width < 0 || arg_width > 16) { + arg->error("parameters of an assembler built-in function must have a well-defined fixed width"); + } + cum_arg_width.push_back(tot_width += arg_width); + } + std::vector asm_ops; + std::vector arg_order, ret_order; + if (!v_body->arg_order.empty()) { + if (static_cast(v_body->arg_order.size()) != cnt) { + v_body->error("arg_order of asm function must specify all arguments"); + } + 
std::vector visited(cnt, false); + for (int i = 0; i < cnt; ++i) { + int j = v_body->arg_order[i]; + if (visited[j]) { + v_body->error("arg_order of asm function contains duplicates"); + } + visited[j] = true; + int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; + while (c1 < c2) { + arg_order.push_back(c1++); + } + } + tolk_assert(arg_order.size() == (unsigned)tot_width); + } + if (!v_body->ret_order.empty()) { + if (static_cast(v_body->ret_order.size()) != width) { + v_body->error("ret_order of this asm function expected to be width = " + std::to_string(width)); + } + std::vector visited(width, false); + for (int i = 0; i < width; ++i) { + int j = v_body->ret_order[i]; + if (j < 0 || j >= width || visited[j]) { + v_body->error("ret_order contains invalid integer, not in range 0 .. width-1"); + } + visited[j] = true; + } + ret_order = v_body->ret_order; + } + for (AnyV v_child : v_body->get_asm_commands()) { + std::string_view ops = v_child->as()->str_val; // \n\n... + std::string op; + for (const char& c : ops) { + if (c == '\n' || c == '\r') { + if (!op.empty()) { + asm_ops.push_back(AsmOp::Parse(op, cnt, width)); + if (asm_ops.back().is_custom()) { + cnt = width; + } + op.clear(); + } + } else { + op.push_back(c); + } + } + if (!op.empty()) { + asm_ops.push_back(AsmOp::Parse(op, cnt, width)); + if (asm_ops.back().is_custom()) { + cnt = width; + } + } + } + std::string crc_s; + for (const AsmOp& asm_op : asm_ops) { + crc_s += asm_op.op; + } + crc_s.push_back(!marked_as_pure); + for (const int& x : arg_order) { + crc_s += std::string((const char*) (&x), (const char*) (&x + 1)); + } + for (const int& x : ret_order) { + crc_s += std::string((const char*) (&x), (const char*) (&x + 1)); + } + auto res = new SymValAsmFunc{func_type, std::move(asm_ops), marked_as_pure}; + res->arg_order = std::move(arg_order); + res->ret_order = std::move(ret_order); + res->crc = td::crc64(crc_s); + return res; +} + +// if a function looks like `T f(...args) { return 
anotherF(...args); }`, +// set a bit to flags +// then, all calls to `f(...)` will be effectively replaced with `anotherF(...)` +void detect_if_function_just_wraps_another(SymValCodeFunc* v_current, const td::RefInt256 &method_id) { + const std::string& function_name = v_current->code->name; + + // in "AST" representation, the first is Op::_Import (input arguments, even if none) + const auto& op_import = v_current->code->ops; + tolk_assert(op_import && op_import->cl == Op::_Import); + + // then Op::_Call (anotherF) + const Op* op_call = op_import->next.get(); + if (!op_call || op_call->cl != Op::_Call) + return; + tolk_assert(op_call->left.size() == 1); + + const auto& op_return = op_call->next; + if (!op_return || op_return->cl != Op::_Return || op_return->left.size() != 1) + return; + + bool indices_expected = op_import->left.size() == op_call->left[0] && op_call->left[0] == op_return->left[0]; + if (!indices_expected) + return; + + const SymDef* f_called = op_call->fun_ref; + const SymValFunc* v_called = dynamic_cast(f_called->value); + if (!v_called) + return; + + // `return` must use all arguments, e.g. 
`return (_0,_2,_1)`, not `return (_0,_1,_1)` + int args_used_mask = 0; + for (var_idx_t arg_idx : op_call->right) { + args_used_mask |= 1 << arg_idx; + } + if (args_used_mask != (1 << op_call->right.size()) - 1) + return; + + // detect getters (having method_id), they should not be treated as wrappers + // v_current->method_id will be assigned later; todo refactor function parsing completely, it's weird + // moreover, `recv_external()` and others are also exported, but FunC is unaware of method_id + // (it's assigned by Fift later) + // so, for now, just handle "special" function names, the same as in Asm.fif + if (!method_id.is_null()) + return; + if (function_name == "main" || function_name == "recv_internal" || function_name == "recv_external" || + function_name == "run_ticktock" || function_name == "split_prepare" || function_name == "split_install") + return; + + // all types must be strictly defined (on mismatch, a compilation error will be triggered anyway) + if (v_called->sym_type->has_unknown_inside() || v_current->sym_type->has_unknown_inside()) + return; + // avoid situations like `f(int a, (int,int) b)`, inlining will be cumbersome + if (v_current->get_arg_type()->get_width() != op_call->right.size()) + return; + // 'return true;' (false, nil) are (surprisingly) also function calls + if (f_called->name() == "true" || f_called->name() == "false" || f_called->name() == "nil") + return; + // if an original is marked `pure`, and this one doesn't, it's okay; just check for inline_ref storage + if (v_current->is_inline_ref()) + return; + + // ok, f_current is a wrapper + v_current->flags |= SymValFunc::flagWrapsAnotherF; + if (G.is_verbosity(2)) { + std::cerr << function_name << " -> " << f_called->name() << std::endl; + } +} + +static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name) { + unsigned int crc = td::crc16(static_cast(func_name)); + return td::make_refint((crc & 0xffff) | 0x10000); +} + +void process_vertex(V v_function) { 
+ open_scope(v_function->loc); + std::vector type_vars; + if (v_function->forall_list) { + type_vars.reserve(v_function->forall_list->size()); + for (int idx = 0; idx < v_function->forall_list->size(); ++idx) { + type_vars.emplace_back(v_function->forall_list->get_item(idx)->created_type); + } + } + std::string func_name = v_function->name; + int func_sym_idx = calc_sym_idx(func_name); + int flags_inline = 0; + if (v_function->marked_as_inline) { + flags_inline = SymValFunc::flagInline; + } else if (v_function->marked_as_inline_ref) { + flags_inline = SymValFunc::flagInlineRef; + } + td::RefInt256 method_id; + if (v_function->method_id) { + method_id = td::string_to_int256(static_cast(v_function->method_id->int_val)); + if (method_id.is_null()) { + v_function->method_id->error("invalid integer constant"); + } + } else if (v_function->marked_as_get_method) { + method_id = calculate_method_id_by_func_name(func_name); + for (const SymDef* other : G.glob_get_methods) { + if (!td::cmp(dynamic_cast(other->value)->method_id, method_id)) { + v_function->error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" + func_name + "` produce the same hash. Consider renaming one of these functions."); + } + } + } + TypeExpr* arg_list_type = nullptr; + if (int n_args = v_function->get_num_args()) { + std::vector arg_types; + arg_types.reserve(n_args); + for (int idx = 0; idx < n_args; ++idx) { + arg_types.emplace_back(v_function->get_arg(idx)->arg_type); + } + arg_list_type = TypeExpr::new_tensor(std::move(arg_types)); + } else { + arg_list_type = TypeExpr::new_unit(); + } + TypeExpr* func_type = TypeExpr::new_map(arg_list_type, v_function->ret_type); + if (!type_vars.empty()) { + func_type = TypeExpr::new_forall(std::move(type_vars), func_type); + } + if (v_function->marked_as_builtin) { + const SymDef* builtin_func = lookup_symbol(G.symbols.lookup(func_name)); + const SymValFunc* func_val = builtin_func ? 
dynamic_cast(builtin_func->value) : nullptr; + if (!func_val || !func_val->is_builtin()) { + v_function->error("`builtin` used for non-builtin function"); + } +#ifdef TOLK_DEBUG + // in release, we don't need this check, since `builtin` is used only in stdlib.tolk, which is our responsibility + if (!func_val->sym_type->equals_to(func_type) || func_val->is_marked_as_pure() != v_function->marked_as_pure) { + v_function->error("declaration for `builtin` function doesn't match an actual one"); + } +#endif + close_scope(); + return; + } + if (G.is_verbosity(1)) { + std::cerr << "fun " << func_name << " : " << func_type << std::endl; + } + SymDef* func_sym = define_global_symbol(func_sym_idx, 0, v_function->loc); + tolk_assert(func_sym); + SymValFunc* func_sym_val = dynamic_cast(func_sym->value); + if (func_sym->value) { + // todo remove all about pre-declarations and prototypes + if (func_sym->value->kind != SymValKind::_Func || !func_sym_val) { + v_function->error("was not defined as a function before"); + } + try { + unify(func_sym_val->sym_type, func_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous type of function " << func_name << " : " << func_sym_val->sym_type + << " cannot be unified with new type " << func_type << ": " << ue; + v_function->error(os.str()); + } + } + if (v_function->get_body()->type == ast_empty) { + make_new_glob_func(func_sym, func_type, v_function->marked_as_pure); + } else if (const auto* v_seq = v_function->get_body()->try_as()) { + if (dynamic_cast(func_sym_val)) { + v_function->error("function `" + func_name + "` has been already defined as an assembler built-in"); + } + SymValCodeFunc* func_sym_code; + if (func_sym_val) { + func_sym_code = dynamic_cast(func_sym_val); + if (!func_sym_code) { + v_function->error("function `" + func_name + "` has been already defined in an yet-unknown way"); + } + } else { + func_sym_code = make_new_glob_func(func_sym, func_type, v_function->marked_as_pure); + } + if 
(func_sym_code->code) { + v_function->error("redefinition of function `" + func_name + "`"); + } + if (v_function->marked_as_pure && v_function->ret_type->get_width() == 0) { + v_function->error("a pure function should return something, otherwise it will be optimized out anyway"); + } + CodeBlob* code = process_vertex(v_seq, v_function->get_arg_list(), v_function->ret_type, v_function->marked_as_pure); + code->name = func_name; + code->loc = v_function->loc; + func_sym_code->code = code; + // todo it should be done not here, it should be on ast level, it should work when functions are declared swapped + detect_if_function_just_wraps_another(func_sym_code, method_id); + } else if (const auto* v_asm = v_function->get_body()->try_as()) { + SymValAsmFunc* asm_func = process_vertex(v_asm, func_type, v_function->get_arg_list(), v_function->ret_type, v_function->marked_as_pure); +#ifdef TOLK_DEBUG + asm_func->name = func_name; +#endif + if (func_sym_val) { + if (dynamic_cast(func_sym_val)) { + v_function->error("function `" + func_name + "` was already declared as an ordinary function"); + } + SymValAsmFunc* asm_func_old = dynamic_cast(func_sym_val); + if (asm_func_old) { + if (asm_func->crc != asm_func_old->crc) { + v_function->error("redefinition of built-in assembler function `" + func_name + "`"); + } + } else { + v_function->error("redefinition of previously (somehow) defined function `" + func_name + "`"); + } + } + func_sym->value = asm_func; + } + if (method_id.not_null()) { + auto val = dynamic_cast(func_sym->value); + if (!val) { + v_function->error("cannot set method id for unknown function `" + func_name + "`"); + } + if (val->method_id.is_null()) { + val->method_id = std::move(method_id); + } else if (td::cmp(val->method_id, method_id) != 0) { + v_function->error("integer method identifier for `" + func_name + "` changed from " + + val->method_id->to_dec_string() + " to a different value " + method_id->to_dec_string()); + } + } + if (flags_inline) { + auto 
val = dynamic_cast(func_sym->value); + if (!val) { + v_function->error("cannot set unknown function `" + func_name + "` as an inline"); + } + if (!val->is_inline() && !val->is_inline_ref()) { + val->flags |= flags_inline; + } else if ((val->flags & (SymValFunc::flagInline | SymValFunc::flagInlineRef)) != flags_inline) { + v_function->error("inline mode for `" + func_name + "` changed with respect to a previous declaration"); + } + } + if (v_function->marked_as_get_method) { + auto val = dynamic_cast(func_sym->value); + if (!val) { + v_function->error("cannot set unknown function `" + func_name + "` as a get method"); + } + val->flags |= SymValFunc::flagGetMethod; + G.glob_get_methods.push_back(func_sym); + } + close_scope(); +} + +td::Result locate_source_file(const std::string& rel_filename) { + td::Result path = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::Realpath, rel_filename.c_str()); + if (path.is_error()) { + return path.move_as_error(); + } + + std::string abs_filename = path.move_as_ok(); + if (SrcFile* file = G.all_src_files.find_file(abs_filename)) { + return file; // file was already parsed (imported from somewhere else) + } + + td::Result text = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::ReadFile, abs_filename.c_str()); + if (text.is_error()) { + return text.move_as_error(); + } + + return G.all_src_files.register_file(rel_filename, abs_filename, text.move_as_ok()); +} + +void process_vertex(V v) { + std::string_view pragma_name = v->pragma_name; + if (pragma_name == G.pragma_allow_post_modification.name()) { + G.pragma_allow_post_modification.enable(v->loc); + } else if (pragma_name == G.pragma_compute_asm_ltr.name()) { + G.pragma_compute_asm_ltr.enable(v->loc); + } else if (pragma_name == G.pragma_remove_unused_functions.name()) { + G.pragma_remove_unused_functions.enable(v->loc); + } else { + v->error("unknown pragma name"); + } +} + +void process_vertex(V v) { + char op = '='; bool eq = false; + TokenType 
cmp_tok = v->cmp_tok; + if (cmp_tok == tok_gt || cmp_tok == tok_geq) { + op = '>'; + eq = cmp_tok == tok_geq; + } else if (cmp_tok == tok_lt || cmp_tok == tok_leq) { + op = '<'; + eq = cmp_tok == tok_leq; + } else if (cmp_tok == tok_eq) { + op = '='; + } else if (cmp_tok == tok_bitwise_xor) { + op = '^'; + } else { + v->error("invalid comparison operator"); + } + std::string_view pragma_value = v->semver; + int sem_ver[3] = {0, 0, 0}; + char segs = 1; + auto stoi = [&](std::string_view s) { + auto R = td::to_integer_safe(static_cast(s)); + if (R.is_error()) { + v->error("invalid semver format"); + } + return R.move_as_ok(); + }; + std::istringstream iss_value(static_cast(pragma_value)); + for (int idx = 0; idx < 3; idx++) { + std::string s{"0"}; + std::getline(iss_value, s, '.'); + sem_ver[idx] = stoi(s); + } + // End reading semver from source code + int tolk_ver[3] = {0, 0, 0}; + std::istringstream iss(tolk_version); + for (int idx = 0; idx < 3; idx++) { + std::string s; + std::getline(iss, s, '.'); + tolk_ver[idx] = stoi(s); + } + // End parsing embedded semver + bool match = true; + switch (op) { + case '=': + if ((tolk_ver[0] != sem_ver[0]) || + (tolk_ver[1] != sem_ver[1]) || + (tolk_ver[2] != sem_ver[2])) { + match = false; + } + break; + case '>': + if ( ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] < sem_ver[2])) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] < sem_ver[1])) || + ((tolk_ver[0] < sem_ver[0])) ) { + match = false; + } + break; + case '<': + if ( ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] > sem_ver[2])) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] > sem_ver[1])) || + ((tolk_ver[0] > sem_ver[0])) ) { + match = false; + } + break; + case '^': + if ( ((segs == 3) && 
((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] != sem_ver[1]) || (tolk_ver[2] < sem_ver[2]))) + || ((segs == 2) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] < sem_ver[1]))) + || ((segs == 1) && ((tolk_ver[0] < sem_ver[0]))) ) { + match = false; + } + break; + default: + __builtin_unreachable(); + } + if (!match) { + v->error("Tolk version " + tolk_version + " does not satisfy this condition"); + } +} + +void process_vertex(V v, SrcFile* current_file) { + std::string rel_filename = static_cast(v->file_name); + if (size_t rc = current_file->rel_filename.rfind('/'); rc != std::string::npos) { + rel_filename = current_file->rel_filename.substr(0, rc + 1) + rel_filename; + } + + td::Result locate_res = locate_source_file(rel_filename); + if (locate_res.is_error()) { + v->error("Failed to import: " + locate_res.move_as_error().message().str()); + } + + SrcFile* imported_file = locate_res.move_as_ok(); + current_file->imports.emplace_back(SrcFile::ImportStatement{imported_file}); + if (!imported_file->was_parsed) { + // todo it's wrong, but ok for now + process_file_ast(parse_src_file_to_ast(imported_file)); + } +} + +void process_vertex(V v) { + AnyV init_value = v->get_init_value(); + SymDef* sym_def = define_global_symbol(calc_sym_idx(v->const_name), false, v->loc); + if (!sym_def) { + v->error("cannot define global symbol"); + } + if (sym_def->value) { + v->error("symbol already exists"); + } + CodeBlob code; + Expr* x = process_expr(init_value, code, false); + if (!x->is_rvalue()) { + v->get_init_value()->error("expression is not strictly Rvalue"); + } + if (v->declared_type && !v->declared_type->equals_to(x->e_type)) { + v->error("expression type does not match declared type"); + } + SymValConst* new_value = nullptr; + if (x->cls == Expr::_Const) { // Integer constant + new_value = new SymValConst{G.const_cnt++, x->intval}; + } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) + new_value = new SymValConst{G.const_cnt++, x->strval}; + } else if 
(x->cls == Expr::_Apply) { // even "1 + 2" is Expr::_Apply (it applies `_+_`) + code.emplace_back(v->loc, Op::_Import, std::vector()); + auto tmp_vars = x->pre_compile(code); + code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars)); + code.emplace_back(v->loc, Op::_Nop); + // It is REQUIRED to execute "optimizations" as in tolk.cpp + code.simplify_var_types(); + code.prune_unreachable_code(); + code.split_vars(true); + for (int i = 0; i < 16; i++) { + code.compute_used_code_vars(); + code.fwd_analyze(); + code.prune_unreachable_code(); + } + code.mark_noreturn(); + AsmOpList out_list(0, &code.vars); + code.generate_code(out_list); + if (out_list.list_.size() != 1) { + init_value->error("precompiled expression must result in single operation"); + } + auto op = out_list.list_[0]; + if (!op.is_const()) { + init_value->error("precompiled expression must result in compilation time constant"); + } + if (op.origin.is_null() || !op.origin->is_valid()) { + init_value->error("precompiled expression did not result in a valid integer constant"); + } + new_value = new SymValConst{G.const_cnt++, op.origin}; + } else { + init_value->error("integer or slice literal or constant expected"); + } + sym_def->value = new_value; +} + +void process_vertex(V v) { + TypeExpr* var_type = v->declared_type; + SymDef* sym_def = define_global_symbol(calc_sym_idx(v->var_name), false, v->loc); + if (!sym_def) { + v->error("cannot define global symbol"); + } + if (sym_def->value) { + auto val = dynamic_cast(sym_def->value); + if (!val) { + v->error("symbol cannot be redefined as a global variable"); + } + try { + unify(var_type, val->sym_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot unify new type " << var_type << " of global variable `" << sym_def->name() + << "` with its previous type " << val->sym_type << ": " << ue; + v->error(os.str()); + } + } else { + sym_def->value = new SymValGlobVar{G.glob_var_cnt++, var_type}; +#ifdef TOLK_DEBUG + 
dynamic_cast(sym_def->value)->name = v->var_name; +#endif + G.glob_vars.push_back(sym_def); + } +} + +class FileToLegacyVisitor final : public ASTVisitorToplevelDeclarations { + SrcFile* current_file; + + // todo inline here all these + void on_pragma_no_arg(V v) override { + process_vertex(v); + } + + void on_pragma_version(V v) override { + process_vertex(v); + } + + void on_include_statement(V v) override { + process_vertex(v, current_file); + } + + void on_function_declaration(V v) override { + process_vertex(v); + } + + void on_constant_declaration(V v) override { + process_vertex(v); + } + + void on_global_var_declaration(V v) override { + process_vertex(v); + } + +public: + explicit FileToLegacyVisitor(SrcFile* file) : current_file(file) { + } +}; + +void process_file_ast(AnyV file_ast) { + auto v = file_ast->try_as(); + if (!v) { + throw UnexpectedASTNodeType(file_ast, "process_file_ast"); + } + + const SrcFile* file = v->file; + if (!file->is_stdlib_file()) { + // v->debug_print(); + G.generated_from += file->rel_filename; + G.generated_from += ", "; + } + + FileToLegacyVisitor(const_cast(file)).start_visiting_file(v); +} + +} // namespace tolk diff --git a/tolk/ast-to-legacy.h b/tolk/ast-to-legacy.h new file mode 100644 index 000000000..f7660f20e --- /dev/null +++ b/tolk/ast-to-legacy.h @@ -0,0 +1,28 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "ast.h" + +namespace tolk { + +struct SrcFile; + +void process_file_ast(AnyV file_ast); + +} // namespace tolk + diff --git a/tolk/ast-visitor.h b/tolk/ast-visitor.h new file mode 100644 index 000000000..237a79f13 --- /dev/null +++ b/tolk/ast-visitor.h @@ -0,0 +1,199 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "ast.h" +#include "platform-utils.h" + +/* + * A module implementing base functionality of read-only traversing a vertex tree. + * Since a vertex in general doesn't store a vector of children, iterating is possible only for concrete node_type. + * E.g., for ast_if_statement, visit nodes cond, if-body and else-body. For ast_string_const, nothing. And so on. + * Visitors below are helpers to inherit from and handle specific vertex types. + * + * Note, that absence of "children" in ASTNodeBase is not a drawback. Instead, it encourages you to think + * about types and match the type system. + * + * The visitor is read-only, it does not modify visited nodes (except if you purposely call mutating methods). + * For example, if you want to replace "beginCell()" call with "begin_cell", a visitor isn't enough for you. 
+ * To replace vertices, consider another API: ast-replacer.h. + */ + +namespace tolk { + +class ASTVisitor { +protected: + GNU_ATTRIBUTE_ALWAYS_INLINE static void visit_children(const ASTNodeLeaf* v) { + static_cast(v); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeUnary* v) { + visit(v->child); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeBinary* v) { + visit(v->lhs); + visit(v->rhs); + } + + GNU_ATTRIBUTE_ALWAYS_INLINE void visit_children(const ASTNodeVararg* v) { + for (AnyV child : v->children) { + visit(child); + } + } + + virtual void visit(AnyV v) = 0; + +public: + virtual ~ASTVisitor() = default; +}; + +class ASTVisitorFunctionBody : public ASTVisitor { +protected: + using parent = ASTVisitorFunctionBody; + + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return 
visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + + void visit(AnyV v) final { + switch (v->type) { + case ast_empty: return visit(v->as()); + case ast_identifier: return visit(v->as()); + case ast_int_const: return visit(v->as()); + case ast_string_const: return visit(v->as()); + case ast_bool_const: return visit(v->as()); + case ast_nil_tuple: return visit(v->as()); + case ast_function_call: return visit(v->as()); + case ast_parenthesized_expr: return visit(v->as()); + case ast_underscore: return visit(v->as()); + case ast_type_expression: return visit(v->as()); + case ast_variable_declaration: return visit(v->as()); + case ast_tensor: return visit(v->as()); + case ast_tensor_square: return visit(v->as()); + case ast_dot_tilde_call: return visit(v->as()); + case ast_unary_operator: return visit(v->as()); + case ast_binary_operator: return visit(v->as()); + case ast_ternary_operator: return visit(v->as()); + case ast_return_statement: return visit(v->as()); + case ast_sequence: return visit(v->as()); + case ast_repeat_statement: return visit(v->as()); + case ast_while_statement: return visit(v->as()); + case ast_do_until_statement: return visit(v->as()); + case ast_try_catch_statement: return visit(v->as()); + case ast_if_statement: return visit(v->as()); + case ast_asm_body: return visit(v->as()); + default: + throw UnexpectedASTNodeType(v, "ASTVisitorFunctionBody::visit"); + } + } + +public: + void start_visiting_function(V v_function) { + visit(v_function->get_body()); + } +}; + +class ASTVisitorAllFunctionsInFile : public ASTVisitorFunctionBody { +protected: + using parent = ASTVisitorAllFunctionsInFile; + + virtual bool should_enter_function(V v) = 0; + +public: + void start_visiting_file(V v_file) { + for (AnyV v : v_file->get_toplevel_declarations()) { + if (auto v_func = v->try_as()) { + if 
(should_enter_function(v_func)) { + visit(v_func->get_body()); + } + } + } + } +}; + +class ASTVisitorToplevelDeclarations : public ASTVisitor { +protected: + using parent = ASTVisitorToplevelDeclarations; + + virtual void on_pragma_no_arg(V v) = 0; + virtual void on_pragma_version(V v) = 0; + virtual void on_include_statement(V v) = 0; + virtual void on_constant_declaration(V v) = 0; + virtual void on_global_var_declaration(V v) = 0; + virtual void on_function_declaration(V v) = 0; + + void visit(AnyV v) final { + switch (v->type) { + case ast_pragma_no_arg: + on_pragma_no_arg(v->as()); + break; + case ast_pragma_version: + on_pragma_version(v->as()); + break; + case ast_include_statement: + on_include_statement(v->as()); + break; + case ast_constant_declaration_list: + for (const auto& v_decl : v->as()->get_declarations()) { + on_constant_declaration(v_decl->as()); + } + break; + case ast_global_var_declaration_list: + for (const auto& v_decl : v->as()->get_declarations()) { + on_global_var_declaration(v_decl->as()); + } + break; + case ast_function_declaration: + on_function_declaration(v->as()); + break; + default: + throw UnexpectedASTNodeType(v, "ASTVisitorToplevelDeclarations::visit"); + } + } + +public: + void start_visiting_file(V v_file) { + for (AnyV v : v_file->get_toplevel_declarations()) { + visit(v); + } + } +}; + +} // namespace tolk diff --git a/tolk/ast.cpp b/tolk/ast.cpp new file mode 100644 index 000000000..ec060c58b --- /dev/null +++ b/tolk/ast.cpp @@ -0,0 +1,70 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "ast.h" +#include "ast-stringifier.h" +#include + +namespace tolk { + +static_assert(sizeof(ASTNodeBase) == 12); + +#ifdef TOLK_DEBUG + +std::string ASTNodeBase::to_debug_string(bool colored) const { + ASTStringifier s(colored); + return s.to_string_with_children(this); +} + +void ASTNodeBase::debug_print() const { + std::cerr << to_debug_string(true) << std::endl; +} + +#endif // TOLK_DEBUG + +UnexpectedASTNodeType::UnexpectedASTNodeType(AnyV v_unexpected, const char* place_where): v_unexpected(v_unexpected) { + message = "Unexpected ASTNodeType "; +#ifdef TOLK_DEBUG + message += ASTStringifier::ast_node_type_to_string(v_unexpected->type); + message += " "; +#endif + message += "in "; + message += place_where; +} + +void ASTNodeBase::error(const std::string& err_msg) const { + throw ParseError(loc, err_msg); +} + +int Vertex::lookup_idx(std::string_view nameT) const { + for (size_t idx = 0; idx < children.size(); ++idx) { + if (children[idx] && children[idx]->as()->nameT == nameT) { + return static_cast(idx); + } + } + return -1; +} + +int Vertex::lookup_idx(std::string_view arg_name) const { + for (size_t idx = 0; idx < children.size(); ++idx) { + if (children[idx] && children[idx]->as()->arg_name == arg_name) { + return static_cast(idx); + } + } + return -1; +} + +} // namespace tolk diff --git a/tolk/ast.h b/tolk/ast.h new file mode 100644 index 000000000..c3fe13941 --- /dev/null +++ b/tolk/ast.h @@ -0,0 +1,567 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include +#include "platform-utils.h" +#include "src-file.h" +#include "type-expr.h" +#include "lexer.h" + +/* + * Here we introduce AST representation of Tolk source code. + * Historically, in FunC, there was no AST: while lexing, symbols were registered, types were inferred, and so on. + * There was no way to perform any more or less semantic analysis. + * In Tolk, I've implemented parsing .tolk files into AST at first, and then converting this AST + * into legacy representation (see ast-to-legacy.cpp). + * In the future, more and more code analysis will be moved out of legacy to AST-level. + * + * From the user's point of view, all AST vertices are constant. All API is based on constancy. + * Even though fields of vertex structs are public, they can't be modified, since vertices are accepted by const ref. 
+ * Generally, there are two ways of accepting a vertex:
+ * * AnyV (= const ASTNodeBase*)
+ *   the only thing you can do with this vertex is to see v->type (ASTNodeType) and to cast via v->as<node_type>()
+ * * V<node_type> (= const Vertex<node_type>*)
+ *   a specific type of vertex, you can use its fields and methods
+ * There is one way of creating a vertex:
+ * * createV<node_type>(...constructor_args) (= new Vertex<node_type>(...))
+ *   vertices are currently created on a heap, without any custom memory arena, just allocated and never deleted
+ *
+ * Having AnyV and knowing its node_type, a call
+ *   v->as<node_type>()
+ * will return a typed vertex.
+ * There is also a shorthand v->try_as<node_type>() which returns V<node_type> or nullptr if types don't match:
+ *   if (auto v_int = v->try_as<ast_int_const>())
+ * Note that these casts are NOT DYNAMIC. ASTNode is not a virtual base, it has no vtable.
+ * So, as<...>() is just a compile-time casting, without any runtime overhead.
+ *
+ * Note that ASTNodeBase doesn't store any vector of children. That's why there is no way to loop over
+ * a random (unknown) vertex. Only a concrete Vertex stores its children (if any).
+ * Hence, to iterate over a custom vertex (e.g., a function body), one should inherit some kind of ASTVisitor.
+ * Besides read-only visiting, there is a "visit and replace" pattern.
+ * See ast-visitor.h and ast-replacer.h.
+ */ + +namespace tolk { + +enum ASTNodeType { + ast_empty, + ast_identifier, + ast_int_const, + ast_string_const, + ast_bool_const, + ast_nil_tuple, + ast_function_call, + ast_parenthesized_expr, + ast_global_var_declaration, + ast_global_var_declaration_list, + ast_constant_declaration, + ast_constant_declaration_list, + ast_underscore, + ast_type_expression, + ast_variable_declaration, + ast_tensor, + ast_tensor_square, + ast_dot_tilde_call, + ast_unary_operator, + ast_binary_operator, + ast_ternary_operator, + ast_return_statement, + ast_sequence, + ast_repeat_statement, + ast_while_statement, + ast_do_until_statement, + ast_try_catch_statement, + ast_if_statement, + ast_forall_item, + ast_forall_list, + ast_argument, + ast_argument_list, + ast_asm_body, + ast_function_declaration, + ast_pragma_no_arg, + ast_pragma_version, + ast_include_statement, + ast_tolk_file, +}; + +struct ASTNodeBase; + +using AnyV = const ASTNodeBase*; + +template +struct Vertex; + +template +using V = const Vertex*; + +#define createV new Vertex + +struct UnexpectedASTNodeType final : std::exception { + AnyV v_unexpected; + std::string message; + + explicit UnexpectedASTNodeType(AnyV v_unexpected, const char* place_where); + + const char* what() const noexcept override { + return message.c_str(); + } +}; + +// --------------------------------------------------------- + +struct ASTNodeBase { + const ASTNodeType type; + const SrcLocation loc; + + ASTNodeBase(ASTNodeType type, SrcLocation loc) : type(type), loc(loc) {} + + template + V as() const { +#ifdef TOLK_DEBUG + if (type != node_type) { + throw Fatal("v->as<...> to wrong node_type"); + } +#endif + return static_cast>(this); + } + + template + V try_as() const { + return type == node_type ? 
static_cast>(this) : nullptr; + } + + #ifdef TOLK_DEBUG + std::string to_debug_string() const { return to_debug_string(false); } + std::string to_debug_string(bool colored) const; + void debug_print() const; +#endif + + GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD + void error(const std::string& err_msg) const; +}; + +struct ASTNodeLeaf : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + ASTNodeLeaf(ASTNodeType type, SrcLocation loc) + : ASTNodeBase(type, loc) {} +}; + +struct ASTNodeUnary : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + AnyV child; + + ASTNodeUnary(ASTNodeType type, SrcLocation loc, AnyV child) + : ASTNodeBase(type, loc), child(child) {} +}; + +struct ASTNodeBinary : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + AnyV lhs; + AnyV rhs; + + ASTNodeBinary(ASTNodeType type, SrcLocation loc, AnyV lhs, AnyV rhs) + : ASTNodeBase(type, loc), lhs(lhs), rhs(rhs) {} +}; + +struct ASTNodeVararg : ASTNodeBase { + friend class ASTVisitor; + friend class ASTReplacer; + +protected: + std::vector children; + + ASTNodeVararg(ASTNodeType type, SrcLocation loc, std::vector children) + : ASTNodeBase(type, loc), children(std::move(children)) {} + +public: + int size() const { return static_cast(children.size()); } + bool empty() const { return children.empty(); } +}; + +// --------------------------------------------------------- + +template<> +struct Vertex final : ASTNodeLeaf { + Vertex() + : ASTNodeLeaf(ast_empty, SrcLocation()) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + std::string_view name; + + Vertex(SrcLocation loc, std::string_view name) + : ASTNodeLeaf(ast_identifier, loc), name(name) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + std::string_view int_val; + + Vertex(SrcLocation loc, std::string_view int_val) + : ASTNodeLeaf(ast_int_const, loc), int_val(int_val) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + 
std::string_view str_val; + char modifier; + + Vertex(SrcLocation loc, std::string_view str_val, char modifier) + : ASTNodeLeaf(ast_string_const, loc), str_val(str_val), modifier(modifier) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + bool bool_val; + + Vertex(SrcLocation loc, bool bool_val) + : ASTNodeLeaf(ast_bool_const, loc), bool_val(bool_val) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + explicit Vertex(SrcLocation loc) + : ASTNodeLeaf(ast_nil_tuple, loc) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + // even for f(1,2,3), f (lhs) is called with a single arg (tensor "(1,2,3)") (rhs) + AnyV get_called_f() const { return lhs; } + AnyV get_called_arg() const { return rhs; } + + Vertex(SrcLocation loc, AnyV lhs_f, AnyV arg) + : ASTNodeBinary(ast_function_call, loc, lhs_f, arg) {} +}; + +template<> +struct Vertex final : ASTNodeUnary { + AnyV get_expr() const { return child; } + + Vertex(SrcLocation loc, AnyV expr) + : ASTNodeUnary(ast_parenthesized_expr, loc, expr) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + std::string_view var_name; + TypeExpr* declared_type; // may be nullptr + + Vertex(SrcLocation loc, std::string_view var_name, TypeExpr* declared_type) + : ASTNodeLeaf(ast_global_var_declaration, loc), var_name(var_name), declared_type(declared_type) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_declarations() const { return children; } + + Vertex(SrcLocation loc, std::vector declarations) + : ASTNodeVararg(ast_global_var_declaration_list, loc, std::move(declarations)) {} +}; + +template<> +struct Vertex final : ASTNodeUnary { + std::string_view const_name; + TypeExpr* declared_type; // may be nullptr + + AnyV get_init_value() const { return child; } + + Vertex(SrcLocation loc, std::string_view const_name, TypeExpr* declared_type, AnyV init_value) + : ASTNodeUnary(ast_constant_declaration, loc, init_value), const_name(const_name), declared_type(declared_type) {} 
+}; + +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_declarations() const { return children; } + + Vertex(SrcLocation loc, std::vector declarations) + : ASTNodeVararg(ast_constant_declaration_list, loc, std::move(declarations)) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + explicit Vertex(SrcLocation loc) + : ASTNodeLeaf(ast_underscore, loc) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + TypeExpr* declared_type; + + Vertex(SrcLocation loc, TypeExpr* declared_type) + : ASTNodeLeaf(ast_type_expression, loc), declared_type(declared_type) {} +}; + +template<> +struct Vertex final : ASTNodeUnary { + TypeExpr* declared_type; + + AnyV get_variable_or_list() const { return child; } // identifier, tuple, tensor + + Vertex(SrcLocation loc, TypeExpr* declared_type, AnyV dest) + : ASTNodeUnary(ast_variable_declaration, loc, dest), declared_type(declared_type) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_items() const { return children; } + AnyV get_item(int i) const { return children.at(i); } + + Vertex(SrcLocation loc, std::vector items) + : ASTNodeVararg(ast_tensor, loc, std::move(items)) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_items() const { return children; } + AnyV get_item(int i) const { return children.at(i); } + + Vertex(SrcLocation loc, std::vector items) + : ASTNodeVararg(ast_tensor_square, loc, std::move(items)) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + std::string_view method_name; // starts with . 
or ~ + + AnyV get_lhs() const { return lhs; } + AnyV get_arg() const { return rhs; } + + Vertex(SrcLocation loc, std::string_view method_name, AnyV lhs, AnyV rhs) + : ASTNodeBinary(ast_dot_tilde_call, loc, lhs, rhs), method_name(method_name) {} +}; + +template<> +struct Vertex final : ASTNodeUnary { + std::string_view operator_name; + TokenType tok; + + AnyV get_rhs() const { return child; } + + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyV rhs) + : ASTNodeUnary(ast_unary_operator, loc, rhs), operator_name(operator_name), tok(tok) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + std::string_view operator_name; + TokenType tok; + + AnyV get_lhs() const { return lhs; } + AnyV get_rhs() const { return rhs; } + + Vertex(SrcLocation loc, std::string_view operator_name, TokenType tok, AnyV lhs, AnyV rhs) + : ASTNodeBinary(ast_binary_operator, loc, lhs, rhs), operator_name(operator_name), tok(tok) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + AnyV get_cond() const { return children.at(0); } + AnyV get_when_true() const { return children.at(1); } + AnyV get_when_false() const { return children.at(2); } + + Vertex(SrcLocation loc, AnyV cond, AnyV when_true, AnyV when_false) + : ASTNodeVararg(ast_ternary_operator, loc, {cond, when_true, when_false}) {} +}; + +template<> +struct Vertex : ASTNodeUnary { + AnyV get_return_value() const { return child; } + + Vertex(SrcLocation loc, AnyV child) + : ASTNodeUnary(ast_return_statement, loc, child) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + SrcLocation loc_end; + + const std::vector& get_items() const { return children; } + AnyV get_item(int i) const { return children.at(i); } + + Vertex(SrcLocation loc, SrcLocation loc_end, std::vector items) + : ASTNodeVararg(ast_sequence, loc, std::move(items)), loc_end(loc_end) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + AnyV get_cond() const { return lhs; } + auto get_body() const { return rhs->as(); 
} + + Vertex(SrcLocation loc, AnyV cond, V body) + : ASTNodeBinary(ast_repeat_statement, loc, cond, body) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + AnyV get_cond() const { return lhs; } + auto get_body() const { return rhs->as(); } + + Vertex(SrcLocation loc, AnyV cond, V body) + : ASTNodeBinary(ast_while_statement, loc, cond, body) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + auto get_body() const { return lhs->as(); } + AnyV get_cond() const { return rhs; } + + Vertex(SrcLocation loc, V body, AnyV cond) + : ASTNodeBinary(ast_do_until_statement, loc, body, cond) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + auto get_try_body() const { return children.at(0)->as(); } + AnyV get_catch_expr() const { return children.at(1); } // it's a tensor + auto get_catch_body() const { return children.at(2)->as(); } + + Vertex(SrcLocation loc, V try_body, AnyV catch_expr, V catch_body) + : ASTNodeVararg(ast_try_catch_statement, loc, {try_body, catch_expr, catch_body}) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + bool is_ifnot; + + AnyV get_cond() const { return children.at(0); } + auto get_if_body() const { return children.at(1)->as(); } + auto get_else_body() const { return children.at(2)->as(); } // always exists (when else omitted, it's empty) + + Vertex(SrcLocation loc, bool is_ifnot, AnyV cond, V if_body, V else_body) + : ASTNodeVararg(ast_if_statement, loc, {cond, if_body, else_body}), is_ifnot(is_ifnot) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + TypeExpr* created_type; // used to keep same pointer, since TypeExpr::new_var(i) always allocates + std::string nameT; + + Vertex(SrcLocation loc, TypeExpr* created_type, std::string nameT) + : ASTNodeLeaf(ast_forall_item, loc), created_type(created_type), nameT(std::move(nameT)) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + std::vector get_items() const { return children; } + auto get_item(int i) const { return 
children.at(i)->as(); } + + Vertex(SrcLocation loc, std::vector forall_items) + : ASTNodeVararg(ast_forall_list, loc, std::move(forall_items)) {} + + int lookup_idx(std::string_view nameT) const; +}; + +template<> +struct Vertex final : ASTNodeLeaf { + std::string_view arg_name; + TypeExpr* arg_type; + + Vertex(SrcLocation loc, std::string_view arg_name, TypeExpr* arg_type) + : ASTNodeLeaf(ast_argument, loc), arg_name(arg_name), arg_type(arg_type) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + std::vector arg_order; + std::vector ret_order; + + const std::vector& get_asm_commands() const { return children; } // ast_string_const[] + + Vertex(SrcLocation loc, std::vector arg_order, std::vector ret_order, std::vector asm_commands) + : ASTNodeVararg(ast_asm_body, loc, std::move(asm_commands)), arg_order(std::move(arg_order)), ret_order(std::move(ret_order)) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_args() const { return children; } + auto get_arg(int i) const { return children.at(i)->as(); } + + Vertex(SrcLocation loc, std::vector args) + : ASTNodeVararg(ast_argument_list, loc, std::move(args)) {} + + int lookup_idx(std::string_view arg_name) const; +}; + +template<> +struct Vertex final : ASTNodeBinary { + int get_num_args() const { return lhs->as()->size(); } + auto get_arg_list() const { return lhs->as(); } + auto get_arg(int i) const { return lhs->as()->get_arg(i); } + AnyV get_body() const { return rhs; } // ast_sequence / ast_asm_body / ast_empty + + std::string name; + TypeExpr* ret_type = nullptr; + V forall_list = nullptr; + bool marked_as_pure = false; + bool marked_as_builtin = false; + bool marked_as_get_method = false; + bool marked_as_inline = false; + bool marked_as_inline_ref = false; + V method_id = nullptr; + + Vertex(SrcLocation loc, std::string name, V args, AnyV body) + : ASTNodeBinary(ast_function_declaration, loc, args, body), name(std::move(name)) {} +}; + +template<> +struct Vertex final 
: ASTNodeLeaf { + std::string_view pragma_name; + + Vertex(SrcLocation loc, std::string_view pragma_name) + : ASTNodeLeaf(ast_pragma_no_arg, loc), pragma_name(pragma_name) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + TokenType cmp_tok; + std::string_view semver; + + Vertex(SrcLocation loc, TokenType cmp_tok, std::string_view semver) + : ASTNodeLeaf(ast_pragma_version, loc), cmp_tok(cmp_tok), semver(semver) {} +}; + +template<> +struct Vertex final : ASTNodeLeaf { + std::string_view file_name; + + Vertex(SrcLocation loc, std::string_view file_name) + : ASTNodeLeaf(ast_include_statement, loc), file_name(file_name) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + const SrcFile* const file; + + const std::vector& get_toplevel_declarations() const { return children; } + + Vertex(const SrcFile* file, std::vector toplevel_declarations) + : ASTNodeVararg(ast_tolk_file, SrcLocation(file), std::move(toplevel_declarations)), file(file) {} +}; + +} // namespace tolk diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index 439228f4b..ddcb26305 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -72,22 +72,6 @@ SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const return define_builtin_func_impl(name, new SymValAsmFunc{func_type, make_simple_compile(macro), arg_order, ret_order, !impure}); } -SymDef* force_autoapply(SymDef* def) { - if (def) { - auto val = dynamic_cast(def->value); - if (val) { - val->auto_apply = true; - } - } - return def; -} - -template -SymDef* define_builtin_const(std::string name, TypeExpr* const_type, Args&&... 
args) { - return force_autoapply( - define_builtin_func(name, TypeExpr::new_map(TypeExpr::new_unit(), const_type), std::forward(args)...)); -} - bool SymValAsmFunc::compile(AsmOpList& dest, std::vector& out, std::vector& in, SrcLocation where) const { if (simple_compile) { @@ -1219,11 +1203,10 @@ void define_builtins() { define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6)); define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 3)); define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7)); - define_builtin_const("true", Int, /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true)); - define_builtin_const("false", Int, /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false)); + define_builtin_func("true", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true)); + define_builtin_func("false", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false)); // define_builtin_func("null", Null, AsmOp::Const("PUSHNULL")); - define_builtin_const("nil", Tuple, AsmOp::Const("PUSHNULL")); - define_builtin_const("Nil", Tuple, AsmOp::Const("NIL")); + define_builtin_func("nil", TypeExpr::new_map(TypeExpr::new_unit(), Tuple), AsmOp::Const("PUSHNULL")); define_builtin_func("null?", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null); define_builtin_func("throw", impure_un_op, compile_throw, true); define_builtin_func("throw_if", impure_bin_op, std::bind(compile_cond_throw, _1, _2, true), true); diff --git a/tolk/gen-abscode.cpp b/tolk/gen-abscode.cpp index ed9374163..b95b434ad 100644 --- a/tolk/gen-abscode.cpp +++ b/tolk/gen-abscode.cpp @@ -41,19 +41,7 @@ Expr::Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list _arglist) } } -void Expr::chk_rvalue(const Lexer& lex) const { - if (!is_rvalue()) { - lex.error_at("rvalue expected before 
`", "`"); - } -} - -void Expr::chk_lvalue(const Lexer& lex) const { - if (!is_lvalue()) { - lex.error_at("lvalue expected before `", "`"); - } -} - -bool Expr::deduce_type(const Lexer& lex) { +bool Expr::deduce_type() { if (e_type) { return true; } @@ -77,7 +65,7 @@ bool Expr::deduce_type(const Lexer& lex) { std::ostringstream os; os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type " << fun_type->args[0] << ": " << ue; - lex.error(os.str()); + throw ParseError(here, os.str()); } e_type = fun_type->args[1]; TypeExpr::remove_indirect(e_type); @@ -92,7 +80,7 @@ bool Expr::deduce_type(const Lexer& lex) { std::ostringstream os; os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type << ": " << ue; - lex.error(os.str()); + throw ParseError(here, os.str()); } e_type = fun_type->args[1]; TypeExpr::remove_indirect(e_type); @@ -107,7 +95,7 @@ bool Expr::deduce_type(const Lexer& lex) { std::ostringstream os; os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type " << args[0]->e_type << ": " << ue; - lex.error(os.str()); + throw ParseError(here, os.str()); } e_type = args[0]->e_type; TypeExpr::remove_indirect(e_type); @@ -124,7 +112,7 @@ bool Expr::deduce_type(const Lexer& lex) { os << "cannot implicitly assign an expression of type " << args[1]->e_type << " to a variable or pattern of type " << rhs_type << " in modifying method `" << G.symbols.get_name(val) << "` : " << ue; - lex.error(os.str()); + throw ParseError(here, os.str()); } e_type = rhs_type->args[1]; TypeExpr::remove_indirect(e_type); @@ -139,7 +127,7 @@ bool Expr::deduce_type(const Lexer& lex) { } catch (UnifyError& ue) { std::ostringstream os; os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue; - lex.error(os.str()); + throw ParseError(here, os.str()); } try { unify(args[1]->e_type, args[2]->e_type); @@ -147,7 
+135,7 @@ bool Expr::deduce_type(const Lexer& lex) { std::ostringstream os; os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and " << args[2]->e_type << " : " << ue; - lex.error(os.str()); + throw ParseError(here, os.str()); } e_type = args[1]->e_type; TypeExpr::remove_indirect(e_type); @@ -170,13 +158,13 @@ int Expr::define_new_vars(CodeBlob& code) { } case _Var: if (val < 0) { - val = code.create_var(TmpVar::_Named, e_type, sym, here); + val = code.create_var(false, e_type, sym, here); return 1; } break; case _Hole: if (val < 0) { - val = code.create_var(TmpVar::_Tmp, e_type, nullptr, here); + val = code.create_var(true, e_type, nullptr, here); } break; } @@ -279,7 +267,7 @@ std::vector pre_compile_tensor(const std::vector& args, CodeB res_lists[i] = args[i]->pre_compile(code, lval_globs); for (size_t j = 0; j < res_lists[i].size(); ++j) { TmpVar& var = code.vars.at(res_lists[i][j]); - if (!lval_globs && (var.cls & TmpVar::_Named)) { + if (!lval_globs && !var.is_tmp_unnamed) { var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](SrcLocation here) mutable { if (!done) { done = true; diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp index 1a5fe5694..a9682e52e 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -361,19 +361,21 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (str == "asm") return tok_asm; if (str == "get") return tok_get; if (str == "try") return tok_try; + if (str == "nil") return tok_nil; break; case 4: if (str == "else") return tok_else; + if (str == "true") return tok_true; if (str == "pure") return tok_pure; if (str == "then") return tok_then; if (str == "cell") return tok_cell; if (str == "cont") return tok_cont; - if (str == "type") return tok_type; // todo unused token? 
break; case 5: if (str == "slice") return tok_slice; if (str == "tuple") return tok_tuple; if (str == "const") return tok_const; + if (str == "false") return tok_false; if (str == "while") return tok_while; if (str == "until") return tok_until; if (str == "catch") return tok_catch; @@ -427,7 +429,7 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (TokenType kw_tok = maybe_keyword(str_val)) { lex->add_token(kw_tok, str_val); } else { - G.symbols.lookup_add(static_cast(str_val)); + G.symbols.lookup_add(str_val); lex->add_token(tok_identifier, str_val); } return true; @@ -453,7 +455,7 @@ struct ChunkIdentifierInBackticks final : ChunkLexerBase { std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1); lex->skip_chars(1); - G.symbols.lookup_add(static_cast(str_val)); + G.symbols.lookup_add(str_val); lex->add_token(tok_identifier, str_val); return true; } @@ -610,21 +612,12 @@ void Lexer::next_special(TokenType parse_next_as, const char* str_expected) { cur_token = tokens_circularbuf[++cur_token_idx & 7]; } -int Lexer::cur_sym_idx() const { - assert(tok() == tok_identifier); - return G.symbols.lookup_add(cur_str_std_string()); -} - void Lexer::error(const std::string& err_msg) const { throw ParseError(cur_location(), err_msg); } -void Lexer::error_at(const std::string& prefix, const std::string& suffix) const { - throw ParseError(cur_location(), prefix + cur_str_std_string() + suffix); -} - void Lexer::on_expect_call_failed(const char* str_expected) const { - throw ParseError(cur_location(), std::string(str_expected) + " expected instead of `" + cur_str_std_string() + "`"); + throw ParseError(cur_location(), std::string(str_expected) + " expected instead of `" + std::string(cur_str()) + "`"); } void lexer_init() { diff --git a/tolk/lexer.h b/tolk/lexer.h index 04fc025d0..b24efa9d0 100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -31,6 +31,10 @@ enum TokenType { tok_identifier, + tok_true, + tok_false, + tok_nil, // todo "null" keyword is still 
absent, "nil" in FunC is an empty tuple + tok_plus, tok_minus, tok_mul, @@ -108,7 +112,6 @@ enum TokenType { tok_builder, tok_cont, tok_tuple, - tok_type, tok_mapsto, tok_forall, @@ -206,10 +209,8 @@ class Lexer { TokenType tok() const { return cur_token.type; } std::string_view cur_str() const { return cur_token.str_val; } - std::string cur_str_std_string() const { return static_cast(cur_token.str_val); } SrcLocation cur_location() const { return location; } const SrcFile* cur_file() const { return file; } - int cur_sym_idx() const; void next(); void next_special(TokenType parse_next_as, const char* str_expected); @@ -228,8 +229,6 @@ class Lexer { GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD void error(const std::string& err_msg) const; - GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD - void error_at(const std::string& prefix, const std::string& suffix) const; }; void lexer_init(); diff --git a/tolk/parse-tolk.cpp b/tolk/parse-tolk.cpp deleted file mode 100644 index 0b41152d4..000000000 --- a/tolk/parse-tolk.cpp +++ /dev/null @@ -1,1983 +0,0 @@ -/* - This file is part of TON Blockchain Library. - - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . 
-*/ -#include "tolk.h" -#include "platform-utils.h" -#include "compiler-state.h" -#include "td/utils/crypto.h" -#include "common/refint.h" -#include "openssl/digest.hpp" -#include "block/block.h" -#include "block-parse.h" - -namespace tolk { -using namespace std::literals::string_literals; - -inline bool is_dot_ident(sym_idx_t idx) { - return G.symbols.get_subclass(idx) == SymbolSubclass::dot_identifier; -} - -inline bool is_tilde_ident(sym_idx_t idx) { - return G.symbols.get_subclass(idx) == SymbolSubclass::tilde_identifier; -} - -inline bool is_special_ident(sym_idx_t idx) { - return G.symbols.get_subclass(idx) != SymbolSubclass::undef; -} - -// given Expr::_Apply (a function call / a variable call), determine whether it's <, or >, or similar -// (an expression `1 < 2` is expressed as `_<_(1,2)`, see builtins.cpp) -static bool is_comparison_binary_op(const Expr* e_apply) { - const std::string& name = e_apply->sym->name(); - const size_t len = name.size(); - if (len < 3 || len > 5 || name[0] != '_' || name[len-1] != '_') { - return false; // not "_<_" and similar - } - - char c1 = name[1]; - char c2 = name[2]; - // < > <= != == >= <=> - return (len == 3 && (c1 == '<' || c1 == '>')) || - (len == 4 && (c1 == '<' || c1 == '>' || c1 == '!' 
|| c1 == '=') && c2 == '=') || - (len == 5 && (c1 == '<' && c2 == '=' && name[3] == '>')); -} - -// same as above, but to detect bitwise operators: & | ^ -// (in Tolk, they are used as logical ones due to absence of a boolean type and && || operators) -static bool is_bitwise_binary_op(const Expr* e_apply) { - const std::string& name = e_apply->sym->name(); - const size_t len = name.size(); - if (len != 3 || name[0] != '_' || name[len-1] != '_') { - return false; - } - - char c1 = name[1]; - return c1 == '&' || c1 == '|' || c1 == '^'; -} - -// same as above, but to detect addition/subtraction -static bool is_add_or_sub_binary_op(const Expr* e_apply) { - const std::string& name = e_apply->sym->name(); - const size_t len = name.size(); - if (len != 3 || name[0] != '_' || name[len-1] != '_') { - return false; - } - - char c1 = name[1]; - return c1 == '+' || c1 == '-'; -} - -static inline std::string get_builtin_operator_name(sym_idx_t sym_builtin) { - std::string underscored = G.symbols.get_name(sym_builtin); - return underscored.substr(1, underscored.size() - 2); -} - -// fire an error for a case "flags & 0xFF != 0" (equivalent to "flags & 1", probably unexpected) -// it would better be a warning, but we decided to make it a strict error -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_lower_precedence(SrcLocation loc, sym_idx_t op_lower, sym_idx_t op_higher) { - std::string name_lower = get_builtin_operator_name(op_lower); - std::string name_higher = get_builtin_operator_name(op_higher); - throw ParseError(loc, name_lower + " has lower precedence than " + name_higher + - ", probably this code won't work as you expected. " - "Use parenthesis: either (... " + name_lower + " ...) to evaluate it first, or (... " + name_higher + " ...) 
to suppress this error."); -} - -// fire an error for a case "arg1 & arg2 | arg3" -GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_mix_bitwise_and_or(SrcLocation loc, sym_idx_t op1, sym_idx_t op2) { - std::string name1 = get_builtin_operator_name(op1); - std::string name2 = get_builtin_operator_name(op2); - throw ParseError(loc, "mixing " + name1 + " with " + name2 + " without parenthesis" - ", probably this code won't work as you expected. " - "Use parenthesis to emphasize operator precedence."); -} - -// diagnose when bitwise operators are used in a probably wrong way due to tricky precedence -// example: "flags & 0xFF != 0" is equivalent to "flags & 1", most likely it's unexpected -// the only way to suppress this error for the programmer is to use parenthesis -static void diagnose_bitwise_precedence(SrcLocation loc, sym_idx_t bitwise_sym, const Expr* lhs, const Expr* rhs) { - // handle "0 != flags & 0xFF" (lhs = "0 != flags") - if (!lhs->is_inside_parenthesis() && - lhs->cls == Expr::_Apply && lhs->e_type->is_int() && // fast false if 100% not - is_comparison_binary_op(lhs)) { - fire_error_lower_precedence(loc, bitwise_sym, lhs->sym->sym_idx); - // there is a tiny bug: "flags & _!=_(0xFF,0)" will also suggest to wrap rhs into parenthesis - } - - // handle "flags & 0xFF != 0" (rhs = "0xFF != 0") - if (!rhs->is_inside_parenthesis() && - rhs->cls == Expr::_Apply && rhs->e_type->is_int() && - is_comparison_binary_op(rhs)) { - fire_error_lower_precedence(loc, bitwise_sym, rhs->sym->sym_idx); - } - - // handle "arg1 & arg2 | arg3" (lhs = "arg1 & arg2") - if (!lhs->is_inside_parenthesis() && - lhs->cls == Expr::_Apply && lhs->e_type->is_int() && - is_bitwise_binary_op(lhs) && - lhs->sym->sym_idx != bitwise_sym) { - fire_error_mix_bitwise_and_or(loc, lhs->sym->sym_idx, bitwise_sym); - } -} - -// diagnose "a << 8 + 1" (equivalent to "a << 9", probably unexpected) -static void diagnose_addition_in_bitshift(SrcLocation loc, sym_idx_t bitshift_sym, const 
Expr* rhs) { - if (!rhs->is_inside_parenthesis() && - rhs->cls == Expr::_Apply && rhs->e_type->is_int() && - is_add_or_sub_binary_op(rhs)) { - fire_error_lower_precedence(loc, bitshift_sym, rhs->sym->sym_idx); - } -} - -/* - * - * PARSE SOURCE - * - */ - -// TE ::= TA | TA -> TE -// TA ::= int | ... | cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ] -TypeExpr* parse_type(Lexer& lex); - -TypeExpr* parse_type1(Lexer& lex) { - switch (lex.tok()) { - case tok_int: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Int); - case tok_cell: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Cell); - case tok_slice: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Slice); - case tok_builder: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Builder); - case tok_cont: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Cont); - case tok_tuple: - lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Tuple); - case tok_var: - case tok_underscore: - lex.next(); - return TypeExpr::new_hole(); - case tok_identifier: { - auto sym = lookup_symbol(lex.cur_sym_idx()); - if (sym && dynamic_cast(sym->value)) { - auto val = dynamic_cast(sym->value); - lex.next(); - return val->get_type(); - } - lex.error_at("`", "` is not a type identifier"); - } - default: - break; - } - TokenType c; - if (lex.tok() == tok_opbracket) { - lex.next(); - c = tok_clbracket; - } else { - lex.expect(tok_oppar, ""); - c = tok_clpar; - } - if (lex.tok() == c) { - lex.next(); - return c == tok_clpar ? TypeExpr::new_unit() : TypeExpr::new_tuple({}); - } - auto t1 = parse_type(lex); - if (lex.tok() == tok_clpar) { - lex.expect(c, c == tok_clpar ? "')'" : "']'"); - return t1; - } - std::vector tlist{1, t1}; - while (lex.tok() == tok_comma) { - lex.next(); - tlist.push_back(parse_type(lex)); - } - lex.expect(c, c == tok_clpar ? "')'" : "']'"); - return c == tok_clpar ? 
TypeExpr::new_tensor(std::move(tlist)) : TypeExpr::new_tuple(std::move(tlist)); -} - -TypeExpr* parse_type(Lexer& lex) { - auto res = parse_type1(lex); - if (lex.tok() == tok_mapsto) { - lex.next(); - auto to = parse_type(lex); - return TypeExpr::new_map(res, to); - } else { - return res; - } -} - -FormalArg parse_formal_arg(Lexer& lex, int fa_idx) { - TypeExpr* arg_type = 0; - SrcLocation loc = lex.cur_location(); - if (lex.tok() == tok_underscore) { - lex.next(); - if (lex.tok() == tok_comma || lex.tok() == tok_clpar) { - return std::make_tuple(TypeExpr::new_hole(), (SymDef*)nullptr, loc); - } - arg_type = TypeExpr::new_hole(); - loc = lex.cur_location(); - } else if (lex.tok() != tok_identifier) { - arg_type = parse_type(lex); - } else { - auto sym = lookup_symbol(lex.cur_sym_idx()); - if (sym && dynamic_cast(sym->value)) { - auto val = dynamic_cast(sym->value); - lex.next(); - arg_type = val->get_type(); - } else { - arg_type = TypeExpr::new_hole(); - } - } - if (lex.tok() == tok_underscore || lex.tok() == tok_comma || lex.tok() == tok_clpar) { - if (lex.tok() == tok_underscore) { - loc = lex.cur_location(); - lex.next(); - } - return std::make_tuple(arg_type, (SymDef*)nullptr, loc); - } - lex.check(tok_identifier, "formal parameter name"); - loc = lex.cur_location(); - if (G.prohibited_var_names.count(G.symbols.get_name(lex.cur_sym_idx()))) { - throw ParseError{ - loc, PSTRING() << "symbol `" << G.symbols.get_name(lex.cur_sym_idx()) << "` cannot be redefined as a variable"}; - } - SymDef* new_sym_def = define_symbol(lex.cur_sym_idx(), true, loc); - if (!new_sym_def) { - lex.error_at("cannot define symbol `", "`"); - } - if (new_sym_def->value) { - lex.error_at("redefined formal parameter `", "`"); - } - new_sym_def->value = new SymVal{SymValKind::_Param, fa_idx, arg_type}; - lex.next(); - return std::make_tuple(arg_type, new_sym_def, loc); -} - -void parse_global_var_decl(Lexer& lex) { - TypeExpr* var_type = 0; - SrcLocation loc = lex.cur_location(); - if 
(lex.tok() == tok_underscore) { - lex.next(); - var_type = TypeExpr::new_hole(); - loc = lex.cur_location(); - } else if (lex.tok() != tok_identifier) { - var_type = parse_type(lex); - } else { - auto sym = lookup_symbol(lex.cur_sym_idx()); - if (sym && dynamic_cast(sym->value)) { - auto val = dynamic_cast(sym->value); - lex.next(); - var_type = val->get_type(); - } else { - var_type = TypeExpr::new_hole(); - } - } - lex.check(tok_identifier, "global variable name"); - loc = lex.cur_location(); - SymDef* sym_def = define_global_symbol(lex.cur_sym_idx(), false, loc); - if (!sym_def) { - lex.error_at("cannot define global symbol `", "`"); - } - if (sym_def->value) { - auto val = dynamic_cast(sym_def->value); - if (!val) { - lex.error_at("symbol `", "` cannot be redefined as a global variable"); - } - try { - unify(var_type, val->sym_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot unify new type " << var_type << " of global variable `" << sym_def->name() - << "` with its previous type " << val->sym_type << ": " << ue; - lex.error(os.str()); - } - } else { - sym_def->value = new SymValGlobVar{G.glob_var_cnt++, var_type}; -#ifdef TOLK_DEBUG - dynamic_cast(sym_def->value)->name = lex.cur_str(); -#endif - G.glob_vars.push_back(sym_def); - } - lex.next(); -} - -Expr* parse_expr(Lexer& lex, CodeBlob& code, bool nv = false); - -void parse_const_decl(Lexer& lex) { - SrcLocation loc = lex.cur_location(); - int wanted_type = Expr::_None; - if (lex.tok() == tok_int) { - wanted_type = Expr::_Const; - lex.next(); - } else if (lex.tok() == tok_slice) { - wanted_type = Expr::_SliceConst; - lex.next(); - } - lex.check(tok_identifier, "constant name"); - loc = lex.cur_location(); - SymDef* sym_def = define_global_symbol(lex.cur_sym_idx(), false, loc); - if (!sym_def) { - lex.error_at("cannot define global symbol `", "`"); - } - if (sym_def->value) { // todo below it was a check (for duplicate include?) 
- lex.error_at("global symbol `", "` already exists"); - } - lex.next(); - if (lex.tok() != tok_assign) { - lex.error_at("expected = instead of ", ""); - } - lex.next(); - CodeBlob code; - // Handles processing and resolution of literals and consts - auto x = parse_expr(lex, code, false); // also does lex.next() ! - if (!x->is_rvalue()) { - lex.error("expression is not strictly Rvalue"); - } - if ((wanted_type == Expr::_Const) && (x->cls == Expr::_Apply)) - wanted_type = Expr::_None; // Apply is additionally checked to result in an integer - if ((wanted_type != Expr::_None) && (x->cls != wanted_type)) { - lex.error("expression type does not match wanted type"); - } - SymValConst* new_value = nullptr; - if (x->cls == Expr::_Const) { // Integer constant - new_value = new SymValConst{G.const_cnt++, x->intval}; - } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) - new_value = new SymValConst{G.const_cnt++, x->strval}; - } else if (x->cls == Expr::_Apply) { // even "1 + 2" is Expr::_Apply (it applies `_+_`) - code.emplace_back(loc, Op::_Import, std::vector()); - auto tmp_vars = x->pre_compile(code); - code.emplace_back(loc, Op::_Return, std::move(tmp_vars)); - code.emplace_back(loc, Op::_Nop); // This is neccessary to prevent SIGSEGV! 
- // It is REQUIRED to execute "optimizations" as in tolk.cpp - code.simplify_var_types(); - code.prune_unreachable_code(); - code.split_vars(true); - for (int i = 0; i < 16; i++) { - code.compute_used_code_vars(); - code.fwd_analyze(); - code.prune_unreachable_code(); - } - code.mark_noreturn(); - AsmOpList out_list(0, &code.vars); - code.generate_code(out_list); - if (out_list.list_.size() != 1) { - lex.error("precompiled expression must result in single operation"); - } - auto op = out_list.list_[0]; - if (!op.is_const()) { - lex.error("precompiled expression must result in compilation time constant"); - } - if (op.origin.is_null() || !op.origin->is_valid()) { - lex.error("precompiled expression did not result in a valid integer constant"); - } - new_value = new SymValConst{G.const_cnt++, op.origin}; - } else { - lex.error("integer or slice literal or constant expected"); - } - sym_def->value = new_value; -} - -FormalArgList parse_formal_args(Lexer& lex) { - FormalArgList args; - lex.expect(tok_oppar, "formal argument list"); - if (lex.tok() == tok_clpar) { - lex.next(); - return args; - } - int fa_idx = 0; - args.push_back(parse_formal_arg(lex, fa_idx++)); - while (lex.tok() == tok_comma) { - lex.next(); - args.push_back(parse_formal_arg(lex, fa_idx++)); - } - lex.expect(tok_clpar, "')'"); - return args; -} - -void parse_const_decls(Lexer& lex) { - lex.expect(tok_const, "'const'"); - while (true) { - parse_const_decl(lex); - if (lex.tok() != tok_comma) { - break; - } - lex.expect(tok_comma, "','"); - } - lex.expect(tok_semicolon, "';'"); -} - -TypeExpr* extract_total_arg_type(const FormalArgList& arg_list) { - if (arg_list.empty()) { - return TypeExpr::new_unit(); - } - if (arg_list.size() == 1) { - return std::get<0>(arg_list[0]); - } - std::vector type_list; - for (auto& x : arg_list) { - type_list.push_back(std::get<0>(x)); - } - return TypeExpr::new_tensor(std::move(type_list)); -} - -void parse_global_var_decls(Lexer& lex) { - lex.expect(tok_global, 
"'global'"); - while (true) { - parse_global_var_decl(lex); - if (lex.tok() != tok_comma) { - break; - } - lex.expect(tok_comma, "','"); - } - lex.expect(tok_semicolon, "';'"); -} - -SymValCodeFunc* make_new_glob_func(SymDef* func_sym, TypeExpr* func_type, bool marked_as_pure) { - SymValCodeFunc* res = new SymValCodeFunc{G.glob_func_cnt, func_type, marked_as_pure}; -#ifdef TOLK_DEBUG - res->name = func_sym->name(); -#endif - func_sym->value = res; - G.glob_func.push_back(func_sym); - G.glob_func_cnt++; - return res; -} - -bool check_global_func(const Lexer& lex, sym_idx_t func_name) { - SymDef* def = lookup_symbol(func_name); - if (!def) { - lex.error("undefined symbol `" + G.symbols.get_name(func_name) + "`"); - return false; - } - SymVal* val = dynamic_cast(def->value); - if (!val) { - lex.error(std::string{"symbol `"} + G.symbols.get_name(func_name) + "` has no value and no type"); - return false; - } else if (!val->get_type()) { - lex.error(std::string{"symbol `"} + G.symbols.get_name(func_name) + "` has no type, possibly not a function"); - return false; - } else { - return true; - } -} - -Expr* make_func_apply(Expr* fun, Expr* x) { - Expr* res{nullptr}; - if (fun->cls == Expr::_GlobFunc) { - if (x->cls == Expr::_Tensor) { - res = new Expr{Expr::_Apply, fun->sym, x->args}; - } else { - res = new Expr{Expr::_Apply, fun->sym, {x}}; - } - res->flags = Expr::_IsRvalue | (fun->flags & Expr::_IsImpure); - } else { - res = new Expr{Expr::_VarApply, {fun, x}}; - res->flags = Expr::_IsRvalue; - } - return res; -} - -void check_import_exists_when_using_sym(const Lexer& lex, const SymDef* used_sym) { - if (!lex.cur_location().is_symbol_from_same_or_builtin_file(used_sym->loc)) { - const SrcFile* declared_in = used_sym->loc.get_src_file(); - bool has_import = false; - for (const SrcFile::ImportStatement& import_stmt : lex.cur_file()->imports) { - if (import_stmt.imported_file == declared_in) { - has_import = true; - } - } - if (!has_import) { - lex.error("Using a 
non-imported symbol `" + used_sym->name() + "`. Forgot to import \"" + declared_in->rel_filename + "\"?"); - } - } -} - -// parse ( E { , E } ) | () | [ E { , E } ] | [] | id | num | _ -Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { - if (lex.tok() == tok_oppar || lex.tok() == tok_opbracket) { - bool tf = (lex.tok() == tok_opbracket); - TokenType clbr = (tf ? tok_clbracket : tok_clpar); - SrcLocation loc{lex.cur_location()}; - lex.next(); - if (lex.tok() == clbr) { - lex.next(); - Expr* res = new Expr{Expr::_Tensor, {}}; - res->flags = Expr::_IsRvalue; - res->here = loc; - res->e_type = TypeExpr::new_unit(); - if (tf) { - res = new Expr{Expr::_MkTuple, {res}}; - res->flags = Expr::_IsRvalue; - res->here = loc; - res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); - } - return res; - } - Expr* res = parse_expr(lex, code, nv); - if (lex.tok() == tok_clpar) { - lex.expect(clbr, clbr == tok_clbracket ? "']'" : "')'"); - res->flags |= Expr::_IsInsideParenthesis; - return res; - } - std::vector type_list; - type_list.push_back(res->e_type); - int f = res->flags; - res = new Expr{Expr::_Tensor, {res}}; - while (lex.tok() == tok_comma) { - lex.next(); - auto x = parse_expr(lex, code, nv); - res->pb_arg(x); - if ((f ^ x->flags) & Expr::_IsType) { - lex.error("mixing type and non-type expressions inside the same tuple"); - } - f &= x->flags; - type_list.push_back(x->e_type); - } - res->here = loc; - res->flags = f; - res->e_type = TypeExpr::new_tensor(std::move(type_list), !tf); - if (tf) { - res = new Expr{Expr::_MkTuple, {res}}; - res->flags = f; - res->here = loc; - res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); - } - lex.expect(clbr, clbr == tok_clbracket ? 
"']'" : "')'"); - return res; - } - TokenType t = lex.tok(); - if (t == tok_int_const) { - Expr* res = new Expr{Expr::_Const, lex.cur_location()}; - res->flags = Expr::_IsRvalue; - res->intval = td::string_to_int256(lex.cur_str_std_string()); - if (res->intval.is_null() || !res->intval->signed_fits_bits(257)) { - lex.error_at("invalid integer constant `", "`"); - } - res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); - lex.next(); - return res; - } - if (t == tok_string_const) { - std::string str = lex.cur_str_std_string(); - lex.next(); - char modifier = 0; - if (lex.tok() == tok_string_modifier) { - modifier = lex.cur_str()[0]; - lex.next(); - } - Expr* res; - switch (modifier) { - case 0: - case 's': - case 'a': - res = new Expr{Expr::_SliceConst, lex.cur_location()}; - res->e_type = TypeExpr::new_atomic(TypeExpr::_Slice); - break; - case 'u': - case 'h': - case 'H': - case 'c': - res = new Expr{Expr::_Const, lex.cur_location()}; - res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); - break; - default: - lex.error("invalid string type `" + std::string(1, modifier) + "`"); - } - res->flags = Expr::_IsRvalue; - switch (modifier) { - case 0: { - res->strval = td::hex_encode(str); - break; - } - case 's': { - res->strval = str; - unsigned char buff[128]; - int bits = (int)td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); - if (bits < 0) { - lex.error_at("Invalid hex bitstring constant `", "`"); - } - break; - } - case 'a': { // MsgAddressInt - // todo rewrite stdaddress parsing (if done, CMake dep "ton_crypto" can be replaced with "ton_crypto_core") - block::StdAddress a; - if (a.parse_addr(str)) { - res->strval = block::tlb::MsgAddressInt().pack_std_address(a)->as_bitslice().to_hex(); - } else { - lex.error_at("invalid standard address `", "`"); - } - break; - } - case 'u': { - res->intval = td::hex_string_to_int256(td::hex_encode(str)); - if (str.empty()) { - lex.error("empty integer ascii-constant"); - } - if 
(res->intval.is_null()) { - lex.error_at("too long integer ascii-constant `", "`"); - } - break; - } - case 'h': - case 'H': { - unsigned char hash[32]; - digest::hash_str(hash, str.data(), str.size()); - res->intval = td::bits_to_refint(hash, (modifier == 'h') ? 32 : 256, false); - break; - } - case 'c': { - res->intval = td::make_refint(td::crc32(td::Slice{str})); - break; - } - default: - __builtin_unreachable(); - } - return res; - } - if (t == tok_underscore) { - Expr* res = new Expr{Expr::_Hole, lex.cur_location()}; - res->val = -1; - res->flags = Expr::_IsLvalue; - res->e_type = TypeExpr::new_hole(); - lex.next(); - return res; - } - if (t == tok_var) { - Expr* res = new Expr{Expr::_Type, lex.cur_location()}; - res->flags = Expr::_IsType; - res->e_type = TypeExpr::new_hole(); - lex.next(); - return res; - } - if (t == tok_int || t == tok_cell || t == tok_slice || t == tok_builder || t == tok_cont || t == tok_type || t == tok_tuple) { - Expr* res = new Expr{Expr::_Type, lex.cur_location()}; - res->flags = Expr::_IsType; - res->e_type = TypeExpr::new_atomic(t); - lex.next(); - return res; - } - if (t == tok_identifier) { - auto sym = lookup_symbol(lex.cur_sym_idx()); - if (sym && dynamic_cast(sym->value)) { - auto val = dynamic_cast(sym->value); - Expr* res = new Expr{Expr::_Type, lex.cur_location()}; - res->flags = Expr::_IsType; - res->e_type = val->get_type(); - lex.next(); - return res; - } - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(lex, sym); - auto val = dynamic_cast(sym->value); - Expr* res = new Expr{Expr::_GlobVar, lex.cur_location()}; - res->e_type = val->get_type(); - res->sym = sym; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImpure; - lex.next(); - return res; - } - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(lex, sym); - auto val = dynamic_cast(sym->value); - Expr* res = new Expr{Expr::_None, lex.cur_location()}; - res->flags = Expr::_IsRvalue; - if 
(val->get_kind() == SymValConst::IntConst) { - res->cls = Expr::_Const; - res->intval = val->get_int_value(); - res->e_type = TypeExpr::new_atomic(tok_int); - } - else if (val->get_kind() == SymValConst::SliceConst) { - res->cls = Expr::_SliceConst; - res->strval = val->get_str_value(); - res->e_type = TypeExpr::new_atomic(tok_slice); - } - else { - lex.error("Invalid symbolic constant type"); - } - lex.next(); - return res; - } - if (sym && dynamic_cast(sym->value)) { - check_import_exists_when_using_sym(lex, sym); - } - bool auto_apply = false; - Expr* res = new Expr{Expr::_Var, lex.cur_location()}; - if (nv) { - res->val = ~lex.cur_sym_idx(); - res->e_type = TypeExpr::new_hole(); - res->flags = Expr::_IsLvalue; - // std::cerr << "defined new variable " << lex.cur().str << " : " << res->e_type << std::endl; - } else { - if (!sym) { - check_global_func(lex, lex.cur_sym_idx()); - sym = lookup_symbol(lex.cur_sym_idx()); - } - res->sym = sym; - SymVal* val = nullptr; - bool impure = false; - if (sym) { - val = dynamic_cast(sym->value); - } - if (!val) { - lex.error_at("undefined identifier `", "`"); - } else if (val->kind == SymValKind::_Func) { - res->e_type = val->get_type(); - res->cls = Expr::_GlobFunc; - auto_apply = val->auto_apply; - impure = !dynamic_cast(val)->is_marked_as_pure(); - } else if (val->idx < 0) { - lex.error_at("accessing variable `", "` being defined"); - } else { - res->val = val->idx; - res->e_type = val->get_type(); - // std::cerr << "accessing variable " << lex.cur().str << " : " << res->e_type << std::endl; - } - // std::cerr << "accessing symbol " << lex.cur().str << " : " << res->e_type << (val->impure ? " (impure)" : " (pure)") << std::endl; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (impure ? 
Expr::_IsImpure : 0); - } - if (auto_apply) { - int impure = res->flags & Expr::_IsImpure; - delete res; - res = new Expr{Expr::_Apply, sym, {}}; - res->flags = Expr::_IsRvalue | impure; - } - res->deduce_type(lex); - lex.next(); - return res; - } - lex.expect(tok_identifier, "identifier"); - return nullptr; -} - -// parse E { E } -Expr* parse_expr90(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr100(lex, code, nv); - while (lex.tok() == tok_oppar || lex.tok() == tok_opbracket || (lex.tok() == tok_identifier && !is_special_ident(lex.cur_sym_idx()))) { - if (res->is_type()) { - Expr* x = parse_expr100(lex, code, true); - x->chk_lvalue(lex); // chk_lrvalue() ? - TypeExpr* tp = res->e_type; - delete res; - res = new Expr{Expr::_TypeApply, {x}}; - res->e_type = tp; - res->here = lex.cur_location(); - try { - unify(res->e_type, x->e_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot transform expression of type " << x->e_type << " to explicitly requested type " << res->e_type - << ": " << ue; - lex.error(os.str()); - } - res->flags = x->flags; - } else { - Expr* x = parse_expr100(lex, code, false); - x->chk_rvalue(lex); - res = make_func_apply(res, x); - res->here = lex.cur_location(); - res->deduce_type(lex); - } - } - return res; -} - -// parse E { .method E | ~method E } -Expr* parse_expr80(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr90(lex, code, nv); - while (lex.tok() == tok_identifier && is_special_ident(lex.cur_sym_idx())) { - auto modify = is_tilde_ident(lex.cur_sym_idx()); - auto obj = res; - if (modify) { - obj->chk_lvalue(lex); - } else { - obj->chk_rvalue(lex); - } - SrcLocation loc = lex.cur_location(); - sym_idx_t name = lex.cur_sym_idx(); - auto sym = lookup_symbol(name); - if (!sym || !dynamic_cast(sym->value)) { - auto name1 = G.symbols.lookup(lex.cur_str().substr(1)); - if (name1) { - auto sym1 = lookup_symbol(name1); - if (sym1 && dynamic_cast(sym1->value)) { - name = name1; - sym = sym1; - 
} - } - } - check_global_func(lex, name); - if (G.is_verbosity(2)) { - std::cerr << "using symbol `" << G.symbols.get_name(name) << "` for method call of " << lex.cur_str() << std::endl; - } - sym = lookup_symbol(name); - SymValFunc* val = sym ? dynamic_cast(sym->value) : nullptr; - if (!val) { - lex.error_at("undefined method identifier `", "`"); - } - lex.next(); - auto x = parse_expr100(lex, code, false); - x->chk_rvalue(lex); - if (x->cls == Expr::_Tensor) { - res = new Expr{Expr::_Apply, name, {obj}}; - res->args.insert(res->args.end(), x->args.begin(), x->args.end()); - } else { - res = new Expr{Expr::_Apply, name, {obj, x}}; - } - res->here = loc; - res->flags = Expr::_IsRvalue | (val->is_marked_as_pure() ? 0 : Expr::_IsImpure); - res->deduce_type(lex); - if (modify) { - auto tmp = res; - res = new Expr{Expr::_LetFirst, {obj->copy(), tmp}}; - res->here = loc; - res->flags = tmp->flags; - res->set_val(name); - res->deduce_type(lex); - } - } - return res; -} - -// parse [ ~ | - | + ] E -Expr* parse_expr75(Lexer& lex, CodeBlob& code, bool nv) { - if (lex.tok() == tok_bitwise_not || lex.tok() == tok_minus || lex.tok() == tok_plus) { - TokenType t = lex.tok(); - sym_idx_t name = G.symbols.lookup_add(lex.cur_str_std_string() + "_"); - check_global_func(lex, name); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto x = parse_expr75(lex, code, false); - x->chk_rvalue(lex); - - // here's an optimization to convert "-1" (tok_minus tok_int_const) to a const -1, not to Expr::Apply(-,1) - // without this, everything still works, but Tolk looses some vars/stack knowledge for now (to be fixed later) - // in FunC, it was: - // `var fst = -1;` // is constantly 1 - // `var snd = - 1;` // is Expr::Apply(-), a comment "snd=1" is lost in stack layout comments, and so on - // hence, when after grammar modification tok_minus became a true unary operator (not a part of a number), - // and thus to preserve existing behavior until compiler parts are completely rewritten, 
handle this case here - if (x->cls == Expr::_Const) { - if (t == tok_bitwise_not) { - x->intval = ~x->intval; - } else if (t == tok_minus) { - x->intval = -x->intval; - } - if (!x->intval->signed_fits_bits(257)) { - lex.error("integer overflow"); - } - return x; - } - - auto res = new Expr{Expr::_Apply, name, {x}}; - res->here = loc; - res->set_val(t); - res->flags = Expr::_IsRvalue; - res->deduce_type(lex); - return res; - } else { - return parse_expr80(lex, code, nv); - } -} - -// parse E { (* | / | % | /% | ^/ | ~/ | ^% | ~% ) E } -Expr* parse_expr30(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr75(lex, code, nv); - while (lex.tok() == tok_mul || lex.tok() == tok_div || lex.tok() == tok_mod || lex.tok() == tok_divmod || lex.tok() == tok_divC || - lex.tok() == tok_divR || lex.tok() == tok_modC || lex.tok() == tok_modR) { - res->chk_rvalue(lex); - TokenType t = lex.tok(); - sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); - SrcLocation loc{lex.cur_location()}; - check_global_func(lex, name); - lex.next(); - auto x = parse_expr75(lex, code, false); - x->chk_rvalue(lex); - res = new Expr{Expr::_Apply, name, {res, x}}; - res->here = loc; - res->set_val(t); - res->flags = Expr::_IsRvalue; - res->deduce_type(lex); - } - return res; -} - -// parse E { (+ | -) E } -Expr* parse_expr20(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr30(lex, code, nv); - while (lex.tok() == tok_minus || lex.tok() == tok_plus) { - res->chk_rvalue(lex); - TokenType t = lex.tok(); - sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); - check_global_func(lex, name); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto x = parse_expr30(lex, code, false); - x->chk_rvalue(lex); - res = new Expr{Expr::_Apply, name, {res, x}}; - res->here = loc; - res->set_val(t); - res->flags = Expr::_IsRvalue; - res->deduce_type(lex); - } - return res; -} - -// parse E { ( << | >> | ~>> | ^>> ) E } 
-Expr* parse_expr17(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr20(lex, code, nv); - while (lex.tok() == tok_lshift || lex.tok() == tok_rshift || lex.tok() == tok_rshiftC || lex.tok() == tok_rshiftR) { - res->chk_rvalue(lex); - TokenType t = lex.tok(); - sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); - check_global_func(lex, name); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto x = parse_expr20(lex, code, false); - x->chk_rvalue(lex); - diagnose_addition_in_bitshift(loc, name, x); - res = new Expr{Expr::_Apply, name, {res, x}}; - res->here = loc; - res->set_val(t); - res->flags = Expr::_IsRvalue; - res->deduce_type(lex); - } - return res; -} - -// parse E [ (== | < | > | <= | >= | != | <=> ) E ] -Expr* parse_expr15(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr17(lex, code, nv); - if (lex.tok() == tok_eq || lex.tok() == tok_lt || lex.tok() == tok_gt || lex.tok() == tok_leq || lex.tok() == tok_geq || - lex.tok() == tok_neq || lex.tok() == tok_spaceship) { - res->chk_rvalue(lex); - TokenType t = lex.tok(); - sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); - check_global_func(lex, name); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto x = parse_expr17(lex, code, false); - x->chk_rvalue(lex); - res = new Expr{Expr::_Apply, name, {res, x}}; - res->here = loc; - res->set_val(t); - res->flags = Expr::_IsRvalue; - res->deduce_type(lex); - } - return res; -} - -// parse E { ( & | `|` | ^ ) E } -Expr* parse_expr14(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr15(lex, code, nv); - while (lex.tok() == tok_bitwise_and || lex.tok() == tok_bitwise_or || lex.tok() == tok_bitwise_xor) { - res->chk_rvalue(lex); - TokenType t = lex.tok(); - sym_idx_t name = G.symbols.lookup_add(std::string{"_"} + lex.cur_str_std_string() + "_"); - check_global_func(lex, name); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto x = 
parse_expr15(lex, code, false); - x->chk_rvalue(lex); - // diagnose tricky bitwise precedence, like "flags & 0xFF != 0" (& has lower precedence) - diagnose_bitwise_precedence(loc, name, res, x); - - res = new Expr{Expr::_Apply, name, {res, x}}; - res->here = loc; - res->set_val(t); - res->flags = Expr::_IsRvalue; - res->deduce_type(lex); - } - return res; -} - -// parse E [ ? E : E ] -Expr* parse_expr13(Lexer& lex, CodeBlob& code, bool nv) { - Expr* res = parse_expr14(lex, code, nv); - if (lex.tok() == tok_question) { - res->chk_rvalue(lex); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto x = parse_expr(lex, code, false); - x->chk_rvalue(lex); - lex.expect(tok_colon, "':'"); - auto y = parse_expr13(lex, code, false); - y->chk_rvalue(lex); - res = new Expr{Expr::_CondExpr, {res, x, y}}; - res->here = loc; - res->flags = Expr::_IsRvalue; - res->deduce_type(lex); - } - return res; -} - -// parse LE1 (= | += | -= | ... ) E2 -Expr* parse_expr10(Lexer& lex, CodeBlob& code, bool nv) { - auto x = parse_expr13(lex, code, nv); - TokenType t = lex.tok(); - if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_divR || t == tok_set_divC || - t == tok_set_mod || t == tok_set_modC || t == tok_set_modR || t == tok_set_lshift || t == tok_set_rshift || t == tok_set_rshiftC || - t == tok_set_rshiftR || t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { - x->chk_lvalue(lex); - x->chk_rvalue(lex); - sym_idx_t name = G.symbols.lookup_add(std::string{"^_"} + lex.cur_str_std_string() + "_"); - check_global_func(lex, name); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto y = parse_expr10(lex, code, false); - y->chk_rvalue(lex); - Expr* z = new Expr{Expr::_Apply, name, {x, y}}; - z->here = loc; - z->set_val(t); - z->flags = Expr::_IsRvalue; - z->deduce_type(lex); - Expr* res = new Expr{Expr::_Letop, {x->copy(), z}}; - res->here = loc; - res->flags = (x->flags & ~Expr::_IsType) | 
Expr::_IsRvalue; - res->set_val(t); - res->deduce_type(lex); - return res; - } else if (t == tok_assign) { - x->chk_lvalue(lex); - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto y = parse_expr10(lex, code, false); - y->chk_rvalue(lex); - x->predefine_vars(); - x->define_new_vars(code); - Expr* res = new Expr{Expr::_Letop, {x, y}}; - res->here = loc; - res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue; - res->set_val(t); - res->deduce_type(lex); - return res; - } else { - return x; - } -} - -Expr* parse_expr(Lexer& lex, CodeBlob& code, bool nv) { - return parse_expr10(lex, code, nv); -} - -namespace blk_fl { -enum { end = 1, ret = 2, empty = 4 }; -typedef int val; -constexpr val init = end | empty; -void combine(val& x, const val y) { - x |= y & ret; - x &= y | ~(end | empty); -} -void combine_parallel(val& x, const val y) { - x &= y | ~(ret | empty); - x |= y & end; -} -} // namespace blk_fl - -blk_fl::val parse_return_stmt(Lexer& lex, CodeBlob& code) { - auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex); - try { - // std::cerr << "in return: "; - unify(expr->e_type, code.ret_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "previous function return type " << code.ret_type - << " cannot be unified with return statement expression type " << expr->e_type << ": " << ue; - lex.error(os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - code.emplace_back(lex.cur_location(), Op::_Return, std::move(tmp_vars)); - lex.expect(tok_semicolon, "';'"); - return blk_fl::ret; -} - -blk_fl::val parse_implicit_ret_stmt(Lexer& lex, CodeBlob& code) { - auto ret_type = TypeExpr::new_unit(); - try { - // std::cerr << "in implicit return: "; - unify(ret_type, code.ret_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "previous function return type " << code.ret_type - << " cannot be unified with implicit end-of-block return type " << ret_type << ": " << ue; - lex.error(os.str()); - } - 
code.emplace_back(lex.cur_location(), Op::_Return); - return blk_fl::ret; -} - -blk_fl::val parse_stmt(Lexer& lex, CodeBlob& code); - -blk_fl::val parse_block_stmt(Lexer& lex, CodeBlob& code, bool no_new_scope = false) { - lex.expect(tok_opbrace, "'{'"); - if (!no_new_scope) { - open_scope(lex.cur_location()); - } - blk_fl::val res = blk_fl::init; - bool warned = false; - while (lex.tok() != tok_clbrace) { - if (!(res & blk_fl::end) && !warned) { - lex.cur_location().show_warning("unreachable code"); - warned = true; - } - blk_fl::combine(res, parse_stmt(lex, code)); - } - if (!no_new_scope) { - close_scope(lex.cur_location()); - } - lex.expect(tok_clbrace, "'}'"); - return res; -} - -blk_fl::val parse_repeat_stmt(Lexer& lex, CodeBlob& code) { - SrcLocation loc{lex.cur_location()}; - lex.expect(tok_repeat, "'repeat'"); - auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "repeat count value of type " << expr->e_type << " is not an integer: " << ue; - lex.error(os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - if (tmp_vars.size() != 1) { - lex.error("repeat count value is not a singleton"); - } - Op& repeat_op = code.emplace_back(loc, Op::_Repeat, tmp_vars); - code.push_set_cur(repeat_op.block0); - blk_fl::val res = parse_block_stmt(lex, code); - code.close_pop_cur(lex.cur_location()); - return res | blk_fl::end; -} - -blk_fl::val parse_while_stmt(Lexer& lex, CodeBlob& code) { - SrcLocation loc{lex.cur_location()}; - lex.expect(tok_while, "'while'"); - auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "while condition value of type " << expr->e_type << " is not an integer: " << ue; - lex.error(os.str()); - } - Op& 
while_op = code.emplace_back(loc, Op::_While); - code.push_set_cur(while_op.block0); - while_op.left = expr->pre_compile(code); - code.close_pop_cur(lex.cur_location()); - if (while_op.left.size() != 1) { - lex.error("while condition value is not a singleton"); - } - code.push_set_cur(while_op.block1); - blk_fl::val res1 = parse_block_stmt(lex, code); - code.close_pop_cur(lex.cur_location()); - return res1 | blk_fl::end; -} - -blk_fl::val parse_do_stmt(Lexer& lex, CodeBlob& code) { - Op& while_op = code.emplace_back(lex.cur_location(), Op::_Until); - lex.expect(tok_do, "'do'"); - code.push_set_cur(while_op.block0); - open_scope(lex.cur_location()); - blk_fl::val res = parse_block_stmt(lex, code, true); - lex.expect(tok_until, "'until'"); - auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex); - close_scope(lex.cur_location()); - auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, cnt_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`until` condition value of type " << expr->e_type << " is not an integer: " << ue; - lex.error(os.str()); - } - while_op.left = expr->pre_compile(code); - code.close_pop_cur(lex.cur_location()); - if (while_op.left.size() != 1) { - lex.error("`until` condition value is not a singleton"); - } - return res & ~blk_fl::empty; -} - -blk_fl::val parse_try_catch_stmt(Lexer& lex, CodeBlob& code) { - code.require_callxargs = true; - lex.expect(tok_try, "'try'"); - Op& try_catch_op = code.emplace_back(lex.cur_location(), Op::_TryCatch); - code.push_set_cur(try_catch_op.block0); - blk_fl::val res0 = parse_block_stmt(lex, code); - code.close_pop_cur(lex.cur_location()); - lex.expect(tok_catch, "'catch'"); - code.push_set_cur(try_catch_op.block1); - open_scope(lex.cur_location()); - Expr* expr = parse_expr(lex, code, true); - expr->chk_lvalue(lex); - TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int)); - try { - unify(expr->e_type, 
tvm_error_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`catch` arguments have incorrect type " << expr->e_type << ": " << ue; - lex.error(os.str()); - } - expr->predefine_vars(); - expr->define_new_vars(code); - try_catch_op.left = expr->pre_compile(code); - tolk_assert(try_catch_op.left.size() == 2 || try_catch_op.left.size() == 1); - blk_fl::val res1 = parse_block_stmt(lex, code); - close_scope(lex.cur_location()); - code.close_pop_cur(lex.cur_location()); - blk_fl::combine_parallel(res0, res1); - return res0; -} - -blk_fl::val parse_if_stmt(Lexer& lex, CodeBlob& code, TokenType first_lex = tok_if) { - SrcLocation loc{lex.cur_location()}; - lex.next(); - auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex); - auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int); - try { - unify(expr->e_type, flag_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`if` condition value of type " << expr->e_type << " is not an integer: " << ue; - lex.error(os.str()); - } - std::vector tmp_vars = expr->pre_compile(code); - if (tmp_vars.size() != 1) { - lex.error("condition value is not a singleton"); - } - Op& if_op = code.emplace_back(loc, Op::_If, tmp_vars); - code.push_set_cur(if_op.block0); - blk_fl::val res1 = parse_block_stmt(lex, code); - blk_fl::val res2 = blk_fl::init; - code.close_pop_cur(lex.cur_location()); - if (lex.tok() == tok_else) { - lex.expect(tok_else, "'else'"); - code.push_set_cur(if_op.block1); - res2 = parse_block_stmt(lex, code); - code.close_pop_cur(lex.cur_location()); - } else if (lex.tok() == tok_elseif || lex.tok() == tok_elseifnot) { - code.push_set_cur(if_op.block1); - res2 = parse_if_stmt(lex, code, lex.tok()); - code.close_pop_cur(lex.cur_location()); - } else { - if_op.block1 = std::make_unique(lex.cur_location(), Op::_Nop); - } - if (first_lex == tok_ifnot || first_lex == tok_elseifnot) { - std::swap(if_op.block0, if_op.block1); - } - blk_fl::combine_parallel(res1, res2); - return res1; -} - 
-blk_fl::val parse_stmt(Lexer& lex, CodeBlob& code) { - switch (lex.tok()) { - case tok_return: { - lex.next(); - return parse_return_stmt(lex, code); - } - case tok_opbrace: { - return parse_block_stmt(lex, code); - } - case tok_semicolon: { - lex.next(); - return blk_fl::init; - } - case tok_repeat: - return parse_repeat_stmt(lex, code); - case tok_if: - case tok_ifnot: - return parse_if_stmt(lex, code, lex.tok()); - case tok_do: - return parse_do_stmt(lex, code); - case tok_while: - return parse_while_stmt(lex, code); - case tok_try: - return parse_try_catch_stmt(lex, code); - default: { - auto expr = parse_expr(lex, code); - expr->chk_rvalue(lex); - expr->pre_compile(code); - lex.expect(tok_semicolon, "';'"); - return blk_fl::end; - } - } -} - -CodeBlob* parse_func_body(Lexer& lex, FormalArgList arg_list, TypeExpr* ret_type, bool marked_as_pure) { - lex.expect(tok_opbrace, "'{'"); - CodeBlob* blob = new CodeBlob{ret_type}; - if (marked_as_pure) { - blob->flags |= CodeBlob::_ForbidImpure; - } - blob->import_params(std::move(arg_list)); - blk_fl::val res = blk_fl::init; - bool warned = false; - while (lex.tok() != tok_clbrace) { - if (!(res & blk_fl::end) && !warned) { - lex.cur_location().show_warning("unreachable code"); - warned = true; - } - blk_fl::combine(res, parse_stmt(lex, *blob)); - } - if (res & blk_fl::end) { - parse_implicit_ret_stmt(lex, *blob); - } - blob->close_blk(lex.cur_location()); - lex.expect(tok_clbrace, "'}'"); - return blob; -} - -SymValAsmFunc* parse_asm_func_body(Lexer& lex, TypeExpr* func_type, const FormalArgList& arg_list, TypeExpr* ret_type, - bool marked_as_pure) { - SrcLocation loc = lex.cur_location(); - lex.expect(tok_asm, "'asm'"); - int cnt = (int)arg_list.size(); - int width = ret_type->get_width(); - if (width < 0 || width > 16) { - throw ParseError{loc, "return type of an assembler built-in function must have a well-defined fixed width"}; - } - if (arg_list.size() > 16) { - throw ParseError{loc, "assembler built-in function 
must have at most 16 arguments"}; - } - std::vector cum_arg_width; - cum_arg_width.push_back(0); - int tot_width = 0; - for (auto& arg : arg_list) { - int arg_width = std::get(arg)->get_width(); - if (arg_width < 0 || arg_width > 16) { - throw ParseError{std::get(arg), - "parameters of an assembler built-in function must have a well-defined fixed width"}; - } - cum_arg_width.push_back(tot_width += arg_width); - } - std::vector asm_ops; - std::vector arg_order, ret_order; - if (lex.tok() == tok_oppar) { - lex.next(); - if (lex.tok() != tok_mapsto) { - std::vector visited(cnt, false); - for (int i = 0; i < cnt; i++) { - lex.check(tok_identifier, "identifier"); - auto sym = lookup_symbol(lex.cur_sym_idx()); - int j; - for (j = 0; j < cnt; j++) { - if (std::get(arg_list[j]) == sym) { - break; - } - } - if (j == cnt) { - lex.error("formal argument name expected"); - } - if (visited[j]) { - lex.error("formal argument listed twice"); - } - visited[j] = true; - int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; - while (c1 < c2) { - arg_order.push_back(c1++); - } - lex.next(); - } - tolk_assert(arg_order.size() == (unsigned)tot_width); - } - if (lex.tok() == tok_mapsto) { - lex.next(); - std::vector visited(width, false); - for (int i = 0; i < width; i++) { - if (lex.tok() != tok_int_const || lex.cur_str().size() > 3) { - lex.expect(tok_int_const, "number"); - } - int j = atoi(lex.cur_str_std_string().c_str()); - if (j < 0 || j >= width || visited[j]) { - lex.error("expected integer return value index 0 .. width-1"); - } - visited[j] = true; - ret_order.push_back(j); - lex.next(); - } - } - lex.expect(tok_clpar, "')'"); - } - while (lex.tok() == tok_string_const) { - std::string ops = lex.cur_str_std_string(); // \n\n... 
- std::string op; - for (const char& c : ops) { - if (c == '\n' || c == '\r') { - if (!op.empty()) { - asm_ops.push_back(AsmOp::Parse(op, cnt, width)); - if (asm_ops.back().is_custom()) { - cnt = width; - } - op.clear(); - } - } else { - op.push_back(c); - } - } - if (!op.empty()) { - asm_ops.push_back(AsmOp::Parse(op, cnt, width)); - if (asm_ops.back().is_custom()) { - cnt = width; - } - } - lex.next(); - } - if (asm_ops.empty()) { - lex.error("string with assembler instruction expected"); - } - lex.expect(tok_semicolon, "';'"); - std::string crc_s; - for (const AsmOp& asm_op : asm_ops) { - crc_s += asm_op.op; - } - crc_s.push_back(!marked_as_pure); - for (const int& x : arg_order) { - crc_s += std::string((const char*) (&x), (const char*) (&x + 1)); - } - for (const int& x : ret_order) { - crc_s += std::string((const char*) (&x), (const char*) (&x + 1)); - } - auto res = new SymValAsmFunc{func_type, std::move(asm_ops), marked_as_pure}; - res->arg_order = std::move(arg_order); - res->ret_order = std::move(ret_order); - res->crc = td::crc64(crc_s); - return res; -} - -std::vector parse_type_var_list(Lexer& lex) { - std::vector res; - lex.expect(tok_forall, "'forall'"); - int idx = 0; - while (true) { - if (lex.tok() == tok_type) { - lex.next(); - } - if (lex.tok() != tok_identifier) { - lex.error("free type identifier expected"); - } - SrcLocation loc = lex.cur_location(); - if (G.prohibited_var_names.count(G.symbols.get_name(lex.cur_sym_idx()))) { - throw ParseError{loc, PSTRING() << "symbol `" << G.symbols.get_name(lex.cur_sym_idx()) - << "` cannot be redefined as a variable"}; - } - SymDef* new_sym_def = define_symbol(lex.cur_sym_idx(), true, loc); - if (!new_sym_def || new_sym_def->value) { - lex.error_at("redefined type variable `", "`"); - } - auto var = TypeExpr::new_var(idx); - new_sym_def->value = new SymValType{SymValKind::_Typename, idx++, var}; - res.push_back(var); - lex.next(); - if (lex.tok() != tok_comma) { - break; - } - lex.next(); - } - 
lex.expect(tok_mapsto, "'->'"); - return res; -} - -void type_var_usage(TypeExpr* expr, const std::vector& typevars, std::vector& used) { - if (expr->constr != TypeExpr::te_Var) { - for (auto arg : expr->args) { - type_var_usage(arg, typevars, used); - } - return; - } - for (std::size_t i = 0; i < typevars.size(); i++) { - if (typevars[i] == expr) { - used.at(i) = true; - return; - } - } - return; -} - -TypeExpr* compute_type_closure(TypeExpr* expr, const std::vector& typevars) { - if (typevars.empty()) { - return expr; - } - std::vector used(typevars.size(), false); - type_var_usage(expr, typevars, used); - std::vector used_vars; - for (std::size_t i = 0; i < typevars.size(); i++) { - if (used.at(i)) { - used_vars.push_back(typevars[i]); - } - } - if (!used_vars.empty()) { - expr = TypeExpr::new_forall(std::move(used_vars), expr); - } - return expr; -} - -// if a function looks like `T f(...args) { return anotherF(...args); }`, -// set a bit to flags -// then, all calls to `f(...)` will be effectively replaced with `anotherF(...)` -void detect_if_function_just_wraps_another(SymValCodeFunc* v_current, const td::RefInt256 &method_id) { - const std::string& function_name = v_current->code->name; - - // in "AST" representation, the first is Op::_Import (input arguments, even if none) - const auto& op_import = v_current->code->ops; - tolk_assert(op_import && op_import->cl == Op::_Import); - - // then Op::_Call (anotherF) - const Op* op_call = op_import->next.get(); - if (!op_call || op_call->cl != Op::_Call) - return; - tolk_assert(op_call->left.size() == 1); - - const auto& op_return = op_call->next; - if (!op_return || op_return->cl != Op::_Return || op_return->left.size() != 1) - return; - - bool indices_expected = static_cast(op_import->left.size()) == op_call->left[0] && op_call->left[0] == op_return->left[0]; - if (!indices_expected) - return; - - const SymDef* f_called = op_call->fun_ref; - const SymValFunc* v_called = dynamic_cast(f_called->value); - if 
(!v_called) - return; - - // `return` must use all arguments, e.g. `return (_0,_2,_1)`, not `return (_0,_1,_1)` - int args_used_mask = 0; - for (var_idx_t arg_idx : op_call->right) { - args_used_mask |= 1 << arg_idx; - } - if (args_used_mask != (1 << op_call->right.size()) - 1) - return; - - // detect getters (having method_id), they should not be treated as wrappers - // v_current->method_id will be assigned later; todo refactor function parsing completely, it's weird - // moreover, `recv_external()` and others are also exported, but FunC is unaware of method_id - // (it's assigned by Fift later) - // so, for now, just handle "special" function names, the same as in Asm.fif - if (!method_id.is_null()) - return; - if (function_name == "main" || function_name == "recv_internal" || function_name == "recv_external" || - function_name == "run_ticktock" || function_name == "split_prepare" || function_name == "split_install") - return; - - // all types must be strictly defined (on mismatch, a compilation error will be triggered anyway) - if (v_called->sym_type->has_unknown_inside() || v_current->sym_type->has_unknown_inside()) - return; - // avoid situations like `f(int a, (int,int) b)`, inlining will be cumbersome - if (v_current->get_arg_type()->get_width() != static_cast(op_call->right.size())) - return; - // 'return true;' (false, nil) are (surprisingly) also function calls, with auto_apply=true - if (v_called->auto_apply) - return; - // if an original is marked `pure`, and this one doesn't, it's okay; just check for inline_ref storage - if (v_current->is_inline_ref()) - return; - - // ok, f_current is a wrapper - v_current->flags |= SymValFunc::flagWrapsAnotherF; - if (G.is_verbosity(2)) { - std::cerr << function_name << " -> " << f_called->name() << std::endl; - } -} - -static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name) { - unsigned int crc = td::crc16(static_cast(func_name)); - return td::make_refint((crc & 0xffff) | 0x10000); -} - 
-// todo rewrite function declaration parsing completely, it's weird -void parse_func_def(Lexer& lex) { - SrcLocation loc = lex.cur_location(); - open_scope(loc); - std::vector type_vars; - bool is_get_method = false; - if (lex.tok() == tok_forall) { - type_vars = parse_type_var_list(lex); - } else if (lex.tok() == tok_get) { - is_get_method = true; - lex.next(); - } - auto ret_type = parse_type(lex); - if (lex.tok() != tok_identifier) { - lex.error("function name identifier expected"); - } - std::string func_name = lex.cur_str_std_string(); - int func_sym_idx = lex.cur_sym_idx(); - lex.next(); - FormalArgList arg_list = parse_formal_args(lex); - bool marked_as_pure = false; - if (lex.tok() == tok_impure) { - static bool warning_shown = false; - if (!warning_shown) { - lex.cur_location().show_warning("`impure` specifier is deprecated. All functions are impure by default, use `pure` to mark a function as pure"); - warning_shown = true; - } - lex.next(); - } else if (lex.tok() == tok_pure) { - marked_as_pure = true; - lex.next(); - } - int flags_inline = 0; - if (lex.tok() == tok_inline) { - flags_inline = SymValFunc::flagInline; - lex.next(); - } else if (lex.tok() == tok_inlineref) { - flags_inline = SymValFunc::flagInlineRef; - lex.next(); - } - td::RefInt256 method_id; - if (lex.tok() == tok_method_id) { - if (is_get_method) { - lex.error("both `get` and `method_id` are not allowed"); - } - lex.next(); - if (lex.tok() == tok_oppar) { // method_id(N) - lex.next(); - method_id = td::string_to_int256(lex.cur_str_std_string()); - lex.expect(tok_int_const, "number"); - if (method_id.is_null()) { - lex.error_at("invalid integer constant `", "`"); - } - lex.expect(tok_clpar, "')'"); - } else { - static bool warning_shown = false; - if (!warning_shown) { - lex.cur_location().show_warning("`method_id` specifier is deprecated, use `get` keyword.\nExample: `get int seqno() { ... 
}`"); - warning_shown = true; - } - method_id = calculate_method_id_by_func_name(func_name); - } - } - if (is_get_method) { - tolk_assert(method_id.is_null()); - method_id = calculate_method_id_by_func_name(func_name); - for (const SymDef* other : G.glob_get_methods) { - if (!td::cmp(dynamic_cast(other->value)->method_id, method_id)) { - lex.error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" + func_name + "` produce the same hash. Consider renaming one of these functions."); - } - } - } - TypeExpr* func_type = TypeExpr::new_map(extract_total_arg_type(arg_list), ret_type); - func_type = compute_type_closure(func_type, type_vars); - if (lex.tok() == tok_builtin) { - const SymDef* builtin_func = lookup_symbol(G.symbols.lookup(func_name)); - const SymValFunc* func_val = builtin_func ? dynamic_cast(builtin_func->value) : nullptr; - if (!func_val || !func_val->is_builtin()) { - lex.error("`builtin` used for non-builtin function"); - } -#ifdef TOLK_DEBUG - // in release, we don't need this check, since `builtin` is used only in stdlib.tolk, which is our responsibility - if (!func_val->sym_type->equals_to(func_type) || func_val->is_marked_as_pure() != marked_as_pure) { - lex.error("declaration for `builtin` function doesn't match an actual one"); - } -#endif - lex.next(); - lex.expect(tok_semicolon, "';'"); - close_scope(lex.cur_location()); - return; - } - if (lex.tok() != tok_semicolon && lex.tok() != tok_opbrace && lex.tok() != tok_asm) { - lex.expect(tok_opbrace, "function body block"); - } - if (G.is_verbosity(1)) { - std::cerr << "function " << func_name << " : " << func_type << std::endl; - } - SymDef* func_sym = define_global_symbol(func_sym_idx, 0, loc); - tolk_assert(func_sym); - SymValFunc* func_sym_val = dynamic_cast(func_sym->value); - if (func_sym->value) { - if (func_sym->value->kind != SymValKind::_Func || !func_sym_val) { - lex.error("was not defined as a function before"); - } - try { - unify(func_sym_val->sym_type, 
func_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "previous type of function " << func_name << " : " << func_sym_val->sym_type - << " cannot be unified with new type " << func_type << ": " << ue; - lex.error(os.str()); - } - } - if (lex.tok() == tok_semicolon) { - make_new_glob_func(func_sym, func_type, marked_as_pure); - lex.next(); - } else if (lex.tok() == tok_opbrace) { - if (dynamic_cast(func_sym_val)) { - lex.error("function `" + func_name + "` has been already defined as an assembler built-in"); - } - SymValCodeFunc* func_sym_code; - if (func_sym_val) { - func_sym_code = dynamic_cast(func_sym_val); - if (!func_sym_code) { - lex.error("function `" + func_name + "` has been already defined in an yet-unknown way"); - } - } else { - func_sym_code = make_new_glob_func(func_sym, func_type, marked_as_pure); - } - if (func_sym_code->code) { - lex.error("redefinition of function `"s + func_name + "`"); - } - if (marked_as_pure && ret_type->get_width() == 0) { - lex.error("a pure function should return something, otherwise it will be optimized out anyway"); - } - CodeBlob* code = parse_func_body(lex, arg_list, ret_type, marked_as_pure); - code->name = func_name; - code->loc = loc; - // code->print(std::cerr); // !!!DEBUG!!! 
- func_sym_code->code = code; - detect_if_function_just_wraps_another(func_sym_code, method_id); - } else { - SrcLocation asm_location = lex.cur_location(); - SymValAsmFunc* asm_func = parse_asm_func_body(lex, func_type, arg_list, ret_type, marked_as_pure); -#ifdef TOLK_DEBUG - asm_func->name = func_name; -#endif - if (func_sym_val) { - if (dynamic_cast(func_sym_val)) { - throw ParseError(asm_location, "function `" + func_name + "` was already declared as an ordinary function"); - } - SymValAsmFunc* asm_func_old = dynamic_cast(func_sym_val); - if (asm_func_old) { - if (asm_func->crc != asm_func_old->crc) { - throw ParseError(asm_location, "redefinition of built-in assembler function `" + func_name + "`"); - } - } else { - throw ParseError(asm_location, "redefinition of previously (somehow) defined function `" + func_name + "`"); - } - } - func_sym->value = asm_func; - } - if (method_id.not_null()) { - auto val = dynamic_cast(func_sym->value); - if (!val) { - lex.error("cannot set method id for unknown function `" + func_name + "`"); - } - if (val->method_id.is_null()) { - val->method_id = std::move(method_id); - } else if (td::cmp(val->method_id, method_id) != 0) { - lex.error("integer method identifier for `" + func_name + "` changed from " + - val->method_id->to_dec_string() + " to a different value " + method_id->to_dec_string()); - } - } - if (flags_inline) { - auto val = dynamic_cast(func_sym->value); - if (!val) { - lex.error("cannot set unknown function `" + func_name + "` as an inline"); - } - if (!val->is_inline() && !val->is_inline_ref()) { - val->flags |= flags_inline; - } else if ((val->flags & (SymValFunc::flagInline | SymValFunc::flagInlineRef)) != flags_inline) { - lex.error("inline mode for `" + func_name + "` changed with respect to a previous declaration"); - } - } - if (is_get_method) { - auto val = dynamic_cast(func_sym->value); - if (!val) { - lex.error("cannot set unknown function `" + func_name + "` as a get method"); - } - val->flags |= 
SymValFunc::flagGetMethod; - G.glob_get_methods.push_back(func_sym); - } - if (G.is_verbosity(1)) { - std::cerr << "new type of function " << func_name << " : " << func_type << std::endl; - } - close_scope(lex.cur_location()); -} - -void parse_pragma(Lexer& lex) { - SrcLocation loc = lex.cur_location(); - lex.next_special(tok_pragma_name, "pragma name"); - std::string_view pragma_name = lex.cur_str(); - if (pragma_name == "version") { - lex.next(); - TokenType cmp_tok = lex.tok(); - char op = '='; bool eq = false; - if (cmp_tok == tok_gt || cmp_tok == tok_geq) { - op = '>'; - eq = cmp_tok == tok_geq; - } else if (cmp_tok == tok_lt || cmp_tok == tok_leq) { - op = '<'; - eq = cmp_tok == tok_leq; - } else if (cmp_tok == tok_eq) { - op = '='; - } else if (cmp_tok == tok_bitwise_xor) { - op = '^'; - } else { - lex.error("invalid comparison operator"); - } - lex.next_special(tok_semver, "semver"); - std::string_view pragma_value = lex.cur_str(); - int sem_ver[3] = {0, 0, 0}; - char segs = 1; - auto stoi = [&](std::string_view s) { - auto R = td::to_integer_safe(static_cast(s)); - if (R.is_error()) { - lex.error("invalid semver format"); - } - return R.move_as_ok(); - }; - std::istringstream iss_value(static_cast(pragma_value)); - for (int idx = 0; idx < 3; idx++) { - std::string s{"0"}; - std::getline(iss_value, s, '.'); - sem_ver[idx] = stoi(s); - } - // End reading semver from source code - int tolk_ver[3] = {0, 0, 0}; - std::istringstream iss(tolk_version); - for (int idx = 0; idx < 3; idx++) { - std::string s; - std::getline(iss, s, '.'); - tolk_ver[idx] = stoi(s); - } - // End parsing embedded semver - bool match = true; - switch (op) { - case '=': - if ((tolk_ver[0] != sem_ver[0]) || - (tolk_ver[1] != sem_ver[1]) || - (tolk_ver[2] != sem_ver[2])) { - match = false; - } - break; - case '>': - if ( ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && 
(tolk_ver[2] < sem_ver[2])) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] < sem_ver[1])) || - ((tolk_ver[0] < sem_ver[0])) ) { - match = false; - } - break; - case '<': - if ( ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] > sem_ver[2])) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] > sem_ver[1])) || - ((tolk_ver[0] > sem_ver[0])) ) { - match = false; - } - break; - case '^': - if ( ((segs == 3) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] != sem_ver[1]) || (tolk_ver[2] < sem_ver[2]))) - || ((segs == 2) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] < sem_ver[1]))) - || ((segs == 1) && ((tolk_ver[0] < sem_ver[0]))) ) { - match = false; - } - break; - default: - __builtin_unreachable(); - } - if (!match) { - throw ParseError(loc, std::string("Tolk version ") + tolk_version + " does not satisfy this condition"); - } - } else if (pragma_name == G.pragma_allow_post_modification.name()) { - G.pragma_allow_post_modification.enable(loc); - } else if (pragma_name == G.pragma_compute_asm_ltr.name()) { - G.pragma_compute_asm_ltr.enable(loc); - } else if (pragma_name == G.pragma_remove_unused_functions.name()) { - G.pragma_remove_unused_functions.enable(loc); - } else { - lex.error("unknown pragma name"); - } - lex.next(); - lex.expect(tok_semicolon, "';'"); -} - -void parse_include(Lexer& lex, SrcFile* parent_file) { - SrcLocation loc = lex.cur_location(); - lex.expect(tok_include, "#include"); - if (lex.tok() != tok_string_const) { - lex.expect(tok_string_const, "source file name"); - } - std::string rel_filename = lex.cur_str_std_string(); - if (rel_filename.empty()) { - lex.error("imported file name is an empty string"); - } - if (size_t rc = parent_file->rel_filename.rfind('/'); rc != std::string::npos) { - rel_filename = parent_file->rel_filename.substr(0, rc + 1) + rel_filename; - } - lex.next(); - lex.expect(tok_semicolon, 
"';'"); - - td::Result locate_res = locate_source_file(rel_filename); - if (locate_res.is_error()) { - throw ParseError(loc, "Failed to import: " + locate_res.move_as_error().message().str()); - } - - SrcFile* imported_file = locate_res.move_as_ok(); - parent_file->imports.emplace_back(SrcFile::ImportStatement{imported_file}); - if (!imported_file->was_parsed) { - parse_source_file(imported_file); - } -} - -// this function either throws (on any error) or returns nothing meaning success (filling global variables) -void parse_source_file(SrcFile* file) { - if (!file->is_stdlib_file()) { - G.generated_from += file->rel_filename; - G.generated_from += ", "; - } - file->was_parsed = true; - - Lexer lex(file); - while (!lex.is_eof()) { - if (lex.tok() == tok_pragma) { - parse_pragma(lex); - } else if (lex.tok() == tok_include) { - parse_include(lex, file); - } else if (lex.tok() == tok_global) { - parse_global_var_decls(lex); - } else if (lex.tok() == tok_const) { - parse_const_decls(lex); - } else { - parse_func_def(lex); - } - } -} - -td::Result locate_source_file(const std::string& rel_filename) { - td::Result path = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::Realpath, rel_filename.c_str()); - if (path.is_error()) { - return path.move_as_error(); - } - - std::string abs_filename = path.move_as_ok(); - if (SrcFile* file = G.all_src_files.find_file(abs_filename)) { - return file; // file was already parsed (imported from somewhere else) - } - - td::Result text = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::ReadFile, abs_filename.c_str()); - if (text.is_error()) { - return text.move_as_error(); - } - - return G.all_src_files.register_file(rel_filename, abs_filename, text.move_as_ok()); -} - -} // namespace tolk diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index 552abd11b..9463dbbde 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -22,12 +22,6 @@ namespace tolk { -Symbol::Symbol(std::string str, sym_idx_t idx) : 
str(std::move(str)), idx(idx) { - subclass = this->str[0] == '.' ? SymbolSubclass::dot_identifier - : this->str[0] == '~' ? SymbolSubclass::tilde_identifier - : SymbolSubclass::undef; -} - std::string Symbol::unknown_symbol_name(sym_idx_t i) { if (!i) { return "_"; @@ -78,7 +72,7 @@ void open_scope(SrcLocation loc) { G.scope_opened_at.push_back(loc); } -void close_scope(SrcLocation loc) { +void close_scope() { if (!G.scope_level) { throw Fatal{"cannot close the outer scope"}; } diff --git a/tolk/symtable.h b/tolk/symtable.h index 0566122a4..243437d39 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -36,18 +36,11 @@ struct SymValBase { }; -enum class SymbolSubclass { - undef = 0, - dot_identifier = 1, // begins with . (a const method) - tilde_identifier = 2 // begins with ~ (a non-const method) -}; - struct Symbol { std::string str; sym_idx_t idx; - SymbolSubclass subclass; - Symbol(std::string str, sym_idx_t idx); + Symbol(std::string str, sym_idx_t idx) : str(std::move(str)), idx(idx) {} static std::string unknown_symbol_name(sym_idx_t i); }; @@ -64,10 +57,10 @@ class SymTable { public: static constexpr sym_idx_t not_found = 0; - sym_idx_t lookup(const std::string_view& str, int mode = 0) { + sym_idx_t lookup(std::string_view str, int mode = 0) { return gen_lookup(str, mode); } - sym_idx_t lookup_add(const std::string& str) { + sym_idx_t lookup_add(std::string_view str) { return gen_lookup(str, 1); } Symbol* operator[](sym_idx_t i) const { @@ -76,9 +69,6 @@ class SymTable { std::string get_name(sym_idx_t i) const { return sym[i] ? sym[i]->str : Symbol::unknown_symbol_name(i); } - SymbolSubclass get_subclass(sym_idx_t i) const { - return sym[i] ? 
sym[i]->subclass : SymbolSubclass::undef; - } }; struct SymTableOverflow { @@ -104,7 +94,7 @@ struct SymDef { void open_scope(SrcLocation loc); -void close_scope(SrcLocation loc); +void close_scope(); SymDef* lookup_symbol(sym_idx_t idx); SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, SrcLocation loc = {}); diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index 066fecbdf..044d62f07 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -27,7 +27,8 @@ #include "compiler-state.h" #include "lexer.h" #include -#include "git.h" +#include "ast-from-tokens.h" +#include "ast-to-legacy.h" #include #include "td/utils/port/path.h" #include @@ -269,13 +270,13 @@ int tolk_proceed(const std::string &entrypoint_file_name) { if (locate_res.is_error()) { throw Fatal("Failed to locate stdlib: " + locate_res.error().message().str()); } - parse_source_file(locate_res.move_as_ok()); + process_file_ast(parse_src_file_to_ast(locate_res.move_as_ok())); } td::Result locate_res = locate_source_file(entrypoint_file_name); if (locate_res.is_error()) { throw Fatal("Failed to locate " + entrypoint_file_name + ": " + locate_res.error().message().str()); } - parse_source_file(locate_res.move_as_ok()); + process_file_ast(parse_src_file_to_ast(locate_res.move_as_ok())); // todo #ifdef TOLK_PROFILING + comment // lexer_measure_performance(all_src_files.get_all_files()); @@ -293,6 +294,10 @@ int tolk_proceed(const std::string &entrypoint_file_name) { unif_err.print_message(std::cerr); std::cerr << std::endl; return 2; + } catch (UnexpectedASTNodeType& error) { + std::cerr << "fatal: " << error.what() << std::endl; + std::cerr << "It's a compiler bug, please report to developers" << std::endl; + return 2; } } diff --git a/tolk/tolk.h b/tolk/tolk.h index 15aeba256..b62c6a581 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -17,7 +17,7 @@ #pragma once #include "src-file.h" -#include "lexer.h" +#include "type-expr.h" #include "symtable.h" #include "crypto/common/refint.h" #include 
"td/utils/Status.h" @@ -38,136 +38,6 @@ namespace tolk { * */ -struct TypeExpr { - enum te_type { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll } constr; - enum AtomicType { - _Int = tok_int, - _Cell = tok_cell, - _Slice = tok_slice, - _Builder = tok_builder, - _Cont = tok_cont, - _Tuple = tok_tuple, - _Type = tok_type - }; - int value; - int minw, maxw; - static constexpr int w_inf = 1023; - std::vector args; - bool was_forall_var = false; - TypeExpr(te_type _constr, int _val = 0) : constr(_constr), value(_val), minw(0), maxw(w_inf) { - } - TypeExpr(te_type _constr, int _val, int width) : constr(_constr), value(_val), minw(width), maxw(width) { - } - TypeExpr(te_type _constr, std::vector list) - : constr(_constr), value((int)list.size()), args(std::move(list)) { - compute_width(); - } - TypeExpr(te_type _constr, std::initializer_list list) - : constr(_constr), value((int)list.size()), args(std::move(list)) { - compute_width(); - } - TypeExpr(te_type _constr, TypeExpr* elem0) : constr(_constr), value(1), args{elem0} { - compute_width(); - } - TypeExpr(te_type _constr, TypeExpr* elem0, std::vector list) - : constr(_constr), value((int)list.size() + 1), args{elem0} { - args.insert(args.end(), list.begin(), list.end()); - compute_width(); - } - TypeExpr(te_type _constr, TypeExpr* elem0, std::initializer_list list) - : constr(_constr), value((int)list.size() + 1), args{elem0} { - args.insert(args.end(), list.begin(), list.end()); - compute_width(); - } - bool is_atomic() const { - return constr == te_Atomic; - } - bool is_atomic(int v) const { - return constr == te_Atomic && value == v; - } - bool is_int() const { - return is_atomic(_Int); - } - bool is_var() const { - return constr == te_Var; - } - bool is_map() const { - return constr == te_Map; - } - bool is_tuple() const { - return constr == te_Tuple; - } - bool has_fixed_width() const { - return minw == maxw; - } - int get_width() const { - return has_fixed_width() ? 
minw : -1; - } - void compute_width(); - bool recompute_width(); - void show_width(std::ostream& os); - std::ostream& print(std::ostream& os, int prio = 0) const; - void replace_with(TypeExpr* te2); - int extract_components(std::vector& comp_list); - bool equals_to(const TypeExpr* rhs) const; - bool has_unknown_inside() const; - static int holes, type_vars; - static TypeExpr* new_hole() { - return new TypeExpr{te_Unknown, ++holes}; - } - static TypeExpr* new_hole(int width) { - return new TypeExpr{te_Unknown, ++holes, width}; - } - static TypeExpr* new_unit() { - return new TypeExpr{te_Tensor, 0, 0}; - } - static TypeExpr* new_atomic(int value) { - return new TypeExpr{te_Atomic, value, 1}; - } - static TypeExpr* new_map(TypeExpr* from, TypeExpr* to); - static TypeExpr* new_func() { - return new_map(new_hole(), new_hole()); - } - static TypeExpr* new_tensor(std::vector list, bool red = true) { - return red && list.size() == 1 ? list[0] : new TypeExpr{te_Tensor, std::move(list)}; - } - static TypeExpr* new_tensor(std::initializer_list list) { - return new TypeExpr{te_Tensor, std::move(list)}; - } - static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2) { - return new_tensor({te1, te2}); - } - static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2, TypeExpr* te3) { - return new_tensor({te1, te2, te3}); - } - static TypeExpr* new_tuple(TypeExpr* arg0) { - return new TypeExpr{te_Tuple, arg0}; - } - static TypeExpr* new_tuple(std::vector list, bool red = false) { - return new_tuple(new_tensor(std::move(list), red)); - } - static TypeExpr* new_tuple(std::initializer_list list) { - return new_tuple(new_tensor(std::move(list))); - } - static TypeExpr* new_var() { - return new TypeExpr{te_Var, --type_vars, 1}; - } - static TypeExpr* new_var(int idx) { - return new TypeExpr{te_Var, idx, 1}; - } - static TypeExpr* new_forall(std::vector list, TypeExpr* body) { - return new TypeExpr{te_ForAll, body, std::move(list)}; - } - static TypeExpr* new_forall(std::initializer_list 
list, TypeExpr* body) { - return new TypeExpr{te_ForAll, body, std::move(list)}; - } - static bool remove_indirect(TypeExpr*& te, TypeExpr* forbidden = nullptr); - static std::vector remove_forall(TypeExpr*& te); - static bool remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars); -}; - -std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr); - struct UnifyError : std::exception { TypeExpr* te1; TypeExpr* te2; @@ -197,14 +67,13 @@ using const_idx_t = int; struct TmpVar { TypeExpr* v_type; var_idx_t idx; - enum { _In = 1, _Named = 2, _Tmp = 4, _UniqueName = 0x20 }; - int cls; + bool is_tmp_unnamed; sym_idx_t name; int coord; SrcLocation where; std::vector> on_modification; - TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, SrcLocation loc); + TmpVar(var_idx_t _idx, bool _is_tmp_unnamed, TypeExpr* _type, SymDef* sym, SrcLocation loc); void show(std::ostream& os, int omit_idx = 0) const; void dump(std::ostream& os) const; void set_location(SrcLocation loc); @@ -586,9 +455,9 @@ struct CodeBlob { return res; } bool import_params(FormalArgList arg_list); - var_idx_t create_var(int cls, TypeExpr* var_type, SymDef* sym, SrcLocation loc); + var_idx_t create_var(bool is_tmp_unnamed, TypeExpr* var_type, SymDef* sym, SrcLocation loc); var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) { - return create_var(TmpVar::_Tmp, var_type, nullptr, loc); + return create_var(true, var_type, nullptr, loc); } int split_vars(bool strict = false); bool compute_used_code_vars(); @@ -631,7 +500,6 @@ struct CodeBlob { struct SymVal : SymValBase { TypeExpr* sym_type; - bool auto_apply{false}; SymVal(SymValKind kind, int idx, TypeExpr* sym_type = nullptr) : SymValBase(kind, idx), sym_type(sym_type) { } @@ -702,16 +570,6 @@ struct SymValCodeFunc : SymValFunc { bool does_need_codegen() const; }; -struct SymValType : SymValBase { - TypeExpr* sym_type; - SymValType(SymValKind kind, int idx, TypeExpr* _stype = nullptr) : SymValBase(kind, idx), 
sym_type(_stype) { - } - ~SymValType() override = default; - TypeExpr* get_type() const { - return sym_type; - } -}; - struct SymValGlobVar : SymValBase { TypeExpr* sym_type; int out_idx{0}; @@ -762,7 +620,6 @@ struct SymValConst : SymValBase { // defined in parse-tolk.cpp td::Result locate_source_file(const std::string& rel_filename); -void parse_source_file(SrcFile* file); /* @@ -792,7 +649,7 @@ struct Expr { }; ExprCls cls; int val{0}; - enum { _IsType = 1, _IsRvalue = 2, _IsLvalue = 4, _IsImpure = 32, _IsInsideParenthesis = 64 }; + enum { _IsType = 1, _IsRvalue = 2, _IsLvalue = 4, _IsImpure = 32 }; int flags{0}; SrcLocation here; td::RefInt256 intval; @@ -834,18 +691,23 @@ struct Expr { bool is_type() const { return flags & _IsType; } - bool is_inside_parenthesis() const { - return flags & _IsInsideParenthesis; - } bool is_type_apply() const { return cls == _TypeApply; } bool is_mktuple() const { return cls == _MkTuple; } - void chk_rvalue(const Lexer& lex) const; // todo here and below: strange to pass Lexer - void chk_lvalue(const Lexer& lex) const; - bool deduce_type(const Lexer& lex); + void chk_rvalue() const { + if (!is_rvalue()) { + throw ParseError(here, "rvalue expected"); + } + } + void chk_lvalue() const { + if (!is_lvalue()) { + throw ParseError(here, "lvalue expected"); + } + } + bool deduce_type(); void set_location(SrcLocation loc) { here = loc; } diff --git a/tolk/type-expr.h b/tolk/type-expr.h new file mode 100644 index 000000000..4893df35a --- /dev/null +++ b/tolk/type-expr.h @@ -0,0 +1,140 @@ +#pragma once + +#include +#include +#include "lexer.h" + +namespace tolk { + +struct TypeExpr { + enum Kind { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll }; + // todo not _ + enum AtomicType { + _Int = tok_int, + _Cell = tok_cell, + _Slice = tok_slice, + _Builder = tok_builder, + _Cont = tok_cont, + _Tuple = tok_tuple, + }; + Kind constr; + int value; + int minw, maxw; + static constexpr int w_inf = 1023; + 
std::vector args; + bool was_forall_var = false; + TypeExpr(Kind _constr, int _val = 0) : constr(_constr), value(_val), minw(0), maxw(w_inf) { + } + TypeExpr(Kind _constr, int _val, int width) : constr(_constr), value(_val), minw(width), maxw(width) { + } + TypeExpr(Kind _constr, std::vector list) + : constr(_constr), value((int)list.size()), args(std::move(list)) { + compute_width(); + } + TypeExpr(Kind _constr, std::initializer_list list) + : constr(_constr), value((int)list.size()), args(std::move(list)) { + compute_width(); + } + TypeExpr(Kind _constr, TypeExpr* elem0) : constr(_constr), value(1), args{elem0} { + compute_width(); + } + TypeExpr(Kind _constr, TypeExpr* elem0, std::vector list) + : constr(_constr), value((int)list.size() + 1), args{elem0} { + args.insert(args.end(), list.begin(), list.end()); + compute_width(); + } + TypeExpr(Kind _constr, TypeExpr* elem0, std::initializer_list list) + : constr(_constr), value((int)list.size() + 1), args{elem0} { + args.insert(args.end(), list.begin(), list.end()); + compute_width(); + } + bool is_atomic() const { + return constr == te_Atomic; + } + bool is_atomic(int v) const { + return constr == te_Atomic && value == v; + } + bool is_int() const { + return is_atomic(_Int); + } + bool is_var() const { + return constr == te_Var; + } + bool is_map() const { + return constr == te_Map; + } + bool is_tuple() const { + return constr == te_Tuple; + } + bool has_fixed_width() const { + return minw == maxw; + } + int get_width() const { + return has_fixed_width() ? 
minw : -1; + } + void compute_width(); + bool recompute_width(); + void show_width(std::ostream& os); + std::ostream& print(std::ostream& os, int prio = 0) const; + void replace_with(TypeExpr* te2); + int extract_components(std::vector& comp_list); + bool equals_to(const TypeExpr* rhs) const; + bool has_unknown_inside() const; + static int holes, type_vars; + static TypeExpr* new_hole() { + return new TypeExpr{te_Unknown, ++holes}; + } + static TypeExpr* new_hole(int width) { + return new TypeExpr{te_Unknown, ++holes, width}; + } + static TypeExpr* new_unit() { + return new TypeExpr{te_Tensor, 0, 0}; + } + static TypeExpr* new_atomic(int value) { + return new TypeExpr{te_Atomic, value, 1}; + } + static TypeExpr* new_map(TypeExpr* from, TypeExpr* to); + static TypeExpr* new_func() { + return new_map(new_hole(), new_hole()); + } + static TypeExpr* new_tensor(std::vector list, bool red = true) { + return red && list.size() == 1 ? list[0] : new TypeExpr{te_Tensor, std::move(list)}; + } + static TypeExpr* new_tensor(std::initializer_list list) { + return new TypeExpr{te_Tensor, std::move(list)}; + } + static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2) { + return new_tensor({te1, te2}); + } + static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2, TypeExpr* te3) { + return new_tensor({te1, te2, te3}); + } + static TypeExpr* new_tuple(TypeExpr* arg0) { + return new TypeExpr{te_Tuple, arg0}; + } + static TypeExpr* new_tuple(std::vector list, bool red = false) { + return new_tuple(new_tensor(std::move(list), red)); + } + static TypeExpr* new_tuple(std::initializer_list list) { + return new_tuple(new_tensor(list)); + } + static TypeExpr* new_var() { + return new TypeExpr{te_Var, --type_vars, 1}; + } + static TypeExpr* new_var(int idx) { + return new TypeExpr{te_Var, idx, 1}; + } + static TypeExpr* new_forall(std::vector list, TypeExpr* body) { + return new TypeExpr{te_ForAll, body, std::move(list)}; + } + static TypeExpr* new_forall(std::initializer_list list, 
TypeExpr* body) { + return new TypeExpr{te_ForAll, body, std::move(list)}; + } + static bool remove_indirect(TypeExpr*& te, TypeExpr* forbidden = nullptr); + static std::vector remove_forall(TypeExpr*& te); + static bool remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars); +}; + +std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr); + +} // namespace tolk diff --git a/tolk/unify-types.cpp b/tolk/unify-types.cpp index 04de323df..cc2073ede 100644 --- a/tolk/unify-types.cpp +++ b/tolk/unify-types.cpp @@ -268,8 +268,6 @@ std::ostream& TypeExpr::print(std::ostream& os, int lex_level) const { return os << "cont"; case _Tuple: return os << "tuple"; - case _Type: - return os << "type"; default: return os << "atomic-type-" << value; } From 5a3e3595d6b414d5886a3df36dd9676108031afd Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Thu, 31 Oct 2024 11:04:58 +0400 Subject: [PATCH 07/12] [Tolk] Compilation pipeline, register global symbols in advance Since I've implemented AST, now I can drop forward declarations. Instead, I traverse AST of all files and register global symbols (functions, constants, global vars) as a separate step, in advance. That's why, while converting AST to Expr/Op, all available symbols are already registered. This greatly simplifies "intermediate state" of yet unknown functions and checking them afterward. Redeclaration of local variables (inside the same scope) is now also prohibited. 
--- crypto/smartcont/mathlib.tolk | 2 +- tolk/CMakeLists.txt | 7 +- tolk/ast-from-tokens.cpp | 108 +-- tolk/ast-from-tokens.h | 2 +- tolk/ast-stringifier.h | 56 +- tolk/ast-to-legacy.h | 28 - tolk/ast-visitor.h | 48 -- tolk/ast.cpp | 6 +- tolk/ast.h | 66 +- tolk/builtins.cpp | 7 +- tolk/compiler-state.h | 5 +- tolk/gen-abscode.cpp | 11 +- tolk/lexer.cpp | 2 +- tolk/lexer.h | 2 +- ...t-to-legacy.cpp => pipe-ast-to-legacy.cpp} | 726 ++---------------- tolk/pipe-discover-parse-sources.cpp | 62 ++ tolk/pipe-find-unused-symbols.cpp | 90 +++ tolk/pipe-generate-fif-output.cpp | 186 +++++ tolk/pipe-handle-pragmas.cpp | 140 ++++ tolk/pipe-register-symbols.cpp | 402 ++++++++++ tolk/pipeline.h | 42 + tolk/src-file.cpp | 50 +- tolk/src-file.h | 35 +- tolk/symtable.cpp | 19 +- tolk/symtable.h | 2 +- tolk/tolk-wasm.cpp | 6 +- tolk/tolk.cpp | 259 +------ tolk/tolk.h | 29 +- 28 files changed, 1265 insertions(+), 1133 deletions(-) delete mode 100644 tolk/ast-to-legacy.h rename tolk/{ast-to-legacy.cpp => pipe-ast-to-legacy.cpp} (50%) create mode 100644 tolk/pipe-discover-parse-sources.cpp create mode 100644 tolk/pipe-find-unused-symbols.cpp create mode 100644 tolk/pipe-generate-fif-output.cpp create mode 100644 tolk/pipe-handle-pragmas.cpp create mode 100644 tolk/pipe-register-symbols.cpp create mode 100644 tolk/pipeline.h diff --git a/crypto/smartcont/mathlib.tolk b/crypto/smartcont/mathlib.tolk index bb18f9212..74fdfdd59 100644 --- a/crypto/smartcont/mathlib.tolk +++ b/crypto/smartcont/mathlib.tolk @@ -676,7 +676,7 @@ int fixed248::pow(int x, int y) inline_ref { if (sq <= 0) { return - (sq == 0); ;; underflow } - int y = expm1_f257(mulrshiftr256(ll, log2_const_f256())); + y = expm1_f257(mulrshiftr256(ll, log2_const_f256())); return (y ~>> (9 - q)) - (-1 << sq); } diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index 5306354de..ffd778a62 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -7,7 +7,12 @@ set(TOLK_SOURCE compiler-state.cpp ast.cpp ast-from-tokens.cpp 
- ast-to-legacy.cpp + pipe-discover-parse-sources.cpp + pipe-handle-pragmas.cpp + pipe-register-symbols.cpp + pipe-ast-to-legacy.cpp + pipe-find-unused-symbols.cpp + pipe-generate-fif-output.cpp unify-types.cpp abscode.cpp gen-abscode.cpp diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp index 386576843..75cc0b4d2 100644 --- a/tolk/ast-from-tokens.cpp +++ b/tolk/ast-from-tokens.cpp @@ -103,9 +103,9 @@ static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bits // TE ::= TA | TA -> TE // TA ::= int | ... | cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ] -TypeExpr* parse_type(Lexer& lex, V forall_list); +static TypeExpr* parse_type(Lexer& lex, V forall_list); -TypeExpr* parse_type1(Lexer& lex, V forall_list) { +static TypeExpr* parse_type1(Lexer& lex, V forall_list) { switch (lex.tok()) { case tok_int: lex.next(); @@ -165,7 +165,7 @@ TypeExpr* parse_type1(Lexer& lex, V forall_list) { return c == tok_clpar ? TypeExpr::new_tensor(std::move(tlist)) : TypeExpr::new_tuple(std::move(tlist)); } -TypeExpr* parse_type(Lexer& lex, V forall_list) { +static TypeExpr* parse_type(Lexer& lex, V forall_list) { TypeExpr* res = parse_type1(lex, forall_list); if (lex.tok() == tok_mapsto) { lex.next(); @@ -175,13 +175,14 @@ TypeExpr* parse_type(Lexer& lex, V forall_list) { return res; } -AnyV parse_argument(Lexer& lex, V forall_list) { +static AnyV parse_argument(Lexer& lex, V forall_list) { TypeExpr* arg_type = nullptr; SrcLocation loc = lex.cur_location(); if (lex.tok() == tok_underscore) { lex.next(); if (lex.tok() == tok_comma || lex.tok() == tok_clpar) { - return createV(loc, "", TypeExpr::new_hole()); + auto v_empty = createV(lex.cur_location(), ""); + return createV(loc, v_empty, TypeExpr::new_hole()); } arg_type = TypeExpr::new_hole(); loc = lex.cur_location(); @@ -201,16 +202,17 @@ AnyV parse_argument(Lexer& lex, V forall_list) { loc = lex.cur_location(); lex.next(); } - return createV(loc, "", arg_type); + auto v_empty = 
createV(lex.cur_location(), ""); + return createV(loc, v_empty, arg_type); } lex.check(tok_identifier, "parameter name"); loc = lex.cur_location(); - std::string_view arg_name = lex.cur_str(); + auto v_ident = createV(lex.cur_location(), lex.cur_str()); lex.next(); - return createV(loc, arg_name, arg_type); + return createV(loc, v_ident, arg_type); } -AnyV parse_global_var_declaration(Lexer& lex) { +static AnyV parse_global_var_declaration(Lexer& lex) { TypeExpr* declared_type = nullptr; SrcLocation loc = lex.cur_location(); if (lex.tok() == tok_underscore) { @@ -221,14 +223,15 @@ AnyV parse_global_var_declaration(Lexer& lex) { declared_type = parse_type(lex, nullptr); } lex.check(tok_identifier, "global variable name"); - std::string_view var_name = lex.cur_str(); + auto v_ident = createV(lex.cur_location(), lex.cur_str()); lex.next(); - return createV(loc, var_name, declared_type); + return createV(loc, v_ident, declared_type); } AnyV parse_expr(Lexer& lex); -AnyV parse_constant_declaration(Lexer& lex) { +static AnyV parse_constant_declaration(Lexer& lex) { + SrcLocation loc = lex.cur_location(); TypeExpr *declared_type = nullptr; if (lex.tok() == tok_int) { declared_type = TypeExpr::new_atomic(TypeExpr::_Int); @@ -238,15 +241,14 @@ AnyV parse_constant_declaration(Lexer& lex) { lex.next(); } lex.check(tok_identifier, "constant name"); - SrcLocation loc = lex.cur_location(); - std::string_view const_name = lex.cur_str(); + auto v_ident = createV(lex.cur_location(), lex.cur_str()); lex.next(); lex.expect(tok_assign, "'='"); AnyV init_value = parse_expr(lex); - return createV(loc, const_name, declared_type, init_value); + return createV(loc, v_ident, declared_type, init_value); } -AnyV parse_argument_list(Lexer& lex, V forall_list) { +static AnyV parse_argument_list(Lexer& lex, V forall_list) { SrcLocation loc = lex.cur_location(); std::vector args; lex.expect(tok_oppar, "argument list"); @@ -261,7 +263,7 @@ AnyV parse_argument_list(Lexer& lex, V forall_list) { 
return createV(loc, std::move(args)); } -AnyV parse_constant_declaration_list(Lexer& lex) { +static AnyV parse_constant_declaration_list(Lexer& lex) { std::vector consts; SrcLocation loc = lex.cur_location(); lex.expect(tok_const, "'const'"); @@ -276,7 +278,7 @@ AnyV parse_constant_declaration_list(Lexer& lex) { return createV(loc, std::move(consts)); } -AnyV parse_global_var_declaration_list(Lexer& lex) { +static AnyV parse_global_var_declaration_list(Lexer& lex) { std::vector globals; SrcLocation loc = lex.cur_location(); lex.expect(tok_global, "'global'"); @@ -292,7 +294,7 @@ AnyV parse_global_var_declaration_list(Lexer& lex) { } // parse ( E { , E } ) | () | [ E { , E } ] | [] | id | num | _ -AnyV parse_expr100(Lexer& lex) { +static AnyV parse_expr100(Lexer& lex) { SrcLocation loc = lex.cur_location(); if (lex.tok() == tok_oppar) { lex.next(); @@ -402,7 +404,7 @@ AnyV parse_expr100(Lexer& lex) { } // parse E { E } -AnyV parse_expr90(Lexer& lex) { +static AnyV parse_expr90(Lexer& lex) { AnyV res = parse_expr100(lex); while (lex.tok() == tok_oppar || lex.tok() == tok_opbracket || (lex.tok() == tok_identifier && lex.cur_str()[0] != '.' && lex.cur_str()[0] != '~')) { if (const auto* v_type_expr = res->try_as()) { @@ -417,7 +419,7 @@ AnyV parse_expr90(Lexer& lex) { } // parse E { .method E | ~method E } -AnyV parse_expr80(Lexer& lex) { +static AnyV parse_expr80(Lexer& lex) { AnyV lhs = parse_expr90(lex); while (lex.tok() == tok_identifier && (lex.cur_str()[0] == '.' 
|| lex.cur_str()[0] == '~')) { std::string_view method_name = lex.cur_str(); @@ -430,7 +432,7 @@ AnyV parse_expr80(Lexer& lex) { } // parse [ ~ | - | + ] E -AnyV parse_expr75(Lexer& lex) { +static AnyV parse_expr75(Lexer& lex) { TokenType t = lex.tok(); if (t == tok_bitwise_not || t == tok_minus || t == tok_plus) { SrcLocation loc = lex.cur_location(); @@ -444,7 +446,7 @@ AnyV parse_expr75(Lexer& lex) { } // parse E { (* | / | % | /% | ^/ | ~/ | ^% | ~% ) E } -AnyV parse_expr30(Lexer& lex) { +static AnyV parse_expr30(Lexer& lex) { AnyV lhs = parse_expr75(lex); TokenType t = lex.tok(); while (t == tok_mul || t == tok_div || t == tok_mod || t == tok_divmod || t == tok_divC || @@ -460,7 +462,7 @@ AnyV parse_expr30(Lexer& lex) { } // parse E { (+ | -) E } -AnyV parse_expr20(Lexer& lex) { +static AnyV parse_expr20(Lexer& lex) { AnyV lhs = parse_expr30(lex); TokenType t = lex.tok(); while (t == tok_minus || t == tok_plus) { @@ -475,7 +477,7 @@ AnyV parse_expr20(Lexer& lex) { } // parse E { ( << | >> | ~>> | ^>> ) E } -AnyV parse_expr17(Lexer& lex) { +static AnyV parse_expr17(Lexer& lex) { AnyV lhs = parse_expr20(lex); TokenType t = lex.tok(); while (t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR) { @@ -491,7 +493,7 @@ AnyV parse_expr17(Lexer& lex) { } // parse E [ (== | < | > | <= | >= | != | <=> ) E ] -AnyV parse_expr15(Lexer& lex) { +static AnyV parse_expr15(Lexer& lex) { AnyV lhs = parse_expr17(lex); TokenType t = lex.tok(); if (t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship) { @@ -505,7 +507,7 @@ AnyV parse_expr15(Lexer& lex) { } // parse E { ( & | `|` | ^ ) E } -AnyV parse_expr14(Lexer& lex) { +static AnyV parse_expr14(Lexer& lex) { AnyV lhs = parse_expr15(lex); TokenType t = lex.tok(); while (t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor) { @@ -521,7 +523,7 @@ AnyV parse_expr14(Lexer& lex) { } // parse E [ ? 
E : E ] -AnyV parse_expr13(Lexer& lex) { +static AnyV parse_expr13(Lexer& lex) { AnyV res = parse_expr14(lex); if (lex.tok() == tok_question) { SrcLocation loc = lex.cur_location(); @@ -535,7 +537,7 @@ AnyV parse_expr13(Lexer& lex) { } // parse LE1 (= | += | -= | ... ) E2 -AnyV parse_expr10(Lexer& lex) { +static AnyV parse_expr10(Lexer& lex) { AnyV lhs = parse_expr13(lex); TokenType t = lex.tok(); if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_divR || t == tok_set_divC || @@ -555,7 +557,7 @@ AnyV parse_expr(Lexer& lex) { return parse_expr10(lex); } -AnyV parse_return_stmt(Lexer& lex) { +static AnyV parse_return_stmt(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_return, "'return'"); AnyV child = parse_expr(lex); @@ -565,7 +567,7 @@ AnyV parse_return_stmt(Lexer& lex) { AnyV parse_statement(Lexer& lex); -V parse_sequence(Lexer& lex) { +static V parse_sequence(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_opbrace, "'{'"); std::vector items; @@ -577,7 +579,7 @@ V parse_sequence(Lexer& lex) { return createV(loc, loc_end, items); } -AnyV parse_repeat_statement(Lexer& lex) { +static AnyV parse_repeat_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_repeat, "'repeat'"); AnyV cond = parse_expr(lex); @@ -585,7 +587,7 @@ AnyV parse_repeat_statement(Lexer& lex) { return createV(loc, cond, body); } -AnyV parse_while_statement(Lexer& lex) { +static AnyV parse_while_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_while, "'while'"); AnyV cond = parse_expr(lex); @@ -593,7 +595,7 @@ AnyV parse_while_statement(Lexer& lex) { return createV(loc, cond, body); } -ASTNodeBase* parse_do_until_statement(Lexer& lex) { +static AnyV parse_do_until_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_do, "'do'"); V body = parse_sequence(lex); @@ -602,7 +604,7 @@ ASTNodeBase* parse_do_until_statement(Lexer& lex) { return 
createV(loc, body, cond); } -AnyV parse_try_catch_statement(Lexer& lex) { +static AnyV parse_try_catch_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_try, "'try'"); V try_body = parse_sequence(lex); @@ -612,7 +614,7 @@ AnyV parse_try_catch_statement(Lexer& lex) { return createV(loc, try_body, catch_expr, catch_body); } -AnyV parse_if_statement(Lexer& lex, bool is_ifnot) { +static AnyV parse_if_statement(Lexer& lex, bool is_ifnot) { SrcLocation loc = lex.cur_location(); lex.next(); AnyV cond = parse_expr(lex); @@ -652,8 +654,9 @@ AnyV parse_statement(Lexer& lex) { case tok_try: return parse_try_catch_statement(lex); case tok_semicolon: { + SrcLocation loc = lex.cur_location(); lex.next(); - return createV; + return createV(loc); } default: { AnyV expr = parse_expr(lex); @@ -663,11 +666,11 @@ AnyV parse_statement(Lexer& lex) { } } -AnyV parse_func_body(Lexer& lex) { +static AnyV parse_func_body(Lexer& lex) { return parse_sequence(lex); } -AnyV parse_asm_func_body(Lexer& lex, V arg_list) { +static AnyV parse_asm_func_body(Lexer& lex, V arg_list) { SrcLocation loc = lex.cur_location(); lex.expect(tok_asm, "'asm'"); size_t n_args = arg_list->size(); @@ -706,7 +709,7 @@ AnyV parse_asm_func_body(Lexer& lex, V arg_list) { return createV(loc, std::move(arg_order), std::move(ret_order), std::move(asm_commands)); } -AnyV parse_forall(Lexer& lex) { +static AnyV parse_forall(Lexer& lex) { SrcLocation loc = lex.cur_location(); std::vector forall_items; lex.expect(tok_forall, "'forall'"); @@ -726,7 +729,7 @@ AnyV parse_forall(Lexer& lex) { return createV{loc, std::move(forall_items)}; } -AnyV parse_function_declaration(Lexer& lex) { +static AnyV parse_function_declaration(Lexer& lex) { SrcLocation loc = lex.cur_location(); V forall_list = nullptr; bool is_get_method = false; @@ -741,7 +744,7 @@ AnyV parse_function_declaration(Lexer& lex) { } TypeExpr* ret_type = parse_type(lex, forall_list); lex.check(tok_identifier, "function name identifier 
expected"); - std::string func_name = static_cast(lex.cur_str()); + auto v_ident = createV(lex.cur_location(), lex.cur_str()); lex.next(); V arg_list = parse_argument_list(lex, forall_list)->as(); bool marked_as_pure = false; @@ -790,13 +793,9 @@ AnyV parse_function_declaration(Lexer& lex) { if (lex.tok() == tok_builtin) { is_builtin = true; - body = createV; + body = createV(lex.cur_location()); lex.next(); lex.expect(tok_semicolon, "';'"); - } else if (lex.tok() == tok_semicolon) { - // todo this is just a prototype, remove this "feature" in the future - lex.next(); - body = createV; } else if (lex.tok() == tok_opbrace) { body = parse_func_body(lex); } else if (lex.tok() == tok_asm) { @@ -805,7 +804,7 @@ AnyV parse_function_declaration(Lexer& lex) { lex.expect(tok_opbrace, "function body block"); } - auto f_declaration = createV(loc, func_name, arg_list, body); + auto f_declaration = createV(loc, v_ident, arg_list, body); f_declaration->ret_type = ret_type; f_declaration->forall_list = forall_list; f_declaration->marked_as_pure = marked_as_pure; @@ -817,7 +816,7 @@ AnyV parse_function_declaration(Lexer& lex) { return f_declaration; } -AnyV parse_pragma(Lexer& lex) { +static AnyV parse_pragma(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.next_special(tok_pragma_name, "pragma name"); std::string_view pragma_name = lex.cur_str(); @@ -839,7 +838,7 @@ AnyV parse_pragma(Lexer& lex) { return createV(loc, pragma_name); } -AnyV parse_include_statement(Lexer& lex) { +static AnyV parse_include_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); lex.expect(tok_include, "#include"); lex.check(tok_string_const, "source file name"); @@ -847,15 +846,14 @@ AnyV parse_include_statement(Lexer& lex) { if (rel_filename.empty()) { lex.error("imported file name is an empty string"); } + auto v_str = createV(lex.cur_location(), rel_filename, 0); lex.next(); lex.expect(tok_semicolon, "';'"); - return createV(loc, rel_filename); + return createV(loc, v_str); } // the 
main (exported) function -AnyV parse_src_file_to_ast(SrcFile* file) { - file->was_parsed = true; - +AnyV parse_src_file_to_ast(const SrcFile* file) { std::vector toplevel_declarations; Lexer lex(file); while (!lex.is_eof()) { @@ -867,6 +865,8 @@ AnyV parse_src_file_to_ast(SrcFile* file) { toplevel_declarations.push_back(parse_global_var_declaration_list(lex)); } else if (lex.tok() == tok_const) { toplevel_declarations.push_back(parse_constant_declaration_list(lex)); + } else if (lex.tok() == tok_semicolon) { + lex.next(); // don't add op_empty, no need } else { toplevel_declarations.push_back(parse_function_declaration(lex)); } diff --git a/tolk/ast-from-tokens.h b/tolk/ast-from-tokens.h index 65b82ad64..5f380c569 100644 --- a/tolk/ast-from-tokens.h +++ b/tolk/ast-from-tokens.h @@ -22,6 +22,6 @@ namespace tolk { struct ASTNodeBase; -const ASTNodeBase* parse_src_file_to_ast(SrcFile* file); +const ASTNodeBase* parse_src_file_to_ast(const SrcFile* file); } // namespace tolk diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h index 399017a78..fcd1f36cc 100644 --- a/tolk/ast-stringifier.h +++ b/tolk/ast-stringifier.h @@ -96,64 +96,70 @@ class ASTStringifier final : public ASTVisitor { depth--; } - static std::string specific_str(AnyV node) { - switch (node->type) { + static std::string specific_str(AnyV v) { + switch (v->type) { case ast_identifier: - return static_cast(node->as()->name); + return static_cast(v->as()->name); case ast_int_const: - return static_cast(node->as()->int_val); + return static_cast(v->as()->int_val); case ast_string_const: - if (char modifier = node->as()->modifier) { - return "\"" + static_cast(node->as()->str_val) + "\"" + std::string(1, modifier); + if (char modifier = v->as()->modifier) { + return "\"" + static_cast(v->as()->str_val) + "\"" + std::string(1, modifier); } else { - return "\"" + static_cast(node->as()->str_val) + "\""; + return "\"" + static_cast(v->as()->str_val) + "\""; + } + case ast_function_call: { + if (auto 
v_lhs = v->as()->get_called_f()->try_as()) { + return static_cast(v_lhs->name) + "()"; } + return {}; + } case ast_global_var_declaration: - return static_cast(node->as()->var_name); + return static_cast(v->as()->get_identifier()->name); case ast_constant_declaration: - return static_cast(node->as()->const_name); + return static_cast(v->as()->get_identifier()->name); case ast_type_expression: { std::ostringstream os; - os << node->as()->declared_type; + os << v->as()->declared_type; return os.str(); } case ast_variable_declaration: { std::ostringstream os; - os << node->as()->declared_type; + os << v->as()->declared_type; return os.str(); } case ast_dot_tilde_call: - return static_cast(node->as()->method_name); + return static_cast(v->as()->method_name); case ast_unary_operator: - return static_cast(node->as()->operator_name); + return static_cast(v->as()->operator_name); case ast_binary_operator: - return static_cast(node->as()->operator_name); + return static_cast(v->as()->operator_name); case ast_sequence: - return "↓" + std::to_string(node->as()->get_items().size()); + return "↓" + std::to_string(v->as()->get_items().size()); case ast_if_statement: - return node->as()->is_ifnot ? "ifnot" : ""; + return v->as()->is_ifnot ? 
"ifnot" : ""; case ast_argument: { std::ostringstream os; - os << node->as()->arg_type; - return static_cast(node->as()->arg_name) + ": " + os.str(); + os << v->as()->arg_type; + return static_cast(v->as()->get_identifier()->name) + ": " + os.str(); } case ast_function_declaration: { std::string arg_names; - for (int i = 0; i < node->as()->get_num_args(); i++) { + for (int i = 0; i < v->as()->get_num_args(); i++) { if (!arg_names.empty()) arg_names += ","; - arg_names += node->as()->get_arg(i)->arg_name; + arg_names += v->as()->get_arg(i)->get_identifier()->name; } - return "fun " + node->as()->name + "(" + arg_names + ")"; + return "fun " + static_cast(v->as()->get_identifier()->name) + "(" + arg_names + ")"; } case ast_pragma_no_arg: - return static_cast(node->as()->pragma_name); + return static_cast(v->as()->pragma_name); case ast_pragma_version: - return static_cast(node->as()->semver); + return static_cast(v->as()->semver); case ast_include_statement: - return static_cast(node->as()->file_name); + return static_cast(v->as()->get_file_leaf()->str_val); case ast_tolk_file: - return node->as()->file->rel_filename; + return v->as()->file->rel_filename; default: return {}; } diff --git a/tolk/ast-to-legacy.h b/tolk/ast-to-legacy.h deleted file mode 100644 index f7660f20e..000000000 --- a/tolk/ast-to-legacy.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - This file is part of TON Blockchain Library. - - TON Blockchain Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - TON Blockchain Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public License - along with TON Blockchain Library. If not, see . -*/ -#pragma once - -#include "ast.h" - -namespace tolk { - -struct SrcFile; - -void process_file_ast(AnyV file_ast); - -} // namespace tolk - diff --git a/tolk/ast-visitor.h b/tolk/ast-visitor.h index 237a79f13..d1c38b9e9 100644 --- a/tolk/ast-visitor.h +++ b/tolk/ast-visitor.h @@ -148,52 +148,4 @@ class ASTVisitorAllFunctionsInFile : public ASTVisitorFunctionBody { } }; -class ASTVisitorToplevelDeclarations : public ASTVisitor { -protected: - using parent = ASTVisitorToplevelDeclarations; - - virtual void on_pragma_no_arg(V v) = 0; - virtual void on_pragma_version(V v) = 0; - virtual void on_include_statement(V v) = 0; - virtual void on_constant_declaration(V v) = 0; - virtual void on_global_var_declaration(V v) = 0; - virtual void on_function_declaration(V v) = 0; - - void visit(AnyV v) final { - switch (v->type) { - case ast_pragma_no_arg: - on_pragma_no_arg(v->as()); - break; - case ast_pragma_version: - on_pragma_version(v->as()); - break; - case ast_include_statement: - on_include_statement(v->as()); - break; - case ast_constant_declaration_list: - for (const auto& v_decl : v->as()->get_declarations()) { - on_constant_declaration(v_decl->as()); - } - break; - case ast_global_var_declaration_list: - for (const auto& v_decl : v->as()->get_declarations()) { - on_global_var_declaration(v_decl->as()); - } - break; - case ast_function_declaration: - on_function_declaration(v->as()); - break; - default: - throw UnexpectedASTNodeType(v, "ASTVisitorToplevelDeclarations::visit"); - } - } - -public: - void start_visiting_file(V v_file) { - for (AnyV v : v_file->get_toplevel_declarations()) { - visit(v); - } - } -}; - } // namespace tolk diff --git a/tolk/ast.cpp b/tolk/ast.cpp index ec060c58b..123dd8963 100644 --- a/tolk/ast.cpp +++ b/tolk/ast.cpp @@ -60,11 +60,15 @@ int Vertex::lookup_idx(std::string_view nameT) const { int 
Vertex::lookup_idx(std::string_view arg_name) const { for (size_t idx = 0; idx < children.size(); ++idx) { - if (children[idx] && children[idx]->as()->arg_name == arg_name) { + if (children[idx] && children[idx]->as()->get_identifier()->name == arg_name) { return static_cast(idx); } } return -1; } +void Vertex::mutate_set_src_file(const SrcFile* file) const { + const_cast(this)->file = file; +} + } // namespace tolk diff --git a/tolk/ast.h b/tolk/ast.h index c3fe13941..12b7da93b 100644 --- a/tolk/ast.h +++ b/tolk/ast.h @@ -27,7 +27,7 @@ * Historically, in FunC, there was no AST: while lexing, symbols were registered, types were inferred, and so on. * There was no way to perform any more or less semantic analysis. * In Tolk, I've implemented parsing .tolk files into AST at first, and then converting this AST - * into legacy representation (see ast-to-legacy.cpp). + * into legacy representation (see pipe-ast-to-legacy.cpp). * In the future, more and more code analysis will be moved out of legacy to AST-level. * * From the user's point of view, all AST vertices are constant. All API is based on constancy. 
@@ -206,8 +206,8 @@ struct ASTNodeVararg : ASTNodeBase { template<> struct Vertex final : ASTNodeLeaf { - Vertex() - : ASTNodeLeaf(ast_empty, SrcLocation()) {} + explicit Vertex(SrcLocation loc) + : ASTNodeLeaf(ast_empty, loc) {} }; template<> @@ -268,12 +268,13 @@ struct Vertex final : ASTNodeUnary { }; template<> -struct Vertex final : ASTNodeLeaf { - std::string_view var_name; +struct Vertex final : ASTNodeUnary { TypeExpr* declared_type; // may be nullptr - Vertex(SrcLocation loc, std::string_view var_name, TypeExpr* declared_type) - : ASTNodeLeaf(ast_global_var_declaration, loc), var_name(var_name), declared_type(declared_type) {} + auto get_identifier() const { return child->as(); } + + Vertex(SrcLocation loc, V var_identifier, TypeExpr* declared_type) + : ASTNodeUnary(ast_global_var_declaration, loc, var_identifier), declared_type(declared_type) {} }; template<> @@ -285,14 +286,14 @@ struct Vertex final : ASTNodeVararg { }; template<> -struct Vertex final : ASTNodeUnary { - std::string_view const_name; +struct Vertex final : ASTNodeBinary { TypeExpr* declared_type; // may be nullptr - AnyV get_init_value() const { return child; } + auto get_identifier() const { return lhs->as(); } + AnyV get_init_value() const { return rhs; } - Vertex(SrcLocation loc, std::string_view const_name, TypeExpr* declared_type, AnyV init_value) - : ASTNodeUnary(ast_constant_declaration, loc, init_value), const_name(const_name), declared_type(declared_type) {} + Vertex(SrcLocation loc, V const_identifier, TypeExpr* declared_type, AnyV init_value) + : ASTNodeBinary(ast_constant_declaration, loc, const_identifier, init_value), declared_type(declared_type) {} }; template<> @@ -478,12 +479,13 @@ struct Vertex final : ASTNodeVararg { }; template<> -struct Vertex final : ASTNodeLeaf { - std::string_view arg_name; +struct Vertex final : ASTNodeUnary { TypeExpr* arg_type; - Vertex(SrcLocation loc, std::string_view arg_name, TypeExpr* arg_type) - : ASTNodeLeaf(ast_argument, loc), 
arg_name(arg_name), arg_type(arg_type) {} + auto get_identifier() const { return child->as(); } + + Vertex(SrcLocation loc, V arg_identifier, TypeExpr* arg_type) + : ASTNodeUnary(ast_argument, loc, arg_identifier), arg_type(arg_type) {} }; template<> @@ -509,13 +511,13 @@ struct Vertex final : ASTNodeVararg { }; template<> -struct Vertex final : ASTNodeBinary { - int get_num_args() const { return lhs->as()->size(); } - auto get_arg_list() const { return lhs->as(); } - auto get_arg(int i) const { return lhs->as()->get_arg(i); } - AnyV get_body() const { return rhs; } // ast_sequence / ast_asm_body / ast_empty +struct Vertex final : ASTNodeVararg { + auto get_identifier() const { return children.at(0)->as(); } + int get_num_args() const { return children.at(1)->as()->size(); } + auto get_arg_list() const { return children.at(1)->as(); } + auto get_arg(int i) const { return children.at(1)->as()->get_arg(i); } + AnyV get_body() const { return children.at(2); } // ast_sequence / ast_asm_body - std::string name; TypeExpr* ret_type = nullptr; V forall_list = nullptr; bool marked_as_pure = false; @@ -525,8 +527,10 @@ struct Vertex final : ASTNodeBinary { bool marked_as_inline_ref = false; V method_id = nullptr; - Vertex(SrcLocation loc, std::string name, V args, AnyV body) - : ASTNodeBinary(ast_function_declaration, loc, args, body), name(std::move(name)) {} + bool is_asm_function() const { return children.at(2)->type == ast_asm_body; } + + Vertex(SrcLocation loc, V name_identifier, V args, AnyV body) + : ASTNodeVararg(ast_function_declaration, loc, {name_identifier, args, body}) {} }; template<> @@ -547,11 +551,17 @@ struct Vertex final : ASTNodeLeaf { }; template<> -struct Vertex final : ASTNodeLeaf { - std::string_view file_name; +struct Vertex final : ASTNodeUnary { + const SrcFile* file = nullptr; // assigned after includes have been resolved + + auto get_file_leaf() const { return child->as(); } + + std::string get_file_name() const { return 
static_cast(child->as()->str_val); } + + void mutate_set_src_file(const SrcFile* file) const; - Vertex(SrcLocation loc, std::string_view file_name) - : ASTNodeLeaf(ast_include_statement, loc), file_name(file_name) {} + Vertex(SrcLocation loc, V file_name) + : ASTNodeUnary(ast_include_statement, loc, file_name) {} }; template<> diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index ddcb26305..4b31d1779 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -27,11 +27,8 @@ using namespace std::literals::string_literals; */ SymDef* define_builtin_func_impl(const std::string& name, SymValAsmFunc* func_val) { - if (name.back() == '_') { - G.prohibited_var_names.insert(name); - } - sym_idx_t name_idx = G.symbols.lookup(name, 1); - SymDef* def = define_global_symbol(name_idx, true); + sym_idx_t name_idx = G.symbols.lookup_add(name); + SymDef* def = define_global_symbol(name_idx); if (!def) { std::cerr << "fatal: global function `" << name << "` already defined" << std::endl; std::exit(1); diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index d20f5e953..324a21afc 100644 --- a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -74,12 +74,9 @@ struct CompilerState { std::vector> symbol_stack; std::vector scope_opened_at; + std::vector all_code_functions, all_global_vars, all_get_methods, all_constants; AllRegisteredSrcFiles all_src_files; - int glob_func_cnt = 0, glob_var_cnt = 0, const_cnt = 0; - std::vector glob_func, glob_vars, glob_get_methods; - std::set prohibited_var_names; - std::string generated_from; GlobalPragma pragma_allow_post_modification{"allow-post-modification"}; GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"}; diff --git a/tolk/gen-abscode.cpp b/tolk/gen-abscode.cpp index b95b434ad..09a926867 100644 --- a/tolk/gen-abscode.cpp +++ b/tolk/gen-abscode.cpp @@ -185,10 +185,6 @@ int Expr::predefine_vars() { case _Var: if (!sym) { tolk_assert(val < 0 && here.is_defined()); - if (G.prohibited_var_names.count(G.symbols.get_name(~val))) { - 
throw ParseError{ - here, PSTRING() << "symbol `" << G.symbols.get_name(~val) << "` cannot be redefined as a variable"}; - } sym = define_symbol(~val, false, here); // std::cerr << "predefining variable " << symbols.get_name(~val) << std::endl; if (!sym) { @@ -319,7 +315,13 @@ std::vector Expr::pre_compile(CodeBlob& code, std::vector(applied_sym->value); // replace `beginCell()` with `begin_cell()` + // todo it should be done at AST level, see comment above detect_if_function_just_wraps_another() if (func && func->is_just_wrapper_for_another_f()) { + // todo currently, f is inlined only if anotherF is declared (and processed) before + if (!dynamic_cast(func)->code) { // if anotherF is processed after + func->flags |= SymValFunc::flagUsedAsNonCall; + res = pre_compile_tensor(args, code, lval_globs); + } else { // body is { Op::_Import; Op::_Call; Op::_Return; } const std::unique_ptr& op_call = dynamic_cast(func)->code->ops->next; applied_sym = op_call->fun_ref; @@ -331,6 +333,7 @@ std::vector Expr::pre_compile(CodeBlob& code, std::vectorright) { res.emplace_back(res_inner[right_idx]); } + } } else { res = pre_compile_tensor(args, code, lval_globs); } diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp index a9682e52e..d2c05f348 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -629,7 +629,7 @@ void lexer_init() { // Hence, it's difficult to measure Lexer performance separately. // This function can be called just to tick Lexer performance, it just scans all input files. // There is no sense to use it in production, but when refactoring and optimizing Lexer, it's useful. 
-void lexer_measure_performance(const std::vector& files_to_just_parse) { +void lexer_measure_performance(const AllSrcFiles& files_to_just_parse) { for (const SrcFile* file : files_to_just_parse) { Lexer lex(file); while (!lex.is_eof()) { diff --git a/tolk/lexer.h b/tolk/lexer.h index b24efa9d0..1c8188fc3 100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -234,6 +234,6 @@ class Lexer { void lexer_init(); // todo #ifdef TOLK_PROFILING -void lexer_measure_performance(const std::vector& files_to_just_parse); +void lexer_measure_performance(const AllSrcFiles& files_to_just_parse); } // namespace tolk diff --git a/tolk/ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp similarity index 50% rename from tolk/ast-to-legacy.cpp rename to tolk/pipe-ast-to-legacy.cpp index 8b0e78103..6db123746 100644 --- a/tolk/ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -14,18 +14,15 @@ You should have received a copy of the GNU Lesser General Public License along with TON Blockchain Library. If not, see . */ -#include "ast-to-legacy.h" +#include "tolk.h" +#include "src-file.h" #include "ast.h" -#include "ast-visitor.h" -#include "ast-from-tokens.h" // todo should be deleted #include "compiler-state.h" -#include "src-file.h" -#include "tolk.h" -#include "td/utils/crypto.h" #include "common/refint.h" #include "openssl/digest.hpp" #include "block/block.h" #include "block-parse.h" +#include "td/utils/crypto.h" /* * In this module, we convert modern AST representation to legacy representation @@ -40,35 +37,12 @@ static int calc_sym_idx(std::string_view sym_name) { return G.symbols.lookup_add(sym_name); } - Expr* process_expr(AnyV v, CodeBlob& code, bool nv = false); -static SymValCodeFunc* make_new_glob_func(SymDef* func_sym, TypeExpr* func_type, bool marked_as_pure) { - SymValCodeFunc* res = new SymValCodeFunc{G.glob_func_cnt, func_type, marked_as_pure}; -#ifdef TOLK_DEBUG - res->name = func_sym->name(); -#endif - func_sym->value = res; - G.glob_func.push_back(func_sym); - 
G.glob_func_cnt++; - return res; -} - -static bool check_global_func(SrcLocation loc, sym_idx_t func_name) { - SymDef* def = lookup_symbol(func_name); - if (!def) { +static void check_global_func(SrcLocation loc, sym_idx_t func_name) { + SymDef* sym_def = lookup_symbol(func_name); + if (!sym_def) { throw ParseError(loc, "undefined symbol `" + G.symbols.get_name(func_name) + "`"); - return false; - } - SymVal* val = dynamic_cast(def->value); - if (!val) { - throw ParseError(loc, "symbol `" + G.symbols.get_name(func_name) + "` has no value and no type"); - return false; - } else if (!val->get_type()) { - throw ParseError(loc, "symbol `" + G.symbols.get_name(func_name) + "` has no type, possibly not a function"); - return false; - } else { - return true; } } @@ -103,7 +77,7 @@ static void check_import_exists_when_using_sym(AnyV v_usage, const SymDef* used_ } } -Expr* process_expr(V v, CodeBlob& code, bool nv) { +static Expr* process_expr(V v, CodeBlob& code, bool nv) { TokenType t = v->tok; std::string operator_name = static_cast(v->operator_name); @@ -114,7 +88,6 @@ Expr* process_expr(V v, CodeBlob& code, bool nv) { x->chk_lvalue(); x->chk_rvalue(); sym_idx_t name = G.symbols.lookup_add("^_" + operator_name + "_"); - check_global_func(v->loc, name); Expr* y = process_expr(v->get_rhs(), code, false); y->chk_rvalue(); Expr* z = new Expr{Expr::_Apply, name, {x, y}}; @@ -152,7 +125,6 @@ Expr* process_expr(V v, CodeBlob& code, bool nv) { Expr* res = process_expr(v->get_lhs(), code, nv); res->chk_rvalue(); sym_idx_t name = G.symbols.lookup_add("_" + operator_name + "_"); - check_global_func(v->loc, name); Expr* x = process_expr(v->get_rhs(), code, false); x->chk_rvalue(); res = new Expr{Expr::_Apply, name, {res, x}}; @@ -166,10 +138,9 @@ Expr* process_expr(V v, CodeBlob& code, bool nv) { v->error("unsupported binary operator"); } -Expr* process_expr(V v, CodeBlob& code) { +static Expr* process_expr(V v, CodeBlob& code) { TokenType t = v->tok; sym_idx_t name = 
G.symbols.lookup_add(static_cast(v->operator_name) + "_"); - check_global_func(v->loc, name); Expr* x = process_expr(v->get_rhs(), code, false); x->chk_rvalue(); @@ -200,7 +171,7 @@ Expr* process_expr(V v, CodeBlob& code) { return res; } -Expr* process_expr(V v, CodeBlob& code, bool nv) { +static Expr* process_expr(V v, CodeBlob& code, bool nv) { Expr* res = process_expr(v->get_lhs(), code, nv); bool modify = v->method_name[0] == '~'; Expr* obj = res; @@ -209,23 +180,20 @@ Expr* process_expr(V v, CodeBlob& code, bool nv) { } else { obj->chk_rvalue(); } - sym_idx_t name = calc_sym_idx(v->method_name); - const SymDef* sym = lookup_symbol(name); + sym_idx_t name_idx = calc_sym_idx(v->method_name); + const SymDef* sym = lookup_symbol(name_idx); if (!sym || !dynamic_cast(sym->value)) { sym_idx_t name1 = G.symbols.lookup(v->method_name.substr(1)); if (name1) { const SymDef* sym1 = lookup_symbol(name1); if (sym1 && dynamic_cast(sym1->value)) { - name = name1; + name_idx = name1; sym = sym1; } } } - check_global_func(v->loc, name); - if (G.is_verbosity(2)) { - std::cerr << "using symbol `" << G.symbols.get_name(name) << "` for method call of " << v->method_name << std::endl; - } - sym = lookup_symbol(name); + check_global_func(v->loc, name_idx); + sym = lookup_symbol(name_idx); SymValFunc* val = sym ? dynamic_cast(sym->value) : nullptr; if (!val) { v->error("undefined method call"); @@ -233,10 +201,10 @@ Expr* process_expr(V v, CodeBlob& code, bool nv) { Expr* x = process_expr(v->get_arg(), code, false); x->chk_rvalue(); if (x->cls == Expr::_Tensor) { - res = new Expr{Expr::_Apply, name, {obj}}; + res = new Expr{Expr::_Apply, name_idx, {obj}}; res->args.insert(res->args.end(), x->args.begin(), x->args.end()); } else { - res = new Expr{Expr::_Apply, name, {obj, x}}; + res = new Expr{Expr::_Apply, name_idx, {obj, x}}; } res->here = v->loc; res->flags = Expr::_IsRvalue | (val->is_marked_as_pure() ? 
0 : Expr::_IsImpure); @@ -246,13 +214,13 @@ Expr* process_expr(V v, CodeBlob& code, bool nv) { res = new Expr{Expr::_LetFirst, {obj->copy(), tmp}}; res->here = v->loc; res->flags = tmp->flags; - res->set_val(name); + res->set_val(name_idx); res->deduce_type(); } return res; } -Expr* process_expr(V v, CodeBlob& code, bool nv) { +static Expr* process_expr(V v, CodeBlob& code, bool nv) { Expr* cond = process_expr(v->get_cond(), code, nv); cond->chk_rvalue(); Expr* x = process_expr(v->get_when_true(), code, false); @@ -266,7 +234,7 @@ Expr* process_expr(V v, CodeBlob& code, bool nv) { return res; } -Expr* process_expr(V v, CodeBlob& code, bool nv) { +static Expr* process_expr(V v, CodeBlob& code, bool nv) { Expr* res = process_expr(v->get_called_f(), code, nv); Expr* x = process_expr(v->get_called_arg(), code, false); x->chk_rvalue(); @@ -276,7 +244,7 @@ Expr* process_expr(V v, CodeBlob& code, bool nv) { return res; } -Expr* process_expr(V v, CodeBlob& code, bool nv) { +static Expr* process_expr(V v, CodeBlob& code, bool nv) { if (v->empty()) { Expr* res = new Expr{Expr::_Tensor, {}}; res->flags = Expr::_IsRvalue; @@ -302,7 +270,7 @@ Expr* process_expr(V v, CodeBlob& code, bool nv) { return res; } -Expr* process_expr(V v, CodeBlob& code) { +static Expr* process_expr(V v, CodeBlob& code) { Expr* x = process_expr(v->get_variable_or_list(), code, true); x->chk_lvalue(); // chk_lrvalue() ? 
Expr* res = new Expr{Expr::_TypeApply, {x}}; @@ -320,7 +288,7 @@ Expr* process_expr(V v, CodeBlob& code) { return res; } -Expr* process_expr(V v, CodeBlob& code, bool nv) { +static Expr* process_expr(V v, CodeBlob& code, bool nv) { if (v->empty()) { Expr* res = new Expr{Expr::_Tensor, {}}; res->flags = Expr::_IsRvalue; @@ -354,7 +322,7 @@ Expr* process_expr(V v, CodeBlob& code, bool nv) { return res; } -Expr* process_expr(V v) { +static Expr* process_expr(V v) { Expr* res = new Expr{Expr::_Const, v->loc}; res->flags = Expr::_IsRvalue; res->intval = td::string_to_int256(static_cast(v->int_val)); @@ -365,7 +333,7 @@ Expr* process_expr(V v) { return res; } -Expr* process_expr(V v) { +static Expr* process_expr(V v) { std::string str = static_cast(v->str_val); Expr* res; switch (v->modifier) { @@ -432,12 +400,12 @@ Expr* process_expr(V v) { break; } default: - __builtin_unreachable(); + tolk_assert(false); } return res; } -Expr* process_expr(V v) { +static Expr* process_expr(V v) { SymDef* sym = lookup_symbol(calc_sym_idx(v->bool_val ? 
"true" : "false")); tolk_assert(sym); Expr* res = new Expr{Expr::_Apply, sym, {}}; @@ -446,7 +414,7 @@ Expr* process_expr(V v) { return res; } -Expr* process_expr([[maybe_unused]] V v) { +static Expr* process_expr([[maybe_unused]] V v) { SymDef* sym = lookup_symbol(calc_sym_idx("nil")); tolk_assert(sym); Expr* res = new Expr{Expr::_Apply, sym, {}}; @@ -455,8 +423,15 @@ Expr* process_expr([[maybe_unused]] V v) { return res; } -Expr* process_expr(V v, bool nv) { +static Expr* process_expr(V v, bool nv) { SymDef* sym = lookup_symbol(calc_sym_idx(v->name)); + if (nv && sym) { + if (sym->level != G.scope_level) { + sym = nullptr; // declaring a new variable with the same name, but in another scope + } else { + v->error("redeclaration of local variable `" + static_cast(v->name) + "`"); + } + } if (sym && dynamic_cast(sym->value)) { check_import_exists_when_using_sym(v, sym); auto val = dynamic_cast(sym->value); @@ -587,7 +562,7 @@ void combine_parallel(val& x, const val y) { } } // namespace blk_fl -blk_fl::val process_vertex(V v, CodeBlob& code) { +static blk_fl::val process_vertex(V v, CodeBlob& code) { Expr* expr = process_expr(v->get_return_value(), code); expr->chk_rvalue(); try { @@ -604,7 +579,7 @@ blk_fl::val process_vertex(V v, CodeBlob& code) { return blk_fl::ret; } -void append_implicit_ret_stmt(V v, CodeBlob& code) { +static void append_implicit_ret_stmt(V v, CodeBlob& code) { TypeExpr* ret_type = TypeExpr::new_unit(); try { // std::cerr << "in implicit return: "; @@ -620,7 +595,7 @@ void append_implicit_ret_stmt(V v, CodeBlob& code) { blk_fl::val process_stmt(AnyV v, CodeBlob& code); -blk_fl::val process_vertex(V v, CodeBlob& code, bool no_new_scope = false) { +static blk_fl::val process_vertex(V v, CodeBlob& code, bool no_new_scope = false) { if (!no_new_scope) { open_scope(v->loc); } @@ -639,7 +614,7 @@ blk_fl::val process_vertex(V v, CodeBlob& code, bool no_new_scope return res; } -blk_fl::val process_vertex(V v, CodeBlob& code) { +static blk_fl::val 
process_vertex(V v, CodeBlob& code) { Expr* expr = process_expr(v->get_cond(), code); expr->chk_rvalue(); auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); @@ -661,7 +636,7 @@ blk_fl::val process_vertex(V v, CodeBlob& code) { return res | blk_fl::end; } -blk_fl::val process_vertex(V v, CodeBlob& code) { +static blk_fl::val process_vertex(V v, CodeBlob& code) { Expr* expr = process_expr(v->get_cond(), code); expr->chk_rvalue(); auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); @@ -685,7 +660,7 @@ blk_fl::val process_vertex(V v, CodeBlob& code) { return res1 | blk_fl::end; } -blk_fl::val process_vertex(V v, CodeBlob& code) { +static blk_fl::val process_vertex(V v, CodeBlob& code) { Op& until_op = code.emplace_back(v->loc, Op::_Until); code.push_set_cur(until_op.block0); open_scope(v->loc); @@ -709,7 +684,7 @@ blk_fl::val process_vertex(V v, CodeBlob& code) { return res & ~blk_fl::empty; } -blk_fl::val process_vertex(V v, CodeBlob& code) { +static blk_fl::val process_vertex(V v, CodeBlob& code) { code.require_callxargs = true; Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); code.push_set_cur(try_catch_op.block0); @@ -738,7 +713,7 @@ blk_fl::val process_vertex(V v, CodeBlob& code) { return res0; } -blk_fl::val process_vertex(V v, CodeBlob& code, TokenType first_lex = tok_if) { +static blk_fl::val process_vertex(V v, CodeBlob& code) { Expr* expr = process_expr(v->get_cond(), code); expr->chk_rvalue(); auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int); @@ -795,14 +770,11 @@ blk_fl::val process_stmt(AnyV v, CodeBlob& code) { } } -FormalArg process_vertex(V v, int fa_idx) { - if (v->arg_name.empty()) { +static FormalArg process_vertex(V v, int fa_idx) { + if (v->get_identifier()->name.empty()) { return std::make_tuple(v->arg_type, (SymDef*)nullptr, v->loc); } - if (G.prohibited_var_names.count(static_cast(v->arg_name))) { - v->error("symbol `" + static_cast(v->arg_name) + "` cannot be redefined as a variable"); - } - SymDef* new_sym_def = 
define_symbol(calc_sym_idx(v->arg_name), true, v->loc); + SymDef* new_sym_def = define_symbol(calc_sym_idx(v->get_identifier()->name), true, v->loc); if (!new_sym_def) { v->error("cannot define symbol"); } @@ -813,16 +785,22 @@ FormalArg process_vertex(V v, int fa_idx) { return std::make_tuple(v->arg_type, new_sym_def, v->loc); } -CodeBlob* process_vertex(V v_body, V arg_list, TypeExpr* ret_type, bool marked_as_pure) { - CodeBlob* blob = new CodeBlob{ret_type}; - if (marked_as_pure) { +static void convert_function_body_to_CodeBlob(V v, V v_body) { + SymDef* sym_def = lookup_symbol(calc_sym_idx(v->get_identifier()->name)); + SymValCodeFunc* sym_val = dynamic_cast(sym_def->value); + tolk_assert(sym_val != nullptr); + + open_scope(v->loc); + CodeBlob* blob = new CodeBlob{static_cast(v->get_identifier()->name), v->loc, v->ret_type}; + if (v->marked_as_pure) { blob->flags |= CodeBlob::_ForbidImpure; } FormalArgList legacy_arg_list; - for (int i = 0; i < arg_list->size(); ++i) { - legacy_arg_list.emplace_back(process_vertex(arg_list->get_arg(i), i)); + for (int i = 0; i < v->get_num_args(); ++i) { + legacy_arg_list.emplace_back(process_vertex(v->get_arg(i), i)); } blob->import_params(std::move(legacy_arg_list)); + blk_fl::val res = blk_fl::init; bool warned = false; for (AnyV item : v_body->get_items()) { @@ -835,69 +813,24 @@ CodeBlob* process_vertex(V v_body, V arg_list, if (res & blk_fl::end) { append_implicit_ret_stmt(v_body, *blob); } + blob->close_blk(v_body->loc_end); - return blob; + close_scope(); + sym_val->set_code(blob); } -SymValAsmFunc* process_vertex(V v_body, TypeExpr* func_type, V arg_list, TypeExpr* ret_type, - bool marked_as_pure) { - int cnt = arg_list->size(); - int width = ret_type->get_width(); - if (width < 0 || width > 16) { - v_body->error("return type of an assembler built-in function must have a well-defined fixed width"); - } - if (cnt > 16) { - v_body->error("assembler built-in function must have at most 16 arguments"); - } - std::vector 
cum_arg_width; - cum_arg_width.push_back(0); - int tot_width = 0; - for (int i = 0; i < cnt; ++i) { - V arg = arg_list->get_arg(i); - int arg_width = arg->arg_type->get_width(); - if (arg_width < 0 || arg_width > 16) { - arg->error("parameters of an assembler built-in function must have a well-defined fixed width"); - } - cum_arg_width.push_back(tot_width += arg_width); - } +static void convert_asm_body_to_AsmOp(V v, V v_body) { + SymDef* sym_def = lookup_symbol(calc_sym_idx(v->get_identifier()->name)); + SymValAsmFunc* sym_val = dynamic_cast(sym_def->value); + tolk_assert(sym_val != nullptr); + + int cnt = v->get_num_args(); + int width = v->ret_type->get_width(); std::vector asm_ops; - std::vector arg_order, ret_order; - if (!v_body->arg_order.empty()) { - if (static_cast(v_body->arg_order.size()) != cnt) { - v_body->error("arg_order of asm function must specify all arguments"); - } - std::vector visited(cnt, false); - for (int i = 0; i < cnt; ++i) { - int j = v_body->arg_order[i]; - if (visited[j]) { - v_body->error("arg_order of asm function contains duplicates"); - } - visited[j] = true; - int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; - while (c1 < c2) { - arg_order.push_back(c1++); - } - } - tolk_assert(arg_order.size() == (unsigned)tot_width); - } - if (!v_body->ret_order.empty()) { - if (static_cast(v_body->ret_order.size()) != width) { - v_body->error("ret_order of this asm function expected to be width = " + std::to_string(width)); - } - std::vector visited(width, false); - for (int i = 0; i < width; ++i) { - int j = v_body->ret_order[i]; - if (j < 0 || j >= width || visited[j]) { - v_body->error("ret_order contains invalid integer, not in range 0 .. width-1"); - } - visited[j] = true; - } - ret_order = v_body->ret_order; - } for (AnyV v_child : v_body->get_asm_commands()) { std::string_view ops = v_child->as()->str_val; // \n\n... 
std::string op; - for (const char& c : ops) { + for (char c : ops) { if (c == '\n' || c == '\r') { if (!op.empty()) { asm_ops.push_back(AsmOp::Parse(op, cnt, width)); @@ -917,522 +850,31 @@ SymValAsmFunc* process_vertex(V v_body, TypeExpr* func_type, Varg_order = std::move(arg_order); - res->ret_order = std::move(ret_order); - res->crc = td::crc64(crc_s); - return res; -} -// if a function looks like `T f(...args) { return anotherF(...args); }`, -// set a bit to flags -// then, all calls to `f(...)` will be effectively replaced with `anotherF(...)` -void detect_if_function_just_wraps_another(SymValCodeFunc* v_current, const td::RefInt256 &method_id) { - const std::string& function_name = v_current->code->name; - - // in "AST" representation, the first is Op::_Import (input arguments, even if none) - const auto& op_import = v_current->code->ops; - tolk_assert(op_import && op_import->cl == Op::_Import); - - // then Op::_Call (anotherF) - const Op* op_call = op_import->next.get(); - if (!op_call || op_call->cl != Op::_Call) - return; - tolk_assert(op_call->left.size() == 1); - - const auto& op_return = op_call->next; - if (!op_return || op_return->cl != Op::_Return || op_return->left.size() != 1) - return; - - bool indices_expected = op_import->left.size() == op_call->left[0] && op_call->left[0] == op_return->left[0]; - if (!indices_expected) - return; - - const SymDef* f_called = op_call->fun_ref; - const SymValFunc* v_called = dynamic_cast(f_called->value); - if (!v_called) - return; - - // `return` must use all arguments, e.g. 
`return (_0,_2,_1)`, not `return (_0,_1,_1)` - int args_used_mask = 0; - for (var_idx_t arg_idx : op_call->right) { - args_used_mask |= 1 << arg_idx; - } - if (args_used_mask != (1 << op_call->right.size()) - 1) - return; - - // detect getters (having method_id), they should not be treated as wrappers - // v_current->method_id will be assigned later; todo refactor function parsing completely, it's weird - // moreover, `recv_external()` and others are also exported, but FunC is unaware of method_id - // (it's assigned by Fift later) - // so, for now, just handle "special" function names, the same as in Asm.fif - if (!method_id.is_null()) - return; - if (function_name == "main" || function_name == "recv_internal" || function_name == "recv_external" || - function_name == "run_ticktock" || function_name == "split_prepare" || function_name == "split_install") - return; - - // all types must be strictly defined (on mismatch, a compilation error will be triggered anyway) - if (v_called->sym_type->has_unknown_inside() || v_current->sym_type->has_unknown_inside()) - return; - // avoid situations like `f(int a, (int,int) b)`, inlining will be cumbersome - if (v_current->get_arg_type()->get_width() != op_call->right.size()) - return; - // 'return true;' (false, nil) are (surprisingly) also function calls - if (f_called->name() == "true" || f_called->name() == "false" || f_called->name() == "nil") - return; - // if an original is marked `pure`, and this one doesn't, it's okay; just check for inline_ref storage - if (v_current->is_inline_ref()) - return; - - // ok, f_current is a wrapper - v_current->flags |= SymValFunc::flagWrapsAnotherF; - if (G.is_verbosity(2)) { - std::cerr << function_name << " -> " << f_called->name() << std::endl; - } + sym_val->set_code(std::move(asm_ops)); } -static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name) { - unsigned int crc = td::crc16(static_cast(func_name)); - return td::make_refint((crc & 0xffff) | 0x10000); -} 
-void process_vertex(V v_function) { - open_scope(v_function->loc); - std::vector type_vars; - if (v_function->forall_list) { - type_vars.reserve(v_function->forall_list->size()); - for (int idx = 0; idx < v_function->forall_list->size(); ++idx) { - type_vars.emplace_back(v_function->forall_list->get_item(idx)->created_type); - } - } - std::string func_name = v_function->name; - int func_sym_idx = calc_sym_idx(func_name); - int flags_inline = 0; - if (v_function->marked_as_inline) { - flags_inline = SymValFunc::flagInline; - } else if (v_function->marked_as_inline_ref) { - flags_inline = SymValFunc::flagInlineRef; - } - td::RefInt256 method_id; - if (v_function->method_id) { - method_id = td::string_to_int256(static_cast(v_function->method_id->int_val)); - if (method_id.is_null()) { - v_function->method_id->error("invalid integer constant"); - } - } else if (v_function->marked_as_get_method) { - method_id = calculate_method_id_by_func_name(func_name); - for (const SymDef* other : G.glob_get_methods) { - if (!td::cmp(dynamic_cast(other->value)->method_id, method_id)) { - v_function->error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" + func_name + "` produce the same hash. Consider renaming one of these functions."); - } - } - } - TypeExpr* arg_list_type = nullptr; - if (int n_args = v_function->get_num_args()) { - std::vector arg_types; - arg_types.reserve(n_args); - for (int idx = 0; idx < n_args; ++idx) { - arg_types.emplace_back(v_function->get_arg(idx)->arg_type); - } - arg_list_type = TypeExpr::new_tensor(std::move(arg_types)); - } else { - arg_list_type = TypeExpr::new_unit(); - } - TypeExpr* func_type = TypeExpr::new_map(arg_list_type, v_function->ret_type); - if (!type_vars.empty()) { - func_type = TypeExpr::new_forall(std::move(type_vars), func_type); - } - if (v_function->marked_as_builtin) { - const SymDef* builtin_func = lookup_symbol(G.symbols.lookup(func_name)); - const SymValFunc* func_val = builtin_func ? 
dynamic_cast(builtin_func->value) : nullptr; - if (!func_val || !func_val->is_builtin()) { - v_function->error("`builtin` used for non-builtin function"); - } -#ifdef TOLK_DEBUG - // in release, we don't need this check, since `builtin` is used only in stdlib.tolk, which is our responsibility - if (!func_val->sym_type->equals_to(func_type) || func_val->is_marked_as_pure() != v_function->marked_as_pure) { - v_function->error("declaration for `builtin` function doesn't match an actual one"); - } -#endif - close_scope(); - return; - } - if (G.is_verbosity(1)) { - std::cerr << "fun " << func_name << " : " << func_type << std::endl; - } - SymDef* func_sym = define_global_symbol(func_sym_idx, 0, v_function->loc); - tolk_assert(func_sym); - SymValFunc* func_sym_val = dynamic_cast(func_sym->value); - if (func_sym->value) { - // todo remove all about pre-declarations and prototypes - if (func_sym->value->kind != SymValKind::_Func || !func_sym_val) { - v_function->error("was not defined as a function before"); - } - try { - unify(func_sym_val->sym_type, func_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "previous type of function " << func_name << " : " << func_sym_val->sym_type - << " cannot be unified with new type " << func_type << ": " << ue; - v_function->error(os.str()); - } - } - if (v_function->get_body()->type == ast_empty) { - make_new_glob_func(func_sym, func_type, v_function->marked_as_pure); - } else if (const auto* v_seq = v_function->get_body()->try_as()) { - if (dynamic_cast(func_sym_val)) { - v_function->error("function `" + func_name + "` has been already defined as an assembler built-in"); - } - SymValCodeFunc* func_sym_code; - if (func_sym_val) { - func_sym_code = dynamic_cast(func_sym_val); - if (!func_sym_code) { - v_function->error("function `" + func_name + "` has been already defined in an yet-unknown way"); - } - } else { - func_sym_code = make_new_glob_func(func_sym, func_type, v_function->marked_as_pure); - } - if 
(func_sym_code->code) { - v_function->error("redefinition of function `" + func_name + "`"); - } - if (v_function->marked_as_pure && v_function->ret_type->get_width() == 0) { - v_function->error("a pure function should return something, otherwise it will be optimized out anyway"); - } - CodeBlob* code = process_vertex(v_seq, v_function->get_arg_list(), v_function->ret_type, v_function->marked_as_pure); - code->name = func_name; - code->loc = v_function->loc; - func_sym_code->code = code; - // todo it should be done not here, it should be on ast level, it should work when functions are declared swapped - detect_if_function_just_wraps_another(func_sym_code, method_id); - } else if (const auto* v_asm = v_function->get_body()->try_as()) { - SymValAsmFunc* asm_func = process_vertex(v_asm, func_type, v_function->get_arg_list(), v_function->ret_type, v_function->marked_as_pure); -#ifdef TOLK_DEBUG - asm_func->name = func_name; -#endif - if (func_sym_val) { - if (dynamic_cast(func_sym_val)) { - v_function->error("function `" + func_name + "` was already declared as an ordinary function"); - } - SymValAsmFunc* asm_func_old = dynamic_cast(func_sym_val); - if (asm_func_old) { - if (asm_func->crc != asm_func_old->crc) { - v_function->error("redefinition of built-in assembler function `" + func_name + "`"); - } - } else { - v_function->error("redefinition of previously (somehow) defined function `" + func_name + "`"); - } - } - func_sym->value = asm_func; - } - if (method_id.not_null()) { - auto val = dynamic_cast(func_sym->value); - if (!val) { - v_function->error("cannot set method id for unknown function `" + func_name + "`"); - } - if (val->method_id.is_null()) { - val->method_id = std::move(method_id); - } else if (td::cmp(val->method_id, method_id) != 0) { - v_function->error("integer method identifier for `" + func_name + "` changed from " + - val->method_id->to_dec_string() + " to a different value " + method_id->to_dec_string()); - } - } - if (flags_inline) { - auto 
val = dynamic_cast(func_sym->value); - if (!val) { - v_function->error("cannot set unknown function `" + func_name + "` as an inline"); - } - if (!val->is_inline() && !val->is_inline_ref()) { - val->flags |= flags_inline; - } else if ((val->flags & (SymValFunc::flagInline | SymValFunc::flagInlineRef)) != flags_inline) { - v_function->error("inline mode for `" + func_name + "` changed with respect to a previous declaration"); - } - } - if (v_function->marked_as_get_method) { - auto val = dynamic_cast(func_sym->value); - if (!val) { - v_function->error("cannot set unknown function `" + func_name + "` as a get method"); - } - val->flags |= SymValFunc::flagGetMethod; - G.glob_get_methods.push_back(func_sym); - } - close_scope(); -} - -td::Result locate_source_file(const std::string& rel_filename) { - td::Result path = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::Realpath, rel_filename.c_str()); - if (path.is_error()) { - return path.move_as_error(); - } - - std::string abs_filename = path.move_as_ok(); - if (SrcFile* file = G.all_src_files.find_file(abs_filename)) { - return file; // file was already parsed (imported from somewhere else) - } - - td::Result text = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::ReadFile, abs_filename.c_str()); - if (text.is_error()) { - return text.move_as_error(); - } - - return G.all_src_files.register_file(rel_filename, abs_filename, text.move_as_ok()); -} - -void process_vertex(V v) { - std::string_view pragma_name = v->pragma_name; - if (pragma_name == G.pragma_allow_post_modification.name()) { - G.pragma_allow_post_modification.enable(v->loc); - } else if (pragma_name == G.pragma_compute_asm_ltr.name()) { - G.pragma_compute_asm_ltr.enable(v->loc); - } else if (pragma_name == G.pragma_remove_unused_functions.name()) { - G.pragma_remove_unused_functions.enable(v->loc); - } else { - v->error("unknown pragma name"); - } -} - -void process_vertex(V v) { - char op = '='; bool eq = false; - TokenType 
cmp_tok = v->cmp_tok; - if (cmp_tok == tok_gt || cmp_tok == tok_geq) { - op = '>'; - eq = cmp_tok == tok_geq; - } else if (cmp_tok == tok_lt || cmp_tok == tok_leq) { - op = '<'; - eq = cmp_tok == tok_leq; - } else if (cmp_tok == tok_eq) { - op = '='; - } else if (cmp_tok == tok_bitwise_xor) { - op = '^'; - } else { - v->error("invalid comparison operator"); - } - std::string_view pragma_value = v->semver; - int sem_ver[3] = {0, 0, 0}; - char segs = 1; - auto stoi = [&](std::string_view s) { - auto R = td::to_integer_safe(static_cast(s)); - if (R.is_error()) { - v->error("invalid semver format"); - } - return R.move_as_ok(); - }; - std::istringstream iss_value(static_cast(pragma_value)); - for (int idx = 0; idx < 3; idx++) { - std::string s{"0"}; - std::getline(iss_value, s, '.'); - sem_ver[idx] = stoi(s); - } - // End reading semver from source code - int tolk_ver[3] = {0, 0, 0}; - std::istringstream iss(tolk_version); - for (int idx = 0; idx < 3; idx++) { - std::string s; - std::getline(iss, s, '.'); - tolk_ver[idx] = stoi(s); - } - // End parsing embedded semver - bool match = true; - switch (op) { - case '=': - if ((tolk_ver[0] != sem_ver[0]) || - (tolk_ver[1] != sem_ver[1]) || - (tolk_ver[2] != sem_ver[2])) { - match = false; - } - break; - case '>': - if ( ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] < sem_ver[2])) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] < sem_ver[1])) || - ((tolk_ver[0] < sem_ver[0])) ) { - match = false; - } - break; - case '<': - if ( ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] > sem_ver[2])) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] > sem_ver[1])) || - ((tolk_ver[0] > sem_ver[0])) ) { - match = false; - } - break; - case '^': - if ( ((segs == 3) && 
((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] != sem_ver[1]) || (tolk_ver[2] < sem_ver[2]))) - || ((segs == 2) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] < sem_ver[1]))) - || ((segs == 1) && ((tolk_ver[0] < sem_ver[0]))) ) { - match = false; - } - break; - default: - __builtin_unreachable(); - } - if (!match) { - v->error("Tolk version " + tolk_version + " does not satisfy this condition"); - } -} - -void process_vertex(V v, SrcFile* current_file) { - std::string rel_filename = static_cast(v->file_name); - if (size_t rc = current_file->rel_filename.rfind('/'); rc != std::string::npos) { - rel_filename = current_file->rel_filename.substr(0, rc + 1) + rel_filename; - } - - td::Result locate_res = locate_source_file(rel_filename); - if (locate_res.is_error()) { - v->error("Failed to import: " + locate_res.move_as_error().message().str()); - } +void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles& all_src_files) { + for (const SrcFile* file : all_src_files) { + tolk_assert(file->ast); - SrcFile* imported_file = locate_res.move_as_ok(); - current_file->imports.emplace_back(SrcFile::ImportStatement{imported_file}); - if (!imported_file->was_parsed) { - // todo it's wrong, but ok for now - process_file_ast(parse_src_file_to_ast(imported_file)); - } -} - -void process_vertex(V v) { - AnyV init_value = v->get_init_value(); - SymDef* sym_def = define_global_symbol(calc_sym_idx(v->const_name), false, v->loc); - if (!sym_def) { - v->error("cannot define global symbol"); - } - if (sym_def->value) { - v->error("symbol already exists"); - } - CodeBlob code; - Expr* x = process_expr(init_value, code, false); - if (!x->is_rvalue()) { - v->get_init_value()->error("expression is not strictly Rvalue"); - } - if (v->declared_type && !v->declared_type->equals_to(x->e_type)) { - v->error("expression type does not match declared type"); - } - SymValConst* new_value = nullptr; - if (x->cls == Expr::_Const) { // Integer constant - new_value = new SymValConst{G.const_cnt++, 
x->intval}; - } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) - new_value = new SymValConst{G.const_cnt++, x->strval}; - } else if (x->cls == Expr::_Apply) { // even "1 + 2" is Expr::_Apply (it applies `_+_`) - code.emplace_back(v->loc, Op::_Import, std::vector()); - auto tmp_vars = x->pre_compile(code); - code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars)); - code.emplace_back(v->loc, Op::_Nop); - // It is REQUIRED to execute "optimizations" as in tolk.cpp - code.simplify_var_types(); - code.prune_unreachable_code(); - code.split_vars(true); - for (int i = 0; i < 16; i++) { - code.compute_used_code_vars(); - code.fwd_analyze(); - code.prune_unreachable_code(); - } - code.mark_noreturn(); - AsmOpList out_list(0, &code.vars); - code.generate_code(out_list); - if (out_list.list_.size() != 1) { - init_value->error("precompiled expression must result in single operation"); - } - auto op = out_list.list_[0]; - if (!op.is_const()) { - init_value->error("precompiled expression must result in compilation time constant"); - } - if (op.origin.is_null() || !op.origin->is_valid()) { - init_value->error("precompiled expression did not result in a valid integer constant"); + if (!file->is_stdlib_file()) { + // file->ast->debug_print(); + G.generated_from += file->rel_filename; + G.generated_from += ", "; } - new_value = new SymValConst{G.const_cnt++, op.origin}; - } else { - init_value->error("integer or slice literal or constant expected"); - } - sym_def->value = new_value; -} -void process_vertex(V v) { - TypeExpr* var_type = v->declared_type; - SymDef* sym_def = define_global_symbol(calc_sym_idx(v->var_name), false, v->loc); - if (!sym_def) { - v->error("cannot define global symbol"); - } - if (sym_def->value) { - auto val = dynamic_cast(sym_def->value); - if (!val) { - v->error("symbol cannot be redefined as a global variable"); - } - try { - unify(var_type, val->sym_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot 
unify new type " << var_type << " of global variable `" << sym_def->name() - << "` with its previous type " << val->sym_type << ": " << ue; - v->error(os.str()); + for (AnyV v : file->ast->as()->get_toplevel_declarations()) { + if (auto v_func = v->try_as()) { + if (v_func->is_asm_function()) { + convert_asm_body_to_AsmOp(v_func, v_func->get_body()->as()); + } else if (!v_func->marked_as_builtin) { + convert_function_body_to_CodeBlob(v_func, v_func->get_body()->as()); + } + } } - } else { - sym_def->value = new SymValGlobVar{G.glob_var_cnt++, var_type}; -#ifdef TOLK_DEBUG - dynamic_cast(sym_def->value)->name = v->var_name; -#endif - G.glob_vars.push_back(sym_def); } } -class FileToLegacyVisitor final : public ASTVisitorToplevelDeclarations { - SrcFile* current_file; - - // todo inline here all these - void on_pragma_no_arg(V v) override { - process_vertex(v); - } - - void on_pragma_version(V v) override { - process_vertex(v); - } - - void on_include_statement(V v) override { - process_vertex(v, current_file); - } - - void on_function_declaration(V v) override { - process_vertex(v); - } - - void on_constant_declaration(V v) override { - process_vertex(v); - } - - void on_global_var_declaration(V v) override { - process_vertex(v); - } - -public: - explicit FileToLegacyVisitor(SrcFile* file) : current_file(file) { - } -}; - -void process_file_ast(AnyV file_ast) { - auto v = file_ast->try_as(); - if (!v) { - throw UnexpectedASTNodeType(file_ast, "process_file_ast"); - } - - const SrcFile* file = v->file; - if (!file->is_stdlib_file()) { - // v->debug_print(); - G.generated_from += file->rel_filename; - G.generated_from += ", "; - } - - FileToLegacyVisitor(const_cast(file)).start_visiting_file(v); -} - } // namespace tolk diff --git a/tolk/pipe-discover-parse-sources.cpp b/tolk/pipe-discover-parse-sources.cpp new file mode 100644 index 000000000..f074e075d --- /dev/null +++ b/tolk/pipe-discover-parse-sources.cpp @@ -0,0 +1,62 @@ +/* + This file is part of TON Blockchain 
source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. +*/ +#include "tolk.h" +#include "ast.h" +#include "ast-from-tokens.h" +#include "compiler-state.h" + +namespace tolk { + +AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename) { + G.all_src_files.locate_and_register_source_file(stdlib_filename, {}); + G.all_src_files.locate_and_register_source_file(entrypoint_filename, {}); + + while (SrcFile* file = G.all_src_files.get_next_unparsed_file()) { + tolk_assert(!file->ast); + + file->ast = parse_src_file_to_ast(file); + + for (AnyV v_toplevel : file->ast->as()->get_toplevel_declarations()) { + if (auto v_include = v_toplevel->try_as()) { + size_t pos = file->rel_filename.rfind('/'); + std::string rel_filename = pos == std::string::npos + ? 
v_include->get_file_name() + : file->rel_filename.substr(0, pos + 1) + v_include->get_file_name(); + + SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_include->loc); + file->imports.push_back(SrcFile::ImportStatement{imported}); + v_include->mutate_set_src_file(imported); + } + } + } + + // todo #ifdef TOLK_PROFILING + // lexer_measure_performance(G.all_src_files.get_all_files()); + + return G.all_src_files.get_all_files(); +} + +} // namespace tolk diff --git a/tolk/pipe-find-unused-symbols.cpp b/tolk/pipe-find-unused-symbols.cpp new file mode 100644 index 000000000..0badd8538 --- /dev/null +++ b/tolk/pipe-find-unused-symbols.cpp @@ -0,0 +1,90 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. 
+*/ +#include "tolk.h" +#include "src-file.h" +#include "compiler-state.h" + +/* + * Here we find unused symbols (global functions and variables) to strip them off codegen. + * Note, that currently it's implemented as a standalone step after AST has been transformed to legacy Expr/Op. + * The reason why it's not done on AST level is that symbol resolving is done too late. For instance, + * having `beginCell()` there is not enough information in AST whether if points to a global function + * or it's a local variable application. + * In the future, this should be done on AST level. + */ + +namespace tolk { + +static void mark_function_used_dfs(const std::unique_ptr& op); + +static void mark_function_used(SymValCodeFunc* func_val) { + if (!func_val->code || func_val->is_really_used) { // already handled + return; + } + + func_val->is_really_used = true; + mark_function_used_dfs(func_val->code->ops); +} + +static void mark_global_var_used(SymValGlobVar* glob_val) { + glob_val->is_really_used = true; +} + +static void mark_function_used_dfs(const std::unique_ptr& op) { + if (!op) { + return; + } + // op->fun_ref, despite its name, may actually ref global var + // note, that for non-calls, e.g. 
`var a = some_fn` (Op::_Let), some_fn is Op::_GlobVar + // (in other words, fun_ref exists not only for direct Op::_Call, but for non-call references also) + if (op->fun_ref) { + if (auto* func_val = dynamic_cast(op->fun_ref->value)) { + mark_function_used(func_val); + } else if (auto* glob_val = dynamic_cast(op->fun_ref->value)) { + mark_global_var_used(glob_val); + } else if (auto* asm_val = dynamic_cast(op->fun_ref->value)) { + } else { + tolk_assert(false); + } + } + mark_function_used_dfs(op->next); + mark_function_used_dfs(op->block0); + mark_function_used_dfs(op->block1); +} + +void pipeline_find_unused_symbols() { + for (SymDef* func_sym : G.all_code_functions) { + auto* func_val = dynamic_cast(func_sym->value); + std::string name = G.symbols.get_name(func_sym->sym_idx); + if (func_val->method_id.not_null() || + name == "main" || name == "recv_internal" || name == "recv_external" || + name == "run_ticktock" || name == "split_prepare" || name == "split_install") { + mark_function_used(func_val); + } + } +} + +} // namespace tolk diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp new file mode 100644 index 000000000..538dc9baa --- /dev/null +++ b/tolk/pipe-generate-fif-output.cpp @@ -0,0 +1,186 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . 
+ + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. +*/ +#include "tolk.h" +#include "src-file.h" +#include "ast.h" +#include "compiler-state.h" + +namespace tolk { + +bool SymValCodeFunc::does_need_codegen() const { + // when a function is declared, but not referenced from code in any way, don't generate its body + if (!is_really_used && G.pragma_remove_unused_functions.enabled()) { + return false; + } + // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist + if (flags & flagUsedAsNonCall) { + return true; + } + // when a function f() is just `return anotherF(...args)`, it doesn't need to be codegenerated at all, + // since all its usages are inlined + return !is_just_wrapper_for_another_f(); + // in the future, we may want to implement a true AST inlining for `inline` functions also +} + +void SymValCodeFunc::set_code(CodeBlob* code) { + this->code = code; +} + +void SymValAsmFunc::set_code(std::vector code) { + this->ext_compile = make_ext_compile(std::move(code)); +} + + +static void generate_output_func(SymDef* func_sym) { + SymValCodeFunc* func_val = dynamic_cast(func_sym->value); + tolk_assert(func_val); + std::string name = G.symbols.get_name(func_sym->sym_idx); + if (G.is_verbosity(2)) { + std::cerr << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl; + } + if (!func_val->code) { + throw ParseError(func_sym->loc, 
"function `" + name + "` is just declared, not implemented"); + } else { + CodeBlob& code = *(func_val->code); + if (G.is_verbosity(3)) { + code.print(std::cerr, 9); + } + code.simplify_var_types(); + if (G.is_verbosity(5)) { + std::cerr << "after simplify_var_types: \n"; + code.print(std::cerr, 0); + } + code.prune_unreachable_code(); + if (G.is_verbosity(5)) { + std::cerr << "after prune_unreachable: \n"; + code.print(std::cerr, 0); + } + code.split_vars(true); + if (G.is_verbosity(5)) { + std::cerr << "after split_vars: \n"; + code.print(std::cerr, 0); + } + for (int i = 0; i < 8; i++) { + code.compute_used_code_vars(); + if (G.is_verbosity(4)) { + std::cerr << "after compute_used_vars: \n"; + code.print(std::cerr, 6); + } + code.fwd_analyze(); + if (G.is_verbosity(5)) { + std::cerr << "after fwd_analyze: \n"; + code.print(std::cerr, 6); + } + code.prune_unreachable_code(); + if (G.is_verbosity(5)) { + std::cerr << "after prune_unreachable: \n"; + code.print(std::cerr, 6); + } + } + code.mark_noreturn(); + if (G.is_verbosity(3)) { + code.print(std::cerr, 15); + } + if (G.is_verbosity(2)) { + std::cerr << "\n---------- resulting code for " << name << " -------------\n"; + } + const char* modifier = ""; + if (func_val->is_inline()) { + modifier = "INLINE"; + } else if (func_val->is_inline_ref()) { + modifier = "REF"; + } + std::cout << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n"; + int mode = 0; + if (G.settings.stack_layout_comments) { + mode |= Stack::_StkCmt | Stack::_CptStkCmt; + } + if (func_val->is_inline() && code.ops->noreturn()) { + mode |= Stack::_InlineFunc; + } + if (func_val->is_inline() || func_val->is_inline_ref()) { + mode |= Stack::_InlineAny; + } + code.generate_code(std::cout, mode, 2); + std::cout << std::string(2, ' ') << "}>\n"; + if (G.is_verbosity(2)) { + std::cerr << "--------------\n"; + } + } +} + +void pipeline_generate_fif_output_to_std_cout() { + std::cout << "\"Asm.fif\" include\n"; + std::cout << "// automatically 
generated from " << G.generated_from << std::endl; + std::cout << "PROGRAM{\n"; + + for (SymDef* func_sym : G.all_code_functions) { + SymValCodeFunc* func_val = dynamic_cast(func_sym->value); + tolk_assert(func_val); + if (!func_val->does_need_codegen()) { + if (G.is_verbosity(2)) { + std::cerr << func_sym->name() << ": code not generated, function does not need codegen\n"; + } + continue; + } + + std::string name = G.symbols.get_name(func_sym->sym_idx); + std::cout << std::string(2, ' '); + if (func_val->method_id.is_null()) { + std::cout << "DECLPROC " << name << "\n"; + } else { + std::cout << func_val->method_id << " DECLMETHOD " << name << "\n"; + } + } + + for (SymDef* gvar_sym : G.all_global_vars) { + auto* glob_val = dynamic_cast(gvar_sym->value); + tolk_assert(glob_val); + if (!glob_val->is_really_used && G.pragma_remove_unused_functions.enabled()) { + if (G.is_verbosity(2)) { + std::cerr << gvar_sym->name() << ": variable not generated, it's unused\n"; + } + continue; + } + std::string name = G.symbols.get_name(gvar_sym->sym_idx); + std::cout << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n"; + } + + for (SymDef* func_sym : G.all_code_functions) { + SymValCodeFunc* func_val = dynamic_cast(func_sym->value); + if (!func_val->does_need_codegen()) { + continue; + } + generate_output_func(func_sym); + } + + std::cout << "}END>c\n"; + if (!G.settings.boc_output_filename.empty()) { + std::cout << "boc>B \"" << G.settings.boc_output_filename << "\" B>file\n"; + } +} + +} // namespace tolk diff --git a/tolk/pipe-handle-pragmas.cpp b/tolk/pipe-handle-pragmas.cpp new file mode 100644 index 000000000..1b0cd7d3a --- /dev/null +++ b/tolk/pipe-handle-pragmas.cpp @@ -0,0 +1,140 @@ +/* + This file is part of TON Blockchain source code. 
+ + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. 
+*/ +#include "tolk.h" +#include "src-file.h" +#include "ast.h" +#include "compiler-state.h" +#include "td/utils/misc.h" + +namespace tolk { + +static void handle_pragma_no_arg(V v) { + std::string_view pragma_name = v->pragma_name; + if (pragma_name == G.pragma_allow_post_modification.name()) { + G.pragma_allow_post_modification.enable(v->loc); + } else if (pragma_name == G.pragma_compute_asm_ltr.name()) { + G.pragma_compute_asm_ltr.enable(v->loc); + } else if (pragma_name == G.pragma_remove_unused_functions.name()) { + G.pragma_remove_unused_functions.enable(v->loc); + } else { + v->error("unknown pragma name"); + } +} + +static void handle_pragma_version(V v) { + char op = '='; + bool eq = false; + TokenType cmp_tok = v->cmp_tok; + if (cmp_tok == tok_gt || cmp_tok == tok_geq) { + op = '>'; + eq = cmp_tok == tok_geq; + } else if (cmp_tok == tok_lt || cmp_tok == tok_leq) { + op = '<'; + eq = cmp_tok == tok_leq; + } else if (cmp_tok == tok_eq) { + op = '='; + } else if (cmp_tok == tok_bitwise_xor) { + op = '^'; + } else { + v->error("invalid comparison operator"); + } + std::string_view pragma_value = v->semver; + int sem_ver[3] = {0, 0, 0}; + char segs = 1; + auto stoi = [&](std::string_view s) { + auto R = td::to_integer_safe(static_cast(s)); + if (R.is_error()) { + v->error("invalid semver format"); + } + return R.move_as_ok(); + }; + std::istringstream iss_value(static_cast(pragma_value)); + for (int idx = 0; idx < 3; idx++) { + std::string s{"0"}; + std::getline(iss_value, s, '.'); + sem_ver[idx] = stoi(s); + } + // End reading semver from source code + int tolk_ver[3] = {0, 0, 0}; + std::istringstream iss(tolk_version); + for (int idx = 0; idx < 3; idx++) { + std::string s; + std::getline(iss, s, '.'); + tolk_ver[idx] = stoi(s); + } + // End parsing embedded semver + bool match = true; + switch (op) { + case '=': + if ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] != sem_ver[1]) || (tolk_ver[2] != sem_ver[2])) { + match = false; + } + break; + case '>': + if 
(((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] < sem_ver[2])) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] < sem_ver[1])) || ((tolk_ver[0] < sem_ver[0]))) { + match = false; + } + break; + case '<': + if (((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] > sem_ver[2])) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] > sem_ver[1])) || ((tolk_ver[0] > sem_ver[0]))) { + match = false; + } + break; + case '^': + if (((segs == 3) && + ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] != sem_ver[1]) || (tolk_ver[2] < sem_ver[2]))) || + ((segs == 2) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] < sem_ver[1]))) || + ((segs == 1) && ((tolk_ver[0] < sem_ver[0])))) { + match = false; + } + break; + default: + tolk_assert(false); + } + if (!match) { + v->error("Tolk version " + tolk_version + " does not satisfy this condition"); + } +} + +void pipeline_handle_pragmas(const AllSrcFiles& all_src_files) { + for (const SrcFile* file : all_src_files) { + tolk_assert(file->ast); + + for (AnyV v : file->ast->as()->get_toplevel_declarations()) { + if (auto v_no_arg = v->try_as()) { + handle_pragma_no_arg(v_no_arg); + } else if (auto v_version = v->try_as()) { + handle_pragma_version(v_version); + } + } + } +} + +} // namespace tolk diff --git a/tolk/pipe-register-symbols.cpp b/tolk/pipe-register-symbols.cpp new file mode 100644 index 000000000..792037a74 --- /dev/null +++ b/tolk/pipe-register-symbols.cpp @@ -0,0 +1,402 @@ +/* + This file is part of TON Blockchain source code. 
+ + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. 
+*/ +#include "tolk.h" +#include "platform-utils.h" +#include "src-file.h" +#include "ast.h" +#include "compiler-state.h" +#include "td/utils/crypto.h" +#include + +namespace tolk { + +Expr* process_expr(AnyV v, CodeBlob& code, bool nv = false); + +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_redefinition_of_symbol(V v_ident, SymDef* existing) { + if (existing->loc.is_stdlib()) { + v_ident->error("redefinition of a symbol from stdlib"); + } else if (existing->loc.is_defined()) { + v_ident->error("redefinition of symbol, previous was at: " + existing->loc.to_string()); + } else { + v_ident->error("redefinition of built-in symbol"); + } +} + +static int calc_sym_idx(std::string_view sym_name) { + return G.symbols.lookup_add(sym_name); +} + +static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name) { + unsigned int crc = td::crc16(static_cast(func_name)); + return td::make_refint((crc & 0xffff) | 0x10000); +} + +static bool is_argument_of_function(AnyV v_variable, V v_func) { + return v_variable->type == ast_identifier && v_func->get_arg_list()->lookup_idx(v_variable->as()->name) != -1; +} + +// if a function looks like `T f(...args) { return anotherF(...args); }`, +// set a bit to flags +// then, all calls to `f(...)` will be effectively replaced with `anotherF(...)` +// todo this function (and optimization) was done before implementing AST, but after AST and registering symbols in advance, +// its behavior became a bit wrong: if anotherF is declared before f, than it's detected here, but still not inlined, +// since inlining is done is legacy code, using Expr +// in the future, inlining should be done on AST level, but it's impossible until all names resolving (including scopes) +// is also done on AST level +// in the future, when working on AST level, inlining should become much more powerful +// (for instance, it should inline `return anotherF(constants)`, etc.) 
+static bool detect_if_function_just_wraps_another(V v) { + if (v->method_id || v->marked_as_get_method || v->marked_as_inline_ref || v->ret_type->has_unknown_inside()) { + return false; + } + for (int i = 0; i < v->get_num_args(); ++i) { + if (v->get_arg(i)->arg_type->get_width() != 1) { + return false; // avoid situations like `f(int a, (int,int) b)`, inlining will be cumbersome + } + } + + auto v_body = v->get_body()->try_as(); + if (!v_body || v_body->size() != 1 || v_body->get_item(0)->type != ast_return_statement) { + return false; + } + + auto v_return = v_body->get_item(0)->as(); + auto v_anotherF = v_return->get_return_value()->try_as(); + if (!v_anotherF) { + return false; + } + + // todo simplify when removing ability of calling a function without parentheses + AnyV called_arg = v_anotherF->get_called_arg(); + bool ok_arg = called_arg->type == ast_tensor || called_arg->type == ast_parenthesized_expr; + if (!ok_arg || v_anotherF->get_called_f()->type != ast_identifier) { + return false; + } + + std::string_view called_name = v_anotherF->get_called_f()->try_as()->name; + std::string_view function_name = v->get_identifier()->name; + + if (called_arg->type == ast_tensor) { + const std::vector& v_arg_items = called_arg->as()->get_items(); + std::set used_args; + for (AnyV v_arg : v_arg_items) { + if (!is_argument_of_function(v_arg, v)) { + return false; + } + used_args.emplace(v_arg->as()->name); + } + if (used_args.size() != v->get_num_args() || used_args.size() != v_arg_items.size()) { + return false; + } + } else if (called_arg->type == ast_parenthesized_expr) { + AnyV v_arg = called_arg->as()->get_expr(); + if (!is_argument_of_function(v_arg, v)) { + return false; + } + } + + if (function_name == "main" || function_name == "recv_internal" || function_name == "recv_external" || + function_name == "run_ticktock" || function_name == "split_prepare" || function_name == "split_install") { + return false; + } + + // ok, f_current is a wrapper + if 
(G.is_verbosity(2)) { + std::cerr << function_name << " -> " << called_name << std::endl; + } + return true; +} + +static void calc_arg_ret_order_of_asm_function(V v_body, V arg_list, TypeExpr* ret_type, + std::vector& arg_order, std::vector& ret_order) { + int cnt = arg_list->size(); + int width = ret_type->get_width(); + if (width < 0 || width > 16) { + v_body->error("return type of an assembler built-in function must have a well-defined fixed width"); + } + if (cnt > 16) { + v_body->error("assembler built-in function must have at most 16 arguments"); + } + std::vector cum_arg_width; + cum_arg_width.push_back(0); + int tot_width = 0; + for (int i = 0; i < cnt; ++i) { + V arg = arg_list->get_arg(i); + int arg_width = arg->arg_type->get_width(); + if (arg_width < 0 || arg_width > 16) { + arg->error("parameters of an assembler built-in function must have a well-defined fixed width"); + } + cum_arg_width.push_back(tot_width += arg_width); + } + if (!v_body->arg_order.empty()) { + if (static_cast(v_body->arg_order.size()) != cnt) { + v_body->error("arg_order of asm function must specify all arguments"); + } + std::vector visited(cnt, false); + for (int i = 0; i < cnt; ++i) { + int j = v_body->arg_order[i]; + if (visited[j]) { + v_body->error("arg_order of asm function contains duplicates"); + } + visited[j] = true; + int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; + while (c1 < c2) { + arg_order.push_back(c1++); + } + } + tolk_assert(arg_order.size() == (unsigned)tot_width); + } + if (!v_body->ret_order.empty()) { + if (static_cast(v_body->ret_order.size()) != width) { + v_body->error("ret_order of this asm function expected to be width = " + std::to_string(width)); + } + std::vector visited(width, false); + for (int i = 0; i < width; ++i) { + int j = v_body->ret_order[i]; + if (j < 0 || j >= width || visited[j]) { + v_body->error("ret_order contains invalid integer, not in range 0 .. 
width-1"); + } + visited[j] = true; + } + ret_order = v_body->ret_order; + } +} + +static void register_constant(V v) { + AnyV init_value = v->get_init_value(); + SymDef* sym_def = define_global_symbol(calc_sym_idx(v->get_identifier()->name), v->loc); + if (sym_def->value) { + fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); + } + + // todo currently, constant value calculation is dirty and rough: init_value is evaluated to fif code + // and expected to be a single expression + // although it works, of course it should be later rewritten using AST calculations, as well as lots of other parts + CodeBlob code("tmp", v->loc, nullptr); + Expr* x = process_expr(init_value, code, false); + if (!x->is_rvalue()) { + v->get_init_value()->error("expression is not strictly Rvalue"); + } + if (v->declared_type && !v->declared_type->equals_to(x->e_type)) { + v->error("expression type does not match declared type"); + } + SymValConst* sym_val = nullptr; + if (x->cls == Expr::_Const) { // Integer constant + sym_val = new SymValConst{static_cast(G.all_constants.size()), x->intval}; + } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) + sym_val = new SymValConst{static_cast(G.all_constants.size()), x->strval}; + } else if (x->cls == Expr::_Apply) { // even "1 + 2" is Expr::_Apply (it applies `_+_`) + code.emplace_back(v->loc, Op::_Import, std::vector()); + auto tmp_vars = x->pre_compile(code); + code.emplace_back(v->loc, Op::_Return, std::move(tmp_vars)); + code.emplace_back(v->loc, Op::_Nop); + // It is REQUIRED to execute "optimizations" as in tolk.cpp + code.simplify_var_types(); + code.prune_unreachable_code(); + code.split_vars(true); + for (int i = 0; i < 16; i++) { + code.compute_used_code_vars(); + code.fwd_analyze(); + code.prune_unreachable_code(); + } + code.mark_noreturn(); + AsmOpList out_list(0, &code.vars); + code.generate_code(out_list); + if (out_list.list_.size() != 1) { + init_value->error("precompiled expression must result in 
single operation"); + } + auto op = out_list.list_[0]; + if (!op.is_const()) { + init_value->error("precompiled expression must result in compilation time constant"); + } + if (op.origin.is_null() || !op.origin->is_valid()) { + init_value->error("precompiled expression did not result in a valid integer constant"); + } + sym_val = new SymValConst{static_cast(G.all_constants.size()), op.origin}; + } else { + init_value->error("integer or slice literal or constant expected"); + } + + sym_def->value = sym_val; +#ifdef TOLK_DEBUG + dynamic_cast(sym_def->value)->name = v->get_identifier()->name; +#endif + G.all_constants.push_back(sym_def); +} + +static void register_global_var(V v) { + SymDef* sym_def = define_global_symbol(calc_sym_idx(v->get_identifier()->name), v->loc); + if (sym_def->value) { + fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); + } + + sym_def->value = new SymValGlobVar{static_cast(G.all_global_vars.size()), v->declared_type}; +#ifdef TOLK_DEBUG + dynamic_cast(sym_def->value)->name = v->get_identifier()->name; +#endif + G.all_global_vars.push_back(sym_def); +} + +static void register_function(V v) { + std::string_view func_name = v->get_identifier()->name; + + // calculate TypeExpr of a function: it's a map (args -> ret), probably surrounded by forall + TypeExpr* func_type = nullptr; + if (int n_args = v->get_num_args()) { + std::vector arg_types; + arg_types.reserve(n_args); + for (int idx = 0; idx < n_args; ++idx) { + arg_types.emplace_back(v->get_arg(idx)->arg_type); + } + func_type = TypeExpr::new_map(TypeExpr::new_tensor(std::move(arg_types)), v->ret_type); + } else { + func_type = TypeExpr::new_map(TypeExpr::new_unit(), v->ret_type); + } + if (v->forall_list) { + std::vector type_vars; + type_vars.reserve(v->forall_list->size()); + for (int idx = 0; idx < v->forall_list->size(); ++idx) { + type_vars.emplace_back(v->forall_list->get_item(idx)->created_type); + } + func_type = TypeExpr::new_forall(std::move(type_vars), func_type); 
+ } + if (v->marked_as_builtin) { + const SymDef* builtin_func = lookup_symbol(G.symbols.lookup(func_name)); + const SymValFunc* func_val = builtin_func ? dynamic_cast(builtin_func->value) : nullptr; + if (!func_val || !func_val->is_builtin()) { + v->error("`builtin` used for non-builtin function"); + } +#ifdef TOLK_DEBUG + // in release, we don't need this check, since `builtin` is used only in stdlib.tolk, which is our responsibility + if (!func_val->sym_type->equals_to(func_type) || func_val->is_marked_as_pure() != v->marked_as_pure) { + v->error("declaration for `builtin` function doesn't match an actual one"); + } +#endif + return; + } + + SymDef* sym_def = define_global_symbol(calc_sym_idx(func_name), v->loc); + if (sym_def->value) { + fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); + } + if (G.is_verbosity(1)) { + std::cerr << "fun " << func_name << " : " << func_type << std::endl; + } + if (v->marked_as_pure && v->ret_type->get_width() == 0) { + v->error("a pure function should return something, otherwise it will be optimized out anyway"); + } + + SymValFunc* sym_val = nullptr; + if (const auto* v_seq = v->get_body()->try_as()) { + sym_val = new SymValCodeFunc{static_cast(G.all_code_functions.size()), func_type, v->marked_as_pure}; + } else if (const auto* v_asm = v->get_body()->try_as()) { + std::vector arg_order, ret_order; + calc_arg_ret_order_of_asm_function(v_asm, v->get_arg_list(), v->ret_type, arg_order, ret_order); + sym_val = new SymValAsmFunc{func_type, std::move(arg_order), std::move(ret_order), v->marked_as_pure}; + } else { + v->error("Unexpected function body statement"); + } + + if (v->method_id) { + sym_val->method_id = td::string_to_int256(static_cast(v->method_id->int_val)); + if (sym_val->method_id.is_null()) { + v->method_id->error("invalid integer constant"); + } + } else if (v->marked_as_get_method) { + sym_val->method_id = calculate_method_id_by_func_name(func_name); + for (const SymDef* other : G.all_get_methods) { + 
if (!td::cmp(dynamic_cast(other->value)->method_id, sym_val->method_id)) { + v->error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" << static_cast(func_name) << "` produce the same hash. Consider renaming one of these functions."); + } + } + } + if (v->marked_as_inline) { + sym_val->flags |= SymValFunc::flagInline; + } + if (v->marked_as_inline_ref) { + sym_val->flags |= SymValFunc::flagInlineRef; + } + if (v->marked_as_get_method) { + sym_val->flags |= SymValFunc::flagGetMethod; + } + if (detect_if_function_just_wraps_another(v)) { + sym_val->flags |= SymValFunc::flagWrapsAnotherF; + } + + sym_def->value = sym_val; +#ifdef TOLK_DEBUG + dynamic_cast(sym_def->value)->name = func_name; +#endif + if (dynamic_cast(sym_val)) { + G.all_code_functions.push_back(sym_def); + } + if (sym_val->is_get_method()) { + G.all_get_methods.push_back(sym_def); + } +} + +static void iterate_through_file_symbols(const SrcFile* file) { + static std::unordered_set seen; + if (!seen.insert(file).second) { + return; + } + tolk_assert(file && file->ast); + + for (AnyV v : file->ast->as()->get_toplevel_declarations()) { + switch (v->type) { + case ast_include_statement: + // on `import "another-file.tolk"`, register symbols from that file at first + // (for instance, it can calculate constants, which are used in init_val of constants in current file below import) + iterate_through_file_symbols(v->as()->file); + break; + + case ast_constant_declaration_list: + for (AnyV v_decl : v->as()->get_declarations()) { + register_constant(v_decl->as()); + } + break; + case ast_global_var_declaration_list: + for (AnyV v_decl : v->as()->get_declarations()) { + register_global_var(v_decl->as()); + } + break; + case ast_function_declaration: + register_function(v->as()); + break; + default: + break; + } + } +} + +void pipeline_register_global_symbols(const AllSrcFiles& all_src_files) { + for (const SrcFile* file : all_src_files) { + iterate_through_file_symbols(file); + } +} + 
+} // namespace tolk diff --git a/tolk/pipeline.h b/tolk/pipeline.h new file mode 100644 index 000000000..b00816349 --- /dev/null +++ b/tolk/pipeline.h @@ -0,0 +1,42 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. 
+*/ +#pragma once + +#include "src-file.h" +#include + +namespace tolk { + +AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename); + +void pipeline_handle_pragmas(const AllSrcFiles&); +void pipeline_register_global_symbols(const AllSrcFiles&); +void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles&); + +void pipeline_find_unused_symbols(); +void pipeline_generate_fif_output_to_std_cout(); + +} // namespace tolk diff --git a/tolk/src-file.cpp b/tolk/src-file.cpp index 3384d3d5e..b6c7e2d4a 100644 --- a/tolk/src-file.cpp +++ b/tolk/src-file.cpp @@ -17,6 +17,7 @@ #include "src-file.h" #include "compiler-state.h" #include +#include namespace tolk { @@ -40,12 +41,51 @@ SrcFile* AllRegisteredSrcFiles::find_file(const std::string& abs_filename) const return nullptr; } -SrcFile* AllRegisteredSrcFiles::register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text) { - SrcFile* created = new SrcFile(++last_file_id, rel_filename, abs_filename, std::move(text)); +SrcFile* AllRegisteredSrcFiles::locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from) { + td::Result path = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::Realpath, rel_filename.c_str()); + if (path.is_error()) { + if (included_from.is_defined()) { + throw ParseError(included_from, "Failed to import: " + path.move_as_error().message().str()); + } + throw Fatal("Failed to locate " + rel_filename + ": " + path.move_as_error().message().str()); + } + + std::string abs_filename = path.move_as_ok(); + if (SrcFile* file = find_file(abs_filename)) { + return file; + } + + td::Result text = G.settings.read_callback(CompilerSettings::FsReadCallbackKind::ReadFile, abs_filename.c_str()); + if (text.is_error()) { + if (included_from.is_defined()) { + throw ParseError(included_from, "Failed to import: " + text.move_as_error().message().str()); + } + throw 
Fatal("Failed to read " + rel_filename + ": " + text.move_as_error().message().str()); + } + + SrcFile* created = new SrcFile(++last_registered_file_id, rel_filename, std::move(abs_filename), text.move_as_ok()); + if (G.is_verbosity(1)) { + std::cerr << "register file_id " << created->file_id << " " << created->abs_filename << std::endl; + } all_src_files.push_back(created); return created; } +SrcFile* AllRegisteredSrcFiles::get_next_unparsed_file() { + if (last_parsed_file_id >= last_registered_file_id) { + return nullptr; + } + return all_src_files[++last_parsed_file_id]; +} + +AllSrcFiles AllRegisteredSrcFiles::get_all_files() const { + AllSrcFiles src_files_immutable; + src_files_immutable.reserve(all_src_files.size()); + for (const SrcFile* file : all_src_files) { + src_files_immutable.push_back(file); + } + return src_files_immutable; +} bool SrcFile::is_offset_valid(int offset) const { return offset >= 0 && offset < static_cast(text.size()); @@ -119,6 +159,12 @@ void SrcLocation::show_context(std::ostream& os) const { os << '^' << "\n"; } +std::string SrcLocation::to_string() const { + std::ostringstream os; + show(os); + return os.str(); +} + std::ostream& operator<<(std::ostream& os, SrcLocation loc) { loc.show(os); return os; diff --git a/tolk/src-file.h b/tolk/src-file.h index 56395571f..9eaf3a67f 100644 --- a/tolk/src-file.h +++ b/tolk/src-file.h @@ -21,6 +21,8 @@ namespace tolk { +struct ASTNodeBase; + struct SrcFile { struct SrcPosition { int offset; @@ -36,8 +38,8 @@ struct SrcFile { int file_id; // an incremental counter through all parsed files std::string rel_filename; // relative to cwd std::string abs_filename; // absolute from root - std::string text; // file contents loaded into memory, Token::str_val points into it - bool was_parsed = false; // to prevent double parsing when a file is imported multiple times + std::string text; // file contents loaded into memory, every Token::str_val points inside it + const ASTNodeBase* ast = nullptr; // 
when a file has been parsed, its ast_tolk_file is kept here std::vector imports; // to check strictness (can't use a symbol without importing its file) SrcFile(int file_id, std::string rel_filename, std::string abs_filename, std::string&& text) @@ -56,16 +58,6 @@ struct SrcFile { SrcPosition convert_offset(int offset) const; }; -class AllRegisteredSrcFiles { - std::vector all_src_files; - int last_file_id = -1; - -public: - SrcFile *find_file(int file_id) const; - SrcFile* find_file(const std::string& abs_filename) const; - SrcFile* register_file(const std::string& rel_filename, const std::string& abs_filename, std::string&& text); - const std::vector& get_all_files() const { return all_src_files; } -}; // SrcLocation points to a location (line, column) in some loaded .tolk source SrcFile. // Note, that instead of storing src_file, line_no, etc., only 2 ints are stored. @@ -84,6 +76,7 @@ class SrcLocation { } bool is_defined() const { return file_id != -1; } + bool is_stdlib() const { return file_id == 0; } const SrcFile* get_src_file() const; // similar to `this->get_src_file() == symbol->get_src_file() || symbol->get_src_file()->is_stdlib()` @@ -94,6 +87,7 @@ class SrcLocation { void show(std::ostream& os) const; void show_context(std::ostream& os) const; + std::string to_string() const; void show_general_error(std::ostream& os, const std::string& message, const std::string& err_type) const; void show_note(const std::string& err_msg) const; @@ -103,6 +97,23 @@ class SrcLocation { std::ostream& operator<<(std::ostream& os, SrcLocation loc); +using AllSrcFiles = std::vector; + +class AllRegisteredSrcFiles { + std::vector all_src_files; + int last_registered_file_id = -1; + int last_parsed_file_id = -1; + +public: + SrcFile *find_file(int file_id) const; + SrcFile* find_file(const std::string& abs_filename) const; + + SrcFile* locate_and_register_source_file(const std::string& rel_filename, SrcLocation included_from); + SrcFile* get_next_unparsed_file(); + + 
AllSrcFiles get_all_files() const; +}; + struct Fatal final : std::exception { std::string message; diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index 9463dbbde..f8f64c50f 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -114,19 +114,16 @@ SymDef* lookup_symbol(sym_idx_t idx) { return nullptr; } -SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) { - if (!name_idx) { - return nullptr; - } - auto found = G.global_sym_def[name_idx]; - if (found) { - return force_new && found->value ? nullptr : found; +SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc) { + if (SymDef* found = G.global_sym_def[name_idx]) { + return found; // found->value is filled; it means, that a symbol is redefined } - found = G.global_sym_def[name_idx] = new SymDef(0, name_idx, loc); + + SymDef* registered = G.global_sym_def[name_idx] = new SymDef(0, name_idx, loc); #ifdef TOLK_DEBUG - found->sym_name = found->name(); + registered->sym_name = registered->name(); #endif - return found; + return registered; // registered->value is nullptr; it means, it's just created } SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) { @@ -134,7 +131,7 @@ SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) { return nullptr; } if (!G.scope_level) { - return define_global_symbol(name_idx, force_new, loc); + throw Fatal("unexpected scope_level = 0"); } auto found = G.sym_def[name_idx]; if (found) { diff --git a/tolk/symtable.h b/tolk/symtable.h index 243437d39..75b0aa2f2 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -97,7 +97,7 @@ void open_scope(SrcLocation loc); void close_scope(); SymDef* lookup_symbol(sym_idx_t idx); -SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, SrcLocation loc = {}); +SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc = {}); SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc); } // namespace tolk diff --git 
a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp index 5add279bc..81953f798 100644 --- a/tolk/tolk-wasm.cpp +++ b/tolk/tolk-wasm.cpp @@ -40,18 +40,18 @@ td::Result compile_internal(char *config_json) { TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optimizationLevel", true, 2)); TRY_RESULT(stdlib_tolk, td::get_json_object_string_field(config, "stdlibLocation", false)); TRY_RESULT(stack_comments, td::get_json_object_bool_field(config, "withStackComments", true, false)); - TRY_RESULT(entrypoint_file_name, td::get_json_object_string_field(config, "entrypointFileName", false)); + TRY_RESULT(entrypoint_filename, td::get_json_object_string_field(config, "entrypointFileName", false)); G.settings.verbosity = 0; G.settings.optimization_level = std::max(0, opt_level); G.settings.stdlib_filename = stdlib_tolk; G.settings.stack_layout_comments = stack_comments; - G.settings.entrypoint_filename = entrypoint_file_name; + G.settings.entrypoint_filename = entrypoint_filename; std::ostringstream outs, errs; std::cout.rdbuf(outs.rdbuf()); std::cerr.rdbuf(errs.rdbuf()); - int tolk_res = tolk::tolk_proceed(entrypoint_file_name); + int tolk_res = tolk::tolk_proceed(entrypoint_filename); if (tolk_res != 0) { return td::Status::Error("Tolk compilation error: " + errs.str()); } diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index 044d62f07..0a0cf144d 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -24,264 +24,37 @@ from all source files in the program, then also delete it here. 
*/ #include "tolk.h" +#include "pipeline.h" #include "compiler-state.h" #include "lexer.h" -#include -#include "ast-from-tokens.h" -#include "ast-to-legacy.h" -#include -#include "td/utils/port/path.h" -#include +#include "ast.h" namespace tolk { -// returns argument type of a function -// note, that when a function has multiple arguments, its arg type is a tensor (no arguments — an empty tensor) -// in other words, `f(int a, int b)` and `f((int,int) ab)` is the same when we speak about types -const TypeExpr *SymValFunc::get_arg_type() const { - if (!sym_type) - return nullptr; - tolk_assert(sym_type->constr == TypeExpr::te_Map || sym_type->constr == TypeExpr::te_ForAll); - const TypeExpr *te_map = sym_type->constr == TypeExpr::te_ForAll ? sym_type->args[0] : sym_type; - const TypeExpr *arg_type = te_map->args[0]; - - while (arg_type->constr == TypeExpr::te_Indirect) { - arg_type = arg_type->args[0]; - } - return arg_type; -} - - -bool SymValCodeFunc::does_need_codegen() const { - // when a function is declared, but not referenced from code in any way, don't generate its body - if (!is_really_used && G.pragma_remove_unused_functions.enabled()) { - return false; - } - // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist - if (flags & flagUsedAsNonCall) { - return true; - } - // when a function f() is just `return anotherF(...args)`, it doesn't need to be codegenerated at all, - // since all its usages are inlined - return !is_just_wrapper_for_another_f(); - // in the future, we may want to implement a true AST inlining for `inline` functions also -} - -void mark_function_used_dfs(const std::unique_ptr& op); - -void mark_function_used(SymValCodeFunc* func_val) { - if (!func_val->code || func_val->is_really_used) { // already handled - return; - } - - func_val->is_really_used = true; - mark_function_used_dfs(func_val->code->ops); -} - -void mark_global_var_used(SymValGlobVar* glob_val) { - 
glob_val->is_really_used = true; -} - -void mark_function_used_dfs(const std::unique_ptr& op) { - if (!op) { - return; - } - // op->fun_ref, despite its name, may actually ref global var - // note, that for non-calls, e.g. `var a = some_fn` (Op::_Let), some_fn is Op::_GlobVar - // (in other words, fun_ref exists not only for direct Op::_Call, but for non-call references also) - if (op->fun_ref) { - if (auto* func_val = dynamic_cast(op->fun_ref->value)) { - mark_function_used(func_val); - } else if (auto* glob_val = dynamic_cast(op->fun_ref->value)) { - mark_global_var_used(glob_val); - } else if (auto* asm_val = dynamic_cast(op->fun_ref->value)) { - } else { - tolk_assert(false); - } - } - mark_function_used_dfs(op->next); - mark_function_used_dfs(op->block0); - mark_function_used_dfs(op->block1); -} - -void mark_used_symbols() { - for (SymDef* func_sym : G.glob_func) { - auto* func_val = dynamic_cast(func_sym->value); - std::string name = G.symbols.get_name(func_sym->sym_idx); - if (func_val->method_id.not_null() || - name == "main" || name == "recv_internal" || name == "recv_external" || - name == "run_ticktock" || name == "split_prepare" || name == "split_install") { - mark_function_used(func_val); - } - } -} - -/* - * - * OUTPUT CODE GENERATOR - * - */ - -void generate_output_func(SymDef* func_sym) { - SymValCodeFunc* func_val = dynamic_cast(func_sym->value); - tolk_assert(func_val); - std::string name = G.symbols.get_name(func_sym->sym_idx); - if (G.is_verbosity(2)) { - std::cerr << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl; - } - if (!func_val->code) { - throw ParseError(func_sym->loc, "function `" + name + "` is just declared, not implemented"); - } else { - CodeBlob& code = *(func_val->code); - if (G.is_verbosity(3)) { - code.print(std::cerr, 9); - } - code.simplify_var_types(); - if (G.is_verbosity(5)) { - std::cerr << "after simplify_var_types: \n"; - code.print(std::cerr, 0); - } - 
code.prune_unreachable_code(); - if (G.is_verbosity(5)) { - std::cerr << "after prune_unreachable: \n"; - code.print(std::cerr, 0); - } - code.split_vars(true); - if (G.is_verbosity(5)) { - std::cerr << "after split_vars: \n"; - code.print(std::cerr, 0); - } - for (int i = 0; i < 8; i++) { - code.compute_used_code_vars(); - if (G.is_verbosity(4)) { - std::cerr << "after compute_used_vars: \n"; - code.print(std::cerr, 6); - } - code.fwd_analyze(); - if (G.is_verbosity(5)) { - std::cerr << "after fwd_analyze: \n"; - code.print(std::cerr, 6); - } - code.prune_unreachable_code(); - if (G.is_verbosity(5)) { - std::cerr << "after prune_unreachable: \n"; - code.print(std::cerr, 6); - } - } - code.mark_noreturn(); - if (G.is_verbosity(3)) { - code.print(std::cerr, 15); - } - if (G.is_verbosity(2)) { - std::cerr << "\n---------- resulting code for " << name << " -------------\n"; - } - const char* modifier = ""; - if (func_val->is_inline()) { - modifier = "INLINE"; - } else if (func_val->is_inline_ref()) { - modifier = "REF"; - } - std::cout << std::string(2, ' ') << name << " PROC" << modifier << ":<{\n"; - int mode = 0; - if (G.settings.stack_layout_comments) { - mode |= Stack::_StkCmt | Stack::_CptStkCmt; - } - if (func_val->is_inline() && code.ops->noreturn()) { - mode |= Stack::_InlineFunc; - } - if (func_val->is_inline() || func_val->is_inline_ref()) { - mode |= Stack::_InlineAny; - } - code.generate_code(std::cout, mode, 2); - std::cout << std::string(2, ' ') << "}>\n"; - if (G.is_verbosity(2)) { - std::cerr << "--------------\n"; - } - } -} - -// this function either throws or successfully prints whole program output to std::cout -void generate_output() { - std::cout << "\"Asm.fif\" include\n"; - std::cout << "// automatically generated from " << G.generated_from << std::endl; - std::cout << "PROGRAM{\n"; - mark_used_symbols(); - - for (SymDef* func_sym : G.glob_func) { - SymValCodeFunc* func_val = dynamic_cast(func_sym->value); - tolk_assert(func_val); - if 
(!func_val->does_need_codegen()) { - if (G.is_verbosity(2)) { - std::cerr << func_sym->name() << ": code not generated, function does not need codegen\n"; - } - continue; - } - - std::string name = G.symbols.get_name(func_sym->sym_idx); - std::cout << std::string(2, ' '); - if (func_val->method_id.is_null()) { - std::cout << "DECLPROC " << name << "\n"; - } else { - std::cout << func_val->method_id << " DECLMETHOD " << name << "\n"; - } - } - - for (SymDef* gvar_sym : G.glob_vars) { - auto* glob_val = dynamic_cast(gvar_sym->value); - tolk_assert(glob_val); - if (!glob_val->is_really_used && G.pragma_remove_unused_functions.enabled()) { - if (G.is_verbosity(2)) { - std::cerr << gvar_sym->name() << ": variable not generated, it's unused\n"; - } - continue; - } - std::string name = G.symbols.get_name(gvar_sym->sym_idx); - std::cout << std::string(2, ' ') << "DECLGLOBVAR " << name << "\n"; - } - - for (SymDef* func_sym : G.glob_func) { - SymValCodeFunc* func_val = dynamic_cast(func_sym->value); - if (!func_val->does_need_codegen()) { - continue; - } - generate_output_func(func_sym); - } - - std::cout << "}END>c\n"; - if (!G.settings.boc_output_filename.empty()) { - std::cout << "boc>B \"" << G.settings.boc_output_filename << "\" B>file\n"; - } -} - - -int tolk_proceed(const std::string &entrypoint_file_name) { +int tolk_proceed(const std::string &entrypoint_filename) { define_builtins(); lexer_init(); G.pragma_allow_post_modification.always_on_and_deprecated("0.5.0"); G.pragma_compute_asm_ltr.always_on_and_deprecated("0.5.0"); try { - { - if (G.settings.stdlib_filename.empty()) { - throw Fatal("stdlib filename not specified"); - } - td::Result locate_res = locate_source_file(G.settings.stdlib_filename); - if (locate_res.is_error()) { - throw Fatal("Failed to locate stdlib: " + locate_res.error().message().str()); - } - process_file_ast(parse_src_file_to_ast(locate_res.move_as_ok())); - } - td::Result locate_res = locate_source_file(entrypoint_file_name); - if 
(locate_res.is_error()) { - throw Fatal("Failed to locate " + entrypoint_file_name + ": " + locate_res.error().message().str()); + if (G.settings.stdlib_filename.empty()) { + throw Fatal("stdlib filename not specified"); } - process_file_ast(parse_src_file_to_ast(locate_res.move_as_ok())); - // todo #ifdef TOLK_PROFILING + comment - // lexer_measure_performance(all_src_files.get_all_files()); + // on any error, an exception is thrown, and the message is printed out below + // (currently, only a single error can be printed) + + AllSrcFiles all_files = pipeline_discover_and_parse_sources(G.settings.stdlib_filename, entrypoint_filename); + + pipeline_handle_pragmas(all_files); + pipeline_register_global_symbols(all_files); + pipeline_convert_ast_to_legacy_Expr_Op(all_files); + + pipeline_find_unused_symbols(); + pipeline_generate_fif_output_to_std_cout(); - generate_output(); return 0; } catch (Fatal& fatal) { std::cerr << "fatal: " << fatal << std::endl; diff --git a/tolk/tolk.h b/tolk/tolk.h index b62c6a581..a0106ffc0 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -446,7 +446,8 @@ struct CodeBlob { std::stack*> cur_ops_stack; int flags = 0; bool require_callxargs = false; - CodeBlob(TypeExpr* ret = nullptr) : var_cnt(0), in_var_cnt(0), op_cnt(0), ret_type(ret), cur_ops(&ops) { + CodeBlob(std::string name, SrcLocation loc, TypeExpr* ret) + : var_cnt(0), in_var_cnt(0), op_cnt(0), ret_type(ret), name(std::move(name)), loc(loc), cur_ops(&ops) { } template Op& emplace_back(Args&&... args) { @@ -539,7 +540,6 @@ struct SymValFunc : SymVal { const std::vector* get_ret_order() const { return ret_order.empty() ? 
nullptr : &ret_order; } - const TypeExpr* get_arg_type() const; bool is_inline() const { return flags & flagInline; @@ -568,6 +568,7 @@ struct SymValCodeFunc : SymValFunc { SymValCodeFunc(int val, TypeExpr* _ft, bool marked_as_pure) : SymValFunc(val, _ft, marked_as_pure), code(nullptr) { } bool does_need_codegen() const; + void set_code(CodeBlob* code); }; struct SymValGlobVar : SymValBase { @@ -592,6 +593,9 @@ struct SymValConst : SymValBase { td::RefInt256 intval; std::string strval; ConstKind kind; +#ifdef TOLK_DEBUG + std::string name; // seeing const name in debugger makes it much easier to delve into Tolk sources +#endif SymValConst(int idx, td::RefInt256 value) : SymValBase(SymValKind::_Const, idx), intval(value), kind(IntConst) { } @@ -611,17 +615,6 @@ struct SymValConst : SymValBase { }; -/* - * - * PARSE SOURCE - * - */ - - -// defined in parse-tolk.cpp -td::Result locate_source_file(const std::string& rel_filename); - - /* * * EXPRESSIONS @@ -1432,10 +1425,11 @@ inline compile_func_t make_ext_compile(AsmOp op) { struct SymValAsmFunc : SymValFunc { simple_compile_func_t simple_compile; compile_func_t ext_compile; - td::uint64 crc; ~SymValAsmFunc() override = default; - SymValAsmFunc(TypeExpr* ft, std::vector&& _macro, bool marked_as_pure) - : SymValFunc(-1, ft, marked_as_pure), ext_compile(make_ext_compile(std::move(_macro))) { + SymValAsmFunc(TypeExpr* ft, std::vector&& arg_order, std::vector&& ret_order, bool marked_as_pure) + : SymValFunc(-1, ft, marked_as_pure) { + this->arg_order = std::move(arg_order); + this->ret_order = std::move(ret_order); } SymValAsmFunc(TypeExpr* ft, simple_compile_func_t _compile, bool marked_as_pure) : SymValFunc(-1, ft, marked_as_pure), simple_compile(std::move(_compile)) { @@ -1451,6 +1445,7 @@ struct SymValAsmFunc : SymValFunc { std::initializer_list ret_order = {}, bool marked_as_pure = false) : SymValFunc(-1, ft, arg_order, ret_order, marked_as_pure), ext_compile(std::move(_compile)) { } + void set_code(std::vector 
code); bool compile(AsmOpList& dest, std::vector& out, std::vector& in, SrcLocation where) const; }; @@ -1472,7 +1467,7 @@ void define_builtins(); * */ -int tolk_proceed(const std::string &entrypoint_file_name); +int tolk_proceed(const std::string &entrypoint_filename); } // namespace tolk From e2edadba920d98d2c13e709c8d47c712aa1ddbef Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Thu, 31 Oct 2024 11:11:41 +0400 Subject: [PATCH 08/12] [Tolk] v0.6 syntax: `fun`, `import`, `var`, types on the right, etc. Lots of changes, actually. Most noticeable are: - traditional //comments - #include -> import - a rule "import what you use" - ~ found -> !found (for -1/0) - null() -> null - is_null?(v) -> v == null - throw is a keyword - catch with swapped arguments - throw_if, throw_unless -> assert - do until -> do while - elseif -> else if - drop ifnot, elseifnot - drop rarely used operators A testing framework also appears here. All tests existed earlier, but due to significant syntax changes, their history is useless. 
--- CMakeLists.txt | 24 + crypto/fift/lib/Asm.fif | 3 + crypto/smartcont/mathlib.tolk | 1803 +++++++++-------- crypto/smartcont/stdlib.tolk | 888 +++++--- tolk-tester/tests/a10.tolk | 42 + tolk-tester/tests/a6.tolk | 85 + tolk-tester/tests/a6_1.tolk | 16 + tolk-tester/tests/a6_5.tolk | 26 + tolk-tester/tests/a7.tolk | 24 + .../tests/allow_post_modification.tolk | 108 + tolk-tester/tests/asm_arg_order.tolk | 145 ++ tolk-tester/tests/bit-operators.tolk | 53 + tolk-tester/tests/c2.tolk | 27 + tolk-tester/tests/c2_1.tolk | 14 + tolk-tester/tests/camel1.tolk | 245 +++ tolk-tester/tests/camel2.tolk | 204 ++ tolk-tester/tests/camel3.tolk | 95 + tolk-tester/tests/camel4.tolk | 145 ++ tolk-tester/tests/cells-slices.tolk | 163 ++ tolk-tester/tests/co1.tolk | 75 + tolk-tester/tests/code_after_ifelse.tolk | 41 + tolk-tester/tests/codegen_check_demo.tolk | 96 + tolk-tester/tests/comments.tolk | 31 + tolk-tester/tests/if_stmt.tolk | 66 + .../tests/imports/invalid-no-import.tolk | 4 + tolk-tester/tests/imports/some-math.tolk | 3 + tolk-tester/tests/inline_big.tolk | 62 + tolk-tester/tests/inline_if.tolk | 28 + tolk-tester/tests/inline_loops.tolk | 48 + tolk-tester/tests/invalid-bitwise-1.tolk | 9 + tolk-tester/tests/invalid-bitwise-2.tolk | 8 + tolk-tester/tests/invalid-bitwise-3.tolk | 8 + tolk-tester/tests/invalid-bitwise-4.tolk | 6 + tolk-tester/tests/invalid-bitwise-5.tolk | 11 + tolk-tester/tests/invalid-bitwise-6.tolk | 9 + tolk-tester/tests/invalid-bitwise-7.tolk | 8 + tolk-tester/tests/invalid-builtin-1.tolk | 10 + tolk-tester/tests/invalid-catch-1.tolk | 12 + tolk-tester/tests/invalid-catch-2.tolk | 9 + tolk-tester/tests/invalid-cmt-nested.tolk | 11 + tolk-tester/tests/invalid-cmt-old.tolk | 8 + tolk-tester/tests/invalid-cyclic-1.tolk | 8 + tolk-tester/tests/invalid-declaration-1.tolk | 6 + tolk-tester/tests/invalid-declaration-10.tolk | 8 + tolk-tester/tests/invalid-declaration-2.tolk | 8 + tolk-tester/tests/invalid-declaration-3.tolk | 8 + 
tolk-tester/tests/invalid-declaration-4.tolk | 8 + tolk-tester/tests/invalid-declaration-5.tolk | 6 + tolk-tester/tests/invalid-declaration-6.tolk | 8 + tolk-tester/tests/invalid-declaration-7.tolk | 8 + tolk-tester/tests/invalid-declaration-8.tolk | 8 + tolk-tester/tests/invalid-declaration-9.tolk | 9 + tolk-tester/tests/invalid-get-method-1.tolk | 9 + tolk-tester/tests/invalid-get-method-2.tolk | 17 + tolk-tester/tests/invalid-import.tolk | 9 + tolk-tester/tests/invalid-logical-1.tolk | 8 + tolk-tester/tests/invalid-no-import.tolk | 8 + tolk-tester/tests/invalid-nopar-1.tolk | 12 + tolk-tester/tests/invalid-nopar-2.tolk | 12 + tolk-tester/tests/invalid-nopar-3.tolk | 12 + tolk-tester/tests/invalid-nopar-4.tolk | 8 + tolk-tester/tests/invalid-pure-1.tolk | 20 + tolk-tester/tests/invalid-pure-2.tolk | 23 + tolk-tester/tests/invalid-pure-3.tolk | 23 + tolk-tester/tests/invalid-redefinition-1.tolk | 7 + tolk-tester/tests/invalid-redefinition-2.tolk | 12 + tolk-tester/tests/invalid-redefinition-3.tolk | 8 + tolk-tester/tests/invalid-redefinition-4.tolk | 9 + tolk-tester/tests/invalid-redefinition-5.tolk | 9 + tolk-tester/tests/invalid-shift-1.tolk | 8 + tolk-tester/tests/invalid-symbol-1.tolk | 14 + tolk-tester/tests/invalid-symbol-2.tolk | 12 + tolk-tester/tests/invalid-syntax-1.tolk | 15 + tolk-tester/tests/invalid-syntax-2.tolk | 13 + tolk-tester/tests/invalid-syntax-3.tolk | 8 + tolk-tester/tests/invalid-syntax-4.tolk | 8 + tolk-tester/tests/invalid-tolk-version.tolk | 7 + tolk-tester/tests/invalid-typing-1.tolk | 10 + tolk-tester/tests/invalid-typing-2.tolk | 9 + tolk-tester/tests/invalid.tolk | 8 + tolk-tester/tests/logical-operators.tolk | 154 ++ tolk-tester/tests/method_id.tolk | 15 + tolk-tester/tests/no-spaces.tolk | 117 ++ tolk-tester/tests/null-keyword.tolk | 157 ++ tolk-tester/tests/op_priority.tolk | 121 ++ tolk-tester/tests/pure-functions.tolk | 46 + .../tests/remove-unused-functions.tolk | 48 + tolk-tester/tests/s1.tolk | 61 + 
tolk-tester/tests/special-fun-names.tolk | 24 + tolk-tester/tests/test-math.tolk | 309 +++ tolk-tester/tests/try-func.tolk | 151 ++ tolk-tester/tests/unbalanced_ret.tolk | 17 + tolk-tester/tests/unbalanced_ret_inline.tolk | 19 + tolk-tester/tests/unbalanced_ret_loops.tolk | 68 + tolk-tester/tests/unbalanced_ret_nested.tolk | 40 + tolk-tester/tests/use-before-declare.tolk | 49 + tolk-tester/tests/w1.tolk | 14 + tolk-tester/tests/w2.tolk | 34 + tolk-tester/tests/w6.tolk | 19 + tolk-tester/tests/w7.tolk | 26 + tolk-tester/tests/w9.tolk | 14 + tolk-tester/tolk-tester.js | 525 +++++ tolk-tester/tolk-tester.py | 430 ++++ tolk/CMakeLists.txt | 1 - tolk/abscode.cpp | 16 +- tolk/analyzer.cpp | 4 +- tolk/ast-from-tokens.cpp | 1125 ++++++---- tolk/ast-replacer.h | 32 +- tolk/ast-stringifier.h | 125 +- tolk/ast-visitor.h | 30 +- tolk/ast.cpp | 29 +- tolk/ast.h | 277 +-- tolk/builtins.cpp | 121 +- tolk/codegen.cpp | 5 +- tolk/compiler-state.cpp | 45 +- tolk/compiler-state.h | 28 +- tolk/gen-abscode.cpp | 16 +- tolk/lexer.cpp | 157 +- tolk/lexer.h | 90 +- tolk/pipe-ast-to-legacy.cpp | 458 +++-- tolk/pipe-discover-parse-sources.cpp | 11 +- tolk/pipe-find-unused-symbols.cpp | 4 +- tolk/pipe-generate-fif-output.cpp | 13 +- tolk/pipe-handle-pragmas.cpp | 140 -- tolk/pipe-register-symbols.cpp | 112 +- tolk/pipeline.h | 1 - tolk/src-file.h | 1 - tolk/tolk-main.cpp | 76 +- tolk/tolk-version.h | 23 + tolk/tolk-wasm.cpp | 7 +- tolk/tolk.cpp | 15 +- tolk/tolk.h | 207 +- tolk/type-expr.h | 2 +- 133 files changed, 8148 insertions(+), 2557 deletions(-) create mode 100644 tolk-tester/tests/a10.tolk create mode 100644 tolk-tester/tests/a6.tolk create mode 100644 tolk-tester/tests/a6_1.tolk create mode 100644 tolk-tester/tests/a6_5.tolk create mode 100644 tolk-tester/tests/a7.tolk create mode 100644 tolk-tester/tests/allow_post_modification.tolk create mode 100644 tolk-tester/tests/asm_arg_order.tolk create mode 100644 tolk-tester/tests/bit-operators.tolk create mode 100644 
tolk-tester/tests/c2.tolk create mode 100644 tolk-tester/tests/c2_1.tolk create mode 100644 tolk-tester/tests/camel1.tolk create mode 100644 tolk-tester/tests/camel2.tolk create mode 100644 tolk-tester/tests/camel3.tolk create mode 100644 tolk-tester/tests/camel4.tolk create mode 100644 tolk-tester/tests/cells-slices.tolk create mode 100644 tolk-tester/tests/co1.tolk create mode 100644 tolk-tester/tests/code_after_ifelse.tolk create mode 100644 tolk-tester/tests/codegen_check_demo.tolk create mode 100644 tolk-tester/tests/comments.tolk create mode 100644 tolk-tester/tests/if_stmt.tolk create mode 100644 tolk-tester/tests/imports/invalid-no-import.tolk create mode 100644 tolk-tester/tests/imports/some-math.tolk create mode 100644 tolk-tester/tests/inline_big.tolk create mode 100644 tolk-tester/tests/inline_if.tolk create mode 100644 tolk-tester/tests/inline_loops.tolk create mode 100644 tolk-tester/tests/invalid-bitwise-1.tolk create mode 100644 tolk-tester/tests/invalid-bitwise-2.tolk create mode 100644 tolk-tester/tests/invalid-bitwise-3.tolk create mode 100644 tolk-tester/tests/invalid-bitwise-4.tolk create mode 100644 tolk-tester/tests/invalid-bitwise-5.tolk create mode 100644 tolk-tester/tests/invalid-bitwise-6.tolk create mode 100644 tolk-tester/tests/invalid-bitwise-7.tolk create mode 100644 tolk-tester/tests/invalid-builtin-1.tolk create mode 100644 tolk-tester/tests/invalid-catch-1.tolk create mode 100644 tolk-tester/tests/invalid-catch-2.tolk create mode 100644 tolk-tester/tests/invalid-cmt-nested.tolk create mode 100644 tolk-tester/tests/invalid-cmt-old.tolk create mode 100644 tolk-tester/tests/invalid-cyclic-1.tolk create mode 100644 tolk-tester/tests/invalid-declaration-1.tolk create mode 100644 tolk-tester/tests/invalid-declaration-10.tolk create mode 100644 tolk-tester/tests/invalid-declaration-2.tolk create mode 100644 tolk-tester/tests/invalid-declaration-3.tolk create mode 100644 tolk-tester/tests/invalid-declaration-4.tolk create mode 100644 
tolk-tester/tests/invalid-declaration-5.tolk create mode 100644 tolk-tester/tests/invalid-declaration-6.tolk create mode 100644 tolk-tester/tests/invalid-declaration-7.tolk create mode 100644 tolk-tester/tests/invalid-declaration-8.tolk create mode 100644 tolk-tester/tests/invalid-declaration-9.tolk create mode 100644 tolk-tester/tests/invalid-get-method-1.tolk create mode 100644 tolk-tester/tests/invalid-get-method-2.tolk create mode 100644 tolk-tester/tests/invalid-import.tolk create mode 100644 tolk-tester/tests/invalid-logical-1.tolk create mode 100644 tolk-tester/tests/invalid-no-import.tolk create mode 100644 tolk-tester/tests/invalid-nopar-1.tolk create mode 100644 tolk-tester/tests/invalid-nopar-2.tolk create mode 100644 tolk-tester/tests/invalid-nopar-3.tolk create mode 100644 tolk-tester/tests/invalid-nopar-4.tolk create mode 100644 tolk-tester/tests/invalid-pure-1.tolk create mode 100644 tolk-tester/tests/invalid-pure-2.tolk create mode 100644 tolk-tester/tests/invalid-pure-3.tolk create mode 100644 tolk-tester/tests/invalid-redefinition-1.tolk create mode 100644 tolk-tester/tests/invalid-redefinition-2.tolk create mode 100644 tolk-tester/tests/invalid-redefinition-3.tolk create mode 100644 tolk-tester/tests/invalid-redefinition-4.tolk create mode 100644 tolk-tester/tests/invalid-redefinition-5.tolk create mode 100644 tolk-tester/tests/invalid-shift-1.tolk create mode 100644 tolk-tester/tests/invalid-symbol-1.tolk create mode 100644 tolk-tester/tests/invalid-symbol-2.tolk create mode 100644 tolk-tester/tests/invalid-syntax-1.tolk create mode 100644 tolk-tester/tests/invalid-syntax-2.tolk create mode 100644 tolk-tester/tests/invalid-syntax-3.tolk create mode 100644 tolk-tester/tests/invalid-syntax-4.tolk create mode 100644 tolk-tester/tests/invalid-tolk-version.tolk create mode 100644 tolk-tester/tests/invalid-typing-1.tolk create mode 100644 tolk-tester/tests/invalid-typing-2.tolk create mode 100644 tolk-tester/tests/invalid.tolk create mode 100644 
tolk-tester/tests/logical-operators.tolk create mode 100644 tolk-tester/tests/method_id.tolk create mode 100644 tolk-tester/tests/no-spaces.tolk create mode 100644 tolk-tester/tests/null-keyword.tolk create mode 100644 tolk-tester/tests/op_priority.tolk create mode 100644 tolk-tester/tests/pure-functions.tolk create mode 100644 tolk-tester/tests/remove-unused-functions.tolk create mode 100644 tolk-tester/tests/s1.tolk create mode 100644 tolk-tester/tests/special-fun-names.tolk create mode 100644 tolk-tester/tests/test-math.tolk create mode 100644 tolk-tester/tests/try-func.tolk create mode 100644 tolk-tester/tests/unbalanced_ret.tolk create mode 100644 tolk-tester/tests/unbalanced_ret_inline.tolk create mode 100644 tolk-tester/tests/unbalanced_ret_loops.tolk create mode 100644 tolk-tester/tests/unbalanced_ret_nested.tolk create mode 100644 tolk-tester/tests/use-before-declare.tolk create mode 100644 tolk-tester/tests/w1.tolk create mode 100644 tolk-tester/tests/w2.tolk create mode 100644 tolk-tester/tests/w6.tolk create mode 100644 tolk-tester/tests/w7.tolk create mode 100644 tolk-tester/tests/w9.tolk create mode 100644 tolk-tester/tolk-tester.js create mode 100644 tolk-tester/tolk-tester.py delete mode 100644 tolk/pipe-handle-pragmas.cpp create mode 100644 tolk/tolk-version.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 885fcef7f..7d5b8da5c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -627,6 +627,30 @@ if (NOT NIX) endif() endif() +# Tolk tests +if (NOT NIX) + if (MSVC) + set(PYTHON_VER "python") + else() + set(PYTHON_VER "python3") + endif() + add_test( + NAME test-tolk + COMMAND ${PYTHON_VER} tolk-tester.py tests/ + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tolk-tester) + if (WIN32) + set_property(TEST test-tolk PROPERTY ENVIRONMENT + "TOLK_EXECUTABLE=${CMAKE_CURRENT_BINARY_DIR}/tolk/tolk.exe" + "FIFT_EXECUTABLE=${CMAKE_CURRENT_BINARY_DIR}/crypto/fift.exe" + "FIFTPATH=${CMAKE_CURRENT_SOURCE_DIR}/crypto/fift/lib/") + else() + set_property(TEST 
test-tolk PROPERTY ENVIRONMENT + "TOLK_EXECUTABLE=${CMAKE_CURRENT_BINARY_DIR}/tolk/tolk" + "FIFT_EXECUTABLE=${CMAKE_CURRENT_BINARY_DIR}/crypto/fift" + "FIFTPATH=${CMAKE_CURRENT_SOURCE_DIR}/crypto/fift/lib/") + endif() +endif() + #BEGIN internal if (NOT TON_ONLY_TONLIB) add_test(test-adnl test-adnl) diff --git a/crypto/fift/lib/Asm.fif b/crypto/fift/lib/Asm.fif index 92ceab6db..964db4417 100644 --- a/crypto/fift/lib/Asm.fif +++ b/crypto/fift/lib/Asm.fif @@ -1589,6 +1589,9 @@ forget @proclist forget @proccnt { }END> b> } : }END>c { }END>c s +// This is the way how FunC assigns method_id for reserved functions. +// Note, that Tolk entrypoints have other names (`onInternalMessage`, etc.), +// but method_id is assigned not by Fift, but by Tolk code generation. 0 constant recv_internal -1 constant recv_external -2 constant run_ticktock diff --git a/crypto/smartcont/mathlib.tolk b/crypto/smartcont/mathlib.tolk index 74fdfdd59..1f7510a6b 100644 --- a/crypto/smartcont/mathlib.tolk +++ b/crypto/smartcont/mathlib.tolk @@ -1,11 +1,12 @@ -{- +/* - - Tolk fixed-point mathematical library - (initially copied from mathlib.fc) - - -} + */ +tolk 0.6 -{- +/* This file is part of TON Tolk Standard Library. Tolk Standard Library is free software: you can redistribute it and/or modify @@ -18,920 +19,984 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. --} - -{---------------- HIGH-LEVEL FUNCTION DECLARATIONS -----------------} -{- - Most functions declared here work either with integers or with fixed-point numbers of type `fixed248`. - `fixedNNN` informally denotes an alias for type `int` used to represent fixed-point numbers with scale 2^NNN. - Prefix `fixedNNN::` is prepended to the names of high-level functions that accept arguments and return values of type `fixedNNN`. 
--} - -{- function declarations have been commented out, otherwise they are not inlined by the current Tolk compiler - -;; nearest integer to sqrt(a*b) for non-negative integers or fixed-point numbers a and b -int geom_mean(int a, int b) inline_ref; -;; integer square root -int sqrt(int a) inline; -;; fixed-point square root -;; fixed248 sqrt(fixed248 x) -int fixed248::sqrt(int x) inline; - -int fixed248::sqr(int x) inline; -const int fixed248::One; - -;; log(2) as fixed248 -int fixed248::log2_const() inline; -;; Pi as fixed248 -int fixed248::Pi_const() inline; - -;; fixed248 exp(fixed248 x) -int fixed248::exp(int x) inline_ref; -;; fixed248 exp2(fixed248 x) -int fixed248::exp2(int x) inline_ref; - -;; fixed248 log(fixed248 x) -int fixed248::log(int x) inline_ref; -;; fixed248 log2(fixed248 x) -int fixed248::log2(int x) inline; - -;; fixed248 pow(fixed248 x, fixed248 y) -int fixed248::pow(int x, int y) inline_ref; - -;; (fixed248, fixed248) sincos(fixed248 x); -(int, int) fixed248::sincos(int x) inline_ref; -;; fixed248 sin(fixed248 x); -int fixed248::sin(int x) inline; -;; fixed248 cos(fixed248 x); -int fixed248::cos(int x) inline; -;; fixed248 tan(fixed248 x); -int fixed248::tan(int x) inline_ref; -;; fixed248 cot(fixed248 x); -int fixed248::cot(int x) inline_ref; - - -;; fixed248 asin(fixed248 x); -int fixed248::asin(int x) inline; -;; fixed248 acos(fixed248 x); -int fixed248::acos(int x) inline; -;; fixed248 atan(fixed248 x); -int fixed248::atan(int x) inline_ref; -;; fixed248 acot(fixed248 x); -int fixed248::acot(int x) inline_ref; - -;; random number uniformly distributed in [0..1) -;; fixed248 random(); -int fixed248::random() inline; -;; random number with standard normal distribution (2100 gas on average) -;; fixed248 nrand(); -int fixed248::nrand() inline; -;; generates a random number approximately distributed according to the standard normal distribution (1200 gas) -;; (fails chi-squared test, but it is shorter and faster than fixed248::nrand()) -;; 
fixed248 nrand_fast(); -int fixed248::nrand_fast() inline; - --} ;; end (declarations) - -{-------------------- INTERMEDIATE FUNCTIONS -----------------------} - -{- - Intermediate functions are used in the implementations of high-level `fixedNNN::...` functions - if necessary, they can be used to define additional high-level functions for other fixed-point types, such as fixed128, outside this library. They can be also used in a hypothetical floating-point Tolk library. - For these reasons, the declarations of these functions are collected here. --} - -{- function declarations have been commented out, otherwise they are not inlined by the current Tolk compiler - -;; fixed258 tanh(fixed258 x, int steps); -int tanh_f258(int x, int n); - -;; computes exp(x)-1 for |x| <= log(2)/2. -;; fixed257 expm1(fixed257 x); -int expm1_f257(int x); - -;; computes (sin(x+xe),-cos(x+xe)) for |x| <= Pi/4, xe very small -;; this function is very accurate, error less than 0.7 ulp (consumes ~ 5500 gas) -;; (fixed256, fixed256) sincosn(fixed256 x, fixed259 xe) -(int, int) sincosn_f256(int x, int xe); - -;; compute (sin(x),1-cos(x)) in fixed256 for |x| < 16*atan(1/16) = 0.9987 -;; (fixed256, fixed257) sincosm1_f256(fixed256 x); -;; slightly less accurate than sincosn_f256() (error up to 3/2^256), but faster (~ 4k gas) and shorter -(int, int) sincosm1_f256(int x); - -;; compute (p, q) such that p/q = tan(x) for |x|<2*atan(1/2)=1899/2048=0.927 -;; (int, int) tan_aux(fixed256 x); -(int, int) tan_aux_f256(int x); - -;; returns (y, s) such that log(x) = y/2^256 + s*log(2) for positive integer x -;; this function is very precise (error less than 0.6 ulp) and consumes < 7k gas -;; (fixed256, int) log_aux_f256(int x); -(int, int) log_aux_f256(int x); - -;; returns (y, s) such that log2(x) = y/2^256 + s for positive integer x -;; this function is very precise (error less than 0.6 ulp) and consumes < 7k gas -;; (fixed256, int) log2_aux_f256(int x); -(int, int) log2_aux_f256(int x); - -;; compute 
(q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 -;; this function is reasonably accurate (error < 7 ulp with ulp = 2^-261), but it consumes >7k gas -;; this is sufficient for most purposes -;; (int, fixed261) atan_aux(fixed256 x) -(int, int) atan_aux_f256(int x); - -;; fixed255 atan(fixed255 x); -int atan_f255(int x); - -;; for -1 <= x < 1 only -;; fixed256 atan_small(fixed256 x); -int atan_f256_small(int x); - -;; fixed255 asin(fixed255 x); -int asin_f255(int x); - -;; fixed254 acos(fixed255 x); -int acos_f255(int x); - -;; generates normally distributed pseudo-random number -;; fixed252 nrand(); -int nrand_f252(int x); - -;; a faster and shorter variant of nrand_f252() that fails chi-squared test -;; (should suffice for most purposes) -;; fixed252 nrand_fast(); -int nrand_fast_f252(int x); - --} ;; end (declarations) - -{---------------- MISSING OPERATIONS AND BUILT-INS -----------------} +*/ -int sgn(int x) asm "SGN"; +/*--------------- MISSING OPERATIONS AND BUILT-INS ----------------*/ -;; compute floor(log2(x))+1 -int log2_floor_p1(int x) asm "UBITSIZE"; +@pure +fun sgn(x: int): int + asm "SGN"; -int mulrshiftr(int x, int y, int s) asm "MULRSHIFTR"; -int mulrshiftr256(int x, int y) asm "256 MULRSHIFTR#"; -(int, int) mulrshift256mod(int x, int y) asm "256 MULRSHIFT#MOD"; -(int, int) mulrshiftr256mod(int x, int y) asm "256 MULRSHIFTR#MOD"; -(int, int) mulrshiftr255mod(int x, int y) asm "255 MULRSHIFTR#MOD"; -(int, int) mulrshiftr248mod(int x, int y) asm "248 MULRSHIFTR#MOD"; -(int, int) mulrshiftr5mod(int x, int y) asm "5 MULRSHIFTR#MOD"; -(int, int) mulrshiftr6mod(int x, int y) asm "6 MULRSHIFTR#MOD"; -(int, int) mulrshiftr7mod(int x, int y) asm "7 MULRSHIFTR#MOD"; +/// compute floor(log2(x))+1 +@pure +fun log2_floor_p1(x: int): int + asm "UBITSIZE"; -int lshift256divr(int x, int y) asm "256 LSHIFT#DIVR"; -(int, int) lshift256divmodr(int x, int y) asm "256 LSHIFT#DIVMODR"; -(int, int) lshift255divmodr(int x, int y) asm "255 LSHIFT#DIVMODR"; -(int, int) 
lshift2divmodr(int x, int y) asm "2 LSHIFT#DIVMODR"; -(int, int) lshift7divmodr(int x, int y) asm "7 LSHIFT#DIVMODR"; -(int, int) lshiftdivmodr(int x, int y, int s) asm "LSHIFTDIVMODR"; +@pure +fun mulrshiftr(x: int, y: int, s: int): int + asm "MULRSHIFTR"; -(int, int) rshiftr256mod(int x) asm "256 RSHIFTR#MOD"; -(int, int) rshiftr248mod(int x) asm "248 RSHIFTR#MOD"; -(int, int) rshiftr4mod(int x) asm "4 RSHIFTR#MOD"; -(int, int) rshift3mod(int x) asm "3 RSHIFT#MOD"; +@pure +fun mulrshiftr256(x: int, y: int): int + asm "256 MULRSHIFTR#"; -;; computes y - x (Tolk compiler does not try to use this by itself) -int sub_rev(int x, int y) asm "SUBR"; +@pure +fun mulrshift256mod(x: int, y: int): (int, int) + asm "256 MULRSHIFT#MOD"; -int nan() asm "PUSHNAN"; -int is_nan(int x) asm "ISNAN"; +@pure +fun mulrshiftr256mod(x: int, y: int): (int, int) + asm "256 MULRSHIFTR#MOD"; -{------------------------ SQUARE ROOTS ----------------------------} +@pure +fun mulrshiftr255mod(x: int, y: int): (int, int) + asm "255 MULRSHIFTR#MOD"; -;; computes sqrt(a*b) exactly rounded to the nearest integer -;; for all 0 <= a, b <= 2^256-1 -;; may be used with b=1 or b=scale of fixed-point numbers -int geom_mean(int a, int b) inline_ref { - ifnot (min(a, b)) { - return 0; - } - int s = log2_floor_p1(a); ;; throws out of range error if a < 0 or b < 0 - int t = log2_floor_p1(b); - ;; NB: (a-b)/2+b == (a+b)/2, but without overflow for large a and b - int x = (s == t ? 
(a - b) / 2 + b : 1 << ((s + t) / 2)); - do { - ;; if always used with b=2^const, may be optimized to "const LSHIFTDIVC#" - ;; it is important to use `muldivc` here, not `muldiv` or `muldivr` - int q = (muldivc(a, b, x) - x) / 2; - x += q; - } until (q == 0); - return x; +@pure +fun mulrshiftr248mod(x: int, y: int): (int, int) + asm "248 MULRSHIFTR#MOD"; + +@pure +fun mulrshiftr5mod(x: int, y: int): (int, int) + asm "5 MULRSHIFTR#MOD"; + +@pure +fun mulrshiftr6mod(x: int, y: int): (int, int) + asm "6 MULRSHIFTR#MOD"; + +@pure +fun mulrshiftr7mod(x: int, y: int): (int, int) + asm "7 MULRSHIFTR#MOD"; + +@pure +fun lshift256divr(x: int, y: int): int + asm "256 LSHIFT#DIVR"; + +@pure +fun lshift256divmodr(x: int, y: int): (int, int) + asm "256 LSHIFT#DIVMODR"; + +@pure +fun lshift255divmodr(x: int, y: int): (int, int) + asm "255 LSHIFT#DIVMODR"; + +@pure +fun lshift2divmodr(x: int, y: int): (int, int) + asm "2 LSHIFT#DIVMODR"; + +@pure +fun lshift7divmodr(x: int, y: int): (int, int) + asm "7 LSHIFT#DIVMODR"; + +@pure +fun lshiftdivmodr(x: int, y: int, s: int): (int, int) + asm "LSHIFTDIVMODR"; + +@pure +fun rshiftr256mod(x: int): (int, int) + asm "256 RSHIFTR#MOD"; + +@pure +fun rshiftr248mod(x: int): (int, int) + asm "248 RSHIFTR#MOD"; + +@pure +fun rshiftr4mod(x: int): (int, int) + asm "4 RSHIFTR#MOD"; + +@pure +fun rshift3mod(x: int): (int, int) + asm "3 RSHIFT#MOD"; + +/// computes y - x (Tolk compiler does not try to use this by itself) +@pure +fun sub_rev(x: int, y: int): int + asm "SUBR"; + +@pure +fun nan(): int + asm "PUSHNAN"; + +@pure +fun is_nan(x: int): int + asm "ISNAN"; + +/*----------------------- SQUARE ROOTS ---------------------------*/ + +/// computes sqrt(a*b) exactly rounded to the nearest integer +/// for all 0 <= a, b <= 2^256-1 +/// may be used with b=1 or b=scale of fixed-point numbers +@pure +@inline_ref +fun geom_mean(a: int, b: int): int { + if (!min(a, b)) { + return 0; + } + var s: int = log2_floor_p1(a); // throws out of range error if a 
< 0 or b < 0 + var t: int = log2_floor_p1(b); + // NB: (a-b)/2+b == (a+b)/2, but without overflow for large a and b + var x: int = (s == t ? (a - b) / 2 + b : 1 << ((s + t) / 2)); + do { + // if always used with b=2^const, may be optimized to "const LSHIFTDIVC#" + // it is important to use `muldivc` here, not `muldiv` or `muldivr` + var q: int = (muldivc(a, b, x) - x) / 2; + x += q; + } while (q); + return x; } -;; integer square root, computes round(sqrt(a)) for all a>=0. -;; note: `inline` is better than `inline_ref` for such simple functions -int sqrt(int a) inline { - return geom_mean(a, 1); +/// integer square root, computes round(sqrt(a)) for all a>=0. +/// note: `inline` is better than `inline_ref` for such simple functions +@pure +@inline +fun sqrt(a: int): int { + return geom_mean(a, 1); } -;; version for fixed248 = fixed-point numbers with scale 2^248 -;; fixed248 sqrt(fixed248 x) -int fixed248::sqrt(int x) inline { - return geom_mean(x, 1 << 248); +/// version for fixed248 = fixed-point numbers with scale 2^248 +/// fixed248 sqrt(fixed248 x) +@pure +@inline +fun fixed248_sqrt(x: int): int { + return geom_mean(x, 1 << 248); } -;; fixed255 sqrt(fixed255 x) -int fixed255::sqrt(int x) inline { - return geom_mean(x, 1 << 255); +/// fixed255 sqrt(fixed255 x) +@pure +@inline +fun fixed255_sqrt(x: int): int { + return geom_mean(x, 1 << 255); } -;; fixed248 sqr(fixed248 x); -int fixed248::sqr(int x) inline { - return muldivr(x, x, 1 << 248); +/// fixed248 sqr(fixed248 x); +@pure +@inline +fun fixed248_sqr(x: int): int { + return muldivr(x, x, 1 << 248); } -;; fixed255 sqr(fixed255 x); -int fixed255::sqr(int x) inline { - return muldivr(x, x, 1 << 255); +/// fixed255 sqr(fixed255 x); +@pure +@inline +fun fixed255_sqr(x: int): int { + return muldivr(x, x, 1 << 255); } -const int fixed248::One = (1 << 248); -const int fixed255::One = (1 << 255); +const fixed248_One: int = (1 << 248); +const fixed255_One: int = (1 << 255); -{-------------------- USEFUL CONSTANTS 
--------------------} +/*------------------- USEFUL CONSTANTS -------------------*/ -;; store huge constants in inline_ref functions for reuse -;; (y,z) where y=round(log(2)*2^256), z=round((log(2)*2^256-y)*2^128) -;; then log(2) = y/2^256 + z/2^384 -(int, int) log2_xconst_f256() inline_ref { - return (80260960185991308862233904206310070533990667611589946606122867505419956976172, -32272921378999278490133606779486332143); +/// store huge constants in inline_ref functions for reuse +/// (y,z) where y=round(log(2)*2^256), z=round((log(2)*2^256-y)*2^128) +/// then log(2) = y/2^256 + z/2^384 +@pure +@inline_ref +fun log2_xconst_f256(): (int, int) { + return (80260960185991308862233904206310070533990667611589946606122867505419956976172, -32272921378999278490133606779486332143); } -;; (y,z) where Pi = y/2^254 + z/2^382 -(int, int) Pi_xconst_f254() inline_ref { - return (90942894222941581070058735694432465663348344332098107489693037779484723616546, 108051869516004014909778934258921521947); +/// (y,z) where Pi = y/2^254 + z/2^382 +@pure +@inline_ref +fun Pi_xconst_f254(): (int, int) { + return (90942894222941581070058735694432465663348344332098107489693037779484723616546, 108051869516004014909778934258921521947); } -;; atan(1/16) as fixed260 -int Atan1_16_f260() inline_ref { - return 115641670674223639132965820642403718536242645001775371762318060545014644837101; ;; true value is ...101.0089... -} - -;; atan(1/8) as fixed259 -int Atan1_8_f259() inline_ref { - return 115194597005316551477397594802136977648153890007566736408151129975021336532841; ;; correction -0.1687... +/// atan(1/16) as fixed260 +@pure +@inline_ref +fun Atan1_16_f260(): int { + return 115641670674223639132965820642403718536242645001775371762318060545014644837101; // true value is ...101.0089... } -;; atan(1/32) as fixed261 -int Atan1_32_f261() inline_ref { - return 115754418570128574501879331591757054405465733718902755858991306434399246026247; ;; correction 0.395... 
+/// atan(1/8) as fixed259 +@pure +@inline_ref +fun Atan1_8_f259(): int { + return 115194597005316551477397594802136977648153890007566736408151129975021336532841; // correction -0.1687... } -;; inline is better than inline_ref for such very small functions -int log2_const_f256() inline { - (int c, _) = log2_xconst_f256(); - return c; +/// atan(1/32) as fixed261 +@pure +@inline_ref +fun Atan1_32_f261(): int { + return 115754418570128574501879331591757054405465733718902755858991306434399246026247; // correction 0.395... } -int fixed248::log2_const() inline { - return log2_const_f256() ~>> 8; +/// inline is better than inline_ref for such very small functions +@pure +@inline +fun log2_const_f256(): int { + var (c: int, _) = log2_xconst_f256(); + return c; } -int Pi_const_f254() inline { - (int c, _) = Pi_xconst_f254(); - return c; +@pure +@inline +fun fixed248_log2_const(): int { + return log2_const_f256() ~>> 8; } -int fixed248::Pi_const() inline { - return Pi_const_f254() ~>> 6; +@pure +@inline +fun Pi_const_f254(): int { + var (c: auto, _) = Pi_xconst_f254(); + return c; } -{--------------- HYPERBOLIC TANGENT AND EXPONENT -------------------} - -;; hyperbolic tangent of small x via n+2 terms of Lambert's continued fraction -;; n=17: good for |x| < log(2)/4 = 0.173 -;; fixed258 tanh_f258(fixed258 x, int n) -int tanh_f258(int x, int n) inline_ref { - int x2 = muldivr(x, x, 1 << 255); ;; x^2 as fixed261 - int c = int a = (2 * n + 5) << 250; ;; a=2n+5 as fixed250 - int Two = (1 << 251); ;; 2. 
as fixed250 - repeat (n) { - a = (c -= Two) + muldivr(x2, 1 << 239, a); ;; a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 - } - a = (touch(3) << 254) + muldivr(x2, 1 << 243, a); ;; a := 3+x^2/a as fixed254 - ;; y = x/(1+a') = x - x*a'/(1+a') = x - x*x^2/(a+x^2) where a' = x^2/a - return x - (muldivr(x, x2, a + (x2 ~>> 7)) ~>> 7); +@pure +@inline +fun fixed248_Pi_const(): int { + return Pi_const_f254() ~>> 6; } -;; fixed257 expm1_f257(fixed257 x) -;; computes exp(x)-1 for small x via 19 terms of Lambert's continued fraction for tanh(x/2) -;; good for |x| < log(2)/2 = 0.347 (n=17); consumes ~3500 gas -int expm1_f257(int x) inline_ref { - ;; (almost) compute tanh(x/2) first; x/2 as fixed258 = x as fixed257 - int x2 = muldivr(x, x, 1 << 255); ;; x^2 as fixed261 - int Two = (1 << 251); ;; 2. as fixed250 - int c = int a = touch(39) << 250; ;; a=2n+5 as fixed250 - repeat (17) { - a = (c -= Two) + muldivr(x2, 1 << 239, a); ;; a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 - } - a = (touch(3) << 254) + muldivr(x2, 1 << 243, a); ;; a := 3+x^2/a as fixed254 - ;; now tanh(x/2) = x/(1+a') where a'=x^2/a ; apply exp(x)-1=2*tanh(x/2)/(1-tanh(x/2)) - int t = (x ~>> 4) - a; ;; t:=x-a as fixed254 - return x - muldivr(x2, t / 2, a + mulrshiftr256(x, t) ~/ 4) ~/ 4; ;; x - x^2 * (x-a) / (a + x*(x-a)) -} +/*-------------- HYPERBOLIC TANGENT AND EXPONENT ------------------*/ -;; expm1_f257() may be used to implement specific fixed-point exponentials -;; example: -;; fixed248 exp(fixed248 x) -int fixed248::exp(int x) inline_ref { - var (l2c, l2d) = log2_xconst_f256(); - ;; divide x by log(2) and convert to fixed257 - ;; (int q, x) = muldivmodr(x, 256, l2c); ;; unfortunately, no such built-in - (int q, x) = lshiftdivmodr(x, l2c, 8); - x = 2 * x - muldivr(q, l2d, 1 << 127); - int y = expm1_f257(x); - ;; result is (1 + y) * (2^q) --> ((1 << 257) + y) >> (9 - q) - return (y ~>> (9 - q)) - (-1 << (248 + q)); - ;; note that (y ~>> (9 - q)) + (1 << (248 + q)) leads to overflow when q=8 -} - -;; compute 
2^x in fixed248 -;; fixed248 exp2(fixed248 x) -int fixed248::exp2(int x) inline_ref { - ;; (int q, x) = divmodr(x, 1 << 248); ;; no such built-in - (int q, x) = rshiftr248mod(x); - x = muldivr(x, log2_const_f256(), 1 << 247); - int y = expm1_f257(x); - return (y ~>> (9 - q)) - (-1 << (248 + q)); -} - -{--------------------- TRIGONOMETRIC FUNCTIONS -----------------------} - -;; fixed260 tan(fixed260 x); -;; computes tan(x) for small |x|> 10)) ~>> 9); -} - -;; fixed260 tan(fixed260 x); -int tan_f260(int x) inline_ref { - return tan_f260_inlined(x); -} - -;; fixed258 tan(fixed258 x); -;; computes tan(x) for small |x|> 6)) ~>> 5); -} - -;; fixed258 tan(fixed258 x); -int tan_f258(int x) inline_ref { - return tan_f258_inlined(x); -} - -;; (fixed259, fixed263) sincosm1(fixed259 x) -;; computes (sin(x), 1-cos(x)) for small |x|<2*atan(1/16) -(int, int) sincosm1_f259_inlined(int x) inline { - int t = tan_f260_inlined(x); ;; t=tan(x/2) as fixed260 - int tt = mulrshiftr256(t, t); ;; t^2 as fixed264 - int y = tt ~/ 512 + (1 << 255); ;; 1+t^2 as fixed255 - ;; 2*t/(1+t^2) as fixed259 and 2*t^2/(1+t^2) as fixed263 - ;; return (muldivr(t, 1 << 255, y), muldivr(tt, 1 << 255, y)); - return (t - muldivr(t / 2, tt, y) ~/ 256, tt - muldivr(tt / 2, tt, y) ~/ 256); -} - -(int, int) sincosm1_f259(int x) inline_ref { - return sincosm1_f259_inlined(x); -} - -;; computes (sin(x+xe),-cos(x+xe)) for |x| <= Pi/4, xe very small -;; this function is very accurate, error less than 0.7 ulp (consumes ~ 5500 gas) -;; (fixed256, fixed256) sincosn(fixed256 x, fixed259 xe) -(int, int) sincosn_f256(int x, int xe) inline_ref { - ;; var (q, x1) = muldivmodr(x, 8, Atan1_8_f259()); ;; no muldivmodr() builtin - var (q, x1) = lshift2divmodr(abs(x), Atan1_8_f259()); ;; reduce mod theta where theta=2*atan(1/8) - var (si, co) = sincosm1_f259(x1 * 2 + xe); - var (a, b, c) = (-1, 0, 1); - repeat (q) { ;; (a+b*I) *= (8+I)^2 = 63+16*I - (a, b, c) = (63 * a - 16 * b, 16 * a + 63 * b, 65 * c); - } - ;; now a/c = 
cos(q*theta), b/c = sin(q*theta) exactly(!) - ;; compute (a+b*I)*(1-co+si*I)/c - ;; (b, a) = (lshift256divr(b, c), lshift256divr(a, c)); - (b, int br) = lshift256divmodr(b, c); br = muldivr(br, 128, c); - (a, int ar) = lshift256divmodr(a, c); ar = muldivr(ar, 128, c); - return (sgn(x) * (((mulrshiftr256(b, co) - br) ~/ 16 - mulrshiftr256(a, si)) ~/ 8 - b), - a - ((mulrshiftr256(a, co) - ar) ~/ 16 + mulrshiftr256(b, si)) ~/ 8); -} - -;; compute (sin(x),1-cos(x)) in fixed256 for |x| < 16*atan(1/16) = 0.9987 -;; (fixed256, fixed257) sincosm1_f256(fixed256 x); -;; slightly less accurate than sincosn_f256() (error up to 3/2^256), but faster (~ 4k gas) and shorter -(int, int) sincosm1_f256(int x) inline_ref { - var (si, co) = sincosm1_f259_inlined(x); ;; compute (sin,1-cos)(x/8) in (fixed259,fixed263) - int r = 7; - repeat (r / 2) { - ;; 1-cos(2*x) = 2*sin(x)^2, sin(2*x) = 2*sin(x)*cos(x) - (co, si) = (mulrshiftr256(si, si), si - (mulrshiftr256(si, co) ~>> r)); - r -= 2; - } - return (si, co); -} - -;; compute (p, q) such that p/q = tan(x) for |x|<2*atan(1/2)=1899/2048=0.927 -;; (int, int) tan_aux(fixed256 x); -(int, int) tan_aux_f256(int x) inline_ref { - int t = tan_f258_inlined(x); ;; t=tan(x/4) as fixed258 - ;; t:=2*t/(1-t^2)=2*(t-t^3/(t^2-1)) - int tt = mulrshiftr256(t, t); ;; t^2 as fixed260 - t = muldivr(t, tt, tt ~/ 16 + (-1 << 256)) ~/ 16 - t; ;; now t=-tan(x/2) as fixed259 - return (t, mulrshiftr256(t, t) ~/ 4 + (-1 << 256)); ;; return (2*t, t^2-1) as fixed256 -} - -;; sincosm1_f256() and sincosn_f256() may be used to implement trigonometric functions for different fixed-point types -;; example: -;; (fixed248, fixed248) sincos(fixed248 x); -(int, int) fixed248::sincos(int x) inline_ref { - var (Pic, Pid) = Pi_xconst_f254(); - ;; (int q, x) = muldivmodr(x, 128, Pic); ;; no muldivmodr() builtin - (int q, x) = lshift7divmodr(x, Pic); ;; reduce mod Pi/2 - x = 2 * x - muldivr(q, Pid, 1 << 127); - (int si, int co) = sincosm1_f256(x); ;; doesn't make sense to use more 
accurate sincosn_f256() - co = (1 << 248) - (co ~>> 9); - si ~>>= 8; - repeat (q & 3) { - (si, co) = (co, - si); - } - return (si, co); -} - -;; fixed248 sin(fixed248 x); -;; inline is better than inline_ref for such simple functions -int fixed248::sin(int x) inline { - (int si, _) = fixed248::sincos(x); - return si; -} - -;; fixed248 cos(fixed248 x); -int fixed248::cos(int x) inline { - (_, int co) = fixed248::sincos(x); - return co; -} - -;; similarly, tan_aux_f256() may be used to implement tan() and cot() for specific fixed-point formats -;; fixed248 tan(fixed248 x); -;; not very accurate when |tan(x)| is very large (difficult to do better without floating-point numbers) -;; however, the relative accuracy is approximately 2^-247 in all cases, which is good enough for arguments given up to 2^-249 -int fixed248::tan(int x) inline_ref { - var (Pic, Pid) = Pi_xconst_f254(); - ;; (int q, x) = muldivmodr(x, 128, Pic); ;; no muldivmodr() builtin - (int q, x) = lshift7divmodr(x, Pic); ;; reduce mod Pi/2 - x = 2 * x - muldivr(q, Pid, 1 << 127); - var (a, b) = tan_aux_f256(x); ;; now a/b = tan(x') - if (q & 1) { - (a, b) = (b, - a); - } - return muldivr(a, 1 << 248, b); ;; either -b/a or a/b as fixed248 -} - -;; fixed248 cot(fixed248 x); -int fixed248::cot(int x) inline_ref { - var (Pic, Pid) = Pi_xconst_f254(); - (int q, x) = lshift7divmodr(x, Pic); ;; reduce mod Pi/2 - x = 2 * x - muldivr(q, Pid, 1 << 127); - var (b, a) = tan_aux_f256(x); ;; now b/a = tan(x') - if (q & 1) { - (a, b) = (b, - a); - } - return muldivr(a, 1 << 248, b); ;; either -b/a or a/b as fixed248 -} - -{----------------- INVERSE HYPERBOLIC TANGENT AND LOGARITHMS -----------------} - -;; inverse hyperbolic tangent of small x, evaluated by means of n terms of the continued fraction -;; valid for |x| < 2^-2.5 ~ 0.18 if n=37 (slightly less accurate with n=36) -;; |x| < 1/8 if n=32; |x| < 2^-3.5 if n=28; |x| < 1/16 if n=25 -;; |x| < 2^-4.5 if n=23; |x| < 1/32 if n=21; |x| < 1/64 if n=18 -;; fixed258 
atanh(fixed258 x); -int atanh_f258(int x, int n) inline_ref { - int x2 = mulrshiftr256(x, x); ;; x^2 as fixed260 - int One = (1 << 254); - int a = One ~/ n + (1 << 255); ;; a := 2 + 1/n as fixed254 - repeat (n - 1) { - ;; a := 1 + (1 - x^2 / a)(1 + 1/n) as fixed254 - int t = One - muldivr(x2, 1 << 248, a); ;; t := 1 - x^2 / a - a = muldivr(t, n, (int n1 = n - 1)) + One; - n = n1; - } - ;; x / (1 - x^2 / a) = x / (1 - d) = x + x * d / (1 - d) for d = x^2 / a - ;; int d = muldivr(x2, 1 << 255, a - (x2 ~>> 6)); ;; d/(1-d) = x^2/(a-x^2) as fixed261 - ;; return x + (mulrshiftr256(x, d) ~>> 5); - return x + muldivr(x, x2 / 2, a - x2 ~/ 64) ~/ 32; -} - -;; number of terms n should be chosen as for atanh_f258() -;; fixed261 atanh(fixed261 x); -int atanh_f261_inlined(int x, int n) inline { - int x2 = mulrshiftr256(x, x); ;; x^2 as fixed266 - int One = (1 << 254); - int a = One ~/ n + (1 << 255); ;; a := 2 + 1/n as fixed254 - repeat (n - 1) { - ;; a := 1 + (1 - x^2 / a)(1 + 1/n) as fixed254 - int t = One - muldivr(x2, 1 << 242, a); ;; t := 1 - x^2 / a - a = muldivr(t, n, (int n1 = n - 1)) + One; - n = n1; - } - ;; x / (1 - x^2 / a) = x / (1 - d) = x + x * d / (1 - d) for d = x^2 / a - ;; int d = muldivr(x2, 1 << 255, a - (x2 ~>> 12)); ;; d/(1-d) = x^2/(a-x^2) as fixed267 - ;; return x + (mulrshiftr256(x, d) ~>> 11); - return x + muldivr(x, x2, a - x2 ~/ 4096) ~/ 4096; -} - -;; fixed261 atanh(fixed261 x); -int atanh_f261(int x, int n) inline_ref { - return atanh_f261_inlined(x, n); -} - -;; returns (y, s) such that log(x) = y/2^257 + s*log(2) for positive integer x -;; (fixed257, int) log_aux(int x) -(int, int) log_aux_f257(int x) inline_ref { - int s = log2_floor_p1(x); - x <<= 256 - s; - int t = touch(-1 << 256); - if ((x >> 249) <= 90) { - ;; t~touch(); - t >>= 1; - s -= 1; - } - x += t; - int `2x` = 2 * x; - int y = lshift256divr(`2x`, (x >> 1) - t); - ;; y = `2x` - (mulrshiftr256(2x, y) ~>> 2); ;; this line could improve precision on very rare occasions - return 
(atanh_f258(y, 36), s); -} - -;; computes 33^m for small m -int pow33(int m) inline { - int t = 1; - repeat (m) { t *= 33; } - return t; -} - -;; computes 33^m for small 0<=m<=22 -;; slightly faster than pow33() -int pow33b(int m) inline { - (int mh, int ml) = m /% 5; - int t = 1; - repeat (ml) { t *= 33; } - repeat (mh) { t *= 33 * 33 * 33 * 33 * 33; } - return t; -} - -;; returns (s, q, y) such that log(x) = s*log(2) + q*log(33/32) + y/2^260 for positive integer x -;; (int, int, fixed260) log_auxx_f260(int x); -(int, int, int) log_auxx_f260(int x) inline_ref { - int s = log2_floor_p1(x) - 1; - x <<= 255 - s; ;; rescale to 1 <= x < 2 as fixed255 - int t = touch(2873) << 244; ;; ~ (33/32)^11 ~ sqrt(2) as fixed255 - int x1 = (x - t) >> 1; - int q = muldivr(x1, 65, x1 + t) + 11; ;; crude approximation to round(log(x)/log(33/32)) - ;; t = 1; repeat (q) { t *= 33; } ;; t:=33^q, 0<=q<=22 - t = pow33b(q); - t <<= (51 - q) * 5; ;; t:=(33/32)^q as fixed255, nearest power of 33/32 to x - x -= t; - int y = lshift256divr(x << 4, (x >> 1) + t); ;; y = (x-t)/(x+t) as fixed261 - y = atanh_f261(y, 18); ;; atanh((x-t)/(x+t)) as fixed261, or log(x/t) as fixed260 - return (s, q, y); -} - -;; returns (y, s) such that log(x) = y/2^256 + s*log(2) for positive integer x -;; this function is very precise (error less than 0.6 ulp) and consumes < 7k gas -;; (fixed256, int) log_aux_f256(int x); -(int, int) log_aux_f256(int x) inline_ref { - var (s, q, y) = log_auxx_f260(x); - var (yh, yl) = rshiftr4mod(y); ;; y ~/% 16 , but Tolk does not optimize this to RSHIFTR#MOD - ;; int Log33_32 = 3563114646320977386603103333812068872452913448227778071188132859183498739150; ;; log(33/32) as fixed256 - ;; int Log33_32_l = -3769; ;; log(33/32) = Log33_32 / 2^256 + Log33_32_l / 2^269 - yh += (yl * 512 + q * -3769) ~>> 13; ;; compensation, may be removed if slightly worse accuracy is acceptable - int Log33_32 = 3563114646320977386603103333812068872452913448227778071188132859183498739150; ;; log(33/32) as 
fixed256 - return (yh + q * Log33_32, s); -} - -;; returns (y, s) such that log2(x) = y/2^256 + s for positive integer x -;; this function is very precise (error less than 0.6 ulp) and consumes < 7k gas -;; (fixed256, int) log2_aux_f256(int x); -(int, int) log2_aux_f256(int x) inline_ref { - var (s, q, y) = log_auxx_f260(x); - y = lshift256divr(y, log2_const_f256()) ~>> 4; ;; y/log(2) as fixed256 - int Log33_32 = 5140487830366106860412008603913034462883915832139695448455767612111363481357; ;; log_2(33/32) as fixed256 - ;; Log33_32/2^256 happens to be a very precise approximation to log_2(33/32), no compensation required - return (y + q * Log33_32, s); -} - -;; functions log_aux_f256() and log2_aux_f256() may be used to implement specific fixed-point instances of log() and log2() - -;; fixed248 log(fixed248 x) -int fixed248::log(int x) inline_ref { - var (y, s) = log_aux_f256(x); - return muldivr(s - 248, log2_const_f256(), 1 << 8) + (y ~>> 8); - ;; return muldivr(s - 248, 80260960185991308862233904206310070533990667611589946606122867505419956976172, 1 << 8) + (y ~>> 8); -} - -;; fixed248 log2(fixed248 x) -int fixed248::log2(int x) inline { - var (y, s) = log2_aux_f256(x); - return ((s - 248) << 248) + (y ~>> 8); -} - -;; computes x^y as exp(y*log(x)), x >= 0 -;; fixed248 pow(fixed248 x, fixed248 y); -int fixed248::pow(int x, int y) inline_ref { - ifnot (y) { - return 1 << 248; ;; x^0 = 1 - } - if (x <= 0) { - int bad = (x | y) < 0; - return 0 >> bad; ;; 0^y = 0 if x=0 and y>=0; "out of range" exception otherwise - } - var (l, s) = log2_aux_f256(x); - s -= 248; ;; log_2(x) = s+l, l is fixed256, 0<=l<1 - ;; compute (s+l)*y = q+ll - var (q1, r1) = mulrshiftr248mod(s, y); ;; muldivmodr(s, y, 1 << 248) - var (q2, r2) = mulrshift256mod(l, y); - r2 >>= 247; - var (q3, r3) = rshiftr248mod(q2); ;; divmodr(q2, 1 << 248); - var (q, ll) = rshiftr248mod(r1 + r3); - ll = 512 * ll + r2; - q += q1 + q3; - ;; now log_2(x^y) = y*log_2(x) = q + ll, ss integer, ll fixed257, 
-1/2<=ll<1/2 - int sq = q + 248; - if (sq <= 0) { - return - (sq == 0); ;; underflow - } - y = expm1_f257(mulrshiftr256(ll, log2_const_f256())); - return (y ~>> (9 - q)) - (-1 << sq); -} - -{--------------------- INVERSE TRIGONOMETRIC FUNCTIONS -------------------} - -;; number of terms n should be chosen as for atanh_f258() -;; fixed259 atan(fixed259 x); -int atan_f259(int x, int n) inline_ref { - int x2 = mulrshiftr256(x, x); ;; x^2 as fixed262 - int One = (1 << 254); - int a = One ~/ n + (1 << 255); ;; a := 2 + 1/n as fixed254 - repeat (n - 1) { - ;; a := 1 + (1 + x^2 / a)(1 + 1/n) as fixed254 - int t = One + muldivr(x2, 1 << 246, a); ;; t := 1 + x^2 / a - a = muldivr(t, n, (int n1 = n - 1)) + One; - n = n1; - } - ;; x / (1 + x^2 / a) = x / (1 + d) = x - x * d / (1 + d) = x - x * x^2/(a+x^2) for d = x^2 / a - return x - muldivr(x, x2, a + x2 ~/ 256) ~/ 256; -} - -;; number of terms n should be chosen as for atanh_f261() -;; fixed261 atan(fixed261 x); -int atan_f261_inlined(int x, int n) inline { - int x2 = mulrshiftr256(x, x); ;; x^2 as fixed266 - int One = (1 << 254); - int a = One ~/ n + (1 << 255); ;; a := 2 + 1/n as fixed254 - repeat (n - 1) { - ;; a := 1 + (1 + x^2 / a)(1 + 1/n) as fixed254 - int t = One + muldivr(x2, 1 << 242, a); ;; t := 1 + x^2 / a - a = muldivr(t, n, (int n1 = n - 1)) + One; - n = n1; - } - ;; x / (1 + x^2 / a) = x / (1 + d) = x - x * d / (1 + d) = x - x * x^2/(a+x^2) for d = x^2 / a - return x - muldivr(x, x2, a + x2 ~/ 4096) ~/ 4096; -} - -;; fixed261 atan(fixed261 x); -int atan_f261(int x, int n) inline_ref { - return atan_f261_inlined(x, n); -} - -;; computes (q,a,b) such that q is approximately atan(x)/atan(1/32) and a+b*I=(1+I/32)^q as fixed255 -;; then b/a=atan(q*atan(1/32)) exactly, and (a,b) is almost a unit vector pointing in the direction of (1,x) -;; must have |x|<1.1, x is fixed24 -;; (int, fixed255, fixed255) atan_aux_prereduce(fixed24 x); -(int, int, int) atan_aux_prereduce(int x) inline_ref { - int xu = abs(x); - int tc 
= 7214596; ;; tan(13*theta) as fixed24 where theta=atan(1/32) - int t1 = muldivr(xu - tc, 1 << 88, xu * tc + (1 << 48)); ;; tan(x') as fixed64 where x'=atan(x)-13*theta - ;; t1/(3+t1^2) * 3073/32 = x'/3 * 3072/32 = x' / (96/3072) = x' / theta - int q = muldivr(t1 * 3073, 1 << 59, t1 * t1 + (touch(3) << 128)) + 13; ;; approximately round(atan(x)/theta), 0<=q<=25 - var (pa, pb) = (33226912, 5232641); ;; (32+I)^5 - var (qh, ql) = q /% 5; - var (a, b) = (1 << (5 * (51 - q)), 0); ;; (1/32^q, 0) as fixed255 - repeat (ql) { ;; a+b*I *= 32+I - (a, b) = (sub_rev(touch(b), 32 * a), a + 32 * b); ;; same as (32 * a - b, 32 * b + a), but more efficient - } - repeat (qh) { ;; a+b*I *= (32+I)^5 = pa + pb*I - (a, b) = (a * pa - b * pb, a * pb + b * pa); - } - int xs = sgn(x); - return (xs * q, a, xs * b); -} - -;; compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 -;; this function is reasonably accurate (error < 7 ulp with ulp = 2^-261), but it consumes >7k gas -;; this is sufficient for most purposes -;; (int, fixed261) atan_aux(fixed256 x) -(int, int) atan_aux_f256(int x) inline_ref { - var (q, a, b) = atan_aux_prereduce(x ~>> 232); ;; convert x to fixed24 - ;; now b/a = tan(q*atan(1/32)) exactly, where q is near atan(x)/atan(1/32); so b/a is near x - ;; compute y = u/v = (a*x-b)/(a+b*x) as fixed261 ; then |y|<0.0167 = 1.07/64 and atan(x)=atan(y)+q*atan(1/32) - var (u, ul) = mulrshiftr256mod(a, x); - u = (ul ~>> 250) + ((u - b) << 6); ;; |u| < 1/32, convert fixed255 -> fixed261 - int v = a + mulrshiftr256(b, x); ;; v is scalar product of (a,b) and (1,x), it is approximately in [1..sqrt(2)] as fixed255 - int y = muldivr(u, 1 << 255, v); ;; y = u/v as fixed261 - int z = atan_f261_inlined(y, 18); ;; z = atan(x)-q*atan(1/32) - return (q, z); -} - -;; compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 -;; this function is very accurate (error < 2 ulp), but it consumes >7k gas -;; in most cases, faster function atan_aux_f256() should be used -;; (int, 
fixed261) atan_auxx(fixed256 x) -(int, int) atan_auxx_f256(int x) inline_ref { - var (q, a, b) = atan_aux_prereduce(x ~>> 232); ;; convert x to fixed24 - ;; now b/a = tan(q*atan(1/32)) exactly, where q is near atan(x)/atan(1/32); so b/a is near x - ;; compute y = (a*x-b)/(a+b*x) as fixed261 ; then |y|<0.0167 = 1.07/64 and atan(x)=atan(y)+q*atan(1/32) - ;; use sort of double precision arithmetic for this - var (u, ul) = mulrshiftr256mod(a, x); - ul /= 2; - u -= b; ;; |u| < 1/32 as fixed255 - var (v, vl) = mulrshiftr256mod(b, x); - vl /= 2; - v += a; ;; v is scalar product of (a,b) and (1,x), it is approximately in [1..sqrt(2)] as fixed255 - ;; y = (u + ul*eps) / (v + vl*eps) = u/v + (ul - vl * u/v)/v * eps where eps=1/2^255 - var (y, r) = lshift255divmodr(u, v); ;; y = u/v as fixed255 - int yl = muldivr(ul + r, 1 << 255, v) - muldivr(vl, y, v); ;; y/2^255 + yl/2^510 represent u/v - y = (yl ~>> 249) + (y << 6); ;; convert y to fixed261 - int z = atan_f261_inlined(y, 18); ;; z = atan(x)-q*atan(1/32) - return (q, z); -} - -;; consumes ~ 8k gas -;; fixed255 atan(fixed255 x); -int atan_f255(int x) inline_ref { - int s = (x ~>> 256); - touch(x); - if (s) { - x = lshift256divr(-1 << 255, x); ;; x:=-1/x as fixed256 - } else { - x *= 2; ;; convert to fixed256 - } - var (q, z) = atan_aux_f256(x); - ;; now atan(x) = z + q*atan(1/32) + s*(Pi/2), z is fixed261 - var (Pi_h, Pi_l) = Pi_xconst_f254(); ;; Pi/2 as fixed255 + fixed383 - var (qh, ql) = mulrshiftr6mod (q, Atan1_32_f261()); - return qh + s * Pi_h + (z + ql + muldivr(s, Pi_l, 1 << 122)) ~/ 64; -} - -;; computes atan(x) for -1 <= x < 1 only -;; fixed256 atan_small(fixed256 x); -int atan_f256_small(int x) inline_ref { - var (q, z) = atan_aux_f256(x); - ;; now atan(x) = z + q*atan(1/32), z is fixed261 - var (qh, ql) = mulrshiftr5mod (q, Atan1_32_f261()); - return qh + (z + ql) ~/ 32; -} - -;; fixed255 asin(fixed255 x); -int asin_f255(int x) inline_ref { - int a = fixed255::One - fixed255::sqr(x); ;; a:=1-x^2 - ifnot (a) { - 
return sgn(x) * Pi_const_f254(); ;; Pi/2 or -Pi/2 - } - int y = fixed255::sqrt(a); ;; sqrt(1-x^2) - int t = - lshift256divr(x, (-1 << 255) - y); ;; t = x/(1+sqrt(1-x^2)) avoiding overflow - return atan_f256_small(t); ;; asin(x)=2*atan(t) -} - -;; fixed254 acos(fixed255 x); -int acos_f255(int x) inline_ref { - int Pi = Pi_const_f254(); - if (x == (-1 << 255)) { - return Pi; ;; acos(-1) = Pi - } - Pi /= 2; - int y = fixed255::sqrt(fixed255::One - fixed255::sqr(x)); ;; sqrt(1-x^2) - int t = lshift256divr(x, (-1 << 255) - y); ;; t = -x/(1+sqrt(1-x^2)) avoiding overflow - return Pi + atan_f256_small(t) ~/ 2; ;; acos(x)=Pi/2 + 2*atan(t) -} - -;; consumes ~ 10k gas -;; fixed248 asin(fixed248 x) -int fixed248::asin(int x) inline { - return asin_f255(x << 7) ~>> 7; -} - -;; consumes ~ 10k gas -;; fixed248 acos(fixed248 x) -int fixed248::acos(int x) inline { - return acos_f255(x << 7) ~>> 6; -} - -;; consumes ~ 7500 gas -;; fixed248 atan(fixed248 x); -int fixed248::atan(int x) inline_ref { - int s = (x ~>> 249); - touch(x); - if (s) { - s = sgn(s); - x = lshift256divr(-1 << 248, x); ;; x:=-1/x as fixed256 - } else { - x <<= 8; ;; convert to fixed256 - } - var (q, z) = atan_aux_f256(x); - ;; now atan(x) = z + q*atan(1/32) + s*(Pi/2), z is fixed261 - return (z ~/ 64 + s * Pi_const_f254() + muldivr(q, Atan1_32_f261(), 64)) ~/ 128; ;; compute in fixed255, then convert -} - -;; fixed248 acot(fixed248 x); -int fixed248::acot(int x) inline_ref { - int s = (x ~>> 249); - touch(x); - if (s) { - x = lshift256divr(-1 << 248, x); ;; x:=-1/x as fixed256 - s = 0; - } else { - x <<= 8; ;; convert to fixed256 - s = sgn(x); - } - var (q, z) = atan_aux_f256(x); - ;; now acot(x) = - z - q*atan(1/32) + s*(Pi/2), z is fixed261 - return (s * Pi_const_f254() - z ~/ 64 - muldivr(q, Atan1_32_f261(), 64)) ~/ 128; ;; compute in fixed255, then convert -} - -{--------------------- PSEUDO-RANDOM NUMBERS -------------------} - -;; random number with standard normal distribution N(0,1) -;; generated by 
Kinderman--Monahan ratio method modified by J.Leva -;; spends ~ 2k..3k gas on average -;; fixed252 nrand(); -int nrand_f252() inline_ref { - var (x, s, t, A, B, r0) = (nan(), touch(29483) << 236, touch(-3167) << 239, 12845, 16693, 9043); - ;; 4/sqrt(e*Pi) = 1.369 loop iterations on average - do { - var (u, v) = (random() / 16 + 1, muldivr(random() - (1 << 255), 7027, 1 << 16)); ;; fixed252; 7027=ceil(sqrt(8/e)*2^12) - int va = abs(v); - var (u1, v1) = (u - s, va - t); ;; (u - 29483/2^16, abs(v) + 3167/2^13) as fixed252 - ;; Q := u1^2 + v1 * (A*v1 - B*u1) as fixed252 where A=12845/2^16, B=16693/2^16 - int Q = muldivr(u1, u1, 1 << 252) + muldivr(v1, muldivr(v1, A, 1 << 16) - muldivr(u1, B, 1 << 16), 1 << 252); - ;; must have 9043 / 2^15 < Q < 9125 / 2^15, otherwise accept if smaller, reject if larger - int Qd = (Q >> 237) - r0; - if ((Qd < 9125 - 9043) & (va / u < 16)) { - x = muldivr(v, 1 << 252, u); ;; x:=v/u as fixed252; reject immediately if |v/u| >= 16 - if (Qd >= 0) { ;; immediately accept if Qd < 0 - ;; rarely taken branch - 0.012 times per call on average - ;; check condition v^2 < -4*u^2*log(u), or equivalent condition u < exp(-x^2/4) for x=v/u - int xx = mulrshiftr256(x, x) ~/ 4; ;; x^2/4 as fixed248 - int ex = fixed248::exp(- xx) * 16; ;; exp(-x^2/4) as fixed252 - if (u > ex) { - x = nan(); ;; condition false, reject +/// hyperbolic tangent of small x via n+2 terms of Lambert's continued fraction +/// n=17: good for |x| < log(2)/4 = 0.173 +/// fixed258 tanh_f258(fixed258 x, int n) +@pure +@inline_ref +fun tanh_f258(x: int, n: int): int { + var x2: int = muldivr(x, x, 1 << 255); // x^2 as fixed261 + var a: int = (2 * n + 5) << 250; // a=2n+5 as fixed250 + var c = a; + var Two: int = (1 << 251); // 2. 
as fixed250 + repeat (n) { + a = (c -= Two) + muldivr(x2, 1 << 239, a); // a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 + } + a = (touch(3) << 254) + muldivr(x2, 1 << 243, a); // a := 3+x^2/a as fixed254 + // y = x/(1+a') = x - x*a'/(1+a') = x - x*x^2/(a+x^2) where a' = x^2/a + return x - (muldivr(x, x2, a + (x2 ~>> 7)) ~>> 7); +} + +/// fixed257 expm1_f257(fixed257 x) +/// computes exp(x)-1 for small x via 19 terms of Lambert's continued fraction for tanh(x/2) +/// good for |x| < log(2)/2 = 0.347 (n=17); consumes ~3500 gas +@pure +@inline_ref +fun expm1_f257(x: int): int { + // (almost) compute tanh(x/2) first; x/2 as fixed258 = x as fixed257 + var x2: int = muldivr(x, x, 1 << 255); // x^2 as fixed261 + var Two: int = (1 << 251); // 2. as fixed250 + var a: int = touch(39) << 250; // a=2n+5 as fixed250 + var c = a; + repeat (17) { + a = (c -= Two) + muldivr(x2, 1 << 239, a); // a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 + } + a = (touch(3) << 254) + muldivr(x2, 1 << 243, a); // a := 3+x^2/a as fixed254 + // now tanh(x/2) = x/(1+a') where a'=x^2/a ; apply exp(x)-1=2*tanh(x/2)/(1-tanh(x/2)) + var t: int = (x ~>> 4) - a; // t:=x-a as fixed254 + return x - muldivr(x2, t / 2, a + mulrshiftr256(x, t) ~/ 4) ~/ 4; // x - x^2 * (x-a) / (a + x*(x-a)) +} + +/// expm1_f257() may be used to implement specific fixed-point exponentials +/// example: +/// fixed248 exp(fixed248 x) +@pure +@inline_ref +fun fixed248_exp(x: int): int { + var (l2c, l2d) = log2_xconst_f256(); + // divide x by log(2) and convert to fixed257 + // (int q, x) = muldivmodr(x, 256, l2c); // unfortunately, no such built-in + var (q: int, x redef) = lshiftdivmodr(x, l2c, 8); + x = 2 * x - muldivr(q, l2d, 1 << 127); + var y: int = expm1_f257(x); + // result is (1 + y) * (2^q) --> ((1 << 257) + y) >> (9 - q) + return (y ~>> (9 - q)) - (-1 << (248 + q)); + // note that (y ~>> (9 - q)) + (1 << (248 + q)) leads to overflow when q=8 +} + +/// compute 2^x in fixed248 +/// fixed248 exp2(fixed248 x) +@pure +@inline_ref 
+fun fixed248_exp2(x: int): int { + // (int q, x) = divmodr(x, 1 << 248); // no such built-in + var (q: int, x redef) = rshiftr248mod(x); + x = muldivr(x, log2_const_f256(), 1 << 247); + var y: int = expm1_f257(x); + return (y ~>> (9 - q)) - (-1 << (248 + q)); +} + +/*-------------------- TRIGONOMETRIC FUNCTIONS ----------------------*/ + +/// fixed260 tan(fixed260 x); +/// computes tan(x) for small |x|> 10)) ~>> 9); +} + +/// fixed260 tan(fixed260 x); +@pure +@inline_ref +fun tan_f260(x: int): int { + return tan_f260_inlined(x); +} + +/// fixed258 tan(fixed258 x); +/// computes tan(x) for small |x|> 6)) ~>> 5); +} + +/// fixed258 tan(fixed258 x); +@pure +@inline_ref +fun tan_f258(x: int): int { + return tan_f258_inlined(x); +} + +/// (fixed259, fixed263) sincosm1(fixed259 x) +/// computes (sin(x), 1-cos(x)) for small |x|<2*atan(1/16) +@pure +@inline +fun sincosm1_f259_inlined(x: int): (int, int) { + var t: int = tan_f260_inlined(x); // t=tan(x/2) as fixed260 + var tt: int = mulrshiftr256(t, t); // t^2 as fixed264 + var y: int = tt ~/ 512 + (1 << 255); // 1+t^2 as fixed255 + // 2*t/(1+t^2) as fixed259 and 2*t^2/(1+t^2) as fixed263 + // return (muldivr(t, 1 << 255, y), muldivr(tt, 1 << 255, y)); + return (t - muldivr(t / 2, tt, y) ~/ 256, tt - muldivr(tt / 2, tt, y) ~/ 256); +} + +@pure +@inline_ref +fun sincosm1_f259(x: int): (int, int) { + return sincosm1_f259_inlined(x); +} + +/// computes (sin(x+xe),-cos(x+xe)) for |x| <= Pi/4, xe very small +/// this function is very accurate, error less than 0.7 ulp (consumes ~ 5500 gas) +/// (fixed256, fixed256) sincosn(fixed256 x, fixed259 xe) +@pure +@inline_ref +fun sincosn_f256(x: int, xe: int): (int, int) { + // var (q, x1) = muldivmodr(x, 8, Atan1_8_f259()); // no muldivmodr() builtin + var (q, x1) = lshift2divmodr(abs(x), Atan1_8_f259()); // reduce mod theta where theta=2*atan(1/8) + var (si, co) = sincosm1_f259(x1 * 2 + xe); + var (a, b, c) = (-1, 0, 1); + repeat (q) { + // (a+b*I) *= (8+I)^2 = 63+16*I + (a, b, c) = 
(63 * a - 16 * b, 16 * a + 63 * b, 65 * c); + } + // now a/c = cos(q*theta), b/c = sin(q*theta) exactly(!) + // compute (a+b*I)*(1-co+si*I)/c + // (b, a) = (lshift256divr(b, c), lshift256divr(a, c)); + var (b redef, br: int) = lshift256divmodr(b, c); br = muldivr(br, 128, c); + var (a redef, ar: int) = lshift256divmodr(a, c); ar = muldivr(ar, 128, c); + return (sgn(x) * (((mulrshiftr256(b, co) - br) ~/ 16 - mulrshiftr256(a, si)) ~/ 8 - b), + a - ((mulrshiftr256(a, co) - ar) ~/ 16 + mulrshiftr256(b, si)) ~/ 8); +} + +/// compute (sin(x),1-cos(x)) in fixed256 for |x| < 16*atan(1/16) = 0.9987 +/// (fixed256, fixed257) sincosm1_f256(fixed256 x); +/// slightly less accurate than sincosn_f256() (error up to 3/2^256), but faster (~ 4k gas) and shorter +@pure +@inline_ref +fun sincosm1_f256(x: int): (int, int) { + var (si, co) = sincosm1_f259_inlined(x); // compute (sin,1-cos)(x/8) in (fixed259,fixed263) + var r: int = 7; + repeat (r / 2) { + // 1-cos(2*x) = 2*sin(x)^2, sin(2*x) = 2*sin(x)*cos(x) + (co, si) = (mulrshiftr256(si, si), si - (mulrshiftr256(si, co) ~>> r)); + r -= 2; + } + return (si, co); +} + +/// compute (p, q) such that p/q = tan(x) for |x|<2*atan(1/2)=1899/2048=0.927 +/// (int, int) tan_aux(fixed256 x); +@pure +@inline_ref +fun tan_aux_f256(x: int): (int, int) { + var t: int = tan_f258_inlined(x); // t=tan(x/4) as fixed258 + // t:=2*t/(1-t^2)=2*(t-t^3/(t^2-1)) + var tt: int = mulrshiftr256(t, t); // t^2 as fixed260 + t = muldivr(t, tt, tt ~/ 16 + (-1 << 256)) ~/ 16 - t; // now t=-tan(x/2) as fixed259 + return (t, mulrshiftr256(t, t) ~/ 4 + (-1 << 256)); // return (2*t, t^2-1) as fixed256 +} + +/// sincosm1_f256() and sincosn_f256() may be used to implement trigonometric functions for different fixed-point types +/// example: +/// (fixed248, fixed248) sincos(fixed248 x); +@pure +@inline_ref +fun fixed248_sincos(x: int): (int, int) { + var (Pic, Pid) = Pi_xconst_f254(); + // (int q, x) = muldivmodr(x, 128, Pic); // no muldivmodr() builtin + var (q: int, x 
redef) = lshift7divmodr(x, Pic); // reduce mod Pi/2 + x = 2 * x - muldivr(q, Pid, 1 << 127); + var (si: int, co: int) = sincosm1_f256(x); // doesn't make sense to use more accurate sincosn_f256() + co = (1 << 248) - (co ~>> 9); + si = si ~>> 8; + repeat (q & 3) { + (si, co) = (co, -si); + } + return (si, co); +} + +/// fixed248 sin(fixed248 x); +/// inline is better than inline_ref for such simple functions +@pure +@inline +fun fixed248_sin(x: int): int { + var (si: int, _) = fixed248_sincos(x); + return si; +} + +/// fixed248 cos(fixed248 x); +@pure +@inline +fun fixed248_cos(x: int): int { + var (_, co: int) = fixed248_sincos(x); + return co; +} + +/// similarly, tan_aux_f256() may be used to implement tan() and cot() for specific fixed-point formats +/// fixed248 tan(fixed248 x); +/// not very accurate when |tan(x)| is very large (difficult to do better without floating-point numbers) +/// however, the relative accuracy is approximately 2^-247 in all cases, which is good enough for arguments given up to 2^-249 +@pure +@inline_ref +fun fixed248_tan(x: int): int { + var (Pic, Pid) = Pi_xconst_f254(); + // (int q, x) = muldivmodr(x, 128, Pic); // no muldivmodr() builtin + var (q: int, x redef) = lshift7divmodr(x, Pic); // reduce mod Pi/2 + x = 2 * x - muldivr(q, Pid, 1 << 127); + var (a, b) = tan_aux_f256(x); // now a/b = tan(x') + if (q & 1) { + (a, b) = (b, -a); + } + return muldivr(a, 1 << 248, b); // either -b/a or a/b as fixed248 +} + +/// fixed248 cot(fixed248 x); +@pure +@inline_ref +fun fixed248_cot(x: int): int { + var (Pic, Pid) = Pi_xconst_f254(); + var (q: int, x redef) = lshift7divmodr(x, Pic); // reduce mod Pi/2 + x = 2 * x - muldivr(q, Pid, 1 << 127); + var (b, a) = tan_aux_f256(x); // now b/a = tan(x') + if (q & 1) { + (a, b) = (b, -a); + } + return muldivr(a, 1 << 248, b); // either -b/a or a/b as fixed248 +} + +/*---------------- INVERSE HYPERBOLIC TANGENT AND LOGARITHMS ----------------*/ + +/// inverse hyperbolic tangent of small x, evaluated by 
means of n terms of the continued fraction +/// valid for |x| < 2^-2.5 ~ 0.18 if n=37 (slightly less accurate with n=36) +/// |x| < 1/8 if n=32; |x| < 2^-3.5 if n=28; |x| < 1/16 if n=25 +/// |x| < 2^-4.5 if n=23; |x| < 1/32 if n=21; |x| < 1/64 if n=18 +/// fixed258 atanh(fixed258 x); +@pure +@inline_ref +fun atanh_f258(x: int, n: int): int { + var x2: int = mulrshiftr256(x, x); // x^2 as fixed260 + var One: int = (1 << 254); + var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 + repeat (n - 1) { + // a := 1 + (1 - x^2 / a)(1 + 1/n) as fixed254 + var t: int = One - muldivr(x2, 1 << 248, a); // t := 1 - x^2 / a + var n1: int = n - 1; + a = muldivr(t, n, n1) + One; + n = n1; + } + // x / (1 - x^2 / a) = x / (1 - d) = x + x * d / (1 - d) for d = x^2 / a + // int d = muldivr(x2, 1 << 255, a - (x2 ~>> 6)); // d/(1-d) = x^2/(a-x^2) as fixed261 + // return x + (mulrshiftr256(x, d) ~>> 5); + return x + muldivr(x, x2 / 2, a - x2 ~/ 64) ~/ 32; +} + +/// number of terms n should be chosen as for atanh_f258() +/// fixed261 atanh(fixed261 x); +@pure +@inline +fun atanh_f261_inlined(x: int, n: int): int { + var x2: int = mulrshiftr256(x, x); // x^2 as fixed266 + var One: int = (1 << 254); + var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 + repeat (n - 1) { + // a := 1 + (1 - x^2 / a)(1 + 1/n) as fixed254 + var t: int = One - muldivr(x2, 1 << 242, a); // t := 1 - x^2 / a + var n1: int = n - 1; + a = muldivr(t, n, n1) + One; + n = n1; + } + // x / (1 - x^2 / a) = x / (1 - d) = x + x * d / (1 - d) for d = x^2 / a + // int d = muldivr(x2, 1 << 255, a - (x2 ~>> 12)); // d/(1-d) = x^2/(a-x^2) as fixed267 + // return x + (mulrshiftr256(x, d) ~>> 11); + return x + muldivr(x, x2, a - x2 ~/ 4096) ~/ 4096; +} + +/// fixed261 atanh(fixed261 x); +@pure +@inline_ref +fun atanh_f261(x: int, n: int): int { + return atanh_f261_inlined(x, n); +} + +/// returns (y, s) such that log(x) = y/2^257 + s*log(2) for positive integer x +/// (fixed257, int) log_aux(int x) +@pure 
+@inline_ref +fun log_aux_f257(x: int): (int, int) { + var s: int = log2_floor_p1(x); + x <<= 256 - s; + var t: int = touch(-1 << 256); + if ((x >> 249) <= 90) { + // t~touch(); + t >>= 1; + s -= 1; + } + x += t; + var `2x`: int = 2 * x; + var y: int = lshift256divr(`2x`, (x >> 1) - t); + // y = `2x` - (mulrshiftr256(2x, y) ~>> 2); // this line could improve precision on very rare occasions + return (atanh_f258(y, 36), s); +} + +/// computes 33^m for small m +@pure +@inline +fun pow33(m: int): int { + var t: int = 1; + repeat (m) { + t *= 33; + } + return t; +} + +/// computes 33^m for small 0<=m<=22 +/// slightly faster than pow33() +@pure +@inline +fun pow33b(m: int): int { + var (mh: int, ml: int) = divmod(m, 5); + var t: int = 1; + repeat (ml) { + t *= 33; + } + repeat (mh) { + t *= 33 * 33 * 33 * 33 * 33; + } + return t; +} + +/// returns (s, q, y) such that log(x) = s*log(2) + q*log(33/32) + y/2^260 for positive integer x +/// (int, int, fixed260) log_auxx_f260(int x); +@pure +@inline_ref +fun log_auxx_f260(x: int): (int, int, int) { + var s: int = log2_floor_p1(x) - 1; + x <<= 255 - s; // rescale to 1 <= x < 2 as fixed255 + var t: int = touch(2873) << 244; // ~ (33/32)^11 ~ sqrt(2) as fixed255 + var x1: int = (x - t) >> 1; + var q: int = muldivr(x1, 65, x1 + t) + 11; // crude approximation to round(log(x)/log(33/32)) + // t = 1; repeat (q) { t *= 33; } // t:=33^q, 0<=q<=22 + t = pow33b(q); + t <<= (51 - q) * 5; // t:=(33/32)^q as fixed255, nearest power of 33/32 to x + x -= t; + var y: int = lshift256divr(x << 4, (x >> 1) + t); // y = (x-t)/(x+t) as fixed261 + y = atanh_f261(y, 18); // atanh((x-t)/(x+t)) as fixed261, or log(x/t) as fixed260 + return (s, q, y); +} + +/// returns (y, s) such that log(x) = y/2^256 + s*log(2) for positive integer x +/// this function is very precise (error less than 0.6 ulp) and consumes < 7k gas +/// may be used to implement specific fixed-point instances of log() and log2() +/// (fixed256, int) log_aux_f256(int x); +@pure 
+@inline_ref +fun log_aux_f256(x: int): (int, int) { + var (s, q, y) = log_auxx_f260(x); + var (yh, yl) = rshiftr4mod(y); // y ~/% 16 , but Tolk does not optimize this to RSHIFTR#MOD + // int Log33_32 = 3563114646320977386603103333812068872452913448227778071188132859183498739150; // log(33/32) as fixed256 + // int Log33_32_l = -3769; // log(33/32) = Log33_32 / 2^256 + Log33_32_l / 2^269 + yh += (yl * 512 + q * -3769) ~>> 13; // compensation, may be removed if slightly worse accuracy is acceptable + var Log33_32: int = 3563114646320977386603103333812068872452913448227778071188132859183498739150; // log(33/32) as fixed256 + return (yh + q * Log33_32, s); +} + +/// returns (y, s) such that log2(x) = y/2^256 + s for positive integer x +/// this function is very precise (error less than 0.6 ulp) and consumes < 7k gas +/// may be used to implement specific fixed-point instances of log() and log2() +/// (fixed256, int) log2_aux_f256(int x); +@pure +@inline_ref +fun log2_aux_f256(x: int): (int, int) { + var (s, q, y) = log_auxx_f260(x); + y = lshift256divr(y, log2_const_f256()) ~>> 4; // y/log(2) as fixed256 + var Log33_32: int = 5140487830366106860412008603913034462883915832139695448455767612111363481357; // log_2(33/32) as fixed256 + // Log33_32/2^256 happens to be a very precise approximation to log_2(33/32), no compensation required + return (y + q * Log33_32, s); +} + + +/// fixed248 log(fixed248 x) +@pure +@inline_ref +fun fixed248_log(x: int): int { + var (y, s) = log_aux_f256(x); + return muldivr(s - 248, log2_const_f256(), 1 << 8) + (y ~>> 8); + // return muldivr(s - 248, 80260960185991308862233904206310070533990667611589946606122867505419956976172, 1 << 8) + (y ~>> 8); +} + +/// fixed248 log2(fixed248 x) +@pure +@inline +fun fixed248_log2(x: int): int { + var (y, s) = log2_aux_f256(x); + return ((s - 248) << 248) + (y ~>> 8); +} + +/// computes x^y as exp(y*log(x)), x >= 0 +/// fixed248 pow(fixed248 x, fixed248 y); +@pure +@inline_ref +fun fixed248_pow(x: int, y: 
int): int { + if (!y) { + return 1 << 248; // x^0 = 1 + } + if (x <= 0) { + var bad: int = (x | y) < 0; + return 0 >> bad; // 0^y = 0 if x=0 and y>=0; "out of range" exception otherwise + } + var (l, s) = log2_aux_f256(x); + s -= 248; // log_2(x) = s+l, l is fixed256, 0<=l<1 + // compute (s+l)*y = q+ll + var (q1, r1) = mulrshiftr248mod(s, y); // muldivmodr(s, y, 1 << 248) + var (q2, r2) = mulrshift256mod(l, y); + r2 >>= 247; + var (q3, r3) = rshiftr248mod(q2); // divmodr(q2, 1 << 248); + var (q, ll) = rshiftr248mod(r1 + r3); + ll = 512 * ll + r2; + q += q1 + q3; + // now log_2(x^y) = y*log_2(x) = q + ll, q integer, ll fixed257, -1/2<=ll<1/2 + var sq: int = q + 248; + if (sq <= 0) { + return -(sq == 0); // underflow + } + y = expm1_f257(mulrshiftr256(ll, log2_const_f256())); + return (y ~>> (9 - q)) - (-1 << sq); +} + +/*-------------------- INVERSE TRIGONOMETRIC FUNCTIONS ------------------*/ + +/// number of terms n should be chosen as for atanh_f258() +/// fixed259 atan(fixed259 x); +@pure +@inline_ref +fun atan_f259(x: int, n: int): int { + var x2: int = mulrshiftr256(x, x); // x^2 as fixed262 + var One: int = (1 << 254); + var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 + repeat (n - 1) { + // a := 1 + (1 + x^2 / a)(1 + 1/n) as fixed254 + var t: int = One + muldivr(x2, 1 << 246, a); // t := 1 + x^2 / a + var n1: int = n - 1; + a = muldivr(t, n, n1) + One; + n = n1; + } + // x / (1 + x^2 / a) = x / (1 + d) = x - x * d / (1 + d) = x - x * x^2/(a+x^2) for d = x^2 / a + return x - muldivr(x, x2, a + x2 ~/ 256) ~/ 256; +} + +/// number of terms n should be chosen as for atanh_f261() +/// fixed261 atan(fixed261 x); +@pure +@inline +fun atan_f261_inlined(x: int, n: int): int { + var x2: int = mulrshiftr256(x, x); // x^2 as fixed266 + var One: int = (1 << 254); + var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 + repeat (n - 1) { + // a := 1 + (1 + x^2 / a)(1 + 1/n) as fixed254 + var t: int = One + muldivr(x2, 1 << 242, a); // t := 1 +
x^2 / a + var n1: int = n - 1; + a = muldivr(t, n, n1) + One; + n = n1; + } + // x / (1 + x^2 / a) = x / (1 + d) = x - x * d / (1 + d) = x - x * x^2/(a+x^2) for d = x^2 / a + return x - muldivr(x, x2, a + x2 ~/ 4096) ~/ 4096; +} + +/// fixed261 atan(fixed261 x); +@pure +@inline_ref +fun atan_f261(x: int, n: int): int { + return atan_f261_inlined(x, n); +} + +/// computes (q,a,b) such that q is approximately atan(x)/atan(1/32) and a+b*I=(1+I/32)^q as fixed255 +/// then b/a=tan(q*atan(1/32)) exactly, and (a,b) is almost a unit vector pointing in the direction of (1,x) +/// must have |x|<1.1, x is fixed24 +/// (int, fixed255, fixed255) atan_aux_prereduce(fixed24 x); +@pure +@inline_ref +fun atan_aux_prereduce(x: int): (int, int, int) { + var xu: int = abs(x); + var tc: int = 7214596; // tan(13*theta) as fixed24 where theta=atan(1/32) + var t1: int = muldivr(xu - tc, 1 << 88, xu * tc + (1 << 48)); // tan(x') as fixed64 where x'=atan(x)-13*theta + // t1/(3+t1^2) * 3073/32 = x'/3 * 3072/32 = x' / (96/3072) = x' / theta + var q: int = muldivr(t1 * 3073, 1 << 59, t1 * t1 + (touch(3) << 128)) + 13; // approximately round(atan(x)/theta), 0<=q<=25 + var (pa, pb) = (33226912, 5232641); // (32+I)^5 + var (qh, ql) = divmod(q, 5); + var (a, b) = (1 << (5 * (51 - q)), 0); // (1/32^q, 0) as fixed255 + repeat (ql) { + // a+b*I *= 32+I + (a, b) = (sub_rev(touch(b), 32 * a), a + 32 * b); // same as (32 * a - b, 32 * b + a), but more efficient + } + repeat (qh) { + // a+b*I *= (32+I)^5 = pa + pb*I + (a, b) = (a * pa - b * pb, a * pb + b * pa); + } + var xs: int = sgn(x); + return (xs * q, a, xs * b); +} + +/// compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 +/// this function is reasonably accurate (error < 7 ulp with ulp = 2^-261), but it consumes >7k gas +/// this is sufficient for most purposes +/// (int, fixed261) atan_aux(fixed256 x) +@pure +@inline_ref +fun atan_aux_f256(x: int): (int, int) { + var (q, a, b) = atan_aux_prereduce(x ~>> 232); // convert x to
fixed24 + // now b/a = tan(q*atan(1/32)) exactly, where q is near atan(x)/atan(1/32); so b/a is near x + // compute y = u/v = (a*x-b)/(a+b*x) as fixed261 ; then |y|<0.0167 = 1.07/64 and atan(x)=atan(y)+q*atan(1/32) + var (u, ul) = mulrshiftr256mod(a, x); + u = (ul ~>> 250) + ((u - b) << 6); // |u| < 1/32, convert fixed255 -> fixed261 + var v: int = a + mulrshiftr256(b, x); // v is scalar product of (a,b) and (1,x), it is approximately in [1..sqrt(2)] as fixed255 + var y: int = muldivr(u, 1 << 255, v); // y = u/v as fixed261 + var z: int = atan_f261_inlined(y, 18); // z = atan(x)-q*atan(1/32) + return (q, z); +} + +/// compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 +/// this function is very accurate (error < 2 ulp), but it consumes >7k gas +/// in most cases, faster function atan_aux_f256() should be used +/// (int, fixed261) atan_auxx(fixed256 x) +@pure +@inline_ref +fun atan_auxx_f256(x: int): (int, int) { + var (q, a, b) = atan_aux_prereduce(x ~>> 232); // convert x to fixed24 + // now b/a = tan(q*atan(1/32)) exactly, where q is near atan(x)/atan(1/32); so b/a is near x + // compute y = (a*x-b)/(a+b*x) as fixed261 ; then |y|<0.0167 = 1.07/64 and atan(x)=atan(y)+q*atan(1/32) + // use sort of double precision arithmetic for this + var (u, ul) = mulrshiftr256mod(a, x); + ul /= 2; + u -= b; // |u| < 1/32 as fixed255 + var (v, vl) = mulrshiftr256mod(b, x); + vl /= 2; + v += a; // v is scalar product of (a,b) and (1,x), it is approximately in [1..sqrt(2)] as fixed255 + // y = (u + ul*eps) / (v + vl*eps) = u/v + (ul - vl * u/v)/v * eps where eps=1/2^255 + var (y, r) = lshift255divmodr(u, v); // y = u/v as fixed255 + var yl: int = muldivr(ul + r, 1 << 255, v) - muldivr(vl, y, v); // y/2^255 + yl/2^510 represent u/v + y = (yl ~>> 249) + (y << 6); // convert y to fixed261 + var z: int = atan_f261_inlined(y, 18); // z = atan(x)-q*atan(1/32) + return (q, z); +} + +/// consumes ~ 8k gas +/// fixed255 atan(fixed255 x); +@pure +@inline_ref +fun atan_f255(x: 
int): int { + var s: int = (x ~>> 256); + touch(x); + if (s) { + x = lshift256divr(-1 << 255, x); // x:=-1/x as fixed256 + } else { + x *= 2; // convert to fixed256 + } + var (q, z) = atan_aux_f256(x); + // now atan(x) = z + q*atan(1/32) + s*(Pi/2), z is fixed261 + var (Pi_h, Pi_l) = Pi_xconst_f254(); // Pi/2 as fixed255 + fixed383 + var (qh, ql) = mulrshiftr6mod(q, Atan1_32_f261()); + return qh + s * Pi_h + (z + ql + muldivr(s, Pi_l, 1 << 122)) ~/ 64; +} + +/// computes atan(x) for -1 <= x < 1 only +/// fixed256 atan_small(fixed256 x); +@pure +@inline_ref +fun atan_f256_small(x: int): int { + var (q, z) = atan_aux_f256(x); + // now atan(x) = z + q*atan(1/32), z is fixed261 + var (qh, ql) = mulrshiftr5mod(q, Atan1_32_f261()); + return qh + (z + ql) ~/ 32; +} + +/// fixed255 asin(fixed255 x); +@pure +@inline_ref +fun asin_f255(x: int): int { + var a: int = fixed255_One - fixed255_sqr(x); // a:=1-x^2 + if (!a) { + return sgn(x) * Pi_const_f254(); // Pi/2 or -Pi/2 + } + var y: int = fixed255_sqrt(a); // sqrt(1-x^2) + var t: int = -lshift256divr(x, (-1 << 255) - y); // t = x/(1+sqrt(1-x^2)) avoiding overflow + return atan_f256_small(t); // asin(x)=2*atan(t) +} + +/// fixed254 acos(fixed255 x); +@pure +@inline_ref +fun acos_f255(x: int): int { + var Pi: int = Pi_const_f254(); + if (x == (-1 << 255)) { + return Pi; // acos(-1) = Pi + } + Pi /= 2; + var y: int = fixed255_sqrt(fixed255_One - fixed255_sqr(x)); // sqrt(1-x^2) + var t: int = lshift256divr(x, (-1 << 255) - y); // t = -x/(1+sqrt(1-x^2)) avoiding overflow + return Pi + atan_f256_small(t) ~/ 2; // acos(x)=Pi/2 + 2*atan(t) +} + +/// consumes ~ 10k gas +/// fixed248 asin(fixed248 x) +@pure +@inline +fun fixed248_asin(x: int): int { + return asin_f255(x << 7) ~>> 7; +} + +/// consumes ~ 10k gas +/// fixed248 acos(fixed248 x) +@pure +@inline +fun fixed248_acos(x: int): int { + return acos_f255(x << 7) ~>> 6; +} + +/// consumes ~ 7500 gas +/// fixed248 atan(fixed248 x); +@pure +@inline_ref +fun fixed248_atan(x: int): 
int { + var s: int = (x ~>> 249); + touch(x); + if (s) { + s = sgn(s); + x = lshift256divr(-1 << 248, x); // x:=-1/x as fixed256 + } else { + x <<= 8; // convert to fixed256 + } + var (q, z) = atan_aux_f256(x); + // now atan(x) = z + q*atan(1/32) + s*(Pi/2), z is fixed261 + return (z ~/ 64 + s * Pi_const_f254() + muldivr(q, Atan1_32_f261(), 64)) ~/ 128; // compute in fixed255, then convert +} + +/// fixed248 acot(fixed248 x); +@pure +@inline_ref +fun fixed248_acot(x: int): int { + var s: int = (x ~>> 249); + touch(x); + if (s) { + x = lshift256divr(-1 << 248, x); // x:=-1/x as fixed256 + s = 0; + } else { + x <<= 8; // convert to fixed256 + s = sgn(x); + } + var (q, z) = atan_aux_f256(x); + // now acot(x) = - z - q*atan(1/32) + s*(Pi/2), z is fixed261 + return (s * Pi_const_f254() - z ~/ 64 - muldivr(q, Atan1_32_f261(), 64)) ~/ 128; // compute in fixed255, then convert +} + +/*-------------------- PSEUDO-RANDOM NUMBERS ------------------*/ + +/// random number with standard normal distribution N(0,1) +/// generated by Kinderman--Monahan ratio method modified by J.Leva +/// spends ~ 2k..3k gas on average +/// fixed252 nrand(); +@inline_ref +fun nrand_f252(): int { + var (x, s, t, A, B, r0) = (nan(), touch(29483) << 236, touch(-3167) << 239, 12845, 16693, 9043); + // 4/sqrt(e*Pi) = 1.369 loop iterations on average + do { + var (u, v) = (random() / 16 + 1, muldivr(random() - (1 << 255), 7027, 1 << 16)); // fixed252; 7027=ceil(sqrt(8/e)*2^12) + var va: int = abs(v); + var (u1, v1) = (u - s, va - t); // (u - 29483/2^16, abs(v) + 3167/2^13) as fixed252 + // Q := u1^2 + v1 * (A*v1 - B*u1) as fixed252 where A=12845/2^16, B=16693/2^16 + var Q: int = muldivr(u1, u1, 1 << 252) + muldivr(v1, muldivr(v1, A, 1 << 16) - muldivr(u1, B, 1 << 16), 1 << 252); + // must have 9043 / 2^15 < Q < 9125 / 2^15, otherwise accept if smaller, reject if larger + var Qd: int = (Q >> 237) - r0; + if ((Qd < 9125 - 9043) & (va / u < 16)) { + x = muldivr(v, 1 << 252, u); // x:=v/u as fixed252; 
reject immediately if |v/u| >= 16 + if (Qd >= 0) { + // immediately accept if Qd < 0 + // rarely taken branch - 0.012 times per call on average + // check condition v^2 < -4*u^2*log(u), or equivalent condition u < exp(-x^2/4) for x=v/u + var xx: int = mulrshiftr256(x, x) ~/ 4; // x^2/4 as fixed248 + var ex: int = fixed248_exp(-xx) * 16; // exp(-x^2/4) as fixed252 + if (u > ex) { + x = nan(); // condition false, reject + } + } } - } + } while (!(~ is_nan(x))); + return x; +} + +/// generates a random number approximately distributed according to the standard normal distribution +/// much faster than nrand_f252(), should be suitable for most purposes when only several random numbers are needed +/// fixed252 nrand_fast(); +@inline_ref +fun nrand_fast_f252(): int { + var t: int = touch(-3) << 253; // -6. as fixed252 + repeat (12) { + t += random() / 16; // add together 12 uniformly random numbers } - } until (~ is_nan(x)); - return x; -} - -;; generates a random number approximately distributed according to the standard normal distribution -;; much faster than nrand_f252(), should be suitable for most purposes when only several random numbers are needed -;; fixed252 nrand_fast(); -int nrand_fast_f252() inline_ref { - int t = touch(-3) << 253; ;; -6. 
as fixed252 - repeat (12) { - t += random() / 16; ;; add together 12 uniformly random numbers - } - return t; + return t; } -;; random number uniformly distributed in [0..1) -;; fixed248 random(); -int fixed248::random() inline { - return random() >> 8; +/// random number uniformly distributed in [0..1) +/// fixed248 random(); +@inline +fun fixed248_random(): int { + return random() >> 8; } -;; random number with standard normal distribution -;; fixed248 nrand(); -int fixed248::nrand() inline { - return nrand_f252() ~>> 4; +/// random number with standard normal distribution +/// fixed248 nrand(); +@inline +fun fixed248_nrand(): int { + return nrand_f252() ~>> 4; } -;; generates a random number approximately distributed according to the standard normal distribution -;; fixed248 nrand_fast(); -int fixed248::nrand_fast() inline { - return nrand_fast_f252() ~>> 4; +/// generates a random number approximately distributed according to the standard normal distribution +/// fixed248 nrand_fast(); +@inline +fun fixed248_nrand_fast(): int { + return nrand_fast_f252() ~>> 4; } diff --git a/crypto/smartcont/stdlib.tolk b/crypto/smartcont/stdlib.tolk index b3dfbee06..10c3b36ad 100644 --- a/crypto/smartcont/stdlib.tolk +++ b/crypto/smartcont/stdlib.tolk @@ -1,7 +1,7 @@ // Standard library for Tolk // (initially copied from stdlib.fc) // -#pragma version >=0.5; +tolk 0.6 /* This file is part of TON Tolk Standard Library. @@ -15,7 +15,6 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. - */ /* @@ -30,139 +29,205 @@ # Lisp-style lists Lists can be represented as nested 2-elements tuples. - Empty list is conventionally represented as TVM `null` value (it can be obtained by calling [null()]). + Empty list is conventionally represented as TVM `null` value. For example, tuple `(1, (2, (3, null)))` represents list `[1, 2, 3]`. 
Elements of a list can be of different types. */ /// Adds an element to the beginning of lisp-style list. -forall X -> tuple cons(X head, tuple tail) pure asm "CONS"; +@pure +fun cons(head: X, tail: tuple): tuple + asm "CONS"; /// Extracts the head and the tail of lisp-style list. -forall X -> (X, tuple) uncons(tuple list) pure asm "UNCONS"; +@pure +fun uncons(list: tuple): (X, tuple) + asm "UNCONS"; /// Extracts the tail and the head of lisp-style list. -forall X -> (tuple, X) list_next(tuple list) pure asm( -> 1 0) "UNCONS"; +@pure +fun list_next(list: tuple): (tuple, X) + asm( -> 1 0) "UNCONS"; /// Returns the head of lisp-style list. -forall X -> X car(tuple list) pure asm "CAR"; +@pure +fun car(list: tuple): X + asm "CAR"; /// Returns the tail of lisp-style list. -tuple cdr(tuple list) pure asm "CDR"; +@pure +fun cdr(list: tuple): tuple + asm "CDR"; /// Creates tuple with zero elements. -tuple empty_tuple() pure asm "NIL"; +@pure +fun empty_tuple(): tuple + asm "NIL"; /// Appends a value `x` to a `Tuple t = (x1, ..., xn)`, but only if the resulting `Tuple t' = (x1, ..., xn, x)` /// is of length at most 255. Otherwise throws a type check exception. -forall X -> tuple tpush(tuple t, X value) pure asm "TPUSH"; -forall X -> (tuple, ()) ~tpush(tuple t, X value) pure asm "TPUSH"; +@pure +fun tpush(t: tuple, value: X): tuple + asm "TPUSH"; + +@pure +fun ~tpush(t: tuple, value: X): (tuple, ()) + asm "TPUSH"; /// Creates a tuple of length one with given argument as element. -forall X -> [X] single(X x) pure asm "SINGLE"; +@pure +fun single(x: X): [X] + asm "SINGLE"; /// Unpacks a tuple of length one -forall X -> X unsingle([X] t) pure asm "UNSINGLE"; +@pure +fun unsingle(t: [X]): X + asm "UNSINGLE"; /// Creates a tuple of length two with given arguments as elements. 
-forall X, Y -> [X, Y] pair(X x, Y y) pure asm "PAIR"; +@pure +fun pair(x: X, y: Y): [X, Y] + asm "PAIR"; /// Unpacks a tuple of length two -forall X, Y -> (X, Y) unpair([X, Y] t) pure asm "UNPAIR"; +@pure +fun unpair(t: [X, Y]): (X, Y) + asm "UNPAIR"; /// Creates a tuple of length three with given arguments as elements. -forall X, Y, Z -> [X, Y, Z] triple(X x, Y y, Z z) pure asm "TRIPLE"; +@pure +fun triple(x: X, y: Y, z: Z): [X, Y, Z] + asm "TRIPLE"; /// Unpacks a tuple of length three -forall X, Y, Z -> (X, Y, Z) untriple([X, Y, Z] t) pure asm "UNTRIPLE"; +@pure +fun untriple(t: [X, Y, Z]): (X, Y, Z) + asm "UNTRIPLE"; /// Creates a tuple of length four with given arguments as elements. -forall X, Y, Z, W -> [X, Y, Z, W] tuple4(X x, Y y, Z z, W w) pure asm "4 TUPLE"; +@pure +fun tuple4(x: X, y: Y, z: Z, w: W): [X, Y, Z, W] + asm "4 TUPLE"; /// Unpacks a tuple of length four -forall X, Y, Z, W -> (X, Y, Z, W) untuple4([X, Y, Z, W] t) pure asm "4 UNTUPLE"; +@pure +fun untuple4(t: [X, Y, Z, W]): (X, Y, Z, W) + asm "4 UNTUPLE"; /// Returns the first element of a tuple (with unknown element types). -forall X -> X first(tuple t) pure asm "FIRST"; +@pure +fun first(t: tuple): X + asm "FIRST"; /// Returns the second element of a tuple (with unknown element types). -forall X -> X second(tuple t) pure asm "SECOND"; +@pure +fun second(t: tuple): X + asm "SECOND"; /// Returns the third element of a tuple (with unknown element types). -forall X -> X third(tuple t) pure asm "THIRD"; +@pure +fun third(t: tuple): X + asm "THIRD"; /// Returns the fourth element of a tuple (with unknown element types). -forall X -> X fourth(tuple t) pure asm "3 INDEX"; +@pure +fun fourth(t: tuple): X + asm "3 INDEX"; /// Returns the [`index`]-th element of tuple [`t`]. -forall X -> X at(tuple t, int index) pure builtin; +@pure +fun at(t: tuple, index: int): X + builtin; /// Returns the first element of a pair tuple. 
-forall X, Y -> X pair_first([X, Y] p) pure asm "FIRST"; +@pure +fun pair_first(p: [X, Y]): X + asm "FIRST"; /// Returns the second element of a pair tuple. -forall X, Y -> Y pair_second([X, Y] p) pure asm "SECOND"; +@pure +fun pair_second(p: [X, Y]): Y + asm "SECOND"; /// Returns the first element of a triple tuple. -forall X, Y, Z -> X triple_first([X, Y, Z] p) pure asm "FIRST"; +@pure +fun triple_first(p: [X, Y, Z]): X + asm "FIRST"; /// Returns the second element of a triple tuple. -forall X, Y, Z -> Y triple_second([X, Y, Z] p) pure asm "SECOND"; +@pure +fun triple_second(p: [X, Y, Z]): Y + asm "SECOND"; /// Returns the third element of a triple tuple. -forall X, Y, Z -> Z triple_third([X, Y, Z] p) pure asm "THIRD"; - +@pure +fun triple_third(p: [X, Y, Z]): Z + asm "THIRD"; -/// Push null element (casted to given type) -/// By the TVM type `Null` Tolk represents absence of a value of some atomic type. -/// So `null` can actually have any atomic type. -forall X -> X null() pure asm "PUSHNULL"; - -/// Checks whether the argument is null. -forall X -> int null?(X x) pure builtin; /// Moves a variable [x] to the top of the stack. -forall X -> X touch(X x) pure builtin; +@pure +fun touch(x: X): X + builtin; /// Moves a variable [x] to the top of the stack. -forall X -> (X, ()) ~touch(X x) pure builtin; +@pure +fun ~touch(x: X): (X, ()) + builtin; /// Mark a variable as used, such that the code which produced it won't be deleted even if it is not impure. -forall X -> (X, ()) ~impure_touch(X x) asm "NOP"; +fun ~impure_touch(x: X): (X, ()) + asm "NOP"; /// Returns the current Unix time as an Integer -int now() pure asm "NOW"; +@pure +fun now(): int + asm "NOW"; /// Returns the internal address of the current smart contract as a Slice with a `MsgAddressInt`. /// If necessary, it can be parsed further using primitives such as [parse_std_addr]. 
-slice my_address() pure asm "MYADDR"; +@pure +fun my_address(): slice + asm "MYADDR"; /// Returns the balance of the smart contract as a tuple consisting of an int /// (balance in nanotoncoins) and a `cell` /// (a dictionary with 32-bit keys representing the balance of "extra currencies") /// at the start of Computation Phase. /// Note that RAW primitives such as [send_raw_message] do not update this field. -[int, cell] get_balance() pure asm "BALANCE"; +@pure +fun get_balance(): [int, cell] + asm "BALANCE"; /// Returns the logical time of the current transaction. -int cur_lt() pure asm "LTIME"; +@pure +fun cur_lt(): int + asm "LTIME"; /// Returns the starting logical time of the current block. -int block_lt() pure asm "BLOCKLT"; +@pure +fun block_lt(): int + asm "BLOCKLT"; /// Computes the representation hash of a `cell` [c] and returns it as a 256-bit unsigned integer `x`. /// Useful for signing and checking signatures of arbitrary entities represented by a tree of cells. -int cell_hash(cell c) pure asm "HASHCU"; +@pure +fun cell_hash(c: cell): int + asm "HASHCU"; /// Computes the hash of a `slice s` and returns it as a 256-bit unsigned integer `x`. /// The result is the same as if an ordinary cell containing only data and references from `s` had been created /// and its hash computed by [cell_hash]. -int slice_hash(slice s) pure asm "HASHSU"; +@pure +fun slice_hash(s: slice): int + asm "HASHSU"; /// Computes sha256 of the data bits of `slice` [s]. If the bit length of `s` is not divisible by eight, /// throws a cell underflow exception. The hash value is returned as a 256-bit unsigned integer `x`. -int string_hash(slice s) pure asm "SHA256U"; +@pure +fun string_hash(s: slice): int + asm "SHA256U"; /*** # Signature checks @@ -175,14 +240,18 @@ int string_hash(slice s) pure asm "SHA256U"; /// Note that `CHKSIGNU` creates a 256-bit slice with the hash and calls `CHKSIGNS`. 
/// That is, if [hash] is computed as the hash of some data, these data are hashed twice, /// the second hashing occurring inside `CHKSIGNS`. -int check_signature(int hash, slice signature, int public_key) pure asm "CHKSIGNU"; +@pure +fun check_signature(hash: int, signature: slice, public_key: int): int + asm "CHKSIGNU"; /// Checks whether [signature] is a valid Ed25519-signature of the data portion of `slice data` using `public_key`, /// similarly to [check_signature]. /// If the bit length of [data] is not divisible by eight, throws a cell underflow exception. /// The verification of Ed25519 signatures is the standard one, /// with sha256 used to reduce [data] to the 256-bit number that is actually signed. -int check_data_signature(slice data, slice signature, int public_key) pure asm "CHKSIGNS"; +@pure +fun check_data_signature(data: slice, signature: slice, public_key: int): int + asm "CHKSIGNS"; /*** # Computation of boc size @@ -190,10 +259,12 @@ int check_data_signature(slice data, slice signature, int public_key) pure asm " */ /// A non-quiet version of [compute_data_size?] that throws a cell overflow exception (`8`) on failure. -(int, int, int) compute_data_size(cell c, int max_cells) asm "CDATASIZE"; +fun compute_data_size(c: cell, max_cells: int): (int, int, int) + asm "CDATASIZE"; /// A non-quiet version of [slice_compute_data_size?] that throws a cell overflow exception (`8`) on failure. -(int, int, int) slice_compute_data_size(slice s, int max_cells) asm "SDATASIZE"; +fun slice_compute_data_size(s: slice, max_cells: int): (int, int, int) + asm "SDATASIZE"; /// Returns `(x, y, z, -1)` or `(null, null, null, 0)`. 
/// Recursively computes the count of distinct cells `x`, data bits `y`, and cell references `z` @@ -204,33 +275,16 @@ int check_data_signature(slice data, slice signature, int public_key) pure asm " /// The total count of visited cells `x` cannot exceed non-negative [max_cells]; /// otherwise the computation is aborted before visiting the `(max_cells + 1)`-st cell and /// a zero flag is returned to indicate failure. If [c] is `null`, returns `x = y = z = 0`. -(int, int, int, int) compute_data_size?(cell c, int max_cells) pure asm "CDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; +@pure +fun compute_data_size?(c: cell, max_cells: int): (int, int, int, int) + asm "CDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; /// Similar to [compute_data_size?], but accepting a `slice` [s] instead of a `cell`. /// The returned value of `x` does not take into account the cell that contains the `slice` [s] itself; /// however, the data bits and the cell references of [s] are accounted for in `y` and `z`. -(int, int, int, int) slice_compute_data_size?(slice s, int max_cells) pure asm "SDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; - -/// Throws exception [`excno`] with parameter zero. -/// In other words, it transfers control to the continuation in `c2`, -/// pushing `0` and [`excno`] into it's stack, and discarding the old stack altogether. -() throw(int excno) builtin; - -/// Throws exception [`excno`] with parameter zero only if [`cond`] != `0`. -() throw_if(int excno, int cond) builtin; - -/// Throws exception [`excno`] with parameter zero only if [`cond`] == `0`. -() throw_unless(int excno, int cond) builtin; - -/// Throws exception [`excno`] with parameter [`x`], -/// by copying [`x`] and [`excno`] into the stack of `c2` and transferring control to `c2`. -forall X -> () throw_arg(X x, int excno) builtin; - -/// Throws exception [`excno`] with parameter [`x`] only if [`cond`] != `0`. 
-forall X -> () throw_arg_if(X x, int excno, int cond) builtin; - -/// Throws exception [`excno`] with parameter [`x`] only if [`cond`] == `0`. -forall X -> () throw_arg_unless(X x, int excno, int cond) builtin; +@pure +fun slice_compute_data_size?(s: slice, max_cells: int): (int, int, int, int) + asm "SDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; /*** # Debug primitives @@ -238,39 +292,50 @@ forall X -> () throw_arg_unless(X x, int excno, int cond) builtin; */ /// Dump a variable [x] to the debug log. -forall X -> (X, ()) ~dump(X x) builtin; +fun ~dump(x: X): (X, ()) + builtin; /// Dump a string [x] to the debug log. -forall X -> (X, ()) ~strdump(X x) builtin; +fun ~strdump(x: X): (X, ()) + builtin; /// Dumps the stack (at most the top 255 values) and shows the total stack depth. -() dump_stack() asm "DUMPSTK"; +fun dump_stack(): void + asm "DUMPSTK"; /*** # Persistent storage save and load */ /// Returns the persistent contract storage cell. It can be parsed or modified with slice and builder primitives later. -cell get_data() pure asm "c4 PUSH"; +@pure +fun get_data(): cell + asm "c4 PUSH"; /// Sets `cell` [c] as persistent contract data. You can update persistent contract storage with this primitive. -() set_data(cell c) asm "c4 POP"; +fun set_data(c: cell): void + asm "c4 POP"; /*** # Continuation primitives */ /// Usually `c3` has a continuation initialized by the whole code of the contract. It is used for function calls. /// The primitive returns the current value of `c3`. -cont get_c3() pure asm "c3 PUSH"; +@pure +fun get_c3(): continuation + asm "c3 PUSH"; /// Updates the current value of `c3`. Usually, it is used for updating smart contract code in run-time. /// Note that after execution of this primitive the current code /// (and the stack of recursive function calls) won't change, /// but any other function call will use a function from the new code. 
-() set_c3(cont c) asm "c3 POP"; +fun set_c3(c: continuation): void + asm "c3 POP"; /// Transforms a `slice` [s] into a simple ordinary continuation `c`, with `c.code = s` and an empty stack and savelist. -cont bless(slice s) pure asm "BLESS"; +@pure +fun bless(s: slice): continuation + asm "BLESS"; /*** # Gas related primitives @@ -282,56 +347,77 @@ cont bless(slice s) pure asm "BLESS"; /// This action is required to process external messages, which bring no value (hence no gas) with themselves. /// /// For more details check [accept_message effects](https://ton.org/docs/#/smart-contracts/accept). -() accept_message() asm "ACCEPT"; +fun accept_message(): void + asm "ACCEPT"; /// Sets current gas limit `gl` to the minimum of limit and `gm`, and resets the gas credit `gc` to zero. /// If the gas consumed so far (including the present instruction) exceeds the resulting value of `gl`, /// an (unhandled) out of gas exception is thrown before setting new gas limits. /// Notice that [set_gas_limit] with an argument `limit ≥ 2^63 − 1` is equivalent to [accept_message]. -() set_gas_limit(int limit) asm "SETGASLIMIT"; +fun set_gas_limit(limit: int): void + asm "SETGASLIMIT"; /// Commits the current state of registers `c4` (“persistent data”) and `c5` (“actions”) /// so that the current execution is considered “successful” with the saved values even if an exception /// in Computation Phase is thrown later. -() commit() asm "COMMIT"; - -/// Not implemented -//() buy_gas(int gram) asm "BUYGAS"; +fun commit(): void + asm "COMMIT"; /// Computes the amount of gas that can be bought for `amount` nanoTONs, /// and sets `gl` accordingly in the same way as [set_gas_limit]. -() buy_gas(int amount) asm "BUYGAS"; +fun buy_gas(amount: int): void + asm "BUYGAS"; /// Computes the minimum of two integers [x] and [y]. -int min(int x, int y) pure asm "MIN"; +@pure +fun min(x: int, y: int): int + asm "MIN"; /// Computes the maximum of two integers [x] and [y]. 
-int max(int x, int y) pure asm "MAX"; +@pure +fun max(x: int, y: int): int + asm "MAX"; /// Sorts two integers. -(int, int) minmax(int x, int y) pure asm "MINMAX"; +@pure +fun minmax(x: int, y: int): (int, int) + asm "MINMAX"; /// Computes the absolute value of an integer [x]. -int abs(int x) pure asm "ABS"; +@pure +fun abs(x: int): int + asm "ABS"; /// Computes the quotient and remainder of [x] / [y]. Example: divmod(112,3) = (37,1) -(int, int) divmod(int x, int y) pure builtin; +@pure +fun divmod(x: int, y: int): (int, int) + builtin; /// Computes the remainder and quotient of [x] / [y]. Example: moddiv(112,3) = (1,37) -(int, int) moddiv(int x, int y) pure builtin; +@pure +fun moddiv(x: int, y: int): (int, int) + builtin; /// Computes multiple-then-divide: floor([x] * [y] / [z]). /// The intermediate result is stored in a 513-bit integer to prevent precision loss. -int muldiv(int x, int y, int z) pure builtin; +@pure +fun muldiv(x: int, y: int, z: int): int + builtin; /// Similar to `muldiv`, but rounds the result: round([x] * [y] / [z]). -int muldivr(int x, int y, int z) pure builtin; +@pure +fun muldivr(x: int, y: int, z: int): int + builtin; /// Similar to `muldiv`, but ceils the result: ceil([x] * [y] / [z]). -int muldivc(int x, int y, int z) pure builtin; +@pure +fun muldivc(x: int, y: int, z: int): int + builtin; /// Computes the quotient and remainder of ([x] * [y] / [z]). Example: muldivmod(112,3,10) = (33,6) -(int, int) muldivmod(int x, int y, int z) pure builtin; +@pure +fun muldivmod(x: int, y: int, z: int): (int, int) + builtin; /*** # Slice primitives @@ -350,79 +436,131 @@ int muldivc(int x, int y, int z) pure builtin; /// Converts a `cell` [c] into a `slice`. Notice that [c] must be either an ordinary cell, /// or an exotic cell (see [TVM.pdf](https://ton-blockchain.github.io/docs/tvm.pdf), 3.1.2) /// which is automatically loaded to yield an ordinary cell `c'`, converted into a `slice` afterwards. 
-slice begin_parse(cell c) pure asm "CTOS"; +@pure +fun begin_parse(c: cell): slice + asm "CTOS"; /// Checks if [s] is empty. If not, throws an exception. -() end_parse(slice s) asm "ENDS"; +fun end_parse(s: slice): void + asm "ENDS"; /// Loads the first reference from the slice. -(slice, cell) load_ref(slice s) pure asm( -> 1 0) "LDREF"; +@pure +fun load_ref(s: slice): (slice, cell) + asm( -> 1 0) "LDREF"; /// Preloads the first reference from the slice. -cell preload_ref(slice s) pure asm "PLDREF"; +@pure +fun preload_ref(s: slice): cell + asm "PLDREF"; /// Loads a signed [len]-bit integer from a slice [s]. -(slice, int) load_int(slice s, int len) pure builtin; +@pure +fun load_int(s: slice, len: int): (slice, int) + builtin; /// Loads an unsigned [len]-bit integer from a slice [s]. -(slice, int) load_uint(slice s, int len) pure builtin; +@pure +fun load_uint(s: slice, len: int): (slice, int) + builtin; /// Preloads a signed [len]-bit integer from a slice [s]. -int preload_int(slice s, int len) pure builtin; +@pure +fun preload_int(s: slice, len: int): int + builtin; /// Preloads an unsigned [len]-bit integer from a slice [s]. -int preload_uint(slice s, int len) pure builtin; +@pure +fun preload_uint(s: slice, len: int): int + builtin; /// Loads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate slice `s''`. -(slice, slice) load_bits(slice s, int len) pure builtin; +@pure +fun load_bits(s: slice, len: int): (slice, slice) + builtin; /// Preloads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate slice `s''`. -slice preload_bits(slice s, int len) pure builtin; +@pure +fun preload_bits(s: slice, len: int): slice + builtin; /// Loads serialized amount of TonCoins (any unsigned integer up to `2^120 - 1`). 
-(slice, int) load_grams(slice s) pure asm( -> 1 0) "LDGRAMS"; -(slice, int) load_coins(slice s) pure asm( -> 1 0) "LDGRAMS"; +@pure +fun load_grams(s: slice): (slice, int) + asm( -> 1 0) "LDGRAMS"; + +@pure +fun load_coins(s: slice): (slice, int) + asm( -> 1 0) "LDGRAMS"; /// Returns all but the first `0 ≤ len ≤ 1023` bits of `slice` [s]. -slice skip_bits(slice s, int len) pure asm "SDSKIPFIRST"; -(slice, ()) ~skip_bits(slice s, int len) pure asm "SDSKIPFIRST"; +@pure +fun skip_bits(s: slice, len: int): slice + asm "SDSKIPFIRST"; + +@pure +fun ~skip_bits(s: slice, len: int): (slice, ()) + asm "SDSKIPFIRST"; /// Returns the first `0 ≤ len ≤ 1023` bits of `slice` [s]. -slice first_bits(slice s, int len) pure asm "SDCUTFIRST"; +@pure +fun first_bits(s: slice, len: int): slice + asm "SDCUTFIRST"; /// Returns all but the last `0 ≤ len ≤ 1023` bits of `slice` [s]. -slice skip_last_bits(slice s, int len) pure asm "SDSKIPLAST"; -(slice, ()) ~skip_last_bits(slice s, int len) pure asm "SDSKIPLAST"; +@pure +fun skip_last_bits(s: slice, len: int): slice + asm "SDSKIPLAST"; +@pure +fun ~skip_last_bits(s: slice, len: int): (slice, ()) + asm "SDSKIPLAST"; /// Returns the last `0 ≤ len ≤ 1023` bits of `slice` [s]. -slice slice_last(slice s, int len) pure asm "SDCUTLAST"; +@pure +fun slice_last(s: slice, len: int): slice + asm "SDCUTLAST"; /// Loads a dictionary `D` (HashMapE) from `slice` [s]. /// (returns `null` if `nothing` constructor is used). -(slice, cell) load_dict(slice s) pure asm( -> 1 0) "LDDICT"; +@pure +fun load_dict(s: slice): (slice, cell) + asm( -> 1 0) "LDDICT"; /// Preloads a dictionary `D` from `slice` [s]. -cell preload_dict(slice s) pure asm "PLDDICT"; +@pure +fun preload_dict(s: slice): cell + asm "PLDDICT"; /// Loads a dictionary as [load_dict], but returns only the remainder of the slice. 
-slice skip_dict(slice s) pure asm "SKIPDICT"; -(slice, ()) ~skip_dict(slice s) pure asm "SKIPDICT"; +@pure +fun skip_dict(s: slice): slice + asm "SKIPDICT"; + +@pure +fun ~skip_dict(s: slice): (slice, ()) + asm "SKIPDICT"; /// Loads (Maybe ^Cell) from `slice` [s]. /// In other words, loads 1 bit and, if it is true, /// loads the first ref and returns it with the slice remainder; /// otherwise returns `null` and the slice remainder. -(slice, cell) load_maybe_ref(slice s) pure asm( -> 1 0) "LDOPTREF"; +@pure +fun load_maybe_ref(s: slice): (slice, cell) + asm( -> 1 0) "LDOPTREF"; /// Preloads (Maybe ^Cell) from `slice` [s]. -cell preload_maybe_ref(slice s) pure asm "PLDOPTREF"; +@pure +fun preload_maybe_ref(s: slice): cell + asm "PLDOPTREF"; /// Returns the depth of `cell` [c]. /// If [c] has no references, then returns `0`; /// otherwise the returned value is one plus the maximum of depths of cells referred to from [c]. /// If [c] is a `null` instead of a cell, returns zero. -int cell_depth(cell c) pure asm "CDEPTH"; +@pure +fun cell_depth(c: cell): int + asm "CDEPTH"; /*** @@ -430,42 +568,62 @@ int cell_depth(cell c) pure asm "CDEPTH"; */ /// Returns the number of references in `slice` [s]. -int slice_refs(slice s) pure asm "SREFS"; +@pure +fun slice_refs(s: slice): int + asm "SREFS"; /// Returns the number of data bits in `slice` [s]. -int slice_bits(slice s) pure asm "SBITS"; +@pure +fun slice_bits(s: slice): int + asm "SBITS"; /// Returns both the number of data bits and the number of references in `slice` [s]. -(int, int) slice_bits_refs(slice s) pure asm "SBITREFS"; +@pure +fun slice_bits_refs(s: slice): (int, int) + asm "SBITREFS"; /// Checks whether a `slice` [s] is empty (i.e., contains no bits of data and no cell references). -int slice_empty?(slice s) pure asm "SEMPTY"; +@pure +fun slice_empty?(s: slice): int + asm "SEMPTY"; /// Checks whether `slice` [s] has no bits of data.
-int slice_data_empty?(slice s) pure asm "SDEMPTY"; +@pure +fun slice_data_empty?(s: slice): int + asm "SDEMPTY"; /// Checks whether `slice` [s] has no references. -int slice_refs_empty?(slice s) pure asm "SREMPTY"; +@pure +fun slice_refs_empty?(s: slice): int + asm "SREMPTY"; /// Returns the depth of `slice` [s]. /// If [s] has no references, then returns `0`; /// otherwise the returned value is one plus the maximum of depths of cells referred to from [s]. -int slice_depth(slice s) pure asm "SDEPTH"; +@pure +fun slice_depth(s: slice): int + asm "SDEPTH"; /*** # Builder size primitives */ /// Returns the number of cell references already stored in `builder` [b] -int builder_refs(builder b) pure asm "BREFS"; +@pure +fun builder_refs(b: builder): int + asm "BREFS"; /// Returns the number of data bits already stored in `builder` [b]. -int builder_bits(builder b) pure asm "BBITS"; +@pure +fun builder_bits(b: builder): int + asm "BBITS"; /// Returns the depth of `builder` [b]. /// If no cell references are stored in [b], then returns 0; /// otherwise the returned value is one plus the maximum of depths of cells referred to from [b]. -int builder_depth(builder b) pure asm "BDEPTH"; +@pure +fun builder_depth(b: builder): int + asm "BDEPTH"; /*** # Builder primitives @@ -478,22 +636,34 @@ int builder_depth(builder b) pure asm "BDEPTH"; */ /// Creates a new empty `builder`. -builder begin_cell() pure asm "NEWC"; +@pure +fun begin_cell(): builder + asm "NEWC"; /// Converts a `builder` into an ordinary `cell`. -cell end_cell(builder b) pure asm "ENDC"; +@pure +fun end_cell(b: builder): cell + asm "ENDC"; /// Stores a reference to `cell` [c] into `builder` [b]. -builder store_ref(builder b, cell c) pure asm(c b) "STREF"; +@pure +fun store_ref(b: builder, c: cell): builder + asm(c b) "STREF"; /// Stores an unsigned [len]-bit integer `x` into `b` for `0 ≤ len ≤ 256`. 
-builder store_uint(builder b, int x, int len) pure builtin; +@pure +fun store_uint(b: builder, x: int, len: int): builder + builtin; /// Stores a signed [len]-bit integer `x` into `b` for `0 ≤ len ≤ 257`. -builder store_int(builder b, int x, int len) pure builtin; +@pure +fun store_int(b: builder, x: int, len: int): builder + builtin; /// Stores `slice` [s] into `builder` [b]. -builder store_slice(builder b, slice s) pure asm "STSLICER"; +@pure +fun store_slice(b: builder, s: slice): builder + asm "STSLICER"; /// Stores (serializes) an integer [x] in the range `0..2^120 − 1` into `builder` [b]. /// The serialization of [x] consists of a 4-bit unsigned big-endian integer `l`, @@ -502,17 +672,26 @@ builder store_slice(builder b, slice s) pure asm "STSLICER"; /// If [x] does not belong to the supported range, a range check exception is thrown. /// /// Store amounts of TonCoins to the builder as VarUInteger 16 -builder store_grams(builder b, int x) pure asm "STGRAMS"; -builder store_coins(builder b, int x) pure asm "STGRAMS"; +@pure +fun store_grams(b: builder, x: int): builder + asm "STGRAMS"; + +@pure +fun store_coins(b: builder, x: int): builder + asm "STGRAMS"; /// Stores dictionary `D` represented by `cell` [c] or `null` into `builder` [b]. /// In other words, stores a `1`-bit and a reference to [c] if [c] is not `null` and `0`-bit otherwise. 
-builder store_dict(builder b, cell c) pure asm(c b) "STDICT"; +@pure +fun store_dict(b: builder, c: cell): builder + asm(c b) "STDICT"; /// Stores (Maybe ^Cell) to builder: /// if cell is null store 1 zero bit /// otherwise store 1 true bit and ref to cell -builder store_maybe_ref(builder b, cell c) pure asm(c b) "STOPTREF"; +@pure +fun store_maybe_ref(b: builder, c: cell): builder + asm(c b) "STOPTREF"; /*** @@ -554,22 +733,30 @@ builder store_maybe_ref(builder b, cell c) pure asm(c b) "STOPTREF"; /// Loads from slice [s] the only prefix that is a valid `MsgAddress`, /// and returns both this prefix `s'` and the remainder `s''` of [s] as slices. -(slice, slice) load_msg_addr(slice s) pure asm( -> 1 0) "LDMSGADDR"; +@pure +fun load_msg_addr(s: slice): (slice, slice) + asm( -> 1 0) "LDMSGADDR"; /// Decomposes slice [s] containing a valid `MsgAddress` into a `tuple t` with separate fields of this `MsgAddress`. /// If [s] is not a valid `MsgAddress`, a cell deserialization exception is thrown. -tuple parse_addr(slice s) pure asm "PARSEMSGADDR"; +@pure +fun parse_addr(s: slice): tuple + asm "PARSEMSGADDR"; /// Parses slice [s] containing a valid `MsgAddressInt` (usually a `msg_addr_std`), /// applies rewriting from the anycast (if present) to the same-length prefix of the address, /// and returns both the workchain and the 256-bit address as integers. /// If the address is not 256-bit, or if [s] is not a valid serialization of `MsgAddressInt`, /// throws a cell deserialization exception. -(int, int) parse_std_addr(slice s) pure asm "REWRITESTDADDR"; +@pure +fun parse_std_addr(s: slice): (int, int) + asm "REWRITESTDADDR"; /// A variant of [parse_std_addr] that returns the (rewritten) address as a slice [s], /// even if it is not exactly 256 bit long (represented by a `msg_addr_var`). 
-(int, slice) parse_var_addr(slice s) pure asm "REWRITEVARADDR"; +@pure +fun parse_var_addr(s: slice): (int, slice) + asm "REWRITEVARADDR"; /*** # Dictionary primitives @@ -578,117 +765,344 @@ tuple parse_addr(slice s) pure asm "PARSEMSGADDR"; /// Sets the value associated with [key_len]-bit key signed index in dictionary [dict] to [value] (cell), /// and returns the resulting dictionary. -cell idict_set_ref(cell dict, int key_len, int index, cell value) pure asm(value index dict key_len) "DICTISETREF"; -(cell, ()) ~idict_set_ref(cell dict, int key_len, int index, cell value) pure asm(value index dict key_len) "DICTISETREF"; +@pure +fun idict_set_ref(dict: cell, key_len: int, index: int, value: cell): cell + asm(value index dict key_len) "DICTISETREF"; + +@pure +fun ~idict_set_ref(dict: cell, key_len: int, index: int, value: cell): (cell, ()) + asm(value index dict key_len) "DICTISETREF"; /// Sets the value associated with [key_len]-bit key unsigned index in dictionary [dict] to [value] (cell), /// and returns the resulting dictionary. 
-cell udict_set_ref(cell dict, int key_len, int index, cell value) pure asm(value index dict key_len) "DICTUSETREF"; -(cell, ()) ~udict_set_ref(cell dict, int key_len, int index, cell value) pure asm(value index dict key_len) "DICTUSETREF"; - -cell idict_get_ref(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTIGETOPTREF"; -(cell, int) idict_get_ref?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTIGETREF" "NULLSWAPIFNOT"; -(cell, int) udict_get_ref?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTUGETREF" "NULLSWAPIFNOT"; -(cell, cell) idict_set_get_ref(cell dict, int key_len, int index, cell value) pure asm(value index dict key_len) "DICTISETGETOPTREF"; -(cell, cell) udict_set_get_ref(cell dict, int key_len, int index, cell value) pure asm(value index dict key_len) "DICTUSETGETOPTREF"; -(cell, int) idict_delete?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTIDEL"; -(cell, int) udict_delete?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTUDEL"; -(slice, int) idict_get?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTIGET" "NULLSWAPIFNOT"; -(slice, int) udict_get?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTUGET" "NULLSWAPIFNOT"; -(cell, slice, int) idict_delete_get?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; -(cell, slice, int) udict_delete_get?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; -(cell, (slice, int)) ~idict_delete_get?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; -(cell, (slice, int)) ~udict_delete_get?(cell dict, int key_len, int index) pure asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; -cell udict_set(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTUSET"; -(cell, ()) ~udict_set(cell dict, int key_len, 
int index, slice value) pure asm(value index dict key_len) "DICTUSET"; -cell idict_set(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTISET"; -(cell, ()) ~idict_set(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTISET"; -cell dict_set(cell dict, int key_len, slice index, slice value) pure asm(value index dict key_len) "DICTSET"; -(cell, ()) ~dict_set(cell dict, int key_len, slice index, slice value) pure asm(value index dict key_len) "DICTSET"; -(cell, int) udict_add?(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTUADD"; -(cell, int) udict_replace?(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTUREPLACE"; -(cell, int) idict_add?(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTIADD"; -(cell, int) idict_replace?(cell dict, int key_len, int index, slice value) pure asm(value index dict key_len) "DICTIREPLACE"; -cell udict_set_builder(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTUSETB"; -(cell, ()) ~udict_set_builder(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTUSETB"; -cell idict_set_builder(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTISETB"; -(cell, ()) ~idict_set_builder(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTISETB"; -cell dict_set_builder(cell dict, int key_len, slice index, builder value) pure asm(value index dict key_len) "DICTSETB"; -(cell, ()) ~dict_set_builder(cell dict, int key_len, slice index, builder value) pure asm(value index dict key_len) "DICTSETB"; -(cell, int) udict_add_builder?(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTUADDB"; -(cell, int) udict_replace_builder?(cell dict, int key_len, int index, builder value) pure 
asm(value index dict key_len) "DICTUREPLACEB"; -(cell, int) idict_add_builder?(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTIADDB"; -(cell, int) idict_replace_builder?(cell dict, int key_len, int index, builder value) pure asm(value index dict key_len) "DICTIREPLACEB"; -(cell, int, slice, int) udict_delete_get_min(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; -(cell, (int, slice, int)) ~udict::delete_get_min(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; -(cell, int, slice, int) idict_delete_get_min(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; -(cell, (int, slice, int)) ~idict::delete_get_min(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; -(cell, slice, slice, int) dict_delete_get_min(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; -(cell, (slice, slice, int)) ~dict::delete_get_min(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; -(cell, int, slice, int) udict_delete_get_max(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; -(cell, (int, slice, int)) ~udict::delete_get_max(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; -(cell, int, slice, int) idict_delete_get_max(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; -(cell, (int, slice, int)) ~idict::delete_get_max(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; -(cell, slice, slice, int) dict_delete_get_max(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; -(cell, (slice, slice, int)) ~dict::delete_get_max(cell dict, int key_len) pure asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; -(int, slice, int) udict_get_min?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; -(int, slice, int) udict_get_max?(cell dict, int key_len) pure asm (-> 1 0 2) 
"DICTUMAX" "NULLSWAPIFNOT2"; -(int, cell, int) udict_get_min_ref?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; -(int, cell, int) udict_get_max_ref?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTUMAXREF" "NULLSWAPIFNOT2"; -(int, slice, int) idict_get_min?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; -(int, slice, int) idict_get_max?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; -(int, cell, int) idict_get_min_ref?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; -(int, cell, int) idict_get_max_ref?(cell dict, int key_len) pure asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; -(int, slice, int) udict_get_next?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; -(int, slice, int) udict_get_nexteq?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; -(int, slice, int) udict_get_prev?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; -(int, slice, int) udict_get_preveq?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; -(int, slice, int) idict_get_next?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; -(int, slice, int) idict_get_nexteq?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; -(int, slice, int) idict_get_prev?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; -(int, slice, int) idict_get_preveq?(cell dict, int key_len, int pivot) pure asm(pivot dict key_len -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; +@pure +fun udict_set_ref(dict: cell, key_len: int, index: int, value: cell): cell + asm(value index dict key_len) "DICTUSETREF"; + +@pure +fun 
~udict_set_ref(dict: cell, key_len: int, index: int, value: cell): (cell, ()) + asm(value index dict key_len) "DICTUSETREF"; + +@pure +fun idict_get_ref(dict: cell, key_len: int, index: int): cell + asm(index dict key_len) "DICTIGETOPTREF"; + +@pure +fun idict_get_ref?(dict: cell, key_len: int, index: int): (cell, int) + asm(index dict key_len) "DICTIGETREF" "NULLSWAPIFNOT"; + +@pure +fun udict_get_ref?(dict: cell, key_len: int, index: int): (cell, int) + asm(index dict key_len) "DICTUGETREF" "NULLSWAPIFNOT"; + +@pure +fun idict_set_get_ref(dict: cell, key_len: int, index: int, value: cell): (cell, cell) + asm(value index dict key_len) "DICTISETGETOPTREF"; + +@pure +fun udict_set_get_ref(dict: cell, key_len: int, index: int, value: cell): (cell, cell) + asm(value index dict key_len) "DICTUSETGETOPTREF"; + +@pure +fun idict_delete?(dict: cell, key_len: int, index: int): (cell, int) + asm(index dict key_len) "DICTIDEL"; + +@pure +fun udict_delete?(dict: cell, key_len: int, index: int): (cell, int) + asm(index dict key_len) "DICTUDEL"; + +@pure +fun idict_get?(dict: cell, key_len: int, index: int): (slice, int) + asm(index dict key_len) "DICTIGET" "NULLSWAPIFNOT"; + +@pure +fun udict_get?(dict: cell, key_len: int, index: int): (slice, int) + asm(index dict key_len) "DICTUGET" "NULLSWAPIFNOT"; + +@pure +fun idict_delete_get?(dict: cell, key_len: int, index: int): (cell, slice, int) + asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; + +@pure +fun udict_delete_get?(dict: cell, key_len: int, index: int): (cell, slice, int) + asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; + +@pure +fun ~idict_delete_get?(dict: cell, key_len: int, index: int): (cell, (slice, int)) + asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; + +@pure +fun ~udict_delete_get?(dict: cell, key_len: int, index: int): (cell, (slice, int)) + asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; + +@pure +fun udict_set(dict: cell, key_len: int, index: int, value: slice): cell + asm(value 
index dict key_len) "DICTUSET"; + +@pure +fun ~udict_set(dict: cell, key_len: int, index: int, value: slice): (cell, ()) + asm(value index dict key_len) "DICTUSET"; + +@pure +fun idict_set(dict: cell, key_len: int, index: int, value: slice): cell + asm(value index dict key_len) "DICTISET"; + +@pure +fun ~idict_set(dict: cell, key_len: int, index: int, value: slice): (cell, ()) + asm(value index dict key_len) "DICTISET"; + +@pure +fun dict_set(dict: cell, key_len: int, index: slice, value: slice): cell + asm(value index dict key_len) "DICTSET"; + +@pure +fun ~dict_set(dict: cell, key_len: int, index: slice, value: slice): (cell, ()) + asm(value index dict key_len) "DICTSET"; + +@pure +fun udict_add?(dict: cell, key_len: int, index: int, value: slice): (cell, int) + asm(value index dict key_len) "DICTUADD"; + +@pure +fun udict_replace?(dict: cell, key_len: int, index: int, value: slice): (cell, int) + asm(value index dict key_len) "DICTUREPLACE"; + +@pure +fun idict_add?(dict: cell, key_len: int, index: int, value: slice): (cell, int) + asm(value index dict key_len) "DICTIADD"; + +@pure +fun idict_replace?(dict: cell, key_len: int, index: int, value: slice): (cell, int) + asm(value index dict key_len) "DICTIREPLACE"; + +@pure +fun udict_set_builder(dict: cell, key_len: int, index: int, value: builder): cell + asm(value index dict key_len) "DICTUSETB"; + +@pure +fun ~udict_set_builder(dict: cell, key_len: int, index: int, value: builder): (cell, ()) + asm(value index dict key_len) "DICTUSETB"; + +@pure +fun idict_set_builder(dict: cell, key_len: int, index: int, value: builder): cell + asm(value index dict key_len) "DICTISETB"; + +@pure +fun ~idict_set_builder(dict: cell, key_len: int, index: int, value: builder): (cell, ()) + asm(value index dict key_len) "DICTISETB"; + +@pure +fun dict_set_builder(dict: cell, key_len: int, index: slice, value: builder): cell + asm(value index dict key_len) "DICTSETB"; + +@pure +fun ~dict_set_builder(dict: cell, key_len: int, index: 
slice, value: builder): (cell, ()) + asm(value index dict key_len) "DICTSETB"; + +@pure +fun udict_add_builder?(dict: cell, key_len: int, index: int, value: builder): (cell, int) + asm(value index dict key_len) "DICTUADDB"; + +@pure +fun udict_replace_builder?(dict: cell, key_len: int, index: int, value: builder): (cell, int) + asm(value index dict key_len) "DICTUREPLACEB"; + +@pure +fun idict_add_builder?(dict: cell, key_len: int, index: int, value: builder): (cell, int) + asm(value index dict key_len) "DICTIADDB"; + +@pure +fun idict_replace_builder?(dict: cell, key_len: int, index: int, value: builder): (cell, int) + asm(value index dict key_len) "DICTIREPLACEB"; + +@pure +fun udict_delete_get_min(dict: cell, key_len: int): (cell, int, slice, int) + asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; + +@pure +fun ~udict_delete_get_min(dict: cell, key_len: int): (cell, (int, slice, int)) + asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; + +@pure +fun idict_delete_get_min(dict: cell, key_len: int): (cell, int, slice, int) + asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; + +@pure +fun ~idict_delete_get_min(dict: cell, key_len: int): (cell, (int, slice, int)) + asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; + +@pure +fun dict_delete_get_min(dict: cell, key_len: int): (cell, slice, slice, int) + asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; + +@pure +fun ~dict_delete_get_min(dict: cell, key_len: int): (cell, (slice, slice, int)) + asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; + +@pure +fun udict_delete_get_max(dict: cell, key_len: int): (cell, int, slice, int) + asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; + +@pure +fun ~udict_delete_get_max(dict: cell, key_len: int): (cell, (int, slice, int)) + asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; + +@pure +fun idict_delete_get_max(dict: cell, key_len: int): (cell, int, slice, int) + asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; + +@pure +fun ~idict_delete_get_max(dict: cell, key_len: int): (cell, (int, slice, int)) 
+ asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; + +@pure +fun dict_delete_get_max(dict: cell, key_len: int): (cell, slice, slice, int) + asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; + +@pure +fun ~dict_delete_get_max(dict: cell, key_len: int): (cell, (slice, slice, int)) + asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; + +@pure +fun udict_get_min?(dict: cell, key_len: int): (int, slice, int) + asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; + +@pure +fun udict_get_max?(dict: cell, key_len: int): (int, slice, int) + asm (-> 1 0 2) "DICTUMAX" "NULLSWAPIFNOT2"; + +@pure +fun udict_get_min_ref?(dict: cell, key_len: int): (int, cell, int) + asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; + +@pure +fun udict_get_max_ref?(dict: cell, key_len: int): (int, cell, int) + asm (-> 1 0 2) "DICTUMAXREF" "NULLSWAPIFNOT2"; + +@pure +fun idict_get_min?(dict: cell, key_len: int): (int, slice, int) + asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; + +@pure +fun idict_get_max?(dict: cell, key_len: int): (int, slice, int) + asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; + +@pure +fun idict_get_min_ref?(dict: cell, key_len: int): (int, cell, int) + asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; + +@pure +fun idict_get_max_ref?(dict: cell, key_len: int): (int, cell, int) + asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; + +@pure +fun udict_get_next?(dict: cell, key_len: int, pivot: int): (int, slice, int) + asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; + +@pure +fun udict_get_nexteq?(dict: cell, key_len: int, pivot: int): (int, slice, int) + asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; + +@pure +fun udict_get_prev?(dict: cell, key_len: int, pivot: int): (int, slice, int) + asm(pivot dict key_len -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; + +@pure +fun udict_get_preveq?(dict: cell, key_len: int, pivot: int): (int, slice, int) + asm(pivot dict key_len -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; + +@pure +fun idict_get_next?(dict: cell, key_len: int, pivot: 
int): (int, slice, int) + asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; + +@pure +fun idict_get_nexteq?(dict: cell, key_len: int, pivot: int): (int, slice, int) + asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; + +@pure +fun idict_get_prev?(dict: cell, key_len: int, pivot: int): (int, slice, int) + asm(pivot dict key_len -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; + +@pure +fun idict_get_preveq?(dict: cell, key_len: int, pivot: int): (int, slice, int) + asm(pivot dict key_len -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; /// Creates an empty dictionary, which is actually a null value. Equivalent to PUSHNULL -cell new_dict() pure asm "NEWDICT"; -/// Checks whether a dictionary is empty. Equivalent to cell_null?. -int dict_empty?(cell c) pure asm "DICTEMPTY"; +@pure +fun new_dict(): cell + asm "NEWDICT"; + +/// Checks whether a dictionary is empty. +@pure +fun dict_empty?(c: cell): int + asm "DICTEMPTY"; /* Prefix dictionary primitives */ -(slice, slice, slice, int) pfxdict_get?(cell dict, int key_len, slice key) pure asm(key dict key_len) "PFXDICTGETQ" "NULLSWAPIFNOT2"; -(cell, int) pfxdict_set?(cell dict, int key_len, slice key, slice value) pure asm(value key dict key_len) "PFXDICTSET"; -(cell, int) pfxdict_delete?(cell dict, int key_len, slice key) pure asm(key dict key_len) "PFXDICTDEL"; +@pure +fun pfxdict_get?(dict: cell, key_len: int, key: slice): (slice, slice, slice, int) + asm(key dict key_len) "PFXDICTGETQ" "NULLSWAPIFNOT2"; + +@pure +fun pfxdict_set?(dict: cell, key_len: int, key: slice, value: slice): (cell, int) + asm(value key dict key_len) "PFXDICTSET"; + +@pure +fun pfxdict_delete?(dict: cell, key_len: int, key: slice): (cell, int) + asm(key dict key_len) "PFXDICTDEL"; /// Returns the value of the global configuration parameter with integer index `i` as a `cell` or `null` value. -cell config_param(int x) pure asm "CONFIGOPTPARAM"; -/// Checks whether c is a null. Note, that Tolk also has polymorphic null? 
built-in. -int cell_null?(cell c) pure asm "ISNULL"; -int builder_null?(builder b) asm "ISNULL"; +@pure +fun config_param(x: int): cell + asm "CONFIGOPTPARAM"; /// Creates an output action which would reserve exactly amount nanotoncoins (if mode = 0), at most amount nanotoncoins (if mode = 2), or all but amount nanotoncoins (if mode = 1 or mode = 3), from the remaining balance of the account. It is roughly equivalent to creating an outbound message carrying amount nanotoncoins (or b − amount nanotoncoins, where b is the remaining balance) to oneself, so that the subsequent output actions would not be able to spend more money than the remainder. Bit +2 in mode means that the external action does not fail if the specified amount cannot be reserved; instead, all remaining balance is reserved. Bit +8 in mode means `amount <- -amount` before performing any further actions. Bit +4 in mode means that amount is increased by the original balance of the current account (before the compute phase), including all extra currencies, before performing any other checks and actions. Currently, amount must be a non-negative integer, and mode must be in the range 0..15. -() raw_reserve(int amount, int mode) asm "RAWRESERVE"; +fun raw_reserve(amount: int, mode: int): void + asm "RAWRESERVE"; + /// Similar to raw_reserve, but also accepts a dictionary extra_amount (represented by a cell or null) with extra currencies. In this way currencies other than TonCoin can be reserved. 
-() raw_reserve_extra(int amount, cell extra_amount, int mode) asm "RAWRESERVEX"; +fun raw_reserve_extra(amount: int, extra_amount: cell, mode: int): void + asm "RAWRESERVEX"; + /// Sends a raw message contained in msg, which should contain a correctly serialized object Message X, with the only exception that the source address is allowed to have dummy value addr_none (to be automatically replaced with the current smart contract address), and ihr_fee, fwd_fee, created_lt and created_at fields can have arbitrary values (to be rewritten with correct values during the action phase of the current transaction). Integer parameter mode contains the flags. Currently mode = 0 is used for ordinary messages; mode = 128 is used for messages that are to carry all the remaining balance of the current smart contract (instead of the value originally indicated in the message); mode = 64 is used for messages that carry all the remaining value of the inbound message in addition to the value initially indicated in the new message (if bit 0 is not set, the gas fees are deducted from this amount); mode' = mode + 1 means that the sender wants to pay transfer fees separately; mode' = mode + 2 means that any errors arising while processing this message during the action phase should be ignored. Finally, mode' = mode + 32 means that the current account must be destroyed if its resulting balance is zero. This flag is usually employed together with +128. -() send_raw_message(cell msg, int mode) asm "SENDRAWMSG"; +fun send_raw_message(msg: cell, mode: int): void + asm "SENDRAWMSG"; + /// Creates an output action that would change this smart contract code to that given by cell new_code. Notice that this change will take effect only after the successful termination of the current run of the smart contract -() set_code(cell new_code) asm "SETCODE"; +fun set_code(new_code: cell): void + asm "SETCODE"; /// Generates a new pseudo-random unsigned 256-bit integer x. 
The algorithm is as follows: if r is the old value of the random seed, considered as a 32-byte array (by constructing the big-endian representation of an unsigned 256-bit integer), then its sha512(r) is computed; the first 32 bytes of this hash are stored as the new value r' of the random seed, and the remaining 32 bytes are returned as the next random value x. -int random() asm "RANDU256"; +fun random(): int + asm "RANDU256"; + /// Generates a new pseudo-random integer z in the range 0..range−1 (or range..−1, if range < 0). More precisely, an unsigned random value x is generated as in random; then z := x * range / 2^256 is computed. -int rand(int range) asm "RAND"; +fun rand(range: int): int + asm "RAND"; + /// Returns the current random seed as an unsigned 256-bit Integer. -int get_seed() pure asm "RANDSEED"; +@pure +fun get_seed(): int + asm "RANDSEED"; + /// Sets the random seed to unsigned 256-bit seed. -() set_seed(int) asm "SETRAND"; +fun set_seed(seed: int): void + asm "SETRAND"; + /// Mixes unsigned 256-bit integer x into the random seed r by setting the random seed to sha256 of the concatenation of two 32-byte strings: the first with the big-endian representation of the old seed r, and the second with the big-endian representation of x. -() randomize(int x) asm "ADDRAND"; +fun randomize(x: int): void + asm "ADDRAND"; + /// Equivalent to randomize(cur_lt());. 
-() randomize_lt() asm "LTIME" "ADDRAND"; +fun randomize_lt(): void + asm "LTIME" "ADDRAND"; /// Checks whether the data parts of two slices coincide. -int equal_slice_bits (slice a, slice b) pure asm "SDEQ"; +@pure +fun equal_slice_bits(a: slice, b: slice): int + asm "SDEQ"; /// Concatenates two builders. -builder store_builder(builder to, builder from) pure asm "STBR"; +@pure +fun store_builder(to: builder, from: builder): builder + asm "STBR"; diff --git a/tolk-tester/tests/a10.tolk b/tolk-tester/tests/a10.tolk new file mode 100644 index 000000000..8ea137748 --- /dev/null +++ b/tolk-tester/tests/a10.tolk @@ -0,0 +1,42 @@ +fun one(dummy: tuple) { + return 1; +} + +fun main(a: int, x: int) { + var y: int = 0; + var z: int = 0; + while ((y = x * x) > a) { + x -= 1; + z = one(null); + } + return (y, z); +} + +fun throwIfLt10(x: int): void { + if (x > 10) { + return; + } + throw 234; + return; +} + +@method_id(88) +fun test88(x: int) { + try { + var x: void = throwIfLt10(x); + return 0; + } catch(code) { + return code; + } +} + +/** + method_id | in | out +@testcase | 0 | 101 15 | 100 1 +@testcase | 0 | 101 14 | 100 1 +@testcase | 0 | 101 10 | 100 0 +@testcase | 0 | 100 10 | 100 0 +@testcase | 0 | 100 10 | 100 0 +@testcase | 88 | 5 | 234 +@testcase | 88 | 50 | 0 +*/ diff --git a/tolk-tester/tests/a6.tolk b/tolk-tester/tests/a6.tolk new file mode 100644 index 000000000..3bdcdbdf8 --- /dev/null +++ b/tolk-tester/tests/a6.tolk @@ -0,0 +1,85 @@ +fun f(a: int, b: int, c: int, d: int, e: int, f: int): (int, int) { + // solve a 2x2 linear equation + var D: int = a*d - b*c;;;; var Dx: int = e*d-b*f ;;;; var Dy: int = a * f - e * c; + return (Dx/D,Dy/D); +};;;; + +fun mulDivR(x: int, y: int, z: int): int { return muldivr(x, y, z); } + +fun calc_phi(): int { + var n = 1; + repeat (70) { n*=10; }; + var p= 1; + var `q`=1; + do { + (p,q)=(q,p+q); + } while (q <= n); //;; + return mulDivR(p, n, q); +} + +fun calc_sqrt2(): int { + var n = 1; + repeat (70) { n *= 10; } + var p = 1; +
var q = 1; + do { + var t = p + q; + (p, q) = (q, t + q); + } while (q <= n); + return mulDivR(p, n, q); +} + +fun calc_root(m: auto): auto { + var base: int=1; + repeat(70) { base *= 10; } + var (a, b, c) = (1,0,-m); + var (p1, q1, p2, q2) = (1, 0, 0, 1); + do { + var k: int=-1; + var (a1, b1, c1) = (0, 0, 0); + do { + k+=1; + (a1, b1, c1) = (a, b, c); + c+=b; + c += b += a; + } while (c <= 0); + (a, b, c) = (-c1, -b1, -a1); + (p1, q1) = (k * p1+q1, p1); + (p2, q2) = (k * p2+q2, p2); + } while (p1 <= base); + return (p1, q1, p2, q2); +} + +fun ataninv(base: int, q: int): int { // computes base*atan(1/q) + base=base~/q; + q*=-q; + var sum: int = 0; + var n: int = 1; + do { + sum += base~/n; + base = base~/q; + n += 2; + } while (base != 0); + return sum; +} + +fun arctanInv(base: int, q: int): int { return ataninv(base, q); } + +fun calc_pi(): int { + var base: int = 64; + repeat (70) { base *= 10; } + return (arctanInv(base << 2, 5) - arctanInv(base, 239))~>>4; +} + +fun calcPi(): int { return calc_pi(); } + +fun main(): int { + return calcPi(); +} + +/** + method_id | in | out +@testcase | 0 | | 31415926535897932384626433832795028841971693993751058209749445923078164 + +@code_hash 84337043972311674339187056298873613816389434478842780265748859098303774481976 +*/ diff --git a/tolk-tester/tests/a6_1.tolk b/tolk-tester/tests/a6_1.tolk new file mode 100644 index 000000000..ecbf56dd0 --- /dev/null +++ b/tolk-tester/tests/a6_1.tolk @@ -0,0 +1,16 @@ +fun main(a: int, b: int, c: int, d: int, e: int, f: int): (int, int) { + var D: int = a * d - b * c; + var Dx: int = e * d - b * f; + var Dy: int = a * f - e * c; + return (Dx / D, Dy / D); +} + +/** + method_id | in | out +@testcase | 0 | 1 1 1 -1 10 6 | 8 2 +@testcase | 0 | 817 -31 624 -241 132272 272276 | 132 -788 +@testcase | 0 | -886 562 498 -212 -36452 -68958 | -505 -861 +@testcase | 0 | 448 -433 -444 792 150012 -356232 | -218 -572 +@testcase | 0 | -40 -821 433 -734 -721629 -741724 | -206 889 +@testcase | 0 | -261 -98 
-494 868 -166153 733738 | 263 995 +*/ diff --git a/tolk-tester/tests/a6_5.tolk b/tolk-tester/tests/a6_5.tolk new file mode 100644 index 000000000..8b300c0c9 --- /dev/null +++ b/tolk-tester/tests/a6_5.tolk @@ -0,0 +1,26 @@ +@deprecated +fun twice(f: auto, x: auto): auto { + return f (f (x)); +} + +fun sqr(x: int) { + return x * x; +} + +fun main(x: int): int { + var f = sqr; + return twice(f, x) * f(x); +} + +@method_id(4) +fun pow6(x: int): int { + return twice(sqr, x) * sqr(x); +} + +/** + method_id | in | out +@testcase | 0 | 3 | 729 +@testcase | 0 | 10 | 1000000 +@testcase | 4 | 3 | 729 +@testcase | 4 | 10 | 1000000 +*/ diff --git a/tolk-tester/tests/a7.tolk b/tolk-tester/tests/a7.tolk new file mode 100644 index 000000000..1c0ae2eb3 --- /dev/null +++ b/tolk-tester/tests/a7.tolk @@ -0,0 +1,24 @@ +fun main() { } +@method_id(1) +fun steps(x: int): int { + var n = 0; + while (x > 1) { + n += 1; + if (x & 1) { + x = 3 * x + 1; + } else { + x >>= 1; + } + } + return n; +} + +/** + method_id | in | out +@testcase | 1 | 1 | 0 +@testcase | 1 | 2 | 1 +@testcase | 1 | 5 | 5 +@testcase | 1 | 19 | 20 +@testcase | 1 | 27 | 111 +@testcase | 1 | 100 | 25 +*/ diff --git a/tolk-tester/tests/allow_post_modification.tolk b/tolk-tester/tests/allow_post_modification.tolk new file mode 100644 index 000000000..079558864 --- /dev/null +++ b/tolk-tester/tests/allow_post_modification.tolk @@ -0,0 +1,108 @@ +fun unsafe_tuple(x: X): tuple + asm "NOP"; + +fun inc(x: int, y: int): (int, int) { + return (x + y, y * 10); +} +fun ~inc(x: int, y: int): (int, int) { + (x, y) = inc(x, y); + return (x, y); +} + +fun ~incWrap(x: int, y: int): (int, int) { + return ~inc(x, y); +} + +@method_id(11) +fun test_return(x: int): (int, int, int, int, int, int, int) { + return (x, x~incWrap(x / 20), x, x = x * 2, x, x += 1, x); +} + +@method_id(12) +fun test_assign(x: int): (int, int, int, int, int, int, int) { + var (x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int) = (x, x~inc(x / 20), x, 
x=x*2, x, x+=1, x); + return (x1, x2, x3, x4, x5, x6, x7); +} + +@method_id(13) +fun test_tuple(x: int): tuple { + var t: tuple = unsafe_tuple([x, x~incWrap(x / 20), x, x = x * 2, x, x += 1, x]); + return t; +} + +@method_id(14) +fun test_tuple_assign(x: int): (int, int, int, int, int, int, int) { + var [x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int] = [x, x~inc(x / 20), x, x = x * 2, x, x += 1, x]; + return (x1, x2, x3, x4, x5, x6, x7); +} + +fun foo1(x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int): (int, int, int, int, int, int, int) { + return (x1, x2, x3, x4, x5, x6, x7); +} + +@method_id(15) +fun test_call_1(x: int): (int, int, int, int, int, int, int) { + return foo1(x, x~inc(x / 20), x, x = x * 2, x, x += 1, x); +} + +fun foo2(x1: int, x2: int, x3456: (int, int, int, int), x7: int): (int, int, int, int, int, int, int) { + var (x3: int, x4: int, x5: int, x6: int) = x3456; + return (x1, x2, x3, x4, x5, x6, x7); +} + +@method_id(16) +fun test_call_2(x: int): (int, int, int, int, int, int, int) { + return foo2(x, x~incWrap(x / 20), (x, x = x * 2, x, x += 1), x); +} + +fun asm_func(x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int): (int, int, int, int, int, int, int) +asm + (x4 x5 x6 x7 x1 x2 x3->0 1 2 3 4 5 6) "NOP"; + +@method_id(17) +fun test_call_asm_old(x: int): (int, int, int, int, int, int, int) { + return asm_func(x, x += 1, x, x, x~inc(x / 20), x, x = x * 2); +} + +@method_id(18) +fun test_call_asm_new(x: int): (int, int, int, int, int, int, int) { + return asm_func(x, x~incWrap(x / 20), x, x = x * 2, x, x += 1, x); +} + +global xx: int; +@method_id(19) +fun test_global(x: int): (int, int, int, int, int, int, int) { + xx = x; + return (xx, xx~incWrap(xx / 20), xx, xx = xx * 2, xx, xx += 1, xx); +} + +@method_id(20) +fun test_if_else(x: int): (int, int, int, int, int) { + if (x > 10) { + return (x~inc(8), x + 1, x = 1, x <<= 3, x); + } else { + xx = 9; + return (x, x~inc(-4), x~inc(-1), x >= 1, x = x + xx); + } 
+} + +fun main() { +} + +/** + method_id | in | out +@testcase | 11 | 100 | 100 50 105 210 210 211 211 +@testcase | 12 | 100 | 100 50 105 210 210 211 211 +@testcase | 13 | 100 | [ 100 50 105 210 210 211 211 ] +@testcase | 14 | 100 | 100 50 105 210 210 211 211 +@testcase | 15 | 100 | 100 50 105 210 210 211 211 +@testcase | 16 | 100 | 100 50 105 210 210 211 211 +@testcase | 17 | 100 | 101 50 106 212 100 101 101 +@testcase | 18 | 100 | 210 210 211 211 100 50 105 +@testcase | 19 | 100 | 100 50 105 210 210 211 211 +@testcase | 20 | 80 | 80 89 1 8 8 +@testcase | 20 | 9 | 9 -40 -10 -1 13 + +@fif_codegen_avoid ~incWrap +@code_hash 97139400653362069936987769894397430077752335662822462908581556703209313861576 +*/ diff --git a/tolk-tester/tests/asm_arg_order.tolk b/tolk-tester/tests/asm_arg_order.tolk new file mode 100644 index 000000000..8bf46e3e2 --- /dev/null +++ b/tolk-tester/tests/asm_arg_order.tolk @@ -0,0 +1,145 @@ +@pure +fun empty_tuple2(): tuple +asm "NIL"; +@pure +fun tpush2(t: tuple, x: X): (tuple, ()) +asm "TPUSH"; +fun emptyTuple(): tuple { return empty_tuple2(); } +fun tuplePush(t: tuple, value: X): (tuple, ()) { return tpush2(t, value); } + +@pure +fun asm_func_1(x: int, y: int, z: int): tuple +asm "3 TUPLE"; +@pure +fun asm_func_2(x: int, y: int, z: int): tuple +asm (z y x -> 0) "3 TUPLE"; +@pure +fun asm_func_3(x: int, y: int, z: int): tuple +asm (y z x -> 0) "3 TUPLE"; +@pure +fun asm_func_4(a: int, b: (int, (int, int)), c: int): tuple +asm (b a c -> 0) "5 TUPLE"; + +fun asmFunc1(x: int, y: int, z: int): tuple { return asm_func_1(x, y, z); } +fun asmFunc3(x: int, y: int, z: int): tuple { return asm_func_3(x, y, z); } + +@pure +fun asm_func_modify(a: tuple, b: int, c: int): (tuple, ()) +asm (c b a -> 0) "SWAP TPUSH SWAP TPUSH"; +fun asmFuncModify(a: tuple, b: int, c: int): (tuple, ()) { return asm_func_modify(a, b, c); } + +global t: tuple; + +fun foo(x: int): int { + t~tuplePush(x); + return x * 10; +} + +@method_id(11) +fun test_old_1(): (tuple, tuple) { + 
t = empty_tuple2(); + var t2: tuple = asmFunc1(foo(11), foo(22), foo(33)); + return (t, t2); +} + +@method_id(12) +fun test_old_2(): (tuple, tuple) { + t = emptyTuple(); + var t2: tuple = asm_func_2(foo(11), foo(22), foo(33)); + return (t, t2); +} + +@method_id(13) +fun test_old_3(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = asm_func_3(foo(11), foo(22), foo(33)); + return (t, t2); +} + +@method_id(14) +fun test_old_4(): (tuple, tuple) { + t = emptyTuple(); + var t2: tuple = empty_tuple2(); + // This actually computes left-to-right even without compute-asm-ltr + t2 = asm_func_4(foo(11), (foo(22), (foo(33), foo(44))), foo(55)); + return (t, t2); +} + +@method_id(15) +fun test_old_modify(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = empty_tuple2(); + t2~asmFuncModify(foo(22), foo(33)); + return (t, t2); +} + +@method_id(16) +fun test_old_dot(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = foo(11).asmFunc3(foo(22), foo(33)); + return (t, t2); +} + +@method_id(21) +fun test_new_1(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = asmFunc1(foo(11), foo(22), foo(33)); + return (t, t2); +} + +@method_id(22) +fun test_new_2(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = asm_func_2(foo(11), foo(22), foo(33)); + return (t, t2); +} + +@method_id(23) +fun test_new_3(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = asm_func_3(foo(11), foo(22), foo(33)); + return (t, t2); +} + +@method_id(24) +fun test_new_4(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = asm_func_4(foo(11), (foo(22), (foo(33), foo(44))), foo(55)); + return (t, t2); +} + +@method_id(25) +fun test_new_modify(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = empty_tuple2(); + t2~asm_func_modify(foo(22), foo(33)); + return (t, t2); +} + +@method_id(26) +fun test_new_dot(): (tuple, tuple) { + t = empty_tuple2(); + var t2: tuple = foo(11).asm_func_3(foo(22), foo(33)); + return (t, t2); +} + +fun main() { +} + +/** + 
method_id | in | out +@testcase | 11 | | [ 11 22 33 ] [ 110 220 330 ] +@testcase | 12 | | [ 11 22 33 ] [ 330 220 110 ] +@testcase | 13 | | [ 11 22 33 ] [ 220 330 110 ] +@testcase | 14 | | [ 11 22 33 44 55 ] [ 220 330 440 110 550 ] +@testcase | 15 | | [ 22 33 ] [ 220 330 ] +@testcase | 16 | | [ 11 22 33 ] [ 220 330 110 ] +@testcase | 21 | | [ 11 22 33 ] [ 110 220 330 ] +@testcase | 22 | | [ 11 22 33 ] [ 330 220 110 ] +@testcase | 23 | | [ 11 22 33 ] [ 220 330 110 ] +@testcase | 24 | | [ 11 22 33 44 55 ] [ 220 330 440 110 550 ] +@testcase | 25 | | [ 22 33 ] [ 220 330 ] +@testcase | 26 | | [ 11 22 33 ] [ 220 330 110 ] + +@code_hash 93068291567112337250118419287631047120002003622184251973082208096953112184588 +*/ diff --git a/tolk-tester/tests/bit-operators.tolk b/tolk-tester/tests/bit-operators.tolk new file mode 100644 index 000000000..049406af9 --- /dev/null +++ b/tolk-tester/tests/bit-operators.tolk @@ -0,0 +1,53 @@ +fun lshift(): int { + return (1 << 0) == 1; +} + +fun rshift(): int { + return (1 >> 0) == 1; +} + +fun lshift_var(i: int): int { + return (1 << i) == 1; +} + +fun rshift_var(i: int): int { + return (1 >> i) == 1; +} + +fun main(x: int): int { + if (x == 0) { + return lshift(); + } else if (x == 1) { + return rshift(); + } else if (x == 2) { + return lshift_var(0); + } else if (x == 3) { + return rshift_var(0); + } else if (x == 4) { + return lshift_var(1); + } else { + return rshift_var(1); + } +} + +@method_id(11) +fun is_claimed(index: int): int { + var claim_bit_index: int = index % 256; + var mask: int = 1 << claim_bit_index; + return (255 & mask) == mask; +} + + +/** + method_id | in | out +@testcase | 0 | 0 | -1 +@testcase | 0 | 1 | -1 +@testcase | 0 | 2 | -1 +@testcase | 0 | 3 | -1 +@testcase | 0 | 4 | 0 +@testcase | 0 | 5 | 0 +@testcase | 11 | 0 | -1 +@testcase | 11 | 1 | -1 +@testcase | 11 | 256 | -1 +@testcase | 11 | 8 | 0 +*/ diff --git a/tolk-tester/tests/c2.tolk b/tolk-tester/tests/c2.tolk new file mode 100644 index 000000000..ec8d32da4 
--- /dev/null +++ b/tolk-tester/tests/c2.tolk @@ -0,0 +1,27 @@ +global op: (int, int) -> int; + +fun check_assoc(a: int, b: int, c: int): int { + return op(op(a, b), c) == op(a, op(b, c)); +} + +fun unnamed_args(_: int, _: slice, _: auto): auto { + return true; +} + +fun main(x: int, y: int, z: int): int { + op = `_+_`; + return check_assoc(x, y, z); +} + +@method_id(101) +fun test101(x: int, z: int): auto { + return unnamed_args(x, "asdf", z); +} + +/** + method_id | in | out +@testcase | 0 | 2 3 9 | -1 +@testcase | 0 | 11 22 44 | -1 +@testcase | 0 | -1 -10 -20 | -1 +@testcase | 101 | 1 10 | -1 +*/ diff --git a/tolk-tester/tests/c2_1.tolk b/tolk-tester/tests/c2_1.tolk new file mode 100644 index 000000000..4e52b9eeb --- /dev/null +++ b/tolk-tester/tests/c2_1.tolk @@ -0,0 +1,14 @@ +fun check_assoc(op: auto, a: int, b: int, c: int) { + return op(op(a, b), c) == op(a, op(b, c)); +} + +fun main(x: int, y: int, z: int): int { + return check_assoc(`_+_`, x, y, z); +} + +/** + method_id | in | out +@testcase | 0 | 2 3 9 | -1 +@testcase | 0 | 11 22 44 | -1 +@testcase | 0 | -1 -10 -20 | -1 +*/ diff --git a/tolk-tester/tests/camel1.tolk b/tolk-tester/tests/camel1.tolk new file mode 100644 index 000000000..a9f1bf3e4 --- /dev/null +++ b/tolk-tester/tests/camel1.tolk @@ -0,0 +1,245 @@ +// Here we test "functions that just wrap other functions" (camelCase in particular): +// > builder beginCell() { return begin_cell(); } +// Such functions, when called, are explicitly inlined during code generation (even without `inline` modifier). +// It means, that `beginCell()` is replaced to `begin_cell()` (and effectively to `NEWC`). +// Moreover, body of `beginCell` is NOT codegenerated at all. +// Hence, we can write camelCase wrappers (as well as more intelligible namings around stdlib functions) +// without affecting performance and even bytecode hashes. +// This works with ~functions also. And even works with wrappers of wrappers. 
+// Moreover, such wrappers can reorder input parameters, see a separate test camel2.tolk. + +fun myBeginCell(): builder { return begin_cell(); } +fun myEndCell(b: builder): cell { return end_cell(b); } +fun myStoreRef(b: builder, c: cell): builder { return store_ref(b, c); } +fun myStoreUint(b: builder, i: int, bw: int): builder { return store_uint(b, i, bw); } + +// 'inline' is not needed actually, but if it exists, it's just ignored +@inline +@pure +fun myBeginParse(c: cell): slice { return begin_parse(c); } +@inline +@pure +fun mySkipBits(s: slice, len: int): slice { return skip_bits(s, len); } +@inline +@pure +fun ~mySkipBits(s: slice, len: int): (slice, ()) { return ~skip_bits(s, len); } +@inline +@pure +fun ~myLoadUint(s: slice, len: int): (slice, int) { return load_uint(s, len); } + +fun myComputeDataSize(c: cell, maxCells: int): (int, int, int) { return compute_data_size(c, maxCells); } + +fun dict__new(): cell { return new_dict(); } +fun dict__iset(dict: cell, keyLen: int, index: int, value: slice): cell { return idict_set(dict, keyLen, index, value); } +fun ~dict__iset(dict: cell, keyLen: int, index: int, value: slice): (cell, ()) { return ~idict_set(dict, keyLen, index, value); } +fun dict__tryIGet(dict: cell, keyLen: int, index: int): (slice, int) { return idict_get?(dict, keyLen, index); } +fun dict__tryIGetMin(dict: cell, keyLen: int): (int, slice, int) { return idict_get_min?(dict, keyLen); } + +fun myEmptyTuple(): tuple { return empty_tuple(); } +fun emptyTuple1(): tuple { return myEmptyTuple(); } +fun emptyTuple11(): tuple { return emptyTuple1(); } +fun myTuplePush(t: tuple, value: X): tuple { return tpush(t, value); } +fun ~myTuplePush(t: tuple, value: X): (tuple, ()) { return ~tpush(t, value); } +fun myTupleAt(t: tuple, index: int): X { return at(t, index); } +fun tripleSecond(p: [X1, Y2, Z3]): Y2 { return triple_second(p); } +@pure +fun nullValue(): X +asm "PUSHNULL"; + +fun initial1(x: tuple): tuple { return x; } +fun initial2(x: tuple): tuple 
{ return initial1(x); } + +// int add(int x, int y) { return x + y; } // this is also a wrapper, as its body is _+_(x,y) + +fun fake1(a: int, b: int, c: int): void +asm(a b c) "DROP DROP DROP"; +fun fake2(a: int, b: int, c: int): void +asm(b c a) "DROP DROP DROP"; +fun fake3(a: int, b: int, c: int): () +asm(c a b) "DROP DROP DROP"; +fun fake4(a: int, b: int, c: int): () +asm(c b a) "DROP DROP DROP"; + +fun fake1Wrapper(a: int, b: int, c: int) { return fake1(a, b, c); } +fun fake2Wrapper(a: int, b: int, c: int) { return fake2(a, b, c); } +fun fake3Wrapper(a: int, b: int, c: int) { return fake3(a, b, c); } +fun fake4Wrapper(a: int, b: int, c: int) { return fake4(a, b, c); } + +@method_id(101) +fun test1(): [int, int, int] { + var x: int = 1; + var y: int = 1; + var to_be_ref: cell = myBeginCell().myEndCell(); + var in_c: builder = myBeginCell().myStoreUint(123, 8); + in_c = myStoreRef(in_c, to_be_ref); + var (a, b, c) = myComputeDataSize(in_c.myEndCell(), 10); + assert(!(b != 8)) throw 101; + assert(!(c != 1), 101); + return [a, b + x, c + y]; +} + +@method_id(102) +fun test2(): [[int, int, int], int, int, int] { + var dict: cell = dict__new(); + dict = dict__iset(dict, 32, 456, myBeginCell().myStoreUint(4560, 32).myEndCell().myBeginParse()); + dict.dict__iset(32, 789, myBeginCell().myStoreUint(7890, 32).myEndCell().myBeginParse()); + dict~dict__iset(32, 123, myBeginCell().myStoreUint(0, 64).myStoreUint(1230, 32).myStoreUint(1231, 32).myStoreUint(1232, 32).myEndCell().myBeginParse()); + + var (mink, minv, _) = dict__tryIGetMin(dict, 32); + // skip 64 bits + minv~mySkipBits(16); + minv = minv.mySkipBits(16); + minv.mySkipBits(11); // does nothing + (minv, _) = ~mySkipBits(minv, 16); + mySkipBits(minv, 11); // does nothing + minv~mySkipBits(16); + // load 3*32 + var minv1 = minv~myLoadUint(32); + var minv2 = minv~myLoadUint(32); + var minv3 = minv~myLoadUint(32); + + var (_, found123) = dict__tryIGet(dict, 32, 123); + var (_, found456) = dict__tryIGet(dict, 32, 456); + 
var (_, found789) = dict__tryIGet(dict, 32, 789); + return [[minv1, minv2, minv3], found123, found456, found789]; +} + +@method_id(103) +fun test3(): tuple { + var with34: tuple = initial2(emptyTuple1()); + with34~myTuplePush(34); + + var t: tuple = emptyTuple11(); + t = myTuplePush(t, 12); + myTuplePush(t, emptyTuple11()); // does nothing + t~myTuplePush(emptyTuple1()); + t~myTuplePush(with34.myTupleAt(0)); + t.myTuplePush("123"s); // does nothing + + var tri: [cell, int, cell] = [nullValue(), 90 + 1, null]; + var f: int = tripleSecond(tri); + (t, _) = ~myTuplePush(t, f); + + return t; +} + +@method_id(104) +fun test4(a: int, b: int, c: int): int { + fake1Wrapper(a, b, c); + fake2Wrapper(a, b, c); + fake3Wrapper(a, b, c); + fake4Wrapper(a, b, c); + return 10; +} + +fun main(): int { + var x: int = now(); + return 30; +} + +/** + method_id | in | out +@testcase | 101 | | [ 2 9 2 ] +@testcase | 102 | | [ [ 1230 1231 1232 ] -1 -1 0 ] +@testcase | 103 | | [ 12 [] 34 91 ] + +@fif_codegen +""" + main PROC:<{ + // + 30 PUSHINT + }> +""" + +@fif_codegen +""" + test1 PROC:<{ + // + NEWC // _5 + ENDC // to_be_ref + NEWC // to_be_ref _8 + 123 PUSHINT // to_be_ref _8 _9=123 + SWAP // to_be_ref _9=123 _8 + 8 STU // to_be_ref in_c + STREF // in_c + ENDC // _16 + 10 PUSHINT // _16 _17=10 + CDATASIZE // a b c + OVER // a b c b + 8 NEQINT // a b c _21 + 101 THROWIF + DUP // a b c c + 1 NEQINT // a b c _26 + 101 THROWIF + SWAP // a c b + INC // a c _30 + SWAP // a _30 c + INC // a _30 _31 + TRIPLE // _29 + }> +""" + +@fif_codegen +""" + test2 PROC:<{ + ... + 16 PUSHINT // dict minv _45=16 + SDSKIPFIRST // dict minv + 16 PUSHINT // dict minv _47=16 + SDSKIPFIRST // dict minv + 16 PUSHINT // dict minv _52=16 + SDSKIPFIRST // dict minv + 16 PUSHINT // dict minv _57=16 + SDSKIPFIRST // dict minv + ... 
+ 32 PUSHINT // dict minv1 minv2 minv3 found123 found456 _83=32 + 789 PUSHINT // dict minv1 minv2 minv3 found123 found456 _83=32 _84=789 + s0 s7 s7 XCHG3 // found456 minv1 minv2 minv3 found123 _84=789 dict _83=32 + DICTIGET + NULLSWAPIFNOT // found456 minv1 minv2 minv3 found123 _101 _102 + NIP // found456 minv1 minv2 minv3 found123 found789 + ... + 4 TUPLE // _86 + }> +""" + +@fif_codegen +""" + test3 PROC:<{ + // + NIL // _1 + initial1 CALLDICT // with34 + ... + TRIPLE // t tri + SECOND // t f + TPUSH // t + }> +""" + +@fif_codegen +""" + test4 PROC:<{ + // a b c + s2 s1 s0 PUSH3 // a b c a b c + DROP DROP DROP + s1 s0 s2 PUSH3 // a b c b c a + DROP DROP DROP + s0 s2 s1 PUSH3 // a b c c a b + DROP DROP DROP + s0 s2 XCHG // c b a + DROP DROP DROP + 10 PUSHINT // _7=10 + }> +""" + +@fif_codegen_avoid DECLPROC myBeginCell +@fif_codegen_avoid DECLPROC myStoreUint +@fif_codegen_avoid DECLPROC myStoreRef +@fif_codegen_avoid DECLPROC myComputeDataSize +@fif_codegen_avoid DECLPROC tryIdictGet +@fif_codegen_avoid DECLPROC myEmptyTuple +@fif_codegen_avoid DECLPROC myStoreUint +@fif_codegen_avoid DECLPROC initial2 +@fif_codegen_avoid DECLPROC add +@fif_codegen_avoid DECLPROC increase +*/ diff --git a/tolk-tester/tests/camel2.tolk b/tolk-tester/tests/camel2.tolk new file mode 100644 index 000000000..121b7f784 --- /dev/null +++ b/tolk-tester/tests/camel2.tolk @@ -0,0 +1,204 @@ +// Here we also test "functions that just wrap other functions" like in camel1.tolk, +// but when they reorder arguments, e.g. +// > T f(x,y) { return anotherF(y,x); } +// This also works, even for wrappers of wrappers, even if anotherF is asm(with reorder). +// But swapping arguments may sometimes lead to bytecode changes (see test2), +// both with compute-asm-ltr and without it. 
+ +fun myBeginCell(): builder { return begin_cell(); } +fun myEndCell(b: builder): cell { return end_cell(b); } +fun myStoreRef1(b: builder, c: cell): builder { return store_ref(b, c); } +fun myStoreRef2(c: cell, b: builder): builder { return store_ref(b, c); } +fun myStoreUint1(b: builder, x: int, bw: int): builder { return store_uint(b, x, bw); } +fun myStoreUint2(b: builder, bw: int, x: int): builder { return store_uint(b, x, bw); } + +fun computeDataSize1(c: cell, maxCells: int): (int, int, int) { return compute_data_size(c, maxCells); } +fun computeDataSize2(maxCells: int, c: cell): (int, int, int) { return compute_data_size(c, maxCells); } + +fun fake(a: int, b: int, c: int): void +asm "DROP DROP DROP"; +fun fake2(b: int, c: int, a: int) { return fake(a,b,c); } +fun fake3(c: int, a: int, b: int) { return fake(a,b,c); } +fun fake4(c: int, b: int, a: int) { return fake(a,b,c); } + +@method_id(101) +fun test1(): (int, int, int) { + var x: int = 1; + var y: int = 1; + var to_be_ref: cell = myBeginCell().myEndCell(); + var in_c: builder = myBeginCell().myStoreUint1(123, 8); + in_c = myStoreRef1(in_c, to_be_ref); + var (a, b, c) = computeDataSize1(in_c.myEndCell(), 10); + assert(!0, 101); + return (a, b + x, c + y); +} + +@method_id(102) +fun test2(): (int, int, int) { + var x: int = 1; + var y: int = 1; + var to_be_ref: cell = myBeginCell().myEndCell(); + var in_c: builder = myBeginCell().myStoreUint2(8, 123); + in_c = myStoreRef2(to_be_ref, in_c); + var (a, b, c) = computeDataSize2(10, in_c.myEndCell()); + return (a, b + x, c + y); +} + +@method_id(103) +fun test3(): (int, int, int) { + var x: int = 1; + var y: int = 1; + var to_be_ref: cell = begin_cell().end_cell(); + var in_c: builder = begin_cell().store_uint(123, 8); + in_c = store_ref(in_c, to_be_ref); + var (a, b, c) = compute_data_size(in_c.end_cell(), 10); + return (a, b + x, c + y); +} + +fun beginCell1(): builder { return begin_cell(); } +fun beginCell11(): builder { return beginCell1(); } +fun 
beginCell111(): builder { return beginCell11(); } + +fun endCell1(b: builder): cell { return end_cell(b); } +fun endCell11(b: builder): cell { return endCell1(b); } + +fun beginParse1(c: cell): slice { return begin_parse(c); } +fun beginParse11(c: cell): slice { return beginParse1(c); } + +fun storeInt1(b: builder, bw: int, x: int): builder { return store_int(b, x, bw); } +fun storeInt11(bw: int, x: int, b: builder): builder { return storeInt1(b, bw, x); } +fun storeInt111(b: builder, x: int, bw: int): builder { return storeInt11(bw, x, b); } + +@method_id(104) +fun test4(): slice { + var b: builder = beginCell111(); + b = storeInt11(32, 1, b); + b = storeInt111(b, 2, 32).storeInt111(3, 32); + return b.endCell11().beginParse11(); +} + +@method_id(105) +fun test5(a: int, b: int, c: int): int { + fake(a, b, c); + fake2(b, c, a); + fake3(c, a, b); + fake4(c, b, a); + return a; +} + +fun main() { + throw 0; +} + +/** + method_id | in | out +@testcase | 101 | | 2 9 2 +@testcase | 102 | | 2 9 2 +@testcase | 103 | | 2 9 2 +@testcase | 104 | | CS{Cell{0018000000010000000200000003} bits: 0..96; refs: 0..0} + +test1 and test3 fif code is absolutely identical, test2 (due to reorder) is a bit different: + +@fif_codegen +""" + test1 PROC:<{ + // + NEWC // _5 + ENDC // to_be_ref + NEWC // to_be_ref _8 + 123 PUSHINT // to_be_ref _8 _9=123 + SWAP // to_be_ref _9=123 _8 + 8 STU // to_be_ref in_c + STREF // in_c + ENDC // _16 + 10 PUSHINT // _16 _17=10 + CDATASIZE // a b c + SWAP // a c b + INC // a c _23 + SWAP // a _23 c + INC // a _23 _24 + }> +""" + +@fif_codegen +""" + test2 PROC:<{ + // + NEWC // _5 + ENDC // to_be_ref + NEWC // to_be_ref _8 + 123 PUSHINT // to_be_ref _8 _10=123 + SWAP // to_be_ref _10=123 _8 + 8 STU // to_be_ref in_c + STREF // in_c + 10 PUSHINT + SWAP + ENDC + SWAP + CDATASIZE // a b c + SWAP // a c b + INC // a c _19 + SWAP // a _19 c + INC // a _19 _20 + }> +""" + +@fif_codegen +""" + test3 PROC:<{ + // + NEWC // _5 + ENDC // to_be_ref + NEWC // to_be_ref 
_8 + 123 PUSHINT // to_be_ref _8 _9=123 + SWAP // to_be_ref _9=123 _8 + 8 STU // to_be_ref in_c + STREF // in_c + ENDC // _16 + 10 PUSHINT // _16 _17=10 + CDATASIZE // a b c + SWAP // a c b + INC // a c _19 + SWAP // a _19 c + INC // a _19 _20 + }> +""" + +@fif_codegen +""" + test4 PROC:<{ + // + NEWC // b + 1 PUSHINT // b _3=1 + SWAP // _3=1 b + 32 STI // b + 2 PUSHINT + SWAP // _5=2 b + 32 STI + 3 PUSHINT + SWAP + 32 STI // b + ENDC // _11 + CTOS // _12 + }> +""" + +@fif_codegen +""" + test5 PROC:<{ + // a b c + s2 s1 s0 PUSH3 // a b c a b c + DROP DROP DROP + s2 s1 s0 PUSH3 // a b c a b c + DROP DROP DROP + s2 s1 s0 PUSH3 // a b c a b c + DROP DROP DROP + s2 PUSH + -ROT // a a b c + DROP DROP DROP + }> +""" + +@fif_codegen_avoid myStoreUint1 +@fif_codegen_avoid myStoreUint2 +*/ diff --git a/tolk-tester/tests/camel3.tolk b/tolk-tester/tests/camel3.tolk new file mode 100644 index 000000000..e76c02b7d --- /dev/null +++ b/tolk-tester/tests/camel3.tolk @@ -0,0 +1,95 @@ +// Here we test that if you declare a wrapper like +// > builder beginCell() { return begin_cell(); } +// but use it NOT only as a direct call, BUT as a 1-st class function +// (save to a variable, return from a function, etc.) +// it also works, since a function becomes codegenerated (though direct calls are expectedly inlined). 
+ +fun myBeginCell(): builder { return begin_cell(); } +fun myEndCell(b: builder): cell { return end_cell(b); } +fun myStoreRef(b: builder, c: cell): builder { return store_ref(b, c); } +fun myStoreUint3(i: int, bw: int, b: builder): builder { return store_uint(b, i, bw); } + +fun computeDataSize2(maxCells: int, c: cell): (int, int, int) { return compute_data_size(c, maxCells); } + +fun myEmptyTuple(): tuple { return empty_tuple(); } +fun myTuplePush(t: tuple, value: X): tuple { return tpush(t, value); } +fun ~myTuplePush(t: tuple, value: X): (tuple, ()) { return ~tpush(t, value); } +fun tupleGetFirst(t: tuple): X { return first(t); } + + +@inline +fun getBeginEnd(): (auto, auto) { + return (myBeginCell, myEndCell); +} + +fun begAndStore(beg: auto, store: auto, x: int): builder { + return store(x, 8, beg()); +} + +fun test1(): (int, int, int) { + var (_, computer) = (0, computeDataSize2); + var (beg, end) = getBeginEnd(); + + var t: tuple = myEmptyTuple(); + t~myTuplePush(myStoreRef); + var refStorer = tupleGetFirst(t); + + var x: int = 1; + var y: int = 1; + var to_be_ref: cell = myBeginCell().myEndCell(); + var in_c: builder = begAndStore(beg, myStoreUint3, 123); + in_c = refStorer(in_c, to_be_ref); + var (a, b, c) = computer(10, end(in_c)); + return (a, b + x, c + y); +} + +fun main(): (int, int, int) { + return test1(); +} + +/** + method_id | in | out +@testcase | 0 | | 2 9 2 + +@fif_codegen DECLPROC myBeginCell +@fif_codegen DECLPROC computeDataSize2 + +@fif_codegen +""" + myStoreUint3 PROC:<{ + // i bw b + SWAP // i b bw + STUX // _3 + }> +""" + +@fif_codegen +""" + myStoreRef PROC:<{ + // b c + SWAP // c b + STREF // _2 + }> +""" + +@fif_codegen +""" + CONT:<{ + computeDataSize2 CALLDICT + }> // computer + getBeginEnd INLINECALLDICT // computer beg end + NIL // computer beg end t + ... + NEWC // computer beg end refStorer _19 + ENDC // computer beg end refStorer to_be_ref + ... + CONT:<{ + myStoreUint3 CALLDICT + }> + ... 
+ begAndStore CALLDICT // computer to_be_ref end refStorer in_c +""" + +@fif_codegen_avoid myEmptyTuple +@fif_codegen_avoid myTuplePush +*/ diff --git a/tolk-tester/tests/camel4.tolk b/tolk-tester/tests/camel4.tolk new file mode 100644 index 000000000..c6be62685 --- /dev/null +++ b/tolk-tester/tests/camel4.tolk @@ -0,0 +1,145 @@ +// Here we test that a just-return function is not a valid wrapper, it will not be inlined. +// (doesn't use all arguments, has different pureness, has method_id, etc.) + +fun myStoreUint(b: builder, x: int, unused: int): builder { return store_uint(b, x, x); } +fun throwIf(excNo: int, cond: int) { assert(!cond) throw excNo; } + +fun initial1(x: auto) { return x; } +fun initial2(x: auto) { return initial1(x); } + +@pure +fun asm_func_4(a: int, b: (int, (int, int)), c: int): tuple +asm (b a c -> 0) "5 TUPLE"; +fun asmFunc4(a: int, b: (int, (int, int)), c: int): tuple { return asm_func_4(a, b, c); } + +fun postpone_elections(): int { + return false; +} + +fun setAndGetData(ret: int): int { + var c: cell = begin_cell().store_uint(ret, 8).end_cell(); + set_data(c); + var s: slice = get_data().begin_parse(); + throwIf(101, 0); + return s~load_uint(8); +} + +fun setAndGetDataWrapper(ret: int): int { + return setAndGetData(ret); +} + +@method_id(101) +fun test1(): int { + var c: cell = begin_cell().myStoreUint(32, 10000000).end_cell(); + var s: slice = c.begin_parse(); + return s~load_uint(32); +} + +get fun test2(ret: int): int { + return setAndGetDataWrapper(ret); +} + +@method_id(103) +fun test3(): int { + return initial2(10); +} + +global t: tuple; + +fun foo(x: int): int { + t~tpush(x); + return x * 10; +} + +@method_id(104) +fun test4(): (tuple, tuple) { + t = empty_tuple(); + var t2: tuple = asmFunc4(foo(11), (foo(22), (foo(33), foo(44))), foo(55)); + return (t, t2); +} + +@method_id(105) +fun test5(): int { + if (1) { + return postpone_elections(); + } + return 123; +} + +@method_id(106) +fun test6(): int { + return add2(1, 2); // doesn't 
inline since declared below +} + +fun main(ret: int): int { + return setAndGetDataWrapper(ret); +} + +fun onExternalMessage(ret: int): int { + return setAndGetData(ret); +} + +// currently, functions implemented after usage, can't be inlined, since inlining is legacy, not AST +fun add2(x: int, y: int): int { return x + y; } + +/** + method_id | in | out +@testcase | 101 | | 32 +@testcase | 103 | | 10 +@testcase | 104 | | [ 11 22 33 44 55 ] [ 220 330 440 110 550 ] +@testcase | 105 | | 0 +@testcase | 106 | | 3 +@testcase | 74435 | 99 | 99 +@testcase | 0 | 98 | 98 +@testcase | -1 | 97 | 97 + +@fif_codegen DECLPROC myStoreUint +@fif_codegen DECLPROC throwIf +@fif_codegen DECLPROC postpone_elections +@fif_codegen DECLPROC add2 +@fif_codegen 74435 DECLMETHOD test2 + +@fif_codegen +""" + test3 PROC:<{ + // + 10 PUSHINT // _0=10 + initial2 CALLDICT // _1 + }> +""" + +@fif_codegen +""" + test2 PROC:<{ + // ret + setAndGetData CALLDICT // _1 + }> +""" + +@fif_codegen +""" + 11 PUSHINT + foo CALLDICT + 22 PUSHINT + foo CALLDICT + 33 PUSHINT + foo CALLDICT + 44 PUSHINT + foo CALLDICT + 55 PUSHINT + foo CALLDICT + asmFunc4 CALLDICT // t2 +""" + +@fif_codegen +""" + test6 PROC:<{ + // + 1 PUSHINT // _0=1 + 2 PUSHINT // _0=1 _1=2 + add2 CALLDICT // _2 + }> +""" + +@fif_codegen_avoid setAndGetDataWrapper +*/ diff --git a/tolk-tester/tests/cells-slices.tolk b/tolk-tester/tests/cells-slices.tolk new file mode 100644 index 000000000..508cd31db --- /dev/null +++ b/tolk-tester/tests/cells-slices.tolk @@ -0,0 +1,163 @@ +fun store_u32(b: builder, value: int): builder { + return b.store_uint(value, 32); +} +fun ~store_u32(b: builder, value: int): (builder, ()) { + return ~store_uint(b, value, 32); +} + +fun load_u32(cs: slice): (slice, int) { + return cs.load_uint(32); +} + +fun my_load_int(s: slice, len: int): (slice, int) + asm(s len -> 1 0) "LDIX"; // top is "value slice" +fun my_store_int(b: builder, x: int, len: int): builder + asm(x b len) "STIX"; +fun ~my_store_int(b: builder, x: 
int, len: int): (builder, ()) + asm(x b len) "STIX"; + +@method_id(101) +fun test1(): [int,int,int,int,int] { + var b: builder = begin_cell().store_uint(1, 32); + b = b.store_uint(2, 32); + b~store_uint(3, 32); + b = b.store_u32(4); + b~store_u32(5); + + var cs: slice = b.end_cell().begin_parse(); + var (cs redef, one: int) = cs.load_uint(32); + var (two: int, three: int) = (cs~load_uint(32), cs~load_u32()); + var (cs redef, four: int) = cs.load_u32(); + var five: int = cs~load_u32(); + + return [one,two,three,four,five]; +} + +@method_id(102) +fun test2(): [int,int,int] { + var b: builder = begin_cell().my_store_int(1, 32); + b = b.my_store_int(2, 32); + b~my_store_int(3, 32); + + var cs: slice = b.end_cell().begin_parse(); + var (cs redef, one: int) = cs.my_load_int(32); + var (two: int, three: int) = (cs~my_load_int(32), cs~my_load_int(32)); + + return [one,two,three]; +} + +@method_id(103) +fun test3(ret: int): int { + var (_, same: int) = begin_cell().store_uint(ret,32).end_cell().begin_parse().load_uint(32); + return same; +} + +@method_id(104) +fun test4(): [int,int] { + var b: builder = my_store_int(begin_cell(), 1, 32); + b = store_int(store_int(b, 2, 32), 3, 32); + + var cs: slice = b.end_cell().begin_parse(); + var cs32: slice = cs.first_bits(32); // todo s.first_bits()~load_uint() doesn't work, 'lvalue expected' + var (one, _, three) = (cs32~load_int(32), cs~skip_bits(64), cs~load_u32()); + + return [one,three]; +} + +@method_id(105) +fun test5(): [int,int] { + var cref: cell = end_cell(store_u32(begin_cell(), 105)); + var c: cell = begin_cell().store_ref(cref).store_ref(cref).store_u32(1).end_cell(); + + var cs: slice = begin_parse(c); + // todo I want cs~load_ref().begin_parse()~load_u32(), but 'lvalue expected' + var ref1 = cs~load_ref().begin_parse(); + var ref2 = cs~load_ref().begin_parse(); + var sto5x2: int = ref1~load_u32() + ref2~load_uint(32); + return [sto5x2, cs~load_u32()]; +} + + +fun ~sumNumbersInSlice(s: slice): (slice, int) { + var 
result = 0; + while (!slice_data_empty?(s)) { + result += s~load_uint(32); + } + return (s, result); +} + +@method_id(106) +fun test6() { + var ref = begin_cell().store_int(100, 32).end_cell(); + var s: slice = begin_cell().store_int(1, 32).store_int(2, 32).store_ref(ref).end_cell().begin_parse(); + var result = (slice_bits(s), s~sumNumbersInSlice(), slice_bits(s), slice_empty?(s), slice_data_empty?(s), slice_refs_empty?(s)); + var ref2: cell = s~load_ref(); + var s2: slice = ref2.begin_parse(); + s.end_parse(); + return (result, s2~load_int(32), s2.slice_empty?()); +} + +@method_id(107) +fun test7() { + var s: slice = begin_cell().store_int(1, 32).store_int(2, 32).store_int(3, 32).store_int(4, 32).store_int(5, 32).store_int(6, 32).store_int(7, 32).end_cell().begin_parse(); + var size1 = slice_bits(s); + s~skip_bits(32); + var s1: slice = s.first_bits(64); + var n1 = s1~load_int(32); + var size2 = slice_bits(s); + s~load_int(32); + var size3 = slice_bits(s); + s~skip_last_bits(32); + var size4 = slice_bits(s); + var n2 = s~load_int(32); + var size5 = slice_bits(s); + return (n1, n2, size1, size2, size3, size4, size5); +} + +@method_id(108) +fun test108() { + var (result1, result2) = (0, 0); + try { + begin_cell().store_ref(begin_cell().end_cell()).end_cell().begin_parse().end_parse(); + result1 = 100; + } catch (code) { + result1 = code; + } + try { + begin_cell().end_cell().begin_parse().end_parse(); + result2 = 100; + } catch (code) { + result2 = code; + } + return (result1, result2); +} + +@method_id(109) +fun test109() { + var ref2 = begin_cell().store_int(1, 32).end_cell(); + var ref1 = begin_cell().store_int(1, 32).store_ref(ref2).end_cell(); + var c = begin_cell().store_int(444, 32).store_ref(ref1).store_ref(ref1).store_ref(ref1).store_ref(ref2).store_int(4, 32).end_cell(); + var (n_cells1, n_bits1, n_refs1) = c.compute_data_size(10); + var s = c.begin_parse(); + s~load_ref(); + s~load_ref(); + var n = s~load_int(32); + var (n_cells2, n_bits2, n_refs2) = 
s.slice_compute_data_size(10); + return ([n_cells1, n_bits1, n_refs1], [n_cells2, n_bits2, n_refs2], n); +} + +fun main(): int { + return 0; +} + +/** +@testcase | 101 | | [ 1 2 3 4 5 ] +@testcase | 102 | | [ 1 2 3 ] +@testcase | 103 | 103 | 103 +@testcase | 104 | | [ 1 3 ] +@testcase | 105 | | [ 210 1 ] +@testcase | 106 | | 64 3 0 0 -1 0 100 -1 +@testcase | 107 | | 2 3 224 192 160 128 96 +@testcase | 108 | | 9 100 +@testcase | 109 | | [ 3 128 5 ] [ 2 96 3 ] 444 + */ diff --git a/tolk-tester/tests/co1.tolk b/tolk-tester/tests/co1.tolk new file mode 100644 index 000000000..bc56dfa89 --- /dev/null +++ b/tolk-tester/tests/co1.tolk @@ -0,0 +1,75 @@ +const int1 = 1; +const int2 = 2; + +const int101: int = 101; +const int111: int = 111; + +const int1r = int1; + +const str1 = "const1"; +const str2 = "aabbcc"s; + +const str2r: slice = str2; + +const str1int = 0x636f6e737431; +const str2int = 0xAABBCC; + +const nibbles: int = 4; + +fun iget1(): int { return int1; } +fun iget2(): int { return int2; } +fun iget3(): int { return int1+int2; } + +fun iget1r(): int { return int1r; } + +fun sget1(): slice { return str1; } +fun sget2(): slice { return str2; } +fun sget2r(): slice { return str2r; } + +const int240: int = ((int1+int2)*10)<<3; + +fun iget240(): int { return int240; } + +@pure +fun newc(): builder +asm "NEWC"; +@pure +fun endcs(b: builder): slice +asm "ENDC" "CTOS"; +@pure +fun sdeq(s1: slice, s2: slice): int +asm "SDEQ"; +@pure +fun stslicer(b: builder, s: slice): builder +asm "STSLICER"; + +fun storeUint(b: builder, x: int, len: int): builder { return store_uint(b, x, len); } +fun endSlice(b: builder): slice { return endcs(b); } + +fun main() { + var i1: int = iget1(); + var i2: int = iget2(); + var i3: int = iget3(); + + assert(i1 == 1) throw int101; + assert(i2 == 2) throw 102; + assert(i3 == 3) throw 103; + + var s1: slice = sget1(); + var s2: slice = sget2(); + var s3: slice = newc().stslicer(str1).stslicer(str2r).endcs(); + + assert(sdeq(s1, 
newc().storeUint(str1int, 12 * nibbles).endcs())) throw int111; + assert(sdeq(s2, newc().store_uint(str2int, 6 * nibbles).endSlice())) throw 112; + assert(sdeq(s3, newc().store_uint(0x636f6e737431AABBCC, 18 * nibbles).endcs())) throw 113; + + var i4: int = iget240(); + assert(i4 == 240) throw ((104)); + return 0; +} + +/** +@testcase | 0 | | 0 + +@code_hash 61273295789179921867241079778489100375537711211918844448475493726205774530743 +*/ diff --git a/tolk-tester/tests/code_after_ifelse.tolk b/tolk-tester/tests/code_after_ifelse.tolk new file mode 100644 index 000000000..6a16262f8 --- /dev/null +++ b/tolk-tester/tests/code_after_ifelse.tolk @@ -0,0 +1,41 @@ +fun elseif(cond: int) { + if (cond > 0) { + throw(cond); + } +} + +@inline +@method_id(101) +fun foo(x: int): int { + if (x==1) { + return 111; + } else { + x *= 2; + } + return x + 1; +} + +fun main(x: int): (int, int) { + return (foo(x), 222); +} + +@method_id(102) +fun test2(x: int) { + try { + if (x < 0) { return -1; } + elseif (x); + } catch(excNo) { + return excNo * 1000; + } + return 0; +} + +/** + method_id | in | out +@testcase | 0 | 1 | 111 222 +@testcase | 0 | 3 | 7 222 +@testcase | 101 | 1 | 111 +@testcase | 101 | 3 | 7 +@testcase | 102 | -5 | -1 +@testcase | 102 | 5 | 5000 +*/ diff --git a/tolk-tester/tests/codegen_check_demo.tolk b/tolk-tester/tests/codegen_check_demo.tolk new file mode 100644 index 000000000..02379540c --- /dev/null +++ b/tolk-tester/tests/codegen_check_demo.tolk @@ -0,0 +1,96 @@ +@method_id(101) +fun test1(): int { + var x = false; + if (x == true) { + x= 100500; + } + return x; +} + +fun main(s: int) { + var (z, t) = (17, s); + while (z > 0) { + t = s; + z -= 1; + } + return ~ t; +} + +/** + method_id | in | out +@testcase | 0 | 1 | -2 +@testcase | 0 | 5 | -6 +@testcase | 101 | | 0 + +Below, I just give examples of @fif_codegen tag: +* a pattern can be single-line (after the tag), or multi-line, surrounded with """ +* there may be multiple @fif_codegen, they all will be checked 
+* indentation (spaces) is not checked intentionally +* "..." means any number of any lines +* lines not divided with "..." are expected to be consecutive in fif output +* //comments can be omitted, but if present, they are also expected to be equal +* there is also a tag @fif_codegen_avoid to check a pattern does not occur + +@fif_codegen +""" +main PROC:<{ + // s + 17 PUSHINT // s _3=17 + OVER // s z=17 t + WHILE:<{ + ... + }>DO<{ // s z t + ... + s1 s(-1) PUXC // s t z + ... + 2 1 BLKDROP2 + ... +}> +""" + +@fif_codegen +""" +main PROC:<{ + ... + WHILE:<{ + ... + }>DO<{ + ... + }> + }END>c +""" + +@fif_codegen +""" + OVER + 0 GTINT // s z t _5 +""" + +@fif_codegen +""" + "Asm.fif" include + ... + PROGRAM{ + ... + }END>c +""" + +@fif_codegen +""" +test1 PROC:<{ +// +FALSE +}> +""" + +@fif_codegen NOT // _8 +@fif_codegen main PROC:<{ + +@fif_codegen_avoid PROCINLINE +@fif_codegen_avoid END c +@fif_codegen_avoid +""" +multiline +can also be +""" +*/ diff --git a/tolk-tester/tests/comments.tolk b/tolk-tester/tests/comments.tolk new file mode 100644 index 000000000..cd2877470 --- /dev/null +++ b/tolk-tester/tests/comments.tolk @@ -0,0 +1,31 @@ + +fun main(): int + +// inside a comment, /* doesn't start a new one +/* but if // is inside, a comment may end at this line*/ { + var cc = "a string may contain /* or // or /*, not parsed"; + // return 1; + return get10() + /* + traditional comment /* may not be nested + // line comment + // ends */1 + + 1; + /* moreover, different comment styles + may be used for opening and closing + */ +} + +/*** + first line + //two-lined*/ + +@method_id(10) +fun get10(): int { + return 10; +} + + +/** +@testcase | 0 | | 12 +@testcase | 10 | | 10 +*/ diff --git a/tolk-tester/tests/if_stmt.tolk b/tolk-tester/tests/if_stmt.tolk new file mode 100644 index 000000000..2c51ac515 --- /dev/null +++ b/tolk-tester/tests/if_stmt.tolk @@ -0,0 +1,66 @@ +@method_id(101) +fun test1(x: int): int { + if (x > 200) { + return 200; + } else if (x > 100) { + 
return 100; + } else if (!(x <= 50)) { + if (!(x > 90)) { + return x; + } else { + return 90; + } + } else { + return 0; + } +} + +@method_id(102) +fun test2(x: int) { + if (x == 20) { return 20; } + if (x != 50) { return 50; } + if (x == 0) { return 0; } + return -1; +} + +@method_id(103) +fun test3(x: int) { + if (!(x != 20)) { return 20; } + if (!(x == 50)) { return 50; } + if (!x) { return 0; } + return -1; +} + +fun main() { + +} + +/** +@testcase | 101 | 0 | 0 +@testcase | 101 | 1000 | 200 +@testcase | 101 | 150 | 100 +@testcase | 101 | -1 | 0 +@testcase | 101 | 87 | 87 +@testcase | 101 | 94 | 90 +@testcase | 102 | 20 | 20 +@testcase | 102 | 40 | 50 +@testcase | 102 | 50 | -1 +@testcase | 103 | 20 | 20 +@testcase | 103 | 40 | 50 +@testcase | 103 | 50 | -1 + +@fif_codegen +""" + test3 PROC:<{ + // x + DUP // x x + 20 NEQINT // x _2 + IFNOTJMP:<{ // x + DROP // + 20 PUSHINT // _3=20 + }> // x + DUP // x x + 50 EQINT // x _5 + IFNOTJMP:<{ // x +""" +*/ diff --git a/tolk-tester/tests/imports/invalid-no-import.tolk b/tolk-tester/tests/imports/invalid-no-import.tolk new file mode 100644 index 000000000..6c4ab6ce6 --- /dev/null +++ b/tolk-tester/tests/imports/invalid-no-import.tolk @@ -0,0 +1,4 @@ +fun demoOfInvalid(): (int) { + var f = someAdd; + return f(1, 2); +} diff --git a/tolk-tester/tests/imports/some-math.tolk b/tolk-tester/tests/imports/some-math.tolk new file mode 100644 index 000000000..dc0c9c9b7 --- /dev/null +++ b/tolk-tester/tests/imports/some-math.tolk @@ -0,0 +1,3 @@ +fun someAdd(a: int, b: int): int { + return a + b + 0; +} diff --git a/tolk-tester/tests/inline_big.tolk b/tolk-tester/tests/inline_big.tolk new file mode 100644 index 000000000..be014eb5d --- /dev/null +++ b/tolk-tester/tests/inline_big.tolk @@ -0,0 +1,62 @@ +@inline +fun foo(x: int): int { + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; 
+ x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + x = x * 10 + 1; + return x; +} + +fun main(x: int): int { + return foo(x) * 10 + 5; +} +/** + method_id | in | out +@testcase | 0 | 9 | 9111111111111111111111111111111111111111111111111115 +*/ diff --git a/tolk-tester/tests/inline_if.tolk b/tolk-tester/tests/inline_if.tolk new file mode 100644 index 000000000..9f1fa8c12 --- /dev/null +++ b/tolk-tester/tests/inline_if.tolk @@ -0,0 +1,28 @@ +fun foo1(x: int): int { + if (x == 1) { + return 1; + } + return 2; +} +@inline +fun foo2(x: int): int { + if (x == 1) { + return 11; + } + return 22; +} +@inline_ref +fun foo3(x: int): int { + if (x == 1) { + return 111; + } + return 222; +} +fun main(x: int): (int, int, int) { + return (foo1(x)+1, foo2(x)+1, foo3(x)+1); +} +/** + method_id | in | out +@testcase | 0 | 1 | 2 12 112 +@testcase | 0 | 2 | 3 23 223 +*/ diff --git a/tolk-tester/tests/inline_loops.tolk b/tolk-tester/tests/inline_loops.tolk new file mode 100644 index 000000000..eba595a5e --- /dev/null +++ b/tolk-tester/tests/inline_loops.tolk @@ -0,0 +1,48 @@ +global g: int; + +@inline +fun foo_repeat() { + g = 1; + repeat(5) { + g *= 2; + } +} + +@inline +fun foo_until(): int { + g = 1; + var i: int = 0; + do { + g *= 2; + i += 1; + } while (i < 8); + return i; +} + +@inline +fun foo_while(): int { + g = 1; + var i: int = 0; + while (i < 10) { + 
g *= 2; + i += 1; + } + return i; +} + +fun main() { + foo_repeat(); + var x: int = g; + foo_until(); + var y: int = g; + foo_while(); + var z: int = g; + return (x, y, z); +} + +/** + method_id | in | out +@testcase | 0 | | 32 256 1024 + +@code_hash 102749806552989901976653997041637095139193406161777448419603700344770997608788 +*/ diff --git a/tolk-tester/tests/invalid-bitwise-1.tolk b/tolk-tester/tests/invalid-bitwise-1.tolk new file mode 100644 index 000000000..f939d60db --- /dev/null +++ b/tolk-tester/tests/invalid-bitwise-1.tolk @@ -0,0 +1,9 @@ +fun main(flags: int): int { + return flags&0xFF!=0; +} + +/** +@compilation_should_fail +@stderr & has lower precedence than != +@stderr Use parenthesis +*/ diff --git a/tolk-tester/tests/invalid-bitwise-2.tolk b/tolk-tester/tests/invalid-bitwise-2.tolk new file mode 100644 index 000000000..e6fcd1e54 --- /dev/null +++ b/tolk-tester/tests/invalid-bitwise-2.tolk @@ -0,0 +1,8 @@ +fun justTrue(): int { return true; } + +const a = justTrue() | 1 < 9; + +/** +@compilation_should_fail +@stderr | has lower precedence than < +*/ diff --git a/tolk-tester/tests/invalid-bitwise-3.tolk b/tolk-tester/tests/invalid-bitwise-3.tolk new file mode 100644 index 000000000..ee43860bf --- /dev/null +++ b/tolk-tester/tests/invalid-bitwise-3.tolk @@ -0,0 +1,8 @@ +fun justTrue(): int { return true; } + +const a = justTrue() | (1 < 9) | justTrue() != true; + +/** +@compilation_should_fail +@stderr | has lower precedence than != +*/ diff --git a/tolk-tester/tests/invalid-bitwise-4.tolk b/tolk-tester/tests/invalid-bitwise-4.tolk new file mode 100644 index 000000000..563ed535d --- /dev/null +++ b/tolk-tester/tests/invalid-bitwise-4.tolk @@ -0,0 +1,6 @@ +const a = (1) <=> (0) ^ 8; + +/** +@compilation_should_fail +@stderr ^ has lower precedence than <=> +*/ diff --git a/tolk-tester/tests/invalid-bitwise-5.tolk b/tolk-tester/tests/invalid-bitwise-5.tolk new file mode 100644 index 000000000..1030ed8d5 --- /dev/null +++ 
b/tolk-tester/tests/invalid-bitwise-5.tolk @@ -0,0 +1,11 @@ +const MAX_SLIPAGE = 100; + +fun main(jetton_amount: int, msg_value: int, slippage: int) { + if ((0 == jetton_amount) | (msg_value == 0) | true | false | slippage > MAX_SLIPAGE) { + } +} + +/** +@compilation_should_fail +@stderr | has lower precedence than > +*/ diff --git a/tolk-tester/tests/invalid-bitwise-6.tolk b/tolk-tester/tests/invalid-bitwise-6.tolk new file mode 100644 index 000000000..9c4dc67e4 --- /dev/null +++ b/tolk-tester/tests/invalid-bitwise-6.tolk @@ -0,0 +1,9 @@ +fun main() { + if ((1==1)|(2==2)&(3==3)) { + } +} + +/** +@compilation_should_fail +@stderr mixing | with & without parenthesis +*/ diff --git a/tolk-tester/tests/invalid-bitwise-7.tolk b/tolk-tester/tests/invalid-bitwise-7.tolk new file mode 100644 index 000000000..39fba401c --- /dev/null +++ b/tolk-tester/tests/invalid-bitwise-7.tolk @@ -0,0 +1,8 @@ +fun main() { + var c = x && y || x && y; +} + +/** +@compilation_should_fail +@stderr mixing && with || without parenthesis +*/ diff --git a/tolk-tester/tests/invalid-builtin-1.tolk b/tolk-tester/tests/invalid-builtin-1.tolk new file mode 100644 index 000000000..6a7f1ca7f --- /dev/null +++ b/tolk-tester/tests/invalid-builtin-1.tolk @@ -0,0 +1,10 @@ +fun moddiv2(x: int, y: int): (int, int) builtin; + +/** +@compilation_should_fail +@stderr +""" +`builtin` used for non-builtin function +fun moddiv2 +""" +*/ diff --git a/tolk-tester/tests/invalid-catch-1.tolk b/tolk-tester/tests/invalid-catch-1.tolk new file mode 100644 index 000000000..756722bb8 --- /dev/null +++ b/tolk-tester/tests/invalid-catch-1.tolk @@ -0,0 +1,12 @@ +fun main() { + try { + + } catch(int, arg) {} + return 0; +} + +/** +@compilation_should_fail +@stderr expected identifier, got `int` +@stderr catch(int + */ diff --git a/tolk-tester/tests/invalid-catch-2.tolk b/tolk-tester/tests/invalid-catch-2.tolk new file mode 100644 index 000000000..a02761463 --- /dev/null +++ b/tolk-tester/tests/invalid-catch-2.tolk @@ -0,0 
+1,9 @@ +fun main() { + try {} + catch(err, arg, more) {} +} + +/** +@compilation_should_fail +@stderr expected `)`, got `,` + */ diff --git a/tolk-tester/tests/invalid-cmt-nested.tolk b/tolk-tester/tests/invalid-cmt-nested.tolk new file mode 100644 index 000000000..807e7be88 --- /dev/null +++ b/tolk-tester/tests/invalid-cmt-nested.tolk @@ -0,0 +1,11 @@ +/* +in tolk we decided to drop nested comments support +/* +not nested + */ +*/ + +/** +@compilation_should_fail +@stderr error: expected fun or get, got `*` +*/ diff --git a/tolk-tester/tests/invalid-cmt-old.tolk b/tolk-tester/tests/invalid-cmt-old.tolk new file mode 100644 index 000000000..eaf58db83 --- /dev/null +++ b/tolk-tester/tests/invalid-cmt-old.tolk @@ -0,0 +1,8 @@ +fun main(): int { + ;; this is not a comment +} + +/** +@compilation_should_fail +@stderr error: expected `;`, got `is` + */ diff --git a/tolk-tester/tests/invalid-cyclic-1.tolk b/tolk-tester/tests/invalid-cyclic-1.tolk new file mode 100644 index 000000000..c46b1640e --- /dev/null +++ b/tolk-tester/tests/invalid-cyclic-1.tolk @@ -0,0 +1,8 @@ +const ONE = TWO - 1; +const TWO = ONE + 1; + +/** +@compilation_should_fail +@stderr const ONE +@stderr undefined symbol `TWO` + */ diff --git a/tolk-tester/tests/invalid-declaration-1.tolk b/tolk-tester/tests/invalid-declaration-1.tolk new file mode 100644 index 000000000..ea27e723b --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-1.tolk @@ -0,0 +1,6 @@ +const a = 10, b = 20; + +/** +@compilation_should_fail +@stderr multiple declarations are not allowed + */ diff --git a/tolk-tester/tests/invalid-declaration-10.tolk b/tolk-tester/tests/invalid-declaration-10.tolk new file mode 100644 index 000000000..7ccb182d5 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-10.tolk @@ -0,0 +1,8 @@ +get fun onInternalMessage() { + return 0; +} + +/** +@compilation_should_fail +@stderr invalid declaration of a reserved function + */ diff --git a/tolk-tester/tests/invalid-declaration-2.tolk 
b/tolk-tester/tests/invalid-declaration-2.tolk new file mode 100644 index 000000000..700632517 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-2.tolk @@ -0,0 +1,8 @@ +fun main(int): int { + +} + +/** +@compilation_should_fail +@stderr expected parameter name, got `int` +*/ diff --git a/tolk-tester/tests/invalid-declaration-3.tolk b/tolk-tester/tests/invalid-declaration-3.tolk new file mode 100644 index 000000000..3edc09fda --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-3.tolk @@ -0,0 +1,8 @@ +int main() { + +} + +/** +@compilation_should_fail +@stderr expected fun or get, got `int` +*/ diff --git a/tolk-tester/tests/invalid-declaration-4.tolk b/tolk-tester/tests/invalid-declaration-4.tolk new file mode 100644 index 000000000..183dda96f --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-4.tolk @@ -0,0 +1,8 @@ +fun main() { + int x = 0; +} + +/** +@compilation_should_fail +@stderr probably, you use FunC-like declarations; valid syntax is `var x: int = ...` +*/ diff --git a/tolk-tester/tests/invalid-declaration-5.tolk b/tolk-tester/tests/invalid-declaration-5.tolk new file mode 100644 index 000000000..bf23d8570 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-5.tolk @@ -0,0 +1,6 @@ +enum MyKind { } + +/** +@compilation_should_fail +@stderr `enum` is not supported yet +*/ diff --git a/tolk-tester/tests/invalid-declaration-6.tolk b/tolk-tester/tests/invalid-declaration-6.tolk new file mode 100644 index 000000000..731c299ba --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-6.tolk @@ -0,0 +1,8 @@ +fun main() { + val imm = 10; +} + +/** +@compilation_should_fail +@stderr immutable variables are not supported yet +*/ diff --git a/tolk-tester/tests/invalid-declaration-7.tolk b/tolk-tester/tests/invalid-declaration-7.tolk new file mode 100644 index 000000000..8d188ea08 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-7.tolk @@ -0,0 +1,8 @@ +fun main() { + var a = 10, b = 20; +} + +/** +@compilation_should_fail +@stderr 
multiple declarations are not allowed + */ diff --git a/tolk-tester/tests/invalid-declaration-8.tolk b/tolk-tester/tests/invalid-declaration-8.tolk new file mode 100644 index 000000000..06cb9a985 --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-8.tolk @@ -0,0 +1,8 @@ +fun someDemo() { + return 0; +} + +/** +@compilation_should_fail +@stderr the contract has no entrypoint + */ diff --git a/tolk-tester/tests/invalid-declaration-9.tolk b/tolk-tester/tests/invalid-declaration-9.tolk new file mode 100644 index 000000000..8cb71c73a --- /dev/null +++ b/tolk-tester/tests/invalid-declaration-9.tolk @@ -0,0 +1,9 @@ +fun recv_internal() { + return 0; +} + +/** +@compilation_should_fail +@stderr this is a reserved FunC/Fift identifier +@stderr you need `onInternalMessage` + */ diff --git a/tolk-tester/tests/invalid-get-method-1.tolk b/tolk-tester/tests/invalid-get-method-1.tolk new file mode 100644 index 000000000..263370d47 --- /dev/null +++ b/tolk-tester/tests/invalid-get-method-1.tolk @@ -0,0 +1,9 @@ +@method_id(123) +get fun hello(x: int, y: int): (int, int) { + return (x, y); +} + +/** +@compilation_should_fail +@stderr @method_id can be specified only for regular functions +*/ diff --git a/tolk-tester/tests/invalid-get-method-2.tolk b/tolk-tester/tests/invalid-get-method-2.tolk new file mode 100644 index 000000000..7c7a14136 --- /dev/null +++ b/tolk-tester/tests/invalid-get-method-2.tolk @@ -0,0 +1,17 @@ +@pure +get fun secret(): int { + return 0; +} +@pure +get fun balanced(): int { + return 1; +} + +fun main(): int { + return secret() + balanced(); +} + +/** +@compilation_should_fail +@stderr GET methods hash collision: `secret` and `balanced` produce the same hash +*/ diff --git a/tolk-tester/tests/invalid-import.tolk b/tolk-tester/tests/invalid-import.tolk new file mode 100644 index 000000000..b1c01518e --- /dev/null +++ b/tolk-tester/tests/invalid-import.tolk @@ -0,0 +1,9 @@ +// line1 +/* */ import "unexisting.tolk"; +// line3 + +/** 
+@compilation_should_fail +@stderr invalid-import.tolk:2:7: error: Failed to import: cannot find file +@stderr import "unexisting.tolk"; + */ diff --git a/tolk-tester/tests/invalid-logical-1.tolk b/tolk-tester/tests/invalid-logical-1.tolk new file mode 100644 index 000000000..9aa210bbd --- /dev/null +++ b/tolk-tester/tests/invalid-logical-1.tolk @@ -0,0 +1,8 @@ +fun main() { + return 1 && 2; +} + +/** +@compilation_should_fail +@stderr logical operators are not supported yet + */ diff --git a/tolk-tester/tests/invalid-no-import.tolk b/tolk-tester/tests/invalid-no-import.tolk new file mode 100644 index 000000000..89f879a36 --- /dev/null +++ b/tolk-tester/tests/invalid-no-import.tolk @@ -0,0 +1,8 @@ +import "imports/some-math.tolk"; +import "imports/invalid-no-import.tolk"; + +/** +@compilation_should_fail +@stderr imports/invalid-no-import.tolk:2:13 +@stderr Using a non-imported symbol `someAdd` + */ diff --git a/tolk-tester/tests/invalid-nopar-1.tolk b/tolk-tester/tests/invalid-nopar-1.tolk new file mode 100644 index 000000000..a9a848654 --- /dev/null +++ b/tolk-tester/tests/invalid-nopar-1.tolk @@ -0,0 +1,12 @@ +fun eq(x: int): int { + return x; +} + +fun main(x: int): int { + return eq x; +} + +/** +@compilation_should_fail +@stderr expected `;`, got `x` + */ diff --git a/tolk-tester/tests/invalid-nopar-2.tolk b/tolk-tester/tests/invalid-nopar-2.tolk new file mode 100644 index 000000000..c7c136509 --- /dev/null +++ b/tolk-tester/tests/invalid-nopar-2.tolk @@ -0,0 +1,12 @@ + +fun main(x: int): int { + if x { + return 10; + } + return 0; +} + +/** +@compilation_should_fail +@stderr expected `(`, got `x` + */ diff --git a/tolk-tester/tests/invalid-nopar-3.tolk b/tolk-tester/tests/invalid-nopar-3.tolk new file mode 100644 index 000000000..8249ca284 --- /dev/null +++ b/tolk-tester/tests/invalid-nopar-3.tolk @@ -0,0 +1,12 @@ + +fun main(x: int): int { + if (x, 1) { + return 10; + } + return 0; +} + +/** +@compilation_should_fail +@stderr expected `)`, got `,` + */ diff 
--git a/tolk-tester/tests/invalid-nopar-4.tolk b/tolk-tester/tests/invalid-nopar-4.tolk new file mode 100644 index 000000000..6e833f995 --- /dev/null +++ b/tolk-tester/tests/invalid-nopar-4.tolk @@ -0,0 +1,8 @@ +fun load_u32(cs: slice): (slice, int) { + return cs.load_uint 32; +} + +/** +@compilation_should_fail +@stderr expected `(`, got `32` + */ diff --git a/tolk-tester/tests/invalid-pure-1.tolk b/tolk-tester/tests/invalid-pure-1.tolk new file mode 100644 index 000000000..5baa32922 --- /dev/null +++ b/tolk-tester/tests/invalid-pure-1.tolk @@ -0,0 +1,20 @@ + +@pure +fun f_pure(): int { + return f_impure(); +} + +fun f_impure(): int {} + +fun main(): int { + return f_pure(); +} + +/** +@compilation_should_fail +@stderr +""" +an impure operation in a pure function +return f_impure(); +""" +*/ diff --git a/tolk-tester/tests/invalid-pure-2.tolk b/tolk-tester/tests/invalid-pure-2.tolk new file mode 100644 index 000000000..5f8f40ec8 --- /dev/null +++ b/tolk-tester/tests/invalid-pure-2.tolk @@ -0,0 +1,23 @@ +global g: int; + +@pure +fun f_pure(): builder { + var b: builder = begin_cell(); + g = g + 1; + return b; +} + +fun main(): int { + g = 0; + f_pure(); + return g; +} + +/** +@compilation_should_fail +@stderr +""" +an impure operation in a pure function +g = g + 1; +""" +*/ diff --git a/tolk-tester/tests/invalid-pure-3.tolk b/tolk-tester/tests/invalid-pure-3.tolk new file mode 100644 index 000000000..0e1b4104a --- /dev/null +++ b/tolk-tester/tests/invalid-pure-3.tolk @@ -0,0 +1,23 @@ +@pure +fun validate_input(input: cell): (int, int) { + var (x, y, z, correct) = compute_data_size?(input, 10); + assert(correct) throw 102; +} + +@pure +fun someF(): int { + var c: cell = begin_cell().end_cell(); + validate_input(c); + return 0; +} + +fun main() {} + +/** +@compilation_should_fail +@stderr +""" +an impure operation in a pure function +assert(correct) +""" +*/ diff --git a/tolk-tester/tests/invalid-redefinition-1.tolk b/tolk-tester/tests/invalid-redefinition-1.tolk new 
file mode 100644 index 000000000..49771cea1 --- /dev/null +++ b/tolk-tester/tests/invalid-redefinition-1.tolk @@ -0,0 +1,7 @@ +global moddiv: int; + +/** +@compilation_should_fail +@stderr global moddiv: int; +@stderr redefinition of built-in symbol + */ diff --git a/tolk-tester/tests/invalid-redefinition-2.tolk b/tolk-tester/tests/invalid-redefinition-2.tolk new file mode 100644 index 000000000..3a300dc2e --- /dev/null +++ b/tolk-tester/tests/invalid-redefinition-2.tolk @@ -0,0 +1,12 @@ +global hello: int; + +fun hello(): int { + +} + +/** +@compilation_should_fail +@stderr fun hello() +@stderr redefinition of symbol, previous was at +@stderr invalid-redefinition-2.tolk:1:1 + */ diff --git a/tolk-tester/tests/invalid-redefinition-3.tolk b/tolk-tester/tests/invalid-redefinition-3.tolk new file mode 100644 index 000000000..04ed9383d --- /dev/null +++ b/tolk-tester/tests/invalid-redefinition-3.tolk @@ -0,0 +1,8 @@ +fun main(): int { + var demo_10: int = demo_10; +} + +/** +@compilation_should_fail +@stderr undefined symbol `demo_10` + */ diff --git a/tolk-tester/tests/invalid-redefinition-4.tolk b/tolk-tester/tests/invalid-redefinition-4.tolk new file mode 100644 index 000000000..993a869b7 --- /dev/null +++ b/tolk-tester/tests/invalid-redefinition-4.tolk @@ -0,0 +1,9 @@ +fun main(): int { + var (a: int, b: int) = (10, 20); + var (a, b: int) = (10, 20); +} + +/** +@compilation_should_fail +@stderr redeclaration of local variable `a` + */ diff --git a/tolk-tester/tests/invalid-redefinition-5.tolk b/tolk-tester/tests/invalid-redefinition-5.tolk new file mode 100644 index 000000000..4a8f5ea1d --- /dev/null +++ b/tolk-tester/tests/invalid-redefinition-5.tolk @@ -0,0 +1,9 @@ +fun main(x: int): int { + var (a: int, b: int) = (10, 20); + var (a redef, x: int) = (10, 20); +} + +/** +@compilation_should_fail +@stderr redeclaration of local variable `x` + */ diff --git a/tolk-tester/tests/invalid-shift-1.tolk b/tolk-tester/tests/invalid-shift-1.tolk new file mode 100644 index 
000000000..5127ce05b --- /dev/null +++ b/tolk-tester/tests/invalid-shift-1.tolk @@ -0,0 +1,8 @@ +fun main(flags: int) { + return flags << 1 + 32; +} + +/** +@compilation_should_fail +@stderr << has lower precedence than + +*/ diff --git a/tolk-tester/tests/invalid-symbol-1.tolk b/tolk-tester/tests/invalid-symbol-1.tolk new file mode 100644 index 000000000..5d392f529 --- /dev/null +++ b/tolk-tester/tests/invalid-symbol-1.tolk @@ -0,0 +1,14 @@ +fun main(x: int): int { + if (x > 0) { + var y: int = 10; + } else { + var y: slice = "20"; + } + ~dump(y); +} + +/** +@compilation_should_fail +@stderr ~dump(y); +@stderr undefined symbol `y` + */ diff --git a/tolk-tester/tests/invalid-symbol-2.tolk b/tolk-tester/tests/invalid-symbol-2.tolk new file mode 100644 index 000000000..f55e15cec --- /dev/null +++ b/tolk-tester/tests/invalid-symbol-2.tolk @@ -0,0 +1,12 @@ +fun main(x: int): int { + try { + if (x > 10) { throw(44); } + } catch(code) {} + return code; +} + +/** +@compilation_should_fail +@stderr return code; +@stderr undefined symbol `code` + */ diff --git a/tolk-tester/tests/invalid-syntax-1.tolk b/tolk-tester/tests/invalid-syntax-1.tolk new file mode 100644 index 000000000..4ccc8f22d --- /dev/null +++ b/tolk-tester/tests/invalid-syntax-1.tolk @@ -0,0 +1,15 @@ +fun main(x: int): int { + if (x > 0) { + return 1; + } + // 'elseif' doesn't exist anymore, it's treated as 'someFunction(arg)' + elseif(x < 0) { + return -1; + } + return x; +} + +/** +@compilation_should_fail +@stderr expected `;`, got `{` + */ diff --git a/tolk-tester/tests/invalid-syntax-2.tolk b/tolk-tester/tests/invalid-syntax-2.tolk new file mode 100644 index 000000000..1180dbbff --- /dev/null +++ b/tolk-tester/tests/invalid-syntax-2.tolk @@ -0,0 +1,13 @@ +fun main(x: int) { + while (x > 0) { + if (x == 10) { + break; + } + x = x -1; + } +} + +/** +@compilation_should_fail +@stderr break/continue from loops are not supported yet + */ diff --git a/tolk-tester/tests/invalid-syntax-3.tolk 
b/tolk-tester/tests/invalid-syntax-3.tolk new file mode 100644 index 000000000..26ce82ac5 --- /dev/null +++ b/tolk-tester/tests/invalid-syntax-3.tolk @@ -0,0 +1,8 @@ +fun main(x: int) { + return null(); +} + +/** +@compilation_should_fail +@stderr null is not a function: use `null`, not `null()` + */ diff --git a/tolk-tester/tests/invalid-syntax-4.tolk b/tolk-tester/tests/invalid-syntax-4.tolk new file mode 100644 index 000000000..044dd329a --- /dev/null +++ b/tolk-tester/tests/invalid-syntax-4.tolk @@ -0,0 +1,8 @@ +fun main(x: int) { + assert(x > 0); +} + +/** +@compilation_should_fail +@stderr expected `throw excNo` after assert, got `;` + */ diff --git a/tolk-tester/tests/invalid-tolk-version.tolk b/tolk-tester/tests/invalid-tolk-version.tolk new file mode 100644 index 000000000..d66de9ff6 --- /dev/null +++ b/tolk-tester/tests/invalid-tolk-version.tolk @@ -0,0 +1,7 @@ +tolk asdf; + +/** +@compilation_should_fail +@stderr semver expected +@stderr tolk asdf; + */ diff --git a/tolk-tester/tests/invalid-typing-1.tolk b/tolk-tester/tests/invalid-typing-1.tolk new file mode 100644 index 000000000..a0fe296d8 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-1.tolk @@ -0,0 +1,10 @@ +fun main() { + var tri: [int, scli] = [10, null()]; + return; +} + +/** +@compilation_should_fail +@stderr .tolk:2 +@stderr expected , got `scli` + */ diff --git a/tolk-tester/tests/invalid-typing-2.tolk b/tolk-tester/tests/invalid-typing-2.tolk new file mode 100644 index 000000000..d7c6745f5 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-2.tolk @@ -0,0 +1,9 @@ +fun main() { + var tri: (int, bool) = (10, false); + return; +} + +/** +@compilation_should_fail +@stderr bool type is not supported yet + */ diff --git a/tolk-tester/tests/invalid.tolk b/tolk-tester/tests/invalid.tolk new file mode 100644 index 000000000..217747748 --- /dev/null +++ b/tolk-tester/tests/invalid.tolk @@ -0,0 +1,8 @@ +fun main(s: auto) { + var (z, t) = ; + +/** +@compilation_should_fail +@stderr expected , got 
`;` +@stderr var (z, t) = ; +*/ diff --git a/tolk-tester/tests/logical-operators.tolk b/tolk-tester/tests/logical-operators.tolk new file mode 100644 index 000000000..b73c25220 --- /dev/null +++ b/tolk-tester/tests/logical-operators.tolk @@ -0,0 +1,154 @@ +fun simpleAllConst() { + return (!0, !!0 & !false, !!!0, !1, !!1, !-1, !!-1, (!5 == 0) == !0, !0 == true); +} + +fun compileTimeEval1(x: int) { + // todo now compiler doesn't understand that bool can't be equal to number other than 0/-1 + // (but understands that it can't be positive) + // that's why for now, the last condition is evaluated at runtime + return (!x, !x > 10, !x < 10, !!x == 5, !x == -10); +} + +@method_id(101) +fun withIfNot(x: int, y: int) { + if (!x) { return 10; } + else if (!y) { return 20; } + return x+y; +} + +@method_id(102) +fun withAndOr(x: int, y: int, z: int) { + var return_at_end = -1; + if (!x & !y) { + if (!z & !y) { return 10; } + else if (z | !!y) { return_at_end = 20; } + } else if (!!x & !!y & !z) { + if (!z & (x > 10)) { return_at_end = 30; } + if ((x != 11) & !z) { return 40; } + return_at_end = 50; + } else { + return_at_end = !x ? 
!y : !z | 1; + } + return return_at_end; +} + +@method_id(103) +fun someSum(upto: int) { + var x = 0; + var should_break = false; + while (!x & !should_break) { + if (upto < 10) { x = upto; should_break = true; } + else { upto = upto - 1; } + } + return x; +} + + +fun lookupIdxByValue(idict32: cell, value: int) { + var cur_key = -1; + do { + var (cur_key redef, cs: slice, found: int) = idict32.idict_get_next?(32, cur_key); + // todo one-line condition (via &) doesn't work, since right side is calculated immediately + if (found) { + if (cs~load_int(32) == value) { + return cur_key; + } + } + } while (found); + return -1; +} + +@method_id(104) +fun testDict(last: int) { + // prepare dict: [3 => 30, 4 => 40, 5 => 50] + var dict: cell = new_dict(); + dict~idict_set_builder(32, 3, begin_cell().store_int(30, 32)); + dict~idict_set_builder(32, 4, begin_cell().store_int(40, 32)); + dict~idict_set_builder(32, 5, begin_cell().store_int(!last ? 100 : last, 32)); + + return (lookupIdxByValue(dict, 30), lookupIdxByValue(dict, last), lookupIdxByValue(dict, 100)); +} + +@method_id(105) +fun testNotNull(x: int) { + return [x == null, null == x, !(x == null), null == null, +(null != null)]; +} + +fun main() { + +} + +/** +@testcase | 101 | 0 0 | 10 +@testcase | 101 | 5 0 | 20 +@testcase | 101 | 5 8 | 13 +@testcase | 102 | 0 0 0 | 10 +@testcase | 102 | 0 0 5 | 20 +@testcase | 102 | 1 2 0 | 40 +@testcase | 102 | 11 2 0 | 50 +@testcase | 102 | 1 0 0 | -1 +@testcase | 102 | 0 1 0 | 0 +@testcase | 102 | 1 0 1 | 1 +@testcase | 103 | 15 | 9 +@testcase | 103 | 6 | 6 +@testcase | 103 | -1 | -1 +@testcase | 104 | 50 | 3 5 -1 +@testcase | 104 | 100 | 3 5 5 +@testcase | 104 | 0 | 3 -1 5 +@testcase | 105 | 0 | [ 0 0 -1 -1 0 ] +@testcase | 105 | null | [ -1 -1 0 -1 0 ] + +@fif_codegen +""" + simpleAllConst PROC:<{ + // + -1 PUSHINT + 0 PUSHINT + -1 PUSHINT + 0 PUSHINT + -1 PUSHINT + 0 PUSHINT + -1 PUSHINT + TRUE + TRUE + }> +""" + +@fif_codegen +""" + compileTimeEval1 PROC:<{ + // x + DUP // x x 
+ 0 EQINT // x _1 + FALSE // x _1 _4 + TRUE // x _1 _4 _7 + FALSE // x _1 _4 _7 _11 + s0 s4 XCHG // _11 _1 _4 _7 x + 0 EQINT // _11 _1 _4 _7 _12 + -10 EQINT // _11 _1 _4 _7 _14 + s3 s4 XCHG + s1 s3 s0 XCHG3 // _1 _4 _7 _11 _14 + }> +""" + +@fif_codegen +""" + withIfNot PROC:<{ + c2 SAVE + SAMEALTSAVE // x y + OVER // x y x + IFNOTJMP:<{ // x y + 2DROP // + 10 PUSHINT // _2=10 + }> // x y + DUP // x y y + IFNOTJMP:<{ // x y + 2DROP // + 20 PUSHINT // _3=20 + RETALT + }> // x y + ADD // _4 + }> +""" + + */ diff --git a/tolk-tester/tests/method_id.tolk b/tolk-tester/tests/method_id.tolk new file mode 100644 index 000000000..c2d0b9aad --- /dev/null +++ b/tolk-tester/tests/method_id.tolk @@ -0,0 +1,15 @@ +@method_id(1) +fun foo1(): int { return 111; } +@method_id(3) +fun foo2(): int { return 222; } +@method_id(10) +fun foo3(): int { return 333; } +fun main(): int { return 999; } + +/** + method_id | in | out +@testcase | 1 | | 111 +@testcase | 3 | | 222 +@testcase | 10 | | 333 +@testcase | 0 | | 999 +*/ diff --git a/tolk-tester/tests/no-spaces.tolk b/tolk-tester/tests/no-spaces.tolk new file mode 100644 index 000000000..018c99da2 --- /dev/null +++ b/tolk-tester/tests/no-spaces.tolk @@ -0,0 +1,117 @@ +const int10:int=10; + +fun just10(): int { return int10; } +fun eq(v: int): int { return`v`; } + +@method_id(101) fun `get_-1` (): int {return-1;} +@method_id(102) fun `get_--1` (): int {return--1;} +@method_id(103) fun `get_---1`(): int {return---1;} +@method_id(104) fun `get_+++1`(): int {return+++1;} +@method_id(105) fun `get_+-+1`(): int {return+-+1;} + +global `some()var`:int; + +@method_id(110) fun `some_math`(): int { + `some()var`=--6; + return 1*-2*-3*-4*just10()*-5+-`some()var`+--`some()var`---`some()var`; +} + +@method_id(111) fun `negative_nums`(a:int):int { + var m$0:int=1; + var m1:int=-(+0x1)*m$0; + return `a`*-1*-(1)*---(1)*+just10()+-`just10`()*m1*-m1+-eq(m1)----0x1; +} + +@method_id(112) fun `bitwise~ops`(flags:int):[int,int] { + return[ + 
(just10()-3==just10()-(4)--1)|((2==2)&(eq(eq(10)) -3==just10()--13)), + ((flags&0xFF)!=0) + ]; +} + +@method_id(113)fun`unary+bitwise-constant`():[int,int,int]{ + // todo spaces are still not allowed before ~ + return [~-~~+-3, ~+3-~ 9, -(-~+-20-~ 10+3+~ 38&39)]; +} + +@method_id(114)fun`unary+bitwize-parametrized`(c3:int, c9:int, c20:int, c10:int, c38:int):[int,int,int]{ + // todo spaces are still not allowed before ~ + return [~-~~+-c3, ~+c3-~ `c9`, -(-~+-c20-~ c10+c3+~ c38&39)]; +} + +fun add3(a: int, b: int, c: int) { return a+b+c; } + +@method_id(115) fun unary_const_check(): [int,int] { + var fst1: int=-1; + var snd1: int=-1; + var trd1: int=+2; + var (fst2,snd2,trd2)=(-1,-1,+2); + return [add3(fst2,snd2,trd2),add3(fst1,snd1,trd1)]; +} + +fun `load:u32`(cs: slice): (slice, int) { + return cs.load_uint(32); +} + +@method_id(116) fun `call_~_via_backticks`():[int,int,int,int] { + var b:builder = begin_cell().store_uint(1, 32).store_uint(2, 32).store_uint(3, 32).store_uint(4, 32); + var `cs`:slice = b.end_cell().begin_parse(); + var (`cs` redef,one:int) = `cs`.`load_uint`(32); + var (two:int,three:int) = (`cs`~`load_uint`(32), cs~`load:u32`()); + var (cs redef,four:int) = cs.`load:u32`(); + return [one,two,three,four]; +} + +fun`main`(){} + +/** + method_id | in | out +@testcase | 101 | | -1 +@testcase | 102 | | 1 +@testcase | 103 | | -1 +@testcase | 104 | | 1 +@testcase | 105 | | -1 +@testcase | 110 | | 1194 +@testcase | 111 | -1 | 22 +@testcase | 112 | 0 | [ -1 0 ] +@testcase | 113 | | [ -4 6 -4 ] +@testcase | 114 | 3 9 20 10 38 | [ -4 6 -4 ] +@testcase | 115 | | [ 0 0 ] +@testcase | 116 | | [ 1 2 3 4 ] + +@fif_codegen +""" + get_+-+1 PROC:<{ + // + -1 PUSHINT + }> +""" + +@fif_codegen +""" + unary+bitwise-constant PROC:<{ + // + -4 PUSHINT + 6 PUSHINT + -4 PUSHINT + TRIPLE + }> +""" + +@fif_codegen +""" + unary_const_check PROC:<{ + // + -1 PUSHINT // fst1=-1 + DUP // fst1=-1 snd1=-1 + 2 PUSHINT // fst1=-1 snd1=-1 trd1=2 + s1 s1 s0 PUSH3 // fst1=-1 snd1=-1 
trd1=2 fst2=-1 snd2=-1 trd2=2 + add3 CALLDICT // fst1=-1 snd1=-1 trd1=2 _13 + 3 -ROLL // _13 fst1=-1 snd1=-1 trd1=2 + add3 CALLDICT // _13 _14 + PAIR // _12 + }> +""" + + */ + diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk new file mode 100644 index 000000000..0be4966ff --- /dev/null +++ b/tolk-tester/tests/null-keyword.tolk @@ -0,0 +1,157 @@ +import "../../crypto/smartcont/stdlib.tolk" +@method_id(101) +fun test1() { + var numbers: tuple = null; + numbers = cons(1, numbers); + numbers = cons(2, numbers); + numbers = cons(3, numbers); + numbers = cons(4, numbers); + var (h, numbers redef) = uncons(numbers); + h += car(numbers); + + var t = empty_tuple(); + do { + var num = numbers~list_next(); + t~tpush(num); + } while (numbers != null); + + return (h, numbers == null, t); +} + +@method_id(102) +fun test2(x: int) { + if (null != x) { + var y: int = null; + if (y != null) { return 10; } + return y; + } + try { + return x + 10; // will throw, since not a number + } catch { + return -1; + } + return 100; +} + +fun myIsNull(x: int): int { + return x == null ? -1 : x; +} + +@method_id(103) +fun test3(x: int) { + return myIsNull(x > 10 ? null : x); +} + +fun getUntypedNull() { + var untyped = null; + if (true) { + return untyped; + } + return untyped; +} + +@method_id(104) +fun test4() { + var (_, (_, untyped)) = (3, (empty_tuple, null)); + if (true) { + return untyped; + } + return untyped; +} + +@method_id(105) +fun test5() { + var n = getUntypedNull(); + return !(null == n) ? 
n~load_int(32) : 100; +} + +@method_id(106) +fun test6(x: int) { + return x > null; // this compiles (for now), but fails at runtime +} + +@method_id(107) +fun test7() { + var b = begin_cell().store_maybe_ref(null); + var s = b.end_cell().begin_parse(); + var c = s~load_maybe_ref(); + return (null == c) * 10 + (b != null); +} + +fun main() { + // now, the compiler doesn't optimize this at compile-time, fif codegen contains ifs + var i: int = null; + if (i == null) { + return 1; + } + return 10; +} + +/** +@testcase | 101 | | 7 -1 [ 3 2 1 ] +@testcase | 102 | 5 | (null) +@testcase | 102 | null | -1 +@testcase | 103 | 5 | 5 +@testcase | 103 | 15 | -1 +@testcase | 104 | | (null) +@testcase | 105 | | 100 +@testcase | 107 | | -11 +@fif_codegen +""" + test1 PROC:<{ + // + PUSHNULL // numbers + 1 PUSHINT // numbers _2=1 + SWAP // _2=1 numbers + CONS // numbers + 2 PUSHINT // numbers _4=2 + SWAP // _4=2 numbers + CONS // numbers + 3 PUSHINT // numbers _6=3 + SWAP // _6=3 numbers + CONS // numbers + 4 PUSHINT // numbers _8=4 + SWAP // _8=4 numbers + CONS // numbers + UNCONS // h numbers + DUP // h numbers numbers + CAR // h numbers _12 +""" + +@fif_codegen +""" + main PROC:<{ + // + PUSHNULL // i + ISNULL // _2 + IFJMP:<{ // + 1 PUSHINT // _3=1 + }> // + 10 PUSHINT // _4=10 + }> +""" + +@fif_codegen +""" + test6 PROC:<{ + // x + PUSHNULL // x _1 + GREATER // _2 + }> +""" + +@fif_codegen +""" + test7 PROC:<{ + ... 
+ LDOPTREF // b _17 _16 + DROP // b c + ISNULL // b _10 + 10 MULCONST // b _12 + SWAP // _12 b + ISNULL // _12 _13 + 0 EQINT // _12 _14 + ADD // _15 + }> +""" +*/ diff --git a/tolk-tester/tests/op_priority.tolk b/tolk-tester/tests/op_priority.tolk new file mode 100644 index 000000000..e4f97b759 --- /dev/null +++ b/tolk-tester/tests/op_priority.tolk @@ -0,0 +1,121 @@ +fun justTrue(): int { return true; } + +fun unary_minus_1(a: int, b: int, c: int): int{return -(a+b) *c;} +fun unary_minus_2(a: int, b: int, c: int): int{return(-(a+b))*c;} +fun unary_minus_3(a: int, b: int, c: int): int{return-((a+b) *c);} + + +@method_id(101) +fun test1(x: int, y: int, z: int): int { + return (x > 0) & (y > 0) & (z > 0); +} + +@method_id(102) +fun test2(x: int, y: int, z: int): int { + return x > (0 & (y > 0) & (z > 0)); +} + +@method_id(103) +fun test3(x: int, y: int, z: int): int { + if ((x < 0) | (y < 0)) { + return z < 0; + } + return (x > 0) & (y > 0); +} + +@method_id(104) +fun test4(x: int, y: int, mode: int): int { + if (mode == 1) { + return (x == 10) | (y == 20); + } if (mode == 2) { + return (x == 10) | (y == 20); + } else { + return x == (10 | (y == 20)); + } +} + +@method_id(105) +fun test5(status: int): int { + return justTrue() & (status == 1) & ((justTrue() & status) == 1); +} + +@method_id(106) +fun test6(a: int, b: int, c: int): int { + return (unary_minus_1(a,b,c) == unary_minus_2(a,b,c)) & (unary_minus_1(a,b,c) == unary_minus_3(a,b,c)); +} + +@method_id(107) +fun test7(b: int): int { + var a = b == 3 ? 3 : b == 4 ? 4 : (b == 5) & 1 ? 5 : 100; + return a; +} + +@method_id(108) +fun test8(b: int): int { + var a = b == 3 ? 3 : b == 4 ? 4 : b = 5 ? 
5 : 100; + return a; +} + +fun `_ 0, 3 & (3 > 0), 3 & (`_<_`(3, 0)), + 3 & `_ + unary_minus_2 PROC:<{ + // a b c + -ROT // c a b + ADD // c _3 + NEGATE // c _4 + SWAP // _4 c + MUL // _5 + }> + unary_minus_3 PROC:<{ + // a b c + -ROT // c a b + ADD // c _3 + SWAP // _3 c + MUL // _4 + NEGATE // _5 + }> +""" + + */ diff --git a/tolk-tester/tests/pure-functions.tolk b/tolk-tester/tests/pure-functions.tolk new file mode 100644 index 000000000..59b2f0da5 --- /dev/null +++ b/tolk-tester/tests/pure-functions.tolk @@ -0,0 +1,46 @@ + +@pure +fun f_pure1(): int { + return f_pure2(); +} + +@pure +fun f_pure2(): int { + return 2; +} + +@pure +fun get_contract_data(): (int, int) { + var c: cell = get_data(); + var cs: slice = c.begin_parse(); + cs~load_bits(32); + var value: int = cs~load_uint(16); + return (1, value); +} + +fun save_contract_data(value: int) { + var b: builder = begin_cell().store_int(1, 32).store_uint(value, 16); + set_data(b.end_cell()); +} + +@pure +@method_id(101) +fun test1(): int { + return f_pure1(); +} + +@method_id(102) +fun test2(value: int): int { + save_contract_data(value); + var (_, restored: auto) = get_contract_data(); + return restored; +} + +fun main() { return; } + +/** + +@testcase | 101 | | 2 +@testcase | 102 | 44 | 44 + +*/ diff --git a/tolk-tester/tests/remove-unused-functions.tolk b/tolk-tester/tests/remove-unused-functions.tolk new file mode 100644 index 000000000..e5d8aabcb --- /dev/null +++ b/tolk-tester/tests/remove-unused-functions.tolk @@ -0,0 +1,48 @@ +fun unused1(): int { return 2; } +fun unused2(): int { return unused1(); } +fun unused3(x: int): int { return x * 2+unused2(); } + +fun used_from_noncall1(): int { return 10; } +fun used_as_noncall1(): int { return used_from_noncall1(); } + +const int20: int = 20; +fun used_from_noncall2(): int { return int20; } +fun used_as_noncall2(): int { return 0 * 0 + used_from_noncall2() + (0 << 0); } + +global unused_gv: int; +global used_gv: auto; + +fun receiveGetter(): (() -> int) { 
return used_as_noncall2; } + +@pure +fun usedButOptimizedOut(x: int): int { return x + 2; } + +fun main(): (int, int, int) { + used_gv = 1; + used_gv = used_gv + 2; + var getter1 = used_as_noncall1; + var getter2 = receiveGetter(); + usedButOptimizedOut(used_gv); + return (used_gv, getter1(), getter2()); +} + +/** +@experimental_options remove-unused-functions + +@testcase | 0 | | 3 10 20 + +@fif_codegen DECLPROC used_as_noncall1 +@fif_codegen DECLGLOBVAR used_gv + +@fif_codegen_avoid DECLPROC unused1 +@fif_codegen_avoid DECLPROC unused2 +@fif_codegen_avoid DECLPROC unused3 +@fif_codegen_avoid DECLGLOBVAR unused_gv + +Note that `usedButOptimizedOut()` (a pure function whose result is unused) +is currently code-generated, since it's formally reachable. +This is because code optimization currently happens during codegen (later than marking unused symbols). + +@fif_codegen DECLPROC usedButOptimizedOut +@fif_codegen_avoid usedButOptimizedOut CALLDICT +*/ diff --git a/tolk-tester/tests/s1.tolk b/tolk-tester/tests/s1.tolk new file mode 100644 index 000000000..3f75f1a70 --- /dev/null +++ b/tolk-tester/tests/s1.tolk @@ -0,0 +1,61 @@ +get ascii_slice(): slice { + return"string"; +} + +get raw_slice(): slice { + return "abcdef"s; +} + +get addr_slice(): slice { + return "Ef8zMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzM0vF"a; +} + +get string_hex(): int { + return "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345"u; +} + +get fun string_minihash(): int { // 'get' and 'get fun' both possible + return "transfer(slice, int)"h; +} + +get fun string_maxihash(): int { + return "transfer(slice, int)"H; +} + +get fun string_crc32(): int { + return "transfer(slice, int)"c; +} + +@pure +fun newc(): builder +asm "NEWC"; +fun endcs(b: builder): slice +asm "ENDC" "CTOS"; +@pure +fun sdeq(s1: slice, s2: slice): int +asm "SDEQ"; + +fun main() { + var s_ascii: slice = ascii_slice(); + var s_raw: slice = raw_slice(); + var s_addr: slice = addr_slice(); + var i_hex: int = string_hex(); + var i_mini: int = 
string_minihash(); + var i_maxi: int = string_maxihash(); + var i_crc: int = string_crc32(); + assert(sdeq(s_ascii, newc().store_uint(0x737472696E67, 12 * 4).endcs())) throw 101; + assert(sdeq(s_raw, newc().store_uint(0xABCDEF, 6 * 4).endcs())) throw 102; + assert(sdeq(s_addr, newc().store_uint(4, 3).store_int(-1, 8) + .store_uint(0x3333333333333333333333333333333333333333333333333333333333333333, 256).endcs()), 103); + assert(i_hex == 0x4142434445464748494A4B4C4D4E4F505152535455565758595A303132333435) throw 104; + assert(i_mini == 0x7a62e8a8) throw 105; + assert(i_maxi == 0x7a62e8a8ebac41bd6de16c65e7be363bc2d2cbc6a0873778dead4795c13db979) throw 106; + assert(i_crc == 2235694568) throw 107; + return 0; +} + +/** +@testcase | 0 | | 0 + +@code_hash 13830542019509784148027107880226447201604257839069192762244575629978154217223 +*/ diff --git a/tolk-tester/tests/special-fun-names.tolk b/tolk-tester/tests/special-fun-names.tolk new file mode 100644 index 000000000..8fae6d5db --- /dev/null +++ b/tolk-tester/tests/special-fun-names.tolk @@ -0,0 +1,24 @@ +fun onInternalMessage() { return 0; } +fun onExternalMessage() { return -1; } +fun onRunTickTock() { return -2; } +fun onSplitPrepare() { return -3; } +fun onSplitInstall() { return -4; } + +/** +@experimental_options remove-unused-functions + +@testcase | 0 | | 0 +@testcase | -1 | | -1 +@testcase | -2 | | -2 +@testcase | -3 | | -3 +@testcase | -4 | | -4 + +@fif_codegen +""" + 0 DECLMETHOD onInternalMessage + -1 DECLMETHOD onExternalMessage + -2 DECLMETHOD onRunTickTock + -3 DECLMETHOD onSplitPrepare + -4 DECLMETHOD onSplitInstall +""" + */ diff --git a/tolk-tester/tests/test-math.tolk b/tolk-tester/tests/test-math.tolk new file mode 100644 index 000000000..dde4c2b32 --- /dev/null +++ b/tolk-tester/tests/test-math.tolk @@ -0,0 +1,309 @@ +import "../../crypto/smartcont/mathlib.tolk"; + +@pure +fun ~tset(t: tuple, idx: int, value: X): (tuple, ()) +asm(t value idx) "SETINDEXVAR"; + +// computes 1-acos(x)/Pi by a very simple, 
extremely slow (~70k gas) and imprecise method +// fixed256 acos_prepare_slow(fixed255 x); +@inline +fun acos_prepare_slow_f255(x: int): int { + x -= (x == 0); + var t: int = 1; + repeat (255) { + t = t * sgn(x) * 2 + 1; // decode Gray code (sgn(x_0), sgn(x_1), ...) + x = (-1 << 255) - muldivr(x, - x, 1 << 254); // iterate x := 2*x^2 - 1 = cos(2*acos(x)) + } + return abs(t); +} + +// extremely slow (~70k gas) and somewhat imprecise (very imprecise when x is small), for testing only +// fixed254 acos_slow(fixed255 x); +@inline_ref +fun acos_slow_f255(x: int): int { + var t: int = acos_prepare_slow_f255(x); + return - mulrshiftr256(t + (-1<<256), Pi_const_f254()); +} + +// fixed255 asin_slow(fixed255 x); +@inline_ref +fun asin_slow_f255(x: int): int { + var t: int = acos_prepare_slow_f255(abs(x)) % (1 << 255); + return muldivr(t, Pi_const_f254(), 1 << 255) * sgn(x); +} + +@inline_ref +fun test_nrand(n: int): tuple { + var t: tuple = empty_tuple(); + repeat (255) { + t~tpush(0); + } + repeat (n) { + var x: int = fixed248_nrand(); + var bucket: int = (abs(x) >> 243); // 255 buckets starting from x=0, each 1/32 wide + t~tset(bucket, t.at(bucket) + 1); + } + return t; +} + +@method_id(10000) +fun geom_mean_test(x: int, y: int): int { + return geom_mean(x, y); +} +@method_id(10001) +fun tan_f260_test(x: int): int { + return tan_f260(x); +} +@method_id(10002) +fun sincosm1_f259_test(x: int): (int, int) { + return sincosm1_f259(x); +} +@method_id(10003) +fun sincosn_f256_test(x: int, y: int): (int, int) { + return sincosn_f256(x, y); +} +@method_id(10004) +fun sincosm1_f256_test(x: int): (int, int) { + return sincosm1_f256(x); +} +@method_id(10005) +fun tan_aux_f256_test(x: int): (int, int) { + return tan_aux_f256(x); +} +@method_id(10006) +fun fixed248_tan_test(x: int): int { + return fixed248_tan(x); +} +/* + (int) atanh_alt_f258_test(x) method_id(10007) { + return atanh_alt_f258(x); + } +*/ +@method_id(10008) +fun atanh_f258_test(x:int, y:int): int { + return 
atanh_f258(x, y); +} +@method_id(10009) +fun atanh_f261_test(x:int, y:int): int { + return atanh_f261(x, y); +} + +@method_id(10010) +fun log2_aux_f256_test(x:int): (int, int) { + return log2_aux_f256(x); +} +@method_id(10011) +fun log_aux_f256_test(x:int): (int, int) { + return log_aux_f256(x); +} +@method_id(10012) +fun fixed248_pow_test(x:int, y:int): int { + return fixed248_pow(x, y); +} +@method_id(10013) +fun exp_log_div(x:int, y:int): int { + return fixed248_exp(fixed248_log(x << 248) ~/ y); +} +@method_id(10014) +fun fixed248_log_test(x:int): int { + return fixed248_log(x); +} +@method_id(10015) +fun log_aux_f257_test(x:int): (int,int) { + return log_aux_f257(x); +} +@method_id(10016) +fun fixed248_sincos_test(x:int): (int,int) { + return fixed248_sincos(x); +} +@method_id(10017) +fun fixed248_exp_test(x:int): int { + return fixed248_exp(x); +} +@method_id(10018) +fun fixed248_exp2_test(x:int): int { + return fixed248_exp2(x); +} +@method_id(10019) +fun expm1_f257_test(x:int): int { + return expm1_f257(x); +} +@method_id(10020) +fun atan_f255_test(x:int): int { + return atan_f255(x); +} +@method_id(10021) +fun atan_f259_test(x:int, n:int): int { + return atan_f259(x, n); +} +@method_id(10022) +fun atan_aux_f256_test(x:int): (int, int) { + return atan_aux_f256(x); +} +@method_id(10023) +fun asin_f255_test(x:int): int { + return asin_f255(x); +} +@method_id(10024) +fun asin_slow_f255_test(x:int): int { + return asin_slow_f255(x); +} +@method_id(10025) +fun acos_f255_test(x:int): int { + return acos_f255(x); +} +@method_id(10026) +fun acos_slow_f255_test(x:int): int { + return acos_slow_f255(x); +} +@method_id(10027) +fun fixed248_atan_test(x:int): int { + return fixed248_atan(x); +} +@method_id(10028) +fun fixed248_acot_test(x:int): int { + return fixed248_acot(x); +} + +fun main() { + var One: int = 1; + // repeat(76 / 4) { One *= 10000; } + var sqrt2: int = geom_mean(One, 2 * One); + var sqrt3: int = geom_mean(One, 3 * One); + // return geom_mean(-1 - (-1 
<< 256), -1 - (-1 << 256)); + // return geom_mean(-1 - (-1 << 256), -2 - (-1 << 256)); + // return geom_mean(-1 - (-1 << 256), 1 << 255); + // return (sqrt2, geom_mean(sqrt2, One)); // (sqrt(2), 2^(1/4)) + // return (sqrt3, geom_mean(sqrt3, One)); // (sqrt(3), 3^(1/4)) + // return geom_mean(3 << 254, 1 << 254); + // return geom_mean(3, 5); + // return tan_f260(115641670674223639132965820642403718536242645001775371762318060545014644837101 - 1); + // return tan_f260(15 << 252); // tan(15/256) * 2^260 + // return sincosm1_f259(1 << 255); // (sin,1-cos)(1/16) * 2^259 + // return sincosm1_f259(115641670674223639132965820642403718536242645001775371762318060545014644837101 - 1); + // return sincosm1_f256((1 << 255) - 1 + (1 << 255)); // (sin,1-cos)(1-2^(-256)) + // return sincosm1_f256(Pi_const_f254()); // (sin,1-cos)(Pi/4) + // return sincosn_f256(Pi_const_f254(), 0); // (sin,-cos)(Pi/4) + // return sincosn_f256((1 << 255) + 1, 0); // (sin,-cos)(1/2+1/2^256) + // return sincosn_f256(1 << 254, 0); + // return sincosn_f256(touch(15) << 252, 0); // (sin,-cos)(15/16) + // return sincosm1_f256(touch(15) << 252); // (sin,1-cos)(15/16) + // return sincosn_f256(60628596148627720713372490462954977108898896221398738326462025186323149077698, 0); // (sin,-cos)(Pi/6) + // return sincosm1_f256(60628596148627720713372490462954977108898896221398738326462025186323149077698); // (sin,1-cos)(Pi/6) + // return tan_aux_f256(1899 << 245); // (p,q) such that p/q=tan(1899/2048) + // return fixed248_tan(11 << 248); // tan(11) + // return atanh_alt_f258(1 << 252); // atanh(1/64) * 2^258 + // return atanh_f258(1 << 252, 18); // atanh(1/64) * 2^258 + // return atanh_f261(muldivr(64, 1 << 255, 55), 18); // atanh(1/55) * 2^261 + // return log2_aux_f256(1 << 255); + // return log2_aux_f256(-1 - (-1 << 256)); // log2(2-1/2^255))*2^256 ~ 2^256 - 1.43 + // return log_aux_f256(-1 - (-1 << 256)); + // return log_aux_f256(3); // log(3/2)*2^256 + // return fixed248_pow(3 << 248, 3 << 248); // 3^3 + // return 
fixed248_exp(fixed248_log(5 << 248) ~/ 7); // exp(log(5)/7) = 5^(1/7) + // return fixed248_log(Pi_const_f254() ~>> 6); // log(Pi) + // return atanh_alt_f258(1 << 255); // atanh(1/8) * 2^258 + // return atanh_f258(1 << 255, 37); // atanh(1/8) * 2^258 + // return atanh_f258(81877371507464127617551201542979628307507432471243237061821853600756754782485, 36); // atanh(sqrt(2)/8) * 2^258 + // return log_aux_f257(Pi_const_f254()); // log(Pi/4) + // return log_aux_f257(3 << 254); // log(3) + // return atanh_alt_f258(81877371507464127617551201542979628307507432471243237061821853600756754782485); // atanh(sqrt(2)/8) * 2^258 + // return fixed248_sincos(Pi_const_f254() ~/ (64 * 3)); // (sin,cos)(Pi/3) + // return fixed248_exp(3 << 248); // exp(3)*2^248 + // return fixed248_exp2((1 << 248) ~/ 5); // 2^(1/5)*2^248 + // return fixed248_pow(3 << 248, -3 << 247); // 3^(-1.5) + // return fixed248_pow(10 << 248, -70 << 248); // 10^(-70) + // return fixed248_pow(fixed248_Pi_const(), touch(3) << 248); // Pi^3 ~ 31.006, computed more precisely + // return fixed248_pow(fixed248_Pi_const(), fixed248_Pi_const()); // Pi^Pi, more precisely + // return fixed248_exp(fixed248_log(fixed248_Pi_const()) * 3); // Pi^3 ~ 31.006 + // return fixed248_exp(muldivr(fixed248_log(fixed248_Pi_const()), fixed248_Pi_const(), 1 << 248)); // Pi^Pi + // return fixed248_sin(fixed248_log(fixed248_exp(fixed248_Pi_const()))); // sin(log(e^Pi)) + // return expm1_f257(1 << 255); // (exp(1/4)-1)*2^256 + // return expm1_f257(-1 << 256); // (exp(-1/2)-1)*2^256 (argument out of range, will overflow) + // return expm1_f257(log2_const_f256()); // (exp(log(2)/2)-1)*2^256 + // return expm1_f257(- log2_const_f256()); // (exp(-log(2)/2)-1)*2^256 + // return tanh_f258(log2_const_f256(), 17); // tanh(log(2)/4)*2^258 + // return atan_f255(0xa0 << 247); + // return atan_f259(1 << 255, 26); // atan(1/16) + // return atan_f259(touch(2273) << 244, 26); // atan(2273/2^15) + // return atan_aux_f256(0xa0 << 248); + // return 
atan_aux_f256(-1 - (-1 << 256)); + // return atan_aux_f256(-1 << 256); + // return atan_aux_f256(1); // atan(1/2^256)*2^261 = 32 + //return fixed248_nrand(); + // return test_nrand(100000); + var One2: int = touch(1 << 255); + // return asin_f255(One); + // return asin_f255(-2 * One ~/ -3); + var arg: int = muldivr(12, One2, 17); // 12/17 + // return [ asin_slow_f255(arg), asin_f255(arg) ]; + // return [ acos_slow_f255(arg), acos_f255(arg) ]; + // return 4 * atan_f255(One ~/ 5) - atan_f255(One ~/ 239); // 4 * atan(1/5) - atan(1/239) = Pi/4 as fixed255 + var One3: int = touch(1 << 248); + // return fixed248_atan(One ~/ 5); // atan(1/5) + // return fixed248_acot(One ~/ 239); // acot(1/239) +} + +/** + method_id | in | out +@testcase | 10000 | -1-(-1<<256) -1-(-1<<256) | 115792089237316195423570985008687907853269984665640564039457584007913129639935 +@testcase | 10000 | -1-(-1<<256) -2-(-1<<256) | 115792089237316195423570985008687907853269984665640564039457584007913129639934 +@testcase | 10000 | -1-(-1<<256) 1<<255 | 81877371507464127617551201542979628307507432471243237061821853600756754782485 +@testcase | 10000 | 1 2 | 1 +@testcase | 10000 | 1 3 | 2 +@testcase | 10000 | 3<<254 1<<254 | 50139445418395255283694704271811692336355250894665672355503583528635147053497 +@testcase | 10000 | 3 5 | 4 +@testcase | 10001 | 115641670674223639132965820642403718536242645001775371762318060545014644837101-1 | 115792089237316195423570985008687907853269984665640564039457584007913129639935 +@testcase | 10001 | 15<<252 | 108679485937549714997960660780289583146059954551846264494610741505469565211201 + +@testcase | 10002 | 1<<255 | 57858359242454268843682786479537198006144860419130642837770554273561536355094 28938600351875109040123440645416448095273333920390487381363947585666516031269 +@testcase | 10002 | 90942894222941581070058735694432465663348344332098107489693037779484723616546 | 90796875678616203090520439851979829600860326752181983760731669850687818036503 
71369031536005973567205947792557760023823761636922618688720973932041901854510 +@testcase | 10002 | 115641670674223639132965820642403718536242645001775371762318060545014644837100 | 115341536360906404779899502576747487978354537254490211650198994186870666100480 115341536360906404779899502576747487978354537254490211650198994186870666100479 +@testcase | 10003 | 90942894222941581070058735694432465663348344332098107489693037779484723616546 0 | 81877371507464127617551201542979628307507432471243237061821853600756754782485 -81877371507464127617551201542979628307507432471243237061821853600756754782486 +@testcase | 10003 | (1<<255)+1 0 | 55513684748706254392157395574451324146997108788015526773113170656738693667657 -101617118319522600545601981648807607350213579319835970884288805016705398675944 +@testcase | 10003 | 1<<254 0 | 28647421327665059059430596260119789787021370826354543144805343654507971817712 -112192393597863122712065585177748900737784171216163716639418346853706594800924 +@testcase | 10003 | 15<<252 0 | 93337815620236900315136494926097782162348358704087992554326802765553037216157 -68526346066204767396483080633934170508153877799043171682610011603005473885083 +@testcase | 10004 | 15<<252 | 93337815620236900315136494926097782162348358704087992554326802765553037216158 94531486342222856054175808749507474690232213733194784713695144809815311509707 +@testcase | 10003 | 60628596148627720713372490462954977108898896221398738326462025186323149077698 0 | 57896044618658097711785492504343953926634992332820282019728792003956564819968 -100278890836790510567389408543623384672710501789331344711007167057270294106993 +@testcase | 10004 | 60628596148627720713372490462954977108898896221398738326462025186323149077698 | 57896044618658097711785492504343953926634992332820282019728792003956564819968 31026396801051369712363152930129046361118965752618438656900833901285671065886 +@testcase | 10005 | 1899<<245 | -115784979074977116522606932816046735344768048129666123117516779696532375620701 
-86847621900007587791673148476644866514014227467564880140262768165345715058771 +@testcase | 10006 | 11<<248 | -102200470999497240398685962406597118965525125432278008915850368651878945159221 +@testcase | 10008 | 1<<252 18 | 7237594612640731814076778712183932891481921212865048737772958953246047977071 +@testcase! | 10009 | 64*(1<<255)//55 18 | 67377367986958444187782963285047188951340314639925508148698906136973510008513 +@testcase | 10010 | 1<<255 | 0 255 +@testcase | 10011 | -1-(-1<<256) | 80260960185991308862233904206310070533990667611589946606122867505419956976171 255 +@testcase | 10012 | 3<<248 3<<248 | 12212446911748192486079752325135052781399568695204278238536542063334587891712 +@testcase | 10013 | 5 7 | 569235245303856216139605450142923208167703167128528666640203654338408315932 +@testcase | 10014 | 1420982722233462204219667745225507275989817880189032929526453715304448806508 | 517776035526939558040896860590142614178014859368681705591403663865964112176 +@testcase | 10008 | 1<<255 37 | 58200445412255555045265806996802932280233368707362818578692888102488340124094 +@testcase | 10008 | 81877371507464127617551201542979628307507432471243237061821853600756754782485 36 | 82746618329032515754939514227666784789465120373484337368014239356561508382845 +@testcase | 10015 | 90942894222941581070058735694432465663348344332098107489693037779484723616546 | -55942510554172181731996424203087263676819062449594753161692794122306202470292 256 +@testcase | 10015 | 3<<254 | -66622616410625360568738677407433830899150908037353507097280251369610028875158 256 +@testcase | 10016 | 90942894222941581070058735694432465663348344332098107489693037779484723616546//(64*3) | 391714417331212931903864877123528846377775397614575565277371746317462086355 226156424291633194186662080095093570025917938800079226639565593765455331328 +@testcase | 10017 | 3<<248 | 9084946421051389814103830025729847734065792062362132089390904679466687950835 +@testcase | 10018 | (1<<248)//5 | 
519571025111621076330285524602776985448579272766894385941850747946908706857 +@testcase | 10012 | 3<<248 -3<<247 | 87047648295825095978636639360784188083950088358794570061638165848324908079 +@testcase | 10012 | 10<<248 -70<<248 | 45231 +@testcase | 10012 | 1420982722233462204219667745225507275989817880189032929526453715304448806508 3<<248 | 14024537329227316173680050897643053638073167245065581681188087336877135047241 +@testcase | 10012 | 1420982722233462204219667745225507275989817880189032929526453715304448806508 1420982722233462204219667745225507275989817880189032929526453715304448806508 | 16492303277433924047657446877966346821161732581471802839855102123372676002295 +@testcase | 10019 | 1<<255 | 65775792789545756849501669218806308540691279864498696756901136302101823231959 +@testcase | 10019 | -1<<255 | -51226238931640701466578648374135745377468902266335737558089915608594425303282 + +@testcase | 10020 | 160<<247 | 32340690885082755723307749066376646841771751777398167772823878380310576779097 +@testcase | 10021 | 1<<255 26 | 57820835337111819566482910321201859268121322500887685881159030272507322418551 +@testcase | 10021 | 2273<<244 26 | 64153929153128256059565403901040178355488584937372975321150754259394300105908 +@testcase | 10022 | 160<<248 | 18 -13775317617017974742132028403521581424991093186766868001115299479309514610238 +@testcase | 10022 | -1-(-1<<256) | 25 16312150880916231694896252427912541090503675654570543195394548083530005073282 +@testcase | 10022 | -1<<256 | -25 -16312150880916231694896252427912541090503675654570543195394548083530005073298 +@testcase | 10022 | 1 | 0 32 + +@testcase | 10023 | 1<<255 | 90942894222941581070058735694432465663348344332098107489693037779484723616546 +@testcase | 10023 | (1-(1<<255))//-3 | 19675212872822715586637341573564384553677006914302429002469838095945333339604 +@testcase | 10023 | 12*(1<<255)//17 | 45371280744427205854111943101074857545572584208710061167826656461897302968384 +@testcase | 10024 | 12*(1<<255)//17 | 
45371280744427205854111943101074857545572584208710061167826656461897302968387 +@testcase | 10025 | 12*(1<<255)//17 | 22785806739257187607973396296678804058887880061694023160933190658793710324081 +@testcase | 10026 | 12*(1<<255)//17 | 22785806739257187607973396296678804058887880061694023160933190658793710324080 + +@testcase | 10027 | (1<<248)//5 | 89284547973388213553327350968415123522888028497458323165947767504203347189 +@testcase | 10028 | (1<<248)//239 | 708598849781543798951441405045469962900811296151941404481049216461523216127 +*/ diff --git a/tolk-tester/tests/try-func.tolk b/tolk-tester/tests/try-func.tolk new file mode 100644 index 000000000..7963a8500 --- /dev/null +++ b/tolk-tester/tests/try-func.tolk @@ -0,0 +1,151 @@ +fun unsafeGetInt(any: X): int + asm "NOP"; + +@method_id(11) +fun foo(x: int): int { + try { + if (x == 7) { + throw 44; + } + return x; + } catch { + return 2; + } +} + +@inline +@method_id(12) +fun foo_inline(x: int): int { + try { + assert(!(x == 7)) throw 44; + return x; + } catch { + return 2; + } +} + +@inline_ref +@method_id(13) +fun foo_inlineref(x: int): int { + try { + if (x == 7) { throw (44, 2); } + return x; + } catch (_, arg) { + return unsafeGetInt(arg); + } +} + +@method_id(1) +fun test(x: int, y: int, z: int): int { + y = foo(y); + return x * 100 + y * 10 + z; +} + +@method_id(2) +fun test_inline(x: int, y: int, z: int): int { + y = foo_inline(y); + return x * 100 + y * 10 + z; +} + +@method_id(3) +fun test_inlineref(x: int, y: int, z: int): int { + y = foo_inlineref(y); + return x * 100 + y * 10 + z; +} + +@inline +@method_id(14) +fun foo_inline_big( + x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int, x8: int, x9: int, x10: int, + x11: int, x12: int, x13: int, x14: int, x15: int, x16: int, x17: int, x18: int, x19: int, x20: int +): int { + try { + if (x1 == 7) { + throw 44; + } + return x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14 + x15 + x16 + x17 + x18 + x19 + x20; + } catch { + 
return 1; + } +} + +@method_id(4) +fun test_inline_big(x: int, y: int, z: int): int { + y = foo_inline_big( + y, y + 1, y + 2, y + 3, y + 4, y + 5, y + 6, y + 7, y + 8, y + 9, + y + 10, y + 11, y + 12, y + 13, y + 14, y + 15, y + 16, y + 17, y + 18, y + 19); + return x * 1000000 + y * 1000 + z; +} + +@method_id(15) +fun foo_big( + x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int, x8: int, x9: int, x10: int, + x11: int, x12: int, x13: int, x14: int, x15: int, x16: int, x17: int, x18: int, x19: int, x20: int +): int { + try { + if (x1 == 7) { + throw (44, 1); + } + return x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 + x14 + x15 + x16 + x17 + x18 + x19 + x20; + } catch (code, arg) { + return unsafeGetInt(arg); + } +} + +@method_id(5) +fun test_big(x: int, y: int, z: int): int { + y = foo_big( + y, y + 1, y + 2, y + 3, y + 4, y + 5, y + 6, y + 7, y + 8, y + 9, + y + 10, y + 11, y + 12, y + 13, y + 14, y + 15, y + 16, y + 17, y + 18, y + 19); + return x * 1000000 + y * 1000 + z; +} + +@method_id(16) +fun test_catch_into_same(x: int): int { + var code = x; + try { + assert(x <= 10, 44); + } catch(code) { + return code; + } + return code; +} + + +@method_id(17) +fun test_catch_into_same_2(x: int): int { + var code = x; + try { + if (x > 10) { + throw 44; + } + } catch(code) { + } + return code; +} + +fun main() { +} + +/** + method_id | in | out +@testcase | 1 | 1 2 3 | 123 +@testcase | 1 | 3 8 9 | 389 +@testcase | 1 | 3 7 9 | 329 +@testcase | 2 | 1 2 3 | 123 +@testcase | 2 | 3 8 9 | 389 +@testcase | 2 | 3 7 9 | 329 +@testcase | 3 | 1 2 3 | 123 +@testcase | 3 | 3 8 9 | 389 +@testcase | 3 | 3 7 9 | 329 +@testcase | 4 | 4 8 9 | 4350009 +@testcase | 4 | 4 7 9 | 4001009 +@testcase | 5 | 4 8 9 | 4350009 +@testcase | 5 | 4 7 9 | 4001009 +@testcase | 16 | 5 | 5 +@testcase | 16 | 20 | 44 +@testcase | 17 | 5 | 5 +@testcase | 17 | 20 | 20 + +@code_hash 73240939343624734070640372352271282883450660826541545137654364443860257436623 +*/ diff --git 
a/tolk-tester/tests/unbalanced_ret.tolk b/tolk-tester/tests/unbalanced_ret.tolk new file mode 100644 index 000000000..6cf42643a --- /dev/null +++ b/tolk-tester/tests/unbalanced_ret.tolk @@ -0,0 +1,17 @@ +fun main(x: int): (int, int) { + var y: int = 5; + if (x < 0) { + x *= 2; + y += 1; + if (x == -10) { + return (111, 0); + } + } + return (x + 1, y); +} +/** + method_id | in | out +@testcase | 0 | 10 | 11 5 +@testcase | 0 | -5 | 111 0 +@testcase | 0 | -4 | -7 6 +*/ diff --git a/tolk-tester/tests/unbalanced_ret_inline.tolk b/tolk-tester/tests/unbalanced_ret_inline.tolk new file mode 100644 index 000000000..4e24fbd8f --- /dev/null +++ b/tolk-tester/tests/unbalanced_ret_inline.tolk @@ -0,0 +1,19 @@ +@inline +fun foo(x: int): int { + if (x < 0) { + x *= 2; + if (x == -10) { + return 111; + } + } + return x + 1; +} +fun main(x: int): int { + return foo(x) * 10; +} +/** + method_id | in | out +@testcase | 0 | 10 | 110 +@testcase | 0 | -5 | 1110 +@testcase | 0 | -4 | -70 +*/ diff --git a/tolk-tester/tests/unbalanced_ret_loops.tolk b/tolk-tester/tests/unbalanced_ret_loops.tolk new file mode 100644 index 000000000..9b59339d8 --- /dev/null +++ b/tolk-tester/tests/unbalanced_ret_loops.tolk @@ -0,0 +1,68 @@ +fun main() { } + +@method_id(1) +fun foo_repeat(x: int): int { + repeat(10) { + x += 10; + if (x >= 100) { + return x; + } + } + return -1; +} + +@method_id(2) +fun foo_while(x: int): int { + var i: int = 0; + while (i < 10) { + x += 10; + if (x >= 100) { + return x; + } + i += 1; + } + return -1; +} + +@method_id(3) +fun foo_until(x: int): int { + var i: int = 0; + do { + x += 10; + if (x >= 100) { + return x; + } + i += 1; + } while (i < 10); + return -1; +} + +@method_id(4) +fun test4(x: int): (int, int) { + var s = 0; + var reached = false; + do { + x = x - 1; + s = s + 1; + if (x < 10) { + reached = true; + } + } while (!reached); + return (s, reached); +} + +/** + method_id | in | out +@testcase | 1 | 40 | 100 +@testcase | 1 | 33 | 103 +@testcase | 1 | -5 | -1 
+@testcase | 2 | 40 | 100 +@testcase | 2 | 33 | 103 +@testcase | 2 | -5 | -1 +@testcase | 3 | 40 | 100 +@testcase | 3 | 33 | 103 +@testcase | 3 | -5 | -1 +@testcase | 4 | 18 | 9 -1 + +@code_hash 12359153928622198176298534554187062238616102949658930329300859312625793323482 +*/ diff --git a/tolk-tester/tests/unbalanced_ret_nested.tolk b/tolk-tester/tests/unbalanced_ret_nested.tolk new file mode 100644 index 000000000..4d294ae95 --- /dev/null +++ b/tolk-tester/tests/unbalanced_ret_nested.tolk @@ -0,0 +1,40 @@ +fun foo(y: int): int { + if (y < 0) { + y *= 2; + if (y == -10) { + return 111; + } + } + return y + 1; +} +fun bar(x: int, y: int): (int, int) { + if (x < 0) { + y = foo(y); + x *= 2; + if (x == -10) { + return (111, y); + } + } + return (x + 1, y); +} +fun bar2(x: int, y: int): (int,int) { + return bar(x, y); +} +fun main(x: int, y: int): (int, int) { + (x, y) = bar2(x, y); + return (x, y * 10); +} +/** + method_id | in | out +@testcase | 0 | 3 3 | 4 30 +@testcase | 0 | 3 -5 | 4 -50 +@testcase | 0 | 3 -4 | 4 -40 +@testcase | 0 | -5 3 | 111 40 +@testcase | 0 | -5 -5 | 111 1110 +@testcase | 0 | -5 -4 | 111 -70 +@testcase | 0 | -4 3 | -7 40 +@testcase | 0 | -4 -5 | -7 1110 +@testcase | 0 | -4 -4 | -7 -70 + +@code_hash 68625253347714662162648433047986779710161195298061582217368558479961252943991 +*/ diff --git a/tolk-tester/tests/use-before-declare.tolk b/tolk-tester/tests/use-before-declare.tolk new file mode 100644 index 000000000..84569c0cf --- /dev/null +++ b/tolk-tester/tests/use-before-declare.tolk @@ -0,0 +1,49 @@ +fun main(): int { + var c: cell = my_begin_cell().store_int(demo_10, 32).my_end_cell(); + var cs: slice = my_begin_parse(c); + var ten: int = cs~load_int(32); + return 1 + demo1(ten) + demo_var; +} + +@pure +fun my_begin_cell(): builder +asm "NEWC"; +@pure +fun my_end_cell(b: builder): cell +asm "ENDC"; +@pure +fun my_begin_parse(c: cell): slice +asm "CTOS"; + +fun demo1(v: int): int { + demo_var = 23; + return v; +} + +global demo_var: int; 
+const demo_10: int = 10; + +fun test1(): int { + var demo_var: int = demo_10; + var demo_slice: int = demo_20; + if (demo_var > 0) { + var demo_var: tuple = null; + var demo_slice: tuple = null; + } + return demo_var + demo_slice; +} + +global demo_slice: slice; +const demo_20: int = 20; + +/** +@testcase | 0 | | 34 + +@fif_codegen +""" + test1 PROC:<{ + // + 30 PUSHINT // _10 + }> +""" + */ diff --git a/tolk-tester/tests/w1.tolk b/tolk-tester/tests/w1.tolk new file mode 100644 index 000000000..eb06bec67 --- /dev/null +++ b/tolk-tester/tests/w1.tolk @@ -0,0 +1,14 @@ +fun main(id: int): (int, int) { + if (id > 0) { + if (id > 10) { + return (2 * id, 3 * id); + } + } + return (5, 6); +} +/** + method_id | in | out +@testcase | 0 | 0 | 5 6 +@testcase | 0 | 4 | 5 6 +@testcase | 0 | 11 | 22 33 +*/ diff --git a/tolk-tester/tests/w2.tolk b/tolk-tester/tests/w2.tolk new file mode 100644 index 000000000..b013ab06d --- /dev/null +++ b/tolk-tester/tests/w2.tolk @@ -0,0 +1,34 @@ +@method_id(101) +fun test1(cs: slice) { + return cs~load_uint(8)+cs~load_uint(8)+cs~load_uint(8)+cs~load_uint(8); +} + +@method_id(102) +fun test2(cs: slice) { + var (x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, + x11, x12, x13, x14, x15, x16, x17, x18, x19) = f(cs); + return x0 + x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + + x10+ x11+ x12+ x13+ x14+ x15+ x16+ x17+ x18+ x19; +} + +fun main(cs: slice) { + return (cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), cs~load_uint(8)); +} + +fun f(cs: slice) { + return (cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), + cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), + cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), + cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), + cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), cs~load_uint(8)); +} + + +/** + method_id | in | out +@testcase | 102 | x{000102030405060708090a0b0c0d0e0f10111213} | 190 +@testcase | 101 | 
x{000102030405060708090a0b0c0d0e0f10111213} | 6 +@testcase | 0 | x{000102030405060708090a0b0c0d0e0f10111213} | 0 1 2 3 + +@code_hash 58474889199998908444151060994149070836199913191952040273624197630531731101157 +*/ diff --git a/tolk-tester/tests/w6.tolk b/tolk-tester/tests/w6.tolk new file mode 100644 index 000000000..2f8956440 --- /dev/null +++ b/tolk-tester/tests/w6.tolk @@ -0,0 +1,19 @@ +fun main(x: int): int { + var i: int = 0; + // int f = false; + do { + i = i + 1; + if (i > 5) { + return 1; + } + var f: int = (i * i == 64); + } while (!f); + return -1; +} + +/** + method_id | in | out +@testcase | 0 | 0 | 1 + +@code_hash 36599880583276393028571473830850694081778552118303309411432666239740650614479 +*/ diff --git a/tolk-tester/tests/w7.tolk b/tolk-tester/tests/w7.tolk new file mode 100644 index 000000000..85081fbb3 --- /dev/null +++ b/tolk-tester/tests/w7.tolk @@ -0,0 +1,26 @@ +@method_id(1) +fun test(y: int): int { + var x: int = 1; + if (y > 0) { + return 1; + } + return x > 0; +} + +@method_id(2) +fun f(y: int): int { + if (y > 0) { + return 1; + } + return 2; +} + +fun main() { } + +/** + method_id | in | out +@testcase | 1 | 10 | 1 +@testcase | 1 | -5 | -1 +@testcase | 2 | 10 | 1 +@testcase | 2 | -5 | 2 +*/ diff --git a/tolk-tester/tests/w9.tolk b/tolk-tester/tests/w9.tolk new file mode 100644 index 000000000..b88dc736e --- /dev/null +++ b/tolk-tester/tests/w9.tolk @@ -0,0 +1,14 @@ +fun main(s: int) { + var (z, t) = (17, s); + while (z > 0) { + t = s; + z -= 1; + } + return ~ t; +} + +/** + method_id | in | out +@testcase | 0 | 1 | -2 +@testcase | 0 | 5 | -6 +*/ diff --git a/tolk-tester/tolk-tester.js b/tolk-tester/tolk-tester.js new file mode 100644 index 000000000..3fb92ff08 --- /dev/null +++ b/tolk-tester/tolk-tester.js @@ -0,0 +1,525 @@ +// Usage: `node tolk-tester.js tests_dir` OR `node tolk-tester.js test_file.tolk` +// from current dir, providing some env (see getenv() calls). 
+// This is a JS version of tolk-tester.py to test Tolk compiled to WASM. +// Don't forget to keep it identical to Python version! + +const fs = require('fs'); +const os = require('os'); +const path = require('path'); +const child_process = require('child_process'); + +function print(...args) { + console.log(...args) +} + +/** @return {string} */ +function getenv(name, def = null) { + if (name in process.env) + return process.env[name] + if (def === null) { + print(`Environment variable ${name} is not set`) + process.exit(1) + } + return def +} + +const TOLKFIFTLIB_MODULE = getenv('TOLKFIFTLIB_MODULE') +const TOLKFIFTLIB_WASM = getenv('TOLKFIFTLIB_WASM') +const FIFT_EXECUTABLE = getenv('FIFT_EXECUTABLE') +const FIFT_LIBS_FOLDER = getenv('FIFTPATH') // this env is needed for fift to work properly +const TMP_DIR = os.tmpdir() + +class CmdLineOptions { + constructor(/**string[]*/ argv) { + if (argv.length !== 3) { + print("Usage: node tolk-tester.js tests_dir OR node tolk-tester.js test_file.tolk") + process.exit(1) + } + if (!fs.existsSync(argv[2])) { + print(`Input '${argv[2]}' doesn't exist`) + process.exit(1) + } + + if (fs.lstatSync(argv[2]).isDirectory()) { + this.tests_dir = argv[2] + this.test_file = null + } else { + this.tests_dir = path.dirname(argv[2]) + this.test_file = argv[2] + } + } + + /** @return {string[]} */ + find_tests() { + if (this.test_file) // an option to run (debug) a single test + return [this.test_file] + + let tests = fs.readdirSync(this.tests_dir).filter(f => f.endsWith('.tolk') || f.endsWith('.ton')) + tests.sort() + return tests.map(f => path.join(this.tests_dir, f)) + } +} + + +class ParseInputError extends Error { +} + +class TolkCompilationFailedError extends Error { + constructor(/**string*/ message, /**string*/ stderr) { + super(message); + this.stderr = stderr + } +} + +class TolkCompilationSucceededError extends Error { +} + +class FiftExecutionFailedError extends Error { + constructor(/**string*/ message, /**string*/ stderr) { 
+ super(message); + this.stderr = stderr + } +} + +class CompareOutputError extends Error { + constructor(/**string*/ message, /**string*/ output) { + super(message); + this.output = output + } +} + +class CompareFifCodegenError extends Error { +} + +class CompareCodeHashError extends Error { +} + + +/* + * In positive tests, there are several testcases "input X should produce output Y". + */ +class TolkTestCaseInputOutput { + static reJustNumber = /^[-+]?\d+$/ + static reMathExpr = /^[0x123456789()+\-*/<>]*$/ + + constructor(/**string*/ method_id_str, /**string*/ input_str, /**string*/ output_str) { + let processed_inputs = [] + for (let in_arg of input_str.split(' ')) { + if (in_arg.length === 0) + continue + else if (in_arg.startsWith("x{") || TolkTestCaseInputOutput.reJustNumber.test(in_arg)) + processed_inputs.push(in_arg) + else if (TolkTestCaseInputOutput.reMathExpr.test(in_arg)) + // replace "3<<254" with "3n<<254n" (big number) before eval (in Python we don't need this) + processed_inputs.push(eval(in_arg.replace('//', '/').replace(/(\d)($|\D)/gmi, '$1n$2')).toString()) + else if (in_arg === "null") + processed_inputs.push("null") + else + throw new ParseInputError(`'${in_arg}' can't be evaluated`) + } + + this.method_id = +method_id_str + this.input = processed_inputs.join(' ') + this.expected_output = output_str + } + + check(/**string[]*/ stdout_lines, /**number*/ line_idx) { + if (stdout_lines[line_idx] !== this.expected_output) + throw new CompareOutputError(`error on case #${line_idx + 1} (${this.method_id} | ${this.input}): expected '${this.expected_output}', found '${stdout_lines[line_idx]}'`, stdout_lines.join("\n")) + } +} + +/* + * @stderr checks, when compilation fails, that stderr (compilation error) is expected. + * If it's multiline, all lines must be present in specified order. 
+ */ +class TolkTestCaseStderr { + constructor(/**string[]*/ stderr_pattern, /**boolean*/ avoid) { + this.stderr_pattern = stderr_pattern + this.avoid = avoid + } + + check(/**string*/ stderr) { + const line_match = this.find_pattern_in_stderr(stderr.split(/\n/)) + if (line_match === -1 && !this.avoid) + throw new CompareOutputError("pattern not found in stderr:\n" + + this.stderr_pattern.map(x => " " + x).join("\n"), stderr) + else if (line_match !== -1 && this.avoid) + throw new CompareOutputError(`pattern found (line ${line_match + 1}), but not expected to be:\n` + + this.stderr_pattern.map(x => " " + x).join("\n"), stderr) + } + + find_pattern_in_stderr(/**string[]*/ stderr) { + for (let line_start = 0; line_start < stderr.length; ++line_start) + if (this.try_match_pattern(0, stderr, line_start)) + return line_start + return -1 + } + + try_match_pattern(/**number*/ pattern_offset, /**string[]*/ stderr, /**number*/ offset) { + if (pattern_offset >= this.stderr_pattern.length) + return true + if (offset >= stderr.length) + return false + + const line_pattern = this.stderr_pattern[pattern_offset] + const line_output = stderr[offset] + return line_output.includes(line_pattern) && this.try_match_pattern(pattern_offset + 1, stderr, offset + 1) + } +} + +/* + * @fif_codegen checks that contents of compiled.fif matches the expected pattern. + * @fif_codegen_avoid checks that is does not match the pattern. + * See comments in run_tests.py. 
+ */ +class TolkTestCaseFifCodegen { + constructor(/**string[]*/ fif_pattern, /**boolean*/ avoid) { + /** @type {string[]} */ + this.fif_pattern = fif_pattern.map(s => s.trim()) + this.avoid = avoid + } + + check(/**string[]*/ fif_output) { + const line_match = this.find_pattern_in_fif_output(fif_output) + if (line_match === -1 && !this.avoid) + throw new CompareFifCodegenError("pattern not found:\n" + + this.fif_pattern.map(x => " " + x).join("\n")) + else if (line_match !== -1 && this.avoid) + throw new CompareFifCodegenError(`pattern found (line ${line_match + 1}), but not expected to be:\n` + + this.fif_pattern.map(x => " " + x).join("\n")) + } + + find_pattern_in_fif_output(/**string[]*/ fif_output) { + for (let line_start = 0; line_start < fif_output.length; ++line_start) + if (this.try_match_pattern(0, fif_output, line_start)) + return line_start + return -1 + } + + try_match_pattern(/**number*/ pattern_offset, /**string[]*/ fif_output, /**number*/ offset) { + if (pattern_offset >= this.fif_pattern.length) + return true + if (offset >= fif_output.length) + return false + const line_pattern = this.fif_pattern[pattern_offset] + const line_output = fif_output[offset] + + if (line_pattern !== "...") { + if (!TolkTestCaseFifCodegen.does_line_match(line_pattern, line_output)) + return false + return this.try_match_pattern(pattern_offset + 1, fif_output, offset + 1) + } + while (offset < fif_output.length) { + if (this.try_match_pattern(pattern_offset + 1, fif_output, offset)) + return true + offset = offset + 1 + } + return false + } + + static split_line_to_cmd_and_comment(/**string*/ trimmed_line) { + const pos = trimmed_line.indexOf("//") + if (pos === -1) + return [trimmed_line, null] + else + return [trimmed_line.substring(0, pos).trimEnd(), trimmed_line.substring(pos + 2).trimStart()] + } + + static does_line_match(/**string*/ line_pattern, /**string*/ line_output) { + const [cmd_pattern, comment_pattern] = 
TolkTestCaseFifCodegen.split_line_to_cmd_and_comment(line_pattern) + const [cmd_output, comment_output] = TolkTestCaseFifCodegen.split_line_to_cmd_and_comment(line_output.trim()) + return cmd_pattern === cmd_output && (comment_pattern === null || comment_pattern === comment_output) + } +} + +/* + * @code_hash checks that hash of compiled output.fif matches the provided value. + * It's used to "record" code boc hash and to check that it remains the same on compiler modifications. + * Being much less flexible than @fif_codegen, it nevertheless gives a guarantee of bytecode stability. + */ +class TolkTestCaseExpectedHash { + constructor(/**string*/ expected_hash) { + this.code_hash = expected_hash + } + + check(/**string*/ fif_code_hash) { + if (this.code_hash !== fif_code_hash) + throw new CompareCodeHashError(`expected ${this.code_hash}, actual ${fif_code_hash}`) + } +} + + +class TolkTestFile { + constructor(/**string*/ tolk_filename, /**string*/ artifacts_folder) { + this.line_idx = 0 + this.tolk_filename = tolk_filename + this.artifacts_folder = artifacts_folder + this.compilation_should_fail = false + /** @type {TolkTestCaseStderr[]} */ + this.stderr_includes = [] + /** @type {TolkTestCaseInputOutput[]} */ + this.input_output = [] + /** @type {TolkTestCaseFifCodegen[]} */ + this.fif_codegen = [] + /** @type {TolkTestCaseExpectedHash | null} */ + this.expected_hash = null + /** @type {string | null} */ + this.experimental_options = null + } + + parse_input_from_tolk_file() { + const lines = fs.readFileSync(this.tolk_filename, 'utf-8').split(/\r?\n/) + this.line_idx = 0 + + while (this.line_idx < lines.length) { + const line = lines[this.line_idx] + if (line.startsWith('@testcase')) { + let s = line.split("|").map(p => p.trim()) + if (s.length !== 4) + throw new ParseInputError(`incorrect format of @testcase: ${line}`) + this.input_output.push(new TolkTestCaseInputOutput(s[1], s[2], s[3])) + } else if (line.startsWith('@compilation_should_fail')) { + 
this.compilation_should_fail = true + } else if (line.startsWith('@stderr')) { + this.stderr_includes.push(new TolkTestCaseStderr(this.parse_string_value(lines), false)) + } else if (line.startsWith("@fif_codegen_avoid")) { + this.fif_codegen.push(new TolkTestCaseFifCodegen(this.parse_string_value(lines), true)) + } else if (line.startsWith("@fif_codegen")) { + this.fif_codegen.push(new TolkTestCaseFifCodegen(this.parse_string_value(lines), false)) + } else if (line.startsWith("@code_hash")) { + this.expected_hash = new TolkTestCaseExpectedHash(this.parse_string_value(lines, false)[0]) + } else if (line.startsWith("@experimental_options")) { + this.experimental_options = line.substring(22) + } + this.line_idx++ + } + + if (this.input_output.length === 0 && !this.compilation_should_fail) + throw new ParseInputError("no @testcase present") + if (this.input_output.length !== 0 && this.compilation_should_fail) + throw new ParseInputError("@testcase present, but compilation_should_fail") + } + + /** @return {string[]} */ + parse_string_value(/**string[]*/ lines, allow_multiline = true) { + // a tag must be followed by a space (single-line), e.g. 
'@stderr some text' + // or be a multi-line value, surrounded by """ + const line = lines[this.line_idx] + const pos_sp = line.indexOf(' ') + const is_multi_line = lines[this.line_idx + 1] === '"""' + const is_single_line = pos_sp !== -1 + if (!is_single_line && !is_multi_line) + throw new ParseInputError(`${line} value is empty (not followed by a string or a multiline """)`) + if (is_single_line && is_multi_line) + throw new ParseInputError(`${line.substring(0, pos_sp)} value is both single-line and followed by """`) + if (is_multi_line && !allow_multiline) + throw new ParseInputError(`${line} value should be single-line`); + + if (is_single_line) + return [line.substring(pos_sp + 1).trim()] + + this.line_idx += 2 + let s_multiline = [] + while (this.line_idx < lines.length && lines[this.line_idx] !== '"""') { + s_multiline.push(lines[this.line_idx]) + this.line_idx = this.line_idx + 1 + } + return s_multiline + } + + get_compiled_fif_filename() { + return this.artifacts_folder + "/compiled.fif" + } + + get_runner_fif_filename() { + return this.artifacts_folder + "/runner.fif" + } + + async run_and_check() { + const wasmModule = await compileWasm(TOLKFIFTLIB_MODULE, TOLKFIFTLIB_WASM) + let res = compileFile(wasmModule, this.tolk_filename, this.experimental_options) + let exit_code = res.status === 'ok' ? 
0 : 1 + let stderr = res.message + let stdout = '' + + if (exit_code === 0 && this.compilation_should_fail) + throw new TolkCompilationSucceededError("compilation succeeded, but it should have failed") + + if (exit_code !== 0 && this.compilation_should_fail) { + for (let should_include of this.stderr_includes) + should_include.check(stderr) + return + } + + if (exit_code !== 0 && !this.compilation_should_fail) + throw new TolkCompilationFailedError(`tolk exit_code = ${exit_code}`, stderr) + + fs.writeFileSync(this.get_compiled_fif_filename(), `"Asm.fif" include\n${res.fiftCode}`) + { + let runner = `"${this.get_compiled_fif_filename()}" include x.trim()).filter(s => s.length > 0) + let fif_code_hash = null + if (this.expected_hash !== null) { // then the last stdout line is a hash + fif_code_hash = stdout_lines[stdout_lines.length - 1] + stdout_lines = stdout_lines.slice(0, stdout_lines.length - 1) + } + + if (stdout_lines.length !== this.input_output.length) + throw new CompareOutputError(`unexpected number of fift output: ${stdout_lines.length} lines, but ${this.input_output.length} testcases`, stdout) + + for (let i = 0; i < stdout_lines.length; ++i) + this.input_output[i].check(stdout_lines, i) + + if (this.fif_codegen.length) { + const fif_output = fs.readFileSync(this.get_compiled_fif_filename(), 'utf-8').split(/\r?\n/) + for (let fif_codegen of this.fif_codegen) + fif_codegen.check(fif_output) + } + + if (this.expected_hash !== null) + this.expected_hash.check(fif_code_hash) + } +} + +async function run_all_tests(/**string[]*/ tests) { + for (let ti = 0; ti < tests.length; ++ti) { + let tolk_filename = tests[ti] + print(`Running test ${ti + 1}/${tests.length}: ${tolk_filename}`) + + let artifacts_folder = path.join(TMP_DIR, tolk_filename) + let testcase = new TolkTestFile(tolk_filename, artifacts_folder) + + try { + if (!fs.existsSync(artifacts_folder)) + fs.mkdirSync(artifacts_folder, {recursive: true}) + testcase.parse_input_from_tolk_file() + await 
testcase.run_and_check() + fs.rmSync(artifacts_folder, {recursive: true}) + + if (testcase.compilation_should_fail) + print(" OK, compilation failed as it should") + else + print(` OK, ${testcase.input_output.length} cases`) + } catch (e) { + if (e instanceof ParseInputError) { + print(` Error parsing input (cur line #${testcase.line_idx + 1}):`, e.message) + process.exit(2) + } else if (e instanceof TolkCompilationFailedError) { + print(" Error compiling tolk:", e.message) + print(" stderr:") + print(e.stderr.trimEnd()) + process.exit(2) + } else if (e instanceof FiftExecutionFailedError) { + print(" Error executing fift:", e.message) + print(" stderr:") + print(e.stderr.trimEnd()) + print(" compiled.fif at:", testcase.get_compiled_fif_filename()) + process.exit(2) + } else if (e instanceof CompareOutputError) { + print(" Mismatch in output:", e.message) + print(" Full output:") + print(e.output.trimEnd()) + print(" Was compiled to:", testcase.get_compiled_fif_filename()) + process.exit(2) + } else if (e instanceof CompareFifCodegenError) { + print(" Mismatch in fif codegen:", e.message) + print(" Was compiled to:", testcase.get_compiled_fif_filename()) + print(fs.readFileSync(testcase.get_compiled_fif_filename(), 'utf-8')) + process.exit(2) + } else if (e instanceof CompareCodeHashError) { + print(" Mismatch in code hash:", e.message) + print(" Was compiled to:", testcase.get_compiled_fif_filename()) + process.exit(2) + } + throw e + } + } +} + +const tests = new CmdLineOptions(process.argv).find_tests() +print(`Found ${tests.length} tests`) +run_all_tests(tests).then( + () => print(`Done, ${tests.length} tests`), + console.error +) + +// below are WASM helpers, which don't exist in Python version + +process.setMaxListeners(0); + +function copyToCString(mod, str) { + const len = mod.lengthBytesUTF8(str) + 1; + const ptr = mod._malloc(len); + mod.stringToUTF8(str, ptr, len); + return ptr; +} + +function copyToCStringPtr(mod, str, ptr) { + const allocated = 
copyToCString(mod, str); + mod.setValue(ptr, allocated, '*'); + return allocated; +} + +function copyFromCString(mod, ptr) { + return mod.UTF8ToString(ptr); +} + +/** @return {{status: string, message: string, fiftCode: string, codeBoc: string, codeHashHex: string}} */ +function compileFile(mod, filename, experimentalOptions) { + // see tolk-wasm.cpp: typedef void (*CStyleReadFileCallback)(int, char const*, char**, char**) + const callbackPtr = mod.addFunction((kind, dataPtr, destContents, destError) => { + if (kind === 0) { // realpath + try { + const relativeFilename = copyFromCString(mod, dataPtr) + copyToCStringPtr(mod, fs.realpathSync(relativeFilename), destContents); + } catch (err) { + copyToCStringPtr(mod, 'cannot find file', destError); + } + } else if (kind === 1) { // read file + try { + const filename = copyFromCString(mod, dataPtr) // already normalized (as returned above) + copyToCStringPtr(mod, fs.readFileSync(filename).toString('utf-8'), destContents); + } catch (err) { + copyToCStringPtr(mod, err.message || err.toString(), destError); + } + } else { + copyToCStringPtr(mod, 'Unknown callback kind=' + kind, destError); + } + }, 'viiii'); + + const config = { + optimizationLevel: 2, + withStackComments: true, + experimentalOptions: experimentalOptions || undefined, + stdlibLocation: __dirname + '/../crypto/smartcont/stdlib.tolk', + entrypointFileName: filename + }; + + const configPtr = copyToCString(mod, JSON.stringify(config)); + + const responsePtr = mod._tolk_compile(configPtr, callbackPtr); + + return JSON.parse(copyFromCString(mod, responsePtr)); +} + +async function compileWasm(tolkFiftLibJsFileName, tolkFiftLibWasmFileName) { + const wasmModule = require(tolkFiftLibJsFileName) + const wasmBinary = new Uint8Array(fs.readFileSync(tolkFiftLibWasmFileName)) + + return await wasmModule({ wasmBinary }) +} diff --git a/tolk-tester/tolk-tester.py b/tolk-tester/tolk-tester.py new file mode 100644 index 000000000..261ab4962 --- /dev/null +++ 
b/tolk-tester/tolk-tester.py @@ -0,0 +1,430 @@ +# Usage: `tolk-tester.py tests_dir` OR `tolk-tester.py test_file.tolk` +# from current dir, providing some env (see getenv() calls). +# Every .tolk file should provide /* testcase description in a comment */, consider tests/ folder. +# +# Tests for Tolk can be +# * positive (compiled to .fif, run with fift, compared output with the one expected) +# * negative (compilation fails, and it's expected; patterns in stderr can be specified) +# +# Note, that there is also tolk-tester.js to test Tolk compiled to WASM. +# Don't forget to keep it identical to Python version! + +import os +import os.path +import re +import shutil +import subprocess +import sys +import tempfile +from typing import List + + +def getenv(name, default=None): + if name in os.environ: + return os.environ[name] + if default is None: + print("Environment variable", name, "is not set", file=sys.stderr) + exit(1) + return default + + +TOLK_EXECUTABLE = getenv("TOLK_EXECUTABLE", "tolk") +FIFT_EXECUTABLE = getenv("FIFT_EXECUTABLE", "fift") +FIFT_LIBS_FOLDER = getenv("FIFTPATH") # this env is needed for fift to work properly +TMP_DIR = tempfile.mkdtemp() + + +class CmdLineOptions: + def __init__(self, argv: List[str]): + if len(argv) != 2: + print("Usage: tolk-tester.py tests_dir OR tolk-tester.py test_file.tolk", file=sys.stderr) + exit(1) + if not os.path.exists(argv[1]): + print("Input '%s' doesn't exist" % argv[1], file=sys.stderr) + exit(1) + + if os.path.isdir(argv[1]): + self.tests_dir = argv[1] + self.test_file = None + else: + self.tests_dir = os.path.dirname(argv[1]) + self.test_file = argv[1] + + def find_tests(self) -> List[str]: + if self.test_file is not None: # an option to run (debug) a single test + return [self.test_file] + + tests = [f for f in os.listdir(self.tests_dir) if f.endswith(".tolk") or f.endswith(".ton")] + tests.sort() + return [os.path.join(self.tests_dir, f) for f in tests] + + +class ParseInputError(Exception): + pass + + 
+class TolkCompilationFailedError(Exception): + def __init__(self, message: str, stderr: str): + super().__init__(message) + self.stderr = stderr + + +class TolkCompilationSucceededError(Exception): + pass + + +class FiftExecutionFailedError(Exception): + def __init__(self, message: str, stderr: str): + super().__init__(message) + self.stderr = stderr + + +class CompareOutputError(Exception): + def __init__(self, message: str, output: str): + super().__init__(message) + self.output = output + + +class CompareFifCodegenError(Exception): + pass + + +class CompareCodeHashError(Exception): + pass + + +class TolkTestCaseInputOutput: + """ + In positive tests, there are several testcases "input X should produce output Y". + They are written as a table: + @testcase | method_id | input (one or several) | output + """ + reJustNumber = re.compile(r"[-+]?\d+") + reMathExpr = re.compile(r"[0x123456789()+\-*/<>]+") + + def __init__(self, method_id_str: str, input_str: str, output_str: str): + processed_inputs = [] + for in_arg in input_str.split(" "): + if len(in_arg) == 0: + continue + elif in_arg.startswith("x{") or TolkTestCaseInputOutput.reJustNumber.fullmatch(in_arg): + processed_inputs.append(in_arg) + elif TolkTestCaseInputOutput.reMathExpr.fullmatch(in_arg): + processed_inputs.append(str(eval(in_arg))) + elif in_arg == "null": + processed_inputs.append("null") + else: + raise ParseInputError("'%s' can't be evaluated" % in_arg) + + self.method_id = int(method_id_str) + self.input = " ".join(processed_inputs) + self.expected_output = output_str + + def check(self, stdout_lines: List[str], line_idx: int): + if stdout_lines[line_idx] != self.expected_output: + raise CompareOutputError("error on case #%d (%d | %s): expected '%s', found '%s'" % (line_idx + 1, self.method_id, self.input, self.expected_output, stdout_lines[line_idx]), "\n".join(stdout_lines)) + + +class TolkTestCaseStderr: + """ + @stderr checks, when compilation fails, that stderr (compilation error) is 
expected. + If it's multiline, all lines must be present in specified order. + """ + + def __init__(self, stderr_pattern: List[str], avoid: bool): + self.stderr_pattern = stderr_pattern + self.avoid = avoid + + def check(self, stderr: str): + line_match = self.find_pattern_in_stderr(stderr.splitlines()) + if line_match == -1 and not self.avoid: + raise CompareOutputError("pattern not found in stderr:\n%s" % + "\n".join(map(lambda x: " " + x, self.stderr_pattern)), stderr) + elif line_match != -1 and self.avoid: + raise CompareOutputError("pattern found (line %d), but not expected to be:\n%s" % + (line_match + 1, "\n".join(map(lambda x: " " + x, self.stderr_pattern))), stderr) + + def find_pattern_in_stderr(self, stderr: List[str]) -> int: + for line_start in range(len(stderr)): + if self.try_match_pattern(0, stderr, line_start): + return line_start + return -1 + + def try_match_pattern(self, pattern_offset: int, stderr: List[str], offset: int) -> bool: + if pattern_offset >= len(self.stderr_pattern): + return True + if offset >= len(stderr): + return False + + line_pattern = self.stderr_pattern[pattern_offset] + line_output = stderr[offset] + return line_output.find(line_pattern) != -1 and self.try_match_pattern(pattern_offset + 1, stderr, offset + 1) + + +class TolkTestCaseFifCodegen: + """ + @fif_codegen checks that contents of compiled.fif matches the expected pattern. + @fif_codegen_avoid checks that is does not match the pattern. + The pattern is a multiline piece of fift code, optionally with "..." meaning "any lines here". + See tests/codegen_check_demo.tolk of how it looks. + A notable thing about indentations (spaces at line starts): + Taking them into account will complicate the code without reasonable profit, + that's why we just trim every string. + And one more word about //comments. Tolk inserts them into fift output. + If a line in the pattern contains a //comment, it's expected to be equal. + If a line does not, we just compare a command. 
+ """ + + def __init__(self, fif_pattern: List[str], avoid: bool): + self.fif_pattern = [s.strip() for s in fif_pattern] + self.avoid = avoid + + def check(self, fif_output: List[str]): + line_match = self.find_pattern_in_fif_output(fif_output) + if line_match == -1 and not self.avoid: + raise CompareFifCodegenError("pattern not found:\n%s" % + "\n".join(map(lambda x: " " + x, self.fif_pattern))) + elif line_match != -1 and self.avoid: + raise CompareFifCodegenError("pattern found (line %d), but not expected to be:\n%s" % + (line_match + 1, "\n".join(map(lambda x: " " + x, self.fif_pattern)))) + + def find_pattern_in_fif_output(self, fif_output: List[str]) -> int: + for line_start in range(len(fif_output)): + if self.try_match_pattern(0, fif_output, line_start): + return line_start + return -1 + + def try_match_pattern(self, pattern_offset: int, fif_output: List[str], offset: int) -> bool: + if pattern_offset >= len(self.fif_pattern): + return True + if offset >= len(fif_output): + return False + line_pattern = self.fif_pattern[pattern_offset] + line_output = fif_output[offset] + + if line_pattern != "...": + if not TolkTestCaseFifCodegen.does_line_match(line_pattern, line_output): + return False + return self.try_match_pattern(pattern_offset + 1, fif_output, offset + 1) + while offset < len(fif_output): + if self.try_match_pattern(pattern_offset + 1, fif_output, offset): + return True + offset = offset + 1 + return False + + @staticmethod + def split_line_to_cmd_and_comment(trimmed_line: str) -> tuple: + pos = trimmed_line.find("//") + if pos == -1: + return trimmed_line, None + else: + return trimmed_line[:pos].rstrip(), trimmed_line[pos + 2:].lstrip() + + @staticmethod + def does_line_match(line_pattern: str, line_output: str) -> bool: + cmd_pattern, comment_pattern = TolkTestCaseFifCodegen.split_line_to_cmd_and_comment(line_pattern) + cmd_output, comment_output = TolkTestCaseFifCodegen.split_line_to_cmd_and_comment(line_output.strip()) + return cmd_pattern == 
cmd_output and (comment_pattern is None or comment_pattern == comment_output) + + +class TolkTestCaseExpectedHash: + """ + @code_hash checks that hash of compiled output.fif matches the provided value. + It's used to "record" code boc hash and to check that it remains the same on compiler modifications. + Being much less flexible than @fif_codegen, it nevertheless gives a guarantee of bytecode stability. + """ + + def __init__(self, expected_hash: str): + self.code_hash = expected_hash + + def check(self, fif_code_hash: str): + if self.code_hash != fif_code_hash: + raise CompareCodeHashError("expected %s, actual %s" % (self.code_hash, fif_code_hash)) + + +class TolkTestFile: + def __init__(self, tolk_filename: str, artifacts_folder: str): + self.line_idx = 0 + self.tolk_filename = tolk_filename + self.artifacts_folder = artifacts_folder + self.compilation_should_fail = False + self.stderr_includes: List[TolkTestCaseStderr] = [] + self.input_output: List[TolkTestCaseInputOutput] = [] + self.fif_codegen: List[TolkTestCaseFifCodegen] = [] + self.expected_hash: TolkTestCaseExpectedHash | None = None + self.experimental_options: str | None = None + + def parse_input_from_tolk_file(self): + with open(self.tolk_filename, "r") as fd: + lines = fd.read().splitlines() + self.line_idx = 0 + + while self.line_idx < len(lines): + line = lines[self.line_idx] + if line.startswith("@testcase"): + s = [x.strip() for x in line.split("|")] + if len(s) != 4: + raise ParseInputError("incorrect format of @testcase: %s" % line) + self.input_output.append(TolkTestCaseInputOutput(s[1], s[2], s[3])) + elif line.startswith("@compilation_should_fail"): + self.compilation_should_fail = True + elif line.startswith("@stderr"): + self.stderr_includes.append(TolkTestCaseStderr(self.parse_string_value(lines), False)) + elif line.startswith("@fif_codegen_avoid"): + self.fif_codegen.append(TolkTestCaseFifCodegen(self.parse_string_value(lines), True)) + elif line.startswith("@fif_codegen"): + 
self.fif_codegen.append(TolkTestCaseFifCodegen(self.parse_string_value(lines), False)) + elif line.startswith("@code_hash"): + self.expected_hash = TolkTestCaseExpectedHash(self.parse_string_value(lines, False)[0]) + elif line.startswith("@experimental_options"): + self.experimental_options = line[22:] + self.line_idx = self.line_idx + 1 + + if len(self.input_output) == 0 and not self.compilation_should_fail: + raise ParseInputError("no @testcase present") + if len(self.input_output) != 0 and self.compilation_should_fail: + raise ParseInputError("@testcase present, but compilation_should_fail") + + def parse_string_value(self, lines: List[str], allow_multiline = True) -> List[str]: + # a tag must be followed by a space (single-line), e.g. '@stderr some text' + # or be a multi-line value, surrounded by """ + line = lines[self.line_idx] + pos_sp = line.find(' ') + is_multi_line = lines[self.line_idx + 1] == '"""' + is_single_line = pos_sp != -1 + if not is_single_line and not is_multi_line: + raise ParseInputError('%s value is empty (not followed by a string or a multiline """)' % line) + if is_single_line and is_multi_line: + raise ParseInputError('%s value is both single-line and followed by """' % line[:pos_sp]) + if is_multi_line and not allow_multiline: + raise ParseInputError("%s value should be single-line" % line) + + if is_single_line: + return [line[pos_sp + 1:].strip()] + + self.line_idx += 2 + s_multiline = [] + while self.line_idx < len(lines) and lines[self.line_idx] != '"""': + s_multiline.append(lines[self.line_idx]) + self.line_idx = self.line_idx + 1 + return s_multiline + + def get_compiled_fif_filename(self): + return self.artifacts_folder + "/compiled.fif" + + def get_runner_fif_filename(self): + return self.artifacts_folder + "/runner.fif" + + def run_and_check(self): + cmd_args = [TOLK_EXECUTABLE, "-o", self.get_compiled_fif_filename()] + if self.experimental_options: + cmd_args = cmd_args + ["-x", self.experimental_options] + res = 
subprocess.run(cmd_args + [self.tolk_filename], capture_output=True, timeout=10) + exit_code = res.returncode + stderr = str(res.stderr, "utf-8") + stdout = str(res.stdout, "utf-8") + + if exit_code == 0 and self.compilation_should_fail: + raise TolkCompilationSucceededError("compilation succeeded, but it should have failed") + + if exit_code != 0 and self.compilation_should_fail: + for should_include in self.stderr_includes: + should_include.check(stderr) + return + + if exit_code != 0 and not self.compilation_should_fail: + raise TolkCompilationFailedError("tolk exit_code = %d" % exit_code, stderr) + + with open(self.get_runner_fif_filename(), "w") as fd: + fd.write("\"%s\" include 0) { val |= _NonZero | _Pos | _Finite; - } else if (!s) { - //if (*int_const == 1) { - // val |= _Bit; - //} - val |= _Zero | _Neg | _Pos | _Finite | _Bool | _Bit; + } else { + val |= _Zero | _Neg | _Pos | _Finite; } if (val & _Finite) { val |= int_const->get_bit(0) ? _Odd : _Even; diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index cefa83b9c..b2ea55ec8 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -883,7 +883,7 @@ void Op::set_impure(const CodeBlob &code) { // todo calling this function with `code` is a bad design (flags are assigned after Op is constructed) // later it's better to check this somewhere in code.emplace_back() if (code.flags & CodeBlob::_ForbidImpure) { - throw ParseError(where, "An impure operation in a pure function"); + throw ParseError(where, "an impure operation in a pure function"); } flags |= _Impure; } @@ -891,7 +891,7 @@ void Op::set_impure(const CodeBlob &code) { void Op::set_impure(const CodeBlob &code, bool flag) { if (flag) { if (code.flags & CodeBlob::_ForbidImpure) { - throw ParseError(where, "An impure operation in a pure function"); + throw ParseError(where, "an impure operation in a pure function"); } flags |= _Impure; } else { diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp index 75cc0b4d2..5116fcf57 100644 --- 
a/tolk/ast-from-tokens.cpp +++ b/tolk/ast-from-tokens.cpp @@ -18,6 +18,7 @@ #include "ast.h" #include "platform-utils.h" #include "type-expr.h" +#include "tolk-version.h" /* * Here we construct AST for a tolk file. @@ -35,11 +36,15 @@ static bool is_comparison_binary_op(TokenType tok) { } // same as above, but to detect bitwise operators: & | ^ -// (in Tolk, they are used as logical ones due to absence of a boolean type and && || operators) static bool is_bitwise_binary_op(TokenType tok) { return tok == tok_bitwise_and || tok == tok_bitwise_or || tok == tok_bitwise_xor; } +// same as above, but to detect logical operators: && || +static bool is_logical_binary_op(TokenType tok) { + return tok == tok_logical_and || tok == tok_logical_or; +} + // same as above, but to detect addition/subtraction static bool is_add_or_sub_binary_op(TokenType tok) { return tok == tok_plus || tok == tok_minus; @@ -58,11 +63,10 @@ static void fire_error_lower_precedence(SrcLocation loc, std::string_view op_low // fire an error for a case "arg1 & arg2 | arg3" GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD -static void fire_error_mix_bitwise_and_or(SrcLocation loc, std::string_view op1, std::string_view op2) { +static void fire_error_mix_and_or_no_parenthesis(SrcLocation loc, std::string_view op1, std::string_view op2) { std::string name1 = static_cast(op1); std::string name2 = static_cast(op2); - throw ParseError(loc, "mixing " + name1 + " with " + name2 + " without parenthesis" - ", probably this code won't work as you expected. " + throw ParseError(loc, "mixing " + name1 + " with " + name2 + " without parenthesis may lead to accidental errors. 
" "Use parenthesis to emphasize operator precedence."); } @@ -81,10 +85,22 @@ static void diagnose_bitwise_precedence(SrcLocation loc, std::string_view operat if (lhs->type == ast_binary_operator && is_comparison_binary_op(lhs->as()->tok)) { fire_error_lower_precedence(loc, operator_name, lhs->as()->operator_name); } +} + +// similar to above, but detect potentially invalid usage of && and || +// since anyway, using parenthesis when both && and || occur in the same expression, +// && and || have equal operator precedence in Tolk +static void diagnose_and_or_precedence(SrcLocation loc, AnyV lhs, TokenType rhs_tok, std::string_view rhs_operator_name) { + if (auto lhs_op = lhs->try_as()) { + // handle "arg1 & arg2 | arg3" (lhs = "arg1 & arg2") + if (is_bitwise_binary_op(lhs_op->tok) && is_bitwise_binary_op(rhs_tok) && lhs_op->tok != rhs_tok) { + fire_error_mix_and_or_no_parenthesis(loc, lhs_op->operator_name, rhs_operator_name); + } - // handle "arg1 & arg2 | arg3" (lhs = "arg1 & arg2") - if (lhs->type == ast_binary_operator && is_bitwise_binary_op(lhs->as()->tok) && lhs->as()->operator_name != operator_name) { - fire_error_mix_bitwise_and_or(loc, lhs->as()->operator_name, operator_name); + // handle "arg1 && arg2 || arg3" (lhs = "arg1 && arg2") + if (is_logical_binary_op(lhs_op->tok) && is_logical_binary_op(rhs_tok) && lhs_op->tok != rhs_tok) { + fire_error_mix_and_or_no_parenthesis(loc, lhs_op->operator_name, rhs_operator_name); + } } } @@ -95,6 +111,34 @@ static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bits } } +// fire an error for FunC-style variable declaration, like "int i" +GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD +static void fire_error_FunC_style_var_declaration(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + std::string type_str = static_cast(lex.cur_str()); // int / slice / etc. + lex.next(); + std::string var_name = lex.tok() == tok_identifier ? 
static_cast(lex.cur_str()) : "name"; + throw ParseError(loc, "can't parse; probably, you use FunC-like declarations; valid syntax is `var " + var_name + ": " + type_str + " = ...`"); +} + +// replace (a == null) and similar to isNull(a) (call of a built-in function) +static AnyV maybe_replace_eq_null_with_isNull_call(V v) { + bool has_null = v->get_lhs()->type == ast_null_keyword || v->get_rhs()->type == ast_null_keyword; + bool replace = has_null && (v->tok == tok_eq || v->tok == tok_neq); + if (!replace) { + return v; + } + + auto v_ident = createV(v->loc, "__isNull"); // built-in function + AnyV v_null = v->get_lhs()->type == ast_null_keyword ? v->get_rhs() : v->get_lhs(); + AnyV v_isNull = createV(v->loc, v_ident, createV(v->loc, {v_null})); + if (v->tok == tok_neq) { + v_isNull = createV(v->loc, "!", tok_logical_not, v_isNull); + } + return v_isNull; +} + + /* * * PARSE SOURCE @@ -103,9 +147,9 @@ static void diagnose_addition_in_bitshift(SrcLocation loc, std::string_view bits // TE ::= TA | TA -> TE // TA ::= int | ... | cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ] -static TypeExpr* parse_type(Lexer& lex, V forall_list); +static TypeExpr* parse_type(Lexer& lex, V genericsT_list); -static TypeExpr* parse_type1(Lexer& lex, V forall_list) { +static TypeExpr* parse_type1(Lexer& lex, V genericsT_list) { switch (lex.tok()) { case tok_int: lex.next(); @@ -119,338 +163,307 @@ static TypeExpr* parse_type1(Lexer& lex, V forall_list) { case tok_builder: lex.next(); return TypeExpr::new_atomic(TypeExpr::_Builder); - case tok_cont: + case tok_continuation: lex.next(); return TypeExpr::new_atomic(TypeExpr::_Cont); case tok_tuple: lex.next(); return TypeExpr::new_atomic(TypeExpr::_Tuple); - case tok_var: - case tok_underscore: + case tok_auto: lex.next(); return TypeExpr::new_hole(); - case tok_identifier: { - if (int idx = forall_list ? 
forall_list->lookup_idx(lex.cur_str()) : -1; idx != -1) { + case tok_void: + lex.next(); + return TypeExpr::new_tensor({}); + case tok_bool: + lex.error("bool type is not supported yet"); + case tok_identifier: + if (int idx = genericsT_list ? genericsT_list->lookup_idx(lex.cur_str()) : -1; idx != -1) { lex.next(); - return forall_list->get_item(idx)->created_type; + return genericsT_list->get_item(idx)->created_type; } - lex.error("Is not a type identifier"); + break; + case tok_oppar: { + lex.next(); + if (lex.tok() == tok_clpar) { + lex.next(); + return TypeExpr::new_unit(); + } + std::vector sub{1, parse_type(lex, genericsT_list)}; + while (lex.tok() == tok_comma) { + lex.next(); + sub.push_back(parse_type(lex, genericsT_list)); + } + lex.expect(tok_clpar, "`)`"); + return TypeExpr::new_tensor(std::move(sub)); + } + case tok_opbracket: { + lex.next(); + if (lex.tok() == tok_clbracket) { + lex.next(); + return TypeExpr::new_tuple({}); + } + std::vector sub{1, parse_type(lex, genericsT_list)}; + while (lex.tok() == tok_comma) { + lex.next(); + sub.push_back(parse_type(lex, genericsT_list)); + } + lex.expect(tok_clbracket, "`]`"); + return TypeExpr::new_tuple(std::move(sub)); } default: break; } - TokenType c; - if (lex.tok() == tok_opbracket) { - lex.next(); - c = tok_clbracket; - } else { - lex.expect(tok_oppar, ""); - c = tok_clpar; - } - if (lex.tok() == c) { - lex.next(); - return c == tok_clpar ? TypeExpr::new_unit() : TypeExpr::new_tuple({}); - } - auto t1 = parse_type(lex, forall_list); - if (lex.tok() == tok_clpar) { - lex.expect(c, c == tok_clpar ? "')'" : "']'"); - return t1; - } - std::vector tlist{1, t1}; - while (lex.tok() == tok_comma) { - lex.next(); - tlist.push_back(parse_type(lex, forall_list)); - } - lex.expect(c, c == tok_clpar ? "')'" : "']'"); - return c == tok_clpar ? 
TypeExpr::new_tensor(std::move(tlist)) : TypeExpr::new_tuple(std::move(tlist)); + lex.unexpected(""); } -static TypeExpr* parse_type(Lexer& lex, V forall_list) { - TypeExpr* res = parse_type1(lex, forall_list); - if (lex.tok() == tok_mapsto) { +static TypeExpr* parse_type(Lexer& lex, V genericsT_list) { + TypeExpr* res = parse_type1(lex, genericsT_list); + if (lex.tok() == tok_arrow) { lex.next(); - TypeExpr* to = parse_type(lex, forall_list); + TypeExpr* to = parse_type(lex, genericsT_list); return TypeExpr::new_map(res, to); } return res; } -static AnyV parse_argument(Lexer& lex, V forall_list) { - TypeExpr* arg_type = nullptr; +AnyV parse_expr(Lexer& lex); + +static AnyV parse_parameter(Lexer& lex, V genericsT_list) { SrcLocation loc = lex.cur_location(); - if (lex.tok() == tok_underscore) { - lex.next(); - if (lex.tok() == tok_comma || lex.tok() == tok_clpar) { - auto v_empty = createV(lex.cur_location(), ""); - return createV(loc, v_empty, TypeExpr::new_hole()); - } - arg_type = TypeExpr::new_hole(); - loc = lex.cur_location(); - } else if (lex.tok() != tok_identifier) { // int, cell, [X], etc. 
- arg_type = parse_type(lex, forall_list); - } else if (lex.tok() == tok_identifier) { - if (forall_list && forall_list->lookup_idx(lex.cur_str()) != -1) { - arg_type = parse_type(lex, forall_list); - } else { - arg_type = TypeExpr::new_hole(); - } - } else { - lex.error("Is not a type identifier"); - } - if (lex.tok() == tok_underscore || lex.tok() == tok_comma || lex.tok() == tok_clpar) { - if (lex.tok() == tok_underscore) { - loc = lex.cur_location(); - lex.next(); - } - auto v_empty = createV(lex.cur_location(), ""); - return createV(loc, v_empty, arg_type); + + // argument name (or underscore for an unnamed parameter) + std::string_view param_name; + if (lex.tok() == tok_identifier) { + param_name = lex.cur_str(); + } else if (lex.tok() != tok_underscore) { + lex.unexpected("parameter name"); } - lex.check(tok_identifier, "parameter name"); - loc = lex.cur_location(); - auto v_ident = createV(lex.cur_location(), lex.cur_str()); + auto v_ident = createV(lex.cur_location(), param_name); lex.next(); - return createV(loc, v_ident, arg_type); + + // parameter type after colon, also mandatory (even explicit ":auto") + lex.expect(tok_colon, "`: `"); + TypeExpr* param_type = parse_type(lex, genericsT_list); + + return createV(loc, v_ident, param_type); } -static AnyV parse_global_var_declaration(Lexer& lex) { - TypeExpr* declared_type = nullptr; - SrcLocation loc = lex.cur_location(); - if (lex.tok() == tok_underscore) { - lex.next(); - declared_type = TypeExpr::new_hole(); - loc = lex.cur_location(); - } else if (lex.tok() != tok_identifier) { - declared_type = parse_type(lex, nullptr); +static AnyV parse_global_var_declaration(Lexer& lex, const std::vector>& annotations) { + if (!annotations.empty()) { + lex.error("@annotations are not applicable to global var declaration"); } + SrcLocation loc = lex.cur_location(); + lex.expect(tok_global, "`global`"); lex.check(tok_identifier, "global variable name"); auto v_ident = createV(lex.cur_location(), lex.cur_str()); 
lex.next(); + lex.expect(tok_colon, "`:`"); + TypeExpr* declared_type = parse_type(lex, nullptr); + if (lex.tok() == tok_comma) { + lex.error("multiple declarations are not allowed, split globals on separate lines"); + } + if (lex.tok() == tok_assign) { + lex.error("assigning to a global is not allowed at declaration"); + } + lex.expect(tok_semicolon, "`;`"); return createV(loc, v_ident, declared_type); } -AnyV parse_expr(Lexer& lex); - -static AnyV parse_constant_declaration(Lexer& lex) { - SrcLocation loc = lex.cur_location(); - TypeExpr *declared_type = nullptr; - if (lex.tok() == tok_int) { - declared_type = TypeExpr::new_atomic(TypeExpr::_Int); - lex.next(); - } else if (lex.tok() == tok_slice) { - declared_type = TypeExpr::new_atomic(TypeExpr::_Slice); - lex.next(); +static AnyV parse_constant_declaration(Lexer& lex, const std::vector>& annotations) { + if (!annotations.empty()) { + lex.error("@annotations are not applicable to global var declaration"); } + SrcLocation loc = lex.cur_location(); + lex.expect(tok_const, "`const`"); lex.check(tok_identifier, "constant name"); auto v_ident = createV(lex.cur_location(), lex.cur_str()); lex.next(); - lex.expect(tok_assign, "'='"); + TypeExpr *declared_type = nullptr; + if (lex.tok() == tok_colon) { + lex.next(); + if (lex.tok() == tok_int) { + declared_type = TypeExpr::new_atomic(TypeExpr::_Int); + lex.next(); + } else if (lex.tok() == tok_slice) { + declared_type = TypeExpr::new_atomic(TypeExpr::_Slice); + lex.next(); + } else { + lex.error("a constant can be int or slice only"); + } + } + lex.expect(tok_assign, "`=`"); AnyV init_value = parse_expr(lex); + if (lex.tok() == tok_comma) { + lex.error("multiple declarations are not allowed, split constants on separate lines"); + } + lex.expect(tok_semicolon, "`;`"); return createV(loc, v_ident, declared_type, init_value); } -static AnyV parse_argument_list(Lexer& lex, V forall_list) { +static AnyV parse_parameter_list(Lexer& lex, V genericsT_list) { SrcLocation loc = 
lex.cur_location(); - std::vector args; - lex.expect(tok_oppar, "argument list"); + std::vector params; + lex.expect(tok_oppar, "parameter list"); if (lex.tok() != tok_clpar) { - args.push_back(parse_argument(lex, forall_list)); + params.push_back(parse_parameter(lex, genericsT_list)); while (lex.tok() == tok_comma) { lex.next(); - args.push_back(parse_argument(lex, forall_list)); - } - } - lex.expect(tok_clpar, "')'"); - return createV(loc, std::move(args)); -} - -static AnyV parse_constant_declaration_list(Lexer& lex) { - std::vector consts; - SrcLocation loc = lex.cur_location(); - lex.expect(tok_const, "'const'"); - while (true) { - consts.push_back(parse_constant_declaration(lex)); - if (lex.tok() != tok_comma) { - break; + params.push_back(parse_parameter(lex, genericsT_list)); } - lex.expect(tok_comma, "','"); } - lex.expect(tok_semicolon, "';'"); - return createV(loc, std::move(consts)); + lex.expect(tok_clpar, "`)`"); + return createV(loc, std::move(params)); } -static AnyV parse_global_var_declaration_list(Lexer& lex) { - std::vector globals; - SrcLocation loc = lex.cur_location(); - lex.expect(tok_global, "'global'"); - while (true) { - globals.push_back(parse_global_var_declaration(lex)); - if (lex.tok() != tok_comma) { - break; - } - lex.expect(tok_comma, "','"); - } - lex.expect(tok_semicolon, "';'"); - return createV(loc, std::move(globals)); -} - -// parse ( E { , E } ) | () | [ E { , E } ] | [] | id | num | _ +// parse (expr) / [expr] / identifier / number static AnyV parse_expr100(Lexer& lex) { SrcLocation loc = lex.cur_location(); - if (lex.tok() == tok_oppar) { - lex.next(); - if (lex.tok() == tok_clpar) { - lex.next(); - return createV(loc, {}); - } - AnyV res = parse_expr(lex); - if (lex.tok() == tok_clpar) { + switch (lex.tok()) { + case tok_oppar: { lex.next(); - return createV(loc, res); + if (lex.tok() == tok_clpar) { + lex.next(); + return createV(loc, {}); + } + AnyV first = parse_expr(lex); + if (lex.tok() == tok_clpar) { + lex.next(); 
+ return createV(loc, first); + } + std::vector items(1, first); + while (lex.tok() == tok_comma) { + lex.next(); + items.emplace_back(parse_expr(lex)); + } + lex.expect(tok_clpar, "`)`"); + return createV(loc, std::move(items)); } - std::vector items; - bool is_type_expression = res->type == ast_type_expression; // to differ `(a,b)` and `(int,slice)` - items.emplace_back(res); - while (lex.tok() == tok_comma) { + case tok_opbracket: { lex.next(); - AnyV item = parse_expr(lex); - if (is_type_expression != (item->type == ast_type_expression)) { - lex.error("mixing type and non-type expressions inside the same tuple"); + if (lex.tok() == tok_clbracket) { + lex.next(); + return createV(loc, {}); } - items.emplace_back(item); - } - lex.expect(tok_clpar, "')'"); - if (is_type_expression) { - std::vector types; - types.reserve(items.size()); - for (AnyV item : items) { - types.emplace_back(item->as()->declared_type); + std::vector items(1, parse_expr(lex)); + while (lex.tok() == tok_comma) { + lex.next(); + items.emplace_back(parse_expr(lex)); } - return createV(loc, TypeExpr::new_tensor(std::move(types))); + lex.expect(tok_clbracket, "`]`"); + return createV(loc, std::move(items)); } - return createV(loc, std::move(items)); - } - if (lex.tok() == tok_opbracket) { - lex.next(); - if (lex.tok() == tok_clbracket) { + case tok_int_const: { + std::string_view int_val = lex.cur_str(); lex.next(); - return createV(loc, {}); + return createV(loc, int_val); } - AnyV res = parse_expr(lex); - std::vector items; - bool is_type_expression = res->type == ast_type_expression; // to differ `(a,b)` and `(int,slice)` - items.emplace_back(res); - while (lex.tok() == tok_comma) { + case tok_string_const: { + std::string_view str_val = lex.cur_str(); lex.next(); - AnyV item = parse_expr(lex); - if (is_type_expression != (item->type == ast_type_expression)) { - lex.error("mixing type and non-type expressions inside the same tuple"); - } - items.emplace_back(item); - } - 
lex.expect(tok_clbracket, "']'"); - if (is_type_expression) { - std::vector types; - types.reserve(items.size()); - for (AnyV item : items) { - types.emplace_back(item->as()->declared_type); + char modifier = 0; + if (lex.tok() == tok_string_modifier) { + modifier = lex.cur_str()[0]; + lex.next(); } - return createV(loc, TypeExpr::new_tuple(TypeExpr::new_tensor(std::move(types)))); + return createV(loc, str_val, modifier); } - return createV(loc, std::move(items)); - } - TokenType t = lex.tok(); - if (t == tok_int_const) { - std::string_view int_val = lex.cur_str(); - lex.next(); - return createV(loc, int_val); - } - if (t == tok_string_const) { - std::string_view str_val = lex.cur_str(); - lex.next(); - char modifier = 0; - if (lex.tok() == tok_string_modifier) { - modifier = lex.cur_str()[0]; + case tok_underscore: { lex.next(); + return createV(loc); + } + case tok_true: { + lex.next(); + return createV(loc, true); + } + case tok_false: { + lex.next(); + return createV(loc, false); + } + case tok_null: { + lex.next(); + return createV(loc); + } + case tok_identifier: { + std::string_view str_val = lex.cur_str(); + lex.next(); + return createV(loc, str_val); + } + default: { + // show a proper error for `int i` (FunC-style declarations) + TokenType t = lex.tok(); + if (t == tok_int || t == tok_cell || t == tok_slice || t == tok_builder || t == tok_tuple) { + fire_error_FunC_style_var_declaration(lex); + } + lex.unexpected(""); } - return createV(loc, str_val, modifier); - } - if (t == tok_underscore) { - lex.next(); - return createV(loc); - } - if (t == tok_var) { - lex.next(); - return createV(loc, TypeExpr::new_hole()); - } - if (t == tok_int || t == tok_cell || t == tok_slice || t == tok_builder || t == tok_cont || t == tok_tuple) { - lex.next(); - return createV(loc, TypeExpr::new_atomic(t)); - } - if (t == tok_true || t == tok_false) { - lex.next(); - return createV(loc, t == tok_true); - } - if (t == tok_nil) { - lex.next(); - return createV(loc); - } - if 
(t == tok_identifier) { - std::string_view str_val = lex.cur_str(); - lex.next(); - return createV(loc, str_val); } - lex.expect(tok_identifier, "identifier"); - return nullptr; } -// parse E { E } +// parse E(expr) static AnyV parse_expr90(Lexer& lex) { AnyV res = parse_expr100(lex); - while (lex.tok() == tok_oppar || lex.tok() == tok_opbracket || (lex.tok() == tok_identifier && lex.cur_str()[0] != '.' && lex.cur_str()[0] != '~')) { - if (const auto* v_type_expr = res->try_as()) { - AnyV dest = parse_expr100(lex); - return createV(v_type_expr->loc, v_type_expr->declared_type, dest); - } else { - AnyV arg = parse_expr100(lex); - return createV(res->loc, res, arg); + if (lex.tok() == tok_oppar) { + lex.next(); + + SrcLocation loc = lex.cur_location(); + std::vector args; + if (lex.tok() != tok_clpar) { + args.push_back(parse_expr(lex)); + while (lex.tok() == tok_comma) { + lex.next(); + args.push_back(parse_expr(lex)); + } } + lex.expect(tok_clpar, "`)`"); + + return createV(res->loc, res, createV(loc, std::move(args))); } return res; } -// parse E { .method E | ~method E } +// parse E .method ~method E (left-to-right) static AnyV parse_expr80(Lexer& lex) { AnyV lhs = parse_expr90(lex); while (lex.tok() == tok_identifier && (lex.cur_str()[0] == '.' || lex.cur_str()[0] == '~')) { std::string_view method_name = lex.cur_str(); - SrcLocation loc = lex.cur_location(); lex.next(); - const ASTNodeBase *arg = parse_expr100(lex); - lhs = createV(loc, method_name, lhs, arg); + + SrcLocation loc = lex.cur_location(); + std::vector args; + lex.expect(tok_oppar, "`(`"); + if (lex.tok() != tok_clpar) { + args.push_back(parse_expr(lex)); + while (lex.tok() == tok_comma) { + lex.next(); + args.push_back(parse_expr(lex)); + } + } + lex.expect(tok_clpar, "`)`"); + + lhs = createV(lhs->loc, method_name, lhs, createV(loc, std::move(args))); } return lhs; } -// parse [ ~ | - | + ] E +// parse ! 
~ - + E (unary) static AnyV parse_expr75(Lexer& lex) { TokenType t = lex.tok(); - if (t == tok_bitwise_not || t == tok_minus || t == tok_plus) { + if (t == tok_logical_not || t == tok_bitwise_not || t == tok_minus || t == tok_plus) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); AnyV rhs = parse_expr75(lex); return createV(loc, operator_name, t, rhs); - } else { - return parse_expr80(lex); } + return parse_expr80(lex); } -// parse E { (* | / | % | /% | ^/ | ~/ | ^% | ~% ) E } +// parse E * / % ^/ ~/ E (left-to-right) static AnyV parse_expr30(Lexer& lex) { AnyV lhs = parse_expr75(lex); TokenType t = lex.tok(); - while (t == tok_mul || t == tok_div || t == tok_mod || t == tok_divmod || t == tok_divC || - t == tok_divR || t == tok_modC || t == tok_modR) { + while (t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); lex.next(); @@ -461,7 +474,7 @@ static AnyV parse_expr30(Lexer& lex) { return lhs; } -// parse E { (+ | -) E } +// parse E + - E (left-to-right) static AnyV parse_expr20(Lexer& lex) { AnyV lhs = parse_expr30(lex); TokenType t = lex.tok(); @@ -476,7 +489,7 @@ static AnyV parse_expr20(Lexer& lex) { return lhs; } -// parse E { ( << | >> | ~>> | ^>> ) E } +// parse E << >> ~>> ^>> E (left-to-right) static AnyV parse_expr17(Lexer& lex) { AnyV lhs = parse_expr20(lex); TokenType t = lex.tok(); @@ -492,7 +505,7 @@ static AnyV parse_expr17(Lexer& lex) { return lhs; } -// parse E [ (== | < | > | <= | >= | != | <=> ) E ] +// parse E == < > <= >= != <=> E (left-to-right) static AnyV parse_expr15(Lexer& lex) { AnyV lhs = parse_expr17(lex); TokenType t = lex.tok(); @@ -502,11 +515,14 @@ static AnyV parse_expr15(Lexer& lex) { lex.next(); AnyV rhs = parse_expr17(lex); lhs = createV(loc, operator_name, t, lhs, rhs); + if (t == tok_eq || t == tok_neq) { + lhs = 
maybe_replace_eq_null_with_isNull_call(lhs->as()); + } } return lhs; } -// parse E { ( & | `|` | ^ ) E } +// parse E & | ^ E (left-to-right) static AnyV parse_expr14(Lexer& lex) { AnyV lhs = parse_expr15(lex); TokenType t = lex.tok(); @@ -516,33 +532,36 @@ static AnyV parse_expr14(Lexer& lex) { lex.next(); AnyV rhs = parse_expr15(lex); diagnose_bitwise_precedence(loc, operator_name, lhs, rhs); + diagnose_and_or_precedence(loc, lhs, t, operator_name); lhs = createV(loc, operator_name, t, lhs, rhs); t = lex.tok(); } return lhs; } -// parse E [ ? E : E ] +// parse E && || E (left-to-right) static AnyV parse_expr13(Lexer& lex) { - AnyV res = parse_expr14(lex); - if (lex.tok() == tok_question) { + AnyV lhs = parse_expr14(lex); + TokenType t = lex.tok(); + while (t == tok_logical_and || t == tok_logical_or) { SrcLocation loc = lex.cur_location(); + std::string_view operator_name = lex.cur_str(); lex.next(); - AnyV when_true = parse_expr(lex); - lex.expect(tok_colon, "':'"); - AnyV when_false = parse_expr13(lex); - return createV(loc, res, when_true, when_false); + AnyV rhs = parse_expr14(lex); + diagnose_and_or_precedence(loc, lhs, t, operator_name); + lhs = createV(loc, operator_name, t, lhs, rhs); + t = lex.tok(); } - return res; + return lhs; } -// parse LE1 (= | += | -= | ... ) E2 +// parse E = += -= E and E ? 
E : E (right-to-left) static AnyV parse_expr10(Lexer& lex) { AnyV lhs = parse_expr13(lex); TokenType t = lex.tok(); - if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_divR || t == tok_set_divC || - t == tok_set_mod || t == tok_set_modC || t == tok_set_modR || t == tok_set_lshift || t == tok_set_rshift || t == tok_set_rshiftC || - t == tok_set_rshiftR || t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor || + if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || + t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || + t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor || t == tok_assign) { SrcLocation loc = lex.cur_location(); std::string_view operator_name = lex.cur_str(); @@ -550,6 +569,14 @@ static AnyV parse_expr10(Lexer& lex) { AnyV rhs = parse_expr10(lex); return createV(loc, operator_name, t, lhs, rhs); } + if (t == tok_question) { + SrcLocation loc = lex.cur_location(); + lex.next(); + AnyV when_true = parse_expr10(lex); + lex.expect(tok_colon, "`:`"); + AnyV when_false = parse_expr10(lex); + return createV(loc, lhs, when_true, when_false); + } return lhs; } @@ -557,100 +584,275 @@ AnyV parse_expr(Lexer& lex) { return parse_expr10(lex); } -static AnyV parse_return_stmt(Lexer& lex) { +AnyV parse_statement(Lexer& lex); + +static AnyV parse_var_declaration_lhs(Lexer& lex) { SrcLocation loc = lex.cur_location(); - lex.expect(tok_return, "'return'"); - AnyV child = parse_expr(lex); - lex.expect(tok_semicolon, "';'"); - return createV(loc, child); + if (lex.tok() == tok_oppar) { + lex.next(); + AnyV first = parse_var_declaration_lhs(lex); + if (lex.tok() == tok_clpar) { + lex.next(); + return createV(loc, first); + } + std::vector args(1, first); + while (lex.tok() == tok_comma) { + lex.next(); + args.push_back(parse_var_declaration_lhs(lex)); + } + lex.expect(tok_clpar, "`)`"); + return createV(loc, 
std::move(args)); + } + if (lex.tok() == tok_opbracket) { + lex.next(); + std::vector args(1, parse_var_declaration_lhs(lex)); + while (lex.tok() == tok_comma) { + lex.next(); + args.push_back(parse_var_declaration_lhs(lex)); + } + lex.expect(tok_clbracket, "`]`"); + return createV(loc, std::move(args)); + } + if (lex.tok() == tok_identifier) { + auto v_ident = createV(loc, lex.cur_str()); + TypeExpr* declared_type = nullptr; + bool marked_as_redef = false; + lex.next(); + if (lex.tok() == tok_colon) { + lex.next(); + declared_type = parse_type(lex, nullptr); + } else if (lex.tok() == tok_redef) { + lex.next(); + marked_as_redef = true; + } + return createV(loc, v_ident, declared_type, marked_as_redef); + } + if (lex.tok() == tok_underscore) { + TypeExpr* declared_type = nullptr; + lex.next(); + if (lex.tok() == tok_colon) { + lex.next(); + declared_type = parse_type(lex, nullptr); + } + return createV(loc, createV(loc), declared_type, false); + } + lex.unexpected("variable name"); } -AnyV parse_statement(Lexer& lex); +static AnyV parse_local_vars_declaration(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + bool immutable = lex.tok() == tok_val; + lex.next(); + + if (immutable) { + lex.error("immutable variables are not supported yet"); + } + + AnyV lhs = parse_var_declaration_lhs(lex); + if (lex.tok() != tok_assign) { + lex.error("variables declaration must be followed by assignment: `var xxx = ...`"); + } + lex.next(); + AnyV assigned_val = parse_expr(lex); + + if (lex.tok() == tok_comma) { + lex.error("multiple declarations are not allowed, split variables on separate lines"); + } + lex.expect(tok_semicolon, "`;`"); + return createV(loc, lhs, assigned_val); +} static V parse_sequence(Lexer& lex) { SrcLocation loc = lex.cur_location(); - lex.expect(tok_opbrace, "'{'"); + lex.expect(tok_opbrace, "`{`"); std::vector items; while (lex.tok() != tok_clbrace) { items.push_back(parse_statement(lex)); } SrcLocation loc_end = lex.cur_location(); - 
lex.expect(tok_clbrace, "'}'"); + lex.expect(tok_clbrace, "`}`"); return createV(loc, loc_end, items); } +static AnyV parse_return_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_return, "`return`"); + AnyV child = lex.tok() == tok_semicolon // `return;` actually means `return ();` (which is void) + ? createV(lex.cur_location(), {}) + : parse_expr(lex); + lex.expect(tok_semicolon, "`;`"); + return createV(loc, child); +} + +static AnyV parse_if_statement(Lexer& lex, bool is_ifnot) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_if, "`if`"); + + lex.expect(tok_oppar, "`(`"); + AnyV cond = parse_expr(lex); + lex.expect(tok_clpar, "`)`"); + // replace if(!expr) with ifnot(expr) (this should be done later, but for now, let this be right at parsing time) + if (auto v_not = cond->try_as(); v_not && v_not->tok == tok_logical_not) { + is_ifnot = !is_ifnot; + cond = v_not->get_rhs(); + } + + V if_body = parse_sequence(lex); + V else_body = nullptr; + if (lex.tok() == tok_else) { // else if(e) { } or else { } + lex.next(); + if (lex.tok() == tok_if) { + AnyV v_inner_if = parse_if_statement(lex, false); + else_body = createV(v_inner_if->loc, lex.cur_location(), {v_inner_if}); + } else { + else_body = parse_sequence(lex); + } + } else { // no 'else', create empty block + else_body = createV(lex.cur_location(), lex.cur_location(), {}); + } + return createV(loc, is_ifnot, cond, if_body, else_body); +} + static AnyV parse_repeat_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); - lex.expect(tok_repeat, "'repeat'"); + lex.expect(tok_repeat, "`repeat`"); + lex.expect(tok_oppar, "`(`"); AnyV cond = parse_expr(lex); + lex.expect(tok_clpar, "`)`"); V body = parse_sequence(lex); return createV(loc, cond, body); } static AnyV parse_while_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); - lex.expect(tok_while, "'while'"); + lex.expect(tok_while, "`while`"); + lex.expect(tok_oppar, "`(`"); AnyV cond = parse_expr(lex); + 
lex.expect(tok_clpar, "`)`"); V body = parse_sequence(lex); return createV(loc, cond, body); } -static AnyV parse_do_until_statement(Lexer& lex) { +static AnyV parse_do_while_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); - lex.expect(tok_do, "'do'"); + lex.expect(tok_do, "`do`"); V body = parse_sequence(lex); - lex.expect(tok_until, "'until'"); + lex.expect(tok_while, "`while`"); + lex.expect(tok_oppar, "`(`"); AnyV cond = parse_expr(lex); - return createV(loc, body, cond); + lex.expect(tok_clpar, "`)`"); + lex.expect(tok_semicolon, "`;`"); + return createV(loc, body, cond); } -static AnyV parse_try_catch_statement(Lexer& lex) { +static AnyV parse_catch_variable(Lexer& lex) { SrcLocation loc = lex.cur_location(); - lex.expect(tok_try, "'try'"); - V try_body = parse_sequence(lex); - lex.expect(tok_catch, "'catch'"); - AnyV catch_expr = parse_expr(lex); - V catch_body = parse_sequence(lex); - return createV(loc, try_body, catch_expr, catch_body); + if (lex.tok() == tok_identifier) { + std::string_view var_name = lex.cur_str(); + lex.next(); + return createV(loc, var_name); + } + if (lex.tok() == tok_underscore) { + lex.next(); + return createV(loc); + } + lex.unexpected("identifier"); } -static AnyV parse_if_statement(Lexer& lex, bool is_ifnot) { +static AnyV parse_throw_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); - lex.next(); + lex.expect(tok_throw, "`throw`"); + + AnyV thrown_code, thrown_arg; + if (lex.tok() == tok_oppar) { // throw (code) or throw (code, arg) + lex.next(); + thrown_code = parse_expr(lex); + if (lex.tok() == tok_comma) { + lex.next(); + thrown_arg = parse_expr(lex); + } else { + thrown_arg = createV(loc); + } + lex.expect(tok_clpar, "`)`"); + } else { // throw code + thrown_code = parse_expr(lex); + thrown_arg = createV(loc); + } + + lex.expect(tok_semicolon, "`;`"); + return createV(loc, thrown_code, thrown_arg); +} + +static AnyV parse_assert_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + 
lex.expect(tok_assert, "`assert`"); + + lex.expect(tok_oppar, "`(`"); AnyV cond = parse_expr(lex); - V if_body = parse_sequence(lex); - V else_body = nullptr; - if (lex.tok() == tok_else) { + AnyV thrown_code; + if (lex.tok() == tok_comma) { // assert(cond, code) lex.next(); - else_body = parse_sequence(lex); - } else if (lex.tok() == tok_elseif) { - AnyV v_inner_if = parse_if_statement(lex, false); - else_body = createV(v_inner_if->loc, lex.cur_location(), {v_inner_if}); - } else if (lex.tok() == tok_elseifnot) { - AnyV v_inner_if = parse_if_statement(lex, true); - else_body = createV(v_inner_if->loc, lex.cur_location(), {v_inner_if}); - } else { - else_body = createV(lex.cur_location(), lex.cur_location(), {}); + thrown_code = parse_expr(lex); + lex.expect(tok_clpar, "`)`"); + } else { // assert(cond) throw code + lex.expect(tok_clpar, "`)`"); + lex.expect(tok_throw, "`throw excNo` after assert"); + thrown_code = parse_expr(lex); } - return createV(loc, is_ifnot, cond, if_body, else_body); + + lex.expect(tok_semicolon, "`;`"); + return createV(loc, cond, thrown_code); +} + +static AnyV parse_try_catch_statement(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + lex.expect(tok_try, "`try`"); + V try_body = parse_sequence(lex); + + std::vector catch_args; + lex.expect(tok_catch, "`catch`"); + SrcLocation catch_loc = lex.cur_location(); + if (lex.tok() == tok_oppar) { + lex.next(); + catch_args.push_back(parse_catch_variable(lex)); + if (lex.tok() == tok_comma) { // catch (excNo, arg) + lex.next(); + catch_args.push_back(parse_catch_variable(lex)); + } else { // catch (excNo) -> catch (excNo, _) + catch_args.push_back(createV(catch_loc)); + } + lex.expect(tok_clpar, "`)`"); + } else { // catch -> catch (_, _) + catch_args.push_back(createV(catch_loc)); + catch_args.push_back(createV(catch_loc)); + } + V catch_expr = createV(catch_loc, std::move(catch_args)); + + V catch_body = parse_sequence(lex); + return createV(loc, try_body, catch_expr, catch_body); } AnyV 
parse_statement(Lexer& lex) { switch (lex.tok()) { - case tok_return: - return parse_return_stmt(lex); + case tok_var: + case tok_val: + return parse_local_vars_declaration(lex); case tok_opbrace: return parse_sequence(lex); - case tok_repeat: - return parse_repeat_statement(lex); + case tok_return: + return parse_return_statement(lex); case tok_if: return parse_if_statement(lex, false); - case tok_ifnot: - return parse_if_statement(lex, true); + case tok_repeat: + return parse_repeat_statement(lex); case tok_do: - return parse_do_until_statement(lex); + return parse_do_while_statement(lex); case tok_while: return parse_while_statement(lex); + case tok_throw: + return parse_throw_statement(lex); + case tok_assert: + return parse_assert_statement(lex); case tok_try: return parse_try_catch_statement(lex); case tok_semicolon: { @@ -658,9 +860,12 @@ AnyV parse_statement(Lexer& lex) { lex.next(); return createV(loc); } + case tok_break: + case tok_continue: + lex.error("break/continue from loops are not supported yet"); default: { AnyV expr = parse_expr(lex); - lex.expect(tok_semicolon, "';'"); + lex.expect(tok_semicolon, "`;`"); return expr; } } @@ -670,25 +875,25 @@ static AnyV parse_func_body(Lexer& lex) { return parse_sequence(lex); } -static AnyV parse_asm_func_body(Lexer& lex, V arg_list) { +static AnyV parse_asm_func_body(Lexer& lex, V param_list) { SrcLocation loc = lex.cur_location(); - lex.expect(tok_asm, "'asm'"); - size_t n_args = arg_list->size(); - if (n_args > 16) { + lex.expect(tok_asm, "`asm`"); + size_t n_params = param_list->size(); + if (n_params > 16) { throw ParseError{loc, "assembler built-in function can have at most 16 arguments"}; } std::vector arg_order, ret_order; if (lex.tok() == tok_oppar) { lex.next(); while (lex.tok() == tok_identifier || lex.tok() == tok_int_const) { - int arg_idx = arg_list->lookup_idx(lex.cur_str()); + int arg_idx = param_list->lookup_idx(lex.cur_str()); if (arg_idx == -1) { - lex.error("argument name expected"); + 
lex.unexpected("argument name"); } arg_order.push_back(arg_idx); lex.next(); } - if (lex.tok() == tok_mapsto) { + if (lex.tok() == tok_arrow) { lex.next(); while (lex.tok() == tok_int_const) { int ret_idx = std::atoi(static_cast(lex.cur_str()).c_str()); @@ -696,7 +901,7 @@ static AnyV parse_asm_func_body(Lexer& lex, V arg_list) { lex.next(); } } - lex.expect(tok_clpar, "')'"); + lex.expect(tok_clpar, "`)`"); } std::vector asm_commands; lex.check(tok_string_const, "\"ASM COMMAND\""); @@ -705,142 +910,184 @@ static AnyV parse_asm_func_body(Lexer& lex, V arg_list) { asm_commands.push_back(createV(lex.cur_location(), asm_command, 0)); lex.next(); } - lex.expect(tok_semicolon, "';'"); + lex.expect(tok_semicolon, "`;`"); return createV(loc, std::move(arg_order), std::move(ret_order), std::move(asm_commands)); } -static AnyV parse_forall(Lexer& lex) { +static AnyV parse_genericsT_list(Lexer& lex) { SrcLocation loc = lex.cur_location(); - std::vector forall_items; - lex.expect(tok_forall, "'forall'"); + std::vector genericsT_items; + lex.expect(tok_lt, "`<`"); int idx = 0; while (true) { - lex.check(tok_identifier, "T expected"); + lex.check(tok_identifier, "T"); std::string_view nameT = lex.cur_str(); TypeExpr* type = TypeExpr::new_var(idx++); - forall_items.emplace_back(createV(lex.cur_location(), type, static_cast(nameT))); + genericsT_items.emplace_back(createV(lex.cur_location(), type, nameT)); lex.next(); if (lex.tok() != tok_comma) { break; } lex.next(); } - lex.expect(tok_mapsto, "'->'"); - return createV{loc, std::move(forall_items)}; + lex.expect(tok_gt, "`>`"); + return createV{loc, std::move(genericsT_items)}; } -static AnyV parse_function_declaration(Lexer& lex) { +static V parse_annotation(Lexer& lex) { SrcLocation loc = lex.cur_location(); - V forall_list = nullptr; - bool is_get_method = false; - bool is_builtin = false; - bool marked_as_inline = false; - bool marked_as_inline_ref = false; - if (lex.tok() == tok_forall) { - forall_list = 
parse_forall(lex)->as(); - } else if (lex.tok() == tok_get) { - is_get_method = true; + lex.check(tok_annotation_at, "`@`"); + std::string_view name = lex.cur_str(); + AnnotationKind kind = Vertex::parse_kind(name); + lex.next(); + + V v_arg = nullptr; + if (lex.tok() == tok_oppar) { + SrcLocation loc_args = lex.cur_location(); lex.next(); + std::vector args; + args.push_back(parse_expr(lex)); + while (lex.tok() == tok_comma) { + lex.next(); + args.push_back(parse_expr(lex)); + } + lex.expect(tok_clpar, "`)`"); + v_arg = createV(loc_args, std::move(args)); } - TypeExpr* ret_type = parse_type(lex, forall_list); - lex.check(tok_identifier, "function name identifier expected"); - auto v_ident = createV(lex.cur_location(), lex.cur_str()); + + switch (kind) { + case AnnotationKind::unknown: + throw ParseError(loc, "unknown annotation " + static_cast(name)); + case AnnotationKind::inline_simple: + case AnnotationKind::inline_ref: + case AnnotationKind::pure: + case AnnotationKind::deprecated: + if (v_arg) { + throw ParseError(v_arg->loc, "arguments aren't allowed for " + static_cast(name)); + } + v_arg = createV(loc, {}); + break; + case AnnotationKind::method_id: + if (!v_arg || v_arg->size() != 1 || v_arg->get_item(0)->type != ast_int_const) { + throw ParseError(loc, "expecting `(number)` after " + static_cast(name)); + } + break; + } + + return createV(loc, kind, v_arg); +} + +static AnyV parse_function_declaration(Lexer& lex, const std::vector>& annotations) { + SrcLocation loc = lex.cur_location(); + bool is_get_method = lex.tok() == tok_get; lex.next(); - V arg_list = parse_argument_list(lex, forall_list)->as(); - bool marked_as_pure = false; - if (lex.tok() == tok_impure) { - static bool warning_shown = false; - if (!warning_shown) { - lex.cur_location().show_warning("`impure` specifier is deprecated. 
All functions are impure by default, use `pure` to mark a function as pure"); - warning_shown = true; - } - lex.next(); - } else if (lex.tok() == tok_pure) { - marked_as_pure = true; - lex.next(); + if (is_get_method && lex.tok() == tok_fun) { + lex.next(); // 'get f()' and 'get fun f()' both correct } - if (lex.tok() == tok_inline) { - marked_as_inline = true; - lex.next(); - } else if (lex.tok() == tok_inlineref) { - marked_as_inline_ref = true; - lex.next(); + + lex.check(tok_identifier, "function name identifier"); + + std::string_view f_name = lex.cur_str(); + bool is_entrypoint = + f_name == "main" || f_name == "onInternalMessage" || f_name == "onExternalMessage" || + f_name == "onRunTickTock" || f_name == "onSplitPrepare" || f_name == "onSplitInstall"; + bool is_FunC_entrypoint = + f_name == "recv_internal" || f_name == "recv_external" || + f_name == "run_ticktock" || f_name == "split_prepare" || f_name == "split_install"; + if (is_FunC_entrypoint) { + lex.error("this is a reserved FunC/Fift identifier; you need `onInternalMessage`"); } - V method_id = nullptr; - if (lex.tok() == tok_method_id) { - if (is_get_method) { - lex.error("both `get` and `method_id` are not allowed"); - } + + auto v_ident = createV(lex.cur_location(), f_name); + lex.next(); + + V genericsT_list = nullptr; + if (lex.tok() == tok_lt) { // 'fun f' + genericsT_list = parse_genericsT_list(lex)->as(); + } + + V param_list = parse_parameter_list(lex, genericsT_list)->as(); + + TypeExpr* ret_type = nullptr; + if (lex.tok() == tok_colon) { // : (if absent, it means "auto infer", not void) lex.next(); - if (lex.tok() == tok_oppar) { // method_id(N) - lex.next(); - lex.check(tok_int_const, "number"); - std::string_view int_val = lex.cur_str(); - method_id = createV(lex.cur_location(), int_val); - lex.next(); - lex.expect(tok_clpar, "')'"); - } else { - static bool warning_shown = false; - if (!warning_shown) { - lex.cur_location().show_warning("`method_id` specifier is deprecated, use `get` 
keyword.\nExample: `get int seqno() { ... }`"); - warning_shown = true; - } - is_get_method = true; - } + ret_type = parse_type(lex, genericsT_list); } - AnyV body = nullptr; + if (is_entrypoint && (is_get_method || genericsT_list || !annotations.empty())) { + throw ParseError(loc, "invalid declaration of a reserved function"); + } + + AnyV v_body = nullptr; if (lex.tok() == tok_builtin) { - is_builtin = true; - body = createV(lex.cur_location()); + v_body = createV(lex.cur_location()); lex.next(); - lex.expect(tok_semicolon, "';'"); + lex.expect(tok_semicolon, "`;`"); } else if (lex.tok() == tok_opbrace) { - body = parse_func_body(lex); + v_body = parse_func_body(lex); } else if (lex.tok() == tok_asm) { - body = parse_asm_func_body(lex, arg_list); + if (!ret_type) { + lex.error("asm function must specify return type"); + } + v_body = parse_asm_func_body(lex, param_list); } else { - lex.expect(tok_opbrace, "function body block"); + lex.unexpected("{ function body }"); } - auto f_declaration = createV(loc, v_ident, arg_list, body); - f_declaration->ret_type = ret_type; - f_declaration->forall_list = forall_list; - f_declaration->marked_as_pure = marked_as_pure; + auto f_declaration = createV(loc, v_ident, param_list, v_body); + f_declaration->ret_type = ret_type ? 
ret_type : TypeExpr::new_hole(); + f_declaration->is_entrypoint = is_entrypoint; + f_declaration->genericsT_list = genericsT_list; f_declaration->marked_as_get_method = is_get_method; - f_declaration->marked_as_builtin = is_builtin; - f_declaration->marked_as_inline = marked_as_inline; - f_declaration->marked_as_inline_ref = marked_as_inline_ref; - f_declaration->method_id = method_id; + f_declaration->marked_as_builtin = v_body->type == ast_empty; + + for (auto v_annotation : annotations) { + switch (v_annotation->kind) { + case AnnotationKind::inline_simple: + f_declaration->marked_as_inline = true; + break; + case AnnotationKind::inline_ref: + f_declaration->marked_as_inline_ref = true; + break; + case AnnotationKind::pure: + f_declaration->marked_as_pure = true; + break; + case AnnotationKind::method_id: + if (is_get_method || genericsT_list || is_entrypoint) { + v_annotation->error("@method_id can be specified only for regular functions"); + } + f_declaration->method_id = v_annotation->get_arg()->get_item(0)->as(); + break; + case AnnotationKind::deprecated: + // no special handling + break; + + default: + v_annotation->error("this annotation is not applicable to functions"); + } + } + return f_declaration; } -static AnyV parse_pragma(Lexer& lex) { +static AnyV parse_tolk_required_version(Lexer& lex) { SrcLocation loc = lex.cur_location(); - lex.next_special(tok_pragma_name, "pragma name"); - std::string_view pragma_name = lex.cur_str(); - if (pragma_name == "version") { - lex.next(); - TokenType cmp_tok = lex.tok(); - bool valid = cmp_tok == tok_gt || cmp_tok == tok_geq || cmp_tok == tok_lt || cmp_tok == tok_leq || cmp_tok == tok_eq || cmp_tok == tok_bitwise_xor; - if (!valid) { - lex.error("invalid comparison operator"); - } - lex.next_special(tok_semver, "semver"); - std::string_view semver = lex.cur_str(); - lex.next(); - lex.expect(tok_semicolon, "';'"); - return createV(loc, cmp_tok, semver); - } + lex.next_special(tok_semver, "semver"); // syntax: "tolk 
0.6" + std::string semver = static_cast(lex.cur_str()); lex.next(); - lex.expect(tok_semicolon, "';'"); - return createV(loc, pragma_name); + + // for simplicity, there is no syntax ">= version" and so on, just strict compare + if (TOLK_VERSION != semver && TOLK_VERSION != semver + ".0") { // 0.6 = 0.6.0 + loc.show_warning("the contract is written in Tolk v" + semver + ", but you use Tolk compiler v" + TOLK_VERSION + "; probably, it will lead to compilation errors or hash changes"); + } + + return createV(loc, tok_eq, semver); // semicolon is not necessary } -static AnyV parse_include_statement(Lexer& lex) { +static AnyV parse_import_statement(Lexer& lex) { SrcLocation loc = lex.cur_location(); - lex.expect(tok_include, "#include"); + lex.expect(tok_import, "`import`"); lex.check(tok_string_const, "source file name"); std::string_view rel_filename = lex.cur_str(); if (rel_filename.empty()) { @@ -848,29 +1095,65 @@ static AnyV parse_include_statement(Lexer& lex) { } auto v_str = createV(lex.cur_location(), rel_filename, 0); lex.next(); - lex.expect(tok_semicolon, "';'"); - return createV(loc, v_str); + return createV(loc, v_str); // semicolon is not necessary } // the main (exported) function AnyV parse_src_file_to_ast(const SrcFile* file) { std::vector toplevel_declarations; + std::vector> annotations; Lexer lex(file); + while (!lex.is_eof()) { - if (lex.tok() == tok_pragma) { - toplevel_declarations.push_back(parse_pragma(lex)); - } else if (lex.tok() == tok_include) { - toplevel_declarations.push_back(parse_include_statement(lex)); - } else if (lex.tok() == tok_global) { - toplevel_declarations.push_back(parse_global_var_declaration_list(lex)); - } else if (lex.tok() == tok_const) { - toplevel_declarations.push_back(parse_constant_declaration_list(lex)); - } else if (lex.tok() == tok_semicolon) { - lex.next(); // don't add op_empty, no need - } else { - toplevel_declarations.push_back(parse_function_declaration(lex)); + switch (lex.tok()) { + case tok_tolk: + if 
(!annotations.empty()) { + lex.unexpected("declaration after @annotations"); + } + toplevel_declarations.push_back(parse_tolk_required_version(lex)); + break; + case tok_import: + if (!annotations.empty()) { + lex.unexpected("declaration after @annotations"); + } + toplevel_declarations.push_back(parse_import_statement(lex)); + break; + case tok_semicolon: + if (!annotations.empty()) { + lex.unexpected("declaration after @annotations"); + } + lex.next(); // don't add ast_empty, no need + break; + + case tok_annotation_at: + annotations.push_back(parse_annotation(lex)); + break; + case tok_global: + toplevel_declarations.push_back(parse_global_var_declaration(lex, annotations)); + annotations.clear(); + break; + case tok_const: + toplevel_declarations.push_back(parse_constant_declaration(lex, annotations)); + annotations.clear(); + break; + case tok_fun: + case tok_get: + toplevel_declarations.push_back(parse_function_declaration(lex, annotations)); + annotations.clear(); + break; + + case tok_export: + case tok_struct: + case tok_enum: + case tok_operator: + case tok_infix: + lex.error("`" + static_cast(lex.cur_str()) +"` is not supported yet"); + + default: + lex.unexpected("fun or get"); } } + return createV(file, std::move(toplevel_declarations)); } diff --git a/tolk/ast-replacer.h b/tolk/ast-replacer.h index feae56166..16a9f64c2 100644 --- a/tolk/ast-replacer.h +++ b/tolk/ast-replacer.h @@ -71,18 +71,16 @@ class ASTReplacerInFunctionBody : public ASTReplacer { using parent = ASTReplacerInFunctionBody; virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); 
} - virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } @@ -91,26 +89,28 @@ class ASTReplacerInFunctionBody : public ASTReplacer { virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } AnyV replace(AnyV v) final { switch (v->type) { case ast_empty: return replace(v->as()); + case ast_parenthesized_expr: return replace(v->as()); + case ast_tensor: return replace(v->as()); + case ast_tensor_square: return replace(v->as()); case ast_identifier: return replace(v->as()); case ast_int_const: return replace(v->as()); case ast_string_const: return replace(v->as()); case ast_bool_const: return replace(v->as()); - case ast_nil_tuple: return replace(v->as()); + case ast_null_keyword: return 
replace(v->as()); case ast_function_call: return replace(v->as()); - case ast_parenthesized_expr: return replace(v->as()); case ast_underscore: return replace(v->as()); - case ast_type_expression: return replace(v->as()); - case ast_variable_declaration: return replace(v->as()); - case ast_tensor: return replace(v->as()); - case ast_tensor_square: return replace(v->as()); case ast_dot_tilde_call: return replace(v->as()); case ast_unary_operator: return replace(v->as()); case ast_binary_operator: return replace(v->as()); @@ -119,9 +119,13 @@ class ASTReplacerInFunctionBody : public ASTReplacer { case ast_sequence: return replace(v->as()); case ast_repeat_statement: return replace(v->as()); case ast_while_statement: return replace(v->as()); - case ast_do_until_statement: return replace(v->as()); + case ast_do_while_statement: return replace(v->as()); + case ast_throw_statement: return replace(v->as()); + case ast_assert_statement: return replace(v->as()); case ast_try_catch_statement: return replace(v->as()); case ast_if_statement: return replace(v->as()); + case ast_local_var: return replace(v->as()); + case ast_local_vars_declaration: return replace(v->as()); case ast_asm_body: return replace(v->as()); default: throw UnexpectedASTNodeType(v, "ASTReplacerInFunctionBody::visit"); diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h index fcd1f36cc..cabda4990 100644 --- a/tolk/ast-stringifier.h +++ b/tolk/ast-stringifier.h @@ -32,22 +32,18 @@ namespace tolk { class ASTStringifier final : public ASTVisitor { constexpr static std::pair name_pairs[] = { {ast_empty, "ast_empty"}, + {ast_parenthesized_expr, "ast_parenthesized_expr"}, + {ast_tensor, "ast_tensor"}, + {ast_tensor_square, "ast_tensor_square"}, {ast_identifier, "ast_identifier"}, {ast_int_const, "ast_int_const"}, {ast_string_const, "ast_string_const"}, {ast_bool_const, "ast_bool_const"}, - {ast_nil_tuple, "ast_nil_tuple"}, + {ast_null_keyword, "ast_null_keyword"}, {ast_function_call, 
"ast_function_call"}, - {ast_parenthesized_expr, "ast_parenthesized_expr"}, {ast_global_var_declaration, "ast_global_var_declaration"}, - {ast_global_var_declaration_list, "ast_global_var_declaration_list"}, {ast_constant_declaration, "ast_constant_declaration"}, - {ast_constant_declaration_list, "ast_constant_declaration_list"}, {ast_underscore, "ast_underscore"}, - {ast_type_expression, "ast_type_expression"}, - {ast_variable_declaration, "ast_variable_declaration"}, - {ast_tensor, "ast_tensor"}, - {ast_tensor_square, "ast_tensor_square"}, {ast_dot_tilde_call, "ast_dot_tilde_call"}, {ast_unary_operator, "ast_unary_operator"}, {ast_binary_operator, "ast_binary_operator"}, @@ -56,24 +52,39 @@ class ASTStringifier final : public ASTVisitor { {ast_sequence, "ast_sequence"}, {ast_repeat_statement, "ast_repeat_statement"}, {ast_while_statement, "ast_while_statement"}, - {ast_do_until_statement, "ast_do_until_statement"}, + {ast_do_while_statement, "ast_do_while_statement"}, + {ast_throw_statement, "ast_throw_statement"}, + {ast_assert_statement, "ast_assert_statement"}, {ast_try_catch_statement, "ast_try_catch_statement"}, {ast_if_statement, "ast_if_statement"}, - {ast_forall_item, "ast_forall_item"}, - {ast_forall_list, "ast_forall_list"}, - {ast_argument, "ast_argument"}, - {ast_argument_list, "ast_argument_list"}, + {ast_genericsT_item, "ast_genericsT_item"}, + {ast_genericsT_list, "ast_genericsT_list"}, + {ast_parameter, "ast_parameter"}, + {ast_parameter_list, "ast_parameter_list"}, {ast_asm_body, "ast_asm_body"}, + {ast_annotation, "ast_annotation"}, {ast_function_declaration, "ast_function_declaration"}, - {ast_pragma_no_arg, "ast_pragma_no_arg"}, - {ast_pragma_version, "ast_pragma_version"}, - {ast_include_statement, "ast_include_statement"}, + {ast_local_var, "ast_local_var"}, + {ast_local_vars_declaration, "ast_local_vars_declaration"}, + {ast_tolk_required_version, "ast_tolk_required_version"}, + {ast_import_statement, "ast_import_statement"}, 
{ast_tolk_file, "ast_tolk_file"}, }; + static_assert(std::size(name_pairs) == ast_tolk_file + 1, "name_pairs needs to be updated"); + + constexpr static std::pair annotation_kinds[] = { + {AnnotationKind::inline_simple, "@inline"}, + {AnnotationKind::inline_ref, "@inline_ref"}, + {AnnotationKind::method_id, "@method_id"}, + {AnnotationKind::pure, "@pure"}, + {AnnotationKind::deprecated, "@deprecated"}, + }; + + static_assert(std::size(annotation_kinds) == static_cast(AnnotationKind::unknown), "annotation_kinds needs to be updated"); + template constexpr static const char* ast_node_type_to_string() { - static_assert(std::size(name_pairs) == ast_tolk_file + 1, "name_pairs needs to be updated"); return name_pairs[node_type].second; } @@ -118,16 +129,6 @@ class ASTStringifier final : public ASTVisitor { return static_cast(v->as()->get_identifier()->name); case ast_constant_declaration: return static_cast(v->as()->get_identifier()->name); - case ast_type_expression: { - std::ostringstream os; - os << v->as()->declared_type; - return os.str(); - } - case ast_variable_declaration: { - std::ostringstream os; - os << v->as()->declared_type; - return os.str(); - } case ast_dot_tilde_call: return static_cast(v->as()->method_name); case ast_unary_operator: @@ -138,26 +139,34 @@ class ASTStringifier final : public ASTVisitor { return "↓" + std::to_string(v->as()->get_items().size()); case ast_if_statement: return v->as()->is_ifnot ? 
"ifnot" : ""; - case ast_argument: { + case ast_annotation: + return annotation_kinds[static_cast(v->as()->kind)].second; + case ast_parameter: { std::ostringstream os; - os << v->as()->arg_type; - return static_cast(v->as()->get_identifier()->name) + ": " + os.str(); + os << v->as()->param_type; + return static_cast(v->as()->get_identifier()->name) + ": " + os.str(); } case ast_function_declaration: { - std::string arg_names; - for (int i = 0; i < v->as()->get_num_args(); i++) { - if (!arg_names.empty()) - arg_names += ","; - arg_names += v->as()->get_arg(i)->get_identifier()->name; + std::string param_names; + for (int i = 0; i < v->as()->get_num_params(); i++) { + if (!param_names.empty()) + param_names += ","; + param_names += v->as()->get_param(i)->get_identifier()->name; + } + return "fun " + static_cast(v->as()->get_identifier()->name) + "(" + param_names + ")"; + } + case ast_local_var: { + std::ostringstream os; + os << v->as()->declared_type; + if (auto v_ident = v->as()->get_identifier()->try_as()) { + return static_cast(v_ident->name) + ":" + os.str(); } - return "fun " + static_cast(v->as()->get_identifier()->name) + "(" + arg_names + ")"; + return "_: " + os.str(); } - case ast_pragma_no_arg: - return static_cast(v->as()->pragma_name); - case ast_pragma_version: - return static_cast(v->as()->semver); - case ast_include_statement: - return static_cast(v->as()->get_file_leaf()->str_val); + case ast_tolk_required_version: + return static_cast(v->as()->semver); + case ast_import_statement: + return static_cast(v->as()->get_file_leaf()->str_val); case ast_tolk_file: return v->as()->file->rel_filename; default: @@ -191,22 +200,18 @@ class ASTStringifier final : public ASTVisitor { void visit(AnyV v) override { switch (v->type) { case ast_empty: return handle_vertex(v->as()); + case ast_parenthesized_expr: return handle_vertex(v->as()); + case ast_tensor: return handle_vertex(v->as()); + case ast_tensor_square: return handle_vertex(v->as()); case 
ast_identifier: return handle_vertex(v->as()); case ast_int_const: return handle_vertex(v->as()); case ast_string_const: return handle_vertex(v->as()); case ast_bool_const: return handle_vertex(v->as()); - case ast_nil_tuple: return handle_vertex(v->as()); + case ast_null_keyword: return handle_vertex(v->as()); case ast_function_call: return handle_vertex(v->as()); - case ast_parenthesized_expr: return handle_vertex(v->as()); case ast_global_var_declaration: return handle_vertex(v->as()); - case ast_global_var_declaration_list: return handle_vertex(v->as()); case ast_constant_declaration: return handle_vertex(v->as()); - case ast_constant_declaration_list: return handle_vertex(v->as()); case ast_underscore: return handle_vertex(v->as()); - case ast_type_expression: return handle_vertex(v->as()); - case ast_variable_declaration: return handle_vertex(v->as()); - case ast_tensor: return handle_vertex(v->as()); - case ast_tensor_square: return handle_vertex(v->as()); case ast_dot_tilde_call: return handle_vertex(v->as()); case ast_unary_operator: return handle_vertex(v->as()); case ast_binary_operator: return handle_vertex(v->as()); @@ -215,18 +220,22 @@ class ASTStringifier final : public ASTVisitor { case ast_sequence: return handle_vertex(v->as()); case ast_repeat_statement: return handle_vertex(v->as()); case ast_while_statement: return handle_vertex(v->as()); - case ast_do_until_statement: return handle_vertex(v->as()); + case ast_do_while_statement: return handle_vertex(v->as()); + case ast_throw_statement: return handle_vertex(v->as()); + case ast_assert_statement: return handle_vertex(v->as()); case ast_try_catch_statement: return handle_vertex(v->as()); case ast_if_statement: return handle_vertex(v->as()); - case ast_forall_item: return handle_vertex(v->as()); - case ast_forall_list: return handle_vertex(v->as()); - case ast_argument: return handle_vertex(v->as()); - case ast_argument_list: return handle_vertex(v->as()); + case ast_genericsT_item: return 
handle_vertex(v->as()); + case ast_genericsT_list: return handle_vertex(v->as()); + case ast_parameter: return handle_vertex(v->as()); + case ast_parameter_list: return handle_vertex(v->as()); case ast_asm_body: return handle_vertex(v->as()); + case ast_annotation: return handle_vertex(v->as()); case ast_function_declaration: return handle_vertex(v->as()); - case ast_pragma_no_arg: return handle_vertex(v->as()); - case ast_pragma_version: return handle_vertex(v->as()); - case ast_include_statement: return handle_vertex(v->as()); + case ast_local_var: return handle_vertex(v->as()); + case ast_local_vars_declaration: return handle_vertex(v->as()); + case ast_tolk_required_version: return handle_vertex(v->as()); + case ast_import_statement: return handle_vertex(v->as()); case ast_tolk_file: return handle_vertex(v->as()); default: throw UnexpectedASTNodeType(v, "ASTStringifier::visit"); diff --git a/tolk/ast-visitor.h b/tolk/ast-visitor.h index d1c38b9e9..6fe9ed5d9 100644 --- a/tolk/ast-visitor.h +++ b/tolk/ast-visitor.h @@ -67,18 +67,16 @@ class ASTVisitorFunctionBody : public ASTVisitor { using parent = ASTVisitorFunctionBody; virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { 
return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } @@ -87,26 +85,26 @@ class ASTVisitorFunctionBody : public ASTVisitor { virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } void visit(AnyV v) final { switch (v->type) { case ast_empty: return visit(v->as()); + case ast_parenthesized_expr: return visit(v->as()); + case ast_tensor: return visit(v->as()); + case ast_tensor_square: return visit(v->as()); case ast_identifier: return visit(v->as()); case ast_int_const: return visit(v->as()); case ast_string_const: return visit(v->as()); case ast_bool_const: return visit(v->as()); - case ast_nil_tuple: return visit(v->as()); + case ast_null_keyword: return visit(v->as()); case ast_function_call: return visit(v->as()); - case ast_parenthesized_expr: return visit(v->as()); case ast_underscore: return visit(v->as()); - case ast_type_expression: return visit(v->as()); - case ast_variable_declaration: return visit(v->as()); - case ast_tensor: return visit(v->as()); - case ast_tensor_square: return visit(v->as()); case ast_dot_tilde_call: return visit(v->as()); case ast_unary_operator: return visit(v->as()); case ast_binary_operator: return visit(v->as()); @@ -115,9 +113,13 @@ class ASTVisitorFunctionBody : public ASTVisitor { case ast_sequence: return visit(v->as()); case ast_repeat_statement: 
return visit(v->as()); case ast_while_statement: return visit(v->as()); - case ast_do_until_statement: return visit(v->as()); + case ast_do_while_statement: return visit(v->as()); + case ast_throw_statement: return visit(v->as()); + case ast_assert_statement: return visit(v->as()); case ast_try_catch_statement: return visit(v->as()); case ast_if_statement: return visit(v->as()); + case ast_local_var: return visit(v->as()); + case ast_local_vars_declaration: return visit(v->as()); case ast_asm_body: return visit(v->as()); default: throw UnexpectedASTNodeType(v, "ASTVisitorFunctionBody::visit"); diff --git a/tolk/ast.cpp b/tolk/ast.cpp index 123dd8963..f0506ef48 100644 --- a/tolk/ast.cpp +++ b/tolk/ast.cpp @@ -49,25 +49,44 @@ void ASTNodeBase::error(const std::string& err_msg) const { throw ParseError(loc, err_msg); } -int Vertex::lookup_idx(std::string_view nameT) const { +AnnotationKind Vertex::parse_kind(std::string_view name) { + if (name == "@pure") { + return AnnotationKind::pure; + } + if (name == "@inline") { + return AnnotationKind::inline_simple; + } + if (name == "@inline_ref") { + return AnnotationKind::inline_ref; + } + if (name == "@method_id") { + return AnnotationKind::method_id; + } + if (name == "@deprecated") { + return AnnotationKind::deprecated; + } + return AnnotationKind::unknown; +} + +int Vertex::lookup_idx(std::string_view nameT) const { for (size_t idx = 0; idx < children.size(); ++idx) { - if (children[idx] && children[idx]->as()->nameT == nameT) { + if (children[idx] && children[idx]->as()->nameT == nameT) { return static_cast(idx); } } return -1; } -int Vertex::lookup_idx(std::string_view arg_name) const { +int Vertex::lookup_idx(std::string_view param_name) const { for (size_t idx = 0; idx < children.size(); ++idx) { - if (children[idx] && children[idx]->as()->get_identifier()->name == arg_name) { + if (children[idx] && children[idx]->as()->get_identifier()->name == param_name) { return static_cast(idx); } } return -1; } -void 
Vertex::mutate_set_src_file(const SrcFile* file) const { +void Vertex::mutate_set_src_file(const SrcFile* file) const { const_cast(this)->file = file; } diff --git a/tolk/ast.h b/tolk/ast.h index 12b7da93b..a233f09d3 100644 --- a/tolk/ast.h +++ b/tolk/ast.h @@ -60,22 +60,18 @@ namespace tolk { enum ASTNodeType { ast_empty, + ast_parenthesized_expr, + ast_tensor, + ast_tensor_square, ast_identifier, ast_int_const, ast_string_const, ast_bool_const, - ast_nil_tuple, + ast_null_keyword, ast_function_call, - ast_parenthesized_expr, ast_global_var_declaration, - ast_global_var_declaration_list, ast_constant_declaration, - ast_constant_declaration_list, ast_underscore, - ast_type_expression, - ast_variable_declaration, - ast_tensor, - ast_tensor_square, ast_dot_tilde_call, ast_unary_operator, ast_binary_operator, @@ -84,21 +80,34 @@ enum ASTNodeType { ast_sequence, ast_repeat_statement, ast_while_statement, - ast_do_until_statement, + ast_do_while_statement, + ast_throw_statement, + ast_assert_statement, ast_try_catch_statement, ast_if_statement, - ast_forall_item, - ast_forall_list, - ast_argument, - ast_argument_list, + ast_genericsT_item, + ast_genericsT_list, + ast_parameter, + ast_parameter_list, ast_asm_body, + ast_annotation, ast_function_declaration, - ast_pragma_no_arg, - ast_pragma_version, - ast_include_statement, + ast_local_var, + ast_local_vars_declaration, + ast_tolk_required_version, + ast_import_statement, ast_tolk_file, }; +enum class AnnotationKind { + inline_simple, + inline_ref, + method_id, + pure, + deprecated, + unknown, +}; + struct ASTNodeBase; using AnyV = const ASTNodeBase*; @@ -210,6 +219,32 @@ struct Vertex final : ASTNodeLeaf { : ASTNodeLeaf(ast_empty, loc) {} }; +template<> +struct Vertex final : ASTNodeUnary { + AnyV get_expr() const { return child; } + + Vertex(SrcLocation loc, AnyV expr) + : ASTNodeUnary(ast_parenthesized_expr, loc, expr) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_items() const { 
return children; } + AnyV get_item(int i) const { return children.at(i); } + + Vertex(SrcLocation loc, std::vector items) + : ASTNodeVararg(ast_tensor, loc, std::move(items)) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_items() const { return children; } + AnyV get_item(int i) const { return children.at(i); } + + Vertex(SrcLocation loc, std::vector items) + : ASTNodeVararg(ast_tensor_square, loc, std::move(items)) {} +}; + template<> struct Vertex final : ASTNodeLeaf { std::string_view name; @@ -244,45 +279,29 @@ struct Vertex final : ASTNodeLeaf { }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTNodeLeaf { explicit Vertex(SrcLocation loc) - : ASTNodeLeaf(ast_nil_tuple, loc) {} + : ASTNodeLeaf(ast_null_keyword, loc) {} }; template<> struct Vertex final : ASTNodeBinary { // even for f(1,2,3), f (lhs) is called with a single arg (tensor "(1,2,3)") (rhs) AnyV get_called_f() const { return lhs; } - AnyV get_called_arg() const { return rhs; } + auto get_called_arg() const { return rhs->as(); } - Vertex(SrcLocation loc, AnyV lhs_f, AnyV arg) + Vertex(SrcLocation loc, AnyV lhs_f, V arg) : ASTNodeBinary(ast_function_call, loc, lhs_f, arg) {} }; -template<> -struct Vertex final : ASTNodeUnary { - AnyV get_expr() const { return child; } - - Vertex(SrcLocation loc, AnyV expr) - : ASTNodeUnary(ast_parenthesized_expr, loc, expr) {} -}; - template<> struct Vertex final : ASTNodeUnary { TypeExpr* declared_type; // may be nullptr auto get_identifier() const { return child->as(); } - Vertex(SrcLocation loc, V var_identifier, TypeExpr* declared_type) - : ASTNodeUnary(ast_global_var_declaration, loc, var_identifier), declared_type(declared_type) {} -}; - -template<> -struct Vertex final : ASTNodeVararg { - const std::vector& get_declarations() const { return children; } - - Vertex(SrcLocation loc, std::vector declarations) - : ASTNodeVararg(ast_global_var_declaration_list, loc, std::move(declarations)) {} + 
Vertex(SrcLocation loc, V name_identifier, TypeExpr* declared_type) + : ASTNodeUnary(ast_global_var_declaration, loc, name_identifier), declared_type(declared_type) {} }; template<> @@ -292,16 +311,8 @@ struct Vertex final : ASTNodeBinary { auto get_identifier() const { return lhs->as(); } AnyV get_init_value() const { return rhs; } - Vertex(SrcLocation loc, V const_identifier, TypeExpr* declared_type, AnyV init_value) - : ASTNodeBinary(ast_constant_declaration, loc, const_identifier, init_value), declared_type(declared_type) {} -}; - -template<> -struct Vertex final : ASTNodeVararg { - const std::vector& get_declarations() const { return children; } - - Vertex(SrcLocation loc, std::vector declarations) - : ASTNodeVararg(ast_constant_declaration_list, loc, std::move(declarations)) {} + Vertex(SrcLocation loc, V name_identifier, TypeExpr* declared_type, AnyV init_value) + : ASTNodeBinary(ast_constant_declaration, loc, name_identifier, init_value), declared_type(declared_type) {} }; template<> @@ -310,51 +321,15 @@ struct Vertex final : ASTNodeLeaf { : ASTNodeLeaf(ast_underscore, loc) {} }; -template<> -struct Vertex final : ASTNodeLeaf { - TypeExpr* declared_type; - - Vertex(SrcLocation loc, TypeExpr* declared_type) - : ASTNodeLeaf(ast_type_expression, loc), declared_type(declared_type) {} -}; - -template<> -struct Vertex final : ASTNodeUnary { - TypeExpr* declared_type; - - AnyV get_variable_or_list() const { return child; } // identifier, tuple, tensor - - Vertex(SrcLocation loc, TypeExpr* declared_type, AnyV dest) - : ASTNodeUnary(ast_variable_declaration, loc, dest), declared_type(declared_type) {} -}; - -template<> -struct Vertex final : ASTNodeVararg { - const std::vector& get_items() const { return children; } - AnyV get_item(int i) const { return children.at(i); } - - Vertex(SrcLocation loc, std::vector items) - : ASTNodeVararg(ast_tensor, loc, std::move(items)) {} -}; - -template<> -struct Vertex final : ASTNodeVararg { - const std::vector& get_items() 
const { return children; } - AnyV get_item(int i) const { return children.at(i); } - - Vertex(SrcLocation loc, std::vector items) - : ASTNodeVararg(ast_tensor_square, loc, std::move(items)) {} -}; - template<> struct Vertex final : ASTNodeBinary { std::string_view method_name; // starts with . or ~ AnyV get_lhs() const { return lhs; } - AnyV get_arg() const { return rhs; } + auto get_arg() const { return rhs->as(); } - Vertex(SrcLocation loc, std::string_view method_name, AnyV lhs, AnyV rhs) - : ASTNodeBinary(ast_dot_tilde_call, loc, lhs, rhs), method_name(method_name) {} + Vertex(SrcLocation loc, std::string_view method_name, AnyV lhs, V arg) + : ASTNodeBinary(ast_dot_tilde_call, loc, lhs, arg), method_name(method_name) {} }; template<> @@ -428,27 +403,46 @@ struct Vertex final : ASTNodeBinary { }; template<> -struct Vertex final : ASTNodeBinary { +struct Vertex final : ASTNodeBinary { auto get_body() const { return lhs->as(); } AnyV get_cond() const { return rhs; } Vertex(SrcLocation loc, V body, AnyV cond) - : ASTNodeBinary(ast_do_until_statement, loc, body, cond) {} + : ASTNodeBinary(ast_do_while_statement, loc, body, cond) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + AnyV get_thrown_code() const { return lhs; } + AnyV get_thrown_arg() const { return rhs; } // may be ast_empty + bool has_thrown_arg() const { return rhs->type != ast_empty; } + + Vertex(SrcLocation loc, AnyV thrown_code, AnyV thrown_arg) + : ASTNodeBinary(ast_throw_statement, loc, thrown_code, thrown_arg) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + AnyV get_cond() const { return lhs; } + AnyV get_thrown_code() const { return rhs; } + + Vertex(SrcLocation loc, AnyV cond, AnyV thrown_code) + : ASTNodeBinary(ast_assert_statement, loc, cond, thrown_code) {} }; template<> struct Vertex final : ASTNodeVararg { auto get_try_body() const { return children.at(0)->as(); } - AnyV get_catch_expr() const { return children.at(1); } // it's a tensor + auto get_catch_expr() 
const { return children.at(1)->as(); } // (excNo, arg), always len 2 auto get_catch_body() const { return children.at(2)->as(); } - Vertex(SrcLocation loc, V try_body, AnyV catch_expr, V catch_body) + Vertex(SrcLocation loc, V try_body, V catch_expr, V catch_body) : ASTNodeVararg(ast_try_catch_statement, loc, {try_body, catch_expr, catch_body}) {} }; template<> struct Vertex final : ASTNodeVararg { - bool is_ifnot; + bool is_ifnot; // if(!cond), to generate more optimal fift code AnyV get_cond() const { return children.at(0); } auto get_if_body() const { return children.at(1)->as(); } @@ -459,33 +453,44 @@ struct Vertex final : ASTNodeVararg { }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTNodeLeaf { TypeExpr* created_type; // used to keep same pointer, since TypeExpr::new_var(i) always allocates - std::string nameT; + std::string_view nameT; - Vertex(SrcLocation loc, TypeExpr* created_type, std::string nameT) - : ASTNodeLeaf(ast_forall_item, loc), created_type(created_type), nameT(std::move(nameT)) {} + Vertex(SrcLocation loc, TypeExpr* created_type, std::string_view nameT) + : ASTNodeLeaf(ast_genericsT_item, loc), created_type(created_type), nameT(nameT) {} }; template<> -struct Vertex final : ASTNodeVararg { +struct Vertex final : ASTNodeVararg { std::vector get_items() const { return children; } - auto get_item(int i) const { return children.at(i)->as(); } + auto get_item(int i) const { return children.at(i)->as(); } - Vertex(SrcLocation loc, std::vector forall_items) - : ASTNodeVararg(ast_forall_list, loc, std::move(forall_items)) {} + Vertex(SrcLocation loc, std::vector genericsT_items) + : ASTNodeVararg(ast_genericsT_list, loc, std::move(genericsT_items)) {} int lookup_idx(std::string_view nameT) const; }; template<> -struct Vertex final : ASTNodeUnary { - TypeExpr* arg_type; +struct Vertex final : ASTNodeUnary { + TypeExpr* param_type; - auto get_identifier() const { return child->as(); } + auto get_identifier() const { return 
child->as(); } // for underscore, its str_val is empty - Vertex(SrcLocation loc, V arg_identifier, TypeExpr* arg_type) - : ASTNodeUnary(ast_argument, loc, arg_identifier), arg_type(arg_type) {} + Vertex(SrcLocation loc, V name_identifier, TypeExpr* param_type) + : ASTNodeUnary(ast_parameter, loc, name_identifier), param_type(param_type) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_params() const { return children; } + auto get_param(int i) const { return children.at(i)->as(); } + + Vertex(SrcLocation loc, std::vector params) + : ASTNodeVararg(ast_parameter_list, loc, std::move(params)) {} + + int lookup_idx(std::string_view param_name) const; }; template<> @@ -500,26 +505,48 @@ struct Vertex final : ASTNodeVararg { }; template<> -struct Vertex final : ASTNodeVararg { - const std::vector& get_args() const { return children; } - auto get_arg(int i) const { return children.at(i)->as(); } +struct Vertex final : ASTNodeUnary { + AnnotationKind kind; + + auto get_arg() const { return child->as(); } - Vertex(SrcLocation loc, std::vector args) - : ASTNodeVararg(ast_argument_list, loc, std::move(args)) {} + static AnnotationKind parse_kind(std::string_view name); - int lookup_idx(std::string_view arg_name) const; + Vertex(SrcLocation loc, AnnotationKind kind, V arg_probably_empty) + : ASTNodeUnary(ast_annotation, loc, arg_probably_empty), kind(kind) {} +}; + +template<> +struct Vertex final : ASTNodeUnary { + TypeExpr* declared_type; + bool marked_as_redef; // var (existing_var redef, new_var: int) = ... 
+ + AnyV get_identifier() const { return child; } // ast_identifier / ast_underscore + + Vertex(SrcLocation loc, AnyV name_identifier, TypeExpr* declared_type, bool marked_as_redef) + : ASTNodeUnary(ast_local_var, loc, name_identifier), declared_type(declared_type), marked_as_redef(marked_as_redef) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + AnyV get_lhs() const { return lhs; } // ast_local_var / ast_tensor / ast_tensor_square + AnyV get_assigned_val() const { return rhs; } + + Vertex(SrcLocation loc, AnyV lhs, AnyV assigned_val) + : ASTNodeBinary(ast_local_vars_declaration, loc, lhs, assigned_val) {} }; template<> struct Vertex final : ASTNodeVararg { auto get_identifier() const { return children.at(0)->as(); } - int get_num_args() const { return children.at(1)->as()->size(); } - auto get_arg_list() const { return children.at(1)->as(); } - auto get_arg(int i) const { return children.at(1)->as()->get_arg(i); } + int get_num_params() const { return children.at(1)->as()->size(); } + auto get_param_list() const { return children.at(1)->as(); } + auto get_param(int i) const { return children.at(1)->as()->get_param(i); } AnyV get_body() const { return children.at(2); } // ast_sequence / ast_asm_body TypeExpr* ret_type = nullptr; - V forall_list = nullptr; + V genericsT_list = nullptr; + bool is_entrypoint = false; bool marked_as_pure = false; bool marked_as_builtin = false; bool marked_as_get_method = false; @@ -529,29 +556,21 @@ struct Vertex final : ASTNodeVararg { bool is_asm_function() const { return children.at(2)->type == ast_asm_body; } - Vertex(SrcLocation loc, V name_identifier, V args, AnyV body) - : ASTNodeVararg(ast_function_declaration, loc, {name_identifier, args, body}) {} -}; - -template<> -struct Vertex final : ASTNodeLeaf { - std::string_view pragma_name; - - Vertex(SrcLocation loc, std::string_view pragma_name) - : ASTNodeLeaf(ast_pragma_no_arg, loc), pragma_name(pragma_name) {} + Vertex(SrcLocation loc, V name_identifier, V 
parameters, AnyV body) + : ASTNodeVararg(ast_function_declaration, loc, {name_identifier, parameters, body}) {} }; template<> -struct Vertex final : ASTNodeLeaf { +struct Vertex final : ASTNodeLeaf { TokenType cmp_tok; std::string_view semver; Vertex(SrcLocation loc, TokenType cmp_tok, std::string_view semver) - : ASTNodeLeaf(ast_pragma_version, loc), cmp_tok(cmp_tok), semver(semver) {} + : ASTNodeLeaf(ast_tolk_required_version, loc), cmp_tok(cmp_tok), semver(semver) {} }; template<> -struct Vertex final : ASTNodeUnary { +struct Vertex final : ASTNodeUnary { const SrcFile* file = nullptr; // assigned after includes have been resolved auto get_file_leaf() const { return child->as(); } @@ -561,7 +580,7 @@ struct Vertex final : ASTNodeUnary { void mutate_set_src_file(const SrcFile* file) const; Vertex(SrcLocation loc, V file_name) - : ASTNodeUnary(ast_include_statement, loc, file_name) {} + : ASTNodeUnary(ast_import_statement, loc, file_name) {} }; template<> diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index 4b31d1779..52144d419 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -91,10 +91,6 @@ int emulate_negate(int a) { if ((a & f) && (~a & f)) { a ^= f; } - f = VarDescr::_Bit | VarDescr::_Bool; - if ((a & f) && (~a & f)) { - a ^= f; - } return a; } @@ -129,9 +125,9 @@ int emulate_sub(int a, int b) { } int emulate_mul(int a, int b) { - if ((b & (VarDescr::_NonZero | VarDescr::_Bit)) == (VarDescr::_NonZero | VarDescr::_Bit)) { + if ((b & VarDescr::ConstOne) == VarDescr::ConstOne) { return a; - } else if ((a & (VarDescr::_NonZero | VarDescr::_Bit)) == (VarDescr::_NonZero | VarDescr::_Bit)) { + } else if ((a & VarDescr::ConstOne) == VarDescr::ConstOne) { return b; } int u = a & b, v = a | b; @@ -151,11 +147,6 @@ int emulate_mul(int a, int b) { } else if (!(~v & (VarDescr::_Pos | VarDescr::_Neg))) { r |= VarDescr::_Neg; } - if (u & (VarDescr::_Bit | VarDescr::_Bool)) { - r |= VarDescr::_Bit; - } else if (!(~v & (VarDescr::_Bit | VarDescr::_Bool))) { - r |= 
VarDescr::_Bool; - } r |= v & VarDescr::_Even; r |= u & (VarDescr::_Odd | VarDescr::_NonZero); return r; @@ -172,7 +163,6 @@ int emulate_bitwise_and(int a, int b) { return VarDescr::ConstZero; } r |= both & (VarDescr::_Even | VarDescr::_Odd); - r |= both & (VarDescr::_Bit | VarDescr::_Bool); if (both & VarDescr::_Odd) { r |= VarDescr::_NonZero; } @@ -228,7 +218,7 @@ int emulate_bitwise_not(int a) { if ((a2 & f) && (~a2 & f)) { a2 ^= f; } - a2 &= ~(VarDescr::_Zero | VarDescr::_NonZero | VarDescr::_Bit | VarDescr::_Pos | VarDescr::_Neg); + a2 &= ~(VarDescr::_Zero | VarDescr::_NonZero | VarDescr::_Pos | VarDescr::_Neg); if ((a & VarDescr::_Neg) && (a & VarDescr::_NonZero)) { a2 |= VarDescr::_Pos; } @@ -251,9 +241,9 @@ int emulate_lshift(int a, int b) { } int emulate_div(int a, int b) { - if ((b & (VarDescr::_NonZero | VarDescr::_Bit)) == (VarDescr::_NonZero | VarDescr::_Bit)) { + if ((b & VarDescr::ConstOne) == VarDescr::ConstOne) { return a; - } else if ((b & (VarDescr::_NonZero | VarDescr::_Bool)) == (VarDescr::_NonZero | VarDescr::_Bool)) { + } else if ((b & VarDescr::ConstOne) == VarDescr::ConstOne) { return emulate_negate(a); } if (b & VarDescr::_Zero) { @@ -276,11 +266,6 @@ int emulate_div(int a, int b) { } else if (!(~v & (VarDescr::_Pos | VarDescr::_Neg))) { r |= VarDescr::_Neg; } - if (u & (VarDescr::_Bit | VarDescr::_Bool)) { - r |= VarDescr::_Bit; - } else if (!(~v & (VarDescr::_Bit | VarDescr::_Bool))) { - r |= VarDescr::_Bool; - } return r; } @@ -297,9 +282,7 @@ int emulate_rshift(int a, int b) { } int emulate_mod(int a, int b, int round_mode = -1) { - if ((b & (VarDescr::_NonZero | VarDescr::_Bit)) == (VarDescr::_NonZero | VarDescr::_Bit)) { - return VarDescr::ConstZero; - } else if ((b & (VarDescr::_NonZero | VarDescr::_Bool)) == (VarDescr::_NonZero | VarDescr::_Bool)) { + if ((b & VarDescr::ConstOne) == VarDescr::ConstOne) { return VarDescr::ConstZero; } if (b & VarDescr::_Zero) { @@ -321,14 +304,6 @@ int emulate_mod(int a, int b, int round_mode = -1) 
{ } else if (round_mode > 0) { r |= emulate_negate(b) & (VarDescr::_Pos | VarDescr::_Neg); } - if (a & (VarDescr::_Bit | VarDescr::_Bool)) { - if (r & VarDescr::_Pos) { - r |= VarDescr::_Bit; - } - if (r & VarDescr::_Neg) { - r |= VarDescr::_Bool; - } - } if (b & VarDescr::_Even) { r |= a & (VarDescr::_Even | VarDescr::_Odd); } @@ -513,6 +488,18 @@ AsmOp compile_unary_plus(std::vector& res, std::vector& args return AsmOp::Nop(); } +AsmOp compile_logical_not(std::vector& res, std::vector& args, SrcLocation where) { + tolk_assert(res.size() == 1 && args.size() == 1); + VarDescr &r = res[0], &x = args[0]; + if (x.is_int_const()) { + r.set_const(x.int_const == 0 ? -1 : 0); + x.unused(); + return push_const(r.int_const); + } + r.val = VarDescr::ValBool; + return exec_op("0 EQINT", 1); +} + AsmOp compile_bitwise_and(std::vector& res, std::vector& args, SrcLocation where) { tolk_assert(res.size() == 1 && args.size() == 2); VarDescr &r = res[0], &x = args[0], &y = args[1]; @@ -976,9 +963,14 @@ AsmOp compile_throw(std::vector& res, std::vector& args, Src } } -AsmOp compile_cond_throw(std::vector& res, std::vector& args, bool mode) { - tolk_assert(res.empty() && args.size() == 2); - VarDescr &x = args[0], &y = args[1]; +AsmOp compile_throw_if_unless(std::vector& res, std::vector& args) { + tolk_assert(res.empty() && args.size() == 3); + VarDescr &x = args[0], &y = args[1], &z = args[2]; + if (!z.always_true() && !z.always_false()) { + throw Fatal("invalid usage of built-in symbol"); + } + bool mode = z.always_true(); + z.unused(); std::string suff = (mode ? "IF" : "IFNOT"); bool skip_cond = false; if (y.always_true() || y.always_false()) { @@ -1008,27 +1000,6 @@ AsmOp compile_throw_arg(std::vector& res, std::vector& args, } } -AsmOp compile_cond_throw_arg(std::vector& res, std::vector& args, bool mode) { - tolk_assert(res.empty() && args.size() == 3); - VarDescr &x = args[1], &y = args[2]; - std::string suff = (mode ? 
"IF" : "IFNOT"); - bool skip_cond = false; - if (y.always_true() || y.always_false()) { - y.unused(); - skip_cond = true; - if (y.always_true() != mode) { - x.unused(); - return AsmOp::Nop(); - } - } - if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { - x.unused(); - return skip_cond ? exec_arg_op("THROWARG", x.int_const, 1, 0) : exec_arg_op("THROWARG"s + suff, x.int_const, 2, 0); - } else { - return skip_cond ? exec_op("THROWARGANY", 2, 0) : exec_op("THROWARGANY"s + suff, 3, 0); - } -} - AsmOp compile_bool_const(std::vector& res, std::vector& args, bool val) { tolk_assert(res.size() == 1 && args.empty()); VarDescr& r = res[0]; @@ -1098,15 +1069,9 @@ AsmOp compile_tuple_at(std::vector& res, std::vector& args, return exec_op("INDEXVAR", 2, 1); } -// int null?(X arg) +// fun __isNull(X arg): int AsmOp compile_is_null(std::vector& res, std::vector& args, SrcLocation) { tolk_assert(args.size() == 1 && res.size() == 1); - auto &x = args[0], &r = res[0]; - if (x.always_null() || x.always_not_null()) { - x.unused(); - r.set_const(x.always_null() ? 
-1 : 0); - return push_const(r.int_const); - } res[0].val = VarDescr::ValBool; return exec_op("ISNULL", 1, 1); } @@ -1131,7 +1096,6 @@ void define_builtins() { auto XY = TypeExpr::new_tensor({X, Y}); auto arith_bin_op = TypeExpr::new_map(Int2, Int); auto arith_un_op = TypeExpr::new_map(Int, Int); - auto impure_bin_op = TypeExpr::new_map(Int2, Unit); auto impure_un_op = TypeExpr::new_map(Int, Unit); auto fetch_int_op = TypeExpr::new_map(SliceInt, SliceInt); auto prefetch_int_op = TypeExpr::new_map(SliceInt, Int); @@ -1142,7 +1106,6 @@ void define_builtins() { auto prefetch_slice_op = TypeExpr::new_map(SliceInt, Slice); //auto arith_null_op = TypeExpr::new_map(TypeExpr::new_unit(), Int); auto throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit)); - auto cond_throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int, Int}), Unit)); // prevent unused vars warnings (there vars are created to acquire initial id of TypeExpr::value) static_cast(Z); @@ -1158,9 +1121,6 @@ void define_builtins() { define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0)); define_builtin_func("_^/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1)); define_builtin_func("_%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1)); - define_builtin_func("_~%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, 0)); - define_builtin_func("_^%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, 1)); - define_builtin_func("_/%_", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2)); define_builtin_func("divmod", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2)); define_builtin_func("~divmod", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2)); define_builtin_func("moddiv", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2), {}, {1, 0}); @@ -1169,23 +1129,18 @@ void define_builtins() { define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, 
_3, -1)); define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0)); define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1)); + define_builtin_func("!_", arith_un_op, compile_logical_not); + define_builtin_func("~_", arith_un_op, compile_bitwise_not); define_builtin_func("_&_", arith_bin_op, compile_bitwise_and); define_builtin_func("_|_", arith_bin_op, compile_bitwise_or); define_builtin_func("_^_", arith_bin_op, compile_bitwise_xor); - define_builtin_func("~_", arith_un_op, compile_bitwise_not); define_builtin_func("^_+=_", arith_bin_op, compile_add); define_builtin_func("^_-=_", arith_bin_op, compile_sub); define_builtin_func("^_*=_", arith_bin_op, compile_mul); define_builtin_func("^_/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1)); - define_builtin_func("^_~/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0)); - define_builtin_func("^_^/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1)); define_builtin_func("^_%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1)); - define_builtin_func("^_~%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, 0)); - define_builtin_func("^_^%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, 1)); define_builtin_func("^_<<=_", arith_bin_op, compile_lshift); define_builtin_func("^_>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1)); - define_builtin_func("^_~>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0)); - define_builtin_func("^_^>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1)); define_builtin_func("^_&=_", arith_bin_op, compile_bitwise_and); define_builtin_func("^_|=_", arith_bin_op, compile_bitwise_or); define_builtin_func("^_^=_", arith_bin_op, compile_bitwise_xor); @@ -1200,17 +1155,13 @@ void define_builtins() { define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6)); define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 3)); 
define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7)); - define_builtin_func("true", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true)); - define_builtin_func("false", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false)); - // define_builtin_func("null", Null, AsmOp::Const("PUSHNULL")); - define_builtin_func("nil", TypeExpr::new_map(TypeExpr::new_unit(), Tuple), AsmOp::Const("PUSHNULL")); - define_builtin_func("null?", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null); - define_builtin_func("throw", impure_un_op, compile_throw, true); - define_builtin_func("throw_if", impure_bin_op, std::bind(compile_cond_throw, _1, _2, true), true); - define_builtin_func("throw_unless", impure_bin_op, std::bind(compile_cond_throw, _1, _2, false), true); - define_builtin_func("throw_arg", throw_arg_op, compile_throw_arg, true); - define_builtin_func("throw_arg_if", cond_throw_arg_op, std::bind(compile_cond_throw_arg, _1, _2, true), true); - define_builtin_func("throw_arg_unless", cond_throw_arg_op, std::bind(compile_cond_throw_arg, _1, _2, false), true); + define_builtin_func("__true", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true)); + define_builtin_func("__false", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false)); + define_builtin_func("__null", TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_unit(), X)), AsmOp::Const("PUSHNULL")); + define_builtin_func("__isNull", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null); + define_builtin_func("__throw", impure_un_op, compile_throw, true); + define_builtin_func("__throw_arg", throw_arg_op, compile_throw_arg, true); + define_builtin_func("__throw_if_unless", TypeExpr::new_map(Int3, Unit), 
std::bind(compile_throw_if_unless, _1, _2), true); define_builtin_func("load_int", fetch_int_op, std::bind(compile_fetch_int, _1, _2, true, true), {}, {1, 0}); define_builtin_func("load_uint", fetch_int_op, std::bind(compile_fetch_int, _1, _2, true, false), {}, {1, 0}); define_builtin_func("preload_int", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, true)); diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp index a5d432ee0..9a90a3ed9 100644 --- a/tolk/codegen.cpp +++ b/tolk/codegen.cpp @@ -497,11 +497,10 @@ bool Op::generate_code_step(Stack& stack) { asm_fv->compile(stack.o, res, args, where); // compile res := f (args) } else { auto fv = dynamic_cast(fun_ref->value); - // todo can be fv == nullptr? std::string name = G.symbols.get_name(fun_ref->sym_idx); - if (fv && (fv->is_inline() || fv->is_inline_ref())) { + if (fv->is_inline() || fv->is_inline_ref()) { stack.o << AsmOp::Custom(name + " INLINECALLDICT", (int)right.size(), (int)left.size()); - } else if (fv && fv->code && fv->code->require_callxargs) { + } else if (fv->code && fv->code->require_callxargs) { stack.o << AsmOp::Custom(name + (" PREPAREDICT"), 0, 2); exec_callxargs((int)right.size() + 1, (int)left.size()); } else { diff --git a/tolk/compiler-state.cpp b/tolk/compiler-state.cpp index a609d88d5..fb70022fa 100644 --- a/tolk/compiler-state.cpp +++ b/tolk/compiler-state.cpp @@ -15,33 +15,42 @@ along with TON Blockchain Library. If not, see . */ #include "compiler-state.h" +#include +#include namespace tolk { -std::string tolk_version{"0.5.0"}; - CompilerState G; // the only mutable global variable in tolk internals -void GlobalPragma::enable(SrcLocation loc) { - if (deprecated_from_v_) { - loc.show_warning(PSTRING() << "#pragma " << name_ << - " is deprecated since Tolk v" << deprecated_from_v_ << - ". 
Please, remove this line from your code."); - return; - } - if (!loc.get_src_file()->is_entrypoint_file()) { - // todo generally it's not true; rework pragmas completely - loc.show_warning(PSTRING() << "#pragma " << name_ << - " should be used in the main file only."); +void ExperimentalOption::mark_deprecated(const char* deprecated_from_v, const char* deprecated_reason) { + this->deprecated_from_v = deprecated_from_v; + this->deprecated_reason = deprecated_reason; +} + +void CompilerSettings::enable_experimental_option(std::string_view name) { + ExperimentalOption* to_enable = nullptr; + + if (name == remove_unused_functions.name) { + to_enable = &remove_unused_functions; } - enabled_ = true; + if (to_enable == nullptr) { + std::cerr << "unknown experimental option: " << name << std::endl; + } else if (to_enable->deprecated_from_v) { + std::cerr << "experimental option " << name << " " + << "is deprecated since Tolk v" << to_enable->deprecated_from_v + << ": " << to_enable->deprecated_reason << std::endl; + } else { + to_enable->enabled = true; + } } -void GlobalPragma::always_on_and_deprecated(const char *deprecated_from_v) { - deprecated_from_v_ = deprecated_from_v; - enabled_ = true; +void CompilerSettings::parse_experimental_options_cmd_arg(const std::string& cmd_arg) { + std::istringstream stream(cmd_arg); + std::string token; + while (std::getline(stream, token, ',')) { + enable_experimental_option(token); + } } - } // namespace tolk diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index 324a21afc..23df230b6 100644 --- a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -24,21 +24,21 @@ namespace tolk { -extern std::string tolk_version; +// with cmd option -x, the user can pass experimental options to use +class ExperimentalOption { + friend struct CompilerSettings; -class GlobalPragma { - std::string name_; - bool enabled_ = false; - const char* deprecated_from_v_ = nullptr; + const std::string_view name; + bool enabled = false; + const char* 
deprecated_from_v = nullptr; // when an option becomes deprecated (after the next compiler release), + const char* deprecated_reason = nullptr; // but the user still passes it, we'll warn to stderr public: - explicit GlobalPragma(std::string name) : name_(std::move(name)) { } + explicit ExperimentalOption(std::string_view name) : name(name) {} - const std::string& name() const { return name_; } + void mark_deprecated(const char* deprecated_from_v, const char* deprecated_reason); - bool enabled() const { return enabled_; } - void enable(SrcLocation loc); - void always_on_and_deprecated(const char* deprecated_from_v); + explicit operator bool() const { return enabled; } }; // CompilerSettings contains settings that can be passed via cmd line or (partially) wasm envelope. @@ -58,6 +58,11 @@ struct CompilerSettings { std::string stdlib_filename; FsReadCallback read_callback; + + ExperimentalOption remove_unused_functions{"remove-unused-functions"}; + + void enable_experimental_option(std::string_view name); + void parse_experimental_options_cmd_arg(const std::string& cmd_arg); }; // CompilerState contains a mutable state that is changed while the compilation is going on. 
@@ -78,9 +83,6 @@ struct CompilerState { AllRegisteredSrcFiles all_src_files; std::string generated_from; - GlobalPragma pragma_allow_post_modification{"allow-post-modification"}; - GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"}; - GlobalPragma pragma_remove_unused_functions{"remove-unused-functions"}; bool is_verbosity(int gt_eq) const { return settings.verbosity >= gt_eq; } }; diff --git a/tolk/gen-abscode.cpp b/tolk/gen-abscode.cpp index 09a926867..97fb5d3f2 100644 --- a/tolk/gen-abscode.cpp +++ b/tolk/gen-abscode.cpp @@ -148,8 +148,7 @@ bool Expr::deduce_type() { int Expr::define_new_vars(CodeBlob& code) { switch (cls) { case _Tensor: - case _MkTuple: - case _TypeApply: { + case _MkTuple: { int res = 0; for (const auto& x : args) { res += x->define_new_vars(code); @@ -174,8 +173,7 @@ int Expr::define_new_vars(CodeBlob& code) { int Expr::predefine_vars() { switch (cls) { case _Tensor: - case _MkTuple: - case _TypeApply: { + case _MkTuple: { int res = 0; for (const auto& x : args) { res += x->predefine_vars(); @@ -210,12 +208,6 @@ void add_set_globs(CodeBlob& code, std::vector>& g } std::vector pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, SrcLocation here) { - while (lhs->is_type_apply()) { - lhs = lhs->args.at(0); - } - while (rhs->is_type_apply()) { - rhs = rhs->args.at(0); - } if (lhs->is_mktuple()) { if (rhs->is_mktuple()) { return pre_compile_let(code, lhs->args.at(0), rhs->args.at(0), here); @@ -301,7 +293,7 @@ std::vector pre_compile_tensor(const std::vector& args, CodeB } std::vector Expr::pre_compile(CodeBlob& code, std::vector>* lval_globs) const { - if (lval_globs && !(cls == _Tensor || cls == _Var || cls == _Hole || cls == _TypeApply || cls == _GlobVar)) { + if (lval_globs && !(cls == _Tensor || cls == _Var || cls == _Hole || cls == _GlobVar)) { std::cerr << "lvalue expression constructor is " << cls << std::endl; throw Fatal{"cannot compile lvalue expression with unknown constructor"}; } @@ -344,8 +336,6 @@ std::vector 
Expr::pre_compile(CodeBlob& code, std::vectorpre_compile(code, lval_globs); case _Var: case _Hole: if (val < 0) { diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp index d2c05f348..0a2dd79c2 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -158,8 +158,7 @@ struct ChunkInlineComment final : ChunkLexerBase { struct ChunkMultilineComment final : ChunkLexerBase { bool parse(Lexer* lex) const override { while (!lex->is_eof()) { - // todo drop -} later - if ((lex->char_at() == '-' && lex->char_at(1) == '}') || (lex->char_at() == '*' && lex->char_at(1) == '/')) { + if (lex->char_at() == '*' && lex->char_at(1) == '/') { lex->skip_chars(2); return true; } @@ -221,6 +220,22 @@ struct ChunkMultilineString final : ChunkLexerBase { } }; +// An annotation for a function (in the future, for vars also): +// @inline and others +struct ChunkAnnotation final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + const char* str_begin = lex->c_str(); + lex->skip_chars(1); + while (std::isalnum(lex->char_at()) || lex->char_at() == '_') { + lex->skip_chars(1); + } + + std::string_view str_val(str_begin, lex->c_str() - str_begin); + lex->add_token(tok_annotation_at, str_val); + return true; + } +}; + // A number, may be a hex one. struct ChunkNumber final : ChunkLexerBase { bool parse(Lexer* lex) const override { @@ -255,32 +270,6 @@ struct ChunkNumber final : ChunkLexerBase { } }; -// Anything starting from # is a compiler directive. -// Technically, #include and #pragma can be mapped as separate chunks, -// but storing such long strings in a trie increases its memory usage. 
-struct ChunkCompilerDirective final : ChunkLexerBase { - bool parse(Lexer* lex) const override { - const char* str_begin = lex->c_str(); - - lex->skip_chars(1); - while (std::isalnum(lex->char_at())) { - lex->skip_chars(1); - } - - std::string_view str_val(str_begin, lex->c_str() - str_begin); - if (str_val == "#include") { - lex->add_token(tok_include, str_val); - return true; - } - if (str_val == "#pragma") { - lex->add_token(tok_pragma, str_val); - return true; - } - - lex->error("unknown compiler directive"); - } -}; - // Tokens like !=, &, etc. emit just a simple TokenType. // Since they are stored in trie, "parsing" them is just skipping len chars. struct ChunkSimpleToken final : ChunkLexerBase { @@ -307,23 +296,9 @@ struct ChunkSkipWhitespace final : ChunkLexerBase { }; // Here we handle corner cases of grammar that are requested on demand. -// E.g., for 'pragma version >0.5.0', '0.5.0' should be parsed specially to emit tok_semver. +// E.g., for 'tolk >0.5.0', '0.5.0' should be parsed specially to emit tok_semver. // See TolkLanguageGrammar::parse_next_chunk_special(). 
struct ChunkSpecialParsing { - static bool parse_pragma_name(Lexer* lex) { - const char* str_begin = lex->c_str(); - while (std::isalnum(lex->char_at()) || lex->char_at() == '-') { - lex->skip_chars(1); - } - - std::string_view str_val(str_begin, lex->c_str() - str_begin); - if (str_val.empty()) { - return false; - } - lex->add_token(tok_pragma_name, str_val); - return true; - } - static bool parse_semver(Lexer* lex) { const char* str_begin = lex->c_str(); while (std::isdigit(lex->char_at()) || lex->char_at() == '.') { @@ -358,53 +333,55 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { case 3: if (str == "int") return tok_int; if (str == "var") return tok_var; + if (str == "fun") return tok_fun; if (str == "asm") return tok_asm; if (str == "get") return tok_get; if (str == "try") return tok_try; - if (str == "nil") return tok_nil; + if (str == "val") return tok_val; break; case 4: if (str == "else") return tok_else; if (str == "true") return tok_true; - if (str == "pure") return tok_pure; - if (str == "then") return tok_then; if (str == "cell") return tok_cell; - if (str == "cont") return tok_cont; + if (str == "null") return tok_null; + if (str == "void") return tok_void; + if (str == "bool") return tok_bool; + if (str == "auto") return tok_auto; + if (str == "tolk") return tok_tolk; + if (str == "type") return tok_type; + if (str == "enum") return tok_enum; break; case 5: if (str == "slice") return tok_slice; if (str == "tuple") return tok_tuple; if (str == "const") return tok_const; if (str == "false") return tok_false; + if (str == "redef") return tok_redef; if (str == "while") return tok_while; - if (str == "until") return tok_until; + if (str == "break") return tok_break; + if (str == "throw") return tok_throw; if (str == "catch") return tok_catch; - if (str == "ifnot") return tok_ifnot; + if (str == "infix") return tok_infix; break; case 6: if (str == "return") return tok_return; - if (str == "repeat") return tok_repeat; - if (str == "elseif") 
return tok_elseif; - if (str == "forall") return tok_forall; - if (str == "extern") return tok_extern; + if (str == "assert") return tok_assert; + if (str == "import") return tok_import; if (str == "global") return tok_global; - if (str == "impure") return tok_impure; - if (str == "inline") return tok_inline; + if (str == "repeat") return tok_repeat; + if (str == "struct") return tok_struct; + if (str == "export") return tok_export; break; case 7: if (str == "builder") return tok_builder; if (str == "builtin") return tok_builtin; break; case 8: + if (str == "continue") return tok_continue; if (str == "operator") return tok_operator; break; - case 9: - if (str == "elseifnot") return tok_elseifnot; - if (str == "method_id") return tok_method_id; - break; - case 10: - if (str == "inline_ref") return tok_inlineref; - if (str == "auto_apply") return tok_autoapply; + case 12: + if (str == "continuation") return tok_continuation; break; default: break; @@ -418,7 +395,7 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { while (!lex->is_eof()) { char c = lex->char_at(); // the pattern of valid identifier first symbol is provided in trie, here we test for identifier middle - bool allowed_in_identifier = std::isalnum(c) || c == '_' || c == '$' || c == ':' || c == '?' || c == '!' || c == '\''; + bool allowed_in_identifier = std::isalnum(c) || c == '_' || c == '$' || c == '?' || c == '!' 
|| c == '\''; if (!allowed_in_identifier) { break; } @@ -445,12 +422,12 @@ struct ChunkIdentifierInBackticks final : ChunkLexerBase { lex->skip_chars(1); while (!lex->is_eof() && lex->char_at() != '`' && lex->char_at() != '\n') { if (std::isspace(lex->char_at())) { // probably, I'll remove this restriction after rewriting symtable and cur_sym_idx - lex->error("An identifier can't have a space in its name (even inside backticks)"); + lex->error("an identifier can't have a space in its name (even inside backticks)"); } lex->skip_chars(1); } if (lex->char_at() != '`') { - lex->error("Unclosed backtick `"); + lex->error("unclosed backtick `"); } std::string_view str_val(str_begin + 1, lex->c_str() - str_begin - 1); @@ -461,6 +438,28 @@ struct ChunkIdentifierInBackticks final : ChunkLexerBase { } }; +// Handle ~`some_method` and .`some_method` todo to be removed later +struct ChunkDotTildeAndBackticks final : ChunkLexerBase { + bool parse(Lexer* lex) const override { + const char* str_begin = lex->c_str(); + lex->skip_chars(2); + while (!lex->is_eof() && lex->char_at() != '`' && lex->char_at() != '\n') { + lex->skip_chars(1); + } + if (lex->char_at() != '`') { + lex->error("unclosed backtick `"); + } + + std::string_view in_backticks(str_begin + 2, lex->c_str() - str_begin - 2); + std::string full = std::string(1, *str_begin) + static_cast(in_backticks); + std::string* allocated = new std::string(full); + lex->skip_chars(1); + std::string_view str_val(allocated->c_str(), allocated->size()); + lex->add_token(tok_identifier, str_val); + return true; + } +}; + // // ---------------------------------------------------------------------- // Here we define a grammar of Tolk. 
@@ -477,8 +476,6 @@ struct TolkLanguageGrammar { static bool parse_next_chunk_special(Lexer* lex, TokenType parse_next_as) { switch (parse_next_as) { - case tok_pragma_name: - return ChunkSpecialParsing::parse_pragma_name(lex); case tok_semver: return ChunkSpecialParsing::parse_semver(lex); default: @@ -493,21 +490,21 @@ struct TolkLanguageGrammar { static void init() { trie.add_prefix("//", singleton()); - trie.add_prefix(";;", singleton()); trie.add_prefix("/*", singleton()); - trie.add_prefix("{-", singleton()); trie.add_prefix(R"(")", singleton()); trie.add_prefix(R"(""")", singleton()); + trie.add_prefix("@", singleton()); trie.add_prefix(" ", singleton()); trie.add_prefix("\t", singleton()); trie.add_prefix("\r", singleton()); trie.add_prefix("\n", singleton()); - trie.add_prefix("#", singleton()); trie.add_pattern("[0-9]", singleton()); // todo think of . ~ trie.add_pattern("[a-zA-Z_$.~]", singleton()); trie.add_prefix("`", singleton()); + // todo to be removed after ~ becomes invalid and . 
becomes a separate token + trie.add_pattern("[.~]`", singleton()); register_token("+", 1, tok_plus); register_token("-", 1, tok_minus); @@ -527,6 +524,7 @@ struct TolkLanguageGrammar { register_token("=", 1, tok_assign); register_token("<", 1, tok_lt); register_token(">", 1, tok_gt); + register_token("!", 1, tok_logical_not); register_token("&", 1, tok_bitwise_and); register_token("|", 1, tok_bitwise_or); register_token("^", 1, tok_bitwise_xor); @@ -536,11 +534,10 @@ struct TolkLanguageGrammar { register_token(">=", 2, tok_geq); register_token("<<", 2, tok_lshift); register_token(">>", 2, tok_rshift); + register_token("&&", 2, tok_logical_and); + register_token("||", 2, tok_logical_or); register_token("~/", 2, tok_divR); register_token("^/", 2, tok_divC); - register_token("~%", 2, tok_modR); - register_token("^%", 2, tok_modC); - register_token("/%", 2, tok_divmod); register_token("+=", 2, tok_set_plus); register_token("-=", 2, tok_set_minus); register_token("*=", 2, tok_set_mul); @@ -549,18 +546,12 @@ struct TolkLanguageGrammar { register_token("&=", 2, tok_set_bitwise_and); register_token("|=", 2, tok_set_bitwise_or); register_token("^=", 2, tok_set_bitwise_xor); - register_token("->", 2, tok_mapsto); + register_token("->", 2, tok_arrow); register_token("<=>", 3, tok_spaceship); register_token("~>>", 3, tok_rshiftR); register_token("^>>", 3, tok_rshiftC); - register_token("~/=", 3, tok_set_divR); - register_token("^/=", 3, tok_set_divC); - register_token("~%=", 3, tok_set_modR); - register_token("^%=", 3, tok_set_modC); register_token("<<=", 3, tok_set_lshift); register_token(">>=", 3, tok_set_rshift); - register_token("~>>=", 4, tok_set_rshiftR); - register_token("^>>=", 4, tok_set_rshiftC); } }; @@ -593,7 +584,7 @@ void Lexer::next() { while (cur_token_idx == last_token_idx && !is_eof()) { update_location(); if (!TolkLanguageGrammar::parse_next_chunk(this)) { - error("Failed to parse"); + error("failed to parse"); } } if (is_eof()) { @@ -616,8 +607,8 @@ void 
Lexer::error(const std::string& err_msg) const { throw ParseError(cur_location(), err_msg); } -void Lexer::on_expect_call_failed(const char* str_expected) const { - throw ParseError(cur_location(), std::string(str_expected) + " expected instead of `" + std::string(cur_str()) + "`"); +void Lexer::unexpected(const char* str_expected) const { + throw ParseError(cur_location(), "expected " + std::string(str_expected) + ", got `" + std::string(cur_str()) + "`"); } void lexer_init() { diff --git a/tolk/lexer.h b/tolk/lexer.h index 1c8188fc3..8e04018cd 100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -25,15 +25,33 @@ namespace tolk { enum TokenType { tok_empty, + tok_fun, + tok_get, + tok_type, + tok_enum, + tok_struct, + tok_operator, + tok_infix, + + tok_global, + tok_const, + tok_var, + tok_val, + tok_redef, + + tok_annotation_at, + tok_colon, + tok_asm, + tok_builtin, + tok_int_const, tok_string_const, tok_string_modifier, - - tok_identifier, - tok_true, tok_false, - tok_nil, // todo "null" keyword is still absent, "nil" in FunC is an empty tuple + tok_null, + + tok_identifier, tok_plus, tok_minus, @@ -41,7 +59,6 @@ enum TokenType { tok_div, tok_mod, tok_question, - tok_colon, tok_comma, tok_semicolon, tok_oppar, @@ -54,11 +71,13 @@ enum TokenType { tok_underscore, tok_lt, tok_gt, + tok_logical_not, + tok_logical_and, + tok_logical_or, tok_bitwise_and, tok_bitwise_or, tok_bitwise_xor, tok_bitwise_not, - tok_dot, tok_eq, tok_neq, @@ -71,71 +90,45 @@ enum TokenType { tok_rshiftC, tok_divR, tok_divC, - tok_modR, - tok_modC, - tok_divmod, tok_set_plus, tok_set_minus, tok_set_mul, tok_set_div, - tok_set_divR, - tok_set_divC, tok_set_mod, - tok_set_modR, - tok_set_modC, tok_set_lshift, tok_set_rshift, - tok_set_rshiftR, - tok_set_rshiftC, tok_set_bitwise_and, tok_set_bitwise_or, tok_set_bitwise_xor, tok_return, - tok_var, tok_repeat, tok_do, tok_while, - tok_until, + tok_break, + tok_continue, tok_try, tok_catch, + tok_throw, + tok_assert, tok_if, - tok_ifnot, - tok_then, 
tok_else, - tok_elseif, - tok_elseifnot, tok_int, tok_cell, + tok_bool, tok_slice, tok_builder, - tok_cont, + tok_continuation, tok_tuple, - tok_mapsto, - tok_forall, + tok_auto, + tok_void, + tok_arrow, - tok_extern, - tok_global, - tok_asm, - tok_impure, - tok_pure, - tok_inline, - tok_inlineref, - tok_builtin, - tok_autoapply, - tok_method_id, - tok_get, - tok_operator, - tok_infix, - tok_infixl, - tok_infixr, - tok_const, - - tok_pragma, - tok_pragma_name, + tok_tolk, tok_semver, - tok_include, + tok_import, + tok_export, tok_eof }; @@ -167,9 +160,6 @@ class Lexer { location.char_offset = static_cast(p_next - p_start); } - GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD - void on_expect_call_failed(const char* str_expected) const; - public: explicit Lexer(const SrcFile* file); @@ -217,16 +207,18 @@ class Lexer { void check(TokenType next_tok, const char* str_expected) const { if (cur_token.type != next_tok) { - on_expect_call_failed(str_expected); // unlikely path, not inlined + unexpected(str_expected); // unlikely path, not inlined } } void expect(TokenType next_tok, const char* str_expected) { if (cur_token.type != next_tok) { - on_expect_call_failed(str_expected); + unexpected(str_expected); } next(); } + GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD + void unexpected(const char* str_expected) const; GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD void error(const std::string& err_msg) const; }; diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 6db123746..539652c06 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -34,10 +34,10 @@ namespace tolk { static int calc_sym_idx(std::string_view sym_name) { - return G.symbols.lookup_add(sym_name); + return G.symbols.lookup(sym_name); } -Expr* process_expr(AnyV v, CodeBlob& code, bool nv = false); +Expr* process_expr(AnyV v, CodeBlob& code); static void check_global_func(SrcLocation loc, sym_idx_t func_name) { SymDef* sym_def = lookup_symbol(func_name); @@ -77,42 +77,63 @@ 
static void check_import_exists_when_using_sym(AnyV v_usage, const SymDef* used_ } } -static Expr* process_expr(V v, CodeBlob& code, bool nv) { +static Expr* create_new_local_variable(SrcLocation loc, std::string_view var_name, TypeExpr* var_type) { + SymDef* sym = lookup_symbol(calc_sym_idx(var_name)); + if (sym) { // creating a new variable, but something found in symtable + if (sym->level != G.scope_level) { + sym = nullptr; // declaring a new variable with the same name, but in another scope + } else { + throw ParseError(loc, "redeclaration of local variable `" + static_cast(var_name) + "`"); + } + } + Expr* x = new Expr{Expr::_Var, loc}; + x->val = ~calc_sym_idx(var_name); + x->e_type = var_type; + x->flags = Expr::_IsLvalue; + return x; +} + +static Expr* create_new_underscore_variable(SrcLocation loc, TypeExpr* var_type) { + Expr* x = new Expr{Expr::_Hole, loc}; + x->val = -1; + x->flags = Expr::_IsLvalue; + x->e_type = var_type; + return x; +} + +static Expr* process_expr(V v, CodeBlob& code) { TokenType t = v->tok; std::string operator_name = static_cast(v->operator_name); - if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || t == tok_set_divR || t == tok_set_divC || - t == tok_set_mod || t == tok_set_modC || t == tok_set_modR || t == tok_set_lshift || t == tok_set_rshift || t == tok_set_rshiftC || - t == tok_set_rshiftR || t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { - Expr* x = process_expr(v->get_lhs(), code, nv); + if (t == tok_set_plus || t == tok_set_minus || t == tok_set_mul || t == tok_set_div || + t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || + t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { + Expr* x = process_expr(v->get_lhs(), code); x->chk_lvalue(); x->chk_rvalue(); sym_idx_t name = G.symbols.lookup_add("^_" + operator_name + "_"); - Expr* y = process_expr(v->get_rhs(), code, false); + Expr* y = 
process_expr(v->get_rhs(), code); y->chk_rvalue(); Expr* z = new Expr{Expr::_Apply, name, {x, y}}; z->here = v->loc; - z->set_val(t); z->flags = Expr::_IsRvalue; z->deduce_type(); Expr* res = new Expr{Expr::_Letop, {x->copy(), z}}; res->here = v->loc; - res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue; - res->set_val(t); + res->flags = x->flags | Expr::_IsRvalue; res->deduce_type(); return res; } if (t == tok_assign) { - Expr* x = process_expr(v->get_lhs(), code, nv); + Expr* x = process_expr(v->get_lhs(), code); x->chk_lvalue(); - Expr* y = process_expr(v->get_rhs(), code, false); + Expr* y = process_expr(v->get_rhs(), code); y->chk_rvalue(); x->predefine_vars(); x->define_new_vars(code); Expr* res = new Expr{Expr::_Letop, {x, y}}; res->here = v->loc; - res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue; - res->set_val(t); + res->flags = x->flags | Expr::_IsRvalue; res->deduce_type(); return res; } @@ -120,20 +141,21 @@ static Expr* process_expr(V v, CodeBlob& code, bool nv) { t == tok_bitwise_and || t == tok_bitwise_or || t == tok_bitwise_xor || t == tok_eq || t == tok_lt || t == tok_gt || t == tok_leq || t == tok_geq || t == tok_neq || t == tok_spaceship || t == tok_lshift || t == tok_rshift || t == tok_rshiftC || t == tok_rshiftR || - t == tok_mul || t == tok_div || t == tok_mod || t == tok_divmod || - t == tok_divC || t == tok_divR || t == tok_modC || t == tok_modR) { - Expr* res = process_expr(v->get_lhs(), code, nv); + t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { + Expr* res = process_expr(v->get_lhs(), code); res->chk_rvalue(); sym_idx_t name = G.symbols.lookup_add("_" + operator_name + "_"); - Expr* x = process_expr(v->get_rhs(), code, false); + Expr* x = process_expr(v->get_rhs(), code); x->chk_rvalue(); res = new Expr{Expr::_Apply, name, {res, x}}; res->here = v->loc; - res->set_val(t); res->flags = Expr::_IsRvalue; res->deduce_type(); return res; } + if (t == tok_logical_and || t == tok_logical_or) 
{ + v->error("logical operators are not supported yet"); + } v->error("unsupported binary operator"); } @@ -141,7 +163,7 @@ static Expr* process_expr(V v, CodeBlob& code, bool nv) { static Expr* process_expr(V v, CodeBlob& code) { TokenType t = v->tok; sym_idx_t name = G.symbols.lookup_add(static_cast(v->operator_name) + "_"); - Expr* x = process_expr(v->get_rhs(), code, false); + Expr* x = process_expr(v->get_rhs(), code); x->chk_rvalue(); // here's an optimization to convert "-1" (tok_minus tok_int_const) to a const -1, not to Expr::Apply(-,1) @@ -151,28 +173,26 @@ static Expr* process_expr(V v, CodeBlob& code) { // `var snd = - 1;` // is Expr::Apply(-), a comment "snd=1" is lost in stack layout comments, and so on // hence, when after grammar modification tok_minus became a true unary operator (not a part of a number), // and thus to preserve existing behavior until compiler parts are completely rewritten, handle this case here - if (x->cls == Expr::_Const) { - if (t == tok_bitwise_not) { - x->intval = ~x->intval; - } else if (t == tok_minus) { - x->intval = -x->intval; - } + if (t == tok_minus && x->cls == Expr::_Const) { + x->intval = -x->intval; if (!x->intval->signed_fits_bits(257)) { v->error("integer overflow"); } return x; } + if (t == tok_plus && x->cls == Expr::_Const) { + return x; + } auto res = new Expr{Expr::_Apply, name, {x}}; res->here = v->loc; - res->set_val(t); res->flags = Expr::_IsRvalue; res->deduce_type(); return res; } -static Expr* process_expr(V v, CodeBlob& code, bool nv) { - Expr* res = process_expr(v->get_lhs(), code, nv); +static Expr* process_expr(V v, CodeBlob& code) { + Expr* res = process_expr(v->get_lhs(), code); bool modify = v->method_name[0] == '~'; Expr* obj = res; if (modify) { @@ -188,7 +208,6 @@ static Expr* process_expr(V v, CodeBlob& code, bool nv) { const SymDef* sym1 = lookup_symbol(name1); if (sym1 && dynamic_cast(sym1->value)) { name_idx = name1; - sym = sym1; } } } @@ -198,7 +217,7 @@ static Expr* process_expr(V v, 
CodeBlob& code, bool nv) { if (!val) { v->error("undefined method call"); } - Expr* x = process_expr(v->get_arg(), code, false); + Expr* x = process_expr(v->get_arg(), code); x->chk_rvalue(); if (x->cls == Expr::_Tensor) { res = new Expr{Expr::_Apply, name_idx, {obj}}; @@ -210,7 +229,7 @@ static Expr* process_expr(V v, CodeBlob& code, bool nv) { res->flags = Expr::_IsRvalue | (val->is_marked_as_pure() ? 0 : Expr::_IsImpure); res->deduce_type(); if (modify) { - auto tmp = res; + Expr* tmp = res; res = new Expr{Expr::_LetFirst, {obj->copy(), tmp}}; res->here = v->loc; res->flags = tmp->flags; @@ -220,12 +239,12 @@ static Expr* process_expr(V v, CodeBlob& code, bool nv) { return res; } -static Expr* process_expr(V v, CodeBlob& code, bool nv) { - Expr* cond = process_expr(v->get_cond(), code, nv); +static Expr* process_expr(V v, CodeBlob& code) { + Expr* cond = process_expr(v->get_cond(), code); cond->chk_rvalue(); - Expr* x = process_expr(v->get_when_true(), code, false); + Expr* x = process_expr(v->get_when_true(), code); x->chk_rvalue(); - Expr* y = process_expr(v->get_when_false(), code, false); + Expr* y = process_expr(v->get_when_false(), code); y->chk_rvalue(); Expr* res = new Expr{Expr::_CondExpr, {cond, x, y}}; res->here = v->loc; @@ -234,9 +253,14 @@ static Expr* process_expr(V v, CodeBlob& code, bool nv) { return res; } -static Expr* process_expr(V v, CodeBlob& code, bool nv) { - Expr* res = process_expr(v->get_called_f(), code, nv); - Expr* x = process_expr(v->get_called_arg(), code, false); +static Expr* process_expr(V v, CodeBlob& code) { + // special error for "null()" which is a FunC syntax + if (v->get_called_f()->type == ast_null_keyword) { + v->error("null is not a function: use `null`, not `null()`"); + } + + Expr* res = process_expr(v->get_called_f(), code); + Expr* x = process_expr(v->get_called_arg(), code); x->chk_rvalue(); res = make_func_apply(res, x); res->here = v->loc; @@ -244,7 +268,7 @@ static Expr* process_expr(V v, CodeBlob& code, bool 
nv) { return res; } -static Expr* process_expr(V v, CodeBlob& code, bool nv) { +static Expr* process_expr(V v, CodeBlob& code) { if (v->empty()) { Expr* res = new Expr{Expr::_Tensor, {}}; res->flags = Expr::_IsRvalue; @@ -253,13 +277,13 @@ static Expr* process_expr(V v, CodeBlob& code, bool nv) { return res; } - Expr* res = process_expr(v->get_item(0), code, nv); + Expr* res = process_expr(v->get_item(0), code); std::vector type_list; type_list.push_back(res->e_type); int f = res->flags; res = new Expr{Expr::_Tensor, {res}}; for (int i = 1; i < v->size(); ++i) { - Expr* x = process_expr(v->get_item(i), code, nv); + Expr* x = process_expr(v->get_item(i), code); res->pb_arg(x); f &= x->flags; type_list.push_back(x->e_type); @@ -270,25 +294,7 @@ static Expr* process_expr(V v, CodeBlob& code, bool nv) { return res; } -static Expr* process_expr(V v, CodeBlob& code) { - Expr* x = process_expr(v->get_variable_or_list(), code, true); - x->chk_lvalue(); // chk_lrvalue() ? - Expr* res = new Expr{Expr::_TypeApply, {x}}; - res->e_type = v->declared_type; - res->here = v->loc; - try { - unify(res->e_type, x->e_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot transform expression of type " << x->e_type << " to explicitly requested type " << res->e_type - << ": " << ue; - v->error(os.str()); - } - res->flags = x->flags; - return res; -} - -static Expr* process_expr(V v, CodeBlob& code, bool nv) { +static Expr* process_expr(V v, CodeBlob& code) { if (v->empty()) { Expr* res = new Expr{Expr::_Tensor, {}}; res->flags = Expr::_IsRvalue; @@ -301,13 +307,13 @@ static Expr* process_expr(V v, CodeBlob& code, bool nv) { return res; } - Expr* res = process_expr(v->get_item(0), code, nv); + Expr* res = process_expr(v->get_item(0), code); std::vector type_list; type_list.push_back(res->e_type); int f = res->flags; res = new Expr{Expr::_Tensor, {res}}; for (int i = 1; i < v->size(); ++i) { - Expr* x = process_expr(v->get_item(i), code, nv); + Expr* x = 
process_expr(v->get_item(i), code); res->pb_arg(x); f &= x->flags; type_list.push_back(x->e_type); @@ -364,7 +370,7 @@ static Expr* process_expr(V v) { unsigned char buff[128]; int bits = (int)td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); if (bits < 0) { - v->error("Invalid hex bitstring constant '" + str + "'"); + v->error("invalid hex bitstring constant '" + str + "'"); } break; } @@ -406,32 +412,23 @@ static Expr* process_expr(V v) { } static Expr* process_expr(V v) { - SymDef* sym = lookup_symbol(calc_sym_idx(v->bool_val ? "true" : "false")); - tolk_assert(sym); - Expr* res = new Expr{Expr::_Apply, sym, {}}; + SymDef* builtin_sym = lookup_symbol(calc_sym_idx(v->bool_val ? "__true" : "__false")); + Expr* res = new Expr{Expr::_Apply, builtin_sym, {}}; res->flags = Expr::_IsRvalue; res->deduce_type(); return res; } -static Expr* process_expr([[maybe_unused]] V v) { - SymDef* sym = lookup_symbol(calc_sym_idx("nil")); - tolk_assert(sym); - Expr* res = new Expr{Expr::_Apply, sym, {}}; +static Expr* process_expr([[maybe_unused]] V v) { + SymDef* builtin_sym = lookup_symbol(calc_sym_idx("__null")); + Expr* res = new Expr{Expr::_Apply, builtin_sym, {}}; res->flags = Expr::_IsRvalue; res->deduce_type(); return res; } -static Expr* process_expr(V v, bool nv) { +static Expr* process_identifier(V v) { SymDef* sym = lookup_symbol(calc_sym_idx(v->name)); - if (nv && sym) { - if (sym->level != G.scope_level) { - sym = nullptr; // declaring a new variable with the same name, but in another scope - } else { - v->error("redeclaration of local variable `" + static_cast(v->name) + "`"); - } - } if (sym && dynamic_cast(sym->value)) { check_import_exists_when_using_sym(v, sym); auto val = dynamic_cast(sym->value); @@ -455,7 +452,7 @@ static Expr* process_expr(V v, bool nv) { res->strval = val->get_str_value(); res->e_type = TypeExpr::new_atomic(tok_slice); } else { - v->error("Invalid symbolic constant type"); + v->error("invalid 
symbolic constant type"); } return res; } @@ -463,86 +460,65 @@ static Expr* process_expr(V v, bool nv) { check_import_exists_when_using_sym(v, sym); } Expr* res = new Expr{Expr::_Var, v->loc}; - if (nv) { - res->val = ~calc_sym_idx(v->name); - res->e_type = TypeExpr::new_hole(); - res->flags = Expr::_IsLvalue; - // std::cerr << "defined new variable " << lex.cur().str << " : " << res->e_type << std::endl; + if (!sym) { + check_global_func(v->loc, calc_sym_idx(v->name)); + sym = lookup_symbol(calc_sym_idx(v->name)); + } + res->sym = sym; + SymVal* val = nullptr; + bool impure = false; + if (sym) { + val = dynamic_cast(sym->value); + } + if (!val) { + v->error("undefined identifier '" + static_cast(v->name) + "'"); + } + if (val->kind == SymValKind::_Func) { + res->e_type = val->get_type(); + res->cls = Expr::_GlobFunc; + impure = !dynamic_cast(val)->is_marked_as_pure(); } else { - if (!sym) { - check_global_func(v->loc, calc_sym_idx(v->name)); - sym = lookup_symbol(calc_sym_idx(v->name)); - } - res->sym = sym; - SymVal* val = nullptr; - bool impure = false; - if (sym) { - val = dynamic_cast(sym->value); - } - if (!val) { - v->error("undefined identifier '" + static_cast(v->name) + "'"); - } - if (val->kind == SymValKind::_Func) { - res->e_type = val->get_type(); - res->cls = Expr::_GlobFunc; - impure = !dynamic_cast(val)->is_marked_as_pure(); - } else { - tolk_assert(val->idx >= 0); - res->val = val->idx; - res->e_type = val->get_type(); - // std::cerr << "accessing variable " << lex.cur().str << " : " << res->e_type << std::endl; - } - // std::cerr << "accessing symbol " << lex.cur().str << " : " << res->e_type << (val->impure ? " (impure)" : " (pure)") << std::endl; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (impure ? 
Expr::_IsImpure : 0); + tolk_assert(val->idx >= 0); + res->val = val->idx; + res->e_type = val->get_type(); + // std::cerr << "accessing variable " << lex.cur().str << " : " << res->e_type << std::endl; } + // std::cerr << "accessing symbol " << lex.cur().str << " : " << res->e_type << (val->impure ? " (impure)" : " (pure)") << std::endl; + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (impure ? Expr::_IsImpure : 0); res->deduce_type(); return res; } -Expr* process_expr(AnyV v, CodeBlob& code, bool nv) { +Expr* process_expr(AnyV v, CodeBlob& code) { switch (v->type) { case ast_binary_operator: - return process_expr(v->as(), code, nv); + return process_expr(v->as(), code); case ast_unary_operator: return process_expr(v->as(), code); case ast_dot_tilde_call: - return process_expr(v->as(), code, nv); + return process_expr(v->as(), code); case ast_ternary_operator: - return process_expr(v->as(), code, nv); + return process_expr(v->as(), code); case ast_function_call: - return process_expr(v->as(), code, nv); + return process_expr(v->as(), code); case ast_parenthesized_expr: - return process_expr(v->as()->get_expr(), code, nv); - case ast_variable_declaration: - return process_expr(v->as(), code); + return process_expr(v->as()->get_expr(), code); case ast_tensor: - return process_expr(v->as(), code, nv); + return process_expr(v->as(), code); case ast_tensor_square: - return process_expr(v->as(), code, nv); + return process_expr(v->as(), code); case ast_int_const: return process_expr(v->as()); case ast_string_const: return process_expr(v->as()); case ast_bool_const: return process_expr(v->as()); - case ast_nil_tuple: - return process_expr(v->as()); + case ast_null_keyword: + return process_expr(v->as()); case ast_identifier: - return process_expr(v->as(), nv); - - case ast_underscore: { - Expr* res = new Expr{Expr::_Hole, v->loc}; - res->val = -1; - res->flags = Expr::_IsLvalue; - res->e_type = TypeExpr::new_hole(); - return res; - } - case ast_type_expression: { - 
Expr* res = new Expr{Expr::_Type, v->loc}; - res->flags = Expr::_IsType; - res->e_type = v->as()->declared_type; - return res; - } + return process_identifier(v->as()); + case ast_underscore: + return create_new_underscore_variable(v->loc, TypeExpr::new_hole()); default: throw UnexpectedASTNodeType(v, "process_expr"); } @@ -562,6 +538,70 @@ void combine_parallel(val& x, const val y) { } } // namespace blk_fl +static Expr* process_local_vars_lhs(AnyV v, CodeBlob& code) { + switch (v->type) { + case ast_local_var: { + if (v->as()->marked_as_redef) { + return process_identifier(v->as()->get_identifier()->as()); + } + TypeExpr* declared_type = v->as()->declared_type; + if (auto v_ident = v->as()->get_identifier()->try_as()) { + return create_new_local_variable(v->loc, v_ident->name, declared_type ? declared_type : TypeExpr::new_hole()); + } else { + return create_new_underscore_variable(v->loc, declared_type ? declared_type : TypeExpr::new_hole()); + } + } + case ast_parenthesized_expr: + return process_local_vars_lhs(v->as()->get_expr(), code); + case ast_tensor: { + std::vector type_list; + Expr* res = new Expr{Expr::_Tensor, v->loc}; + for (AnyV item : v->as()->get_items()) { + Expr* x = process_local_vars_lhs(item, code); + res->pb_arg(x); + res->flags |= x->flags; + type_list.push_back(x->e_type); + } + res->e_type = TypeExpr::new_tensor(std::move(type_list)); + return res; + } + case ast_tensor_square: { + std::vector type_list; + Expr* res = new Expr{Expr::_Tensor, v->loc}; + for (AnyV item : v->as()->get_items()) { + Expr* x = process_local_vars_lhs(item, code); + res->pb_arg(x); + res->flags |= x->flags; + type_list.push_back(x->e_type); + } + res->e_type = TypeExpr::new_tensor(std::move(type_list)); + res = new Expr{Expr::_MkTuple, {res}}; + res->flags = res->args.at(0)->flags; + res->here = v->loc; + res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); + return res; + } + default: + throw UnexpectedASTNodeType(v, "process_local_vars_lhs"); + } +} + 
+static blk_fl::val process_vertex(V v, CodeBlob& code) { + Expr* x = process_local_vars_lhs(v->get_lhs(), code); + x->chk_lvalue(); + Expr* y = process_expr(v->get_assigned_val(), code); + y->chk_rvalue(); + x->predefine_vars(); + x->define_new_vars(code); + Expr* res = new Expr{Expr::_Letop, {x, y}}; + res->here = v->loc; + res->flags = x->flags | Expr::_IsRvalue; + res->deduce_type(); + res->chk_rvalue(); + res->pre_compile(code); + return blk_fl::end; +} + static blk_fl::val process_vertex(V v, CodeBlob& code) { Expr* expr = process_expr(v->get_return_value(), code); expr->chk_rvalue(); @@ -593,7 +633,7 @@ static void append_implicit_ret_stmt(V v, CodeBlob& code) { code.emplace_back(v->loc_end, Op::_Return); } -blk_fl::val process_stmt(AnyV v, CodeBlob& code); +blk_fl::val process_statement(AnyV v, CodeBlob& code); static blk_fl::val process_vertex(V v, CodeBlob& code, bool no_new_scope = false) { if (!no_new_scope) { @@ -606,7 +646,7 @@ static blk_fl::val process_vertex(V v, CodeBlob& code, bool no_new item->loc.show_warning("unreachable code"); warned = true; } - blk_fl::combine(res, process_stmt(item, code)); + blk_fl::combine(res, process_statement(item, code)); } if (!no_new_scope) { close_scope(); @@ -660,12 +700,37 @@ static blk_fl::val process_vertex(V v, CodeBlob& code) { return res1 | blk_fl::end; } -static blk_fl::val process_vertex(V v, CodeBlob& code) { +static blk_fl::val process_vertex(V v, CodeBlob& code) { Op& until_op = code.emplace_back(v->loc, Op::_Until); code.push_set_cur(until_op.block0); open_scope(v->loc); blk_fl::val res = process_vertex(v->get_body(), code, true); - Expr* expr = process_expr(v->get_cond(), code); + + // in TVM, there is only "do until", but in Tolk, we want "do while" + // here we negate condition to pass it forward to legacy to Op::_Until + // also, handle common situations as a hardcoded "optimization": replace (a<0) with (a>=0) and so on + // todo these hardcoded conditions should be removed from this place in the 
future + AnyV cond = v->get_cond(); + AnyV until_cond; + if (auto v_not = cond->try_as(); v_not && v_not->tok == tok_logical_not) { + until_cond = v_not->get_rhs(); + } else if (auto v_eq = cond->try_as(); v_eq && v_eq->tok == tok_eq) { + until_cond = createV(cond->loc, "!=", tok_neq, v_eq->get_lhs(), v_eq->get_rhs()); + } else if (auto v_neq = cond->try_as(); v_neq && v_neq->tok == tok_neq) { + until_cond = createV(cond->loc, "==", tok_eq, v_neq->get_lhs(), v_neq->get_rhs()); + } else if (auto v_leq = cond->try_as(); v_leq && v_leq->tok == tok_leq) { + until_cond = createV(cond->loc, ">", tok_gt, v_leq->get_lhs(), v_leq->get_rhs()); + } else if (auto v_lt = cond->try_as(); v_lt && v_lt->tok == tok_lt) { + until_cond = createV(cond->loc, ">=", tok_geq, v_lt->get_lhs(), v_lt->get_rhs()); + } else if (auto v_geq = cond->try_as(); v_geq && v_geq->tok == tok_geq) { + until_cond = createV(cond->loc, "<", tok_lt, v_geq->get_lhs(), v_geq->get_rhs()); + } else if (auto v_gt = cond->try_as(); v_gt && v_gt->tok == tok_gt) { + until_cond = createV(cond->loc, "<=", tok_leq, v_gt->get_lhs(), v_gt->get_rhs()); + } else { + until_cond = createV(cond->loc, "!", tok_logical_not, cond); + } + + Expr* expr = process_expr(until_cond, code); expr->chk_rvalue(); close_scope(); auto cnt_type = TypeExpr::new_atomic(TypeExpr::_Int); @@ -673,17 +738,65 @@ static blk_fl::val process_vertex(V v, CodeBlob& code) { unify(expr->e_type, cnt_type); } catch (UnifyError& ue) { std::ostringstream os; - os << "`until` condition value of type " << expr->e_type << " is not an integer: " << ue; + os << "`while` condition value of type " << expr->e_type << " is not an integer: " << ue; v->get_cond()->error(os.str()); } until_op.left = expr->pre_compile(code); code.close_pop_cur(v->get_body()->loc_end); if (until_op.left.size() != 1) { - v->get_cond()->error("`until` condition value is not a singleton"); + v->get_cond()->error("`while` condition value is not a singleton"); } return res & ~blk_fl::empty; } 
+static blk_fl::val process_vertex(V v, CodeBlob& code) { + std::vector args; + SymDef* builtin_sym; + if (v->has_thrown_arg()) { + builtin_sym = lookup_symbol(calc_sym_idx("__throw_arg")); + args.push_back(process_expr(v->get_thrown_arg(), code)); + args.push_back(process_expr(v->get_thrown_code(), code)); + } else { + builtin_sym = lookup_symbol(calc_sym_idx("__throw")); + args.push_back(process_expr(v->get_thrown_code(), code)); + } + + Expr* expr = new Expr{Expr::_Apply, builtin_sym, std::move(args)}; + expr->here = v->loc; + expr->flags = Expr::_IsRvalue | Expr::_IsImpure; + expr->deduce_type(); + expr->pre_compile(code); + return blk_fl::end; +} + +static blk_fl::val process_vertex(V v, CodeBlob& code) { + std::vector args(3); + if (auto v_not = v->get_cond()->try_as(); v_not && v_not->tok == tok_logical_not) { + args[0] = process_expr(v->get_thrown_code(), code); + args[1] = process_expr(v->get_cond()->as()->get_rhs(), code); + args[2] = process_expr(createV(v->loc, true), code); + } else { + args[0] = process_expr(v->get_thrown_code(), code); + args[1] = process_expr(v->get_cond(), code); + args[2] = process_expr(createV(v->loc, false), code); + } + + SymDef* builtin_sym = lookup_symbol(calc_sym_idx("__throw_if_unless")); + Expr* expr = new Expr{Expr::_Apply, builtin_sym, std::move(args)}; + expr->here = v->loc; + expr->flags = Expr::_IsRvalue | Expr::_IsImpure; + expr->deduce_type(); + expr->pre_compile(code); + return blk_fl::end; +} + +static Expr* process_catch_variable(AnyV catch_var, TypeExpr* var_type) { + if (auto v_ident = catch_var->try_as()) { + return create_new_local_variable(catch_var->loc, v_ident->name, var_type); + } + return create_new_underscore_variable(catch_var->loc, var_type); +} + static blk_fl::val process_vertex(V v, CodeBlob& code) { code.require_callxargs = true; Op& try_catch_op = code.emplace_back(v->loc, Op::_TryCatch); @@ -692,20 +805,21 @@ static blk_fl::val process_vertex(V v, CodeBlob& code) 
code.close_pop_cur(v->get_try_body()->loc_end); code.push_set_cur(try_catch_op.block1); open_scope(v->get_catch_expr()->loc); - Expr* expr = process_expr(v->get_catch_expr(), code, true); - expr->chk_lvalue(); + + // transform catch (excNo, arg) into TVM-catch (arg, excNo), where arg is untyped and thus almost useless now TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(TypeExpr::_Int)); - try { - unify(expr->e_type, tvm_error_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "`catch` arguments have incorrect type " << expr->e_type << ": " << ue; - v->get_catch_expr()->error(os.str()); - } - expr->predefine_vars(); - expr->define_new_vars(code); - try_catch_op.left = expr->pre_compile(code); - tolk_assert(try_catch_op.left.size() == 2 || try_catch_op.left.size() == 1); + const std::vector& catch_items = v->get_catch_expr()->get_items(); + tolk_assert(catch_items.size() == 2); + Expr* e_catch = new Expr{Expr::_Tensor, v->get_catch_expr()->loc}; + e_catch->pb_arg(process_catch_variable(catch_items[1], tvm_error_type->args[0])); + e_catch->pb_arg(process_catch_variable(catch_items[0], tvm_error_type->args[1])); + e_catch->flags = Expr::_IsLvalue; + e_catch->e_type = tvm_error_type; + e_catch->predefine_vars(); + e_catch->define_new_vars(code); + try_catch_op.left = e_catch->pre_compile(code); + tolk_assert(try_catch_op.left.size() == 2); + blk_fl::val res1 = process_vertex(v->get_catch_body(), code); close_scope(); code.close_pop_cur(v->get_catch_body()->loc_end); @@ -716,7 +830,7 @@ static blk_fl::val process_vertex(V v, CodeBlob& code) static blk_fl::val process_vertex(V v, CodeBlob& code) { Expr* expr = process_expr(v->get_cond(), code); expr->chk_rvalue(); - auto flag_type = TypeExpr::new_atomic(TypeExpr::_Int); + TypeExpr* flag_type = TypeExpr::new_atomic(TypeExpr::_Int); try { unify(expr->e_type, flag_type); } catch (UnifyError& ue) { @@ -743,8 +857,10 @@ static blk_fl::val process_vertex(V v, CodeBlob& 
code) { return res1; } -blk_fl::val process_stmt(AnyV v, CodeBlob& code) { +blk_fl::val process_statement(AnyV v, CodeBlob& code) { switch (v->type) { + case ast_local_vars_declaration: + return process_vertex(v->as(), code); case ast_return_statement: return process_vertex(v->as(), code); case ast_sequence: @@ -755,10 +871,14 @@ blk_fl::val process_stmt(AnyV v, CodeBlob& code) { return process_vertex(v->as(), code); case ast_if_statement: return process_vertex(v->as(), code); - case ast_do_until_statement: - return process_vertex(v->as(), code); + case ast_do_while_statement: + return process_vertex(v->as(), code); case ast_while_statement: return process_vertex(v->as(), code); + case ast_throw_statement: + return process_vertex(v->as(), code); + case ast_assert_statement: + return process_vertex(v->as(), code); case ast_try_catch_statement: return process_vertex(v->as(), code); default: { @@ -770,9 +890,9 @@ blk_fl::val process_stmt(AnyV v, CodeBlob& code) { } } -static FormalArg process_vertex(V v, int fa_idx) { +static FormalArg process_vertex(V v, int fa_idx) { if (v->get_identifier()->name.empty()) { - return std::make_tuple(v->arg_type, (SymDef*)nullptr, v->loc); + return std::make_tuple(v->param_type, (SymDef*)nullptr, v->loc); } SymDef* new_sym_def = define_symbol(calc_sym_idx(v->get_identifier()->name), true, v->loc); if (!new_sym_def) { @@ -781,8 +901,8 @@ static FormalArg process_vertex(V v, int fa_idx) { if (new_sym_def->value) { v->error("redefined argument"); } - new_sym_def->value = new SymVal{SymValKind::_Param, fa_idx, v->arg_type}; - return std::make_tuple(v->arg_type, new_sym_def, v->loc); + new_sym_def->value = new SymVal{SymValKind::_Param, fa_idx, v->param_type}; + return std::make_tuple(v->param_type, new_sym_def, v->loc); } static void convert_function_body_to_CodeBlob(V v, V v_body) { @@ -796,8 +916,8 @@ static void convert_function_body_to_CodeBlob(V v, Vflags |= CodeBlob::_ForbidImpure; } FormalArgList legacy_arg_list; - for (int i = 0; 
i < v->get_num_args(); ++i) { - legacy_arg_list.emplace_back(process_vertex(v->get_arg(i), i)); + for (int i = 0; i < v->get_num_params(); ++i) { + legacy_arg_list.emplace_back(process_vertex(v->get_param(i), i)); } blob->import_params(std::move(legacy_arg_list)); @@ -808,7 +928,7 @@ static void convert_function_body_to_CodeBlob(V v, Vloc.show_warning("unreachable code"); warned = true; } - blk_fl::combine(res, process_stmt(item, *blob)); + blk_fl::combine(res, process_statement(item, *blob)); } if (res & blk_fl::end) { append_implicit_ret_stmt(v_body, *blob); @@ -824,7 +944,7 @@ static void convert_asm_body_to_AsmOp(V v, V(sym_def->value); tolk_assert(sym_val != nullptr); - int cnt = v->get_num_args(); + int cnt = v->get_num_params(); int width = v->ret_type->get_width(); std::vector asm_ops; for (AnyV v_child : v_body->get_asm_commands()) { diff --git a/tolk/pipe-discover-parse-sources.cpp b/tolk/pipe-discover-parse-sources.cpp index f074e075d..c57c9c1d4 100644 --- a/tolk/pipe-discover-parse-sources.cpp +++ b/tolk/pipe-discover-parse-sources.cpp @@ -38,17 +38,18 @@ AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filena tolk_assert(!file->ast); file->ast = parse_src_file_to_ast(file); + // file->ast->debug_print(); for (AnyV v_toplevel : file->ast->as()->get_toplevel_declarations()) { - if (auto v_include = v_toplevel->try_as()) { + if (auto v_import = v_toplevel->try_as()) { size_t pos = file->rel_filename.rfind('/'); std::string rel_filename = pos == std::string::npos - ? v_include->get_file_name() - : file->rel_filename.substr(0, pos + 1) + v_include->get_file_name(); + ? 
v_import->get_file_name() + : file->rel_filename.substr(0, pos + 1) + v_import->get_file_name(); - SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_include->loc); + SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_import->loc); file->imports.push_back(SrcFile::ImportStatement{imported}); - v_include->mutate_set_src_file(imported); + v_import->mutate_set_src_file(imported); } } } diff --git a/tolk/pipe-find-unused-symbols.cpp b/tolk/pipe-find-unused-symbols.cpp index 0badd8538..f83579f4e 100644 --- a/tolk/pipe-find-unused-symbols.cpp +++ b/tolk/pipe-find-unused-symbols.cpp @@ -79,9 +79,7 @@ void pipeline_find_unused_symbols() { for (SymDef* func_sym : G.all_code_functions) { auto* func_val = dynamic_cast(func_sym->value); std::string name = G.symbols.get_name(func_sym->sym_idx); - if (func_val->method_id.not_null() || - name == "main" || name == "recv_internal" || name == "recv_external" || - name == "run_ticktock" || name == "split_prepare" || name == "split_install") { + if (func_val->method_id.not_null() || func_val->is_entrypoint()) { mark_function_used(func_val); } } diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp index 538dc9baa..627b510f7 100644 --- a/tolk/pipe-generate-fif-output.cpp +++ b/tolk/pipe-generate-fif-output.cpp @@ -32,7 +32,7 @@ namespace tolk { bool SymValCodeFunc::does_need_codegen() const { // when a function is declared, but not referenced from code in any way, don't generate its body - if (!is_really_used && G.pragma_remove_unused_functions.enabled()) { + if (!is_really_used && G.settings.remove_unused_functions) { return false; } // when a function is referenced like `var a = some_fn;` (or in some other non-call way), its continuation should exist @@ -137,6 +137,7 @@ void pipeline_generate_fif_output_to_std_cout() { std::cout << "// automatically generated from " << G.generated_from << std::endl; std::cout << "PROGRAM{\n"; + bool 
has_main_procedure = false; for (SymDef* func_sym : G.all_code_functions) { SymValCodeFunc* func_val = dynamic_cast(func_sym->value); tolk_assert(func_val); @@ -148,6 +149,10 @@ void pipeline_generate_fif_output_to_std_cout() { } std::string name = G.symbols.get_name(func_sym->sym_idx); + if (func_val->is_entrypoint() && (name == "main" || name == "onInternalMessage")) { + has_main_procedure = true; + } + std::cout << std::string(2, ' '); if (func_val->method_id.is_null()) { std::cout << "DECLPROC " << name << "\n"; @@ -156,10 +161,14 @@ void pipeline_generate_fif_output_to_std_cout() { } } + if (!has_main_procedure) { + throw Fatal("the contract has no entrypoint; forgot `fun onInternalMessage(...)`?"); + } + for (SymDef* gvar_sym : G.all_global_vars) { auto* glob_val = dynamic_cast(gvar_sym->value); tolk_assert(glob_val); - if (!glob_val->is_really_used && G.pragma_remove_unused_functions.enabled()) { + if (!glob_val->is_really_used && G.settings.remove_unused_functions) { if (G.is_verbosity(2)) { std::cerr << gvar_sym->name() << ": variable not generated, it's unused\n"; } diff --git a/tolk/pipe-handle-pragmas.cpp b/tolk/pipe-handle-pragmas.cpp deleted file mode 100644 index 1b0cd7d3a..000000000 --- a/tolk/pipe-handle-pragmas.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* - This file is part of TON Blockchain source code. - - TON Blockchain is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - TON Blockchain is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with TON Blockchain. If not, see . 
- - In addition, as a special exception, the copyright holders give permission - to link the code of portions of this program with the OpenSSL library. - You must obey the GNU General Public License in all respects for all - of the code used other than OpenSSL. If you modify file(s) with this - exception, you may extend this exception to your version of the file(s), - but you are not obligated to do so. If you do not wish to do so, delete this - exception statement from your version. If you delete this exception statement - from all source files in the program, then also delete it here. -*/ -#include "tolk.h" -#include "src-file.h" -#include "ast.h" -#include "compiler-state.h" -#include "td/utils/misc.h" - -namespace tolk { - -static void handle_pragma_no_arg(V v) { - std::string_view pragma_name = v->pragma_name; - if (pragma_name == G.pragma_allow_post_modification.name()) { - G.pragma_allow_post_modification.enable(v->loc); - } else if (pragma_name == G.pragma_compute_asm_ltr.name()) { - G.pragma_compute_asm_ltr.enable(v->loc); - } else if (pragma_name == G.pragma_remove_unused_functions.name()) { - G.pragma_remove_unused_functions.enable(v->loc); - } else { - v->error("unknown pragma name"); - } -} - -static void handle_pragma_version(V v) { - char op = '='; - bool eq = false; - TokenType cmp_tok = v->cmp_tok; - if (cmp_tok == tok_gt || cmp_tok == tok_geq) { - op = '>'; - eq = cmp_tok == tok_geq; - } else if (cmp_tok == tok_lt || cmp_tok == tok_leq) { - op = '<'; - eq = cmp_tok == tok_leq; - } else if (cmp_tok == tok_eq) { - op = '='; - } else if (cmp_tok == tok_bitwise_xor) { - op = '^'; - } else { - v->error("invalid comparison operator"); - } - std::string_view pragma_value = v->semver; - int sem_ver[3] = {0, 0, 0}; - char segs = 1; - auto stoi = [&](std::string_view s) { - auto R = td::to_integer_safe(static_cast(s)); - if (R.is_error()) { - v->error("invalid semver format"); - } - return R.move_as_ok(); - }; - std::istringstream 
iss_value(static_cast(pragma_value)); - for (int idx = 0; idx < 3; idx++) { - std::string s{"0"}; - std::getline(iss_value, s, '.'); - sem_ver[idx] = stoi(s); - } - // End reading semver from source code - int tolk_ver[3] = {0, 0, 0}; - std::istringstream iss(tolk_version); - for (int idx = 0; idx < 3; idx++) { - std::string s; - std::getline(iss, s, '.'); - tolk_ver[idx] = stoi(s); - } - // End parsing embedded semver - bool match = true; - switch (op) { - case '=': - if ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] != sem_ver[1]) || (tolk_ver[2] != sem_ver[2])) { - match = false; - } - break; - case '>': - if (((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] < sem_ver[2])) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] < sem_ver[1])) || ((tolk_ver[0] < sem_ver[0]))) { - match = false; - } - break; - case '<': - if (((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] > sem_ver[2])) || - ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] > sem_ver[1])) || ((tolk_ver[0] > sem_ver[0]))) { - match = false; - } - break; - case '^': - if (((segs == 3) && - ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] != sem_ver[1]) || (tolk_ver[2] < sem_ver[2]))) || - ((segs == 2) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] < sem_ver[1]))) || - ((segs == 1) && ((tolk_ver[0] < sem_ver[0])))) { - match = false; - } - break; - default: - tolk_assert(false); - } - if (!match) { - v->error("Tolk version " + tolk_version + " does not satisfy this condition"); - } -} - -void pipeline_handle_pragmas(const AllSrcFiles& all_src_files) { - for (const SrcFile* file : all_src_files) { - tolk_assert(file->ast); - - for (AnyV v : file->ast->as()->get_toplevel_declarations()) { - if (auto v_no_arg = v->try_as()) { - handle_pragma_no_arg(v_no_arg); 
- } else if (auto v_version = v->try_as()) { - handle_pragma_version(v_version); - } - } - } -} - -} // namespace tolk diff --git a/tolk/pipe-register-symbols.cpp b/tolk/pipe-register-symbols.cpp index 792037a74..c84474f80 100644 --- a/tolk/pipe-register-symbols.cpp +++ b/tolk/pipe-register-symbols.cpp @@ -33,7 +33,7 @@ namespace tolk { -Expr* process_expr(AnyV v, CodeBlob& code, bool nv = false); +Expr* process_expr(AnyV v, CodeBlob& code); GNU_ATTRIBUTE_NORETURN GNU_ATTRIBUTE_COLD static void fire_error_redefinition_of_symbol(V v_ident, SymDef* existing) { @@ -50,13 +50,32 @@ static int calc_sym_idx(std::string_view sym_name) { return G.symbols.lookup_add(sym_name); } +static td::RefInt256 calculate_method_id_for_entrypoint(std::string_view func_name) { + if (func_name == "main" || func_name == "onInternalMessage") { + return td::make_refint(0); + } + if (func_name == "onExternalMessage") { + return td::make_refint(-1); + } + if (func_name == "onRunTickTock") { + return td::make_refint(-2); + } + if (func_name == "onSplitPrepare") { + return td::make_refint(-3); + } + if (func_name == "onSplitInstall") { + return td::make_refint(-4); + } + tolk_assert(false); +} + static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name) { unsigned int crc = td::crc16(static_cast(func_name)); return td::make_refint((crc & 0xffff) | 0x10000); } -static bool is_argument_of_function(AnyV v_variable, V v_func) { - return v_variable->type == ast_identifier && v_func->get_arg_list()->lookup_idx(v_variable->as()->name) != -1; +static bool is_parameter_of_function(AnyV v_variable, V v_func) { + return v_variable->type == ast_identifier && v_func->get_param_list()->lookup_idx(v_variable->as()->name) != -1; } // if a function looks like `T f(...args) { return anotherF(...args); }`, @@ -70,11 +89,11 @@ static bool is_argument_of_function(AnyV v_variable, V // in the future, when working on AST level, inlining should become much more powerful // (for instance, it 
should inline `return anotherF(constants)`, etc.) static bool detect_if_function_just_wraps_another(V v) { - if (v->method_id || v->marked_as_get_method || v->marked_as_inline_ref || v->ret_type->has_unknown_inside()) { + if (v->method_id || v->marked_as_get_method || v->marked_as_builtin || v->marked_as_inline_ref || v->is_entrypoint) { return false; } - for (int i = 0; i < v->get_num_args(); ++i) { - if (v->get_arg(i)->arg_type->get_width() != 1) { + for (int i = 0; i < v->get_num_params(); ++i) { + if (v->get_param(i)->param_type->get_width() != 1 || v->get_param(i)->param_type->has_unknown_inside()) { return false; // avoid situations like `f(int a, (int,int) b)`, inlining will be cumbersome } } @@ -82,7 +101,7 @@ static bool detect_if_function_just_wraps_another(V v) auto v_body = v->get_body()->try_as(); if (!v_body || v_body->size() != 1 || v_body->get_item(0)->type != ast_return_statement) { return false; - } + } auto v_return = v_body->get_item(0)->as(); auto v_anotherF = v_return->get_return_value()->try_as(); @@ -90,37 +109,23 @@ static bool detect_if_function_just_wraps_another(V v) return false; } - // todo simplify when removing ability of calling a function without parentheses - AnyV called_arg = v_anotherF->get_called_arg(); - bool ok_arg = called_arg->type == ast_tensor || called_arg->type == ast_parenthesized_expr; - if (!ok_arg || v_anotherF->get_called_f()->type != ast_identifier) { + V called_arg = v_anotherF->get_called_arg(); + if (v_anotherF->get_called_f()->type != ast_identifier) { return false; } std::string_view called_name = v_anotherF->get_called_f()->try_as()->name; std::string_view function_name = v->get_identifier()->name; - if (called_arg->type == ast_tensor) { - const std::vector& v_arg_items = called_arg->as()->get_items(); - std::set used_args; - for (AnyV v_arg : v_arg_items) { - if (!is_argument_of_function(v_arg, v)) { - return false; - } - used_args.emplace(v_arg->as()->name); - } - if (used_args.size() != v->get_num_args() 
|| used_args.size() != v_arg_items.size()) { - return false; - } - } else if (called_arg->type == ast_parenthesized_expr) { - AnyV v_arg = called_arg->as()->get_expr(); - if (!is_argument_of_function(v_arg, v)) { + const std::vector& v_arg_items = called_arg->get_items(); + std::set used_args; + for (AnyV v_arg : v_arg_items) { + if (!is_parameter_of_function(v_arg, v)) { return false; } + used_args.emplace(v_arg->as()->name); } - - if (function_name == "main" || function_name == "recv_internal" || function_name == "recv_external" || - function_name == "run_ticktock" || function_name == "split_prepare" || function_name == "split_install") { + if (static_cast(used_args.size()) != v->get_num_params() || used_args.size() != v_arg_items.size()) { return false; } @@ -131,9 +136,9 @@ static bool detect_if_function_just_wraps_another(V v) return true; } -static void calc_arg_ret_order_of_asm_function(V v_body, V arg_list, TypeExpr* ret_type, +static void calc_arg_ret_order_of_asm_function(V v_body, V param_list, TypeExpr* ret_type, std::vector& arg_order, std::vector& ret_order) { - int cnt = arg_list->size(); + int cnt = param_list->size(); int width = ret_type->get_width(); if (width < 0 || width > 16) { v_body->error("return type of an assembler built-in function must have a well-defined fixed width"); @@ -145,16 +150,16 @@ static void calc_arg_ret_order_of_asm_function(V v_body, V arg = arg_list->get_arg(i); - int arg_width = arg->arg_type->get_width(); + V v_param = param_list->get_param(i); + int arg_width = v_param->param_type->get_width(); if (arg_width < 0 || arg_width > 16) { - arg->error("parameters of an assembler built-in function must have a well-defined fixed width"); + v_param->error("parameters of an assembler built-in function must have a well-defined fixed width"); } cum_arg_width.push_back(tot_width += arg_width); } if (!v_body->arg_order.empty()) { if (static_cast(v_body->arg_order.size()) != cnt) { - v_body->error("arg_order of asm function must 
specify all arguments"); + v_body->error("arg_order of asm function must specify all parameters"); } std::vector visited(cnt, false); for (int i = 0; i < cnt; ++i) { @@ -197,7 +202,7 @@ static void register_constant(V v) { // and waited to be a single expression // although it works, of course it should be later rewritten using AST calculations, as well as lots of other parts CodeBlob code("tmp", v->loc, nullptr); - Expr* x = process_expr(init_value, code, false); + Expr* x = process_expr(init_value, code); if (!x->is_rvalue()) { v->get_init_value()->error("expression is not strictly Rvalue"); } @@ -266,21 +271,21 @@ static void register_function(V v) { // calculate TypeExpr of a function: it's a map (args -> ret), probably surrounded by forall TypeExpr* func_type = nullptr; - if (int n_args = v->get_num_args()) { + if (int n_args = v->get_num_params()) { std::vector arg_types; arg_types.reserve(n_args); for (int idx = 0; idx < n_args; ++idx) { - arg_types.emplace_back(v->get_arg(idx)->arg_type); + arg_types.emplace_back(v->get_param(idx)->param_type); } func_type = TypeExpr::new_map(TypeExpr::new_tensor(std::move(arg_types)), v->ret_type); } else { func_type = TypeExpr::new_map(TypeExpr::new_unit(), v->ret_type); } - if (v->forall_list) { + if (v->genericsT_list) { std::vector type_vars; - type_vars.reserve(v->forall_list->size()); - for (int idx = 0; idx < v->forall_list->size(); ++idx) { - type_vars.emplace_back(v->forall_list->get_item(idx)->created_type); + type_vars.reserve(v->genericsT_list->size()); + for (int idx = 0; idx < v->genericsT_list->size(); ++idx) { + type_vars.emplace_back(v->genericsT_list->get_item(idx)->created_type); } func_type = TypeExpr::new_forall(std::move(type_vars), func_type); } @@ -315,7 +320,7 @@ static void register_function(V v) { sym_val = new SymValCodeFunc{static_cast(G.all_code_functions.size()), func_type, v->marked_as_pure}; } else if (const auto* v_asm = v->get_body()->try_as()) { std::vector arg_order, ret_order; - 
calc_arg_ret_order_of_asm_function(v_asm, v->get_arg_list(), v->ret_type, arg_order, ret_order); + calc_arg_ret_order_of_asm_function(v_asm, v->get_param_list(), v->ret_type, arg_order, ret_order); sym_val = new SymValAsmFunc{func_type, std::move(arg_order), std::move(ret_order), v->marked_as_pure}; } else { v->error("Unexpected function body statement"); @@ -333,6 +338,8 @@ static void register_function(V v) { v->error(PSTRING() << "GET methods hash collision: `" << other->name() << "` and `" << static_cast(func_name) << "` produce the same hash. Consider renaming one of these functions."); } } + } else if (v->is_entrypoint) { + sym_val->method_id = calculate_method_id_for_entrypoint(func_name); } if (v->marked_as_inline) { sym_val->flags |= SymValFunc::flagInline; @@ -343,6 +350,9 @@ static void register_function(V v) { if (v->marked_as_get_method) { sym_val->flags |= SymValFunc::flagGetMethod; } + if (v->is_entrypoint) { + sym_val->flags |= SymValFunc::flagIsEntrypoint; + } if (detect_if_function_just_wraps_another(v)) { sym_val->flags |= SymValFunc::flagWrapsAnotherF; } @@ -368,21 +378,17 @@ static void iterate_through_file_symbols(const SrcFile* file) { for (AnyV v : file->ast->as()->get_toplevel_declarations()) { switch (v->type) { - case ast_include_statement: + case ast_import_statement: // on `import "another-file.tolk"`, register symbols from that file at first // (for instance, it can calculate constants, which are used in init_val of constants in current file below import) - iterate_through_file_symbols(v->as()->file); + iterate_through_file_symbols(v->as()->file); break; - case ast_constant_declaration_list: - for (AnyV v_decl : v->as()->get_declarations()) { - register_constant(v_decl->as()); - } + case ast_constant_declaration: + register_constant(v->as()); break; - case ast_global_var_declaration_list: - for (AnyV v_decl : v->as()->get_declarations()) { - register_global_var(v_decl->as()); - } + case ast_global_var_declaration: + 
register_global_var(v->as()); break; case ast_function_declaration: register_function(v->as()); diff --git a/tolk/pipeline.h b/tolk/pipeline.h index b00816349..1330c97a4 100644 --- a/tolk/pipeline.h +++ b/tolk/pipeline.h @@ -32,7 +32,6 @@ namespace tolk { AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filename, const std::string& entrypoint_filename); -void pipeline_handle_pragmas(const AllSrcFiles&); void pipeline_register_global_symbols(const AllSrcFiles&); void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles&); diff --git a/tolk/src-file.h b/tolk/src-file.h index 9eaf3a67f..28de75680 100644 --- a/tolk/src-file.h +++ b/tolk/src-file.h @@ -52,7 +52,6 @@ struct SrcFile { SrcFile &operator=(const SrcFile&) = delete; bool is_stdlib_file() const { return file_id == 0; /* stdlib always exists, has no imports and parsed the first */ } - bool is_entrypoint_file() const { return file_id == 1; /* after stdlib, the entrypoint file is parsed */ } bool is_offset_valid(int offset) const; SrcPosition convert_offset(int offset) const; diff --git a/tolk/tolk-main.cpp b/tolk/tolk-main.cpp index 38625534d..2dc5a0df6 100644 --- a/tolk/tolk-main.cpp +++ b/tolk/tolk-main.cpp @@ -24,12 +24,14 @@ from all source files in the program, then also delete it here. 
*/ #include "tolk.h" +#include "tolk-version.h" #include "compiler-state.h" #include "td/utils/port/path.h" #include #include #include #include +#include #include "git.h" using namespace tolk; @@ -40,28 +42,54 @@ void usage(const char* progname) { "\tGenerates Fift TVM assembler code from a .tolk file\n" "-o\tWrites generated code into specified .fif file instead of stdout\n" "-b\tGenerate Fift instructions to save TVM bytecode into .boc file\n" - "-s\tSpecify stdlib location (same as env TOLK_STDLIB; if unset, auto-discover)\n" "-O\tSets optimization level (2 by default)\n" + "-x\tEnables experimental options, comma-separated\n" "-S\tDon't include stack layout comments into Fift output\n" "-e\tIncreases verbosity level (extra output into stderr)\n" "-v\tOutput version of Tolk and exit\n"; std::exit(2); } -static std::string auto_discover_stdlib_location() { +static bool stdlib_file_exists(std::filesystem::path& stdlib_tolk) { + struct stat f_stat; + stdlib_tolk = stdlib_tolk.lexically_normal(); + int res = stat(stdlib_tolk.c_str(), &f_stat); + return res == 0 && S_ISREG(f_stat.st_mode); +} + +static std::string auto_discover_stdlib_location(const char* argv0) { + // first, the user can specify env var that points directly to stdlib (useful for non-standard compiler locations) if (const char* env_var = getenv("TOLK_STDLIB")) { return env_var; } - // this define is automatically set if just building this repo locally with cmake -#ifdef STDLIB_TOLK_IF_BUILD_FROM_SOURCES - return STDLIB_TOLK_IF_BUILD_FROM_SOURCES; -#endif - // this define is automatically set when compiling a linux package for distribution - // (since binaries and smartcont/ folder are installed to a predefined path) - // todo provide in cmake -#ifdef STDLIB_TOLK_IF_BUILD_TO_PACKAGE - return STDLIB_TOLK_IF_BUILD_TO_PACKAGE; + + // if the user launches tolk compiler from a package installed (e.g. 
/usr/bin/tolk), + // locate stdlib in /usr/share/ton/smartcont (this folder exists on package installation) + // (note, that paths are not absolute, they are relative to the launched binary) + // consider https://github.com/ton-blockchain/packages for actual paths + std::filesystem::path executable_dir = std::filesystem::canonical(argv0).remove_filename(); + +#ifdef TD_DARWIN + auto def_location = executable_dir / "../share/ton/ton/smartcont/stdlib.tolk"; +#elif TD_WINDOWS + auto def_location = executable_dir / "smartcont/stdlib.tolk"; +#else // linux + auto def_location = executable_dir / "../share/ton/smartcont/stdlib.tolk"; #endif + + if (stdlib_file_exists(def_location)) { + return def_location; + } + + // so, the binary is not from a system package + // maybe it's just built from sources? e.g. ~/ton/cmake-build-debug/tolk/tolk + // then, check the ~/ton/crypto/smartcont folder + auto near_when_built_from_sources = executable_dir / "../../crypto/smartcont/stdlib.tolk"; + if (stdlib_file_exists(near_when_built_from_sources)) { + return near_when_built_from_sources; + } + + // no idea of where to find stdlib; let's show an error for the user, he should provide env var above return {}; } @@ -120,7 +148,7 @@ class StdCoutRedirectToFile { int main(int argc, char* const argv[]) { int i; - while ((i = getopt(argc, argv, "o:b:s:O:Sevh")) != -1) { + while ((i = getopt(argc, argv, "o:b:O:x:Sevh")) != -1) { switch (i) { case 'o': G.settings.output_filename = optarg; @@ -128,12 +156,12 @@ int main(int argc, char* const argv[]) { case 'b': G.settings.boc_output_filename = optarg; break; - case 's': - G.settings.stdlib_filename = optarg; - break; case 'O': G.settings.optimization_level = std::max(0, atoi(optarg)); break; + case 'x': + G.settings.parse_experimental_options_cmd_arg(optarg); + break; case 'S': G.settings.stack_layout_comments = false; break; @@ -141,9 +169,9 @@ int main(int argc, char* const argv[]) { G.settings.verbosity++; break; case 'v': - std::cout << 
"Tolk compiler v" << tolk_version << "\n"; - std::cout << "Build commit: " << GitMetadata::CommitSHA1() << "\n"; - std::cout << "Build date: " << GitMetadata::CommitDate() << "\n"; + std::cout << "Tolk compiler v" << TOLK_VERSION << std::endl; + std::cout << "Build commit: " << GitMetadata::CommitSHA1() << std::endl; + std::cout << "Build date: " << GitMetadata::CommitDate() << std::endl; std::exit(0); case 'h': default: @@ -153,14 +181,12 @@ int main(int argc, char* const argv[]) { StdCoutRedirectToFile redirect_cout(G.settings.output_filename); if (redirect_cout.is_failed()) { - std::cerr << "Failed to create output file " << G.settings.output_filename << '\n'; + std::cerr << "Failed to create output file " << G.settings.output_filename << std::endl; return 2; } - // if stdlib wasn't specify as an option — locate it based on env - if (G.settings.stdlib_filename.empty()) { - G.settings.stdlib_filename = auto_discover_stdlib_location(); - } + // locate stdlib.tolk based on env or default system paths + G.settings.stdlib_filename = auto_discover_stdlib_location(argv[0]); if (G.settings.stdlib_filename.empty()) { std::cerr << "Failed to discover stdlib.tolk.\n" "Probably, you have a non-standard Tolk installation.\n" @@ -168,11 +194,11 @@ int main(int argc, char* const argv[]) { return 2; } if (G.is_verbosity(2)) { - std::cerr << "stdlib located at " << G.settings.stdlib_filename << '\n'; + std::cerr << "stdlib located at " << G.settings.stdlib_filename << std::endl; } if (optind != argc - 1) { - std::cerr << "invalid usage: should specify exactly one input file.tolk"; + std::cerr << "invalid usage: should specify exactly one input file.tolk" << std::endl; return 2; } diff --git a/tolk/tolk-version.h b/tolk/tolk-version.h new file mode 100644 index 000000000..6e5b764ca --- /dev/null +++ b/tolk/tolk-version.h @@ -0,0 +1,23 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +namespace tolk { + +constexpr const char* TOLK_VERSION = "0.6.0"; + +} // namespace tolk diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp index 81953f798..a093a7f6b 100644 --- a/tolk/tolk-wasm.cpp +++ b/tolk/tolk-wasm.cpp @@ -24,6 +24,7 @@ from all source files in the program, then also delete it here. */ #include "tolk.h" +#include "tolk-version.h" #include "compiler-state.h" #include "git.h" #include "td/utils/JsonBuilder.h" @@ -41,12 +42,16 @@ td::Result compile_internal(char *config_json) { TRY_RESULT(stdlib_tolk, td::get_json_object_string_field(config, "stdlibLocation", false)); TRY_RESULT(stack_comments, td::get_json_object_bool_field(config, "withStackComments", true, false)); TRY_RESULT(entrypoint_filename, td::get_json_object_string_field(config, "entrypointFileName", false)); + TRY_RESULT(experimental_options, td::get_json_object_string_field(config, "experimentalOptions", true)); G.settings.verbosity = 0; G.settings.optimization_level = std::max(0, opt_level); G.settings.stdlib_filename = stdlib_tolk; G.settings.stack_layout_comments = stack_comments; G.settings.entrypoint_filename = entrypoint_filename; + if (!experimental_options.empty()) { + G.settings.parse_experimental_options_cmd_arg(experimental_options.c_str()); + } std::ostringstream outs, errs; std::cout.rdbuf(outs.rdbuf()); 
@@ -100,7 +105,7 @@ extern "C" { const char* version() { auto version_json = td::JsonBuilder(); auto obj = version_json.enter_object(); - obj("tolkVersion", tolk_version); + obj("tolkVersion", TOLK_VERSION); obj("tolkFiftLibCommitHash", GitMetadata::CommitSHA1()); obj("tolkFiftLibCommitDate", GitMetadata::CommitDate()); obj.leave(); diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index 0a0cf144d..46eb4dc92 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -32,11 +32,21 @@ namespace tolk { +void on_assertion_failed(const char *description, const char *file_name, int line_number) { + std::string message = static_cast("Assertion failed at ") + file_name + ":" + std::to_string(line_number) + ": " + description; +#ifdef TOLK_DEBUG +#ifdef __arm64__ + // when developing, it's handy when the debugger stops on assertion failure (stacktraces and watches are available) + std::cerr << message << std::endl; + __builtin_debugtrap(); +#endif +#endif + throw Fatal(std::move(message)); +} + int tolk_proceed(const std::string &entrypoint_filename) { define_builtins(); lexer_init(); - G.pragma_allow_post_modification.always_on_and_deprecated("0.5.0"); - G.pragma_compute_asm_ltr.always_on_and_deprecated("0.5.0"); try { if (G.settings.stdlib_filename.empty()) { @@ -48,7 +58,6 @@ int tolk_proceed(const std::string &entrypoint_filename) { AllSrcFiles all_files = pipeline_discover_and_parse_sources(G.settings.stdlib_filename, entrypoint_filename); - pipeline_handle_pragmas(all_files); pipeline_register_global_symbols(all_files); pipeline_convert_ast_to_legacy_Expr_Op(all_files); diff --git a/tolk/tolk.h b/tolk/tolk.h index a0106ffc0..199194355 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -16,6 +16,7 @@ */ #pragma once +#include "platform-utils.h" #include "src-file.h" #include "type-expr.h" #include "symtable.h" @@ -26,12 +27,13 @@ #include #include -#define tolk_assert(expr) \ - (bool(expr) ? 
void(0) \ - : throw Fatal(PSTRING() << "Assertion failed at " << __FILE__ << ":" << __LINE__ << ": " << #expr)) +#define tolk_assert(expr) if(UNLIKELY(!(expr))) on_assertion_failed(#expr, __FILE__, __LINE__); namespace tolk { +GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN +void on_assertion_failed(const char *description, const char *file_name, int line_number); + /* * * TYPE EXPRESSIONS @@ -90,27 +92,23 @@ struct VarDescr { _NonZero = 128, _Pos = 256, _Neg = 512, - _Bool = 1024, - _Bit = 2048, _Finite = 4096, _Nan = 8192, _Even = 16384, _Odd = 32768, - _Null = (1 << 16), - _NotNull = (1 << 17) }; - static constexpr int ConstZero = _Int | _Zero | _Pos | _Neg | _Bool | _Bit | _Finite | _Even | _NotNull; - static constexpr int ConstOne = _Int | _NonZero | _Pos | _Bit | _Finite | _Odd | _NotNull; - static constexpr int ConstTrue = _Int | _NonZero | _Neg | _Bool | _Finite | _Odd | _NotNull; - static constexpr int ValBit = ConstZero & ConstOne; - static constexpr int ValBool = ConstZero & ConstTrue; - static constexpr int FiniteInt = _Int | _Finite | _NotNull; - static constexpr int FiniteUInt = FiniteInt | _Pos; + static constexpr int ConstZero = _Const | _Int | _Zero | _Pos | _Neg | _Finite | _Even; + static constexpr int ConstOne = _Const | _Int | _NonZero | _Pos | _Finite | _Odd; + static constexpr int ConstTrue = _Const | _Int | _NonZero | _Neg | _Finite | _Odd; + static constexpr int ValBit = _Int | _Pos | _Finite; + static constexpr int ValBool = _Int | _Neg | _Finite; + static constexpr int FiniteInt = _Int | _Finite; + static constexpr int FiniteUInt = _Int | _Finite | _Pos; int val; td::RefInt256 int_const; std::string str_const; - VarDescr(var_idx_t _idx = -1, int _flags = 0, int _val = 0) : idx(_idx), flags(_flags), val(_val) { + explicit VarDescr(var_idx_t _idx = -1, int _flags = 0, int _val = 0) : idx(_idx), flags(_flags), val(_val) { } bool operator<(var_idx_t other_idx) const { return idx < other_idx; @@ -139,15 +137,6 @@ struct VarDescr { bool always_odd() 
const { return val & _Odd; } - bool always_null() const { - return val & _Null; - } - bool always_not_null() const { - return val & _NotNull; - } - bool is_const() const { - return val & _Const; - } bool is_int_const() const { return (val & (_Int | _Const)) == (_Int | _Const) && int_const.not_null(); } @@ -260,7 +249,7 @@ class ListIterator { public: ListIterator() : ptr(nullptr) { } - ListIterator(T* _ptr) : ptr(_ptr) { + explicit ListIterator(T* _ptr) : ptr(_ptr) { } ListIterator& operator++() { ptr = ptr->next.get(); @@ -383,18 +372,6 @@ struct Op { const Op& last() const { return next ? next->last() : *this; } - ListIterator begin() { - return ListIterator{this}; - } - ListIterator end() const { - return ListIterator{}; - } - ListIterator cbegin() { - return ListIterator{this}; - } - ListIterator cend() const { - return ListIterator{}; - } }; inline ListIterator begin(const std::unique_ptr& op_list) { @@ -405,14 +382,6 @@ inline ListIterator end(const std::unique_ptr& op_list) { return ListIterator{}; } -inline ListIterator cbegin(const Op* op_list) { - return ListIterator{op_list}; -} - -inline ListIterator cend(const Op* op_list) { - return ListIterator{}; -} - inline ListIterator begin(const Op* op_list) { return ListIterator{op_list}; } @@ -421,78 +390,11 @@ inline ListIterator end(const Op* op_list) { return ListIterator{}; } -inline ListIterator begin(Op* op_list) { - return ListIterator{op_list}; -} - -inline ListIterator end(Op* op_list) { - return ListIterator{}; -} - typedef std::tuple FormalArg; typedef std::vector FormalArgList; struct AsmOpList; -struct CodeBlob { - enum { _ForbidImpure = 4 }; - int var_cnt, in_var_cnt, op_cnt; - TypeExpr* ret_type; - std::string name; - SrcLocation loc; - std::vector vars; - std::unique_ptr ops; - std::unique_ptr* cur_ops; - std::stack*> cur_ops_stack; - int flags = 0; - bool require_callxargs = false; - CodeBlob(std::string name, SrcLocation loc, TypeExpr* ret) - : var_cnt(0), in_var_cnt(0), op_cnt(0), 
ret_type(ret), name(std::move(name)), loc(loc), cur_ops(&ops) { - } - template - Op& emplace_back(Args&&... args) { - Op& res = *(*cur_ops = std::make_unique(args...)); - cur_ops = &(res.next); - return res; - } - bool import_params(FormalArgList arg_list); - var_idx_t create_var(bool is_tmp_unnamed, TypeExpr* var_type, SymDef* sym, SrcLocation loc); - var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) { - return create_var(true, var_type, nullptr, loc); - } - int split_vars(bool strict = false); - bool compute_used_code_vars(); - bool compute_used_code_vars(std::unique_ptr& ops, const VarDescrList& var_info, bool edit) const; - void print(std::ostream& os, int flags = 0) const; - void push_set_cur(std::unique_ptr& new_cur_ops) { - cur_ops_stack.push(cur_ops); - cur_ops = &new_cur_ops; - } - void close_blk(SrcLocation location) { - *cur_ops = std::make_unique(location, Op::_Nop); - } - void pop_cur() { - cur_ops = cur_ops_stack.top(); - cur_ops_stack.pop(); - } - void close_pop_cur(SrcLocation location) { - close_blk(location); - pop_cur(); - } - void simplify_var_types(); - void prune_unreachable_code(); - void fwd_analyze(); - void mark_noreturn(); - void generate_code(AsmOpList& out_list, int mode = 0); - void generate_code(std::ostream& os, int mode = 0, int indent = 0); - - void on_var_modification(var_idx_t idx, SrcLocation here) const { - for (auto& f : vars.at(idx).on_modification) { - f(here); - } - } -}; - /* * * SYMBOL VALUES @@ -512,13 +414,14 @@ struct SymVal : SymValBase { struct SymValFunc : SymVal { enum SymValFlag { - flagInline = 1, // function marked `inline` - flagInlineRef = 2, // function marked `inline_ref` - flagWrapsAnotherF = 4, // (T) thisF(...args) { return anotherF(...args); } (calls to thisF will be replaced) + flagInline = 1, // marked `@inline` + flagInlineRef = 2, // marked `@inline_ref` + flagWrapsAnotherF = 4, // `fun thisF(...args) { return anotherF(...args); }` (calls to thisF will be inlined) flagUsedAsNonCall = 8, 
// used not only as `f()`, but as a 1-st class function (assigned to var, pushed to tuple, etc.) flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out flagBuiltinFunction = 32, // was created via `define_builtin_func()`, not from source code - flagGetMethod = 64, // was declared via `get T func()`, method_id is auto-assigned + flagGetMethod = 64, // was declared via `get func(): T`, method_id is auto-assigned + flagIsEntrypoint = 128, // it's `main` / `onExternalMessage` / etc. }; td::RefInt256 method_id; // todo why int256? it's small @@ -559,6 +462,9 @@ struct SymValFunc : SymVal { bool is_get_method() const { return flags & flagGetMethod; } + bool is_entrypoint() const { + return flags & flagIsEntrypoint; + } }; struct SymValCodeFunc : SymValFunc { @@ -626,7 +532,6 @@ struct Expr { _None, _Apply, _VarApply, - _TypeApply, _MkTuple, _Tensor, _Const, @@ -636,13 +541,12 @@ struct Expr { _Letop, _LetFirst, _Hole, - _Type, _CondExpr, _SliceConst, }; ExprCls cls; int val{0}; - enum { _IsType = 1, _IsRvalue = 2, _IsLvalue = 4, _IsImpure = 32 }; + enum { _IsRvalue = 2, _IsLvalue = 4, _IsImpure = 32 }; int flags{0}; SrcLocation here; td::RefInt256 intval; @@ -681,12 +585,6 @@ struct Expr { bool is_lvalue() const { return flags & _IsLvalue; } - bool is_type() const { - return flags & _IsType; - } - bool is_type_apply() const { - return cls == _TypeApply; - } bool is_mktuple() const { return cls == _MkTuple; } @@ -1449,6 +1347,65 @@ struct SymValAsmFunc : SymValFunc { bool compile(AsmOpList& dest, std::vector& out, std::vector& in, SrcLocation where) const; }; +struct CodeBlob { + enum { _ForbidImpure = 4 }; + int var_cnt, in_var_cnt, op_cnt; + TypeExpr* ret_type; + std::string name; + SrcLocation loc; + std::vector vars; + std::unique_ptr ops; + std::unique_ptr* cur_ops; + std::stack*> cur_ops_stack; + int flags = 0; + bool require_callxargs = false; + CodeBlob(std::string name, SrcLocation loc, TypeExpr* 
ret) + : var_cnt(0), in_var_cnt(0), op_cnt(0), ret_type(ret), name(std::move(name)), loc(loc), cur_ops(&ops) { + } + template + Op& emplace_back(Args&&... args) { + Op& res = *(*cur_ops = std::make_unique(args...)); + cur_ops = &(res.next); + return res; + } + bool import_params(FormalArgList arg_list); + var_idx_t create_var(bool is_tmp_unnamed, TypeExpr* var_type, SymDef* sym, SrcLocation loc); + var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) { + return create_var(true, var_type, nullptr, loc); + } + int split_vars(bool strict = false); + bool compute_used_code_vars(); + bool compute_used_code_vars(std::unique_ptr& ops, const VarDescrList& var_info, bool edit) const; + void print(std::ostream& os, int flags = 0) const; + void push_set_cur(std::unique_ptr& new_cur_ops) { + cur_ops_stack.push(cur_ops); + cur_ops = &new_cur_ops; + } + void close_blk(SrcLocation location) { + *cur_ops = std::make_unique(location, Op::_Nop); + } + void pop_cur() { + cur_ops = cur_ops_stack.top(); + cur_ops_stack.pop(); + } + void close_pop_cur(SrcLocation location) { + close_blk(location); + pop_cur(); + } + void simplify_var_types(); + void prune_unreachable_code(); + void fwd_analyze(); + void mark_noreturn(); + void generate_code(AsmOpList& out_list, int mode = 0); + void generate_code(std::ostream& os, int mode = 0, int indent = 0); + + void on_var_modification(var_idx_t idx, SrcLocation here) const { + for (auto& f : vars.at(idx).on_modification) { + f(here); + } + } +}; + // defined in builtins.cpp AsmOp exec_arg_op(std::string op, long long arg); AsmOp exec_arg_op(std::string op, long long arg, int args, int retv = 1); diff --git a/tolk/type-expr.h b/tolk/type-expr.h index 4893df35a..0e2a870f9 100644 --- a/tolk/type-expr.h +++ b/tolk/type-expr.h @@ -14,7 +14,7 @@ struct TypeExpr { _Cell = tok_cell, _Slice = tok_slice, _Builder = tok_builder, - _Cont = tok_cont, + _Cont = tok_continuation, _Tuple = tok_tuple, }; Kind constr; From 
12ff28ac94d6c1e5d2baafeb5a35d25c4efd48a8 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Thu, 31 Oct 2024 11:16:19 +0400 Subject: [PATCH 09/12] [Tolk] Completely rework stdlib: multiple files and renaming - split stdlib.tolk into multiple files (tolk-stdlib/ folder) (the "core" common.tolk is auto-imported, the rest are needed to be explicitly imported like "@stdlib/tvm-dicts.tolk") - all functions were renamed to long and clear names - new naming is camelCase --- crypto/smartcont/mathlib.tolk | 1002 --------------- crypto/smartcont/stdlib.tolk | 1108 ----------------- crypto/smartcont/tolk-stdlib/common.tolk | 766 ++++++++++++ .../smartcont/tolk-stdlib/gas-payments.tolk | 63 + crypto/smartcont/tolk-stdlib/lisp-lists.tolk | 38 + crypto/smartcont/tolk-stdlib/tvm-dicts.tolk | 447 +++++++ .../smartcont/tolk-stdlib/tvm-lowlevel.tolk | 29 + tolk-tester/tests/a10.tolk | 45 + tolk-tester/tests/a6.tolk | 2 +- tolk-tester/tests/asm_arg_order.tolk | 10 +- tolk-tester/tests/camel1.tolk | 41 +- tolk-tester/tests/camel2.tolk | 32 +- tolk-tester/tests/camel3.tolk | 20 +- tolk-tester/tests/camel4.tolk | 20 +- tolk-tester/tests/cells-slices.tolk | 161 ++- tolk-tester/tests/co1.tolk | 8 +- tolk-tester/tests/dicts-demo.tolk | 106 ++ tolk-tester/tests/imports/use-dicts-err.tolk | 21 + tolk-tester/tests/imports/use-dicts.tolk | 23 + tolk-tester/tests/invalid-import.tolk | 4 +- ...o-import.tolk => invalid-no-import-1.tolk} | 0 tolk-tester/tests/invalid-no-import-2.tolk | 9 + tolk-tester/tests/invalid-pure-2.tolk | 2 +- tolk-tester/tests/invalid-pure-3.tolk | 4 +- tolk-tester/tests/invalid-redefinition-1.tolk | 4 +- tolk-tester/tests/invalid-symbol-1.tolk | 4 +- tolk-tester/tests/logical-operators.tolk | 25 +- tolk-tester/tests/no-spaces.tolk | 10 +- tolk-tester/tests/null-keyword.tolk | 35 +- tolk-tester/tests/pure-functions.tolk | 12 +- tolk-tester/tests/s1.tolk | 8 +- tolk-tester/tests/test-math.tolk | 1010 ++++++++++++++- tolk-tester/tests/use-before-declare.tolk | 4 +- 
tolk-tester/tests/w2.tolk | 14 +- tolk-tester/tolk-tester.js | 20 +- tolk/CMakeLists.txt | 6 - tolk/builtins.cpp | 47 +- tolk/compiler-state.h | 5 +- tolk/pipe-ast-to-legacy.cpp | 6 - tolk/pipe-discover-parse-sources.cpp | 9 +- tolk/pipe-generate-fif-output.cpp | 15 +- tolk/pipe-register-symbols.cpp | 2 +- tolk/pipeline.h | 2 +- tolk/src-file.cpp | 5 + tolk/src-file.h | 2 +- tolk/tolk-main.cpp | 146 ++- tolk/tolk-wasm.cpp | 65 +- tolk/tolk.cpp | 13 +- 48 files changed, 2969 insertions(+), 2461 deletions(-) delete mode 100644 crypto/smartcont/mathlib.tolk delete mode 100644 crypto/smartcont/stdlib.tolk create mode 100644 crypto/smartcont/tolk-stdlib/common.tolk create mode 100644 crypto/smartcont/tolk-stdlib/gas-payments.tolk create mode 100644 crypto/smartcont/tolk-stdlib/lisp-lists.tolk create mode 100644 crypto/smartcont/tolk-stdlib/tvm-dicts.tolk create mode 100644 crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk create mode 100644 tolk-tester/tests/dicts-demo.tolk create mode 100644 tolk-tester/tests/imports/use-dicts-err.tolk create mode 100644 tolk-tester/tests/imports/use-dicts.tolk rename tolk-tester/tests/{invalid-no-import.tolk => invalid-no-import-1.tolk} (100%) create mode 100644 tolk-tester/tests/invalid-no-import-2.tolk diff --git a/crypto/smartcont/mathlib.tolk b/crypto/smartcont/mathlib.tolk deleted file mode 100644 index 1f7510a6b..000000000 --- a/crypto/smartcont/mathlib.tolk +++ /dev/null @@ -1,1002 +0,0 @@ -/* - - - - Tolk fixed-point mathematical library - - (initially copied from mathlib.fc) - - - */ -tolk 0.6 - -/* - This file is part of TON Tolk Standard Library. - - Tolk Standard Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. 
- - Tolk Standard Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - -*/ - -/*--------------- MISSING OPERATIONS AND BUILT-INS ----------------*/ - -@pure -fun sgn(x: int): int - asm "SGN"; - -/// compute floor(log2(x))+1 -@pure -fun log2_floor_p1(x: int): int - asm "UBITSIZE"; - -@pure -fun mulrshiftr(x: int, y: int, s: int): int - asm "MULRSHIFTR"; - -@pure -fun mulrshiftr256(x: int, y: int): int - asm "256 MULRSHIFTR#"; - -@pure -fun mulrshift256mod(x: int, y: int): (int, int) - asm "256 MULRSHIFT#MOD"; - -@pure -fun mulrshiftr256mod(x: int, y: int): (int, int) - asm "256 MULRSHIFTR#MOD"; - -@pure -fun mulrshiftr255mod(x: int, y: int): (int, int) - asm "255 MULRSHIFTR#MOD"; - -@pure -fun mulrshiftr248mod(x: int, y: int): (int, int) - asm "248 MULRSHIFTR#MOD"; - -@pure -fun mulrshiftr5mod(x: int, y: int): (int, int) - asm "5 MULRSHIFTR#MOD"; - -@pure -fun mulrshiftr6mod(x: int, y: int): (int, int) - asm "6 MULRSHIFTR#MOD"; - -@pure -fun mulrshiftr7mod(x: int, y: int): (int, int) - asm "7 MULRSHIFTR#MOD"; - -@pure -fun lshift256divr(x: int, y: int): int - asm "256 LSHIFT#DIVR"; - -@pure -fun lshift256divmodr(x: int, y: int): (int, int) - asm "256 LSHIFT#DIVMODR"; - -@pure -fun lshift255divmodr(x: int, y: int): (int, int) - asm "255 LSHIFT#DIVMODR"; - -@pure -fun lshift2divmodr(x: int, y: int): (int, int) - asm "2 LSHIFT#DIVMODR"; - -@pure -fun lshift7divmodr(x: int, y: int): (int, int) - asm "7 LSHIFT#DIVMODR"; - -@pure -fun lshiftdivmodr(x: int, y: int, s: int): (int, int) - asm "LSHIFTDIVMODR"; - -@pure -fun rshiftr256mod(x: int): (int, int) - asm "256 RSHIFTR#MOD"; - -@pure -fun rshiftr248mod(x: int): (int, int) - asm "248 RSHIFTR#MOD"; - -@pure -fun rshiftr4mod(x: int): (int, int) - asm "4 RSHIFTR#MOD"; - -@pure -fun rshift3mod(x: int): (int, int) - asm "3 RSHIFT#MOD"; - 
-/// computes y - x (Tolk compiler does not try to use this by itself) -@pure -fun sub_rev(x: int, y: int): int - asm "SUBR"; - -@pure -fun nan(): int - asm "PUSHNAN"; - -@pure -fun is_nan(x: int): int - asm "ISNAN"; - -/*----------------------- SQUARE ROOTS ---------------------------*/ - -/// computes sqrt(a*b) exactly rounded to the nearest integer -/// for all 0 <= a, b <= 2^256-1 -/// may be used with b=1 or b=scale of fixed-point numbers -@pure -@inline_ref -fun geom_mean(a: int, b: int): int { - if (!min(a, b)) { - return 0; - } - var s: int = log2_floor_p1(a); // throws out of range error if a < 0 or b < 0 - var t: int = log2_floor_p1(b); - // NB: (a-b)/2+b == (a+b)/2, but without overflow for large a and b - var x: int = (s == t ? (a - b) / 2 + b : 1 << ((s + t) / 2)); - do { - // if always used with b=2^const, may be optimized to "const LSHIFTDIVC#" - // it is important to use `muldivc` here, not `muldiv` or `muldivr` - var q: int = (muldivc(a, b, x) - x) / 2; - x += q; - } while (q); - return x; -} - -/// integer square root, computes round(sqrt(a)) for all a>=0. 
-/// note: `inline` is better than `inline_ref` for such simple functions -@pure -@inline -fun sqrt(a: int): int { - return geom_mean(a, 1); -} - -/// version for fixed248 = fixed-point numbers with scale 2^248 -/// fixed248 sqrt(fixed248 x) -@pure -@inline -fun fixed248_sqrt(x: int): int { - return geom_mean(x, 1 << 248); -} - -/// fixed255 sqrt(fixed255 x) -@pure -@inline -fun fixed255_sqrt(x: int): int { - return geom_mean(x, 1 << 255); -} - -/// fixed248 sqr(fixed248 x); -@pure -@inline -fun fixed248_sqr(x: int): int { - return muldivr(x, x, 1 << 248); -} - -/// fixed255 sqr(fixed255 x); -@pure -@inline -fun fixed255_sqr(x: int): int { - return muldivr(x, x, 1 << 255); -} - -const fixed248_One: int = (1 << 248); -const fixed255_One: int = (1 << 255); - -/*------------------- USEFUL CONSTANTS -------------------*/ - -/// store huge constants in inline_ref functions for reuse -/// (y,z) where y=round(log(2)*2^256), z=round((log(2)*2^256-y)*2^128) -/// then log(2) = y/2^256 + z/2^384 -@pure -@inline_ref -fun log2_xconst_f256(): (int, int) { - return (80260960185991308862233904206310070533990667611589946606122867505419956976172, -32272921378999278490133606779486332143); -} - -/// (y,z) where Pi = y/2^254 + z/2^382 -@pure -@inline_ref -fun Pi_xconst_f254(): (int, int) { - return (90942894222941581070058735694432465663348344332098107489693037779484723616546, 108051869516004014909778934258921521947); -} - -/// atan(1/16) as fixed260 -@pure -@inline_ref -fun Atan1_16_f260(): int { - return 115641670674223639132965820642403718536242645001775371762318060545014644837101; // true value is ...101.0089... -} - -/// atan(1/8) as fixed259 -@pure -@inline_ref -fun Atan1_8_f259(): int { - return 115194597005316551477397594802136977648153890007566736408151129975021336532841; // correction -0.1687... 
-} - -/// atan(1/32) as fixed261 -@pure -@inline_ref -fun Atan1_32_f261(): int { - return 115754418570128574501879331591757054405465733718902755858991306434399246026247; // correction 0.395... -} - -/// inline is better than inline_ref for such very small functions -@pure -@inline -fun log2_const_f256(): int { - var (c: int, _) = log2_xconst_f256(); - return c; -} - -@pure -@inline -fun fixed248_log2_const(): int { - return log2_const_f256() ~>> 8; -} - -@pure -@inline -fun Pi_const_f254(): int { - var (c: auto, _) = Pi_xconst_f254(); - return c; -} - -@pure -@inline -fun fixed248_Pi_const(): int { - return Pi_const_f254() ~>> 6; -} - -/*-------------- HYPERBOLIC TANGENT AND EXPONENT ------------------*/ - -/// hyperbolic tangent of small x via n+2 terms of Lambert's continued fraction -/// n=17: good for |x| < log(2)/4 = 0.173 -/// fixed258 tanh_f258(fixed258 x, int n) -@pure -@inline_ref -fun tanh_f258(x: int, n: int): int { - var x2: int = muldivr(x, x, 1 << 255); // x^2 as fixed261 - var a: int = (2 * n + 5) << 250; // a=2n+5 as fixed250 - var c = a; - var Two: int = (1 << 251); // 2. as fixed250 - repeat (n) { - a = (c -= Two) + muldivr(x2, 1 << 239, a); // a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 - } - a = (touch(3) << 254) + muldivr(x2, 1 << 243, a); // a := 3+x^2/a as fixed254 - // y = x/(1+a') = x - x*a'/(1+a') = x - x*x^2/(a+x^2) where a' = x^2/a - return x - (muldivr(x, x2, a + (x2 ~>> 7)) ~>> 7); -} - -/// fixed257 expm1_f257(fixed257 x) -/// computes exp(x)-1 for small x via 19 terms of Lambert's continued fraction for tanh(x/2) -/// good for |x| < log(2)/2 = 0.347 (n=17); consumes ~3500 gas -@pure -@inline_ref -fun expm1_f257(x: int): int { - // (almost) compute tanh(x/2) first; x/2 as fixed258 = x as fixed257 - var x2: int = muldivr(x, x, 1 << 255); // x^2 as fixed261 - var Two: int = (1 << 251); // 2. 
as fixed250 - var a: int = touch(39) << 250; // a=2n+5 as fixed250 - var c = a; - repeat (17) { - a = (c -= Two) + muldivr(x2, 1 << 239, a); // a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 - } - a = (touch(3) << 254) + muldivr(x2, 1 << 243, a); // a := 3+x^2/a as fixed254 - // now tanh(x/2) = x/(1+a') where a'=x^2/a ; apply exp(x)-1=2*tanh(x/2)/(1-tanh(x/2)) - var t: int = (x ~>> 4) - a; // t:=x-a as fixed254 - return x - muldivr(x2, t / 2, a + mulrshiftr256(x, t) ~/ 4) ~/ 4; // x - x^2 * (x-a) / (a + x*(x-a)) -} - -/// expm1_f257() may be used to implement specific fixed-point exponentials -/// example: -/// fixed248 exp(fixed248 x) -@pure -@inline_ref -fun fixed248_exp(x: int): int { - var (l2c, l2d) = log2_xconst_f256(); - // divide x by log(2) and convert to fixed257 - // (int q, x) = muldivmodr(x, 256, l2c); // unfortunately, no such built-in - var (q: int, x redef) = lshiftdivmodr(x, l2c, 8); - x = 2 * x - muldivr(q, l2d, 1 << 127); - var y: int = expm1_f257(x); - // result is (1 + y) * (2^q) --> ((1 << 257) + y) >> (9 - q) - return (y ~>> (9 - q)) - (-1 << (248 + q)); - // note that (y ~>> (9 - q)) + (1 << (248 + q)) leads to overflow when q=8 -} - -/// compute 2^x in fixed248 -/// fixed248 exp2(fixed248 x) -@pure -@inline_ref -fun fixed248_exp2(x: int): int { - // (int q, x) = divmodr(x, 1 << 248); // no such built-in - var (q: int, x redef) = rshiftr248mod(x); - x = muldivr(x, log2_const_f256(), 1 << 247); - var y: int = expm1_f257(x); - return (y ~>> (9 - q)) - (-1 << (248 + q)); -} - -/*-------------------- TRIGONOMETRIC FUNCTIONS ----------------------*/ - -/// fixed260 tan(fixed260 x); -/// computes tan(x) for small |x|> 10)) ~>> 9); -} - -/// fixed260 tan(fixed260 x); -@pure -@inline_ref -fun tan_f260(x: int): int { - return tan_f260_inlined(x); -} - -/// fixed258 tan(fixed258 x); -/// computes tan(x) for small |x|> 6)) ~>> 5); -} - -/// fixed258 tan(fixed258 x); -@pure -@inline_ref -fun tan_f258(x: int): int { - return tan_f258_inlined(x); -} - -/// 
(fixed259, fixed263) sincosm1(fixed259 x) -/// computes (sin(x), 1-cos(x)) for small |x|<2*atan(1/16) -@pure -@inline -fun sincosm1_f259_inlined(x: int): (int, int) { - var t: int = tan_f260_inlined(x); // t=tan(x/2) as fixed260 - var tt: int = mulrshiftr256(t, t); // t^2 as fixed264 - var y: int = tt ~/ 512 + (1 << 255); // 1+t^2 as fixed255 - // 2*t/(1+t^2) as fixed259 and 2*t^2/(1+t^2) as fixed263 - // return (muldivr(t, 1 << 255, y), muldivr(tt, 1 << 255, y)); - return (t - muldivr(t / 2, tt, y) ~/ 256, tt - muldivr(tt / 2, tt, y) ~/ 256); -} - -@pure -@inline_ref -fun sincosm1_f259(x: int): (int, int) { - return sincosm1_f259_inlined(x); -} - -/// computes (sin(x+xe),-cos(x+xe)) for |x| <= Pi/4, xe very small -/// this function is very accurate, error less than 0.7 ulp (consumes ~ 5500 gas) -/// (fixed256, fixed256) sincosn(fixed256 x, fixed259 xe) -@pure -@inline_ref -fun sincosn_f256(x: int, xe: int): (int, int) { - // var (q, x1) = muldivmodr(x, 8, Atan1_8_f259()); // no muldivmodr() builtin - var (q, x1) = lshift2divmodr(abs(x), Atan1_8_f259()); // reduce mod theta where theta=2*atan(1/8) - var (si, co) = sincosm1_f259(x1 * 2 + xe); - var (a, b, c) = (-1, 0, 1); - repeat (q) { - // (a+b*I) *= (8+I)^2 = 63+16*I - (a, b, c) = (63 * a - 16 * b, 16 * a + 63 * b, 65 * c); - } - // now a/c = cos(q*theta), b/c = sin(q*theta) exactly(!) 
- // compute (a+b*I)*(1-co+si*I)/c - // (b, a) = (lshift256divr(b, c), lshift256divr(a, c)); - var (b redef, br: int) = lshift256divmodr(b, c); br = muldivr(br, 128, c); - var (a redef, ar: int) = lshift256divmodr(a, c); ar = muldivr(ar, 128, c); - return (sgn(x) * (((mulrshiftr256(b, co) - br) ~/ 16 - mulrshiftr256(a, si)) ~/ 8 - b), - a - ((mulrshiftr256(a, co) - ar) ~/ 16 + mulrshiftr256(b, si)) ~/ 8); -} - -/// compute (sin(x),1-cos(x)) in fixed256 for |x| < 16*atan(1/16) = 0.9987 -/// (fixed256, fixed257) sincosm1_f256(fixed256 x); -/// slightly less accurate than sincosn_f256() (error up to 3/2^256), but faster (~ 4k gas) and shorter -@pure -@inline_ref -fun sincosm1_f256(x: int): (int, int) { - var (si, co) = sincosm1_f259_inlined(x); // compute (sin,1-cos)(x/8) in (fixed259,fixed263) - var r: int = 7; - repeat (r / 2) { - // 1-cos(2*x) = 2*sin(x)^2, sin(2*x) = 2*sin(x)*cos(x) - (co, si) = (mulrshiftr256(si, si), si - (mulrshiftr256(si, co) ~>> r)); - r -= 2; - } - return (si, co); -} - -/// compute (p, q) such that p/q = tan(x) for |x|<2*atan(1/2)=1899/2048=0.927 -/// (int, int) tan_aux(fixed256 x); -@pure -@inline_ref -fun tan_aux_f256(x: int): (int, int) { - var t: int = tan_f258_inlined(x); // t=tan(x/4) as fixed258 - // t:=2*t/(1-t^2)=2*(t-t^3/(t^2-1)) - var tt: int = mulrshiftr256(t, t); // t^2 as fixed260 - t = muldivr(t, tt, tt ~/ 16 + (-1 << 256)) ~/ 16 - t; // now t=-tan(x/2) as fixed259 - return (t, mulrshiftr256(t, t) ~/ 4 + (-1 << 256)); // return (2*t, t^2-1) as fixed256 -} - -/// sincosm1_f256() and sincosn_f256() may be used to implement trigonometric functions for different fixed-point types -/// example: -/// (fixed248, fixed248) sincos(fixed248 x); -@pure -@inline_ref -fun fixed248_sincos(x: int): (int, int) { - var (Pic, Pid) = Pi_xconst_f254(); - // (int q, x) = muldivmodr(x, 128, Pic); // no muldivmodr() builtin - var (q: int, x redef) = lshift7divmodr(x, Pic); // reduce mod Pi/2 - x = 2 * x - muldivr(q, Pid, 1 << 127); - var (si: int, 
co: int) = sincosm1_f256(x); // doesn't make sense to use more accurate sincosn_f256() - co = (1 << 248) - (co ~>> 9); - si = si ~>> 8; - repeat (q & 3) { - (si, co) = (co, -si); - } - return (si, co); -} - -/// fixed248 sin(fixed248 x); -/// inline is better than inline_ref for such simple functions -@pure -@inline -fun fixed248_sin(x: int): int { - var (si: int, _) = fixed248_sincos(x); - return si; -} - -/// fixed248 cos(fixed248 x); -@pure -@inline -fun fixed248_cos(x: int): int { - var (_, co: int) = fixed248_sincos(x); - return co; -} - -/// similarly, tan_aux_f256() may be used to implement tan() and cot() for specific fixed-point formats -/// fixed248 tan(fixed248 x); -/// not very accurate when |tan(x)| is very large (difficult to do better without floating-point numbers) -/// however, the relative accuracy is approximately 2^-247 in all cases, which is good enough for arguments given up to 2^-249 -@pure -@inline_ref -fun fixed248_tan(x: int): int { - var (Pic, Pid) = Pi_xconst_f254(); - // (int q, x) = muldivmodr(x, 128, Pic); // no muldivmodr() builtin - var (q: int, x redef) = lshift7divmodr(x, Pic); // reduce mod Pi/2 - x = 2 * x - muldivr(q, Pid, 1 << 127); - var (a, b) = tan_aux_f256(x); // now a/b = tan(x') - if (q & 1) { - (a, b) = (b, -a); - } - return muldivr(a, 1 << 248, b); // either -b/a or a/b as fixed248 -} - -/// fixed248 cot(fixed248 x); -@pure -@inline_ref -fun fixed248_cot(x: int): int { - var (Pic, Pid) = Pi_xconst_f254(); - var (q: int, x redef) = lshift7divmodr(x, Pic); // reduce mod Pi/2 - x = 2 * x - muldivr(q, Pid, 1 << 127); - var (b, a) = tan_aux_f256(x); // now b/a = tan(x') - if (q & 1) { - (a, b) = (b, -a); - } - return muldivr(a, 1 << 248, b); // either -b/a or a/b as fixed248 -} - -/*---------------- INVERSE HYPERBOLIC TANGENT AND LOGARITHMS ----------------*/ - -/// inverse hyperbolic tangent of small x, evaluated by means of n terms of the continued fraction -/// valid for |x| < 2^-2.5 ~ 0.18 if n=37 (slightly less 
accurate with n=36) -/// |x| < 1/8 if n=32; |x| < 2^-3.5 if n=28; |x| < 1/16 if n=25 -/// |x| < 2^-4.5 if n=23; |x| < 1/32 if n=21; |x| < 1/64 if n=18 -/// fixed258 atanh(fixed258 x); -@pure -@inline_ref -fun atanh_f258(x: int, n: int): int { - var x2: int = mulrshiftr256(x, x); // x^2 as fixed260 - var One: int = (1 << 254); - var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 - repeat (n - 1) { - // a := 1 + (1 - x^2 / a)(1 + 1/n) as fixed254 - var t: int = One - muldivr(x2, 1 << 248, a); // t := 1 - x^2 / a - var n1: int = n - 1; - a = muldivr(t, n, n1) + One; - n = n1; - } - // x / (1 - x^2 / a) = x / (1 - d) = x + x * d / (1 - d) for d = x^2 / a - // int d = muldivr(x2, 1 << 255, a - (x2 ~>> 6)); // d/(1-d) = x^2/(a-x^2) as fixed261 - // return x + (mulrshiftr256(x, d) ~>> 5); - return x + muldivr(x, x2 / 2, a - x2 ~/ 64) ~/ 32; -} - -/// number of terms n should be chosen as for atanh_f258() -/// fixed261 atanh(fixed261 x); -@pure -@inline -fun atanh_f261_inlined(x: int, n: int): int { - var x2: int = mulrshiftr256(x, x); // x^2 as fixed266 - var One: int = (1 << 254); - var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 - repeat (n - 1) { - // a := 1 + (1 - x^2 / a)(1 + 1/n) as fixed254 - var t: int = One - muldivr(x2, 1 << 242, a); // t := 1 - x^2 / a - var n1: int = n - 1; - a = muldivr(t, n, n1) + One; - n = n1; - } - // x / (1 - x^2 / a) = x / (1 - d) = x + x * d / (1 - d) for d = x^2 / a - // int d = muldivr(x2, 1 << 255, a - (x2 ~>> 12)); // d/(1-d) = x^2/(a-x^2) as fixed267 - // return x + (mulrshiftr256(x, d) ~>> 11); - return x + muldivr(x, x2, a - x2 ~/ 4096) ~/ 4096; -} - -/// fixed261 atanh(fixed261 x); -@pure -@inline_ref -fun atanh_f261(x: int, n: int): int { - return atanh_f261_inlined(x, n); -} - -/// returns (y, s) such that log(x) = y/2^257 + s*log(2) for positive integer x -/// (fixed257, int) log_aux(int x) -@pure -@inline_ref -fun log_aux_f257(x: int): (int, int) { - var s: int = log2_floor_p1(x); - x <<= 256 - 
s; - var t: int = touch(-1 << 256); - if ((x >> 249) <= 90) { - // t~touch(); - t >>= 1; - s -= 1; - } - x += t; - var `2x`: int = 2 * x; - var y: int = lshift256divr(`2x`, (x >> 1) - t); - // y = `2x` - (mulrshiftr256(2x, y) ~>> 2); // this line could improve precision on very rare occasions - return (atanh_f258(y, 36), s); -} - -/// computes 33^m for small m -@pure -@inline -fun pow33(m: int): int { - var t: int = 1; - repeat (m) { - t *= 33; - } - return t; -} - -/// computes 33^m for small 0<=m<=22 -/// slightly faster than pow33() -@pure -@inline -fun pow33b(m: int): int { - var (mh: int, ml: int) = divmod(m, 5); - var t: int = 1; - repeat (ml) { - t *= 33; - } - repeat (mh) { - t *= 33 * 33 * 33 * 33 * 33; - } - return t; -} - -/// returns (s, q, y) such that log(x) = s*log(2) + q*log(33/32) + y/2^260 for positive integer x -/// (int, int, fixed260) log_auxx_f260(int x); -@pure -@inline_ref -fun log_auxx_f260(x: int): (int, int, int) { - var s: int = log2_floor_p1(x) - 1; - x <<= 255 - s; // rescale to 1 <= x < 2 as fixed255 - var t: int = touch(2873) << 244; // ~ (33/32)^11 ~ sqrt(2) as fixed255 - var x1: int = (x - t) >> 1; - var q: int = muldivr(x1, 65, x1 + t) + 11; // crude approximation to round(log(x)/log(33/32)) - // t = 1; repeat (q) { t *= 33; } // t:=33^q, 0<=q<=22 - t = pow33b(q); - t <<= (51 - q) * 5; // t:=(33/32)^q as fixed255, nearest power of 33/32 to x - x -= t; - var y: int = lshift256divr(x << 4, (x >> 1) + t); // y = (x-t)/(x+t) as fixed261 - y = atanh_f261(y, 18); // atanh((x-t)/(x+t)) as fixed261, or log(x/t) as fixed260 - return (s, q, y); -} - -/// returns (y, s) such that log(x) = y/2^256 + s*log(2) for positive integer x -/// this function is very precise (error less than 0.6 ulp) and consumes < 7k gas -/// may be used to implement specific fixed-point instances of log() and log2() -/// (fixed256, int) log_aux_f256(int x); -@pure -@inline_ref -fun log_aux_f256(x: int): (int, int) { - var (s, q, y) = log_auxx_f260(x); - var (yh, yl) 
= rshiftr4mod(y); // y ~/% 16 , but Tolk does not optimize this to RSHIFTR#MOD - // int Log33_32 = 3563114646320977386603103333812068872452913448227778071188132859183498739150; // log(33/32) as fixed256 - // int Log33_32_l = -3769; // log(33/32) = Log33_32 / 2^256 + Log33_32_l / 2^269 - yh += (yl * 512 + q * -3769) ~>> 13; // compensation, may be removed if slightly worse accuracy is acceptable - var Log33_32: int = 3563114646320977386603103333812068872452913448227778071188132859183498739150; // log(33/32) as fixed256 - return (yh + q * Log33_32, s); -} - -/// returns (y, s) such that log2(x) = y/2^256 + s for positive integer x -/// this function is very precise (error less than 0.6 ulp) and consumes < 7k gas -/// may be used to implement specific fixed-point instances of log() and log2() -/// (fixed256, int) log2_aux_f256(int x); -@pure -@inline_ref -fun log2_aux_f256(x: int): (int, int) { - var (s, q, y) = log_auxx_f260(x); - y = lshift256divr(y, log2_const_f256()) ~>> 4; // y/log(2) as fixed256 - var Log33_32: int = 5140487830366106860412008603913034462883915832139695448455767612111363481357; // log_2(33/32) as fixed256 - // Log33_32/2^256 happens to be a very precise approximation to log_2(33/32), no compensation required - return (y + q * Log33_32, s); -} - - -/// fixed248 log(fixed248 x) -@pure -@inline_ref -fun fixed248_log(x: int): int { - var (y, s) = log_aux_f256(x); - return muldivr(s - 248, log2_const_f256(), 1 << 8) + (y ~>> 8); - // return muldivr(s - 248, 80260960185991308862233904206310070533990667611589946606122867505419956976172, 1 << 8) + (y ~>> 8); -} - -/// fixed248 log2(fixed248 x) -@pure -@inline -fun fixed248_log2(x: int): int { - var (y, s) = log2_aux_f256(x); - return ((s - 248) << 248) + (y ~>> 8); -} - -/// computes x^y as exp(y*log(x)), x >= 0 -/// fixed248 pow(fixed248 x, fixed248 y); -@pure -@inline_ref -fun fixed248_pow(x: int, y: int): int { - if (!y) { - return 1 << 248; // x^0 = 1 - } - if (x <= 0) { - var bad: int = (x | y) < 0; 
- return 0 >> bad; // 0^y = 0 if x=0 and y>=0; "out of range" exception otherwise - } - var (l, s) = log2_aux_f256(x); - s -= 248; // log_2(x) = s+l, l is fixed256, 0<=l<1 - // compute (s+l)*y = q+ll - var (q1, r1) = mulrshiftr248mod(s, y); // muldivmodr(s, y, 1 << 248) - var (q2, r2) = mulrshift256mod(l, y); - r2 >>= 247; - var (q3, r3) = rshiftr248mod(q2); // divmodr(q2, 1 << 248); - var (q, ll) = rshiftr248mod(r1 + r3); - ll = 512 * ll + r2; - q += q1 + q3; - // now log_2(x^y) = y*log_2(x) = q + ll, ss integer, ll fixed257, -1/2<=ll<1/2 - var sq: int = q + 248; - if (sq <= 0) { - return -(sq == 0); // underflow - } - y = expm1_f257(mulrshiftr256(ll, log2_const_f256())); - return (y ~>> (9 - q)) - (-1 << sq); -} - -/*-------------------- INVERSE TRIGONOMETRIC FUNCTIONS ------------------*/ - -/// number of terms n should be chosen as for atanh_f258() -/// fixed259 atan(fixed259 x); -@pure -@inline_ref -fun atan_f259(x: int, n: int): int { - var x2: int = mulrshiftr256(x, x); // x^2 as fixed262 - var One: int = (1 << 254); - var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 - repeat (n - 1) { - // a := 1 + (1 + x^2 / a)(1 + 1/n) as fixed254 - var t: int = One + muldivr(x2, 1 << 246, a); // t := 1 + x^2 / a - var n1: int = n - 1; - a = muldivr(t, n, n1) + One; - n = n1; - } - // x / (1 + x^2 / a) = x / (1 + d) = x - x * d / (1 + d) = x - x * x^2/(a+x^2) for d = x^2 / a - return x - muldivr(x, x2, a + x2 ~/ 256) ~/ 256; -} - -/// number of terms n should be chosen as for atanh_f261() -/// fixed261 atan(fixed261 x); -@pure -@inline -fun atan_f261_inlined(x: int, n: int): int { - var x2: int = mulrshiftr256(x, x); // x^2 as fixed266 - var One: int = (1 << 254); - var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 - repeat (n - 1) { - // a := 1 + (1 + x^2 / a)(1 + 1/n) as fixed254 - var t: int = One + muldivr(x2, 1 << 242, a); // t := 1 + x^2 / a - var n1: int = n - 1; - a = muldivr(t, n, n1) + One; - n = n1; - } - // x / (1 + x^2 / a) = x / 
(1 + d) = x - x * d / (1 + d) = x - x * x^2/(a+x^2) for d = x^2 / a - return x - muldivr(x, x2, a + x2 ~/ 4096) ~/ 4096; -} - -/// fixed261 atan(fixed261 x); -@pure -@inline_ref -fun atan_f261(x: int, n: int): int { - return atan_f261_inlined(x, n); -} - -/// computes (q,a,b) such that q is approximately atan(x)/atan(1/32) and a+b*I=(1+I/32)^q as fixed255 -/// then b/a=atan(q*atan(1/32)) exactly, and (a,b) is almost a unit vector pointing in the direction of (1,x) -/// must have |x|<1.1, x is fixed24 -/// (int, fixed255, fixed255) atan_aux_prereduce(fixed24 x); -@pure -@inline_ref -fun atan_aux_prereduce(x: int): (int, int, int) { - var xu: int = abs(x); - var tc: int = 7214596; // tan(13*theta) as fixed24 where theta=atan(1/32) - var t1: int = muldivr(xu - tc, 1 << 88, xu * tc + (1 << 48)); // tan(x') as fixed64 where x'=atan(x)-13*theta - // t1/(3+t1^2) * 3073/32 = x'/3 * 3072/32 = x' / (96/3072) = x' / theta - var q: int = muldivr(t1 * 3073, 1 << 59, t1 * t1 + (touch(3) << 128)) + 13; // approximately round(atan(x)/theta), 0<=q<=25 - var (pa, pb) = (33226912, 5232641); // (32+I)^5 - var (qh, ql) = divmod(q, 5); - var (a, b) = (1 << (5 * (51 - q)), 0); // (1/32^q, 0) as fixed255 - repeat (ql) { - // a+b*I *= 32+I - (a, b) = (sub_rev(touch(b), 32 * a), a + 32 * b); // same as (32 * a - b, 32 * b + a), but more efficient - } - repeat (qh) { - // a+b*I *= (32+I)^5 = pa + pb*I - (a, b) = (a * pa - b * pb, a * pb + b * pa); - } - var xs: int = sgn(x); - return (xs * q, a, xs * b); -} - -/// compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 -/// this function is reasonably accurate (error < 7 ulp with ulp = 2^-261), but it consumes >7k gas -/// this is sufficient for most purposes -/// (int, fixed261) atan_aux(fixed256 x) -@pure -@inline_ref -fun atan_aux_f256(x: int): (int, int) { - var (q, a, b) = atan_aux_prereduce(x ~>> 232); // convert x to fixed24 - // now b/a = tan(q*atan(1/32)) exactly, where q is near atan(x)/atan(1/32); so b/a is near x - // 
compute y = u/v = (a*x-b)/(a+b*x) as fixed261 ; then |y|<0.0167 = 1.07/64 and atan(x)=atan(y)+q*atan(1/32) - var (u, ul) = mulrshiftr256mod(a, x); - u = (ul ~>> 250) + ((u - b) << 6); // |u| < 1/32, convert fixed255 -> fixed261 - var v: int = a + mulrshiftr256(b, x); // v is scalar product of (a,b) and (1,x), it is approximately in [1..sqrt(2)] as fixed255 - var y: int = muldivr(u, 1 << 255, v); // y = u/v as fixed261 - var z: int = atan_f261_inlined(y, 18); // z = atan(x)-q*atan(1/32) - return (q, z); -} - -/// compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 -/// this function is very accurate (error < 2 ulp), but it consumes >7k gas -/// in most cases, faster function atan_aux_f256() should be used -/// (int, fixed261) atan_auxx(fixed256 x) -@pure -@inline_ref -fun atan_auxx_f256(x: int): (int, int) { - var (q, a, b) = atan_aux_prereduce(x ~>> 232); // convert x to fixed24 - // now b/a = tan(q*atan(1/32)) exactly, where q is near atan(x)/atan(1/32); so b/a is near x - // compute y = (a*x-b)/(a+b*x) as fixed261 ; then |y|<0.0167 = 1.07/64 and atan(x)=atan(y)+q*atan(1/32) - // use sort of double precision arithmetic for this - var (u, ul) = mulrshiftr256mod(a, x); - ul /= 2; - u -= b; // |u| < 1/32 as fixed255 - var (v, vl) = mulrshiftr256mod(b, x); - vl /= 2; - v += a; // v is scalar product of (a,b) and (1,x), it is approximately in [1..sqrt(2)] as fixed255 - // y = (u + ul*eps) / (v + vl*eps) = u/v + (ul - vl * u/v)/v * eps where eps=1/2^255 - var (y, r) = lshift255divmodr(u, v); // y = u/v as fixed255 - var yl: int = muldivr(ul + r, 1 << 255, v) - muldivr(vl, y, v); // y/2^255 + yl/2^510 represent u/v - y = (yl ~>> 249) + (y << 6); // convert y to fixed261 - var z: int = atan_f261_inlined(y, 18); // z = atan(x)-q*atan(1/32) - return (q, z); -} - -/// consumes ~ 8k gas -/// fixed255 atan(fixed255 x); -@pure -@inline_ref -fun atan_f255(x: int): int { - var s: int = (x ~>> 256); - touch(x); - if (s) { - x = lshift256divr(-1 << 255, x); // x:=-1/x 
as fixed256 - } else { - x *= 2; // convert to fixed256 - } - var (q, z) = atan_aux_f256(x); - // now atan(x) = z + q*atan(1/32) + s*(Pi/2), z is fixed261 - var (Pi_h, Pi_l) = Pi_xconst_f254(); // Pi/2 as fixed255 + fixed383 - var (qh, ql) = mulrshiftr6mod(q, Atan1_32_f261()); - return qh + s * Pi_h + (z + ql + muldivr(s, Pi_l, 1 << 122)) ~/ 64; -} - -/// computes atan(x) for -1 <= x < 1 only -/// fixed256 atan_small(fixed256 x); -@pure -@inline_ref -fun atan_f256_small(x: int): int { - var (q, z) = atan_aux_f256(x); - // now atan(x) = z + q*atan(1/32), z is fixed261 - var (qh, ql) = mulrshiftr5mod(q, Atan1_32_f261()); - return qh + (z + ql) ~/ 32; -} - -/// fixed255 asin(fixed255 x); -@pure -@inline_ref -fun asin_f255(x: int): int { - var a: int = fixed255_One - fixed255_sqr(x); // a:=1-x^2 - if (!a) { - return sgn(x) * Pi_const_f254(); // Pi/2 or -Pi/2 - } - var y: int = fixed255_sqrt(a); // sqrt(1-x^2) - var t: int = -lshift256divr(x, (-1 << 255) - y); // t = x/(1+sqrt(1-x^2)) avoiding overflow - return atan_f256_small(t); // asin(x)=2*atan(t) -} - -/// fixed254 acos(fixed255 x); -@pure -@inline_ref -fun acos_f255(x: int): int { - var Pi: int = Pi_const_f254(); - if (x == (-1 << 255)) { - return Pi; // acos(-1) = Pi - } - Pi /= 2; - var y: int = fixed255_sqrt(fixed255_One - fixed255_sqr(x)); // sqrt(1-x^2) - var t: int = lshift256divr(x, (-1 << 255) - y); // t = -x/(1+sqrt(1-x^2)) avoiding overflow - return Pi + atan_f256_small(t) ~/ 2; // acos(x)=Pi/2 + 2*atan(t) -} - -/// consumes ~ 10k gas -/// fixed248 asin(fixed248 x) -@pure -@inline -fun fixed248_asin(x: int): int { - return asin_f255(x << 7) ~>> 7; -} - -/// consumes ~ 10k gas -/// fixed248 acos(fixed248 x) -@pure -@inline -fun fixed248_acos(x: int): int { - return acos_f255(x << 7) ~>> 6; -} - -/// consumes ~ 7500 gas -/// fixed248 atan(fixed248 x); -@pure -@inline_ref -fun fixed248_atan(x: int): int { - var s: int = (x ~>> 249); - touch(x); - if (s) { - s = sgn(s); - x = lshift256divr(-1 << 248, x); // 
x:=-1/x as fixed256 - } else { - x <<= 8; // convert to fixed256 - } - var (q, z) = atan_aux_f256(x); - // now atan(x) = z + q*atan(1/32) + s*(Pi/2), z is fixed261 - return (z ~/ 64 + s * Pi_const_f254() + muldivr(q, Atan1_32_f261(), 64)) ~/ 128; // compute in fixed255, then convert -} - -/// fixed248 acot(fixed248 x); -@pure -@inline_ref -fun fixed248_acot(x: int): int { - var s: int = (x ~>> 249); - touch(x); - if (s) { - x = lshift256divr(-1 << 248, x); // x:=-1/x as fixed256 - s = 0; - } else { - x <<= 8; // convert to fixed256 - s = sgn(x); - } - var (q, z) = atan_aux_f256(x); - // now acot(x) = - z - q*atan(1/32) + s*(Pi/2), z is fixed261 - return (s * Pi_const_f254() - z ~/ 64 - muldivr(q, Atan1_32_f261(), 64)) ~/ 128; // compute in fixed255, then convert -} - -/*-------------------- PSEUDO-RANDOM NUMBERS ------------------*/ - -/// random number with standard normal distribution N(0,1) -/// generated by Kinderman--Monahan ratio method modified by J.Leva -/// spends ~ 2k..3k gas on average -/// fixed252 nrand(); -@inline_ref -fun nrand_f252(): int { - var (x, s, t, A, B, r0) = (nan(), touch(29483) << 236, touch(-3167) << 239, 12845, 16693, 9043); - // 4/sqrt(e*Pi) = 1.369 loop iterations on average - do { - var (u, v) = (random() / 16 + 1, muldivr(random() - (1 << 255), 7027, 1 << 16)); // fixed252; 7027=ceil(sqrt(8/e)*2^12) - var va: int = abs(v); - var (u1, v1) = (u - s, va - t); // (u - 29483/2^16, abs(v) + 3167/2^13) as fixed252 - // Q := u1^2 + v1 * (A*v1 - B*u1) as fixed252 where A=12845/2^16, B=16693/2^16 - var Q: int = muldivr(u1, u1, 1 << 252) + muldivr(v1, muldivr(v1, A, 1 << 16) - muldivr(u1, B, 1 << 16), 1 << 252); - // must have 9043 / 2^15 < Q < 9125 / 2^15, otherwise accept if smaller, reject if larger - var Qd: int = (Q >> 237) - r0; - if ((Qd < 9125 - 9043) & (va / u < 16)) { - x = muldivr(v, 1 << 252, u); // x:=v/u as fixed252; reject immediately if |v/u| >= 16 - if (Qd >= 0) { - // immediately accept if Qd < 0 - // rarely taken branch - 
0.012 times per call on average - // check condition v^2 < -4*u^2*log(u), or equivalent condition u < exp(-x^2/4) for x=v/u - var xx: int = mulrshiftr256(x, x) ~/ 4; // x^2/4 as fixed248 - var ex: int = fixed248_exp(-xx) * 16; // exp(-x^2/4) as fixed252 - if (u > ex) { - x = nan(); // condition false, reject - } - } - } - } while (!(~ is_nan(x))); - return x; -} - -/// generates a random number approximately distributed according to the standard normal distribution -/// much faster than nrand_f252(), should be suitable for most purposes when only several random numbers are needed -/// fixed252 nrand_fast(); -@inline_ref -fun nrand_fast_f252(): int { - var t: int = touch(-3) << 253; // -6. as fixed252 - repeat (12) { - t += random() / 16; // add together 12 uniformly random numbers - } - return t; -} - -/// random number uniformly distributed in [0..1) -/// fixed248 random(); -@inline -fun fixed248_random(): int { - return random() >> 8; -} - -/// random number with standard normal distribution -/// fixed248 nrand(); -@inline -fun fixed248_nrand(): int { - return nrand_f252() ~>> 4; -} - -/// generates a random number approximately distributed according to the standard normal distribution -/// fixed248 nrand_fast(); -@inline -fun fixed248_nrand_fast(): int { - return nrand_fast_f252() ~>> 4; -} diff --git a/crypto/smartcont/stdlib.tolk b/crypto/smartcont/stdlib.tolk deleted file mode 100644 index 10c3b36ad..000000000 --- a/crypto/smartcont/stdlib.tolk +++ /dev/null @@ -1,1108 +0,0 @@ -// Standard library for Tolk -// (initially copied from stdlib.fc) -// -tolk 0.6 - -/* - This file is part of TON Tolk Standard Library. - - Tolk Standard Library is free software: you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. 
- - Tolk Standard Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. -*/ - -/* - # Tuple manipulation primitives - The names and the types are mostly self-explaining. - - Note that currently values of atomic type `tuple` can't be cast to composite tuple type (e.g. `[int, cell]`) - and vise versa. -*/ - -/*** - # Lisp-style lists - - Lists can be represented as nested 2-elements tuples. - Empty list is conventionally represented as TVM `null` value. - For example, tuple `(1, (2, (3, null)))` represents list `[1, 2, 3]`. Elements of a list can be of different types. -*/ - -/// Adds an element to the beginning of lisp-style list. -@pure -fun cons(head: X, tail: tuple): tuple - asm "CONS"; - -/// Extracts the head and the tail of lisp-style list. -@pure -fun uncons(list: tuple): (X, tuple) - asm "UNCONS"; - -/// Extracts the tail and the head of lisp-style list. -@pure -fun list_next(list: tuple): (tuple, X) - asm( -> 1 0) "UNCONS"; - -/// Returns the head of lisp-style list. -@pure -fun car(list: tuple): X - asm "CAR"; - -/// Returns the tail of lisp-style list. -@pure -fun cdr(list: tuple): tuple - asm "CDR"; - -/// Creates tuple with zero elements. -@pure -fun empty_tuple(): tuple - asm "NIL"; - -/// Appends a value `x` to a `Tuple t = (x1, ..., xn)`, but only if the resulting `Tuple t' = (x1, ..., xn, x)` -/// is of length at most 255. Otherwise throws a type check exception. -@pure -fun tpush(t: tuple, value: X): tuple - asm "TPUSH"; - -@pure -fun ~tpush(t: tuple, value: X): (tuple, ()) - asm "TPUSH"; - -/// Creates a tuple of length one with given argument as element. -@pure -fun single(x: X): [X] - asm "SINGLE"; - -/// Unpacks a tuple of length one -@pure -fun unsingle(t: [X]): X - asm "UNSINGLE"; - -/// Creates a tuple of length two with given arguments as elements. 
-@pure -fun pair(x: X, y: Y): [X, Y] - asm "PAIR"; - -/// Unpacks a tuple of length two -@pure -fun unpair(t: [X, Y]): (X, Y) - asm "UNPAIR"; - -/// Creates a tuple of length three with given arguments as elements. -@pure -fun triple(x: X, y: Y, z: Z): [X, Y, Z] - asm "TRIPLE"; - -/// Unpacks a tuple of length three -@pure -fun untriple(t: [X, Y, Z]): (X, Y, Z) - asm "UNTRIPLE"; - -/// Creates a tuple of length four with given arguments as elements. -@pure -fun tuple4(x: X, y: Y, z: Z, w: W): [X, Y, Z, W] - asm "4 TUPLE"; - -/// Unpacks a tuple of length four -@pure -fun untuple4(t: [X, Y, Z, W]): (X, Y, Z, W) - asm "4 UNTUPLE"; - -/// Returns the first element of a tuple (with unknown element types). -@pure -fun first(t: tuple): X - asm "FIRST"; - -/// Returns the second element of a tuple (with unknown element types). -@pure -fun second(t: tuple): X - asm "SECOND"; - -/// Returns the third element of a tuple (with unknown element types). -@pure -fun third(t: tuple): X - asm "THIRD"; - -/// Returns the fourth element of a tuple (with unknown element types). -@pure -fun fourth(t: tuple): X - asm "3 INDEX"; - -/// Returns the [`index`]-th element of tuple [`t`]. -@pure -fun at(t: tuple, index: int): X - builtin; - -/// Returns the first element of a pair tuple. -@pure -fun pair_first(p: [X, Y]): X - asm "FIRST"; - -/// Returns the second element of a pair tuple. -@pure -fun pair_second(p: [X, Y]): Y - asm "SECOND"; - -/// Returns the first element of a triple tuple. -@pure -fun triple_first(p: [X, Y, Z]): X - asm "FIRST"; - -/// Returns the second element of a triple tuple. -@pure -fun triple_second(p: [X, Y, Z]): Y - asm "SECOND"; - -/// Returns the third element of a triple tuple. -@pure -fun triple_third(p: [X, Y, Z]): Z - asm "THIRD"; - - -/// Moves a variable [x] to the top of the stack. -@pure -fun touch(x: X): X - builtin; - -/// Moves a variable [x] to the top of the stack. 
-@pure -fun ~touch(x: X): (X, ()) - builtin; - -/// Mark a variable as used, such that the code which produced it won't be deleted even if it is not impure. -fun ~impure_touch(x: X): (X, ()) - asm "NOP"; - - - -/// Returns the current Unix time as an Integer -@pure -fun now(): int - asm "NOW"; - -/// Returns the internal address of the current smart contract as a Slice with a `MsgAddressInt`. -/// If necessary, it can be parsed further using primitives such as [parse_std_addr]. -@pure -fun my_address(): slice - asm "MYADDR"; - -/// Returns the balance of the smart contract as a tuple consisting of an int -/// (balance in nanotoncoins) and a `cell` -/// (a dictionary with 32-bit keys representing the balance of "extra currencies") -/// at the start of Computation Phase. -/// Note that RAW primitives such as [send_raw_message] do not update this field. -@pure -fun get_balance(): [int, cell] - asm "BALANCE"; - -/// Returns the logical time of the current transaction. -@pure -fun cur_lt(): int - asm "LTIME"; - -/// Returns the starting logical time of the current block. -@pure -fun block_lt(): int - asm "BLOCKLT"; - -/// Computes the representation hash of a `cell` [c] and returns it as a 256-bit unsigned integer `x`. -/// Useful for signing and checking signatures of arbitrary entities represented by a tree of cells. -@pure -fun cell_hash(c: cell): int - asm "HASHCU"; - -/// Computes the hash of a `slice s` and returns it as a 256-bit unsigned integer `x`. -/// The result is the same as if an ordinary cell containing only data and references from `s` had been created -/// and its hash computed by [cell_hash]. -@pure -fun slice_hash(s: slice): int - asm "HASHSU"; - -/// Computes sha256 of the data bits of `slice` [s]. If the bit length of `s` is not divisible by eight, -/// throws a cell underflow exception. The hash value is returned as a 256-bit unsigned integer `x`. 
-@pure -fun string_hash(s: slice): int - asm "SHA256U"; - -/*** - # Signature checks -*/ - -/// Checks the Ed25519-`signature` of a `hash` (a 256-bit unsigned integer, usually computed as the hash of some data) -/// using [public_key] (also represented by a 256-bit unsigned integer). -/// The signature must contain at least 512 data bits; only the first 512 bits are used. -/// The result is `−1` if the signature is valid, `0` otherwise. -/// Note that `CHKSIGNU` creates a 256-bit slice with the hash and calls `CHKSIGNS`. -/// That is, if [hash] is computed as the hash of some data, these data are hashed twice, -/// the second hashing occurring inside `CHKSIGNS`. -@pure -fun check_signature(hash: int, signature: slice, public_key: int): int - asm "CHKSIGNU"; - -/// Checks whether [signature] is a valid Ed25519-signature of the data portion of `slice data` using `public_key`, -/// similarly to [check_signature]. -/// If the bit length of [data] is not divisible by eight, throws a cell underflow exception. -/// The verification of Ed25519 signatures is the standard one, -/// with sha256 used to reduce [data] to the 256-bit number that is actually signed. -@pure -fun check_data_signature(data: slice, signature: slice, public_key: int): int - asm "CHKSIGNS"; - -/*** - # Computation of boc size - The primitives below may be useful for computing storage fees of user-provided data. -*/ - -/// A non-quiet version of [compute_data_size?] that throws a cell overflow exception (`8`) on failure. -fun compute_data_size(c: cell, max_cells: int): (int, int, int) - asm "CDATASIZE"; - -/// A non-quiet version of [slice_compute_data_size?] that throws a cell overflow exception (`8`) on failure. -fun slice_compute_data_size(s: slice, max_cells: int): (int, int, int) - asm "SDATASIZE"; - -/// Returns `(x, y, z, -1)` or `(null, null, null, 0)`. 
-/// Recursively computes the count of distinct cells `x`, data bits `y`, and cell references `z` -/// in the DAG rooted at `cell` [c], effectively returning the total storage used by this DAG taking into account -/// the identification of equal cells. -/// The values of `x`, `y`, and `z` are computed by a depth-first traversal of this DAG, -/// with a hash table of visited cell hashes used to prevent visits of already-visited cells. -/// The total count of visited cells `x` cannot exceed non-negative [max_cells]; -/// otherwise the computation is aborted before visiting the `(max_cells + 1)`-st cell and -/// a zero flag is returned to indicate failure. If [c] is `null`, returns `x = y = z = 0`. -@pure -fun compute_data_size?(c: cell, max_cells: int): (int, int, int, int) - asm "CDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; - -/// Similar to [compute_data_size?], but accepting a `slice` [s] instead of a `cell`. -/// The returned value of `x` does not take into account the cell that contains the `slice` [s] itself; -/// however, the data bits and the cell references of [s] are accounted for in `y` and `z`. -@pure -fun slice_compute_data_size?(s: slice, max_cells: int): (int, int, int, int) - asm "SDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; - -/*** - # Debug primitives - Only works for local TVM execution with debug level verbosity -*/ - -/// Dump a variable [x] to the debug log. -fun ~dump(x: X): (X, ()) - builtin; - -/// Dump a string [x] to the debug log. -fun ~strdump(x: X): (X, ()) - builtin; - -/// Dumps the stack (at most the top 255 values) and shows the total stack depth. -fun dump_stack(): void - asm "DUMPSTK"; - -/*** - # Persistent storage save and load -*/ - -/// Returns the persistent contract storage cell. It can be parsed or modified with slice and builder primitives later. -@pure -fun get_data(): cell - asm "c4 PUSH"; - -/// Sets `cell` [c] as persistent contract data. You can update persistent contract storage with this primitive. 
-fun set_data(c: cell): void - asm "c4 POP"; - -/*** - # Continuation primitives -*/ -/// Usually `c3` has a continuation initialized by the whole code of the contract. It is used for function calls. -/// The primitive returns the current value of `c3`. -@pure -fun get_c3(): continuation - asm "c3 PUSH"; - -/// Updates the current value of `c3`. Usually, it is used for updating smart contract code in run-time. -/// Note that after execution of this primitive the current code -/// (and the stack of recursive function calls) won't change, -/// but any other function call will use a function from the new code. -fun set_c3(c: continuation): void - asm "c3 POP"; - -/// Transforms a `slice` [s] into a simple ordinary continuation `c`, with `c.code = s` and an empty stack and savelist. -@pure -fun bless(s: slice): continuation - asm "BLESS"; - -/*** - # Gas related primitives -*/ - -/// Sets current gas limit `gl` to its maximal allowed value `gm`, and resets the gas credit `gc` to zero, -/// decreasing the value of `gr` by `gc` in the process. -/// In other words, the current smart contract agrees to buy some gas to finish the current transaction. -/// This action is required to process external messages, which bring no value (hence no gas) with themselves. -/// -/// For more details check [accept_message effects](https://ton.org/docs/#/smart-contracts/accept). -fun accept_message(): void - asm "ACCEPT"; - -/// Sets current gas limit `gl` to the minimum of limit and `gm`, and resets the gas credit `gc` to zero. -/// If the gas consumed so far (including the present instruction) exceeds the resulting value of `gl`, -/// an (unhandled) out of gas exception is thrown before setting new gas limits. -/// Notice that [set_gas_limit] with an argument `limit ≥ 2^63 − 1` is equivalent to [accept_message]. 
-fun set_gas_limit(limit: int): void - asm "SETGASLIMIT"; - -/// Commits the current state of registers `c4` (“persistent data”) and `c5` (“actions”) -/// so that the current execution is considered “successful” with the saved values even if an exception -/// in Computation Phase is thrown later. -fun commit(): void - asm "COMMIT"; - -/// Computes the amount of gas that can be bought for `amount` nanoTONs, -/// and sets `gl` accordingly in the same way as [set_gas_limit]. -fun buy_gas(amount: int): void - asm "BUYGAS"; - -/// Computes the minimum of two integers [x] and [y]. -@pure -fun min(x: int, y: int): int - asm "MIN"; - -/// Computes the maximum of two integers [x] and [y]. -@pure -fun max(x: int, y: int): int - asm "MAX"; - -/// Sorts two integers. -@pure -fun minmax(x: int, y: int): (int, int) - asm "MINMAX"; - -/// Computes the absolute value of an integer [x]. -@pure -fun abs(x: int): int - asm "ABS"; - -/// Computes the quotient and remainder of [x] / [y]. Example: divmod(112,3) = (37,1) -@pure -fun divmod(x: int, y: int): (int, int) - builtin; - -/// Computes the remainder and quotient of [x] / [y]. Example: moddiv(112,3) = (1,37) -@pure -fun moddiv(x: int, y: int): (int, int) - builtin; - -/// Computes multiple-then-divide: floor([x] * [y] / [z]). -/// The intermediate result is stored in a 513-bit integer to prevent precision loss. -@pure -fun muldiv(x: int, y: int, z: int): int - builtin; - -/// Similar to `muldiv`, but rounds the result: round([x] * [y] / [z]). -@pure -fun muldivr(x: int, y: int, z: int): int - builtin; - -/// Similar to `muldiv`, but ceils the result: ceil([x] * [y] / [z]). -@pure -fun muldivc(x: int, y: int, z: int): int - builtin; - -/// Computes the quotient and remainder of ([x] * [y] / [z]). 
Example: muldivmod(112,3,10) = (33,6) -@pure -fun muldivmod(x: int, y: int, z: int): (int, int) - builtin; - -/*** - # Slice primitives - - It is said that a primitive _loads_ some data, - if it returns the data and the remainder of the slice - (so it can also be used as modifying method). - - It is said that a primitive _preloads_ some data, if it returns only the data - (it can be used as non-modifying method). - - Unless otherwise stated, loading and preloading primitives read the data from a prefix of the slice. -*/ - - -/// Converts a `cell` [c] into a `slice`. Notice that [c] must be either an ordinary cell, -/// or an exotic cell (see [TVM.pdf](https://ton-blockchain.github.io/docs/tvm.pdf), 3.1.2) -/// which is automatically loaded to yield an ordinary cell `c'`, converted into a `slice` afterwards. -@pure -fun begin_parse(c: cell): slice - asm "CTOS"; - -/// Checks if [s] is empty. If not, throws an exception. -fun end_parse(s: slice): void - asm "ENDS"; - -/// Loads the first reference from the slice. -@pure -fun load_ref(s: slice): (slice, cell) - asm( -> 1 0) "LDREF"; - -/// Preloads the first reference from the slice. -@pure -fun preload_ref(s: slice): cell - asm "PLDREF"; - -/// Loads a signed [len]-bit integer from a slice [s]. -@pure -fun load_int(s: slice, len: int): (slice, int) - builtin; - -/// Loads an unsigned [len]-bit integer from a slice [s]. -@pure -fun load_uint(s: slice, len: int): (slice, int) - builtin; - -/// Preloads a signed [len]-bit integer from a slice [s]. -@pure -fun preload_int(s: slice, len: int): int - builtin; - -/// Preloads an unsigned [len]-bit integer from a slice [s]. -@pure -fun preload_uint(s: slice, len: int): int - builtin; - -/// Loads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate slice `s''`. -@pure -fun load_bits(s: slice, len: int): (slice, slice) - builtin; - -/// Preloads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate slice `s''`. 
-@pure -fun preload_bits(s: slice, len: int): slice - builtin; - -/// Loads serialized amount of TonCoins (any unsigned integer up to `2^120 - 1`). -@pure -fun load_grams(s: slice): (slice, int) - asm( -> 1 0) "LDGRAMS"; - -@pure -fun load_coins(s: slice): (slice, int) - asm( -> 1 0) "LDGRAMS"; - -/// Returns all but the first `0 ≤ len ≤ 1023` bits of `slice` [s]. -@pure -fun skip_bits(s: slice, len: int): slice - asm "SDSKIPFIRST"; - -@pure -fun ~skip_bits(s: slice, len: int): (slice, ()) - asm "SDSKIPFIRST"; - -/// Returns the first `0 ≤ len ≤ 1023` bits of `slice` [s]. -@pure -fun first_bits(s: slice, len: int): slice - asm "SDCUTFIRST"; - -/// Returns all but the last `0 ≤ len ≤ 1023` bits of `slice` [s]. -@pure -fun skip_last_bits(s: slice, len: int): slice - asm "SDSKIPLAST"; -@pure -fun ~skip_last_bits(s: slice, len: int): (slice, ()) - asm "SDSKIPLAST"; - -/// Returns the last `0 ≤ len ≤ 1023` bits of `slice` [s]. -@pure -fun slice_last(s: slice, len: int): slice - asm "SDCUTLAST"; - -/// Loads a dictionary `D` (HashMapE) from `slice` [s]. -/// (returns `null` if `nothing` constructor is used). -@pure -fun load_dict(s: slice): (slice, cell) - asm( -> 1 0) "LDDICT"; - -/// Preloads a dictionary `D` from `slice` [s]. -@pure -fun preload_dict(s: slice): cell - asm "PLDDICT"; - -/// Loads a dictionary as [load_dict], but returns only the remainder of the slice. -@pure -fun skip_dict(s: slice): slice - asm "SKIPDICT"; - -@pure -fun ~skip_dict(s: slice): (slice, ()) - asm "SKIPDICT"; - -/// Loads (Maybe ^Cell) from `slice` [s]. -/// In other words loads 1 bit and if it is true -/// loads first ref and return it with slice remainder -/// otherwise returns `null` and slice remainder -@pure -fun load_maybe_ref(s: slice): (slice, cell) - asm( -> 1 0) "LDOPTREF"; - -/// Preloads (Maybe ^Cell) from `slice` [s]. -@pure -fun preload_maybe_ref(s: slice): cell - asm "PLDOPTREF"; - - -/// Returns the depth of `cell` [c]. 
-/// If [c] has no references, then return `0`; -/// otherwise the returned value is one plus the maximum of depths of cells referred to from [c]. -/// If [c] is a `null` instead of a cell, returns zero. -@pure -fun cell_depth(c: cell): int - asm "CDEPTH"; - - -/*** - # Slice size primitives -*/ - -/// Returns the number of references in `slice` [s]. -@pure -fun slice_refs(s: slice): int - asm "SREFS"; - -/// Returns the number of data bits in `slice` [s]. -@pure -fun slice_bits(s: slice): int - asm "SBITS"; - -/// Returns both the number of data bits and the number of references in `slice` [s]. -@pure -fun slice_bits_refs(s: slice): (int, int) - asm "SBITREFS"; - -/// Checks whether a `slice` [s] is empty (i.e., contains no bits of data and no cell references). -@pure -fun slice_empty?(s: slice): int - asm "SEMPTY"; - -/// Checks whether `slice` [s] has no bits of data. -@pure -fun slice_data_empty?(s: slice): int - asm "SDEMPTY"; - -/// Checks whether `slice` [s] has no references. -@pure -fun slice_refs_empty?(s: slice): int - asm "SREMPTY"; - -/// Returns the depth of `slice` [s]. -/// If [s] has no references, then returns `0`; -/// otherwise the returned value is one plus the maximum of depths of cells referred to from [s]. -@pure -fun slice_depth(s: slice): int - asm "SDEPTH"; - -/*** - # Builder size primitives -*/ - -/// Returns the number of cell references already stored in `builder` [b] -@pure -fun builder_refs(b: builder): int - asm "BREFS"; - -/// Returns the number of data bits already stored in `builder` [b]. -@pure -fun builder_bits(b: builder): int - asm "BBITS"; - -/// Returns the depth of `builder` [b]. -/// If no cell references are stored in [b], then returns 0; -/// otherwise the returned value is one plus the maximum of depths of cells referred to from [b]. 
-@pure -fun builder_depth(b: builder): int - asm "BDEPTH"; - -/*** - # Builder primitives - It is said that a primitive _stores_ a value `x` into a builder `b` - if it returns a modified version of the builder `b'` with the value `x` stored at the end of it. - It can be used as non-modifying method. - - All the primitives below first check whether there is enough space in the `builder`, - and only then check the range of the value being serialized. -*/ - -/// Creates a new empty `builder`. -@pure -fun begin_cell(): builder - asm "NEWC"; - -/// Converts a `builder` into an ordinary `cell`. -@pure -fun end_cell(b: builder): cell - asm "ENDC"; - -/// Stores a reference to `cell` [c] into `builder` [b]. -@pure -fun store_ref(b: builder, c: cell): builder - asm(c b) "STREF"; - -/// Stores an unsigned [len]-bit integer `x` into `b` for `0 ≤ len ≤ 256`. -@pure -fun store_uint(b: builder, x: int, len: int): builder - builtin; - -/// Stores a signed [len]-bit integer `x` into `b` for `0 ≤ len ≤ 257`. -@pure -fun store_int(b: builder, x: int, len: int): builder - builtin; - -/// Stores `slice` [s] into `builder` [b]. -@pure -fun store_slice(b: builder, s: slice): builder - asm "STSLICER"; - -/// Stores (serializes) an integer [x] in the range `0..2^120 − 1` into `builder` [b]. -/// The serialization of [x] consists of a 4-bit unsigned big-endian integer `l`, -/// which is the smallest integer `l ≥ 0`, such that `x < 2^8l`, -/// followed by an `8l`-bit unsigned big-endian representation of [x]. -/// If [x] does not belong to the supported range, a range check exception is thrown. -/// -/// Store amounts of TonCoins to the builder as VarUInteger 16 -@pure -fun store_grams(b: builder, x: int): builder - asm "STGRAMS"; - -@pure -fun store_coins(b: builder, x: int): builder - asm "STGRAMS"; - -/// Stores dictionary `D` represented by `cell` [c] or `null` into `builder` [b]. -/// In other words, stores a `1`-bit and a reference to [c] if [c] is not `null` and `0`-bit otherwise. 
-@pure -fun store_dict(b: builder, c: cell): builder - asm(c b) "STDICT"; - -/// Stores (Maybe ^Cell) to builder: -/// if cell is null store 1 zero bit -/// otherwise store 1 true bit and ref to cell -@pure -fun store_maybe_ref(b: builder, c: cell): builder - asm(c b) "STOPTREF"; - - -/*** - # Address manipulation primitives - The address manipulation primitives listed below serialize and deserialize values according to the following TL-B scheme: - ```TL-B - addr_none$00 = MsgAddressExt; - addr_extern$01 len:(## 8) external_address:(bits len) - = MsgAddressExt; - anycast_info$_ depth:(#<= 30) { depth >= 1 } - rewrite_pfx:(bits depth) = Anycast; - addr_std$10 anycast:(Maybe Anycast) - workchain_id:int8 address:bits256 = MsgAddressInt; - addr_var$11 anycast:(Maybe Anycast) addr_len:(## 9) - workchain_id:int32 address:(bits addr_len) = MsgAddressInt; - _ _:MsgAddressInt = MsgAddress; - _ _:MsgAddressExt = MsgAddress; - - int_msg_info$0 ihr_disabled:Bool bounce:Bool bounced:Bool - src:MsgAddress dest:MsgAddressInt - value:CurrencyCollection ihr_fee:Grams fwd_fee:Grams - created_lt:uint64 created_at:uint32 = CommonMsgInfoRelaxed; - ext_out_msg_info$11 src:MsgAddress dest:MsgAddressExt - created_lt:uint64 created_at:uint32 = CommonMsgInfoRelaxed; - ``` - A deserialized `MsgAddress` is represented by a tuple `t` as follows: - - - `addr_none` is represented by `t = (0)`, - i.e., a tuple containing exactly one integer equal to zero. - - `addr_extern` is represented by `t = (1, s)`, - where slice `s` contains the field `external_address`. In other words, ` - t` is a pair (a tuple consisting of two entries), containing an integer equal to one and slice `s`. - - `addr_std` is represented by `t = (2, u, x, s)`, - where `u` is either a `null` (if `anycast` is absent) or a slice `s'` containing `rewrite_pfx` (if anycast is present). - Next, integer `x` is the `workchain_id`, and slice `s` contains the address. 
- - `addr_var` is represented by `t = (3, u, x, s)`, - where `u`, `x`, and `s` have the same meaning as for `addr_std`. -*/ - -/// Loads from slice [s] the only prefix that is a valid `MsgAddress`, -/// and returns both this prefix `s'` and the remainder `s''` of [s] as slices. -@pure -fun load_msg_addr(s: slice): (slice, slice) - asm( -> 1 0) "LDMSGADDR"; - -/// Decomposes slice [s] containing a valid `MsgAddress` into a `tuple t` with separate fields of this `MsgAddress`. -/// If [s] is not a valid `MsgAddress`, a cell deserialization exception is thrown. -@pure -fun parse_addr(s: slice): tuple - asm "PARSEMSGADDR"; - -/// Parses slice [s] containing a valid `MsgAddressInt` (usually a `msg_addr_std`), -/// applies rewriting from the anycast (if present) to the same-length prefix of the address, -/// and returns both the workchain and the 256-bit address as integers. -/// If the address is not 256-bit, or if [s] is not a valid serialization of `MsgAddressInt`, -/// throws a cell deserialization exception. -@pure -fun parse_std_addr(s: slice): (int, int) - asm "REWRITESTDADDR"; - -/// A variant of [parse_std_addr] that returns the (rewritten) address as a slice [s], -/// even if it is not exactly 256 bit long (represented by a `msg_addr_var`). -@pure -fun parse_var_addr(s: slice): (int, slice) - asm "REWRITEVARADDR"; - -/*** - # Dictionary primitives -*/ - - -/// Sets the value associated with [key_len]-bit key signed index in dictionary [dict] to [value] (cell), -/// and returns the resulting dictionary. -@pure -fun idict_set_ref(dict: cell, key_len: int, index: int, value: cell): cell - asm(value index dict key_len) "DICTISETREF"; - -@pure -fun ~idict_set_ref(dict: cell, key_len: int, index: int, value: cell): (cell, ()) - asm(value index dict key_len) "DICTISETREF"; - -/// Sets the value associated with [key_len]-bit key unsigned index in dictionary [dict] to [value] (cell), -/// and returns the resulting dictionary. 
-@pure -fun udict_set_ref(dict: cell, key_len: int, index: int, value: cell): cell - asm(value index dict key_len) "DICTUSETREF"; - -@pure -fun ~udict_set_ref(dict: cell, key_len: int, index: int, value: cell): (cell, ()) - asm(value index dict key_len) "DICTUSETREF"; - -@pure -fun idict_get_ref(dict: cell, key_len: int, index: int): cell - asm(index dict key_len) "DICTIGETOPTREF"; - -@pure -fun idict_get_ref?(dict: cell, key_len: int, index: int): (cell, int) - asm(index dict key_len) "DICTIGETREF" "NULLSWAPIFNOT"; - -@pure -fun udict_get_ref?(dict: cell, key_len: int, index: int): (cell, int) - asm(index dict key_len) "DICTUGETREF" "NULLSWAPIFNOT"; - -@pure -fun idict_set_get_ref(dict: cell, key_len: int, index: int, value: cell): (cell, cell) - asm(value index dict key_len) "DICTISETGETOPTREF"; - -@pure -fun udict_set_get_ref(dict: cell, key_len: int, index: int, value: cell): (cell, cell) - asm(value index dict key_len) "DICTUSETGETOPTREF"; - -@pure -fun idict_delete?(dict: cell, key_len: int, index: int): (cell, int) - asm(index dict key_len) "DICTIDEL"; - -@pure -fun udict_delete?(dict: cell, key_len: int, index: int): (cell, int) - asm(index dict key_len) "DICTUDEL"; - -@pure -fun idict_get?(dict: cell, key_len: int, index: int): (slice, int) - asm(index dict key_len) "DICTIGET" "NULLSWAPIFNOT"; - -@pure -fun udict_get?(dict: cell, key_len: int, index: int): (slice, int) - asm(index dict key_len) "DICTUGET" "NULLSWAPIFNOT"; - -@pure -fun idict_delete_get?(dict: cell, key_len: int, index: int): (cell, slice, int) - asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; - -@pure -fun udict_delete_get?(dict: cell, key_len: int, index: int): (cell, slice, int) - asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; - -@pure -fun ~idict_delete_get?(dict: cell, key_len: int, index: int): (cell, (slice, int)) - asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; - -@pure -fun ~udict_delete_get?(dict: cell, key_len: int, index: int): (cell, (slice, int)) - 
asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; - -@pure -fun udict_set(dict: cell, key_len: int, index: int, value: slice): cell - asm(value index dict key_len) "DICTUSET"; - -@pure -fun ~udict_set(dict: cell, key_len: int, index: int, value: slice): (cell, ()) - asm(value index dict key_len) "DICTUSET"; - -@pure -fun idict_set(dict: cell, key_len: int, index: int, value: slice): cell - asm(value index dict key_len) "DICTISET"; - -@pure -fun ~idict_set(dict: cell, key_len: int, index: int, value: slice): (cell, ()) - asm(value index dict key_len) "DICTISET"; - -@pure -fun dict_set(dict: cell, key_len: int, index: slice, value: slice): cell - asm(value index dict key_len) "DICTSET"; - -@pure -fun ~dict_set(dict: cell, key_len: int, index: slice, value: slice): (cell, ()) - asm(value index dict key_len) "DICTSET"; - -@pure -fun udict_add?(dict: cell, key_len: int, index: int, value: slice): (cell, int) - asm(value index dict key_len) "DICTUADD"; - -@pure -fun udict_replace?(dict: cell, key_len: int, index: int, value: slice): (cell, int) - asm(value index dict key_len) "DICTUREPLACE"; - -@pure -fun idict_add?(dict: cell, key_len: int, index: int, value: slice): (cell, int) - asm(value index dict key_len) "DICTIADD"; - -@pure -fun idict_replace?(dict: cell, key_len: int, index: int, value: slice): (cell, int) - asm(value index dict key_len) "DICTIREPLACE"; - -@pure -fun udict_set_builder(dict: cell, key_len: int, index: int, value: builder): cell - asm(value index dict key_len) "DICTUSETB"; - -@pure -fun ~udict_set_builder(dict: cell, key_len: int, index: int, value: builder): (cell, ()) - asm(value index dict key_len) "DICTUSETB"; - -@pure -fun idict_set_builder(dict: cell, key_len: int, index: int, value: builder): cell - asm(value index dict key_len) "DICTISETB"; - -@pure -fun ~idict_set_builder(dict: cell, key_len: int, index: int, value: builder): (cell, ()) - asm(value index dict key_len) "DICTISETB"; - -@pure -fun dict_set_builder(dict: cell, key_len: 
int, index: slice, value: builder): cell - asm(value index dict key_len) "DICTSETB"; - -@pure -fun ~dict_set_builder(dict: cell, key_len: int, index: slice, value: builder): (cell, ()) - asm(value index dict key_len) "DICTSETB"; - -@pure -fun udict_add_builder?(dict: cell, key_len: int, index: int, value: builder): (cell, int) - asm(value index dict key_len) "DICTUADDB"; - -@pure -fun udict_replace_builder?(dict: cell, key_len: int, index: int, value: builder): (cell, int) - asm(value index dict key_len) "DICTUREPLACEB"; - -@pure -fun idict_add_builder?(dict: cell, key_len: int, index: int, value: builder): (cell, int) - asm(value index dict key_len) "DICTIADDB"; - -@pure -fun idict_replace_builder?(dict: cell, key_len: int, index: int, value: builder): (cell, int) - asm(value index dict key_len) "DICTIREPLACEB"; - -@pure -fun udict_delete_get_min(dict: cell, key_len: int): (cell, int, slice, int) - asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; - -@pure -fun ~udict_delete_get_min(dict: cell, key_len: int): (cell, (int, slice, int)) - asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; - -@pure -fun idict_delete_get_min(dict: cell, key_len: int): (cell, int, slice, int) - asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; - -@pure -fun ~idict_delete_get_min(dict: cell, key_len: int): (cell, (int, slice, int)) - asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; - -@pure -fun dict_delete_get_min(dict: cell, key_len: int): (cell, slice, slice, int) - asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; - -@pure -fun ~dict_delete_get_min(dict: cell, key_len: int): (cell, (slice, slice, int)) - asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; - -@pure -fun udict_delete_get_max(dict: cell, key_len: int): (cell, int, slice, int) - asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; - -@pure -fun ~udict_delete_get_max(dict: cell, key_len: int): (cell, (int, slice, int)) - asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; - -@pure -fun idict_delete_get_max(dict: cell, key_len: int): (cell, int, 
slice, int) - asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; - -@pure -fun ~idict_delete_get_max(dict: cell, key_len: int): (cell, (int, slice, int)) - asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; - -@pure -fun dict_delete_get_max(dict: cell, key_len: int): (cell, slice, slice, int) - asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; - -@pure -fun ~dict_delete_get_max(dict: cell, key_len: int): (cell, (slice, slice, int)) - asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; - -@pure -fun udict_get_min?(dict: cell, key_len: int): (int, slice, int) - asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; - -@pure -fun udict_get_max?(dict: cell, key_len: int): (int, slice, int) - asm (-> 1 0 2) "DICTUMAX" "NULLSWAPIFNOT2"; - -@pure -fun udict_get_min_ref?(dict: cell, key_len: int): (int, cell, int) - asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; - -@pure -fun udict_get_max_ref?(dict: cell, key_len: int): (int, cell, int) - asm (-> 1 0 2) "DICTUMAXREF" "NULLSWAPIFNOT2"; - -@pure -fun idict_get_min?(dict: cell, key_len: int): (int, slice, int) - asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; - -@pure -fun idict_get_max?(dict: cell, key_len: int): (int, slice, int) - asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; - -@pure -fun idict_get_min_ref?(dict: cell, key_len: int): (int, cell, int) - asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; - -@pure -fun idict_get_max_ref?(dict: cell, key_len: int): (int, cell, int) - asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; - -@pure -fun udict_get_next?(dict: cell, key_len: int, pivot: int): (int, slice, int) - asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; - -@pure -fun udict_get_nexteq?(dict: cell, key_len: int, pivot: int): (int, slice, int) - asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; - -@pure -fun udict_get_prev?(dict: cell, key_len: int, pivot: int): (int, slice, int) - asm(pivot dict key_len -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; - -@pure -fun udict_get_preveq?(dict: cell, key_len: int, pivot: int): 
(int, slice, int) - asm(pivot dict key_len -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; - -@pure -fun idict_get_next?(dict: cell, key_len: int, pivot: int): (int, slice, int) - asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; - -@pure -fun idict_get_nexteq?(dict: cell, key_len: int, pivot: int): (int, slice, int) - asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; - -@pure -fun idict_get_prev?(dict: cell, key_len: int, pivot: int): (int, slice, int) - asm(pivot dict key_len -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; - -@pure -fun idict_get_preveq?(dict: cell, key_len: int, pivot: int): (int, slice, int) - asm(pivot dict key_len -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; - -/// Creates an empty dictionary, which is actually a null value. Equivalent to PUSHNULL -@pure -fun new_dict(): cell - asm "NEWDICT"; - -/// Checks whether a dictionary is empty. -@pure -fun dict_empty?(c: cell): int - asm "DICTEMPTY"; - - -/* Prefix dictionary primitives */ -@pure -fun pfxdict_get?(dict: cell, key_len: int, key: slice): (slice, slice, slice, int) - asm(key dict key_len) "PFXDICTGETQ" "NULLSWAPIFNOT2"; - -@pure -fun pfxdict_set?(dict: cell, key_len: int, key: slice, value: slice): (cell, int) - asm(value key dict key_len) "PFXDICTSET"; - -@pure -fun pfxdict_delete?(dict: cell, key_len: int, key: slice): (cell, int) - asm(key dict key_len) "PFXDICTDEL"; - -/// Returns the value of the global configuration parameter with integer index `i` as a `cell` or `null` value. -@pure -fun config_param(x: int): cell - asm "CONFIGOPTPARAM"; - -/// Creates an output action which would reserve exactly amount nanotoncoins (if mode = 0), at most amount nanotoncoins (if mode = 2), or all but amount nanotoncoins (if mode = 1 or mode = 3), from the remaining balance of the account. 
It is roughly equivalent to creating an outbound message carrying amount nanotoncoins (or b − amount nanotoncoins, where b is the remaining balance) to oneself, so that the subsequent output actions would not be able to spend more money than the remainder. Bit +2 in mode means that the external action does not fail if the specified amount cannot be reserved; instead, all remaining balance is reserved. Bit +8 in mode means `amount <- -amount` before performing any further actions. Bit +4 in mode means that amount is increased by the original balance of the current account (before the compute phase), including all extra currencies, before performing any other checks and actions. Currently, amount must be a non-negative integer, and mode must be in the range 0..15. -fun raw_reserve(amount: int, mode: int): void - asm "RAWRESERVE"; - -/// Similar to raw_reserve, but also accepts a dictionary extra_amount (represented by a cell or null) with extra currencies. In this way currencies other than TonCoin can be reserved. -fun raw_reserve_extra(amount: int, extra_amount: cell, mode: int): void - asm "RAWRESERVEX"; - -/// Sends a raw message contained in msg, which should contain a correctly serialized object Message X, with the only exception that the source address is allowed to have dummy value addr_none (to be automatically replaced with the current smart contract address), and ihr_fee, fwd_fee, created_lt and created_at fields can have arbitrary values (to be rewritten with correct values during the action phase of the current transaction). Integer parameter mode contains the flags. 
Currently mode = 0 is used for ordinary messages; mode = 128 is used for messages that are to carry all the remaining balance of the current smart contract (instead of the value originally indicated in the message); mode = 64 is used for messages that carry all the remaining value of the inbound message in addition to the value initially indicated in the new message (if bit 0 is not set, the gas fees are deducted from this amount); mode' = mode + 1 means that the sender wants to pay transfer fees separately; mode' = mode + 2 means that any errors arising while processing this message during the action phase should be ignored. Finally, mode' = mode + 32 means that the current account must be destroyed if its resulting balance is zero. This flag is usually employed together with +128. -fun send_raw_message(msg: cell, mode: int): void - asm "SENDRAWMSG"; - -/// Creates an output action that would change this smart contract code to that given by cell new_code. Notice that this change will take effect only after the successful termination of the current run of the smart contract -fun set_code(new_code: cell): void - asm "SETCODE"; - -/// Generates a new pseudo-random unsigned 256-bit integer x. The algorithm is as follows: if r is the old value of the random seed, considered as a 32-byte array (by constructing the big-endian representation of an unsigned 256-bit integer), then its sha512(r) is computed; the first 32 bytes of this hash are stored as the new value r' of the random seed, and the remaining 32 bytes are returned as the next random value x. -fun random(): int - asm "RANDU256"; - -/// Generates a new pseudo-random integer z in the range 0..range−1 (or range..−1, if range < 0). More precisely, an unsigned random value x is generated as in random; then z := x * range / 2^256 is computed. -fun rand(range: int): int - asm "RAND"; - -/// Returns the current random seed as an unsigned 256-bit Integer. 
-@pure -fun get_seed(): int - asm "RANDSEED"; - -/// Sets the random seed to unsigned 256-bit seed. -fun set_seed(seed: int): void - asm "SETRAND"; - -/// Mixes unsigned 256-bit integer x into the random seed r by setting the random seed to sha256 of the concatenation of two 32-byte strings: the first with the big-endian representation of the old seed r, and the second with the big-endian representation of x. -fun randomize(x: int): void - asm "ADDRAND"; - -/// Equivalent to randomize(cur_lt());. -fun randomize_lt(): void - asm "LTIME" "ADDRAND"; - -/// Checks whether the data parts of two slices coinside -@pure -fun equal_slice_bits(a: slice, b: slice): int - asm "SDEQ"; - -/// Concatenates two builders -@pure -fun store_builder(to: builder, from: builder): builder - asm "STBR"; diff --git a/crypto/smartcont/tolk-stdlib/common.tolk b/crypto/smartcont/tolk-stdlib/common.tolk new file mode 100644 index 000000000..de711f7df --- /dev/null +++ b/crypto/smartcont/tolk-stdlib/common.tolk @@ -0,0 +1,766 @@ +// Standard library for Tolk (LGPL licence). +// It contains common functions that are available out of the box, the user doesn't have to import anything. +// More specific functions are required to be imported explicitly, like "@stdlib/tvm-dicts". +tolk 0.6 + +/** + Tuple manipulation primitives. + Elements of a tuple can be of arbitrary type. + Note that atomic type `tuple` can't be cast to composite tuple type (e.g. `[int, cell]`) and vice versa. + */ + +/// Creates a tuple with zero elements. +@pure +fun createEmptyTuple(): tuple + asm "NIL"; + +/// Appends a value to tuple, resulting in `Tuple t' = (x1, ..., xn, value)`. +/// If its size exceeds 255, throws a type check exception. +@pure +fun tuplePush(t: tuple, value: X): tuple + asm "TPUSH"; + +@pure +fun ~tuplePush(t: tuple, value: X): (tuple, ()) + asm "TPUSH"; + +/// Returns the first element of a non-empty tuple.
+@pure +fun tupleFirst(t: tuple): X + asm "FIRST"; + +/// Returns the [`index`]-th element of a tuple. +@pure +fun tupleAt(t: tuple, index: int): X + builtin; + +/// Returns the size of a tuple (elements count in it). +@pure +fun tupleSize(t: tuple): int + asm "TLEN"; + +/// Returns the last element of a non-empty tuple. +@pure +fun tupleLast(t: tuple): int + asm "LAST"; + + +/** + Mathematical primitives. + */ + +/// Computes the minimum of two integers. +@pure +fun min(x: int, y: int): int + asm "MIN"; + +/// Computes the maximum of two integers. +@pure +fun max(x: int, y: int): int + asm "MAX"; + +/// Sorts two integers. +@pure +fun minMax(x: int, y: int): (int, int) + asm "MINMAX"; + +/// Computes the absolute value of an integer. +@pure +fun abs(x: int): int + asm "ABS"; + +/// Returns the sign of an integer: `-1` if x < 0, `0` if x == 0, `1` if x > 0. +@pure +fun sign(x: int): int + asm "SGN"; + +/// Computes the quotient and remainder of [x] / [y]. Example: divMod(112,3) = (37,1) +@pure +fun divMod(x: int, y: int): (int, int) + asm "DIVMOD"; + +/// Computes the remainder and quotient of [x] / [y]. Example: modDiv(112,3) = (1,37) +@pure +fun modDiv(x: int, y: int): (int, int) + asm(-> 1 0) "DIVMOD"; + +/// Computes multiply-then-divide: floor([x] * [y] / [z]). +/// The intermediate result is stored in a 513-bit integer to prevent precision loss. +@pure +fun mulDivFloor(x: int, y: int, z: int): int + builtin; + +/// Similar to `mulDivFloor`, but rounds the result: round([x] * [y] / [z]). +@pure +fun mulDivRound(x: int, y: int, z: int): int + builtin; + +/// Similar to `mulDivFloor`, but ceils the result: ceil([x] * [y] / [z]). +@pure +fun mulDivCeil(x: int, y: int, z: int): int + builtin; + +/// Computes the quotient and remainder of ([x] * [y] / [z]). Example: mulDivMod(112,3,10) = (33,6) +@pure +fun mulDivMod(x: int, y: int, z: int): (int, int) + builtin; + + +/** + Global getters of environment and contract state.
+ */ + +const MASTERCHAIN = -1; +const BASECHAIN = 0; + +/// Returns current Unix timestamp (in seconds). +@pure +fun now(): int + asm "NOW"; + +/// Returns the internal address of the current smart contract as a Slice with a `MsgAddressInt`. +/// If necessary, it can be parsed further using primitives such as [parseStandardAddress]. +@pure +fun getMyAddress(): slice + asm "MYADDR"; + +/// Returns the balance (in nanotoncoins) of the smart contract at the start of Computation Phase. +/// Note that RAW primitives such as [sendMessage] do not update this field. +@pure +fun getMyOriginalBalance(): int + asm "BALANCE" "FIRST"; + +/// Same as [getMyOriginalBalance], but returns a tuple: +/// `int` — balance in nanotoncoins; +/// `cell` — a dictionary with 32-bit keys representing the balance of "extra currencies". +@pure +fun getMyOriginalBalanceWithExtraCurrencies(): [int, cell] + asm "BALANCE"; + +/// Returns the logical time of the current transaction. +@pure +fun getLogicalTime(): int + asm "LTIME"; + +/// Returns the starting logical time of the current block. +@pure +fun getCurrentBlockLogicalTime(): int + asm "BLOCKLT"; + +/// Returns the value of the global configuration parameter with integer index `i` as a `cell` or `null` value. +@pure +fun getBlockchainConfigParam(x: int): cell + asm "CONFIGOPTPARAM"; + +/// Returns the persistent contract storage cell. It can be parsed or modified with slice and builder primitives later. +@pure +fun getContractData(): cell + asm "c4 PUSH"; + +/// Sets `cell` [c] as persistent contract data. You can update persistent contract storage with this primitive. +fun setContractData(c: cell): void + asm "c4 POP"; + +/// Retrieves code of smart-contract from c7 +@pure +fun getContractCode(): cell + asm "MYCODE"; + +/// Creates an output action that would change this smart contract code to that given by cell [newCode]. 
+/// Notice that this change will take effect only after the successful termination of the current run of the smart contract. +fun setContractCodePostponed(newCode: cell): void + asm "SETCODE"; + +/// Commits the current state of registers `c4` (“persistent data”) and `c5` (“actions”) +/// so that the current execution is considered “successful” with the saved values even if an exception +/// in Computation Phase is thrown later. +fun commitContractDataAndActions(): void + asm "COMMIT"; + + +/** + Signature checks, hashing, cryptography. + */ + +/// Computes the representation hash of a `cell` [c] and returns it as a 256-bit unsigned integer `x`. +/// Useful for signing and checking signatures of arbitrary entities represented by a tree of cells. +@pure +fun cellHash(c: cell): int + asm "HASHCU"; + +/// Computes the hash of a `slice s` and returns it as a 256-bit unsigned integer `x`. +/// The result is the same as if an ordinary cell containing only data and references from `s` had been created +/// and its hash computed by [cellHash]. +@pure +fun sliceHash(s: slice): int + asm "HASHSU"; + +/// Computes sha256 of the data bits of `slice` [s]. If the bit length of `s` is not divisible by eight, +/// throws a cell underflow exception. The hash value is returned as a 256-bit unsigned integer `x`. +@pure +fun stringHash(s: slice): int + asm "SHA256U"; + +/// Checks the Ed25519-`signature` of a `hash` (a 256-bit unsigned integer, usually computed as the hash of some data) +/// using [publicKey] (also represented by a 256-bit unsigned integer). +/// The signature must contain at least 512 data bits; only the first 512 bits are used. +/// The result is `−1` if the signature is valid, `0` otherwise. +/// Note that `CHKSIGNU` creates a 256-bit slice with the hash and calls `CHKSIGNS`. +/// That is, if [hash] is computed as the hash of some data, these data are hashed twice, +/// the second hashing occurring inside `CHKSIGNS`. 
+@pure +fun isSignatureValid(hash: int, signature: slice, publicKey: int): int + asm "CHKSIGNU"; + +/// Checks whether [signature] is a valid Ed25519-signature of the data portion of `slice data` using `publicKey`, +/// similarly to [isSignatureValid]. +/// If the bit length of [data] is not divisible by eight, throws a cell underflow exception. +/// The verification of Ed25519 signatures is the standard one, +/// with sha256 used to reduce [data] to the 256-bit number that is actually signed. +@pure +fun isSliceSignatureValid(data: slice, signature: slice, publicKey: int): int + asm "CHKSIGNS"; + +/// Generates a new pseudo-random unsigned 256-bit integer x. +fun random(): int + asm "RANDU256"; + +/// Generates a new pseudo-random integer z in the range 0..range−1 (or range..−1, if range < 0). +/// More precisely, an unsigned random value x is generated as in random; then z := x * range / 2^256 is computed. +fun randomRange(range: int): int + asm "RAND"; + +/// Returns the current random seed as an unsigned 256-bit integer. +@pure +fun randomGetSeed(): int + asm "RANDSEED"; + +/// Sets the random seed to unsigned 256-bit seed. +fun randomSetSeed(seed: int): void + asm "SETRAND"; + +/// Initializes (mixes) random seed with unsigned 256-bit integer x. +fun randomizeBy(x: int): void + asm "ADDRAND"; + +/// Initializes random seed using current time. Don't forget to call this before calling `random`! +fun randomizeByLogicalTime(): void + asm "LTIME" "ADDRAND"; + + +/** + Size computation primitives. + They may be useful for computing storage fees of user-provided data. + */ + +/// Returns `(x, y, z, -1)` or `(null, null, null, 0)`. +/// Recursively computes the count of distinct cells `x`, data bits `y`, and cell references `z` +/// in the DAG rooted at `cell` [c], effectively returning the total storage used by this DAG taking into account +/// the identification of equal cells. 
+/// The values of `x`, `y`, and `z` are computed by a depth-first traversal of this DAG, +/// with a hash table of visited cell hashes used to prevent visits of already-visited cells. +/// The total count of visited cells `x` cannot exceed non-negative [maxCells]; +/// otherwise the computation is aborted before visiting the `(maxCells + 1)`-st cell and +/// a zero flag is returned to indicate failure. If [c] is `null`, returns `x = y = z = 0`. +@pure +fun calculateCellSize(c: cell, maxCells: int): (int, int, int, int) + asm "CDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; + +/// Similar to [calculateCellSize], but accepting a `slice` [s] instead of a `cell`. +/// The returned value of `x` does not take into account the cell that contains the `slice` [s] itself; +/// however, the data bits and the cell references of [s] are accounted for in `y` and `z`. +@pure +fun calculateSliceSize(s: slice, maxCells: int): (int, int, int, int) + asm "SDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; + +/// A non-quiet version of [calculateCellSize] that throws a cell overflow exception (`8`) on failure. +fun calculateCellSizeStrict(c: cell, maxCells: int): (int, int, int) + asm "CDATASIZE"; + +/// A non-quiet version of [calculateSliceSize] that throws a cell overflow exception (`8`) on failure. +fun calculateSliceSizeStrict(s: slice, maxCells: int): (int, int, int) + asm "SDATASIZE"; + +/// Returns the depth of `cell` [c]. +/// If [c] has no references, then return `0`; +/// otherwise the returned value is one plus the maximum of depths of cells referred to from [c]. +/// If [c] is a `null` instead of a cell, returns zero. +@pure +fun getCellDepth(c: cell): int + asm "CDEPTH"; + +/// Returns the depth of `slice` [s]. +/// If [s] has no references, then returns `0`; +/// otherwise the returned value is one plus the maximum of depths of cells referred to from [s]. +@pure +fun getSliceDepth(s: slice): int + asm "SDEPTH"; + +/// Returns the depth of `builder` [b]. 
+/// If no cell references are stored in [b], then returns 0; +/// otherwise the returned value is one plus the maximum of depths of cells referred to from [b]. +@pure +fun getBuilderDepth(b: builder): int + asm "BDEPTH"; + + +/** + Debug primitives. + Only works for local TVM execution with debug level verbosity. + */ + +/// Dump a variable [x] to the debug log. +fun debugPrint(x: X): void + builtin; + +/// Dump a string [x] to the debug log. +fun debugPrintString(x: X): void + builtin; + +/// Dumps the stack (at most the top 255 values) and shows the total stack depth. +fun debugDumpStack(): void + builtin; + + +/** + Slice primitives: parsing cells. + When you _load_ some data, you mutate the slice (shifting an internal pointer on the stack). + When you _preload_ some data, you just get the result without mutating the slice. + */ + +/// Converts a `cell` [c] into a `slice`. Notice that [c] must be either an ordinary cell, +/// or an exotic cell (see [TVM.pdf](https://ton-blockchain.github.io/docs/tvm.pdf), 3.1.2) +/// which is automatically loaded to yield an ordinary cell `c'`, converted into a `slice` afterwards. +@pure +fun beginParse(c: cell): slice + asm "CTOS"; + +/// Checks if slice is empty. If not, throws an exception. +fun assertEndOfSlice(s: slice): void + asm "ENDS"; + +/// Loads the next reference from the slice. +@pure +fun loadRef(s: slice): (slice, cell) + asm( -> 1 0) "LDREF"; + +/// Preloads the next reference from the slice. +@pure +fun preloadRef(s: slice): cell + asm "PLDREF"; + +/// Loads a signed [len]-bit integer from a slice. +@pure +fun loadInt(s: slice, len: int): (slice, int) + builtin; + +/// Loads an unsigned [len]-bit integer from a slice. +@pure +fun loadUint(s: slice, len: int): (slice, int) + builtin; + +/// Loads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate slice `s''`. +@pure +fun loadBits(s: slice, len: int): (slice, slice) + builtin; + +/// Preloads a signed [len]-bit integer from a slice. 
+@pure +fun preloadInt(s: slice, len: int): int + builtin; + +/// Preloads an unsigned [len]-bit integer from a slice. +@pure +fun preloadUint(s: slice, len: int): int + builtin; + +/// Preloads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate slice. +@pure +fun preloadBits(s: slice, len: int): slice + builtin; + +/// Loads serialized amount of Toncoins (any unsigned integer up to `2^120 - 1`). +@pure +fun loadCoins(s: slice): (slice, int) + asm( -> 1 0) "LDGRAMS"; + +/// Loads bool (-1 or 0) from a slice +@pure +fun loadBool(s: slice): (slice, int) + asm( -> 1 0) "1 LDI"; + +/// Shifts a slice pointer to [len] bits forward, mutating the slice. +@pure +fun skipBits(s: slice, len: int): slice + asm "SDSKIPFIRST"; // todo make mutating +@pure +fun ~skipBits(s: slice, len: int): (slice, ()) + asm "SDSKIPFIRST"; + +/// Returns the first `0 ≤ len ≤ 1023` bits of a slice. +@pure +fun getFirstBits(s: slice, len: int): slice + asm "SDCUTFIRST"; + +/// Returns all but the last `0 ≤ len ≤ 1023` bits of a slice. +@pure +fun removeLastBits(s: slice, len: int): slice + asm "SDSKIPLAST"; // todo make mutating +@pure +fun ~removeLastBits(s: slice, len: int): (slice, ()) + asm "SDSKIPLAST"; + +/// Returns the last `0 ≤ len ≤ 1023` bits of a slice. +@pure +fun getLastBits(s: slice, len: int): slice + asm "SDCUTLAST"; + +/// Loads a dictionary (TL HashMapE structure, represented as TVM cell) from a slice. +/// Returns `null` if `nothing` constructor is used. +@pure +fun loadDict(s: slice): (slice, cell) + asm( -> 1 0) "LDDICT"; + +/// Preloads a dictionary (cell) from a slice. +@pure +fun preloadDict(s: slice): cell + asm "PLDDICT"; + +/// Loads a dictionary as [loadDict], but returns only the remainder of the slice. +@pure +fun skipDict(s: slice): slice + asm "SKIPDICT"; // todo make mutating +@pure +fun ~skipDict(s: slice): (slice, ()) + asm "SKIPDICT"; + +/// Loads (Maybe ^Cell) from a slice. 
+/// In other words, loads 1 bit: if it's true, loads the first ref, otherwise returns `null`. +@pure +fun loadMaybeRef(s: slice): (slice, cell) + asm( -> 1 0) "LDOPTREF"; + +/// Preloads (Maybe ^Cell) from a slice. +@pure +fun preloadMaybeRef(s: slice): cell + asm "PLDOPTREF"; + +/// Loads (Maybe ^Cell), but returns only the remainder of the slice. +@pure +fun ~skipMaybeRef(s: slice): (slice, ()) + asm "SKIPOPTREF"; + +/** + Builder primitives: constructing cells. + When you _store_ some data, you mutate the builder (shifting an internal pointer on the stack). + All the primitives below first check whether there is enough space in the `builder`, + and only then check the range of the value being serialized. + */ + +/// Creates a new empty builder. +@pure +fun beginCell(): builder + asm "NEWC"; + +/// Converts a builder into an ordinary `cell`. +@pure +fun endCell(b: builder): cell + asm "ENDC"; + +/// Stores a reference to a cell into a builder. +@pure +fun storeRef(b: builder, c: cell): builder + asm(c b) "STREF"; + +/// Stores a signed [len]-bit integer into a builder (`0 ≤ len ≤ 257`). +@pure +fun storeInt(b: builder, x: int, len: int): builder + builtin; + +/// Stores an unsigned [len]-bit integer into a builder (`0 ≤ len ≤ 256`). +@pure +fun storeUint(b: builder, x: int, len: int): builder + builtin; + +/// Stores a slice into a builder. +@pure +fun storeSlice(b: builder, s: slice): builder + asm "STSLICER"; + +/// Stores amount of Toncoins into a builder. +@pure +fun storeCoins(b: builder, x: int): builder + asm "STGRAMS"; + +/// Stores bool (-1 or 0) into a builder. +/// Attention: true value is `-1`, not 1! If you pass `1` here, TVM will throw an exception. +@pure +fun storeBool(b: builder, x: int): builder + asm(x b) "1 STI"; + +/// Stores dictionary (represented by TVM `cell` or `null`) into a builder. +/// In other words, stores a `1`-bit and a reference to [c] if [c] is not `null` and `0`-bit otherwise. 
+@pure +fun storeDict(b: builder, c: cell): builder + asm(c b) "STDICT"; + +/// Stores (Maybe ^Cell) into a builder. +/// In other words, if cell is `null`, store '0' bit; otherwise, store '1' and a ref to [c]. +@pure +fun storeMaybeRef(b: builder, c: cell): builder + asm(c b) "STOPTREF"; + +/// Concatenates two builders. +@pure +fun storeBuilder(to: builder, from: builder): builder + asm "STBR"; + +@pure +fun storeAddressNone(b: builder): builder + asm "0 PUSHINT" "SWAP" "2 STU"; +@pure +fun ~storeAddressNone(b: builder): (builder, ()) + asm "b{00} STSLICECONST"; + + +/** + Slice size primitives. + */ + +/// Returns the number of references in a slice. +@pure +fun getRemainingRefsCount(s: slice): int + asm "SREFS"; + +/// Returns the number of data bits in a slice. +@pure +fun getRemainingBitsCount(s: slice): int + asm "SBITS"; + +/// Returns both the number of data bits and the number of references in a slice. +@pure +fun getRemainingBitsAndRefsCount(s: slice): (int, int) + asm "SBITREFS"; + +/// Checks whether a slice is empty (i.e., contains no bits of data and no cell references). +@pure +fun isEndOfSlice(s: slice): int + asm "SEMPTY"; + +/// Checks whether a slice has no bits of data. +@pure +fun isEndOfSliceBits(s: slice): int + asm "SDEMPTY"; + +/// Checks whether a slice has no references. +@pure +fun isEndOfSliceRefs(s: slice): int + asm "SREMPTY"; + +/// Checks whether data parts of two slices coincide. +@pure +fun isSliceBitsEqual(a: slice, b: slice): int + asm "SDEQ"; + +/// Returns the number of cell references already stored in a builder. +@pure +fun getBuilderRefsCount(b: builder): int + asm "BREFS"; + +/// Returns the number of data bits already stored in a builder. +@pure +fun getBuilderBitsCount(b: builder): int + asm "BBITS"; + + +/** + Address manipulation primitives.
+ The address manipulation primitives listed below serialize and deserialize values according to the following TL-B scheme: + ```TL-B + addr_none$00 = MsgAddressExt; + addr_extern$01 len:(## 8) external_address:(bits len) + = MsgAddressExt; + anycast_info$_ depth:(#<= 30) { depth >= 1 } + rewrite_pfx:(bits depth) = Anycast; + addr_std$10 anycast:(Maybe Anycast) + workchain_id:int8 address:bits256 = MsgAddressInt; + addr_var$11 anycast:(Maybe Anycast) addr_len:(## 9) + workchain_id:int32 address:(bits addr_len) = MsgAddressInt; + _ _:MsgAddressInt = MsgAddress; + _ _:MsgAddressExt = MsgAddress; + + int_msg_info$0 ihr_disabled:Bool bounce:Bool bounced:Bool + src:MsgAddress dest:MsgAddressInt + value:CurrencyCollection ihr_fee:Grams fwd_fee:Grams + created_lt:uint64 created_at:uint32 = CommonMsgInfoRelaxed; + ext_out_msg_info$11 src:MsgAddress dest:MsgAddressExt + created_lt:uint64 created_at:uint32 = CommonMsgInfoRelaxed; + ``` + A deserialized `MsgAddress` is represented by a tuple `t` as follows: + + - `addr_none` is represented by `t = (0)`, + i.e., a tuple containing exactly one integer equal to zero. + - `addr_extern` is represented by `t = (1, s)`, + where slice `s` contains the field `external_address`. In other words, + `t` is a pair (a tuple consisting of two entries), containing an integer equal to one and slice `s`. + - `addr_std` is represented by `t = (2, u, x, s)`, + where `u` is either a `null` (if `anycast` is absent) or a slice `s'` containing `rewrite_pfx` (if anycast is present). + Next, integer `x` is the `workchain_id`, and slice `s` contains the address. + - `addr_var` is represented by `t = (3, u, x, s)`, + where `u`, `x`, and `s` have the same meaning as for `addr_std`. + */ + +/// Loads from slice [s] the only prefix that is a valid `MsgAddress`, +/// and returns both this prefix `s'` and the remainder `s''` of [s] as slices.
+@pure +fun loadAddress(s: slice): (slice, slice) + asm( -> 1 0) "LDMSGADDR"; // todo make mutating + +/// Decomposes slice [s] containing a valid `MsgAddress` into a `tuple t` with separate fields of this `MsgAddress`. +/// If [s] is not a valid `MsgAddress`, a cell deserialization exception is thrown. +@pure +fun parseAddress(s: slice): tuple + asm "PARSEMSGADDR"; + +/// Parses slice [s] containing a valid `MsgAddressInt` (usually a `msg_addr_std`), +/// applies rewriting from the anycast (if present) to the same-length prefix of the address, +/// and returns both the workchain and the 256-bit address as integers. +/// If the address is not 256-bit, or if [s] is not a valid serialization of `MsgAddressInt`, +/// throws a cell deserialization exception. +@pure +fun parseStandardAddress(s: slice): (int, int) + asm "REWRITESTDADDR"; + +/// Creates a slice representing TL addr_none$00 (two `0` bits). +@pure +fun createAddressNone(): slice + asm "b{00} PUSHSLICE"; + +/// Returns if a slice pointer contains an empty address (`-1` for true, `0` for false, as always). +/// In other words, a slice starts with two `0` bits (TL addr_none$00). +@pure +fun addressIsNone(s: slice): int + asm "2 PLDU" "0 EQINT"; + + +/** + Reserving Toncoins on balance and its flags. + */ + +/// mode = 0: Reserve exact amount of nanotoncoins +const RESERVE_MODE_EXACT_AMOUNT = 0; +/// +1: Actually reserves all but amount, meaning `currentContractBalance - amount` +const RESERVE_MODE_ALL_BUT_AMOUNT = 1; +/// +2: Actually set `min(amount, currentContractBalance)` (without this mode, if amount is greater, the action will fail) +const RESERVE_MODE_AT_MOST = 2; +/// +4: [amount] is increased by the _original_ balance of the current account (before the compute phase). +const RESERVE_MODE_INCREASE_BY_ORIGINAL_BALANCE = 4; +/// +8: Actually sets `amount = -amount` before performing any further actions. 
+const RESERVE_MODE_NEGATE_AMOUNT = 8; +/// +16: If this action fails, the transaction will be bounced. +const RESERVE_MODE_BOUNCE_ON_ACTION_FAIL = 16; + +/// Creates an output action which would reserve Toncoins on balance. +/// For [reserveMode] consider constants above. +fun reserveToncoinsOnBalance(nanoTonCoins: int, reserveMode: int): void + asm "RAWRESERVE"; + +/// Similar to [reserveToncoinsOnBalance], but also accepts a dictionary extraAmount (represented by a cell or null) +/// with extra currencies. In this way currencies other than Toncoin can be reserved. +fun reserveExtraCurrenciesOnBalance(nanoTonCoins: int, extraAmount: cell, reserveMode: int): void + asm "RAWRESERVEX"; + + +/** + Messages sending and parsing primitives. + Working with messages is low-level right now, but still, every contract should do that. + + `Message` structure, its header and so on are specified in TL-B scheme, particularly: + int_msg_info$0 ihr_disabled:Bool bounce:Bool bounced:Bool ... = CommonMsgInfo; + */ + +/// 0b011000 tag - 0, ihr_disabled - 1, bounce - 1, bounced - 0, src = addr_none$00 +const BOUNCEABLE = 0x18; +/// 0b010000 tag - 0, ihr_disabled - 1, bounce - 0, bounced - 0, src = addr_none$00 +const NON_BOUNCEABLE = 0x10; + +/// Load msgFlags from incoming message body (4 bits). +@pure +fun loadMessageFlags(s: slice): (slice, int) + asm( -> 1 0) "4 LDU"; + +/// Having msgFlags (4 bits), check that a message is bounced. +/// Effectively, it's `msgFlags & 1` (the lowest bit present). +@pure +fun isMessageBounced(msgFlags: int): int + asm "1 PUSHINT" "AND"; + +/// Skip 0xFFFFFFFF prefix (when a message is bounced). +@pure +fun ~skipBouncedPrefix(s: slice): (slice, ()) + asm "32 PUSHINT" "SDSKIPFIRST"; + +/// The guideline recommends to start the body of an internal message with uint32 `op` and uint64 `queryId`.
+@pure +fun loadMessageOp(s: slice): (slice, int) + asm( -> 1 0) "32 LDU"; + +@pure +fun ~skipMessageOp(s: slice): (slice, ()) + asm "32 PUSHINT" "SDSKIPFIRST"; + +@pure +fun storeMessageOp(b: builder, op: int): builder + asm(op b) "32 STU"; +fun ~storeMessageOp(b: builder, op: int): (builder, ()) + asm(op b) "32 STU"; + +/// The guideline recommends that uint64 `queryId` should follow uint32 `op`. +@pure +fun loadMessageQueryId(s: slice): (slice, int) + asm( -> 1 0) "64 LDU"; + +@pure +fun ~skipMessageQueryId(s: slice): (slice, ()) + asm "64 PUSHINT" "SDSKIPFIRST"; + +@pure +fun storeMessageQueryId(b: builder, queryId: int): builder + asm(queryId b) "64 STU"; +fun ~storeMessageQueryId(b: builder, queryId: int): (builder, ()) + asm(queryId b) "64 STU"; + +/// SEND MODES - https://docs.ton.org/tvm.pdf page 137, SENDRAWMSG + +/// mode = 0 is used for ordinary messages; the gas fees are deducted from the sending amount; action phase should NOT be ignored. +const SEND_MODE_REGULAR = 0; +/// +1 means that the sender wants to pay transfer fees separately. +const SEND_MODE_PAY_FEES_SEPARATELY = 1; +/// +2 means that any errors arising while processing this message during the action phase should be ignored. +const SEND_MODE_IGNORE_ERRORS = 2; +/// in the case of action fail - bounce transaction. No effect if SEND_MODE_IGNORE_ERRORS (+2) is used. TVM UPGRADE 2023-07. https://docs.ton.org/learn/tvm-instructions/tvm-upgrade-2023-07#sending-messages +const SEND_MODE_BOUNCE_ON_ACTION_FAIL = 16; +/// mode = 32 means that the current account must be destroyed if its resulting balance is zero. +const SEND_MODE_DESTROY = 32; +/// mode = 64 is used for messages that carry all the remaining value of the inbound message in addition to the value initially indicated in the new message.
+const SEND_MODE_CARRY_ALL_REMAINING_MESSAGE_VALUE = 64; +/// mode = 128 is used for messages that are to carry all the remaining balance of the current smart contract (instead of the value originally indicated in the message). +const SEND_MODE_CARRY_ALL_BALANCE = 128; +/// do not create an action, only estimate fee. TVM UPGRADE 2023-07. https://docs.ton.org/learn/tvm-instructions/tvm-upgrade-2023-07#sending-messages +const SEND_MODE_ESTIMATE_FEE_ONLY = 1024; +/// Other modes affect the fee calculation as follows: +/// +64 substitutes the entire balance of the incoming message as an outgoing value (slightly inaccurate, gas expenses that cannot be estimated before the computation is completed are not taken into account). +/// +128 substitutes the value of the entire balance of the contract before the start of the computation phase (slightly inaccurate, since gas expenses that cannot be estimated before the completion of the computation phase are not taken into account). + +/// Sends a raw message — a correctly serialized TL object `Message X`. +/// For `mode`, see constants above (except SEND_MODE_ESTIMATE_FEE_ONLY). +/// This function is still available, but deprecated: consider using [sendMessage]. +@deprecated +fun sendRawMessage(msg: cell, mode: int): void + asm "SENDRAWMSG"; + +/// Creates an output action and returns a fee for creating a message. +/// Mode has the same effect as in the case of SENDRAWMSG. +/// For mode including SEND_MODE_ESTIMATE_FEE_ONLY it just returns estimated fee without sending a message. +fun sendMessage(msg: cell, mode: int): int + asm "SENDMSG"; diff --git a/crypto/smartcont/tolk-stdlib/gas-payments.tolk b/crypto/smartcont/tolk-stdlib/gas-payments.tolk new file mode 100644 index 000000000..1dc6f3f89 --- /dev/null +++ b/crypto/smartcont/tolk-stdlib/gas-payments.tolk @@ -0,0 +1,63 @@ +// A part of standard library for Tolk +tolk 0.6 + +/** + Gas and payment related primitives.
+ */ + +/// Returns amount of gas (in gas units) consumed in current Computation Phase. +fun getGasConsumedAtTheMoment(): int + asm "GASCONSUMED"; + +/// This function is required to be called when you process an external message (from an outer world) +/// and "accept" it to blockchain. +/// Without calling this function, an external message would be discarded. +/// As an effect, the current smart contract agrees to buy some gas to finish the current transaction. +/// For more details, check [accept_message effects](https://ton.org/docs/#/smart-contracts/accept). +fun acceptExternalMessage(): void + asm "ACCEPT"; + +/// When processing an internal message, by default, the limit of gas consumption is determined by incoming message. +/// Functions [setGasLimit] and [setGasLimitToMaximum] allow you to change this behavior. +/// Sets current gas limit `gl` to its maximal allowed value `gm`, and resets the gas credit `gc` to zero, +/// decreasing the value of `gr` by `gc` in the process. +fun setGasLimitToMaximum(): void + asm "ACCEPT"; + +/// When processing an internal message, by default, the limit of gas consumption is determined by incoming message. +/// Functions [setGasLimit] and [setGasLimitToMaximum] allow you to change this behavior. +/// Sets current gas limit `gl` to the minimum of limit and `gm`, and resets the gas credit `gc` to zero. +/// If the gas consumed so far (including the present instruction) exceeds the resulting value of `gl`, +/// an (unhandled) out of gas exception is thrown before setting new gas limits. +fun setGasLimit(limit: int): void + asm "SETGASLIMIT"; + +/// Calculates fee (amount in nanotoncoins to be paid) for a transaction which consumed [gasUsed] gas units. 
+fun calculateGasFee(workchain: int, gasUsed: int): int + asm(gasUsed workchain) "GETGASFEE"; + +/// Same as [calculateGasFee], but without flat price (you are supposed to read https://docs.ton.org/develop/howto/fees-low-level) +fun calculateGasFeeWithoutFlatPrice(workchain: int, gasUsed: int): int + asm(gasUsed workchain) "GETGASFEESIMPLE"; + +/// Calculates amount of nanotoncoins you should pay for storing a contract of provided size for [seconds]. +/// [bits] and [cells] represent contract state (code + data). +fun calculateStorageFee(workchain: int, seconds: int, bits: int, cells: int): int + asm(cells bits seconds workchain) "GETSTORAGEFEE"; + +/// Calculates amount of nanotoncoins you should pay to send a message of specified size. +fun calculateMessageFee(workchain: int, bits: int, cells: int): int + asm(cells bits workchain) "GETFORWARDFEE"; + +/// Same as [calculateMessageFee], but without lump price (you are supposed to read https://docs.ton.org/develop/howto/fees-low-level) +fun calculateMessageFeeWithoutLumpPrice(workchain: int, bits: int, cells: int): int + asm(cells bits workchain) "GETFORWARDFEESIMPLE"; + +/// Calculates fee that was paid by the sender of an incoming internal message. +fun calculateOriginalMessageFee(workchain: int, incomingFwdFee: int): int + asm(incomingFwdFee workchain) "GETORIGINALFWDFEE"; + +/// Returns the amount of nanotoncoins the current contract owes for storage. ("due" and "debt" are synonyms) +/// If it has no debt, `0` is returned. +fun getMyStorageDuePayment(): int + asm "DUEPAYMENT"; diff --git a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk new file mode 100644 index 000000000..94c045237 --- /dev/null +++ b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk @@ -0,0 +1,38 @@ +// A part of standard library for Tolk +tolk 0.6 + +/** + Lisp-style lists are nested 2-element tuples: `(1, (2, (3, null)))` represents list `[1, 2, 3]`. + Elements of a list can be of different types. 
+ Empty list is conventionally represented as TVM `null` value. + */ + +@pure +fun createEmptyList(): tuple + asm "PUSHNULL"; + +/// Adds an element to the beginning of lisp-style list. +/// Note, that it does not mutate the list: instead, it returns a new one (it's a lisp pattern). +@pure +fun listPrepend(head: X, tail: tuple): tuple + asm "CONS"; + +/// Extracts the head and the tail of lisp-style list. +@pure +fun listSplit(list: tuple): (X, tuple) + asm "UNCONS"; + +/// Extracts the tail and the head of lisp-style list. +@pure +fun ~listNext(list: tuple): (tuple, X) + asm( -> 1 0) "UNCONS"; + +/// Returns the head of lisp-style list. +@pure +fun listGetHead(list: tuple): X + asm "CAR"; + +/// Returns the tail of lisp-style list. +@pure +fun listGetTail(list: tuple): tuple + asm "CDR"; diff --git a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk new file mode 100644 index 000000000..1e3c10ec8 --- /dev/null +++ b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk @@ -0,0 +1,447 @@ +// A part of standard library for Tolk +tolk 0.6 + +/** + Dictionaries are represented as `cell` data type (cells can store anything, dicts in particular). + Currently, they have very low-level API very close to TVM internals. + Most of functions are duplicated for three common cases: + - iDict* - dicts with signed integer keys + - uDict* - dicts with unsigned integer keys + - sDict* - dicts with arbitrary slice keys + When accessing a dict element, you should provide not only a key, but also keyLen, + since, for optimization, key length is not stored in the dictionary itself. + */ + +/// Creates an empty dictionary, which is actually a null value. Equivalent to PUSHNULL +@pure +fun createEmptyDict(): cell + asm "NEWDICT"; + +/// Checks whether a dictionary is empty. 
+@pure +fun dictIsEmpty(c: cell): int + asm "DICTEMPTY"; + + +@pure +fun iDictGet(dict: cell, keyLen: int, key: int): (slice, int) + asm(key dict keyLen) "DICTIGET" "NULLSWAPIFNOT"; + +@pure +fun uDictGet(dict: cell, keyLen: int, key: int): (slice, int) + asm(key dict keyLen) "DICTUGET" "NULLSWAPIFNOT"; + +@pure +fun sDictGet(dict: cell, keyLen: int, key: slice): (slice, int) + asm(key dict keyLen) "DICTGET" "NULLSWAPIFNOT"; + + +@pure +fun iDictSet(dict: cell, keyLen: int, key: int, value: slice): cell + asm(value key dict keyLen) "DICTISET"; + +@pure +fun ~iDictSet(dict: cell, keyLen: int, key: int, value: slice): (cell, ()) + asm(value key dict keyLen) "DICTISET"; + +@pure +fun uDictSet(dict: cell, keyLen: int, key: int, value: slice): cell + asm(value key dict keyLen) "DICTUSET"; + +@pure +fun ~uDictSet(dict: cell, keyLen: int, key: int, value: slice): (cell, ()) + asm(value key dict keyLen) "DICTUSET"; + +@pure +fun sDictSet(dict: cell, keyLen: int, key: slice, value: slice): cell + asm(value key dict keyLen) "DICTSET"; + +@pure +fun ~sDictSet(dict: cell, keyLen: int, key: slice, value: slice): (cell, ()) + asm(value key dict keyLen) "DICTSET"; + + +@pure +fun iDictSetRef(dict: cell, keyLen: int, key: int, value: cell): cell + asm(value key dict keyLen) "DICTISETREF"; + +@pure +fun ~iDictSetRef(dict: cell, keyLen: int, key: int, value: cell): (cell, ()) + asm(value key dict keyLen) "DICTISETREF"; + +@pure +fun uDictSetRef(dict: cell, keyLen: int, key: int, value: cell): cell + asm(value key dict keyLen) "DICTUSETREF"; + +@pure +fun ~uDictSetRef(dict: cell, keyLen: int, key: int, value: cell): (cell, ()) + asm(value key dict keyLen) "DICTUSETREF"; + +@pure +fun sDictSetRef(dict: cell, keyLen: int, key: slice, value: cell): cell + asm(value key dict keyLen) "DICTSETREF"; + +@pure +fun ~sDictSetRef(dict: cell, keyLen: int, key: slice, value: cell): (cell, ()) + asm(value key dict keyLen) "DICTSETREF"; + + +@pure +fun iDictSetIfNotExists(dict: cell, keyLen: int, 
key: int, value: slice): (cell, int) + asm(value key dict keyLen) "DICTIADD"; + +@pure +fun ~iDictSetIfNotExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) + asm(value key dict keyLen) "DICTIADD"; + +@pure +fun uDictSetIfNotExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) + asm(value key dict keyLen) "DICTUADD"; + +@pure +fun ~uDictSetIfNotExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) + asm(value key dict keyLen) "DICTUADD"; + + +@pure +fun iDictSetIfExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) + asm(value key dict keyLen) "DICTIREPLACE"; + +@pure +fun ~iDictSetIfExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) + asm(value key dict keyLen) "DICTIREPLACE"; + +@pure +fun uDictSetIfExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) + asm(value key dict keyLen) "DICTUREPLACE"; + +@pure +fun ~uDictSetIfExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) + asm(value key dict keyLen) "DICTUREPLACE"; + + +@pure +fun iDictGetRef(dict: cell, keyLen: int, key: int): (cell, int) + asm(key dict keyLen) "DICTIGETREF" "NULLSWAPIFNOT"; + +@pure +fun uDictGetRef(dict: cell, keyLen: int, key: int): (cell, int) + asm(key dict keyLen) "DICTUGETREF" "NULLSWAPIFNOT"; + +@pure +fun sDictGetRef(dict: cell, keyLen: int, key: slice): (cell, int) + asm(key dict keyLen) "DICTGETREF" "NULLSWAPIFNOT"; + + +@pure +fun iDictGetRefOrNull(dict: cell, keyLen: int, key: int): cell + asm(key dict keyLen) "DICTIGETOPTREF"; + +@pure +fun uDictGetRefOrNull(dict: cell, keyLen: int, key: int): cell + asm(key dict keyLen) "DICTUGETOPTREF"; + +@pure +fun sDictGetRefOrNull(dict: cell, keyLen: int, key: slice): cell + asm(key dict keyLen) "DICTGETOPTREF"; + + +@pure +fun iDictDelete(dict: cell, keyLen: int, key: int): (cell, int) + asm(key dict keyLen) "DICTIDEL"; + +@pure +fun ~iDictDelete(dict: cell, keyLen: int, key: int): (cell, int) + asm(key dict keyLen) "DICTIDEL"; 
+ +@pure +fun uDictDelete(dict: cell, keyLen: int, key: int): (cell, int) + asm(key dict keyLen) "DICTUDEL"; + +@pure +fun ~uDictDelete(dict: cell, keyLen: int, key: int): (cell, int) + asm(key dict keyLen) "DICTUDEL"; + +@pure +fun sDictDelete(dict: cell, keyLen: int, key: slice): (cell, int) + asm(key dict keyLen) "DICTDEL"; + +@pure +fun ~sDictDelete(dict: cell, keyLen: int, key: slice): (cell, int) + asm(key dict keyLen) "DICTDEL"; + + +@pure +fun iDictSetAndGet(dict: cell, keyLen: int, key: int, value: slice): (cell, slice, int) + asm(value key dict keyLen) "DICTISETGET" "NULLSWAPIFNOT"; + +@pure +fun ~iDictSetAndGet(dict: cell, keyLen: int, key: int, value: slice): (cell, (slice, int)) + asm(value key dict keyLen) "DICTISETGET" "NULLSWAPIFNOT"; + +@pure +fun uDictSetAndGet(dict: cell, keyLen: int, key: int, value: slice): (cell, slice, int) + asm(value key dict keyLen) "DICTUSETGET" "NULLSWAPIFNOT"; + +@pure +fun ~uDictSetAndGet(dict: cell, keyLen: int, key: int, value: slice): (cell, (slice, int)) + asm(value key dict keyLen) "DICTUSETGET" "NULLSWAPIFNOT"; + +@pure +fun sDictSetAndGet(dict: cell, keyLen: int, key: slice, value: slice): (cell, slice, int) + asm(value key dict keyLen) "DICTSETGET" "NULLSWAPIFNOT"; + +@pure +fun ~sDictSetAndGet(dict: cell, keyLen: int, key: slice, value: slice): (cell, (slice, int)) + asm(value key dict keyLen) "DICTSETGET" "NULLSWAPIFNOT"; + + +@pure +fun iDictSetAndGetPreviousRefOrNull(dict: cell, keyLen: int, key: int, value: cell): (cell, cell) + asm(value key dict keyLen) "DICTISETGETOPTREF"; + +@pure +fun ~iDictSetAndGetPreviousRefOrNull(dict: cell, keyLen: int, key: int, value: cell): (cell, cell) + asm(value key dict keyLen) "DICTISETGETOPTREF"; + +@pure +fun uDictSetAndGetPreviousRefOrNull(dict: cell, keyLen: int, key: int, value: cell): (cell, cell) + asm(value key dict keyLen) "DICTUSETGETOPTREF"; + +@pure +fun ~uDictSetAndGetPreviousRefOrNull(dict: cell, keyLen: int, key: int, value: cell): (cell, cell) + asm(value 
key dict keyLen) "DICTUSETGETOPTREF"; + + +@pure +fun iDictDeleteAndGet(dict: cell, keyLen: int, key: int): (cell, slice, int) + asm(key dict keyLen) "DICTIDELGET" "NULLSWAPIFNOT"; + +@pure +fun ~iDictDeleteAndGet(dict: cell, keyLen: int, key: int): (cell, (slice, int)) + asm(key dict keyLen) "DICTIDELGET" "NULLSWAPIFNOT"; + +@pure +fun uDictDeleteAndGet(dict: cell, keyLen: int, key: int): (cell, slice, int) + asm(key dict keyLen) "DICTUDELGET" "NULLSWAPIFNOT"; + +@pure +fun ~uDictDeleteAndGet(dict: cell, keyLen: int, key: int): (cell, (slice, int)) + asm(key dict keyLen) "DICTUDELGET" "NULLSWAPIFNOT"; + +@pure +fun sDictDeleteAndGet(dict: cell, keyLen: int, key: slice): (cell, slice, int) + asm(key dict keyLen) "DICTDELGET" "NULLSWAPIFNOT"; + +@pure +fun ~sDictDeleteAndGet(dict: cell, keyLen: int, key: slice): (cell, (slice, int)) + asm(key dict keyLen) "DICTDELGET" "NULLSWAPIFNOT"; + + +@pure +fun iDictSetBuilder(dict: cell, keyLen: int, key: int, value: builder): cell + asm(value key dict keyLen) "DICTISETB"; + +@pure +fun ~iDictSetBuilder(dict: cell, keyLen: int, key: int, value: builder): (cell, ()) + asm(value key dict keyLen) "DICTISETB"; + +@pure +fun uDictSetBuilder(dict: cell, keyLen: int, key: int, value: builder): cell + asm(value key dict keyLen) "DICTUSETB"; + +@pure +fun ~uDictSetBuilder(dict: cell, keyLen: int, key: int, value: builder): (cell, ()) + asm(value key dict keyLen) "DICTUSETB"; + +@pure +fun sDictSetBuilder(dict: cell, keyLen: int, key: slice, value: builder): cell + asm(value key dict keyLen) "DICTSETB"; + +@pure +fun ~sDictSetBuilder(dict: cell, keyLen: int, key: slice, value: builder): (cell, ()) + asm(value key dict keyLen) "DICTSETB"; + + +@pure +fun iDictSetBuilderIfNotExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) + asm(value key dict keyLen) "DICTIADDB"; + +@pure +fun ~iDictSetBuilderIfNotExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) + asm(value key dict keyLen) "DICTIADDB"; + 
+@pure +fun uDictSetBuilderIfNotExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) + asm(value key dict keyLen) "DICTUADDB"; + +@pure +fun ~uDictSetBuilderIfNotExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) + asm(value key dict keyLen) "DICTUADDB"; + +@pure +fun iDictSetBuilderIfExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) + asm(value key dict keyLen) "DICTIREPLACEB"; + +@pure +fun ~iDictSetBuilderIfExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) + asm(value key dict keyLen) "DICTIREPLACEB"; + +@pure +fun uDictSetBuilderIfExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) + asm(value key dict keyLen) "DICTUREPLACEB"; + +@pure +fun ~uDictSetBuilderIfExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) + asm(value key dict keyLen) "DICTUREPLACEB"; + + +@pure +fun iDictDeleteFirstAndGet(dict: cell, keyLen: int): (cell, int, slice, int) + asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; + +@pure +fun ~iDictDeleteFirstAndGet(dict: cell, keyLen: int): (cell, (int, slice, int)) + asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; + +@pure +fun uDictDeleteFirstAndGet(dict: cell, keyLen: int): (cell, int, slice, int) + asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; + +@pure +fun ~uDictDeleteFirstAndGet(dict: cell, keyLen: int): (cell, (int, slice, int)) + asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; + +@pure +fun sDictDeleteFirstAndGet(dict: cell, keyLen: int): (cell, slice, slice, int) + asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; + +@pure +fun ~sDictDeleteFirstAndGet(dict: cell, keyLen: int): (cell, (slice, slice, int)) + asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; + + +@pure +fun iDictDeleteLastAndGet(dict: cell, keyLen: int): (cell, int, slice, int) + asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; + +@pure +fun ~iDictDeleteLastAndGet(dict: cell, keyLen: int): (cell, (int, slice, int)) + asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; 
+ +@pure +fun uDictDeleteLastAndGet(dict: cell, keyLen: int): (cell, int, slice, int) + asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; + +@pure +fun ~uDictDeleteLastAndGet(dict: cell, keyLen: int): (cell, (int, slice, int)) + asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; + +@pure +fun sDictDeleteLastAndGet(dict: cell, keyLen: int): (cell, slice, slice, int) + asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; + +@pure +fun ~sDictDeleteLastAndGet(dict: cell, keyLen: int): (cell, (slice, slice, int)) + asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; + + +@pure +fun iDictGetFirst(dict: cell, keyLen: int): (int, slice, int) + asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetFirst(dict: cell, keyLen: int): (int, slice, int) + asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; + +@pure +fun sDictGetFirst(dict: cell, keyLen: int): (slice, slice, int) + asm (-> 1 0 2) "DICTMIN" "NULLSWAPIFNOT2"; + +@pure +fun iDictGetFirstAsRef(dict: cell, keyLen: int): (int, cell, int) + asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetFirstAsRef(dict: cell, keyLen: int): (int, cell, int) + asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; + +@pure +fun sDictGetFirstAsRef(dict: cell, keyLen: int): (slice, cell, int) + asm (-> 1 0 2) "DICTMINREF" "NULLSWAPIFNOT2"; + + +@pure +fun iDictGetLast(dict: cell, keyLen: int): (int, slice, int) + asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetLast(dict: cell, keyLen: int): (int, slice, int) + asm (-> 1 0 2) "DICTUMAX" "NULLSWAPIFNOT2"; + +@pure +fun sDictGetLast(dict: cell, keyLen: int): (slice, slice, int) + asm (-> 1 0 2) "DICTMAX" "NULLSWAPIFNOT2"; + +@pure +fun iDictGetLastAsRef(dict: cell, keyLen: int): (int, cell, int) + asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetLastAsRef(dict: cell, keyLen: int): (int, cell, int) + asm (-> 1 0 2) "DICTUMAXREF" "NULLSWAPIFNOT2"; + +@pure +fun sDictGetLastAsRef(dict: cell, keyLen: int): (slice, cell, int) + asm (-> 1 0 2) "DICTMAXREF" 
"NULLSWAPIFNOT2"; + + +@pure +fun iDictGetNext(dict: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot dict keyLen -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetNext(dict: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot dict keyLen -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; + +@pure +fun iDictGetNextOrEqual(dict: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot dict keyLen -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetNextOrEqual(dict: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot dict keyLen -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; + + +@pure +fun iDictGetPrev(dict: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot dict keyLen -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetPrev(dict: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot dict keyLen -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; + +@pure +fun iDictGetPrevOrEqual(dict: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot dict keyLen -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; + +@pure +fun uDictGetPrevOrEqual(dict: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot dict keyLen -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; + + +/** + Prefix dictionary primitives. 
+ */ + +@pure +fun prefixDictGet(dict: cell, keyLen: int, key: slice): (slice, slice, slice, int) + asm(key dict keyLen) "PFXDICTGETQ" "NULLSWAPIFNOT2"; + +@pure +fun prefixDictSet(dict: cell, keyLen: int, key: slice, value: slice): (cell, int) + asm(value key dict keyLen) "PFXDICTSET"; + +@pure +fun prefixDictDelete(dict: cell, keyLen: int, key: slice): (cell, int) + asm(key dict keyLen) "PFXDICTDEL"; diff --git a/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk new file mode 100644 index 000000000..b4f44a1bf --- /dev/null +++ b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk @@ -0,0 +1,29 @@ +// A part of standard library for Tolk +tolk 0.6 + +/// Usually `c3` has a continuation initialized by the whole code of the contract. It is used for function calls. +/// The primitive returns the current value of `c3`. +@pure +fun getTvmRegisterC3(): continuation + asm "c3 PUSH"; + +/// Updates the current value of `c3`. Usually, it is used for updating smart contract code in run-time. +/// Note that after execution of this primitive the current code +/// (and the stack of recursive function calls) won't change, +/// but any other function call will use a function from the new code. +fun setTvmRegisterC3(c: continuation): void + asm "c3 POP"; + +/// Transforms a `slice` [s] into a simple ordinary continuation `c`, with `c.code = s` and an empty stack and savelist. +@pure +fun transformSliceToContinuation(s: slice): continuation + asm "BLESS"; + +/// Moves a variable or a value [x] to the top of the stack. +@pure +fun stackMoveToTop(x: X): X + asm "NOP"; + +/// Mark a variable as used, such that the code which produced it won't be deleted even if it is not impure. 
+fun stackMoveToTopImpure(x: X): void // todo needs to be deleted, check verified contracts + asm "DROP"; diff --git a/tolk-tester/tests/a10.tolk b/tolk-tester/tests/a10.tolk index 8ea137748..1526a1220 100644 --- a/tolk-tester/tests/a10.tolk +++ b/tolk-tester/tests/a10.tolk @@ -1,3 +1,5 @@ +fun pair_first(p: [X, Y]): X asm "FIRST"; + fun one(dummy: tuple) { return 1; } @@ -30,6 +32,29 @@ fun test88(x: int) { } } +@method_id(89) +fun test89(last: int) { + var t: tuple = createEmptyTuple(); + t~tuplePush(1); + t~tuplePush(2); + t~tuplePush(3); + t~tuplePush(last); + return (t.tupleAt(0), t.tupleAt(t.tupleSize() - 1), t.tupleFirst(), t.tupleLast()); +} + +@method_id(93) +fun testStartBalanceCodegen1() { + var t = getMyOriginalBalanceWithExtraCurrencies(); + var first = t.pair_first(); + return first; +} + +@method_id(94) +fun testStartBalanceCodegen2() { + var first = getMyOriginalBalance(); + return first; +} + /** method_id | in | out @testcase | 0 | 101 15 | 100 1 @@ -39,4 +64,24 @@ fun test88(x: int) { @testcase | 0 | 100 10 | 100 0 @testcase | 88 | 5 | 234 @testcase | 88 | 50 | 0 +@testcase | 89 | 4 | 1 4 1 4 + + +@fif_codegen +""" + testStartBalanceCodegen1 PROC:<{ + // + BALANCE // t + FIRST // first + }> +""" + +@fif_codegen +""" + testStartBalanceCodegen2 PROC:<{ + // + BALANCE + FIRST // first + }> +""" */ diff --git a/tolk-tester/tests/a6.tolk b/tolk-tester/tests/a6.tolk index 3bdcdbdf8..da692a78e 100644 --- a/tolk-tester/tests/a6.tolk +++ b/tolk-tester/tests/a6.tolk @@ -4,7 +4,7 @@ fun f(a: int, b: int, c: int, d: int, e: int, f: int): (int, int) { return (Dx/D,Dy/D); };;;; -fun mulDivR(x: int, y: int, z: int): int { return muldivr(x, y, z); } +fun mulDivR(x: int, y: int, z: int): int { return mulDivRound(x, y, z); } fun calc_phi(): int { var n = 1; diff --git a/tolk-tester/tests/asm_arg_order.tolk b/tolk-tester/tests/asm_arg_order.tolk index 8bf46e3e2..a2d66bc2d 100644 --- a/tolk-tester/tests/asm_arg_order.tolk +++ b/tolk-tester/tests/asm_arg_order.tolk 
@@ -4,8 +4,8 @@ asm "NIL"; @pure fun tpush2(t: tuple, x: X): (tuple, ()) asm "TPUSH"; -fun emptyTuple(): tuple { return empty_tuple2(); } -fun tuplePush(t: tuple, value: X): (tuple, ()) { return tpush2(t, value); } +fun myEmptyTuple(): tuple { return empty_tuple2(); } +fun myTuplePush(t: tuple, value: X): (tuple, ()) { return tpush2(t, value); } @pure fun asm_func_1(x: int, y: int, z: int): tuple @@ -31,7 +31,7 @@ fun asmFuncModify(a: tuple, b: int, c: int): (tuple, ()) { return asm_func_modif global t: tuple; fun foo(x: int): int { - t~tuplePush(x); + t~myTuplePush(x); return x * 10; } @@ -44,7 +44,7 @@ fun test_old_1(): (tuple, tuple) { @method_id(12) fun test_old_2(): (tuple, tuple) { - t = emptyTuple(); + t = myEmptyTuple(); var t2: tuple = asm_func_2(foo(11), foo(22), foo(33)); return (t, t2); } @@ -58,7 +58,7 @@ fun test_old_3(): (tuple, tuple) { @method_id(14) fun test_old_4(): (tuple, tuple) { - t = emptyTuple(); + t = myEmptyTuple(); var t2: tuple = empty_tuple2(); // This actually computes left-to-right even without compute-asm-ltr t2 = asm_func_4(foo(11), (foo(22), (foo(33), foo(44))), foo(55)); diff --git a/tolk-tester/tests/camel1.tolk b/tolk-tester/tests/camel1.tolk index a9f1bf3e4..291206a95 100644 --- a/tolk-tester/tests/camel1.tolk +++ b/tolk-tester/tests/camel1.tolk @@ -7,40 +7,45 @@ // without affecting performance and even bytecode hashes. // This works with ~functions also. And even works with wrappers of wrappers. // Moreover, such wrappers can reorder input parameters, see a separate test camel2.tolk. 
+import "@stdlib/tvm-dicts" -fun myBeginCell(): builder { return begin_cell(); } -fun myEndCell(b: builder): cell { return end_cell(b); } -fun myStoreRef(b: builder, c: cell): builder { return store_ref(b, c); } -fun myStoreUint(b: builder, i: int, bw: int): builder { return store_uint(b, i, bw); } +fun myBeginCell(): builder { return beginCell(); } +fun myEndCell(b: builder): cell { return endCell(b); } +fun myStoreRef(b: builder, c: cell): builder { return storeRef(b, c); } +fun myStoreUint(b: builder, i: int, bw: int): builder { return storeUint(b, i, bw); } // 'inline' is not needed actually, but if it exists, it's just ignored @inline @pure -fun myBeginParse(c: cell): slice { return begin_parse(c); } +fun myBeginParse(c: cell): slice { return beginParse(c); } @inline @pure -fun mySkipBits(s: slice, len: int): slice { return skip_bits(s, len); } +fun mySkipBits(s: slice, len: int): slice { return skipBits(s, len); } @inline @pure -fun ~mySkipBits(s: slice, len: int): (slice, ()) { return ~skip_bits(s, len); } +fun ~mySkipBits(s: slice, len: int): (slice, ()) { return ~skipBits(s, len); } @inline @pure -fun ~myLoadUint(s: slice, len: int): (slice, int) { return load_uint(s, len); } +fun ~myLoadUint(s: slice, len: int): (slice, int) { return loadUint(s, len); } -fun myComputeDataSize(c: cell, maxCells: int): (int, int, int) { return compute_data_size(c, maxCells); } +fun myComputeDataSize(c: cell, maxCells: int): (int, int, int) { return calculateCellSizeStrict(c, maxCells); } -fun dict__new(): cell { return new_dict(); } -fun dict__iset(dict: cell, keyLen: int, index: int, value: slice): cell { return idict_set(dict, keyLen, index, value); } -fun ~dict__iset(dict: cell, keyLen: int, index: int, value: slice): (cell, ()) { return ~idict_set(dict, keyLen, index, value); } -fun dict__tryIGet(dict: cell, keyLen: int, index: int): (slice, int) { return idict_get?(dict, keyLen, index); } -fun dict__tryIGetMin(dict: cell, keyLen: int): (int, slice, int) { return 
idict_get_min?(dict, keyLen); } +fun dict__new(): cell { return createEmptyDict(); } +fun dict__iset(dict: cell, keyLen: int, index: int, value: slice): cell { return iDictSet(dict, keyLen, index, value); } +fun ~dict__iset(dict: cell, keyLen: int, index: int, value: slice): (cell, ()) { return ~iDictSet(dict, keyLen, index, value); } +fun dict__tryIGet(dict: cell, keyLen: int, index: int): (slice, int) { return iDictGet(dict, keyLen, index); } +fun dict__tryIGetMin(dict: cell, keyLen: int): (int, slice, int) { return iDictGetFirst(dict, keyLen); } -fun myEmptyTuple(): tuple { return empty_tuple(); } +@pure +fun triple_second(p: [X, Y, Z]): Y + asm "SECOND"; + +fun myEmptyTuple(): tuple { return createEmptyTuple(); } fun emptyTuple1(): tuple { return myEmptyTuple(); } fun emptyTuple11(): tuple { return emptyTuple1(); } -fun myTuplePush(t: tuple, value: X): tuple { return tpush(t, value); } -fun ~myTuplePush(t: tuple, value: X): (tuple, ()) { return ~tpush(t, value); } -fun myTupleAt(t: tuple, index: int): X { return at(t, index); } +fun myTuplePush(t: tuple, value: X): tuple { return tuplePush(t, value); } +fun ~myTuplePush(t: tuple, value: X): (tuple, ()) { return ~tuplePush(t, value); } +fun myTupleAt(t: tuple, index: int): X { return tupleAt(t, index); } fun tripleSecond(p: [X1, Y2, Z3]): Y2 { return triple_second(p); } @pure fun nullValue(): X diff --git a/tolk-tester/tests/camel2.tolk b/tolk-tester/tests/camel2.tolk index 121b7f784..51344b843 100644 --- a/tolk-tester/tests/camel2.tolk +++ b/tolk-tester/tests/camel2.tolk @@ -5,15 +5,15 @@ // But swapping arguments may sometimes lead to bytecode changes (see test2), // both with compute-asm-ltr and without it. 
-fun myBeginCell(): builder { return begin_cell(); } -fun myEndCell(b: builder): cell { return end_cell(b); } -fun myStoreRef1(b: builder, c: cell): builder { return store_ref(b, c); } -fun myStoreRef2(c: cell, b: builder): builder { return store_ref(b, c); } -fun myStoreUint1(b: builder, x: int, bw: int): builder { return store_uint(b, x, bw); } -fun myStoreUint2(b: builder, bw: int, x: int): builder { return store_uint(b, x, bw); } +fun myBeginCell(): builder { return beginCell(); } +fun myEndCell(b: builder): cell { return endCell(b); } +fun myStoreRef1(b: builder, c: cell): builder { return storeRef(b, c); } +fun myStoreRef2(c: cell, b: builder): builder { return storeRef(b, c); } +fun myStoreUint1(b: builder, x: int, bw: int): builder { return storeUint(b, x, bw); } +fun myStoreUint2(b: builder, bw: int, x: int): builder { return storeUint(b, x, bw); } -fun computeDataSize1(c: cell, maxCells: int): (int, int, int) { return compute_data_size(c, maxCells); } -fun computeDataSize2(maxCells: int, c: cell): (int, int, int) { return compute_data_size(c, maxCells); } +fun computeDataSize1(c: cell, maxCells: int): (int, int, int) { return calculateCellSizeStrict(c, maxCells); } +fun computeDataSize2(maxCells: int, c: cell): (int, int, int) { return calculateCellSizeStrict(c, maxCells); } fun fake(a: int, b: int, c: int): void asm "DROP DROP DROP"; @@ -48,24 +48,24 @@ fun test2(): (int, int, int) { fun test3(): (int, int, int) { var x: int = 1; var y: int = 1; - var to_be_ref: cell = begin_cell().end_cell(); - var in_c: builder = begin_cell().store_uint(123, 8); - in_c = store_ref(in_c, to_be_ref); - var (a, b, c) = compute_data_size(in_c.end_cell(), 10); + var to_be_ref: cell = beginCell().endCell(); + var in_c: builder = beginCell().storeUint(123, 8); + in_c = storeRef(in_c, to_be_ref); + var (a, b, c) = calculateCellSizeStrict(in_c.endCell(), 10); return (a, b + x, c + y); } -fun beginCell1(): builder { return begin_cell(); } +fun beginCell1(): builder { return 
beginCell(); } fun beginCell11(): builder { return beginCell1(); } fun beginCell111(): builder { return beginCell11(); } -fun endCell1(b: builder): cell { return end_cell(b); } +fun endCell1(b: builder): cell { return endCell(b); } fun endCell11(b: builder): cell { return endCell1(b); } -fun beginParse1(c: cell): slice { return begin_parse(c); } +fun beginParse1(c: cell): slice { return beginParse(c); } fun beginParse11(c: cell): slice { return beginParse1(c); } -fun storeInt1(b: builder, bw: int, x: int): builder { return store_int(b, x, bw); } +fun storeInt1(b: builder, bw: int, x: int): builder { return storeInt(b, x, bw); } fun storeInt11(bw: int, x: int, b: builder): builder { return storeInt1(b, bw, x); } fun storeInt111(b: builder, x: int, bw: int): builder { return storeInt11(bw, x, b); } diff --git a/tolk-tester/tests/camel3.tolk b/tolk-tester/tests/camel3.tolk index e76c02b7d..23b16e5fb 100644 --- a/tolk-tester/tests/camel3.tolk +++ b/tolk-tester/tests/camel3.tolk @@ -4,17 +4,17 @@ // (save to a variable, return from a function, etc.) // it also works, since a function becomes codegenerated (though direct calls are expectedly inlined). 
-fun myBeginCell(): builder { return begin_cell(); } -fun myEndCell(b: builder): cell { return end_cell(b); } -fun myStoreRef(b: builder, c: cell): builder { return store_ref(b, c); } -fun myStoreUint3(i: int, bw: int, b: builder): builder { return store_uint(b, i, bw); } +fun myBeginCell(): builder { return beginCell(); } +fun myEndCell(b: builder): cell { return endCell(b); } +fun myStoreRef(b: builder, c: cell): builder { return storeRef(b, c); } +fun myStoreUint3(i: int, bw: int, b: builder): builder { return storeUint(b, i, bw); } -fun computeDataSize2(maxCells: int, c: cell): (int, int, int) { return compute_data_size(c, maxCells); } +fun computeDataSize2(maxCells: int, c: cell): (int, int, int) { return calculateCellSizeStrict(c, maxCells); } -fun myEmptyTuple(): tuple { return empty_tuple(); } -fun myTuplePush(t: tuple, value: X): tuple { return tpush(t, value); } -fun ~myTuplePush(t: tuple, value: X): (tuple, ()) { return ~tpush(t, value); } -fun tupleGetFirst(t: tuple): X { return first(t); } +fun myEmptyTuple(): tuple { return createEmptyTuple(); } +fun myTuplePush(t: tuple, value: X): tuple { return tuplePush(t, value); } +fun ~myTuplePush(t: tuple, value: X): (tuple, ()) { return ~tuplePush(t, value); } +fun myTupleGetFirst(t: tuple): X { return tupleFirst(t); } @inline @@ -32,7 +32,7 @@ fun test1(): (int, int, int) { var t: tuple = myEmptyTuple(); t~myTuplePush(myStoreRef); - var refStorer = tupleGetFirst(t); + var refStorer = myTupleGetFirst(t); var x: int = 1; var y: int = 1; diff --git a/tolk-tester/tests/camel4.tolk b/tolk-tester/tests/camel4.tolk index c6be62685..a33e3fd36 100644 --- a/tolk-tester/tests/camel4.tolk +++ b/tolk-tester/tests/camel4.tolk @@ -1,7 +1,7 @@ // Here we test that a just-return function is not a valid wrapper, it will not be inlined. // (doesn't use all arguments, has different pureness, has method_id, etc.) 
-fun myStoreUint(b: builder, x: int, unused: int): builder { return store_uint(b, x, x); } +fun myStoreUint(b: builder, x: int, unused: int): builder { return storeUint(b, x, x); } fun throwIf(excNo: int, cond: int) { assert(!cond) throw excNo; } fun initial1(x: auto) { return x; } @@ -17,11 +17,11 @@ fun postpone_elections(): int { } fun setAndGetData(ret: int): int { - var c: cell = begin_cell().store_uint(ret, 8).end_cell(); - set_data(c); - var s: slice = get_data().begin_parse(); + var c: cell = beginCell().storeUint(ret, 8).endCell(); + setContractData(c); + var s: slice = getContractData().beginParse(); throwIf(101, 0); - return s~load_uint(8); + return s~loadUint(8); } fun setAndGetDataWrapper(ret: int): int { @@ -30,9 +30,9 @@ fun setAndGetDataWrapper(ret: int): int { @method_id(101) fun test1(): int { - var c: cell = begin_cell().myStoreUint(32, 10000000).end_cell(); - var s: slice = c.begin_parse(); - return s~load_uint(32); + var c: cell = beginCell().myStoreUint(32, 10000000).endCell(); + var s: slice = c.beginParse(); + return s~loadUint(32); } get fun test2(ret: int): int { @@ -47,13 +47,13 @@ fun test3(): int { global t: tuple; fun foo(x: int): int { - t~tpush(x); + t~tuplePush(x); return x * 10; } @method_id(104) fun test4(): (tuple, tuple) { - t = empty_tuple(); + t = createEmptyTuple(); var t2: tuple = asmFunc4(foo(11), (foo(22), (foo(33), foo(44))), foo(55)); return (t, t2); } diff --git a/tolk-tester/tests/cells-slices.tolk b/tolk-tester/tests/cells-slices.tolk index 508cd31db..1bf0742d9 100644 --- a/tolk-tester/tests/cells-slices.tolk +++ b/tolk-tester/tests/cells-slices.tolk @@ -1,32 +1,32 @@ fun store_u32(b: builder, value: int): builder { - return b.store_uint(value, 32); + return b.storeUint(value, 32); } fun ~store_u32(b: builder, value: int): (builder, ()) { - return ~store_uint(b, value, 32); + return ~storeUint(b, value, 32); } fun load_u32(cs: slice): (slice, int) { - return cs.load_uint(32); + return cs.loadUint(32); } -fun 
my_load_int(s: slice, len: int): (slice, int) +fun my_loadInt(s: slice, len: int): (slice, int) asm(s len -> 1 0) "LDIX"; // top is "value slice" -fun my_store_int(b: builder, x: int, len: int): builder +fun my_storeInt(b: builder, x: int, len: int): builder asm(x b len) "STIX"; -fun ~my_store_int(b: builder, x: int, len: int): (builder, ()) +fun ~my_storeInt(b: builder, x: int, len: int): (builder, ()) asm(x b len) "STIX"; @method_id(101) fun test1(): [int,int,int,int,int] { - var b: builder = begin_cell().store_uint(1, 32); - b = b.store_uint(2, 32); - b~store_uint(3, 32); + var b: builder = beginCell().storeUint(1, 32); + b = b.storeUint(2, 32); + b~storeUint(3, 32); b = b.store_u32(4); b~store_u32(5); - var cs: slice = b.end_cell().begin_parse(); - var (cs redef, one: int) = cs.load_uint(32); - var (two: int, three: int) = (cs~load_uint(32), cs~load_u32()); + var cs: slice = b.endCell().beginParse(); + var (cs redef, one: int) = cs.loadUint(32); + var (two: int, three: int) = (cs~loadUint(32), cs~load_u32()); var (cs redef, four: int) = cs.load_u32(); var five: int = cs~load_u32(); @@ -35,82 +35,82 @@ fun test1(): [int,int,int,int,int] { @method_id(102) fun test2(): [int,int,int] { - var b: builder = begin_cell().my_store_int(1, 32); - b = b.my_store_int(2, 32); - b~my_store_int(3, 32); + var b: builder = beginCell().my_storeInt(1, 32); + b = b.my_storeInt(2, 32); + b~my_storeInt(3, 32); - var cs: slice = b.end_cell().begin_parse(); - var (cs redef, one: int) = cs.my_load_int(32); - var (two: int, three: int) = (cs~my_load_int(32), cs~my_load_int(32)); + var cs: slice = b.endCell().beginParse(); + var (cs redef, one: int) = cs.my_loadInt(32); + var (two: int, three: int) = (cs~my_loadInt(32), cs~my_loadInt(32)); return [one,two,three]; } @method_id(103) fun test3(ret: int): int { - var (_, same: int) = begin_cell().store_uint(ret,32).end_cell().begin_parse().load_uint(32); + var (_, same: int) = beginCell().storeUint(ret,32).endCell().beginParse().loadUint(32); 
return same; } @method_id(104) fun test4(): [int,int] { - var b: builder = my_store_int(begin_cell(), 1, 32); - b = store_int(store_int(b, 2, 32), 3, 32); + var b: builder = my_storeInt(beginCell(), 1, 32); + b = storeInt(storeInt(b, 2, 32), 3, 32); - var cs: slice = b.end_cell().begin_parse(); - var cs32: slice = cs.first_bits(32); // todo s.first_bits()~load_uint() doesn't work, 'lvalue expected' - var (one, _, three) = (cs32~load_int(32), cs~skip_bits(64), cs~load_u32()); + var cs: slice = b.endCell().beginParse(); + var cs32: slice = cs.getFirstBits(32); // todo s.getFirstBits()~loadUint() doesn't work, 'lvalue expected' + var (one, _, three) = (cs32~loadInt(32), cs~skipBits(64), cs~load_u32()); return [one,three]; } @method_id(105) fun test5(): [int,int] { - var cref: cell = end_cell(store_u32(begin_cell(), 105)); - var c: cell = begin_cell().store_ref(cref).store_ref(cref).store_u32(1).end_cell(); - - var cs: slice = begin_parse(c); - // todo I want cs~load_ref().begin_parse()~load_u32(), but 'lvalue expected' - var ref1 = cs~load_ref().begin_parse(); - var ref2 = cs~load_ref().begin_parse(); - var sto5x2: int = ref1~load_u32() + ref2~load_uint(32); + var cref: cell = endCell(store_u32(beginCell(), 105)); + var c: cell = beginCell().storeRef(cref).storeRef(cref).store_u32(1).endCell(); + + var cs: slice = beginParse(c); + // todo I want cs~loadRef().beginParse()~load_u32(), but 'lvalue expected' + var ref1 = cs~loadRef().beginParse(); + var ref2 = cs~loadRef().beginParse(); + var sto5x2: int = ref1~load_u32() + ref2~loadUint(32); return [sto5x2, cs~load_u32()]; } fun ~sumNumbersInSlice(s: slice): (slice, int) { var result = 0; - while (!slice_data_empty?(s)) { - result += s~load_uint(32); + while (!s.isEndOfSliceBits()) { + result += s~loadUint(32); } return (s, result); } @method_id(106) fun test6() { - var ref = begin_cell().store_int(100, 32).end_cell(); - var s: slice = begin_cell().store_int(1, 32).store_int(2, 32).store_ref(ref).end_cell().begin_parse();
- var result = (slice_bits(s), s~sumNumbersInSlice(), slice_bits(s), slice_empty?(s), slice_data_empty?(s), slice_refs_empty?(s)); - var ref2: cell = s~load_ref(); - var s2: slice = ref2.begin_parse(); - s.end_parse(); - return (result, s2~load_int(32), s2.slice_empty?()); + var ref = beginCell().storeInt(100, 32).endCell(); + var s: slice = beginCell().storeInt(1, 32).storeInt(2, 32).storeRef(ref).endCell().beginParse(); + var result = (getRemainingBitsCount(s), s~sumNumbersInSlice(), getRemainingBitsCount(s), isEndOfSlice(s), isEndOfSliceBits(s), isEndOfSliceRefs(s)); + var ref2: cell = s~loadRef(); + var s2: slice = ref2.beginParse(); + s.assertEndOfSlice(); + return (result, s2~loadInt(32), s2.isEndOfSlice()); } @method_id(107) fun test7() { - var s: slice = begin_cell().store_int(1, 32).store_int(2, 32).store_int(3, 32).store_int(4, 32).store_int(5, 32).store_int(6, 32).store_int(7, 32).end_cell().begin_parse(); - var size1 = slice_bits(s); - s~skip_bits(32); - var s1: slice = s.first_bits(64); - var n1 = s1~load_int(32); - var size2 = slice_bits(s); - s~load_int(32); - var size3 = slice_bits(s); - s~skip_last_bits(32); - var size4 = slice_bits(s); - var n2 = s~load_int(32); - var size5 = slice_bits(s); + var s: slice = beginCell().storeInt(1, 32).storeInt(2, 32).storeInt(3, 32).storeInt(4, 32).storeInt(5, 32).storeInt(6, 32).storeInt(7, 32).endCell().beginParse(); + var size1 = getRemainingBitsCount(s); + s~skipBits(32); + var s1: slice = s.getFirstBits(64); + var n1 = s1~loadInt(32); + var size2 = getRemainingBitsCount(s); + s~loadInt(32); + var size3 = getRemainingBitsCount(s); + s~removeLastBits(32); + var size4 = getRemainingBitsCount(s); + var n2 = s~loadInt(32); + var size5 = getRemainingBitsCount(s); return (n1, n2, size1, size2, size3, size4, size5); } @@ -118,13 +118,13 @@ fun test7() { fun test108() { var (result1, result2) = (0, 0); try { - begin_cell().store_ref(begin_cell().end_cell()).end_cell().begin_parse().end_parse(); + 
beginCell().storeRef(beginCell().endCell()).endCell().beginParse().assertEndOfSlice(); result1 = 100; } catch (code) { result1 = code; } try { - begin_cell().end_cell().begin_parse().end_parse(); + beginCell().endCell().beginParse().assertEndOfSlice(); result2 = 100; } catch (code) { result2 = code; @@ -134,18 +134,48 @@ fun test108() { @method_id(109) fun test109() { - var ref2 = begin_cell().store_int(1, 32).end_cell(); - var ref1 = begin_cell().store_int(1, 32).store_ref(ref2).end_cell(); - var c = begin_cell().store_int(444, 32).store_ref(ref1).store_ref(ref1).store_ref(ref1).store_ref(ref2).store_int(4, 32).end_cell(); - var (n_cells1, n_bits1, n_refs1) = c.compute_data_size(10); - var s = c.begin_parse(); - s~load_ref(); - s~load_ref(); - var n = s~load_int(32); - var (n_cells2, n_bits2, n_refs2) = s.slice_compute_data_size(10); + var ref2 = beginCell().storeInt(1, 32).endCell(); + var ref1 = beginCell().storeInt(1, 32).storeRef(ref2).endCell(); + var c = beginCell().storeInt(444, 32).storeRef(ref1).storeRef(ref1).storeRef(ref1).storeRef(ref2).storeInt(4, 32).endCell(); + var (n_cells1, n_bits1, n_refs1) = c.calculateCellSizeStrict(10); + var s = c.beginParse(); + s~loadRef(); + s~loadRef(); + var n = s~loadInt(32); + var (n_cells2, n_bits2, n_refs2) = s.calculateSliceSizeStrict(10); return ([n_cells1, n_bits1, n_refs1], [n_cells2, n_bits2, n_refs2], n); } +@method_id(110) +fun test110(x: int) { + var s = beginCell().storeBool(x < 0).storeBool(0).storeBool(x).endCell().beginParse(); + return (s~loadBool(), s~loadBool(), s~loadBool()); +} + +@method_id(111) +fun test111() { + var s = beginCell().storeMessageOp(123).storeMessageQueryId(456) + .storeAddressNone().storeAddressNone() + .storeUint(0, 32) + .storeUint(123, 32).storeUint(456, 64).storeUint(789, 64) + .endCell().beginParse(); + var op1 = s~loadUint(32); + var q1 = s~loadUint(64); + if (s.addressIsNone()) { + s~skipBits(2); + } + if (s~loadBool() == 0) { + assert(s~loadBool() == 0) throw 444; + 
s~skipBits(32); + } + var op2 = s~loadMessageOp(); + var q2 = s~loadMessageQueryId(); + s~skipBits(64); + s.assertEndOfSlice(); + assert(isMessageBounced(0x001)) throw 444; + return (op1, q1, op2, q2); +} + fun main(): int { return 0; } @@ -160,4 +190,7 @@ fun main(): int { @testcase | 107 | | 2 3 224 192 160 128 96 @testcase | 108 | | 9 100 @testcase | 109 | | [ 3 128 5 ] [ 2 96 3 ] 444 +@testcase | 110 | -1 | -1 0 -1 +@testcase | 110 | 0 | 0 0 0 +@testcase | 111 | | 123 456 123 456 */ diff --git a/tolk-tester/tests/co1.tolk b/tolk-tester/tests/co1.tolk index bc56dfa89..5ad9d8e41 100644 --- a/tolk-tester/tests/co1.tolk +++ b/tolk-tester/tests/co1.tolk @@ -43,7 +43,7 @@ asm "SDEQ"; fun stslicer(b: builder, s: slice): builder asm "STSLICER"; -fun storeUint(b: builder, x: int, len: int): builder { return store_uint(b, x, len); } +fun myStoreUint(b: builder, x: int, len: int): builder { return storeUint(b, x, len); } fun endSlice(b: builder): slice { return endcs(b); } fun main() { @@ -59,9 +59,9 @@ fun main() { var s2: slice = sget2(); var s3: slice = newc().stslicer(str1).stslicer(str2r).endcs(); - assert(sdeq(s1, newc().storeUint(str1int, 12 * nibbles).endcs())) throw int111; - assert(sdeq(s2, newc().store_uint(str2int, 6 * nibbles).endSlice())) throw 112; - assert(sdeq(s3, newc().store_uint(0x636f6e737431AABBCC, 18 * nibbles).endcs())) throw 113; + assert(sdeq(s1, newc().myStoreUint(str1int, 12 * nibbles).endcs())) throw int111; + assert(sdeq(s2, newc().storeUint(str2int, 6 * nibbles).endSlice())) throw 112; + assert(sdeq(s3, newc().storeUint(0x636f6e737431AABBCC, 18 * nibbles).endcs())) throw 113; var i4: int = iget240(); assert(i4 == 240) throw ((104)); diff --git a/tolk-tester/tests/dicts-demo.tolk b/tolk-tester/tests/dicts-demo.tolk new file mode 100644 index 000000000..5852b175c --- /dev/null +++ b/tolk-tester/tests/dicts-demo.tolk @@ -0,0 +1,106 @@ +import "@stdlib/tvm-dicts" + +fun ~addIntToIDict(iDict: cell, key: int, number: int): (cell, ()) { + 
iDict~iDictSetBuilder(32, key, beginCell().storeInt(number, 32)); + return (iDict, ()); +} + +fun calculateDictLen(d: cell) { + var len = 0; + var (k, v, f) = d.uDictGetFirst(32); + while (f) { + len += 1; + (k, v, f) = d.uDictGetNext(32, k); + } + return len; +} + +fun ~loadTwoDigitNumberFromSlice(s: slice): (slice, int) { + var n1 = s~loadInt(8); + var n2 = s~loadInt(8); + return (s, (n1 - 48) * 10 + (n2 - 48)); +} + + +@method_id(101) +fun test101(getK1: int, getK2: int, getK3: int) { + var dict = createEmptyDict(); + dict~uDictSetBuilder(32, 1, beginCell().storeUint(1, 32)); + var (old1: slice, found1) = dict~uDictSetAndGet(32, getK1, beginCell().storeUint(2, 32).endCell().beginParse()); + var (old2: slice, found2) = dict~uDictSetAndGet(32, getK2, beginCell().storeUint(3, 32).endCell().beginParse()); + var (cur3: slice, found3) = dict.uDictGet(32, getK3); + return ( + found1 ? old1~loadUint(32) : -1, + found2 ? old2~loadUint(32) : -1, + found3 ? cur3~loadUint(32) : -1 + ); +} + +@method_id(102) +fun test102() { + var dict = createEmptyDict(); + dict~addIntToIDict(2, 102); + dict~addIntToIDict(1, 101); + dict~addIntToIDict(4, 104); + dict~addIntToIDict(3, 103); + var deleted = createEmptyTuple(); + var shouldBreak = false; + while (!shouldBreak) { + var (kDel, kVal, wasDel) = dict~iDictDeleteLastAndGet(32); + if (wasDel) { + deleted~tuplePush([kDel, kVal~loadInt(32)]); + } else { + shouldBreak = true; + } + } + return deleted; +} + +@method_id(103) +fun test103() { + var dict = createEmptyDict(); + dict~uDictSetBuilderIfNotExists(32, 1,beginCell().storeInt(1, 32)); + dict~uDictSetBuilderIfNotExists(32, 1,beginCell().storeInt(1, 32)); + var len1 = calculateDictLen(dict); + dict~uDictSetBuilderIfExists(32, 2,beginCell().storeInt(1, 32)); + dict~uDictSetBuilderIfExists(32, 2,beginCell().storeInt(1, 32)); + var len2 = calculateDictLen(dict); + dict~uDictSetBuilder(32, 3,beginCell().storeInt(1, 32)); + dict~uDictSetBuilderIfExists(32, 3,beginCell().storeInt(1, 32)); 
+ var len3 = calculateDictLen(dict); + var (delK1, _, _) = dict~uDictDeleteFirstAndGet(32); + var (delK2, _, _) = dict~uDictDeleteFirstAndGet(32); + var (delK3, _, _) = dict~uDictDeleteFirstAndGet(32); + return (len1, len2, len3, delK1, delK2, delK3); +} + +@method_id(104) +fun test104() { + var dict = createEmptyDict(); + dict~sDictSetBuilder(32, "7800", beginCell().storeUint(5 + 48, 8).storeUint(6 + 48, 8)); + dict~sDictSet(32, "key1", "12"); + var (old1, _) = dict~sDictSetAndGet(32, "key1", "34"); + var (old2, _) = dict~sDictDeleteAndGet(32, "key1"); + var (restK, restV, _) = dict.sDictGetFirst(32); + var (restK1, restV1, _) = dict~sDictDeleteLastAndGet(32); + assert (restK.isSliceBitsEqual(restK1)) throw 123; + assert (restV.isSliceBitsEqual(restV1)) throw 123; + return ( + old1~loadTwoDigitNumberFromSlice(), + old2~loadTwoDigitNumberFromSlice(), + restV~loadTwoDigitNumberFromSlice(), + restK~loadTwoDigitNumberFromSlice(), + restK~loadTwoDigitNumberFromSlice() + ); +} + +fun main() {} + +/** +@testcase | 101 | 1 1 1 | 1 2 3 +@testcase | 101 | 1 2 1 | 1 -1 2 +@testcase | 101 | 1 2 3 | 1 -1 -1 +@testcase | 102 | | [ [ 4 104 ] [ 3 103 ] [ 2 102 ] [ 1 101 ] ] +@testcase | 103 | | 1 1 2 1 3 (null) +@testcase | 104 | | 12 34 56 78 0 + */ diff --git a/tolk-tester/tests/imports/use-dicts-err.tolk b/tolk-tester/tests/imports/use-dicts-err.tolk new file mode 100644 index 000000000..a4ee9aede --- /dev/null +++ b/tolk-tester/tests/imports/use-dicts-err.tolk @@ -0,0 +1,21 @@ +fun prepareDict_3_30_4_40_5_x(valueAt5: int): cell { + var dict: cell = createEmptyDict(); + dict~idict_set_builder(32, 3, begin_cell().store_int(30, 32)); + dict~idict_set_builder(32, 4, begin_cell().store_int(40, 32)); + dict~idict_set_builder(32, 5, begin_cell().store_int(valueAt5, 32)); + return dict; +} + +fun lookupIdxByValue(idict32: cell, value: int): int { + var cur_key = -1; + do { + var (cur_key redef, cs: slice, found: int) = idict32.idict_get_next?(32, cur_key); + // one-line condition 
(via &) doesn't work, since right side is calculated immediately + if (found) { + if (cs~load_int(32) == value) { + return cur_key; + } + } + } while (found); + return -1; +} diff --git a/tolk-tester/tests/imports/use-dicts.tolk b/tolk-tester/tests/imports/use-dicts.tolk new file mode 100644 index 000000000..358a5673a --- /dev/null +++ b/tolk-tester/tests/imports/use-dicts.tolk @@ -0,0 +1,23 @@ +import "@stdlib/tvm-dicts" + +fun prepareDict_3_30_4_40_5_x(valueAt5: int): cell { + var dict: cell = createEmptyDict(); + dict~iDictSetBuilder(32, 3, beginCell().storeInt(30, 32)); + dict~iDictSetBuilder(32, 4, beginCell().storeInt(40, 32)); + dict~iDictSetBuilder(32, 5, beginCell().storeInt(valueAt5, 32)); + return dict; +} + +fun lookupIdxByValue(idict32: cell, value: int): int { + var cur_key = -1; + do { + var (cur_key redef, cs: slice, found: int) = idict32.iDictGetNext(32, cur_key); + // one-line condition (via &) doesn't work, since right side is calculated immediately + if (found) { + if (cs~loadInt(32) == value) { + return cur_key; + } + } + } while (found); + return -1; +} diff --git a/tolk-tester/tests/invalid-import.tolk b/tolk-tester/tests/invalid-import.tolk index b1c01518e..416764b62 100644 --- a/tolk-tester/tests/invalid-import.tolk +++ b/tolk-tester/tests/invalid-import.tolk @@ -4,6 +4,8 @@ /** @compilation_should_fail -@stderr invalid-import.tolk:2:7: error: Failed to import: cannot find file +On Linux/Mac, `realpath()` returns an error, and the error message is `cannot find file` +On Windows, it fails later, when reading the file, with the message "cannot open file" +@stderr invalid-import.tolk:2:7: error: Failed to import: cannot @stderr import "unexisting.tolk"; */ diff --git a/tolk-tester/tests/invalid-no-import.tolk b/tolk-tester/tests/invalid-no-import-1.tolk similarity index 100% rename from tolk-tester/tests/invalid-no-import.tolk rename to tolk-tester/tests/invalid-no-import-1.tolk diff --git a/tolk-tester/tests/invalid-no-import-2.tolk
b/tolk-tester/tests/invalid-no-import-2.tolk new file mode 100644 index 000000000..d78346b90 --- /dev/null +++ b/tolk-tester/tests/invalid-no-import-2.tolk @@ -0,0 +1,9 @@ +import "@stdlib/tvm-dicts" +import "imports/use-dicts-err.tolk" + +/** +@compilation_should_fail +@stderr imports/use-dicts-err.tolk:2:22 +@stderr Using a non-imported symbol `createEmptyDict` +@stderr Forgot to import "@stdlib/tvm-dicts"? + */ diff --git a/tolk-tester/tests/invalid-pure-2.tolk b/tolk-tester/tests/invalid-pure-2.tolk index 5f8f40ec8..213206834 100644 --- a/tolk-tester/tests/invalid-pure-2.tolk +++ b/tolk-tester/tests/invalid-pure-2.tolk @@ -2,7 +2,7 @@ global g: int; @pure fun f_pure(): builder { - var b: builder = begin_cell(); + var b: builder = beginCell(); g = g + 1; return b; } diff --git a/tolk-tester/tests/invalid-pure-3.tolk b/tolk-tester/tests/invalid-pure-3.tolk index 0e1b4104a..f64b81ce7 100644 --- a/tolk-tester/tests/invalid-pure-3.tolk +++ b/tolk-tester/tests/invalid-pure-3.tolk @@ -1,12 +1,12 @@ @pure fun validate_input(input: cell): (int, int) { - var (x, y, z, correct) = compute_data_size?(input, 10); + var (x, y, z, correct) = calculateCellSize(input, 10); assert(correct) throw 102; } @pure fun someF(): int { - var c: cell = begin_cell().end_cell(); + var c: cell = beginCell().endCell(); validate_input(c); return 0; } diff --git a/tolk-tester/tests/invalid-redefinition-1.tolk b/tolk-tester/tests/invalid-redefinition-1.tolk index 49771cea1..5238a6801 100644 --- a/tolk-tester/tests/invalid-redefinition-1.tolk +++ b/tolk-tester/tests/invalid-redefinition-1.tolk @@ -1,7 +1,7 @@ -global moddiv: int; +global mulDivMod: int; /** @compilation_should_fail -@stderr global moddiv: int; +@stderr global mulDivMod: int; @stderr redefinition of built-in symbol */ diff --git a/tolk-tester/tests/invalid-symbol-1.tolk b/tolk-tester/tests/invalid-symbol-1.tolk index 5d392f529..08a86f176 100644 --- a/tolk-tester/tests/invalid-symbol-1.tolk +++ 
b/tolk-tester/tests/invalid-symbol-1.tolk @@ -4,11 +4,11 @@ fun main(x: int): int { } else { var y: slice = "20"; } - ~dump(y); + debugPrint(y); } /** @compilation_should_fail -@stderr ~dump(y); +@stderr debugPrint(y); @stderr undefined symbol `y` */ diff --git a/tolk-tester/tests/logical-operators.tolk b/tolk-tester/tests/logical-operators.tolk index b73c25220..ec0e7a87c 100644 --- a/tolk-tester/tests/logical-operators.tolk +++ b/tolk-tester/tests/logical-operators.tolk @@ -1,3 +1,5 @@ +import "imports/use-dicts.tolk" + fun simpleAllConst() { return (!0, !!0 & !false, !!!0, !1, !!1, !-1, !!-1, (!5 == 0) == !0, !0 == true); } @@ -43,29 +45,10 @@ fun someSum(upto: int) { return x; } - -fun lookupIdxByValue(idict32: cell, value: int) { - var cur_key = -1; - do { - var (cur_key redef, cs: slice, found: int) = idict32.idict_get_next?(32, cur_key); - // todo one-line condition (via &) doesn't work, since right side is calculated immediately - if (found) { - if (cs~load_int(32) == value) { - return cur_key; - } - } - } while (found); - return -1; -} - @method_id(104) fun testDict(last: int) { - // prepare dict: [3 => 30, 4 => 40, 5 => 50] - var dict: cell = new_dict(); - dict~idict_set_builder(32, 3, begin_cell().store_int(30, 32)); - dict~idict_set_builder(32, 4, begin_cell().store_int(40, 32)); - dict~idict_set_builder(32, 5, begin_cell().store_int(!last ? 100 : last, 32)); - + // prepare dict: [3 => 30, 4 => 40, 5 => x] + var dict = prepareDict_3_30_4_40_5_x(!last ? 
100 : last); return (lookupIdxByValue(dict, 30), lookupIdxByValue(dict, last), lookupIdxByValue(dict, 100)); } diff --git a/tolk-tester/tests/no-spaces.tolk b/tolk-tester/tests/no-spaces.tolk index 018c99da2..aecfdabaa 100644 --- a/tolk-tester/tests/no-spaces.tolk +++ b/tolk-tester/tests/no-spaces.tolk @@ -50,14 +50,14 @@ fun add3(a: int, b: int, c: int) { return a+b+c; } } fun `load:u32`(cs: slice): (slice, int) { - return cs.load_uint(32); + return cs.loadUint(32); } @method_id(116) fun `call_~_via_backticks`():[int,int,int,int] { - var b:builder = begin_cell().store_uint(1, 32).store_uint(2, 32).store_uint(3, 32).store_uint(4, 32); - var `cs`:slice = b.end_cell().begin_parse(); - var (`cs` redef,one:int) = `cs`.`load_uint`(32); - var (two:int,three:int) = (`cs`~`load_uint`(32), cs~`load:u32`()); + var b:builder = beginCell().storeUint(1, 32).storeUint(2, 32).storeUint(3, 32).storeUint(4, 32); + var `cs`:slice = b.endCell().beginParse(); + var (`cs` redef,one:int) = `cs`.`loadUint`(32); + var (two:int,three:int) = (`cs`~`loadUint`(32), cs~`load:u32`()); var (cs redef,four:int) = cs.`load:u32`(); return [one,two,three,four]; } diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index 0be4966ff..f0f85fd60 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -1,18 +1,19 @@ -import "../../crypto/smartcont/stdlib.tolk" +import "@stdlib/lisp-lists" + @method_id(101) fun test1() { - var numbers: tuple = null; - numbers = cons(1, numbers); - numbers = cons(2, numbers); - numbers = cons(3, numbers); - numbers = cons(4, numbers); - var (h, numbers redef) = uncons(numbers); - h += car(numbers); - - var t = empty_tuple(); + var numbers: tuple = createEmptyList(); + numbers = listPrepend(1, numbers); + numbers = listPrepend(2, numbers); + numbers = listPrepend(3, numbers); + numbers = listPrepend(4, numbers); + var (h, numbers redef) = listSplit(numbers); + h += listGetHead(numbers); + + var t = 
createEmptyTuple(); do { - var num = numbers~list_next(); - t~tpush(num); + var num = numbers~listNext(); + t~tuplePush(num); } while (numbers != null); return (h, numbers == null, t); @@ -52,7 +53,7 @@ fun getUntypedNull() { @method_id(104) fun test4() { - var (_, (_, untyped)) = (3, (empty_tuple, null)); + var (_, (_, untyped)) = (3, (createEmptyTuple, null)); if (true) { return untyped; } @@ -62,7 +63,7 @@ fun test4() { @method_id(105) fun test5() { var n = getUntypedNull(); - return !(null == n) ? n~load_int(32) : 100; + return !(null == n) ? n~loadInt(32) : 100; } @method_id(106) @@ -72,9 +73,9 @@ fun test6(x: int) { @method_id(107) fun test7() { - var b = begin_cell().store_maybe_ref(null); - var s = b.end_cell().begin_parse(); - var c = s~load_maybe_ref(); + var b = beginCell().storeMaybeRef(null); + var s = b.endCell().beginParse(); + var c = s~loadMaybeRef(); return (null == c) * 10 + (b != null); } diff --git a/tolk-tester/tests/pure-functions.tolk b/tolk-tester/tests/pure-functions.tolk index 59b2f0da5..6e7a6ddad 100644 --- a/tolk-tester/tests/pure-functions.tolk +++ b/tolk-tester/tests/pure-functions.tolk @@ -11,16 +11,16 @@ fun f_pure2(): int { @pure fun get_contract_data(): (int, int) { - var c: cell = get_data(); - var cs: slice = c.begin_parse(); - cs~load_bits(32); - var value: int = cs~load_uint(16); + var c: cell = getContractData(); + var cs: slice = c.beginParse(); + cs~loadBits(32); + var value: int = cs~loadUint(16); return (1, value); } fun save_contract_data(value: int) { - var b: builder = begin_cell().store_int(1, 32).store_uint(value, 16); - set_data(b.end_cell()); + var b: builder = beginCell().storeInt(1, 32).storeUint(value, 16); + setContractData(b.endCell()); } @pure diff --git a/tolk-tester/tests/s1.tolk b/tolk-tester/tests/s1.tolk index 3f75f1a70..c7c4f6946 100644 --- a/tolk-tester/tests/s1.tolk +++ b/tolk-tester/tests/s1.tolk @@ -43,10 +43,10 @@ fun main() { var i_mini: int = string_minihash(); var i_maxi: int = 
string_maxihash(); var i_crc: int = string_crc32(); - assert(sdeq(s_ascii, newc().store_uint(0x737472696E67, 12 * 4).endcs())) throw 101; - assert(sdeq(s_raw, newc().store_uint(0xABCDEF, 6 * 4).endcs())) throw 102; - assert(sdeq(s_addr, newc().store_uint(4, 3).store_int(-1, 8) - .store_uint(0x3333333333333333333333333333333333333333333333333333333333333333, 256).endcs()), 103); + assert(sdeq(s_ascii, newc().storeUint(0x737472696E67, 12 * 4).endcs())) throw 101; + assert(sdeq(s_raw, newc().storeUint(0xABCDEF, 6 * 4).endcs())) throw 102; + assert(sdeq(s_addr, newc().storeUint(4, 3).storeInt(-1, 8) + .storeUint(0x3333333333333333333333333333333333333333333333333333333333333333, 256).endcs()), 103); assert(i_hex == 0x4142434445464748494A4B4C4D4E4F505152535455565758595A303132333435) throw 104; assert(i_mini == 0x7a62e8a8) throw 105; assert(i_maxi == 0x7a62e8a8ebac41bd6de16c65e7be363bc2d2cbc6a0873778dead4795c13db979) throw 106; diff --git a/tolk-tester/tests/test-math.tolk b/tolk-tester/tests/test-math.tolk index dde4c2b32..7c9873f09 100644 --- a/tolk-tester/tests/test-math.tolk +++ b/tolk-tester/tests/test-math.tolk @@ -1,4 +1,982 @@ -import "../../crypto/smartcont/mathlib.tolk"; +// this is actually `mathlib.fc` transformed to Tolk + +import "@stdlib/tvm-lowlevel" + +/*--------------- MISSING OPERATIONS AND BUILT-INS ----------------*/ + +/// compute floor(log2(x))+1 +@pure +fun log2_floor_p1(x: int): int + asm "UBITSIZE"; + +@pure +fun mulrshiftr(x: int, y: int, s: int): int + asm "MULRSHIFTR"; + +@pure +fun mulrshiftr256(x: int, y: int): int + asm "256 MULRSHIFTR#"; + +@pure +fun mulrshift256mod(x: int, y: int): (int, int) + asm "256 MULRSHIFT#MOD"; + +@pure +fun mulrshiftr256mod(x: int, y: int): (int, int) + asm "256 MULRSHIFTR#MOD"; + +@pure +fun mulrshiftr255mod(x: int, y: int): (int, int) + asm "255 MULRSHIFTR#MOD"; + +@pure +fun mulrshiftr248mod(x: int, y: int): (int, int) + asm "248 MULRSHIFTR#MOD"; + +@pure +fun mulrshiftr5mod(x: int, y: int): (int, int) + asm 
"5 MULRSHIFTR#MOD"; + +@pure +fun mulrshiftr6mod(x: int, y: int): (int, int) + asm "6 MULRSHIFTR#MOD"; + +@pure +fun mulrshiftr7mod(x: int, y: int): (int, int) + asm "7 MULRSHIFTR#MOD"; + +@pure +fun lshift256divr(x: int, y: int): int + asm "256 LSHIFT#DIVR"; + +@pure +fun lshift256divmodr(x: int, y: int): (int, int) + asm "256 LSHIFT#DIVMODR"; + +@pure +fun lshift255divmodr(x: int, y: int): (int, int) + asm "255 LSHIFT#DIVMODR"; + +@pure +fun lshift2divmodr(x: int, y: int): (int, int) + asm "2 LSHIFT#DIVMODR"; + +@pure +fun lshift7divmodr(x: int, y: int): (int, int) + asm "7 LSHIFT#DIVMODR"; + +@pure +fun lshiftdivmodr(x: int, y: int, s: int): (int, int) + asm "LSHIFTDIVMODR"; + +@pure +fun rshiftr256mod(x: int): (int, int) + asm "256 RSHIFTR#MOD"; + +@pure +fun rshiftr248mod(x: int): (int, int) + asm "248 RSHIFTR#MOD"; + +@pure +fun rshiftr4mod(x: int): (int, int) + asm "4 RSHIFTR#MOD"; + +@pure +fun rshift3mod(x: int): (int, int) + asm "3 RSHIFT#MOD"; + +/// computes y - x (Tolk compiler does not try to use this by itself) +@pure +fun sub_rev(x: int, y: int): int + asm "SUBR"; + +@pure +fun nan(): int + asm "PUSHNAN"; + +@pure +fun is_nan(x: int): int + asm "ISNAN"; + +/*----------------------- SQUARE ROOTS ---------------------------*/ + +/// computes sqrt(a*b) exactly rounded to the nearest integer +/// for all 0 <= a, b <= 2^256-1 +/// may be used with b=1 or b=scale of fixed-point numbers +@pure +@inline_ref +fun geom_mean(a: int, b: int): int { + if (!min(a, b)) { + return 0; + } + var s: int = log2_floor_p1(a); // throws out of range error if a < 0 or b < 0 + var t: int = log2_floor_p1(b); + // NB: (a-b)/2+b == (a+b)/2, but without overflow for large a and b + var x: int = (s == t ? 
(a - b) / 2 + b : 1 << ((s + t) / 2)); + do { + // if always used with b=2^const, may be optimized to "const LSHIFTDIVC#" + // it is important to use `mulDivCeil` here, not `mulDivFloor` or `mulDivRound` + var q: int = (mulDivCeil(a, b, x) - x) / 2; + x += q; + } while (q); + return x; +} + +/// integer square root, computes round(sqrt(a)) for all a>=0. +/// note: `inline` is better than `inline_ref` for such simple functions +@pure +@inline +fun sqrt(a: int): int { + return geom_mean(a, 1); +} + +/// version for fixed248 = fixed-point numbers with scale 2^248 +/// fixed248 sqrt(fixed248 x) +@pure +@inline +fun fixed248_sqrt(x: int): int { + return geom_mean(x, 1 << 248); +} + +/// fixed255 sqrt(fixed255 x) +@pure +@inline +fun fixed255_sqrt(x: int): int { + return geom_mean(x, 1 << 255); +} + +/// fixed248 sqr(fixed248 x); +@pure +@inline +fun fixed248_sqr(x: int): int { + return mulDivRound(x, x, 1 << 248); +} + +/// fixed255 sqr(fixed255 x); +@pure +@inline +fun fixed255_sqr(x: int): int { + return mulDivRound(x, x, 1 << 255); +} + +const fixed248_One: int = (1 << 248); +const fixed255_One: int = (1 << 255); + +/*------------------- USEFUL CONSTANTS -------------------*/ + +/// store huge constants in inline_ref functions for reuse +/// (y,z) where y=round(log(2)*2^256), z=round((log(2)*2^256-y)*2^128) +/// then log(2) = y/2^256 + z/2^384 +@pure +@inline_ref +fun log2_xconst_f256(): (int, int) { + return (80260960185991308862233904206310070533990667611589946606122867505419956976172, -32272921378999278490133606779486332143); +} + +/// (y,z) where Pi = y/2^254 + z/2^382 +@pure +@inline_ref +fun Pi_xconst_f254(): (int, int) { + return (90942894222941581070058735694432465663348344332098107489693037779484723616546, 108051869516004014909778934258921521947); +} + +/// atan(1/16) as fixed260 +@pure +@inline_ref +fun Atan1_16_f260(): int { + return 115641670674223639132965820642403718536242645001775371762318060545014644837101; // true value is ...101.0089... 
+} + +/// atan(1/8) as fixed259 +@pure +@inline_ref +fun Atan1_8_f259(): int { + return 115194597005316551477397594802136977648153890007566736408151129975021336532841; // correction -0.1687... +} + +/// atan(1/32) as fixed261 +@pure +@inline_ref +fun Atan1_32_f261(): int { + return 115754418570128574501879331591757054405465733718902755858991306434399246026247; // correction 0.395... +} + +/// inline is better than inline_ref for such very small functions +@pure +@inline +fun log2_const_f256(): int { + var (c: int, _) = log2_xconst_f256(); + return c; +} + +@pure +@inline +fun fixed248_log2_const(): int { + return log2_const_f256() ~>> 8; +} + +@pure +@inline +fun Pi_const_f254(): int { + var (c: auto, _) = Pi_xconst_f254(); + return c; +} + +@pure +@inline +fun fixed248_Pi_const(): int { + return Pi_const_f254() ~>> 6; +} + +/*-------------- HYPERBOLIC TANGENT AND EXPONENT ------------------*/ + +/// hyperbolic tangent of small x via n+2 terms of Lambert's continued fraction +/// n=17: good for |x| < log(2)/4 = 0.173 +/// fixed258 tanh_f258(fixed258 x, int n) +@pure +@inline_ref +fun tanh_f258(x: int, n: int): int { + var x2: int = mulDivRound(x, x, 1 << 255); // x^2 as fixed261 + var a: int = (2 * n + 5) << 250; // a=2n+5 as fixed250 + var c = a; + var Two: int = (1 << 251); // 2. 
as fixed250 + repeat (n) { + a = (c -= Two) + mulDivRound(x2, 1 << 239, a); // a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 + } + a = (stackMoveToTop(3) << 254) + mulDivRound(x2, 1 << 243, a); // a := 3+x^2/a as fixed254 + // y = x/(1+a') = x - x*a'/(1+a') = x - x*x^2/(a+x^2) where a' = x^2/a + return x - (mulDivRound(x, x2, a + (x2 ~>> 7)) ~>> 7); +} + +/// fixed257 expm1_f257(fixed257 x) +/// computes exp(x)-1 for small x via 19 terms of Lambert's continued fraction for tanh(x/2) +/// good for |x| < log(2)/2 = 0.347 (n=17); consumes ~3500 gas +@pure +@inline_ref +fun expm1_f257(x: int): int { + // (almost) compute tanh(x/2) first; x/2 as fixed258 = x as fixed257 + var x2: int = mulDivRound(x, x, 1 << 255); // x^2 as fixed261 + var Two: int = (1 << 251); // 2. as fixed250 + var a: int = stackMoveToTop(39) << 250; // a=2n+5 as fixed250 + var c = a; + repeat (17) { + a = (c -= Two) + mulDivRound(x2, 1 << 239, a); // a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 + } + a = (stackMoveToTop(3) << 254) + mulDivRound(x2, 1 << 243, a); // a := 3+x^2/a as fixed254 + // now tanh(x/2) = x/(1+a') where a'=x^2/a ; apply exp(x)-1=2*tanh(x/2)/(1-tanh(x/2)) + var t: int = (x ~>> 4) - a; // t:=x-a as fixed254 + return x - mulDivRound(x2, t / 2, a + mulrshiftr256(x, t) ~/ 4) ~/ 4; // x - x^2 * (x-a) / (a + x*(x-a)) +} + +/// expm1_f257() may be used to implement specific fixed-point exponentials +/// example: +/// fixed248 exp(fixed248 x) +@pure +@inline_ref +fun fixed248_exp(x: int): int { + var (l2c, l2d) = log2_xconst_f256(); + // divide x by log(2) and convert to fixed257 + // (int q, x) = muldivmodr(x, 256, l2c); // unfortunately, no such built-in + var (q: int, x redef) = lshiftdivmodr(x, l2c, 8); + x = 2 * x - mulDivRound(q, l2d, 1 << 127); + var y: int = expm1_f257(x); + // result is (1 + y) * (2^q) --> ((1 << 257) + y) >> (9 - q) + return (y ~>> (9 - q)) - (-1 << (248 + q)); + // note that (y ~>> (9 - q)) + (1 << (248 + q)) leads to overflow when q=8 +} + +/// compute 2^x in 
fixed248 +/// fixed248 exp2(fixed248 x) +@pure +@inline_ref +fun fixed248_exp2(x: int): int { + // (int q, x) = divmodr(x, 1 << 248); // no such built-in + var (q: int, x redef) = rshiftr248mod(x); + x = mulDivRound(x, log2_const_f256(), 1 << 247); + var y: int = expm1_f257(x); + return (y ~>> (9 - q)) - (-1 << (248 + q)); +} + +/*-------------------- TRIGONOMETRIC FUNCTIONS ----------------------*/ + +/// fixed260 tan(fixed260 x); +/// computes tan(x) for small |x|> 10)) ~>> 9); +} + +/// fixed260 tan(fixed260 x); +@pure +@inline_ref +fun tan_f260(x: int): int { + return tan_f260_inlined(x); +} + +/// fixed258 tan(fixed258 x); +/// computes tan(x) for small |x|> 6)) ~>> 5); +} + +/// fixed258 tan(fixed258 x); +@pure +@inline_ref +fun tan_f258(x: int): int { + return tan_f258_inlined(x); +} + +/// (fixed259, fixed263) sincosm1(fixed259 x) +/// computes (sin(x), 1-cos(x)) for small |x|<2*atan(1/16) +@pure +@inline +fun sincosm1_f259_inlined(x: int): (int, int) { + var t: int = tan_f260_inlined(x); // t=tan(x/2) as fixed260 + var tt: int = mulrshiftr256(t, t); // t^2 as fixed264 + var y: int = tt ~/ 512 + (1 << 255); // 1+t^2 as fixed255 + // 2*t/(1+t^2) as fixed259 and 2*t^2/(1+t^2) as fixed263 + // return (mulDivRound(t, 1 << 255, y), mulDivRound(tt, 1 << 255, y)); + return (t - mulDivRound(t / 2, tt, y) ~/ 256, tt - mulDivRound(tt / 2, tt, y) ~/ 256); +} + +@pure +@inline_ref +fun sincosm1_f259(x: int): (int, int) { + return sincosm1_f259_inlined(x); +} + +/// computes (sin(x+xe),-cos(x+xe)) for |x| <= Pi/4, xe very small +/// this function is very accurate, error less than 0.7 ulp (consumes ~ 5500 gas) +/// (fixed256, fixed256) sincosn(fixed256 x, fixed259 xe) +@pure +@inline_ref +fun sincosn_f256(x: int, xe: int): (int, int) { + // var (q, x1) = muldivmodr(x, 8, Atan1_8_f259()); // no muldivmodr() builtin + var (q, x1) = lshift2divmodr(abs(x), Atan1_8_f259()); // reduce mod theta where theta=2*atan(1/8) + var (si, co) = sincosm1_f259(x1 * 2 + xe); + var (a, b, 
c) = (-1, 0, 1); + repeat (q) { + // (a+b*I) *= (8+I)^2 = 63+16*I + (a, b, c) = (63 * a - 16 * b, 16 * a + 63 * b, 65 * c); + } + // now a/c = cos(q*theta), b/c = sin(q*theta) exactly(!) + // compute (a+b*I)*(1-co+si*I)/c + // (b, a) = (lshift256divr(b, c), lshift256divr(a, c)); + var (b redef, br: int) = lshift256divmodr(b, c); br = mulDivRound(br, 128, c); + var (a redef, ar: int) = lshift256divmodr(a, c); ar = mulDivRound(ar, 128, c); + return (sign(x) * (((mulrshiftr256(b, co) - br) ~/ 16 - mulrshiftr256(a, si)) ~/ 8 - b), + a - ((mulrshiftr256(a, co) - ar) ~/ 16 + mulrshiftr256(b, si)) ~/ 8); +} + +/// compute (sin(x),1-cos(x)) in fixed256 for |x| < 16*atan(1/16) = 0.9987 +/// (fixed256, fixed257) sincosm1_f256(fixed256 x); +/// slightly less accurate than sincosn_f256() (error up to 3/2^256), but faster (~ 4k gas) and shorter +@pure +@inline_ref +fun sincosm1_f256(x: int): (int, int) { + var (si, co) = sincosm1_f259_inlined(x); // compute (sin,1-cos)(x/8) in (fixed259,fixed263) + var r: int = 7; + repeat (r / 2) { + // 1-cos(2*x) = 2*sin(x)^2, sin(2*x) = 2*sin(x)*cos(x) + (co, si) = (mulrshiftr256(si, si), si - (mulrshiftr256(si, co) ~>> r)); + r -= 2; + } + return (si, co); +} + +/// compute (p, q) such that p/q = tan(x) for |x|<2*atan(1/2)=1899/2048=0.927 +/// (int, int) tan_aux(fixed256 x); +@pure +@inline_ref +fun tan_aux_f256(x: int): (int, int) { + var t: int = tan_f258_inlined(x); // t=tan(x/4) as fixed258 + // t:=2*t/(1-t^2)=2*(t-t^3/(t^2-1)) + var tt: int = mulrshiftr256(t, t); // t^2 as fixed260 + t = mulDivRound(t, tt, tt ~/ 16 + (-1 << 256)) ~/ 16 - t; // now t=-tan(x/2) as fixed259 + return (t, mulrshiftr256(t, t) ~/ 4 + (-1 << 256)); // return (2*t, t^2-1) as fixed256 +} + +/// sincosm1_f256() and sincosn_f256() may be used to implement trigonometric functions for different fixed-point types +/// example: +/// (fixed248, fixed248) sincos(fixed248 x); +@pure +@inline_ref +fun fixed248_sincos(x: int): (int, int) { + var (Pic, Pid) = 
Pi_xconst_f254(); + // (int q, x) = muldivmodr(x, 128, Pic); // no muldivmodr() builtin + var (q: int, x redef) = lshift7divmodr(x, Pic); // reduce mod Pi/2 + x = 2 * x - mulDivRound(q, Pid, 1 << 127); + var (si: int, co: int) = sincosm1_f256(x); // doesn't make sense to use more accurate sincosn_f256() + co = (1 << 248) - (co ~>> 9); + si = si ~>> 8; + repeat (q & 3) { + (si, co) = (co, -si); + } + return (si, co); +} + +/// fixed248 sin(fixed248 x); +/// inline is better than inline_ref for such simple functions +@pure +@inline +fun fixed248_sin(x: int): int { + var (si: int, _) = fixed248_sincos(x); + return si; +} + +/// fixed248 cos(fixed248 x); +@pure +@inline +fun fixed248_cos(x: int): int { + var (_, co: int) = fixed248_sincos(x); + return co; +} + +/// similarly, tan_aux_f256() may be used to implement tan() and cot() for specific fixed-point formats +/// fixed248 tan(fixed248 x); +/// not very accurate when |tan(x)| is very large (difficult to do better without floating-point numbers) +/// however, the relative accuracy is approximately 2^-247 in all cases, which is good enough for arguments given up to 2^-249 +@pure +@inline_ref +fun fixed248_tan(x: int): int { + var (Pic, Pid) = Pi_xconst_f254(); + // (int q, x) = muldivmodr(x, 128, Pic); // no muldivmodr() builtin + var (q: int, x redef) = lshift7divmodr(x, Pic); // reduce mod Pi/2 + x = 2 * x - mulDivRound(q, Pid, 1 << 127); + var (a, b) = tan_aux_f256(x); // now a/b = tan(x') + if (q & 1) { + (a, b) = (b, -a); + } + return mulDivRound(a, 1 << 248, b); // either -b/a or a/b as fixed248 +} + +/// fixed248 cot(fixed248 x); +@pure +@inline_ref +fun fixed248_cot(x: int): int { + var (Pic, Pid) = Pi_xconst_f254(); + var (q: int, x redef) = lshift7divmodr(x, Pic); // reduce mod Pi/2 + x = 2 * x - mulDivRound(q, Pid, 1 << 127); + var (b, a) = tan_aux_f256(x); // now b/a = tan(x') + if (q & 1) { + (a, b) = (b, -a); + } + return mulDivRound(a, 1 << 248, b); // either -b/a or a/b as fixed248 +} + 
+/*---------------- INVERSE HYPERBOLIC TANGENT AND LOGARITHMS ----------------*/ + +/// inverse hyperbolic tangent of small x, evaluated by means of n terms of the continued fraction +/// valid for |x| < 2^-2.5 ~ 0.18 if n=37 (slightly less accurate with n=36) +/// |x| < 1/8 if n=32; |x| < 2^-3.5 if n=28; |x| < 1/16 if n=25 +/// |x| < 2^-4.5 if n=23; |x| < 1/32 if n=21; |x| < 1/64 if n=18 +/// fixed258 atanh(fixed258 x); +@pure +@inline_ref +fun atanh_f258(x: int, n: int): int { + var x2: int = mulrshiftr256(x, x); // x^2 as fixed260 + var One: int = (1 << 254); + var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 + repeat (n - 1) { + // a := 1 + (1 - x^2 / a)(1 + 1/n) as fixed254 + var t: int = One - mulDivRound(x2, 1 << 248, a); // t := 1 - x^2 / a + var n1: int = n - 1; + a = mulDivRound(t, n, n1) + One; + n = n1; + } + // x / (1 - x^2 / a) = x / (1 - d) = x + x * d / (1 - d) for d = x^2 / a + // int d = mulDivRound(x2, 1 << 255, a - (x2 ~>> 6)); // d/(1-d) = x^2/(a-x^2) as fixed261 + // return x + (mulrshiftr256(x, d) ~>> 5); + return x + mulDivRound(x, x2 / 2, a - x2 ~/ 64) ~/ 32; +} + +/// number of terms n should be chosen as for atanh_f258() +/// fixed261 atanh(fixed261 x); +@pure +@inline +fun atanh_f261_inlined(x: int, n: int): int { + var x2: int = mulrshiftr256(x, x); // x^2 as fixed266 + var One: int = (1 << 254); + var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 + repeat (n - 1) { + // a := 1 + (1 - x^2 / a)(1 + 1/n) as fixed254 + var t: int = One - mulDivRound(x2, 1 << 242, a); // t := 1 - x^2 / a + var n1: int = n - 1; + a = mulDivRound(t, n, n1) + One; + n = n1; + } + // x / (1 - x^2 / a) = x / (1 - d) = x + x * d / (1 - d) for d = x^2 / a + // int d = mulDivRound(x2, 1 << 255, a - (x2 ~>> 12)); // d/(1-d) = x^2/(a-x^2) as fixed267 + // return x + (mulrshiftr256(x, d) ~>> 11); + return x + mulDivRound(x, x2, a - x2 ~/ 4096) ~/ 4096; +} + +/// fixed261 atanh(fixed261 x); +@pure +@inline_ref +fun atanh_f261(x: int, n: 
int): int { + return atanh_f261_inlined(x, n); +} + +/// returns (y, s) such that log(x) = y/2^257 + s*log(2) for positive integer x +/// (fixed257, int) log_aux(int x) +@pure +@inline_ref +fun log_aux_f257(x: int): (int, int) { + var s: int = log2_floor_p1(x); + x <<= 256 - s; + var t: int = stackMoveToTop(-1 << 256); + if ((x >> 249) <= 90) { + // t~stackMoveToTop(); + t >>= 1; + s -= 1; + } + x += t; + var `2x`: int = 2 * x; + var y: int = lshift256divr(`2x`, (x >> 1) - t); + // y = `2x` - (mulrshiftr256(2x, y) ~>> 2); // this line could improve precision on very rare occasions + return (atanh_f258(y, 36), s); +} + +/// computes 33^m for small m +@pure +@inline +fun pow33(m: int): int { + var t: int = 1; + repeat (m) { + t *= 33; + } + return t; +} + +/// computes 33^m for small 0<=m<=22 +/// slightly faster than pow33() +@pure +@inline +fun pow33b(m: int): int { + var (mh: int, ml: int) = divMod(m, 5); + var t: int = 1; + repeat (ml) { + t *= 33; + } + repeat (mh) { + t *= 33 * 33 * 33 * 33 * 33; + } + return t; +} + +/// returns (s, q, y) such that log(x) = s*log(2) + q*log(33/32) + y/2^260 for positive integer x +/// (int, int, fixed260) log_auxx_f260(int x); +@pure +@inline_ref +fun log_auxx_f260(x: int): (int, int, int) { + var s: int = log2_floor_p1(x) - 1; + x <<= 255 - s; // rescale to 1 <= x < 2 as fixed255 + var t: int = stackMoveToTop(2873) << 244; // ~ (33/32)^11 ~ sqrt(2) as fixed255 + var x1: int = (x - t) >> 1; + var q: int = mulDivRound(x1, 65, x1 + t) + 11; // crude approximation to round(log(x)/log(33/32)) + // t = 1; repeat (q) { t *= 33; } // t:=33^q, 0<=q<=22 + t = pow33b(q); + t <<= (51 - q) * 5; // t:=(33/32)^q as fixed255, nearest power of 33/32 to x + x -= t; + var y: int = lshift256divr(x << 4, (x >> 1) + t); // y = (x-t)/(x+t) as fixed261 + y = atanh_f261(y, 18); // atanh((x-t)/(x+t)) as fixed261, or log(x/t) as fixed260 + return (s, q, y); +} + +/// returns (y, s) such that log(x) = y/2^256 + s*log(2) for positive integer x +/// this 
function is very precise (error less than 0.6 ulp) and consumes < 7k gas +/// may be used to implement specific fixed-point instances of log() and log2() +/// (fixed256, int) log_aux_f256(int x); +@pure +@inline_ref +fun log_aux_f256(x: int): (int, int) { + var (s, q, y) = log_auxx_f260(x); + var (yh, yl) = rshiftr4mod(y); // y ~/% 16 , but Tolk does not optimize this to RSHIFTR#MOD + // int Log33_32 = 3563114646320977386603103333812068872452913448227778071188132859183498739150; // log(33/32) as fixed256 + // int Log33_32_l = -3769; // log(33/32) = Log33_32 / 2^256 + Log33_32_l / 2^269 + yh += (yl * 512 + q * -3769) ~>> 13; // compensation, may be removed if slightly worse accuracy is acceptable + var Log33_32: int = 3563114646320977386603103333812068872452913448227778071188132859183498739150; // log(33/32) as fixed256 + return (yh + q * Log33_32, s); +} + +/// returns (y, s) such that log2(x) = y/2^256 + s for positive integer x +/// this function is very precise (error less than 0.6 ulp) and consumes < 7k gas +/// may be used to implement specific fixed-point instances of log() and log2() +/// (fixed256, int) log2_aux_f256(int x); +@pure +@inline_ref +fun log2_aux_f256(x: int): (int, int) { + var (s, q, y) = log_auxx_f260(x); + y = lshift256divr(y, log2_const_f256()) ~>> 4; // y/log(2) as fixed256 + var Log33_32: int = 5140487830366106860412008603913034462883915832139695448455767612111363481357; // log_2(33/32) as fixed256 + // Log33_32/2^256 happens to be a very precise approximation to log_2(33/32), no compensation required + return (y + q * Log33_32, s); +} + + +/// fixed248 log(fixed248 x) +@pure +@inline_ref +fun fixed248_log(x: int): int { + var (y, s) = log_aux_f256(x); + return mulDivRound(s - 248, log2_const_f256(), 1 << 8) + (y ~>> 8); + // return mulDivRound(s - 248, 80260960185991308862233904206310070533990667611589946606122867505419956976172, 1 << 8) + (y ~>> 8); +} + +/// fixed248 log2(fixed248 x) +@pure +@inline +fun fixed248_log2(x: int): int { + 
var (y, s) = log2_aux_f256(x); + return ((s - 248) << 248) + (y ~>> 8); +} + +/// computes x^y as exp(y*log(x)), x >= 0 +/// fixed248 pow(fixed248 x, fixed248 y); +@pure +@inline_ref +fun fixed248_pow(x: int, y: int): int { + if (!y) { + return 1 << 248; // x^0 = 1 + } + if (x <= 0) { + var bad: int = (x | y) < 0; + return 0 >> bad; // 0^y = 0 if x=0 and y>=0; "out of range" exception otherwise + } + var (l, s) = log2_aux_f256(x); + s -= 248; // log_2(x) = s+l, l is fixed256, 0<=l<1 + // compute (s+l)*y = q+ll + var (q1, r1) = mulrshiftr248mod(s, y); // muldivmodr(s, y, 1 << 248) + var (q2, r2) = mulrshift256mod(l, y); + r2 >>= 247; + var (q3, r3) = rshiftr248mod(q2); // divmodr(q2, 1 << 248); + var (q, ll) = rshiftr248mod(r1 + r3); + ll = 512 * ll + r2; + q += q1 + q3; + // now log_2(x^y) = y*log_2(x) = q + ll, q integer, ll fixed257, -1/2<=ll<1/2 + var sq: int = q + 248; + if (sq <= 0) { + return -(sq == 0); // underflow + } + y = expm1_f257(mulrshiftr256(ll, log2_const_f256())); + return (y ~>> (9 - q)) - (-1 << sq); +} + +/*-------------------- INVERSE TRIGONOMETRIC FUNCTIONS ------------------*/ + +/// number of terms n should be chosen as for atanh_f258() +/// fixed259 atan(fixed259 x); +@pure +@inline_ref +fun atan_f259(x: int, n: int): int { + var x2: int = mulrshiftr256(x, x); // x^2 as fixed262 + var One: int = (1 << 254); + var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 + repeat (n - 1) { + // a := 1 + (1 + x^2 / a)(1 + 1/n) as fixed254 + var t: int = One + mulDivRound(x2, 1 << 246, a); // t := 1 + x^2 / a + var n1: int = n - 1; + a = mulDivRound(t, n, n1) + One; + n = n1; + } + // x / (1 + x^2 / a) = x / (1 + d) = x - x * d / (1 + d) = x - x * x^2/(a+x^2) for d = x^2 / a + return x - mulDivRound(x, x2, a + x2 ~/ 256) ~/ 256; +} + +/// number of terms n should be chosen as for atanh_f261() +/// fixed261 atan(fixed261 x); +@pure +@inline +fun atan_f261_inlined(x: int, n: int): int { + var x2: int = mulrshiftr256(x, x); // x^2 as fixed266 
+ var One: int = (1 << 254); + var a: int = One ~/ n + (1 << 255); // a := 2 + 1/n as fixed254 + repeat (n - 1) { + // a := 1 + (1 + x^2 / a)(1 + 1/n) as fixed254 + var t: int = One + mulDivRound(x2, 1 << 242, a); // t := 1 + x^2 / a + var n1: int = n - 1; + a = mulDivRound(t, n, n1) + One; + n = n1; + } + // x / (1 + x^2 / a) = x / (1 + d) = x - x * d / (1 + d) = x - x * x^2/(a+x^2) for d = x^2 / a + return x - mulDivRound(x, x2, a + x2 ~/ 4096) ~/ 4096; +} + +/// fixed261 atan(fixed261 x); +@pure +@inline_ref +fun atan_f261(x: int, n: int): int { + return atan_f261_inlined(x, n); +} + +/// computes (q,a,b) such that q is approximately atan(x)/atan(1/32) and a+b*I=(1+I/32)^q as fixed255 +/// then b/a=tan(q*atan(1/32)) exactly, and (a,b) is almost a unit vector pointing in the direction of (1,x) +/// must have |x|<1.1, x is fixed24 +/// (int, fixed255, fixed255) atan_aux_prereduce(fixed24 x); +@pure +@inline_ref +fun atan_aux_prereduce(x: int): (int, int, int) { + var xu: int = abs(x); + var tc: int = 7214596; // tan(13*theta) as fixed24 where theta=atan(1/32) + var t1: int = mulDivRound(xu - tc, 1 << 88, xu * tc + (1 << 48)); // tan(x') as fixed64 where x'=atan(x)-13*theta + // t1/(3+t1^2) * 3073/32 = x'/3 * 3072/32 = x' / (96/3072) = x' / theta + var q: int = mulDivRound(t1 * 3073, 1 << 59, t1 * t1 + (stackMoveToTop(3) << 128)) + 13; // approximately round(atan(x)/theta), 0<=q<=25 + var (pa, pb) = (33226912, 5232641); // (32+I)^5 + var (qh, ql) = divMod(q, 5); + var (a, b) = (1 << (5 * (51 - q)), 0); // (1/32^q, 0) as fixed255 + repeat (ql) { + // a+b*I *= 32+I + (a, b) = (sub_rev(stackMoveToTop(b), 32 * a), a + 32 * b); // same as (32 * a - b, 32 * b + a), but more efficient + } + repeat (qh) { + // a+b*I *= (32+I)^5 = pa + pb*I + (a, b) = (a * pa - b * pb, a * pb + b * pa); + } + var xs: int = sign(x); + return (xs * q, a, xs * b); +} + +/// compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 +/// this function is reasonably accurate (error < 7 ulp 
with ulp = 2^-261), but it consumes >7k gas +/// this is sufficient for most purposes +/// (int, fixed261) atan_aux(fixed256 x) +@pure +@inline_ref +fun atan_aux_f256(x: int): (int, int) { + var (q, a, b) = atan_aux_prereduce(x ~>> 232); // convert x to fixed24 + // now b/a = tan(q*atan(1/32)) exactly, where q is near atan(x)/atan(1/32); so b/a is near x + // compute y = u/v = (a*x-b)/(a+b*x) as fixed261 ; then |y|<0.0167 = 1.07/64 and atan(x)=atan(y)+q*atan(1/32) + var (u, ul) = mulrshiftr256mod(a, x); + u = (ul ~>> 250) + ((u - b) << 6); // |u| < 1/32, convert fixed255 -> fixed261 + var v: int = a + mulrshiftr256(b, x); // v is scalar product of (a,b) and (1,x), it is approximately in [1..sqrt(2)] as fixed255 + var y: int = mulDivRound(u, 1 << 255, v); // y = u/v as fixed261 + var z: int = atan_f261_inlined(y, 18); // z = atan(x)-q*atan(1/32) + return (q, z); +} + +/// compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 +/// this function is very accurate (error < 2 ulp), but it consumes >7k gas +/// in most cases, faster function atan_aux_f256() should be used +/// (int, fixed261) atan_auxx(fixed256 x) +@pure +@inline_ref +fun atan_auxx_f256(x: int): (int, int) { + var (q, a, b) = atan_aux_prereduce(x ~>> 232); // convert x to fixed24 + // now b/a = tan(q*atan(1/32)) exactly, where q is near atan(x)/atan(1/32); so b/a is near x + // compute y = (a*x-b)/(a+b*x) as fixed261 ; then |y|<0.0167 = 1.07/64 and atan(x)=atan(y)+q*atan(1/32) + // use sort of double precision arithmetic for this + var (u, ul) = mulrshiftr256mod(a, x); + ul /= 2; + u -= b; // |u| < 1/32 as fixed255 + var (v, vl) = mulrshiftr256mod(b, x); + vl /= 2; + v += a; // v is scalar product of (a,b) and (1,x), it is approximately in [1..sqrt(2)] as fixed255 + // y = (u + ul*eps) / (v + vl*eps) = u/v + (ul - vl * u/v)/v * eps where eps=1/2^255 + var (y, r) = lshift255divmodr(u, v); // y = u/v as fixed255 + var yl: int = mulDivRound(ul + r, 1 << 255, v) - mulDivRound(vl, y, v); // y/2^255 + 
yl/2^510 represent u/v + y = (yl ~>> 249) + (y << 6); // convert y to fixed261 + var z: int = atan_f261_inlined(y, 18); // z = atan(x)-q*atan(1/32) + return (q, z); +} + +/// consumes ~ 8k gas +/// fixed255 atan(fixed255 x); +@pure +@inline_ref +fun atan_f255(x: int): int { + var s: int = (x ~>> 256); + stackMoveToTop(x); + if (s) { + x = lshift256divr(-1 << 255, x); // x:=-1/x as fixed256 + } else { + x *= 2; // convert to fixed256 + } + var (q, z) = atan_aux_f256(x); + // now atan(x) = z + q*atan(1/32) + s*(Pi/2), z is fixed261 + var (Pi_h, Pi_l) = Pi_xconst_f254(); // Pi/2 as fixed255 + fixed383 + var (qh, ql) = mulrshiftr6mod(q, Atan1_32_f261()); + return qh + s * Pi_h + (z + ql + mulDivRound(s, Pi_l, 1 << 122)) ~/ 64; +} + +/// computes atan(x) for -1 <= x < 1 only +/// fixed256 atan_small(fixed256 x); +@pure +@inline_ref +fun atan_f256_small(x: int): int { + var (q, z) = atan_aux_f256(x); + // now atan(x) = z + q*atan(1/32), z is fixed261 + var (qh, ql) = mulrshiftr5mod(q, Atan1_32_f261()); + return qh + (z + ql) ~/ 32; +} + +/// fixed255 asin(fixed255 x); +@pure +@inline_ref +fun asin_f255(x: int): int { + var a: int = fixed255_One - fixed255_sqr(x); // a:=1-x^2 + if (!a) { + return sign(x) * Pi_const_f254(); // Pi/2 or -Pi/2 + } + var y: int = fixed255_sqrt(a); // sqrt(1-x^2) + var t: int = -lshift256divr(x, (-1 << 255) - y); // t = x/(1+sqrt(1-x^2)) avoiding overflow + return atan_f256_small(t); // asin(x)=2*atan(t) +} + +/// fixed254 acos(fixed255 x); +@pure +@inline_ref +fun acos_f255(x: int): int { + var Pi: int = Pi_const_f254(); + if (x == (-1 << 255)) { + return Pi; // acos(-1) = Pi + } + Pi /= 2; + var y: int = fixed255_sqrt(fixed255_One - fixed255_sqr(x)); // sqrt(1-x^2) + var t: int = lshift256divr(x, (-1 << 255) - y); // t = -x/(1+sqrt(1-x^2)) avoiding overflow + return Pi + atan_f256_small(t) ~/ 2; // acos(x)=Pi/2 + 2*atan(t) +} + +/// consumes ~ 10k gas +/// fixed248 asin(fixed248 x) +@pure +@inline +fun fixed248_asin(x: int): int { + return 
asin_f255(x << 7) ~>> 7; +} + +/// consumes ~ 10k gas +/// fixed248 acos(fixed248 x) +@pure +@inline +fun fixed248_acos(x: int): int { + return acos_f255(x << 7) ~>> 6; +} + +/// consumes ~ 7500 gas +/// fixed248 atan(fixed248 x); +@pure +@inline_ref +fun fixed248_atan(x: int): int { + var s: int = (x ~>> 249); + stackMoveToTop(x); + if (s) { + s = sign(s); + x = lshift256divr(-1 << 248, x); // x:=-1/x as fixed256 + } else { + x <<= 8; // convert to fixed256 + } + var (q, z) = atan_aux_f256(x); + // now atan(x) = z + q*atan(1/32) + s*(Pi/2), z is fixed261 + return (z ~/ 64 + s * Pi_const_f254() + mulDivRound(q, Atan1_32_f261(), 64)) ~/ 128; // compute in fixed255, then convert +} + +/// fixed248 acot(fixed248 x); +@pure +@inline_ref +fun fixed248_acot(x: int): int { + var s: int = (x ~>> 249); + stackMoveToTop(x); + if (s) { + x = lshift256divr(-1 << 248, x); // x:=-1/x as fixed256 + s = 0; + } else { + x <<= 8; // convert to fixed256 + s = sign(x); + } + var (q, z) = atan_aux_f256(x); + // now acot(x) = - z - q*atan(1/32) + s*(Pi/2), z is fixed261 + return (s * Pi_const_f254() - z ~/ 64 - mulDivRound(q, Atan1_32_f261(), 64)) ~/ 128; // compute in fixed255, then convert +} + +/*-------------------- PSEUDO-RANDOM NUMBERS ------------------*/ + +/// random number with standard normal distribution N(0,1) +/// generated by Kinderman--Monahan ratio method modified by J.Leva +/// spends ~ 2k..3k gas on average +/// fixed252 nrand(); +@inline_ref +fun nrand_f252(): int { + var (x, s, t, A, B, r0) = (nan(), stackMoveToTop(29483) << 236, stackMoveToTop(-3167) << 239, 12845, 16693, 9043); + // 4/sqrt(e*Pi) = 1.369 loop iterations on average + do { + var (u, v) = (random() / 16 + 1, mulDivRound(random() - (1 << 255), 7027, 1 << 16)); // fixed252; 7027=ceil(sqrt(8/e)*2^12) + var va: int = abs(v); + var (u1, v1) = (u - s, va - t); // (u - 29483/2^16, abs(v) + 3167/2^13) as fixed252 + // Q := u1^2 + v1 * (A*v1 - B*u1) as fixed252 where A=12845/2^16, B=16693/2^16 + var Q: int = 
mulDivRound(u1, u1, 1 << 252) + mulDivRound(v1, mulDivRound(v1, A, 1 << 16) - mulDivRound(u1, B, 1 << 16), 1 << 252); + // must have 9043 / 2^15 < Q < 9125 / 2^15, otherwise accept if smaller, reject if larger + var Qd: int = (Q >> 237) - r0; + if ((Qd < 9125 - 9043) & (va / u < 16)) { + x = mulDivRound(v, 1 << 252, u); // x:=v/u as fixed252; reject immediately if |v/u| >= 16 + if (Qd >= 0) { + // immediately accept if Qd < 0 + // rarely taken branch - 0.012 times per call on average + // check condition v^2 < -4*u^2*log(u), or equivalent condition u < exp(-x^2/4) for x=v/u + var xx: int = mulrshiftr256(x, x) ~/ 4; // x^2/4 as fixed248 + var ex: int = fixed248_exp(-xx) * 16; // exp(-x^2/4) as fixed252 + if (u > ex) { + x = nan(); // condition false, reject + } + } + } + } while (!(~ is_nan(x))); + return x; +} + +/// generates a random number approximately distributed according to the standard normal distribution +/// much faster than nrand_f252(), should be suitable for most purposes when only several random numbers are needed +/// fixed252 nrand_fast(); +@inline_ref +fun nrand_fast_f252(): int { + var t: int = stackMoveToTop(-3) << 253; // -6. 
as fixed252 + repeat (12) { + t += random() / 16; // add together 12 uniformly random numbers + } + return t; +} + +/// random number uniformly distributed in [0..1) +/// fixed248 random(); +@inline +fun fixed248_random(): int { + return random() >> 8; +} + +/// random number with standard normal distribution +/// fixed248 nrand(); +@inline +fun fixed248_nrand(): int { + return nrand_f252() ~>> 4; +} + +/// generates a random number approximately distributed according to the standard normal distribution +/// fixed248 nrand_fast(); +@inline +fun fixed248_nrand_fast(): int { + return nrand_fast_f252() ~>> 4; +} @pure fun ~tset(t: tuple, idx: int, value: X): (tuple, ()) @@ -11,8 +989,8 @@ fun acos_prepare_slow_f255(x: int): int { x -= (x == 0); var t: int = 1; repeat (255) { - t = t * sgn(x) * 2 + 1; // decode Gray code (sgn(x_0), sgn(x_1), ...) - x = (-1 << 255) - muldivr(x, - x, 1 << 254); // iterate x := 2*x^2 - 1 = cos(2*acos(x)) + t = t * sign(x) * 2 + 1; // decode Gray code (sign(x_0), sign(x_1), ...) 
+ x = (-1 << 255) - mulDivRound(x, - x, 1 << 254); // iterate x := 2*x^2 - 1 = cos(2*acos(x)) } return abs(t); } @@ -29,19 +1007,19 @@ fun acos_slow_f255(x: int): int { @inline_ref fun asin_slow_f255(x: int): int { var t: int = acos_prepare_slow_f255(abs(x)) % (1 << 255); - return muldivr(t, Pi_const_f254(), 1 << 255) * sgn(x); + return mulDivRound(t, Pi_const_f254(), 1 << 255) * sign(x); } @inline_ref fun test_nrand(n: int): tuple { - var t: tuple = empty_tuple(); + var t: tuple = createEmptyTuple(); repeat (255) { - t~tpush(0); + t~tuplePush(0); } repeat (n) { var x: int = fixed248_nrand(); var bucket: int = (abs(x) >> 243); // 255 buckets starting from x=0, each 1/32 wide - t~tset(bucket, t.at(bucket) + 1); + t~tset(bucket, t.tupleAt(bucket) + 1); } return t; } @@ -186,15 +1164,15 @@ fun main() { // return sincosn_f256(Pi_const_f254(), 0); // (sin,-cos)(Pi/4) // return sincosn_f256((1 << 255) + 1, 0); // (sin,-cos)(1/2+1/2^256) // return sincosn_f256(1 << 254, 0); - // return sincosn_f256(touch(15) << 252, 0); // (sin,-cos)(15/16) - // return sincosm1_f256(touch(15) << 252); // (sin,1-cos)(15/16) + // return sincosn_f256(stackMoveToTop(15) << 252, 0); // (sin,-cos)(15/16) + // return sincosm1_f256(stackMoveToTop(15) << 252); // (sin,1-cos)(15/16) // return sincosn_f256(60628596148627720713372490462954977108898896221398738326462025186323149077698, 0); // (sin,-cos)(Pi/6) // return sincosm1_f256(60628596148627720713372490462954977108898896221398738326462025186323149077698); // (sin,1-cos)(Pi/6) // return tan_aux_f256(1899 << 245); // (p,q) such that p/q=tan(1899/2048) // return fixed248_tan(11 << 248); // tan(11) // return atanh_alt_f258(1 << 252); // atanh(1/64) * 2^258 // return atanh_f258(1 << 252, 18); // atanh(1/64) * 2^258 - // return atanh_f261(muldivr(64, 1 << 255, 55), 18); // atanh(1/55) * 2^261 + // return atanh_f261(mulDivRound(64, 1 << 255, 55), 18); // atanh(1/55) * 2^261 // return log2_aux_f256(1 << 255); // return log2_aux_f256(-1 - (-1 << 256)); 
// log2(2-1/2^255))*2^256 ~ 2^256 - 1.43 // return log_aux_f256(-1 - (-1 << 256)); @@ -213,10 +1191,10 @@ fun main() { // return fixed248_exp2((1 << 248) ~/ 5); // 2^(1/5)*2^248 // return fixed248_pow(3 << 248, -3 << 247); // 3^(-1.5) // return fixed248_pow(10 << 248, -70 << 248); // 10^(-70) - // return fixed248_pow(fixed248_Pi_const(), touch(3) << 248); // Pi^3 ~ 31.006, computed more precisely + // return fixed248_pow(fixed248_Pi_const(), stackMoveToTop(3) << 248); // Pi^3 ~ 31.006, computed more precisely // return fixed248_pow(fixed248_Pi_const(), fixed248_Pi_const()); // Pi^Pi, more precisely // return fixed248_exp(fixed248_log(fixed248_Pi_const()) * 3); // Pi^3 ~ 31.006 - // return fixed248_exp(muldivr(fixed248_log(fixed248_Pi_const()), fixed248_Pi_const(), 1 << 248)); // Pi^Pi + // return fixed248_exp(mulDivRound(fixed248_log(fixed248_Pi_const()), fixed248_Pi_const(), 1 << 248)); // Pi^Pi // return fixed248_sin(fixed248_log(fixed248_exp(fixed248_Pi_const()))); // sin(log(e^Pi)) // return expm1_f257(1 << 255); // (exp(1/4)-1)*2^256 // return expm1_f257(-1 << 256); // (exp(-1/2)-1)*2^256 (argument out of range, will overflow) @@ -225,21 +1203,21 @@ fun main() { // return tanh_f258(log2_const_f256(), 17); // tanh(log(2)/4)*2^258 // return atan_f255(0xa0 << 247); // return atan_f259(1 << 255, 26); // atan(1/16) - // return atan_f259(touch(2273) << 244, 26); // atan(2273/2^15) + // return atan_f259(stackMoveToTop(2273) << 244, 26); // atan(2273/2^15) // return atan_aux_f256(0xa0 << 248); // return atan_aux_f256(-1 - (-1 << 256)); // return atan_aux_f256(-1 << 256); // return atan_aux_f256(1); // atan(1/2^256)*2^261 = 32 //return fixed248_nrand(); // return test_nrand(100000); - var One2: int = touch(1 << 255); + var One2: int = stackMoveToTop(1 << 255); // return asin_f255(One); // return asin_f255(-2 * One ~/ -3); - var arg: int = muldivr(12, One2, 17); // 12/17 + var arg: int = mulDivRound(12, One2, 17); // 12/17 // return [ asin_slow_f255(arg), asin_f255(arg) 
]; // return [ acos_slow_f255(arg), acos_f255(arg) ]; // return 4 * atan_f255(One ~/ 5) - atan_f255(One ~/ 239); // 4 * atan(1/5) - atan(1/239) = Pi/4 as fixed255 - var One3: int = touch(1 << 248); + var One3: int = stackMoveToTop(1 << 248); // return fixed248_atan(One) ~/ 5); // atan(1/5) // return fixed248_acot(One ~/ 239); // atan(1/5) } diff --git a/tolk-tester/tests/use-before-declare.tolk b/tolk-tester/tests/use-before-declare.tolk index 84569c0cf..2486df1c3 100644 --- a/tolk-tester/tests/use-before-declare.tolk +++ b/tolk-tester/tests/use-before-declare.tolk @@ -1,7 +1,7 @@ fun main(): int { - var c: cell = my_begin_cell().store_int(demo_10, 32).my_end_cell(); + var c: cell = my_begin_cell().storeInt(demo_10, 32).my_end_cell(); var cs: slice = my_begin_parse(c); - var ten: int = cs~load_int(32); + var ten: int = cs~loadInt(32); return 1 + demo1(ten) + demo_var; } diff --git a/tolk-tester/tests/w2.tolk b/tolk-tester/tests/w2.tolk index b013ab06d..24820f143 100644 --- a/tolk-tester/tests/w2.tolk +++ b/tolk-tester/tests/w2.tolk @@ -1,6 +1,6 @@ @method_id(101) fun test1(cs: slice) { - return cs~load_uint(8)+cs~load_uint(8)+cs~load_uint(8)+cs~load_uint(8); + return cs~loadUint(8)+cs~loadUint(8)+cs~loadUint(8)+cs~loadUint(8); } @method_id(102) @@ -12,15 +12,15 @@ fun test2(cs: slice) { } fun main(cs: slice) { - return (cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), cs~load_uint(8)); + return (cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), cs~loadUint(8)); } fun f(cs: slice) { - return (cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), - cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), - cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), - cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), - cs~load_uint(8), cs~load_uint(8), cs~load_uint(8), cs~load_uint(8)); + return (cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), + cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), 
cs~loadUint(8), + cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), + cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), + cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), cs~loadUint(8)); } diff --git a/tolk-tester/tolk-tester.js b/tolk-tester/tolk-tester.js index 3fb92ff08..2a3eb776a 100644 --- a/tolk-tester/tolk-tester.js +++ b/tolk-tester/tolk-tester.js @@ -27,6 +27,7 @@ const TOLKFIFTLIB_MODULE = getenv('TOLKFIFTLIB_MODULE') const TOLKFIFTLIB_WASM = getenv('TOLKFIFTLIB_WASM') const FIFT_EXECUTABLE = getenv('FIFT_EXECUTABLE') const FIFT_LIBS_FOLDER = getenv('FIFTPATH') // this env is needed for fift to work properly +const STDLIB_FOLDER = __dirname + '/../crypto/smartcont/tolk-stdlib' const TMP_DIR = os.tmpdir() class CmdLineOptions { @@ -475,25 +476,33 @@ function copyToCStringPtr(mod, str, ptr) { return allocated; } +/** @return {string} */ function copyFromCString(mod, ptr) { return mod.UTF8ToString(ptr); } /** @return {{status: string, message: string, fiftCode: string, codeBoc: string, codeHashHex: string}} */ function compileFile(mod, filename, experimentalOptions) { - // see tolk-wasm.cpp: typedef void (*CStyleReadFileCallback)(int, char const*, char**, char**) + // see tolk-wasm.cpp: typedef void (*WasmFsReadCallback)(int, char const*, char**, char**) const callbackPtr = mod.addFunction((kind, dataPtr, destContents, destError) => { if (kind === 0) { // realpath try { - const relativeFilename = copyFromCString(mod, dataPtr) - copyToCStringPtr(mod, fs.realpathSync(relativeFilename), destContents); + let relative = copyFromCString(mod, dataPtr) + if (relative.startsWith('@stdlib/')) { + // import "@stdlib/filename" or import "@stdlib/filename.tolk" + relative = STDLIB_FOLDER + '/' + relative.substring(7) + if (!relative.endsWith('.tolk')) { + relative += '.tolk' + } + } + copyToCStringPtr(mod, fs.realpathSync(relative), destContents); } catch (err) { copyToCStringPtr(mod, 'cannot find file', destError); } } else if (kind === 1) { 
// read file try { - const filename = copyFromCString(mod, dataPtr) // already normalized (as returned above) - copyToCStringPtr(mod, fs.readFileSync(filename).toString('utf-8'), destContents); + const absolute = copyFromCString(mod, dataPtr) // already normalized (as returned above) + copyToCStringPtr(mod, fs.readFileSync(absolute).toString('utf-8'), destContents); } catch (err) { copyToCStringPtr(mod, err.message || err.toString(), destError); } @@ -506,7 +515,6 @@ function compileFile(mod, filename, experimentalOptions) { optimizationLevel: 2, withStackComments: true, experimentalOptions: experimentalOptions || undefined, - stdlibLocation: __dirname + '/../crypto/smartcont/stdlib.tolk', entrypointFileName: filename }; diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index 245021c3b..09e02c0ac 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -35,12 +35,6 @@ if (${TOLK_DEBUG}) # -DTOLK_DEBUG=1 in CMake options => #define TOLK_DEBUG (for target_compile_definitions(tolk PRIVATE TOLK_DEBUG=1) endif() -if (NOT USE_EMSCRIPTEN) - get_filename_component(STDLIB_TOLK_IF_BUILD_FROM_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/../crypto/smartcont/stdlib.tolk" REALPATH) - target_compile_definitions(tolk PRIVATE STDLIB_TOLK_IF_BUILD_FROM_SOURCES="${STDLIB_TOLK_IF_BUILD_FROM_SOURCES}") -endif() - - if (USE_EMSCRIPTEN) add_executable(tolkfiftlib tolk-wasm.cpp ${TOLK_SOURCE}) target_include_directories(tolkfiftlib PUBLIC $) diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index 52144d419..a123b0a85 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -1121,10 +1121,6 @@ void define_builtins() { define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0)); define_builtin_func("_^/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1)); define_builtin_func("_%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1)); - define_builtin_func("divmod", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2)); - define_builtin_func("~divmod", 
TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2)); - define_builtin_func("moddiv", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2), {}, {1, 0}); - define_builtin_func("~moddiv", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2), {}, {1, 0}); define_builtin_func("_<<_", arith_bin_op, compile_lshift); define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1)); define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0)); @@ -1144,10 +1140,10 @@ void define_builtins() { define_builtin_func("^_&=_", arith_bin_op, compile_bitwise_and); define_builtin_func("^_|=_", arith_bin_op, compile_bitwise_or); define_builtin_func("^_^=_", arith_bin_op, compile_bitwise_xor); - define_builtin_func("muldiv", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1)); - define_builtin_func("muldivr", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0)); - define_builtin_func("muldivc", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1)); - define_builtin_func("muldivmod", TypeExpr::new_map(Int3, Int2), AsmOp::Custom("MULDIVMOD", 3, 2)); + define_builtin_func("mulDivFloor", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1)); + define_builtin_func("mulDivRound", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0)); + define_builtin_func("mulDivCeil", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1)); + define_builtin_func("mulDivMod", TypeExpr::new_map(Int3, Int2), AsmOp::Custom("MULDIVMOD", 3, 2)); define_builtin_func("_==_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 2)); define_builtin_func("_!=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 5)); define_builtin_func("_<_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 4)); @@ -1162,24 +1158,23 @@ void define_builtins() { define_builtin_func("__throw", impure_un_op, compile_throw, true); define_builtin_func("__throw_arg", 
throw_arg_op, compile_throw_arg, true); define_builtin_func("__throw_if_unless", TypeExpr::new_map(Int3, Unit), std::bind(compile_throw_if_unless, _1, _2), true); - define_builtin_func("load_int", fetch_int_op, std::bind(compile_fetch_int, _1, _2, true, true), {}, {1, 0}); - define_builtin_func("load_uint", fetch_int_op, std::bind(compile_fetch_int, _1, _2, true, false), {}, {1, 0}); - define_builtin_func("preload_int", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, true)); - define_builtin_func("preload_uint", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, false)); - define_builtin_func("store_int", store_int_op, std::bind(compile_store_int, _1, _2, true), {1, 0, 2}); - define_builtin_func("store_uint", store_int_op, std::bind(compile_store_int, _1, _2, false), {1, 0, 2}); - define_builtin_func("~store_int", store_int_method, std::bind(compile_store_int, _1, _2, true), {1, 0, 2}); - define_builtin_func("~store_uint", store_int_method, std::bind(compile_store_int, _1, _2, false), {1, 0, 2}); - define_builtin_func("load_bits", fetch_slice_op, std::bind(compile_fetch_slice, _1, _2, true), {}, {1, 0}); - define_builtin_func("preload_bits", prefetch_slice_op, std::bind(compile_fetch_slice, _1, _2, false)); - define_builtin_func("at", TypeExpr::new_forall({X}, TypeExpr::new_map(TupleInt, X)), compile_tuple_at); - define_builtin_func("touch", TypeExpr::new_forall({X}, TypeExpr::new_map(X, X)), AsmOp::Nop()); - define_builtin_func("~touch", TypeExpr::new_forall({X}, TypeExpr::new_map(X, TypeExpr::new_tensor({X, Unit}))), - AsmOp::Nop()); - define_builtin_func("~dump", TypeExpr::new_forall({X}, TypeExpr::new_map(X, TypeExpr::new_tensor({X, Unit}))), - AsmOp::Custom("s0 DUMP", 1, 1), true); - define_builtin_func("~strdump", TypeExpr::new_forall({X}, TypeExpr::new_map(X, TypeExpr::new_tensor({X, Unit}))), - AsmOp::Custom("STRDUMP", 1, 1), true); + define_builtin_func("loadInt", fetch_int_op, std::bind(compile_fetch_int, _1, _2, true, true), {}, 
{1, 0}); + define_builtin_func("loadUint", fetch_int_op, std::bind(compile_fetch_int, _1, _2, true, false), {}, {1, 0}); + define_builtin_func("loadBits", fetch_slice_op, std::bind(compile_fetch_slice, _1, _2, true), {}, {1, 0}); + define_builtin_func("preloadInt", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, true)); + define_builtin_func("preloadUint", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, false)); + define_builtin_func("preloadBits", prefetch_slice_op, std::bind(compile_fetch_slice, _1, _2, false)); + define_builtin_func("storeInt", store_int_op, std::bind(compile_store_int, _1, _2, true), {1, 0, 2}); + define_builtin_func("storeUint", store_int_op, std::bind(compile_store_int, _1, _2, false), {1, 0, 2}); + define_builtin_func("~storeInt", store_int_method, std::bind(compile_store_int, _1, _2, true), {1, 0, 2}); + define_builtin_func("~storeUint", store_int_method, std::bind(compile_store_int, _1, _2, false), {1, 0, 2}); + define_builtin_func("tupleAt", TypeExpr::new_forall({X}, TypeExpr::new_map(TupleInt, X)), compile_tuple_at); + define_builtin_func("debugPrint", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)), + AsmOp::Custom("s0 DUMP DROP", 1, 1), true); + define_builtin_func("debugPrintString", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)), + AsmOp::Custom("STRDUMP DROP", 1, 1), true); + define_builtin_func("debugDumpStack", TypeExpr::new_map(Unit, Unit), + AsmOp::Custom("DUMPSTK", 0, 0), true); } } // namespace tolk diff --git a/tolk/compiler-state.h b/tolk/compiler-state.h index 23df230b6..aec1945e2 100644 --- a/tolk/compiler-state.h +++ b/tolk/compiler-state.h @@ -52,10 +52,9 @@ struct CompilerSettings { int optimization_level = 2; bool stack_layout_comments = true; - std::string entrypoint_filename; std::string output_filename; std::string boc_output_filename; - std::string stdlib_filename; + std::string stdlib_folder; // a path to tolk-stdlib/; files imported via @stdlib/xxx are there FsReadCallback 
read_callback; @@ -82,8 +81,6 @@ struct CompilerState { std::vector all_code_functions, all_global_vars, all_get_methods, all_constants; AllRegisteredSrcFiles all_src_files; - std::string generated_from; - bool is_verbosity(int gt_eq) const { return settings.verbosity >= gt_eq; } }; diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 539652c06..3e713e726 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -979,12 +979,6 @@ void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles& all_src_files) { for (const SrcFile* file : all_src_files) { tolk_assert(file->ast); - if (!file->is_stdlib_file()) { - // file->ast->debug_print(); - G.generated_from += file->rel_filename; - G.generated_from += ", "; - } - for (AnyV v : file->ast->as()->get_toplevel_declarations()) { if (auto v_func = v->try_as()) { if (v_func->is_asm_function()) { diff --git a/tolk/pipe-discover-parse-sources.cpp b/tolk/pipe-discover-parse-sources.cpp index c57c9c1d4..a8445ae95 100644 --- a/tolk/pipe-discover-parse-sources.cpp +++ b/tolk/pipe-discover-parse-sources.cpp @@ -42,10 +42,11 @@ AllSrcFiles pipeline_discover_and_parse_sources(const std::string& stdlib_filena for (AnyV v_toplevel : file->ast->as()->get_toplevel_declarations()) { if (auto v_import = v_toplevel->try_as()) { - size_t pos = file->rel_filename.rfind('/'); - std::string rel_filename = pos == std::string::npos - ? v_import->get_file_name() - : file->rel_filename.substr(0, pos + 1) + v_import->get_file_name(); + std::string imported_str = v_import->get_file_name(); + size_t cur_slash_pos = file->rel_filename.rfind('/'); + std::string rel_filename = cur_slash_pos == std::string::npos || imported_str[0] == '@' + ? 
std::move(imported_str) + : file->rel_filename.substr(0, cur_slash_pos + 1) + imported_str; SrcFile* imported = G.all_src_files.locate_and_register_source_file(rel_filename, v_import->loc); file->imports.push_back(SrcFile::ImportStatement{imported}); diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp index 627b510f7..65225d828 100644 --- a/tolk/pipe-generate-fif-output.cpp +++ b/tolk/pipe-generate-fif-output.cpp @@ -132,9 +132,20 @@ static void generate_output_func(SymDef* func_sym) { } } -void pipeline_generate_fif_output_to_std_cout() { +void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles& all_src_files) { std::cout << "\"Asm.fif\" include\n"; - std::cout << "// automatically generated from " << G.generated_from << std::endl; + std::cout << "// automatically generated from "; + bool need_comma = false; + for (const SrcFile* file : all_src_files) { + if (!file->is_stdlib_file()) { + if (need_comma) { + std::cout << ", "; + } + std::cout << file->rel_filename; + need_comma = true; + } + } + std::cout << std::endl; std::cout << "PROGRAM{\n"; bool has_main_procedure = false; diff --git a/tolk/pipe-register-symbols.cpp b/tolk/pipe-register-symbols.cpp index c84474f80..2e6d26dd8 100644 --- a/tolk/pipe-register-symbols.cpp +++ b/tolk/pipe-register-symbols.cpp @@ -296,7 +296,7 @@ static void register_function(V v) { v->error("`builtin` used for non-builtin function"); } #ifdef TOLK_DEBUG - // in release, we don't need this check, since `builtin` is used only in stdlib.tolk, which is our responsibility + // in release, we don't need this check, since `builtin` is used only in stdlib, which is our responsibility if (!func_val->sym_type->equals_to(func_type) || func_val->is_marked_as_pure() != v->marked_as_pure) { v->error("declaration for `builtin` function doesn't match an actual one"); } diff --git a/tolk/pipeline.h b/tolk/pipeline.h index 1330c97a4..fdfd2b996 100644 --- a/tolk/pipeline.h +++ b/tolk/pipeline.h @@ -36,6 +36,6 @@ 
void pipeline_register_global_symbols(const AllSrcFiles&); void pipeline_convert_ast_to_legacy_Expr_Op(const AllSrcFiles&); void pipeline_find_unused_symbols(); -void pipeline_generate_fif_output_to_std_cout(); +void pipeline_generate_fif_output_to_std_cout(const AllSrcFiles&); } // namespace tolk diff --git a/tolk/src-file.cpp b/tolk/src-file.cpp index b6c7e2d4a..e5533f697 100644 --- a/tolk/src-file.cpp +++ b/tolk/src-file.cpp @@ -87,6 +87,11 @@ AllSrcFiles AllRegisteredSrcFiles::get_all_files() const { return src_files_immutable; } +bool SrcFile::is_stdlib_file() const { + std::string_view rel(rel_filename); + return rel.size() > 10 && rel.substr(0, 8) == "@stdlib/"; // common.tolk, tvm-dicts.tolk, etc +} + bool SrcFile::is_offset_valid(int offset) const { return offset >= 0 && offset < static_cast(text.size()); } diff --git a/tolk/src-file.h b/tolk/src-file.h index 28de75680..815dccbed 100644 --- a/tolk/src-file.h +++ b/tolk/src-file.h @@ -51,7 +51,7 @@ struct SrcFile { SrcFile(const SrcFile& other) = delete; SrcFile &operator=(const SrcFile&) = delete; - bool is_stdlib_file() const { return file_id == 0; /* stdlib always exists, has no imports and parsed the first */ } + bool is_stdlib_file() const; bool is_offset_valid(int offset) const; SrcPosition convert_offset(int offset) const; diff --git a/tolk/tolk-main.cpp b/tolk/tolk-main.cpp index 2dc5a0df6..7f939670b 100644 --- a/tolk/tolk-main.cpp +++ b/tolk/tolk-main.cpp @@ -29,9 +29,14 @@ #include "td/utils/port/path.h" #include #include -#include #include -#include +#ifdef TD_DARWIN +#include +#elif TD_WINDOWS +#include +#else // linux +#include +#endif #include "git.h" using namespace tolk; @@ -50,42 +55,89 @@ void usage(const char* progname) { std::exit(2); } -static bool stdlib_file_exists(std::filesystem::path& stdlib_tolk) { +static bool stdlib_folder_exists(const char* stdlib_folder) { struct stat f_stat; - stdlib_tolk = stdlib_tolk.lexically_normal(); - int res = stat(stdlib_tolk.c_str(), &f_stat); - 
return res == 0 && S_ISREG(f_stat.st_mode); + int res = stat(stdlib_folder, &f_stat); + return res == 0 && (f_stat.st_mode & S_IFMT) == S_IFDIR; } -static std::string auto_discover_stdlib_location(const char* argv0) { - // first, the user can specify env var that points directly to stdlib (useful for non-standard compiler locations) - if (const char* env_var = getenv("TOLK_STDLIB")) { - return env_var; +// getting current executable path is a complicated and not cross-platform task +// for instance, we can't just use argv[0] or even filesystem::canonical +// https://stackoverflow.com/questions/1023306/finding-current-executables-path-without-proc-self-exe/1024937 +static bool get_current_executable_filename(std::string& out) { +#ifdef TD_DARWIN + char name_buf[1024]; + unsigned int size = 1024; + if (0 == _NSGetExecutablePath(name_buf, &size)) { // may contain ../, so normalize it + char *exe_path = realpath(name_buf, nullptr); + if (exe_path != nullptr) { + out = exe_path; + return true; + } } +#elif TD_WINDOWS + char exe_path[1024]; + if (GetModuleFileNameA(nullptr, exe_path, 1024)) { + out = exe_path; + std::replace(out.begin(), out.end(), '\\', '/'); // modern Windows correctly deals with / separator + return true; + } +#else // linux + char exe_path[1024]; + ssize_t res = readlink("/proc/self/exe", exe_path, 1024 - 1); + if (res >= 0) { + exe_path[res] = 0; + out = exe_path; + return true; + } +#endif + return false; +} +// simple join "/some/folder/" (guaranteed to end with /) and "../relative/path" +static std::string join_path(std::string dir, const char* relative) { + while (relative[0] == '.' && relative[1] == '.' 
&& relative[2] == '/') { + size_t slash_pos = dir.find_last_of('/', dir.size() - 2); // last symbol is slash, find before it + if (slash_pos != std::string::npos) { + dir = dir.substr(0, slash_pos + 1); + } + relative += 3; + } + + return dir + relative; +} + +static std::string auto_discover_stdlib_folder() { // if the user launches tolk compiler from a package installed (e.g. /usr/bin/tolk), // locate stdlib in /usr/share/ton/smartcont (this folder exists on package installation) // (note, that paths are not absolute, they are relative to the launched binary) // consider https://github.com/ton-blockchain/packages for actual paths - std::filesystem::path executable_dir = std::filesystem::canonical(argv0).remove_filename(); + std::string executable_filename; + if (!get_current_executable_filename(executable_filename)) { + return {}; + } + + // extract dirname to concatenate with relative paths (separator / is ok even for windows) + size_t slash_pos = executable_filename.find_last_of('/'); + std::string executable_dir = executable_filename.substr(0, slash_pos + 1); #ifdef TD_DARWIN - auto def_location = executable_dir / "../share/ton/ton/smartcont/stdlib.tolk"; + std::string def_location = join_path(executable_dir, "../share/ton/ton/smartcont/tolk-stdlib"); #elif TD_WINDOWS - auto def_location = executable_dir / "smartcont/stdlib.tolk"; + std::string def_location = join_path(executable_dir, "smartcont/tolk-stdlib"); #else // linux - auto def_location = executable_dir / "../share/ton/smartcont/stdlib.tolk"; + std::string def_location = join_path(executable_dir, "../share/ton/smartcont/tolk-stdlib"); #endif - if (stdlib_file_exists(def_location)) { + if (stdlib_folder_exists(def_location.c_str())) { return def_location; } // so, the binary is not from a system package // maybe it's just built from sources? e.g. 
~/ton/cmake-build-debug/tolk/tolk // then, check the ~/ton/crypto/smartcont folder - auto near_when_built_from_sources = executable_dir / "../../crypto/smartcont/stdlib.tolk"; - if (stdlib_file_exists(near_when_built_from_sources)) { + std::string near_when_built_from_sources = join_path(executable_dir, "../../crypto/smartcont/tolk-stdlib"); + if (stdlib_folder_exists(near_when_built_from_sources.c_str())) { return near_when_built_from_sources; } @@ -95,10 +147,31 @@ static std::string auto_discover_stdlib_location(const char* argv0) { td::Result fs_read_callback(CompilerSettings::FsReadCallbackKind kind, const char* query) { switch (kind) { + case CompilerSettings::FsReadCallbackKind::Realpath: { + td::Result res_realpath; + if (query[0] == '@' && strlen(query) > 8 && !strncmp(query, "@stdlib/", 8)) { + // import "@stdlib/filename" or import "@stdlib/filename.tolk" + std::string path = G.settings.stdlib_folder + static_cast(query + 7); + if (strncmp(path.c_str() + path.size() - 5, ".tolk", 5) != 0) { + path += ".tolk"; + } + res_realpath = td::realpath(td::CSlice(path.c_str())); + } else { + // import "relative/to/cwd/path.tolk" + res_realpath = td::realpath(td::CSlice(query)); + } + + if (res_realpath.is_error()) { + // note, that for non-existing files, `realpath()` on Linux/Mac returns an error, + // whereas on Windows, it returns okay, but fails after, on reading, with a message "cannot open file" + return td::Status::Error(std::string{"cannot find file "} + query); + } + return res_realpath; + } case CompilerSettings::FsReadCallbackKind::ReadFile: { struct stat f_stat; - int res = stat(query, &f_stat); - if (res != 0 || !S_ISREG(f_stat.st_mode)) { + int res = stat(query, &f_stat); // query here is already resolved realpath + if (res != 0 || (f_stat.st_mode & S_IFMT) != S_IFREG) { return td::Status::Error(std::string{"cannot open file "} + query); } @@ -110,15 +183,8 @@ td::Result fs_read_callback(CompilerSettings::FsReadCallbackKind ki fclose(f); return 
std::move(str); } - case CompilerSettings::FsReadCallbackKind::Realpath: { - td::Result res_realpath = td::realpath(td::CSlice(query)); - if (res_realpath.is_error()) { - return td::Status::Error(std::string{"cannot find file "} + query); - } - return res_realpath; - } default: { - return td::Status::Error("Unknown query kind"); + return td::Status::Error("unknown query kind"); } } } @@ -185,16 +251,26 @@ int main(int argc, char* const argv[]) { return 2; } - // locate stdlib.tolk based on env or default system paths - G.settings.stdlib_filename = auto_discover_stdlib_location(argv[0]); - if (G.settings.stdlib_filename.empty()) { - std::cerr << "Failed to discover stdlib.tolk.\n" + // locate tolk-stdlib/ based on env or default system paths + if (const char* env_var = getenv("TOLK_STDLIB")) { + std::string stdlib_filename = static_cast(env_var) + "/common.tolk"; + td::Result res = td::realpath(td::CSlice(stdlib_filename.c_str())); + if (res.is_error()) { + std::cerr << "Environment variable TOLK_STDLIB is invalid: " << res.move_as_error().message().c_str() << std::endl; + return 2; + } + G.settings.stdlib_folder = env_var; + } else { + G.settings.stdlib_folder = auto_discover_stdlib_folder(); + } + if (G.settings.stdlib_folder.empty()) { + std::cerr << "Failed to discover Tolk stdlib.\n" "Probably, you have a non-standard Tolk installation.\n" - "Please, provide env variable TOLK_STDLIB referencing to it.\n"; + "Please, provide env variable TOLK_STDLIB referencing to tolk-stdlib/ folder.\n"; return 2; } if (G.is_verbosity(2)) { - std::cerr << "stdlib located at " << G.settings.stdlib_filename << std::endl; + std::cerr << "stdlib folder: " << G.settings.stdlib_folder << std::endl; } if (optind != argc - 1) { @@ -202,8 +278,8 @@ int main(int argc, char* const argv[]) { return 2; } - G.settings.entrypoint_filename = argv[optind]; G.settings.read_callback = fs_read_callback; - return tolk_proceed(G.settings.entrypoint_filename); + int exit_code = 
tolk_proceed(argv[optind]); + return exit_code; } diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp index a093a7f6b..e74589ce8 100644 --- a/tolk/tolk-wasm.cpp +++ b/tolk/tolk-wasm.cpp @@ -34,21 +34,18 @@ using namespace tolk; -td::Result compile_internal(char *config_json) { +static td::Result compile_internal(char *config_json) { TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json))) td::JsonObject& config = input_json.get_object(); TRY_RESULT(opt_level, td::get_json_object_int_field(config, "optimizationLevel", true, 2)); - TRY_RESULT(stdlib_tolk, td::get_json_object_string_field(config, "stdlibLocation", false)); TRY_RESULT(stack_comments, td::get_json_object_bool_field(config, "withStackComments", true, false)); TRY_RESULT(entrypoint_filename, td::get_json_object_string_field(config, "entrypointFileName", false)); TRY_RESULT(experimental_options, td::get_json_object_string_field(config, "experimentalOptions", true)); G.settings.verbosity = 0; G.settings.optimization_level = std::max(0, opt_level); - G.settings.stdlib_filename = stdlib_tolk; G.settings.stack_layout_comments = stack_comments; - G.settings.entrypoint_filename = entrypoint_filename; if (!experimental_options.empty()) { G.settings.parse_experimental_options_cmd_arg(experimental_options.c_str()); } @@ -56,8 +53,8 @@ td::Result compile_internal(char *config_json) { std::ostringstream outs, errs; std::cout.rdbuf(outs.rdbuf()); std::cerr.rdbuf(errs.rdbuf()); - int tolk_res = tolk::tolk_proceed(entrypoint_filename); - if (tolk_res != 0) { + int exit_code = tolk_proceed(entrypoint_filename); + if (exit_code != 0) { return td::Status::Error("Tolk compilation error: " + errs.str()); } @@ -78,32 +75,29 @@ td::Result compile_internal(char *config_json) { /// Callback used to retrieve file contents from a "not file system". See tolk-js for implementation. /// The callback must fill either destContents or destError. 
/// The implementor must use malloc() for them and use free() after tolk_compile returns. -typedef void (*CStyleReadFileCallback)(int kind, char const* data, char** destContents, char** destError); - -CompilerSettings::FsReadCallback wrapReadCallback(CStyleReadFileCallback _readCallback) -{ - CompilerSettings::FsReadCallback readCallback; - if (_readCallback) { - readCallback = [=](CompilerSettings::FsReadCallbackKind kind, char const* data) -> td::Result { - char* destContents = nullptr; - char* destError = nullptr; +typedef void (*WasmFsReadCallback)(int kind, char const* data, char** destContents, char** destError); + +static CompilerSettings::FsReadCallback wrap_wasm_read_callback(WasmFsReadCallback _readCallback) { + return [_readCallback](CompilerSettings::FsReadCallbackKind kind, char const* data) -> td::Result { + char* destContents = nullptr; + char* destError = nullptr; + if (_readCallback) { _readCallback(static_cast(kind), data, &destContents, &destError); - if (!destContents && !destError) { - return td::Status::Error("Callback not supported"); - } - if (destContents) { - return destContents; - } + } + if (destContents) { + return destContents; + } + if (destError) { return td::Status::Error(std::string(destError)); - }; - } - return readCallback; + } + return td::Status::Error("Invalid callback from wasm"); + }; } extern "C" { const char* version() { - auto version_json = td::JsonBuilder(); + td::JsonBuilder version_json = td::JsonBuilder(); auto obj = version_json.enter_object(); obj("tolkVersion", TOLK_VERSION); obj("tolkFiftLibCommitHash", GitMetadata::CommitSHA1()); @@ -112,23 +106,22 @@ const char* version() { return strdup(version_json.string_builder().as_cslice().c_str()); } -const char *tolk_compile(char *config_json, CStyleReadFileCallback callback) { - G.settings.read_callback = wrapReadCallback(callback); +const char *tolk_compile(char *config_json, WasmFsReadCallback callback) { + G.settings.read_callback = 
wrap_wasm_read_callback(callback); td::Result res = compile_internal(config_json); if (res.is_error()) { - auto result = res.move_as_error(); - auto error_res = td::JsonBuilder(); - auto error_o = error_res.enter_object(); - error_o("status", "error"); - error_o("message", result.message().str()); - error_o.leave(); + td::JsonBuilder error_res = td::JsonBuilder(); + auto obj = error_res.enter_object(); + obj("status", "error"); + obj("message", res.move_as_error().message().str()); + obj.leave(); return strdup(error_res.string_builder().as_cslice().c_str()); } - auto res_string = res.move_as_ok(); - + std::string res_string = res.move_as_ok(); return strdup(res_string.c_str()); } -} + +} // extern "C" diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp index 46eb4dc92..9268cc62d 100644 --- a/tolk/tolk.cpp +++ b/tolk/tolk.cpp @@ -48,21 +48,16 @@ int tolk_proceed(const std::string &entrypoint_filename) { define_builtins(); lexer_init(); + // on any error, an exception is thrown, and the message is printed out below + // (currently, only a single error can be printed) try { - if (G.settings.stdlib_filename.empty()) { - throw Fatal("stdlib filename not specified"); - } - - // on any error, an exception is thrown, and the message is printed out below - // (currently, only a single error can be printed) - - AllSrcFiles all_files = pipeline_discover_and_parse_sources(G.settings.stdlib_filename, entrypoint_filename); + AllSrcFiles all_files = pipeline_discover_and_parse_sources("@stdlib/common.tolk", entrypoint_filename); pipeline_register_global_symbols(all_files); pipeline_convert_ast_to_legacy_Expr_Op(all_files); pipeline_find_unused_symbols(); - pipeline_generate_fif_output_to_std_cout(); + pipeline_generate_fif_output_to_std_cout(all_files); return 0; } catch (Fatal& fatal) { From d9dba320ccbc23f2e7a4989d5a07297a706c6592 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Thu, 31 Oct 2024 11:18:54 +0400 Subject: [PATCH 10/12] [Tolk] Get rid of ~tilda with `mutate` and `self` methods 
This is a very big change. If FunC has `.methods()` and `~methods()`, Tolk has only dot, one and only way to call a `.method()`. A method may mutate an object, or may not. It's a behavioral and semantic difference from FunC. - `cs.loadInt(32)` modifies a slice and returns an integer - `b.storeInt(x, 32)` modifies a builder - `b = b.storeInt()` also works, since it not only modifies, but returns - chained methods also work, they return `self` - everything works exactly as expected, similar to JS - no runtime overhead, exactly same Fift instructions - custom methods are created with ease - tilda `~` does not exist in Tolk at all --- crypto/smartcont/tolk-stdlib/common.tolk | 137 ++-- crypto/smartcont/tolk-stdlib/lisp-lists.tolk | 2 +- crypto/smartcont/tolk-stdlib/tvm-dicts.tolk | 366 ++++------- .../smartcont/tolk-stdlib/tvm-lowlevel.tolk | 6 +- tolk-tester/tests/a10.tolk | 51 +- tolk-tester/tests/a6.tolk | 14 +- tolk-tester/tests/a6_1.tolk | 20 +- .../tests/allow_post_modification.tolk | 41 +- tolk-tester/tests/asm_arg_order.tolk | 28 +- tolk-tester/tests/camel1.tolk | 250 -------- tolk-tester/tests/camel2.tolk | 204 ------ tolk-tester/tests/camel3.tolk | 95 --- tolk-tester/tests/camel4.tolk | 145 ----- tolk-tester/tests/cells-slices.tolk | 189 +++--- tolk-tester/tests/co1.tolk | 7 +- tolk-tester/tests/dicts-demo.tolk | 75 ++- tolk-tester/tests/imports/use-dicts-err.tolk | 10 +- tolk-tester/tests/imports/use-dicts.tolk | 8 +- tolk-tester/tests/invalid-call-1.tolk | 9 + tolk-tester/tests/invalid-call-2.tolk | 14 + tolk-tester/tests/invalid-call-3.tolk | 12 + tolk-tester/tests/invalid-call-4.tolk | 13 + tolk-tester/tests/invalid-call-5.tolk | 13 + tolk-tester/tests/invalid-call-6.tolk | 12 + tolk-tester/tests/invalid-call-7.tolk | 14 + tolk-tester/tests/invalid-call-8.tolk | 8 + tolk-tester/tests/invalid-cmt-old.tolk | 2 +- tolk-tester/tests/invalid-declaration-6.tolk | 8 +- tolk-tester/tests/invalid-mutate-1.tolk | 11 + tolk-tester/tests/invalid-mutate-10.tolk | 16 + 
tolk-tester/tests/invalid-mutate-11.tolk | 8 + tolk-tester/tests/invalid-mutate-12.tolk | 14 + tolk-tester/tests/invalid-mutate-13.tolk | 8 + tolk-tester/tests/invalid-mutate-14.tolk | 8 + tolk-tester/tests/invalid-mutate-15.tolk | 12 + tolk-tester/tests/invalid-mutate-2.tolk | 10 + tolk-tester/tests/invalid-mutate-3.tolk | 11 + tolk-tester/tests/invalid-mutate-4.tolk | 14 + tolk-tester/tests/invalid-mutate-5.tolk | 14 + tolk-tester/tests/invalid-mutate-6.tolk | 16 + tolk-tester/tests/invalid-mutate-7.tolk | 15 + tolk-tester/tests/invalid-mutate-8.tolk | 10 + tolk-tester/tests/invalid-mutate-9.tolk | 9 + tolk-tester/tests/invalid-self-1.tolk | 8 + tolk-tester/tests/invalid-self-2.tolk | 8 + tolk-tester/tests/invalid-self-3.tolk | 10 + tolk-tester/tests/invalid-self-4.tolk | 9 + tolk-tester/tests/invalid-self-5.tolk | 15 + tolk-tester/tests/invalid-self-6.tolk | 8 + tolk-tester/tests/invalid-self-7.tolk | 8 + tolk-tester/tests/invalid-typing-3.tolk | 19 + tolk-tester/tests/invalid-typing-4.tolk | 14 + tolk-tester/tests/invalid-typing-5.tolk | 14 + tolk-tester/tests/known-bugs.tolk | 27 + tolk-tester/tests/mutate-methods.tolk | 337 ++++++++++ tolk-tester/tests/no-spaces.tolk | 16 +- tolk-tester/tests/null-keyword.tolk | 22 +- tolk-tester/tests/parse-address.tolk | 113 ++++ tolk-tester/tests/pure-functions.tolk | 4 +- tolk-tester/tests/self-keyword.tolk | 213 +++++++ tolk-tester/tests/test-math.tolk | 46 +- tolk-tester/tests/unbalanced_ret_nested.tolk | 5 +- tolk-tester/tests/use-before-declare.tolk | 2 +- tolk-tester/tests/var-apply.tolk | 22 + tolk-tester/tests/w2.tolk | 14 +- tolk/abscode.cpp | 23 +- tolk/analyzer.cpp | 3 +- tolk/ast-from-tokens.cpp | 180 ++++-- tolk/ast-replacer.h | 5 +- tolk/ast-stringifier.h | 14 +- tolk/ast-visitor.h | 6 +- tolk/ast.cpp | 10 + tolk/ast.h | 78 ++- tolk/builtins.cpp | 314 ++++++---- tolk/gen-abscode.cpp | 177 +++--- tolk/lexer.cpp | 37 +- tolk/lexer.h | 3 + tolk/pipe-ast-to-legacy.cpp | 584 +++++++++++++----- 
tolk/pipe-generate-fif-output.cpp | 8 +- tolk/pipe-register-symbols.cpp | 152 ++--- tolk/symtable.cpp | 12 + tolk/symtable.h | 15 +- tolk/tolk.h | 156 ++--- tolk/type-expr.h | 19 +- tolk/unify-types.cpp | 2 +- 85 files changed, 2703 insertions(+), 1958 deletions(-) delete mode 100644 tolk-tester/tests/camel1.tolk delete mode 100644 tolk-tester/tests/camel2.tolk delete mode 100644 tolk-tester/tests/camel3.tolk delete mode 100644 tolk-tester/tests/camel4.tolk create mode 100644 tolk-tester/tests/invalid-call-1.tolk create mode 100644 tolk-tester/tests/invalid-call-2.tolk create mode 100644 tolk-tester/tests/invalid-call-3.tolk create mode 100644 tolk-tester/tests/invalid-call-4.tolk create mode 100644 tolk-tester/tests/invalid-call-5.tolk create mode 100644 tolk-tester/tests/invalid-call-6.tolk create mode 100644 tolk-tester/tests/invalid-call-7.tolk create mode 100644 tolk-tester/tests/invalid-call-8.tolk create mode 100644 tolk-tester/tests/invalid-mutate-1.tolk create mode 100644 tolk-tester/tests/invalid-mutate-10.tolk create mode 100644 tolk-tester/tests/invalid-mutate-11.tolk create mode 100644 tolk-tester/tests/invalid-mutate-12.tolk create mode 100644 tolk-tester/tests/invalid-mutate-13.tolk create mode 100644 tolk-tester/tests/invalid-mutate-14.tolk create mode 100644 tolk-tester/tests/invalid-mutate-15.tolk create mode 100644 tolk-tester/tests/invalid-mutate-2.tolk create mode 100644 tolk-tester/tests/invalid-mutate-3.tolk create mode 100644 tolk-tester/tests/invalid-mutate-4.tolk create mode 100644 tolk-tester/tests/invalid-mutate-5.tolk create mode 100644 tolk-tester/tests/invalid-mutate-6.tolk create mode 100644 tolk-tester/tests/invalid-mutate-7.tolk create mode 100644 tolk-tester/tests/invalid-mutate-8.tolk create mode 100644 tolk-tester/tests/invalid-mutate-9.tolk create mode 100644 tolk-tester/tests/invalid-self-1.tolk create mode 100644 tolk-tester/tests/invalid-self-2.tolk create mode 100644 tolk-tester/tests/invalid-self-3.tolk create mode 100644 
tolk-tester/tests/invalid-self-4.tolk create mode 100644 tolk-tester/tests/invalid-self-5.tolk create mode 100644 tolk-tester/tests/invalid-self-6.tolk create mode 100644 tolk-tester/tests/invalid-self-7.tolk create mode 100644 tolk-tester/tests/invalid-typing-3.tolk create mode 100644 tolk-tester/tests/invalid-typing-4.tolk create mode 100644 tolk-tester/tests/invalid-typing-5.tolk create mode 100644 tolk-tester/tests/known-bugs.tolk create mode 100644 tolk-tester/tests/mutate-methods.tolk create mode 100644 tolk-tester/tests/parse-address.tolk create mode 100644 tolk-tester/tests/self-keyword.tolk create mode 100644 tolk-tester/tests/var-apply.tolk diff --git a/crypto/smartcont/tolk-stdlib/common.tolk b/crypto/smartcont/tolk-stdlib/common.tolk index de711f7df..dec12e233 100644 --- a/crypto/smartcont/tolk-stdlib/common.tolk +++ b/crypto/smartcont/tolk-stdlib/common.tolk @@ -17,11 +17,7 @@ fun createEmptyTuple(): tuple /// Appends a value to tuple, resulting in `Tuple t' = (x1, ..., xn, value)`. /// If its size exceeds 255, throws a type check exception. @pure -fun tuplePush(t: tuple, value: X): tuple - asm "TPUSH"; - -@pure -fun ~tuplePush(t: tuple, value: X): (tuple, ()) +fun tuplePush(mutate self: tuple, value: X): void asm "TPUSH"; /// Returns the first element of a non-empty tuple. @@ -336,118 +332,109 @@ fun beginParse(c: cell): slice asm "CTOS"; /// Checks if slice is empty. If not, throws an exception. -fun assertEndOfSlice(s: slice): void +fun assertEndOfSlice(self: slice): void asm "ENDS"; /// Loads the next reference from the slice. @pure -fun loadRef(s: slice): (slice, cell) +fun loadRef(mutate self: slice): cell asm( -> 1 0) "LDREF"; /// Preloads the next reference from the slice. @pure -fun preloadRef(s: slice): cell +fun preloadRef(self: slice): cell asm "PLDREF"; /// Loads a signed [len]-bit integer from a slice. 
@pure -fun loadInt(s: slice, len: int): (slice, int) +fun loadInt(mutate self: slice, len: int): int builtin; /// Loads an unsigned [len]-bit integer from a slice. @pure -fun loadUint(s: slice, len: int): (slice, int) +fun loadUint(mutate self: slice, len: int): int builtin; /// Loads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate slice `s''`. @pure -fun loadBits(s: slice, len: int): (slice, slice) +fun loadBits(mutate self: slice, len: int): slice builtin; /// Preloads a signed [len]-bit integer from a slice. @pure -fun preloadInt(s: slice, len: int): int +fun preloadInt(self: slice, len: int): int builtin; /// Preloads an unsigned [len]-bit integer from a slice. @pure -fun preloadUint(s: slice, len: int): int +fun preloadUint(self: slice, len: int): int builtin; /// Preloads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate slice. @pure -fun preloadBits(s: slice, len: int): slice +fun preloadBits(self: slice, len: int): slice builtin; /// Loads serialized amount of Toncoins (any unsigned integer up to `2^120 - 1`). @pure -fun loadCoins(s: slice): (slice, int) +fun loadCoins(mutate self: slice): int asm( -> 1 0) "LDGRAMS"; /// Loads bool (-1 or 0) from a slice @pure -fun loadBool(s: slice): (slice, int) +fun loadBool(mutate self: slice): int asm( -> 1 0) "1 LDI"; /// Shifts a slice pointer to [len] bits forward, mutating the slice. @pure -fun skipBits(s: slice, len: int): slice - asm "SDSKIPFIRST"; // todo make mutating -@pure -fun ~skipBits(s: slice, len: int): (slice, ()) +fun skipBits(mutate self: slice, len: int): self asm "SDSKIPFIRST"; /// Returns the first `0 ≤ len ≤ 1023` bits of a slice. @pure -fun getFirstBits(s: slice, len: int): slice +fun getFirstBits(self: slice, len: int): slice asm "SDCUTFIRST"; /// Returns all but the last `0 ≤ len ≤ 1023` bits of a slice. 
@pure -fun removeLastBits(s: slice, len: int): slice - asm "SDSKIPLAST"; // todo make mutating -@pure -fun ~removeLastBits(s: slice, len: int): (slice, ()) +fun removeLastBits(mutate self: slice, len: int): self asm "SDSKIPLAST"; /// Returns the last `0 ≤ len ≤ 1023` bits of a slice. @pure -fun getLastBits(s: slice, len: int): slice +fun getLastBits(self: slice, len: int): slice asm "SDCUTLAST"; /// Loads a dictionary (TL HashMapE structure, represented as TVM cell) from a slice. /// Returns `null` if `nothing` constructor is used. @pure -fun loadDict(s: slice): (slice, cell) +fun loadDict(mutate self: slice): cell asm( -> 1 0) "LDDICT"; /// Preloads a dictionary (cell) from a slice. @pure -fun preloadDict(s: slice): cell +fun preloadDict(self: slice): cell asm "PLDDICT"; /// Loads a dictionary as [loadDict], but returns only the remainder of the slice. @pure -fun skipDict(s: slice): slice - asm "SKIPDICT"; // todo make mutating -@pure -fun ~skipDict(s: slice): (slice, ()) +fun skipDict(mutate self: slice): self asm "SKIPDICT"; /// Loads (Maybe ^Cell) from a slice. /// In other words, loads 1 bit: if it's true, loads the first ref, otherwise returns `null`. @pure -fun loadMaybeRef(s: slice): (slice, cell) +fun loadMaybeRef(mutate self: slice): cell asm( -> 1 0) "LDOPTREF"; /// Preloads (Maybe ^Cell) from a slice. @pure -fun preloadMaybeRef(s: slice): cell +fun preloadMaybeRef(self: slice): cell asm "PLDOPTREF"; /// Loads (Maybe ^Cell), but returns only the remainder of the slice. @pure -fun ~skipMaybeRef(s: slice): (slice, ()) +fun skipMaybeRef(mutate self: slice): self asm "SKIPOPTREF"; /** @@ -464,62 +451,60 @@ fun beginCell(): builder /// Converts a builder into an ordinary `cell`. @pure -fun endCell(b: builder): cell +fun endCell(self: builder): cell asm "ENDC"; /// Stores a reference to a cell into a builder. 
@pure -fun storeRef(b: builder, c: cell): builder - asm(c b) "STREF"; +fun storeRef(mutate self: builder, c: cell): self + asm(c self) "STREF"; /// Stores a signed [len]-bit integer into a builder (`0 ≤ len ≤ 257`). @pure -fun storeInt(b: builder, x: int, len: int): builder +fun storeInt(mutate self: builder, x: int, len: int): self builtin; /// Stores an unsigned [len]-bit integer into a builder (`0 ≤ len ≤ 256`). @pure -fun storeUint(b: builder, x: int, len: int): builder +fun storeUint(mutate self: builder, x: int, len: int): self builtin; /// Stores a slice into a builder. @pure -fun storeSlice(b: builder, s: slice): builder +fun storeSlice(mutate self: builder, s: slice): self asm "STSLICER"; /// Stores amount of Toncoins into a builder. @pure -fun storeCoins(b: builder, x: int): builder +fun storeCoins(mutate self: builder, x: int): self asm "STGRAMS"; /// Stores bool (-1 or 0) into a builder. /// Attention: true value is `-1`, not 1! If you pass `1` here, TVM will throw an exception. @pure -fun storeBool(b: builder, x: int): builder - asm(x b) "1 STI"; +fun storeBool(mutate self: builder, x: int): self + asm(x self) "1 STI"; /// Stores dictionary (represented by TVM `cell` or `null`) into a builder. /// In other words, stores a `1`-bit and a reference to [c] if [c] is not `null` and `0`-bit otherwise. @pure -fun storeDict(b: builder, c: cell): builder - asm(c b) "STDICT"; +fun storeDict(mutate self: builder, c: cell): self + asm(c self) "STDICT"; /// Stores (Maybe ^Cell) into a builder. /// In other words, if cell is `null`, store '0' bit; otherwise, store '1' and a ref to [c]. @pure -fun storeMaybeRef(b: builder, c: cell): builder - asm(c b) "STOPTREF"; +fun storeMaybeRef(mutate self: builder, c: cell): self + asm(c self) "STOPTREF"; /// Concatenates two builders. 
@pure -fun storeBuilder(to: builder, from: builder): builder +fun storeBuilder(mutate self: builder, from: builder): self asm "STBR"; +/// Stores a slice representing TL addr_none$00 (two `0` bits). @pure -fun storeAddressNone(b: builder): builder - asm "0 PUSHINT" "SWAP" "2 STU"; -@pure -fun ~storeAddressNone(b: builder): (builder, ()) +fun storeAddressNone(mutate self: builder): self asm "b{00} STSLICECONST"; @@ -529,47 +514,47 @@ fun ~storeAddressNone(b: builder): (builder, ()) /// Returns the number of references in a slice. @pure -fun getRemainingRefsCount(s: slice): int +fun getRemainingRefsCount(self: slice): int asm "SREFS"; /// Returns the number of data bits in a slice. @pure -fun getRemainingBitsCount(s: slice): int +fun getRemainingBitsCount(self: slice): int asm "SBITS"; /// Returns both the number of data bits and the number of references in a slice. @pure -fun getRemainingBitsAndRefsCount(s: slice): (int, int) +fun getRemainingBitsAndRefsCount(self: slice): (int, int) asm "SBITREFS"; /// Checks whether a slice is empty (i.e., contains no bits of data and no cell references). @pure -fun isEndOfSlice(s: slice): int +fun isEndOfSlice(self: slice): int asm "SEMPTY"; /// Checks whether a slice has no bits of data. @pure -fun isEndOfSliceBits(s: slice): int +fun isEndOfSliceBits(self: slice): int asm "SDEMPTY"; /// Checks whether a slice has no references. @pure -fun isEndOfSliceRefs(s: slice): int +fun isEndOfSliceRefs(self: slice): int asm "SREMPTY"; /// Checks whether data parts of two slices coinside. @pure -fun isSliceBitsEqual(a: slice, b: slice): int +fun isSliceBitsEqual(self: slice, b: slice): int asm "SDEQ"; /// Returns the number of cell references already stored in a builder. @pure -fun getBuilderRefsCount(b: builder): int +fun getBuilderRefsCount(self: builder): int asm "BREFS"; /// Returns the number of data bits already stored in a builder. 
@pure -fun getBuilderBitsCount(b: builder): int +fun getBuilderBitsCount(self: builder): int asm "BBITS"; @@ -613,8 +598,8 @@ fun getBuilderBitsCount(b: builder): int /// Loads from slice [s] the only prefix that is a valid `MsgAddress`, /// and returns both this prefix `s'` and the remainder `s''` of [s] as slices. @pure -fun loadAddress(s: slice): (slice, slice) - asm( -> 1 0) "LDMSGADDR"; // todo make mutating +fun loadAddress(mutate self: slice): slice + asm( -> 1 0) "LDMSGADDR"; /// Decomposes slice [s] containing a valid `MsgAddress` into a `tuple t` with separate fields of this `MsgAddress`. /// If [s] is not a valid `MsgAddress`, a cell deserialization exception is thrown. @@ -686,7 +671,7 @@ const NON_BOUNCEABLE = 0x10; /// Load msgFlags from incoming message body (4 bits). @pure -fun loadMessageFlags(s: slice): (slice, int) +fun loadMessageFlags(mutate self: slice): int asm( -> 1 0) "4 LDU"; /// Having msgFlags (4 bits), check that a message is bounced. @@ -697,38 +682,34 @@ fun isMessageBounced(msgFlags: int): int /// Skip 0xFFFFFFFF prefix (when a message is bounced). @pure -fun ~skipBouncedPrefix(s: slice): (slice, ()) +fun skipBouncedPrefix(mutate self: slice): self asm "32 PUSHINT" "SDSKIPFIRST"; /// The guideline recommends to start the body of an internal message with uint32 `op` and uint64 `queryId`. @pure -fun loadMessageOp(s: slice): (slice, int) +fun loadMessageOp(mutate self: slice): int asm( -> 1 0) "32 LDU"; @pure -fun ~skipMessageOp(s: slice): (slice, ()) +fun skipMessageOp(mutate self: slice): self asm "32 PUSHINT" "SDSKIPFIRST"; @pure -fun storeMessageOp(b: builder, op: int): builder - asm(op b) "32 STU"; -fun ~storeMessageOp(b: builder, op: int): (builder, ()) - asm(op b) "32 STU"; +fun storeMessageOp(mutate self: builder, op: int): self + asm(op self) "32 STU"; /// The guideline recommends that uint64 `queryId` should follow uint32 `op`. 
@pure -fun loadMessageQueryId(s: slice): (slice, int) +fun loadMessageQueryId(mutate self: slice): int asm( -> 1 0) "64 LDU"; @pure -fun ~skipMessageQueryId(s: slice): (slice, ()) +fun skipMessageQueryId(mutate self: slice): self asm "64 PUSHINT" "SDSKIPFIRST"; @pure -fun storeMessageQueryId(b: builder, queryId: int): builder - asm(queryId b) "64 STU"; -fun ~storeMessageQueryId(b: builder, queryId: int): (builder, ()) - asm(queryId b) "64 STU"; +fun storeMessageQueryId(mutate self: builder, queryId: int): self + asm(queryId self) "64 STU"; /// SEND MODES - https://docs.ton.org/tvm.pdf page 137, SENDRAWMSG diff --git a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk index 94c045237..f7a721918 100644 --- a/crypto/smartcont/tolk-stdlib/lisp-lists.tolk +++ b/crypto/smartcont/tolk-stdlib/lisp-lists.tolk @@ -24,7 +24,7 @@ fun listSplit(list: tuple): (X, tuple) /// Extracts the tail and the head of lisp-style list. @pure -fun ~listNext(list: tuple): (tuple, X) +fun listNext(mutate self: tuple): X asm( -> 1 0) "UNCONS"; /// Returns the head of lisp-style list. diff --git a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk index 1e3c10ec8..9fba24d90 100644 --- a/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk +++ b/crypto/smartcont/tolk-stdlib/tvm-dicts.tolk @@ -19,415 +19,279 @@ fun createEmptyDict(): cell /// Checks whether a dictionary is empty. 
@pure -fun dictIsEmpty(c: cell): int +fun dictIsEmpty(self: cell): int asm "DICTEMPTY"; @pure -fun iDictGet(dict: cell, keyLen: int, key: int): (slice, int) - asm(key dict keyLen) "DICTIGET" "NULLSWAPIFNOT"; +fun iDictGet(self: cell, keyLen: int, key: int): (slice, int) + asm(key self keyLen) "DICTIGET" "NULLSWAPIFNOT"; @pure -fun uDictGet(dict: cell, keyLen: int, key: int): (slice, int) - asm(key dict keyLen) "DICTUGET" "NULLSWAPIFNOT"; +fun uDictGet(self: cell, keyLen: int, key: int): (slice, int) + asm(key self keyLen) "DICTUGET" "NULLSWAPIFNOT"; @pure -fun sDictGet(dict: cell, keyLen: int, key: slice): (slice, int) - asm(key dict keyLen) "DICTGET" "NULLSWAPIFNOT"; +fun sDictGet(self: cell, keyLen: int, key: slice): (slice, int) + asm(key self keyLen) "DICTGET" "NULLSWAPIFNOT"; @pure -fun iDictSet(dict: cell, keyLen: int, key: int, value: slice): cell - asm(value key dict keyLen) "DICTISET"; +fun iDictSet(mutate self: cell, keyLen: int, key: int, value: slice): void + asm(value key self keyLen) "DICTISET"; @pure -fun ~iDictSet(dict: cell, keyLen: int, key: int, value: slice): (cell, ()) - asm(value key dict keyLen) "DICTISET"; +fun uDictSet(mutate self: cell, keyLen: int, key: int, value: slice): void + asm(value key self keyLen) "DICTUSET"; @pure -fun uDictSet(dict: cell, keyLen: int, key: int, value: slice): cell - asm(value key dict keyLen) "DICTUSET"; - -@pure -fun ~uDictSet(dict: cell, keyLen: int, key: int, value: slice): (cell, ()) - asm(value key dict keyLen) "DICTUSET"; - -@pure -fun sDictSet(dict: cell, keyLen: int, key: slice, value: slice): cell - asm(value key dict keyLen) "DICTSET"; - -@pure -fun ~sDictSet(dict: cell, keyLen: int, key: slice, value: slice): (cell, ()) - asm(value key dict keyLen) "DICTSET"; - - -@pure -fun iDictSetRef(dict: cell, keyLen: int, key: int, value: cell): cell - asm(value key dict keyLen) "DICTISETREF"; - -@pure -fun ~iDictSetRef(dict: cell, keyLen: int, key: int, value: cell): (cell, ()) - asm(value key dict keyLen) 
"DICTISETREF"; - -@pure -fun uDictSetRef(dict: cell, keyLen: int, key: int, value: cell): cell - asm(value key dict keyLen) "DICTUSETREF"; - -@pure -fun ~uDictSetRef(dict: cell, keyLen: int, key: int, value: cell): (cell, ()) - asm(value key dict keyLen) "DICTUSETREF"; - -@pure -fun sDictSetRef(dict: cell, keyLen: int, key: slice, value: cell): cell - asm(value key dict keyLen) "DICTSETREF"; - -@pure -fun ~sDictSetRef(dict: cell, keyLen: int, key: slice, value: cell): (cell, ()) - asm(value key dict keyLen) "DICTSETREF"; - - -@pure -fun iDictSetIfNotExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) - asm(value key dict keyLen) "DICTIADD"; - -@pure -fun ~iDictSetIfNotExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) - asm(value key dict keyLen) "DICTIADD"; - -@pure -fun uDictSetIfNotExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) - asm(value key dict keyLen) "DICTUADD"; - -@pure -fun ~uDictSetIfNotExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) - asm(value key dict keyLen) "DICTUADD"; - - -@pure -fun iDictSetIfExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) - asm(value key dict keyLen) "DICTIREPLACE"; - -@pure -fun ~iDictSetIfExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) - asm(value key dict keyLen) "DICTIREPLACE"; - -@pure -fun uDictSetIfExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) - asm(value key dict keyLen) "DICTUREPLACE"; - -@pure -fun ~uDictSetIfExists(dict: cell, keyLen: int, key: int, value: slice): (cell, int) - asm(value key dict keyLen) "DICTUREPLACE"; +fun sDictSet(mutate self: cell, keyLen: int, key: slice, value: slice): void + asm(value key self keyLen) "DICTSET"; @pure -fun iDictGetRef(dict: cell, keyLen: int, key: int): (cell, int) - asm(key dict keyLen) "DICTIGETREF" "NULLSWAPIFNOT"; +fun iDictSetRef(mutate self: cell, keyLen: int, key: int, value: cell): void + asm(value key self keyLen) "DICTISETREF"; 
@pure -fun uDictGetRef(dict: cell, keyLen: int, key: int): (cell, int) - asm(key dict keyLen) "DICTUGETREF" "NULLSWAPIFNOT"; +fun uDictSetRef(mutate self: cell, keyLen: int, key: int, value: cell): void + asm(value key self keyLen) "DICTUSETREF"; @pure -fun sDictGetRef(dict: cell, keyLen: int, key: slice): (cell, int) - asm(key dict keyLen) "DICTGETREF" "NULLSWAPIFNOT"; +fun sDictSetRef(mutate self: cell, keyLen: int, key: slice, value: cell): void + asm(value key self keyLen) "DICTSETREF"; @pure -fun iDictGetRefOrNull(dict: cell, keyLen: int, key: int): cell - asm(key dict keyLen) "DICTIGETOPTREF"; +fun iDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): int + asm(value key self keyLen) "DICTIADD"; @pure -fun uDictGetRefOrNull(dict: cell, keyLen: int, key: int): cell - asm(key dict keyLen) "DICTUGETOPTREF"; - -@pure -fun sDictGetRefOrNull(dict: cell, keyLen: int, key: slice): cell - asm(key dict keyLen) "DICTGETOPTREF"; +fun uDictSetIfNotExists(mutate self: cell, keyLen: int, key: int, value: slice): int + asm(value key self keyLen) "DICTUADD"; @pure -fun iDictDelete(dict: cell, keyLen: int, key: int): (cell, int) - asm(key dict keyLen) "DICTIDEL"; +fun iDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): int + asm(value key self keyLen) "DICTIREPLACE"; @pure -fun ~iDictDelete(dict: cell, keyLen: int, key: int): (cell, int) - asm(key dict keyLen) "DICTIDEL"; +fun uDictSetIfExists(mutate self: cell, keyLen: int, key: int, value: slice): int + asm(value key self keyLen) "DICTUREPLACE"; -@pure -fun uDictDelete(dict: cell, keyLen: int, key: int): (cell, int) - asm(key dict keyLen) "DICTUDEL"; @pure -fun ~uDictDelete(dict: cell, keyLen: int, key: int): (cell, int) - asm(key dict keyLen) "DICTUDEL"; +fun iDictGetRef(self: cell, keyLen: int, key: int): (cell, int) + asm(key self keyLen) "DICTIGETREF" "NULLSWAPIFNOT"; @pure -fun sDictDelete(dict: cell, keyLen: int, key: slice): (cell, int) - asm(key dict keyLen) "DICTDEL"; 
+fun uDictGetRef(self: cell, keyLen: int, key: int): (cell, int) + asm(key self keyLen) "DICTUGETREF" "NULLSWAPIFNOT"; @pure -fun ~sDictDelete(dict: cell, keyLen: int, key: slice): (cell, int) - asm(key dict keyLen) "DICTDEL"; +fun sDictGetRef(self: cell, keyLen: int, key: slice): (cell, int) + asm(key self keyLen) "DICTGETREF" "NULLSWAPIFNOT"; @pure -fun iDictSetAndGet(dict: cell, keyLen: int, key: int, value: slice): (cell, slice, int) - asm(value key dict keyLen) "DICTISETGET" "NULLSWAPIFNOT"; +fun iDictGetRefOrNull(self: cell, keyLen: int, key: int): cell + asm(key self keyLen) "DICTIGETOPTREF"; @pure -fun ~iDictSetAndGet(dict: cell, keyLen: int, key: int, value: slice): (cell, (slice, int)) - asm(value key dict keyLen) "DICTISETGET" "NULLSWAPIFNOT"; +fun uDictGetRefOrNull(self: cell, keyLen: int, key: int): cell + asm(key self keyLen) "DICTUGETOPTREF"; @pure -fun uDictSetAndGet(dict: cell, keyLen: int, key: int, value: slice): (cell, slice, int) - asm(value key dict keyLen) "DICTUSETGET" "NULLSWAPIFNOT"; +fun sDictGetRefOrNull(self: cell, keyLen: int, key: slice): cell + asm(key self keyLen) "DICTGETOPTREF"; -@pure -fun ~uDictSetAndGet(dict: cell, keyLen: int, key: int, value: slice): (cell, (slice, int)) - asm(value key dict keyLen) "DICTUSETGET" "NULLSWAPIFNOT"; @pure -fun sDictSetAndGet(dict: cell, keyLen: int, key: slice, value: slice): (cell, slice, int) - asm(value key dict keyLen) "DICTSETGET" "NULLSWAPIFNOT"; +fun iDictDelete(mutate self: cell, keyLen: int, key: int): int + asm(key self keyLen) "DICTIDEL"; @pure -fun ~sDictSetAndGet(dict: cell, keyLen: int, key: slice, value: slice): (cell, (slice, int)) - asm(value key dict keyLen) "DICTSETGET" "NULLSWAPIFNOT"; - +fun uDictDelete(mutate self: cell, keyLen: int, key: int): int + asm(key self keyLen) "DICTUDEL"; @pure -fun iDictSetAndGetPreviousRefOrNull(dict: cell, keyLen: int, key: int, value: cell): (cell, cell) - asm(value key dict keyLen) "DICTISETGETOPTREF"; +fun sDictDelete(mutate self: cell, 
keyLen: int, key: slice): int + asm(key self keyLen) "DICTDEL"; -@pure -fun ~iDictSetAndGetPreviousRefOrNull(dict: cell, keyLen: int, key: int, value: cell): (cell, cell) - asm(value key dict keyLen) "DICTISETGETOPTREF"; @pure -fun uDictSetAndGetPreviousRefOrNull(dict: cell, keyLen: int, key: int, value: cell): (cell, cell) - asm(value key dict keyLen) "DICTUSETGETOPTREF"; +fun iDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, int) + asm(value key self keyLen) "DICTISETGET" "NULLSWAPIFNOT"; @pure -fun ~uDictSetAndGetPreviousRefOrNull(dict: cell, keyLen: int, key: int, value: cell): (cell, cell) - asm(value key dict keyLen) "DICTUSETGETOPTREF"; - +fun uDictSetAndGet(mutate self: cell, keyLen: int, key: int, value: slice): (slice, int) + asm(value key self keyLen) "DICTUSETGET" "NULLSWAPIFNOT"; @pure -fun iDictDeleteAndGet(dict: cell, keyLen: int, key: int): (cell, slice, int) - asm(key dict keyLen) "DICTIDELGET" "NULLSWAPIFNOT"; +fun sDictSetAndGet(mutate self: cell, keyLen: int, key: slice, value: slice): (slice, int) + asm(value key self keyLen) "DICTSETGET" "NULLSWAPIFNOT"; -@pure -fun ~iDictDeleteAndGet(dict: cell, keyLen: int, key: int): (cell, (slice, int)) - asm(key dict keyLen) "DICTIDELGET" "NULLSWAPIFNOT"; @pure -fun uDictDeleteAndGet(dict: cell, keyLen: int, key: int): (cell, slice, int) - asm(key dict keyLen) "DICTUDELGET" "NULLSWAPIFNOT"; +fun iDictSetAndGetRefOrNull(mutate self: cell, keyLen: int, key: int, value: cell): cell + asm(value key self keyLen) "DICTISETGETOPTREF"; @pure -fun ~uDictDeleteAndGet(dict: cell, keyLen: int, key: int): (cell, (slice, int)) - asm(key dict keyLen) "DICTUDELGET" "NULLSWAPIFNOT"; +fun uDictSetAndGetRefOrNull(mutate self: cell, keyLen: int, key: int, value: cell): cell + asm(value key self keyLen) "DICTUSETGETOPTREF"; -@pure -fun sDictDeleteAndGet(dict: cell, keyLen: int, key: slice): (cell, slice, int) - asm(key dict keyLen) "DICTDELGET" "NULLSWAPIFNOT"; @pure -fun ~sDictDeleteAndGet(dict: 
cell, keyLen: int, key: slice): (cell, (slice, int)) - asm(key dict keyLen) "DICTDELGET" "NULLSWAPIFNOT"; - +fun iDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, int) + asm(key self keyLen) "DICTIDELGET" "NULLSWAPIFNOT"; @pure -fun iDictSetBuilder(dict: cell, keyLen: int, key: int, value: builder): cell - asm(value key dict keyLen) "DICTISETB"; +fun uDictDeleteAndGet(mutate self: cell, keyLen: int, key: int): (slice, int) + asm(key self keyLen) "DICTUDELGET" "NULLSWAPIFNOT"; @pure -fun ~iDictSetBuilder(dict: cell, keyLen: int, key: int, value: builder): (cell, ()) - asm(value key dict keyLen) "DICTISETB"; +fun sDictDeleteAndGet(mutate self: cell, keyLen: int, key: slice): (slice, int) + asm(key self keyLen) "DICTDELGET" "NULLSWAPIFNOT"; -@pure -fun uDictSetBuilder(dict: cell, keyLen: int, key: int, value: builder): cell - asm(value key dict keyLen) "DICTUSETB"; @pure -fun ~uDictSetBuilder(dict: cell, keyLen: int, key: int, value: builder): (cell, ()) - asm(value key dict keyLen) "DICTUSETB"; +fun iDictSetBuilder(mutate self: cell, keyLen: int, key: int, value: builder): void + asm(value key self keyLen) "DICTISETB"; @pure -fun sDictSetBuilder(dict: cell, keyLen: int, key: slice, value: builder): cell - asm(value key dict keyLen) "DICTSETB"; +fun uDictSetBuilder(mutate self: cell, keyLen: int, key: int, value: builder): void + asm(value key self keyLen) "DICTUSETB"; @pure -fun ~sDictSetBuilder(dict: cell, keyLen: int, key: slice, value: builder): (cell, ()) - asm(value key dict keyLen) "DICTSETB"; - +fun sDictSetBuilder(mutate self: cell, keyLen: int, key: slice, value: builder): void + asm(value key self keyLen) "DICTSETB"; -@pure -fun iDictSetBuilderIfNotExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) - asm(value key dict keyLen) "DICTIADDB"; - -@pure -fun ~iDictSetBuilderIfNotExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) - asm(value key dict keyLen) "DICTIADDB"; - -@pure -fun 
uDictSetBuilderIfNotExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) - asm(value key dict keyLen) "DICTUADDB"; - -@pure -fun ~uDictSetBuilderIfNotExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) - asm(value key dict keyLen) "DICTUADDB"; @pure -fun iDictSetBuilderIfExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) - asm(value key dict keyLen) "DICTIREPLACEB"; +fun iDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): int + asm(value key self keyLen) "DICTIADDB"; @pure -fun ~iDictSetBuilderIfExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) - asm(value key dict keyLen) "DICTIREPLACEB"; +fun uDictSetBuilderIfNotExists(mutate self: cell, keyLen: int, key: int, value: builder): int + asm(value key self keyLen) "DICTUADDB"; @pure -fun uDictSetBuilderIfExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) - asm(value key dict keyLen) "DICTUREPLACEB"; +fun iDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): int + asm(value key self keyLen) "DICTIREPLACEB"; @pure -fun ~uDictSetBuilderIfExists(dict: cell, keyLen: int, key: int, value: builder): (cell, int) - asm(value key dict keyLen) "DICTUREPLACEB"; +fun uDictSetBuilderIfExists(mutate self: cell, keyLen: int, key: int, value: builder): int + asm(value key self keyLen) "DICTUREPLACEB"; @pure -fun iDictDeleteFirstAndGet(dict: cell, keyLen: int): (cell, int, slice, int) +fun iDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, int) asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; @pure -fun ~iDictDeleteFirstAndGet(dict: cell, keyLen: int): (cell, (int, slice, int)) - asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; - -@pure -fun uDictDeleteFirstAndGet(dict: cell, keyLen: int): (cell, int, slice, int) +fun uDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (int, slice, int) asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; @pure -fun 
~uDictDeleteFirstAndGet(dict: cell, keyLen: int): (cell, (int, slice, int)) - asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; - -@pure -fun sDictDeleteFirstAndGet(dict: cell, keyLen: int): (cell, slice, slice, int) - asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; - -@pure -fun ~sDictDeleteFirstAndGet(dict: cell, keyLen: int): (cell, (slice, slice, int)) +fun sDictDeleteFirstAndGet(mutate self: cell, keyLen: int): (slice, slice, int) asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; @pure -fun iDictDeleteLastAndGet(dict: cell, keyLen: int): (cell, int, slice, int) +fun iDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, int) asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; @pure -fun ~iDictDeleteLastAndGet(dict: cell, keyLen: int): (cell, (int, slice, int)) - asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; - -@pure -fun uDictDeleteLastAndGet(dict: cell, keyLen: int): (cell, int, slice, int) +fun uDictDeleteLastAndGet(mutate self: cell, keyLen: int): (int, slice, int) asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; @pure -fun ~uDictDeleteLastAndGet(dict: cell, keyLen: int): (cell, (int, slice, int)) - asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; - -@pure -fun sDictDeleteLastAndGet(dict: cell, keyLen: int): (cell, slice, slice, int) - asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; - -@pure -fun ~sDictDeleteLastAndGet(dict: cell, keyLen: int): (cell, (slice, slice, int)) +fun sDictDeleteLastAndGet(mutate self: cell, keyLen: int): (slice, slice, int) asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; @pure -fun iDictGetFirst(dict: cell, keyLen: int): (int, slice, int) +fun iDictGetFirst(self: cell, keyLen: int): (int, slice, int) asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; @pure -fun uDictGetFirst(dict: cell, keyLen: int): (int, slice, int) +fun uDictGetFirst(self: cell, keyLen: int): (int, slice, int) asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; @pure -fun sDictGetFirst(dict: cell, keyLen: int): (slice, slice, int) +fun sDictGetFirst(self: cell, keyLen: int): 
(slice, slice, int) asm (-> 1 0 2) "DICTMIN" "NULLSWAPIFNOT2"; @pure -fun iDictGetFirstAsRef(dict: cell, keyLen: int): (int, cell, int) +fun iDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, int) asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; @pure -fun uDictGetFirstAsRef(dict: cell, keyLen: int): (int, cell, int) +fun uDictGetFirstAsRef(self: cell, keyLen: int): (int, cell, int) asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; @pure -fun sDictGetFirstAsRef(dict: cell, keyLen: int): (slice, cell, int) +fun sDictGetFirstAsRef(self: cell, keyLen: int): (slice, cell, int) asm (-> 1 0 2) "DICTMINREF" "NULLSWAPIFNOT2"; @pure -fun iDictGetLast(dict: cell, keyLen: int): (int, slice, int) +fun iDictGetLast(self: cell, keyLen: int): (int, slice, int) asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; @pure -fun uDictGetLast(dict: cell, keyLen: int): (int, slice, int) +fun uDictGetLast(self: cell, keyLen: int): (int, slice, int) asm (-> 1 0 2) "DICTUMAX" "NULLSWAPIFNOT2"; @pure -fun sDictGetLast(dict: cell, keyLen: int): (slice, slice, int) +fun sDictGetLast(self: cell, keyLen: int): (slice, slice, int) asm (-> 1 0 2) "DICTMAX" "NULLSWAPIFNOT2"; @pure -fun iDictGetLastAsRef(dict: cell, keyLen: int): (int, cell, int) +fun iDictGetLastAsRef(self: cell, keyLen: int): (int, cell, int) asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; @pure -fun uDictGetLastAsRef(dict: cell, keyLen: int): (int, cell, int) +fun uDictGetLastAsRef(self: cell, keyLen: int): (int, cell, int) asm (-> 1 0 2) "DICTUMAXREF" "NULLSWAPIFNOT2"; @pure -fun sDictGetLastAsRef(dict: cell, keyLen: int): (slice, cell, int) +fun sDictGetLastAsRef(self: cell, keyLen: int): (slice, cell, int) asm (-> 1 0 2) "DICTMAXREF" "NULLSWAPIFNOT2"; @pure -fun iDictGetNext(dict: cell, keyLen: int, pivot: int): (int, slice, int) - asm(pivot dict keyLen -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; +fun iDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; 
@pure -fun uDictGetNext(dict: cell, keyLen: int, pivot: int): (int, slice, int) - asm(pivot dict keyLen -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; +fun uDictGetNext(self: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; @pure -fun iDictGetNextOrEqual(dict: cell, keyLen: int, pivot: int): (int, slice, int) - asm(pivot dict keyLen -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; +fun iDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; @pure -fun uDictGetNextOrEqual(dict: cell, keyLen: int, pivot: int): (int, slice, int) - asm(pivot dict keyLen -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; +fun uDictGetNextOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; @pure -fun iDictGetPrev(dict: cell, keyLen: int, pivot: int): (int, slice, int) - asm(pivot dict keyLen -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; +fun iDictGetPrev(self: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; @pure -fun uDictGetPrev(dict: cell, keyLen: int, pivot: int): (int, slice, int) - asm(pivot dict keyLen -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; +fun uDictGetPrev(self: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; @pure -fun iDictGetPrevOrEqual(dict: cell, keyLen: int, pivot: int): (int, slice, int) - asm(pivot dict keyLen -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; +fun iDictGetPrevOrEqual(self: cell, keyLen: int, pivot: int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; @pure -fun uDictGetPrevOrEqual(dict: cell, keyLen: int, pivot: int): (int, slice, int) - asm(pivot dict keyLen -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; +fun uDictGetPrevOrEqual(self: cell, keyLen: int, pivot: 
int): (int, slice, int) + asm(pivot self keyLen -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; /** @@ -435,13 +299,13 @@ fun uDictGetPrevOrEqual(dict: cell, keyLen: int, pivot: int): (int, slice, int) */ @pure -fun prefixDictGet(dict: cell, keyLen: int, key: slice): (slice, slice, slice, int) - asm(key dict keyLen) "PFXDICTGETQ" "NULLSWAPIFNOT2"; +fun prefixDictGet(self: cell, keyLen: int, key: slice): (slice, slice, slice, int) + asm(key self keyLen) "PFXDICTGETQ" "NULLSWAPIFNOT2"; @pure -fun prefixDictSet(dict: cell, keyLen: int, key: slice, value: slice): (cell, int) - asm(value key dict keyLen) "PFXDICTSET"; +fun prefixDictSet(mutate self: cell, keyLen: int, key: slice, value: slice): int + asm(value key self keyLen) "PFXDICTSET"; @pure -fun prefixDictDelete(dict: cell, keyLen: int, key: slice): (cell, int) - asm(key dict keyLen) "PFXDICTDEL"; +fun prefixDictDelete(mutate self: cell, keyLen: int, key: slice): int + asm(key self keyLen) "PFXDICTDEL"; diff --git a/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk index b4f44a1bf..91b35f2bd 100644 --- a/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk +++ b/crypto/smartcont/tolk-stdlib/tvm-lowlevel.tolk @@ -21,9 +21,5 @@ fun transformSliceToContinuation(s: slice): continuation /// Moves a variable or a value [x] to the top of the stack. @pure -fun stackMoveToTop(x: X): X +fun stackMoveToTop(mutate self: X): void asm "NOP"; - -/// Mark a variable as used, such that the code which produced it won't be deleted even if it is not impure. 
-fun stackMoveToTopImpure(x: X): void // todo needs to be deleted, check verified contracts - asm "DROP"; diff --git a/tolk-tester/tests/a10.tolk b/tolk-tester/tests/a10.tolk index 1526a1220..d46397c6a 100644 --- a/tolk-tester/tests/a10.tolk +++ b/tolk-tester/tests/a10.tolk @@ -1,3 +1,5 @@ +import "@stdlib/tvm-lowlevel" + fun pair_first(p: [X, Y]): X asm "FIRST"; fun one(dummy: tuple) { @@ -35,13 +37,34 @@ fun test88(x: int) { @method_id(89) fun test89(last: int) { var t: tuple = createEmptyTuple(); - t~tuplePush(1); - t~tuplePush(2); - t~tuplePush(3); - t~tuplePush(last); + t.tuplePush(1); + t.tuplePush(2); + t.tuplePush(3); + t.tuplePush(last); return (t.tupleAt(0), t.tupleAt(t.tupleSize() - 1), t.tupleFirst(), t.tupleLast()); } +@pure fun get10() { return 10; } + +@method_id(91) +fun touchCodegen2() { + var f = get10(); + f.stackMoveToTop(); + return f; +} + +@method_id(92) +fun testDumpDontPolluteStack() { + var f = get10(); + f.debugPrint(); + debugPrint(10); + var s = "asdf"; + s.debugPrintString(); + debugDumpStack(); + debugPrintString("my"); + return (f, getRemainingBitsCount(s)); +} + @method_id(93) fun testStartBalanceCodegen1() { var t = getMyOriginalBalanceWithExtraCurrencies(); @@ -65,7 +88,27 @@ fun testStartBalanceCodegen2() { @testcase | 88 | 5 | 234 @testcase | 88 | 50 | 0 @testcase | 89 | 4 | 1 4 1 4 +@testcase | 91 | | 10 +@testcase | 92 | | 10 32 + +@fif_codegen +""" + touchCodegen2 PROC:<{ + // + get10 CALLDICT // f + }> +""" +@fif_codegen +""" + testDumpDontPolluteStack PROC:<{ + ... 
+ DUMPSTK + x{6d79} PUSHSLICE // f s _9 + STRDUMP DROP + SBITS // f _11 + }> +""" @fif_codegen """ diff --git a/tolk-tester/tests/a6.tolk b/tolk-tester/tests/a6.tolk index da692a78e..7f2c39461 100644 --- a/tolk-tester/tests/a6.tolk +++ b/tolk-tester/tests/a6.tolk @@ -4,8 +4,6 @@ fun f(a: int, b: int, c: int, d: int, e: int, f: int): (int, int) { return (Dx/D,Dy/D); };;;; -fun mulDivR(x: int, y: int, z: int): int { return mulDivRound(x, y, z); } - fun calc_phi(): int { var n = 1; repeat (70) { n*=10; }; @@ -14,7 +12,7 @@ fun calc_phi(): int { do { (p,q)=(q,p+q); } while (q <= n); //;; - return mulDivR(p, n, q); + return mulDivRound(p, n, q); } fun calc_sqrt2(): int { @@ -26,7 +24,7 @@ fun calc_sqrt2(): int { var t = p + q; (p, q) = (q, t + q); } while (q <= n); - return mulDivR(p, n, q); + return mulDivRound(p, n, q); } fun calc_root(m: auto): auto { @@ -63,18 +61,14 @@ fun ataninv(base: int, q: int): int { // computes base*atan(1/q) return sum; } -fun arctanInv(base: int, q: int): int { return ataninv(base, q); } - fun calc_pi(): int { var base: int = 64; repeat (70) { base *= 10; } - return (arctanInv(base << 2, 5) - arctanInv(base, 239))~>>4; + return (ataninv(base << 2, 5) - ataninv(base, 239))~>>4; } -fun calcPi(): int { return calc_pi(); } - fun main(): int { - return calcPi(); + return calc_pi(); } /** diff --git a/tolk-tester/tests/a6_1.tolk b/tolk-tester/tests/a6_1.tolk index ecbf56dd0..4995c42d3 100644 --- a/tolk-tester/tests/a6_1.tolk +++ b/tolk-tester/tests/a6_1.tolk @@ -5,12 +5,18 @@ fun main(a: int, b: int, c: int, d: int, e: int, f: int): (int, int) { return (Dx / D, Dy / D); } +@method_id(101) +fun testDivMod(x: int, y: int) { + return [divMod(x, y), modDiv(x, y), mulDivMod(x, y, 10)]; +} + /** - method_id | in | out -@testcase | 0 | 1 1 1 -1 10 6 | 8 2 -@testcase | 0 | 817 -31 624 -241 132272 272276 | 132 -788 -@testcase | 0 | -886 562 498 -212 -36452 -68958 | -505 -861 -@testcase | 0 | 448 -433 -444 792 150012 -356232 | -218 -572 -@testcase | 0 | 
-40 -821 433 -734 -721629 -741724 | -206 889 -@testcase | 0 | -261 -98 -494 868 -166153 733738 | 263 995 + method_id | in | out +@testcase | 0 | 1 1 1 -1 10 6 | 8 2 +@testcase | 0 | 817 -31 624 -241 132272 272276 | 132 -788 +@testcase | 0 | -886 562 498 -212 -36452 -68958 | -505 -861 +@testcase | 0 | 448 -433 -444 792 150012 -356232 | -218 -572 +@testcase | 0 | -40 -821 433 -734 -721629 -741724 | -206 889 +@testcase | 0 | -261 -98 -494 868 -166153 733738 | 263 995 +@testcase | 101 | 112 3 | [ 37 1 1 37 33 6 ] */ diff --git a/tolk-tester/tests/allow_post_modification.tolk b/tolk-tester/tests/allow_post_modification.tolk index 079558864..5cfa2f3d8 100644 --- a/tolk-tester/tests/allow_post_modification.tolk +++ b/tolk-tester/tests/allow_post_modification.tolk @@ -4,35 +4,32 @@ fun unsafe_tuple(x: X): tuple fun inc(x: int, y: int): (int, int) { return (x + y, y * 10); } -fun ~inc(x: int, y: int): (int, int) { - (x, y) = inc(x, y); - return (x, y); -} - -fun ~incWrap(x: int, y: int): (int, int) { - return ~inc(x, y); +fun `~inc`(mutate self: int, y: int): int { + val (newX, newY) = inc(self, y); + self = newX; + return newY; } @method_id(11) fun test_return(x: int): (int, int, int, int, int, int, int) { - return (x, x~incWrap(x / 20), x, x = x * 2, x, x += 1, x); + return (x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); } @method_id(12) fun test_assign(x: int): (int, int, int, int, int, int, int) { - var (x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int) = (x, x~inc(x / 20), x, x=x*2, x, x+=1, x); + var (x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int) = (x, x.`~inc`(x / 20), x, x=x*2, x, x+=1, x); return (x1, x2, x3, x4, x5, x6, x7); } @method_id(13) fun test_tuple(x: int): tuple { - var t: tuple = unsafe_tuple([x, x~incWrap(x / 20), x, x = x * 2, x, x += 1, x]); + var t: tuple = unsafe_tuple([x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]); return t; } @method_id(14) fun test_tuple_assign(x: int): (int, int, int, int, int, int, int) { - 
var [x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int] = [x, x~inc(x / 20), x, x = x * 2, x, x += 1, x]; + var [x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int] = [x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x]; return (x1, x2, x3, x4, x5, x6, x7); } @@ -42,7 +39,7 @@ fun foo1(x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int): (int, i @method_id(15) fun test_call_1(x: int): (int, int, int, int, int, int, int) { - return foo1(x, x~inc(x / 20), x, x = x * 2, x, x += 1, x); + return foo1(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); } fun foo2(x1: int, x2: int, x3456: (int, int, int, int), x7: int): (int, int, int, int, int, int, int) { @@ -52,7 +49,7 @@ fun foo2(x1: int, x2: int, x3456: (int, int, int, int), x7: int): (int, int, int @method_id(16) fun test_call_2(x: int): (int, int, int, int, int, int, int) { - return foo2(x, x~incWrap(x / 20), (x, x = x * 2, x, x += 1), x); + return foo2(x, x.`~inc`(x / 20), (x, x = x * 2, x, x += 1), x); } fun asm_func(x1: int, x2: int, x3: int, x4: int, x5: int, x6: int, x7: int): (int, int, int, int, int, int, int) @@ -61,28 +58,28 @@ asm @method_id(17) fun test_call_asm_old(x: int): (int, int, int, int, int, int, int) { - return asm_func(x, x += 1, x, x, x~inc(x / 20), x, x = x * 2); + return asm_func(x, x += 1, x, x, x.`~inc`(x / 20), x, x = x * 2); } @method_id(18) fun test_call_asm_new(x: int): (int, int, int, int, int, int, int) { - return asm_func(x, x~incWrap(x / 20), x, x = x * 2, x, x += 1, x); + return asm_func(x, x.`~inc`(x / 20), x, x = x * 2, x, x += 1, x); } global xx: int; @method_id(19) fun test_global(x: int): (int, int, int, int, int, int, int) { xx = x; - return (xx, xx~incWrap(xx / 20), xx, xx = xx * 2, xx, xx += 1, xx); + return (xx, xx.`~inc`(xx / 20), xx, xx = xx * 2, xx, xx += 1, xx); } @method_id(20) fun test_if_else(x: int): (int, int, int, int, int) { if (x > 10) { - return (x~inc(8), x + 1, x = 1, x <<= 3, x); + return (x.`~inc`(8), x + 1, x = 1, x <<= 3, 
x); } else { xx = 9; - return (x, x~inc(-4), x~inc(-1), x >= 1, x = x + xx); + return (x, x.`~inc`(-4), x.`~inc`(-1), x >= 1, x = x + xx); } } @@ -103,6 +100,12 @@ fun main() { @testcase | 20 | 80 | 80 89 1 8 8 @testcase | 20 | 9 | 9 -40 -10 -1 13 -@fif_codegen_avoid ~incWrap +@fif_codegen +""" + ~inc PROC:<{ + // self y + inc CALLDICT // self newY + }> +""" @code_hash 97139400653362069936987769894397430077752335662822462908581556703209313861576 */ diff --git a/tolk-tester/tests/asm_arg_order.tolk b/tolk-tester/tests/asm_arg_order.tolk index a2d66bc2d..b96e09ecb 100644 --- a/tolk-tester/tests/asm_arg_order.tolk +++ b/tolk-tester/tests/asm_arg_order.tolk @@ -2,10 +2,8 @@ fun empty_tuple2(): tuple asm "NIL"; @pure -fun tpush2(t: tuple, x: X): (tuple, ()) +fun tpush2(mutate self: tuple, x: X): void asm "TPUSH"; -fun myEmptyTuple(): tuple { return empty_tuple2(); } -fun myTuplePush(t: tuple, value: X): (tuple, ()) { return tpush2(t, value); } @pure fun asm_func_1(x: int, y: int, z: int): tuple @@ -20,31 +18,27 @@ asm (y z x -> 0) "3 TUPLE"; fun asm_func_4(a: int, b: (int, (int, int)), c: int): tuple asm (b a c -> 0) "5 TUPLE"; -fun asmFunc1(x: int, y: int, z: int): tuple { return asm_func_1(x, y, z); } -fun asmFunc3(x: int, y: int, z: int): tuple { return asm_func_3(x, y, z); } - @pure -fun asm_func_modify(a: tuple, b: int, c: int): (tuple, ()) -asm (c b a -> 0) "SWAP TPUSH SWAP TPUSH"; -fun asmFuncModify(a: tuple, b: int, c: int): (tuple, ()) { return asm_func_modify(a, b, c); } +fun asm_func_modify(mutate self: tuple, b: int, c: int): void +asm (c b self) "SWAP TPUSH SWAP TPUSH"; global t: tuple; fun foo(x: int): int { - t~myTuplePush(x); + t.tpush2(x); return x * 10; } @method_id(11) fun test_old_1(): (tuple, tuple) { t = empty_tuple2(); - var t2: tuple = asmFunc1(foo(11), foo(22), foo(33)); + var t2: tuple = asm_func_1(foo(11), foo(22), foo(33)); return (t, t2); } @method_id(12) fun test_old_2(): (tuple, tuple) { - t = myEmptyTuple(); + t = empty_tuple2(); var t2: 
tuple = asm_func_2(foo(11), foo(22), foo(33)); return (t, t2); } @@ -58,7 +52,7 @@ fun test_old_3(): (tuple, tuple) { @method_id(14) fun test_old_4(): (tuple, tuple) { - t = myEmptyTuple(); + t = empty_tuple2(); var t2: tuple = empty_tuple2(); // This actually computes left-to-right even without compute-asm-ltr t2 = asm_func_4(foo(11), (foo(22), (foo(33), foo(44))), foo(55)); @@ -69,21 +63,21 @@ fun test_old_4(): (tuple, tuple) { fun test_old_modify(): (tuple, tuple) { t = empty_tuple2(); var t2: tuple = empty_tuple2(); - t2~asmFuncModify(foo(22), foo(33)); + t2.asm_func_modify(foo(22), foo(33)); return (t, t2); } @method_id(16) fun test_old_dot(): (tuple, tuple) { t = empty_tuple2(); - var t2: tuple = foo(11).asmFunc3(foo(22), foo(33)); + var t2: tuple = foo(11).asm_func_3(foo(22), foo(33)); return (t, t2); } @method_id(21) fun test_new_1(): (tuple, tuple) { t = empty_tuple2(); - var t2: tuple = asmFunc1(foo(11), foo(22), foo(33)); + var t2: tuple = asm_func_1(foo(11), foo(22), foo(33)); return (t, t2); } @@ -112,7 +106,7 @@ fun test_new_4(): (tuple, tuple) { fun test_new_modify(): (tuple, tuple) { t = empty_tuple2(); var t2: tuple = empty_tuple2(); - t2~asm_func_modify(foo(22), foo(33)); + t2.asm_func_modify(foo(22), foo(33)); return (t, t2); } diff --git a/tolk-tester/tests/camel1.tolk b/tolk-tester/tests/camel1.tolk deleted file mode 100644 index 291206a95..000000000 --- a/tolk-tester/tests/camel1.tolk +++ /dev/null @@ -1,250 +0,0 @@ -// Here we test "functions that just wrap other functions" (camelCase in particular): -// > builder beginCell() { return begin_cell(); } -// Such functions, when called, are explicitly inlined during code generation (even without `inline` modifier). -// It means, that `beginCell()` is replaced to `begin_cell()` (and effectively to `NEWC`). -// Moreover, body of `beginCell` is NOT codegenerated at all. 
-// Hence, we can write camelCase wrappers (as well as more intelligible namings around stdlib functions) -// without affecting performance and even bytecode hashes. -// This works with ~functions also. And even works with wrappers of wrappers. -// Moreover, such wrappers can reorder input parameters, see a separate test camel2.tolk. -import "@stdlib/tvm-dicts" - -fun myBeginCell(): builder { return beginCell(); } -fun myEndCell(b: builder): cell { return endCell(b); } -fun myStoreRef(b: builder, c: cell): builder { return storeRef(b, c); } -fun myStoreUint(b: builder, i: int, bw: int): builder { return storeUint(b, i, bw); } - -// 'inline' is not needed actually, but if it exists, it's just ignored -@inline -@pure -fun myBeginParse(c: cell): slice { return beginParse(c); } -@inline -@pure -fun mySkipBits(s: slice, len: int): slice { return skipBits(s, len); } -@inline -@pure -fun ~mySkipBits(s: slice, len: int): (slice, ()) { return ~skipBits(s, len); } -@inline -@pure -fun ~myLoadUint(s: slice, len: int): (slice, int) { return loadUint(s, len); } - -fun myComputeDataSize(c: cell, maxCells: int): (int, int, int) { return calculateCellSizeStrict(c, maxCells); } - -fun dict__new(): cell { return createEmptyDict(); } -fun dict__iset(dict: cell, keyLen: int, index: int, value: slice): cell { return iDictSet(dict, keyLen, index, value); } -fun ~dict__iset(dict: cell, keyLen: int, index: int, value: slice): (cell, ()) { return ~iDictSet(dict, keyLen, index, value); } -fun dict__tryIGet(dict: cell, keyLen: int, index: int): (slice, int) { return iDictGet(dict, keyLen, index); } -fun dict__tryIGetMin(dict: cell, keyLen: int): (int, slice, int) { return iDictGetFirst(dict, keyLen); } - -@pure -fun triple_second(p: [X, Y, Z]): Y - asm "SECOND"; - -fun myEmptyTuple(): tuple { return createEmptyTuple(); } -fun emptyTuple1(): tuple { return myEmptyTuple(); } -fun emptyTuple11(): tuple { return emptyTuple1(); } -fun myTuplePush(t: tuple, value: X): tuple { return tuplePush(t, 
value); } -fun ~myTuplePush(t: tuple, value: X): (tuple, ()) { return ~tuplePush(t, value); } -fun myTupleAt(t: tuple, index: int): X { return tupleAt(t, index); } -fun tripleSecond(p: [X1, Y2, Z3]): Y2 { return triple_second(p); } -@pure -fun nullValue(): X -asm "PUSHNULL"; - -fun initial1(x: tuple): tuple { return x; } -fun initial2(x: tuple): tuple { return initial1(x); } - -// int add(int x, int y) { return x + y; } // this is also a wrapper, as its body is _+_(x,y) - -fun fake1(a: int, b: int, c: int): void -asm(a b c) "DROP DROP DROP"; -fun fake2(a: int, b: int, c: int): void -asm(b c a) "DROP DROP DROP"; -fun fake3(a: int, b: int, c: int): () -asm(c a b) "DROP DROP DROP"; -fun fake4(a: int, b: int, c: int): () -asm(c b a) "DROP DROP DROP"; - -fun fake1Wrapper(a: int, b: int, c: int) { return fake1(a, b, c); } -fun fake2Wrapper(a: int, b: int, c: int) { return fake2(a, b, c); } -fun fake3Wrapper(a: int, b: int, c: int) { return fake3(a, b, c); } -fun fake4Wrapper(a: int, b: int, c: int) { return fake4(a, b, c); } - -@method_id(101) -fun test1(): [int, int, int] { - var x: int = 1; - var y: int = 1; - var to_be_ref: cell = myBeginCell().myEndCell(); - var in_c: builder = myBeginCell().myStoreUint(123, 8); - in_c = myStoreRef(in_c, to_be_ref); - var (a, b, c) = myComputeDataSize(in_c.myEndCell(), 10); - assert(!(b != 8)) throw 101; - assert(!(c != 1), 101); - return [a, b + x, c + y]; -} - -@method_id(102) -fun test2(): [[int, int, int], int, int, int] { - var dict: cell = dict__new(); - dict = dict__iset(dict, 32, 456, myBeginCell().myStoreUint(4560, 32).myEndCell().myBeginParse()); - dict.dict__iset(32, 789, myBeginCell().myStoreUint(7890, 32).myEndCell().myBeginParse()); - dict~dict__iset(32, 123, myBeginCell().myStoreUint(0, 64).myStoreUint(1230, 32).myStoreUint(1231, 32).myStoreUint(1232, 32).myEndCell().myBeginParse()); - - var (mink, minv, _) = dict__tryIGetMin(dict, 32); - // skip 64 bits - minv~mySkipBits(16); - minv = minv.mySkipBits(16); - 
minv.mySkipBits(11); // does nothing - (minv, _) = ~mySkipBits(minv, 16); - mySkipBits(minv, 11); // does nothing - minv~mySkipBits(16); - // load 3*32 - var minv1 = minv~myLoadUint(32); - var minv2 = minv~myLoadUint(32); - var minv3 = minv~myLoadUint(32); - - var (_, found123) = dict__tryIGet(dict, 32, 123); - var (_, found456) = dict__tryIGet(dict, 32, 456); - var (_, found789) = dict__tryIGet(dict, 32, 789); - return [[minv1, minv2, minv3], found123, found456, found789]; -} - -@method_id(103) -fun test3(): tuple { - var with34: tuple = initial2(emptyTuple1()); - with34~myTuplePush(34); - - var t: tuple = emptyTuple11(); - t = myTuplePush(t, 12); - myTuplePush(t, emptyTuple11()); // does nothing - t~myTuplePush(emptyTuple1()); - t~myTuplePush(with34.myTupleAt(0)); - t.myTuplePush("123"s); // does nothing - - var tri: [cell, int, cell] = [nullValue(), 90 + 1, null]; - var f: int = tripleSecond(tri); - (t, _) = ~myTuplePush(t, f); - - return t; -} - -@method_id(104) -fun test4(a: int, b: int, c: int): int { - fake1Wrapper(a, b, c); - fake2Wrapper(a, b, c); - fake3Wrapper(a, b, c); - fake4Wrapper(a, b, c); - return 10; -} - -fun main(): int { - var x: int = now(); - return 30; -} - -/** - method_id | in | out -@testcase | 101 | | [ 2 9 2 ] -@testcase | 102 | | [ [ 1230 1231 1232 ] -1 -1 0 ] -@testcase | 103 | | [ 12 [] 34 91 ] - -@fif_codegen -""" - main PROC:<{ - // - 30 PUSHINT - }> -""" - -@fif_codegen -""" - test1 PROC:<{ - // - NEWC // _5 - ENDC // to_be_ref - NEWC // to_be_ref _8 - 123 PUSHINT // to_be_ref _8 _9=123 - SWAP // to_be_ref _9=123 _8 - 8 STU // to_be_ref in_c - STREF // in_c - ENDC // _16 - 10 PUSHINT // _16 _17=10 - CDATASIZE // a b c - OVER // a b c b - 8 NEQINT // a b c _21 - 101 THROWIF - DUP // a b c c - 1 NEQINT // a b c _26 - 101 THROWIF - SWAP // a c b - INC // a c _30 - SWAP // a _30 c - INC // a _30 _31 - TRIPLE // _29 - }> -""" - -@fif_codegen -""" - test2 PROC:<{ - ... 
- 16 PUSHINT // dict minv _45=16 - SDSKIPFIRST // dict minv - 16 PUSHINT // dict minv _47=16 - SDSKIPFIRST // dict minv - 16 PUSHINT // dict minv _52=16 - SDSKIPFIRST // dict minv - 16 PUSHINT // dict minv _57=16 - SDSKIPFIRST // dict minv - ... - 32 PUSHINT // dict minv1 minv2 minv3 found123 found456 _83=32 - 789 PUSHINT // dict minv1 minv2 minv3 found123 found456 _83=32 _84=789 - s0 s7 s7 XCHG3 // found456 minv1 minv2 minv3 found123 _84=789 dict _83=32 - DICTIGET - NULLSWAPIFNOT // found456 minv1 minv2 minv3 found123 _101 _102 - NIP // found456 minv1 minv2 minv3 found123 found789 - ... - 4 TUPLE // _86 - }> -""" - -@fif_codegen -""" - test3 PROC:<{ - // - NIL // _1 - initial1 CALLDICT // with34 - ... - TRIPLE // t tri - SECOND // t f - TPUSH // t - }> -""" - -@fif_codegen -""" - test4 PROC:<{ - // a b c - s2 s1 s0 PUSH3 // a b c a b c - DROP DROP DROP - s1 s0 s2 PUSH3 // a b c b c a - DROP DROP DROP - s0 s2 s1 PUSH3 // a b c c a b - DROP DROP DROP - s0 s2 XCHG // c b a - DROP DROP DROP - 10 PUSHINT // _7=10 - }> -""" - -@fif_codegen_avoid DECLPROC myBeginCell -@fif_codegen_avoid DECLPROC myStoreUint -@fif_codegen_avoid DECLPROC myStoreRef -@fif_codegen_avoid DECLPROC myComputeDataSize -@fif_codegen_avoid DECLPROC tryIdictGet -@fif_codegen_avoid DECLPROC myEmptyTuple -@fif_codegen_avoid DECLPROC myStoreUint -@fif_codegen_avoid DECLPROC initial2 -@fif_codegen_avoid DECLPROC add -@fif_codegen_avoid DECLPROC increase -*/ diff --git a/tolk-tester/tests/camel2.tolk b/tolk-tester/tests/camel2.tolk deleted file mode 100644 index 51344b843..000000000 --- a/tolk-tester/tests/camel2.tolk +++ /dev/null @@ -1,204 +0,0 @@ -// Here we also test "functions that just wrap other functions" like in camel1.tolk, -// but when they reorder arguments, e.g. -// > T f(x,y) { return anotherF(y,x); } -// This also works, even for wrappers of wrappers, even if anotherF is asm(with reorder). 
-// But swapping arguments may sometimes lead to bytecode changes (see test2), -// both with compute-asm-ltr and without it. - -fun myBeginCell(): builder { return beginCell(); } -fun myEndCell(b: builder): cell { return endCell(b); } -fun myStoreRef1(b: builder, c: cell): builder { return storeRef(b, c); } -fun myStoreRef2(c: cell, b: builder): builder { return storeRef(b, c); } -fun myStoreUint1(b: builder, x: int, bw: int): builder { return storeUint(b, x, bw); } -fun myStoreUint2(b: builder, bw: int, x: int): builder { return storeUint(b, x, bw); } - -fun computeDataSize1(c: cell, maxCells: int): (int, int, int) { return calculateCellSizeStrict(c, maxCells); } -fun computeDataSize2(maxCells: int, c: cell): (int, int, int) { return calculateCellSizeStrict(c, maxCells); } - -fun fake(a: int, b: int, c: int): void -asm "DROP DROP DROP"; -fun fake2(b: int, c: int, a: int) { return fake(a,b,c); } -fun fake3(c: int, a: int, b: int) { return fake(a,b,c); } -fun fake4(c: int, b: int, a: int) { return fake(a,b,c); } - -@method_id(101) -fun test1(): (int, int, int) { - var x: int = 1; - var y: int = 1; - var to_be_ref: cell = myBeginCell().myEndCell(); - var in_c: builder = myBeginCell().myStoreUint1(123, 8); - in_c = myStoreRef1(in_c, to_be_ref); - var (a, b, c) = computeDataSize1(in_c.myEndCell(), 10); - assert(!0, 101); - return (a, b + x, c + y); -} - -@method_id(102) -fun test2(): (int, int, int) { - var x: int = 1; - var y: int = 1; - var to_be_ref: cell = myBeginCell().myEndCell(); - var in_c: builder = myBeginCell().myStoreUint2(8, 123); - in_c = myStoreRef2(to_be_ref, in_c); - var (a, b, c) = computeDataSize2(10, in_c.myEndCell()); - return (a, b + x, c + y); -} - -@method_id(103) -fun test3(): (int, int, int) { - var x: int = 1; - var y: int = 1; - var to_be_ref: cell = beginCell().endCell(); - var in_c: builder = beginCell().storeUint(123, 8); - in_c = storeRef(in_c, to_be_ref); - var (a, b, c) = calculateCellSizeStrict(in_c.endCell(), 10); - return (a, b + x, 
c + y); -} - -fun beginCell1(): builder { return beginCell(); } -fun beginCell11(): builder { return beginCell1(); } -fun beginCell111(): builder { return beginCell11(); } - -fun endCell1(b: builder): cell { return endCell(b); } -fun endCell11(b: builder): cell { return endCell1(b); } - -fun beginParse1(c: cell): slice { return beginParse(c); } -fun beginParse11(c: cell): slice { return beginParse1(c); } - -fun storeInt1(b: builder, bw: int, x: int): builder { return storeInt(b, x, bw); } -fun storeInt11(bw: int, x: int, b: builder): builder { return storeInt1(b, bw, x); } -fun storeInt111(b: builder, x: int, bw: int): builder { return storeInt11(bw, x, b); } - -@method_id(104) -fun test4(): slice { - var b: builder = beginCell111(); - b = storeInt11(32, 1, b); - b = storeInt111(b, 2, 32).storeInt111(3, 32); - return b.endCell11().beginParse11(); -} - -@method_id(105) -fun test5(a: int, b: int, c: int): int { - fake(a, b, c); - fake2(b, c, a); - fake3(c, a, b); - fake4(c, b, a); - return a; -} - -fun main() { - throw 0; -} - -/** - method_id | in | out -@testcase | 101 | | 2 9 2 -@testcase | 102 | | 2 9 2 -@testcase | 103 | | 2 9 2 -@testcase | 104 | | CS{Cell{0018000000010000000200000003} bits: 0..96; refs: 0..0} - -test1 and test3 fif code is absolutely identical, test2 (due to reorder) is a bit different: - -@fif_codegen -""" - test1 PROC:<{ - // - NEWC // _5 - ENDC // to_be_ref - NEWC // to_be_ref _8 - 123 PUSHINT // to_be_ref _8 _9=123 - SWAP // to_be_ref _9=123 _8 - 8 STU // to_be_ref in_c - STREF // in_c - ENDC // _16 - 10 PUSHINT // _16 _17=10 - CDATASIZE // a b c - SWAP // a c b - INC // a c _23 - SWAP // a _23 c - INC // a _23 _24 - }> -""" - -@fif_codegen -""" - test2 PROC:<{ - // - NEWC // _5 - ENDC // to_be_ref - NEWC // to_be_ref _8 - 123 PUSHINT // to_be_ref _8 _10=123 - SWAP // to_be_ref _10=123 _8 - 8 STU // to_be_ref in_c - STREF // in_c - 10 PUSHINT - SWAP - ENDC - SWAP - CDATASIZE // a b c - SWAP // a c b - INC // a c _19 - SWAP // a _19 c - INC 
// a _19 _20 - }> -""" - -@fif_codegen -""" - test3 PROC:<{ - // - NEWC // _5 - ENDC // to_be_ref - NEWC // to_be_ref _8 - 123 PUSHINT // to_be_ref _8 _9=123 - SWAP // to_be_ref _9=123 _8 - 8 STU // to_be_ref in_c - STREF // in_c - ENDC // _16 - 10 PUSHINT // _16 _17=10 - CDATASIZE // a b c - SWAP // a c b - INC // a c _19 - SWAP // a _19 c - INC // a _19 _20 - }> -""" - -@fif_codegen -""" - test4 PROC:<{ - // - NEWC // b - 1 PUSHINT // b _3=1 - SWAP // _3=1 b - 32 STI // b - 2 PUSHINT - SWAP // _5=2 b - 32 STI - 3 PUSHINT - SWAP - 32 STI // b - ENDC // _11 - CTOS // _12 - }> -""" - -@fif_codegen -""" - test5 PROC:<{ - // a b c - s2 s1 s0 PUSH3 // a b c a b c - DROP DROP DROP - s2 s1 s0 PUSH3 // a b c a b c - DROP DROP DROP - s2 s1 s0 PUSH3 // a b c a b c - DROP DROP DROP - s2 PUSH - -ROT // a a b c - DROP DROP DROP - }> -""" - -@fif_codegen_avoid myStoreUint1 -@fif_codegen_avoid myStoreUint2 -*/ diff --git a/tolk-tester/tests/camel3.tolk b/tolk-tester/tests/camel3.tolk deleted file mode 100644 index 23b16e5fb..000000000 --- a/tolk-tester/tests/camel3.tolk +++ /dev/null @@ -1,95 +0,0 @@ -// Here we test that if you declare a wrapper like -// > builder beginCell() { return begin_cell(); } -// but use it NOT only as a direct call, BUT as a 1-st class function -// (save to a variable, return from a function, etc.) -// it also works, since a function becomes codegenerated (though direct calls are expectedly inlined). 
- -fun myBeginCell(): builder { return beginCell(); } -fun myEndCell(b: builder): cell { return endCell(b); } -fun myStoreRef(b: builder, c: cell): builder { return storeRef(b, c); } -fun myStoreUint3(i: int, bw: int, b: builder): builder { return storeUint(b, i, bw); } - -fun computeDataSize2(maxCells: int, c: cell): (int, int, int) { return calculateCellSizeStrict(c, maxCells); } - -fun myEmptyTuple(): tuple { return createEmptyTuple(); } -fun myTuplePush(t: tuple, value: X): tuple { return tuplePush(t, value); } -fun ~myTuplePush(t: tuple, value: X): (tuple, ()) { return ~tuplePush(t, value); } -fun myTupleGetFirst(t: tuple): X { return tupleFirst(t); } - - -@inline -fun getBeginEnd(): (auto, auto) { - return (myBeginCell, myEndCell); -} - -fun begAndStore(beg: auto, store: auto, x: int): builder { - return store(x, 8, beg()); -} - -fun test1(): (int, int, int) { - var (_, computer) = (0, computeDataSize2); - var (beg, end) = getBeginEnd(); - - var t: tuple = myEmptyTuple(); - t~myTuplePush(myStoreRef); - var refStorer = myTupleGetFirst(t); - - var x: int = 1; - var y: int = 1; - var to_be_ref: cell = myBeginCell().myEndCell(); - var in_c: builder = begAndStore(beg, myStoreUint3, 123); - in_c = refStorer(in_c, to_be_ref); - var (a, b, c) = computer(10, end(in_c)); - return (a, b + x, c + y); -} - -fun main(): (int, int, int) { - return test1(); -} - -/** - method_id | in | out -@testcase | 0 | | 2 9 2 - -@fif_codegen DECLPROC myBeginCell -@fif_codegen DECLPROC computeDataSize2 - -@fif_codegen -""" - myStoreUint3 PROC:<{ - // i bw b - SWAP // i b bw - STUX // _3 - }> -""" - -@fif_codegen -""" - myStoreRef PROC:<{ - // b c - SWAP // c b - STREF // _2 - }> -""" - -@fif_codegen -""" - CONT:<{ - computeDataSize2 CALLDICT - }> // computer - getBeginEnd INLINECALLDICT // computer beg end - NIL // computer beg end t - ... - NEWC // computer beg end refStorer _19 - ENDC // computer beg end refStorer to_be_ref - ... - CONT:<{ - myStoreUint3 CALLDICT - }> - ... 
- begAndStore CALLDICT // computer to_be_ref end refStorer in_c -""" - -@fif_codegen_avoid myEmptyTuple -@fif_codegen_avoid myTuplePush -*/ diff --git a/tolk-tester/tests/camel4.tolk b/tolk-tester/tests/camel4.tolk deleted file mode 100644 index a33e3fd36..000000000 --- a/tolk-tester/tests/camel4.tolk +++ /dev/null @@ -1,145 +0,0 @@ -// Here we test that a just-return function is not a valid wrapper, it will not be inlined. -// (doesn't use all arguments, has different pureness, has method_id, etc.) - -fun myStoreUint(b: builder, x: int, unused: int): builder { return storeUint(b, x, x); } -fun throwIf(excNo: int, cond: int) { assert(!cond) throw excNo; } - -fun initial1(x: auto) { return x; } -fun initial2(x: auto) { return initial1(x); } - -@pure -fun asm_func_4(a: int, b: (int, (int, int)), c: int): tuple -asm (b a c -> 0) "5 TUPLE"; -fun asmFunc4(a: int, b: (int, (int, int)), c: int): tuple { return asm_func_4(a, b, c); } - -fun postpone_elections(): int { - return false; -} - -fun setAndGetData(ret: int): int { - var c: cell = beginCell().storeUint(ret, 8).endCell(); - setContractData(c); - var s: slice = getContractData().beginParse(); - throwIf(101, 0); - return s~loadUint(8); -} - -fun setAndGetDataWrapper(ret: int): int { - return setAndGetData(ret); -} - -@method_id(101) -fun test1(): int { - var c: cell = beginCell().myStoreUint(32, 10000000).endCell(); - var s: slice = c.beginParse(); - return s~loadUint(32); -} - -get fun test2(ret: int): int { - return setAndGetDataWrapper(ret); -} - -@method_id(103) -fun test3(): int { - return initial2(10); -} - -global t: tuple; - -fun foo(x: int): int { - t~tuplePush(x); - return x * 10; -} - -@method_id(104) -fun test4(): (tuple, tuple) { - t = createEmptyTuple(); - var t2: tuple = asmFunc4(foo(11), (foo(22), (foo(33), foo(44))), foo(55)); - return (t, t2); -} - -@method_id(105) -fun test5(): int { - if (1) { - return postpone_elections(); - } - return 123; -} - -@method_id(106) -fun test6(): int { - return 
add2(1, 2); // doesn't inline since declared below -} - -fun main(ret: int): int { - return setAndGetDataWrapper(ret); -} - -fun onExternalMessage(ret: int): int { - return setAndGetData(ret); -} - -// currently, functions implemented after usage, can't be inlined, since inlining is legacy, not AST -fun add2(x: int, y: int): int { return x + y; } - -/** - method_id | in | out -@testcase | 101 | | 32 -@testcase | 103 | | 10 -@testcase | 104 | | [ 11 22 33 44 55 ] [ 220 330 440 110 550 ] -@testcase | 105 | | 0 -@testcase | 106 | | 3 -@testcase | 74435 | 99 | 99 -@testcase | 0 | 98 | 98 -@testcase | -1 | 97 | 97 - -@fif_codegen DECLPROC myStoreUint -@fif_codegen DECLPROC throwIf -@fif_codegen DECLPROC postpone_elections -@fif_codegen DECLPROC add2 -@fif_codegen 74435 DECLMETHOD test2 - -@fif_codegen -""" - test3 PROC:<{ - // - 10 PUSHINT // _0=10 - initial2 CALLDICT // _1 - }> -""" - -@fif_codegen -""" - test2 PROC:<{ - // ret - setAndGetData CALLDICT // _1 - }> -""" - -@fif_codegen -""" - 11 PUSHINT - foo CALLDICT - 22 PUSHINT - foo CALLDICT - 33 PUSHINT - foo CALLDICT - 44 PUSHINT - foo CALLDICT - 55 PUSHINT - foo CALLDICT - asmFunc4 CALLDICT // t2 -""" - -@fif_codegen -""" - test6 PROC:<{ - // - 1 PUSHINT // _0=1 - 2 PUSHINT // _0=1 _1=2 - add2 CALLDICT // _2 - }> -""" - -@fif_codegen_avoid setAndGetDataWrapper -*/ diff --git a/tolk-tester/tests/cells-slices.tolk b/tolk-tester/tests/cells-slices.tolk index 1bf0742d9..e1d28b8b1 100644 --- a/tolk-tester/tests/cells-slices.tolk +++ b/tolk-tester/tests/cells-slices.tolk @@ -1,121 +1,136 @@ -fun store_u32(b: builder, value: int): builder { - return b.storeUint(value, 32); -} -fun ~store_u32(b: builder, value: int): (builder, ()) { - return ~storeUint(b, value, 32); +fun store_u32(mutate self: builder, value: int): self { + return self.storeUint(value, 32); } -fun load_u32(cs: slice): (slice, int) { - return cs.loadUint(32); +fun load_u32(mutate self: slice): int { + return self.loadUint(32); } -fun my_loadInt(s: slice, 
len: int): (slice, int) - asm(s len -> 1 0) "LDIX"; // top is "value slice" -fun my_storeInt(b: builder, x: int, len: int): builder - asm(x b len) "STIX"; -fun ~my_storeInt(b: builder, x: int, len: int): (builder, ()) - asm(x b len) "STIX"; +fun myLoadInt(mutate self: slice, len: int): int + asm(-> 1 0) "LDIX"; +fun myStoreInt(mutate self: builder, x: int, len: int): self + asm(x self len) "STIX"; @method_id(101) fun test1(): [int,int,int,int,int] { var b: builder = beginCell().storeUint(1, 32); b = b.storeUint(2, 32); - b~storeUint(3, 32); + b.storeUint(3, 32); b = b.store_u32(4); - b~store_u32(5); + b.store_u32(5); var cs: slice = b.endCell().beginParse(); - var (cs redef, one: int) = cs.loadUint(32); - var (two: int, three: int) = (cs~loadUint(32), cs~load_u32()); - var (cs redef, four: int) = cs.load_u32(); - var five: int = cs~load_u32(); + var one: int = cs.loadUint(32); + var (two: int, three: int) = (cs.loadUint(32), cs.load_u32()); + var four: int = cs.load_u32(); + var five: int = cs.load_u32(); return [one,two,three,four,five]; } @method_id(102) fun test2(): [int,int,int] { - var b: builder = beginCell().my_storeInt(1, 32); - b = b.my_storeInt(2, 32); - b~my_storeInt(3, 32); + var b: builder = beginCell().myStoreInt(1, 32); + b = b.myStoreInt(2, 32); + b.myStoreInt(3, 32); var cs: slice = b.endCell().beginParse(); - var (cs redef, one: int) = cs.my_loadInt(32); - var (two: int, three: int) = (cs~my_loadInt(32), cs~my_loadInt(32)); + var one: int = cs.myLoadInt(32); + var (two: int, three: int) = (cs.myLoadInt(32), cs.myLoadInt(32)); return [one,two,three]; } @method_id(103) fun test3(ret: int): int { - var (_, same: int) = beginCell().storeUint(ret,32).endCell().beginParse().loadUint(32); + val same: int = beginCell().storeUint(ret,32).endCell().beginParse().loadUint(32); return same; } @method_id(104) fun test4(): [int,int] { - var b: builder = my_storeInt(beginCell(), 1, 32); - b = storeInt(storeInt(b, 2, 32), 3, 32); + var b: builder = 
beginCell().myStoreInt(1, 32); + b = b.storeInt(2, 32).storeInt(3, 32); var cs: slice = b.endCell().beginParse(); - var cs32: slice = cs.getFirstBits(32); // todo s.first_bits()~loadUint() doesn't work, 'lvalue expected' - var (one, _, three) = (cs32~loadInt(32), cs~skipBits(64), cs~load_u32()); + var (one, _, three) = (cs.getFirstBits(32).loadUint(32), cs.skipBits(64), cs.load_u32()); return [one,three]; } @method_id(105) fun test5(): [int,int] { - var cref: cell = endCell(store_u32(beginCell(), 105)); + var cref: cell = endCell(beginCell().store_u32(105)); var c: cell = beginCell().storeRef(cref).storeRef(cref).store_u32(1).endCell(); var cs: slice = beginParse(c); - // todo I want cs~loadRef().beginParse()~load_u32(), but 'lvalue expected' - var ref1 = cs~loadRef().beginParse(); - var ref2 = cs~loadRef().beginParse(); - var sto5x2: int = ref1~load_u32() + ref2~loadUint(32); - return [sto5x2, cs~load_u32()]; + var sto5x2: int = cs.loadRef().beginParse().load_u32() + cs.loadRef().beginParse().loadUint(32); + return [sto5x2, cs.load_u32()]; +} + +@method_id(106) +fun test6() { + return beginCell().storeUint(1, 32).storeUint(2, 32).storeUint(3, 32); } +@method_id(107) +fun test7() { + // since .store() methods now mutate, this piece of code works not as earlier (mutates uri_builder) + var uri_builder = beginCell(); + var uri_slice = uri_builder.storeSlice(".json").endCell().beginParse(); + var image_slice = uri_builder.storeSlice(".png").endCell().beginParse(); + return (uri_builder.getBuilderBitsCount(), uri_slice.getRemainingBitsCount(), image_slice.getRemainingBitsCount()); +} + +@method_id(108) +fun test8() { + var uri_builder = beginCell(); + var fresh = uri_builder; + var uri_slice = fresh.storeSlice(".json").endCell().beginParse(); + var fresh redef = uri_builder; + var image_slice = fresh.storeSlice(".png").endCell().beginParse(); + return (uri_builder.getBuilderBitsCount(), uri_slice.getRemainingBitsCount(), image_slice.getRemainingBitsCount()); +} -fun 
~sumNumbersInSlice(s: slice): (slice, int) { + +fun sumNumbersInSlice(mutate self: slice): int { var result = 0; - while (!s.isEndOfSliceBits()) { - result += s~loadUint(32); + while (!self.isEndOfSliceBits()) { + result += self.loadUint(32); } - return (s, result); + return result; } -@method_id(106) -fun test6() { +@method_id(110) +fun test10() { var ref = beginCell().storeInt(100, 32).endCell(); var s: slice = beginCell().storeInt(1, 32).storeInt(2, 32).storeRef(ref).endCell().beginParse(); - var result = (getRemainingBitsCount(s), s~sumNumbersInSlice(), getRemainingBitsCount(s), isEndOfSlice(s), isEndOfSliceBits(s), isEndOfSliceRefs(s)); - var ref2: cell = s~loadRef(); + var result = (getRemainingBitsCount(s), s.sumNumbersInSlice(), getRemainingBitsCount(s), isEndOfSlice(s), isEndOfSliceBits(s), isEndOfSliceRefs(s)); + var ref2: cell = s.loadRef(); var s2: slice = ref2.beginParse(); s.assertEndOfSlice(); - return (result, s2~loadInt(32), s2.isEndOfSlice()); + return (result, s2.loadInt(32), s2.isEndOfSlice()); } -@method_id(107) -fun test7() { +@method_id(111) +fun test11() { var s: slice = beginCell().storeInt(1, 32).storeInt(2, 32).storeInt(3, 32).storeInt(4, 32).storeInt(5, 32).storeInt(6, 32).storeInt(7, 32).endCell().beginParse(); var size1 = getRemainingBitsCount(s); - s~skipBits(32); + s.skipBits(32); var s1: slice = s.getFirstBits(64); - var n1 = s1~loadInt(32); + var n1 = s1.loadInt(32); var size2 = getRemainingBitsCount(s); - s~loadInt(32); + s.loadInt(32); var size3 = getRemainingBitsCount(s); - s~removeLastBits(32); + s.removeLastBits(32); var size4 = getRemainingBitsCount(s); - var n2 = s~loadInt(32); + var n2 = s.loadInt(32); var size5 = getRemainingBitsCount(s); return (n1, n2, size1, size2, size3, size4, size5); } -@method_id(108) -fun test108() { +@method_id(112) +fun test12() { var (result1, result2) = (0, 0); try { beginCell().storeRef(beginCell().endCell()).endCell().beginParse().assertEndOfSlice(); @@ -132,45 +147,45 @@ fun test108() { 
return (result1, result2); } -@method_id(109) -fun test109() { +@method_id(113) +fun test13() { var ref2 = beginCell().storeInt(1, 32).endCell(); var ref1 = beginCell().storeInt(1, 32).storeRef(ref2).endCell(); var c = beginCell().storeInt(444, 32).storeRef(ref1).storeRef(ref1).storeRef(ref1).storeRef(ref2).storeInt(4, 32).endCell(); var (n_cells1, n_bits1, n_refs1) = c.calculateCellSizeStrict(10); var s = c.beginParse(); - s~loadRef(); - s~loadRef(); - var n = s~loadInt(32); + s.loadRef(); + s.loadRef(); + var n = s.loadInt(32); var (n_cells2, n_bits2, n_refs2) = s.calculateSliceSizeStrict(10); return ([n_cells1, n_bits1, n_refs1], [n_cells2, n_bits2, n_refs2], n); } -@method_id(110) +@method_id(114) fun test110(x: int) { var s = beginCell().storeBool(x < 0).storeBool(0).storeBool(x).endCell().beginParse(); - return (s~loadBool(), s~loadBool(), s~loadBool()); + return (s.loadBool(), s.loadBool(), s.loadBool()); } -@method_id(111) +@method_id(115) fun test111() { var s = beginCell().storeMessageOp(123).storeMessageQueryId(456) .storeAddressNone().storeAddressNone() .storeUint(0, 32) .storeUint(123, 32).storeUint(456, 64).storeUint(789, 64) .endCell().beginParse(); - var op1 = s~loadUint(32); - var q1 = s~loadUint(64); + var op1 = s.loadUint(32); + var q1 = s.loadUint(64); if (s.addressIsNone()) { - s~skipBits(2); + s.skipBits(2); } - if (s~loadBool() == 0) { - assert(s~loadBool() == 0) throw 444; - s~skipBits(32); + if (s.loadBool() == 0) { + assert(s.loadBool() == 0) throw 444; + s.skipBouncedPrefix(); } - var op2 = s~loadMessageOp(); - var q2 = s~loadMessageQueryId(); - s~skipBits(64); + var op2 = s.loadMessageOp(); + var q2 = s.loadMessageQueryId(); + s.skipBits(64); s.assertEndOfSlice(); assert(isMessageBounced(0x001)) throw 444; return (op1, q1, op2, q2); @@ -186,11 +201,31 @@ fun main(): int { @testcase | 103 | 103 | 103 @testcase | 104 | | [ 1 3 ] @testcase | 105 | | [ 210 1 ] -@testcase | 106 | | 64 3 0 0 -1 0 100 -1 -@testcase | 107 | | 2 3 224 192 160 128 
96 -@testcase | 108 | | 9 100 -@testcase | 109 | | [ 3 128 5 ] [ 2 96 3 ] 444 -@testcase | 110 | -1 | -1 0 -1 -@testcase | 110 | 0 | 0 0 0 -@testcase | 111 | | 123 456 123 456 +@testcase | 107 | | 72 40 72 +@testcase | 108 | | 0 40 32 +@testcase | 110 | | 64 3 0 0 -1 0 100 -1 +@testcase | 111 | | 2 3 224 192 160 128 96 +@testcase | 112 | | 9 100 +@testcase | 113 | | [ 3 128 5 ] [ 2 96 3 ] 444 +@testcase | 114 | -1 | -1 0 -1 +@testcase | 114 | 0 | 0 0 0 +@testcase | 115 | | 123 456 123 456 + +Note, that since 'compute-asm-ltr' became on by default, chaining methods codegen is not quite optimal. +@fif_codegen +""" + test6 PROC:<{ + // + NEWC // _1 + 1 PUSHINT // _1 _2=1 + SWAP // _2=1 _1 + 32 STU // _0 + 2 PUSHINT // _0 _6=2 + SWAP // _6=2 _0 + 32 STU // _0 + 3 PUSHINT // _0 _10=3 + SWAP // _10=3 _0 + 32 STU // _0 + }> +""" */ diff --git a/tolk-tester/tests/co1.tolk b/tolk-tester/tests/co1.tolk index 5ad9d8e41..f124e1de8 100644 --- a/tolk-tester/tests/co1.tolk +++ b/tolk-tester/tests/co1.tolk @@ -43,9 +43,6 @@ asm "SDEQ"; fun stslicer(b: builder, s: slice): builder asm "STSLICER"; -fun myStoreUint(b: builder, x: int, len: int): builder { return storeUint(b, x, len); } -fun endSlice(b: builder): slice { return endcs(b); } - fun main() { var i1: int = iget1(); var i2: int = iget2(); @@ -59,8 +56,8 @@ fun main() { var s2: slice = sget2(); var s3: slice = newc().stslicer(str1).stslicer(str2r).endcs(); - assert(sdeq(s1, newc().myStoreUint(str1int, 12 * nibbles).endcs())) throw int111; - assert(sdeq(s2, newc().storeUint(str2int, 6 * nibbles).endSlice())) throw 112; + assert(sdeq(s1, newc().storeUint(str1int, 12 * nibbles).endcs())) throw int111; + assert(sdeq(s2, newc().storeUint(str2int, 6 * nibbles).endcs())) throw 112; assert(sdeq(s3, newc().storeUint(0x636f6e737431AABBCC, 18 * nibbles).endcs())) throw 113; var i4: int = iget240(); diff --git a/tolk-tester/tests/dicts-demo.tolk b/tolk-tester/tests/dicts-demo.tolk index 5852b175c..291bd2ea4 100644 ---
a/tolk-tester/tests/dicts-demo.tolk +++ b/tolk-tester/tests/dicts-demo.tolk @@ -1,8 +1,7 @@ import "@stdlib/tvm-dicts" -fun ~addIntToIDict(iDict: cell, key: int, number: int): (cell, ()) { - iDict~iDictSetBuilder(32, key, beginCell().storeInt(number, 32)); - return (iDict, ()); +fun addIntToIDict(mutate self: cell, key: int, number: int): void { + return self.iDictSetBuilder(32, key, beginCell().storeInt(number, 32)); } fun calculateDictLen(d: cell) { @@ -15,40 +14,40 @@ fun calculateDictLen(d: cell) { return len; } -fun ~loadTwoDigitNumberFromSlice(s: slice): (slice, int) { - var n1 = s~loadInt(8); - var n2 = s~loadInt(8); - return (s, (n1 - 48) * 10 + (n2 - 48)); +fun loadTwoDigitNumberFromSlice(mutate self: slice): int { + var n1 = self.loadInt(8); + var n2 = self.loadInt(8); + return (n1 - 48) * 10 + (n2 - 48); } @method_id(101) fun test101(getK1: int, getK2: int, getK3: int) { var dict = createEmptyDict(); - dict~uDictSetBuilder(32, 1, beginCell().storeUint(1, 32)); - var (old1: slice, found1) = dict~uDictSetAndGet(32, getK1, beginCell().storeUint(2, 32).endCell().beginParse()); - var (old2: slice, found2) = dict~uDictSetAndGet(32, getK2, beginCell().storeUint(3, 32).endCell().beginParse()); + dict.uDictSetBuilder(32, 1, beginCell().storeUint(1, 32)); + var (old1: slice, found1) = dict.uDictSetAndGet(32, getK1, beginCell().storeUint(2, 32).endCell().beginParse()); + var (old2: slice, found2) = dict.uDictSetAndGet(32, getK2, beginCell().storeUint(3, 32).endCell().beginParse()); var (cur3: slice, found3) = dict.uDictGet(32, getK3); return ( - found1 ? old1~loadUint(32) : -1, - found2 ? old2~loadUint(32) : -1, - found3 ? cur3~loadUint(32) : -1 + found1 ? old1.loadUint(32) : -1, + found2 ? old2.loadUint(32) : -1, + found3 ? 
cur3.loadUint(32) : -1 ); } @method_id(102) fun test102() { var dict = createEmptyDict(); - dict~addIntToIDict(2, 102); - dict~addIntToIDict(1, 101); - dict~addIntToIDict(4, 104); - dict~addIntToIDict(3, 103); + dict.addIntToIDict(2, 102); + dict.addIntToIDict(1, 101); + dict.addIntToIDict(4, 104); + dict.addIntToIDict(3, 103); var deleted = createEmptyTuple(); var shouldBreak = false; while (!shouldBreak) { - var (kDel, kVal, wasDel) = dict~iDictDeleteLastAndGet(32); + var (kDel, kVal, wasDel) = dict.iDictDeleteLastAndGet(32); if (wasDel) { - deleted~tuplePush([kDel, kVal~loadInt(32)]); + deleted.tuplePush([kDel, kVal.loadInt(32)]); } else { shouldBreak = true; } @@ -59,38 +58,38 @@ fun test102() { @method_id(103) fun test103() { var dict = createEmptyDict(); - dict~uDictSetBuilderIfNotExists(32, 1,beginCell().storeInt(1, 32)); - dict~uDictSetBuilderIfNotExists(32, 1,beginCell().storeInt(1, 32)); + dict.uDictSetBuilderIfNotExists(32, 1,beginCell().storeInt(1, 32)); + dict.uDictSetBuilderIfNotExists(32, 1,beginCell().storeInt(1, 32)); var len1 = calculateDictLen(dict); - dict~uDictSetBuilderIfExists(32, 2,beginCell().storeInt(1, 32)); - dict~uDictSetBuilderIfExists(32, 2,beginCell().storeInt(1, 32)); + dict.uDictSetBuilderIfExists(32, 2,beginCell().storeInt(1, 32)); + dict.uDictSetBuilderIfExists(32, 2,beginCell().storeInt(1, 32)); var len2 = calculateDictLen(dict); - dict~uDictSetBuilder(32, 3,beginCell().storeInt(1, 32)); - dict~uDictSetBuilderIfExists(32, 3,beginCell().storeInt(1, 32)); + dict.uDictSetBuilder(32, 3,beginCell().storeInt(1, 32)); + dict.uDictSetBuilderIfExists(32, 3,beginCell().storeInt(1, 32)); var len3 = calculateDictLen(dict); - var (delK1, _, _) = dict~uDictDeleteFirstAndGet(32); - var (delK2, _, _) = dict~uDictDeleteFirstAndGet(32); - var (delK3, _, _) = dict~uDictDeleteFirstAndGet(32); + var (delK1, _, _) = dict.uDictDeleteFirstAndGet(32); + var (delK2, _, _) = dict.uDictDeleteFirstAndGet(32); + var (delK3, _, _) = 
dict.uDictDeleteFirstAndGet(32); return (len1, len2, len3, delK1, delK2, delK3); } @method_id(104) fun test104() { var dict = createEmptyDict(); - dict~sDictSetBuilder(32, "7800", beginCell().storeUint(5 + 48, 8).storeUint(6 + 48, 8)); - dict~sDictSet(32, "key1", "12"); - var (old1, _) = dict~sDictSetAndGet(32, "key1", "34"); - var (old2, _) = dict~sDictDeleteAndGet(32, "key1"); + dict.sDictSetBuilder(32, "7800", beginCell().storeUint(5 + 48, 8).storeUint(6 + 48, 8)); + dict.sDictSet(32, "key1", "12"); + var (old1, _) = dict.sDictSetAndGet(32, "key1", "34"); + var (old2, _) = dict.sDictDeleteAndGet(32, "key1"); var (restK, restV, _) = dict.sDictGetFirst(32); - var (restK1, restV1, _) = dict~sDictDeleteLastAndGet(32); + var (restK1, restV1, _) = dict.sDictDeleteLastAndGet(32); assert (restK.isSliceBitsEqual(restK1)) throw 123; assert (restV.isSliceBitsEqual(restV1)) throw 123; return ( - old1~loadTwoDigitNumberFromSlice(), - old2~loadTwoDigitNumberFromSlice(), - restV~loadTwoDigitNumberFromSlice(), - restK~loadTwoDigitNumberFromSlice(), - restK~loadTwoDigitNumberFromSlice() + old1.loadTwoDigitNumberFromSlice(), + old2.loadTwoDigitNumberFromSlice(), + restV.loadTwoDigitNumberFromSlice(), + restK.loadTwoDigitNumberFromSlice(), + restK.loadTwoDigitNumberFromSlice() ); } diff --git a/tolk-tester/tests/imports/use-dicts-err.tolk b/tolk-tester/tests/imports/use-dicts-err.tolk index a4ee9aede..c5ba89d22 100644 --- a/tolk-tester/tests/imports/use-dicts-err.tolk +++ b/tolk-tester/tests/imports/use-dicts-err.tolk @@ -1,18 +1,18 @@ fun prepareDict_3_30_4_40_5_x(valueAt5: int): cell { var dict: cell = createEmptyDict(); - dict~idict_set_builder(32, 3, begin_cell().store_int(30, 32)); - dict~idict_set_builder(32, 4, begin_cell().store_int(40, 32)); - dict~idict_set_builder(32, 5, begin_cell().store_int(valueAt5, 32)); + dict.idict_set_builder(32, 3, begin_cell().store_int(30, 32)); + dict.idict_set_builder(32, 4, begin_cell().store_int(40, 32)); + dict.idict_set_builder(32, 5, 
begin_cell().store_int(valueAt5, 32)); return dict; } fun lookupIdxByValue(idict32: cell, value: int): int { var cur_key = -1; do { - var (cur_key redef, cs: slice, found: int) = idict32.idict_get_next?(32, cur_key); + var (cur_key redef, cs: slice, found: int) = idict32.idictGetNext(32, cur_key); // one-line condition (via &) doesn't work, since right side is calculated immediately if (found) { - if (cs~load_int(32) == value) { + if (cs.loadInt(32) == value) { return cur_key; } } diff --git a/tolk-tester/tests/imports/use-dicts.tolk b/tolk-tester/tests/imports/use-dicts.tolk index 358a5673a..26a9a9ccd 100644 --- a/tolk-tester/tests/imports/use-dicts.tolk +++ b/tolk-tester/tests/imports/use-dicts.tolk @@ -2,9 +2,9 @@ import "@stdlib/tvm-dicts" fun prepareDict_3_30_4_40_5_x(valueAt5: int): cell { var dict: cell = createEmptyDict(); - dict~iDictSetBuilder(32, 3, beginCell().storeInt(30, 32)); - dict~iDictSetBuilder(32, 4, beginCell().storeInt(40, 32)); - dict~iDictSetBuilder(32, 5, beginCell().storeInt(valueAt5, 32)); + dict.iDictSetBuilder(32, 3, beginCell().storeInt(30, 32)); + dict.iDictSetBuilder(32, 4, beginCell().storeInt(40, 32)); + dict.iDictSetBuilder(32, 5, beginCell().storeInt(valueAt5, 32)); return dict; } @@ -14,7 +14,7 @@ fun lookupIdxByValue(idict32: cell, value: int): int { var (cur_key redef, cs: slice, found: int) = idict32.iDictGetNext(32, cur_key); // one-line condition (via &) doesn't work, since right side is calculated immediately if (found) { - if (cs~loadInt(32) == value) { + if (cs.loadInt(32) == value) { return cur_key; } } diff --git a/tolk-tester/tests/invalid-call-1.tolk b/tolk-tester/tests/invalid-call-1.tolk new file mode 100644 index 000000000..1c32422ee --- /dev/null +++ b/tolk-tester/tests/invalid-call-1.tolk @@ -0,0 +1,9 @@ +fun main() { + return true(); +} + +/** +@compilation_should_fail +The message is weird now, but later I'll rework error messages anyway. 
+@stderr cannot apply expression of type int to an expression of type (): cannot unify type () -> ??3 with int + */ diff --git a/tolk-tester/tests/invalid-call-2.tolk b/tolk-tester/tests/invalid-call-2.tolk new file mode 100644 index 000000000..5a8c9fa5d --- /dev/null +++ b/tolk-tester/tests/invalid-call-2.tolk @@ -0,0 +1,14 @@ +fun add1(x: int) { + return x + 1; +} + +fun main() { + val adder_fn = add1; + var x = 10; + return adder_fn(mutate x); +} + +/** +@compilation_should_fail +@stderr `mutate` used for non-mutate argument + */ diff --git a/tolk-tester/tests/invalid-call-3.tolk b/tolk-tester/tests/invalid-call-3.tolk new file mode 100644 index 000000000..ac98df704 --- /dev/null +++ b/tolk-tester/tests/invalid-call-3.tolk @@ -0,0 +1,12 @@ +fun with2Params(x: int, y: int) { + +} + +fun main() { + return with2Params(1); +} + +/** +@compilation_should_fail +@stderr too few arguments in call to `with2Params`, expected 2, have 1 + */ diff --git a/tolk-tester/tests/invalid-call-4.tolk b/tolk-tester/tests/invalid-call-4.tolk new file mode 100644 index 000000000..c8f7dcebf --- /dev/null +++ b/tolk-tester/tests/invalid-call-4.tolk @@ -0,0 +1,13 @@ +fun methodWith1Param(self: int, param: int) { + +} + +fun main() { + val x = 10; + x.methodWith1Param(2, "asdf"); +} + +/** +@compilation_should_fail +@stderr too many arguments in call to `methodWith1Param`, expected 1, have 2 + */ diff --git a/tolk-tester/tests/invalid-call-5.tolk b/tolk-tester/tests/invalid-call-5.tolk new file mode 100644 index 000000000..89ab026a9 --- /dev/null +++ b/tolk-tester/tests/invalid-call-5.tolk @@ -0,0 +1,13 @@ +fun inc(x: int) { + return x + 1; +} + +fun main() { + return inc(_); +} + +/** +@compilation_should_fail +@stderr rvalue expected +@stderr inc(_) + */ diff --git a/tolk-tester/tests/invalid-call-6.tolk b/tolk-tester/tests/invalid-call-6.tolk new file mode 100644 index 000000000..cbf598066 --- /dev/null +++ b/tolk-tester/tests/invalid-call-6.tolk @@ -0,0 +1,12 @@ +fun nothing() { +} + 
+fun main() { + val x = 0; + return x.nothing(); +} + +/** +@compilation_should_fail +@stderr `nothing` has no parameters and can not be called as method + */ diff --git a/tolk-tester/tests/invalid-call-7.tolk b/tolk-tester/tests/invalid-call-7.tolk new file mode 100644 index 000000000..4ad038c9e --- /dev/null +++ b/tolk-tester/tests/invalid-call-7.tolk @@ -0,0 +1,14 @@ +fun main() { + beginCell() + .storeAddressNone() + .storeUint(3, 32) + .storeUnexisting() + .storeInt(1, 32) + .endCell(); +} + +/** +@compilation_should_fail +@stderr undefined symbol `storeUnexisting` +@stderr .storeUnexisting() + */ diff --git a/tolk-tester/tests/invalid-call-8.tolk b/tolk-tester/tests/invalid-call-8.tolk new file mode 100644 index 000000000..c613d7d9c --- /dev/null +++ b/tolk-tester/tests/invalid-call-8.tolk @@ -0,0 +1,8 @@ +fun main() { + var incoming_ton: int = get_incoming_value().3(); +} + +/** +@compilation_should_fail +@stderr expected method name, got `3` + */ diff --git a/tolk-tester/tests/invalid-cmt-old.tolk b/tolk-tester/tests/invalid-cmt-old.tolk index eaf58db83..58927d3a0 100644 --- a/tolk-tester/tests/invalid-cmt-old.tolk +++ b/tolk-tester/tests/invalid-cmt-old.tolk @@ -1,5 +1,5 @@ fun main(): int { - ;; this is not a comment + ;; here is not a comment } /** diff --git a/tolk-tester/tests/invalid-declaration-6.tolk b/tolk-tester/tests/invalid-declaration-6.tolk index 731c299ba..42cb7b953 100644 --- a/tolk-tester/tests/invalid-declaration-6.tolk +++ b/tolk-tester/tests/invalid-declaration-6.tolk @@ -1,8 +1,8 @@ -fun main() { - val imm = 10; +get seqno(self: int) { + return 0; } /** @compilation_should_fail -@stderr immutable variables are not supported yet -*/ +@stderr get methods can't have `mutate` and `self` params + */ diff --git a/tolk-tester/tests/invalid-mutate-1.tolk b/tolk-tester/tests/invalid-mutate-1.tolk new file mode 100644 index 000000000..237940fc9 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-1.tolk @@ -0,0 +1,11 @@ +fun f(x: int) {} + +fun 
cantAssignToVal() { + val x = 10; + f(x += 1); +} + +/** +@compilation_should_fail +@stderr modifying an immutable variable `x` + */ diff --git a/tolk-tester/tests/invalid-mutate-10.tolk b/tolk-tester/tests/invalid-mutate-10.tolk new file mode 100644 index 000000000..8cd37c517 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-10.tolk @@ -0,0 +1,16 @@ +fun increment(mutate x: int) { + x = x + 1; +} + +fun cantCallMutatingAsAMember() { + var x = 0; + x.increment(); + return x; +} + +/** +@compilation_should_fail +@stderr function `increment` mutates parameter `x` +@stderr consider calling `increment(mutate x)`, not `x.increment`() +@stderr alternatively, rename parameter to `self` to make it a method + */ diff --git a/tolk-tester/tests/invalid-mutate-11.tolk b/tolk-tester/tests/invalid-mutate-11.tolk new file mode 100644 index 000000000..9f2c2601e --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-11.tolk @@ -0,0 +1,8 @@ +fun load32(self: slice): int { + return self.loadUint(32); +} + +/** +@compilation_should_fail +@stderr modifying `self` (call a mutating method), which is immutable by default + */ diff --git a/tolk-tester/tests/invalid-mutate-12.tolk b/tolk-tester/tests/invalid-mutate-12.tolk new file mode 100644 index 000000000..c8c8c68ec --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-12.tolk @@ -0,0 +1,14 @@ +fun increment(mutate x: int) { + +} + +fun main() { + var x = 0; + var inc = increment; + inc(x); +} + +/** +@compilation_should_fail +@stderr saving `increment` into a variable is impossible, since it has `mutate` parameters and thus can only be called directly + */ diff --git a/tolk-tester/tests/invalid-mutate-13.tolk b/tolk-tester/tests/invalid-mutate-13.tolk new file mode 100644 index 000000000..ad861fd88 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-13.tolk @@ -0,0 +1,8 @@ +fun onInternalMessage(mutate in_msg_body: slice) { + +} + +/** +@compilation_should_fail +@stderr invalid declaration of a reserved function + */ diff --git 
a/tolk-tester/tests/invalid-mutate-14.tolk b/tolk-tester/tests/invalid-mutate-14.tolk new file mode 100644 index 000000000..2ba645d13 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-14.tolk @@ -0,0 +1,8 @@ +fun main(cs: slice) { + return loadInt(cs, 32); +} + +/** +@compilation_should_fail +@stderr `loadInt` is a mutating method; consider calling `cs.loadInt()`, not `loadInt(cs)` + */ diff --git a/tolk-tester/tests/invalid-mutate-15.tolk b/tolk-tester/tests/invalid-mutate-15.tolk new file mode 100644 index 000000000..f6874fb8c --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-15.tolk @@ -0,0 +1,12 @@ +fun asdf(mutate cs: slice) {} + +fun main(cs: slice) { + cs.asdf(); +} + +/** +@compilation_should_fail +@stderr function `asdf` mutates parameter `cs` +@stderr consider calling `asdf(mutate cs)`, not `cs.asdf`() +@stderr alternatively, rename parameter to `self` to make it a method + */ diff --git a/tolk-tester/tests/invalid-mutate-2.tolk b/tolk-tester/tests/invalid-mutate-2.tolk new file mode 100644 index 000000000..7501fdaf5 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-2.tolk @@ -0,0 +1,10 @@ +fun cantAssignToVal() { + val x = 10; + var y = 20; + [y, x] = [30, 40]; +} + +/** +@compilation_should_fail +@stderr modifying an immutable variable `x` + */ diff --git a/tolk-tester/tests/invalid-mutate-3.tolk b/tolk-tester/tests/invalid-mutate-3.tolk new file mode 100644 index 000000000..c49973f71 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-3.tolk @@ -0,0 +1,11 @@ +const op_increase = 0x123; + +fun cantAssignToConst() { + var x = 10; + (x, op_increase) = (20, 30); +} + +/** +@compilation_should_fail +@stderr modifying an immutable variable `op_increase` + */ diff --git a/tolk-tester/tests/invalid-mutate-4.tolk b/tolk-tester/tests/invalid-mutate-4.tolk new file mode 100644 index 000000000..f25a707cb --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-4.tolk @@ -0,0 +1,14 @@ + +fun inc(mutate x: int) { + x += 1; +} + +fun 
cantPassToMutatingFunction() { + val myVal = 10; + inc(mutate myVal); +} + +/** +@compilation_should_fail +@stderr modifying an immutable variable `myVal` + */ diff --git a/tolk-tester/tests/invalid-mutate-5.tolk b/tolk-tester/tests/invalid-mutate-5.tolk new file mode 100644 index 000000000..fd8d11924 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-5.tolk @@ -0,0 +1,14 @@ +fun cantCallMutatingMethod(c: cell) { + val s: slice = c.beginParse(); + if (1) { + var s: slice = c.beginParse(); + s.loadRef(); // this is ok, 's' is another variable + } + val i = s.loadUint(32); +} + +/** +@compilation_should_fail +@stderr modifying an immutable variable `s` (call a mutating method) +@stderr s.loadUint + */ diff --git a/tolk-tester/tests/invalid-mutate-6.tolk b/tolk-tester/tests/invalid-mutate-6.tolk new file mode 100644 index 000000000..bb577ae47 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-6.tolk @@ -0,0 +1,16 @@ +const op_increase = 0x123; + +fun inc(mutate x: int): int { + x += 10; + return x + 1; +} + +fun cantCallMutatingFunctionWithImmutable() { + return inc(mutate op_increase); +} + +/** +@compilation_should_fail +@stderr modifying an immutable variable `op_increase` (call a mutating function) +@stderr inc(mutate op_increase) + */ diff --git a/tolk-tester/tests/invalid-mutate-7.tolk b/tolk-tester/tests/invalid-mutate-7.tolk new file mode 100644 index 000000000..5b6b6afe4 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-7.tolk @@ -0,0 +1,15 @@ +fun incBoth(mutate x: int, mutate y: int) { + x += 10; + y += 10; +} + +fun cantCallMutatingFunctionWithRvalue() { + var x = 10; + incBoth(mutate x, mutate 30); +} + +/** +@compilation_should_fail +@stderr lvalue expected (call a mutating function) +@stderr incBoth(mutate x, mutate 30) + */ diff --git a/tolk-tester/tests/invalid-mutate-8.tolk b/tolk-tester/tests/invalid-mutate-8.tolk new file mode 100644 index 000000000..0dd7c5687 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-8.tolk @@ -0,0 +1,10 @@ +fun 
cantRedefImmutable() { + val x = 10; + var (y: int, x redef) = (20, 30); + return (y, x); +} + +/** +@compilation_should_fail +@stderr modifying an immutable variable `x` (left side of assignment) + */ diff --git a/tolk-tester/tests/invalid-mutate-9.tolk b/tolk-tester/tests/invalid-mutate-9.tolk new file mode 100644 index 000000000..7e79052e4 --- /dev/null +++ b/tolk-tester/tests/invalid-mutate-9.tolk @@ -0,0 +1,9 @@ +fun increment(self: int) { + self = self + 1; +} + +/** +@compilation_should_fail +@stderr modifying `self` (left side of assignment), which is immutable by default +@stderr probably, you want to declare `mutate self` + */ diff --git a/tolk-tester/tests/invalid-self-1.tolk b/tolk-tester/tests/invalid-self-1.tolk new file mode 100644 index 000000000..40b54f163 --- /dev/null +++ b/tolk-tester/tests/invalid-self-1.tolk @@ -0,0 +1,8 @@ +fun cantReturnFromSelf(mutate self: int): self { + return 2; +} + +/** +@compilation_should_fail +@stderr invalid return from `self` function + */ diff --git a/tolk-tester/tests/invalid-self-2.tolk b/tolk-tester/tests/invalid-self-2.tolk new file mode 100644 index 000000000..c4aa758b2 --- /dev/null +++ b/tolk-tester/tests/invalid-self-2.tolk @@ -0,0 +1,8 @@ +fun cantUseSelfAsType(mutate x: int) { + var y: self = x; +} + +/** +@compilation_should_fail +@stderr `self` type can be used only as a return type of a function (enforcing it to be chainable) + */ diff --git a/tolk-tester/tests/invalid-self-3.tolk b/tolk-tester/tests/invalid-self-3.tolk new file mode 100644 index 000000000..330ac2495 --- /dev/null +++ b/tolk-tester/tests/invalid-self-3.tolk @@ -0,0 +1,10 @@ +fun cantReturnSelf(mutate x: int): int { + x += 1; + return self; +} + +/** +@compilation_should_fail +@stderr using `self` in a non-member function (it does not accept the first `self` parameter) +@stderr return self + */ diff --git a/tolk-tester/tests/invalid-self-4.tolk b/tolk-tester/tests/invalid-self-4.tolk new file mode 100644 index 000000000..f4856a465 --- 
/dev/null +++ b/tolk-tester/tests/invalid-self-4.tolk @@ -0,0 +1,9 @@ +fun cantReturnNothingFromSelf(mutate self: int): self { + self = self + 1; +} + +/** +@compilation_should_fail +@stderr missing return; forgot `return self`? +@stderr } + */ diff --git a/tolk-tester/tests/invalid-self-5.tolk b/tolk-tester/tests/invalid-self-5.tolk new file mode 100644 index 000000000..a007a93c2 --- /dev/null +++ b/tolk-tester/tests/invalid-self-5.tolk @@ -0,0 +1,15 @@ +fun increment(mutate self: int): self { + self = self + 1; + return self; +} + +fun cantReturnAnotherSelf(mutate self: int): self { + self = self + 1; + var x = 0; + return x.increment(); +} + +/** +@compilation_should_fail +@stderr invalid return from `self` function + */ diff --git a/tolk-tester/tests/invalid-self-6.tolk b/tolk-tester/tests/invalid-self-6.tolk new file mode 100644 index 000000000..588c70ab2 --- /dev/null +++ b/tolk-tester/tests/invalid-self-6.tolk @@ -0,0 +1,8 @@ +fun increment(x: int, self: int): int { + return x + self; +} + +/** +@compilation_should_fail +@stderr `self` can only be the first parameter + */ diff --git a/tolk-tester/tests/invalid-self-7.tolk b/tolk-tester/tests/invalid-self-7.tolk new file mode 100644 index 000000000..2fa2da492 --- /dev/null +++ b/tolk-tester/tests/invalid-self-7.tolk @@ -0,0 +1,8 @@ +fun increment(x: int): int { + return self + 1; +} + +/** +@compilation_should_fail +@stderr using `self` in a non-member function + */ diff --git a/tolk-tester/tests/invalid-typing-3.tolk b/tolk-tester/tests/invalid-typing-3.tolk new file mode 100644 index 000000000..fb4b0bc51 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-3.tolk @@ -0,0 +1,19 @@ +fun incInt(mutate self: int): self { + self += 1; + return self; +} + +fun appendBuilder(mutate self: builder): self { + self.storeUint(1, 32); + return self; +} + +fun cantMixDifferentThis() { + var x = 0; + return x.incInt().appendBuilder().incInt(); +} + +/** +@compilation_should_fail +@stderr cannot apply function 
appendBuilder : builder -> (builder, ()) to arguments of type int: cannot unify type int with builder + */ diff --git a/tolk-tester/tests/invalid-typing-4.tolk b/tolk-tester/tests/invalid-typing-4.tolk new file mode 100644 index 000000000..0e6553690 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-4.tolk @@ -0,0 +1,14 @@ +fun incNotChained(mutate self: int) { + self = self + 1; +} + +fun cantCallNotChainedMethodsInAChain(x: int) { + return x.incNotChained().incNotChained(); +} + +/** +The error is very weird, but nevertheless, the type system prevents of doing such errors. + +@compilation_should_fail +@stderr cannot apply function incNotChained : int -> (int, ()) to arguments of type (): cannot unify type () with int + */ diff --git a/tolk-tester/tests/invalid-typing-5.tolk b/tolk-tester/tests/invalid-typing-5.tolk new file mode 100644 index 000000000..ba3450de2 --- /dev/null +++ b/tolk-tester/tests/invalid-typing-5.tolk @@ -0,0 +1,14 @@ +fun incNotChained(mutate self: int) { + self = self + 1; +} + +fun failWhenReturnANotChainedValue(x: int): int { + return x.incNotChained(); +} + +/** +The error is very weird, but nevertheless, the type system prevents of doing such errors. + +@compilation_should_fail +@stderr previous function return type int cannot be unified with return statement expression type (): cannot unify type () with int + */ diff --git a/tolk-tester/tests/known-bugs.tolk b/tolk-tester/tests/known-bugs.tolk new file mode 100644 index 000000000..4de6a3752 --- /dev/null +++ b/tolk-tester/tests/known-bugs.tolk @@ -0,0 +1,27 @@ +fun increment(mutate x: int): int { + x = x + 1; + return x; +} + +@method_id(101) +fun bugWithModifyingMethodInsideSameExpression() { + /* + The same bug existed in FunC: +#pragma allow-post-modification; +(int, int) ~increment(int x) { x = x + 5; return (x, x); } +int main() { int x = 0; x += x~increment(); return x; } + It's related to using a variable modified by ~method inside the same expression. 
+ */ + var x = 0; + x = x + increment(mutate x); + return x; +} + +fun main() { + +} + +/** +// correct: 2 +@testcase | 101 | | 1 + */ diff --git a/tolk-tester/tests/mutate-methods.tolk b/tolk-tester/tests/mutate-methods.tolk new file mode 100644 index 000000000..b9184ca9a --- /dev/null +++ b/tolk-tester/tests/mutate-methods.tolk @@ -0,0 +1,337 @@ +fun incrementInPlace(mutate self: int, byValue: int): void { + self = self + byValue; +} + +fun incrementTwoInPlace(mutate self: int, mutate y: int, byValue: int): int { + self.incrementInPlace(byValue); + y += byValue; + return self + y; +} + +@method_id(101) +fun testIncrement1() { + var x = 50; + var y = 30; + incrementInPlace(mutate x, 10); + incrementInPlace(mutate x, 10); + incrementInPlace(mutate y, 10); + y.incrementInPlace(10); + incrementInPlace(mutate y, 10); + return (x, y); +} + +@method_id(102) +fun testIncrement2() { + var x = 50; + var y = 30; + val sum1 = incrementTwoInPlace(mutate x, mutate y, 10); + val sum2 = x.incrementTwoInPlace(mutate y, 10); + return (x, y, sum1, sum2); +} + + +fun load_next(mutate cs: slice): int { + return loadInt(mutate cs, 32); +} + +fun myLoadInt(mutate self: slice, len: int): int + asm(-> 1 0) "LDIX"; +fun myStoreInt(mutate self: builder, x: int, len: int): self + asm(x self len) "STIX"; + +@inline_ref +fun unpack_utils_info(mutate utils_info_sl: slice): (int, int) { + return ( + utils_info_sl.myLoadInt(32), + utils_info_sl.myLoadInt(32) + ); +} + +@method_id(103) +fun testSlices1() { + var b: builder = beginCell().storeInt(1, 32).myStoreInt(2, 32); + b.myStoreInt(3, 32); + var c: cell = b.myStoreInt(4, 32).storeInt(5, 32).endCell(); + var cs = c.beginParse(); + var first = cs.preloadInt(32); + unpack_utils_info(mutate cs); + return (first, cs.myLoadInt(32), cs.loadInt(32)); +} + +fun load_decimal_symbol(mutate self: slice): int { + // load decimal from bits using utf-8 table + var n: int = self.loadUint(8); + n = n - 48; + assert(n >= 0) throw 400; + assert(n <= 9) throw 
400; + return n; +} + +@method_id(104) +fun testSlices2() { + var cs = "123"; + return (cs.load_decimal_symbol(), cs.load_decimal_symbol(), cs.load_decimal_symbol()); +} + +global v1: int; +global v2: int; +global v3: int; + +@method_id(105) +fun testGlobals() { + v1 = 0; + v2 = 0; + v3 = 100; + v3 += incrementTwoInPlace(mutate v1, mutate v2, 5); + return (v1, v2, v3); +} + +fun withNameShadowing(mutate x: int, pivot: int, extra: int) { + x += pivot; + if (pivot < 100) { + var x = 100 + extra; + if (pivot < 50) { + var x = 50 + extra; + return x + extra; + } else { + x += extra; + return x + extra; + } + } else { + x += extra; + return -100 + extra; + } +} + +@method_id(106) +fun testNameShadowing() { + var x = 0; + var sum = 0; + sum += withNameShadowing(mutate x, 100, 10); + sum += withNameShadowing(mutate x, 50, 10); + sum += withNameShadowing(mutate x, 0, 10); + return (x, sum); +} + +fun updateTwoItems(mutate self: (int, int), byValue: int) { + val (first, second) = self; + self = (first + byValue, second + byValue); +} + +@method_id(107) +fun testMutableTensor() { + var t = (40, 50); + t.updateTwoItems(10); + updateTwoItems(mutate t, 10); + return t; +} + +@pure +fun myStoreUint(mutate self: builder, x: int, len: int): self + asm(x self len) "STIX"; + +@pure +fun myStoreU32(mutate self: builder, x: int): self { + return self.storeUint(x, 32); +} + +fun getSumOfNumbersInCell(c: cell): int { + var sum = 0; + var s = c.beginParse(); + var n_numbers = s.getRemainingBitsCount() / 32; + repeat (n_numbers) { + sum += s.loadUint(32); + } + return sum; +} + +@method_id(110) +fun testStoreChaining() { + var b = beginCell().storeUint(1, 32).storeUint(2, 32).storeUint(3, 32); + b.storeUint(4, 32); + b.myStoreUint(5, 32).storeUint(6, 32); + storeUint(mutate b, 7, 32); + b = b.storeUint(8, 32); + b = b.storeUint(9, 32).storeUint(10, 32); + + return getBuilderBitsCount(b); +} + +@method_id(111) +fun testStoreChainingCustom() { + var b = beginCell().myStoreUint(1, 
32).myStoreUint(2, 32).myStoreUint(3, 32); + b.myStoreUint(4, 32); + b.myStoreUint(5, 32).myStoreUint(6, 32); + myStoreUint(mutate b, 7, 32); + b = b.myStoreUint(8, 32); + b = b.myStoreUint(9, 32).myStoreUint(10, 32); + val sum1 = getSumOfNumbersInCell(b.endCell()); + + b = beginCell().myStoreU32(1).storeUint(2, 32).myStoreU32(3); + b.myStoreU32(4); + b.myStoreU32(5).myStoreU32(6); + myStoreU32(mutate b, 7); + b = b.myStoreU32(8); + b = b.storeUint(9, 32).myStoreU32(10); + val sum2 = getSumOfNumbersInCell(b.endCell()); + + return (sum1, sum2); +} + +fun myStoreU32_and_mutate_x(mutate self: builder, mutate x: int): void { + return myStoreUint(mutate self, x += 10, 32); +} + +@method_id(112) +fun testStoreAndMutateBoth() { + var x = 3; + var b: builder = beginCell().myStoreUint(1, 32); + b.myStoreU32_and_mutate_x(mutate x); + b.myStoreU32(3).myStoreU32_and_mutate_x(mutate x); + b.myStoreU32_and_mutate_x(mutate x); + + var cs: slice = b.endCell().beginParse(); + var (n1,n2,n3,n4,n5) = (cs.loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32),cs.loadUint(32)); + assert(n5 == x) throw 100; + + return [n1,n2,n3,n4,n5]; +} + +global ccc: builder; + +@method_id(113) +fun testStoreChainingForGlobal() { + ccc = beginCell().storeUint(1, 32).myStoreUint(2, 32).myStoreU32(3); + ccc.storeUint(4, 32); + ccc.storeUint(5, 32).myStoreU32(6); + storeUint(mutate ccc, 7, 32); + ccc = ccc.myStoreU32(8); + ccc = ccc.storeUint(9, 32).myStoreUint(10, 32); + + return getBuilderBitsCount(ccc); +} + +fun alwaysThrows(): int { throw 123; return 123; } +fun loadIntFromCell(c: cell, len: int) { return c.beginParse().loadUint(len); } + +@method_id(114) +fun testLoadIntForTemporaryObject() { + val c0 = beginCell().storeUint(0, 32).endCell(); + val c4 = beginCell().storeUint(4, 32).endCell(); + return ( + beginCell().storeUint(1, 32).endCell().beginParse().loadUint(32), + beginCell().storeUint(2, 32).endCell().beginParse().loadUint(32), + c0.beginParse().loadUint(32) ? 
alwaysThrows() : loadIntFromCell(c4, 32) + ); +} + +@pure +fun myStoreUint_pure(mutate self: builder): void + asm "STIX"; + +fun myStoreUint_impure(mutate self: builder): void + asm "STIX"; + +fun testStoreUintPureUnusedResult() { + var b = beginCell(); + b.myStoreUint_pure(); + var s = b.endCell().beginParse(); + val ii = s.loadUint(32); + return 0; +} + +fun testStoreUintImpureUnusedResult() { + var b = beginCell(); + b.myStoreUint_impure(); + var s = b.endCell().beginParse(); + val ii = s.loadUint(32); + return 0; +} + +global counter: int; + +fun writeNext2(mutate self: builder): self { + return self.storeUint(counter += 1, 32).storeUint(counter += 1, 32); +} + +fun resetCounter(mutate self: builder): self { + counter = 0; + return self; +} + +@method_id(115) +fun testExplicitReturn() { + counter = 0; + return ( + beginCell().writeNext2().writeNext2().resetCounter().writeNext2().endCell().getSumOfNumbersInCell(), + counter + ); +} + + +fun main(){} + +/** +@testcase | 101 | | 70 60 +@testcase | 102 | | 70 50 100 120 +@testcase | 103 | | 1 3 4 +@testcase | 104 | | 1 2 3 +@testcase | 105 | | 5 5 110 +@testcase | 106 | | 160 110 +@testcase | 107 | | 60 70 +@testcase | 110 | | 320 +@testcase | 111 | | 55 55 +@testcase | 112 | | [ 1 13 3 23 33 ] +@testcase | 113 | | 320 +@testcase | 114 | | 1 2 4 +@testcase | 115 | | 13 2 + +@fif_codegen +""" + incrementInPlace PROC:<{ + // self byValue + ADD // self + }> +""" + +@fif_codegen +""" + testIncrement2 PROC:<{ + ... 
+ incrementTwoInPlace CALLDICT // x y sum1 + -ROT + 10 PUSHINT // sum1 x y _9=10 + incrementTwoInPlace CALLDICT // sum1 x y sum2 + s1 s3 s0 XCHG3 // x y sum1 sum2 + }> +""" + +@fif_codegen +""" + load_next PROC:<{ + // cs + 32 LDI // _1 cs + SWAP // cs _1 + }> +""" + +@fif_codegen +""" + testStoreUintPureUnusedResult PROC:<{ + // + 0 PUSHINT // _12=0 + }> +""" + +@fif_codegen +""" + testStoreUintImpureUnusedResult PROC:<{ + // + NEWC // b + STIX // _2 + DROP // + 0 PUSHINT // _12=0 + }> +""" + + */ diff --git a/tolk-tester/tests/no-spaces.tolk b/tolk-tester/tests/no-spaces.tolk index aecfdabaa..0d4c3b678 100644 --- a/tolk-tester/tests/no-spaces.tolk +++ b/tolk-tester/tests/no-spaces.tolk @@ -30,13 +30,11 @@ global `some()var`:int; } @method_id(113)fun`unary+bitwise-constant`():[int,int,int]{ - // todo spaces are still not allowed before ~ - return [~-~~+-3, ~+3-~ 9, -(-~+-20-~ 10+3+~ 38&39)]; + return [~-~~+-3, ~+3-~9, -(-~+-20-~ 10+3+~38&39)]; } @method_id(114)fun`unary+bitwize-parametrized`(c3:int, c9:int, c20:int, c10:int, c38:int):[int,int,int]{ - // todo spaces are still not allowed before ~ - return [~-~~+-c3, ~+c3-~ `c9`, -(-~+-c20-~ c10+c3+~ c38&39)]; + return [~-~~+-c3, ~+c3-~`c9`, -(-~+-c20-~c10+c3+~c38&39)]; } fun add3(a: int, b: int, c: int) { return a+b+c; } @@ -49,16 +47,16 @@ fun add3(a: int, b: int, c: int) { return a+b+c; } return [add3(fst2,snd2,trd2),add3(fst1,snd1,trd1)]; } -fun `load:u32`(cs: slice): (slice, int) { - return cs.loadUint(32); +fun `load:u32`(mutate self: slice): int { + return self.loadUint(32); } @method_id(116) fun `call_~_via_backticks`():[int,int,int,int] { var b:builder = beginCell().storeUint(1, 32).storeUint(2, 32).storeUint(3, 32).storeUint(4, 32); var `cs`:slice = b.endCell().beginParse(); - var (`cs` redef,one:int) = `cs`.`loadUint`(32); - var (two:int,three:int) = (`cs`~`loadUint`(32), cs~`load:u32`()); - var (cs redef,four:int) = cs.`load:u32`(); + val one:int=`cs`.`loadUint`(32); + val (two:int,three:int) = 
(`cs`.`loadUint`(32), cs.`load:u32`()); + val four:int = cs.`load:u32`(); return [one,two,three,four]; } diff --git a/tolk-tester/tests/null-keyword.tolk b/tolk-tester/tests/null-keyword.tolk index f0f85fd60..cdfe5acf9 100644 --- a/tolk-tester/tests/null-keyword.tolk +++ b/tolk-tester/tests/null-keyword.tolk @@ -12,8 +12,8 @@ fun test1() { var t = createEmptyTuple(); do { - var num = numbers~listNext(); - t~tuplePush(num); + var num = numbers.listNext(); + t.tuplePush(num); } while (numbers != null); return (h, numbers == null, t); @@ -63,7 +63,7 @@ fun test4() { @method_id(105) fun test5() { var n = getUntypedNull(); - return !(null == n) ? n~loadInt(32) : 100; + return !(null == n) ? n.loadInt(32) : 100; } @method_id(106) @@ -75,7 +75,7 @@ fun test6(x: int) { fun test7() { var b = beginCell().storeMaybeRef(null); var s = b.endCell().beginParse(); - var c = s~loadMaybeRef(); + var c = s.loadMaybeRef(); return (null == c) * 10 + (b != null); } @@ -145,14 +145,14 @@ fun main() { """ test7 PROC:<{ ... 
- LDOPTREF // b _17 _16 + LDOPTREF // b _20 _19 DROP // b c - ISNULL // b _10 - 10 MULCONST // b _12 - SWAP // _12 b - ISNULL // _12 _13 - 0 EQINT // _12 _14 - ADD // _15 + ISNULL // b _13 + 10 MULCONST // b _15 + SWAP // _15 b + ISNULL // _15 _16 + 0 EQINT // _15 _17 + ADD // _18 }> """ */ diff --git a/tolk-tester/tests/parse-address.tolk b/tolk-tester/tests/parse-address.tolk new file mode 100644 index 000000000..385aa3b53 --- /dev/null +++ b/tolk-tester/tests/parse-address.tolk @@ -0,0 +1,113 @@ +const cc1 = "0:ca6e321c7cce9ecedf0a8ca2492ec8592494aa5fb5ce0387dff96ef6af982a3e"a; +const cc2 = "EQDKbjIcfM6ezt8KjKJJLshZJJSqX7XOA4ff-W72r5gqPrHF"a; + +fun verifyAddr(addr: slice, workchain: int, number: int) { + assert (addr.getRemainingBitsCount() == 3 + 8 + 256) throw 112; + addr.skipBits(3); + assert (addr.loadUint(8) == workchain) throw 111; + assert (addr.loadUint(256) == number) throw 111; +} + +fun main() { + verifyAddr("Ef8zMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzMzM0vF"a, 255, 23158417847463239084714197001737581570653996933128112807891516801582625927987); + verifyAddr("EQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAM9c"a, 0, 0); + verifyAddr("EQCRDM9h4k3UJdOePPuyX40mCgA4vxge5Dc5vjBR8djbEKC5"a, 0, 65607996509792174074532427555986248720836864382484024657400295821210434460432); + verifyAddr("UQCOgxbCOjOLH_cEuQdGgS23zBM5SrQQepMFedjK-oixYbis"a, 0, 64460038539088394980732229180523693489682583665805557562964506609821558550881); + verifyAddr("EQDa4VOnTYlLvDJ0gZjNYm5PXfSmmtL6Vs6A_CZEtXCNICq_"a, 0, 99002318936150612861744867526221033858534811876886359650897405270877291973920); + verifyAddr("Ef8BtXO9bcTMXjg9bgivKh4lhJmZWQPP6_rb9vfjlTP5FJtM"a, 255, 772910975127952880303441415761050161913031788763061162001556772893733681428); + verifyAddr("Ef89xh-uy860-mCcvS8zcAUs8bApmxLGygDLEKjUk5RL-311"a, 255, 27941138149036269893630478666581900122707382189183906805784676408403709676539); + verifyAddr("Ef_vA6yRfmt2P4UHnxlrQUZFcBnKux8mL2eMqBgpeMFPorr4"a, 255, 
108109262375472472702582493362335418330829651067377177643099076957184687427490); + verifyAddr("Ef8o6AM9sUZ8rOqLFY8PYeaC3gbopZR1BMkE8fcD0r5NnmCi"a, 255, 18502444830824300068094395885436326119386947594392869497312068745716154912158); + verifyAddr("Ef_fvrd0hBoVJUxoi7wH173Zk8NPiyVvxh5IoYSjEYZbOhsu"a, 255, 101202732337223525952216789200341489000836292542250083765062769181728788863802); + verifyAddr("Ef9nzj6RBc4mQ6p3ng7mGJ7tp7MbzERhe7obkM9A0wnCCEcf"a, 255, 46952625717497919357580310066854892621799390294920450816077086267929711460872); + verifyAddr("Ef9rU-_AAnBkHB71TIC3QvUf5LcAsvj0B4IoYzAXLpEFd5CA"a, 255, 48545777798729612074233611768739897492467685225150339217043102685589809464695); + verifyAddr("Ef9LynHHKgBxY6-l-W_dWN-CtGT2_ji5rN3EzOI-p9zWEfq6"a, 255, 34281152017620085319078796986198022632548048219136747083019177301186013091345); + verifyAddr("Ef9hMd78gzSiVsK0zz0AHtEja8x1UoB_NDZMjn-l86NQK_2Y"a, 255, 43962460814164090767878334494257755557842170134382045184921495822637115592747); + verifyAddr("Ef80FNJ5NJO4-0QwlVAWckUZXdk-PfYDexDZ1-ju9SxhF0A6"a, 255, 23557057702048801338698514499604413540742716310574705490458593067566768087319); + verifyAddr("Ef_fdIbThooPs4_r2DE_Z6ZsWycJdHLnsuKAJHTcbaZaipez"a, 255, 101071650030310556115830521522496708686577365303530257137459798093298869361290); + verifyAddr("Ef_lva0qEiZhWrrZJl-IJxyCcTQmmTo71fIWyQ31HxJ8NurV"a, 255, 103914771557158282349484109182290824591675204108148026180964788916630125182006); + verifyAddr("Ef8sMGKypw006AeRYqimLjmY2Ufp-SHk8C0ZJBNgVBlzw_Nr"a, 255, 19987255184378161380023126214650814972824352533523055905552702178965809886147); + verifyAddr("EQDKbjIcfM6ezt8KjKJJLshZJJSqX7XOA4ff+W72r5gqPrHF"a, 0, 91561894446285001782438967260723928368560331318344957259023550817453781559870); + verifyAddr("EQCaSCHVak-jIc9ANutTAfHpZNM3YdGky7yaDzsTrg0WhFlm"a, 0, 69783625181781015447914682554083924798054947959007050695795761257887453484676); + verifyAddr("EQBS9U3AfD15fGmOtRMXQAxcPVBwNuItfLcDni9fkbTyyNX0"a, 0, 
37523067738561024305547433298623118197038688994386001017161816416175242146504); + verifyAddr("EQBiMNL9qNWMAkJHuM0BFneYcuHL17kzS4pswpaEO-NGWrFG"a, 0, 44412924025649114419413541526870954696667907029239618728289150652715284776538); + verifyAddr("EQAUzE-Nef80O9dLZy91HfPiOb6EEQ8YqyWKyIU-KeaYLNUi"a, 0, 9407242825041766837311851458322335726136775042891143504070507665010681354284); + verifyAddr("EQD-nhrinjv0B4LTgr0dRHTHwH1MOsgGhKBXJZd7vESMZUf1"a, 0, 115166810931401616117484448645661180241548402534908005320733783571353775148133); + verifyAddr("EQAVD3Fni9I6j8XeSIl-wAGBEhqhame6OtAY0GScKT0D9X6f"a, 0, 9525855215156855607080079714361451576383963668563135377495902959388099150837); + verifyAddr("EQC6ACq3VANZjqfRBy7JMHkpLwqQ9qyYJsCIGx1mYbQgxaKw"a, 0, 84130484652351964071210477536969520113177637645401392541565606610268614566085); + verifyAddr("EQCIJLNFIko5CvpKn9oAkrDgLocDOoD4vwmHxNx_fsG_LkwW"a, 0, 61579391178099797614367237687950512448308156724136883899001108680249616482094); + verifyAddr("EQCe4AYIBce1pAk2qJJPSs1OzyZRlKjkfq8zuC8D7erv6DUP"a, 0, 71861245445432818728925844931259040612664802586395398157190478191760507596776); + verifyAddr("EQCtrtTXEAoSpoERmiqOnICe9LHxn2N89N4BH9qdHlrG-U0i"a, 0, 78559023162479717496981724991265882229440558807791659796411897368395464230649); + verifyAddr("EQBBlraAps0OZaB9Q8ePQn2wVAaL1G411A-dNppyWe3X3GIT"a, 0, 29666621803903557832193058147214384979915773445007872807927344851911086823388); + verifyAddr("EQBiASqUqaVizrozLRbszkWC2kETbkhpO2qniDVDPPg2_0W8"a, 0, 44328719889509369519441680467651025944540360433148852643949783408843779749631); + verifyAddr("EQBu2Q1EO8gIoNA1qoGWnHUudKfmqlKEDTQE-DxN-_4sdg14"a, 0, 50137910719490808065414827264266674858051167131188257457782826342827836714102); + verifyAddr("EQA5bvxWd5-q2vJUVqR9AlbEIfdFysLR0PXGgVlBf8x5hWuF"a, 0, 25977927117604457079092522008276392864656238504700352770597256138254994667909); + verifyAddr("EQBguMSHjFv5bfoOdshr3ruS9ymSZzhRKMovoNrxGxZXvmee"a, 0, 
43748489720571123896506696370504498290006245978262404519821633796370658121662); + verifyAddr("EQAxL0oF1-zNgimPKthbDnYS4xj94rHtfNRN7_Pd1r2LNNv3"a, 0, 22246882279393590648219842750911786376759362211171398419754426796438233910068); + verifyAddr("EQANX1uRKGZfyPIwEaIXrR0ZOqadct5q10dvKxWIxx7SQqzW"a, 0, 6048549475100840191738010856156544571222758030966479209409932714701987172930); + verifyAddr("EQBitdFDoU5DWSjfKq7AsO29RIwAnBzzvcVVSn5ekQoB9Liv"a, 0, 44647902768175374073183447303109856895983123510911038181495138821771906122228); + verifyAddr("EQBgbux7VSjqJHP7ByRK1q4QuVZbpSCesNgvz5qad3lfXX_B"a, 0, 43618018778298854282398238948198420936771670943015013768514626213000552996701); + verifyAddr("EQDisBd8U7M3CEOZ8gcWCdetdmJi3AI31zIT5qBwOdmUbsxY"a, 0, 102533830955233207294921564956803510155400341370448300698800842506363763004526); + verifyAddr("EQAZpn_eynVlf7Ii2d6jP_p1URPrdF9F3S7DiudQyelkjzwE"a, 0, 11602000355550451044739442929923326898313570892134000961608306166632391730319); + verifyAddr("EQDE0HBgfkOiqHezLtExBGTvOs8eitthHQosBjW3BmDy1y2K"a, 0, 89021598108837008984355105304701054698583123510131754065320641619941010764503); + verifyAddr("EQDyT36zktBN9PVWvZ1joRxhIfEUgCPt4F2isa-enUA_d6CP"a, 0, 109600164736599393471831241268953938618560132398692391517933933264745646800759); + verifyAddr("EQDSMUGwt25IQd3_yHjI03F71G8Kp2GMaMEv2TiWoTKbsyRH"a, 0, 95072727086440754059372943502908629555499501854161516009430039520728770059187); + verifyAddr("EQAgK1EcrvEuL9sCtoj3cNhVNOuf3lo5GIPE2gn1fwZZYB3j"a, 0, 14550545393206146289454646242321274637527057595221202748348667645886114191712); + verifyAddr("EQCDKqL5w_6MD-Z7AOButu-uR-ZJTsgNU1fu464hn9grY81U"a, 0, 59328315557704100696483472039557119625141880163887490602190749720459366378339); + verifyAddr("EQB1aVMyFBhnlYXmQjsma0S63kvxKU7ccZKFNCFTwX7ASPv4"a, 0, 53106696421104300082516512931084483581353095629408473618166869610568148238408); + verifyAddr("EQBbjrXHoxDyh1ZYGBdBoQgLaScxW6pZR1hEhJC8BqF-5Kgq"a, 0, 
41412616102566803060532874463898939692666425753852274254609049615175463829220); + verifyAddr("EQC-QeZ13QP0lszxNKt380fCWuaV94vwC/bfuqmrlg1/fJPA"a, 0, 86055876869280374285292827775555707420719385459150221433115419095878595346300); + verifyAddr("EQAiUwpF27vXCngqNhf_TQ5E_06ah0G4zuSrnfU7CLLaht5H"a, 0, 15525356059048115813946213102829493539706126913595626308144289257869196581510); + verifyAddr("EQBqiVjmhe2iVGmgOSDO1FGjSiz_AMtb1w7lLEiP4XIF_SFy"a, 0, 48187833566271418625754761625661652107159264793429628379411792200127405491709); + verifyAddr("EQDmwvaK2d_SbaPdpOM60effPWeKsksgDVwFPEyxuftM396K"a, 0, 104376425077737068747642645125299653296942252727305637929378053253273342397663); + verifyAddr("EQDWtPZZgF7wvIMUHZQojuD3utiuivsW7WslRJ33dgv-5yc8"a, 0, 97114682311034709685427168495629428400170984047839002197324103884924936519399); + verifyAddr("EQAA7z0JI0JKqbN-1uENKz9JrxIO5ZRY-ehMeg9fPncx50Ck"a, 0, 422697701361909095759185681783393186844038628935759044330165207027374567911); + verifyAddr("EQBVUHRoCq6coQYUwOAhGSoAmQ6Mpm7dFlDYon6HMgWV8Ftr"a, 0, 38588743302295548905191533977469452945717219128199196974980570837505276220912); + verifyAddr("EQCTdvDCf0bA5dOPI1-44tB2ZfNcMGiklzvg27TovgDEqM6E"a, 0, 66700138358140658950710678965721715920748906761125730971082529064117803730088); + verifyAddr("EQBDBKE5WGKIlnoi3OOzw7vkKKIX55eWjPvgxJWwek8AyL2J"a, 0, 30313140970524770883308749215942283658935592719811899513010665548955593408712); + verifyAddr("EQAvCSyLCo21GrqLAifdov4WkOxuGQCjMRxgF1cXSaNzLHZe"a, 0, 21274912932379789207153885262858116665851037273450532982121514600400844714796); + verifyAddr("EQCsLpDeHB2qpRbmsCb_0xmsYVNx1NgeYrvHGT1TDrHkDgL4"a, 0, 77880084760844670718511036557364450189323549135231036747480176919181282894862); + verifyAddr("EQCTQ8kPwyX92r48gCIL_pLN_RcQT9ghZygnmDTYkOkuW_j5"a, 0, 66609755171046741472463433430016629628609840960137226492652665879987546041947); + verifyAddr("EQCTrFRSHt-tfk7WxK9ZHQmqLcgxXxTK7wGfCEbqgY2W9Mcx"a, 0, 
66794468397542182731534559853537484892417154018190804733043974345563210356468); + verifyAddr("EQCv28y49GdaLncoclv0ISdDlMUY_cxDPGNWFCPT8t4GuqUJ"a, 0, 79543100951881731989812212377176715376834409392116644269458867858071577560762); + verifyAddr("EQCVL-k6deDR56Z8pcb0Btg0lGfaivOGfdDCD1vvyRsyL9vS"a, 0, 67479265933941008511790471646564661743401752930295407567346938670637286896175); + verifyAddr("EQD6t2dXDjZxF1DqunKF-8dEWivJdliY_0FYiCXnthuqnDCa"a, 0, 113402258385556889021060606279033166272577193563727959698596277924908309916316); + verifyAddr("EQDE98XNzXiPq7VnbJJ2M4-Ht3tX_OWR0xUTTnDC8NObLmyU"a, 0, 89091094739778473356272490822716056624384395256388481953562551087642791090990); + verifyAddr("EQDfeRDE1TDhwt478CDR0Q7MDwqcTUhfjqyTT59mgoAaF6f7"a, 0, 101079669463449311486034260688909914923832300293253430457119371423825321269783); + verifyAddr("EQDijcEyUKa-QgCbeGlggQk1uBtt2ZRHyW4Y4gB4R6MN6RLW"a, 0, 102473162609487797404330889623966425536887610061087715571345738626121871855081); + verifyAddr("EQDOtFOt41skbjBkZF89oYXpoDECjlxIzD-ShWAOYyzuxqLA"a, 0, 93495056812773926196963707371665512785268729004579280701087533371037976424134); + verifyAddr("EQDuJKSFWU7AYqH6KLFfAbYvMuz346eWmJvG6_2NYE42_B4T"a, 0, 107715199938628393100813870735031557263256555616273999363057194279168054802172); + verifyAddr("EQDwGu4vFv1e3wn8min_iy7OPJXegOYTFQ5bZFZ5a5ZPiBpX"a, 0, 108602665568837301744601989570019709742180613578164394799026726718721456754568); + verifyAddr("EQC4G2ph6AS_mD_-cIv4aIYm1z5jAgCW_TTDEr72ygXOP2X-"a, 0, 83274003234732023403481554420859495155084746906198543572711543697320249249343); + verifyAddr("EQDpUkyAa6lZ12P3ZB2PL_rmWwI1I55BU4kxw_rssFL5dswA"a, 0, 105534303174146507629736518862713754948570412188900908177600861330298381728118); + verifyAddr("EQDoIA20MF1qEcSPtROdCu5ukGx9dVjgMeJh1oQ4A4cf_Jif"a, 0, 104993214557977037193613824776415934089204193426692473563548548423424814817276); + verifyAddr("EQDpUkyAa6lZ12P3ZB2PL_rmWwI1I55BU4kxw_rssFL5dswA"a, 0, 
105534303174146507629736518862713754948570412188900908177600861330298381728118); + verifyAddr("EQClLO4EnZ_rTyV1GVpWy53pLgWJRki5c4ZzuM_1O_ClBkO9"a, 0, 74711004027159342540251007601464500186374346239921204216319145006974068892934); + verifyAddr("EQDmkj65Ab_m0aZaW8IpKw4kYqIgITw_HRstYEkVQ6NIYCyW"a, 0, 104290347741656803921830951060768893809692975574470790497562993373950614128736); + verifyAddr("EQCqNTwAYUNhPFS0RgqZoTLGJcQQxbAJ7csUo4YO3_TONLab"a, 0, 76987241268612358571638783428744566580605181728938801022059780105627411729972); + verifyAddr("EQCL3DmCynaRK7-vsfeNmd4Jj-UxAIHPvA4qS2xwaL6UpLbF"a, 0, 63260589232981964910240894899061676480139492286430589202252472895352724165796); + verifyAddr("EQDbU1SVEjBE73oUqgAoM9gDcShUkM5EC2PgoCjuwVUKo-Ee"a, 0, 99203745911752606845646497420891218522647962685916739950275357890977532807843); + verifyAddr("EQD02VdcF4TDbCKLLhZJ39NQTu6aWq2LnLjp0oXqbNu_BANK"a, 0, 110748343802097970709980079967961144373090790244250392237586606542170934198020); + verifyAddr("EQBynBO23ywHy_CgarY9NK9FTz0yDsG82PtcbSTQgGoXwiuA"a, 0, 51839428943991432793039248316067731096592274748149794482308513726460953499586); + verifyAddr("UQDKbjIcfM6ezt8KjKJJLshZJJSqX7XOA4ff-W72r5gqPuwA"a, 0, 91561894446285001782438967260723928368560331318344957259023550817453781559870); + verifyAddr("EQAUTbQiM522Y_XJ_T98QPhPhTmb4nV--VSPiha8kC6kRfPO"a, 0, 9183547432069678364603018431103042146626948674383548774683927217595824907333); + verifyAddr("EQBlqsm144Dq6SjbPI4jjZvA1hqTIP3CvHovbIfW_t-SCALE"a, 0, 45985353862647206060987594732861817093328871106941773337270673759241903247880); + verifyAddr("UQDKbjIcfM6ezt8KjKJJLshZJJSqX7XOA4ff-W72r5gqPuwA"a, 0, 91561894446285001782438967260723928368560331318344957259023550817453781559870); + verifyAddr("kQDKbjIcfM6ezt8KjKJJLshZJJSqX7XOA4ff-W72r5gqPgpP"a, 0, 91561894446285001782438967260723928368560331318344957259023550817453781559870); + verifyAddr("kf-Dfdg-YQXaR2Q97gZJ4fGBtmV1DHOU1y1RPyyZZtRy_Ikh"a, 255, 
59475331506450494976393625198911249698879029820580340449086829444312920781564); + verifyAddr("0:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8"a, 0, 37304138005561100291416421295333982606153966175434134130332440738068913455320); + verifyAddr("0:0000000000000000000000000000000000000000000000000000000000000000"a, 0, 0); + verifyAddr("0:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFfffffffffffffffffffffffffffff"a, 0, 115792089237316195423570985008687907853269984665640564039457584007913129639935); + verifyAddr("0:zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"a, 0, 23158417847463239084714197001737581570653996933128112807891516801582625927987); + verifyAddr("0:0000000000000000000000000000000000000000000000000000000000000000"a, 0, 0); + verifyAddr("1:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8"a, 1, 37304138005561100291416421295333982606153966175434134130332440738068913455320); + verifyAddr("9:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8"a, 9, 37304138005561100291416421295333982606153966175434134130332440738068913455320); + verifyAddr("99:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8"a, 99, 37304138005561100291416421295333982606153966175434134130332440738068913455320); + verifyAddr("-1:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8"a, 255, 37304138005561100291416421295333982606153966175434134130332440738068913455320); + + return cc1.isSliceBitsEqual(cc2); +} + +/** +@testcase | 0 | | -1 + */ diff --git a/tolk-tester/tests/pure-functions.tolk b/tolk-tester/tests/pure-functions.tolk index 6e7a6ddad..8598e85c2 100644 --- a/tolk-tester/tests/pure-functions.tolk +++ b/tolk-tester/tests/pure-functions.tolk @@ -13,8 +13,8 @@ fun f_pure2(): int { fun get_contract_data(): (int, int) { var c: cell = getContractData(); var cs: slice = c.beginParse(); - cs~loadBits(32); - var value: int = cs~loadUint(16); + cs.loadBits(32); + var value: int = cs.loadUint(16); return (1, value); } 
diff --git a/tolk-tester/tests/self-keyword.tolk b/tolk-tester/tests/self-keyword.tolk new file mode 100644 index 000000000..a339e7d01 --- /dev/null +++ b/tolk-tester/tests/self-keyword.tolk @@ -0,0 +1,213 @@ +fun incChained(mutate self: int): self { + self = self + 1; + return self; +} + +fun incChained2(mutate self: int): self { + return self.incChained(); +} + +fun incChained3(mutate self: int): self { + incChained(mutate self); + return self; +} + +fun incChained4(mutate self: int): self { + self.incChained(); + return self; +} + +@method_id(101) +fun testIncChainedCodegen(x: int) { + return x.incChained().incChained2().incChained3().incChained4(); +} + +@method_id(102) +fun testIncChained() { + var x: int = 10; + incChained(mutate x); + x.incChained(); + x.incChained2(); + x.incChained2().incChained(); + x = x.incChained(); + x = x.incChained2().incChained().incChained2(); + return x.incChained(); +} + +fun incChainedWithMiddleReturn(mutate self: int, maxValue: int): self { + if (self >= maxValue) { + return self; + } + self += 1; + return self; +} + +@method_id(103) +fun testIncChainedWithMiddleReturn(x: int) { + x.incChainedWithMiddleReturn(10).incChainedWithMiddleReturn(10); + x = x.incChainedWithMiddleReturn(10).incChainedWithMiddleReturn(10); + return x.incChainedWithMiddleReturn(10).incChainedWithMiddleReturn(999); +} + +fun incChainedMutatingBoth(mutate self: int, mutate y: int): self { + self += 1; + y += 1; + return self; +} + +global c104: int; + +@method_id(104) +fun testIncChainedMutatingBoth() { + var (x, y) = (0, 0); + c104 = 0; + x.incChainedMutatingBoth(mutate y).incChainedMutatingBoth(mutate y); + incChainedMutatingBoth(mutate x, mutate y); + x = x.incChainedMutatingBoth(mutate c104).incChainedMutatingBoth(mutate c104).incChainedMutatingBoth(mutate y); + return (x, y, c104); +} + +fun incTensorChained(mutate self: (int, int)): self { + val (f, s) = self; + self = (f + 1, s + 1); + return self; +} + +@method_id(105) +fun testIncTensorChained(f: 
int, s: int) { + var tens = (f, s); + tens.incTensorChained().incTensorChained(); + return tens.incTensorChained().incTensorChained(); +} + +fun incConditionalChainable(mutate self: int, mutate another: int, ifLessThan: int): self { + another += 1; + return self.incChained() < ifLessThan ? self.incChained().incChained() : self; +} + +@method_id(106) +fun testIncConditionalChainable(x: int) { + var y = 0; + x.incConditionalChainable(mutate y, 5).incConditionalChainable(mutate y, 5); + x = x.incConditionalChainable(mutate y, 5).incConditionalChainable(mutate y, 5); + return (x.incConditionalChainable(mutate y, 5), y); +} + +fun checkNotEq(self: int, throwIfEq: int): void { + if (self == throwIfEq) { + throw 100 + throwIfEq; + } +} + +@method_id(107) +fun testNotMutatingSelf(arg: int) { + try { + arg.checkNotEq(100); + arg.checkNotEq(101); + arg.checkNotEq(102); + return 0; + } catch (code) { + return code; + } +} + +global c108: int; + +fun checkNotEqChainable(self: int, throwIfEq: int): self { + c108 += 1; + if (self != throwIfEq) { + return self; + } + throw 100 + throwIfEq; + return self; +} + +@method_id(108) +fun testNotMutatingChainableSelf(arg: int) { + c108 = 0; + try { + arg.checkNotEqChainable(100).checkNotEqChainable(101).checkNotEqChainable(102); + arg = arg.checkNotEqChainable(100).checkNotEqChainable(101).checkNotEqChainable(102); + return (arg, c108); + } catch (code) { + return (code, c108); + } +} + +global onceFailed109: int; + +fun checkNotEqChainableMutateAnother(self: int, throwIfEq: int, mutate toInc: int): self { + if (onceFailed109) { return self; } + toInc += 1; + try { return self.checkNotEqChainable(throwIfEq); } + catch { onceFailed109 = 1; return self; } +} + +global c109: int; + +@method_id(109) +fun testNotMutatingChainableSelfMutateAnother(initial: int) { + val arg = initial; + var x = 0; + c108 = 0; + c109 = 0; + onceFailed109 = 0; + arg.checkNotEqChainableMutateAnother(100, mutate x) + .checkNotEqChainableMutateAnother(101, mutate 
c109) + .checkNotEqChainableMutateAnother(102, mutate x); + return (arg, c108, c109, x); +} + + +fun main() { } + +/** +@testcase | 101 | 5 | 9 +@testcase | 102 | | 20 +@testcase | 103 | 1 | 7 +@testcase | 103 | 100 | 101 +@testcase | 103 | 8 | 11 +@testcase | 104 | | 6 4 2 +@testcase | 105 | 1 2 | 5 6 +@testcase | 106 | -20 | -5 5 +@testcase | 106 | -1 | 8 5 +@testcase | 106 | 7 | 12 5 +@testcase | 107 | 200 | 0 +@testcase | 107 | 102 | 202 +@testcase | 108 | 200 | 200 6 +@testcase | 108 | 101 | 201 0 +@testcase | 109 | 200 | 200 3 1 2 +@testcase | 109 | 100 | 100 0 0 1 +@testcase | 109 | 102 | 102 2 1 2 + +@fif_codegen +""" + incChained PROC:<{ + // self + INC // self + }> + incChained2 PROC:<{ + // self + incChained CALLDICT // self + }> + incChained3 PROC:<{ + // self + incChained CALLDICT // self + }> + incChained4 PROC:<{ + // self + incChained CALLDICT // self + }> +""" + +@fif_codegen +""" + testIncChainedCodegen PROC:<{ + // x + incChained CALLDICT // x + incChained2 CALLDICT // x + incChained3 CALLDICT // x + incChained4 CALLDICT // x + }> +""" + */ diff --git a/tolk-tester/tests/test-math.tolk b/tolk-tester/tests/test-math.tolk index 7c9873f09..893035fde 100644 --- a/tolk-tester/tests/test-math.tolk +++ b/tolk-tester/tests/test-math.tolk @@ -243,7 +243,7 @@ fun tanh_f258(x: int, n: int): int { repeat (n) { a = (c -= Two) + mulDivRound(x2, 1 << 239, a); // a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 } - a = (stackMoveToTop(3) << 254) + mulDivRound(x2, 1 << 243, a); // a := 3+x^2/a as fixed254 + a = (3 << 254) + mulDivRound(x2, 1 << 243, a); // a := 3+x^2/a as fixed254 // y = x/(1+a') = x - x*a'/(1+a') = x - x*x^2/(a+x^2) where a' = x^2/a return x - (mulDivRound(x, x2, a + (x2 ~>> 7)) ~>> 7); } @@ -257,12 +257,12 @@ fun expm1_f257(x: int): int { // (almost) compute tanh(x/2) first; x/2 as fixed258 = x as fixed257 var x2: int = mulDivRound(x, x, 1 << 255); // x^2 as fixed261 var Two: int = (1 << 251); // 2. 
as fixed250 - var a: int = stackMoveToTop(39) << 250; // a=2n+5 as fixed250 + var a: int = 39 << 250; // a=2n+5 as fixed250 var c = a; repeat (17) { a = (c -= Two) + mulDivRound(x2, 1 << 239, a); // a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 } - a = (stackMoveToTop(3) << 254) + mulDivRound(x2, 1 << 243, a); // a := 3+x^2/a as fixed254 + a = (3 << 254) + mulDivRound(x2, 1 << 243, a); // a := 3+x^2/a as fixed254 // now tanh(x/2) = x/(1+a') where a'=x^2/a ; apply exp(x)-1=2*tanh(x/2)/(1-tanh(x/2)) var t: int = (x ~>> 4) - a; // t:=x-a as fixed254 return x - mulDivRound(x2, t / 2, a + mulrshiftr256(x, t) ~/ 4) ~/ 4; // x - x^2 * (x-a) / (a + x*(x-a)) @@ -306,12 +306,12 @@ fun fixed248_exp2(x: int): int { fun tan_f260_inlined(x: int): int { var x2: int = mulrshiftr256(x, x); // x^2 as fixed264 var Two: int = (1 << 251); // 2. as fixed250 - var a: int = stackMoveToTop(33) << 250; // a=2n+5 as fixed250 + var a: int = 33 << 250; // a=2n+5 as fixed250 var c = a; repeat (14) { a = (c -= Two) - mulDivRound(x2, 1 << 236, a); // a := 2k+1-x^2/a as fixed250, k=n+1,n,...,2 } - a = (stackMoveToTop(3) << 254) - mulDivRound(x2, 1 << 240, a); // a := 3-x^2/a as fixed254 + a = (3 << 254) - mulDivRound(x2, 1 << 240, a); // a := 3-x^2/a as fixed254 // y = x/(1-a') = x + x*a'/(1-a') = x + x*x^2/(a-x^2) where a' = x^2/a return x + (mulDivRound(x / 2, x2, a - (x2 ~>> 10)) ~>> 9); } @@ -330,12 +330,12 @@ fun tan_f260(x: int): int { fun tan_f258_inlined(x: int): int { var x2: int = mulrshiftr256(x, x); // x^2 as fixed260 var Two: int = (1 << 251); // 2. 
as fixed250 - var a: int = stackMoveToTop(41) << 250; // a=2n+5 as fixed250 + var a: int = 41 << 250; // a=2n+5 as fixed250 var c = a; repeat (18) { a = (c -= Two) - mulDivRound(x2, 1 << 240, a); // a := 2k+1-x^2/a as fixed250, k=n+1,n,...,2 } - a = (stackMoveToTop(3) << 254) - mulDivRound(x2, 1 << 244, a); // a := 3-x^2/a as fixed254 + a = (3 << 254) - mulDivRound(x2, 1 << 244, a); // a := 3-x^2/a as fixed254 // y = x/(1-a') = x + x*a'/(1-a') = x + x*x^2/(a-x^2) where a' = x^2/a return x + (mulDivRound(x / 2, x2, a - (x2 ~>> 6)) ~>> 5); } @@ -546,9 +546,8 @@ fun atanh_f261(x: int, n: int): int { fun log_aux_f257(x: int): (int, int) { var s: int = log2_floor_p1(x); x <<= 256 - s; - var t: int = stackMoveToTop(-1 << 256); + var t: int = -1 << 256; if ((x >> 249) <= 90) { - // t~stackMoveToTop(); t >>= 1; s -= 1; } @@ -593,7 +592,7 @@ fun pow33b(m: int): int { fun log_auxx_f260(x: int): (int, int, int) { var s: int = log2_floor_p1(x) - 1; x <<= 255 - s; // rescale to 1 <= x < 2 as fixed255 - var t: int = stackMoveToTop(2873) << 244; // ~ (33/32)^11 ~ sqrt(2) as fixed255 + var t: int = 2873 << 244; // ~ (33/32)^11 ~ sqrt(2) as fixed255 var x1: int = (x - t) >> 1; var q: int = mulDivRound(x1, 65, x1 + t) + 11; // crude approximation to round(log(x)/log(33/32)) // t = 1; repeat (q) { t *= 33; } // t:=33^q, 0<=q<=22 @@ -742,13 +741,14 @@ fun atan_aux_prereduce(x: int): (int, int, int) { var tc: int = 7214596; // tan(13*theta) as fixed24 where theta=atan(1/32) var t1: int = mulDivRound(xu - tc, 1 << 88, xu * tc + (1 << 48)); // tan(x') as fixed64 where x'=atan(x)-13*theta // t1/(3+t1^2) * 3073/32 = x'/3 * 3072/32 = x' / (96/3072) = x' / theta - var q: int = mulDivRound(t1 * 3073, 1 << 59, t1 * t1 + (stackMoveToTop(3) << 128)) + 13; // approximately round(atan(x)/theta), 0<=q<=25 + var q: int = mulDivRound(t1 * 3073, 1 << 59, t1 * t1 + (3 << 128)) + 13; // approximately round(atan(x)/theta), 0<=q<=25 var (pa, pb) = (33226912, 5232641); // (32+I)^5 var (qh, ql) = divMod(q, 
5); var (a, b) = (1 << (5 * (51 - q)), 0); // (1/32^q, 0) as fixed255 repeat (ql) { // a+b*I *= 32+I - (a, b) = (sub_rev(stackMoveToTop(b), 32 * a), a + 32 * b); // same as (32 * a - b, 32 * b + a), but more efficient + b.stackMoveToTop(); + (a, b) = (sub_rev(b, 32 * a), a + 32 * b); // same as (32 * a - b, 32 * b + a), but more efficient } repeat (qh) { // a+b*I *= (32+I)^5 = pa + pb*I @@ -807,7 +807,7 @@ fun atan_auxx_f256(x: int): (int, int) { @inline_ref fun atan_f255(x: int): int { var s: int = (x ~>> 256); - stackMoveToTop(x); + x.stackMoveToTop(); if (s) { x = lshift256divr(-1 << 255, x); // x:=-1/x as fixed256 } else { @@ -880,7 +880,7 @@ fun fixed248_acos(x: int): int { @inline_ref fun fixed248_atan(x: int): int { var s: int = (x ~>> 249); - stackMoveToTop(x); + x.stackMoveToTop(); if (s) { s = sign(s); x = lshift256divr(-1 << 248, x); // x:=-1/x as fixed256 @@ -897,7 +897,7 @@ fun fixed248_atan(x: int): int { @inline_ref fun fixed248_acot(x: int): int { var s: int = (x ~>> 249); - stackMoveToTop(x); + x.stackMoveToTop(); if (s) { x = lshift256divr(-1 << 248, x); // x:=-1/x as fixed256 s = 0; @@ -918,7 +918,7 @@ fun fixed248_acot(x: int): int { /// fixed252 nrand(); @inline_ref fun nrand_f252(): int { - var (x, s, t, A, B, r0) = (nan(), stackMoveToTop(29483) << 236, stackMoveToTop(-3167) << 239, 12845, 16693, 9043); + var (x, s, t, A, B, r0) = (nan(), 29483 << 236, -3167 << 239, 12845, 16693, 9043); // 4/sqrt(e*Pi) = 1.369 loop iterations on average do { var (u, v) = (random() / 16 + 1, mulDivRound(random() - (1 << 255), 7027, 1 << 16)); // fixed252; 7027=ceil(sqrt(8/e)*2^12) @@ -950,7 +950,7 @@ fun nrand_f252(): int { /// fixed252 nrand_fast(); @inline_ref fun nrand_fast_f252(): int { - var t: int = stackMoveToTop(-3) << 253; // -6. as fixed252 + var t: int = -3 << 253; // -6. 
as fixed252 repeat (12) { t += random() / 16; // add together 12 uniformly random numbers } @@ -979,8 +979,8 @@ fun fixed248_nrand_fast(): int { } @pure -fun ~tset(t: tuple, idx: int, value: X): (tuple, ()) -asm(t value idx) "SETINDEXVAR"; +fun tset(mutate self: tuple, idx: int, value: X): void + asm(self value idx) "SETINDEXVAR"; // computes 1-acos(x)/Pi by a very simple, extremely slow (~70k gas) and imprecise method // fixed256 acos_prepare_slow(fixed255 x); @@ -1014,12 +1014,12 @@ fun asin_slow_f255(x: int): int { fun test_nrand(n: int): tuple { var t: tuple = createEmptyTuple(); repeat (255) { - t~tuplePush(0); + t.tuplePush(0); } repeat (n) { var x: int = fixed248_nrand(); var bucket: int = (abs(x) >> 243); // 255 buckets starting from x=0, each 1/32 wide - t~tset(bucket, t.tupleAt(bucket) + 1); + t.tset(bucket, t.tupleAt(bucket) + 1); } return t; } @@ -1210,14 +1210,14 @@ fun main() { // return atan_aux_f256(1); // atan(1/2^256)*2^261 = 32 //return fixed248_nrand(); // return test_nrand(100000); - var One2: int = stackMoveToTop(1 << 255); + var One2: int = 1 << 255; // return asin_f255(One); // return asin_f255(-2 * One ~/ -3); var arg: int = mulDivRound(12, One2, 17); // 12/17 // return [ asin_slow_f255(arg), asin_f255(arg) ]; // return [ acos_slow_f255(arg), acos_f255(arg) ]; // return 4 * atan_f255(One ~/ 5) - atan_f255(One ~/ 239); // 4 * atan(1/5) - atan(1/239) = Pi/4 as fixed255 - var One3: int = stackMoveToTop(1 << 248); + var One3: int = 1 << 248; // return fixed248_atan(One) ~/ 5); // atan(1/5) // return fixed248_acot(One ~/ 239); // atan(1/5) } diff --git a/tolk-tester/tests/unbalanced_ret_nested.tolk b/tolk-tester/tests/unbalanced_ret_nested.tolk index 4d294ae95..05e609240 100644 --- a/tolk-tester/tests/unbalanced_ret_nested.tolk +++ b/tolk-tester/tests/unbalanced_ret_nested.tolk @@ -17,11 +17,8 @@ fun bar(x: int, y: int): (int, int) { } return (x + 1, y); } -fun bar2(x: int, y: int): (int,int) { - return bar(x, y); -} fun main(x: int, y: int): 
(int, int) { - (x, y) = bar2(x, y); + (x, y) = bar(x, y); return (x, y * 10); } /** diff --git a/tolk-tester/tests/use-before-declare.tolk b/tolk-tester/tests/use-before-declare.tolk index 2486df1c3..384569b93 100644 --- a/tolk-tester/tests/use-before-declare.tolk +++ b/tolk-tester/tests/use-before-declare.tolk @@ -1,7 +1,7 @@ fun main(): int { var c: cell = my_begin_cell().storeInt(demo_10, 32).my_end_cell(); var cs: slice = my_begin_parse(c); - var ten: int = cs~loadInt(32); + var ten: int = cs.loadInt(32); return 1 + demo1(ten) + demo_var; } diff --git a/tolk-tester/tests/var-apply.tolk b/tolk-tester/tests/var-apply.tolk new file mode 100644 index 000000000..9bee862ac --- /dev/null +++ b/tolk-tester/tests/var-apply.tolk @@ -0,0 +1,22 @@ +fun getBeginCell() { + return beginCell; +} + +fun getBeginParse() { + return beginParse; +} + +@method_id(101) +fun testVarApply1() { + var (_, f_end_cell) = (0, endCell); + var b: builder = (getBeginCell())().storeInt(1, 32); + b.storeInt(2, 32); + var s = (getBeginParse())(f_end_cell(b)); + return (s.loadInt(32), s.loadInt(32)); +} + +fun main() {} + +/** +@testcase | 101 | | 1 2 + */ diff --git a/tolk-tester/tests/w2.tolk b/tolk-tester/tests/w2.tolk index 24820f143..728b18d3f 100644 --- a/tolk-tester/tests/w2.tolk +++ b/tolk-tester/tests/w2.tolk @@ -1,6 +1,6 @@ @method_id(101) fun test1(cs: slice) { - return cs~loadUint(8)+cs~loadUint(8)+cs~loadUint(8)+cs~loadUint(8); + return cs.loadUint(8)+cs.loadUint(8)+cs.loadUint(8)+cs.loadUint(8); } @method_id(102) @@ -12,15 +12,15 @@ fun test2(cs: slice) { } fun main(cs: slice) { - return (cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), cs~loadUint(8)); + return (cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), cs.loadUint(8)); } fun f(cs: slice) { - return (cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), - cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), - cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), - cs~loadUint(8), cs~loadUint(8), 
cs~loadUint(8), cs~loadUint(8), - cs~loadUint(8), cs~loadUint(8), cs~loadUint(8), cs~loadUint(8)); + return (cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), + cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), + cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), + cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), + cs.loadUint(8), cs.loadUint(8), cs.loadUint(8), cs.loadUint(8)); } diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp index c0190b640..c1add6839 100644 --- a/tolk/abscode.cpp +++ b/tolk/abscode.cpp @@ -25,12 +25,8 @@ namespace tolk { * */ -TmpVar::TmpVar(var_idx_t _idx, bool _is_tmp_unnamed, TypeExpr* _type, SymDef* sym, SrcLocation loc) - : v_type(_type), idx(_idx), is_tmp_unnamed(_is_tmp_unnamed), coord(0), where(loc) { - if (sym) { - name = sym->sym_idx; - sym->value->idx = _idx; - } +TmpVar::TmpVar(var_idx_t _idx, TypeExpr* _type, sym_idx_t sym_idx, SrcLocation loc) + : v_type(_type), idx(_idx), sym_idx(sym_idx), coord(0), where(loc) { if (!_type) { v_type = TypeExpr::new_hole(); } @@ -59,8 +55,8 @@ void TmpVar::dump(std::ostream& os) const { } void TmpVar::show(std::ostream& os, int omit_idx) const { - if (!is_tmp_unnamed) { - os << G.symbols.get_name(name); + if (!is_unnamed()) { + os << G.symbols.get_name(sym_idx); if (omit_idx >= 2) { return; } @@ -462,16 +458,13 @@ void CodeBlob::print(std::ostream& os, int flags) const { os << "-------- END ---------\n\n"; } -var_idx_t CodeBlob::create_var(bool is_tmp_unnamed, TypeExpr* var_type, SymDef* sym, SrcLocation location) { - vars.emplace_back(var_cnt, is_tmp_unnamed, var_type, sym, location); - if (sym) { - sym->value->idx = var_cnt; - } +var_idx_t CodeBlob::create_var(TypeExpr* var_type, var_idx_t sym_idx, SrcLocation location) { + vars.emplace_back(var_cnt, var_type, sym_idx, location); return var_cnt++; } bool CodeBlob::import_params(FormalArgList arg_list) { - if (var_cnt || in_var_cnt || op_cnt) { + if (var_cnt || in_var_cnt) { return false; } 
std::vector list; @@ -480,7 +473,7 @@ bool CodeBlob::import_params(FormalArgList arg_list) { SymDef* arg_sym; SrcLocation arg_loc; std::tie(arg_type, arg_sym, arg_loc) = par; - list.push_back(create_var(arg_sym == nullptr, arg_type, arg_sym, arg_loc)); + list.push_back(create_var(arg_type, arg_sym ? arg_sym->sym_idx : 0, arg_loc)); } emplace_back(loc, Op::_Import, list); in_var_cnt = var_cnt; diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp index b2ea55ec8..719df9b7d 100644 --- a/tolk/analyzer.cpp +++ b/tolk/analyzer.cpp @@ -46,10 +46,9 @@ int CodeBlob::split_vars(bool strict) { if (k != 1) { var.coord = ~((n << 8) + k); for (int i = 0; i < k; i++) { - auto v = create_var(vars[j].is_tmp_unnamed, comp_types[i], 0, vars[j].where); + auto v = create_var(comp_types[i], vars[j].sym_idx, vars[j].where); tolk_assert(v == n + i); tolk_assert(vars[v].idx == v); - vars[v].name = vars[j].name; vars[v].coord = ((int)j << 8) + i + 1; } n += k; diff --git a/tolk/ast-from-tokens.cpp b/tolk/ast-from-tokens.cpp index 5116fcf57..1a1d199ec 100644 --- a/tolk/ast-from-tokens.cpp +++ b/tolk/ast-from-tokens.cpp @@ -131,7 +131,8 @@ static AnyV maybe_replace_eq_null_with_isNull_call(V v) { auto v_ident = createV(v->loc, "__isNull"); // built-in function AnyV v_null = v->get_lhs()->type == ast_null_keyword ? 
v->get_rhs() : v->get_lhs(); - AnyV v_isNull = createV(v->loc, v_ident, createV(v->loc, {v_null})); + AnyV v_arg = createV(v->loc, v_null, false); + AnyV v_isNull = createV(v->loc, v_ident, createV(v->loc, {v_arg})); if (v->tok == tok_neq) { v_isNull = createV(v->loc, "!", tok_logical_not, v_isNull); } @@ -165,7 +166,7 @@ static TypeExpr* parse_type1(Lexer& lex, V genericsT_list) { return TypeExpr::new_atomic(TypeExpr::_Builder); case tok_continuation: lex.next(); - return TypeExpr::new_atomic(TypeExpr::_Cont); + return TypeExpr::new_atomic(TypeExpr::_Continutaion); case tok_tuple: lex.next(); return TypeExpr::new_atomic(TypeExpr::_Tuple); @@ -177,6 +178,8 @@ static TypeExpr* parse_type1(Lexer& lex, V genericsT_list) { return TypeExpr::new_tensor({}); case tok_bool: lex.error("bool type is not supported yet"); + case tok_self: + lex.error("`self` type can be used only as a return type of a function (enforcing it to be chainable)"); case tok_identifier: if (int idx = genericsT_list ? genericsT_list->lookup_idx(lex.cur_str()) : -1; idx != -1) { lex.next(); @@ -229,13 +232,27 @@ static TypeExpr* parse_type(Lexer& lex, V genericsT_list) { AnyV parse_expr(Lexer& lex); -static AnyV parse_parameter(Lexer& lex, V genericsT_list) { +static AnyV parse_parameter(Lexer& lex, V genericsT_list, bool is_first) { SrcLocation loc = lex.cur_location(); - // argument name (or underscore for an unnamed parameter) + // optional keyword `mutate` meaning that a function will mutate a passed argument (like passed by reference) + bool declared_as_mutate = false; + bool is_param_self = false; + if (lex.tok() == tok_mutate) { + lex.next(); + declared_as_mutate = true; + } + + // parameter name (or underscore for an unnamed parameter) std::string_view param_name; if (lex.tok() == tok_identifier) { param_name = lex.cur_str(); + } else if (lex.tok() == tok_self) { + if (!is_first) { + lex.error("`self` can only be the first parameter"); + } + param_name = "self"; + is_param_self = true; } else 
if (lex.tok() != tok_underscore) { lex.unexpected("parameter name"); } @@ -245,8 +262,14 @@ static AnyV parse_parameter(Lexer& lex, V genericsT_list) { // parameter type after colon, also mandatory (even explicit ":auto") lex.expect(tok_colon, "`: `"); TypeExpr* param_type = parse_type(lex, genericsT_list); + if (declared_as_mutate && !param_type->has_fixed_width()) { + throw ParseError(loc, "`mutate` parameter must be strictly typed"); + } + if (is_param_self && !param_type->has_fixed_width()) { + throw ParseError(loc, "`self` parameter must be strictly typed"); + } - return createV(loc, v_ident, param_type); + return createV(loc, v_ident, param_type, declared_as_mutate); } static AnyV parse_global_var_declaration(Lexer& lex, const std::vector>& annotations) { @@ -301,21 +324,52 @@ static AnyV parse_constant_declaration(Lexer& lex, const std::vector(loc, v_ident, declared_type, init_value); } -static AnyV parse_parameter_list(Lexer& lex, V genericsT_list) { +// "parameters" are at function declaration: `fun f(param1: int, mutate param2: slice)` +static V parse_parameter_list(Lexer& lex, V genericsT_list) { SrcLocation loc = lex.cur_location(); std::vector params; lex.expect(tok_oppar, "parameter list"); if (lex.tok() != tok_clpar) { - params.push_back(parse_parameter(lex, genericsT_list)); + params.push_back(parse_parameter(lex, genericsT_list, true)); while (lex.tok() == tok_comma) { lex.next(); - params.push_back(parse_parameter(lex, genericsT_list)); + params.push_back(parse_parameter(lex, genericsT_list, false)); } } lex.expect(tok_clpar, "`)`"); return createV(loc, std::move(params)); } +// "arguments" are at function call: `f(arg1, mutate arg2)` +static AnyV parse_argument(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + + // keyword `mutate` is necessary when a parameter is declared `mutate` (to make mutation obvious for the reader) + bool passed_as_mutate = false; + if (lex.tok() == tok_mutate) { + lex.next(); + passed_as_mutate = true; + } + + AnyV 
expr = parse_expr(lex); + return createV(loc, expr, passed_as_mutate); +} + +static V parse_argument_list(Lexer& lex) { + SrcLocation loc = lex.cur_location(); + std::vector args; + lex.expect(tok_oppar, "`(`"); + if (lex.tok() != tok_clpar) { + args.push_back(parse_argument(lex)); + while (lex.tok() == tok_comma) { + lex.next(); + args.push_back(parse_argument(lex)); + } + } + lex.expect(tok_clpar, "`)`"); + return createV(loc, std::move(args)); +} + // parse (expr) / [expr] / identifier / number static AnyV parse_expr100(Lexer& lex) { SrcLocation loc = lex.cur_location(); @@ -384,6 +438,10 @@ static AnyV parse_expr100(Lexer& lex) { lex.next(); return createV(loc); } + case tok_self: { + lex.next(); + return createV(loc); + } case tok_identifier: { std::string_view str_val = lex.cur_str(); lex.next(); @@ -400,48 +458,25 @@ static AnyV parse_expr100(Lexer& lex) { } } -// parse E(expr) +// parse E(args) static AnyV parse_expr90(Lexer& lex) { AnyV res = parse_expr100(lex); if (lex.tok() == tok_oppar) { - lex.next(); - - SrcLocation loc = lex.cur_location(); - std::vector args; - if (lex.tok() != tok_clpar) { - args.push_back(parse_expr(lex)); - while (lex.tok() == tok_comma) { - lex.next(); - args.push_back(parse_expr(lex)); - } - } - lex.expect(tok_clpar, "`)`"); - - return createV(res->loc, res, createV(loc, std::move(args))); + return createV(res->loc, res, parse_argument_list(lex)); } return res; } -// parse E .method ~method E (left-to-right) +// parse E.method(...) (left-to-right) static AnyV parse_expr80(Lexer& lex) { AnyV lhs = parse_expr90(lex); - while (lex.tok() == tok_identifier && (lex.cur_str()[0] == '.' 
|| lex.cur_str()[0] == '~')) { + while (lex.tok() == tok_dot) { + SrcLocation loc = lex.cur_location(); + lex.next(); + lex.check(tok_identifier, "method name"); std::string_view method_name = lex.cur_str(); lex.next(); - - SrcLocation loc = lex.cur_location(); - std::vector args; - lex.expect(tok_oppar, "`(`"); - if (lex.tok() != tok_clpar) { - args.push_back(parse_expr(lex)); - while (lex.tok() == tok_comma) { - lex.next(); - args.push_back(parse_expr(lex)); - } - } - lex.expect(tok_clpar, "`)`"); - - lhs = createV(lhs->loc, method_name, lhs, createV(loc, std::move(args))); + lhs = createV(loc, method_name, lhs, parse_argument_list(lex)); } return lhs; } @@ -586,11 +621,11 @@ AnyV parse_expr(Lexer& lex) { AnyV parse_statement(Lexer& lex); -static AnyV parse_var_declaration_lhs(Lexer& lex) { +static AnyV parse_var_declaration_lhs(Lexer& lex, bool is_immutable) { SrcLocation loc = lex.cur_location(); if (lex.tok() == tok_oppar) { lex.next(); - AnyV first = parse_var_declaration_lhs(lex); + AnyV first = parse_var_declaration_lhs(lex, is_immutable); if (lex.tok() == tok_clpar) { lex.next(); return createV(loc, first); @@ -598,17 +633,17 @@ static AnyV parse_var_declaration_lhs(Lexer& lex) { std::vector args(1, first); while (lex.tok() == tok_comma) { lex.next(); - args.push_back(parse_var_declaration_lhs(lex)); + args.push_back(parse_var_declaration_lhs(lex, is_immutable)); } lex.expect(tok_clpar, "`)`"); return createV(loc, std::move(args)); } if (lex.tok() == tok_opbracket) { lex.next(); - std::vector args(1, parse_var_declaration_lhs(lex)); + std::vector args(1, parse_var_declaration_lhs(lex, is_immutable)); while (lex.tok() == tok_comma) { lex.next(); - args.push_back(parse_var_declaration_lhs(lex)); + args.push_back(parse_var_declaration_lhs(lex, is_immutable)); } lex.expect(tok_clbracket, "`]`"); return createV(loc, std::move(args)); @@ -625,7 +660,7 @@ static AnyV parse_var_declaration_lhs(Lexer& lex) { lex.next(); marked_as_redef = true; } - return 
createV(loc, v_ident, declared_type, marked_as_redef); + return createV(loc, v_ident, declared_type, is_immutable, marked_as_redef); } if (lex.tok() == tok_underscore) { TypeExpr* declared_type = nullptr; @@ -634,21 +669,17 @@ static AnyV parse_var_declaration_lhs(Lexer& lex) { lex.next(); declared_type = parse_type(lex, nullptr); } - return createV(loc, createV(loc), declared_type, false); + return createV(loc, createV(loc), declared_type, true, false); } lex.unexpected("variable name"); } static AnyV parse_local_vars_declaration(Lexer& lex) { SrcLocation loc = lex.cur_location(); - bool immutable = lex.tok() == tok_val; + bool is_immutable = lex.tok() == tok_val; lex.next(); - if (immutable) { - lex.error("immutable variables are not supported yet"); - } - - AnyV lhs = parse_var_declaration_lhs(lex); + AnyV lhs = parse_var_declaration_lhs(lex, is_immutable); if (lex.tok() != tok_assign) { lex.error("variables declaration must be followed by assignment: `var xxx = ...`"); } @@ -885,10 +916,10 @@ static AnyV parse_asm_func_body(Lexer& lex, V param_list) { std::vector arg_order, ret_order; if (lex.tok() == tok_oppar) { lex.next(); - while (lex.tok() == tok_identifier || lex.tok() == tok_int_const) { + while (lex.tok() == tok_identifier || lex.tok() == tok_self) { int arg_idx = param_list->lookup_idx(lex.cur_str()); if (arg_idx == -1) { - lex.unexpected("argument name"); + lex.unexpected("parameter name"); } arg_order.push_back(arg_idx); lex.next(); @@ -1006,17 +1037,44 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vectoras(); } - V param_list = parse_parameter_list(lex, genericsT_list)->as(); + V v_param_list = parse_parameter_list(lex, genericsT_list)->as(); + bool accepts_self = !v_param_list->empty() && v_param_list->get_param(0)->get_identifier()->name == "self"; + int n_mutate_params = v_param_list->get_mutate_params_count(); TypeExpr* ret_type = nullptr; + bool returns_self = false; if (lex.tok() == tok_colon) { // : (if absent, it means 
"auto infer", not void) lex.next(); - ret_type = parse_type(lex, genericsT_list); + if (lex.tok() == tok_self) { + if (!accepts_self) { + lex.error("only a member function can return `self` (which accepts `self` first parameter)"); + } + lex.next(); + returns_self = true; + ret_type = TypeExpr::new_unit(); + } else { + ret_type = parse_type(lex, genericsT_list); + } } - if (is_entrypoint && (is_get_method || genericsT_list || !annotations.empty())) { + if (is_entrypoint && (is_get_method || genericsT_list || n_mutate_params || accepts_self)) { throw ParseError(loc, "invalid declaration of a reserved function"); } + if (is_get_method && (genericsT_list || n_mutate_params || accepts_self)) { + throw ParseError(loc, "get methods can't have `mutate` and `self` params"); + } + + if (n_mutate_params) { + std::vector ret_tensor_items; + ret_tensor_items.reserve(1 + n_mutate_params); + for (AnyV v_param : v_param_list->get_params()) { + if (v_param->as()->declared_as_mutate) { + ret_tensor_items.emplace_back(v_param->as()->param_type); + } + } + ret_tensor_items.emplace_back(ret_type ? ret_type : TypeExpr::new_hole()); + ret_type = TypeExpr::new_tensor(std::move(ret_tensor_items)); + } AnyV v_body = nullptr; @@ -1030,17 +1088,19 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vector(loc, v_ident, param_list, v_body); + auto f_declaration = createV(loc, v_ident, v_param_list, v_body); f_declaration->ret_type = ret_type ? 
ret_type : TypeExpr::new_hole(); f_declaration->is_entrypoint = is_entrypoint; f_declaration->genericsT_list = genericsT_list; f_declaration->marked_as_get_method = is_get_method; f_declaration->marked_as_builtin = v_body->type == ast_empty; + f_declaration->accepts_self = accepts_self; + f_declaration->returns_self = returns_self; for (auto v_annotation : annotations) { switch (v_annotation->kind) { @@ -1054,7 +1114,7 @@ static AnyV parse_function_declaration(Lexer& lex, const std::vectormarked_as_pure = true; break; case AnnotationKind::method_id: - if (is_get_method || genericsT_list || is_entrypoint) { + if (is_get_method || genericsT_list || is_entrypoint || n_mutate_params || accepts_self) { v_annotation->error("@method_id can be specified only for regular functions"); } f_declaration->method_id = v_annotation->get_arg()->get_item(0)->as(); diff --git a/tolk/ast-replacer.h b/tolk/ast-replacer.h index 16a9f64c2..478994e8b 100644 --- a/tolk/ast-replacer.h +++ b/tolk/ast-replacer.h @@ -80,8 +80,8 @@ class ASTReplacerInFunctionBody : public ASTReplacer { virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } + virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } - virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } virtual AnyV replace(V v) { return replace_children(v); } @@ -109,9 +109,10 @@ class ASTReplacerInFunctionBody : public ASTReplacer { case ast_string_const: return replace(v->as()); case ast_bool_const: return replace(v->as()); case ast_null_keyword: return replace(v->as()); + case ast_self_keyword: return replace(v->as()); case ast_function_call: return replace(v->as()); + case ast_dot_method_call: return replace(v->as()); case ast_underscore: return 
replace(v->as()); - case ast_dot_tilde_call: return replace(v->as()); case ast_unary_operator: return replace(v->as()); case ast_binary_operator: return replace(v->as()); case ast_ternary_operator: return replace(v->as()); diff --git a/tolk/ast-stringifier.h b/tolk/ast-stringifier.h index cabda4990..759873b04 100644 --- a/tolk/ast-stringifier.h +++ b/tolk/ast-stringifier.h @@ -40,11 +40,14 @@ class ASTStringifier final : public ASTVisitor { {ast_string_const, "ast_string_const"}, {ast_bool_const, "ast_bool_const"}, {ast_null_keyword, "ast_null_keyword"}, + {ast_self_keyword, "ast_self_keyword"}, + {ast_argument, "ast_argument"}, + {ast_argument_list, "ast_argument_list"}, {ast_function_call, "ast_function_call"}, + {ast_dot_method_call, "ast_dot_method_call"}, {ast_global_var_declaration, "ast_global_var_declaration"}, {ast_constant_declaration, "ast_constant_declaration"}, {ast_underscore, "ast_underscore"}, - {ast_dot_tilde_call, "ast_dot_tilde_call"}, {ast_unary_operator, "ast_unary_operator"}, {ast_binary_operator, "ast_binary_operator"}, {ast_ternary_operator, "ast_ternary_operator"}, @@ -125,12 +128,12 @@ class ASTStringifier final : public ASTVisitor { } return {}; } + case ast_dot_method_call: + return static_cast(v->as()->method_name); case ast_global_var_declaration: return static_cast(v->as()->get_identifier()->name); case ast_constant_declaration: return static_cast(v->as()->get_identifier()->name); - case ast_dot_tilde_call: - return static_cast(v->as()->method_name); case ast_unary_operator: return static_cast(v->as()->operator_name); case ast_binary_operator: @@ -208,11 +211,14 @@ class ASTStringifier final : public ASTVisitor { case ast_string_const: return handle_vertex(v->as()); case ast_bool_const: return handle_vertex(v->as()); case ast_null_keyword: return handle_vertex(v->as()); + case ast_self_keyword: return handle_vertex(v->as()); + case ast_argument: return handle_vertex(v->as()); + case ast_argument_list: return handle_vertex(v->as()); 
case ast_function_call: return handle_vertex(v->as()); + case ast_dot_method_call: return handle_vertex(v->as()); case ast_global_var_declaration: return handle_vertex(v->as()); case ast_constant_declaration: return handle_vertex(v->as()); case ast_underscore: return handle_vertex(v->as()); - case ast_dot_tilde_call: return handle_vertex(v->as()); case ast_unary_operator: return handle_vertex(v->as()); case ast_binary_operator: return handle_vertex(v->as()); case ast_ternary_operator: return handle_vertex(v->as()); diff --git a/tolk/ast-visitor.h b/tolk/ast-visitor.h index 6fe9ed5d9..d0a7bfaf6 100644 --- a/tolk/ast-visitor.h +++ b/tolk/ast-visitor.h @@ -75,9 +75,10 @@ class ASTVisitorFunctionBody : public ASTVisitor { virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } + virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } - virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } virtual void visit(V v) { return visit_children(v); } @@ -103,9 +104,10 @@ class ASTVisitorFunctionBody : public ASTVisitor { case ast_string_const: return visit(v->as()); case ast_bool_const: return visit(v->as()); case ast_null_keyword: return visit(v->as()); + case ast_self_keyword: return visit(v->as()); case ast_function_call: return visit(v->as()); + case ast_dot_method_call: return visit(v->as()); case ast_underscore: return visit(v->as()); - case ast_dot_tilde_call: return visit(v->as()); case ast_unary_operator: return visit(v->as()); case ast_binary_operator: return visit(v->as()); case ast_ternary_operator: return visit(v->as()); diff --git a/tolk/ast.cpp b/tolk/ast.cpp index f0506ef48..b1af51005 100644 
--- a/tolk/ast.cpp +++ b/tolk/ast.cpp @@ -86,6 +86,16 @@ int Vertex::lookup_idx(std::string_view param_name) const { return -1; } +int Vertex::get_mutate_params_count() const { + int n = 0; + for (AnyV param : children) { + if (param->as()->declared_as_mutate) { + n++; + } + } + return n; +} + void Vertex::mutate_set_src_file(const SrcFile* file) const { const_cast(this)->file = file; } diff --git a/tolk/ast.h b/tolk/ast.h index a233f09d3..fd2b27cbf 100644 --- a/tolk/ast.h +++ b/tolk/ast.h @@ -68,11 +68,14 @@ enum ASTNodeType { ast_string_const, ast_bool_const, ast_null_keyword, + ast_self_keyword, + ast_argument, + ast_argument_list, ast_function_call, + ast_dot_method_call, ast_global_var_declaration, ast_constant_declaration, ast_underscore, - ast_dot_tilde_call, ast_unary_operator, ast_binary_operator, ast_ternary_operator, @@ -284,14 +287,51 @@ struct Vertex final : ASTNodeLeaf { : ASTNodeLeaf(ast_null_keyword, loc) {} }; +template<> +struct Vertex final : ASTNodeLeaf { + explicit Vertex(SrcLocation loc) + : ASTNodeLeaf(ast_self_keyword, loc) {} +}; + +template<> +struct Vertex final : ASTNodeUnary { + bool passed_as_mutate; // when called `f(mutate arg)`, not `f(arg)` + + AnyV get_expr() const { return child; } + + explicit Vertex(SrcLocation loc, AnyV expr, bool passed_as_mutate) + : ASTNodeUnary(ast_argument, loc, expr), passed_as_mutate(passed_as_mutate) {} +}; + +template<> +struct Vertex final : ASTNodeVararg { + const std::vector& get_arguments() const { return children; } + auto get_arg(int i) const { return children.at(i)->as(); } + + explicit Vertex(SrcLocation loc, std::vector arguments) + : ASTNodeVararg(ast_argument_list, loc, std::move(arguments)) {} +}; + template<> struct Vertex final : ASTNodeBinary { - // even for f(1,2,3), f (lhs) is called with a single arg (tensor "(1,2,3)") (rhs) AnyV get_called_f() const { return lhs; } - auto get_called_arg() const { return rhs->as(); } + auto get_arg_list() const { return rhs->as(); } + int 
get_num_args() const { return rhs->as()->size(); } + auto get_arg(int i) const { return rhs->as()->get_arg(i); } + + Vertex(SrcLocation loc, AnyV lhs_f, V arguments) + : ASTNodeBinary(ast_function_call, loc, lhs_f, arguments) {} +}; + +template<> +struct Vertex final : ASTNodeBinary { + std::string_view method_name; + + AnyV get_obj() const { return lhs; } + auto get_arg_list() const { return rhs->as(); } - Vertex(SrcLocation loc, AnyV lhs_f, V arg) - : ASTNodeBinary(ast_function_call, loc, lhs_f, arg) {} + Vertex(SrcLocation loc, std::string_view method_name, AnyV lhs, V arguments) + : ASTNodeBinary(ast_dot_method_call, loc, lhs, arguments), method_name(method_name) {} }; template<> @@ -321,17 +361,6 @@ struct Vertex final : ASTNodeLeaf { : ASTNodeLeaf(ast_underscore, loc) {} }; -template<> -struct Vertex final : ASTNodeBinary { - std::string_view method_name; // starts with . or ~ - - AnyV get_lhs() const { return lhs; } - auto get_arg() const { return rhs->as(); } - - Vertex(SrcLocation loc, std::string_view method_name, AnyV lhs, V arg) - : ASTNodeBinary(ast_dot_tilde_call, loc, lhs, arg), method_name(method_name) {} -}; - template<> struct Vertex final : ASTNodeUnary { std::string_view operator_name; @@ -475,11 +504,13 @@ struct Vertex final : ASTNodeVararg { template<> struct Vertex final : ASTNodeUnary { TypeExpr* param_type; + bool declared_as_mutate; // declared as `mutate param_name` - auto get_identifier() const { return child->as(); } // for underscore, its str_val is empty + auto get_identifier() const { return child->as(); } // for underscore, name is empty + bool is_underscore() const { return child->as()->name.empty(); } - Vertex(SrcLocation loc, V name_identifier, TypeExpr* param_type) - : ASTNodeUnary(ast_parameter, loc, name_identifier), param_type(param_type) {} + Vertex(SrcLocation loc, V name_identifier, TypeExpr* param_type, bool declared_as_mutate) + : ASTNodeUnary(ast_parameter, loc, name_identifier), param_type(param_type), 
declared_as_mutate(declared_as_mutate) {} }; template<> @@ -491,6 +522,8 @@ struct Vertex final : ASTNodeVararg { : ASTNodeVararg(ast_parameter_list, loc, std::move(params)) {} int lookup_idx(std::string_view param_name) const; + int get_mutate_params_count() const; + bool has_mutate_params() const { return get_mutate_params_count() > 0; } }; template<> @@ -519,12 +552,13 @@ struct Vertex final : ASTNodeUnary { template<> struct Vertex final : ASTNodeUnary { TypeExpr* declared_type; + bool is_immutable; // declared via 'val', not 'var' bool marked_as_redef; // var (existing_var redef, new_var: int) = ... AnyV get_identifier() const { return child; } // ast_identifier / ast_underscore - Vertex(SrcLocation loc, AnyV name_identifier, TypeExpr* declared_type, bool marked_as_redef) - : ASTNodeUnary(ast_local_var, loc, name_identifier), declared_type(declared_type), marked_as_redef(marked_as_redef) {} + Vertex(SrcLocation loc, AnyV name_identifier, TypeExpr* declared_type, bool is_immutable, bool marked_as_redef) + : ASTNodeUnary(ast_local_var, loc, name_identifier), declared_type(declared_type), is_immutable(is_immutable), marked_as_redef(marked_as_redef) {} }; template<> @@ -552,6 +586,8 @@ struct Vertex final : ASTNodeVararg { bool marked_as_get_method = false; bool marked_as_inline = false; bool marked_as_inline_ref = false; + bool accepts_self = false; + bool returns_self = false; V method_id = nullptr; bool is_asm_function() const { return children.at(2)->type == ast_asm_body; } diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp index a123b0a85..d18cfa644 100644 --- a/tolk/builtins.cpp +++ b/tolk/builtins.cpp @@ -29,44 +29,60 @@ using namespace std::literals::string_literals; SymDef* define_builtin_func_impl(const std::string& name, SymValAsmFunc* func_val) { sym_idx_t name_idx = G.symbols.lookup_add(name); SymDef* def = define_global_symbol(name_idx); - if (!def) { - std::cerr << "fatal: global function `" << name << "` already defined" << std::endl; - 
std::exit(1); - } - func_val->flags |= SymValFunc::flagBuiltinFunction; + tolk_assert(!def->value); + def->value = func_val; #ifdef TOLK_DEBUG - dynamic_cast(def->value)->name = name; + def->value->sym_name = name; #endif return def; } -SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, bool impure = false) { - return define_builtin_func_impl(name, new SymValAsmFunc{func_type, func, !impure}); -} - -SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const compile_func_t& func, bool impure = false) { - return define_builtin_func_impl(name, new SymValAsmFunc{func_type, func, !impure}); -} +// given func_type = `(slice, int) -> slice` and func flags, create SymDef for parameters +// currently (see at the bottom) parameters of built-in functions are unnamed: +// built-in functions are created using a resulting type +static std::vector define_builtin_parameters(const TypeExpr* func_type, int func_flags) { + // `loadInt()`, `storeInt()`: they accept `self` and mutate it; no other options available in built-ins for now + bool is_mutate_self = func_flags & SymValFunc::flagHasMutateParams; + // func_type a map (params_type -> ret_type), probably surrounded by forall (internal representation of ) + TypeExpr* params_type = func_type->constr == TypeExpr::te_ForAll ? 
func_type->args[0]->args[0] : func_type->args[0]; + std::vector parameters; + + if (params_type->constr == TypeExpr::te_Tensor) { // multiple parameters: it's a tensor + parameters.reserve(params_type->args.size()); + for (int i = 0; i < static_cast(params_type->args.size()); ++i) { + SymDef* sym_def = define_parameter(i, {}); + SymValVariable* sym_val = new SymValVariable(i, params_type->args[i]); + if (i == 0 && is_mutate_self) { + sym_val->flags |= SymValVariable::flagMutateParameter; + } + sym_def->value = sym_val; + parameters.emplace_back(sym_def); + } + } else { // single parameter + SymDef* sym_def = define_parameter(0, {}); + SymValVariable* sym_val = new SymValVariable(0, params_type); + if (is_mutate_self) { + sym_val->flags |= SymValVariable::flagMutateParameter; + } + sym_def->value = sym_val; + parameters.emplace_back(sym_def); + } -SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const AsmOp& macro, bool impure = false) { - return define_builtin_func_impl(name, new SymValAsmFunc{func_type, make_simple_compile(macro), !impure}); + return parameters; } -SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, std::initializer_list arg_order, - std::initializer_list ret_order = {}, bool impure = false) { - return define_builtin_func_impl(name, new SymValAsmFunc{func_type, func, arg_order, ret_order, !impure}); +static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags) { + return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, func, flags | SymValFunc::flagBuiltinFunction)); } -SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const compile_func_t& func, std::initializer_list arg_order, - std::initializer_list ret_order = {}, bool impure = false) { - return define_builtin_func_impl(name, new SymValAsmFunc{func_type, func, 
arg_order, ret_order, !impure}); +static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const AsmOp& macro, int flags) { + return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, make_simple_compile(macro), flags | SymValFunc::flagBuiltinFunction)); } -SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const AsmOp& macro, - std::initializer_list arg_order, std::initializer_list ret_order = {}, - bool impure = false) { - return define_builtin_func_impl(name, new SymValAsmFunc{func_type, make_simple_compile(macro), arg_order, ret_order, !impure}); +static SymDef* define_builtin_func(const std::string& name, TypeExpr* func_type, const simple_compile_func_t& func, int flags, + std::initializer_list arg_order, std::initializer_list ret_order) { + return define_builtin_func_impl(name, new SymValAsmFunc(define_builtin_parameters(func_type, flags), func_type, func, flags | SymValFunc::flagBuiltinFunction, arg_order, ret_order)); } bool SymValAsmFunc::compile(AsmOpList& dest, std::vector& out, std::vector& in, @@ -963,7 +979,7 @@ AsmOp compile_throw(std::vector& res, std::vector& args, Src } } -AsmOp compile_throw_if_unless(std::vector& res, std::vector& args) { +AsmOp compile_throw_if_unless(std::vector& res, std::vector& args, SrcLocation) { tolk_assert(res.empty() && args.size() == 3); VarDescr &x = args[0], &y = args[1], &z = args[2]; if (!z.always_true() && !z.always_false()) { @@ -1007,10 +1023,10 @@ AsmOp compile_bool_const(std::vector& res, std::vector& args return AsmOp::Const(val ? 
"TRUE" : "FALSE"); } -// (slice, int) load_int(slice s, int len) asm(s len -> 1 0) "LDIX"; -// (slice, int) load_uint(slice s, int len) asm( -> 1 0) "LDUX"; -// int preload_int(slice s, int len) asm "PLDIX"; -// int preload_uint(slice s, int len) asm "PLDUX"; +// fun loadInt (mutate s: slice, len: int): int asm(s len -> 1 0) "LDIX"; +// fun loadUint (mutate s: slice, len: int): int asm( -> 1 0) "LDUX"; +// fun preloadInt (s: slice, len: int): int asm "PLDIX"; +// fun preloadUint(s: slice, len: int): int asm "PLDUX"; AsmOp compile_fetch_int(std::vector& res, std::vector& args, bool fetch, bool sgnd) { tolk_assert(args.size() == 2 && res.size() == 1 + (unsigned)fetch); auto &y = args[1], &r = res.back(); @@ -1032,8 +1048,8 @@ AsmOp compile_fetch_int(std::vector& res, std::vector& args, return exec_op((fetch ? "LD"s : "PLD"s) + (sgnd ? "IX" : "UX"), 2, 1 + (unsigned)fetch); } -// builder store_uint(builder b, int x, int len) asm(x b len) "STUX"; -// builder store_int(builder b, int x, int len) asm(x b len) "STIX"; +// fun storeInt (mutate self: builder, x: int, len: int): self asm(x b len) "STIX"; +// fun storeUint (mutate self: builder, x: int, len: int): self asm(x b len) "STUX"; AsmOp compile_store_int(std::vector& res, std::vector& args, bool sgnd) { tolk_assert(args.size() == 3 && res.size() == 1); auto& z = args[2]; @@ -1044,6 +1060,8 @@ AsmOp compile_store_int(std::vector& res, std::vector& args, return exec_op("ST"s + (sgnd ? "IX" : "UX"), 3, 1); } +// fun loadBits (mutate self: slice, len: int): self asm(s len -> 1 0) "LDSLICEX" +// fun preloadBits(self: slice, len: int): slice asm(s len -> 1 0) "PLDSLICEX" AsmOp compile_fetch_slice(std::vector& res, std::vector& args, bool fetch) { tolk_assert(args.size() == 2 && res.size() == 1 + (unsigned)fetch); auto& y = args[1]; @@ -1058,7 +1076,7 @@ AsmOp compile_fetch_slice(std::vector& res, std::vector& arg return exec_op(fetch ? 
"LDSLICEX" : "PLDSLICEX", 2, 1 + (unsigned)fetch); } -// _at(tuple t, int index) asm "INDEXVAR"; +// fun at(t: tuple, index: int): X asm "INDEXVAR"; AsmOp compile_tuple_at(std::vector& res, std::vector& args, SrcLocation) { tolk_assert(args.size() == 2 && res.size() == 1); auto& y = args[1]; @@ -1079,102 +1097,146 @@ AsmOp compile_is_null(std::vector& res, std::vector& args, S void define_builtins() { using namespace std::placeholders; - auto Unit = TypeExpr::new_unit(); - auto Int = TypeExpr::new_atomic(TypeExpr::_Int); - auto Cell = TypeExpr::new_atomic(TypeExpr::_Cell); - auto Slice = TypeExpr::new_atomic(TypeExpr::_Slice); - auto Builder = TypeExpr::new_atomic(TypeExpr::_Builder); - // auto Null = TypeExpr::new_atomic(TypeExpr::_Null); - auto Tuple = TypeExpr::new_atomic(TypeExpr::_Tuple); - auto Int2 = TypeExpr::new_tensor({Int, Int}); - auto Int3 = TypeExpr::new_tensor({Int, Int, Int}); - auto TupleInt = TypeExpr::new_tensor({Tuple, Int}); - auto SliceInt = TypeExpr::new_tensor({Slice, Int}); - auto X = TypeExpr::new_var(0); - auto Y = TypeExpr::new_var(1); - auto Z = TypeExpr::new_var(2); - auto XY = TypeExpr::new_tensor({X, Y}); - auto arith_bin_op = TypeExpr::new_map(Int2, Int); - auto arith_un_op = TypeExpr::new_map(Int, Int); - auto impure_un_op = TypeExpr::new_map(Int, Unit); - auto fetch_int_op = TypeExpr::new_map(SliceInt, SliceInt); - auto prefetch_int_op = TypeExpr::new_map(SliceInt, Int); - auto store_int_op = TypeExpr::new_map(TypeExpr::new_tensor({Builder, Int, Int}), Builder); - auto store_int_method = - TypeExpr::new_map(TypeExpr::new_tensor({Builder, Int, Int}), TypeExpr::new_tensor({Builder, Unit})); - auto fetch_slice_op = TypeExpr::new_map(SliceInt, TypeExpr::new_tensor({Slice, Slice})); - auto prefetch_slice_op = TypeExpr::new_map(SliceInt, Slice); - //auto arith_null_op = TypeExpr::new_map(TypeExpr::new_unit(), Int); - auto throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit)); - - // prevent 
unused vars warnings (there vars are created to acquire initial id of TypeExpr::value) - static_cast(Z); - static_cast(XY); - static_cast(Cell); - - define_builtin_func("_+_", arith_bin_op, compile_add); - define_builtin_func("_-_", arith_bin_op, compile_sub); - define_builtin_func("-_", arith_un_op, compile_unary_minus); - define_builtin_func("+_", arith_un_op, compile_unary_plus); - define_builtin_func("_*_", arith_bin_op, compile_mul); - define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1)); - define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0)); - define_builtin_func("_^/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1)); - define_builtin_func("_%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1)); - define_builtin_func("_<<_", arith_bin_op, compile_lshift); - define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1)); - define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0)); - define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1)); - define_builtin_func("!_", arith_un_op, compile_logical_not); - define_builtin_func("~_", arith_un_op, compile_bitwise_not); - define_builtin_func("_&_", arith_bin_op, compile_bitwise_and); - define_builtin_func("_|_", arith_bin_op, compile_bitwise_or); - define_builtin_func("_^_", arith_bin_op, compile_bitwise_xor); - define_builtin_func("^_+=_", arith_bin_op, compile_add); - define_builtin_func("^_-=_", arith_bin_op, compile_sub); - define_builtin_func("^_*=_", arith_bin_op, compile_mul); - define_builtin_func("^_/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1)); - define_builtin_func("^_%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1)); - define_builtin_func("^_<<=_", arith_bin_op, compile_lshift); - define_builtin_func("^_>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1)); - define_builtin_func("^_&=_", arith_bin_op, compile_bitwise_and); - 
define_builtin_func("^_|=_", arith_bin_op, compile_bitwise_or); - define_builtin_func("^_^=_", arith_bin_op, compile_bitwise_xor); - define_builtin_func("mulDivFloor", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1)); - define_builtin_func("mulDivRound", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0)); - define_builtin_func("mulDivCeil", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1)); - define_builtin_func("mulDivMod", TypeExpr::new_map(Int3, Int2), AsmOp::Custom("MULDIVMOD", 3, 2)); - define_builtin_func("_==_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 2)); - define_builtin_func("_!=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 5)); - define_builtin_func("_<_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 4)); - define_builtin_func("_>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 1)); - define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6)); - define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 3)); - define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7)); - define_builtin_func("__true", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true)); - define_builtin_func("__false", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false)); - define_builtin_func("__null", TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_unit(), X)), AsmOp::Const("PUSHNULL")); - define_builtin_func("__isNull", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null); - define_builtin_func("__throw", impure_un_op, compile_throw, true); - define_builtin_func("__throw_arg", throw_arg_op, compile_throw_arg, true); - define_builtin_func("__throw_if_unless", TypeExpr::new_map(Int3, Unit), std::bind(compile_throw_if_unless, _1, _2), true); - define_builtin_func("loadInt", 
fetch_int_op, std::bind(compile_fetch_int, _1, _2, true, true), {}, {1, 0}); - define_builtin_func("loadUint", fetch_int_op, std::bind(compile_fetch_int, _1, _2, true, false), {}, {1, 0}); - define_builtin_func("loadBits", fetch_slice_op, std::bind(compile_fetch_slice, _1, _2, true), {}, {1, 0}); - define_builtin_func("preloadInt", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, true)); - define_builtin_func("preloadUint", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, false)); - define_builtin_func("preloadBits", prefetch_slice_op, std::bind(compile_fetch_slice, _1, _2, false)); - define_builtin_func("storeInt", store_int_op, std::bind(compile_store_int, _1, _2, true), {1, 0, 2}); - define_builtin_func("storeUint", store_int_op, std::bind(compile_store_int, _1, _2, false), {1, 0, 2}); - define_builtin_func("~storeInt", store_int_method, std::bind(compile_store_int, _1, _2, true), {1, 0, 2}); - define_builtin_func("~storeUint", store_int_method, std::bind(compile_store_int, _1, _2, false), {1, 0, 2}); - define_builtin_func("tupleAt", TypeExpr::new_forall({X}, TypeExpr::new_map(TupleInt, X)), compile_tuple_at); + + TypeExpr* Unit = TypeExpr::new_unit(); + TypeExpr* Int = TypeExpr::new_atomic(TypeExpr::_Int); + TypeExpr* Slice = TypeExpr::new_atomic(TypeExpr::_Slice); + TypeExpr* Builder = TypeExpr::new_atomic(TypeExpr::_Builder); + TypeExpr* Tuple = TypeExpr::new_atomic(TypeExpr::_Tuple); + TypeExpr* Int2 = TypeExpr::new_tensor({Int, Int}); + TypeExpr* Int3 = TypeExpr::new_tensor({Int, Int, Int}); + TypeExpr* TupleInt = TypeExpr::new_tensor({Tuple, Int}); + TypeExpr* SliceInt = TypeExpr::new_tensor({Slice, Int}); + TypeExpr* X = TypeExpr::new_var(0); + TypeExpr* arith_bin_op = TypeExpr::new_map(Int2, Int); + TypeExpr* arith_un_op = TypeExpr::new_map(Int, Int); + TypeExpr* impure_un_op = TypeExpr::new_map(Int, Unit); + TypeExpr* fetch_int_op_mutate = TypeExpr::new_map(SliceInt, SliceInt); + TypeExpr* prefetch_int_op = 
TypeExpr::new_map(SliceInt, Int); + TypeExpr* store_int_mutate = TypeExpr::new_map(TypeExpr::new_tensor({Builder, Int, Int}), TypeExpr::new_tensor({Builder, Unit})); + TypeExpr* fetch_slice_op_mutate = TypeExpr::new_map(SliceInt, TypeExpr::new_tensor({Slice, Slice})); + TypeExpr* prefetch_slice_op = TypeExpr::new_map(SliceInt, Slice); + TypeExpr* throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit)); + + define_builtin_func("_+_", arith_bin_op, compile_add, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_-_", arith_bin_op, compile_sub, + SymValFunc::flagMarkedAsPure); + define_builtin_func("-_", arith_un_op, compile_unary_minus, + SymValFunc::flagMarkedAsPure); + define_builtin_func("+_", arith_un_op, compile_unary_plus, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_*_", arith_bin_op, compile_mul, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_^/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_<<_", arith_bin_op, compile_lshift, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("!_", arith_un_op, compile_logical_not, + SymValFunc::flagMarkedAsPure); + define_builtin_func("~_", arith_un_op, compile_bitwise_not, + 
SymValFunc::flagMarkedAsPure); + define_builtin_func("_&_", arith_bin_op, compile_bitwise_and, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_|_", arith_bin_op, compile_bitwise_or, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_^_", arith_bin_op, compile_bitwise_xor, + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_+=_", arith_bin_op, compile_add, + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_-=_", arith_bin_op, compile_sub, + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_*=_", arith_bin_op, compile_mul, + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_<<=_", arith_bin_op, compile_lshift, + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_&=_", arith_bin_op, compile_bitwise_and, + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_|=_", arith_bin_op, compile_bitwise_or, + SymValFunc::flagMarkedAsPure); + define_builtin_func("^_^=_", arith_bin_op, compile_bitwise_xor, + SymValFunc::flagMarkedAsPure); + define_builtin_func("_==_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 2), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_!=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 5), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_<_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 4), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 
3), + SymValFunc::flagMarkedAsPure); + define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7), + SymValFunc::flagMarkedAsPure); + define_builtin_func("mulDivFloor", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("mulDivRound", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0), + SymValFunc::flagMarkedAsPure); + define_builtin_func("mulDivCeil", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1), + SymValFunc::flagMarkedAsPure); + define_builtin_func("mulDivMod", TypeExpr::new_map(Int3, Int2), AsmOp::Custom("MULDIVMOD", 3, 2), + SymValFunc::flagMarkedAsPure); + define_builtin_func("__true", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true), + SymValFunc::flagMarkedAsPure); + define_builtin_func("__false", TypeExpr::new_map(TypeExpr::new_unit(), Int), /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false), + SymValFunc::flagMarkedAsPure); + define_builtin_func("__null", TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_unit(), X)), AsmOp::Const("PUSHNULL"), + SymValFunc::flagMarkedAsPure); + define_builtin_func("__isNull", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null, + SymValFunc::flagMarkedAsPure); + define_builtin_func("__throw", impure_un_op, compile_throw, + 0); + define_builtin_func("__throw_arg", throw_arg_op, compile_throw_arg, + 0); + define_builtin_func("__throw_if_unless", TypeExpr::new_map(Int3, Unit), compile_throw_if_unless, + 0); + define_builtin_func("loadInt", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, true), + SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); + define_builtin_func("loadUint", fetch_int_op_mutate, std::bind(compile_fetch_int, _1, _2, true, false), + SymValFunc::flagMarkedAsPure | 
SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); + define_builtin_func("loadBits", fetch_slice_op_mutate, std::bind(compile_fetch_slice, _1, _2, true), + SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf, {}, {1, 0}); + define_builtin_func("preloadInt", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, true), + SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + define_builtin_func("preloadUint", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, false), + SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + define_builtin_func("preloadBits", prefetch_slice_op, std::bind(compile_fetch_slice, _1, _2, false), + SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); + define_builtin_func("storeInt", store_int_mutate, std::bind(compile_store_int, _1, _2, true), + SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf | SymValFunc::flagReturnsSelf, {1, 0, 2}, {}); + define_builtin_func("storeUint", store_int_mutate, std::bind(compile_store_int, _1, _2, false), + SymValFunc::flagMarkedAsPure | SymValFunc::flagHasMutateParams | SymValFunc::flagAcceptsSelf | SymValFunc::flagReturnsSelf, {1, 0, 2}, {}); + define_builtin_func("tupleAt", TypeExpr::new_forall({X}, TypeExpr::new_map(TupleInt, X)), compile_tuple_at, + SymValFunc::flagMarkedAsPure | SymValFunc::flagAcceptsSelf); define_builtin_func("debugPrint", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)), - AsmOp::Custom("s0 DUMP DROP", 1, 1), true); + AsmOp::Custom("s0 DUMP DROP", 1, 1), + 0); define_builtin_func("debugPrintString", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Unit)), - AsmOp::Custom("STRDUMP DROP", 1, 1), true); + AsmOp::Custom("STRDUMP DROP", 1, 1), + 0); define_builtin_func("debugDumpStack", TypeExpr::new_map(Unit, Unit), - AsmOp::Custom("DUMPSTK", 0, 0), true); + AsmOp::Custom("DUMPSTK", 0, 0), + 0); } } // namespace tolk diff --git 
a/tolk/gen-abscode.cpp b/tolk/gen-abscode.cpp index 97fb5d3f2..fb085ae9c 100644 --- a/tolk/gen-abscode.cpp +++ b/tolk/gen-abscode.cpp @@ -41,21 +41,22 @@ Expr::Expr(ExprCls c, sym_idx_t name_idx, std::initializer_list _arglist) } } -bool Expr::deduce_type() { +void Expr::deduce_type() { if (e_type) { - return true; + return; } switch (cls) { case _Apply: { if (!sym) { - return false; + return; } - SymVal* sym_val = dynamic_cast(sym->value); + SymValFunc* sym_val = dynamic_cast(sym->value); if (!sym_val || !sym_val->get_type()) { - return false; + return; } std::vector arg_types; - for (const auto& arg : args) { + arg_types.reserve(args.size()); + for (const Expr* arg : args) { arg_types.push_back(arg->e_type); } TypeExpr* fun_type = TypeExpr::new_map(TypeExpr::new_tensor(arg_types), TypeExpr::new_hole()); @@ -69,7 +70,7 @@ bool Expr::deduce_type() { } e_type = fun_type->args[1]; TypeExpr::remove_indirect(e_type); - return true; + return; } case _VarApply: { tolk_assert(args.size() == 2); @@ -84,7 +85,27 @@ bool Expr::deduce_type() { } e_type = fun_type->args[1]; TypeExpr::remove_indirect(e_type); - return true; + return; + } + case _GrabMutatedVars: { + tolk_assert(args.size() == 2 && args[0]->cls == _Apply && sym); + SymValFunc* called_f = dynamic_cast(sym->value); + tolk_assert(called_f->has_mutate_params()); + TypeExpr* sym_type = called_f->get_type(); + if (sym_type->constr == TypeExpr::te_ForAll) { + TypeExpr::remove_forall(sym_type); + } + tolk_assert(sym_type->args[1]->constr == TypeExpr::te_Tensor); + e_type = sym_type->args[1]->args[sym_type->args[1]->args.size() - 1]; + TypeExpr::remove_indirect(e_type); + return; + } + case _ReturnSelf: { + tolk_assert(args.size() == 2 && sym); + Expr* this_arg = args[1]; + e_type = this_arg->e_type; + TypeExpr::remove_indirect(e_type); + return; } case _Letop: { tolk_assert(args.size() == 2); @@ -99,25 +120,7 @@ bool Expr::deduce_type() { } e_type = args[0]->e_type; TypeExpr::remove_indirect(e_type); - return true; - } 
- case _LetFirst: { - tolk_assert(args.size() == 2); - TypeExpr* rhs_type = TypeExpr::new_tensor({args[0]->e_type, TypeExpr::new_hole()}); - try { - // std::cerr << "in implicit assignment of a modifying method: " << rhs_type << " and " << args[1]->e_type << std::endl; - unify(rhs_type, args[1]->e_type); - } catch (UnifyError& ue) { - std::ostringstream os; - os << "cannot implicitly assign an expression of type " << args[1]->e_type - << " to a variable or pattern of type " << rhs_type << " in modifying method `" << G.symbols.get_name(val) - << "` : " << ue; - throw ParseError(here, os.str()); - } - e_type = rhs_type->args[1]; - TypeExpr::remove_indirect(e_type); - // std::cerr << "result type is " << e_type << std::endl; - return true; + return; } case _CondExpr: { tolk_assert(args.size() == 3); @@ -139,46 +142,46 @@ bool Expr::deduce_type() { } e_type = args[1]->e_type; TypeExpr::remove_indirect(e_type); - return true; + return; } + default: + throw Fatal("unexpected cls=" + std::to_string(cls) + " in Expr::deduce_type()"); } - return false; } -int Expr::define_new_vars(CodeBlob& code) { +void Expr::define_new_vars(CodeBlob& code) { switch (cls) { case _Tensor: case _MkTuple: { - int res = 0; - for (const auto& x : args) { - res += x->define_new_vars(code); + for (Expr* item : args) { + item->define_new_vars(code); } - return res; + break; } case _Var: if (val < 0) { - val = code.create_var(false, e_type, sym, here); - return 1; + val = code.create_var(e_type, sym->sym_idx, here); + sym->value->idx = val; } break; case _Hole: if (val < 0) { - val = code.create_var(true, e_type, nullptr, here); + val = code.create_tmp_var(e_type, here); } break; + default: + break; } - return 0; } -int Expr::predefine_vars() { +void Expr::predefine_vars() { switch (cls) { case _Tensor: case _MkTuple: { - int res = 0; - for (const auto& x : args) { - res += x->predefine_vars(); + for (Expr* item : args) { + item->predefine_vars(); } - return res; + break; } case _Var: if (!sym) { 
@@ -188,12 +191,15 @@ int Expr::predefine_vars() { if (!sym) { throw ParseError{here, std::string{"redefined variable `"} + G.symbols.get_name(~val) + "`"}; } - sym->value = new SymVal{SymValKind::_Var, -1, e_type}; - return 1; + sym->value = new SymValVariable(-1, e_type); + if (is_immutable()) { + dynamic_cast(sym->value)->flags |= SymValVariable::flagImmutable; + } } break; + default: + break; } - return 0; } var_idx_t Expr::new_tmp(CodeBlob& code) const { @@ -217,7 +223,7 @@ std::vector pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, Src auto unpacked_type = rhs->e_type->args.at(0); std::vector tmp{code.create_tmp_var(unpacked_type, rhs->here)}; code.emplace_back(lhs->here, Op::_UnTuple, tmp, std::move(right)); - auto tvar = new Expr{Expr::_Var}; + auto tvar = new Expr{Expr::_Var, lhs->here}; tvar->set_val(tmp[0]); tvar->set_location(rhs->here); tvar->e_type = unpacked_type; @@ -255,7 +261,7 @@ std::vector pre_compile_tensor(const std::vector& args, CodeB res_lists[i] = args[i]->pre_compile(code, lval_globs); for (size_t j = 0; j < res_lists[i].size(); ++j) { TmpVar& var = code.vars.at(res_lists[i][j]); - if (!lval_globs && !var.is_tmp_unnamed) { + if (!lval_globs && !var.is_unnamed()) { var.on_modification.push_back([&modified_vars, i, j, cur_ops = code.cur_ops, done = false](SrcLocation here) mutable { if (!done) { done = true; @@ -303,39 +309,39 @@ std::vector Expr::pre_compile(CodeBlob& code, std::vector res; - SymDef* applied_sym = sym; - auto func = dynamic_cast(applied_sym->value); - // replace `beginCell()` with `begin_cell()` - // todo it should be done at AST level, see comment above detect_if_function_just_wraps_another() - if (func && func->is_just_wrapper_for_another_f()) { - // todo currently, f is inlined only if anotherF is declared (and processed) before - if (!dynamic_cast(func)->code) { // if anotherF is processed after - func->flags |= SymValFunc::flagUsedAsNonCall; - res = pre_compile_tensor(args, code, lval_globs); - } else { - // 
body is { Op::_Import; Op::_Call; Op::_Return; } - const std::unique_ptr& op_call = dynamic_cast(func)->code->ops->next; - applied_sym = op_call->fun_ref; - // a function may call anotherF with shuffled arguments: f(x,y) { return anotherF(y,x) } - // then op_call looks like (_1,_0), so use op_call->right for correct positions in Op::_Call below - // it's correct, since every argument has width 1 - std::vector res_inner = pre_compile_tensor(args, code, lval_globs); - res.reserve(res_inner.size()); - for (var_idx_t right_idx : op_call->right) { - res.emplace_back(res_inner[right_idx]); - } - } - } else { - res = pre_compile_tensor(args, code, lval_globs); - } + std::vector res = pre_compile_tensor(args, code, lval_globs);; auto rvect = new_tmp_vect(code); - auto& op = code.emplace_back(here, Op::_Call, rvect, res, applied_sym); + auto& op = code.emplace_back(here, Op::_Call, rvect, res, sym); if (flags & _IsImpure) { op.set_impure(code); } return rvect; } + case _GrabMutatedVars: { + SymValFunc* func_val = dynamic_cast(sym->value); + tolk_assert(func_val && func_val->has_mutate_params()); + tolk_assert(args.size() == 2 && args[0]->cls == _Apply && args[1]->cls == _Tensor); + auto right = args[0]->pre_compile(code); // apply (returning function result and mutated) + std::vector> local_globs; + if (!lval_globs) { + lval_globs = &local_globs; + } + auto left = args[1]->pre_compile(code, lval_globs); // mutated (lvalue) + auto rvect = new_tmp_vect(code); + left.push_back(rvect[0]); + for (var_idx_t v : left) { + code.on_var_modification(v, here); + } + code.emplace_back(here, Op::_Let, std::move(left), std::move(right)); + add_set_globs(code, local_globs, here); + return rvect; + } + case _ReturnSelf: { + tolk_assert(args.size() == 2 && sym); + Expr* this_arg = args[1]; + auto right = args[0]->pre_compile(code); + return this_arg->pre_compile(code); + } case _Var: case _Hole: if (val < 0) { @@ -372,7 +378,10 @@ std::vector Expr::pre_compile(CodeBlob& code, 
std::vector(sym->value)) { fun_ref->flags |= SymValFunc::flagUsedAsNonCall; if (!fun_ref->arg_order.empty() || !fun_ref->ret_order.empty()) { - throw ParseError(here, "Saving " + sym->name() + " into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack"); + throw ParseError(here, "saving `" + sym->name() + "` into a variable will most likely lead to invalid usage, since it changes the order of variables on the stack"); + } + if (fun_ref->has_mutate_params()) { + throw ParseError(here, "saving `" + sym->name() + "` into a variable is impossible, since it has `mutate` parameters and thus can only be called directly"); } } auto rvect = new_tmp_vect(code); @@ -387,22 +396,6 @@ std::vector Expr::pre_compile(CodeBlob& code, std::vectorpre_compile(code); - std::vector> local_globs; - if (!lval_globs) { - lval_globs = &local_globs; - } - auto left = args[0]->pre_compile(code, lval_globs); - left.push_back(rvect[0]); - for (var_idx_t v : left) { - code.on_var_modification(v, here); - } - code.emplace_back(here, Op::_Let, std::move(left), std::move(right)); - add_set_globs(code, local_globs, here); - return rvect; - } case _MkTuple: { auto left = new_tmp_vect(code); auto right = args[0]->pre_compile(code); diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp index 0a2dd79c2..17eb4544c 100644 --- a/tolk/lexer.cpp +++ b/tolk/lexer.cpp @@ -323,8 +323,7 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { static TokenType maybe_keyword(std::string_view str) { switch (str.size()) { case 1: - if (str == "~") return tok_bitwise_not; // todo attention - if (str == "_") return tok_underscore; // todo attention + if (str == "_") return tok_underscore; break; case 2: if (str == "do") return tok_do; @@ -347,6 +346,7 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (str == "void") return tok_void; if (str == "bool") return tok_bool; if (str == "auto") return tok_auto; + if (str == "self") return tok_self; if (str == "tolk") 
return tok_tolk; if (str == "type") return tok_type; if (str == "enum") return tok_enum; @@ -368,6 +368,7 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { if (str == "assert") return tok_assert; if (str == "import") return tok_import; if (str == "global") return tok_global; + if (str == "mutate") return tok_mutate; if (str == "repeat") return tok_repeat; if (str == "struct") return tok_struct; if (str == "export") return tok_export; @@ -394,8 +395,7 @@ struct ChunkIdentifierOrKeyword final : ChunkLexerBase { lex->skip_chars(1); while (!lex->is_eof()) { char c = lex->char_at(); - // the pattern of valid identifier first symbol is provided in trie, here we test for identifier middle - bool allowed_in_identifier = std::isalnum(c) || c == '_' || c == '$' || c == '?' || c == '!' || c == '\''; + bool allowed_in_identifier = std::isalnum(c) || c == '_' || c == '$'; if (!allowed_in_identifier) { break; } @@ -438,28 +438,6 @@ struct ChunkIdentifierInBackticks final : ChunkLexerBase { } }; -// Handle ~`some_method` and .`some_method` todo to be removed later -struct ChunkDotTildeAndBackticks final : ChunkLexerBase { - bool parse(Lexer* lex) const override { - const char* str_begin = lex->c_str(); - lex->skip_chars(2); - while (!lex->is_eof() && lex->char_at() != '`' && lex->char_at() != '\n') { - lex->skip_chars(1); - } - if (lex->char_at() != '`') { - lex->error("unclosed backtick `"); - } - - std::string_view in_backticks(str_begin + 2, lex->c_str() - str_begin - 2); - std::string full = std::string(1, *str_begin) + static_cast(in_backticks); - std::string* allocated = new std::string(full); - lex->skip_chars(1); - std::string_view str_val(allocated->c_str(), allocated->size()); - lex->add_token(tok_identifier, str_val); - return true; - } -}; - // // ---------------------------------------------------------------------- // Here we define a grammar of Tolk. 
@@ -500,11 +478,8 @@ struct TolkLanguageGrammar { trie.add_prefix("\n", singleton()); trie.add_pattern("[0-9]", singleton()); - // todo think of . ~ - trie.add_pattern("[a-zA-Z_$.~]", singleton()); + trie.add_pattern("[a-zA-Z_$]", singleton()); trie.add_prefix("`", singleton()); - // todo to be removed after ~ becomes invalid and . becomes a separate token - trie.add_pattern("[.~]`", singleton()); register_token("+", 1, tok_plus); register_token("-", 1, tok_minus); @@ -528,6 +503,8 @@ struct TolkLanguageGrammar { register_token("&", 1, tok_bitwise_and); register_token("|", 1, tok_bitwise_or); register_token("^", 1, tok_bitwise_xor); + register_token("~", 1, tok_bitwise_not); + register_token(".", 1, tok_dot); register_token("==", 2, tok_eq); register_token("!=", 2, tok_neq); register_token("<=", 2, tok_leq); diff --git a/tolk/lexer.h b/tolk/lexer.h index 8e04018cd..8a25f9526 100644 --- a/tolk/lexer.h +++ b/tolk/lexer.h @@ -38,6 +38,8 @@ enum TokenType { tok_var, tok_val, tok_redef, + tok_mutate, + tok_self, tok_annotation_at, tok_colon, @@ -52,6 +54,7 @@ enum TokenType { tok_null, tok_identifier, + tok_dot, tok_plus, tok_minus, diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 3e713e726..c735698cd 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -37,7 +37,87 @@ static int calc_sym_idx(std::string_view sym_name) { return G.symbols.lookup(sym_name); } +void Expr::fire_error_rvalue_expected() const { + // generally, almost all vertices are rvalue, that's why code leading to "not rvalue" + // should be very strange, like `var x = _` + throw ParseError(here, "rvalue expected"); +} + +void Expr::fire_error_lvalue_expected(const std::string& details) const { + // "lvalue expected" is when a user modifies something unmodifiable + // example: `f() = 32` + // example: `loadUint(c.beginParse(), 32)` (since `loadUint()` mutates the first argument) + throw ParseError(here, "lvalue expected (" + details + ")"); +} + +void 
Expr::fire_error_modifying_immutable(const std::string& details) const { + // "modifying immutable variable" is when a user assigns to a variable declared `val` + // example: `immutable_val = 32` + // example: `(regular_var, immutable_val) = f()` + // for better error message, try to print out variable name if possible + std::string variable_name; + if (cls == _Var || cls == _Const) { + variable_name = sym->name(); + } else if (cls == _Tensor || cls == _MkTuple) { + for (const Expr* arg : (cls == _Tensor ? args : args[0]->args)) { + if (arg->is_immutable() && (arg->cls == _Var || arg->cls == _Const)) { + variable_name = arg->sym->name(); + break; + } + } + } + + if (variable_name == "self") { + throw ParseError(here, "modifying `self` (" + details + "), which is immutable by default; probably, you want to declare `mutate self`"); + } else if (!variable_name.empty()) { + throw ParseError(here, "modifying an immutable variable `" + variable_name + "` (" + details + ")"); + } else { + throw ParseError(here, "modifying an immutable variable (" + details + ")"); + } +} + +GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN +static void fire_error_invalid_mutate_arg_passed(SrcLocation loc, const SymDef* func_sym, const SymDef* param_sym, bool called_as_method, bool arg_passed_as_mutate, AnyV arg_expr) { + std::string func_name = func_sym->name(); + std::string arg_str(arg_expr->type == ast_identifier ? arg_expr->as()->name : "obj"); + const SymValFunc* func_val = dynamic_cast(func_sym->value); + const SymValVariable* param_val = dynamic_cast(param_sym->value); + + // case: `loadInt(cs, 32)`; suggest: `cs.loadInt(32)` + if (param_val->is_mutate_parameter() && !arg_passed_as_mutate && !called_as_method && param_val->idx == 0 && func_val->does_accept_self()) { + throw ParseError(loc, "`" + func_name + "` is a mutating method; consider calling `" + arg_str + "." 
+ func_name + "()`, not `" + func_name + "(" + arg_str + ")`"); + } + // case: `cs.mutating_function()`; suggest: `mutating_function(mutate cs)` or make it a method + if (param_val->is_mutate_parameter() && called_as_method && param_val->idx == 0 && !func_val->does_accept_self()) { + throw ParseError(loc, "function `" + func_name + "` mutates parameter `" + param_sym->name() + "`; consider calling `" + func_name + "(mutate " + arg_str + ")`, not `" + arg_str + "." + func_name + "`(); alternatively, rename parameter to `self` to make it a method"); + } + // case: `mutating_function(arg)`; suggest: `mutate arg` + if (param_val->is_mutate_parameter() && !arg_passed_as_mutate) { + throw ParseError(loc, "function `" + func_name + "` mutates parameter `" + param_sym->name() + "`; you need to specify `mutate` when passing an argument, like `mutate " + arg_str + "`"); + } + // case: `usual_function(mutate arg)` + if (!param_val->is_mutate_parameter() && arg_passed_as_mutate) { + throw ParseError(loc, "incorrect `mutate`, since `" + func_name + "` does not mutate this parameter"); + } + throw Fatal("unreachable"); +} + +namespace blk_fl { +enum { end = 1, ret = 2, empty = 4 }; +typedef int val; +constexpr val init = end | empty; +void combine(val& x, const val y) { + x |= y & ret; + x &= y | ~(end | empty); +} +void combine_parallel(val& x, const val y) { + x &= y | ~(ret | empty); + x |= y & end; +} +} // namespace blk_fl + Expr* process_expr(AnyV v, CodeBlob& code); +blk_fl::val process_statement(AnyV v, CodeBlob& code); static void check_global_func(SrcLocation loc, sym_idx_t func_name) { SymDef* sym_def = lookup_symbol(func_name); @@ -46,22 +126,6 @@ static void check_global_func(SrcLocation loc, sym_idx_t func_name) { } } -static Expr* make_func_apply(Expr* fun, Expr* x) { - Expr* res{nullptr}; - if (fun->cls == Expr::_GlobFunc) { - if (x->cls == Expr::_Tensor) { - res = new Expr{Expr::_Apply, fun->sym, x->args}; - } else { - res = new Expr{Expr::_Apply, fun->sym, 
{x}}; - } - res->flags = Expr::_IsRvalue | (fun->flags & Expr::_IsImpure); - } else { - res = new Expr{Expr::_VarApply, {fun, x}}; - res->flags = Expr::_IsRvalue; - } - return res; -} - static void check_import_exists_when_using_sym(AnyV v_usage, const SymDef* used_sym) { if (!v_usage->loc.is_symbol_from_same_or_builtin_file(used_sym->loc)) { const SrcFile* declared_in = used_sym->loc.get_src_file(); @@ -77,7 +141,7 @@ static void check_import_exists_when_using_sym(AnyV v_usage, const SymDef* used_ } } -static Expr* create_new_local_variable(SrcLocation loc, std::string_view var_name, TypeExpr* var_type) { +static Expr* create_new_local_variable(SrcLocation loc, std::string_view var_name, TypeExpr* var_type, bool is_immutable) { SymDef* sym = lookup_symbol(calc_sym_idx(var_name)); if (sym) { // creating a new variable, but something found in symtable if (sym->level != G.scope_level) { @@ -89,7 +153,7 @@ static Expr* create_new_local_variable(SrcLocation loc, std::string_view var_nam Expr* x = new Expr{Expr::_Var, loc}; x->val = ~calc_sym_idx(var_name); x->e_type = var_type; - x->flags = Expr::_IsLvalue; + x->flags = Expr::_IsLvalue | (is_immutable ? 
Expr::_IsImmutable : 0); return x; } @@ -109,8 +173,13 @@ static Expr* process_expr(V v, CodeBlob& code) { t == tok_set_mod || t == tok_set_lshift || t == tok_set_rshift || t == tok_set_bitwise_and || t == tok_set_bitwise_or || t == tok_set_bitwise_xor) { Expr* x = process_expr(v->get_lhs(), code); - x->chk_lvalue(); x->chk_rvalue(); + if (!x->is_lvalue()) { + x->fire_error_lvalue_expected("left side of assignment"); + } + if (x->is_immutable()) { + x->fire_error_modifying_immutable("left side of assignment"); + } sym_idx_t name = G.symbols.lookup_add("^_" + operator_name + "_"); Expr* y = process_expr(v->get_rhs(), code); y->chk_rvalue(); @@ -126,7 +195,12 @@ static Expr* process_expr(V v, CodeBlob& code) { } if (t == tok_assign) { Expr* x = process_expr(v->get_lhs(), code); - x->chk_lvalue(); + if (!x->is_lvalue()) { + x->fire_error_lvalue_expected("left side of assignment"); + } + if (x->is_immutable()) { + x->fire_error_modifying_immutable("left side of assignment"); + } Expr* y = process_expr(v->get_rhs(), code); y->chk_rvalue(); x->predefine_vars(); @@ -191,54 +265,6 @@ static Expr* process_expr(V v, CodeBlob& code) { return res; } -static Expr* process_expr(V v, CodeBlob& code) { - Expr* res = process_expr(v->get_lhs(), code); - bool modify = v->method_name[0] == '~'; - Expr* obj = res; - if (modify) { - obj->chk_lvalue(); - } else { - obj->chk_rvalue(); - } - sym_idx_t name_idx = calc_sym_idx(v->method_name); - const SymDef* sym = lookup_symbol(name_idx); - if (!sym || !dynamic_cast(sym->value)) { - sym_idx_t name1 = G.symbols.lookup(v->method_name.substr(1)); - if (name1) { - const SymDef* sym1 = lookup_symbol(name1); - if (sym1 && dynamic_cast(sym1->value)) { - name_idx = name1; - } - } - } - check_global_func(v->loc, name_idx); - sym = lookup_symbol(name_idx); - SymValFunc* val = sym ? 
dynamic_cast(sym->value) : nullptr; - if (!val) { - v->error("undefined method call"); - } - Expr* x = process_expr(v->get_arg(), code); - x->chk_rvalue(); - if (x->cls == Expr::_Tensor) { - res = new Expr{Expr::_Apply, name_idx, {obj}}; - res->args.insert(res->args.end(), x->args.begin(), x->args.end()); - } else { - res = new Expr{Expr::_Apply, name_idx, {obj, x}}; - } - res->here = v->loc; - res->flags = Expr::_IsRvalue | (val->is_marked_as_pure() ? 0 : Expr::_IsImpure); - res->deduce_type(); - if (modify) { - Expr* tmp = res; - res = new Expr{Expr::_LetFirst, {obj->copy(), tmp}}; - res->here = v->loc; - res->flags = tmp->flags; - res->set_val(name_idx); - res->deduce_type(); - } - return res; -} - static Expr* process_expr(V v, CodeBlob& code) { Expr* cond = process_expr(v->get_cond(), code); cond->chk_rvalue(); @@ -253,19 +279,194 @@ static Expr* process_expr(V v, CodeBlob& code) { return res; } -static Expr* process_expr(V v, CodeBlob& code) { +static Expr* process_function_arguments(SymDef* func_sym, V v, Expr* lhs_of_dot_call, CodeBlob& code) { + SymValFunc* func_val = dynamic_cast(func_sym->value); + int delta_self = lhs_of_dot_call ? 
1 : 0; + int n_arguments = static_cast(v->get_arguments().size()) + delta_self; + int n_parameters = static_cast(func_val->parameters.size()); + + // Tolk doesn't have optional parameters currently, so just compare counts + if (n_parameters < n_arguments) { + v->error("too many arguments in call to `" + func_sym->name() + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + if (n_arguments < n_parameters) { + v->error("too few arguments in call to `" + func_sym->name() + "`, expected " + std::to_string(n_parameters - delta_self) + ", have " + std::to_string(n_arguments - delta_self)); + } + + std::vector apply_args; + apply_args.reserve(n_arguments); + if (lhs_of_dot_call) { + apply_args.push_back(lhs_of_dot_call); + } + for (int i = delta_self; i < n_arguments; ++i) { + auto v_arg = v->get_arg(i - delta_self); + if (SymDef* param_sym = func_val->parameters[i]) { // can be null (for underscore parameter) + SymValVariable* param_val = dynamic_cast(param_sym->value); + if (param_val->is_mutate_parameter() != v_arg->passed_as_mutate) { + fire_error_invalid_mutate_arg_passed(v_arg->loc, func_sym, param_sym, false, v_arg->passed_as_mutate, v_arg->get_expr()); + } + } + + Expr* arg = process_expr(v_arg->get_expr(), code); + arg->chk_rvalue(); + apply_args.push_back(arg); + } + + Expr* apply = new Expr{Expr::_Apply, func_sym, std::move(apply_args)}; + apply->flags = Expr::_IsRvalue | (!func_val->is_marked_as_pure() * Expr::_IsImpure); + apply->here = v->loc; + apply->deduce_type(); + + return apply; +} + +static Expr* process_function_call(V v, CodeBlob& code) { // special error for "null()" which is a FunC syntax if (v->get_called_f()->type == ast_null_keyword) { v->error("null is not a function: use `null`, not `null()`"); } - Expr* res = process_expr(v->get_called_f(), code); - Expr* x = process_expr(v->get_called_arg(), code); - x->chk_rvalue(); - res = make_func_apply(res, x); - res->here = v->loc; 
- res->deduce_type(); - return res; + // most likely it's a global function, but also may be `some_var(args)` or even `getF()(args)` + Expr* lhs = process_expr(v->get_called_f(), code); + if (lhs->cls != Expr::_GlobFunc) { + Expr* tensor_arg = new Expr(Expr::_Tensor, v->loc); + std::vector type_list; + type_list.reserve(v->get_num_args()); + for (int i = 0; i < v->get_num_args(); ++i) { + auto v_arg = v->get_arg(i); + if (v_arg->passed_as_mutate) { + v_arg->error("`mutate` used for non-mutate argument"); + } + Expr* arg = process_expr(v_arg->get_expr(), code); + arg->chk_rvalue(); + tensor_arg->pb_arg(arg); + type_list.push_back(arg->e_type); + } + tensor_arg->flags = Expr::_IsRvalue; + tensor_arg->e_type = TypeExpr::new_tensor(std::move(type_list)); + + Expr* var_apply = new Expr{Expr::_VarApply, {lhs, tensor_arg}}; + var_apply->here = v->loc; + var_apply->flags = Expr::_IsRvalue; + var_apply->deduce_type(); + return var_apply; + } + + Expr* apply = process_function_arguments(lhs->sym, v->get_arg_list(), nullptr, code); + + if (dynamic_cast(apply->sym->value)->has_mutate_params()) { + const std::vector& args = apply->args; + SymValFunc* func_val = dynamic_cast(apply->sym->value); + tolk_assert(func_val->parameters.size() == args.size()); + Expr* grabbed_vars = new Expr(Expr::_Tensor, v->loc); + std::vector type_list; + for (int i = 0; i < static_cast(args.size()); ++i) { + SymDef* param_def = func_val->parameters[i]; + if (param_def && dynamic_cast(param_def->value)->is_mutate_parameter()) { + if (!args[i]->is_lvalue()) { + args[i]->fire_error_lvalue_expected("call a mutating function"); + } + if (args[i]->is_immutable()) { + args[i]->fire_error_modifying_immutable("call a mutating function"); + } + grabbed_vars->pb_arg(args[i]->copy()); + type_list.emplace_back(args[i]->e_type); + } + } + grabbed_vars->flags = Expr::_IsRvalue; + Expr* grab_mutate = new Expr(Expr::_GrabMutatedVars, apply->sym, {apply, grabbed_vars}); + grab_mutate->here = v->loc; + 
grab_mutate->flags = apply->flags; + grab_mutate->deduce_type(); + return grab_mutate; + } + + return apply; +} + +static Expr* process_dot_method_call(V v, CodeBlob& code) { + sym_idx_t name_idx = calc_sym_idx(v->method_name); + check_global_func(v->loc, name_idx); + SymDef* func_sym = lookup_symbol(name_idx); + SymValFunc* func_val = dynamic_cast(func_sym->value); + tolk_assert(func_val != nullptr); + + Expr* obj = process_expr(v->get_obj(), code); + obj->chk_rvalue(); + + if (func_val->parameters.empty()) { + v->error("`" + func_sym->name() + "` has no parameters and can not be called as method"); + } + if (!func_val->does_accept_self() && func_val->parameters[0] && dynamic_cast(func_val->parameters[0]->value)->is_mutate_parameter()) { + fire_error_invalid_mutate_arg_passed(v->loc, func_sym, func_val->parameters[0], true, false, v->get_obj()); + } + + Expr* apply = process_function_arguments(func_sym, v->get_arg_list(), obj, code); + + Expr* obj_lval = apply->args[0]; + if (!obj_lval->is_lvalue()) { + if (obj_lval->cls == Expr::_ReturnSelf) { + obj_lval = obj_lval->args[1]; + } else { + Expr* tmp_var = create_new_underscore_variable(v->loc, obj_lval->e_type); + tmp_var->define_new_vars(code); + Expr* assign_to_tmp_var = new Expr(Expr::_Letop, {tmp_var, obj_lval}); + assign_to_tmp_var->here = v->loc; + assign_to_tmp_var->flags = Expr::_IsRvalue; + assign_to_tmp_var->deduce_type(); + apply->args[0] = assign_to_tmp_var; + obj_lval = tmp_var; + } + } + + if (func_val->has_mutate_params()) { + tolk_assert(func_val->parameters.size() == apply->args.size()); + Expr* grabbed_vars = new Expr(Expr::_Tensor, v->loc); + std::vector type_list; + for (int i = 0; i < static_cast(apply->args.size()); ++i) { + SymDef* param_sym = func_val->parameters[i]; + if (param_sym && dynamic_cast(param_sym->value)->is_mutate_parameter()) { + Expr* ith_arg = apply->args[i]; + if (ith_arg->is_immutable()) { + ith_arg->fire_error_modifying_immutable("call a mutating method"); + } + + Expr* 
var_to_mutate = nullptr; + if (ith_arg->is_lvalue()) { + var_to_mutate = ith_arg->copy(); + } else if (i == 0) { + var_to_mutate = obj_lval; + } else { + ith_arg->fire_error_lvalue_expected("call a mutating method"); + } + tolk_assert(var_to_mutate->is_lvalue() && !var_to_mutate->is_immutable()); + grabbed_vars->pb_arg(var_to_mutate); + type_list.emplace_back(var_to_mutate->e_type); + } + } + grabbed_vars->flags = Expr::_IsRvalue; + + Expr* grab_mutate = new Expr(Expr::_GrabMutatedVars, func_sym, {apply, grabbed_vars}); + grab_mutate->here = v->loc; + grab_mutate->flags = apply->flags; + grab_mutate->deduce_type(); + + apply = grab_mutate; + } + + if (func_val->does_return_self()) { + Expr* self_arg = obj_lval; + tolk_assert(self_arg->is_lvalue()); + + Expr* return_self = new Expr(Expr::_ReturnSelf, func_sym, {apply, self_arg}); + return_self->here = v->loc; + return_self->flags = Expr::_IsRvalue; + return_self->deduce_type(); + + apply = return_self; + } + + return apply; } static Expr* process_expr(V v, CodeBlob& code) { @@ -285,7 +486,8 @@ static Expr* process_expr(V v, CodeBlob& code) { for (int i = 1; i < v->size(); ++i) { Expr* x = process_expr(v->get_item(i), code); res->pb_arg(x); - f &= x->flags; + f &= (x->flags | Expr::_IsImmutable); + f |= (x->flags & Expr::_IsImmutable); type_list.push_back(x->e_type); } res->here = v->loc; @@ -315,7 +517,8 @@ static Expr* process_expr(V v, CodeBlob& code) { for (int i = 1; i < v->size(); ++i) { Expr* x = process_expr(v->get_item(i), code); res->pb_arg(x); - f &= x->flags; + f &= (x->flags | Expr::_IsImmutable); + f |= (x->flags & Expr::_IsImmutable); type_list.push_back(x->e_type); } res->here = v->loc; @@ -419,21 +622,36 @@ static Expr* process_expr(V v) { return res; } -static Expr* process_expr([[maybe_unused]] V v) { +static Expr* process_expr(V v) { SymDef* builtin_sym = lookup_symbol(calc_sym_idx("__null")); Expr* res = new Expr{Expr::_Apply, builtin_sym, {}}; + res->here = v->loc; res->flags = Expr::_IsRvalue; 
res->deduce_type(); return res; } +static Expr* process_expr(V v, CodeBlob& code) { + if (!code.func_val->does_accept_self()) { + v->error("using `self` in a non-member function (it does not accept the first `self` parameter)"); + } + SymDef* sym = lookup_symbol(calc_sym_idx("self")); + tolk_assert(sym); + SymValVariable* sym_val = dynamic_cast(sym->value); + Expr* res = new Expr(Expr::_Var, v->loc); + res->sym = sym; + res->val = sym_val->idx; + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (sym_val->is_immutable() ? Expr::_IsImmutable : 0); + res->e_type = sym_val->get_type(); + return res; +} + static Expr* process_identifier(V v) { SymDef* sym = lookup_symbol(calc_sym_idx(v->name)); if (sym && dynamic_cast(sym->value)) { check_import_exists_when_using_sym(v, sym); - auto val = dynamic_cast(sym->value); Expr* res = new Expr{Expr::_GlobVar, v->loc}; - res->e_type = val->get_type(); + res->e_type = sym->value->get_type(); res->sym = sym; res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImpure; return res; @@ -441,19 +659,20 @@ static Expr* process_identifier(V v) { if (sym && dynamic_cast(sym->value)) { check_import_exists_when_using_sym(v, sym); auto val = dynamic_cast(sym->value); - Expr* res = new Expr{Expr::_None, v->loc}; - res->flags = Expr::_IsRvalue; + Expr* res = nullptr; if (val->get_kind() == SymValConst::IntConst) { - res->cls = Expr::_Const; + res = new Expr{Expr::_Const, v->loc}; res->intval = val->get_int_value(); - res->e_type = TypeExpr::new_atomic(tok_int); + res->e_type = TypeExpr::new_atomic(TypeExpr::_Int); } else if (val->get_kind() == SymValConst::SliceConst) { - res->cls = Expr::_SliceConst; + res = new Expr{Expr::_SliceConst, v->loc}; res->strval = val->get_str_value(); - res->e_type = TypeExpr::new_atomic(tok_slice); + res->e_type = TypeExpr::new_atomic(TypeExpr::_Slice); } else { v->error("invalid symbolic constant type"); } + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImmutable; + res->sym = sym; return res; } 
if (sym && dynamic_cast(sym->value)) { @@ -463,28 +682,26 @@ static Expr* process_identifier(V v) { if (!sym) { check_global_func(v->loc, calc_sym_idx(v->name)); sym = lookup_symbol(calc_sym_idx(v->name)); + tolk_assert(sym); } res->sym = sym; - SymVal* val = nullptr; bool impure = false; - if (sym) { - val = dynamic_cast(sym->value); - } - if (!val) { - v->error("undefined identifier '" + static_cast(v->name) + "'"); - } - if (val->kind == SymValKind::_Func) { - res->e_type = val->get_type(); + bool immutable = false; + if (const SymValFunc* func_val = dynamic_cast(sym->value)) { + res->e_type = func_val->get_type(); res->cls = Expr::_GlobFunc; - impure = !dynamic_cast(val)->is_marked_as_pure(); - } else { - tolk_assert(val->idx >= 0); - res->val = val->idx; - res->e_type = val->get_type(); + impure = !func_val->is_marked_as_pure(); + } else if (const SymValVariable* var_val = dynamic_cast(sym->value)) { + tolk_assert(var_val->idx >= 0) + res->val = var_val->idx; + res->e_type = var_val->get_type(); + immutable = var_val->is_immutable(); // std::cerr << "accessing variable " << lex.cur().str << " : " << res->e_type << std::endl; + } else { + v->error("undefined identifier '" + static_cast(v->name) + "'"); } // std::cerr << "accessing symbol " << lex.cur().str << " : " << res->e_type << (val->impure ? " (impure)" : " (pure)") << std::endl; - res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (impure ? Expr::_IsImpure : 0); + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (impure ? Expr::_IsImpure : 0) | (immutable ? 
Expr::_IsImmutable : 0); res->deduce_type(); return res; } @@ -495,12 +712,12 @@ Expr* process_expr(AnyV v, CodeBlob& code) { return process_expr(v->as(), code); case ast_unary_operator: return process_expr(v->as(), code); - case ast_dot_tilde_call: - return process_expr(v->as(), code); case ast_ternary_operator: return process_expr(v->as(), code); case ast_function_call: - return process_expr(v->as(), code); + return process_function_call(v->as(), code); + case ast_dot_method_call: + return process_dot_method_call(v->as(), code); case ast_parenthesized_expr: return process_expr(v->as()->get_expr(), code); case ast_tensor: @@ -515,6 +732,8 @@ Expr* process_expr(AnyV v, CodeBlob& code) { return process_expr(v->as()); case ast_null_keyword: return process_expr(v->as()); + case ast_self_keyword: + return process_expr(v->as(), code); case ast_identifier: return process_identifier(v->as()); case ast_underscore: @@ -524,31 +743,22 @@ Expr* process_expr(AnyV v, CodeBlob& code) { } } -namespace blk_fl { -enum { end = 1, ret = 2, empty = 4 }; -typedef int val; -constexpr val init = end | empty; -void combine(val& x, const val y) { - x |= y & ret; - x &= y | ~(end | empty); -} -void combine_parallel(val& x, const val y) { - x &= y | ~(ret | empty); - x |= y & end; -} -} // namespace blk_fl - static Expr* process_local_vars_lhs(AnyV v, CodeBlob& code) { switch (v->type) { case ast_local_var: { - if (v->as()->marked_as_redef) { - return process_identifier(v->as()->get_identifier()->as()); + auto v_var = v->as(); + if (v_var->marked_as_redef) { + Expr* redef_var = process_identifier(v_var->get_identifier()->as()); + if (redef_var->is_immutable()) { + redef_var->fire_error_modifying_immutable("left side of assignment"); + } + return redef_var; } - TypeExpr* declared_type = v->as()->declared_type; + TypeExpr* var_type = v_var->declared_type ? 
v_var->declared_type : TypeExpr::new_hole(); if (auto v_ident = v->as()->get_identifier()->try_as()) { - return create_new_local_variable(v->loc, v_ident->name, declared_type ? declared_type : TypeExpr::new_hole()); + return create_new_local_variable(v->loc, v_ident->name, var_type, v_var->is_immutable); } else { - return create_new_underscore_variable(v->loc, declared_type ? declared_type : TypeExpr::new_hole()); + return create_new_underscore_variable(v->loc, var_type); } } case ast_parenthesized_expr: @@ -588,7 +798,6 @@ static Expr* process_local_vars_lhs(AnyV v, CodeBlob& code) { static blk_fl::val process_vertex(V v, CodeBlob& code) { Expr* x = process_local_vars_lhs(v->get_lhs(), code); - x->chk_lvalue(); Expr* y = process_expr(v->get_assigned_val(), code); y->chk_rvalue(); x->predefine_vars(); @@ -602,11 +811,83 @@ static blk_fl::val process_vertex(V v, CodeBlob& cod return blk_fl::end; } +static bool is_expr_valid_as_return_self(Expr* return_expr) { + // `return self` + if (return_expr->cls == Expr::_Var && return_expr->val == 0) { + return true; + } + if (return_expr->cls == Expr::_ReturnSelf) { + return is_expr_valid_as_return_self(return_expr->args[1]); + } + if (return_expr->cls == Expr::_CondExpr) { + return is_expr_valid_as_return_self(return_expr->args[1]) && is_expr_valid_as_return_self(return_expr->args[2]); + } + return false; +} + +// for mutating functions, having `return expr`, transform it to `return (modify_var1, ..., expr)` +static Expr* wrap_return_value_with_mutate_params(SrcLocation loc, CodeBlob& code, Expr* return_expr) { + Expr* tmp_var; + if (return_expr->cls != Expr::_Var) { + // `return complex_expr` - extract this into temporary variable (eval it before return) + // this is mandatory if it assigns to one of modified vars + tmp_var = create_new_underscore_variable(loc, return_expr->e_type); + tmp_var->predefine_vars(); + tmp_var->define_new_vars(code); + Expr* assign_to_tmp_var = new Expr(Expr::_Letop, {tmp_var, return_expr}); + 
assign_to_tmp_var->here = loc; + assign_to_tmp_var->flags = tmp_var->flags | Expr::_IsRvalue; + assign_to_tmp_var->deduce_type(); + assign_to_tmp_var->pre_compile(code); + } else { + tmp_var = return_expr; + } + + Expr* ret_tensor = new Expr(Expr::_Tensor, loc); + std::vector type_list; + for (SymDef* p_sym: code.func_val->parameters) { + if (p_sym && dynamic_cast(p_sym->value)->is_mutate_parameter()) { + Expr* p_expr = new Expr{Expr::_Var, p_sym->loc}; + p_expr->sym = p_sym; + p_expr->val = p_sym->value->idx; + p_expr->flags = Expr::_IsRvalue; + p_expr->e_type = p_sym->value->get_type(); + ret_tensor->pb_arg(p_expr); + type_list.emplace_back(p_expr->e_type); + } + } + ret_tensor->pb_arg(tmp_var); + type_list.emplace_back(tmp_var->e_type); + ret_tensor->flags = Expr::_IsRvalue; + ret_tensor->e_type = TypeExpr::new_tensor(std::move(type_list)); + return ret_tensor; +} + static blk_fl::val process_vertex(V v, CodeBlob& code) { Expr* expr = process_expr(v->get_return_value(), code); + if (code.func_val->does_return_self()) { + if (!is_expr_valid_as_return_self(expr)) { + v->error("invalid return from `self` function"); + } + Expr* var_self = new Expr(Expr::_Var, v->loc); + var_self->flags = Expr::_IsRvalue | Expr::_IsLvalue; + var_self->e_type = code.func_val->parameters[0]->value->get_type(); + Expr* assign_to_self = new Expr(Expr::_Letop, {var_self, expr}); + assign_to_self->here = v->loc; + assign_to_self->flags = Expr::_IsRvalue; + assign_to_self->deduce_type(); + assign_to_self->pre_compile(code); + Expr* empty_tensor = new Expr(Expr::_Tensor, {}); + empty_tensor->here = v->loc; + empty_tensor->flags = Expr::_IsRvalue; + empty_tensor->e_type = TypeExpr::new_tensor({}); + expr = empty_tensor; + } + if (code.func_val->has_mutate_params()) { + expr = wrap_return_value_with_mutate_params(v->loc, code, expr); + } expr->chk_rvalue(); try { - // std::cerr << "in return: "; unify(expr->e_type, code.ret_type); } catch (UnifyError& ue) { std::ostringstream os; @@ -619,22 
+900,29 @@ static blk_fl::val process_vertex(V v, CodeBlob& code) { return blk_fl::ret; } -static void append_implicit_ret_stmt(V v, CodeBlob& code) { - TypeExpr* ret_type = TypeExpr::new_unit(); +static void append_implicit_ret_stmt(SrcLocation loc_end, CodeBlob& code) { + Expr* expr = new Expr{Expr::_Tensor, {}}; + expr->flags = Expr::_IsRvalue; + expr->here = loc_end; + expr->e_type = TypeExpr::new_unit(); + if (code.func_val->does_return_self()) { + throw ParseError(loc_end, "missing return; forgot `return self`?"); + } + if (code.func_val->has_mutate_params()) { + expr = wrap_return_value_with_mutate_params(loc_end, code, expr); + } try { - // std::cerr << "in implicit return: "; - unify(ret_type, code.ret_type); + unify(expr->e_type, code.ret_type); } catch (UnifyError& ue) { std::ostringstream os; os << "previous function return type " << code.ret_type - << " cannot be unified with implicit end-of-block return type " << ret_type << ": " << ue; - throw ParseError(v->loc_end, os.str()); + << " cannot be unified with implicit end-of-block return type " << expr->e_type << ": " << ue; + throw ParseError(loc_end, os.str()); } - code.emplace_back(v->loc_end, Op::_Return); + std::vector tmp_vars = expr->pre_compile(code); + code.emplace_back(loc_end, Op::_Return, std::move(tmp_vars)); } -blk_fl::val process_statement(AnyV v, CodeBlob& code); - static blk_fl::val process_vertex(V v, CodeBlob& code, bool no_new_scope = false) { if (!no_new_scope) { open_scope(v->loc); @@ -792,7 +1080,7 @@ static blk_fl::val process_vertex(V v, CodeBlob& code) { static Expr* process_catch_variable(AnyV catch_var, TypeExpr* var_type) { if (auto v_ident = catch_var->try_as()) { - return create_new_local_variable(catch_var->loc, v_ident->name, var_type); + return create_new_local_variable(catch_var->loc, v_ident->name, var_type, true); } return create_new_underscore_variable(catch_var->loc, var_type); } @@ -882,7 +1170,7 @@ blk_fl::val process_statement(AnyV v, CodeBlob& code) { case 
ast_try_catch_statement: return process_vertex(v->as(), code); default: { - auto expr = process_expr(v, code); + Expr* expr = process_expr(v, code); expr->chk_rvalue(); expr->pre_compile(code); return blk_fl::end; @@ -890,18 +1178,16 @@ blk_fl::val process_statement(AnyV v, CodeBlob& code) { } } -static FormalArg process_vertex(V v, int fa_idx) { - if (v->get_identifier()->name.empty()) { - return std::make_tuple(v->param_type, (SymDef*)nullptr, v->loc); +static FormalArg process_vertex(V v, SymDef* param_sym) { + if (!param_sym) { + return std::make_tuple(v->param_type, nullptr, v->loc); } SymDef* new_sym_def = define_symbol(calc_sym_idx(v->get_identifier()->name), true, v->loc); - if (!new_sym_def) { - v->error("cannot define symbol"); - } - if (new_sym_def->value) { - v->error("redefined argument"); + if (!new_sym_def || new_sym_def->value) { + v->error("redefined parameter"); } - new_sym_def->value = new SymVal{SymValKind::_Param, fa_idx, v->param_type}; + const SymValVariable* param_val = dynamic_cast(param_sym->value); + new_sym_def->value = new SymValVariable(*param_val); return std::make_tuple(v->param_type, new_sym_def, v->loc); } @@ -911,13 +1197,13 @@ static void convert_function_body_to_CodeBlob(V v, Vloc); - CodeBlob* blob = new CodeBlob{static_cast(v->get_identifier()->name), v->loc, v->ret_type}; + CodeBlob* blob = new CodeBlob{static_cast(v->get_identifier()->name), v->loc, sym_val, v->ret_type}; if (v->marked_as_pure) { blob->flags |= CodeBlob::_ForbidImpure; } FormalArgList legacy_arg_list; for (int i = 0; i < v->get_num_params(); ++i) { - legacy_arg_list.emplace_back(process_vertex(v->get_param(i), i)); + legacy_arg_list.emplace_back(process_vertex(v->get_param(i), sym_val->parameters[i])); } blob->import_params(std::move(legacy_arg_list)); @@ -931,7 +1217,7 @@ static void convert_function_body_to_CodeBlob(V v, Vloc_end, *blob); } blob->close_blk(v_body->loc_end); diff --git a/tolk/pipe-generate-fif-output.cpp b/tolk/pipe-generate-fif-output.cpp 
index 65225d828..91a99f96a 100644 --- a/tolk/pipe-generate-fif-output.cpp +++ b/tolk/pipe-generate-fif-output.cpp @@ -39,10 +39,10 @@ bool SymValCodeFunc::does_need_codegen() const { if (flags & flagUsedAsNonCall) { return true; } - // when a function f() is just `return anotherF(...args)`, it doesn't need to be codegenerated at all, - // since all its usages are inlined - return !is_just_wrapper_for_another_f(); - // in the future, we may want to implement a true AST inlining for `inline` functions also + // currently, there is no inlining, all functions are codegenerated + // (but actually, unused ones are later removed by Fift) + // in the future, we may want to implement a true AST inlining for "simple" functions + return true; } void SymValCodeFunc::set_code(CodeBlob* code) { diff --git a/tolk/pipe-register-symbols.cpp b/tolk/pipe-register-symbols.cpp index 2e6d26dd8..569d434aa 100644 --- a/tolk/pipe-register-symbols.cpp +++ b/tolk/pipe-register-symbols.cpp @@ -74,68 +74,6 @@ static td::RefInt256 calculate_method_id_by_func_name(std::string_view func_name return td::make_refint((crc & 0xffff) | 0x10000); } -static bool is_parameter_of_function(AnyV v_variable, V v_func) { - return v_variable->type == ast_identifier && v_func->get_param_list()->lookup_idx(v_variable->as()->name) != -1; -} - -// if a function looks like `T f(...args) { return anotherF(...args); }`, -// set a bit to flags -// then, all calls to `f(...)` will be effectively replaced with `anotherF(...)` -// todo this function (and optimization) was done before implementing AST, but after AST and registering symbols in advance, -// its behavior became a bit wrong: if anotherF is declared before f, than it's detected here, but still not inlined, -// since inlining is done is legacy code, using Expr -// in the future, inlining should be done on AST level, but it's impossible until all names resolving (including scopes) -// is also done on AST level -// in the future, when working on AST level, 
inlining should become much more powerful -// (for instance, it should inline `return anotherF(constants)`, etc.) -static bool detect_if_function_just_wraps_another(V v) { - if (v->method_id || v->marked_as_get_method || v->marked_as_builtin || v->marked_as_inline_ref || v->is_entrypoint) { - return false; - } - for (int i = 0; i < v->get_num_params(); ++i) { - if (v->get_param(i)->param_type->get_width() != 1 || v->get_param(i)->param_type->has_unknown_inside()) { - return false; // avoid situations like `f(int a, (int,int) b)`, inlining will be cumbersome - } - } - - auto v_body = v->get_body()->try_as(); - if (!v_body || v_body->size() != 1 || v_body->get_item(0)->type != ast_return_statement) { - return false; - } - - auto v_return = v_body->get_item(0)->as(); - auto v_anotherF = v_return->get_return_value()->try_as(); - if (!v_anotherF) { - return false; - } - - V called_arg = v_anotherF->get_called_arg(); - if (v_anotherF->get_called_f()->type != ast_identifier) { - return false; - } - - std::string_view called_name = v_anotherF->get_called_f()->try_as()->name; - std::string_view function_name = v->get_identifier()->name; - - const std::vector& v_arg_items = called_arg->get_items(); - std::set used_args; - for (AnyV v_arg : v_arg_items) { - if (!is_parameter_of_function(v_arg, v)) { - return false; - } - used_args.emplace(v_arg->as()->name); - } - if (static_cast(used_args.size()) != v->get_num_params() || used_args.size() != v_arg_items.size()) { - return false; - } - - // ok, f_current is a wrapper - if (G.is_verbosity(2)) { - std::cerr << function_name << " -> " << called_name << std::endl; - } - return true; -} - static void calc_arg_ret_order_of_asm_function(V v_body, V param_list, TypeExpr* ret_type, std::vector& arg_order, std::vector& ret_order) { int cnt = param_list->size(); @@ -201,7 +139,7 @@ static void register_constant(V v) { // todo currently, constant value calculation is dirty and roughly: init_value is evaluated to fif code // and waited to 
be a single expression // although it works, of course it should be later rewritten using AST calculations, as well as lots of other parts - CodeBlob code("tmp", v->loc, nullptr); + CodeBlob code("tmp", v->loc, nullptr, nullptr); Expr* x = process_expr(init_value, code); if (!x->is_rvalue()) { v->get_init_value()->error("expression is not strictly Rvalue"); @@ -211,9 +149,9 @@ static void register_constant(V v) { } SymValConst* sym_val = nullptr; if (x->cls == Expr::_Const) { // Integer constant - sym_val = new SymValConst{static_cast(G.all_constants.size()), x->intval}; + sym_val = new SymValConst(static_cast(G.all_constants.size()), x->intval); } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) - sym_val = new SymValConst{static_cast(G.all_constants.size()), x->strval}; + sym_val = new SymValConst(static_cast(G.all_constants.size()), x->strval); } else if (x->cls == Expr::_Apply) { // even "1 + 2" is Expr::_Apply (it applies `_+_`) code.emplace_back(v->loc, Op::_Import, std::vector()); auto tmp_vars = x->pre_compile(code); @@ -241,14 +179,14 @@ static void register_constant(V v) { if (op.origin.is_null() || !op.origin->is_valid()) { init_value->error("precompiled expression did not result in a valid integer constant"); } - sym_val = new SymValConst{static_cast(G.all_constants.size()), op.origin}; + sym_val = new SymValConst(static_cast(G.all_constants.size()), op.origin); } else { init_value->error("integer or slice literal or constant expected"); } sym_def->value = sym_val; #ifdef TOLK_DEBUG - dynamic_cast(sym_def->value)->name = v->get_identifier()->name; + sym_def->value->sym_name = v->get_identifier()->name; #endif G.all_constants.push_back(sym_def); } @@ -259,35 +197,68 @@ static void register_global_var(V v) { fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); } - sym_def->value = new SymValGlobVar{static_cast(G.all_global_vars.size()), v->declared_type}; + sym_def->value = new 
SymValGlobVar(static_cast(G.all_global_vars.size()), v->declared_type); #ifdef TOLK_DEBUG - dynamic_cast(sym_def->value)->name = v->get_identifier()->name; + sym_def->value->sym_name = v->get_identifier()->name; #endif G.all_global_vars.push_back(sym_def); } +static SymDef* register_parameter(V v, int idx) { + if (v->is_underscore()) { + return nullptr; + } + SymDef* sym_def = define_parameter(calc_sym_idx(v->get_identifier()->name), v->loc); + if (sym_def->value) { + // todo always false now, how to detect similar parameter names? (remember about underscore) + v->error("redefined parameter"); + } + + SymValVariable* sym_val = new SymValVariable(idx, v->param_type); + if (v->declared_as_mutate) { + sym_val->flags |= SymValVariable::flagMutateParameter; + } + if (!v->declared_as_mutate && idx == 0 && v->get_identifier()->name == "self") { + sym_val->flags |= SymValVariable::flagImmutable; + } + sym_def->value = sym_val; +#ifdef TOLK_DEBUG + sym_def->value->sym_name = v->get_identifier()->name; +#endif + return sym_def; +} + static void register_function(V v) { std::string_view func_name = v->get_identifier()->name; - // calculate TypeExpr of a function: it's a map (args -> ret), probably surrounded by forall - TypeExpr* func_type = nullptr; - if (int n_args = v->get_num_params()) { - std::vector arg_types; - arg_types.reserve(n_args); - for (int idx = 0; idx < n_args; ++idx) { - arg_types.emplace_back(v->get_param(idx)->param_type); + // calculate TypeExpr of a function: it's a map (params -> ret), probably surrounded by forall + TypeExpr* params_tensor_type = nullptr; + int n_params = v->get_num_params(); + int n_mutate_params = 0; + std::vector parameters_syms; + if (n_params) { + std::vector param_tensor_items; + param_tensor_items.reserve(n_params); + parameters_syms.reserve(n_params); + for (int i = 0; i < n_params; ++i) { + auto v_param = v->get_param(i); + n_mutate_params += static_cast(v_param->declared_as_mutate); + 
param_tensor_items.emplace_back(v_param->param_type); + parameters_syms.emplace_back(register_parameter(v_param, i)); } - func_type = TypeExpr::new_map(TypeExpr::new_tensor(std::move(arg_types)), v->ret_type); + params_tensor_type = TypeExpr::new_tensor(std::move(param_tensor_items)); } else { - func_type = TypeExpr::new_map(TypeExpr::new_unit(), v->ret_type); + params_tensor_type = TypeExpr::new_unit(); } + + TypeExpr* function_type = TypeExpr::new_map(params_tensor_type, v->ret_type); if (v->genericsT_list) { std::vector type_vars; type_vars.reserve(v->genericsT_list->size()); for (int idx = 0; idx < v->genericsT_list->size(); ++idx) { type_vars.emplace_back(v->genericsT_list->get_item(idx)->created_type); } - func_type = TypeExpr::new_forall(std::move(type_vars), func_type); + function_type = TypeExpr::new_forall(std::move(type_vars), function_type); } if (v->marked_as_builtin) { const SymDef* builtin_func = lookup_symbol(G.symbols.lookup(func_name)); @@ -297,7 +268,7 @@ static void register_function(V v) { } #ifdef TOLK_DEBUG // in release, we don't need this check, since `builtin` is used only in stdlib, which is our responsibility - if (!func_val->sym_type->equals_to(func_type) || func_val->is_marked_as_pure() != v->marked_as_pure) { + if (!func_val->sym_type->equals_to(function_type) || func_val->is_marked_as_pure() != v->marked_as_pure) { v->error("declaration for `builtin` function doesn't match an actual one"); } #endif @@ -309,7 +280,7 @@ static void register_function(V v) { fire_error_redefinition_of_symbol(v->get_identifier(), sym_def); } if (G.is_verbosity(1)) { - std::cerr << "fun " << func_name << " : " << func_type << std::endl; + std::cerr << "fun " << func_name << " : " << function_type << std::endl; } if (v->marked_as_pure && v->ret_type->get_width() == 0) { v->error("a pure function should return something, otherwise it will be optimized out anyway"); @@ -317,11 +288,11 @@ static void register_function(V v) { SymValFunc* sym_val = nullptr; if 
(const auto* v_seq = v->get_body()->try_as()) { - sym_val = new SymValCodeFunc{static_cast(G.all_code_functions.size()), func_type, v->marked_as_pure}; + sym_val = new SymValCodeFunc(std::move(parameters_syms), static_cast(G.all_code_functions.size()), function_type); } else if (const auto* v_asm = v->get_body()->try_as()) { std::vector arg_order, ret_order; calc_arg_ret_order_of_asm_function(v_asm, v->get_param_list(), v->ret_type, arg_order, ret_order); - sym_val = new SymValAsmFunc{func_type, std::move(arg_order), std::move(ret_order), v->marked_as_pure}; + sym_val = new SymValAsmFunc(std::move(parameters_syms), function_type, std::move(arg_order), std::move(ret_order), 0); } else { v->error("Unexpected function body statement"); } @@ -341,6 +312,9 @@ static void register_function(V v) { } else if (v->is_entrypoint) { sym_val->method_id = calculate_method_id_for_entrypoint(func_name); } + if (v->marked_as_pure) { + sym_val->flags |= SymValFunc::flagMarkedAsPure; + } if (v->marked_as_inline) { sym_val->flags |= SymValFunc::flagInline; } @@ -353,13 +327,19 @@ static void register_function(V v) { if (v->is_entrypoint) { sym_val->flags |= SymValFunc::flagIsEntrypoint; } - if (detect_if_function_just_wraps_another(v)) { - sym_val->flags |= SymValFunc::flagWrapsAnotherF; + if (n_mutate_params) { + sym_val->flags |= SymValFunc::flagHasMutateParams; + } + if (v->accepts_self) { + sym_val->flags |= SymValFunc::flagAcceptsSelf; + } + if (v->returns_self) { + sym_val->flags |= SymValFunc::flagReturnsSelf; } sym_def->value = sym_val; #ifdef TOLK_DEBUG - dynamic_cast(sym_def->value)->name = func_name; + sym_def->value->sym_name = func_name; #endif if (dynamic_cast(sym_val)) { G.all_code_functions.push_back(sym_def); diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp index f8f64c50f..abaeb0846 100644 --- a/tolk/symtable.cpp +++ b/tolk/symtable.cpp @@ -126,6 +126,18 @@ SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc) { return registered; // registered->value 
is nullptr; it means, it's just created } +SymDef* define_parameter(sym_idx_t name_idx, SrcLocation loc) { + // note, that parameters (defined at function declaration) are not inserted into symtable + // their SymDef is registered to be inserted into SymValFunc::parameters + // (and later ->value is filled with SymValVariable) + + SymDef* registered = new SymDef(0, name_idx, loc); +#ifdef TOLK_DEBUG + registered->sym_name = registered->name(); +#endif + return registered; +} + SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc) { if (!name_idx) { return nullptr; diff --git a/tolk/symtable.h b/tolk/symtable.h index 75b0aa2f2..a52e8d333 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -17,6 +17,7 @@ #pragma once #include "src-file.h" +#include "type-expr.h" #include #include @@ -25,14 +26,23 @@ namespace tolk { typedef int var_idx_t; typedef int sym_idx_t; -enum class SymValKind { _Param, _Var, _Func, _Typename, _GlobVar, _Const }; +enum class SymValKind { _Var, _Func, _GlobVar, _Const }; struct SymValBase { SymValKind kind; int idx; - SymValBase(SymValKind kind, int idx) : kind(kind), idx(idx) { + TypeExpr* sym_type; +#ifdef TOLK_DEBUG + std::string sym_name; // seeing symbol name in debugger makes it much easier to delve into Tolk sources +#endif + + SymValBase(SymValKind kind, int idx, TypeExpr* sym_type) : kind(kind), idx(idx), sym_type(sym_type) { } virtual ~SymValBase() = default; + + TypeExpr* get_type() const { + return sym_type; + } }; @@ -98,6 +108,7 @@ void close_scope(); SymDef* lookup_symbol(sym_idx_t idx); SymDef* define_global_symbol(sym_idx_t name_idx, SrcLocation loc = {}); +SymDef* define_parameter(sym_idx_t name_idx, SrcLocation loc); SymDef* define_symbol(sym_idx_t name_idx, bool force_new, SrcLocation loc); } // namespace tolk diff --git a/tolk/tolk.h b/tolk/tolk.h index 199194355..971ca35dd 100644 --- a/tolk/tolk.h +++ b/tolk/tolk.h @@ -69,13 +69,14 @@ using const_idx_t = int; struct TmpVar { TypeExpr* v_type; 
var_idx_t idx; - bool is_tmp_unnamed; - sym_idx_t name; + sym_idx_t sym_idx; int coord; SrcLocation where; std::vector> on_modification; - TmpVar(var_idx_t _idx, bool _is_tmp_unnamed, TypeExpr* _type, SymDef* sym, SrcLocation loc); + TmpVar(var_idx_t _idx, TypeExpr* _type, sym_idx_t sym_idx, SrcLocation loc); + bool is_unnamed() const { return sym_idx == 0; } + void show(std::ostream& os, int omit_idx = 0) const; void dump(std::ostream& os) const; void set_location(SrcLocation loc); @@ -401,40 +402,56 @@ struct AsmOpList; * */ -struct SymVal : SymValBase { - TypeExpr* sym_type; - SymVal(SymValKind kind, int idx, TypeExpr* sym_type = nullptr) - : SymValBase(kind, idx), sym_type(sym_type) { +struct SymValVariable : SymValBase { + enum SymValFlag { + flagMutateParameter = 1, // parameter was declared with `mutate` keyword + flagImmutable = 2, // variable was declared via `val` (not `var`) + }; + int flags{0}; + + ~SymValVariable() override = default; + SymValVariable(int val, TypeExpr* sym_type) + : SymValBase(SymValKind::_Var, val, sym_type) {} + + bool is_function_parameter() const { + return idx >= 0; } - ~SymVal() override = default; - TypeExpr* get_type() const { - return sym_type; + bool is_mutate_parameter() const { + return flags & flagMutateParameter; + } + bool is_local_var() const { + return idx == -1; + } + bool is_immutable() const { + return flags & flagImmutable; } }; -struct SymValFunc : SymVal { +struct SymValFunc : SymValBase { enum SymValFlag { flagInline = 1, // marked `@inline` flagInlineRef = 2, // marked `@inline_ref` - flagWrapsAnotherF = 4, // `fun thisF(...args) { return anotherF(...args); }` (calls to thisF will be inlined) flagUsedAsNonCall = 8, // used not only as `f()`, but as a 1-st class function (assigned to var, pushed to tuple, etc.) 
flagMarkedAsPure = 16, // declared as `pure`, can't call impure and access globals, unused invocations are optimized out flagBuiltinFunction = 32, // was created via `define_builtin_func()`, not from source code flagGetMethod = 64, // was declared via `get func(): T`, method_id is auto-assigned flagIsEntrypoint = 128, // it's `main` / `onExternalMessage` / etc. + flagHasMutateParams = 256, // has parameters declared as `mutate` + flagAcceptsSelf = 512, // is a member function (has `self` first parameter) + flagReturnsSelf = 1024, // return type is `self` (returns the mutated 1st argument), calls can be chainable }; td::RefInt256 method_id; // todo why int256? it's small int flags{0}; + std::vector parameters; // [i]-th may be nullptr for underscore; if not, its val is SymValVariable std::vector arg_order, ret_order; -#ifdef TOLK_DEBUG - std::string name; // seeing function name in debugger makes it much easier to delve into Tolk sources -#endif + ~SymValFunc() override = default; - SymValFunc(int val, TypeExpr* _ft, bool marked_as_pure) - : SymVal(SymValKind::_Func, val, _ft), flags(marked_as_pure ? flagMarkedAsPure : 0) {} - SymValFunc(int val, TypeExpr* _ft, std::initializer_list _arg_order, std::initializer_list _ret_order, bool marked_as_pure) - : SymVal(SymValKind::_Func, val, _ft), flags(marked_as_pure ? 
flagMarkedAsPure : 0), arg_order(_arg_order), ret_order(_ret_order) { + SymValFunc(std::vector parameters, int val, TypeExpr* sym_type, int flags) + : SymValBase(SymValKind::_Func, val, sym_type), flags(flags), parameters(std::move(parameters)) { + } + SymValFunc(std::vector parameters, int val, TypeExpr* sym_type, int flags, std::initializer_list arg_order, std::initializer_list ret_order) + : SymValBase(SymValKind::_Func, val, sym_type), flags(flags), parameters(std::move(parameters)), arg_order(arg_order), ret_order(ret_order) { } const std::vector* get_arg_order() const { @@ -450,9 +467,6 @@ struct SymValFunc : SymVal { bool is_inline_ref() const { return flags & flagInlineRef; } - bool is_just_wrapper_for_another_f() const { - return flags & flagWrapsAnotherF; - } bool is_marked_as_pure() const { return flags & flagMarkedAsPure; } @@ -465,32 +479,35 @@ struct SymValFunc : SymVal { bool is_entrypoint() const { return flags & flagIsEntrypoint; } + bool has_mutate_params() const { + return flags & flagHasMutateParams; + } + bool does_accept_self() const { + return flags & flagAcceptsSelf; + } + bool does_return_self() const { + return flags & flagReturnsSelf; + } }; struct SymValCodeFunc : SymValFunc { CodeBlob* code; bool is_really_used{false}; // calculated via dfs; unused functions are not codegenerated ~SymValCodeFunc() override = default; - SymValCodeFunc(int val, TypeExpr* _ft, bool marked_as_pure) : SymValFunc(val, _ft, marked_as_pure), code(nullptr) { + SymValCodeFunc(std::vector parameters, int val, TypeExpr* _ft) + : SymValFunc(std::move(parameters), val, _ft, 0), code(nullptr) { } bool does_need_codegen() const; void set_code(CodeBlob* code); }; struct SymValGlobVar : SymValBase { - TypeExpr* sym_type; - int out_idx{0}; bool is_really_used{false}; // calculated via dfs from used functions; unused globals are not codegenerated -#ifdef TOLK_DEBUG - std::string name; // seeing variable name in debugger makes it much easier to delve into Tolk sources 
-#endif - SymValGlobVar(int val, TypeExpr* gvtype, int oidx = 0) - : SymValBase(SymValKind::_GlobVar, val), sym_type(gvtype), out_idx(oidx) { + + SymValGlobVar(int val, TypeExpr* gvtype) + : SymValBase(SymValKind::_GlobVar, val, gvtype) { } ~SymValGlobVar() override = default; - TypeExpr* get_type() const { - return sym_type; - } }; struct SymValConst : SymValBase { @@ -499,14 +516,12 @@ struct SymValConst : SymValBase { td::RefInt256 intval; std::string strval; ConstKind kind; -#ifdef TOLK_DEBUG - std::string name; // seeing const name in debugger makes it much easier to delve into Tolk sources -#endif + SymValConst(int idx, td::RefInt256 value) - : SymValBase(SymValKind::_Const, idx), intval(value), kind(IntConst) { + : SymValBase(SymValKind::_Const, idx, TypeExpr::new_atomic(TypeExpr::_Int)), intval(std::move(value)), kind(IntConst) { } SymValConst(int idx, std::string value) - : SymValBase(SymValKind::_Const, idx), strval(value), kind(SliceConst) { + : SymValBase(SymValKind::_Const, idx, TypeExpr::new_atomic(TypeExpr::_Slice)), strval(std::move(value)), kind(SliceConst) { } ~SymValConst() override = default; td::RefInt256 get_int_value() const { @@ -529,9 +544,10 @@ struct SymValConst : SymValBase { struct Expr { enum ExprCls { - _None, _Apply, _VarApply, + _GrabMutatedVars, + _ReturnSelf, _MkTuple, _Tensor, _Const, @@ -539,14 +555,13 @@ struct Expr { _GlobFunc, _GlobVar, _Letop, - _LetFirst, _Hole, _CondExpr, _SliceConst, }; ExprCls cls; int val{0}; - enum { _IsRvalue = 2, _IsLvalue = 4, _IsImpure = 32 }; + enum { _IsRvalue = 2, _IsLvalue = 4, _IsImmutable = 8, _IsImpure = 32 }; int flags{0}; SrcLocation here; td::RefInt256 intval; @@ -554,8 +569,6 @@ struct Expr { SymDef* sym{nullptr}; TypeExpr* e_type{nullptr}; std::vector args; - explicit Expr(ExprCls c = _None) : cls(c) { - } Expr(ExprCls c, SrcLocation loc) : cls(c), here(loc) { } Expr(ExprCls c, std::vector _args) : cls(c), args(std::move(_args)) { @@ -585,33 +598,38 @@ struct Expr { bool is_lvalue() 
const { return flags & _IsLvalue; } + bool is_immutable() const { + return flags & _IsImmutable; + } bool is_mktuple() const { return cls == _MkTuple; } void chk_rvalue() const { if (!is_rvalue()) { - throw ParseError(here, "rvalue expected"); - } - } - void chk_lvalue() const { - if (!is_lvalue()) { - throw ParseError(here, "lvalue expected"); + fire_error_rvalue_expected(); } } - bool deduce_type(); + void deduce_type(); void set_location(SrcLocation loc) { here = loc; } SrcLocation get_location() const { return here; } - int define_new_vars(CodeBlob& code); - int predefine_vars(); + void define_new_vars(CodeBlob& code); + void predefine_vars(); std::vector pre_compile(CodeBlob& code, std::vector>* lval_globs = nullptr) const; var_idx_t new_tmp(CodeBlob& code) const; std::vector new_tmp_vect(CodeBlob& code) const { return {new_tmp(code)}; } + + GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN + void fire_error_rvalue_expected() const; + GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN + void fire_error_lvalue_expected(const std::string& details) const; + GNU_ATTRIBUTE_COLD GNU_ATTRIBUTE_NORETURN + void fire_error_modifying_immutable(const std::string& details) const; }; /* @@ -1324,24 +1342,17 @@ struct SymValAsmFunc : SymValFunc { simple_compile_func_t simple_compile; compile_func_t ext_compile; ~SymValAsmFunc() override = default; - SymValAsmFunc(TypeExpr* ft, std::vector&& arg_order, std::vector&& ret_order, bool marked_as_pure) - : SymValFunc(-1, ft, marked_as_pure) { + SymValAsmFunc(std::vector parameters, TypeExpr* ft, std::vector&& arg_order, std::vector&& ret_order, int flags) + : SymValFunc(std::move(parameters), -1, ft, flags) { this->arg_order = std::move(arg_order); this->ret_order = std::move(ret_order); } - SymValAsmFunc(TypeExpr* ft, simple_compile_func_t _compile, bool marked_as_pure) - : SymValFunc(-1, ft, marked_as_pure), simple_compile(std::move(_compile)) { - } - SymValAsmFunc(TypeExpr* ft, compile_func_t _compile, bool marked_as_pure) - : SymValFunc(-1, 
ft, marked_as_pure), ext_compile(std::move(_compile)) { - } - SymValAsmFunc(TypeExpr* ft, simple_compile_func_t _compile, std::initializer_list arg_order, - std::initializer_list ret_order = {}, bool marked_as_pure = false) - : SymValFunc(-1, ft, arg_order, ret_order, marked_as_pure), simple_compile(std::move(_compile)) { + SymValAsmFunc(std::vector parameters, TypeExpr* ft, simple_compile_func_t _compile, int flags) + : SymValFunc(std::move(parameters), -1, ft, flags), simple_compile(std::move(_compile)) { } - SymValAsmFunc(TypeExpr* ft, compile_func_t _compile, std::initializer_list arg_order, - std::initializer_list ret_order = {}, bool marked_as_pure = false) - : SymValFunc(-1, ft, arg_order, ret_order, marked_as_pure), ext_compile(std::move(_compile)) { + SymValAsmFunc(std::vector parameters, TypeExpr* ft, simple_compile_func_t _compile, int flags, + std::initializer_list arg_order, std::initializer_list ret_order) + : SymValFunc(std::move(parameters), -1, ft, flags, arg_order, ret_order), simple_compile(std::move(_compile)) { } void set_code(std::vector code); bool compile(AsmOpList& dest, std::vector& out, std::vector& in, SrcLocation where) const; @@ -1349,29 +1360,32 @@ struct SymValAsmFunc : SymValFunc { struct CodeBlob { enum { _ForbidImpure = 4 }; - int var_cnt, in_var_cnt, op_cnt; + int var_cnt, in_var_cnt; TypeExpr* ret_type; + const SymValCodeFunc* func_val; std::string name; SrcLocation loc; std::vector vars; std::unique_ptr ops; std::unique_ptr* cur_ops; + std::vector debug_ttt; std::stack*> cur_ops_stack; int flags = 0; bool require_callxargs = false; - CodeBlob(std::string name, SrcLocation loc, TypeExpr* ret) - : var_cnt(0), in_var_cnt(0), op_cnt(0), ret_type(ret), name(std::move(name)), loc(loc), cur_ops(&ops) { + CodeBlob(std::string name, SrcLocation loc, const SymValCodeFunc* func_val, TypeExpr* ret_type) + : var_cnt(0), in_var_cnt(0), ret_type(ret_type), func_val(func_val), name(std::move(name)), loc(loc), cur_ops(&ops) { } template Op& 
emplace_back(Args&&... args) { Op& res = *(*cur_ops = std::make_unique(args...)); cur_ops = &(res.next); + debug_ttt.push_back(&res); return res; } bool import_params(FormalArgList arg_list); - var_idx_t create_var(bool is_tmp_unnamed, TypeExpr* var_type, SymDef* sym, SrcLocation loc); + var_idx_t create_var(TypeExpr* var_type, var_idx_t sym_idx, SrcLocation loc); var_idx_t create_tmp_var(TypeExpr* var_type, SrcLocation loc) { - return create_var(true, var_type, nullptr, loc); + return create_var(var_type, 0, loc); } int split_vars(bool strict = false); bool compute_used_code_vars(); diff --git a/tolk/type-expr.h b/tolk/type-expr.h index 0e2a870f9..21a35a8e3 100644 --- a/tolk/type-expr.h +++ b/tolk/type-expr.h @@ -2,28 +2,20 @@ #include #include -#include "lexer.h" namespace tolk { struct TypeExpr { enum Kind { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_ForAll }; - // todo not _ - enum AtomicType { - _Int = tok_int, - _Cell = tok_cell, - _Slice = tok_slice, - _Builder = tok_builder, - _Cont = tok_continuation, - _Tuple = tok_tuple, - }; + enum AtomicType { _Int, _Cell, _Slice, _Builder, _Continutaion, _Tuple }; Kind constr; int value; int minw, maxw; static constexpr int w_inf = 1023; std::vector args; bool was_forall_var = false; - TypeExpr(Kind _constr, int _val = 0) : constr(_constr), value(_val), minw(0), maxw(w_inf) { + + explicit TypeExpr(Kind _constr, int _val = 0) : constr(_constr), value(_val), minw(0), maxw(w_inf) { } TypeExpr(Kind _constr, int _val, int width) : constr(_constr), value(_val), minw(width), maxw(width) { } @@ -48,6 +40,7 @@ struct TypeExpr { args.insert(args.end(), list.begin(), list.end()); compute_width(); } + bool is_atomic() const { return constr == te_Atomic; } @@ -127,9 +120,7 @@ struct TypeExpr { static TypeExpr* new_forall(std::vector list, TypeExpr* body) { return new TypeExpr{te_ForAll, body, std::move(list)}; } - static TypeExpr* new_forall(std::initializer_list list, TypeExpr* body) { - return 
new TypeExpr{te_ForAll, body, std::move(list)}; - } + static bool remove_indirect(TypeExpr*& te, TypeExpr* forbidden = nullptr); static std::vector remove_forall(TypeExpr*& te); static bool remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars); diff --git a/tolk/unify-types.cpp b/tolk/unify-types.cpp index cc2073ede..cee71942b 100644 --- a/tolk/unify-types.cpp +++ b/tolk/unify-types.cpp @@ -264,7 +264,7 @@ std::ostream& TypeExpr::print(std::ostream& os, int lex_level) const { return os << "slice"; case _Builder: return os << "builder"; - case _Cont: + case _Continutaion: return os << "cont"; case _Tuple: return os << "tuple"; From 16824fcfe352e38db4a034c7fac9d9da2a967373 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Thu, 31 Oct 2024 11:20:54 +0400 Subject: [PATCH 11/12] [Tolk] Get rid of ton_crypto dependency, embed address parsing Instead of 'ton_crypto', Tolk now depends on 'ton_crypto_core'. The only purpose of ton_crypto (in FunC also, btw) is address parsing: "EQCRDM9...", "0:52b3..." and so on. Such parsing has been implemented manually exactly the same way. 
--- tolk/CMakeLists.txt | 2 +- tolk/pipe-ast-to-legacy.cpp | 83 +++++++++++++++++++++++++++++++++---- 2 files changed, 76 insertions(+), 9 deletions(-) diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt index 09e02c0ac..d2decea71 100644 --- a/tolk/CMakeLists.txt +++ b/tolk/CMakeLists.txt @@ -26,7 +26,7 @@ set(TOLK_SOURCE add_executable(tolk tolk-main.cpp ${TOLK_SOURCE}) target_include_directories(tolk PUBLIC $) -target_link_libraries(tolk PUBLIC git ton_crypto) # todo replace with ton_crypto_core in the future +target_link_libraries(tolk PUBLIC git ton_crypto_core) if (WINGETOPT_FOUND) target_link_libraries_system(tolk wingetopt) endif () diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index c735698cd..050ef49d9 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -20,9 +20,9 @@ #include "compiler-state.h" #include "common/refint.h" #include "openssl/digest.hpp" -#include "block/block.h" -#include "block-parse.h" +#include "crypto/common/util.h" #include "td/utils/crypto.h" +#include "ton/ton-types.h" /* * In this module, we convert modern AST representation to legacy representation @@ -102,6 +102,64 @@ static void fire_error_invalid_mutate_arg_passed(SrcLocation loc, const SymDef* throw Fatal("unreachable"); } +// parse address like "EQCRDM9h4k3UJdOePPuyX40mCgA4vxge5Dc5vjBR8djbEKC5" +// based on unpack_std_smc_addr() from block.cpp +// (which is not included to avoid linking with ton_crypto) +static bool parse_friendly_address(const char packed[48], ton::WorkchainId& workchain, ton::StdSmcAddress& addr) { + unsigned char buffer[36]; + if (!td::buff_base64_decode(td::MutableSlice{buffer, 36}, td::Slice{packed, 48}, true)) { + return false; + } + td::uint16 crc = td::crc16(td::Slice{buffer, 34}); + if (buffer[34] != (crc >> 8) || buffer[35] != (crc & 0xff) || (buffer[0] & 0x3f) != 0x11) { + return false; + } + workchain = (td::int8)buffer[1]; + std::memcpy(addr.data(), buffer + 2, 32); + return true; +} + +// parse 
address like "0:527964d55cfa6eb731f4bfc07e9d025098097ef8505519e853986279bd8400d8" +// based on StdAddress::parse_addr() from block.cpp +// (which is not included to avoid linking with ton_crypto) +static bool parse_raw_address(const std::string& acc_string, int& workchain, ton::StdSmcAddress& addr) { + size_t pos = acc_string.find(':'); + if (pos != std::string::npos) { + td::Result r_wc = td::to_integer_safe(acc_string.substr(0, pos)); + if (r_wc.is_error()) { + return false; + } + workchain = r_wc.move_as_ok(); + pos++; + } else { + pos = 0; + } + if (acc_string.size() != pos + 64) { + return false; + } + + for (int i = 0; i < 64; ++i) { // loop through each hex digit + char c = acc_string[pos + i]; + int x; + if (c >= '0' && c <= '9') { + x = c - '0'; + } else if (c >= 'a' && c <= 'z') { + x = c - 'a' + 10; + } else if (c >= 'A' && c <= 'Z') { + x = c - 'A' + 10; + } else { + return false; + } + + if ((i & 1) == 0) { + addr.data()[i >> 1] = static_cast((addr.data()[i >> 1] & 0x0F) | (x << 4)); + } else { + addr.data()[i >> 1] = static_cast((addr.data()[i >> 1] & 0xF0) | x); + } + } + return true; +} + namespace blk_fl { enum { end = 1, ret = 2, empty = 4 }; typedef int val; @@ -577,14 +635,23 @@ static Expr* process_expr(V v) { } break; } - case 'a': { // MsgAddressInt - // todo rewrite stdaddress parsing (if done, CMake dep "ton_crypto" can be replaced with "ton_crypto_core") - block::StdAddress a; - if (a.parse_addr(str)) { - res->strval = block::tlb::MsgAddressInt().pack_std_address(a)->as_bitslice().to_hex(); - } else { + case 'a': { // MsgAddress + int workchain; + ton::StdSmcAddress addr; + bool correct = (str.size() == 48 && parse_friendly_address(str.data(), workchain, addr)) || + (str.size() != 48 && parse_raw_address(str, workchain, addr)); + if (!correct) { v->error("invalid standard address '" + str + "'"); } + if (workchain < -128 || workchain >= 128) { + v->error("anycast addresses not supported"); + } + + unsigned char data[3 + 8 + 256]; // 
addr_std$10 anycast:(Maybe Anycast) workchain_id:int8 address:bits256 = MsgAddressInt; + td::bitstring::bits_store_long_top(data, 0, static_cast(4) << (64 - 3), 3); + td::bitstring::bits_store_long_top(data, 3, static_cast(workchain) << (64 - 8), 8); + td::bitstring::bits_memcpy(data, 3 + 8, addr.bits().ptr, 0, addr.size()); + res->strval = td::BitSlice{data, sizeof(data)}.to_hex(); break; } case 'u': { From d110022731a62fe6da27e08958f03e4682d9f032 Mon Sep 17 00:00:00 2001 From: tolk-vm Date: Thu, 31 Oct 2024 11:22:18 +0400 Subject: [PATCH 12/12] [Tolk] Implement logical operators && || Unary logical NOT was already implemented earlier. Logical AND OR are expressed via conditional expression: * a && b -> a ? (b != 0) : 0 * a || b -> a ? 1 : (b != 0) They work as expected in any expressions. For instance, having `cond && f()`, f is called only if cond is true. For primitive cases, like `a > 0 && b > 0`, Fift code is not optimal, it could potentially be without IFs. These are moments of future optimizations. For now, it's more than enough. 
--- tolk-tester/tests/invalid-logical-1.tolk | 8 -- tolk-tester/tests/logical-operators.tolk | 158 +++++++++++++++++++++++ tolk/pipe-ast-to-legacy.cpp | 80 ++++++------ tolk/symtable.h | 4 +- 4 files changed, 204 insertions(+), 46 deletions(-) delete mode 100644 tolk-tester/tests/invalid-logical-1.tolk diff --git a/tolk-tester/tests/invalid-logical-1.tolk b/tolk-tester/tests/invalid-logical-1.tolk deleted file mode 100644 index 9aa210bbd..000000000 --- a/tolk-tester/tests/invalid-logical-1.tolk +++ /dev/null @@ -1,8 +0,0 @@ -fun main() { - return 1 && 2; -} - -/** -@compilation_should_fail -@stderr logical operators are not supported yet - */ diff --git a/tolk-tester/tests/logical-operators.tolk b/tolk-tester/tests/logical-operators.tolk index ec0e7a87c..e9774f3f4 100644 --- a/tolk-tester/tests/logical-operators.tolk +++ b/tolk-tester/tests/logical-operators.tolk @@ -57,6 +57,72 @@ fun testNotNull(x: int) { return [x == null, null == x, !(x == null), null == null, +(null != null)]; } +@method_id(106) +fun testAndConstCodegen() { + return ( + [1 && 0, 0 && 1, 0 && 0, 1 && 1], + [4 && 3 && 0, 5 && 0 && 7 && 8, (7 && 0) && -19], + [4 && 3 && -1, 5 && -100 && 7 && 8, (7 && (1 + 2)) && -19], + [true && false, true && true] + ); +} + +@method_id(107) +fun testOrConstCodegen() { + return ( + [1 || 0, 0 || 1, 0 || 0, 1 || 1], + [0 || 0 || 0, 0 || (0 || 0), ((0 || 0) || 0) || 0], + [4 || 3 || -1, 0 || -100 || 0 || 0, (0 || (1 + -1)) || -19], + [true || false, false || false] + ); +} + +global eqCallsCnt: int; + +fun eq(x: int) { return x; } +fun eqCnt(x: int) { eqCallsCnt += 1; return x; } +fun isGt0(x: int) { return x > 0; } + +fun alwaysThrows(): int { throw 444 ; return 444; } + +@method_id(108) +fun testAndSimpleCodegen(a: int, b: int) { + return a && b; +} + +@method_id(109) +fun testOrSimpleCodegen(a: int, b: int) { + return a > 0 || b > 0; +} + +@method_id(110) +fun testLogicalOps1(x: int) { + eqCallsCnt = 0; + return ( + isGt0(x) || !isGt0(x) || alwaysThrows(), + x 
&& eqCnt(x) && eqCnt(x - 1) && eqCnt(x - 2), + (400 == eq(x)) && alwaysThrows(), + (500 == eq(x)) || eqCnt(x) || false, + (500 == eq(x)) || eqCnt(x) || true, + eqCallsCnt + ); +} + +@method_id(111) +fun testLogicalOps2(first: int) { + var s = beginCell().storeInt(1, 32).storeInt(2, 32).storeInt(3, 32).storeInt(4, 32).storeInt(5, 32).endCell().beginParse(); + var sum = 0; + if (first && s.loadUint(32)) { + (2 == s.loadUint(32)) && (sum += s.loadUint(32)); + (3 == s.loadUint(32)) && (sum += s.loadUint(32)); + (5 == s.preloadUint(32)) && (sum += s.loadUint(32)); + } else { + (10 == s.loadUint(32)) || (20 == s.loadUint(32)) || (3 == s.loadUint(32)) || (4 == s.loadUint(32)); + sum += s.loadUint(32); + } + return (s.getRemainingBitsCount(), sum); +} + fun main() { } @@ -80,6 +146,19 @@ fun main() { @testcase | 104 | 0 | 3 -1 5 @testcase | 105 | 0 | [ 0 0 -1 -1 0 ] @testcase | 105 | null | [ -1 -1 0 -1 0 ] +@testcase | 106 | | [ 0 0 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ 0 -1 ] +@testcase | 107 | | [ -1 -1 0 -1 ] [ 0 0 0 ] [ -1 -1 -1 ] [ -1 0 ] +@testcase | 108 | 1 2 | -1 +@testcase | 108 | 1 0 | 0 +@testcase | 109 | -5 -4 | 0 +@testcase | 109 | -5 4 | -1 +@testcase | 109 | 1 99 | -1 +@testcase | 110 | 0 | -1 0 0 0 -1 2 +@testcase | 110 | 1 | -1 0 0 -1 -1 4 +@testcase | 110 | 2 | -1 0 0 -1 -1 5 +@testcase | 110 | 500 | -1 -1 0 -1 -1 3 +@testcase | 111 | 0 | 32 4 +@testcase | 111 | -1 | 0 8 @fif_codegen """ @@ -134,4 +213,83 @@ fun main() { }> """ +@fif_codegen +""" + testAndConstCodegen PROC:<{ + // + FALSE + 0 PUSHINT + DUP + TRUE + 4 TUPLE + FALSE + 0 PUSHINT + DUP + TRIPLE + TRUE + TRUE + TRUE + TRIPLE + FALSE + TRUE + PAIR + }> +""" + +@fif_codegen +""" + testOrConstCodegen PROC:<{ + // + -1 PUSHINT + TRUE + FALSE + s2 PUSH + 4 TUPLE + FALSE + FALSE + FALSE + TRIPLE + -1 PUSHINT + DUP + TRUE + TRIPLE + -1 PUSHINT + FALSE + PAIR + }> +""" + +Currently, && operator is implemented via ?: and is not optimal in primitive cases. +For example, `a && b` can be expressed without IFs. 
+These are moments of future optimizations. For now, it's more than enough. +@fif_codegen +""" + testAndSimpleCodegen PROC:<{ + // a b + SWAP // b a + IF:<{ // b + 0 NEQINT // _2 + }>ELSE<{ // b + DROP // + 0 PUSHINT // _2=0 + }> + }> +""" + +@fif_codegen +""" + testOrSimpleCodegen PROC:<{ + // a b + SWAP // b a + 0 GTINT // b _3 + IF:<{ // b + DROP // + -1 PUSHINT // _4=-1 + }>ELSE<{ // b + 0 GTINT // _7 + 0 NEQINT // _4 + }> + }> +""" + */ diff --git a/tolk/pipe-ast-to-legacy.cpp b/tolk/pipe-ast-to-legacy.cpp index 050ef49d9..7257bfb07 100644 --- a/tolk/pipe-ast-to-legacy.cpp +++ b/tolk/pipe-ast-to-legacy.cpp @@ -160,6 +160,22 @@ static bool parse_raw_address(const std::string& acc_string, int& workchain, ton return true; } +static Expr* create_expr_apply(SrcLocation loc, SymDef* sym, std::vector&& args) { + Expr* apply = new Expr(Expr::_Apply, sym, std::move(args)); + apply->here = loc; + apply->flags = Expr::_IsRvalue; + apply->deduce_type(); + return apply; +} + +static Expr* create_expr_int_const(SrcLocation loc, int int_val) { + Expr* int_const = new Expr(Expr::_Const, loc); + int_const->intval = td::make_refint(int_val); + int_const->flags = Expr::_IsRvalue; + int_const->e_type = TypeExpr::new_atomic(TypeExpr::_Int); + return int_const; +} + namespace blk_fl { enum { end = 1, ret = 2, empty = 4 }; typedef int val; @@ -238,13 +254,10 @@ static Expr* process_expr(V v, CodeBlob& code) { if (x->is_immutable()) { x->fire_error_modifying_immutable("left side of assignment"); } - sym_idx_t name = G.symbols.lookup_add("^_" + operator_name + "_"); + SymDef* sym = lookup_symbol(calc_sym_idx("^_" + operator_name + "_")); Expr* y = process_expr(v->get_rhs(), code); y->chk_rvalue(); - Expr* z = new Expr{Expr::_Apply, name, {x, y}}; - z->here = v->loc; - z->flags = Expr::_IsRvalue; - z->deduce_type(); + Expr* z = create_expr_apply(v->loc, sym, {x, y}); Expr* res = new Expr{Expr::_Letop, {x->copy(), z}}; res->here = v->loc; res->flags = x->flags | Expr::_IsRvalue; @@ 
-276,17 +289,27 @@ static Expr* process_expr(V v, CodeBlob& code) { t == tok_mul || t == tok_div || t == tok_mod || t == tok_divC || t == tok_divR) { Expr* res = process_expr(v->get_lhs(), code); res->chk_rvalue(); - sym_idx_t name = G.symbols.lookup_add("_" + operator_name + "_"); + SymDef* sym = lookup_symbol(calc_sym_idx("_" + operator_name + "_")); Expr* x = process_expr(v->get_rhs(), code); x->chk_rvalue(); - res = new Expr{Expr::_Apply, name, {res, x}}; - res->here = v->loc; - res->flags = Expr::_IsRvalue; - res->deduce_type(); + res = create_expr_apply(v->loc, sym, {res, x}); return res; } if (t == tok_logical_and || t == tok_logical_or) { - v->error("logical operators are not supported yet"); + // do the following transformations: + // a && b -> a ? (b != 0) : 0 + // a || b -> a ? 1 : (b != 0) + SymDef* sym_neq = lookup_symbol(calc_sym_idx("_!=_")); + Expr* lhs = process_expr(v->get_lhs(), code); + Expr* rhs = process_expr(v->get_rhs(), code); + Expr* e_neq0 = create_expr_apply(v->loc, sym_neq, {rhs, create_expr_int_const(v->loc, 0)}); + Expr* e_when_true = t == tok_logical_and ? e_neq0 : create_expr_int_const(v->loc, -1); + Expr* e_when_false = t == tok_logical_and ? 
create_expr_int_const(v->loc, 0) : e_neq0; + Expr* e_ternary = new Expr(Expr::_CondExpr, {lhs, e_when_true, e_when_false}); + e_ternary->here = v->loc; + e_ternary->flags = Expr::_IsRvalue; + e_ternary->deduce_type(); + return e_ternary; } v->error("unsupported binary operator"); @@ -294,7 +317,7 @@ static Expr* process_expr(V v, CodeBlob& code) { static Expr* process_expr(V v, CodeBlob& code) { TokenType t = v->tok; - sym_idx_t name = G.symbols.lookup_add(static_cast(v->operator_name) + "_"); + SymDef* sym = lookup_symbol(calc_sym_idx(static_cast(v->operator_name) + "_")); Expr* x = process_expr(v->get_rhs(), code); x->chk_rvalue(); @@ -316,11 +339,7 @@ static Expr* process_expr(V v, CodeBlob& code) { return x; } - auto res = new Expr{Expr::_Apply, name, {x}}; - res->here = v->loc; - res->flags = Expr::_IsRvalue; - res->deduce_type(); - return res; + return create_expr_apply(v->loc, sym, {x}); } static Expr* process_expr(V v, CodeBlob& code) { @@ -683,19 +702,12 @@ static Expr* process_expr(V v) { static Expr* process_expr(V v) { SymDef* builtin_sym = lookup_symbol(calc_sym_idx(v->bool_val ? 
"__true" : "__false")); - Expr* res = new Expr{Expr::_Apply, builtin_sym, {}}; - res->flags = Expr::_IsRvalue; - res->deduce_type(); - return res; + return create_expr_apply(v->loc, builtin_sym, {}); } static Expr* process_expr(V v) { SymDef* builtin_sym = lookup_symbol(calc_sym_idx("__null")); - Expr* res = new Expr{Expr::_Apply, builtin_sym, {}}; - res->here = v->loc; - res->flags = Expr::_IsRvalue; - res->deduce_type(); - return res; + return create_expr_apply(v->loc, builtin_sym, {}); } static Expr* process_expr(V v, CodeBlob& code) { @@ -1116,11 +1128,9 @@ static blk_fl::val process_vertex(V v, CodeBlob& code) { args.push_back(process_expr(v->get_thrown_code(), code)); } - Expr* expr = new Expr{Expr::_Apply, builtin_sym, std::move(args)}; - expr->here = v->loc; - expr->flags = Expr::_IsRvalue | Expr::_IsImpure; - expr->deduce_type(); - expr->pre_compile(code); + Expr* apply = create_expr_apply(v->loc, builtin_sym, std::move(args)); + apply->flags |= Expr::_IsImpure; + apply->pre_compile(code); return blk_fl::end; } @@ -1137,11 +1147,9 @@ static blk_fl::val process_vertex(V v, CodeBlob& code) { } SymDef* builtin_sym = lookup_symbol(calc_sym_idx("__throw_if_unless")); - Expr* expr = new Expr{Expr::_Apply, builtin_sym, std::move(args)}; - expr->here = v->loc; - expr->flags = Expr::_IsRvalue | Expr::_IsImpure; - expr->deduce_type(); - expr->pre_compile(code); + Expr* apply = create_expr_apply(v->loc, builtin_sym, std::move(args)); + apply->flags |= Expr::_IsImpure; + apply->pre_compile(code); return blk_fl::end; } diff --git a/tolk/symtable.h b/tolk/symtable.h index a52e8d333..69e2eaa8e 100644 --- a/tolk/symtable.h +++ b/tolk/symtable.h @@ -67,8 +67,8 @@ class SymTable { public: static constexpr sym_idx_t not_found = 0; - sym_idx_t lookup(std::string_view str, int mode = 0) { - return gen_lookup(str, mode); + sym_idx_t lookup(std::string_view str) { + return gen_lookup(str, 0); } sym_idx_t lookup_add(std::string_view str) { return gen_lookup(str, 1);