diff --git a/go.mod b/go.mod index 81aaf530fc..d1c067a684 100644 --- a/go.mod +++ b/go.mod @@ -34,7 +34,7 @@ require ( github.com/miekg/dns v1.1.59 github.com/moloch--/asciicast v0.1.0 github.com/moloch--/memmod v0.0.0-20230225130813-fd77d905589e - github.com/ncruces/go-sqlite3 v0.15.0 + github.com/ncruces/go-sqlite3 v0.16.3 github.com/reeflective/console v0.1.15 github.com/reeflective/readline v1.0.14 github.com/rsteube/carapace v0.46.3-0.20231214181515-27e49f3c3b69 @@ -42,7 +42,7 @@ require ( github.com/spf13/cobra v1.8.1 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.9.0 - github.com/tetratelabs/wazero v1.7.2 + github.com/tetratelabs/wazero v1.7.3 github.com/things-go/go-socks5 v0.0.5 github.com/ulikunitz/xz v0.5.12 github.com/xlab/treeprint v1.2.0 diff --git a/go.sum b/go.sum index 26afaea86d..40babc07cd 100644 --- a/go.sum +++ b/go.sum @@ -303,8 +303,8 @@ github.com/moloch--/asciicast v0.1.0 h1:eBOJwuFKSk447s/kPs9MWsc4kAl5HmuKIDLDYD6/ github.com/moloch--/asciicast v0.1.0/go.mod h1:OckO16UDLgxVLclrCnbocL1ix15Br/8Xv/caBoYq98o= github.com/moloch--/memmod v0.0.0-20230225130813-fd77d905589e h1:IkFCPlAa0iTiLxck+NqAwBx8JDlnHYm4orOQBbs4BDQ= github.com/moloch--/memmod v0.0.0-20230225130813-fd77d905589e/go.mod h1:eYeI6cQ5YHhHt9i0BBW0zc1DaQnb4ZMXsSPuEV/V5Og= -github.com/ncruces/go-sqlite3 v0.15.0 h1:C+SIrcYsAIR5GUYWmCnif6x81n6BS9y75vYcQynuGNU= -github.com/ncruces/go-sqlite3 v0.15.0/go.mod h1:kHHYmFmK4G2VFFoIovEg9BEQ8BP+D81y4ESHXnzJV/w= +github.com/ncruces/go-sqlite3 v0.16.3 h1:Ky0denOdmAGOoCE6lQlw6GCJNMD8gTikNWe8rpu+Gjc= +github.com/ncruces/go-sqlite3 v0.16.3/go.mod h1:sAU/vQwBmZ2hq5BlW/KTzqRFizL43bv2JQoBLgXhcMI= github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt7M= @@ -387,8 +387,8 @@ github.com/tc-hib/winres v0.2.1 h1:YDE0FiP0VmtRaDn7+aaChp1KiF4owBiJa5l964l5ujA= github.com/tc-hib/winres v0.2.1/go.mod h1:C/JaNhH3KBvhNKVbvdlDWkbMDO9H4fKKDaN7/07SSuk= github.com/tcnksm/go-httpstat v0.2.0 h1:rP7T5e5U2HfmOBmZzGgGZjBQ5/GluWUylujl0tJ04I0= github.com/tcnksm/go-httpstat v0.2.0/go.mod h1:s3JVJFtQxtBEBC9dwcdTTXS9xFnM3SXAZwPG41aurT8= -github.com/tetratelabs/wazero v1.7.2 h1:1+z5nXJNwMLPAWaTePFi49SSTL0IMx/i3Fg8Yc25GDc= -github.com/tetratelabs/wazero v1.7.2/go.mod h1:ytl6Zuh20R/eROuyDaGPkp82O9C/DJfXAwJfQ3X6/7Y= +github.com/tetratelabs/wazero v1.7.3 h1:PBH5KVahrt3S2AHgEjKu4u+LlDbbk+nsGE3KLucy6Rw= +github.com/tetratelabs/wazero v1.7.3/go.mod h1:ytl6Zuh20R/eROuyDaGPkp82O9C/DJfXAwJfQ3X6/7Y= github.com/thedevsaddam/gojsonq/v2 v2.5.2 h1:CoMVaYyKFsVj6TjU6APqAhAvC07hTI6IQen8PHzHYY0= github.com/thedevsaddam/gojsonq/v2 v2.5.2/go.mod h1:bv6Xa7kWy82uT0LnXPE2SzGqTj33TAEeR560MdJkiXs= github.com/things-go/go-socks5 v0.0.5 h1:qvKaGcBkfDrUL33SchHN93srAmYGzb4CxSM2DPYufe8= diff --git a/vendor/github.com/ncruces/go-sqlite3/README.md b/vendor/github.com/ncruces/go-sqlite3/README.md index 3cf5b4afe0..25b2ccbe49 100644 --- a/vendor/github.com/ncruces/go-sqlite3/README.md +++ b/vendor/github.com/ncruces/go-sqlite3/README.md @@ -33,6 +33,8 @@ Go, wazero and [`x/sys`](https://pkg.go.dev/golang.org/x/sys) are the _only_ run provides the [`array`](https://sqlite.org/carray.html) table-valued function. - [`github.com/ncruces/go-sqlite3/ext/blobio`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/blobio) simplifies [incremental BLOB I/O](https://sqlite.org/c3ref/blob_open.html). +- [`github.com/ncruces/go-sqlite3/ext/bloom`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/bloom) + provides a [Bloom filter](https://github.com/nalgeon/sqlean/issues/27#issuecomment-1002267134) virtual table. - [`github.com/ncruces/go-sqlite3/ext/csv`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/csv) reads [comma-separated values](https://sqlite.org/csv.html). - [`github.com/ncruces/go-sqlite3/ext/fileio`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/fileio) @@ -51,12 +53,12 @@ Go, wazero and [`x/sys`](https://pkg.go.dev/golang.org/x/sys) are the _only_ run provides [Unicode aware](https://sqlite.org/src/dir/ext/icu) functions. - [`github.com/ncruces/go-sqlite3/ext/zorder`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/ext/zorder) maps multidimensional data to one dimension. +- [`github.com/ncruces/go-sqlite3/vfs/adiantum`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs/adiantum) + wraps a VFS to offer encryption at rest. - [`github.com/ncruces/go-sqlite3/vfs/memdb`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs/memdb) implements an in-memory VFS. - [`github.com/ncruces/go-sqlite3/vfs/readervfs`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs/readervfs) implements a VFS for immutable databases. -- [`github.com/ncruces/go-sqlite3/vfs/adiantum`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs/adiantum) - wraps a VFS to offer encryption at rest. ### Advanced features @@ -88,7 +90,9 @@ It also benefits greatly from [SQLite's](https://sqlite.org/testing.html) and [wazero's](https://tetrate.io/blog/introducing-wazero-from-tetrate/#:~:text=Rock%2Dsolid%20test%20approach) thorough testing. Every commit is [tested](.github/workflows/test.yml) on -Linux (amd64/arm64/386/riscv64), macOS (amd64/arm64), Windows, FreeBSD and illumos. +Linux (amd64/arm64/386/riscv64/s390x), macOS (amd64/arm64), +Windows (amd64), FreeBSD (amd64), illumos (amd64), and Solaris (amd64). + The Go VFS is tested by running SQLite's [mptest](https://github.com/sqlite/sqlite/blob/master/mptest/mptest.c). diff --git a/vendor/github.com/ncruces/go-sqlite3/conn.go b/vendor/github.com/ncruces/go-sqlite3/conn.go index 8ba034feef..f170ccf57f 100644 --- a/vendor/github.com/ncruces/go-sqlite3/conn.go +++ b/vendor/github.com/ncruces/go-sqlite3/conn.go @@ -346,10 +346,9 @@ func (c *Conn) checkInterrupt() { } func progressCallback(ctx context.Context, mod api.Module, pDB uint32) (interrupt uint32) { - if c, ok := ctx.Value(connKey{}).(*Conn); ok && c.handle == pDB && c.interrupt != nil { - if c.interrupt.Err() != nil { - interrupt = 1 - } + if c, ok := ctx.Value(connKey{}).(*Conn); ok && c.handle == pDB && + c.interrupt != nil && c.interrupt.Err() != nil { + interrupt = 1 } return interrupt } @@ -363,6 +362,30 @@ func (c *Conn) BusyTimeout(timeout time.Duration) error { return c.error(r) } +func timeoutCallback(ctx context.Context, mod api.Module, pDB uint32, count, tmout int32) (retry uint32) { + if c, ok := ctx.Value(connKey{}).(*Conn); ok && + (c.interrupt == nil || c.interrupt.Err() == nil) { + const delays = "\x01\x02\x05\x0a\x0f\x14\x19\x19\x19\x32\x32\x64" + const totals = "\x00\x01\x03\x08\x12\x21\x35\x4e\x67\x80\xb2\xe4" + const ndelay = int32(len(delays) - 1) + + var delay, prior int32 + if count <= ndelay { + delay = int32(delays[count]) + prior = int32(totals[count]) + } else { + delay = int32(delays[ndelay]) + prior = int32(totals[ndelay]) + delay*(count-ndelay) + } + + if delay = min(delay, tmout-prior); delay > 0 { + time.Sleep(time.Duration(delay) * time.Millisecond) + retry = 1 + } + } + return retry +} + // BusyHandler registers a callback to handle [BUSY] errors. // // https://sqlite.org/c3ref/busy_handler.html @@ -380,7 +403,8 @@ func (c *Conn) BusyHandler(cb func(count int) (retry bool)) error { } func busyCallback(ctx context.Context, mod api.Module, pDB uint32, count int32) (retry uint32) { - if c, ok := ctx.Value(connKey{}).(*Conn); ok && c.handle == pDB && c.busy != nil { + if c, ok := ctx.Value(connKey{}).(*Conn); ok && c.handle == pDB && c.busy != nil && + (c.interrupt == nil || c.interrupt.Err() == nil) { if c.busy(int(count)) { retry = 1 } diff --git a/vendor/github.com/ncruces/go-sqlite3/driver/driver.go b/vendor/github.com/ncruces/go-sqlite3/driver/driver.go index b496f76ec6..e7863b1b8f 100644 --- a/vendor/github.com/ncruces/go-sqlite3/driver/driver.go +++ b/vendor/github.com/ncruces/go-sqlite3/driver/driver.go @@ -229,6 +229,7 @@ func (c *conn) Raw() *sqlite3.Conn { return c.Conn } +// Deprecated: use BeginTx instead. func (c *conn) Begin() (driver.Tx, error) { return c.BeginTx(context.Background(), driver.TxOptions{}) } @@ -301,7 +302,7 @@ func (c *conn) PrepareContext(ctx context.Context, query string) (driver.Stmt, e s.Close() return nil, util.TailErr } - return &stmt{Stmt: s, tmRead: c.tmRead, tmWrite: c.tmWrite}, nil + return &stmt{Stmt: s, tmRead: c.tmRead, tmWrite: c.tmWrite, inputs: -2}, nil } func (c *conn) ExecContext(ctx context.Context, query string, args []driver.NamedValue) (driver.Result, error) { @@ -335,6 +336,7 @@ type stmt struct { *sqlite3.Stmt tmWrite sqlite3.TimeFormat tmRead sqlite3.TimeFormat + inputs int } var ( @@ -345,12 +347,17 @@ var ( ) func (s *stmt) NumInput() int { + if s.inputs >= -1 { + return s.inputs + } n := s.Stmt.BindCount() for i := 1; i <= n; i++ { if s.Stmt.BindName(i) != "" { + s.inputs = -1 return -1 } } + s.inputs = n return n } @@ -389,12 +396,7 @@ func (s *stmt) QueryContext(ctx context.Context, args []driver.NamedValue) (driv return &rows{ctx: ctx, stmt: s}, nil } -func (s *stmt) setupBindings(args []driver.NamedValue) error { - err := s.Stmt.ClearBindings() - if err != nil { - return err - } - +func (s *stmt) setupBindings(args []driver.NamedValue) (err error) { var ids [3]int for _, arg := range args { ids := ids[:0] @@ -558,19 +560,20 @@ func (r *rows) Next(dest []driver.Value) error { return err } -func (r *rows) decodeTime(i int, v any) (_ time.Time, _ bool) { +func (r *rows) decodeTime(i int, v any) (_ time.Time, ok bool) { if r.tmRead == sqlite3.TimeFormatDefault { + // handled by maybeTime return } - switch r.declType(i) { - case "DATE", "TIME", "DATETIME", "TIMESTAMP": - // maybe + switch v.(type) { + case int64, float64, string: + // could be a time value default: return } - switch v.(type) { - case int64, float64, string: - // maybe + switch r.declType(i) { + case "DATE", "TIME", "DATETIME", "TIMESTAMP": + // could be a time value default: return } diff --git a/vendor/github.com/ncruces/go-sqlite3/embed/README.md b/vendor/github.com/ncruces/go-sqlite3/embed/README.md index 87281a83f8..0156f01763 100644 --- a/vendor/github.com/ncruces/go-sqlite3/embed/README.md +++ b/vendor/github.com/ncruces/go-sqlite3/embed/README.md @@ -1,6 +1,6 @@ # Embeddable Wasm build of SQLite -This folder includes an embeddable Wasm build of SQLite 3.45.3 for use with +This folder includes an embeddable Wasm build of SQLite 3.46.0 for use with [`github.com/ncruces/go-sqlite3`](https://pkg.go.dev/github.com/ncruces/go-sqlite3). The following optional features are compiled in: @@ -10,6 +10,7 @@ The following optional features are compiled in: - [R*Tree](https://sqlite.org/rtree.html) - [GeoPoly](https://sqlite.org/geopoly.html) - [soundex](https://sqlite.org/lang_corefunc.html#soundex) +- [stat4](https://sqlite.org/compile.html#enable_stat4) - [base64](https://github.com/sqlite/sqlite/blob/master/ext/misc/base64.c) - [decimal](https://github.com/sqlite/sqlite/blob/master/ext/misc/decimal.c) - [ieee754](https://github.com/sqlite/sqlite/blob/master/ext/misc/ieee754.c) @@ -23,4 +24,7 @@ See the [configuration options](../sqlite3/sqlite_cfg.h), and [patches](../sqlite3) applied. Built using [`wasi-sdk`](https://github.com/WebAssembly/wasi-sdk), -and [`binaryen`](https://github.com/WebAssembly/binaryen). \ No newline at end of file +and [`binaryen`](https://github.com/WebAssembly/binaryen). + +The build is easily reproducible, and verifiable, using +[Artifact Attestations](https://github.com/ncruces/go-sqlite3/attestations). \ No newline at end of file diff --git a/vendor/github.com/ncruces/go-sqlite3/embed/build.sh b/vendor/github.com/ncruces/go-sqlite3/embed/build.sh index c50112c0bf..abe5e60c46 100644 --- a/vendor/github.com/ncruces/go-sqlite3/embed/build.sh +++ b/vendor/github.com/ncruces/go-sqlite3/embed/build.sh @@ -8,7 +8,7 @@ BINARYEN="$ROOT/tools/binaryen-version_117/bin" WASI_SDK="$ROOT/tools/wasi-sdk-22.0/bin" "$WASI_SDK/clang" --target=wasm32-wasi -std=c17 -flto -g0 -O2 \ - -Wall -Wextra -Wno-unused-parameter \ + -Wall -Wextra -Wno-unused-parameter -Wno-unused-function \ -o sqlite3.wasm "$ROOT/sqlite3/main.c" \ -I"$ROOT/sqlite3" \ -mexec-model=reactor \ @@ -20,6 +20,7 @@ WASI_SDK="$ROOT/tools/wasi-sdk-22.0/bin" -Wl,--stack-first \ -Wl,--import-undefined \ -D_HAVE_SQLITE_CONFIG_H \ + -DSQLITE_CUSTOM_INCLUDE=sqlite_opt.h \ $(awk '{print "-Wl,--export="$0}' exports.txt) trap 'rm -f sqlite3.tmp' EXIT diff --git a/vendor/github.com/ncruces/go-sqlite3/embed/exports.txt b/vendor/github.com/ncruces/go-sqlite3/embed/exports.txt index da16316bbe..b3cb1581c9 100644 --- a/vendor/github.com/ncruces/go-sqlite3/embed/exports.txt +++ b/vendor/github.com/ncruces/go-sqlite3/embed/exports.txt @@ -36,10 +36,13 @@ sqlite3_collation_needed_go sqlite3_column_blob sqlite3_column_bytes sqlite3_column_count +sqlite3_column_database_name sqlite3_column_decltype sqlite3_column_double sqlite3_column_int64 sqlite3_column_name +sqlite3_column_origin_name +sqlite3_column_table_name sqlite3_column_text sqlite3_column_type sqlite3_column_value diff --git a/vendor/github.com/ncruces/go-sqlite3/embed/sqlite3.wasm b/vendor/github.com/ncruces/go-sqlite3/embed/sqlite3.wasm index 2f36b2948d..8dfc7da0c6 100644 Binary files a/vendor/github.com/ncruces/go-sqlite3/embed/sqlite3.wasm and b/vendor/github.com/ncruces/go-sqlite3/embed/sqlite3.wasm differ diff --git a/vendor/github.com/ncruces/go-sqlite3/go.work.sum b/vendor/github.com/ncruces/go-sqlite3/go.work.sum index 5a6f903504..27b395cc71 100644 --- a/vendor/github.com/ncruces/go-sqlite3/go.work.sum +++ b/vendor/github.com/ncruces/go-sqlite3/go.work.sum @@ -1,8 +1,7 @@ -golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= -golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= -golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= -golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/alloc/alloc_other.go b/vendor/github.com/ncruces/go-sqlite3/internal/alloc/alloc_other.go new file mode 100644 index 0000000000..ded8da1087 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/alloc/alloc_other.go @@ -0,0 +1,9 @@ +//go:build !(unix || windows) || sqlite3_nosys + +package alloc + +import "github.com/tetratelabs/wazero/experimental" + +func Virtual(cap, max uint64) experimental.LinearMemory { + return Slice(cap, max) +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/alloc/alloc_slice.go b/vendor/github.com/ncruces/go-sqlite3/internal/alloc/alloc_slice.go new file mode 100644 index 0000000000..5072ca9c18 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/alloc/alloc_slice.go @@ -0,0 +1,24 @@ +//go:build !(darwin || linux) || !(amd64 || arm64 || riscv64) || sqlite3_noshm || sqlite3_nosys + +package alloc + +import "github.com/tetratelabs/wazero/experimental" + +func Slice(cap, _ uint64) experimental.LinearMemory { + return &sliceMemory{make([]byte, 0, cap)} +} + +type sliceMemory struct { + buf []byte +} + +func (b *sliceMemory) Free() {} + +func (b *sliceMemory) Reallocate(size uint64) []byte { + if cap := uint64(cap(b.buf)); size > cap { + b.buf = append(b.buf[:cap], make([]byte, size-cap)...) + } else { + b.buf = b.buf[:size] + } + return b.buf +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/alloc.go b/vendor/github.com/ncruces/go-sqlite3/internal/alloc/alloc_unix.go similarity index 79% rename from vendor/github.com/ncruces/go-sqlite3/internal/util/alloc.go rename to vendor/github.com/ncruces/go-sqlite3/internal/alloc/alloc_unix.go index 23c5382375..39a3a38ccf 100644 --- a/vendor/github.com/ncruces/go-sqlite3/internal/util/alloc.go +++ b/vendor/github.com/ncruces/go-sqlite3/internal/alloc/alloc_unix.go @@ -1,6 +1,6 @@ -//go:build unix +//go:build unix && !sqlite3_nosys -package util +package alloc import ( "math" @@ -9,30 +9,24 @@ import ( "golang.org/x/sys/unix" ) -func mmappedAllocator(cap, max uint64) experimental.LinearMemory { +func Virtual(_, max uint64) experimental.LinearMemory { // Round up to the page size. rnd := uint64(unix.Getpagesize() - 1) max = (max + rnd) &^ rnd - cap = (cap + rnd) &^ rnd if max > math.MaxInt { // This ensures int(max) overflows to a negative value, // and unix.Mmap returns EINVAL. max = math.MaxUint64 } + // Reserve max bytes of address space, to ensure we won't need to move it. // A protected, private, anonymous mapping should not commit memory. b, err := unix.Mmap(-1, 0, int(max), unix.PROT_NONE, unix.MAP_PRIVATE|unix.MAP_ANON) if err != nil { panic(err) } - // Commit the initial cap bytes of memory. - err = unix.Mprotect(b[:cap], unix.PROT_READ|unix.PROT_WRITE) - if err != nil { - unix.Munmap(b) - panic(err) - } - return &mmappedMemory{buf: b[:cap]} + return &mmappedMemory{buf: b[:0]} } // The slice covers the entire mmapped memory: @@ -43,7 +37,9 @@ type mmappedMemory struct { } func (m *mmappedMemory) Reallocate(size uint64) []byte { - if com := uint64(len(m.buf)); com < size { + com := uint64(len(m.buf)) + res := uint64(cap(m.buf)) + if com < size && size < res { // Round up to the page size. rnd := uint64(unix.Getpagesize() - 1) new := (size + rnd) &^ rnd diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/alloc/alloc_windows.go b/vendor/github.com/ncruces/go-sqlite3/internal/alloc/alloc_windows.go new file mode 100644 index 0000000000..27d875f2e3 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/internal/alloc/alloc_windows.go @@ -0,0 +1,76 @@ +//go:build !sqlite3_nosys + +package alloc + +import ( + "math" + "reflect" + "unsafe" + + "github.com/tetratelabs/wazero/experimental" + "golang.org/x/sys/windows" +) + +func Virtual(_, max uint64) experimental.LinearMemory { + // Round up to the page size. + rnd := uint64(windows.Getpagesize() - 1) + max = (max + rnd) &^ rnd + + if max > math.MaxInt { + // This ensures uintptr(max) overflows to a large value, + // and windows.VirtualAlloc returns an error. + max = math.MaxUint64 + } + + // Reserve max bytes of address space, to ensure we won't need to move it. + // This does not commit memory. + r, err := windows.VirtualAlloc(0, uintptr(max), windows.MEM_RESERVE, windows.PAGE_READWRITE) + if err != nil { + panic(err) + } + + mem := virtualMemory{addr: r} + // SliceHeader, although deprecated, avoids a go vet warning. + sh := (*reflect.SliceHeader)(unsafe.Pointer(&mem.buf)) + sh.Cap = int(max) + sh.Data = r + return &mem +} + +// The slice covers the entire mmapped memory: +// - len(buf) is the already committed memory, +// - cap(buf) is the reserved address space. +type virtualMemory struct { + buf []byte + addr uintptr +} + +func (m *virtualMemory) Reallocate(size uint64) []byte { + com := uint64(len(m.buf)) + res := uint64(cap(m.buf)) + if com < size && size < res { + // Round up to the page size. + rnd := uint64(windows.Getpagesize() - 1) + new := (size + rnd) &^ rnd + + // Commit additional memory up to new bytes. + _, err := windows.VirtualAlloc(m.addr, uintptr(new), windows.MEM_COMMIT, windows.PAGE_READWRITE) + if err != nil { + panic(err) + } + + // Update committed memory. + m.buf = m.buf[:new] + } + // Limit returned capacity because bytes beyond + // len(m.buf) have not yet been committed. + return m.buf[:size:len(m.buf)] +} + +func (m *virtualMemory) Free() { + err := windows.VirtualFree(m.addr, 0, windows.MEM_RELEASE) + if err != nil { + panic(err) + } + m.addr = 0 +} diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/const.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/const.go index 86bb9749db..5e89018ddf 100644 --- a/vendor/github.com/ncruces/go-sqlite3/internal/util/const.go +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/const.go @@ -1,6 +1,6 @@ package util -// https://sqlite.com/matrix/rescode.html +// https://sqlite.com/rescode.html const ( OK = 0 /* Successful result */ diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go index c0ba38cf05..7f6849a429 100644 --- a/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go @@ -26,7 +26,7 @@ func (j JSON) Scan(value any) error { buf = v.AppendFormat(buf, time.RFC3339Nano) buf = append(buf, '"') case nil: - buf = append(buf, "null"...) + buf = []byte("null") default: panic(AssertErr()) } diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/mmap.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/mmap.go index 26f30beaa8..a4fa2a25aa 100644 --- a/vendor/github.com/ncruces/go-sqlite3/internal/util/mmap.go +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/mmap.go @@ -1,4 +1,4 @@ -//go:build (darwin || linux) && (amd64 || arm64 || riscv64) && !(sqlite3_flock || sqlite3_noshm || sqlite3_nosys) +//go:build unix && (amd64 || arm64 || riscv64) && !(sqlite3_noshm || sqlite3_nosys) package util @@ -7,14 +7,15 @@ import ( "os" "unsafe" + "github.com/ncruces/go-sqlite3/internal/alloc" "github.com/tetratelabs/wazero/api" "github.com/tetratelabs/wazero/experimental" "golang.org/x/sys/unix" ) -func withMmappedAllocator(ctx context.Context) context.Context { +func withAllocator(ctx context.Context) context.Context { return experimental.WithMemoryAllocator(ctx, - experimental.MemoryAllocatorFunc(mmappedAllocator)) + experimental.MemoryAllocatorFunc(alloc.Virtual)) } type mmapState struct { diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/mmap_other.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/mmap_other.go index 89631e0939..a0a3ba67d4 100644 --- a/vendor/github.com/ncruces/go-sqlite3/internal/util/mmap_other.go +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/mmap_other.go @@ -1,11 +1,22 @@ -//go:build !(darwin || linux) || !(amd64 || arm64 || riscv64) || sqlite3_flock || sqlite3_noshm || sqlite3_nosys +//go:build !unix || !(amd64 || arm64 || riscv64) || sqlite3_noshm || sqlite3_nosys package util -import "context" +import ( + "context" + + "github.com/ncruces/go-sqlite3/internal/alloc" + "github.com/tetratelabs/wazero/experimental" +) type mmapState struct{} -func withMmappedAllocator(ctx context.Context) context.Context { - return ctx +func withAllocator(ctx context.Context) context.Context { + return experimental.WithMemoryAllocator(ctx, + experimental.MemoryAllocatorFunc(func(cap, max uint64) experimental.LinearMemory { + if cap == max { + return alloc.Virtual(cap, max) + } + return alloc.Slice(cap, max) + })) } diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/module.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/module.go index 20b17b2097..22793e972e 100644 --- a/vendor/github.com/ncruces/go-sqlite3/internal/util/module.go +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/module.go @@ -14,7 +14,7 @@ type moduleState struct { func NewContext(ctx context.Context) context.Context { state := new(moduleState) - ctx = withMmappedAllocator(ctx) + ctx = withAllocator(ctx) ctx = experimental.WithCloseNotifier(ctx, state) ctx = context.WithValue(ctx, moduleKey{}, state) return ctx diff --git a/vendor/github.com/ncruces/go-sqlite3/json.go b/vendor/github.com/ncruces/go-sqlite3/json.go index 9b2565e87c..2b762c092f 100644 --- a/vendor/github.com/ncruces/go-sqlite3/json.go +++ b/vendor/github.com/ncruces/go-sqlite3/json.go @@ -5,7 +5,8 @@ import "github.com/ncruces/go-sqlite3/internal/util" // JSON returns a value that can be used as an argument to // [database/sql.DB.Exec], [database/sql.Row.Scan] and similar methods to // store value as JSON, or decode JSON into value. -// JSON should NOT be used with [BindJSON] or [ResultJSON]. +// JSON should NOT be used with [Stmt.BindJSON], [Stmt.ColumnJSON], +// [Value.JSON], or [Context.ResultJSON]. func JSON(value any) any { return util.JSON{Value: value} } diff --git a/vendor/github.com/ncruces/go-sqlite3/pointer.go b/vendor/github.com/ncruces/go-sqlite3/pointer.go index 611c1528ce..0e2418b992 100644 --- a/vendor/github.com/ncruces/go-sqlite3/pointer.go +++ b/vendor/github.com/ncruces/go-sqlite3/pointer.go @@ -4,7 +4,8 @@ import "github.com/ncruces/go-sqlite3/internal/util" // Pointer returns a pointer to a value that can be used as an argument to // [database/sql.DB.Exec] and similar methods. -// Pointer should NOT be used with [BindPointer] or [ResultPointer]. +// Pointer should NOT be used with [Stmt.BindPointer], +// [Value.Pointer], or [Context.ResultPointer]. // // https://sqlite.org/bindptr.html func Pointer[T any](value T) any { diff --git a/vendor/github.com/ncruces/go-sqlite3/sqlite.go b/vendor/github.com/ncruces/go-sqlite3/sqlite.go index a446ec0d1e..61a03652fd 100644 --- a/vendor/github.com/ncruces/go-sqlite3/sqlite.go +++ b/vendor/github.com/ncruces/go-sqlite3/sqlite.go @@ -28,6 +28,14 @@ var ( RuntimeConfig wazero.RuntimeConfig ) +// Initialize decodes and compiles the SQLite Wasm binary. +// This is called implicitly when the first connection is openned, +// but is potentially slow, so you may want to call it at a more convenient time. +func Initialize() error { + instance.once.Do(compileSQLite) + return instance.err +} + var instance struct { runtime wazero.Runtime compiled wazero.CompiledModule @@ -79,9 +87,8 @@ type sqlite struct { } func instantiateSQLite() (sqlt *sqlite, err error) { - instance.once.Do(compileSQLite) - if instance.err != nil { - return nil, instance.err + if err := Initialize(); err != nil { + return nil, err } sqlt = new(sqlite) @@ -289,8 +296,9 @@ func (a *arena) string(s string) uint32 { } func exportCallbacks(env wazero.HostModuleBuilder) wazero.HostModuleBuilder { - util.ExportFuncIII(env, "go_busy_handler", busyCallback) util.ExportFuncII(env, "go_progress_handler", progressCallback) + util.ExportFuncIIII(env, "go_busy_timeout", timeoutCallback) + util.ExportFuncIII(env, "go_busy_handler", busyCallback) util.ExportFuncII(env, "go_commit_hook", commitCallback) util.ExportFuncVI(env, "go_rollback_hook", rollbackCallback) util.ExportFuncVIIIIJ(env, "go_update_hook", updateCallback) diff --git a/vendor/github.com/ncruces/go-sqlite3/stmt.go b/vendor/github.com/ncruces/go-sqlite3/stmt.go index fc26b1e95e..381a7d06ba 100644 --- a/vendor/github.com/ncruces/go-sqlite3/stmt.go +++ b/vendor/github.com/ncruces/go-sqlite3/stmt.go @@ -367,12 +367,10 @@ func (s *Stmt) ColumnCount() int { func (s *Stmt) ColumnName(col int) string { r := s.c.call("sqlite3_column_name", uint64(s.handle), uint64(col)) - - ptr := uint32(r) - if ptr == 0 { + if r == 0 { panic(util.OOMErr) } - return util.ReadString(s.c.mod, ptr, _MAX_NAME) + return util.ReadString(s.c.mod, uint32(r), _MAX_NAME) } // ColumnType returns the initial [Datatype] of the result column. @@ -398,15 +396,57 @@ func (s *Stmt) ColumnDeclType(col int) string { return util.ReadString(s.c.mod, uint32(r), _MAX_NAME) } +// ColumnDatabaseName returns the name of the database +// that is the origin of a particular result column. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_database_name.html +func (s *Stmt) ColumnDatabaseName(col int) string { + r := s.c.call("sqlite3_column_database_name", + uint64(s.handle), uint64(col)) + if r == 0 { + return "" + } + return util.ReadString(s.c.mod, uint32(r), _MAX_NAME) +} + +// ColumnTableName returns the name of the table +// that is the origin of a particular result column. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_database_name.html +func (s *Stmt) ColumnTableName(col int) string { + r := s.c.call("sqlite3_column_table_name", + uint64(s.handle), uint64(col)) + if r == 0 { + return "" + } + return util.ReadString(s.c.mod, uint32(r), _MAX_NAME) +} + +// ColumnOriginName returns the name of the table column +// that is the origin of a particular result column. +// The leftmost column of the result set has the index 0. +// +// https://sqlite.org/c3ref/column_database_name.html +func (s *Stmt) ColumnOriginName(col int) string { + r := s.c.call("sqlite3_column_origin_name", + uint64(s.handle), uint64(col)) + if r == 0 { + return "" + } + return util.ReadString(s.c.mod, uint32(r), _MAX_NAME) +} + // ColumnBool returns the value of the result column as a bool. // The leftmost column of the result set has the index 0. // SQLite does not have a separate boolean storage class. -// Instead, boolean values are retrieved as integers, +// Instead, boolean values are retrieved as numbers, // with 0 converted to false and any other value to true. // // https://sqlite.org/c3ref/column_blob.html func (s *Stmt) ColumnBool(col int) bool { - return s.ColumnInt64(col) != 0 + return s.ColumnFloat(col) != 0 } // ColumnInt returns the value of the result column as an int. @@ -524,7 +564,7 @@ func (s *Stmt) ColumnJSON(col int, ptr any) error { var data []byte switch s.ColumnType(col) { case NULL: - data = append(data, "null"...) + data = []byte("null") case TEXT: data = s.ColumnRawText(col) case BLOB: diff --git a/vendor/github.com/ncruces/go-sqlite3/time.go b/vendor/github.com/ncruces/go-sqlite3/time.go index a14870ea54..0164a307b6 100644 --- a/vendor/github.com/ncruces/go-sqlite3/time.go +++ b/vendor/github.com/ncruces/go-sqlite3/time.go @@ -101,7 +101,7 @@ func (f TimeFormat) Encode(t time.Time) any { return t.UnixMicro() case TimeFormatUnixNano: return t.UnixNano() - // Special formats + // Special formats. case TimeFormatDefault, TimeFormatAuto: f = time.RFC3339Nano // SQLite assumes UTC if unspecified. @@ -139,7 +139,7 @@ func (f TimeFormat) Encode(t time.Time) any { // https://sqlite.org/lang_datefunc.html func (f TimeFormat) Decode(v any) (time.Time, error) { switch f { - // Numeric formats + // Numeric formats. case TimeFormatJulianDay: switch v := v.(type) { case string: @@ -183,7 +183,7 @@ func (f TimeFormat) Decode(v any) (time.Time, error) { case float64: return time.UnixMilli(int64(math.Floor(v))).UTC(), nil case int64: - return time.UnixMilli(int64(v)).UTC(), nil + return time.UnixMilli(v).UTC(), nil default: return time.Time{}, util.TimeErr } @@ -200,7 +200,7 @@ func (f TimeFormat) Decode(v any) (time.Time, error) { case float64: return time.UnixMicro(int64(math.Floor(v))).UTC(), nil case int64: - return time.UnixMicro(int64(v)).UTC(), nil + return time.UnixMicro(v).UTC(), nil default: return time.Time{}, util.TimeErr } @@ -217,12 +217,12 @@ func (f TimeFormat) Decode(v any) (time.Time, error) { case float64: return time.Unix(0, int64(math.Floor(v))).UTC(), nil case int64: - return time.Unix(0, int64(v)).UTC(), nil + return time.Unix(0, v).UTC(), nil default: return time.Time{}, util.TimeErr } - // Special formats + // Special formats. case TimeFormatAuto: switch s := v.(type) { case string: diff --git a/vendor/github.com/ncruces/go-sqlite3/value.go b/vendor/github.com/ncruces/go-sqlite3/value.go index 61d3cbf709..1894ff4f10 100644 --- a/vendor/github.com/ncruces/go-sqlite3/value.go +++ b/vendor/github.com/ncruces/go-sqlite3/value.go @@ -68,12 +68,12 @@ func (v Value) NumericType() Datatype { // Bool returns the value as a bool. // SQLite does not have a separate boolean storage class. -// Instead, boolean values are retrieved as integers, +// Instead, boolean values are retrieved as numbers, // with 0 converted to false and any other value to true. // // https://sqlite.org/c3ref/value_blob.html func (v Value) Bool() bool { - return v.Int64() != 0 + return v.Float() != 0 } // Int returns the value as an int. @@ -177,7 +177,7 @@ func (v Value) JSON(ptr any) error { var data []byte switch v.Type() { case NULL: - data = append(data, "null"...) + data = []byte("null") case TEXT: data = v.RawText() case BLOB: diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/README.md b/vendor/github.com/ncruces/go-sqlite3/vfs/README.md index 212ad6d335..741a1b6a49 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/README.md +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/README.md @@ -2,15 +2,14 @@ This package implements the SQLite [OS Interface](https://sqlite.org/vfs.html) (aka VFS). -It replaces the default SQLite VFS with a **pure Go** implementation. - -It also exposes [interfaces](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs#VFS) +It replaces the default SQLite VFS with a **pure Go** implementation, +and exposes [interfaces](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs#VFS) that should allow you to implement your own custom VFSes. Since it is a from scratch reimplementation, there are naturally some ways it deviates from the original. -The main differences are [file locking](#file-locking) and [WAL mode](write-ahead-logging) support. +The main differences are [file locking](#file-locking) and [WAL mode](#write-ahead-logging) support. ### File Locking @@ -22,58 +21,72 @@ On Linux and macOS, this module uses to synchronize access to database files. OFD locks are fully compatible with POSIX advisory locks. -On BSD Unixes, this module uses -[BSD locks](https://man.freebsd.org/cgi/man.cgi?query=flock&sektion=2). -On BSD, these locks are fully compatible with POSIX advisory locks. -However, concurrency is reduced with BSD locks -(`BEGIN IMMEDIATE` behaves the same as `BEGIN EXCLUSIVE`). +This module can also use +[BSD locks](https://man.freebsd.org/cgi/man.cgi?query=flock&sektion=2), +albeit with reduced concurrency (`BEGIN IMMEDIATE` behaves like `BEGIN EXCLUSIVE`). +On BSD, macOS, and illumos, BSD locks are fully compatible with POSIX advisory locks; +on Linux and z/OS, they are fully functional, but incompatible; +elsewhere, they are very likely broken. +BSD locks are the default on BSD and illumos, +but you can opt into them with the `sqlite3_flock` build tag. On Windows, this module uses `LockFileEx` and `UnlockFileEx`, like SQLite. -On all other platforms, file locking is not supported, and you must use +Otherwise, file locking is not supported, and you must use [`nolock=1`](https://sqlite.org/uri.html#urinolock) (or [`immutable=1`](https://sqlite.org/uri.html#uriimmutable)) -to open database files.\ +to open database files. To use the [`database/sql`](https://pkg.go.dev/database/sql) driver with `nolock=1` you must disable connection pooling by calling [`db.SetMaxOpenConns(1)`](https://pkg.go.dev/database/sql#DB.SetMaxOpenConns). You can use [`vfs.SupportsFileLocking`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs#SupportsFileLocking) -to check if your platform supports file locking. +to check if your build supports file locking. ### Write-Ahead Logging -On 64-bit Linux and macOS, this module uses `mmap` to implement +On 64-bit Unix, this module uses `mmap` to implement [shared-memory for the WAL-index](https://sqlite.org/wal.html#implementation_of_shared_memory_for_the_wal_index), like SQLite. -To allow `mmap` to work, each connection needs to reserve up to 4GB of address space.\ -To limit the amount of address space each connection needs, +To allow `mmap` to work, each connection needs to reserve up to 4GB of address space. +To limit the address space each connection reserves, use [`WithMemoryLimitPages`](../tests/testcfg/testcfg.go). -On Windows and BSD, [WAL](https://sqlite.org/wal.html) support is -[limited](https://sqlite.org/wal.html#noshm). -`EXCLUSIVE` locking mode can be set to create, read, and write WAL databases.\ +With [BSD locks](https://man.freebsd.org/cgi/man.cgi?query=flock&sektion=2) +a WAL database can only be accessed by a single proccess. +Other processes that attempt to access a database locked with BSD locks, +will fail with the `SQLITE_PROTOCOL` error code. + +Otherwise, [WAL support is limited](https://sqlite.org/wal.html#noshm), +and `EXCLUSIVE` locking mode must be set to create, read, and write WAL databases. To use `EXCLUSIVE` locking mode with the [`database/sql`](https://pkg.go.dev/database/sql) driver you must disable connection pooling by calling [`db.SetMaxOpenConns(1)`](https://pkg.go.dev/database/sql#DB.SetMaxOpenConns). -On all other platforms, where file locking is not supported, WAL mode does not work. - You can use [`vfs.SupportsSharedMemory`](https://pkg.go.dev/github.com/ncruces/go-sqlite3/vfs#SupportsSharedMemory) -to check if your platform supports shared memory. +to check if your build supports shared memory. ### Batch-Atomic Write On 64-bit Linux, this module supports [batch-atomic writes](https://sqlite.org/cgi/src/technote/714) -with the F2FS filesystem. +on the F2FS filesystem. -### Build tags +### Build Tags The VFS can be customized with a few build tags: -- `sqlite3_flock` forces the use of BSD locks; it can be used on macOS to test the BSD locking implementation. +- `sqlite3_flock` forces the use of BSD locks; it can be used on z/OS to enable locking, + and elsewhere to test BSD locks. - `sqlite3_nosys` prevents importing [`x/sys`](https://pkg.go.dev/golang.org/x/sys); disables locking _and_ shared memory on all platforms. - `sqlite3_noshm` disables shared memory on all platforms. + +> [!IMPORTANT] +> The default configuration of this package is compatible with the standard +> [Unix and Windows SQLite VFSes](https://sqlite.org/vfs.html#multiple_vfses); +> `sqlite3_flock` builds are compatible with the +> [`unix-flock` VFS](https://sqlite.org/compile.html#enable_locking_style). +> If incompatible file locking is used, accessing databases concurrently with +> _other_ SQLite libraries will eventually corrupt data. diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/api.go b/vendor/github.com/ncruces/go-sqlite3/vfs/api.go index 19c22ae8fa..e133e8be92 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/api.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/api.go @@ -168,8 +168,8 @@ type FileSharedMemory interface { // SharedMemory is a shared-memory WAL-index implementation. // Use [NewSharedMemory] to create a shared-memory. type SharedMemory interface { - shmMap(context.Context, api.Module, int32, int32, bool) (uint32, error) - shmLock(int32, int32, _ShmFlag) error + shmMap(context.Context, api.Module, int32, int32, bool) (uint32, _ErrorCode) + shmLock(int32, int32, _ShmFlag) _ErrorCode shmUnmap(bool) io.Closer } diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/const.go b/vendor/github.com/ncruces/go-sqlite3/vfs/const.go index 7f409f35f4..2fc934f336 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/const.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/const.go @@ -47,9 +47,11 @@ const ( _IOERR_SHMMAP _ErrorCode = util.IOERR_SHMMAP _IOERR_SEEK _ErrorCode = util.IOERR_SEEK _IOERR_DELETE_NOENT _ErrorCode = util.IOERR_DELETE_NOENT + _IOERR_GETTEMPPATH _ErrorCode = util.IOERR_GETTEMPPATH _IOERR_BEGIN_ATOMIC _ErrorCode = util.IOERR_BEGIN_ATOMIC _IOERR_COMMIT_ATOMIC _ErrorCode = util.IOERR_COMMIT_ATOMIC _IOERR_ROLLBACK_ATOMIC _ErrorCode = util.IOERR_ROLLBACK_ATOMIC + _BUSY_SNAPSHOT _ErrorCode = util.BUSY_SNAPSHOT _CANTOPEN_FULLPATH _ErrorCode = util.CANTOPEN_FULLPATH _CANTOPEN_ISDIR _ErrorCode = util.CANTOPEN_ISDIR _READONLY_CANTINIT _ErrorCode = util.READONLY_CANTINIT diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/file.go b/vendor/github.com/ncruces/go-sqlite3/vfs/file.go index ca8cf84f3a..93a2f7eced 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/file.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/file.go @@ -95,6 +95,9 @@ func (vfsOS) OpenFilename(name *Filename, flags OpenFlag) (File, OpenFlag, error f, err = osutil.OpenFile(name.String(), oflags, 0666) } if err != nil { + if name == nil { + return nil, flags, _IOERR_GETTEMPPATH + } if errors.Is(err, syscall.EISDIR) { return nil, flags, _CANTOPEN_ISDIR } diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/lock.go b/vendor/github.com/ncruces/go-sqlite3/vfs/lock.go index 57bc5f9065..86a988ae87 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/lock.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/lock.go @@ -1,4 +1,4 @@ -//go:build (linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos) && !sqlite3_nosys +//go:build (linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && !sqlite3_nosys package vfs diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/lock_other.go b/vendor/github.com/ncruces/go-sqlite3/vfs/lock_other.go index a4563af484..c395f34a73 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/lock_other.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/lock_other.go @@ -1,4 +1,4 @@ -//go:build !(linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos) || sqlite3_nosys +//go:build !(linux || darwin || windows || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) || sqlite3_nosys package vfs diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_bsd.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_bsd.go index 48ac5c9c9d..9f3c99dafa 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/os_bsd.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_bsd.go @@ -29,5 +29,12 @@ func osReadLock(file *os.File, _ /*start*/, _ /*len*/ int64, _ /*timeout*/ time. } func osWriteLock(file *os.File, _ /*start*/, _ /*len*/ int64, _ /*timeout*/ time.Duration) _ErrorCode { - return osLock(file, unix.LOCK_EX|unix.LOCK_NB, _IOERR_LOCK) + rc := osLock(file, unix.LOCK_EX|unix.LOCK_NB, _IOERR_LOCK) + if rc == _BUSY { + // The documentation states the lock is upgraded by releasing the previous lock, + // then acquiring the new lock. + // This is a race, so return BUSY_SNAPSHOT to ensure the transaction is aborted. + return _BUSY_SNAPSHOT + } + return rc } diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_darwin.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_darwin.go index 9826eb274e..8bfe96bb15 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/os_darwin.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_darwin.go @@ -1,4 +1,4 @@ -//go:build !sqlite3_flock && !sqlite3_nosys +//go:build !(sqlite3_flock || sqlite3_nosys) package vfs diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_linux.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_linux.go index 8a43f43921..7bb78c0af2 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/os_linux.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_linux.go @@ -1,4 +1,4 @@ -//go:build !sqlite3_nosys +//go:build !(sqlite3_flock || sqlite3_nosys) package vfs @@ -56,7 +56,7 @@ func osLock(file *os.File, typ int16, start, len int64, timeout time.Duration, d if timeout < time.Since(before) { break } - osSleep(time.Duration(rand.Int63n(int64(time.Millisecond)))) + time.Sleep(time.Duration(rand.Int63n(int64(time.Millisecond)))) } } return osLockErrorCode(err, def) diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_sleep.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_sleep.go deleted file mode 100644 index c6bc407694..0000000000 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/os_std_sleep.go +++ /dev/null @@ -1,9 +0,0 @@ -//go:build !windows || sqlite3_nosys - -package vfs - -import "time" - -func osSleep(d time.Duration) { - time.Sleep(d) -} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_unix_lock.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_unix_lock.go index cdefa21ed7..85a7b0fc0f 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/os_unix_lock.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_unix_lock.go @@ -1,4 +1,4 @@ -//go:build (linux || darwin || freebsd || openbsd || netbsd || dragonfly || illumos) && !sqlite3_nosys +//go:build (linux || darwin || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && !sqlite3_nosys package vfs @@ -48,7 +48,7 @@ func osDowngradeLock(file *os.File, state LockLevel) _ErrorCode { // In theory, the downgrade to a SHARED cannot fail because another // process is holding an incompatible lock. If it does, this // indicates that the other process is not following the locking - // protocol. If this happens, return _IOERR_RDLOCK. Returning + // protocol. If this happens, return IOERR_RDLOCK. Returning // BUSY would confuse the upper layer. return _IOERR_RDLOCK } @@ -98,6 +98,9 @@ func osLockErrorCode(err error, def _ErrorCode) _ErrorCode { case unix.EPERM: return _PERM } + if errno == unix.EWOULDBLOCK && unix.EWOULDBLOCK != unix.EAGAIN { + return _BUSY + } } return def } diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/os_windows.go b/vendor/github.com/ncruces/go-sqlite3/vfs/os_windows.go index 5c68754f84..83b952b168 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/os_windows.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/os_windows.go @@ -136,7 +136,7 @@ func osLock(file *os.File, flags, start, len uint32, timeout time.Duration, def if timeout < time.Since(before) { break } - osSleep(time.Duration(rand.Int63n(int64(time.Millisecond)))) + time.Sleep(time.Duration(rand.Int63n(int64(time.Millisecond)))) } } return osLockErrorCode(err, def) @@ -171,16 +171,3 @@ func osLockErrorCode(err error, def _ErrorCode) _ErrorCode { } return def } - -func osSleep(d time.Duration) { - if d > 0 { - period := max(1, d/(5*time.Millisecond)) - if period < 16 { - windows.TimeBeginPeriod(uint32(period)) - } - time.Sleep(d) - if period < 16 { - windows.TimeEndPeriod(uint32(period)) - } - } -} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go b/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go index 2b76dd5dcf..7b0d4b677f 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go @@ -51,12 +51,7 @@ type vfsShm struct { readOnly bool } -func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, extend bool) (uint32, error) { - // Ensure size is a multiple of the OS page size. - if int(size)&(unix.Getpagesize()-1) != 0 { - return 0, _IOERR_SHMMAP - } - +func (s *vfsShm) shmOpen() _ErrorCode { if s.File == nil { var flag int if s.readOnly { @@ -67,28 +62,40 @@ func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, ext f, err := os.OpenFile(s.path, flag|unix.O_CREAT|unix.O_NOFOLLOW, 0666) if err != nil { - return 0, _CANTOPEN + return _CANTOPEN } s.File = f } // Dead man's switch. if lock, rc := osGetLock(s.File, _SHM_DMS, 1); rc != _OK { - return 0, _IOERR_LOCK + return _IOERR_LOCK } else if lock == unix.F_WRLCK { - return 0, _BUSY + return _BUSY } else if lock == unix.F_UNLCK { if s.readOnly { - return 0, _READONLY_CANTINIT + return _READONLY_CANTINIT } if rc := osWriteLock(s.File, _SHM_DMS, 1, 0); rc != _OK { - return 0, rc + return rc } if err := s.Truncate(0); err != nil { - return 0, _IOERR_SHMOPEN + return _IOERR_SHMOPEN } } if rc := osReadLock(s.File, _SHM_DMS, 1, 0); rc != _OK { + return rc + } + return _OK +} + +func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, extend bool) (uint32, _ErrorCode) { + // Ensure size is a multiple of the OS page size. + if int(size)&(unix.Getpagesize()-1) != 0 { + return 0, _IOERR_SHMMAP + } + + if rc := s.shmOpen(); rc != _OK { return 0, rc } @@ -99,7 +106,7 @@ func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, ext } if n := (int64(id) + 1) * int64(size); n > o { if !extend { - return 0, nil + return 0, _OK } err := osAllocate(s.File, n) if err != nil { @@ -115,13 +122,16 @@ func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, ext } r, err := util.MapRegion(ctx, mod, s.File, int64(id)*int64(size), size, prot) if err != nil { - return 0, err + return 0, _IOERR_SHMMAP } s.regions = append(s.regions, r) - return r.Ptr, nil + if s.readOnly { + return r.Ptr, _READONLY + } + return r.Ptr, _OK } -func (s *vfsShm) shmLock(offset, n int32, flags _ShmFlag) error { +func (s *vfsShm) shmLock(offset, n int32, flags _ShmFlag) _ErrorCode { // Argument check. if n <= 0 || offset < 0 || offset+n > _SHM_NLOCK { panic(util.AssertErr()) @@ -165,9 +175,9 @@ func (s *vfsShm) shmUnmap(delete bool) { s.regions = s.regions[:0] // Close the file. - defer s.Close() if delete { - os.Remove(s.Name()) + os.Remove(s.path) } + s.Close() s.File = nil } diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go b/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go new file mode 100644 index 0000000000..ffeb3e0a00 --- /dev/null +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go @@ -0,0 +1,260 @@ +//go:build (freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) && (amd64 || arm64 || riscv64) && !(sqlite3_noshm || sqlite3_nosys) + +package vfs + +import ( + "context" + "io" + "os" + "sync" + + "github.com/ncruces/go-sqlite3/internal/util" + "github.com/tetratelabs/wazero/api" + "golang.org/x/sys/unix" +) + +// SupportsSharedMemory is false on platforms that do not support shared memory. +// To use [WAL without shared-memory], you need to set [EXCLUSIVE locking mode]. +// +// [WAL without shared-memory]: https://sqlite.org/wal.html#noshm +// [EXCLUSIVE locking mode]: https://sqlite.org/pragma.html#pragma_locking_mode +const SupportsSharedMemory = true + +const _SHM_NLOCK = 8 + +func (f *vfsFile) SharedMemory() SharedMemory { return f.shm } + +// NewSharedMemory returns a shared-memory WAL-index +// backed by a file with the given path. +// It will return nil if shared-memory is not supported, +// or not appropriate for the given flags. +// Only [OPEN_MAIN_DB] databases may need a WAL-index. +// You must ensure all concurrent accesses to a database +// use shared-memory instances created with the same path. +func NewSharedMemory(path string, flags OpenFlag) SharedMemory { + if flags&OPEN_MAIN_DB == 0 || flags&(OPEN_DELETEONCLOSE|OPEN_MEMORY) != 0 { + return nil + } + return &vfsShm{ + path: path, + readOnly: flags&OPEN_READONLY != 0, + } +} + +type vfsShmFile struct { + *os.File + info os.FileInfo + + // +checklocks:vfsShmFilesMtx + refs int + + // +checklocks:lockMtx + lock [_SHM_NLOCK]int16 + lockMtx sync.Mutex +} + +var ( + // +checklocks:vfsShmFilesMtx + vfsShmFiles []*vfsShmFile + vfsShmFilesMtx sync.Mutex +) + +type vfsShm struct { + *vfsShmFile + path string + lock [_SHM_NLOCK]bool + regions []*util.MappedRegion + readOnly bool +} + +func (s *vfsShm) Close() error { + if s.vfsShmFile == nil { + return nil + } + + // Unlock everything. + s.shmLock(0, _SHM_NLOCK, _SHM_UNLOCK) + + vfsShmFilesMtx.Lock() + defer vfsShmFilesMtx.Unlock() + + // Decrease reference count. + if s.vfsShmFile.refs > 1 { + s.vfsShmFile.refs-- + s.vfsShmFile = nil + return nil + } + for i, g := range vfsShmFiles { + if g == s.vfsShmFile { + vfsShmFiles[i] = nil + break + } + } + + err := s.File.Close() + s.vfsShmFile = nil + return err +} + +func (s *vfsShm) shmOpen() (rc _ErrorCode) { + if s.vfsShmFile != nil { + return _OK + } + + // Always open file read-write, as it will be shared. + f, err := os.OpenFile(s.path, + unix.O_RDWR|unix.O_CREAT|unix.O_NOFOLLOW, 0666) + if err != nil { + return _CANTOPEN + } + // Closes file if it's not nil. + defer func() { f.Close() }() + + fi, err := f.Stat() + if err != nil { + return _IOERR_FSTAT + } + + vfsShmFilesMtx.Lock() + defer vfsShmFilesMtx.Unlock() + + // Find a shared file, increase the reference count. + for _, g := range vfsShmFiles { + if g != nil && os.SameFile(fi, g.info) { + g.refs++ + s.vfsShmFile = g + return _OK + } + } + + // Lock and truncate the file, if not readonly. + // The lock is only released by closing the file. + if s.readOnly { + rc = _READONLY_CANTINIT + } else { + if rc := osLock(f, unix.LOCK_EX|unix.LOCK_NB, _IOERR_LOCK); rc != _OK { + return rc + } + if err := f.Truncate(0); err != nil { + return _IOERR_SHMOPEN + } + } + + // Add the new shared file. + s.vfsShmFile = &vfsShmFile{ + File: f, + info: fi, + refs: 1, + } + f = nil // Don't close the file. + for i, g := range vfsShmFiles { + if g == nil { + vfsShmFiles[i] = s.vfsShmFile + return rc + } + } + vfsShmFiles = append(vfsShmFiles, s.vfsShmFile) + return rc +} + +func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, extend bool) (uint32, _ErrorCode) { + // Ensure size is a multiple of the OS page size. + if int(size)&(unix.Getpagesize()-1) != 0 { + return 0, _IOERR_SHMMAP + } + + if rc := s.shmOpen(); rc != _OK { + return 0, rc + } + + // Check if file is big enough. + o, err := s.Seek(0, io.SeekEnd) + if err != nil { + return 0, _IOERR_SHMSIZE + } + if n := (int64(id) + 1) * int64(size); n > o { + if !extend { + return 0, _OK + } + err := osAllocate(s.File, n) + if err != nil { + return 0, _IOERR_SHMSIZE + } + } + + var prot int + if s.readOnly { + prot = unix.PROT_READ + } else { + prot = unix.PROT_READ | unix.PROT_WRITE + } + r, err := util.MapRegion(ctx, mod, s.File, int64(id)*int64(size), size, prot) + if err != nil { + return 0, _IOERR_SHMMAP + } + s.regions = append(s.regions, r) + if s.readOnly { + return r.Ptr, _READONLY + } + return r.Ptr, _OK +} + +func (s *vfsShm) shmLock(offset, n int32, flags _ShmFlag) _ErrorCode { + s.lockMtx.Lock() + defer s.lockMtx.Unlock() + + switch { + case flags&_SHM_UNLOCK != 0: + for i := offset; i < offset+n; i++ { + if s.lock[i] { + if s.vfsShmFile.lock[i] <= 0 { + s.vfsShmFile.lock[i] = 0 + } else { + s.vfsShmFile.lock[i]-- + } + } + } + case flags&_SHM_SHARED != 0: + for i := offset; i < offset+n; i++ { + if s.vfsShmFile.lock[i] < 0 { + return _BUSY + } + } + for i := offset; i < offset+n; i++ { + s.vfsShmFile.lock[i]++ + s.lock[i] = true + } + case flags&_SHM_EXCLUSIVE != 0: + for i := offset; i < offset+n; i++ { + if s.vfsShmFile.lock[i] != 0 { + return _BUSY + } + } + for i := offset; i < offset+n; i++ { + s.vfsShmFile.lock[i] = -1 + s.lock[i] = true + } + } + + return _OK +} + +func (s *vfsShm) shmUnmap(delete bool) { + if s.vfsShmFile == nil { + return + } + + // Unmap regions. + for _, r := range s.regions { + r.Unmap() + } + clear(s.regions) + s.regions = s.regions[:0] + + // Close the file. + if delete { + os.Remove(s.path) + } + s.Close() + s.vfsShmFile = nil +} diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/shm_other.go b/vendor/github.com/ncruces/go-sqlite3/vfs/shm_other.go index 21191979ec..7c89975813 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/shm_other.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/shm_other.go @@ -1,4 +1,4 @@ -//go:build !(darwin || linux) || !(amd64 || arm64 || riscv64) || sqlite3_flock || sqlite3_noshm || sqlite3_nosys +//go:build !(darwin || linux || freebsd || openbsd || netbsd || dragonfly || illumos || sqlite3_flock) || !(amd64 || arm64 || riscv64) || sqlite3_noshm || sqlite3_nosys package vfs diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/vfs.go b/vendor/github.com/ncruces/go-sqlite3/vfs/vfs.go index 1887e9f221..d624aa78c3 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/vfs.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/vfs.go @@ -83,7 +83,7 @@ func vfsRandomness(ctx context.Context, mod api.Module, pVfs uint32, nByte int32 } func vfsSleep(ctx context.Context, mod api.Module, pVfs uint32, nMicro int32) _ErrorCode { - osSleep(time.Duration(nMicro) * time.Microsecond) + time.Sleep(time.Duration(nMicro) * time.Microsecond) return _OK } @@ -397,18 +397,14 @@ func vfsShmBarrier(ctx context.Context, mod api.Module, pFile uint32) { func vfsShmMap(ctx context.Context, mod api.Module, pFile uint32, iRegion, szRegion int32, bExtend, pp uint32) _ErrorCode { shm := vfsFileGet(ctx, mod, pFile).(FileSharedMemory).SharedMemory() - p, err := shm.shmMap(ctx, mod, iRegion, szRegion, bExtend != 0) - if err != nil { - return vfsErrorCode(err, _IOERR_SHMMAP) - } + p, rc := shm.shmMap(ctx, mod, iRegion, szRegion, bExtend != 0) util.WriteUint32(mod, pp, p) - return _OK + return rc } func vfsShmLock(ctx context.Context, mod api.Module, pFile uint32, offset, n int32, flags _ShmFlag) _ErrorCode { shm := vfsFileGet(ctx, mod, pFile).(FileSharedMemory).SharedMemory() - err := shm.shmLock(offset, n, flags) - return vfsErrorCode(err, _IOERR_SHMLOCK) + return shm.shmLock(offset, n, flags) } func vfsShmUnmap(ctx context.Context, mod api.Module, pFile, bDelete uint32) _ErrorCode { diff --git a/vendor/github.com/ncruces/go-sqlite3/vtab.go b/vendor/github.com/ncruces/go-sqlite3/vtab.go index a330c98ffb..7c19330bb2 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vtab.go +++ b/vendor/github.com/ncruces/go-sqlite3/vtab.go @@ -16,14 +16,15 @@ func CreateModule[T VTab](db *Conn, name string, create, connect VTabConstructor var flags int const ( - VTAB_CREATOR = 0x01 - VTAB_DESTROYER = 0x02 - VTAB_UPDATER = 0x04 - VTAB_RENAMER = 0x08 - VTAB_OVERLOADER = 0x10 - VTAB_CHECKER = 0x20 - VTAB_TXN = 0x40 - VTAB_SAVEPOINTER = 0x80 + VTAB_CREATOR = 0x001 + VTAB_DESTROYER = 0x002 + VTAB_UPDATER = 0x004 + VTAB_RENAMER = 0x008 + VTAB_OVERLOADER = 0x010 + VTAB_CHECKER = 0x020 + VTAB_TXN = 0x040 + VTAB_SAVEPOINTER = 0x080 + VTAB_SHADOWTABS = 0x100 ) if create != nil { @@ -52,6 +53,9 @@ func CreateModule[T VTab](db *Conn, name string, create, connect VTabConstructor if implements[VTabSavepointer](vtab) { flags |= VTAB_SAVEPOINTER } + if implements[VTabShadowTabler](vtab) { + flags |= VTAB_SHADOWTABS + } defer db.arena.mark()() namePtr := db.arena.string(name) @@ -174,6 +178,17 @@ type VTabOverloader interface { FindFunction(arg int, name string) (ScalarFunction, IndexConstraintOp) } +// A VTabShadowTabler allows a virtual table to protect the content +// of shadow tables from being corrupted by hostile SQL. +// +// Implementing this interface signals that a virtual table named +// "mumble" reserves all table names starting with "mumble_". +type VTabShadowTabler interface { + VTab + // https://sqlite.org/vtab.html#the_xshadowname_method + ShadowTables() +} + // A VTabChecker allows a virtual table to report errors // to the PRAGMA integrity_check and PRAGMA quick_check commands. // diff --git a/vendor/github.com/tetratelabs/wazero/config.go b/vendor/github.com/tetratelabs/wazero/config.go index 819a76df5e..d3656849cf 100644 --- a/vendor/github.com/tetratelabs/wazero/config.go +++ b/vendor/github.com/tetratelabs/wazero/config.go @@ -148,7 +148,7 @@ type RuntimeConfig interface { // customSections := c.CustomSections() WithCustomSections(bool) RuntimeConfig - // WithCloseOnContextDone ensures the executions of functions to be closed under one of the following circumstances: + // WithCloseOnContextDone ensures the executions of functions to be terminated under one of the following circumstances: // // - context.Context passed to the Call method of api.Function is canceled during execution. (i.e. ctx by context.WithCancel) // - context.Context passed to the Call method of api.Function reaches timeout during execution. (i.e. ctx by context.WithTimeout or context.WithDeadline) @@ -159,6 +159,8 @@ type RuntimeConfig interface { // entire underlying OS thread which runs the api.Function call. See "Why it's safe to execute runtime-generated // machine codes against async Goroutine preemption" section in RATIONALE.md for detail. // + // Upon the termination of the function executions, api.Module is closed. + // // Note that this comes with a bit of extra cost when enabled. The reason is that internally this forces // interpreter and compiler runtimes to insert the periodical checks on the conditions above. For that reason, // this is disabled by default. @@ -217,9 +219,18 @@ const ( // part. wazero automatically performs ahead-of-time compilation as needed when // Runtime.CompileModule is invoked. // -// Warning: This panics at runtime if the runtime.GOOS or runtime.GOARCH does not -// support compiler. Use NewRuntimeConfig to safely detect and fallback to -// NewRuntimeConfigInterpreter if needed. +// # Warning +// +// - This panics at runtime if the runtime.GOOS or runtime.GOARCH does not +// support compiler. Use NewRuntimeConfig to safely detect and fallback to +// NewRuntimeConfigInterpreter if needed. +// +// - If you are using wazero in buildmode=c-archive or c-shared, make sure that you set up the alternate signal stack +// by using, e.g. `sigaltstack` combined with `SA_ONSTACK` flag on `sigaction` on Linux, +// before calling any api.Function. This is because the Go runtime does not set up the alternate signal stack +// for c-archive or c-shared modes, and wazero uses the different stack than the calling Goroutine. +// Hence, the signal handler might get invoked on the wazero's stack, which may cause a stack overflow. +// https://github.com/tetratelabs/wazero/blob/2092c0a879f30d49d7b37f333f4547574b8afe0d/internal/integration_test/fuzz/fuzz/tests/sigstack.rs#L19-L36 func NewRuntimeConfigCompiler() RuntimeConfig { ret := engineLessConfig.clone() ret.engineKind = engineKindCompiler diff --git a/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go b/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go index 443c5a294f..c75db615e6 100644 --- a/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go +++ b/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go @@ -21,13 +21,6 @@ type Snapshotter interface { Snapshot() Snapshot } -// EnableSnapshotterKey is a context key to indicate that snapshotting should be enabled. -// The context.Context passed to a exported function invocation should have this key set -// to a non-nil value, and host functions will be able to retrieve it using SnapshotterKey. -// -// Deprecated: use WithSnapshotter to enable snapshots. -type EnableSnapshotterKey = expctxkeys.EnableSnapshotterKey - // WithSnapshotter enables snapshots. // Passing the returned context to a exported function invocation enables snapshots, // and allows host functions to retrieve the Snapshotter using GetSnapshotter. @@ -35,12 +28,6 @@ func WithSnapshotter(ctx context.Context) context.Context { return context.WithValue(ctx, expctxkeys.EnableSnapshotterKey{}, struct{}{}) } -// SnapshotterKey is a context key to access a Snapshotter from a host function. -// It is only present if EnableSnapshotter was set in the function invocation context. -// -// Deprecated: use GetSnapshotter to get the snapshotter. -type SnapshotterKey = expctxkeys.SnapshotterKey - // GetSnapshotter gets the Snapshotter from a host function. // It is only present if WithSnapshotter was called with the function invocation context. func GetSnapshotter(ctx context.Context) Snapshotter { diff --git a/vendor/github.com/tetratelabs/wazero/experimental/listener.go b/vendor/github.com/tetratelabs/wazero/experimental/listener.go index b2ba1fe834..55fc6b668e 100644 --- a/vendor/github.com/tetratelabs/wazero/experimental/listener.go +++ b/vendor/github.com/tetratelabs/wazero/experimental/listener.go @@ -24,12 +24,6 @@ type StackIterator interface { ProgramCounter() ProgramCounter } -// FunctionListenerFactoryKey is a context.Context Value key. -// Its associated value should be a FunctionListenerFactory. -// -// Deprecated: use WithFunctionListenerFactory to enable snapshots. -type FunctionListenerFactoryKey = expctxkeys.FunctionListenerFactoryKey - // WithFunctionListenerFactory registers a FunctionListenerFactory // with the context. func WithFunctionListenerFactory(ctx context.Context, factory FunctionListenerFactory) context.Context { diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go index 761a1f9dc2..5ebc1780f4 100644 --- a/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go @@ -23,6 +23,10 @@ const ( // instead of syscall.ENOTDIR _ERROR_DIRECTORY = syscall.Errno(0x10B) + // _ERROR_NOT_A_REPARSE_POINT is a Windows error returned by os.Readlink + // instead of syscall.EINVAL + _ERROR_NOT_A_REPARSE_POINT = syscall.Errno(0x1126) + // _ERROR_INVALID_SOCKET is a Windows error returned by winsock_select // when a given handle is not a socket. _ERROR_INVALID_SOCKET = syscall.Errno(0x2736) @@ -51,7 +55,7 @@ func errorToErrno(err error) Errno { return EBADF case syscall.ERROR_PRIVILEGE_NOT_HELD: return EPERM - case _ERROR_NEGATIVE_SEEK, _ERROR_INVALID_NAME: + case _ERROR_NEGATIVE_SEEK, _ERROR_INVALID_NAME, _ERROR_NOT_A_REPARSE_POINT: return EINVAL } errno, _ := syscallToErrno(err) diff --git a/vendor/github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1/fs.go b/vendor/github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1/fs.go index 384036a275..1ec0d81b37 100644 --- a/vendor/github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1/fs.go +++ b/vendor/github.com/tetratelabs/wazero/imports/wasi_snapshot_preview1/fs.go @@ -1596,6 +1596,10 @@ func pathOpenFn(_ context.Context, mod api.Module, params []uint64) experimental return errno } + if pathLen == 0 { + return experimentalsys.EINVAL + } + fileOpenFlags := openFlags(dirflags, oflags, fdflags, rights) isDir := fileOpenFlags&experimentalsys.O_DIRECTORY != 0 @@ -1704,7 +1708,6 @@ func openFlags(dirflags, oflags, fdflags uint16, rights uint32) (openFlags exper } if oflags&wasip1.O_DIRECTORY != 0 { openFlags |= experimentalsys.O_DIRECTORY - return // Early return for directories as the rest of flags doesn't make sense for it. } else if oflags&wasip1.O_EXCL != 0 { openFlags |= experimentalsys.O_EXCL } @@ -1951,16 +1954,16 @@ func pathSymlinkFn(_ context.Context, mod api.Module, params []uint64) experimen return experimentalsys.EFAULT } - newPathBuf, ok := mem.Read(newPath, newPathLen) - if !ok { - return experimentalsys.EFAULT + _, newPathName, errno := atPath(fsc, mod.Memory(), fd, newPath, newPathLen) + if errno != 0 { + return errno } return dir.FS.Symlink( // Do not join old path since it's only resolved when dereference the link created here. // And the dereference result depends on the opening directory's file descriptor at that point. bufToStr(oldPathBuf), - path.Join(dir.Name, bufToStr(newPathBuf)), + newPathName, ) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go index a89ddc4573..18c5f4252d 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go @@ -98,6 +98,9 @@ func (e *moduleEngine) SetGlobalValue(idx wasm.Index, lo, hi uint64) { // OwnsGlobals implements the same method as documented on wasm.ModuleEngine. func (e *moduleEngine) OwnsGlobals() bool { return false } +// MemoryGrown implements wasm.ModuleEngine. +func (e *moduleEngine) MemoryGrown() {} + // callEngine holds context per moduleEngine.Call, and shared across all the // function calls originating from the same moduleEngine.Call execution. // diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go index 81c6a6b62e..8e9571b202 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go @@ -43,7 +43,7 @@ type ExecutableContextT[Instr any] struct { labelPositionPool wazevoapi.Pool[LabelPosition[Instr]] NextLabel Label // LabelPositions maps a label to the instructions of the region which the label represents. - LabelPositions map[Label]*LabelPosition[Instr] + LabelPositions []*LabelPosition[Instr] OrderedBlockLabels []*LabelPosition[Instr] // PerBlockHead and PerBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. @@ -67,7 +67,6 @@ func NewExecutableContextT[Instr any]( setNext: setNext, setPrev: setPrev, labelPositionPool: wazevoapi.NewPool[LabelPosition[Instr]](resetLabelPosition[Instr]), - LabelPositions: make(map[Label]*LabelPosition[Instr]), NextLabel: LabelInvalid, } } @@ -97,11 +96,7 @@ func (e *ExecutableContextT[Instr]) StartBlock(blk ssa.BasicBlock) { end := e.allocateNop0() e.PerBlockHead, e.PerBlockEnd = end, end - labelPos, ok := e.LabelPositions[l] - if !ok { - labelPos = e.AllocateLabelPosition(l) - e.LabelPositions[l] = labelPos - } + labelPos := e.GetOrAllocateLabelPosition(l) e.OrderedBlockLabels = append(e.OrderedBlockLabels, labelPos) labelPos.Begin, labelPos.End = end, end labelPos.SB = blk @@ -146,8 +141,8 @@ func (e *ExecutableContextT[T]) FlushPendingInstructions() { func (e *ExecutableContextT[T]) Reset() { e.labelPositionPool.Reset() e.InstructionPool.Reset() - for l := Label(0); l <= e.NextLabel; l++ { - delete(e.LabelPositions, l) + for i := range e.LabelPositions { + e.LabelPositions[i] = nil } e.PendingInstructions = e.PendingInstructions[:0] e.OrderedBlockLabels = e.OrderedBlockLabels[:0] @@ -163,10 +158,17 @@ func (e *ExecutableContextT[T]) AllocateLabel() Label { return e.NextLabel } -func (e *ExecutableContextT[T]) AllocateLabelPosition(la Label) *LabelPosition[T] { - l := e.labelPositionPool.Allocate() - l.L = la - return l +func (e *ExecutableContextT[T]) GetOrAllocateLabelPosition(l Label) *LabelPosition[T] { + if len(e.LabelPositions) <= int(l) { + e.LabelPositions = append(e.LabelPositions, make([]*LabelPosition[T], int(l)+1-len(e.LabelPositions))...) + } + ret := e.LabelPositions[l] + if ret == nil { + ret = e.labelPositionPool.Allocate() + ret.L = l + e.LabelPositions[l] = ret + } + return ret } func (e *ExecutableContextT[T]) GetOrAllocateSSABlockLabel(blk ssa.BasicBlock) Label { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go index 310ad2203a..61ae6f4061 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go @@ -1906,8 +1906,10 @@ func (m *machine) InsertMove(dst, src regalloc.VReg, typ ssa.Type) { func (m *machine) Format() string { ectx := m.ectx begins := map[*instruction]backend.Label{} - for l, pos := range ectx.LabelPositions { - begins[pos.Begin] = l + for _, pos := range ectx.LabelPositions { + if pos != nil { + begins[pos.Begin] = pos.L + } } irBlocks := map[backend.Label]ssa.BasicBlockID{} @@ -1950,7 +1952,10 @@ func (m *machine) encodeWithoutSSA(root *instruction) { offset := int64(len(*bufPtr)) if cur.kind == nop0 { l := cur.nop0Label() - if pos, ok := ectx.LabelPositions[l]; ok { + if int(l) >= len(ectx.LabelPositions) { + continue + } + if pos := ectx.LabelPositions[l]; pos != nil { pos.BinaryOffset = offset } } @@ -2005,7 +2010,7 @@ func (m *machine) Encode(ctx context.Context) (err error) { switch cur.kind { case nop0: l := cur.nop0Label() - if pos, ok := ectx.LabelPositions[l]; ok { + if pos := ectx.LabelPositions[l]; pos != nil { pos.BinaryOffset = offset } case sourceOffsetInfo: @@ -2165,8 +2170,7 @@ func (m *machine) allocateBrTarget() (nop *instruction, l backend.Label) { //nol func (m *machine) allocateLabel() *labelPosition { ectx := m.ectx l := ectx.AllocateLabel() - pos := ectx.AllocateLabelPosition(l) - ectx.LabelPositions[l] = pos + pos := ectx.GetOrAllocateLabelPosition(l) return pos } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go index 6615471c6a..4eaa13ce1c 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go @@ -101,13 +101,14 @@ func (m *machine) LowerParams(args []ssa.Value) { bits := arg.Type.Bits() // At this point of compilation, we don't yet know how much space exist below the return address. // So we instruct the address mode to add the `argStackOffset` to the offset at the later phase of compilation. - amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace} + amode := m.amodePool.Allocate() + *amode = addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace} load := m.allocateInstr() switch arg.Type { case ssa.TypeI32, ssa.TypeI64: - load.asULoad(operandNR(reg), amode, bits) + load.asULoad(reg, amode, bits) case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - load.asFpuLoad(operandNR(reg), amode, bits) + load.asFpuLoad(reg, amode, bits) default: panic("BUG") } @@ -169,7 +170,8 @@ func (m *machine) LowerReturns(rets []ssa.Value) { // At this point of compilation, we don't yet know how much space exist below the return address. // So we instruct the address mode to add the `retStackOffset` to the offset at the later phase of compilation. - amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace} + amode := m.amodePool.Allocate() + *amode = addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace} store := m.allocateInstr() store.asStore(operandNR(reg), amode, bits) m.insert(store) @@ -215,9 +217,9 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i ldr := m.allocateInstr() switch r.Type { case ssa.TypeI32, ssa.TypeI64: - ldr.asULoad(operandNR(reg), amode, r.Type.Bits()) + ldr.asULoad(reg, amode, r.Type.Bits()) case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits()) + ldr.asFpuLoad(reg, amode, r.Type.Bits()) default: panic("BUG") } @@ -225,7 +227,7 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i } } -func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) { +func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, *addressMode) { exct := m.executableContext exct.PendingInstructions = exct.PendingInstructions[:0] mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse) @@ -235,15 +237,15 @@ func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset return cur, mode } -func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode { +func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) *addressMode { if rn.RegType() != regalloc.RegTypeInt { panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64)) } - var amode addressMode + amode := m.amodePool.Allocate() if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) { - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset} } else if offsetFitsInAddressModeKindRegSignedImm9(offset) { - amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset} + *amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset} } else { var indexReg regalloc.VReg if allowTmpRegUse { @@ -253,7 +255,7 @@ func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn reg indexReg = m.compiler.AllocateVReg(ssa.TypeI64) m.lowerConstantI64(indexReg, offset) } - amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */} + *amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */} } return amode } @@ -315,7 +317,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b } else { ao = aluOpSub } - alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true) + alu.asALU(ao, rd, operandNR(spVReg), imm12Operand, true) m.insert(alu) } else { m.lowerConstantI64(tmpRegVReg, diff) @@ -326,7 +328,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b } else { ao = aluOpSub } - alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true) + alu.asALU(ao, rd, operandNR(spVReg), operandNR(tmpRegVReg), true) m.insert(alu) } } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go index 7a9cceb332..f8b5d97ac7 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go @@ -59,25 +59,26 @@ func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regallo } else { postIndexImm = 8 } - loadMode := addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm} + loadMode := m.amodePool.Allocate() + *loadMode = addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm} instr := m.allocateInstr() switch typ { case ssa.TypeI32: - instr.asULoad(loadTargetReg, loadMode, 32) + instr.asULoad(loadTargetReg.reg(), loadMode, 32) case ssa.TypeI64: - instr.asULoad(loadTargetReg, loadMode, 64) + instr.asULoad(loadTargetReg.reg(), loadMode, 64) case ssa.TypeF32: - instr.asFpuLoad(loadTargetReg, loadMode, 32) + instr.asFpuLoad(loadTargetReg.reg(), loadMode, 32) case ssa.TypeF64: - instr.asFpuLoad(loadTargetReg, loadMode, 64) + instr.asFpuLoad(loadTargetReg.reg(), loadMode, 64) case ssa.TypeV128: - instr.asFpuLoad(loadTargetReg, loadMode, 128) + instr.asFpuLoad(loadTargetReg.reg(), loadMode, 128) } cur = linkInstr(cur, instr) if isStackArg { - var storeMode addressMode + var storeMode *addressMode cur, storeMode = m.resolveAddressModeForOffsetAndInsert(cur, argStartOffsetFromSP+arg.Offset, bits, spVReg, true) toStack := m.allocateInstr() toStack.asStore(loadTargetReg, storeMode, bits) @@ -113,21 +114,22 @@ func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr reg } if isStackArg { - var loadMode addressMode + var loadMode *addressMode cur, loadMode = m.resolveAddressModeForOffsetAndInsert(cur, resultStartOffsetFromSP+result.Offset, bits, spVReg, true) toReg := m.allocateInstr() switch typ { case ssa.TypeI32, ssa.TypeI64: - toReg.asULoad(storeTargetReg, loadMode, bits) + toReg.asULoad(storeTargetReg.reg(), loadMode, bits) case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - toReg.asFpuLoad(storeTargetReg, loadMode, bits) + toReg.asFpuLoad(storeTargetReg.reg(), loadMode, bits) default: panic("TODO?") } cur = linkInstr(cur, toReg) } - mode := addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm} instr := m.allocateInstr() instr.asStore(storeTargetReg, mode, bits) cur = linkInstr(cur, instr) @@ -214,11 +216,12 @@ func (m *machine) move64(dst, src regalloc.VReg, prev *instruction) *instruction func (m *machine) loadOrStoreAtExecutionContext(d regalloc.VReg, offset wazevoapi.Offset, store bool, prev *instruction) *instruction { instr := m.allocateInstr() - mode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()} if store { instr.asStore(operandNR(d), mode, 64) } else { - instr.asULoad(operandNR(d), mode, 64) + instr.asULoad(d, mode, 64) } return linkInstr(prev, instr) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go index 466b1f9609..99e6bb482d 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go @@ -87,7 +87,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * // Module context is always the second argument. moduleCtrPtr := x1VReg store := m.allocateInstr() - amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset} + amode := m.amodePool.Allocate() + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset} store.asStore(operandNR(moduleCtrPtr), amode, 64) cur = linkInstr(cur, store) } @@ -120,11 +121,9 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * } else { sizeInBits = 64 } - store.asStore(operandNR(v), - addressMode{ - kind: addressModeKindPostIndex, - rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8), - }, sizeInBits) + amode := m.amodePool.Allocate() + *amode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8)} + store.asStore(operandNR(v), amode, sizeInBits) cur = linkInstr(cur, store) } @@ -139,7 +138,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * frameSizeReg = xzrVReg sliceSizeReg = xzrVReg } - _amode := addressModePreOrPostIndex(spVReg, -16, true) + _amode := addressModePreOrPostIndex(m, spVReg, -16, true) storeP := m.allocateInstr() storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode) cur = linkInstr(cur, storeP) @@ -165,8 +164,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true) ldr := m.allocateInstr() // And load the return address. - ldr.asULoad(operandNR(lrVReg), - addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64) + amode := addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */) + ldr.asULoad(lrVReg, amode, 64) cur = linkInstr(cur, ldr) originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want. @@ -183,23 +182,24 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * r := &abi.Rets[i] if r.Kind == backend.ABIArgKindReg { loadIntoReg := m.allocateInstr() - mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} switch r.Type { case ssa.TypeI32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asULoad(operandNR(r.Reg), mode, 32) + loadIntoReg.asULoad(r.Reg, mode, 32) case ssa.TypeI64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asULoad(operandNR(r.Reg), mode, 64) + loadIntoReg.asULoad(r.Reg, mode, 64) case ssa.TypeF32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32) + loadIntoReg.asFpuLoad(r.Reg, mode, 32) case ssa.TypeF64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64) + loadIntoReg.asFpuLoad(r.Reg, mode, 64) case ssa.TypeV128: mode.imm = 16 - loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128) + loadIntoReg.asFpuLoad(r.Reg, mode, 128) default: panic("TODO") } @@ -208,28 +208,29 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * // First we need to load the value to a temporary just like ^^. intTmp, floatTmp := x11VReg, v11VReg loadIntoTmpReg := m.allocateInstr() - mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} var resultReg regalloc.VReg switch r.Type { case ssa.TypeI32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32) + loadIntoTmpReg.asULoad(intTmp, mode, 32) resultReg = intTmp case ssa.TypeI64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64) + loadIntoTmpReg.asULoad(intTmp, mode, 64) resultReg = intTmp case ssa.TypeF32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32) + loadIntoTmpReg.asFpuLoad(floatTmp, mode, 32) resultReg = floatTmp case ssa.TypeF64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64) + loadIntoTmpReg.asFpuLoad(floatTmp, mode, 64) resultReg = floatTmp case ssa.TypeV128: mode.imm = 16 - loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128) + loadIntoTmpReg.asFpuLoad(floatTmp, mode, 128) resultReg = floatTmp default: panic("TODO") @@ -258,12 +259,13 @@ func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regal case regalloc.RegTypeFloat: sizeInBits = 128 } - store.asStore(operandNR(v), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: offset, - }, sizeInBits) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: offset, + } + store.asStore(operandNR(v), mode, sizeInBits) store.prev = cur cur.next = store cur = store @@ -276,7 +278,7 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64() for _, v := range regs { load := m.allocateInstr() - var as func(dst operand, amode addressMode, sizeInBits byte) + var as func(dst regalloc.VReg, amode *addressMode, sizeInBits byte) var sizeInBits byte switch v.RegType() { case regalloc.RegTypeInt: @@ -286,12 +288,13 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re as = load.asFpuLoad sizeInBits = 128 } - as(operandNR(v), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: offset, - }, sizeInBits) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: offset, + } + as(v, mode, sizeInBits) cur = linkInstr(cur, load) offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally load regs at the offset of multiple of 16. } @@ -324,11 +327,9 @@ func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode // Set the exit status on the execution context. setExistStatus := m.allocateInstr() - setExistStatus.asStore(operandNR(constReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), - }, 32) + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64()} + setExistStatus.asStore(operandNR(constReg), mode, 32) cur = linkInstr(cur, setExistStatus) return cur } @@ -340,12 +341,13 @@ func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction { cur = linkInstr(cur, adr) storeReturnAddr := m.allocateInstr() - storeReturnAddr.asStore(operandNR(tmpRegVReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), - }, 64) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), + } + storeReturnAddr.asStore(operandNR(tmpRegVReg), mode, 64) cur = linkInstr(cur, storeReturnAddr) // Exit the execution. @@ -364,11 +366,12 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe cur = linkInstr(cur, movSp) strSp := m.allocateInstr() - strSp.asStore(operandNR(tmpRegVReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), - }, 64) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), + } + strSp.asStore(operandNR(tmpRegVReg), mode, 64) cur = linkInstr(cur, strSp) return cur } @@ -376,27 +379,28 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) { load := m.allocateInstr() var result regalloc.VReg - mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg} switch arg.Type { case ssa.TypeI32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asULoad(operandNR(intVReg), mode, 32) + load.asULoad(intVReg, mode, 32) result = intVReg case ssa.TypeI64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asULoad(operandNR(intVReg), mode, 64) + load.asULoad(intVReg, mode, 64) result = intVReg case ssa.TypeF32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asFpuLoad(operandNR(floatVReg), mode, 32) + load.asFpuLoad(floatVReg, mode, 32) result = floatVReg case ssa.TypeF64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asFpuLoad(operandNR(floatVReg), mode, 64) + load.asFpuLoad(floatVReg, mode, 64) result = floatVReg case ssa.TypeV128: mode.imm = 16 - load.asFpuLoad(operandNR(floatVReg), mode, 128) + load.asFpuLoad(floatVReg, mode, 128) result = floatVReg default: panic("TODO") @@ -408,7 +412,8 @@ func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg r func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction { store := m.allocateInstr() - mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg} var sizeInBits byte switch result.Type { case ssa.TypeI32, ssa.TypeF32: diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go index 8aabc5997b..7121cb5382 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -3,10 +3,12 @@ package arm64 import ( "fmt" "math" + "unsafe" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" ) type ( @@ -22,9 +24,9 @@ type ( // TODO: optimize the layout later once the impl settles. instruction struct { prev, next *instruction - u1, u2, u3 uint64 - rd, rm, rn, ra operand - amode addressMode + u1, u2 uint64 + rd regalloc.VReg + rm, rn operand kind instructionKind addedBeforeRegAlloc bool } @@ -174,7 +176,7 @@ func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg { switch defKinds[i.kind] { case defKindNone: case defKindRD: - *regs = append(*regs, i.rd.nr()) + *regs = append(*regs, i.rd) case defKindCall: _, _, retIntRealRegs, retFloatRealRegs, _ := backend.ABIInfoFromUint64(i.u2) for i := byte(0); i < retIntRealRegs; i++ { @@ -194,7 +196,7 @@ func (i *instruction) AssignDef(reg regalloc.VReg) { switch defKinds[i.kind] { case defKindNone: case defKindRD: - i.rd = i.rd.assignReg(reg) + i.rd = reg case defKindCall: panic("BUG: call instructions shouldn't be assigned") default: @@ -329,7 +331,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { if rm := i.rm.reg(); rm.Valid() { *regs = append(*regs, rm) } - if ra := i.ra.reg(); ra.Valid() { + if ra := regalloc.VReg(i.u2); ra.Valid() { *regs = append(*regs, ra) } case useKindRNRN1RM: @@ -341,18 +343,20 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { *regs = append(*regs, rm) } case useKindAMode: - if amodeRN := i.amode.rn; amodeRN.Valid() { + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { *regs = append(*regs, amodeRN) } - if amodeRM := i.amode.rm; amodeRM.Valid() { + if amodeRM := amode.rm; amodeRM.Valid() { *regs = append(*regs, amodeRM) } case useKindRNAMode: *regs = append(*regs, i.rn.reg()) - if amodeRN := i.amode.rn; amodeRN.Valid() { + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { *regs = append(*regs, amodeRN) } - if amodeRM := i.amode.rm; amodeRM.Valid() { + if amodeRM := amode.rm; amodeRM.Valid() { *regs = append(*regs, amodeRM) } case useKindCond: @@ -374,7 +378,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { case useKindRDRewrite: *regs = append(*regs, i.rn.reg()) *regs = append(*regs, i.rm.reg()) - *regs = append(*regs, i.rd.reg()) + *regs = append(*regs, i.rd) default: panic(fmt.Sprintf("useKind for %v not defined", i)) } @@ -408,8 +412,8 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) { i.rm = i.rm.assignReg(reg) } } else { - if rd := i.rd.reg(); rd.Valid() { - i.rd = i.rd.assignReg(reg) + if rd := i.rd; rd.Valid() { + i.rd = reg } } case useKindRNRN1RM: @@ -435,32 +439,36 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) { i.rm = i.rm.assignReg(reg) } } else { - if ra := i.ra.reg(); ra.Valid() { - i.ra = i.ra.assignReg(reg) + if ra := regalloc.VReg(i.u2); ra.Valid() { + i.u2 = uint64(reg) } } case useKindAMode: if index == 0 { - if amodeRN := i.amode.rn; amodeRN.Valid() { - i.amode.rn = reg + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { + amode.rn = reg } } else { - if amodeRM := i.amode.rm; amodeRM.Valid() { - i.amode.rm = reg + amode := i.getAmode() + if amodeRM := amode.rm; amodeRM.Valid() { + amode.rm = reg } } case useKindRNAMode: if index == 0 { i.rn = i.rn.assignReg(reg) } else if index == 1 { - if amodeRN := i.amode.rn; amodeRN.Valid() { - i.amode.rn = reg + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { + amode.rn = reg } else { panic("BUG") } } else { - if amodeRM := i.amode.rm; amodeRM.Valid() { - i.amode.rm = reg + amode := i.getAmode() + if amodeRM := amode.rm; amodeRM.Valid() { + amode.rm = reg } else { panic("BUG") } @@ -503,35 +511,35 @@ func (i *instruction) callFuncRef() ssa.FuncRef { } // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { i.kind = movZ - i.rd = operandNR(dst) + i.rd = dst i.u1 = imm - i.u2 = shift + i.u2 = uint64(shift) if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { i.kind = movK - i.rd = operandNR(dst) + i.rd = dst i.u1 = imm - i.u2 = shift + i.u2 = uint64(shift) if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { i.kind = movN - i.rd = operandNR(dst) + i.rd = dst i.u1 = imm - i.u2 = shift + i.u2 = uint64(shift) if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } @@ -553,21 +561,21 @@ func (i *instruction) asRet() { i.kind = ret } -func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode addressMode) { +func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode *addressMode) { i.kind = storeP64 i.rn = operandNR(src1) i.rm = operandNR(src2) - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) { +func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode *addressMode) { i.kind = loadP64 i.rn = operandNR(src1) i.rm = operandNR(src2) - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asStore(src operand, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = store8 @@ -589,10 +597,10 @@ func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { i.kind = fpuStore128 } i.rn = src - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asSLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = sLoad8 @@ -604,10 +612,10 @@ func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) { panic("BUG") } i.rd = dst - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asULoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = uLoad8 @@ -619,10 +627,10 @@ func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { i.kind = uLoad64 } i.rd = dst - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asFpuLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 32: i.kind = fpuLoad32 @@ -632,10 +640,18 @@ func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) i.kind = fpuLoad128 } i.rd = dst - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) { +func (i *instruction) getAmode() *addressMode { + return wazevoapi.PtrFromUintptr[addressMode](uintptr(i.u1)) +} + +func (i *instruction) setAmode(a *addressMode) { + i.u1 = uint64(uintptr(unsafe.Pointer(a))) +} + +func (i *instruction) asVecLoad1R(rd regalloc.VReg, rn operand, arr vecArrangement) { // NOTE: currently only has support for no-offset loads, though it is suspicious that // we would need to support offset load (that is only available for post-index). i.kind = vecLoad1R @@ -646,32 +662,32 @@ func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) { func (i *instruction) asCSet(rd regalloc.VReg, mask bool, c condFlag) { i.kind = cSet - i.rd = operandNR(rd) + i.rd = rd i.u1 = uint64(c) if mask { i.u2 = 1 } } -func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) { +func (i *instruction) asCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) { i.kind = cSel i.rd = rd i.rn = rn i.rm = rm i.u1 = uint64(c) if _64bit { - i.u3 = 1 + i.u2 = 1 } } -func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) { +func (i *instruction) asFpuCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) { i.kind = fpuCSel i.rd = rd i.rn = rn i.rm = rm i.u1 = uint64(c) if _64bit { - i.u3 = 1 + i.u2 = 1 } } @@ -691,7 +707,7 @@ func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targetIndex, tar } func (i *instruction) brTableSequenceOffsetsResolved() { - i.u3 = 1 // indicate that the offsets are resolved, for debugging. + i.rm.data = 1 // indicate that the offsets are resolved, for debugging. } func (i *instruction) brLabel() label { @@ -701,7 +717,7 @@ func (i *instruction) brLabel() label { // brOffsetResolved is called when the target label is resolved. func (i *instruction) brOffsetResolve(offset int64) { i.u2 = uint64(offset) - i.u3 = 1 // indicate that the offset is resolved, for debugging. + i.rm.data = 1 // indicate that the offset is resolved, for debugging. } func (i *instruction) brOffset() int64 { @@ -714,7 +730,7 @@ func (i *instruction) asCondBr(c cond, target label, is64bit bool) { i.u1 = c.asUint64() i.u2 = uint64(target) if is64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } @@ -728,17 +744,17 @@ func (i *instruction) condBrLabel() label { // condBrOffsetResolve is called when the target label is resolved. func (i *instruction) condBrOffsetResolve(offset int64) { - i.rd.data = uint64(offset) - i.rd.data2 = 1 // indicate that the offset is resolved, for debugging. + i.rn.data = uint64(offset) + i.rn.data2 = 1 // indicate that the offset is resolved, for debugging. } // condBrOffsetResolved returns true if condBrOffsetResolve is already called. func (i *instruction) condBrOffsetResolved() bool { - return i.rd.data2 == 1 + return i.rn.data2 == 1 } func (i *instruction) condBrOffset() int64 { - return int64(i.rd.data) + return int64(i.rn.data) } func (i *instruction) condBrCond() cond { @@ -746,33 +762,33 @@ func (i *instruction) condBrCond() cond { } func (i *instruction) condBr64bit() bool { - return i.u3 == 1 + return i.u2&(1<<32) != 0 } func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) { i.kind = loadFpuConst32 i.u1 = raw - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) { i.kind = loadFpuConst64 i.u1 = raw - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) { i.kind = loadFpuConst128 i.u1 = lo i.u2 = hi - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) { i.kind = fpuCmp i.rn, i.rm = rn, rm if is64bit { - i.u3 = 1 + i.u1 = 1 } } @@ -783,12 +799,12 @@ func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, i i.u1 = uint64(c) i.u2 = uint64(flag) if is64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // asALU setups a basic ALU instruction. -func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asALU(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { switch rm.kind { case operandKindNR: i.kind = aluRRR @@ -804,22 +820,22 @@ func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { i.u1 = uint64(aluOp) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // asALU setups a basic ALU instruction. -func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) { +func (i *instruction) asALURRRR(aluOp aluOp, rd regalloc.VReg, rn, rm operand, ra regalloc.VReg, dst64bit bool) { i.kind = aluRRRR i.u1 = uint64(aluOp) - i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra + i.rd, i.rn, i.rm, i.u2 = rd, rn, rm, uint64(ra) if dst64bit { - i.u3 = 1 + i.u1 |= 1 << 32 } } // asALUShift setups a shift based ALU instruction. -func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asALUShift(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { switch rm.kind { case operandKindNR: i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands. @@ -831,17 +847,17 @@ func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) i.u1 = uint64(aluOp) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) { i.kind = aluRRBitmaskImm i.u1 = uint64(aluOp) - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd i.u2 = imm if dst64bit { - i.u3 = 1 + i.u1 |= 1 << 32 } } @@ -852,76 +868,76 @@ func (i *instruction) asMovToFPSR(rn regalloc.VReg) { func (i *instruction) asMovFromFPSR(rd regalloc.VReg) { i.kind = movFromFPSR - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) { i.kind = bitRR - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd i.u1 = uint64(bitOp) if is64bit { i.u2 = 1 } } -func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asFpuRRR(op fpuBinOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { i.kind = fpuRRR i.u1 = uint64(op) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { - i.u3 = 1 + i.u2 = 1 } } -func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) { +func (i *instruction) asFpuRR(op fpuUniOp, rd regalloc.VReg, rn operand, dst64bit bool) { i.kind = fpuRR i.u1 = uint64(op) i.rd, i.rn = rd, rn if dst64bit { - i.u3 = 1 + i.u2 = 1 } } func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) { i.kind = extend - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd i.u1 = uint64(fromBits) i.u2 = uint64(toBits) if signed { - i.u3 = 1 + i.u2 |= 1 << 32 } } func (i *instruction) asMove32(rd, rn regalloc.VReg) { i.kind = mov32 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd } func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction { i.kind = mov64 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd return i } func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) { i.kind = fpuMov64 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd } func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction { i.kind = fpuMov128 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd return i } -func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) { +func (i *instruction) asMovToVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) { i.kind = movToVec i.rd = rd i.rn = rn i.u1, i.u2 = uint64(arr), uint64(index) } -func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) { +func (i *instruction) asMovFromVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex, signed bool) { if signed { i.kind = movFromVecSigned } else { @@ -932,48 +948,48 @@ func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vec i.u1, i.u2 = uint64(arr), uint64(index) } -func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) { +func (i *instruction) asVecDup(rd regalloc.VReg, rn operand, arr vecArrangement) { i.kind = vecDup i.u1 = uint64(arr) i.rn, i.rd = rn, rd } -func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) { +func (i *instruction) asVecDupElement(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) { i.kind = vecDupElement i.u1 = uint64(arr) i.rn, i.rd = rn, rd i.u2 = uint64(index) } -func (i *instruction) asVecExtract(rd, rn, rm operand, arr vecArrangement, index uint32) { +func (i *instruction) asVecExtract(rd regalloc.VReg, rn, rm operand, arr vecArrangement, index uint32) { i.kind = vecExtract i.u1 = uint64(arr) i.rn, i.rm, i.rd = rn, rm, rd i.u2 = uint64(index) } -func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) { +func (i *instruction) asVecMovElement(rd regalloc.VReg, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) { i.kind = vecMovElement i.u1 = uint64(arr) - i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex) + i.u2 = uint64(rdIndex) | uint64(rnIndex)<<32 i.rn, i.rd = rn, rd } -func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) { +func (i *instruction) asVecMisc(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) { i.kind = vecMisc i.u1 = uint64(op) i.rn, i.rd = rn, rd i.u2 = uint64(arr) } -func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) { +func (i *instruction) asVecLanes(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) { i.kind = vecLanes i.u1 = uint64(op) i.rn, i.rd = rn, rd i.u2 = uint64(arr) } -func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction { +func (i *instruction) asVecShiftImm(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction { i.kind = vecShiftImm i.u1 = uint64(op) i.rn, i.rm, i.rd = rn, rm, rd @@ -981,7 +997,7 @@ func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrange return i } -func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) { +func (i *instruction) asVecTbl(nregs byte, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { switch nregs { case 0, 1: i.kind = vecTbl @@ -1000,14 +1016,14 @@ func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangemen i.u2 = uint64(arr) } -func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) { +func (i *instruction) asVecPermute(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { i.kind = vecPermute i.u1 = uint64(op) i.rn, i.rm, i.rd = rn, rm, rd i.u2 = uint64(arr) } -func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction { +func (i *instruction) asVecRRR(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction { i.kind = vecRRR i.u1 = uint64(op) i.rn, i.rd, i.rm = rn, rd, rm @@ -1017,7 +1033,7 @@ func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) // asVecRRRRewrite encodes a vector instruction that rewrites the destination register. // IMPORTANT: the destination register must be already defined before this instruction. -func (i *instruction) asVecRRRRewrite(op vecOp, rd, rn, rm operand, arr vecArrangement) { +func (i *instruction) asVecRRRRewrite(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { i.kind = vecRRRRewrite i.u1 = uint64(op) i.rn, i.rd, i.rm = rn, rd, rm @@ -1033,8 +1049,8 @@ func (i *instruction) IsCopy() bool { // String implements fmt.Stringer. func (i *instruction) String() (str string) { - is64SizeBitToSize := func(u3 uint64) byte { - if u3 == 0 { + is64SizeBitToSize := func(v uint64) byte { + if v == 0 { return 32 } return 64 @@ -1049,46 +1065,46 @@ func (i *instruction) String() (str string) { str = "nop0" } case aluRRR: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) case aluRRRR: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u1 >> 32) str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size)) + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(regalloc.VReg(i.u2), size)) case aluRRImm12: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) case aluRRBitmaskImm: - size := is64SizeBitToSize(i.u3) - rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size) + size := is64SizeBitToSize(i.u1 >> 32) + rd, rn := formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size) if size == 32 { str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2)) } else { str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2) } case aluRRImmShift: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %#x", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.shiftImm(), ) case aluRRRShift: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.format(size), ) case aluRRRExtend: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), // Regardless of the source size, the register is formatted in 32-bit. i.rm.format(32), @@ -1097,57 +1113,57 @@ func (i *instruction) String() (str string) { size := is64SizeBitToSize(i.u2) str = fmt.Sprintf("%s %s, %s", bitOp(i.u1), - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), ) case uLoad8: - str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case sLoad8: - str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case uLoad16: - str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case sLoad16: - str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case uLoad32: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case sLoad32: - str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case uLoad64: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64)) case store8: - str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8)) + str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(8)) case store16: - str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16)) + str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(16)) case store32: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(32)) case store64: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64)) case storeP64: str = fmt.Sprintf("stp %s, %s, %s", - formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) + formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64)) case loadP64: str = fmt.Sprintf("ldp %s, %s, %s", - formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) + formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64)) case mov64: str = fmt.Sprintf("mov %s, %s", - formatVRegSized(i.rd.nr(), 64), + formatVRegSized(i.rd, 64), formatVRegSized(i.rn.nr(), 64)) case mov32: - str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32)) + str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd, 32), formatVRegSized(i.rn.nr(), 32)) case movZ: - size := is64SizeBitToSize(i.u3) - str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) + size := is64SizeBitToSize(i.u2 >> 32) + str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16) case movN: - size := is64SizeBitToSize(i.u3) - str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) + size := is64SizeBitToSize(i.u2 >> 32) + str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16) case movK: - size := is64SizeBitToSize(i.u3) - str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) + size := is64SizeBitToSize(i.u2 >> 32) + str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16) case extend: fromBits, toBits := byte(i.u1), byte(i.u2) var signedStr string - if i.u3 == 1 { + if i.u2>>32 == 1 { signedStr = "s" } else { signedStr = "u" @@ -1161,39 +1177,39 @@ func (i *instruction) String() (str string) { case 32: fromStr = "w" } - str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32)) + str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd, toBits), formatVRegSized(i.rn.nr(), 32)) case cSel: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2) str = fmt.Sprintf("csel %s, %s, %s, %s", - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), condFlag(i.u1), ) case cSet: if i.u2 != 0 { - str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) + str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1)) } else { - str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) + str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1)) } case cCmpImm: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s", formatVRegSized(i.rn.nr(), size), i.rm.data, i.u2&0b1111, condFlag(i.u1)) case fpuMov64: str = fmt.Sprintf("mov %s, %s", - formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone), + formatVRegVec(i.rd, vecArrangement8B, vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone)) case fpuMov128: str = fmt.Sprintf("mov %s, %s", - formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone), + formatVRegVec(i.rd, vecArrangement16B, vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone)) case fpuMovFromVec: panic("TODO") case fpuRR: - dstSz := is64SizeBitToSize(i.u3) + dstSz := is64SizeBitToSize(i.u2) srcSz := dstSz op := fpuUniOp(i.u1) switch op { @@ -1203,38 +1219,38 @@ func (i *instruction) String() (str string) { srcSz = 64 } str = fmt.Sprintf("%s %s, %s", op.String(), - formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz)) + formatVRegSized(i.rd, dstSz), formatVRegSized(i.rn.nr(), srcSz)) case fpuRRR: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2) str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(), - formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) case fpuRRI: panic("TODO") case fpuRRRR: panic("TODO") case fpuCmp: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u1) str = fmt.Sprintf("fcmp %s, %s", formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) case fpuLoad32: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32)) case fpuStore32: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(64)) case fpuLoad64: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64)) case fpuStore64: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64)) case fpuLoad128: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 128), i.getAmode().format(64)) case fpuStore128: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.getAmode().format(64)) case loadFpuConst32: - str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1))) + str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd, 32), math.Float32frombits(uint32(i.u1))) case loadFpuConst64: - str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1)) + str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd, 64), math.Float64frombits(i.u1)) case loadFpuConst128: str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x", - formatVRegSized(i.rd.nr(), 128), i.u1, i.u2) + formatVRegSized(i.rd, 128), i.u1, i.u2) case fpuToInt: var op, src, dst string if signed := i.u1 == 1; signed { @@ -1242,15 +1258,15 @@ func (i *instruction) String() (str string) { } else { op = "fcvtzu" } - if src64 := i.u2 == 1; src64 { + if src64 := i.u2&1 != 0; src64 { src = formatVRegWidthVec(i.rn.nr(), vecArrangementD) } else { src = formatVRegWidthVec(i.rn.nr(), vecArrangementS) } - if dst64 := i.u3 == 1; dst64 { - dst = formatVRegSized(i.rd.nr(), 64) + if dst64 := i.u2&2 != 0; dst64 { + dst = formatVRegSized(i.rd, 64) } else { - dst = formatVRegSized(i.rd.nr(), 32) + dst = formatVRegSized(i.rd, 32) } str = fmt.Sprintf("%s %s, %s", op, dst, src) @@ -1261,21 +1277,21 @@ func (i *instruction) String() (str string) { } else { op = "ucvtf" } - if src64 := i.u2 == 1; src64 { + if src64 := i.u2&1 != 0; src64 { src = formatVRegSized(i.rn.nr(), 64) } else { src = formatVRegSized(i.rn.nr(), 32) } - if dst64 := i.u3 == 1; dst64 { - dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD) + if dst64 := i.u2&2 != 0; dst64 { + dst = formatVRegWidthVec(i.rd, vecArrangementD) } else { - dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS) + dst = formatVRegWidthVec(i.rd, vecArrangementS) } str = fmt.Sprintf("%s %s, %s", op, dst, src) case fpuCSel: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2) str = fmt.Sprintf("fcsel %s, %s, %s, %s", - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), condFlag(i.u1), @@ -1291,7 +1307,7 @@ func (i *instruction) String() (str string) { default: panic("unsupported arrangement " + arr.String()) } - str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) + str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd, arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) case movFromVec, movFromVecSigned: var size byte var opcode string @@ -1315,23 +1331,23 @@ func (i *instruction) String() (str string) { default: panic("unsupported arrangement " + arr.String()) } - str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2))) + str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd, size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2))) case vecDup: str = fmt.Sprintf("dup %s, %s", - formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64), ) case vecDupElement: arr := vecArrangement(i.u1) str = fmt.Sprintf("dup %s, %s", - formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rd, arr, vecIndexNone), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)), ) case vecDupFromFpu: panic("TODO") case vecExtract: str = fmt.Sprintf("ext %s, %s, %s, #%d", - formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone), uint32(i.u2), @@ -1340,15 +1356,15 @@ func (i *instruction) String() (str string) { panic("TODO") case vecMovElement: str = fmt.Sprintf("mov %s, %s", - formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)), - formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)), + formatVRegVec(i.rd, vecArrangement(i.u1), vecIndex(i.u2&0xffffffff)), + formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u2>>32)), ) case vecMiscNarrow: panic("TODO") case vecRRR, vecRRRRewrite: str = fmt.Sprintf("%s %s, %s, %s", vecOp(i.u1), - formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone), ) @@ -1356,12 +1372,12 @@ func (i *instruction) String() (str string) { vop := vecOp(i.u1) if vop == vecOpCmeq0 { str = fmt.Sprintf("cmeq %s, %s, #0", - formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) } else { str = fmt.Sprintf("%s %s, %s", vop, - formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), + formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) } case vecLanes: @@ -1379,24 +1395,24 @@ func (i *instruction) String() (str string) { } str = fmt.Sprintf("%s %s, %s", vecOp(i.u1), - formatVRegWidthVec(i.rd.nr(), destArr), + formatVRegWidthVec(i.rd, destArr), formatVRegVec(i.rn.nr(), arr, vecIndexNone)) case vecShiftImm: arr := vecArrangement(i.u2) str = fmt.Sprintf("%s %s, %s, #%d", vecOp(i.u1), - formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rd, arr, vecIndexNone), formatVRegVec(i.rn.nr(), arr, vecIndexNone), i.rm.shiftImm()) case vecTbl: arr := vecArrangement(i.u2) str = fmt.Sprintf("tbl %s, { %s }, %s", - formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rd, arr, vecIndexNone), formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone), formatVRegVec(i.rm.nr(), arr, vecIndexNone)) case vecTbl2: arr := vecArrangement(i.u2) - rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr() + rd, rn, rm := i.rd, i.rn.nr(), i.rm.nr() rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) str = fmt.Sprintf("tbl %s, { %s, %s }, %s", formatVRegVec(rd, arr, vecIndexNone), @@ -1407,13 +1423,13 @@ func (i *instruction) String() (str string) { arr := vecArrangement(i.u2) str = fmt.Sprintf("%s %s, %s, %s", vecOp(i.u1), - formatVRegVec(i.rd.nr(), arr, vecIndexNone), + formatVRegVec(i.rd, arr, vecIndexNone), formatVRegVec(i.rn.nr(), arr, vecIndexNone), formatVRegVec(i.rm.nr(), arr, vecIndexNone)) case movToFPSR: str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64)) case movFromFPSR: - str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd.nr(), 64)) + str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd, 64)) case call: str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1)) case callInd: @@ -1422,15 +1438,15 @@ func (i *instruction) String() (str string) { str = "ret" case br: target := label(i.u1) - if i.u3 != 0 { + if i.rm.data != 0 { str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String()) } else { str = fmt.Sprintf("b %s", target.String()) } case condBr: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2 >> 32) c := cond(i.u1) - target := label(i.u2) + target := label(i.u2 & 0xffffffff) switch c.kind() { case condKindRegisterZero: if !i.condBrOffsetResolved() { @@ -1456,7 +1472,7 @@ func (i *instruction) String() (str string) { } } case adr: - str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1)) + str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd, 64), int64(i.u1)) case brTableSequence: targetIndex := i.u1 str = fmt.Sprintf("br_table_sequence %s, table_index=%d", formatVRegSized(i.rn.nr(), 64), targetIndex) @@ -1473,7 +1489,7 @@ func (i *instruction) String() (str string) { case 1: m = m + "b" } - str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64)) + str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64)) case atomicCas: m := "casal" size := byte(32) @@ -1485,7 +1501,7 @@ func (i *instruction) String() (str string) { case 1: m = m + "b" } - str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64)) + str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64)) case atomicLoad: m := "ldar" size := byte(32) @@ -1497,7 +1513,7 @@ func (i *instruction) String() (str string) { case 1: m = m + "b" } - str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64)) + str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64)) case atomicStore: m := "stlr" size := byte(32) @@ -1517,9 +1533,9 @@ func (i *instruction) String() (str string) { case emitSourceOffsetInfo: str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1)) case vecLoad1R: - str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64)) + str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64)) case loadConstBlockArg: - str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd.nr(), 64), i.u1) + str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd, 64), i.u1) default: panic(i.kind) } @@ -1528,26 +1544,26 @@ func (i *instruction) String() (str string) { func (i *instruction) asAdr(rd regalloc.VReg, offset int64) { i.kind = adr - i.rd = operandNR(rd) + i.rd = rd i.u1 = uint64(offset) } -func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt operand, size uint64) { +func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt regalloc.VReg, size uint64) { i.kind = atomicRmw - i.rd, i.rn, i.rm = rt, rn, rs + i.rd, i.rn, i.rm = rt, operandNR(rn), operandNR(rs) i.u1 = uint64(op) i.u2 = size } -func (i *instruction) asAtomicCas(rn, rs, rt operand, size uint64) { +func (i *instruction) asAtomicCas(rn, rs, rt regalloc.VReg, size uint64) { i.kind = atomicCas - i.rm, i.rn, i.rd = rt, rn, rs + i.rm, i.rn, i.rd = operandNR(rt), operandNR(rn), rs i.u2 = size } -func (i *instruction) asAtomicLoad(rn, rt operand, size uint64) { +func (i *instruction) asAtomicLoad(rn, rt regalloc.VReg, size uint64) { i.kind = atomicLoad - i.rn, i.rd = rn, rt + i.rn, i.rd = operandNR(rn), rt i.u2 = size } @@ -1755,12 +1771,12 @@ func (i *instruction) asLoadConstBlockArg(v uint64, typ ssa.Type, dst regalloc.V i.kind = loadConstBlockArg i.u1 = v i.u2 = uint64(typ) - i.rd = operandNR(dst) + i.rd = dst return i } func (i *instruction) loadConstBlockArgData() (v uint64, typ ssa.Type, dst regalloc.VReg) { - return i.u1, ssa.Type(i.u2), i.rd.nr() + return i.u1, ssa.Type(i.u2), i.rd } func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction { @@ -1778,7 +1794,7 @@ func (i *instruction) asUDF() *instruction { return i } -func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) { +func (i *instruction) asFpuToInt(rd regalloc.VReg, rn operand, rdSigned, src64bit, dst64bit bool) { i.kind = fpuToInt i.rn = rn i.rd = rd @@ -1789,11 +1805,11 @@ func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bo i.u2 = 1 } if dst64bit { - i.u3 = 1 + i.u2 |= 2 } } -func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bool) { +func (i *instruction) asIntToFpu(rd regalloc.VReg, rn operand, rnSigned, src64bit, dst64bit bool) { i.kind = intToFpu i.rn = rn i.rd = rd @@ -1804,7 +1820,7 @@ func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bo i.u2 = 1 } if dst64bit { - i.u3 = 1 + i.u2 |= 2 } } @@ -1817,7 +1833,7 @@ func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction { // aluOp determines the type of ALU operation. Instructions whose kind is one of // aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend // would use this type. -type aluOp int +type aluOp uint32 func (a aluOp) String() string { switch a { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go index 227a964741..f0ede2d6aa 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go @@ -44,12 +44,12 @@ func (i *instruction) encode(m *machine) { case callInd: c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true)) case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128: - c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode)) + c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], *i.getAmode())) case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128: - c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode)) + c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.RealReg()], *i.getAmode())) case vecLoad1R: c.Emit4Bytes(encodeVecLoad1R( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(i.u1))) case condBr: @@ -75,22 +75,22 @@ func (i *instruction) encode(m *machine) { panic("BUG") } case movN: - c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) + c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32))) case movZ: - c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) + c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32))) case movK: - c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) + c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32))) case mov32: - to, from := i.rd.realReg(), i.rn.realReg() + to, from := i.rd.RealReg(), i.rn.realReg() c.Emit4Bytes(encodeAsMov32(regNumberInEncoding[from], regNumberInEncoding[to])) case mov64: - to, from := i.rd.realReg(), i.rn.realReg() + to, from := i.rd.RealReg(), i.rn.realReg() toIsSp := to == sp fromIsSp := from == sp c.Emit4Bytes(encodeMov64(regNumberInEncoding[to], regNumberInEncoding[from], toIsSp, fromIsSp)) case loadP64, storeP64: rt, rt2 := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()] - amode := i.amode + amode := i.getAmode() rn := regNumberInEncoding[amode.rn.RealReg()] var pre bool switch amode.kind { @@ -102,21 +102,21 @@ func (i *instruction) encode(m *machine) { } c.Emit4Bytes(encodePreOrPostIndexLoadStorePair64(pre, kind == loadP64, rn, rt, rt2, amode.imm)) case loadFpuConst32: - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] if i.u1 == 0 { c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B)) } else { encodeLoadFpuConst32(c, rd, i.u1) } case loadFpuConst64: - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] if i.u1 == 0 { c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B)) } else { - encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.realReg()], i.u1) + encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.RealReg()], i.u1) } case loadFpuConst128: - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] lo, hi := i.u1, i.u2 if lo == 0 && hi == 0 { c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement16B)) @@ -126,35 +126,35 @@ func (i *instruction) encode(m *machine) { case aluRRRR: c.Emit4Bytes(encodeAluRRRR( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], - regNumberInEncoding[i.ra.realReg()], - uint32(i.u3), + regNumberInEncoding[regalloc.VReg(i.u2).RealReg()], + uint32(i.u1>>32), )) case aluRRImmShift: c.Emit4Bytes(encodeAluRRImm( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.rm.shiftImm()), - uint32(i.u3), + uint32(i.u2>>32), )) case aluRRR: rn := i.rn.realReg() c.Emit4Bytes(encodeAluRRR( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[rn], regNumberInEncoding[i.rm.realReg()], - i.u3 == 1, + i.u2>>32 == 1, rn == sp, )) case aluRRRExtend: rm, exo, to := i.rm.er() c.Emit4Bytes(encodeAluRRRExtend( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[rm.RealReg()], exo, @@ -164,25 +164,25 @@ func (i *instruction) encode(m *machine) { r, amt, sop := i.rm.sr() c.Emit4Bytes(encodeAluRRRShift( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[r.RealReg()], uint32(amt), sop, - i.u3 == 1, + i.u2>>32 == 1, )) case aluRRBitmaskImm: c.Emit4Bytes(encodeAluBitmaskImmediate( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], i.u2, - i.u3 == 1, + i.u1>>32 == 1, )) case bitRR: c.Emit4Bytes(encodeBitRR( bitOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.u2)), ) @@ -190,22 +190,22 @@ func (i *instruction) encode(m *machine) { imm12, shift := i.rm.imm12() c.Emit4Bytes(encodeAluRRImm12( aluOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], imm12, shift, - i.u3 == 1, + i.u2>>32 == 1, )) case fpuRRR: c.Emit4Bytes(encodeFpuRRR( fpuBinOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], - i.u3 == 1, + i.u2 == 1, )) case fpuMov64, fpuMov128: // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MOV--vector---Move-vector--an-alias-of-ORR--vector--register-- - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] rn := regNumberInEncoding[i.rn.realReg()] var q uint32 if kind == fpuMov128 { @@ -213,7 +213,7 @@ func (i *instruction) encode(m *machine) { } c.Emit4Bytes(q<<30 | 0b1110101<<21 | rn<<16 | 0b000111<<10 | rn<<5 | rd) case cSet: - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] cf := condFlag(i.u1) if i.u2 == 1 { // https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/CSETM--Conditional-Set-Mask--an-alias-of-CSINV- @@ -225,12 +225,12 @@ func (i *instruction) encode(m *machine) { c.Emit4Bytes(0b1001101010011111<<16 | uint32(cf.invert())<<12 | 0b111111<<5 | rd) } case extend: - c.Emit4Bytes(encodeExtend(i.u3 == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.realReg()], regNumberInEncoding[i.rn.realReg()])) + c.Emit4Bytes(encodeExtend((i.u2>>32) == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()])) case fpuCmp: // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/FCMP--Floating-point-quiet-Compare--scalar--?lang=en rn, rm := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()] var ftype uint32 - if i.u3 == 1 { + if i.u1 == 1 { ftype = 0b01 // double precision. } c.Emit4Bytes(0b1111<<25 | ftype<<22 | 1<<21 | rm<<16 | 0b1<<13 | rn<<5) @@ -242,34 +242,34 @@ func (i *instruction) encode(m *machine) { c.Emit4Bytes(0) } case adr: - c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1))) + c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.RealReg()], uint32(i.u1))) case cSel: c.Emit4Bytes(encodeConditionalSelect( kind, - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], condFlag(i.u1), - i.u3 == 1, + i.u2 == 1, )) case fpuCSel: c.Emit4Bytes(encodeFpuCSel( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], condFlag(i.u1), - i.u3 == 1, + i.u2 == 1, )) case movToVec: c.Emit4Bytes(encodeMoveToVec( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(byte(i.u1)), vecIndex(i.u2), )) case movFromVec, movFromVecSigned: c.Emit4Bytes(encodeMoveFromVec( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(byte(i.u1)), vecIndex(i.u2), @@ -277,18 +277,18 @@ func (i *instruction) encode(m *machine) { )) case vecDup: c.Emit4Bytes(encodeVecDup( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(byte(i.u1)))) case vecDupElement: c.Emit4Bytes(encodeVecDupElement( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(byte(i.u1)), vecIndex(i.u2))) case vecExtract: c.Emit4Bytes(encodeVecExtract( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(byte(i.u1)), @@ -296,35 +296,35 @@ func (i *instruction) encode(m *machine) { case vecPermute: c.Emit4Bytes(encodeVecPermute( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(byte(i.u2)))) case vecMovElement: c.Emit4Bytes(encodeVecMovElement( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(i.u1), - uint32(i.u2), uint32(i.u3), + uint32(i.u2), uint32(i.u2>>32), )) case vecMisc: c.Emit4Bytes(encodeAdvancedSIMDTwoMisc( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(i.u2), )) case vecLanes: c.Emit4Bytes(encodeVecLanes( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], vecArrangement(i.u2), )) case vecShiftImm: c.Emit4Bytes(encodeVecShiftImm( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.rm.shiftImm()), vecArrangement(i.u2), @@ -332,7 +332,7 @@ func (i *instruction) encode(m *machine) { case vecTbl: c.Emit4Bytes(encodeVecTbl( 1, - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(i.u2)), @@ -340,7 +340,7 @@ func (i *instruction) encode(m *machine) { case vecTbl2: c.Emit4Bytes(encodeVecTbl( 2, - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(i.u2)), @@ -353,9 +353,9 @@ func (i *instruction) encode(m *machine) { case fpuRR: c.Emit4Bytes(encodeFloatDataOneSource( fpuUniOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], - i.u3 == 1, + i.u2 == 1, )) case vecRRR: if op := vecOp(i.u1); op == vecOpBsl || op == vecOpBit || op == vecOpUmlal { @@ -365,14 +365,14 @@ func (i *instruction) encode(m *machine) { case vecRRRRewrite: c.Emit4Bytes(encodeVecRRR( vecOp(i.u1), - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()], vecArrangement(i.u2), )) case cCmpImm: // Conditional compare (immediate) in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en - sf := uint32(i.u3 & 0b1) + sf := uint32((i.u2 >> 32) & 0b1) nzcv := uint32(i.u2 & 0b1111) cond := uint32(condFlag(i.u1)) imm := uint32(i.rm.data & 0b11111) @@ -381,7 +381,7 @@ func (i *instruction) encode(m *machine) { sf<<31 | 0b111101001<<22 | imm<<16 | cond<<12 | 0b1<<11 | rn<<5 | nzcv, ) case movFromFPSR: - rt := regNumberInEncoding[i.rd.realReg()] + rt := regNumberInEncoding[i.rd.RealReg()] c.Emit4Bytes(encodeSystemRegisterMove(rt, true)) case movToFPSR: rt := regNumberInEncoding[i.rn.realReg()] @@ -390,13 +390,13 @@ func (i *instruction) encode(m *machine) { c.Emit4Bytes(encodeAtomicRmw( atomicRmwOp(i.u1), regNumberInEncoding[i.rm.realReg()], - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.u2), )) case atomicCas: c.Emit4Bytes(encodeAtomicCas( - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rm.realReg()], regNumberInEncoding[i.rn.realReg()], uint32(i.u2), @@ -404,7 +404,7 @@ func (i *instruction) encode(m *machine) { case atomicLoad: c.Emit4Bytes(encodeAtomicLoadStore( regNumberInEncoding[i.rn.realReg()], - regNumberInEncoding[i.rd.realReg()], + regNumberInEncoding[i.rd.RealReg()], uint32(i.u2), 1, )) @@ -810,7 +810,7 @@ func encodeFloatDataOneSource(op fpuUniOp, rd, rn uint32, dst64bit bool) uint32 // encodeCnvBetweenFloatInt encodes as "Conversion between floating-point and integer" in // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en func encodeCnvBetweenFloatInt(i *instruction) uint32 { - rd := regNumberInEncoding[i.rd.realReg()] + rd := regNumberInEncoding[i.rd.RealReg()] rn := regNumberInEncoding[i.rn.realReg()] var opcode uint32 @@ -822,8 +822,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 { rmode = 0b00 signed := i.u1 == 1 - src64bit := i.u2 == 1 - dst64bit := i.u3 == 1 + src64bit := i.u2&1 != 0 + dst64bit := i.u2&2 != 0 if signed { opcode = 0b010 } else { @@ -841,8 +841,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 { rmode = 0b11 signed := i.u1 == 1 - src64bit := i.u2 == 1 - dst64bit := i.u3 == 1 + src64bit := i.u2&1 != 0 + dst64bit := i.u2&2 != 0 if signed { opcode = 0b000 @@ -1787,13 +1787,13 @@ func encodeCBZCBNZ(rt uint32, nz bool, imm19 uint32, _64bit bool) (ret uint32) { // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en // // "shift" must have been divided by 16 at this point. -func encodeMoveWideImmediate(opc uint32, rd uint32, imm, shift, _64bit uint64) (ret uint32) { +func encodeMoveWideImmediate(opc uint32, rd uint32, imm uint64, shift, _64bit uint32) (ret uint32) { ret = rd ret |= uint32(imm&0xffff) << 5 - ret |= (uint32(shift)) << 21 + ret |= (shift) << 21 ret |= 0b100101 << 23 ret |= opc << 29 - ret |= uint32(_64bit) << 31 + ret |= _64bit << 31 return } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go index 698b382d46..6c6824fb0a 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go @@ -284,18 +284,18 @@ func (m *machine) load64bitConst(c int64, dst regalloc.VReg) { func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) { instr := m.allocateInstr() - instr.asMOVZ(dst, v, uint64(shift), dst64) + instr.asMOVZ(dst, v, uint32(shift), dst64) m.insert(instr) } func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) { instr := m.allocateInstr() - instr.asMOVK(dst, v, uint64(shift), dst64) + instr.asMOVK(dst, v, uint32(shift), dst64) m.insert(instr) } func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) { instr := m.allocateInstr() - instr.asMOVN(dst, v, uint64(shift), dst64) + instr.asMOVN(dst, v, uint32(shift), dst64) m.insert(instr) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index 2bb234e8c1..048bf32040 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -52,11 +52,11 @@ func (m *machine) lowerBrTable(i *ssa.Instruction) { maxIndexReg := m.compiler.AllocateVReg(ssa.TypeI32) m.lowerConstantI32(maxIndexReg, int32(len(targets)-1)) subs := m.allocateInstr() - subs.asALU(aluOpSubS, operandNR(xzrVReg), indexOperand, operandNR(maxIndexReg), false) + subs.asALU(aluOpSubS, xzrVReg, indexOperand, operandNR(maxIndexReg), false) m.insert(subs) csel := m.allocateInstr() adjustedIndex := m.compiler.AllocateVReg(ssa.TypeI32) - csel.asCSel(operandNR(adjustedIndex), operandNR(maxIndexReg), indexOperand, hs, false) + csel.asCSel(adjustedIndex, operandNR(maxIndexReg), indexOperand, hs, false) m.insert(csel) brSequence := m.allocateInstr() @@ -249,7 +249,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { rc := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerSelectVec(rc, rn, rm, rd) } else { m.lowerSelect(c, x, y, instr.Return()) @@ -270,7 +270,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, ctx := instr.Arg2() result := instr.Return() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) ctxVReg := m.compiler.VRegOf(ctx) m.lowerFpuToInt(rd, rn, ctxVReg, true, x.Type() == ssa.TypeF64, result.Type().Bits() == 64, op == ssa.OpcodeFcvtToSintSat) @@ -278,7 +278,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, ctx := instr.Arg2() result := instr.Return() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) ctxVReg := m.compiler.VRegOf(ctx) m.lowerFpuToInt(rd, rn, ctxVReg, false, x.Type() == ssa.TypeF64, result.Type().Bits() == 64, op == ssa.OpcodeFcvtToUintSat) @@ -286,25 +286,25 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x := instr.Arg() result := instr.Return() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) m.lowerIntToFpu(rd, rn, true, x.Type() == ssa.TypeI64, result.Type().Bits() == 64) case ssa.OpcodeFcvtFromUint: x := instr.Arg() result := instr.Return() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) m.lowerIntToFpu(rd, rn, false, x.Type() == ssa.TypeI64, result.Type().Bits() == 64) case ssa.OpcodeFdemote: v := instr.Arg() rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) cnt := m.allocateInstr() cnt.asFpuRR(fpuUniOpCvt64To32, rd, rn, false) m.insert(cnt) case ssa.OpcodeFpromote: v := instr.Arg() rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) cnt := m.allocateInstr() cnt.asFpuRR(fpuUniOpCvt32To64, rd, rn, true) m.insert(cnt) @@ -343,15 +343,15 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { ctxVReg := m.compiler.VRegOf(ctx) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerIDiv(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSdiv) case ssa.OpcodeSrem, ssa.OpcodeUrem: x, y, ctx := instr.Arg3() ctxVReg := m.compiler.VRegOf(ctx) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) - m.lowerIRem(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem) + rd := m.compiler.VRegOf(instr.Return()) + m.lowerIRem(ctxVReg, rd, rn.nr(), rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem) case ssa.OpcodeVconst: result := m.compiler.VRegOf(instr.Return()) lo, hi := instr.VconstData() @@ -362,7 +362,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x := instr.Arg() ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) ins.asVecMisc(vecOpNot, rd, rn, vecArrangement16B) m.insert(ins) case ssa.OpcodeVbxor: @@ -382,12 +382,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) creg := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone) - tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp := m.compiler.AllocateVReg(ssa.TypeV128) // creg is overwritten by BSL, so we need to move it to the result register before the instruction // in case when it is used somewhere else. mov := m.allocateInstr() - mov.asFpuMov128(tmp.nr(), creg.nr()) + mov.asFpuMov128(tmp, creg.nr()) m.insert(mov) ins := m.allocateInstr() @@ -396,7 +396,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { mov2 := m.allocateInstr() rd := m.compiler.VRegOf(instr.Return()) - mov2.asFpuMov128(rd, tmp.nr()) + mov2.asFpuMov128(rd, tmp) m.insert(mov2) case ssa.OpcodeVanyTrue, ssa.OpcodeVallTrue: x, lane := instr.ArgWithLane() @@ -405,12 +405,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { arr = ssaLaneToArrangement(lane) } rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerVcheckTrue(op, rm, rd, arr) case ssa.OpcodeVhighBits: x, lane := instr.ArgWithLane() rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) arr := ssaLaneToArrangement(lane) m.lowerVhighBits(rm, rd, arr) case ssa.OpcodeVIadd: @@ -441,9 +441,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { panic("unsupported lane " + lane.String()) } - widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo, vv, operandShiftImm(0), loArr) - widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi, vv, operandShiftImm(0), hiArr) - addp := m.allocateInstr().asVecRRR(vecOpAddp, operandNR(m.compiler.VRegOf(instr.Return())), tmpLo, tmpHi, dstArr) + widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo.nr(), vv, operandShiftImm(0), loArr) + widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi.nr(), vv, operandShiftImm(0), hiArr) + addp := m.allocateInstr().asVecRRR(vecOpAddp, m.compiler.VRegOf(instr.Return()), tmpLo, tmpHi, dstArr) m.insert(widenLo) m.insert(widenHi) m.insert(addp) @@ -493,7 +493,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { arr := ssaLaneToArrangement(lane) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerVIMul(rd, rn, rm, arr) case ssa.OpcodeVIabs: m.lowerVecMisc(vecOpAbs, instr) @@ -507,7 +507,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { arr := ssaLaneToArrangement(lane) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerVShift(op, rd, rn, rm, arr) case ssa.OpcodeVSqrt: m.lowerVecMisc(vecOpFsqrt, instr) @@ -547,18 +547,18 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, lane := instr.ArgWithLane() arr := ssaLaneToArrangement(lane) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerVfpuToInt(rd, rn, arr, op == ssa.OpcodeVFcvtToSintSat) case ssa.OpcodeVFcvtFromSint, ssa.OpcodeVFcvtFromUint: x, lane := instr.ArgWithLane() arr := ssaLaneToArrangement(lane) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerVfpuFromInt(rd, rn, arr, op == ssa.OpcodeVFcvtFromSint) case ssa.OpcodeSwidenLow, ssa.OpcodeUwidenLow: x, lane := instr.ArgWithLane() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) var arr vecArrangement switch lane { @@ -580,7 +580,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { case ssa.OpcodeSwidenHigh, ssa.OpcodeUwidenHigh: x, lane := instr.ArgWithLane() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) arr := ssaLaneToArrangement(lane) @@ -607,9 +607,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { } rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) - tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp := m.compiler.AllocateVReg(ssa.TypeV128) loQxtn := m.allocateInstr() hiQxtn := m.allocateInstr() @@ -628,7 +628,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { m.insert(hiQxtn) mov := m.allocateInstr() - mov.asFpuMov128(rd.nr(), tmp.nr()) + mov.asFpuMov128(rd, tmp) m.insert(mov) case ssa.OpcodeFvpromoteLow: x, lane := instr.ArgWithLane() @@ -637,7 +637,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { } ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) ins.asVecMisc(vecOpFcvtl, rd, rn, vecArrangement2S) m.insert(ins) case ssa.OpcodeFvdemote: @@ -647,14 +647,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { } ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) ins.asVecMisc(vecOpFcvtn, rd, rn, vecArrangement2S) m.insert(ins) case ssa.OpcodeExtractlane: x, index, signed, lane := instr.ExtractlaneData() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) mov := m.allocateInstr() switch lane { @@ -680,12 +680,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, y, index, lane := instr.InsertlaneData() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) - tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + rd := m.compiler.VRegOf(instr.Return()) + tmpReg := m.compiler.AllocateVReg(ssa.TypeV128) // Initially mov rn to tmp. mov1 := m.allocateInstr() - mov1.asFpuMov128(tmpReg.nr(), rn.nr()) + mov1.asFpuMov128(tmpReg, rn.nr()) m.insert(mov1) // movToVec and vecMovElement do not clear the remaining bits to zero, @@ -709,14 +709,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { // Finally mov tmp to rd. mov3 := m.allocateInstr() - mov3.asFpuMov128(rd.nr(), tmpReg.nr()) + mov3.asFpuMov128(rd, tmpReg) m.insert(mov3) case ssa.OpcodeSwizzle: x, y, lane := instr.Arg2WithLane() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) arr := ssaLaneToArrangement(lane) @@ -729,14 +729,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { x, y, lane1, lane2 := instr.ShuffleData() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) m.lowerShuffle(rd, rn, rm, lane1, lane2) case ssa.OpcodeSplat: x, lane := instr.ArgWithLane() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) dup := m.allocateInstr() switch lane { @@ -760,12 +760,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { xx, yy := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone), m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) tmp, tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)), operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp, xx, yy, vecArrangement8H)) - m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2, xx, yy, vecArrangement8H)) - m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp, tmp, tmp2, vecArrangement4S)) + m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp.nr(), xx, yy, vecArrangement8H)) + m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2.nr(), xx, yy, vecArrangement8H)) + m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp2, vecArrangement4S)) - rd := operandNR(m.compiler.VRegOf(instr.Return())) - m.insert(m.allocateInstr().asFpuMov128(rd.nr(), tmp.nr())) + rd := m.compiler.VRegOf(instr.Return()) + m.insert(m.allocateInstr().asFpuMov128(rd, tmp.nr())) case ssa.OpcodeLoadSplat: ptr, offset, lane := instr.LoadSplatData() @@ -794,7 +794,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) { m.executableContext.FlushPendingInstructions() } -func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) { +func (m *machine) lowerShuffle(rd regalloc.VReg, rn, rm operand, lane1, lane2 uint64) { // `tbl2` requires 2 consecutive registers, so we arbitrarily pick v29, v30. vReg, wReg := v29VReg, v30VReg @@ -822,7 +822,7 @@ func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) { m.insert(tbl2) } -func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangement) { +func (m *machine) lowerVShift(op ssa.Opcode, rd regalloc.VReg, rn, rm operand, arr vecArrangement) { var modulo byte switch arr { case vecArrangement16B: @@ -847,13 +847,13 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem if op != ssa.OpcodeVIshl { // Negate the amount to make this as right shift. neg := m.allocateInstr() - neg.asALU(aluOpSub, rtmp, operandNR(xzrVReg), rtmp, true) + neg.asALU(aluOpSub, rtmp.nr(), operandNR(xzrVReg), rtmp, true) m.insert(neg) } // Copy the shift amount into a vector register as sshl/ushl requires it to be there. dup := m.allocateInstr() - dup.asVecDup(vtmp, rtmp, arr) + dup.asVecDup(vtmp.nr(), rtmp, arr) m.insert(dup) if op == ssa.OpcodeVIshl || op == ssa.OpcodeVSshr { @@ -867,7 +867,7 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem } } -func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangement) { +func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm operand, rd regalloc.VReg, arr vecArrangement) { tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) // Special case VallTrue for i64x2. @@ -878,11 +878,11 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem // cset dst, eq ins := m.allocateInstr() - ins.asVecMisc(vecOpCmeq0, tmp, rm, vecArrangement2D) + ins.asVecMisc(vecOpCmeq0, tmp.nr(), rm, vecArrangement2D) m.insert(ins) addp := m.allocateInstr() - addp.asVecRRR(vecOpAddp, tmp, tmp, tmp, vecArrangement2D) + addp.asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp, vecArrangement2D) m.insert(addp) fcmp := m.allocateInstr() @@ -890,7 +890,7 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem m.insert(fcmp) cset := m.allocateInstr() - cset.asCSet(rd.nr(), false, eq) + cset.asCSet(rd, false, eq) m.insert(cset) return @@ -900,10 +900,10 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem ins := m.allocateInstr() if op == ssa.OpcodeVanyTrue { // umaxp v4?.16b, v2?.16b, v2?.16b - ins.asVecRRR(vecOpUmaxp, tmp, rm, rm, vecArrangement16B) + ins.asVecRRR(vecOpUmaxp, tmp.nr(), rm, rm, vecArrangement16B) } else { // uminv d4?, v2?.4s - ins.asVecLanes(vecOpUminv, tmp, rm, arr) + ins.asVecLanes(vecOpUminv, tmp.nr(), rm, arr) } m.insert(ins) @@ -917,15 +917,15 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem m.insert(movv) fc := m.allocateInstr() - fc.asCCmpImm(rd, uint64(0), al, 0, true) + fc.asCCmpImm(operandNR(rd), uint64(0), al, 0, true) m.insert(fc) cset := m.allocateInstr() - cset.asCSet(rd.nr(), false, ne) + cset.asCSet(rd, false, ne) m.insert(cset) } -func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { +func (m *machine) lowerVhighBits(rm operand, rd regalloc.VReg, arr vecArrangement) { r0 := operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) v0 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) v1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) @@ -947,7 +947,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Right arithmetic shift on the original vector and store the result into v1. So we have: // v1[i] = 0xff if vi<0, 0 otherwise. sshr := m.allocateInstr() - sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(7), vecArrangement16B) + sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(7), vecArrangement16B) m.insert(sshr) // Load the bit mask into r0. @@ -958,7 +958,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // dup r0 to v0. dup := m.allocateInstr() - dup.asVecDup(v0, r0, vecArrangement2D) + dup.asVecDup(v0.nr(), r0, vecArrangement2D) m.insert(dup) // Lane-wise logical AND with the bit mask, meaning that we have @@ -967,23 +967,23 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Below, we use the following notation: // wi := (1 << i) if vi<0, 0 otherwise. and := m.allocateInstr() - and.asVecRRR(vecOpAnd, v1, v1, v0, vecArrangement16B) + and.asVecRRR(vecOpAnd, v1.nr(), v1, v0, vecArrangement16B) m.insert(and) // Swap the lower and higher 8 byte elements, and write it into v0, meaning that we have // v0[i] = w(i+8) if i < 8, w(i-8) otherwise. ext := m.allocateInstr() - ext.asVecExtract(v0, v1, v1, vecArrangement16B, uint32(8)) + ext.asVecExtract(v0.nr(), v1, v1, vecArrangement16B, uint32(8)) m.insert(ext) // v = [w0, w8, ..., w7, w15] zip1 := m.allocateInstr() - zip1.asVecPermute(vecOpZip1, v0, v1, v0, vecArrangement16B) + zip1.asVecPermute(vecOpZip1, v0.nr(), v1, v0, vecArrangement16B) m.insert(zip1) // v.h[0] = w0 + ... + w15 addv := m.allocateInstr() - addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H) + addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H) m.insert(addv) // Extract the v.h[0] as the result. @@ -1006,7 +1006,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Right arithmetic shift on the original vector and store the result into v1. So we have: // v[i] = 0xffff if vi<0, 0 otherwise. sshr := m.allocateInstr() - sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(15), vecArrangement8H) + sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(15), vecArrangement8H) m.insert(sshr) // Load the bit mask into r0. @@ -1014,26 +1014,26 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // dup r0 to vector v0. dup := m.allocateInstr() - dup.asVecDup(v0, r0, vecArrangement2D) + dup.asVecDup(v0.nr(), r0, vecArrangement2D) m.insert(dup) lsl := m.allocateInstr() - lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(4), true) + lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(4), true) m.insert(lsl) movv := m.allocateInstr() - movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1)) + movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1)) m.insert(movv) // Lane-wise logical AND with the bitmask, meaning that we have // v[i] = (1 << i) if vi<0, 0 otherwise for i=0..3 // = (1 << (i+4)) if vi<0, 0 otherwise for i=3..7 and := m.allocateInstr() - and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B) + and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B) m.insert(and) addv := m.allocateInstr() - addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H) + addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H) m.insert(addv) movfv := m.allocateInstr() @@ -1055,7 +1055,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Right arithmetic shift on the original vector and store the result into v1. So we have: // v[i] = 0xffffffff if vi<0, 0 otherwise. sshr := m.allocateInstr() - sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(31), vecArrangement4S) + sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(31), vecArrangement4S) m.insert(sshr) // Load the bit mask into r0. @@ -1063,26 +1063,26 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // dup r0 to vector v0. dup := m.allocateInstr() - dup.asVecDup(v0, r0, vecArrangement2D) + dup.asVecDup(v0.nr(), r0, vecArrangement2D) m.insert(dup) lsl := m.allocateInstr() - lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(2), true) + lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(2), true) m.insert(lsl) movv := m.allocateInstr() - movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1)) + movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1)) m.insert(movv) // Lane-wise logical AND with the bitmask, meaning that we have // v[i] = (1 << i) if vi<0, 0 otherwise for i in [0, 1] // = (1 << (i+4)) if vi<0, 0 otherwise for i in [2, 3] and := m.allocateInstr() - and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B) + and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B) m.insert(and) addv := m.allocateInstr() - addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement4S) + addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement4S) m.insert(addv) movfv := m.allocateInstr() @@ -1102,21 +1102,21 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { // Move the higher 64-bit int into r0. movv1 := m.allocateInstr() - movv1.asMovFromVec(r0, rm, vecArrangementD, vecIndex(1), false) + movv1.asMovFromVec(r0.nr(), rm, vecArrangementD, vecIndex(1), false) m.insert(movv1) // Move the sign bit into the least significant bit. lsr1 := m.allocateInstr() - lsr1.asALUShift(aluOpLsr, r0, r0, operandShiftImm(63), true) + lsr1.asALUShift(aluOpLsr, r0.nr(), r0, operandShiftImm(63), true) m.insert(lsr1) lsr2 := m.allocateInstr() - lsr2.asALUShift(aluOpLsr, rd, rd, operandShiftImm(63), true) + lsr2.asALUShift(aluOpLsr, rd, operandNR(rd), operandShiftImm(63), true) m.insert(lsr2) // rd = (r0<<1) | rd lsl := m.allocateInstr() - lsl.asALU(aluOpAdd, rd, rd, operandSR(r0.nr(), 1, shiftOpLSL), false) + lsl.asALU(aluOpAdd, rd, operandNR(rd), operandSR(r0.nr(), 1, shiftOpLSL), false) m.insert(lsl) default: panic("Unsupported " + arr.String()) @@ -1128,7 +1128,7 @@ func (m *machine) lowerVecMisc(op vecOp, instr *ssa.Instruction) { arr := ssaLaneToArrangement(lane) ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) ins.asVecMisc(op, rd, rn, arr) m.insert(ins) } @@ -1137,22 +1137,22 @@ func (m *machine) lowerVecRRR(op vecOp, x, y, ret ssa.Value, arr vecArrangement) ins := m.allocateInstr() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(ret)) + rd := m.compiler.VRegOf(ret) ins.asVecRRR(op, rd, rn, rm, arr) m.insert(ins) } -func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) { +func (m *machine) lowerVIMul(rd regalloc.VReg, rn, rm operand, arr vecArrangement) { if arr != vecArrangement2D { mul := m.allocateInstr() mul.asVecRRR(vecOpMul, rd, rn, rm, arr) m.insert(mul) } else { - tmp1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) - tmp3 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp1 := m.compiler.AllocateVReg(ssa.TypeV128) + tmp2 := m.compiler.AllocateVReg(ssa.TypeV128) + tmp3 := m.compiler.AllocateVReg(ssa.TypeV128) - tmpRes := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmpRes := m.compiler.AllocateVReg(ssa.TypeV128) // Following the algorithm in https://chromium-review.googlesource.com/c/v8/v8/+/1781696 rev64 := m.allocateInstr() @@ -1160,7 +1160,7 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) { m.insert(rev64) mul := m.allocateInstr() - mul.asVecRRR(vecOpMul, tmp2, tmp2, rn, vecArrangement4S) + mul.asVecRRR(vecOpMul, tmp2, operandNR(tmp2), rn, vecArrangement4S) m.insert(mul) xtn1 := m.allocateInstr() @@ -1168,7 +1168,7 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) { m.insert(xtn1) addp := m.allocateInstr() - addp.asVecRRR(vecOpAddp, tmp2, tmp2, tmp2, vecArrangement4S) + addp.asVecRRR(vecOpAddp, tmp2, operandNR(tmp2), operandNR(tmp2), vecArrangement4S) m.insert(addp) xtn2 := m.allocateInstr() @@ -1179,15 +1179,15 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) { // In short, in UMLAL instruction, the result register is also one of the source register, and // the value on the result register is significant. shll := m.allocateInstr() - shll.asVecMisc(vecOpShll, tmpRes, tmp2, vecArrangement2S) + shll.asVecMisc(vecOpShll, tmpRes, operandNR(tmp2), vecArrangement2S) m.insert(shll) umlal := m.allocateInstr() - umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, tmp3, tmp1, vecArrangement2S) + umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, operandNR(tmp3), operandNR(tmp1), vecArrangement2S) m.insert(umlal) mov := m.allocateInstr() - mov.asFpuMov128(rd.nr(), tmpRes.nr()) + mov.asFpuMov128(rd, tmpRes) m.insert(mov) } } @@ -1203,7 +1203,7 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) { // BSL modifies the destination register, so we need to use a temporary register so that // the actual definition of the destination register happens *after* the BSL instruction. // That way, we can force the spill instruction to be inserted after the BSL instruction. - tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp := m.compiler.AllocateVReg(ssa.TypeV128) fcmgt := m.allocateInstr() if max { @@ -1220,17 +1220,17 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) { res := operandNR(m.compiler.VRegOf(instr.Return())) mov2 := m.allocateInstr() - mov2.asFpuMov128(res.nr(), tmp.nr()) + mov2.asFpuMov128(res.nr(), tmp) m.insert(mov2) } -func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) { +func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn regalloc.VReg, rm operand, _64bit, signed bool) { div := m.allocateInstr() if signed { - div.asALU(aluOpSDiv, rd, rn, rm, _64bit) + div.asALU(aluOpSDiv, rd, operandNR(rn), rm, _64bit) } else { - div.asALU(aluOpUDiv, rd, rn, rm, _64bit) + div.asALU(aluOpUDiv, rd, operandNR(rn), rm, _64bit) } m.insert(div) @@ -1239,11 +1239,11 @@ func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi // rd = rn-rd*rm by MSUB instruction. msub := m.allocateInstr() - msub.asALURRRR(aluOpMSub, rd, rd, rm, rn, _64bit) + msub.asALURRRR(aluOpMSub, rd, operandNR(rd), rm, rn, _64bit) m.insert(msub) } -func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) { +func (m *machine) lowerIDiv(execCtxVReg, rd regalloc.VReg, rn, rm operand, _64bit, signed bool) { div := m.allocateInstr() if signed { @@ -1260,7 +1260,7 @@ func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi // We need to check the signed overflow which happens iff "math.MinInt{32,64} / -1" minusOneCheck := m.allocateInstr() // Sets eq condition if rm == -1. - minusOneCheck.asALU(aluOpAddS, operandNR(xzrVReg), rm, operandImm12(1, 0), _64bit) + minusOneCheck.asALU(aluOpAddS, xzrVReg, rm, operandImm12(1, 0), _64bit) m.insert(minusOneCheck) ccmp := m.allocateInstr() @@ -1290,20 +1290,20 @@ func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, cond64bit bool, c func (m *machine) lowerFcopysign(x, y, ret ssa.Value) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - var tmpI, tmpF operand + var tmpI, tmpF regalloc.VReg _64 := x.Type() == ssa.TypeF64 if _64 { - tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) - tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) + tmpF = m.compiler.AllocateVReg(ssa.TypeF64) + tmpI = m.compiler.AllocateVReg(ssa.TypeI64) } else { - tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF32)) - tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) + tmpF = m.compiler.AllocateVReg(ssa.TypeF32) + tmpI = m.compiler.AllocateVReg(ssa.TypeI32) } rd := m.compiler.VRegOf(ret) - m.lowerFcopysignImpl(operandNR(rd), rn, rm, tmpI, tmpF, _64) + m.lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF, _64) } -func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool) { +func (m *machine) lowerFcopysignImpl(rd regalloc.VReg, rn, rm operand, tmpI, tmpF regalloc.VReg, _64bit bool) { // This is exactly the same code emitted by GCC for "__builtin_copysign": // // mov x0, -9223372036854775808 @@ -1313,26 +1313,26 @@ func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool setMSB := m.allocateInstr() if _64bit { - m.lowerConstantI64(tmpI.nr(), math.MinInt64) - setMSB.asMovToVec(tmpF, tmpI, vecArrangementD, vecIndex(0)) + m.lowerConstantI64(tmpI, math.MinInt64) + setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementD, vecIndex(0)) } else { - m.lowerConstantI32(tmpI.nr(), math.MinInt32) - setMSB.asMovToVec(tmpF, tmpI, vecArrangementS, vecIndex(0)) + m.lowerConstantI32(tmpI, math.MinInt32) + setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementS, vecIndex(0)) } m.insert(setMSB) - tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) + tmpReg := m.compiler.AllocateVReg(ssa.TypeF64) mov := m.allocateInstr() - mov.asFpuMov64(tmpReg.nr(), rn.nr()) + mov.asFpuMov64(tmpReg, rn.nr()) m.insert(mov) vbit := m.allocateInstr() - vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, tmpF, vecArrangement8B) + vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, operandNR(tmpF), vecArrangement8B) m.insert(vbit) movDst := m.allocateInstr() - movDst.asFpuMov64(rd.nr(), tmpReg.nr()) + movDst.asFpuMov64(rd, tmpReg) m.insert(movDst) } @@ -1340,7 +1340,7 @@ func (m *machine) lowerBitcast(instr *ssa.Instruction) { v, dstType := instr.BitcastData() srcType := v.Type() rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) - rd := operandNR(m.compiler.VRegOf(instr.Return())) + rd := m.compiler.VRegOf(instr.Return()) srcInt := srcType.IsInt() dstInt := dstType.IsInt() switch { @@ -1371,14 +1371,14 @@ func (m *machine) lowerBitcast(instr *ssa.Instruction) { func (m *machine) lowerFpuUniOp(op fpuUniOp, in, out ssa.Value) { rn := m.getOperand_NR(m.compiler.ValueDefinition(in), extModeNone) - rd := operandNR(m.compiler.VRegOf(out)) + rd := m.compiler.VRegOf(out) neg := m.allocateInstr() neg.asFpuRR(op, rd, rn, in.Type().Bits() == 64) m.insert(neg) } -func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) { +func (m *machine) lowerFpuToInt(rd regalloc.VReg, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) { if !nonTrapping { // First of all, we have to clear the FPU flags. flagClear := m.allocateInstr() @@ -1405,7 +1405,7 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64 // Check if the conversion was undefined by comparing the status with 1. // See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register alu := m.allocateInstr() - alu.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpReg), operandImm12(1, 0), true) + alu.asALU(aluOpSubS, xzrVReg, operandNR(tmpReg), operandImm12(1, 0), true) m.insert(alu) // If it is not undefined, we can return the result. @@ -1429,7 +1429,7 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64 } } -func (m *machine) lowerIntToFpu(rd, rn operand, signed, src64bit, dst64bit bool) { +func (m *machine) lowerIntToFpu(rd regalloc.VReg, rn operand, signed, src64bit, dst64bit bool) { cvt := m.allocateInstr() cvt.asIntToFpu(rd, rn, signed, src64bit, dst64bit) m.insert(cvt) @@ -1456,7 +1456,7 @@ func (m *machine) lowerFpuBinOp(si *ssa.Instruction) { xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) rn := m.getOperand_NR(xDef, extModeNone) rm := m.getOperand_NR(yDef, extModeNone) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) instr.asFpuRRR(op, rd, rn, rm, x.Type().Bits() == 64) m.insert(instr) } @@ -1482,7 +1482,7 @@ func (m *machine) lowerSubOrAdd(si *ssa.Instruction, add bool) { case !add && yNegated: // rn+rm = x-(-y) = x-y aop = aluOpAdd } - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) alu := m.allocateInstr() alu.asALU(aop, rd, rn, rm, x.Type().Bits() == 64) m.insert(alu) @@ -1527,7 +1527,7 @@ func (m *machine) lowerIcmp(si *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext) rm := m.getOperand_Imm12_ER_SR_NR(m.compiler.ValueDefinition(y), ext) alu := m.allocateInstr() - alu.asALU(aluOpSubS, operandNR(xzrVReg), rn, rm, in64bit) + alu.asALU(aluOpSubS, xzrVReg, rn, rm, in64bit) m.insert(alu) cset := m.allocateInstr() @@ -1542,7 +1542,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) switch flag { case eq: @@ -1554,7 +1554,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) { cmp.asVecRRR(vecOpCmeq, rd, rn, rm, arr) m.insert(cmp) not := m.allocateInstr() - not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B) + not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B) m.insert(not) case ge: cmp := m.allocateInstr() @@ -1598,7 +1598,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) switch flag { case eq: @@ -1610,7 +1610,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) { cmp.asVecRRR(vecOpFcmeq, rd, rn, rm, arr) m.insert(cmp) not := m.allocateInstr() - not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B) + not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B) m.insert(not) case ge: cmp := m.allocateInstr() @@ -1631,7 +1631,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) { } } -func (m *machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool) { +func (m *machine) lowerVfpuToInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) { cvt := m.allocateInstr() if signed { cvt.asVecMisc(vecOpFcvtzs, rd, rn, arr) @@ -1643,15 +1643,15 @@ func (m *machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool if arr == vecArrangement2D { narrow := m.allocateInstr() if signed { - narrow.asVecMisc(vecOpSqxtn, rd, rd, vecArrangement2S) + narrow.asVecMisc(vecOpSqxtn, rd, operandNR(rd), vecArrangement2S) } else { - narrow.asVecMisc(vecOpUqxtn, rd, rd, vecArrangement2S) + narrow.asVecMisc(vecOpUqxtn, rd, operandNR(rd), vecArrangement2S) } m.insert(narrow) } } -func (m *machine) lowerVfpuFromInt(rd, rn operand, arr vecArrangement, signed bool) { +func (m *machine) lowerVfpuFromInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) { cvt := m.allocateInstr() if signed { cvt.asVecMisc(vecOpScvtf, rd, rn, arr) @@ -1665,7 +1665,7 @@ func (m *machine) lowerShifts(si *ssa.Instruction, ext extMode, aluOp aluOp) { x, amount := si.Arg2() rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext) rm := m.getOperand_ShiftImm_NR(m.compiler.ValueDefinition(amount), ext, x.Type().Bits()) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) alu := m.allocateInstr() alu.asALUShift(aluOp, rd, rn, rm, x.Type().Bits() == 64) @@ -1678,11 +1678,11 @@ func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) rn := m.getOperand_NR(xDef, extModeNone) - var rd operand + var rd regalloc.VReg if ignoreResult { - rd = operandNR(xzrVReg) + rd = xzrVReg } else { - rd = operandNR(m.compiler.VRegOf(si.Return())) + rd = m.compiler.VRegOf(si.Return()) } _64 := x.Type().Bits() == 64 @@ -1691,7 +1691,7 @@ func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult c := instr.ConstantVal() if isBitMaskImmediate(c, _64) { // Constant bit wise operations can be lowered to a single instruction. - alu.asALUBitmaskImm(op, rd.nr(), rn.nr(), c, _64) + alu.asALUBitmaskImm(op, rd, rn.nr(), c, _64) m.insert(alu) return } @@ -1709,25 +1709,25 @@ func (m *machine) lowerRotl(si *ssa.Instruction) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - var tmp operand + var tmp regalloc.VReg if _64 { - tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) + tmp = m.compiler.AllocateVReg(ssa.TypeI64) } else { - tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) + tmp = m.compiler.AllocateVReg(ssa.TypeI32) } - rd := operandNR(m.compiler.VRegOf(r)) + rd := m.compiler.VRegOf(r) // Encode rotl as neg + rotr: neg is a sub against the zero-reg. m.lowerRotlImpl(rd, rn, rm, tmp, _64) } -func (m *machine) lowerRotlImpl(rd, rn, rm, tmp operand, is64bit bool) { +func (m *machine) lowerRotlImpl(rd regalloc.VReg, rn, rm operand, tmp regalloc.VReg, is64bit bool) { // Encode rotl as neg + rotr: neg is a sub against the zero-reg. neg := m.allocateInstr() neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rm, is64bit) m.insert(neg) alu := m.allocateInstr() - alu.asALU(aluOpRotR, rd, rn, tmp, is64bit) + alu.asALU(aluOpRotR, rd, rn, operandNR(tmp), is64bit) m.insert(alu) } @@ -1737,7 +1737,7 @@ func (m *machine) lowerRotr(si *ssa.Instruction) { xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y) rn := m.getOperand_NR(xDef, extModeNone) rm := m.getOperand_NR(yDef, extModeNone) - rd := operandNR(m.compiler.VRegOf(si.Return())) + rd := m.compiler.VRegOf(si.Return()) alu := m.allocateInstr() alu.asALU(aluOpRotR, rd, rn, rm, si.Return().Type().Bits() == 64) @@ -1797,7 +1797,7 @@ func (m *machine) lowerImul(x, y, result ssa.Value) { // TODO: if this comes before Add/Sub, we could merge it by putting it into the place of xzrVReg. mul := m.allocateInstr() - mul.asALURRRR(aluOpMAdd, operandNR(rd), rn, rm, operandNR(xzrVReg), x.Type().Bits() == 64) + mul.asALURRRR(aluOpMAdd, rd, rn, rm, xzrVReg, x.Type().Bits() == 64) m.insert(mul) } @@ -1849,22 +1849,22 @@ func (m *machine) lowerPopcnt(x, result ssa.Value) { // mov x5, v0.d[0] ;; finally we mov the result back to a GPR // - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rf1 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) ins := m.allocateInstr() - ins.asMovToVec(rf1, rn, vecArrangementD, vecIndex(0)) + ins.asMovToVec(rf1.nr(), rn, vecArrangementD, vecIndex(0)) m.insert(ins) rf2 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) cnt := m.allocateInstr() - cnt.asVecMisc(vecOpCnt, rf2, rf1, vecArrangement16B) + cnt.asVecMisc(vecOpCnt, rf2.nr(), rf1, vecArrangement16B) m.insert(cnt) rf3 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) uaddlv := m.allocateInstr() - uaddlv.asVecLanes(vecOpUaddlv, rf3, rf2, vecArrangement8B) + uaddlv.asVecLanes(vecOpUaddlv, rf3.nr(), rf2, vecArrangement8B) m.insert(uaddlv) mov := m.allocateInstr() @@ -1879,32 +1879,35 @@ func (m *machine) lowerExitWithCode(execCtxVReg regalloc.VReg, code wazevoapi.Ex loadExitCodeConst.asMOVZ(tmpReg1, uint64(code), 0, true) setExitCode := m.allocateInstr() - setExitCode.asStore(operandNR(tmpReg1), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), - }, 32) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), + } + setExitCode.asStore(operandNR(tmpReg1), mode, 32) // In order to unwind the stack, we also need to push the current stack pointer: tmp2 := m.compiler.AllocateVReg(ssa.TypeI64) movSpToTmp := m.allocateInstr() movSpToTmp.asMove64(tmp2, spVReg) strSpToExecCtx := m.allocateInstr() - strSpToExecCtx.asStore(operandNR(tmp2), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), - }, 64) + mode2 := m.amodePool.Allocate() + *mode2 = addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), + } + strSpToExecCtx.asStore(operandNR(tmp2), mode2, 64) // Also the address of this exit. tmp3 := m.compiler.AllocateVReg(ssa.TypeI64) currentAddrToTmp := m.allocateInstr() currentAddrToTmp.asAdr(tmp3, 0) storeCurrentAddrToExecCtx := m.allocateInstr() - storeCurrentAddrToExecCtx.asStore(operandNR(tmp3), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), - }, 64) + mode3 := m.amodePool.Allocate() + *mode3 = addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), + } + storeCurrentAddrToExecCtx.asStore(operandNR(tmp3), mode3, 64) exitSeq := m.allocateInstr() exitSeq.asExitSequence(execCtxVReg) @@ -1937,7 +1940,7 @@ func (m *machine) lowerIcmpToFlag(x, y ssa.Value, signed bool) { alu.asALU( aluOpSubS, // We don't need the result, just need to set flags. - operandNR(xzrVReg), + xzrVReg, rn, rm, x.Type().Bits() == 64, @@ -2012,7 +2015,7 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) { alu.asALU( aluOpSubS, // We don't need the result, just need to set flags. - operandNR(xzrVReg), + xzrVReg, rn, operandNR(xzrVReg), c.Type().Bits() == 64, @@ -2024,7 +2027,7 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) { rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) - rd := operandNR(m.compiler.VRegOf(result)) + rd := m.compiler.VRegOf(result) switch x.Type() { case ssa.TypeI32, ssa.TypeI64: // csel rd, rn, rm, cc @@ -2041,10 +2044,10 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) { } } -func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) { +func (m *machine) lowerSelectVec(rc, rn, rm operand, rd regalloc.VReg) { // First check if `rc` is zero or not. checkZero := m.allocateInstr() - checkZero.asALU(aluOpSubS, operandNR(xzrVReg), rc, operandNR(xzrVReg), false) + checkZero.asALU(aluOpSubS, xzrVReg, rc, operandNR(xzrVReg), false) m.insert(checkZero) // Then use CSETM to set all bits to one if `rc` is zero. @@ -2054,7 +2057,7 @@ func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) { m.insert(cset) // Then move the bits to the result vector register. - tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) + tmp2 := m.compiler.AllocateVReg(ssa.TypeV128) dup := m.allocateInstr() dup.asVecDup(tmp2, operandNR(allOnesOrZero), vecArrangement2D) m.insert(dup) @@ -2067,7 +2070,7 @@ func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) { // Finally, move the result to the destination register. mov2 := m.allocateInstr() - mov2.asFpuMov128(rd.nr(), tmp2.nr()) + mov2.asFpuMov128(rd, tmp2) m.insert(mov2) } @@ -2099,28 +2102,28 @@ func (m *machine) lowerAtomicRmw(si *ssa.Instruction) { addr, val := si.Arg2() addrDef, valDef := m.compiler.ValueDefinition(addr), m.compiler.ValueDefinition(val) rn := m.getOperand_NR(addrDef, extModeNone) - rt := operandNR(m.compiler.VRegOf(si.Return())) + rt := m.compiler.VRegOf(si.Return()) rs := m.getOperand_NR(valDef, extModeNone) _64 := si.Return().Type().Bits() == 64 - var tmp operand + var tmp regalloc.VReg if _64 { - tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) + tmp = m.compiler.AllocateVReg(ssa.TypeI64) } else { - tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) + tmp = m.compiler.AllocateVReg(ssa.TypeI32) } - m.lowerAtomicRmwImpl(op, rn, rs, rt, tmp, size, negateArg, flipArg, _64) + m.lowerAtomicRmwImpl(op, rn.nr(), rs.nr(), rt, tmp, size, negateArg, flipArg, _64) } -func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp operand, size uint64, negateArg, flipArg, dst64bit bool) { +func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp regalloc.VReg, size uint64, negateArg, flipArg, dst64bit bool) { switch { case negateArg: neg := m.allocateInstr() - neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rs, dst64bit) + neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit) m.insert(neg) case flipArg: flip := m.allocateInstr() - flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), rs, dst64bit) + flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit) m.insert(flip) default: tmp = rs @@ -2139,32 +2142,32 @@ func (m *machine) lowerAtomicCas(si *ssa.Instruction) { rn := m.getOperand_NR(addrDef, extModeNone) rt := m.getOperand_NR(replDef, extModeNone) rs := m.getOperand_NR(expDef, extModeNone) - tmp := operandNR(m.compiler.AllocateVReg(si.Return().Type())) + tmp := m.compiler.AllocateVReg(si.Return().Type()) _64 := si.Return().Type().Bits() == 64 // rs is overwritten by CAS, so we need to move it to the result register before the instruction // in case when it is used somewhere else. mov := m.allocateInstr() if _64 { - mov.asMove64(tmp.nr(), rs.nr()) + mov.asMove64(tmp, rs.nr()) } else { - mov.asMove32(tmp.nr(), rs.nr()) + mov.asMove32(tmp, rs.nr()) } m.insert(mov) - m.lowerAtomicCasImpl(rn, tmp, rt, size) + m.lowerAtomicCasImpl(rn.nr(), tmp, rt.nr(), size) mov2 := m.allocateInstr() rd := m.compiler.VRegOf(si.Return()) if _64 { - mov2.asMove64(rd, tmp.nr()) + mov2.asMove64(rd, tmp) } else { - mov2.asMove32(rd, tmp.nr()) + mov2.asMove32(rd, tmp) } m.insert(mov2) } -func (m *machine) lowerAtomicCasImpl(rn, rs, rt operand, size uint64) { +func (m *machine) lowerAtomicCasImpl(rn, rs, rt regalloc.VReg, size uint64) { cas := m.allocateInstr() cas.asAtomicCas(rn, rs, rt, size) m.insert(cas) @@ -2176,12 +2179,12 @@ func (m *machine) lowerAtomicLoad(si *ssa.Instruction) { addrDef := m.compiler.ValueDefinition(addr) rn := m.getOperand_NR(addrDef, extModeNone) - rt := operandNR(m.compiler.VRegOf(si.Return())) + rt := m.compiler.VRegOf(si.Return()) - m.lowerAtomicLoadImpl(rn, rt, size) + m.lowerAtomicLoadImpl(rn.nr(), rt, size) } -func (m *machine) lowerAtomicLoadImpl(rn, rt operand, size uint64) { +func (m *machine) lowerAtomicLoadImpl(rn, rt regalloc.VReg, size uint64) { ld := m.allocateInstr() ld.asAtomicLoad(rn, rt, size) m.insert(ld) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go index 4842eaa382..fd0760d723 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go @@ -24,6 +24,14 @@ type ( addressModeKind byte ) +func resetAddressMode(a *addressMode) { + a.kind = 0 + a.rn = 0 + a.rm = 0 + a.extOp = 0 + a.imm = 0 +} + const ( // addressModeKindRegExtended takes a base register and an index register. The index register is sign/zero-extended, // and then scaled by bits(type)/8. @@ -140,15 +148,17 @@ func (a addressMode) format(dstSizeBits byte) (ret string) { return } -func addressModePreOrPostIndex(rn regalloc.VReg, imm int64, preIndex bool) addressMode { +func addressModePreOrPostIndex(m *machine, rn regalloc.VReg, imm int64, preIndex bool) *addressMode { if !offsetFitsInAddressModeKindRegSignedImm9(imm) { panic(fmt.Sprintf("BUG: offset %#x does not fit in addressModeKindRegSignedImm9", imm)) } + mode := m.amodePool.Allocate() if preIndex { - return addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm} + *mode = addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm} } else { - return addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm} + *mode = addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm} } + return mode } func offsetFitsInAddressModeKindRegUnsignedImm12(dstSizeInBits byte, offset int64) bool { @@ -207,9 +217,9 @@ func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, ret amode := m.lowerToAddressMode(ptr, offset, size) load := m.allocateInstr() if signed { - load.asSLoad(operandNR(ret), amode, size) + load.asSLoad(ret, amode, size) } else { - load.asULoad(operandNR(ret), amode, size) + load.asULoad(ret, amode, size) } m.insert(load) } @@ -221,11 +231,11 @@ func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, ret ssa. load := m.allocateInstr() switch typ { case ssa.TypeI32, ssa.TypeI64: - load.asULoad(operandNR(dst), amode, typ.Bits()) + load.asULoad(dst, amode, typ.Bits()) case ssa.TypeF32, ssa.TypeF64: - load.asFpuLoad(operandNR(dst), amode, typ.Bits()) + load.asFpuLoad(dst, amode, typ.Bits()) case ssa.TypeV128: - load.asFpuLoad(operandNR(dst), amode, 128) + load.asFpuLoad(dst, amode, 128) default: panic("TODO") } @@ -239,7 +249,7 @@ func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, lane ssa.VecLane, m.lowerConstantI64(offsetReg, int64(offset)) addedBase := m.addReg64ToReg64(base, offsetReg) - rd := operandNR(m.compiler.VRegOf(ret)) + rd := m.compiler.VRegOf(ret) ld1r := m.allocateInstr() ld1r.asVecLoad1R(rd, operandNR(addedBase), ssaLaneToArrangement(lane)) @@ -258,7 +268,7 @@ func (m *machine) lowerStore(si *ssa.Instruction) { } // lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions. -func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode addressMode) { +func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode *addressMode) { // TODO: currently the instruction selection logic doesn't support addressModeKindRegScaledExtended and // addressModeKindRegScaled since collectAddends doesn't take ssa.OpcodeIshl into account. This should be fixed // to support more efficient address resolution. @@ -272,32 +282,33 @@ func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte // During the construction, this might emit additional instructions. // // Extracted as a separate function for easy testing. -func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode addressMode) { +func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode *addressMode) { + amode = m.amodePool.Allocate() switch a64sExist, a32sExist := !a64s.Empty(), !a32s.Empty(); { case a64sExist && a32sExist: var base regalloc.VReg base = a64s.Dequeue() var a32 addend32 a32 = a32s.Dequeue() - amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext} + *amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext} case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset): var base regalloc.VReg base = a64s.Dequeue() - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset} offset = 0 case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset): var base regalloc.VReg base = a64s.Dequeue() - amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset} + *amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset} offset = 0 case a64sExist: var base regalloc.VReg base = a64s.Dequeue() if !a64s.Empty() { index := a64s.Dequeue() - amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */} + *amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */} } else { - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} } case a32sExist: base32 := a32s.Dequeue() @@ -314,14 +325,14 @@ func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], if !a32s.Empty() { index := a32s.Dequeue() - amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext} + *amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext} } else { - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} } default: // Only static offsets. tmpReg := m.compiler.AllocateVReg(ssa.TypeI64) m.lowerConstantI64(tmpReg, offset) - amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0} offset = 0 } @@ -411,13 +422,13 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) { rd = m.compiler.AllocateVReg(ssa.TypeI64) alu := m.allocateInstr() if imm12Op, ok := asImm12Operand(uint64(c)); ok { - alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), imm12Op, true) + alu.asALU(aluOpAdd, rd, operandNR(r), imm12Op, true) } else if imm12Op, ok = asImm12Operand(uint64(-c)); ok { - alu.asALU(aluOpSub, operandNR(rd), operandNR(r), imm12Op, true) + alu.asALU(aluOpSub, rd, operandNR(r), imm12Op, true) } else { tmp := m.compiler.AllocateVReg(ssa.TypeI64) m.load64bitConst(c, tmp) - alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), operandNR(tmp), true) + alu.asALU(aluOpAdd, rd, operandNR(r), operandNR(tmp), true) } m.insert(alu) return @@ -426,7 +437,7 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) { func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) { rd = m.compiler.AllocateVReg(ssa.TypeI64) alu := m.allocateInstr() - alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandNR(rm), true) + alu.asALU(aluOpAdd, rd, operandNR(rn), operandNR(rm), true) m.insert(alu) return } @@ -434,7 +445,7 @@ func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) { func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regalloc.VReg) { rd = m.compiler.AllocateVReg(ssa.TypeI64) alu := m.allocateInstr() - alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandER(rm, ext, 64), true) + alu.asALU(aluOpAdd, rd, operandNR(rn), operandER(rm, ext, 64), true) m.insert(alu) return } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go index b435d9ba96..5f584f928b 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go @@ -21,6 +21,8 @@ type ( regAlloc regalloc.Allocator regAllocFn *backend.RegAllocFunction[*instruction, *machine] + amodePool wazevoapi.Pool[addressMode] + // addendsWorkQueue is used during address lowering, defined here for reuse. addendsWorkQueue wazevoapi.Queue[ssa.Value] addends32 wazevoapi.Queue[addend32] @@ -105,6 +107,7 @@ func NewBackend() backend.Machine { spillSlots: make(map[regalloc.VRegID]int64), executableContext: newExecutableContext(), regAlloc: regalloc.NewAllocator(regInfo), + amodePool: wazevoapi.NewPool[addressMode](resetAddressMode), } return m } @@ -149,6 +152,7 @@ func (m *machine) Reset() { m.maxRequiredStackSizeForCalls = 0 m.executableContext.Reset() m.jmpTableTargets = m.jmpTableTargets[:0] + m.amodePool.Reset() } // SetCurrentABI implements backend.Machine SetCurrentABI. @@ -183,9 +187,8 @@ func (m *machine) allocateBrTarget() (nop *instruction, l label) { l = ectx.AllocateLabel() nop = m.allocateInstr() nop.asNop0WithLabel(l) - pos := ectx.AllocateLabelPosition(l) + pos := ectx.GetOrAllocateLabelPosition(l) pos.Begin, pos.End = nop, nop - ectx.LabelPositions[l] = pos return } @@ -209,7 +212,7 @@ func (m *machine) allocateNop() *instruction { } func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) { - amode := &i.amode + amode := i.getAmode() switch amode.kind { case addressModeKindResultStackSpace: amode.imm += ret0offset @@ -281,7 +284,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) { switch cur.kind { case nop0: l := cur.nop0Label() - if pos, ok := ectx.LabelPositions[l]; ok { + if pos := ectx.LabelPositions[l]; pos != nil { pos.BinaryOffset = offset + size } case condBr: @@ -428,8 +431,10 @@ func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk * func (m *machine) Format() string { ectx := m.executableContext begins := map[*instruction]label{} - for l, pos := range ectx.LabelPositions { - begins[pos.Begin] = l + for _, pos := range ectx.LabelPositions { + if pos != nil { + begins[pos.Begin] = pos.L + } } irBlocks := map[label]ssa.BasicBlockID{} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go index 466fac4640..d9032f9218 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go @@ -70,7 +70,7 @@ func (m *machine) setupPrologue() { // +-----------------+ <----- SP // (low address) // - _amode := addressModePreOrPostIndex(spVReg, + _amode := addressModePreOrPostIndex(m, spVReg, -16, // stack pointer must be 16-byte aligned. true, // Decrement before store. ) @@ -159,7 +159,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc sizeOfArgRetReg = tmpRegVReg subSp := m.allocateInstr() - subSp.asALU(aluOpSub, operandNR(spVReg), operandNR(spVReg), operandNR(sizeOfArgRetReg), true) + subSp.asALU(aluOpSub, spVReg, operandNR(spVReg), operandNR(sizeOfArgRetReg), true) cur = linkInstr(cur, subSp) } else { sizeOfArgRetReg = xzrVReg @@ -168,7 +168,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc // Saves the return address (lr) and the size_of_arg_ret below the SP. // size_of_arg_ret is used for stack unwinding. pstr := m.allocateInstr() - amode := addressModePreOrPostIndex(spVReg, -16, true /* decrement before store */) + amode := addressModePreOrPostIndex(m, spVReg, -16, true /* decrement before store */) pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode) cur = linkInstr(cur, pstr) return cur @@ -182,7 +182,7 @@ func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction { } else { frameSizeReg = xzrVReg } - _amode := addressModePreOrPostIndex(spVReg, + _amode := addressModePreOrPostIndex(m, spVReg, -16, // stack pointer must be 16-byte aligned. true, // Decrement before store. ) @@ -213,7 +213,7 @@ func (m *machine) postRegAlloc() { m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0] default: // Removes the redundant copy instruction. - if cur.IsCopy() && cur.rn.realReg() == cur.rd.realReg() { + if cur.IsCopy() && cur.rn.realReg() == cur.rd.RealReg() { prev, next := cur.prev, cur.next // Remove the copy instruction. prev.next = next @@ -286,16 +286,16 @@ func (m *machine) setupEpilogueAfter(cur *instruction) { for i := range m.clobberedRegs { vr := m.clobberedRegs[l-i] // reverse order to restore. load := m.allocateInstr() - amode := addressModePreOrPostIndex(spVReg, + amode := addressModePreOrPostIndex(m, spVReg, 16, // stack pointer must be 16-byte aligned. false, // Increment after store. ) // TODO: pair loads to reduce the number of instructions. switch regTypeToRegisterSizeInBits(vr.RegType()) { case 64: // save int reg. - load.asULoad(operandNR(vr), amode, 64) + load.asULoad(vr, amode, 64) case 128: // save vector reg. - load.asFpuLoad(operandNR(vr), amode, 128) + load.asFpuLoad(vr, amode, 128) } cur = linkInstr(cur, load) } @@ -317,8 +317,8 @@ func (m *machine) setupEpilogueAfter(cur *instruction) { // SP----> +-----------------+ ldr := m.allocateInstr() - ldr.asULoad(operandNR(lrVReg), - addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64) + ldr.asULoad(lrVReg, + addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64) cur = linkInstr(cur, ldr) if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 { @@ -351,14 +351,14 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi if immm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok { // sub tmp, sp, #requiredStackSize sub := m.allocateInstr() - sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), immm12op, true) + sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), immm12op, true) cur = linkInstr(cur, sub) } else { // This case, we first load the requiredStackSize into the temporary register, cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize) // Then subtract it. sub := m.allocateInstr() - sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true) + sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), operandNR(tmpRegVReg), true) cur = linkInstr(cur, sub) } @@ -366,16 +366,18 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi // ldr tmp2, [executionContext #StackBottomPtr] ldr := m.allocateInstr() - ldr.asULoad(operandNR(tmp2), addressMode{ + amode := m.amodePool.Allocate() + *amode = addressMode{ kind: addressModeKindRegUnsignedImm12, rn: x0VReg, // execution context is always the first argument. imm: wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(), - }, 64) + } + ldr.asULoad(tmp2, amode, 64) cur = linkInstr(cur, ldr) // subs xzr, tmp, tmp2 subs := m.allocateInstr() - subs.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpRegVReg), operandNR(tmp2), true) + subs.asALU(aluOpSubS, xzrVReg, operandNR(tmpRegVReg), operandNR(tmp2), true) cur = linkInstr(cur, subs) // b.ge #imm @@ -388,22 +390,25 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi // First load the requiredStackSize into the temporary register, cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize) setRequiredStackSize := m.allocateInstr() - setRequiredStackSize.asStore(operandNR(tmpRegVReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(), - }, 64) + amode := m.amodePool.Allocate() + *amode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(), + } + setRequiredStackSize.asStore(operandNR(tmpRegVReg), amode, 64) cur = linkInstr(cur, setRequiredStackSize) } ldrAddress := m.allocateInstr() - ldrAddress.asULoad(operandNR(tmpRegVReg), addressMode{ + amode2 := m.amodePool.Allocate() + *amode2 = addressMode{ kind: addressModeKindRegUnsignedImm12, rn: x0VReg, // execution context is always the first argument imm: wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(), - }, 64) + } + ldrAddress.asULoad(tmpRegVReg, amode2, 64) cur = linkInstr(cur, ldrAddress) // Then jumps to the stack grow call sequence's address, meaning diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go index 1c8793b73d..c7eb92cc20 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go @@ -91,7 +91,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft } offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size()) - var amode addressMode + var amode *addressMode cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true) store := m.allocateInstr() store.asStore(operandNR(v), amode, typ.Bits()) @@ -116,16 +116,16 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af } offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size()) - var amode addressMode + var amode *addressMode cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true) load := m.allocateInstr() switch typ { case ssa.TypeI32, ssa.TypeI64: - load.asULoad(operandNR(v), amode, typ.Bits()) + load.asULoad(v, amode, typ.Bits()) case ssa.TypeF32, ssa.TypeF64: - load.asFpuLoad(operandNR(v), amode, typ.Bits()) + load.asFpuLoad(v, amode, typ.Bits()) case ssa.TypeV128: - load.asFpuLoad(operandNR(v), amode, 128) + load.asFpuLoad(v, amode, 128) default: panic("TODO") } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go index 3f36c84e57..6553707860 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go @@ -35,7 +35,7 @@ type ( iter int reversePostOrderBlocks []RegAllocBlock[I, m] // labelToRegAllocBlockIndex maps label to the index of reversePostOrderBlocks. - labelToRegAllocBlockIndex map[Label]int + labelToRegAllocBlockIndex [] /* Label to */ int loopNestingForestRoots []ssa.BasicBlock } @@ -56,10 +56,9 @@ type ( // NewRegAllocFunction returns a new RegAllocFunction. func NewRegAllocFunction[I regalloc.InstrConstraint, M RegAllocFunctionMachine[I]](m M, ssb ssa.Builder, c Compiler) *RegAllocFunction[I, M] { return &RegAllocFunction[I, M]{ - m: m, - ssb: ssb, - c: c, - labelToRegAllocBlockIndex: make(map[Label]int), + m: m, + ssb: ssb, + c: c, } } @@ -74,6 +73,9 @@ func (f *RegAllocFunction[I, M]) AddBlock(sb ssa.BasicBlock, l Label, begin, end end: end, id: int(sb.ID()), }) + if len(f.labelToRegAllocBlockIndex) <= int(l) { + f.labelToRegAllocBlockIndex = append(f.labelToRegAllocBlockIndex, make([]int, int(l)-len(f.labelToRegAllocBlockIndex)+1)...) + } f.labelToRegAllocBlockIndex[l] = i } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go index b4450d56fb..eacb6a7ef9 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go @@ -60,9 +60,8 @@ type ( phiDefInstListPool wazevoapi.Pool[phiDefInstList] // Followings are re-used during various places. - blks []Block - reals []RealReg - currentOccupants regInUseSet + blks []Block + reals []RealReg // Following two fields are updated while iterating the blocks in the reverse postorder. state state @@ -755,7 +754,8 @@ func (a *Allocator) allocBlock(f Function, blk Block) { killSet := a.reals[:0] // Gather the set of registers that will be used in the current instruction. - for _, use := range instr.Uses(&a.vs) { + uses := instr.Uses(&a.vs) + for _, use := range uses { if use.IsRealReg() { r := use.RealReg() currentUsedSet = currentUsedSet.add(r) @@ -770,7 +770,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) { } } - for i, use := range instr.Uses(&a.vs) { + for i, use := range uses { if !use.IsRealReg() { vs := s.getVRegState(use.ID()) killed := vs.lastUse == pc @@ -944,8 +944,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) { func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) { s := &a.state - for i := 0; i < 64; i++ { - allocated := RealReg(i) + for allocated := RealReg(0); allocated < 64; allocated++ { if allocated == addrReg { // If this is the call indirect, we should not touch the addr register. continue } @@ -974,11 +973,10 @@ func (a *Allocator) fixMergeState(f Function, blk Block) { bID := blk.ID() blkSt := a.getOrAllocateBlockState(bID) desiredOccupants := &blkSt.startRegs - aliveOnRegVRegs := make(map[VReg]RealReg) - for i := 0; i < 64; i++ { - r := RealReg(i) - if v := blkSt.startRegs.get(r); v.Valid() { - aliveOnRegVRegs[v] = r + var desiredOccupantsSet RegSet + for i, v := range desiredOccupants { + if v != VRegInvalid { + desiredOccupantsSet = desiredOccupantsSet.add(RealReg(i)) } } @@ -987,56 +985,38 @@ func (a *Allocator) fixMergeState(f Function, blk Block) { } s.currentBlockID = bID - a.updateLiveInVRState(a.getOrAllocateBlockState(bID)) + a.updateLiveInVRState(blkSt) - currentOccupants := &a.currentOccupants for i := 0; i < preds; i++ { - currentOccupants.reset() if i == blkSt.startFromPredIndex { continue } - currentOccupantsRev := make(map[VReg]RealReg) pred := blk.Pred(i) predSt := a.getOrAllocateBlockState(pred.ID()) - for ii := 0; ii < 64; ii++ { - r := RealReg(ii) - if v := predSt.endRegs.get(r); v.Valid() { - if _, ok := aliveOnRegVRegs[v]; !ok { - continue - } - currentOccupants.add(r, v) - currentOccupantsRev[v] = r - } - } s.resetAt(predSt) // Finds the free registers if any. intTmp, floatTmp := VRegInvalid, VRegInvalid if intFree := s.findAllocatable( - a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupants.set, + a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupantsSet, ); intFree != RealRegInvalid { intTmp = FromRealReg(intFree, RegTypeInt) } if floatFree := s.findAllocatable( - a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupants.set, + a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupantsSet, ); floatFree != RealRegInvalid { floatTmp = FromRealReg(floatFree, RegTypeFloat) } - if wazevoapi.RegAllocLoggingEnabled { - fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo)) - } - - for ii := 0; ii < 64; ii++ { - r := RealReg(ii) + for r := RealReg(0); r < 64; r++ { desiredVReg := desiredOccupants.get(r) if !desiredVReg.Valid() { continue } - currentVReg := currentOccupants.get(r) + currentVReg := s.regsInUse.get(r) if desiredVReg.ID() == currentVReg.ID() { continue } @@ -1048,86 +1028,95 @@ func (a *Allocator) fixMergeState(f Function, blk Block) { } else { tmpRealReg = floatTmp } - a.reconcileEdge(f, r, pred, currentOccupants, currentOccupantsRev, currentVReg, desiredVReg, tmpRealReg, typ) + a.reconcileEdge(f, r, pred, currentVReg, desiredVReg, tmpRealReg, typ) } } } +// reconcileEdge reconciles the register state between the current block and the predecessor for the real register `r`. +// +// - currentVReg is the current VReg value that sits on the register `r`. This can be VRegInvalid if the register is not used at the end of the predecessor. +// - desiredVReg is the desired VReg value that should be on the register `r`. +// - freeReg is the temporary register that can be used to swap the values, which may or may not be used. +// - typ is the register type of the `r`. func (a *Allocator) reconcileEdge(f Function, r RealReg, pred Block, - currentOccupants *regInUseSet, - currentOccupantsRev map[VReg]RealReg, currentVReg, desiredVReg VReg, freeReg VReg, typ RegType, ) { + // There are four cases to consider: + // 1. currentVReg is valid, but desiredVReg is on the stack. + // 2. Both currentVReg and desiredVReg are valid. + // 3. Desired is on a different register than `r` and currentReg is not valid. + // 4. Desired is on the stack and currentReg is not valid. + s := &a.state if currentVReg.Valid() { - // Both are on reg. - er, ok := currentOccupantsRev[desiredVReg] - if !ok { + desiredState := s.getVRegState(desiredVReg.ID()) + er := desiredState.r + if er == RealRegInvalid { + // Case 1: currentVReg is valid, but desiredVReg is on the stack. if wazevoapi.RegAllocLoggingEnabled { fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n", desiredVReg.ID(), a.regInfo.RealRegName(r), ) } - // This case is that the desired value is on the stack, but currentVReg is on the target register. - // We need to move the current value to the stack, and reload the desired value. + // We need to move the current value to the stack, and reload the desired value into the register. // TODO: we can do better here. f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion()) - delete(currentOccupantsRev, currentVReg) + s.releaseRealReg(r) s.getVRegState(desiredVReg.ID()).recordReload(f, pred) f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion()) - currentOccupants.add(r, desiredVReg) - currentOccupantsRev[desiredVReg] = r + s.useRealReg(r, desiredVReg) return - } - - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n", - desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er), + } else { + // Case 2: Both currentVReg and desiredVReg are valid. + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n", + desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er), + ) + } + // This case, we need to swap the values between the current and desired values. + f.SwapBefore( + currentVReg.SetRealReg(r), + desiredVReg.SetRealReg(er), + freeReg, + pred.LastInstrForInsertion(), ) - } - f.SwapBefore( - currentVReg.SetRealReg(r), - desiredVReg.SetRealReg(er), - freeReg, - pred.LastInstrForInsertion(), - ) - s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg()) - currentOccupantsRev[desiredVReg] = r - currentOccupantsRev[currentVReg] = er - currentOccupants.add(r, desiredVReg) - currentOccupants.add(er, currentVReg) - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er)) + s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg()) + s.releaseRealReg(r) + s.releaseRealReg(er) + s.useRealReg(r, desiredVReg) + s.useRealReg(er, currentVReg) + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er)) + } } } else { - // Desired is on reg, but currently the target register is not used. if wazevoapi.RegAllocLoggingEnabled { fmt.Printf("\t\tv%d is desired to be on %s, current not used\n", desiredVReg.ID(), a.regInfo.RealRegName(r), ) } - if currentReg, ok := currentOccupantsRev[desiredVReg]; ok { + if currentReg := s.getVRegState(desiredVReg.ID()).r; currentReg != RealRegInvalid { + // Case 3: Desired is on a different register than `r` and currentReg is not valid. + // We simply need to move the desired value to the register. f.InsertMoveBefore( FromRealReg(r, typ), desiredVReg.SetRealReg(currentReg), pred.LastInstrForInsertion(), ) - currentOccupants.remove(currentReg) + s.releaseRealReg(currentReg) } else { + // Case 4: Both currentVReg and desiredVReg are not valid. + // We simply need to reload the desired value into the register. s.getVRegState(desiredVReg.ID()).recordReload(f, pred) f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion()) } - currentOccupantsRev[desiredVReg] = r - currentOccupants.add(r, desiredVReg) - } - - if wazevoapi.RegAllocLoggingEnabled { - fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo)) + s.useRealReg(r, desiredVReg) } } @@ -1169,8 +1158,7 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) { } for pos != definingBlk { st := a.getOrAllocateBlockState(pos.ID()) - for ii := 0; ii < 64; ii++ { - rr := RealReg(ii) + for rr := RealReg(0); rr < 64; rr++ { if st.startRegs.get(rr) == v { r = rr // Already in the register, so we can place the spill at the beginning of the block. diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go index e9bf60661c..04a8e8f4db 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go @@ -46,23 +46,24 @@ func (rs RegSet) Range(f func(allocatedRealReg RealReg)) { } } -type regInUseSet struct { - set RegSet - vrs [64]VReg +type regInUseSet [64]VReg + +func newRegInUseSet() regInUseSet { + var ret regInUseSet + ret.reset() + return ret } func (rs *regInUseSet) reset() { - rs.set = 0 - for i := range rs.vrs { - rs.vrs[i] = VRegInvalid + for i := range rs { + rs[i] = VRegInvalid } } func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused var ret []string - for i := 0; i < 64; i++ { - if rs.set&(1<v%d)", info.RealRegName(RealReg(i)), vr.ID())) } } @@ -70,39 +71,28 @@ func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused } func (rs *regInUseSet) has(r RealReg) bool { - if r >= 64 { - return false - } - return rs.set&(1<= 64 { - return VRegInvalid - } - return rs.vrs[r] + return rs[r] } func (rs *regInUseSet) remove(r RealReg) { - if r >= 64 { - return - } - rs.set &= ^(1 << uint(r)) - rs.vrs[r] = VRegInvalid + rs[r] = VRegInvalid } func (rs *regInUseSet) add(r RealReg, vr VReg) { if r >= 64 { return } - rs.set |= 1 << uint(r) - rs.vrs[r] = vr + rs[r] = vr } func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) { - for i := 0; i < 64; i++ { - if rs.set&(1< 0 { - b = uint64(uintptr(unsafe.Pointer(&mem.Buffer[0]))) - } - binary.LittleEndian.PutUint64(opaque[offset:], b) - binary.LittleEndian.PutUint64(opaque[offset+8:], s) -} - func (m *moduleEngine) setupOpaque() { inst := m.module offsets := &m.parent.offsets @@ -106,7 +96,7 @@ func (m *moduleEngine) setupOpaque() { ) if lm := offsets.LocalMemoryBegin; lm >= 0 { - putLocalMemory(opaque, lm, inst.MemoryInstance) + m.putLocalMemory() } // Note: imported memory is resolved in ResolveImportedFunction. @@ -227,6 +217,25 @@ func (m *moduleEngine) SetGlobalValue(i wasm.Index, lo, hi uint64) { // OwnsGlobals implements the same method as documented on wasm.ModuleEngine. func (m *moduleEngine) OwnsGlobals() bool { return true } +// MemoryGrown implements wasm.ModuleEngine. +func (m *moduleEngine) MemoryGrown() { + m.putLocalMemory() +} + +// putLocalMemory writes the local memory buffer pointer and length to the opaque buffer. +func (m *moduleEngine) putLocalMemory() { + mem := m.module.MemoryInstance + offset := m.parent.offsets.LocalMemoryBegin + + s := uint64(len(mem.Buffer)) + var b uint64 + if len(mem.Buffer) > 0 { + b = uint64(uintptr(unsafe.Pointer(&mem.Buffer[0]))) + } + binary.LittleEndian.PutUint64(m.opaque[offset:], b) + binary.LittleEndian.PutUint64(m.opaque[offset+8:], s) +} + // ResolveImportedFunction implements wasm.ModuleEngine. func (m *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) { executableOffset, moduleCtxOffset, typeIDOffset := m.parent.offsets.ImportedFunctionOffset(index) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go index 10b6b4b62b..39627b9898 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go @@ -49,21 +49,12 @@ type BasicBlock interface { // ReturnBlock returns ture if this block represents the function return. ReturnBlock() bool - // FormatHeader returns the debug string of this block, not including instruction. - FormatHeader(b Builder) string - // Valid is true if this block is still valid even after optimizations. Valid() bool // Sealed is true if this block has been sealed. Sealed() bool - // BeginPredIterator returns the first predecessor of this block. - BeginPredIterator() BasicBlock - - // NextPredIterator returns the next predecessor of this block. - NextPredIterator() BasicBlock - // Preds returns the number of predecessors of this block. Preds() int @@ -88,10 +79,11 @@ type ( basicBlock struct { id BasicBlockID rootInstr, currentInstr *Instruction - params []blockParam - predIter int - preds []basicBlockPredecessorInfo - success []*basicBlock + // params are Values that represent parameters to a basicBlock. + // Each parameter can be considered as an output of PHI instruction in traditional SSA. + params []Value + preds []basicBlockPredecessorInfo + success []*basicBlock // singlePred is the alias to preds[0] for fast lookup, and only set after Seal is called. singlePred *basicBlock // lastDefinitions maps Variable to its last definition in this block. @@ -116,11 +108,14 @@ type ( // loopNestingForestChildren holds the children of this block in the loop nesting forest. // Non-empty if and only if this block is a loop header (i.e. loopHeader=true) - loopNestingForestChildren []BasicBlock + loopNestingForestChildren wazevoapi.VarLength[BasicBlock] // reversePostOrder is used to sort all the blocks in the function in reverse post order. // This is used in builder.LayoutBlocks. - reversePostOrder int + reversePostOrder int32 + + // visited is used during various traversals. + visited int32 // child and sibling are the ones in the dominator tree. child, sibling *basicBlock @@ -128,15 +123,6 @@ type ( // BasicBlockID is the unique ID of a basicBlock. BasicBlockID uint32 - // blockParam implements Value and represents a parameter to a basicBlock. - blockParam struct { - // value is the Value that corresponds to the parameter in this block, - // and can be considered as an output of PHI instruction in traditional SSA. - value Value - // typ is the type of the parameter. - typ Type - } - unknownValue struct { // variable is the variable that this unknownValue represents. variable Variable @@ -145,6 +131,9 @@ type ( } ) +// basicBlockVarLengthNil is the default nil value for basicBlock.loopNestingForestChildren. +var basicBlockVarLengthNil = wazevoapi.NewNilVarLength[BasicBlock]() + const basicBlockIDReturnBlock = 0xffffffff // Name implements BasicBlock.Name. @@ -190,13 +179,13 @@ func (bb *basicBlock) ReturnBlock() bool { // AddParam implements BasicBlock.AddParam. func (bb *basicBlock) AddParam(b Builder, typ Type) Value { paramValue := b.allocateValue(typ) - bb.params = append(bb.params, blockParam{typ: typ, value: paramValue}) + bb.params = append(bb.params, paramValue) return paramValue } // addParamOn adds a parameter to this block whose value is already allocated. -func (bb *basicBlock) addParamOn(typ Type, value Value) { - bb.params = append(bb.params, blockParam{typ: typ, value: value}) +func (bb *basicBlock) addParamOn(value Value) { + bb.params = append(bb.params, value) } // Params implements BasicBlock.Params. @@ -206,8 +195,7 @@ func (bb *basicBlock) Params() int { // Param implements BasicBlock.Param. func (bb *basicBlock) Param(i int) Value { - p := &bb.params[i] - return p.value + return bb.params[i] } // Valid implements BasicBlock.Valid. @@ -248,22 +236,6 @@ func (bb *basicBlock) NumPreds() int { return len(bb.preds) } -// BeginPredIterator implements BasicBlock.BeginPredIterator. -func (bb *basicBlock) BeginPredIterator() BasicBlock { - bb.predIter = 0 - return bb.NextPredIterator() -} - -// NextPredIterator implements BasicBlock.NextPredIterator. -func (bb *basicBlock) NextPredIterator() BasicBlock { - if bb.predIter >= len(bb.preds) { - return nil - } - pred := bb.preds[bb.predIter].blk - bb.predIter++ - return pred -} - // Preds implements BasicBlock.Preds. func (bb *basicBlock) Preds() int { return len(bb.preds) @@ -305,7 +277,8 @@ func resetBasicBlock(bb *basicBlock) { bb.unknownValues = bb.unknownValues[:0] bb.lastDefinitions = wazevoapi.ResetMap(bb.lastDefinitions) bb.reversePostOrder = -1 - bb.loopNestingForestChildren = bb.loopNestingForestChildren[:0] + bb.visited = 0 + bb.loopNestingForestChildren = basicBlockVarLengthNil bb.loopHeader = false bb.sibling = nil bb.child = nil @@ -335,11 +308,11 @@ func (bb *basicBlock) addPred(blk BasicBlock, branch *Instruction) { pred.success = append(pred.success, bb) } -// FormatHeader implements BasicBlock.FormatHeader. -func (bb *basicBlock) FormatHeader(b Builder) string { +// formatHeader returns the string representation of the header of the basicBlock. +func (bb *basicBlock) formatHeader(b Builder) string { ps := make([]string, len(bb.params)) for i, p := range bb.params { - ps[i] = p.value.formatWithType(b) + ps[i] = p.formatWithType(b) } if len(bb.preds) > 0 { @@ -398,7 +371,7 @@ func (bb *basicBlock) String() string { // LoopNestingForestChildren implements BasicBlock.LoopNestingForestChildren. func (bb *basicBlock) LoopNestingForestChildren() []BasicBlock { - return bb.loopNestingForestChildren + return bb.loopNestingForestChildren.View() } // LoopHeader implements BasicBlock.LoopHeader. diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go index 1fc84d2eaf..0b700c4b19 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go @@ -54,9 +54,6 @@ type Builder interface { // MustFindValue searches the latest definition of the given Variable and returns the result. MustFindValue(variable Variable) Value - // MustFindValueInBlk is the same as MustFindValue except it searches the latest definition from the given BasicBlock. - MustFindValueInBlk(variable Variable, blk BasicBlock) Value - // FindValueInLinearPath tries to find the latest definition of the given Variable in the linear path to the current BasicBlock. // If it cannot find the definition, or it's not sealed yet, it returns ValueInvalid. FindValueInLinearPath(variable Variable) Value @@ -127,7 +124,11 @@ type Builder interface { // Idom returns the immediate dominator of the given BasicBlock. Idom(blk BasicBlock) BasicBlock + // VarLengthPool returns the VarLengthPool of Value. VarLengthPool() *wazevoapi.VarLengthPool[Value] + + // InsertZeroValue inserts a zero value constant instruction of the given type. + InsertZeroValue(t Type) } // NewBuilder returns a new Builder implementation. @@ -135,10 +136,10 @@ func NewBuilder() Builder { return &builder{ instructionsPool: wazevoapi.NewPool[Instruction](resetInstruction), basicBlocksPool: wazevoapi.NewPool[basicBlock](resetBasicBlock), + varLengthBasicBlockPool: wazevoapi.NewVarLengthPool[BasicBlock](), varLengthPool: wazevoapi.NewVarLengthPool[Value](), valueAnnotations: make(map[ValueID]string), signatures: make(map[SignatureID]*Signature), - blkVisited: make(map[*basicBlock]int), valueIDAliases: make(map[ValueID]Value), redundantParameterIndexToValue: make(map[int]Value), returnBlk: &basicBlock{id: basicBlockIDReturnBlock}, @@ -177,12 +178,13 @@ type builder struct { dominators []*basicBlock sparseTree dominatorSparseTree + varLengthBasicBlockPool wazevoapi.VarLengthPool[BasicBlock] + // loopNestingForestRoots are the roots of the loop nesting forest. loopNestingForestRoots []BasicBlock // The followings are used for optimization passes/deterministic compilation. instStack []*Instruction - blkVisited map[*basicBlock]int valueIDToInstruction []*Instruction blkStack []*basicBlock blkStack2 []*basicBlock @@ -200,6 +202,32 @@ type builder struct { donePostBlockLayoutPasses bool currentSourceOffset SourceOffset + + // zeros are the zero value constants for each type. + zeros [typeEnd]Value +} + +// InsertZeroValue implements Builder.InsertZeroValue. +func (b *builder) InsertZeroValue(t Type) { + if b.zeros[t].Valid() { + return + } + zeroInst := b.AllocateInstruction() + switch t { + case TypeI32: + zeroInst.AsIconst32(0) + case TypeI64: + zeroInst.AsIconst64(0) + case TypeF32: + zeroInst.AsF32const(0) + case TypeF64: + zeroInst.AsF64const(0) + case TypeV128: + zeroInst.AsVconst(0, 0) + default: + panic("TODO: " + t.String()) + } + b.zeros[t] = zeroInst.Insert(b).Return() } func (b *builder) VarLengthPool() *wazevoapi.VarLengthPool[Value] { @@ -215,10 +243,12 @@ func (b *builder) ReturnBlock() BasicBlock { func (b *builder) Init(s *Signature) { b.nextVariable = 0 b.currentSignature = s + b.zeros = [typeEnd]Value{ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid} resetBasicBlock(b.returnBlk) b.instructionsPool.Reset() b.basicBlocksPool.Reset() b.varLengthPool.Reset() + b.varLengthBasicBlockPool.Reset() b.donePreBlockLayoutPasses = false b.doneBlockLayout = false b.donePostBlockLayoutPasses = false @@ -231,11 +261,6 @@ func (b *builder) Init(s *Signature) { b.blkStack2 = b.blkStack2[:0] b.dominators = b.dominators[:0] b.loopNestingForestRoots = b.loopNestingForestRoots[:0] - - for i := 0; i < b.basicBlocksPool.Allocated(); i++ { - blk := b.basicBlocksPool.View(i) - delete(b.blkVisited, blk) - } b.basicBlocksPool.Reset() for v := ValueID(0); v < b.nextValueID; v++ { @@ -448,11 +473,6 @@ func (b *builder) findValueInLinearPath(variable Variable, blk *basicBlock) Valu return ValueInvalid } -func (b *builder) MustFindValueInBlk(variable Variable, blk BasicBlock) Value { - typ := b.definedVariableType(variable) - return b.findValue(typ, variable, blk.(*basicBlock)) -} - // MustFindValue implements Builder.MustFindValue. func (b *builder) MustFindValue(variable Variable) Value { typ := b.definedVariableType(variable) @@ -482,6 +502,9 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value value: value, }) return value + } else if blk.EntryBlock() { + // If this is the entry block, we reach the uninitialized variable which has zero value. + return b.zeros[b.definedVariableType(variable)] } if pred := blk.singlePred; pred != nil { @@ -495,21 +518,42 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value // If this block has multiple predecessors, we have to gather the definitions, // and treat them as an argument to this block. // - // The first thing is to define a new parameter to this block which may or may not be redundant, but - // later we eliminate trivial params in an optimization pass. This must be done before finding the - // definitions in the predecessors so that we can break the cycle. - paramValue := blk.AddParam(b, typ) - b.DefineVariable(variable, paramValue, blk) - - // After the new param is added, we have to manipulate the original branching instructions - // in predecessors so that they would pass the definition of `variable` as the argument to - // the newly added PHI. + // But before that, we have to check if the possible definitions are the same Value. + tmpValue := b.allocateValue(typ) + // Break the cycle by defining the variable with the tmpValue. + b.DefineVariable(variable, tmpValue, blk) + // Check all the predecessors if they have the same definition. + uniqueValue := ValueInvalid for i := range blk.preds { - pred := &blk.preds[i] - value := b.findValue(typ, variable, pred.blk) - pred.branch.addArgumentBranchInst(b, value) + predValue := b.findValue(typ, variable, blk.preds[i].blk) + if uniqueValue == ValueInvalid { + uniqueValue = predValue + } else if uniqueValue != predValue { + uniqueValue = ValueInvalid + break + } + } + + if uniqueValue != ValueInvalid { + // If all the predecessors have the same definition, we can use that value. + b.DefineVariable(variable, uniqueValue, blk) + b.alias(tmpValue, uniqueValue) + return uniqueValue + } else { + // Otherwise, add the tmpValue to this block as a parameter which may or may not be redundant, but + // later we eliminate trivial params in an optimization pass. This must be done before finding the + // definitions in the predecessors so that we can break the cycle. + blk.addParamOn(tmpValue) + // After the new param is added, we have to manipulate the original branching instructions + // in predecessors so that they would pass the definition of `variable` as the argument to + // the newly added PHI. + for i := range blk.preds { + pred := &blk.preds[i] + value := b.findValue(typ, variable, pred.blk) + pred.branch.addArgumentBranchInst(b, value) + } + return tmpValue } - return paramValue } // Seal implements Builder.Seal. @@ -523,7 +567,7 @@ func (b *builder) Seal(raw BasicBlock) { for _, v := range blk.unknownValues { variable, phiValue := v.variable, v.value typ := b.definedVariableType(variable) - blk.addParamOn(typ, phiValue) + blk.addParamOn(phiValue) for i := range blk.preds { pred := &blk.preds[i] predValue := b.findValue(typ, variable, pred.blk) @@ -566,7 +610,7 @@ func (b *builder) Format() string { } for bb := iterBegin(); bb != nil; bb = iterNext() { str.WriteByte('\n') - str.WriteString(bb.FormatHeader(b)) + str.WriteString(bb.formatHeader(b)) str.WriteByte('\n') for cur := bb.Root(); cur != nil; cur = cur.Next() { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go index a2e986cd15..89ec34b7eb 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go @@ -22,9 +22,9 @@ func (b *builder) RunPasses() { func (b *builder) runPreBlockLayoutPasses() { passSortSuccessors(b) passDeadBlockEliminationOpt(b) - passRedundantPhiEliminationOpt(b) // The result of passCalculateImmediateDominators will be used by various passes below. passCalculateImmediateDominators(b) + passRedundantPhiEliminationOpt(b) passNopInstElimination(b) // TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic. @@ -78,12 +78,11 @@ func (b *builder) runFinalizingPasses() { // passDeadBlockEliminationOpt searches the unreachable blocks, and sets the basicBlock.invalid flag true if so. func passDeadBlockEliminationOpt(b *builder) { entryBlk := b.entryBlk() - b.clearBlkVisited() b.blkStack = append(b.blkStack, entryBlk) for len(b.blkStack) > 0 { reachableBlk := b.blkStack[len(b.blkStack)-1] b.blkStack = b.blkStack[:len(b.blkStack)-1] - b.blkVisited[reachableBlk] = 0 // the value won't be used in this pass. + reachableBlk.visited = 1 if !reachableBlk.sealed && !reachableBlk.ReturnBlock() { panic(fmt.Sprintf("%s is not sealed", reachableBlk)) @@ -94,7 +93,7 @@ func passDeadBlockEliminationOpt(b *builder) { } for _, succ := range reachableBlk.success { - if _, ok := b.blkVisited[succ]; ok { + if succ.visited == 1 { continue } b.blkStack = append(b.blkStack, succ) @@ -102,13 +101,16 @@ func passDeadBlockEliminationOpt(b *builder) { } for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - if _, ok := b.blkVisited[blk]; !ok { + if blk.visited != 1 { blk.invalid = true } + blk.visited = 0 } } // passRedundantPhiEliminationOpt eliminates the redundant PHIs (in our terminology, parameters of a block). +// This requires the reverse post-order traversal to be calculated before calling this function, +// hence passCalculateImmediateDominators must be called before this. func passRedundantPhiEliminationOpt(b *builder) { redundantParameterIndexes := b.ints[:0] // reuse the slice from previous iterations. @@ -118,15 +120,18 @@ func passRedundantPhiEliminationOpt(b *builder) { // relatively small. For example, sqlite speedtest binary results in the large number of redundant PHIs, // the maximum number of iteration was 22, which seems to be acceptable but not that small either since the // complexity here is O(BlockNum * Iterations) at the worst case where BlockNum might be the order of thousands. + // -- Note -- + // Currently, each iteration can run in any order of blocks, but it empirically converges quickly in practice when + // running on the reverse post-order. It might be possible to optimize this further by using the dominator tree. for { changed := false - _ = b.blockIteratorBegin() // skip entry block! + _ = b.blockIteratorReversePostOrderBegin() // skip entry block! // Below, we intentionally use the named iteration variable name, as this comes with inevitable nested for loops! - for blk := b.blockIteratorNext(); blk != nil; blk = b.blockIteratorNext() { + for blk := b.blockIteratorReversePostOrderNext(); blk != nil; blk = b.blockIteratorReversePostOrderNext() { paramNum := len(blk.params) for paramIndex := 0; paramIndex < paramNum; paramIndex++ { - phiValue := blk.params[paramIndex].value + phiValue := blk.params[paramIndex] redundant := true nonSelfReferencingValue := ValueInvalid @@ -184,7 +189,7 @@ func passRedundantPhiEliminationOpt(b *builder) { // Still need to have the definition of the value of the PHI (previously as the parameter). for _, redundantParamIndex := range redundantParameterIndexes { - phiValue := blk.params[redundantParamIndex].value + phiValue := blk.params[redundantParamIndex] onlyValue := b.redundantParameterIndexToValue[redundantParamIndex] // Create an alias in this block from the only phi argument to the phi value. b.alias(phiValue, onlyValue) @@ -227,10 +232,10 @@ func passRedundantPhiEliminationOpt(b *builder) { func passDeadCodeEliminationOpt(b *builder) { nvid := int(b.nextValueID) if nvid >= len(b.valueRefCounts) { - b.valueRefCounts = append(b.valueRefCounts, make([]int, b.nextValueID)...) + b.valueRefCounts = append(b.valueRefCounts, make([]int, nvid-len(b.valueRefCounts)+1)...) } if nvid >= len(b.valueIDToInstruction) { - b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) + b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, nvid-len(b.valueIDToInstruction)+1)...) } // First, we gather all the instructions with side effects. @@ -350,22 +355,10 @@ func (b *builder) incRefCount(id ValueID, from *Instruction) { b.valueRefCounts[id]++ } -// clearBlkVisited clears the b.blkVisited map so that we can reuse it for multiple places. -func (b *builder) clearBlkVisited() { - b.blkStack2 = b.blkStack2[:0] - for key := range b.blkVisited { - b.blkStack2 = append(b.blkStack2, key) - } - for _, blk := range b.blkStack2 { - delete(b.blkVisited, blk) - } - b.blkStack2 = b.blkStack2[:0] -} - // passNopInstElimination eliminates the instructions which is essentially a no-op. func passNopInstElimination(b *builder) { if int(b.nextValueID) >= len(b.valueIDToInstruction) { - b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) + b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, int(b.nextValueID)-len(b.valueIDToInstruction)+1)...) } for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go index 9068180a0b..584b5eadea 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go @@ -23,8 +23,6 @@ import ( // // This heuristic is done in maybeInvertBranches function. func passLayoutBlocks(b *builder) { - b.clearBlkVisited() - // We might end up splitting critical edges which adds more basic blocks, // so we store the currently existing basic blocks in nonSplitBlocks temporarily. // That way we can iterate over the original basic blocks while appending new ones into reversePostOrderedBasicBlocks. @@ -47,20 +45,20 @@ func passLayoutBlocks(b *builder) { for _, blk := range nonSplitBlocks { for i := range blk.preds { pred := blk.preds[i].blk - if _, ok := b.blkVisited[pred]; ok || !pred.Valid() { + if pred.visited == 1 || !pred.Valid() { continue } else if pred.reversePostOrder < blk.reversePostOrder { // This means the edge is critical, and this pred is the trampoline and yet to be inserted. // Split edge trampolines must come before the destination in reverse post-order. b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, pred) - b.blkVisited[pred] = 0 // mark as inserted, the value is not used. + pred.visited = 1 // mark as inserted. } } // Now that we've already added all the potential trampoline blocks incoming to this block, // we can add this block itself. b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, blk) - b.blkVisited[blk] = 0 // mark as inserted, the value is not used. + blk.visited = 1 // mark as inserted. if len(blk.success) < 2 { // There won't be critical edge originating from this block. @@ -116,7 +114,7 @@ func passLayoutBlocks(b *builder) { if fallthroughBranch.opcode == OpcodeJump && fallthroughBranch.blk == trampoline { // This can be lowered as fallthrough at the end of the block. b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline) - b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used. + trampoline.visited = 1 // mark as inserted. } else { uninsertedTrampolines = append(uninsertedTrampolines, trampoline) } @@ -126,7 +124,7 @@ func passLayoutBlocks(b *builder) { if trampoline.success[0].reversePostOrder <= trampoline.reversePostOrder { // "<=", not "<" because the target might be itself. // This means the critical edge was backward, so we insert after the current block immediately. b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline) - b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used. + trampoline.visited = 1 // mark as inserted. } // If the target is forward, we can wait to insert until the target is inserted. } uninsertedTrampolines = uninsertedTrampolines[:0] // Reuse the stack for the next block. @@ -142,8 +140,8 @@ func passLayoutBlocks(b *builder) { if wazevoapi.SSAValidationEnabled { for _, trampoline := range trampolines { - if _, ok := b.blkVisited[trampoline]; !ok { - panic("BUG: trampoline block not inserted: " + trampoline.FormatHeader(b)) + if trampoline.visited != 1 { + panic("BUG: trampoline block not inserted: " + trampoline.formatHeader(b)) } trampoline.validate(b) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go index 50cb9c4750..e8288c4bd3 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go @@ -15,10 +15,6 @@ import ( // At the last of pass, this function also does the loop detection and sets the basicBlock.loop flag. func passCalculateImmediateDominators(b *builder) { reversePostOrder := b.reversePostOrderedBasicBlocks[:0] - exploreStack := b.blkStack[:0] - b.clearBlkVisited() - - entryBlk := b.entryBlk() // Store the reverse postorder from the entrypoint into reversePostOrder slice. // This calculation of reverse postorder is not described in the paper, @@ -28,14 +24,17 @@ func passCalculateImmediateDominators(b *builder) { // which is a reasonable assumption as long as SSA Builder is properly used. // // First we push blocks in postorder iteratively visit successors of the entry block. - exploreStack = append(exploreStack, entryBlk) + entryBlk := b.entryBlk() + exploreStack := append(b.blkStack[:0], entryBlk) + // These flags are used to track the state of the block in the DFS traversal. + // We temporarily use the reversePostOrder field to store the state. const visitStateUnseen, visitStateSeen, visitStateDone = 0, 1, 2 - b.blkVisited[entryBlk] = visitStateSeen + entryBlk.visited = visitStateSeen for len(exploreStack) > 0 { tail := len(exploreStack) - 1 blk := exploreStack[tail] exploreStack = exploreStack[:tail] - switch b.blkVisited[blk] { + switch blk.visited { case visitStateUnseen: // This is likely a bug in the frontend. panic("BUG: unsupported CFG") @@ -48,16 +47,18 @@ func passCalculateImmediateDominators(b *builder) { if succ.ReturnBlock() || succ.invalid { continue } - if b.blkVisited[succ] == visitStateUnseen { - b.blkVisited[succ] = visitStateSeen + if succ.visited == visitStateUnseen { + succ.visited = visitStateSeen exploreStack = append(exploreStack, succ) } } // Finally, we could pop this block once we pop all of its successors. - b.blkVisited[blk] = visitStateDone + blk.visited = visitStateDone case visitStateDone: // Note: at this point we push blk in postorder despite its name. reversePostOrder = append(reversePostOrder, blk) + default: + panic("BUG") } } // At this point, reversePostOrder has postorder actually, so we reverse it. @@ -67,7 +68,7 @@ func passCalculateImmediateDominators(b *builder) { } for i, blk := range reversePostOrder { - blk.reversePostOrder = i + blk.reversePostOrder = int32(i) } // Reuse the dominators slice if possible from the previous computation of function. @@ -180,7 +181,7 @@ func passBuildLoopNestingForest(b *builder) { b.loopNestingForestRoots = append(b.loopNestingForestRoots, blk) } else if n == ent { } else if n.loopHeader { - n.loopNestingForestChildren = append(n.loopNestingForestChildren, blk) + n.loopNestingForestChildren = n.loopNestingForestChildren.Append(&b.varLengthBasicBlockPool, blk) } } @@ -193,7 +194,7 @@ func passBuildLoopNestingForest(b *builder) { func printLoopNestingForest(root *basicBlock, depth int) { fmt.Println(strings.Repeat("\t", depth), "loop nesting forest root:", root.ID()) - for _, child := range root.loopNestingForestChildren { + for _, child := range root.loopNestingForestChildren.View() { fmt.Println(strings.Repeat("\t", depth+1), "child:", child.ID()) if child.LoopHeader() { printLoopNestingForest(child.(*basicBlock), depth+2) @@ -202,10 +203,10 @@ func printLoopNestingForest(root *basicBlock, depth int) { } type dominatorSparseTree struct { - time int + time int32 euler []*basicBlock - first, depth []int - table [][]int + first, depth []int32 + table [][]int32 } // passBuildDominatorTree builds the dominator tree for the function, and constructs builder.sparseTree. @@ -232,11 +233,11 @@ func passBuildDominatorTree(b *builder) { n := b.basicBlocksPool.Allocated() st := &b.sparseTree st.euler = append(st.euler[:0], make([]*basicBlock, 2*n-1)...) - st.first = append(st.first[:0], make([]int, n)...) + st.first = append(st.first[:0], make([]int32, n)...) for i := range st.first { st.first[i] = -1 } - st.depth = append(st.depth[:0], make([]int, 2*n-1)...) + st.depth = append(st.depth[:0], make([]int32, 2*n-1)...) st.time = 0 // Start building the sparse tree. @@ -244,9 +245,9 @@ func passBuildDominatorTree(b *builder) { st.buildSparseTable() } -func (dt *dominatorSparseTree) eulerTour(node *basicBlock, height int) { +func (dt *dominatorSparseTree) eulerTour(node *basicBlock, height int32) { if wazevoapi.SSALoggingEnabled { - fmt.Println(strings.Repeat("\t", height), "euler tour:", node.ID()) + fmt.Println(strings.Repeat("\t", int(height)), "euler tour:", node.ID()) } dt.euler[dt.time] = node dt.depth[dt.time] = height @@ -270,13 +271,13 @@ func (dt *dominatorSparseTree) buildSparseTable() { table := dt.table if n >= len(table) { - table = append(table, make([][]int, n+1)...) + table = append(table, make([][]int32, n-len(table)+1)...) } for i := range table { if len(table[i]) < k { - table[i] = append(table[i], make([]int, k)...) + table[i] = append(table[i], make([]int32, k-len(table[i]))...) } - table[i][0] = i + table[i][0] = int32(i) } for j := 1; 1< 0 { - m.NonStaticLocals[idx] = nonStaticLocals - } - functionType := &m.TypeSection[m.FunctionSection[idx]] code := &m.CodeSection[idx] body := code.Body @@ -357,7 +352,6 @@ func (m *Module) validateFunctionWithMaxStackValues( return fmt.Errorf("invalid local index for %s %d >= %d(=len(locals)+len(parameters))", OpcodeLocalSetName, index, l) } - nonStaticLocals[index] = struct{}{} var expType ValueType if index < inputLen { expType = functionType.Params[index] @@ -373,7 +367,6 @@ func (m *Module) validateFunctionWithMaxStackValues( return fmt.Errorf("invalid local index for %s %d >= %d(=len(locals)+len(parameters))", OpcodeLocalTeeName, index, l) } - nonStaticLocals[index] = struct{}{} var expType ValueType if index < inputLen { expType = functionType.Params[index] diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go index 5cc5012dae..947b16112d 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go @@ -59,11 +59,14 @@ type MemoryInstance struct { // with a fixed weight of 1 and no spurious notifications. waiters sync.Map + // ownerModuleEngine is the module engine that owns this memory instance. + ownerModuleEngine ModuleEngine + expBuffer experimental.LinearMemory } // NewMemoryInstance creates a new instance based on the parameters in the SectionIDMemory. -func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator) *MemoryInstance { +func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator, moduleEngine ModuleEngine) *MemoryInstance { minBytes := MemoryPagesToBytesNum(memSec.Min) capBytes := MemoryPagesToBytesNum(memSec.Cap) maxBytes := MemoryPagesToBytesNum(memSec.Max) @@ -89,12 +92,13 @@ func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator) * buffer = make([]byte, minBytes, capBytes) } return &MemoryInstance{ - Buffer: buffer, - Min: memSec.Min, - Cap: memoryBytesNumToPages(uint64(cap(buffer))), - Max: memSec.Max, - Shared: memSec.IsShared, - expBuffer: expBuffer, + Buffer: buffer, + Min: memSec.Min, + Cap: memoryBytesNumToPages(uint64(cap(buffer))), + Max: memSec.Max, + Shared: memSec.IsShared, + expBuffer: expBuffer, + ownerModuleEngine: moduleEngine, } } @@ -247,14 +251,12 @@ func (m *MemoryInstance) Grow(delta uint32) (result uint32, ok bool) { m.Buffer = buffer m.Cap = newPages } - return currentPages, true } else if newPages > m.Cap { // grow the memory. if m.Shared { panic("shared memory cannot be grown, this is a bug in wazero") } m.Buffer = append(m.Buffer, make([]byte, MemoryPagesToBytesNum(delta))...) m.Cap = newPages - return currentPages, true } else { // We already have the capacity we need. if m.Shared { // We assume grow is called under a guest lock. @@ -264,8 +266,9 @@ func (m *MemoryInstance) Grow(delta uint32) (result uint32, ok bool) { } else { m.Buffer = m.Buffer[:MemoryPagesToBytesNum(newPages)] } - return currentPages, true } + m.ownerModuleEngine.MemoryGrown() + return currentPages, true } // Pages implements the same method as documented on api.Memory. diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go index 68573b918e..8369ad9ed6 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go @@ -185,9 +185,6 @@ type Module struct { // as described in https://yurydelendik.github.io/webassembly-dwarf/, though it is not specified in the Wasm // specification: https://github.com/WebAssembly/debugging/issues/1 DWARFLines *wasmdebug.DWARFLines - - // NonStaticLocals collects the local indexes that will change its value through either local.get or local.tee. - NonStaticLocals []map[Index]struct{} } // ModuleID represents sha256 hash value uniquely assigned to Module. @@ -366,8 +363,6 @@ func (m *Module) validateFunctions(enabledFeatures api.CoreFeatures, functions [ br := bytes.NewReader(nil) // Also, we reuse the stacks across multiple function validations to reduce allocations. vs := &stacks{} - // Non-static locals are gathered during validation and used in the down-stream compilation. - m.NonStaticLocals = make([]map[Index]struct{}, len(m.FunctionSection)) for idx, typeIndex := range m.FunctionSection { if typeIndex >= typeCount { return fmt.Errorf("invalid %s: type section index %d out of range", m.funcDesc(SectionIDFunction, Index(idx)), typeIndex) @@ -655,7 +650,7 @@ func paramNames(localNames IndirectNameMap, funcIdx uint32, paramLen int) []stri func (m *ModuleInstance) buildMemory(module *Module, allocator experimental.MemoryAllocator) { memSec := module.MemorySection if memSec != nil { - m.MemoryInstance = NewMemoryInstance(memSec, allocator) + m.MemoryInstance = NewMemoryInstance(memSec, allocator, m.Engine) m.MemoryInstance.definition = &module.MemoryDefinitionSection[0] } } diff --git a/vendor/modules.txt b/vendor/modules.txt index 71c9939bff..fdbf7cbb49 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -545,11 +545,12 @@ github.com/moloch--/asciicast # github.com/moloch--/memmod v0.0.0-20230225130813-fd77d905589e ## explicit; go 1.17 github.com/moloch--/memmod -# github.com/ncruces/go-sqlite3 v0.15.0 +# github.com/ncruces/go-sqlite3 v0.16.3 ## explicit; go 1.21 github.com/ncruces/go-sqlite3 github.com/ncruces/go-sqlite3/driver github.com/ncruces/go-sqlite3/embed +github.com/ncruces/go-sqlite3/internal/alloc github.com/ncruces/go-sqlite3/internal/util github.com/ncruces/go-sqlite3/util/osutil github.com/ncruces/go-sqlite3/vfs @@ -704,7 +705,7 @@ github.com/tailscale/wireguard-go/tun # github.com/tcnksm/go-httpstat v0.2.0 ## explicit github.com/tcnksm/go-httpstat -# github.com/tetratelabs/wazero v1.7.2 +# github.com/tetratelabs/wazero v1.7.3 ## explicit; go 1.20 github.com/tetratelabs/wazero github.com/tetratelabs/wazero/api