From ae20998ff5aaacc8e3afd46c64e28a8e039b58a1 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 18 May 2020 22:06:50 +0200 Subject: [PATCH 01/47] ARM, ARM64, PPC: Fix TSETR fallback. Thanks to Javier Guerra Giraldez. --- src/vm_arm.dasc | 1 + src/vm_arm64.dasc | 1 + src/vm_ppc.dasc | 1 + 3 files changed, 3 insertions(+) diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index edefac328..5bbdbbff7 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -699,6 +699,7 @@ static void build_subroutines(BuildCtx *ctx) |->vmeta_tsetr: | str BASE, L->base | .IOS mov RC, BASE + | mov CARG1, L | str PC, SAVE_PC | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | // Returns TValue *. diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index edceb549e..62946373f 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -711,6 +711,7 @@ static void build_subroutines(BuildCtx *ctx) |->vmeta_tsetr: | sxtw CARG3, TMP1w | str BASE, L->base + | mov CARG1, L | str PC, SAVE_PC | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | // Returns TValue *. diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 4299e266d..a66e30b5e 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -1083,6 +1083,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vmeta_tsetr: | stp BASE, L->base + | mr CARG1, L | stw PC, SAVE_PC | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | // Returns TValue *. From 5bf0da3d7c02f9959fa3a9fb721e0565137b70c8 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 18 May 2020 22:24:53 +0200 Subject: [PATCH 02/47] ARM64: Fix {AHUV}LOAD specialized to nil/false/true. Reported by caohongqing. 
--- src/lj_asm_arm64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index ce2100c92..624cc2da1 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -1067,7 +1067,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); } else { emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), - ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp); + ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, gpr), tmp); } if (ofs & FUSE_REG) emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); From 0eddcbead2d67c16dcd4039a6765b9d2fc8ea631 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 20 May 2020 20:42:04 +0200 Subject: [PATCH 03/47] Cleanup CPU detection and tuning for old CPUs. --- src/Makefile | 1 - src/lib_jit.c | 65 ++++++++++++-------------------- src/lj_arch.h | 6 +-- src/lj_asm_x86.h | 33 +++++------------ src/lj_dispatch.c | 7 ---- src/lj_emit_x86.h | 5 +-- src/lj_errmsg.h | 4 -- src/lj_jit.h | 94 +++++++++++++++++++++++++---------------------- src/ljamalg.c | 10 ----- 9 files changed, 87 insertions(+), 138 deletions(-) diff --git a/src/Makefile b/src/Makefile index 07a942518..82a570320 100644 --- a/src/Makefile +++ b/src/Makefile @@ -603,7 +603,6 @@ E= @echo default all: $(TARGET_T) amalg: - @grep "^[+|]" ljamalg.c $(MAKE) all "LJCORE_O=ljamalg.o" clean: diff --git a/src/lib_jit.c b/src/lib_jit.c index c97b0d531..acd6c293b 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -104,8 +104,8 @@ LJLIB_CF(jit_status) jit_State *J = L2J(L); L->top = L->base; setboolV(L->top++, (J->flags & JIT_F_ON) ? 
1 : 0); - flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); - flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); + flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING); + flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING); return (int)(L->top - L->base); #else setboolV(L->top++, 0); @@ -471,7 +471,7 @@ static int jitopt_flag(jit_State *J, const char *str) str += str[2] == '-' ? 3 : 2; set = 0; } - for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) { + for (opt = JIT_F_OPT; ; opt <<= 1) { size_t len = *(const uint8_t *)lst; if (len == 0) break; @@ -640,59 +640,41 @@ JIT_PARAMDEF(JIT_PARAMINIT) #undef JIT_PARAMINIT 0 }; -#endif #if LJ_TARGET_ARM && LJ_TARGET_LINUX #include <sys/utsname.h> #endif -/* Arch-dependent CPU detection. */ -static uint32_t jit_cpudetect(lua_State *L) +/* Arch-dependent CPU feature detection. */ +static uint32_t jit_cpudetect(void) { uint32_t flags = 0; #if LJ_TARGET_X86ORX64 + uint32_t vendor[4]; uint32_t features[4]; if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { -#if !LJ_HASJIT -#define JIT_F_SSE2 2 -#endif - flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; -#if LJ_HASJIT flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; - if (vendor[2] == 0x6c65746e) { /* Intel. */ - if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ - flags |= JIT_F_LEA_AGU; - } else if (vendor[2] == 0x444d4163) { /* AMD. */ - uint32_t fam = (features[0] & 0x0ff00f00); - if (fam >= 0x00000f00) /* K8, K10. */ - flags |= JIT_F_PREFER_IMUL; - } if (vendor[0] >= 7) { uint32_t xfeatures[4]; lj_vm_cpuid(7, xfeatures); flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; } -#endif } - /* Check for required instruction set support on x86 (unnecessary on x64). */ -#if LJ_TARGET_X86 - if (!(flags & JIT_F_SSE2)) - luaL_error(L, "CPU with SSE2 required"); -#endif + /* Don't bother checking for SSE2 -- the VM will crash before getting here. 
*/ + #elif LJ_TARGET_ARM -#if LJ_HASJIT + int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ #if LJ_TARGET_LINUX if (ver < 70) { /* Runtime ARM CPU detection. */ struct utsname ut; uname(&ut); if (strncmp(ut.machine, "armv", 4) == 0) { - if (ut.machine[4] >= '7') - ver = 70; - else if (ut.machine[4] == '6') - ver = 60; + if (ut.machine[4] >= '8') ver = 80; + else if (ut.machine[4] == '7') ver = 70; + else if (ut.machine[4] == '6') ver = 60; } } #endif @@ -700,20 +682,22 @@ static uint32_t jit_cpudetect(lua_State *L) ver >= 61 ? JIT_F_ARMV6T2_ : ver >= 60 ? JIT_F_ARMV6_ : 0; flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; -#endif + #elif LJ_TARGET_ARM64 + /* No optional CPU features to detect (for now). */ + #elif LJ_TARGET_PPC -#if LJ_HASJIT + #if LJ_ARCH_SQRT flags |= JIT_F_SQRT; #endif #if LJ_ARCH_ROUND flags |= JIT_F_ROUND; #endif -#endif + #elif LJ_TARGET_MIPS -#if LJ_HASJIT + /* Compile-time MIPS CPU detection. */ #if LJ_ARCH_VERSION >= 20 flags |= JIT_F_MIPSXXR2; @@ -731,31 +715,28 @@ static uint32_t jit_cpudetect(lua_State *L) if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ } #endif -#endif + #else #error "Missing CPU detection for this architecture" #endif - UNUSED(L); return flags; } /* Initialize JIT compiler. 
*/ static void jit_init(lua_State *L) { - uint32_t flags = jit_cpudetect(L); -#if LJ_HASJIT jit_State *J = L2J(L); - J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; + J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT; memcpy(J->param, jit_param_default, sizeof(J->param)); lj_dispatch_update(G(L)); -#else - UNUSED(flags); -#endif } +#endif LUALIB_API int luaopen_jit(lua_State *L) { +#if LJ_HASJIT jit_init(L); +#endif lua_pushliteral(L, LJ_OS_NAME); lua_pushliteral(L, LJ_ARCH_NAME); lua_pushinteger(L, LUAJIT_VERSION_NUM); diff --git a/src/lj_arch.h b/src/lj_arch.h index 027b39ce9..704268387 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -208,13 +208,13 @@ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#if __ARM_ARCH_8__ || __ARM_ARCH_8A__ +#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ #define LJ_ARCH_VERSION 80 -#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ +#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ #define LJ_ARCH_VERSION 70 #elif __ARM_ARCH_6T2__ #define LJ_ARCH_VERSION 61 -#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ +#elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ #define LJ_ARCH_VERSION 60 #else #define LJ_ARCH_VERSION 50 diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index db3409b90..bf818f5a1 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1214,13 +1214,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); } else { emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); - if ((as->flags & JIT_F_PREFER_IMUL)) { - emit_i8(as, sizeof(Node)); - emit_rr(as, XO_IMULi8, dest, dest); - } else { - emit_shifti(as, XOg_SHL, dest, 3); - emit_rmrxo(as, 
XO_LEA, dest, dest, dest, XM_SCALE2, 0); - } + emit_shifti(as, XOg_SHL, dest, 3); + emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); if (isk) { emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); @@ -1279,7 +1274,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) lua_assert(ofs % sizeof(Node) == 0); if (ra_hasreg(dest)) { if (ofs != 0) { - if (dest == node && !(as->flags & JIT_F_LEA_AGU)) + if (dest == node) emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); else emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); @@ -2180,8 +2175,7 @@ static void asm_add(ASMState *as, IRIns *ir) { if (irt_isnum(ir->t)) asm_fparith(as, ir, XO_ADDSD); - else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp || - irt_is64(ir->t) || !asm_lea(as, ir)) + else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir)) asm_intarith(as, ir, XOg_ADD); } @@ -2903,7 +2897,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) MCode *target, *q; int32_t spadj = as->T->spadjust; if (spadj == 0) { - p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); + p -= LJ_64 ? 7 : 6; } else { MCode *p1; /* Patch stack adjustment. */ @@ -2915,20 +2909,11 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) p1 = p-9; *(int32_t *)p1 = spadj; } - if ((as->flags & JIT_F_LEA_AGU)) { -#if LJ_64 - p1[-4] = 0x48; -#endif - p1[-3] = (MCode)XI_LEA; - p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP); - p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP); - } else { #if LJ_64 - p1[-3] = 0x48; + p1[-3] = 0x48; #endif - p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); - p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); - } + p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); + p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); } /* Patch exit branch. */ target = lnk ? 
traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; @@ -2959,7 +2944,7 @@ static void asm_tail_prep(ASMState *as) as->invmcp = as->mcp = p; } else { /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ - as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0)); + as->mcp = p - (LJ_64 ? 7 : 6); as->invmcp = NULL; } } diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 8553438c7..39416d007 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -252,15 +252,8 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) } else { if (!(mode & LUAJIT_MODE_ON)) G2J(g)->flags &= ~(uint32_t)JIT_F_ON; -#if LJ_TARGET_X86ORX64 - else if ((G2J(g)->flags & JIT_F_SSE2)) - G2J(g)->flags |= (uint32_t)JIT_F_ON; - else - return 0; /* Don't turn on JIT compiler without SSE2 support. */ -#else else G2J(g)->flags |= (uint32_t)JIT_F_ON; -#endif lj_dispatch_update(g); } break; diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index bc4391a01..b17e28a57 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -559,10 +559,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) static void emit_addptr(ASMState *as, Reg r, int32_t ofs) { if (ofs) { - if ((as->flags & JIT_F_LEA_AGU)) - emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs); - else - emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); } } diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index efb7c3f36..9110dc7ef 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h @@ -101,11 +101,7 @@ ERRDEF(STRGSRV, "invalid replacement value (a %s)") ERRDEF(BADMODN, "name conflict for module " LUA_QS) #if LJ_HASJIT ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") -#if LJ_TARGET_X86ORX64 -ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2") -#else ERRDEF(NOJIT, "JIT compiler disabled") -#endif #elif defined(LJ_ARCH_NOJIT) ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") #else diff --git 
a/src/lj_jit.h b/src/lj_jit.h index f179f17f8..a9c602f07 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -9,47 +9,49 @@ #include "lj_obj.h" #include "lj_ir.h" -/* JIT engine flags. */ +/* -- JIT engine flags ---------------------------------------------------- */ + +/* General JIT engine flags. 4 bits. */ #define JIT_F_ON 0x00000001 -/* CPU-specific JIT engine flags. */ +/* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */ +#define JIT_F_CPU 0x00000010 + #if LJ_TARGET_X86ORX64 -#define JIT_F_SSE2 0x00000010 -#define JIT_F_SSE3 0x00000020 -#define JIT_F_SSE4_1 0x00000040 -#define JIT_F_PREFER_IMUL 0x00000080 -#define JIT_F_LEA_AGU 0x00000100 -#define JIT_F_BMI2 0x00000200 - -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_SSE2 -#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2" + +#define JIT_F_SSE3 (JIT_F_CPU << 0) +#define JIT_F_SSE4_1 (JIT_F_CPU << 1) +#define JIT_F_BMI2 (JIT_F_CPU << 2) + + +#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2" + #elif LJ_TARGET_ARM -#define JIT_F_ARMV6_ 0x00000010 -#define JIT_F_ARMV6T2_ 0x00000020 -#define JIT_F_ARMV7 0x00000040 -#define JIT_F_VFPV2 0x00000080 -#define JIT_F_VFPV3 0x00000100 - -#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) -#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) + +#define JIT_F_ARMV6_ (JIT_F_CPU << 0) +#define JIT_F_ARMV6T2_ (JIT_F_CPU << 1) +#define JIT_F_ARMV7 (JIT_F_CPU << 2) +#define JIT_F_ARMV8 (JIT_F_CPU << 3) +#define JIT_F_VFPV2 (JIT_F_CPU << 4) +#define JIT_F_VFPV3 (JIT_F_CPU << 5) + +#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) +#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) #define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) -/* Names for the CPU-specific flags. Must match the order above. 
*/ -#define JIT_F_CPU_FIRST JIT_F_ARMV6_ -#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3" +#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3" + #elif LJ_TARGET_PPC -#define JIT_F_SQRT 0x00000010 -#define JIT_F_ROUND 0x00000020 -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_SQRT +#define JIT_F_SQRT (JIT_F_CPU << 0) +#define JIT_F_ROUND (JIT_F_CPU << 1) + #define JIT_F_CPUSTRING "\4SQRT\5ROUND" + #elif LJ_TARGET_MIPS -#define JIT_F_MIPSXXR2 0x00000010 -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2 +#define JIT_F_MIPSXXR2 (JIT_F_CPU << 0) + #if LJ_TARGET_MIPS32 #if LJ_TARGET_MIPSR6 #define JIT_F_CPUSTRING "\010MIPS32R6" @@ -63,27 +65,29 @@ #define JIT_F_CPUSTRING "\010MIPS64R2" #endif #endif + #else -#define JIT_F_CPU_FIRST 0 + #define JIT_F_CPUSTRING "" + #endif -/* Optimization flags. */ +/* Optimization flags. 12 bits. */ +#define JIT_F_OPT 0x00010000 #define JIT_F_OPT_MASK 0x0fff0000 -#define JIT_F_OPT_FOLD 0x00010000 -#define JIT_F_OPT_CSE 0x00020000 -#define JIT_F_OPT_DCE 0x00040000 -#define JIT_F_OPT_FWD 0x00080000 -#define JIT_F_OPT_DSE 0x00100000 -#define JIT_F_OPT_NARROW 0x00200000 -#define JIT_F_OPT_LOOP 0x00400000 -#define JIT_F_OPT_ABC 0x00800000 -#define JIT_F_OPT_SINK 0x01000000 -#define JIT_F_OPT_FUSE 0x02000000 +#define JIT_F_OPT_FOLD (JIT_F_OPT << 0) +#define JIT_F_OPT_CSE (JIT_F_OPT << 1) +#define JIT_F_OPT_DCE (JIT_F_OPT << 2) +#define JIT_F_OPT_FWD (JIT_F_OPT << 3) +#define JIT_F_OPT_DSE (JIT_F_OPT << 4) +#define JIT_F_OPT_NARROW (JIT_F_OPT << 5) +#define JIT_F_OPT_LOOP (JIT_F_OPT << 6) +#define JIT_F_OPT_ABC (JIT_F_OPT << 7) +#define JIT_F_OPT_SINK (JIT_F_OPT << 8) +#define JIT_F_OPT_FUSE (JIT_F_OPT << 9) /* Optimizations names for -O. Must match the order above. 
*/ -#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD #define JIT_F_OPTSTRING \ "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" @@ -95,6 +99,8 @@ JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 +/* -- JIT engine parameters ----------------------------------------------- */ + #if LJ_TARGET_WINDOWS || LJ_64 /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ #define JIT_P_sizemcode_DEFAULT 64 @@ -137,6 +143,8 @@ JIT_PARAMDEF(JIT_PARAMENUM) #define JIT_PARAMSTR(len, name, value) #len #name #define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) +/* -- JIT engine data structures ------------------------------------------ */ + /* Trace compiler state. */ typedef enum { LJ_TRACE_IDLE, /* Trace compiler idle. */ diff --git a/src/ljamalg.c b/src/ljamalg.c index 395429818..6712d4354 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c @@ -3,16 +3,6 @@ ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h */ -/* -+--------------------------------------------------------------------------+ -| WARNING: Compiling the amalgamation needs a lot of virtual memory | -| (around 300 MB with GCC 4.x)! If you don't have enough physical memory | -| your machine will start swapping to disk and the compile will not finish | -| within a reasonable amount of time. | -| So either compile on a bigger machine or use the non-amalgamated build. | -+--------------------------------------------------------------------------+ -*/ - #define ljamalg_c #define LUA_CORE From 1e6e8aaa20626ac94cf907c69b0452f76e9f5fa5 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 22 May 2020 02:45:03 +0200 Subject: [PATCH 04/47] Fix narrowing of unary minus. 
--- src/lj_opt_narrow.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index ef0599c94..ba425334a 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -551,8 +551,13 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) { rc = conv_str_tonum(J, rc, vc); if (tref_isinteger(rc)) { - if ((uint32_t)numberVint(vc) != 0x80000000u) - return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); + uint32_t k = (uint32_t)numberVint(vc); + if ((LJ_DUALNUM || k != 0) && k != 0x80000000u) { + TRef zero = lj_ir_kint(J, 0); + if (!LJ_DUALNUM) + emitir(IRTGI(IR_NE), rc, zero); + return emitir(IRTGI(IR_SUBOV), zero, rc); + } rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); } return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG)); From 03208c8162af9cc01ca76ee1676ca79e5abe9b60 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 22 May 2020 02:45:47 +0200 Subject: [PATCH 05/47] Fix math.min()/math.max() inconsistencies. --- src/lj_asm_arm.h | 6 ++-- src/lj_asm_arm64.h | 6 ++-- src/lj_asm_mips.h | 6 ++-- src/lj_asm_ppc.h | 5 ++-- src/lj_opt_fold.c | 53 +++++++++++++---------------------- src/lj_vmmath.c | 4 +-- src/vm_arm.dasc | 4 +-- src/vm_arm64.dasc | 4 +-- src/vm_mips.dasc | 69 ++++++++++++++++++++++++++++++++++++++-------- src/vm_mips64.dasc | 68 ++++++++++++++++++++++++++++++++------------- src/vm_ppc.dasc | 14 +++++----- src/vm_x64.dasc | 2 +- src/vm_x86.dasc | 2 +- 13 files changed, 151 insertions(+), 92 deletions(-) diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 9d055c814..d2579349c 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -1659,8 +1659,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) asm_intmin_max(as, ir, cc); } -#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) -#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) +#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL) +#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE) /* 
-- Comparisons --------------------------------------------------------- */ @@ -1852,7 +1852,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { as->curins--; /* Always skip the loword min/max. */ if (uselo || usehi) - asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); + asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE); return; #elif LJ_HASFFI } else if ((ir-1)->o == IR_CONV) { diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 624cc2da1..f640b91b2 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -1598,7 +1598,7 @@ static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc) Reg dest = (ra_dest(as, ir, RSET_FPR) & 31); Reg right, left = ra_alloc2(as, ir, RSET_FPR); right = ((left >> 8) & 31); left &= 31; - emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right); + emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, right, left); emit_nm(as, A64I_FCMPd, left, right); } @@ -1610,8 +1610,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc) asm_intmin_max(as, ir, cc); } -#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) -#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) +#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_PL) +#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_LE) /* -- Comparisons --------------------------------------------------------- */ diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 9309b7819..a242904e0 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -2121,12 +2121,12 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) right = (left >> 8); left &= 255; #if !LJ_TARGET_MIPSR6 if (dest == left) { - emit_fg(as, MIPSI_MOVT_D, dest, right); + emit_fg(as, MIPSI_MOVF_D, dest, right); } else { - emit_fg(as, MIPSI_MOVF_D, dest, left); + emit_fg(as, MIPSI_MOVT_D, dest, left); if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); } - emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? 
left : right, ismax ? right : left); + emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? right : left, ismax ? left : right); #else emit_fgh(as, ismax ? MIPSI_MAX_D : MIPSI_MIN_D, dest, left, right); #endif diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 8fa8c8ef6..afcd6b7a0 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -1724,9 +1724,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) if (tmp == left || tmp == right) tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, dest), left), right)); - emit_facb(as, PPCI_FSEL, dest, tmp, - ismax ? left : right, ismax ? right : left); - emit_fab(as, PPCI_FSUB, tmp, left, right); + emit_facb(as, PPCI_FSEL, dest, tmp, left, right); + emit_fab(as, PPCI_FSUB, tmp, ismax ? left : right, ismax ? right : left); } else { Reg dest = ra_dest(as, ir, RSET_GPR); Reg tmp1 = RID_TMP, tmp2 = dest; diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index b4d05a263..cefd69c8f 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -1774,8 +1774,6 @@ LJFOLDF(reassoc_intarith_k64) #endif } -LJFOLD(MIN MIN any) -LJFOLD(MAX MAX any) LJFOLD(BAND BAND any) LJFOLD(BOR BOR any) LJFOLDF(reassoc_dup) @@ -1785,6 +1783,15 @@ LJFOLDF(reassoc_dup) return NEXTFOLD; } +LJFOLD(MIN MIN any) +LJFOLD(MAX MAX any) +LJFOLDF(reassoc_dup_minmax) +{ + if (fins->op2 == fleft->op2) + return LEFTFOLD; /* (a o b) o b ==> a o b */ + return NEXTFOLD; +} + LJFOLD(BXOR BXOR any) LJFOLDF(reassoc_bxor) { @@ -1823,23 +1830,12 @@ LJFOLDF(reassoc_shift) return NEXTFOLD; } -LJFOLD(MIN MIN KNUM) -LJFOLD(MAX MAX KNUM) LJFOLD(MIN MIN KINT) LJFOLD(MAX MAX KINT) LJFOLDF(reassoc_minmax_k) { IRIns *irk = IR(fleft->op2); - if (irk->o == IR_KNUM) { - lua_Number a = ir_knum(irk)->n; - lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD); - if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. 
*/ - return LEFTFOLD; - PHIBARRIER(fleft); - fins->op1 = fleft->op1; - fins->op2 = (IRRef1)lj_ir_knum(J, y); - return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */ - } else if (irk->o == IR_KINT) { + if (irk->o == IR_KINT) { int32_t a = irk->i; int32_t y = kfold_intop(a, fright->i, fins->o); if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ @@ -1852,24 +1848,6 @@ LJFOLDF(reassoc_minmax_k) return NEXTFOLD; } -LJFOLD(MIN MAX any) -LJFOLD(MAX MIN any) -LJFOLDF(reassoc_minmax_left) -{ - if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2) - return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */ - return NEXTFOLD; -} - -LJFOLD(MIN any MAX) -LJFOLD(MAX any MIN) -LJFOLDF(reassoc_minmax_right) -{ - if (fins->op1 == fright->op1 || fins->op1 == fright->op2) - return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */ - return NEXTFOLD; -} - /* -- Array bounds check elimination -------------------------------------- */ /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. @@ -1995,8 +1973,6 @@ LJFOLDF(comm_comp) LJFOLD(BAND any any) LJFOLD(BOR any any) -LJFOLD(MIN any any) -LJFOLD(MAX any any) LJFOLDF(comm_dup) { if (fins->op1 == fins->op2) /* x o x ==> x */ @@ -2004,6 +1980,15 @@ LJFOLDF(comm_dup) return fold_comm_swap(J); } +LJFOLD(MIN any any) +LJFOLD(MAX any any) +LJFOLDF(comm_dup_minmax) +{ + if (fins->op1 == fins->op2) /* x o x ==> x */ + return LEFTFOLD; + return NEXTFOLD; +} + LJFOLD(BXOR any any) LJFOLDF(comm_bxor) { diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 2a41bcaae..e89405d7d 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -50,8 +50,8 @@ double lj_vm_foldarith(double x, double y, int op) #if LJ_HASJIT case IR_ATAN2 - IR_ADD: return atan2(x, y); break; case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; - case IR_MIN - IR_ADD: return x > y ? y : x; break; - case IR_MAX - IR_ADD: return x < y ? y : x; break; + case IR_MIN - IR_ADD: return x < y ? x : y; break; + case IR_MAX - IR_ADD: return x > y ? 
x : y; break; #endif default: return x; } diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 5bbdbbff7..013688fbe 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -1716,8 +1716,8 @@ static void build_subroutines(BuildCtx *ctx) |.endif |.endmacro | - | math_minmax math_min, gt, hi - | math_minmax math_max, lt, lo + | math_minmax math_min, gt, pl + | math_minmax math_max, lt, le | |//-- String library ----------------------------------------------------- | diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 62946373f..c157696ca 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -1489,8 +1489,8 @@ static void build_subroutines(BuildCtx *ctx) | b <6 |.endmacro | - | math_minmax math_min, gt, hi - | math_minmax math_max, lt, lo + | math_minmax math_min, gt, pl + | math_minmax math_max, lt, le | |//-- String library ----------------------------------------------------- | diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 37506139b..0c84c13b6 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -1768,7 +1768,7 @@ static void build_subroutines(BuildCtx *ctx) | b ->fff_res |. li RD, (2+1)*8 | - |.macro math_minmax, name, intins, fpins + |.macro math_minmax, name, intins, ismax | .ffunc_1 name | addu TMP3, BASE, NARGS8:RC | bne SFARG1HI, TISNUM, >5 @@ -1822,13 +1822,21 @@ static void build_subroutines(BuildCtx *ctx) |.endif |7: |.if FPU + |.if ismax + | c.olt.d FARG1, FRET1 + |.else | c.olt.d FRET1, FARG1 - | fpins FRET1, FARG1 + |.endif + | movf.d FRET1, FARG1 + |.else + |.if ismax + | bal ->vm_sfcmpogt |.else | bal ->vm_sfcmpolt + |.endif |. nop - | intins SFARG1LO, SFARG2LO, CRET1 - | intins SFARG1HI, SFARG2HI, CRET1 + | movz SFARG1LO, SFARG2LO, CRET1 + | movz SFARG1HI, SFARG2HI, CRET1 |.endif | b <6 |. 
addiu TMP2, TMP2, 8 @@ -1849,8 +1857,8 @@ static void build_subroutines(BuildCtx *ctx) | |.endmacro | - | math_minmax math_min, movz, movf.d - | math_minmax math_max, movn, movt.d + | math_minmax math_min, movz, 0 + | math_minmax math_max, movn, 1 | |//-- String library ----------------------------------------------------- | @@ -2692,6 +2700,43 @@ static void build_subroutines(BuildCtx *ctx) |. move CRET1, CRET2 |.endif | + |->vm_sfcmpogt: + |.if not FPU + | sll AT, SFARG2HI, 1 + | sll TMP0, SFARG1HI, 1 + | or CRET1, SFARG2LO, SFARG1LO + | or TMP1, AT, TMP0 + | or TMP1, TMP1, CRET1 + | beqz TMP1, >8 // Both args +-0: return 0. + |. sltu CRET1, r0, SFARG2LO + | lui TMP1, 0xffe0 + | addu AT, AT, CRET1 + | sltu CRET1, r0, SFARG1LO + | sltu AT, TMP1, AT + | addu TMP0, TMP0, CRET1 + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; + |. and AT, SFARG2HI, SFARG1HI + | bltz AT, >5 // Both args negative? + |. nop + | beq SFARG2HI, SFARG1HI, >8 + |. sltu CRET1, SFARG2LO, SFARG1LO + | jr ra + |. slt CRET1, SFARG2HI, SFARG1HI + |5: // Swap conditions if both operands are negative. + | beq SFARG2HI, SFARG1HI, >8 + |. sltu CRET1, SFARG1LO, SFARG2LO + | jr ra + |. slt CRET1, SFARG1HI, SFARG2HI + |8: + | jr ra + |. nop + |9: + | jr ra + |. li CRET1, 0 + |.endif + | |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. |->vm_sfcmpolex: @@ -2734,24 +2779,24 @@ static void build_subroutines(BuildCtx *ctx) |. li CRET1, 0 |.endif | - |.macro sfmin_max, name, intins + |.macro sfmin_max, name, fpcall |->vm_sf .. name: |.if JIT and not FPU | move TMP2, ra - | bal ->vm_sfcmpolt + | bal ->fpcall |. nop | move TMP0, CRET1 | move SFRETHI, SFARG1HI | move SFRETLO, SFARG1LO | move ra, TMP2 - | intins SFRETHI, SFARG2HI, TMP0 + | movz SFRETHI, SFARG2HI, TMP0 | jr ra - |. intins SFRETLO, SFARG2LO, TMP0 + |. 
movz SFRETLO, SFARG2LO, TMP0 |.endif |.endmacro | - | sfmin_max min, movz - | sfmin_max max, movn + | sfmin_max min, vm_sfcmpolt + | sfmin_max max, vm_sfcmpogt | |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index 4ae19b7d9..dac143a43 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc @@ -1852,18 +1852,26 @@ static void build_subroutines(BuildCtx *ctx) |.if MIPSR6 | fpins FRET1, FRET1, FARG1 |.else + |.if fpins // ismax + | c.olt.d FARG1, FRET1 + |.else | c.olt.d FRET1, FARG1 - | fpins FRET1, FARG1 |.endif + | movf.d FRET1, FARG1 + |.endif + |.else + |.if fpins // ismax + | bal ->vm_sfcmpogt |.else | bal ->vm_sfcmpolt + |.endif |. nop |.if MIPSR6 - | intins AT, CARG2, CRET1 - | intinsc CARG1, CARG1, CRET1 + | seleqz AT, CARG2, CRET1 + | selnez CARG1, CARG1, CRET1 | or CARG1, CARG1, AT |.else - | intins CARG1, CARG2, CRET1 + | movz CARG1, CARG2, CRET1 |.endif |.endif | b <6 @@ -1889,8 +1897,8 @@ static void build_subroutines(BuildCtx *ctx) | math_minmax math_min, seleqz, selnez, min.d | math_minmax math_max, selnez, seleqz, max.d |.else - | math_minmax math_min, movz, _, movf.d - | math_minmax math_max, movn, _, movt.d + | math_minmax math_min, movz, _, 0 + | math_minmax math_max, movn, _, 1 |.endif | |//-- String library ----------------------------------------------------- @@ -2108,7 +2116,6 @@ static void build_subroutines(BuildCtx *ctx) | dsllv CRET2, CRET2, TMP0 // Integer check. | sextw AT, CRET1 | xor AT, CRET1, AT // Range check. - | jr ra |.if MIPSR6 | seleqz AT, AT, CRET2 | selnez CRET2, CRET2, CRET2 @@ -2809,6 +2816,34 @@ static void build_subroutines(BuildCtx *ctx) |. move CRET1, CRET2 |.endif | + |->vm_sfcmpogt: + |.if not FPU + | dsll AT, CARG2, 1 + | dsll TMP0, CARG1, 1 + | or TMP1, AT, TMP0 + | beqz TMP1, >8 // Both args +-0: return 0. + |. 
lui TMP1, 0xffe0 + | dsll TMP1, TMP1, 32 + | sltu AT, TMP1, AT + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; + |. and AT, CARG2, CARG1 + | bltz AT, >5 // Both args negative? + |. nop + | jr ra + |. slt CRET1, CARG2, CARG1 + |5: // Swap conditions if both operands are negative. + | jr ra + |. slt CRET1, CARG1, CARG2 + |8: + | jr ra + |. li CRET1, 0 + |9: + | jr ra + |. li CRET1, 0 + |.endif + | |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. |->vm_sfcmpolex: @@ -2840,34 +2875,29 @@ static void build_subroutines(BuildCtx *ctx) |. li CRET1, 0 |.endif | - |.macro sfmin_max, name, intins, intinsc + |.macro sfmin_max, name, fpcall |->vm_sf .. name: |.if JIT and not FPU | move TMP2, ra - | bal ->vm_sfcmpolt + | bal ->fpcall |. nop | move ra, TMP2 | move TMP0, CRET1 | move CRET1, CARG1 |.if MIPSR6 - | intins CRET1, CRET1, TMP0 - | intinsc TMP0, CARG2, TMP0 + | selnez CRET1, CRET1, TMP0 + | seleqz TMP0, CARG2, TMP0 | jr ra |. or CRET1, CRET1, TMP0 |.else | jr ra - |. intins CRET1, CARG2, TMP0 + |. 
movz CRET1, CARG2, TMP0 |.endif |.endif |.endmacro | - |.if MIPSR6 - | sfmin_max min, selnez, seleqz - | sfmin_max max, seleqz, selnez - |.else - | sfmin_max min, movz, _ - | sfmin_max max, movn, _ - |.endif + | sfmin_max min, vm_sfcmpolt + | sfmin_max max, vm_sfcmpogt | |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index a66e30b5e..7a2d321ed 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -2309,12 +2309,12 @@ static void build_subroutines(BuildCtx *ctx) |6: | addi SAVE0, SAVE0, 8 |.if FPU - | fsub f0, FARG1, FARG2 |.if ismax - | fsel FARG1, f0, FARG1, FARG2 + | fsub f0, FARG1, FARG2 |.else - | fsel FARG1, f0, FARG2, FARG1 + | fsub f0, FARG2, FARG1 |.endif + | fsel FARG1, f0, FARG1, FARG2 |.else | stw CARG1, SFSAVE_1 | stw CARG2, SFSAVE_2 @@ -2354,13 +2354,13 @@ static void build_subroutines(BuildCtx *ctx) | checknum CARG2 | bge cr1, ->fff_resn | bge ->fff_fallback - | fsub f0, FARG1, FARG2 - | addi TMP1, TMP1, 8 |.if ismax - | fsel FARG1, f0, FARG1, FARG2 + | fsub f0, FARG1, FARG2 |.else - | fsel FARG1, f0, FARG2, FARG1 + | fsub f0, FARG2, FARG1 |.endif + | addi TMP1, TMP1, 8 + | fsel FARG1, f0, FARG1, FARG2 | b <1 |.endif |.endmacro diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index a5749b17a..c714f4c7b 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -1840,7 +1840,7 @@ static void build_subroutines(BuildCtx *ctx) | jmp ->fff_res | |.macro math_minmax, name, cmovop, sseop - | .ffunc name + | .ffunc_1 name | mov RAd, 2 |.if DUALNUM | mov RB, [BASE] diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 1965b06ba..c3999e7c7 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -2233,7 +2233,7 @@ static void build_subroutines(BuildCtx *ctx) | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 
| |.macro math_minmax, name, cmovop, sseop - | .ffunc name + | .ffunc_1 name | mov RA, 2 | cmp dword [BASE+4], LJ_TISNUM |.if DUALNUM From d75e26275bdcfe95283b761ef9405841ef2d406f Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 22 May 2020 03:10:50 +0200 Subject: [PATCH 06/47] Don't compile math.modf() anymore. It's rarely used and properly compiling it would be difficult. --- src/lib_math.c | 2 +- src/lj_ffrecord.c | 16 ---------------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/src/lib_math.c b/src/lib_math.c index 3fd466cad..02aa21d76 100644 --- a/src/lib_math.c +++ b/src/lib_math.c @@ -45,7 +45,7 @@ LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh) LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh) LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh) LJLIB_ASM_(math_frexp) -LJLIB_ASM_(math_modf) LJLIB_REC(.) +LJLIB_ASM_(math_modf) LJLIB_ASM(math_log) LJLIB_REC(math_log) { diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 5282217f9..436d5037f 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -601,22 +601,6 @@ static void LJ_FASTCALL recff_math_htrig(jit_State *J, RecordFFData *rd) J->base[0] = emitir(IRTN(IR_CALLN), tr, rd->data); } -static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd) -{ - TRef tr = J->base[0]; - if (tref_isinteger(tr)) { - J->base[0] = tr; - J->base[1] = lj_ir_kint(J, 0); - } else { - TRef trt; - tr = lj_ir_tonum(J, tr); - trt = emitir(IRTN(IR_FPMATH), tr, IRFPM_TRUNC); - J->base[0] = trt; - J->base[1] = emitir(IRTN(IR_SUB), tr, trt); - } - rd->nres = 2; -} - static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) { J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1], From 2f3f07882fb4ad9c64967d7088461b1ca0a25d3a Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 22 May 2020 03:28:52 +0200 Subject: [PATCH 07/47] Fix bytecode register allocation for comparisons. 
--- src/lj_parse.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lj_parse.c b/src/lj_parse.c index 74dd5706c..e18f4bfba 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -851,9 +851,12 @@ static void bcemit_comp(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2) e1 = e2; e2 = eret; /* Swap operands. */ op = ((op-BC_ISLT)^3)+BC_ISLT; expr_toval(fs, e1); + ra = expr_toanyreg(fs, e1); + rd = expr_toanyreg(fs, e2); + } else { + rd = expr_toanyreg(fs, e2); + ra = expr_toanyreg(fs, e1); } - rd = expr_toanyreg(fs, e2); - ra = expr_toanyreg(fs, e1); ins = BCINS_AD(op, ra, rd); } /* Using expr_free might cause asserts if the order is wrong. */ From 5655be4546d9177890c69f0d0accac4773ff0887 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 22 May 2020 04:53:35 +0200 Subject: [PATCH 08/47] Cleanup math function compilation and fix inconsistencies. --- src/lib_math.c | 22 +++++++++++----------- src/lj_asm.c | 8 +------- src/lj_asm_arm.h | 1 - src/lj_asm_arm64.h | 1 - src/lj_asm_mips.h | 1 - src/lj_asm_ppc.h | 1 - src/lj_asm_x86.h | 2 -- src/lj_ffrecord.c | 19 ++----------------- src/lj_ir.h | 4 +--- src/lj_ircall.h | 14 +++++++++----- src/lj_opt_fold.c | 25 ++++++++++++++++++++++++- src/lj_opt_split.c | 3 --- src/lj_target_x86.h | 6 ------ src/lj_vmmath.c | 6 ------ 14 files changed, 48 insertions(+), 65 deletions(-) diff --git a/src/lib_math.c b/src/lib_math.c index 02aa21d76..4cc2ba6e2 100644 --- a/src/lib_math.c +++ b/src/lib_math.c @@ -33,17 +33,17 @@ LJLIB_ASM(math_sqrt) LJLIB_REC(math_unary IRFPM_SQRT) lj_lib_checknum(L, 1); return FFH_RETRY; } -LJLIB_ASM_(math_log10) LJLIB_REC(math_unary IRFPM_LOG10) -LJLIB_ASM_(math_exp) LJLIB_REC(math_unary IRFPM_EXP) -LJLIB_ASM_(math_sin) LJLIB_REC(math_unary IRFPM_SIN) -LJLIB_ASM_(math_cos) LJLIB_REC(math_unary IRFPM_COS) -LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN) -LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) -LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) 
-LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) -LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh) -LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh) -LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh) +LJLIB_ASM_(math_log10) LJLIB_REC(math_call IRCALL_log10) +LJLIB_ASM_(math_exp) LJLIB_REC(math_call IRCALL_exp) +LJLIB_ASM_(math_sin) LJLIB_REC(math_call IRCALL_sin) +LJLIB_ASM_(math_cos) LJLIB_REC(math_call IRCALL_cos) +LJLIB_ASM_(math_tan) LJLIB_REC(math_call IRCALL_tan) +LJLIB_ASM_(math_asin) LJLIB_REC(math_call IRCALL_asin) +LJLIB_ASM_(math_acos) LJLIB_REC(math_call IRCALL_acos) +LJLIB_ASM_(math_atan) LJLIB_REC(math_call IRCALL_atan) +LJLIB_ASM_(math_sinh) LJLIB_REC(math_call IRCALL_sinh) +LJLIB_ASM_(math_cosh) LJLIB_REC(math_call IRCALL_cosh) +LJLIB_ASM_(math_tanh) LJLIB_REC(math_call IRCALL_tanh) LJLIB_ASM_(math_frexp) LJLIB_ASM_(math_modf) diff --git a/src/lj_asm.c b/src/lj_asm.c index 68d28fb07..20d637311 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1657,14 +1657,13 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_NEG: asm_neg(as, ir); break; #if LJ_SOFTFP32 case IR_DIV: case IR_POW: case IR_ABS: - case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: + case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: lua_assert(0); /* Unused for LJ_SOFTFP32. */ break; #else case IR_DIV: asm_div(as, ir); break; case IR_POW: asm_pow(as, ir); break; case IR_ABS: asm_abs(as, ir); break; - case IR_ATAN2: asm_atan2(as, ir); break; case IR_LDEXP: asm_ldexp(as, ir); break; case IR_FPMATH: asm_fpmath(as, ir); break; case IR_TOBIT: asm_tobit(as, ir); break; @@ -2158,11 +2157,6 @@ static void asm_setup_regsp(ASMState *as) as->modset = RSET_SCRATCH; break; #if !LJ_SOFTFP - case IR_ATAN2: -#if LJ_TARGET_X86 - if (as->evenspill < 4) /* Leave room to call atan2(). 
*/ - as->evenspill = 4; -#endif #if !LJ_TARGET_X86ORX64 case IR_LDEXP: #endif diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index d2579349c..ccb8ccb6e 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -1510,7 +1510,6 @@ static void asm_mul(ASMState *as, IRIns *ir) #define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) #define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) #define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) #endif diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index f640b91b2..da857355c 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -1455,7 +1455,6 @@ static void asm_pow(ASMState *as, IRIns *ir) #define asm_mulov(as, ir) asm_mul(as, ir) #define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) static void asm_mod(ASMState *as, IRIns *ir) diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index a242904e0..8b5efc35c 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -1838,7 +1838,6 @@ static void asm_abs(ASMState *as, IRIns *ir) } #endif -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) static void asm_arithov(ASMState *as, IRIns *ir) diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index afcd6b7a0..d9e4ad04c 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -1387,7 +1387,6 @@ static void asm_neg(ASMState *as, IRIns *ir) } #define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS) -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index bf818f5a1..d5cd6326a 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1957,8 +1957,6 
@@ static void asm_fpmath(ASMState *as, IRIns *ir) } } -#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) - static void asm_ldexp(ASMState *as, IRIns *ir) { int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 436d5037f..42049511c 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -563,7 +563,7 @@ static void LJ_FASTCALL recff_math_atan2(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); TRef tr2 = lj_ir_tonum(J, J->base[1]); - J->base[0] = emitir(IRTN(IR_ATAN2), tr, tr2); + J->base[0] = lj_ir_call(J, IRCALL_atan2, tr, tr2); UNUSED(rd); } @@ -580,22 +580,7 @@ static void LJ_FASTCALL recff_math_ldexp(jit_State *J, RecordFFData *rd) UNUSED(rd); } -/* Record math.asin, math.acos, math.atan. */ -static void LJ_FASTCALL recff_math_atrig(jit_State *J, RecordFFData *rd) -{ - TRef y = lj_ir_tonum(J, J->base[0]); - TRef x = lj_ir_knum_one(J); - uint32_t ffid = rd->data; - if (ffid != FF_math_atan) { - TRef tmp = emitir(IRTN(IR_MUL), y, y); - tmp = emitir(IRTN(IR_SUB), x, tmp); - tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_SQRT); - if (ffid == FF_math_asin) { x = tmp; } else { x = y; y = tmp; } - } - J->base[0] = emitir(IRTN(IR_ATAN2), y, x); -} - -static void LJ_FASTCALL recff_math_htrig(jit_State *J, RecordFFData *rd) +static void LJ_FASTCALL recff_math_call(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); J->base[0] = emitir(IRTN(IR_CALLN), tr, rd->data); diff --git a/src/lj_ir.h b/src/lj_ir.h index 6bbe0a338..60e335c2d 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -75,7 +75,6 @@ _(NEG, N , ref, ref) \ \ _(ABS, N , ref, ref) \ - _(ATAN2, N , ref, ref) \ _(LDEXP, N , ref, ref) \ _(MIN, C , ref, ref) \ _(MAX, C , ref, ref) \ @@ -178,8 +177,7 @@ LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE); /* FPMATH sub-functions. ORDER FPM. */ #define IRFPMDEF(_) \ _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. 
*/ \ - _(SQRT) _(EXP) _(EXP2) _(LOG) _(LOG2) _(LOG10) \ - _(SIN) _(COS) _(TAN) \ + _(SQRT) _(EXP2) _(LOG) _(LOG2) \ _(OTHER) typedef enum { diff --git a/src/lj_ircall.h b/src/lj_ircall.h index f4f3f7813..35c02dc02 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -21,6 +21,7 @@ typedef struct CCallInfo { #define CCI_OTSHIFT 16 #define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */ +#define CCI_TYPE(ci) (((ci)->flags>>CCI_OTSHIFT) & IRT_TYPE) #define CCI_OPSHIFT 24 #define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ @@ -172,6 +173,14 @@ typedef struct CCallInfo { _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \ _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \ _(ANY, lj_vm_modi, 2, FN, INT, 0) \ + _(ANY, log10, 1, N, NUM, XA_FP) \ + _(ANY, exp, 1, N, NUM, XA_FP) \ + _(ANY, sin, 1, N, NUM, XA_FP) \ + _(ANY, cos, 1, N, NUM, XA_FP) \ + _(ANY, tan, 1, N, NUM, XA_FP) \ + _(ANY, asin, 1, N, NUM, XA_FP) \ + _(ANY, acos, 1, N, NUM, XA_FP) \ + _(ANY, atan, 1, N, NUM, XA_FP) \ _(ANY, sinh, 1, N, NUM, XA_FP) \ _(ANY, cosh, 1, N, NUM, XA_FP) \ _(ANY, tanh, 1, N, NUM, XA_FP) \ @@ -183,14 +192,9 @@ typedef struct CCallInfo { _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \ _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \ _(FPMATH, sqrt, 1, N, NUM, XA_FP) \ - _(ANY, exp, 1, N, NUM, XA_FP) \ _(ANY, lj_vm_exp2, 1, N, NUM, XA_FP) \ _(ANY, log, 1, N, NUM, XA_FP) \ _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ - _(ANY, log10, 1, N, NUM, XA_FP) \ - _(ANY, sin, 1, N, NUM, XA_FP) \ - _(ANY, cos, 1, N, NUM, XA_FP) \ - _(ANY, tan, 1, N, NUM, XA_FP) \ _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \ _(ANY, pow, 2, N, NUM, XA2_FP) \ _(ANY, atan2, 2, N, NUM, XA2_FP) \ diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index cefd69c8f..ae65e15a5 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -173,7 +173,6 @@ LJFOLD(ADD KNUM KNUM) LJFOLD(SUB KNUM KNUM) LJFOLD(MUL KNUM KNUM) LJFOLD(DIV KNUM KNUM) -LJFOLD(ATAN2 KNUM KNUM) LJFOLD(LDEXP KNUM KNUM) LJFOLD(MIN KNUM KNUM) LJFOLD(MAX KNUM 
KNUM) @@ -213,6 +212,30 @@ LJFOLDF(kfold_fpmath) return lj_ir_knum(J, y); } +LJFOLD(CALLN KNUM any) +LJFOLDF(kfold_fpcall1) +{ + const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; + if (CCI_TYPE(ci) == IRT_NUM) { + double y = ((double (*)(double))ci->func)(knumleft); + return lj_ir_knum(J, y); + } + return NEXTFOLD; +} + +LJFOLD(CALLN CARG IRCALL_atan2) +LJFOLDF(kfold_fpcall2) +{ + if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { + const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; + double a = ir_knum(IR(fleft->op1))->n; + double b = ir_knum(IR(fleft->op2))->n; + double y = ((double (*)(double, double))ci->func)(a, b); + return lj_ir_knum(J, y); + } + return NEXTFOLD; +} + LJFOLD(POW KNUM KINT) LJFOLDF(kfold_numpow) { diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index ee7cf0f9f..e526b49d0 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -426,9 +426,6 @@ static void split_ir(jit_State *J) } hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); break; - case IR_ATAN2: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2); - break; case IR_LDEXP: hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); break; diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 71c930fe8..fd72c71da 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -228,16 +228,10 @@ typedef enum { /* Note: little-endian byte-order! */ XI_FLDZ = 0xeed9, XI_FLD1 = 0xe8d9, - XI_FLDLG2 = 0xecd9, - XI_FLDLN2 = 0xedd9, XI_FDUP = 0xc0d9, /* Really fld st0. */ XI_FPOP = 0xd8dd, /* Really fstp st0. */ XI_FPOP1 = 0xd9dd, /* Really fstp st1. 
*/ XI_FRNDINT = 0xfcd9, - XI_FSIN = 0xfed9, - XI_FCOS = 0xffd9, - XI_FPTAN = 0xf2d9, - XI_FPATAN = 0xf3d9, XI_FSCALE = 0xfdd9, XI_FYL2X = 0xf1d9, diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index e89405d7d..36178f293 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -48,7 +48,6 @@ double lj_vm_foldarith(double x, double y, int op) case IR_NEG - IR_ADD: return -x; break; case IR_ABS - IR_ADD: return fabs(x); break; #if LJ_HASJIT - case IR_ATAN2 - IR_ADD: return atan2(x, y); break; case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; case IR_MIN - IR_ADD: return x < y ? x : y; break; case IR_MAX - IR_ADD: return x > y ? x : y; break; @@ -129,14 +128,9 @@ double lj_vm_foldfpm(double x, int fpm) case IRFPM_CEIL: return lj_vm_ceil(x); case IRFPM_TRUNC: return lj_vm_trunc(x); case IRFPM_SQRT: return sqrt(x); - case IRFPM_EXP: return exp(x); case IRFPM_EXP2: return lj_vm_exp2(x); case IRFPM_LOG: return log(x); case IRFPM_LOG2: return lj_vm_log2(x); - case IRFPM_LOG10: return log10(x); - case IRFPM_SIN: return sin(x); - case IRFPM_COS: return cos(x); - case IRFPM_TAN: return tan(x); default: lua_assert(0); } return 0; From b2307c8ad817e350d65cc909a579ca2f77439682 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 23 May 2020 21:33:01 +0200 Subject: [PATCH 09/47] Remove pow() splitting and cleanup backends. 
--- src/lj_arch.h | 3 -- src/lj_asm.c | 106 ++++++++++++++++++++++++++++---------------- src/lj_asm_arm.h | 10 +---- src/lj_asm_arm64.h | 39 +--------------- src/lj_asm_mips.h | 38 +--------------- src/lj_asm_ppc.h | 9 +--- src/lj_asm_x86.h | 37 +--------------- src/lj_ir.h | 2 +- src/lj_ircall.h | 1 - src/lj_opt_fold.c | 18 ++++++-- src/lj_opt_narrow.c | 20 +++------ src/lj_opt_split.c | 21 --------- src/lj_vm.h | 5 --- src/lj_vmmath.c | 8 ---- 14 files changed, 95 insertions(+), 222 deletions(-) diff --git a/src/lj_arch.h b/src/lj_arch.h index 704268387..d65bc551e 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -586,9 +586,6 @@ #if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS #define LUAJIT_NO_LOG2 #endif -#if defined(__symbian__) || LJ_TARGET_WINDOWS -#define LUAJIT_NO_EXP2 -#endif #if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) #define LJ_NO_SYSTEM 1 #endif diff --git a/src/lj_asm.c b/src/lj_asm.c index 20d637311..dd84a4f20 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1308,32 +1308,6 @@ static void asm_call(ASMState *as, IRIns *ir) asm_gencall(as, ci, args); } -#if !LJ_SOFTFP32 -static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; - IRRef args[2]; - args[0] = lref; - args[1] = rref; - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); -} - -static int asm_fpjoin_pow(ASMState *as, IRIns *ir) -{ - IRIns *irp = IR(ir->op1); - if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { - IRIns *irpp = IR(irp->op1); - if (irpp == ir-2 && irpp->o == IR_FPMATH && - irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { - asm_fppow(as, ir, irpp->op1, irp->op2); - return 1; - } - } - return 0; -} -#endif - /* -- PHI and loop handling ----------------------------------------------- */ /* Break a PHI cycle by renaming to a free register (evict if needed). 
*/ @@ -1604,6 +1578,62 @@ static void asm_loop(ASMState *as) #error "Missing assembler for target CPU" #endif +/* -- Common instruction helpers ------------------------------------------ */ + +#if !LJ_SOFTFP32 +#if !LJ_TARGET_X86ORX64 +#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) +#define asm_fppowi(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) +#endif + +static void asm_pow(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : + IRCALL_lj_carith_powu64); + else +#endif + if (irt_isnum(IR(ir->op2)->t)) + asm_callid(as, ir, IRCALL_pow); + else + asm_fppowi(as, ir); +} + +static void asm_div(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : + IRCALL_lj_carith_divu64); + else +#endif + asm_fpdiv(as, ir); +} +#endif + +static void asm_mod(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isint(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : + IRCALL_lj_carith_modu64); + else +#endif + asm_callid(as, ir, IRCALL_lj_vm_modi); +} + +static void asm_fuseequal(ASMState *as, IRIns *ir) +{ + /* Fuse HREF + EQ/NE. */ + if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { + as->curins--; + asm_href(as, ir-1, (IROp)ir->o); + } else { + asm_equal(as, ir); + } +} + /* -- Instruction dispatch ------------------------------------------------ */ /* Assemble a single instruction. 
*/ @@ -1626,14 +1656,7 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_ABC: asm_comp(as, ir); break; - case IR_EQ: case IR_NE: - if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { - as->curins--; - asm_href(as, ir-1, (IROp)ir->o); - } else { - asm_equal(as, ir); - } - break; + case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break; case IR_RETF: asm_retf(as, ir); break; @@ -1702,7 +1725,13 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; case IR_TNEW: asm_tnew(as, ir); break; case IR_TDUP: asm_tdup(as, ir); break; - case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; + case IR_CNEW: case IR_CNEWI: +#if LJ_HASFFI + asm_cnew(as, ir); +#else + lua_assert(0); +#endif + break; /* Buffer operations. */ case IR_BUFHDR: asm_bufhdr(as, ir); break; @@ -2167,6 +2196,10 @@ static void asm_setup_regsp(ASMState *as) if (inloop) as->modset |= RSET_SCRATCH; #if LJ_TARGET_X86 + if (irt_isnum(IR(ir->op2)->t)) { + if (as->evenspill < 4) /* Leave room to call pow(). */ + as->evenspill = 4; + } break; #else ir->prev = REGSP_HINT(RID_FPRET); @@ -2192,9 +2225,6 @@ static void asm_setup_regsp(ASMState *as) continue; } break; - } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) { - if (as->evenspill < 4) /* Leave room to call pow(). 
*/ - as->evenspill = 4; } #endif if (inloop) diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index ccb8ccb6e..f922ed0f5 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -1268,8 +1268,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); } -#else -#define asm_cnew(as, ir) ((void)0) #endif /* -- Write barriers ------------------------------------------------------ */ @@ -1364,8 +1362,6 @@ static void asm_callround(ASMState *as, IRIns *ir, int id) static void asm_fpmath(ASMState *as, IRIns *ir) { - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; if (ir->op2 <= IRFPM_TRUNC) asm_callround(as, ir, ir->op2); else if (ir->op2 == IRFPM_SQRT) @@ -1507,14 +1503,10 @@ static void asm_mul(ASMState *as, IRIns *ir) #define asm_mulov(as, ir) asm_mul(as, ir) #if !LJ_SOFTFP -#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) -#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) +#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) #define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) #endif -#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) - static void asm_neg(ASMState *as, IRIns *ir) { #if !LJ_SOFTFP diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index da857355c..a35022239 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -1242,8 +1242,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); } -#else -#define asm_cnew(as, ir) ((void)0) #endif /* -- Write barriers ------------------------------------------------------ */ @@ -1320,8 +1318,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir) } else if (fpm <= IRFPM_TRUNC) { asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd : fpm == IRFPM_CEIL ? 
A64I_FRINTPd : A64I_FRINTZd); - } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { - return; } else { asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); } @@ -1428,45 +1424,12 @@ static void asm_mul(ASMState *as, IRIns *ir) asm_intmul(as, ir); } -static void asm_div(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - else -#endif - asm_fparith(as, ir, A64I_FDIVd); -} - -static void asm_pow(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_powi); -} - #define asm_addov(as, ir) asm_add(as, ir) #define asm_subov(as, ir) asm_sub(as, ir) #define asm_mulov(as, ir) asm_mul(as, ir) +#define asm_fpdiv(as, ir) asm_fparith(as, ir, A64I_FDIVd) #define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) - -static void asm_mod(ASMState *as, IRIns *ir) -{ -#if LJ_HASFFI - if (!irt_isint(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_modi); -} static void asm_neg(ASMState *as, IRIns *ir) { diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 8b5efc35c..6d898c5f5 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -1607,8 +1607,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); } -#else -#define asm_cnew(as, ir) ((void)0) #endif /* -- Write barriers ------------------------------------------------------ */ @@ -1677,8 +1675,6 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi) #if !LJ_SOFTFP32 static void asm_fpmath(ASMState *as, IRIns *ir) { - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; #if !LJ_SOFTFP if (ir->op2 <= IRFPM_TRUNC) asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); @@ -1766,41 +1762,13 @@ static void asm_mul(ASMState *as, IRIns *ir) } } -static void asm_mod(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isint(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_modi); -} - #if !LJ_SOFTFP32 -static void asm_pow(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_powi); -} - -static void asm_div(ASMState *as, IRIns *ir) +static void asm_fpdiv(ASMState *as, IRIns *ir) { -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - else -#endif #if !LJ_SOFTFP asm_fparith(as, ir, MIPSI_DIV_D); #else - asm_callid(as, ir, IRCALL_softfp_div); + asm_callid(as, ir, IRCALL_softfp_div); #endif } #endif @@ -1838,8 +1806,6 @@ static void asm_abs(ASMState *as, IRIns *ir) } #endif -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) - static void asm_arithov(ASMState *as, IRIns *ir) { /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */ diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index d9e4ad04c..c15b89fe7 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -1174,8 +1174,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); } -#else -#define asm_cnew(as, ir) ((void)0) #endif /* -- Write barriers ------------------------------------------------------ */ @@ -1246,8 +1244,6 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi) static void asm_fpmath(ASMState *as, IRIns *ir) { - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - return; if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) asm_fpunary(as, ir, PPCI_FSQRT); else @@ -1361,9 +1357,7 @@ static void asm_mul(ASMState *as, IRIns *ir) } } -#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV) -#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) -#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) +#define asm_fpdiv(as, ir) asm_fparith(as, ir, PPCI_FDIV) static void asm_neg(ASMState *as, IRIns *ir) { @@ -1387,7 +1381,6 @@ static void asm_neg(ASMState *as, IRIns *ir) } #define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS) -#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) { diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index d5cd6326a..7356a5f04 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1843,8 +1843,6 @@ static void asm_cnew(ASMState *as, IRIns *ir) asm_gencall(as, ci, 
args); emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); } -#else -#define asm_cnew(as, ir) ((void)0) #endif /* -- Write barriers ------------------------------------------------------ */ @@ -1950,8 +1948,6 @@ static void asm_fpmath(ASMState *as, IRIns *ir) fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); ra_left(as, RID_XMM0, ir->op1); } - } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) { - /* Rejoined to pow(). */ } else { asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); } @@ -1986,17 +1982,6 @@ static void asm_fppowi(ASMState *as, IRIns *ir) ra_left(as, RID_EAX, ir->op2); } -static void asm_pow(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - else -#endif - asm_fppowi(as, ir); -} - static int asm_swapops(ASMState *as, IRIns *ir) { IRIns *irl = IR(ir->op1); @@ -2193,27 +2178,7 @@ static void asm_mul(ASMState *as, IRIns *ir) asm_intarith(as, ir, XOg_X_IMUL); } -static void asm_div(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - else -#endif - asm_fparith(as, ir, XO_DIVSD); -} - -static void asm_mod(ASMState *as, IRIns *ir) -{ -#if LJ_64 && LJ_HASFFI - if (!irt_isint(ir->t)) - asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - else -#endif - asm_callid(as, ir, IRCALL_lj_vm_modi); -} +#define asm_fpdiv(as, ir) asm_fparith(as, ir, XO_DIVSD) static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) { diff --git a/src/lj_ir.h b/src/lj_ir.h index 60e335c2d..1a9a89a32 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -177,7 +177,7 @@ LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE); /* FPMATH sub-functions. ORDER FPM. */ #define IRFPMDEF(_) \ _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. 
*/ \ - _(SQRT) _(EXP2) _(LOG) _(LOG2) \ + _(SQRT) _(LOG) _(LOG2) \ _(OTHER) typedef enum { diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 35c02dc02..5c72478b5 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -192,7 +192,6 @@ typedef struct CCallInfo { _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \ _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \ _(FPMATH, sqrt, 1, N, NUM, XA_FP) \ - _(ANY, lj_vm_exp2, 1, N, NUM, XA_FP) \ _(ANY, log, 1, N, NUM, XA_FP) \ _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \ diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index ae65e15a5..7a02c6ff0 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -237,10 +237,11 @@ LJFOLDF(kfold_fpcall2) } LJFOLD(POW KNUM KINT) +LJFOLD(POW KNUM KNUM) LJFOLDF(kfold_numpow) { lua_Number a = knumleft; - lua_Number b = (lua_Number)fright->i; + lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright; lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD); return lj_ir_knum(J, y); } @@ -1077,7 +1078,7 @@ LJFOLDF(simplify_nummuldiv_negneg) } LJFOLD(POW any KINT) -LJFOLDF(simplify_numpow_xk) +LJFOLDF(simplify_numpow_xkint) { int32_t k = fright->i; TRef ref = fins->op1; @@ -1106,13 +1107,22 @@ LJFOLDF(simplify_numpow_xk) return ref; } +LJFOLD(POW any KNUM) +LJFOLDF(simplify_numpow_xknum) +{ + if (knumright == 0.5) /* x ^ 0.5 ==> sqrt(x) */ + return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT); + return NEXTFOLD; +} + LJFOLD(POW KNUM any) LJFOLDF(simplify_numpow_kx) { lua_Number n = knumleft; - if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */ - fins->o = IR_CONV; + if (n == 2.0 && irt_isint(fright->t)) { /* 2.0 ^ i ==> ldexp(1.0, i) */ #if LJ_TARGET_X86ORX64 + /* Different IR_LDEXP calling convention on x86/x64 requires conversion. 
*/ + fins->o = IR_CONV; fins->op1 = fins->op2; fins->op2 = IRCONV_NUM_INT; fins->op2 = (IRRef1)lj_opt_fold(J); diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index ba425334a..94cce5827 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -593,10 +593,10 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) /* Narrowing must be unconditional to preserve (-x)^i semantics. */ if (tvisint(vc) || numisint(numV(vc))) { int checkrange = 0; - /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ + /* pow() is faster for bigger exponents. But do this only for (+k)^i. */ if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { int32_t k = numberVint(vc); - if (!(k >= -65536 && k <= 65536)) goto split_pow; + if (!(k >= -65536 && k <= 65536)) goto force_pow_num; checkrange = 1; } if (!tref_isinteger(rc)) { @@ -607,19 +607,11 @@ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); } - return emitir(IRTN(IR_POW), rb, rc); + } else { +force_pow_num: + rc = lj_ir_tonum(J, rc); /* Want POW(num, num), not POW(num, int). */ } -split_pow: - /* FOLD covers most cases, but some are easier to do here. */ - if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb))))) - return rb; /* 1 ^ x ==> 1 */ - rc = lj_ir_tonum(J, rc); - if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5) - return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */ - /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. 
*/ - rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2); - rc = emitir(IRTN(IR_MUL), rb, rc); - return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2); + return emitir(IRTN(IR_POW), rb, rc); } /* -- Predictive narrowing of induction variables ------------------------- */ diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index e526b49d0..7925cfa5e 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -403,27 +403,6 @@ static void split_ir(jit_State *J) hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); break; case IR_FPMATH: - /* Try to rejoin pow from EXP2, MUL and LOG2. */ - if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) { - IRIns *irp = IR(nir->op1); - if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) { - IRIns *irm4 = IR(irp->op1); - IRIns *irm3 = IR(irm4->op1); - IRIns *irm12 = IR(irm3->op1); - IRIns *irl1 = IR(irm12->op1); - if (irm12->op1 > J->loopref && irl1->o == IR_CALLN && - irl1->op2 == IRCALL_lj_vm_log2) { - IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. 
*/ - IRRef arg3 = irm3->op2, arg4 = irm4->op2; - J->cur.nins--; - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3); - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4); - ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow); - hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); - break; - } - } - } hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); break; case IR_LDEXP: diff --git a/src/lj_vm.h b/src/lj_vm.h index 5a7bc3925..d572e7d74 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -92,11 +92,6 @@ LJ_ASMF double lj_vm_trunc(double); LJ_ASMF double lj_vm_trunc_sf(double); #endif #endif -#ifdef LUAJIT_NO_EXP2 -LJ_ASMF double lj_vm_exp2(double); -#else -#define lj_vm_exp2 exp2 -#endif #if LJ_HASFFI LJ_ASMF int lj_vm_errno(void); #endif diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 36178f293..623a686d8 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -79,13 +79,6 @@ double lj_vm_log2(double a) } #endif -#ifdef LUAJIT_NO_EXP2 -double lj_vm_exp2(double a) -{ - return exp(a * 0.6931471805599453); -} -#endif - #if !LJ_TARGET_X86ORX64 /* Unsigned x^k. */ static double lj_vm_powui(double x, uint32_t k) @@ -128,7 +121,6 @@ double lj_vm_foldfpm(double x, int fpm) case IRFPM_CEIL: return lj_vm_ceil(x); case IRFPM_TRUNC: return lj_vm_trunc(x); case IRFPM_SQRT: return sqrt(x); - case IRFPM_EXP2: return lj_vm_exp2(x); case IRFPM_LOG: return log(x); case IRFPM_LOG2: return lj_vm_log2(x); default: lua_assert(0); From 1a4ff1311740aa6c85f7a9101b6aa9bfaafa3f8e Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 27 May 2020 19:20:44 +0200 Subject: [PATCH 10/47] Optimize table length computation with hinting. 10x faster on loop with t[#t+1] = x idiom. Also used by table.insert. 
--- src/lj_asm.c | 7 ++++ src/lj_ffrecord.c | 6 +-- src/lj_ir.h | 1 + src/lj_ircall.h | 1 + src/lj_iropt.h | 2 +- src/lj_opt_fold.c | 4 +- src/lj_opt_loop.c | 10 +++-- src/lj_opt_mem.c | 97 ++++++++++++++++++++++++++++++----------------- src/lj_opt_sink.c | 3 +- src/lj_record.c | 4 +- src/lj_tab.c | 79 ++++++++++++++++++++++---------------- src/lj_tab.h | 3 ++ 12 files changed, 135 insertions(+), 82 deletions(-) diff --git a/src/lj_asm.c b/src/lj_asm.c index dd84a4f20..90373f27f 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1634,6 +1634,12 @@ static void asm_fuseequal(ASMState *as, IRIns *ir) } } +static void asm_alen(ASMState *as, IRIns *ir) +{ + asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len : + IRCALL_lj_tab_len_hint); +} + /* -- Instruction dispatch ------------------------------------------------ */ /* Assemble a single instruction. */ @@ -1716,6 +1722,7 @@ static void asm_ir(ASMState *as, IRIns *ir) case IR_FLOAD: asm_fload(as, ir); break; case IR_XLOAD: asm_xload(as, ir); break; case IR_SLOAD: asm_sload(as, ir); break; + case IR_ALEN: asm_alen(as, ir); break; case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; case IR_FSTORE: asm_fstore(as, ir); break; diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 42049511c..2557cadff 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -281,7 +281,7 @@ static void LJ_FASTCALL recff_rawlen(jit_State *J, RecordFFData *rd) if (tref_isstr(tr)) J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); else if (tref_istab(tr)) - J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, tr); + J->base[0] = emitir(IRTI(IR_ALEN), tr, TREF_NIL); /* else: Interpreter will throw. 
*/ UNUSED(rd); } @@ -1026,7 +1026,7 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) rd->nres = 0; if (tref_istab(ix.tab) && ix.val) { if (!J->base[2]) { /* Simple push: t[#t+1] = v */ - TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, ix.tab); + TRef trlen = emitir(IRTI(IR_ALEN), ix.tab, TREF_NIL); GCtab *t = tabV(&rd->argv[0]); ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1)); settabV(J->L, &ix.tabv, t); @@ -1050,7 +1050,7 @@ static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd) lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1); TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ? lj_opt_narrow_toint(J, J->base[3]) : - lj_ir_call(J, IRCALL_lj_tab_len, tab); + emitir(IRTI(IR_ALEN), tab, TREF_NIL); TRef hdr = recff_bufhdr(J); TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre); emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL)); diff --git a/src/lj_ir.h b/src/lj_ir.h index 1a9a89a32..a801d5d0e 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -106,6 +106,7 @@ _(XLOAD, L , ref, lit) \ _(SLOAD, L , lit, lit) \ _(VLOAD, L , ref, ___) \ + _(ALEN, L , ref, ref) \ \ _(ASTORE, S , ref, ref) \ _(HSTORE, S , ref, ref) \ diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 5c72478b5..dbc8c0dbb 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -168,6 +168,7 @@ typedef struct CCallInfo { _(ANY, lj_tab_clear, 1, FS, NIL, 0) \ _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \ _(ANY, lj_tab_len, 1, FL, INT, 0) \ + _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \ _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \ diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 02d6b946a..8333483f3 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -120,7 +120,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); LJ_FUNC 
TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J); LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J); LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim); diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 7a02c6ff0..42c57c9b0 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -2132,8 +2132,8 @@ LJFOLDX(lj_opt_fwd_hload) LJFOLD(ULOAD any) LJFOLDX(lj_opt_fwd_uload) -LJFOLD(CALLL any IRCALL_lj_tab_len) -LJFOLDX(lj_opt_fwd_tab_len) +LJFOLD(ALEN any any) +LJFOLDX(lj_opt_fwd_alen) /* Upvalue refs are really loads, but there are no corresponding stores. ** So CSE is ok for them, except for UREFO across a GC step (see below). diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index c5919ca01..2eacb7d7b 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c @@ -352,10 +352,12 @@ static void loop_unroll(LoopState *lps) irr = IR(ref); goto phiconv; } - } else if (ref != REF_DROP && irr->o == IR_CONV && - ref > invar && irr->op1 < invar) { - /* May need an extra PHI for a CONV. */ - ref = irr->op1; + } else if (ref != REF_DROP && ref > invar && + ((irr->o == IR_CONV && irr->op1 < invar) || + (irr->o == IR_ALEN && irr->op2 < invar && + irr->op2 != REF_NIL))) { + /* May need an extra PHI for a CONV or ALEN hint. */ + ref = irr->o == IR_CONV ? irr->op1 : irr->op2; irr = IR(ref); phiconv: if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 079f7cfe8..4c2c05fe9 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -363,7 +363,7 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) IRIns *ir; /* Check for any intervening guards (includes conflicting loads). 
*/ for (ir = IR(J->cur.nins-1); ir > store; ir--) - if (irt_isguard(ir->t) || ir->o == IR_CALLL) + if (irt_isguard(ir->t) || ir->o == IR_ALEN) goto doemit; /* No elimination possible. */ /* Remove redundant store from chain and replace with NOP. */ *refp = store->prev; @@ -381,6 +381,67 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ } +/* ALEN forwarding. */ +TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J) +{ + IRRef tab = fins->op1; /* Table reference. */ + IRRef lim = tab; /* Search limit. */ + IRRef ref; + + /* Search for conflicting HSTORE with numeric key. */ + ref = J->chain[IR_HSTORE]; + while (ref > lim) { + IRIns *store = IR(ref); + IRIns *href = IR(store->op1); + IRIns *key = IR(href->op2); + if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) { + lim = ref; /* Conflicting store found, limits search for ALEN. */ + break; + } + ref = store->prev; + } + + /* Try to find a matching ALEN. */ + ref = J->chain[IR_ALEN]; + while (ref > lim) { + /* CSE for ALEN only depends on the table, not the hint. */ + if (IR(ref)->op1 == tab) { + IRRef sref; + + /* Search for aliasing table.clear. */ + if (!fwd_aa_tab_clear(J, ref, tab)) + break; + + /* Search for hint-forwarding or conflicting store. */ + sref = J->chain[IR_ASTORE]; + while (sref > ref) { + IRIns *store = IR(sref); + IRIns *aref = IR(store->op1); + IRIns *fref = IR(aref->op1); + if (tab == fref->op1) { /* ASTORE to the same table. */ + /* Detect t[#t+1] = x idiom for push. */ + IRIns *idx = IR(aref->op2); + if (!irt_isnil(store->t) && + idx->o == IR_ADD && idx->op1 == ref && + IR(idx->op2)->o == IR_KINT && IR(idx->op2)->i == 1) { + /* Note: this requires an extra PHI check in loop unroll. */ + fins->op2 = aref->op2; /* Set ALEN hint. */ + } + goto doemit; /* Conflicting store, possibly giving a hint. */ + } else if (aa_table(J, tab, fref->op1) != ALIAS_NO) { + goto doemit; /* Conflicting store. 
*/ + } + sref = store->prev; + } + + return ref; /* Plain ALEN forwarding. */ + } + ref = IR(ref)->prev; + } +doemit: + return EMITFOLD; +} + /* -- ULOAD forwarding ---------------------------------------------------- */ /* The current alias analysis for upvalues is very simplistic. It only @@ -430,7 +491,6 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J) cselim: /* Try to find a matching load. Below the conflicting store, if any. */ - ref = J->chain[IR_ULOAD]; while (ref > lim) { IRIns *ir = IR(ref); @@ -845,39 +905,6 @@ TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J) return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ } -/* -- Forwarding of lj_tab_len -------------------------------------------- */ - -/* This is rather simplistic right now, but better than nothing. */ -TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J) -{ - IRRef tab = fins->op1; /* Table reference. */ - IRRef lim = tab; /* Search limit. */ - IRRef ref; - - /* Any ASTORE is a conflict and limits the search. */ - if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE]; - - /* Search for conflicting HSTORE with numeric key. */ - ref = J->chain[IR_HSTORE]; - while (ref > lim) { - IRIns *store = IR(ref); - IRIns *href = IR(store->op1); - IRIns *key = IR(href->op2); - if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) { - lim = ref; /* Conflicting store found, limits search for TLEN. */ - break; - } - ref = store->prev; - } - - /* Search for aliasing table.clear. */ - if (!fwd_aa_tab_clear(J, lim, tab)) - return lj_ir_emit(J); - - /* Try to find a matching load. Below the conflicting store, if any. */ - return lj_opt_cselim(J, lim); -} - /* -- ASTORE/HSTORE previous type analysis -------------------------------- */ /* Check whether the previous value for a table store is non-nil. 
diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c index c5323b118..111017026 100644 --- a/src/lj_opt_sink.c +++ b/src/lj_opt_sink.c @@ -78,8 +78,7 @@ static void sink_mark_ins(jit_State *J) switch (ir->o) { case IR_BASE: return; /* Finished. */ - case IR_CALLL: /* IRCALL_lj_tab_len */ - case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: + case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: case IR_ALEN: irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */ break; case IR_FLOAD: diff --git a/src/lj_record.c b/src/lj_record.c index 8eec0071f..4fc22742e 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1058,7 +1058,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) lj_record_call(J, func, 2); } else { if (LJ_52 && tref_istab(tr)) - return lj_ir_call(J, IRCALL_lj_tab_len, tr); + return emitir(IRTI(IR_ALEN), tr, TREF_NIL); lj_trace_err(J, LJ_TRERR_NOMM); } return 0; /* No result yet. */ @@ -2191,7 +2191,7 @@ void lj_record_ins(jit_State *J) if (tref_isstr(rc)) rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); else if (!LJ_52 && tref_istab(rc)) - rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); + rc = emitir(IRTI(IR_ALEN), rc, TREF_NIL); else rc = rec_mm_len(J, rc, rcv); break; diff --git a/src/lj_tab.c b/src/lj_tab.c index dcd24d31e..eb9ef4af9 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -639,49 +639,62 @@ int lj_tab_next(lua_State *L, GCtab *t, TValue *key) /* -- Table length calculation -------------------------------------------- */ -static MSize unbound_search(GCtab *t, MSize j) +/* Compute table length. Slow path with mixed array/hash lookups. */ +LJ_NOINLINE static MSize tab_len_slow(GCtab *t, size_t hi) { cTValue *tv; - MSize i = j; /* i is zero or a present index */ - j++; - /* find `i' and `j' such that i is present and j is not */ - while ((tv = lj_tab_getint(t, (int32_t)j)) && !tvisnil(tv)) { - i = j; - j *= 2; - if (j > (MSize)(INT_MAX-2)) { /* overflow? 
*/ - /* table was built with bad purposes: resort to linear search */ - i = 1; - while ((tv = lj_tab_getint(t, (int32_t)i)) && !tvisnil(tv)) i++; - return i - 1; + size_t lo = hi; + hi++; + /* Widening search for an upper bound. */ + while ((tv = lj_tab_getint(t, (int32_t)hi)) && !tvisnil(tv)) { + lo = hi; + hi += hi; + if (hi > (size_t)(INT_MAX-2)) { /* Punt and do a linear search. */ + lo = 1; + while ((tv = lj_tab_getint(t, (int32_t)lo)) && !tvisnil(tv)) lo++; + return (MSize)(lo - 1); } } - /* now do a binary search between them */ - while (j - i > 1) { - MSize m = (i+j)/2; - cTValue *tvb = lj_tab_getint(t, (int32_t)m); - if (tvb && !tvisnil(tvb)) i = m; else j = m; + /* Binary search to find a non-nil to nil transition. */ + while (hi - lo > 1) { + size_t mid = (lo+hi) >> 1; + cTValue *tvb = lj_tab_getint(t, (int32_t)mid); + if (tvb && !tvisnil(tvb)) lo = mid; else hi = mid; } - return i; + return (MSize)lo; } -/* -** Try to find a boundary in table `t'. A `boundary' is an integer index -** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). -*/ +/* Compute table length. Fast path. */ MSize LJ_FASTCALL lj_tab_len(GCtab *t) { - MSize j = (MSize)t->asize; - if (j > 1 && tvisnil(arrayslot(t, j-1))) { - MSize i = 1; - while (j - i > 1) { - MSize m = (i+j)/2; - if (tvisnil(arrayslot(t, m-1))) j = m; else i = m; + size_t hi = (size_t)t->asize; + if (hi) hi--; + /* In a growing array the last array element is very likely nil. */ + if (hi > 0 && LJ_LIKELY(tvisnil(arrayslot(t, hi)))) { + /* Binary search to find a non-nil to nil transition in the array. */ + size_t lo = 0; + while (hi - lo > 1) { + size_t mid = (lo+hi) >> 1; + if (tvisnil(arrayslot(t, mid))) hi = mid; else lo = mid; } - return i-1; + return (MSize)lo; + } + /* Without a hash part, there's an implicit nil after the last element. */ + return t->hmask ? tab_len_slow(t, hi) : (MSize)hi; +} + +#if LJ_HASJIT +/* Verify hinted table length or compute it. 
*/ +MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint) +{ + size_t asize = (size_t)t->asize; + cTValue *tv = arrayslot(t, hint); + if (LJ_LIKELY(hint+1 < asize)) { + if (LJ_LIKELY(!tvisnil(tv) && tvisnil(tv+1))) return (MSize)hint; + } else if (hint+1 <= asize && LJ_LIKELY(t->hmask == 0) && !tvisnil(tv)) { + return (MSize)hint; } - if (j) j--; - if (t->hmask <= 0) - return j; - return unbound_search(t, j); + return lj_tab_len(t); } +#endif diff --git a/src/lj_tab.h b/src/lj_tab.h index 597c94b23..f31590cd9 100644 --- a/src/lj_tab.h +++ b/src/lj_tab.h @@ -69,5 +69,8 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); +#if LJ_HASJIT +LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint); +#endif #endif From f5b0fff5a990004375ad43aa6e6c4a11a8b6eb7e Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 7 Jun 2020 14:34:09 +0200 Subject: [PATCH 11/47] Fix debug.debug() for non-string errors. --- src/lib_debug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib_debug.c b/src/lib_debug.c index a485ff8e9..07262830a 100644 --- a/src/lib_debug.c +++ b/src/lib_debug.c @@ -369,7 +369,8 @@ LJLIB_CF(debug_debug) return 0; if (luaL_loadbuffer(L, buffer, strlen(buffer), "=(debug command)") || lua_pcall(L, 0, 0, 0)) { - fputs(lua_tostring(L, -1), stderr); + const char *s = lua_tostring(L, -1); + fputs(s ? s : "(error object is not a string)", stderr); fputs("\n", stderr); } lua_settop(L, 0); /* remove eventual returns */ From 8ae5170cdc9c307bd81019b3e014391c9fd00581 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 13 Jun 2020 00:52:54 +0200 Subject: [PATCH 12/47] Improve assertions. 
--- src/Makefile | 2 +- src/Makefile.dep | 17 ++--- src/lib_io.c | 6 +- src/lib_jit.c | 4 +- src/lib_string.c | 6 +- src/lj_api.c | 138 ++++++++++++++++++++----------------- src/lj_asm.c | 122 +++++++++++++++++++++------------ src/lj_asm_arm.h | 119 ++++++++++++++++++-------------- src/lj_asm_arm64.h | 95 +++++++++++++++----------- src/lj_asm_mips.h | 151 ++++++++++++++++++++++++----------------- src/lj_asm_ppc.h | 113 +++++++++++++++++------------- src/lj_asm_x86.h | 161 ++++++++++++++++++++++++------------------- src/lj_assert.c | 28 ++++++++ src/lj_bcread.c | 20 +++--- src/lj_bcwrite.c | 24 +++++-- src/lj_buf.c | 4 +- src/lj_carith.c | 10 ++- src/lj_ccall.c | 19 ++++-- src/lj_ccallback.c | 42 ++++++------ src/lj_cconv.c | 57 +++++++++------- src/lj_cconv.h | 5 +- src/lj_cdata.c | 27 +++++--- src/lj_cdata.h | 7 +- src/lj_clib.c | 6 +- src/lj_cparse.c | 25 ++++--- src/lj_crecord.c | 19 +++--- src/lj_ctype.c | 13 ++-- src/lj_ctype.h | 14 +++- src/lj_debug.c | 18 ++--- src/lj_def.h | 26 +++++-- src/lj_dispatch.c | 11 +-- src/lj_emit_arm.h | 50 +++++++------- src/lj_emit_arm64.h | 21 +++--- src/lj_emit_mips.h | 22 +++--- src/lj_emit_ppc.h | 12 ++-- src/lj_emit_x86.h | 22 +++--- src/lj_err.c | 2 +- src/lj_func.c | 18 +++-- src/lj_gc.c | 72 ++++++++++++-------- src/lj_gc.h | 6 +- src/lj_gdbjit.c | 5 +- src/lj_ir.c | 31 +++++---- src/lj_ir.h | 5 +- src/lj_jit.h | 6 ++ src/lj_lex.c | 14 ++-- src/lj_lex.h | 6 ++ src/lj_load.c | 2 +- src/lj_mcode.c | 2 +- src/lj_meta.c | 6 +- src/lj_obj.h | 31 ++++++--- src/lj_opt_fold.c | 88 +++++++++++++----------- src/lj_opt_loop.c | 5 +- src/lj_opt_mem.c | 15 ++-- src/lj_opt_narrow.c | 17 ++--- src/lj_opt_split.c | 22 +++--- src/lj_parse.c | 114 +++++++++++++++++-------------- src/lj_record.c | 162 +++++++++++++++++++++++++++----------------- src/lj_snap.c | 96 +++++++++++++++----------- src/lj_snap.h | 3 +- src/lj_state.c | 18 +++-- src/lj_str.c | 5 +- src/lj_strfmt.c | 4 +- src/lj_strfmt.h | 3 +- src/lj_strfmt_num.c | 6 +- 
src/lj_strscan.c | 9 +-- src/lj_tab.c | 20 +++--- src/lj_target.h | 3 +- src/lj_trace.c | 48 +++++++------ src/lj_vmmath.c | 7 +- src/ljamalg.c | 1 + src/luaconf.h | 2 +- 71 files changed, 1363 insertions(+), 927 deletions(-) create mode 100644 src/lj_assert.c diff --git a/src/Makefile b/src/Makefile index 82a570320..a96c19972 100644 --- a/src/Makefile +++ b/src/Makefile @@ -484,7 +484,7 @@ LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o LJLIB_C= $(LJLIB_O:.o=.c) -LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ +LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \ diff --git a/src/Makefile.dep b/src/Makefile.dep index 2b1cb5ef2..03dba96b5 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -51,6 +51,7 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \ lj_asm_*.h +lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ lj_bcdef.h lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ @@ -155,7 +156,7 @@ lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \ lj_vm.h lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h + lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h lj_dispatch.h lj_bc.h lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ lj_traceerr.h 
lj_vm.h lj_strscan.h @@ -206,13 +207,13 @@ lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_vm.h lj_vmevent.h lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ir.h lj_vm.h -ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ - lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \ - lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \ - lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \ - lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \ - lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \ - lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \ +ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \ + lj_def.h lj_arch.h lj_gc.c lj_gc.h lj_err.h lj_errmsg.h lj_buf.h \ + lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h \ + lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ + lj_traceerr.h lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h \ + lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c \ + lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \ lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \ lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \ lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \ diff --git a/src/lib_io.c b/src/lib_io.c index 5e9d0d66d..c7d1bb311 100644 --- a/src/lib_io.c +++ b/src/lib_io.c @@ -101,9 +101,6 @@ static int io_file_close(lua_State *L, IOFileUD *iof) stat = pclose(iof->fp); #elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP stat = _pclose(iof->fp); -#else - lua_assert(0); - return 0; #endif #if LJ_52 iof->fp = NULL; @@ -112,7 +109,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof) ok = (stat != -1); #endif } else { - lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF); + 
lj_assertL((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF, + "close of unknown FILE* type"); setnilV(L->top++); lua_pushliteral(L, "cannot close standard file"); return 2; diff --git a/src/lib_jit.c b/src/lib_jit.c index acd6c293b..7348ef210 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -227,7 +227,7 @@ LJLIB_CF(jit_util_funcbc) if (pc < pt->sizebc) { BCIns ins = proto_bc(pt)[pc]; BCOp op = bc_op(ins); - lua_assert(op < BC__MAX); + lj_assertL(op < BC__MAX, "bad bytecode op %d", op); setintV(L->top, ins); setintV(L->top+1, lj_bc_mode[op]); L->top += 2; @@ -491,7 +491,7 @@ static int jitopt_param(jit_State *J, const char *str) int i; for (i = 0; i < JIT_P__MAX; i++) { size_t len = *(const uint8_t *)lst; - lua_assert(len != 0); + lj_assertJ(len != 0, "bad JIT_P_STRING"); if (strncmp(str, lst+1, len) == 0 && str[len] == '=') { int32_t n = 0; const char *p = &str[len+1]; diff --git a/src/lib_string.c b/src/lib_string.c index 6b88ee9b7..0d9290bcd 100644 --- a/src/lib_string.c +++ b/src/lib_string.c @@ -136,7 +136,7 @@ LJLIB_CF(string_dump) /* ------------------------------------------------------------------------ */ /* macro to `unsign' a character */ -#define uchar(c) ((unsigned char)(c)) +#define uchar(c) ((unsigned char)(c)) #define CAP_UNFINISHED (-1) #define CAP_POSITION (-2) @@ -645,7 +645,7 @@ static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry) { TValue *o = L->base+arg-1; cTValue *mo; - lua_assert(o < L->top); /* Caller already checks for existence. */ + lj_assertL(o < L->top, "bad usage"); /* Caller already checks for existence. */ if (LJ_LIKELY(tvisstr(o))) return strV(o); if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { @@ -717,7 +717,7 @@ LJLIB_CF(string_format) LJLIB_REC(.) 
lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1)); break; default: - lua_assert(0); + lj_assertL(0, "bad string format type"); break; } } diff --git a/src/lj_api.c b/src/lj_api.c index 24ae6611b..f1cfebbcc 100644 --- a/src/lj_api.c +++ b/src/lj_api.c @@ -28,8 +28,8 @@ /* -- Common helper functions --------------------------------------------- */ -#define api_checknelems(L, n) api_check(L, (n) <= (L->top - L->base)) -#define api_checkvalidindex(L, i) api_check(L, (i) != niltv(L)) +#define lj_checkapi_slot(idx) \ + lj_checkapi((idx) <= (L->top - L->base), "stack slot %d out of range", (idx)) static TValue *index2adr(lua_State *L, int idx) { @@ -37,7 +37,8 @@ static TValue *index2adr(lua_State *L, int idx) TValue *o = L->base + (idx - 1); return o < L->top ? o : niltv(L); } else if (idx > LUA_REGISTRYINDEX) { - api_check(L, idx != 0 && -idx <= L->top - L->base); + lj_checkapi(idx != 0 && -idx <= L->top - L->base, + "bad stack slot %d", idx); return L->top + idx; } else if (idx == LUA_GLOBALSINDEX) { TValue *o = &G(L)->tmptv; @@ -47,7 +48,8 @@ static TValue *index2adr(lua_State *L, int idx) return registry(L); } else { GCfunc *fn = curr_func(L); - api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn)); + lj_checkapi(fn->c.gct == ~LJ_TFUNC && !isluafunc(fn), + "calling frame is not a C function"); if (idx == LUA_ENVIRONINDEX) { TValue *o = &G(L)->tmptv; settabV(L, o, tabref(fn->c.env)); @@ -59,13 +61,27 @@ static TValue *index2adr(lua_State *L, int idx) } } -static TValue *stkindex2adr(lua_State *L, int idx) +static LJ_AINLINE TValue *index2adr_check(lua_State *L, int idx) +{ + TValue *o = index2adr(L, idx); + lj_checkapi(o != niltv(L), "invalid stack slot %d", idx); + return o; +} + +static TValue *index2adr_stack(lua_State *L, int idx) { if (idx > 0) { TValue *o = L->base + (idx - 1); + if (o < L->top) { + return o; + } else { + lj_checkapi(0, "invalid stack slot %d", idx); + return niltv(L); + } return o < L->top ? 
o : niltv(L); } else { - api_check(L, idx != 0 && -idx <= L->top - L->base); + lj_checkapi(idx != 0 && -idx <= L->top - L->base, + "invalid stack slot %d", idx); return L->top + idx; } } @@ -99,17 +115,17 @@ LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg) lj_err_callerv(L, LJ_ERR_STKOVM, msg); } -LUA_API void lua_xmove(lua_State *from, lua_State *to, int n) +LUA_API void lua_xmove(lua_State *L, lua_State *to, int n) { TValue *f, *t; - if (from == to) return; - api_checknelems(from, n); - api_check(from, G(from) == G(to)); + if (L == to) return; + lj_checkapi_slot(n); + lj_checkapi(G(L) == G(to), "move across global states"); lj_state_checkstack(to, (MSize)n); - f = from->top; + f = L->top; t = to->top = to->top + n; while (--n >= 0) copyTV(to, --t, --f); - from->top = f; + L->top = f; } LUA_API const lua_Number *lua_version(lua_State *L) @@ -129,7 +145,7 @@ LUA_API int lua_gettop(lua_State *L) LUA_API void lua_settop(lua_State *L, int idx) { if (idx >= 0) { - api_check(L, idx <= tvref(L->maxstack) - L->base); + lj_checkapi(idx <= tvref(L->maxstack) - L->base, "bad stack slot %d", idx); if (L->base + idx > L->top) { if (L->base + idx >= tvref(L->maxstack)) lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base)); @@ -138,23 +154,21 @@ LUA_API void lua_settop(lua_State *L, int idx) L->top = L->base + idx; } } else { - api_check(L, -(idx+1) <= (L->top - L->base)); + lj_checkapi(-(idx+1) <= (L->top - L->base), "bad stack slot %d", idx); L->top += idx+1; /* Shrinks top (idx < 0). 
*/ } } LUA_API void lua_remove(lua_State *L, int idx) { - TValue *p = stkindex2adr(L, idx); - api_checkvalidindex(L, p); + TValue *p = index2adr_stack(L, idx); while (++p < L->top) copyTV(L, p-1, p); L->top--; } LUA_API void lua_insert(lua_State *L, int idx) { - TValue *q, *p = stkindex2adr(L, idx); - api_checkvalidindex(L, p); + TValue *q, *p = index2adr_stack(L, idx); for (q = L->top; q > p; q--) copyTV(L, q, q-1); copyTV(L, p, L->top); } @@ -162,19 +176,18 @@ LUA_API void lua_insert(lua_State *L, int idx) static void copy_slot(lua_State *L, TValue *f, int idx) { if (idx == LUA_GLOBALSINDEX) { - api_check(L, tvistab(f)); + lj_checkapi(tvistab(f), "stack slot %d is not a table", idx); /* NOBARRIER: A thread (i.e. L) is never black. */ setgcref(L->env, obj2gco(tabV(f))); } else if (idx == LUA_ENVIRONINDEX) { GCfunc *fn = curr_func(L); if (fn->c.gct != ~LJ_TFUNC) lj_err_msg(L, LJ_ERR_NOENV); - api_check(L, tvistab(f)); + lj_checkapi(tvistab(f), "stack slot %d is not a table", idx); setgcref(fn->c.env, obj2gco(tabV(f))); lj_gc_barrier(L, fn, f); } else { - TValue *o = index2adr(L, idx); - api_checkvalidindex(L, o); + TValue *o = index2adr_check(L, idx); copyTV(L, o, f); if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ lj_gc_barrier(L, curr_func(L), f); @@ -183,7 +196,7 @@ static void copy_slot(lua_State *L, TValue *f, int idx) LUA_API void lua_replace(lua_State *L, int idx) { - api_checknelems(L, 1); + lj_checkapi_slot(1); copy_slot(L, L->top - 1, idx); L->top--; } @@ -219,7 +232,7 @@ LUA_API int lua_type(lua_State *L, int idx) #else int tt = (int)(((t < 8 ? 
0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u); #endif - lua_assert(tt != LUA_TNIL || tvisnil(o)); + lj_assertL(tt != LUA_TNIL || tvisnil(o), "bad tag conversion"); return tt; } } @@ -677,14 +690,14 @@ LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n) { GCfunc *fn; lj_gc_check(L); - api_checknelems(L, n); + lj_checkapi_slot(n); fn = lj_func_newC(L, (MSize)n, getcurrenv(L)); fn->c.f = f; L->top -= n; while (n--) copyTV(L, &fn->c.upvalue[n], L->top+n); setfuncV(L, L->top, fn); - lua_assert(iswhite(obj2gco(fn))); + lj_assertL(iswhite(obj2gco(fn)), "new GC object is not white"); incr_top(L); } @@ -754,7 +767,7 @@ LUA_API void *lua_newuserdata(lua_State *L, size_t size) LUA_API void lua_concat(lua_State *L, int n) { - api_checknelems(L, n); + lj_checkapi_slot(n); if (n >= 2) { n--; do { @@ -780,9 +793,8 @@ LUA_API void lua_concat(lua_State *L, int n) LUA_API void lua_gettable(lua_State *L, int idx) { - cTValue *v, *t = index2adr(L, idx); - api_checkvalidindex(L, t); - v = lj_meta_tget(L, t, L->top-1); + cTValue *t = index2adr_check(L, idx); + cTValue *v = lj_meta_tget(L, t, L->top-1); if (v == NULL) { L->top += 2; lj_vm_call(L, L->top-2, 1+1); @@ -794,9 +806,8 @@ LUA_API void lua_gettable(lua_State *L, int idx) LUA_API void lua_getfield(lua_State *L, int idx, const char *k) { - cTValue *v, *t = index2adr(L, idx); + cTValue *v, *t = index2adr_check(L, idx); TValue key; - api_checkvalidindex(L, t); setstrV(L, &key, lj_str_newz(L, k)); v = lj_meta_tget(L, t, &key); if (v == NULL) { @@ -812,14 +823,14 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k) LUA_API void lua_rawget(lua_State *L, int idx) { cTValue *t = index2adr(L, idx); - api_check(L, tvistab(t)); + lj_checkapi(tvistab(t), "stack slot %d is not a table", idx); copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1)); } LUA_API void lua_rawgeti(lua_State *L, int idx, int n) { cTValue *v, *t = index2adr(L, idx); - api_check(L, tvistab(t)); + lj_checkapi(tvistab(t), "stack slot %d is not a 
table", idx); v = lj_tab_getint(tabV(t), n); if (v) { copyTV(L, L->top, v); @@ -861,8 +872,7 @@ LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field) LUA_API void lua_getfenv(lua_State *L, int idx) { - cTValue *o = index2adr(L, idx); - api_checkvalidindex(L, o); + cTValue *o = index2adr_check(L, idx); if (tvisfunc(o)) { settabV(L, L->top, tabref(funcV(o)->c.env)); } else if (tvisudata(o)) { @@ -879,7 +889,7 @@ LUA_API int lua_next(lua_State *L, int idx) { cTValue *t = index2adr(L, idx); int more; - api_check(L, tvistab(t)); + lj_checkapi(tvistab(t), "stack slot %d is not a table", idx); more = lj_tab_next(L, tabV(t), L->top-1); if (more) { incr_top(L); /* Return new key and value slot. */ @@ -905,7 +915,7 @@ LUA_API void *lua_upvalueid(lua_State *L, int idx, int n) { GCfunc *fn = funcV(index2adr(L, idx)); n--; - api_check(L, (uint32_t)n < fn->l.nupvalues); + lj_checkapi((uint32_t)n < fn->l.nupvalues, "bad upvalue %d", n); return isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : (void *)&fn->c.upvalue[n]; } @@ -915,8 +925,10 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2) GCfunc *fn1 = funcV(index2adr(L, idx1)); GCfunc *fn2 = funcV(index2adr(L, idx2)); n1--; n2--; - api_check(L, isluafunc(fn1) && (uint32_t)n1 < fn1->l.nupvalues); - api_check(L, isluafunc(fn2) && (uint32_t)n2 < fn2->l.nupvalues); + lj_checkapi(isluafunc(fn1), "stack slot %d is not a Lua function", idx1); + lj_checkapi(isluafunc(fn2), "stack slot %d is not a Lua function", idx2); + lj_checkapi((uint32_t)n1 < fn1->l.nupvalues, "bad upvalue %d", n1+1); + lj_checkapi((uint32_t)n2 < fn2->l.nupvalues, "bad upvalue %d", n2+1); setgcrefr(fn1->l.uvptr[n1], fn2->l.uvptr[n2]); lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1])); } @@ -945,9 +957,8 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) LUA_API void lua_settable(lua_State *L, int idx) { TValue *o; - cTValue *t = index2adr(L, idx); - api_checknelems(L, 2); - 
api_checkvalidindex(L, t); + cTValue *t = index2adr_check(L, idx); + lj_checkapi_slot(2); o = lj_meta_tset(L, t, L->top-2); if (o) { /* NOBARRIER: lj_meta_tset ensures the table is not black. */ @@ -966,9 +977,8 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k) { TValue *o; TValue key; - cTValue *t = index2adr(L, idx); - api_checknelems(L, 1); - api_checkvalidindex(L, t); + cTValue *t = index2adr_check(L, idx); + lj_checkapi_slot(1); setstrV(L, &key, lj_str_newz(L, k)); o = lj_meta_tset(L, t, &key); if (o) { @@ -987,7 +997,7 @@ LUA_API void lua_rawset(lua_State *L, int idx) { GCtab *t = tabV(index2adr(L, idx)); TValue *dst, *key; - api_checknelems(L, 2); + lj_checkapi_slot(2); key = L->top-2; dst = lj_tab_set(L, t, key); copyTV(L, dst, key+1); @@ -999,7 +1009,7 @@ LUA_API void lua_rawseti(lua_State *L, int idx, int n) { GCtab *t = tabV(index2adr(L, idx)); TValue *dst, *src; - api_checknelems(L, 1); + lj_checkapi_slot(1); dst = lj_tab_setint(L, t, n); src = L->top-1; copyTV(L, dst, src); @@ -1011,13 +1021,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) { global_State *g; GCtab *mt; - cTValue *o = index2adr(L, idx); - api_checknelems(L, 1); - api_checkvalidindex(L, o); + cTValue *o = index2adr_check(L, idx); + lj_checkapi_slot(1); if (tvisnil(L->top-1)) { mt = NULL; } else { - api_check(L, tvistab(L->top-1)); + lj_checkapi(tvistab(L->top-1), "top stack slot is not a table"); mt = tabV(L->top-1); } g = G(L); @@ -1054,11 +1063,10 @@ LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname) LUA_API int lua_setfenv(lua_State *L, int idx) { - cTValue *o = index2adr(L, idx); + cTValue *o = index2adr_check(L, idx); GCtab *t; - api_checknelems(L, 1); - api_checkvalidindex(L, o); - api_check(L, tvistab(L->top-1)); + lj_checkapi_slot(1); + lj_checkapi(tvistab(L->top-1), "top stack slot is not a table"); t = tabV(L->top-1); if (tvisfunc(o)) { setgcref(funcV(o)->c.env, obj2gco(t)); @@ -1081,7 +1089,7 @@ LUA_API const char 
*lua_setupvalue(lua_State *L, int idx, int n) TValue *val; GCobj *o; const char *name; - api_checknelems(L, 1); + lj_checkapi_slot(1); name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o); if (name) { L->top--; @@ -1108,8 +1116,9 @@ static TValue *api_call_base(lua_State *L, int nargs) LUA_API void lua_call(lua_State *L, int nargs, int nresults) { - api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); - api_checknelems(L, nargs+1); + lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, + "thread called in wrong state %d", L->status); + lj_checkapi_slot(nargs+1); lj_vm_call(L, api_call_base(L, nargs), nresults+1); } @@ -1119,13 +1128,13 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) uint8_t oldh = hook_save(g); ptrdiff_t ef; int status; - api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); - api_checknelems(L, nargs+1); + lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, + "thread called in wrong state %d", L->status); + lj_checkapi_slot(nargs+1); if (errfunc == 0) { ef = 0; } else { - cTValue *o = stkindex2adr(L, errfunc); - api_checkvalidindex(L, o); + cTValue *o = index2adr_stack(L, errfunc); ef = savestack(L, o); } status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef); @@ -1151,7 +1160,8 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) global_State *g = G(L); uint8_t oldh = hook_save(g); int status; - api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR); + lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, + "thread called in wrong state %d", L->status); status = lj_vm_cpcall(L, func, ud, cpcall); if (status) hook_restore(g, oldh); return status; diff --git a/src/lj_asm.c b/src/lj_asm.c index 90373f27f..2659c8a2d 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -96,6 +96,12 @@ typedef struct ASMState { uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ } ASMState; +#ifdef LUA_USE_ASSERT +#define lj_assertA(c, ...) 
lj_assertG_(J2G(as->J), (c), __VA_ARGS__) +#else +#define lj_assertA(c, ...) ((void)as) +#endif + #define IR(ref) (&as->ir[(ref)]) #define ASMREF_TMP1 REF_TRUE /* Temp. register. */ @@ -127,9 +133,8 @@ static LJ_AINLINE void checkmclim(ASMState *as) #ifdef LUA_USE_ASSERT if (as->mcp + MCLIM_REDZONE < as->mcp_prev) { IRIns *ir = IR(as->curins+1); - fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp, - as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS); - lua_assert(0); + lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp, + as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS); } #endif if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as); @@ -243,7 +248,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q; } else { *p++ = '?'; - lua_assert(0); + lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt); } } else if (e[1] == 'f' || e[1] == 'i') { IRRef ref; @@ -261,7 +266,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) } else if (e[1] == 'x') { p += sprintf(p, "%08x", va_arg(argp, int32_t)); } else { - lua_assert(0); + lj_assertA(0, "bad debug format code"); } fmt = e+2; } @@ -320,7 +325,7 @@ static Reg ra_rematk(ASMState *as, IRRef ref) Reg r; if (ra_iskref(ref)) { r = ra_krefreg(ref); - lua_assert(!rset_test(as->freeset, r)); + lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r); ra_free(as, r); ra_modified(as, r); #if LJ_64 @@ -332,7 +337,9 @@ static Reg ra_rematk(ASMState *as, IRRef ref) } ir = IR(ref); r = ir->r; - lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); + lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref); + lj_assertA(!ra_hasspill(ir->s), + "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s); ra_free(as, r); ra_modified(as, r); ir->r = RID_INIT; /* Do not keep any hint. 
*/ @@ -346,7 +353,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref) ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ emit_getgl(as, r, jit_base); } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { - lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ + /* REF_NIL stores ASMREF_L register. */ + lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L"); emit_getgl(as, r, cur_L); #if LJ_64 } else if (ir->o == IR_KINT64) { @@ -359,8 +367,9 @@ static Reg ra_rematk(ASMState *as, IRRef ref) #endif #endif } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || - ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); + lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC || + ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL, + "rematk of bad IR op %d", ir->o); emit_loadi(as, r, ir->i); } return r; @@ -370,7 +379,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref) static int32_t ra_spill(ASMState *as, IRIns *ir) { int32_t slot = ir->s; - lua_assert(ir >= as->ir + REF_TRUE); + lj_assertA(ir >= as->ir + REF_TRUE, + "spill of K%03d", REF_BIAS - (int)(ir - as->ir)); if (!ra_hasspill(slot)) { if (irt_is64(ir->t)) { slot = as->evenspill; @@ -395,7 +405,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref) { IRIns *ir = IR(ref); Reg r = ir->r; - lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); + lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1); + lj_assertA(!ra_hasspill(ir->s), + "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s); ra_free(as, r); ra_modified(as, r); ir->r = RID_INIT; @@ -411,7 +423,7 @@ static Reg ra_restore(ASMState *as, IRRef ref) IRIns *ir = IR(ref); int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ Reg r = ir->r; - lua_assert(ra_hasreg(r)); + lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS); ra_sethint(ir->r, r); /* Keep hint. */ ra_free(as, r); if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. 
*/ @@ -440,14 +452,15 @@ static Reg ra_evict(ASMState *as, RegSet allow) { IRRef ref; RegCost cost = ~(RegCost)0; - lua_assert(allow != RSET_EMPTY); + lj_assertA(allow != RSET_EMPTY, "evict from empty set"); if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) { GPRDEF(MINCOST) } else { FPRDEF(MINCOST) } ref = regcost_ref(cost); - lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins)); + lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins), + "evict of out-of-range IR %04d", ref - REF_BIAS); /* Preferably pick any weak ref instead of a non-weak, non-const ref. */ if (!irref_isk(ref) && (as->weakset & allow)) { IRIns *ir = IR(ref); @@ -605,7 +618,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow) IRIns *ir = IR(ref); RegSet pick = as->freeset & allow; Reg r; - lua_assert(ra_noreg(ir->r)); + lj_assertA(ra_noreg(ir->r), + "IR %04d already has reg %d", ref - REF_BIAS, ir->r); if (pick) { /* First check register hint from propagation or PHI. */ if (ra_hashint(ir->r)) { @@ -669,8 +683,10 @@ static void ra_rename(ASMState *as, Reg down, Reg up) IRIns *ir = IR(ref); ir->r = (uint8_t)up; as->cost[down] = 0; - lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR)); - lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up)); + lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR), + "rename between GPR/FPR %d and %d", down, up); + lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down); + lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up); ra_free(as, down); /* 'down' is free ... */ ra_modified(as, down); rset_clear(as->freeset, up); /* ... and 'up' is now allocated. 
*/ @@ -711,7 +727,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r) { Reg dest = ra_dest(as, ir, RID2RSET(r)); if (dest != r) { - lua_assert(rset_test(as->freeset, r)); + lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r); ra_modified(as, r); emit_movrr(as, ir, dest, r); } @@ -744,8 +760,9 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) #endif #endif } else if (ir->o != IR_KPRI) { - lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || - ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); + lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC || + ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL, + "K%03d has bad IR op %d", REF_BIAS - lref, ir->o); emit_loadi(as, dest, ir->i); return; } @@ -887,11 +904,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref) #endif { /* Allocate stored values for TNEW, TDUP and CNEW. */ IRIns *irs; - lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW); + lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW, + "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o); for (irs = IR(as->snapref-1); irs > ir; irs--) if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) { - lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || - irs->o == IR_FSTORE || irs->o == IR_XSTORE); + lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE || + irs->o == IR_FSTORE || irs->o == IR_XSTORE, + "sunk store IR %04d has bad op %d", + (int)(irs - as->ir) - REF_BIAS, irs->o); asm_snap_alloc1(as, irs->op2); if (LJ_32 && (irs+1)->o == IR_HIOP) asm_snap_alloc1(as, (irs+1)->op2); @@ -938,7 +958,9 @@ static void asm_snap_alloc(ASMState *as) if (!irref_isk(ref)) { asm_snap_alloc1(as, ref); if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { - lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP); + lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP, + "snap %d[%d] points to bad SOFTFP IR %04d", + as->snapno, n, ref - REF_BIAS); asm_snap_alloc1(as, ref+1); } } @@ -1002,19 +1024,20 @@ static int32_t 
asm_stack_adjust(ASMState *as) } /* Must match with hash*() in lj_tab.c. */ -static uint32_t ir_khash(IRIns *ir) +static uint32_t ir_khash(ASMState *as, IRIns *ir) { uint32_t lo, hi; + UNUSED(as); if (irt_isstr(ir->t)) { return ir_kstr(ir)->hash; } else if (irt_isnum(ir->t)) { lo = ir_knum(ir)->u32.lo; hi = ir_knum(ir)->u32.hi << 1; } else if (irt_ispri(ir->t)) { - lua_assert(!irt_isnil(ir->t)); + lj_assertA(!irt_isnil(ir->t), "hash of nil key"); return irt_type(ir->t)-IRT_FALSE; } else { - lua_assert(irt_isgcv(ir->t)); + lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t)); lo = u32ptr(ir_kgc(ir)); #if LJ_GC64 hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15); @@ -1122,7 +1145,8 @@ static void asm_bufput(ASMState *as, IRIns *ir) args[0] = ir->op1; /* SBuf * */ args[1] = ir->op2; /* GCstr * */ irs = IR(ir->op2); - lua_assert(irt_isstr(irs->t)); + lj_assertA(irt_isstr(irs->t), + "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS); if (irs->o == IR_KGC) { GCstr *s = ir_kstr(irs); if (s->len == 1) { /* Optimize put of single-char string constant. 
*/ @@ -1136,7 +1160,8 @@ static void asm_bufput(ASMState *as, IRIns *ir) args[1] = ASMREF_TMP1; /* TValue * */ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum]; } else { - lua_assert(irt_isinteger(IR(irs->op1)->t)); + lj_assertA(irt_isinteger(IR(irs->op1)->t), + "TOSTR of non-numeric IR %04d", irs->op1); args[1] = irs->op1; /* int */ if (irs->op2 == IRTOSTR_INT) ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint]; @@ -1201,7 +1226,8 @@ static void asm_conv64(ASMState *as, IRIns *ir) IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); IRCallID id; IRRef args[2]; - lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP); + lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP, + "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS); args[LJ_BE] = (ir-1)->op1; args[LJ_LE] = ir->op1; if (st == IRT_NUM || st == IRT_FLOAT) { @@ -1256,15 +1282,16 @@ static void asm_collectargs(ASMState *as, IRIns *ir, const CCallInfo *ci, IRRef *args) { uint32_t n = CCI_XNARGS(ci); - lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */ + /* Account for split args. */ + lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n); if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } while (n-- > 1) { ir = IR(ir->op1); - lua_assert(ir->o == IR_CARG); + lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree"); args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; } args[0] = ir->op1 == REF_NIL ? 0 : ir->op1; - lua_assert(IR(ir->op1)->o != IR_CARG); + lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree"); } /* Reconstruct CCallInfo flags for CALLX*. */ @@ -1648,7 +1675,10 @@ static void asm_ir(ASMState *as, IRIns *ir) switch ((IROp)ir->o) { /* Miscellaneous ops. */ case IR_LOOP: asm_loop(as); break; - case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; + case IR_NOP: case IR_XBAR: + lj_assertA(!ra_used(ir), + "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS); + break; case IR_USE: ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? 
RSET_FPR : RSET_GPR); break; case IR_PHI: asm_phi(as, ir); break; @@ -1687,7 +1717,9 @@ static void asm_ir(ASMState *as, IRIns *ir) #if LJ_SOFTFP32 case IR_DIV: case IR_POW: case IR_ABS: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: - lua_assert(0); /* Unused for LJ_SOFTFP32. */ + /* Unused for LJ_SOFTFP32. */ + lj_assertA(0, "IR %04d with unused op %d", + (int)(ir - as->ir) - REF_BIAS, ir->o); break; #else case IR_DIV: asm_div(as, ir); break; @@ -1736,7 +1768,8 @@ static void asm_ir(ASMState *as, IRIns *ir) #if LJ_HASFFI asm_cnew(as, ir); #else - lua_assert(0); + lj_assertA(0, "IR %04d with unused op %d", + (int)(ir - as->ir) - REF_BIAS, ir->o); #endif break; @@ -1814,8 +1847,10 @@ static void asm_head_side(ASMState *as) for (i = as->stopins; i > REF_BASE; i--) { IRIns *ir = IR(i); RegSP rs; - lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || - (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL); + lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || + (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL, + "IR %04d has bad parent op %d", + (int)(ir - as->ir) - REF_BIAS, ir->o); rs = as->parentmap[i - REF_FIRST]; if (ra_hasreg(ir->r)) { rset_clear(allow, ir->r); @@ -2074,7 +2109,7 @@ static void asm_setup_regsp(ASMState *as) ir = IR(REF_FIRST); if (as->parent) { uint16_t *p; - lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir); + lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir); if (lastir - ir > LJ_MAX_JSLOTS) lj_trace_err(as->J, LJ_TRERR_NYICOAL); as->stopins = (IRRef)((lastir-1) - as->ir); @@ -2378,7 +2413,10 @@ void lj_asm_trace(jit_State *J, GCtrace *T) /* Assemble a trace in linear backwards order. */ for (as->curins--; as->curins > as->stopins; as->curins--) { IRIns *ir = IR(as->curins); - lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ + /* 64 bit types handled by SPLIT for 32 bit archs. 
*/ + lj_assertA(!(LJ_32 && irt_isint64(ir->t)), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) continue; /* Dead-code elimination can be soooo easy. */ if (irt_isguard(ir->t)) @@ -2408,7 +2446,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) asm_phi_fixup(as); if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */ - lua_assert(J->curfinal->nk == T->nk); + lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth"); memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins, (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */ T->nins = J->curfinal->nins; diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index f922ed0f5..56ce4a079 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -41,7 +41,7 @@ static Reg ra_scratchpair(ASMState *as, RegSet allow) } } } - lua_assert(rset_test(RSET_GPREVEN, r)); + lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r); ra_modified(as, r); ra_modified(as, r+1); RA_DBGX((as, "scratchpair $r $r", r, r+1)); @@ -269,7 +269,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, return; } } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) { - lua_assert(ofs == 0); + lj_assertA(ofs == 0, "bad usage"); ofs = (int32_t)sizeof(GCstr); if (irref_isk(ir->op2)) { ofs += IR(ir->op2)->i; @@ -389,9 +389,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u; if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ if (irt_isnum(ir->t)) { - lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. */ + lj_assertA(rset_test(as->freeset, gpr+1), + "reg %d not free", gpr+1); /* Ditto. 
*/ emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15)); gpr += 2; } else { @@ -408,7 +410,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) #endif { if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ if (ref) ra_leftov(as, gpr, ref); gpr++; } else { @@ -433,7 +436,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ ra_evictset(as, drop); /* Evictions must be performed first. */ if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); + lj_assertA(!irt_ispri(ir->t), "PRI dest"); if (!LJ_SOFTFP && irt_isfp(ir->t)) { if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) { Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); @@ -530,13 +533,17 @@ static void asm_conv(ASMState *as, IRIns *ir) #endif IRRef lref = ir->op1; /* 64 bit integer conversions are handled by SPLIT. */ - lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64)); + lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); #if LJ_SOFTFP /* FP conversions are handled by SPLIT. */ - lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); + lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), + "IR %04d has FP type", + (int)(ir - as->ir) - REF_BIAS); /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ #else - lua_assert(irt_type(ir->t) != st); + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -553,7 +560,8 @@ static void asm_conv(ASMState *as, IRIns *ir) } else if (stfp) { /* FP to integer conversion. 
*/ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg left = ra_alloc1(as, lref, RSET_FPR); @@ -572,7 +580,7 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ Reg left = ra_alloc1(as, lref, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); if ((as->flags & JIT_F_ARMV6)) { ARMIns ai = st == IRT_I8 ? ARMI_SXTB : st == IRT_U8 ? ARMI_UXTB : @@ -667,7 +675,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) ra_allockreg(as, i32ptr(ir_knum(ir)), dest); } else { #if LJ_SOFTFP - lua_assert(0); + lj_assertA(0, "unsplit FP op"); #else /* Otherwise force a spill and use the spill slot. */ emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); @@ -811,7 +819,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu); /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; + khash = irref_isk(refkey) ? 
ir_khash(as, irkey) : 1; if (khash == 0) { emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); } else { @@ -867,7 +875,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) Reg node = ra_alloc1(as, ir->op1, RSET_GPR); Reg key = RID_NONE, type = RID_TMP, idx = node; RegSet allow = rset_exclude(RSET_GPR, node); - lua_assert(ofs % sizeof(Node) == 0); + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (ofs > 4095) { idx = dest; rset_clear(allow, dest); @@ -934,7 +942,7 @@ static void asm_uref(ASMState *as, IRIns *ir) static void asm_fref(ASMState *as, IRIns *ir) { UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); + lj_assertA(!ra_used(ir), "unfused FREF"); } static void asm_strref(ASMState *as, IRIns *ir) @@ -971,25 +979,27 @@ static void asm_strref(ASMState *as, IRIns *ir) /* -- Loads and stores ---------------------------------------------------- */ -static ARMIns asm_fxloadins(IRIns *ir) +static ARMIns asm_fxloadins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: return ARMI_LDRSB; case IRT_U8: return ARMI_LDRB; case IRT_I16: return ARMI_LDRSH; case IRT_U16: return ARMI_LDRH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D; + case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D; case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */ default: return ARMI_LDR; } } -static ARMIns asm_fxstoreins(IRIns *ir) +static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: case IRT_U8: return ARMI_STRB; case IRT_I16: case IRT_U16: return ARMI_STRH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D; + case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D; case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */ default: return ARMI_STR; } @@ -997,12 +1007,13 @@ static ARMIns asm_fxstoreins(IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir) { - if (ir->op1 == REF_NIL) { - 
lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */ + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ + /* We can end up here if DCE is turned off. */ + lj_assertA(!ra_used(ir), "NYI FLOAD GG_State"); } else { Reg dest = ra_dest(as, ir, RSET_GPR); Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); - ARMIns ai = asm_fxloadins(ir); + ARMIns ai = asm_fxloadins(as, ir); int32_t ofs; if (ir->op2 == IRFL_TAB_ARRAY) { ofs = asm_fuseabase(as, ir->op1); @@ -1026,7 +1037,7 @@ static void asm_fstore(ASMState *as, IRIns *ir) IRIns *irf = IR(ir->op1); Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); int32_t ofs = field_ofs[irf->op2]; - ARMIns ai = asm_fxstoreins(ir); + ARMIns ai = asm_fxstoreins(as, ir); if ((ai & 0x04000000)) emit_lso(as, ai, src, idx, ofs); else @@ -1038,8 +1049,8 @@ static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); + lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); + asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); } static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) @@ -1047,7 +1058,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) if (ir->r != RID_SINK) { Reg src = ra_alloc1(as, ir->op2, (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, rset_exclude(RSET_GPR, src), ofs); } } @@ -1066,8 +1077,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) rset_clear(allow, type); } if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); + lj_assertA((LJ_SOFTFP ? 
0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad load type %d", irt_type(ir->t)); dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); rset_clear(allow, dest); } @@ -1133,10 +1145,13 @@ static void asm_sload(ASMState *as, IRIns *ir) IRType t = hiop ? IRT_NUM : irt_type(ir->t); Reg dest = RID_NONE, type = RID_NONE, base; RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); #if LJ_SOFTFP - lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ + lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), + "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ if (hiop && ra_used(ir+1)) { type = ra_dest(as, ir+1, allow); rset_clear(allow, type); @@ -1152,8 +1167,9 @@ static void asm_sload(ASMState *as, IRIns *ir) Reg tmp = RID_NONE; if ((ir->op2 & IRSLOAD_CONVERT)) tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); + lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad SLOAD type %d", irt_type(ir->t)); dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? 
RSET_FPR : allow); rset_clear(allow, dest); base = ra_alloc1(as, REF_BASE, allow); @@ -1218,7 +1234,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) IRRef args[4]; RegSet allow = (RSET_GPR & ~RSET_SCRATCH); RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); as->gcsteps++; if (ra_hasreg(ir->r)) @@ -1230,10 +1247,10 @@ static void asm_cnew(ASMState *as, IRIns *ir) /* Initialize immutable cdata object. */ if (ir->o == IR_CNEWI) { int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); if (sz == 8) { ofs += 4; ir++; - lua_assert(ir->o == IR_HIOP); + lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI"); } for (;;) { Reg r = ra_alloc1(as, ir->op2, allow); @@ -1299,7 +1316,7 @@ static void asm_obar(ASMState *as, IRIns *ir) MCLabel l_end; Reg obj, val, tmp; /* No need for other object barriers (yet). 
*/ - lua_assert(IR(ir->op1)->o == IR_UREFC); + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); args[0] = ASMREF_TMP1; /* global_State *g */ @@ -1575,7 +1592,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) #define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR) #define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR) #define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR) -#define asm_brol(as, ir) lua_assert(0) +#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) { @@ -1726,7 +1743,8 @@ static void asm_intcomp(ASMState *as, IRIns *ir) Reg left; uint32_t m; int cmpprev0 = 0; - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), + "bad comparison data type %d", irt_type(ir->t)); if (asm_swapops(as, lref, rref)) { Reg tmp = lref; lref = rref; rref = tmp; if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ @@ -1895,10 +1913,11 @@ static void asm_hiop(ASMState *as, IRIns *ir) case IR_CNEWI: /* Nothing to do here. Handled by lo op itself. */ break; - default: lua_assert(0); break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; } #else - UNUSED(as); UNUSED(ir); lua_assert(0); + /* Unused without SOFTFP or FFI. 
*/ + UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP"); #endif } @@ -1923,7 +1942,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, if (irp) { if (!ra_hasspill(irp->s)) { pbase = irp->r; - lua_assert(ra_hasreg(pbase)); + lj_assertA(ra_hasreg(pbase), "base reg lost"); } else if (allow) { pbase = rset_pickbot(allow); } else { @@ -1935,7 +1954,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, } emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); k = emit_isk12(0, (int32_t)(8*topslot)); - lua_assert(k); + lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); emit_n(as, ARMI_CMP^k, RID_TMP); emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase); emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, @@ -1972,7 +1991,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) #if LJ_SOFTFP RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); Reg tmp; - lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ + /* LJ_SOFTFP: must be a number constant. 
*/ + lj_assertA(irref_isk(ref), "unsplit FP op"); tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, rset_exclude(RSET_GPREVEN, RID_BASE)); emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs); @@ -1986,7 +2006,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } else { RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); Reg type; - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "restore of IR type %d", irt_type(ir->t)); if (!irt_ispri(ir->t)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE)); emit_lso(as, ARMI_STR, src, RID_BASE, ofs); @@ -2006,7 +2027,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } checkmclim(as); } - lua_assert(map + nent == flinks); + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); } /* -- GC handling --------------------------------------------------------- */ @@ -2092,7 +2113,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) rset_clear(allow, ra_dest(as, ir, allow)); } else { Reg r = irp->r; - lua_assert(ra_hasreg(r)); + lj_assertA(ra_hasreg(r), "base reg lost"); rset_clear(allow, r); if (r != ir->r && !rset_test(as->freeset, r)) ra_restore(as, regcost_ref(as->cost[r])); @@ -2114,7 +2135,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) } else { /* Patch stack adjustment. */ uint32_t k = emit_isk12(ARMI_ADD, spadj); - lua_assert(k); + lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); } /* Patch exit branch. 
*/ @@ -2196,7 +2217,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) if (!cstart) cstart = p; } } - lua_assert(cstart != NULL); + lj_assertJ(cstart != NULL, "exit stub %d not found", exitno); lj_mcode_sync(cstart, cend); lj_mcode_patch(J, mcarea, 1); } diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index a35022239..0729a3a5f 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -213,7 +213,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) return A64F_M(ir->r); } else if (irref_isk(ref)) { uint32_t m; - int64_t k = get_k64val(ir); + int64_t k = get_k64val(as, ref); if ((ai & 0x1f000000) == 0x0a000000) m = emit_isk13(k, irt_is64(ir->t)); else @@ -354,9 +354,9 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) static int asm_fuseandshift(ASMState *as, IRIns *ir) { IRIns *irl = IR(ir->op1); - lua_assert(ir->o == IR_BAND); + lj_assertA(ir->o == IR_BAND, "bad usage"); if (canfuse(as, irl) && irref_isk(ir->op2)) { - uint64_t mask = get_k64val(IR(ir->op2)); + uint64_t mask = get_k64val(as, ir->op2); if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) { int32_t shmask = irt_is64(irl->t) ? 63 : 31; int32_t shift = (IR(irl->op2)->i & shmask); @@ -384,7 +384,7 @@ static int asm_fuseandshift(ASMState *as, IRIns *ir) static int asm_fuseorshift(ASMState *as, IRIns *ir) { IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); - lua_assert(ir->o == IR_BOR); + lj_assertA(ir->o == IR_BOR, "bad usage"); if (canfuse(as, irl) && canfuse(as, irr) && ((irl->o == IR_BSHR && irr->o == IR_BSHL) || (irl->o == IR_BSHL && irr->o == IR_BSHR))) { @@ -428,7 +428,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) if (ref) { if (irt_isfp(ir->t)) { if (fpr <= REGARG_LASTFPR) { - lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */ + lj_assertA(rset_test(as->freeset, fpr), + "reg %d not free", fpr); /* Must have been evicted. 
*/ ra_leftov(as, fpr, ref); fpr++; } else { @@ -438,7 +439,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) } } else { if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ ra_leftov(as, gpr, ref); gpr++; } else { @@ -459,7 +461,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) rset_clear(drop, ir->r); /* Dest reg handled below. */ ra_evictset(as, drop); /* Evictions must be performed first. */ if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); + lj_assertA(!irt_ispri(ir->t), "PRI dest"); if (irt_isfp(ir->t)) { if (ci->flags & CCI_CASTU64) { Reg dest = ra_dest(as, ir, RSET_FPR) & 31; @@ -546,7 +548,7 @@ static void asm_conv(ASMState *as, IRIns *ir) int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); int stfp = (st == IRT_NUM || st == IRT_FLOAT); IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -566,7 +568,8 @@ static void asm_conv(ASMState *as, IRIns *ir) } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg left = ra_alloc1(as, lref, RSET_FPR); @@ -586,7 +589,7 @@ static void asm_conv(ASMState *as, IRIns *ir) A64Ins ai = st == IRT_I8 ? A64I_SXTBw : st == IRT_U8 ? A64I_UXTBw : st == IRT_I16 ? 
A64I_SXTHw : A64I_UXTHw; - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); emit_dn(as, ai, dest, left); } else { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -650,7 +653,8 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) { RegSet allow = rset_exclude(RSET_GPR, base); IRIns *ir = IR(ref); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "store of IR type %d", irt_type(ir->t)); if (irref_isk(ref)) { TValue k; lj_ir_kvalue(as->J->L, &k, ir); @@ -770,7 +774,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) } rset_clear(allow, scr); } else { - lua_assert(irt_ispri(kt) && !irt_isnil(kt)); + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); scr = ra_scratch(as, rset_clear(allow, type)); rset_clear(allow, scr); @@ -831,7 +835,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) rset_clear(allow, type); } /* Load main position relative to tab->node into dest. */ - khash = isk ? ir_khash(irkey) : 1; + khash = isk ? 
ir_khash(as, irkey) : 1; if (khash == 0) { emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node)); } else { @@ -886,7 +890,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) Reg key, idx = node; RegSet allow = rset_exclude(RSET_GPR, node); uint64_t k; - lua_assert(ofs % sizeof(Node) == 0); + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (bigofs) { idx = dest; rset_clear(allow, dest); @@ -936,7 +940,7 @@ static void asm_uref(ASMState *as, IRIns *ir) static void asm_fref(ASMState *as, IRIns *ir) { UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); + lj_assertA(!ra_used(ir), "unfused FREF"); } static void asm_strref(ASMState *as, IRIns *ir) @@ -988,7 +992,7 @@ static void asm_fload(ASMState *as, IRIns *ir) Reg idx; A64Ins ai = asm_fxloadins(ir); int32_t ofs; - if (ir->op1 == REF_NIL) { + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ idx = RID_GL; ofs = (ir->op2 << 2) - GG_OFS(g); } else { @@ -1019,7 +1023,7 @@ static void asm_fstore(ASMState *as, IRIns *ir) static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); + lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); } @@ -1037,8 +1041,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) Reg idx, tmp, type; int32_t ofs = 0; RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; - lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || - irt_isint(ir->t)); + lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || + irt_isint(ir->t), + "bad load type %d", irt_type(ir->t)); if (ra_used(ir)) { Reg dest = ra_dest(as, ir, allow); tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest; @@ -1057,7 +1062,8 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) /* Always do the type check, even if the load result is unused. 
*/ asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE); if (irt_type(ir->t) >= IRT_NUM) { - lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); + lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t), + "bad load type %d", irt_type(ir->t)); emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp); } else if (irt_isaddr(ir->t)) { @@ -1122,8 +1128,10 @@ static void asm_sload(ASMState *as, IRIns *ir) IRType1 t = ir->t; Reg dest = RID_NONE, base; RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { dest = ra_scratch(as, RSET_FPR); asm_tointg(as, ir, dest); @@ -1132,7 +1140,8 @@ static void asm_sload(ASMState *as, IRIns *ir) Reg tmp = RID_NONE; if ((ir->op2 & IRSLOAD_CONVERT)) tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); - lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t)); + lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t), + "bad SLOAD type %d", irt_type(t)); dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest)); if (irt_isaddr(t)) { @@ -1172,7 +1181,8 @@ static void asm_sload(ASMState *as, IRIns *ir) /* Need type check, even if the load result is unused. */ asm_guardcc(as, irt_isnum(t) ? 
CC_LS : CC_NE); if (irt_type(t) >= IRT_NUM) { - lua_assert(irt_isinteger(t) || irt_isnum(t)); + lj_assertA(irt_isinteger(t) || irt_isnum(t), + "bad SLOAD type %d", irt_type(t)); emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), ra_allock(as, LJ_TISNUM << 15, allow), tmp); } else if (irt_isnil(t)) { @@ -1207,7 +1217,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; IRRef args[4]; RegSet allow = (RSET_GPR & ~RSET_SCRATCH); - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); as->gcsteps++; asm_setupresult(as, ir, ci); /* GCcdata * */ @@ -1215,7 +1226,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) if (ir->o == IR_CNEWI) { int32_t ofs = sizeof(GCcdata); Reg r = ra_alloc1(as, ir->op2, allow); - lua_assert(sz == 4 || sz == 8); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs); } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; @@ -1274,7 +1285,7 @@ static void asm_obar(ASMState *as, IRIns *ir) RegSet allow = RSET_GPR; Reg obj, val, tmp; /* No need for other object barriers (yet). 
*/ - lua_assert(IR(ir->op1)->o == IR_UREFC); + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); args[0] = ASMREF_TMP1; /* global_State *g */ @@ -1544,7 +1555,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) #define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR) #define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR) #define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR) -#define asm_brol(as, ir) lua_assert(0) +#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc) { @@ -1625,15 +1636,16 @@ static void asm_intcomp(ASMState *as, IRIns *ir) Reg left; uint32_t m; int cmpprev0 = 0; - lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || - irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); + lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || + irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t), + "bad comparison data type %d", irt_type(ir->t)); if (asm_swapops(as, lref, rref)) { IRRef tmp = lref; lref = rref; rref = tmp; if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ } oldcc = cc; - if (irref_isk(rref) && get_k64val(IR(rref)) == 0) { + if (irref_isk(rref) && get_k64val(as, rref) == 0) { IRIns *irl = IR(lref); if (cc == CC_GE) cc = CC_PL; else if (cc == CC_LT) cc = CC_MI; @@ -1648,7 +1660,7 @@ static void asm_intcomp(ASMState *as, IRIns *ir) Reg tmp = blref; blref = brref; brref = tmp; } if (irref_isk(brref)) { - uint64_t k = get_k64val(IR(brref)); + uint64_t k = get_k64val(as, brref); if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) { asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k)); @@ -1697,7 +1709,8 @@ static void asm_comp(ASMState *as, IRIns *ir) /* Hiword op of a split 64 bit op. Previous op must be the loword op. 
*/ static void asm_hiop(ASMState *as, IRIns *ir) { - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on 64 bit. */ + UNUSED(as); UNUSED(ir); + lj_assertA(0, "unexpected HIOP"); /* Unused on 64 bit. */ } /* -- Profiling ----------------------------------------------------------- */ @@ -1705,7 +1718,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) static void asm_prof(ASMState *as, IRIns *ir) { uint32_t k = emit_isk13(HOOK_PROFILE, 0); - lua_assert(k != 0); + lj_assertA(k != 0, "HOOK_PROFILE does not fit in K13"); UNUSED(ir); asm_guardcc(as, CC_NE); emit_n(as, A64I_TSTw^k, RID_TMP); @@ -1723,7 +1736,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, if (irp) { if (!ra_hasspill(irp->s)) { pbase = irp->r; - lua_assert(ra_hasreg(pbase)); + lj_assertA(ra_hasreg(pbase), "base reg lost"); } else if (allow) { pbase = rset_pickbot(allow); } else { @@ -1735,7 +1748,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, } emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); k = emit_isk12((8*topslot)); - lua_assert(k); + lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); emit_n(as, A64I_CMPx^k, RID_TMP); emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, @@ -1776,7 +1789,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } checkmclim(as); } - lua_assert(map + nent == flinks); + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); } /* -- GC handling --------------------------------------------------------- */ @@ -1864,7 +1877,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) rset_clear(allow, ra_dest(as, ir, allow)); } else { Reg r = irp->r; - lua_assert(ra_hasreg(r)); + lj_assertA(ra_hasreg(r), "base reg lost"); rset_clear(allow, r); if (r != ir->r && !rset_test(as->freeset, r)) ra_restore(as, regcost_ref(as->cost[r])); @@ -1888,7 +1901,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) } else { /* Patch stack adjustment. 
*/ uint32_t k = emit_isk12(spadj); - lua_assert(k); + lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); } /* Patch exit branch. */ @@ -1974,7 +1987,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) } else if ((ins & 0xfc000000u) == 0x14000000u && ((ins ^ (px-p)) & 0x03ffffffu) == 0) { /* Patch b. */ - lua_assert(A64F_S_OK(delta, 26)); + lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range"); *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta)); if (!cstart) cstart = p; } else if ((ins & 0x7e000000u) == 0x34000000u && @@ -1995,7 +2008,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) } { /* Always patch long-range branch in exit stub itself. */ ptrdiff_t delta = target - px; - lua_assert(A64F_S_OK(delta, 26)); + lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range"); *px = A64I_B | A64F_S26(delta); if (!cstart) cstart = px; } diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 6d898c5f5..a2b8d8e05 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -23,7 +23,7 @@ static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) { Reg r = IR(ref)->r; if (ra_noreg(r)) { - if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(IR(ref)) == 0) + if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0) return RID_ZERO; r = ra_allocref(as, ref, allow); } else { @@ -66,10 +66,10 @@ static void asm_sparejump_setup(ASMState *as) { MCode *mxp = as->mcbot; if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == sizeof(MCLink)) { - lua_assert(MIPSI_NOP == 0); + lj_assertA(MIPSI_NOP == 0, "bad NOP"); memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode)); mxp += MIPS_SPAREJUMP*2; - lua_assert(mxp < as->mctop); + lj_assertA(mxp < as->mctop, "MIPS_SPAREJUMP too big"); lj_mcode_sync(as->mcbot, mxp); lj_mcode_commitbot(as->J, mxp); as->mcbot = mxp; @@ -84,7 +84,8 @@ static void asm_exitstub_setup(ASMState *as) /* sw TMP, 0(sp); j 
->vm_exit_handler; li TMP, traceno */ *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); - lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0); + lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0, + "branch target out of range"); *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; as->mctop = mxp; } @@ -195,20 +196,20 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, if (ra_noreg(ir->r) && canfuse(as, ir)) { if (ir->o == IR_ADD) { intptr_t ofs2; - if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(IR(ir->op2)), + if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2), checki16(ofs2))) { ref = ir->op1; ofs = (int32_t)ofs2; } } else if (ir->o == IR_STRREF) { intptr_t ofs2 = 65536; - lua_assert(ofs == 0); + lj_assertA(ofs == 0, "bad usage"); ofs = (int32_t)sizeof(GCstr); if (irref_isk(ir->op2)) { - ofs2 = ofs + get_kval(IR(ir->op2)); + ofs2 = ofs + get_kval(as, ir->op2); ref = ir->op1; } else if (irref_isk(ir->op1)) { - ofs2 = ofs + get_kval(IR(ir->op1)); + ofs2 = ofs + get_kval(as, ir->op1); ref = ir->op2; } if (!checki16(ofs2)) { @@ -252,7 +253,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) #if !LJ_SOFTFP if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && !(ci->flags & CCI_VARARG)) { - lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ + lj_assertA(rset_test(as->freeset, fpr), + "reg %d not free", fpr); /* Already evicted. */ ra_leftov(as, fpr, ref); fpr += LJ_32 ? 2 : 1; gpr += (LJ_32 && irt_isnum(ir->t)) ? 2 : 1; @@ -264,7 +266,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) #endif if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1; if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Already evicted. 
*/ #if !LJ_SOFTFP if (irt_isfp(ir->t)) { RegSet of = as->freeset; @@ -277,7 +280,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) #if LJ_32 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); - lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ + lj_assertA(rset_test(as->freeset, gpr+1), + "reg %d not free", gpr+1); /* Already evicted. */ gpr += 2; #else emit_tg(as, MIPSI_DMFC1, gpr, r); @@ -347,7 +351,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) #endif ra_evictset(as, drop); /* Evictions must be performed first. */ if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); + lj_assertA(!irt_ispri(ir->t), "PRI dest"); if (!LJ_SOFTFP && irt_isfp(ir->t)) { if ((ci->flags & CCI_CASTU64)) { int32_t ofs = sps_scale(ir->s); @@ -395,7 +399,7 @@ static void asm_callx(ASMState *as, IRIns *ir) func = ir->op2; irf = IR(func); if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(void *)get_kval(irf); + ci.func = (ASMFunction)(void *)get_kval(as, func); } else { /* Need specific register for indirect calls. */ Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); MCode *p = as->mcp; @@ -512,15 +516,19 @@ static void asm_conv(ASMState *as, IRIns *ir) #endif IRRef lref = ir->op1; #if LJ_32 - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ + /* 64 bit integer conversions are handled by SPLIT. */ + lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); #endif #if LJ_SOFTFP32 /* FP conversions are handled by SPLIT. 
*/ - lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); + lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), + "IR %04d has FP type", + (int)(ir - as->ir) - REF_BIAS); /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ #else - lua_assert(irt_type(ir->t) != st); + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); #if !LJ_SOFTFP if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); @@ -579,7 +587,8 @@ static void asm_conv(ASMState *as, IRIns *ir) } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -679,7 +688,8 @@ static void asm_conv(ASMState *as, IRIns *ir) } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, RID_NONE); } else { IRCallID cid = irt_is64(ir->t) ? @@ -698,7 +708,7 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); if ((ir->op2 & IRCONV_SEXT)) { if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { emit_dst(as, st == IRT_I8 ? 
MIPSI_SEB : MIPSI_SEH, dest, 0, left); @@ -795,7 +805,8 @@ static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) { RegSet allow = rset_exclude(RSET_GPR, base); IRIns *ir = IR(ref); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "store of IR type %d", irt_type(ir->t)); if (irref_isk(ref)) { TValue k; lj_ir_kvalue(as->J->L, &k, ir); @@ -944,7 +955,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) if (isk && irt_isaddr(kt)) { k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; } else { - lua_assert(irt_ispri(kt) && !irt_isnil(kt)); + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); k = ~((int64_t)~irt_toitype(ir->t) << 47); } cmp64 = ra_allock(as, k, allow); @@ -1012,7 +1023,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) #endif /* Load main position relative to tab->node into dest. */ - khash = isk ? ir_khash(irkey) : 1; + khash = isk ? 
ir_khash(as, irkey) : 1; if (khash == 0) { emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); } else { @@ -1020,7 +1031,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) if (isk) tmphash = ra_allock(as, khash, allow); emit_dst(as, MIPSI_AADDU, dest, dest, tmp1); - lua_assert(sizeof(Node) == 24); + lj_assertA(sizeof(Node) == 24, "bad Node size"); emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); @@ -1098,7 +1109,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) Reg key = ra_scratch(as, allow); int64_t k; #endif - lua_assert(ofs % sizeof(Node) == 0); + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (ofs > 32736) { idx = dest; rset_clear(allow, dest); @@ -1127,7 +1138,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); #else if (irt_ispri(irkey->t)) { - lua_assert(!irt_isnil(irkey->t)); + lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); k = ~((int64_t)~irt_toitype(irkey->t) << 47); } else if (irt_isnum(irkey->t)) { k = (int64_t)ir_knum(irkey)->u64; @@ -1166,7 +1177,7 @@ static void asm_uref(ASMState *as, IRIns *ir) static void asm_fref(ASMState *as, IRIns *ir) { UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); + lj_assertA(!ra_used(ir), "unfused FREF"); } static void asm_strref(ASMState *as, IRIns *ir) @@ -1221,14 +1232,17 @@ static void asm_strref(ASMState *as, IRIns *ir) /* -- Loads and stores ---------------------------------------------------- */ -static MIPSIns asm_fxloadins(IRIns *ir) +static MIPSIns asm_fxloadins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: return MIPSI_LB; case IRT_U8: return MIPSI_LBU; case IRT_I16: return MIPSI_LH; case IRT_U16: return MIPSI_LHU; - case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_LDC1; + case IRT_NUM: + lj_assertA(!LJ_SOFTFP32, "unsplit FP op"); + if (!LJ_SOFTFP) return MIPSI_LDC1; /* 
fallthrough */ case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; /* fallthrough */ @@ -1236,12 +1250,15 @@ static MIPSIns asm_fxloadins(IRIns *ir) } } -static MIPSIns asm_fxstoreins(IRIns *ir) +static MIPSIns asm_fxstoreins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: case IRT_U8: return MIPSI_SB; case IRT_I16: case IRT_U16: return MIPSI_SH; - case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_SDC1; + case IRT_NUM: + lj_assertA(!LJ_SOFTFP32, "unsplit FP op"); + if (!LJ_SOFTFP) return MIPSI_SDC1; /* fallthrough */ case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; /* fallthrough */ @@ -1252,10 +1269,10 @@ static MIPSIns asm_fxstoreins(IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - MIPSIns mi = asm_fxloadins(ir); + MIPSIns mi = asm_fxloadins(as, ir); Reg idx; int32_t ofs; - if (ir->op1 == REF_NIL) { + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ idx = RID_JGL; ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); } else { @@ -1269,7 +1286,7 @@ static void asm_fload(ASMState *as, IRIns *ir) } ofs = field_ofs[ir->op2]; } - lua_assert(!irt_isfp(ir->t)); + lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); emit_tsi(as, mi, dest, idx, ofs); } @@ -1280,8 +1297,8 @@ static void asm_fstore(ASMState *as, IRIns *ir) IRIns *irf = IR(ir->op1); Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); int32_t ofs = field_ofs[irf->op2]; - MIPSIns mi = asm_fxstoreins(ir); - lua_assert(!irt_isfp(ir->t)); + MIPSIns mi = asm_fxstoreins(as, ir); + lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE"); emit_tsi(as, mi, src, idx, ofs); } } @@ -1290,8 +1307,9 @@ static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isfp(ir->t)) ? 
RSET_FPR : RSET_GPR); - lua_assert(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); + lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), + "unaligned XLOAD"); + asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); } static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) @@ -1299,7 +1317,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) if (ir->r != RID_SINK) { Reg src = ra_alloc1z(as, ir->op2, (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, rset_exclude(RSET_GPR, src), ofs); } } @@ -1321,8 +1339,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) } } if (ra_used(ir)) { - lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); + lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad load type %d", irt_type(ir->t)); dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); #if LJ_64 @@ -1427,10 +1446,13 @@ static void asm_sload(ASMState *as, IRIns *ir) #else int32_t ofs = 8*((int32_t)ir->op1-2); #endif - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); #if LJ_SOFTFP32 - lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ + lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), + "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. 
*/ if (hiop && ra_used(ir+1)) { type = ra_dest(as, ir+1, allow); rset_clear(allow, type); @@ -1443,8 +1465,9 @@ static void asm_sload(ASMState *as, IRIns *ir) } else #endif if (ra_used(ir)) { - lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); + lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad SLOAD type %d", irt_type(ir->t)); dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); base = ra_alloc1(as, REF_BASE, allow); @@ -1554,7 +1577,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; IRRef args[4]; RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); as->gcsteps++; if (ra_hasreg(ir->r)) @@ -1570,7 +1594,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) int32_t ofs = sizeof(GCcdata); if (sz == 8) { ofs += 4; - lua_assert((ir+1)->o == IR_HIOP); + lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI"); if (LJ_LE) ir++; } for (;;) { @@ -1584,7 +1608,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) emit_tsi(as, sz == 8 ? MIPSI_SD : MIPSI_SW, ra_alloc1(as, ir->op2, allow), RID_RET, sizeof(GCcdata)); #endif - lua_assert(sz == 4 || sz == 8); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; args[0] = ASMREF_L; /* lua_State *L */ @@ -1634,7 +1658,7 @@ static void asm_obar(ASMState *as, IRIns *ir) MCLabel l_end; Reg obj, val, tmp; /* No need for other object barriers (yet). 
*/ - lua_assert(IR(ir->op1)->o == IR_UREFC); + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); args[0] = ASMREF_TMP1; /* global_State *g */ @@ -1709,7 +1733,7 @@ static void asm_add(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); if (irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); + intptr_t k = get_kval(as, ir->op2); if (checki16(k)) { emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest, left, k); @@ -1810,7 +1834,7 @@ static void asm_arithov(ASMState *as, IRIns *ir) { /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */ Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); - lua_assert(!irt_is64(ir->t)); + lj_assertA(!irt_is64(ir->t), "bad usage"); if (irref_isk(ir->op2)) { int k = IR(ir->op2)->i; if (ir->o == IR_SUBOV) k = -k; @@ -1997,7 +2021,7 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); if (irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); + intptr_t k = get_kval(as, ir->op2); if (checku16(k)) { emit_tsi(as, mik, dest, left, k); return; @@ -2030,7 +2054,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) #define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL) #define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL) #define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA) -#define asm_brol(as, ir) lua_assert(0) +#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") static void asm_bror(ASMState *as, IRIns *ir) { @@ -2222,13 +2246,13 @@ static void asm_comp(ASMState *as, IRIns *ir) } else { Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); if (op == IR_ABC) op = IR_UGT; - if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) { + if ((op&4) == 0 && 
irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); asm_guard(as, mi, left, 0); } else { if (irref_isk(ir->op2)) { - intptr_t k = get_kval(IR(ir->op2)); + intptr_t k = get_kval(as, ir->op2); if ((op&2)) k++; if (checki16(k)) { asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); @@ -2384,10 +2408,11 @@ static void asm_hiop(ASMState *as, IRIns *ir) case IR_CNEWI: /* Nothing to do here. Handled by lo op itself. */ break; - default: lua_assert(0); break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; } #else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */ + /* Unused on MIPS64 or without SOFTFP or FFI. */ + UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP"); #endif } @@ -2456,7 +2481,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) #if LJ_SOFTFP32 Reg tmp; RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ + /* LJ_SOFTFP: must be a number constant. 
*/ + lj_assertA(irref_isk(ref), "unsplit FP op"); tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); @@ -2473,7 +2499,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) #if LJ_32 RegSet allow = rset_exclude(RSET_GPR, RID_BASE); Reg type; - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "restore of IR type %d", irt_type(ir->t)); if (!irt_ispri(ir->t)) { Reg src = ra_alloc1(as, ref, allow); rset_clear(allow, src); @@ -2496,7 +2523,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } checkmclim(as); } - lua_assert(map + nent == flinks); + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); } /* -- GC handling --------------------------------------------------------- */ @@ -2694,7 +2721,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) } } else if (p+1 == pe) { /* Patch NOP after code for inverted loop branch. Use of J is ok. */ - lua_assert(p[1] == MIPSI_NOP); + lj_assertJ(p[1] == MIPSI_NOP, "expected NOP"); p[1] = tjump; *p = MIPSI_NOP; /* Replace the load of the exit number. */ cstop = p+2; diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index c15b89fe7..498fdac3d 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -181,7 +181,7 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref, return; } } else if (ir->o == IR_STRREF) { - lua_assert(ofs == 0); + lj_assertA(ofs == 0, "bad usage"); ofs = (int32_t)sizeof(GCstr); if (irref_isk(ir->op2)) { ofs += IR(ir->op2)->i; @@ -268,7 +268,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) #if !LJ_SOFTFP if (irt_isfp(ir->t)) { if (fpr <= REGARG_LASTFPR) { - lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. 
*/ + lj_assertA(rset_test(as->freeset, fpr), + "reg %d not free", fpr); /* Already evicted. */ ra_leftov(as, fpr, ref); fpr++; } else { @@ -281,7 +282,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) #endif { if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Already evicted. */ ra_leftov(as, gpr, ref); gpr++; } else { @@ -319,7 +321,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ ra_evictset(as, drop); /* Evictions must be performed first. */ if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); + lj_assertA(!irt_ispri(ir->t), "PRI dest"); if (!LJ_SOFTFP && irt_isfp(ir->t)) { if ((ci->flags & CCI_CASTU64)) { /* Use spill slot or temp slots. */ @@ -431,14 +433,18 @@ static void asm_conv(ASMState *as, IRIns *ir) int stfp = (st == IRT_NUM || st == IRT_FLOAT); #endif IRRef lref = ir->op1; - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ + /* 64 bit integer conversions are handled by SPLIT. */ + lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); #if LJ_SOFTFP /* FP conversions are handled by SPLIT. */ - lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); + lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), + "IR %04d has FP type", + (int)(ir - as->ir) - REF_BIAS); /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ #else - lua_assert(irt_type(ir->t) != st); + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -467,7 +473,8 @@ static void asm_conv(ASMState *as, IRIns *ir) } else if (stfp) { /* FP to integer conversion. 
*/ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -503,7 +510,7 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); if ((ir->op2 & IRCONV_SEXT)) emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left); else @@ -699,7 +706,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) (((char *)as->mcp-(char *)l_loop) & 0xffffu); /* Load main position relative to tab->node into dest. */ - khash = isk ? ir_khash(irkey) : 1; + khash = isk ? 
ir_khash(as, irkey) : 1; if (khash == 0) { emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); } else { @@ -754,7 +761,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) Reg node = ra_alloc1(as, ir->op1, RSET_GPR); Reg key = RID_NONE, type = RID_TMP, idx = node; RegSet allow = rset_exclude(RSET_GPR, node); - lua_assert(ofs % sizeof(Node) == 0); + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (ofs > 32736) { idx = dest; rset_clear(allow, dest); @@ -813,7 +820,7 @@ static void asm_uref(ASMState *as, IRIns *ir) static void asm_fref(ASMState *as, IRIns *ir) { UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); + lj_assertA(!ra_used(ir), "unfused FREF"); } static void asm_strref(ASMState *as, IRIns *ir) @@ -853,25 +860,27 @@ static void asm_strref(ASMState *as, IRIns *ir) /* -- Loads and stores ---------------------------------------------------- */ -static PPCIns asm_fxloadins(IRIns *ir) +static PPCIns asm_fxloadins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. 
*/ case IRT_U8: return PPCI_LBZ; case IRT_I16: return PPCI_LHA; case IRT_U16: return PPCI_LHZ; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD; + case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_LFD; case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS; default: return PPCI_LWZ; } } -static PPCIns asm_fxstoreins(IRIns *ir) +static PPCIns asm_fxstoreins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: case IRT_U8: return PPCI_STB; case IRT_I16: case IRT_U16: return PPCI_STH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD; + case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_STFD; case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS; default: return PPCI_STW; } @@ -880,10 +889,10 @@ static PPCIns asm_fxstoreins(IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - PPCIns pi = asm_fxloadins(ir); + PPCIns pi = asm_fxloadins(as, ir); Reg idx; int32_t ofs; - if (ir->op1 == REF_NIL) { + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ idx = RID_JGL; ofs = (ir->op2 << 2) - 32768; } else { @@ -897,7 +906,7 @@ static void asm_fload(ASMState *as, IRIns *ir) } ofs = field_ofs[ir->op2]; } - lua_assert(!irt_isi8(ir->t)); + lj_assertA(!irt_isi8(ir->t), "unsupported FLOAD I8"); emit_tai(as, pi, dest, idx, ofs); } @@ -908,7 +917,7 @@ static void asm_fstore(ASMState *as, IRIns *ir) IRIns *irf = IR(ir->op1); Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); int32_t ofs = field_ofs[irf->op2]; - PPCIns pi = asm_fxstoreins(ir); + PPCIns pi = asm_fxstoreins(as, ir); emit_tai(as, pi, src, idx, ofs); } } @@ -917,10 +926,10 @@ static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isfp(ir->t)) ? 
RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); + lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); if (irt_isi8(ir->t)) emit_as(as, PPCI_EXTSB, dest, dest); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); + asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); } static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) @@ -936,7 +945,7 @@ static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) } else { Reg src = ra_alloc1(as, ir->op2, (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, rset_exclude(RSET_GPR, src), ofs); } } @@ -958,8 +967,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) ofs = 0; } if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); + lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad load type %d", irt_type(ir->t)); if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0; dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); @@ -1042,12 +1052,16 @@ static void asm_sload(ASMState *as, IRIns *ir) int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); if (hiop) t.irt = IRT_NUM; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(LJ_DUALNUM || - !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); + lj_assertA(LJ_DUALNUM || + !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)), + "bad SLOAD type"); #if LJ_SOFTFP - lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. 
*/ + lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), + "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ if (hiop && ra_used(ir+1)) { type = ra_dest(as, ir+1, allow); rset_clear(allow, type); @@ -1060,7 +1074,8 @@ static void asm_sload(ASMState *as, IRIns *ir) } else #endif if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); + lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t), + "bad SLOAD type %d", irt_type(ir->t)); dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); base = ra_alloc1(as, REF_BASE, allow); @@ -1127,7 +1142,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; IRRef args[4]; RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); as->gcsteps++; if (ra_hasreg(ir->r)) @@ -1140,10 +1156,10 @@ static void asm_cnew(ASMState *as, IRIns *ir) if (ir->o == IR_CNEWI) { RegSet allow = (RSET_GPR & ~RSET_SCRATCH); int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); if (sz == 8) { ofs += 4; - lua_assert((ir+1)->o == IR_HIOP); + lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI"); } for (;;) { Reg r = ra_alloc1(as, ir->op2, allow); @@ -1187,7 +1203,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist)); emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); emit_setgl(as, tab, gc.grayagain); - lua_assert(LJ_GC_BLACK == 0x04); + lj_assertA(LJ_GC_BLACK == 0x04, "bad LJ_GC_BLACK"); emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. 
*/ emit_getgl(as, link, gc.grayagain); emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); @@ -1202,7 +1218,7 @@ static void asm_obar(ASMState *as, IRIns *ir) MCLabel l_end; Reg obj, val, tmp; /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); args[0] = ASMREF_TMP1; /* global_State *g */ @@ -1673,7 +1689,7 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) #define asm_brol(as, ir) \ asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \ PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) -#define asm_bror(as, ir) lua_assert(0) +#define asm_bror(as, ir) lj_assertA(0, "unexpected BROR") #if LJ_SOFTFP static void asm_sfpmin_max(ASMState *as, IRIns *ir) @@ -1947,10 +1963,11 @@ static void asm_hiop(ASMState *as, IRIns *ir) case IR_CNEWI: /* Nothing to do here. Handled by lo op itself. */ break; - default: lua_assert(0); break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; } #else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */ + /* Unused without SOFTFP or FFI. */ + UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP"); #endif } @@ -2010,7 +2027,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) #if LJ_SOFTFP Reg tmp; RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ + /* LJ_SOFTFP: must be a number constant. 
*/ + lj_assertA(irref_isk(ref), "unsplit FP op"); tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); @@ -2023,7 +2041,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } else { Reg type; RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "restore of IR type %d", irt_type(ir->t)); if (!irt_ispri(ir->t)) { Reg src = ra_alloc1(as, ref, allow); rset_clear(allow, src); @@ -2043,7 +2062,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } checkmclim(as); } - lua_assert(map + nent == flinks); + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); } /* -- GC handling --------------------------------------------------------- */ @@ -2141,7 +2160,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) as->mctop = p; } else { /* Patch stack adjustment. */ - lua_assert(checki16(CFRAME_SIZE+spadj)); + lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range"); p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; } @@ -2218,14 +2237,16 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) } else if ((ins & 0xfc000000u) == PPCI_B && ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) { ptrdiff_t delta = (char *)target - (char *)p; - lua_assert(((delta + 0x02000000) >> 26) == 0); + lj_assertJ(((delta + 0x02000000) >> 26) == 0, + "branch target out of range"); *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu); if (!cstart) cstart = p; } } { /* Always patch long-range branch in exit stub itself. 
*/ ptrdiff_t delta = (char *)target - (char *)px - clearso; - lua_assert(((delta + 0x02000000) >> 26) == 0); + lj_assertJ(((delta + 0x02000000) >> 26) == 0, + "branch target out of range"); *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu); } if (!cstart) cstart = px; diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 7356a5f04..a3adee14d 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -31,7 +31,7 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) #endif /* Jump to exit handler which fills in the ExitState. */ *mxp++ = XI_JMP; mxp += 4; - *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); + *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler); /* Commit the code for this group (even if assembly fails later on). */ lj_mcode_commitbot(as->J, mxp); as->mcbot = mxp; @@ -60,7 +60,7 @@ static void asm_guardcc(ASMState *as, int cc) MCode *p = as->mcp; if (LJ_UNLIKELY(p == as->invmcp)) { as->loopinv = 1; - *(int32_t *)(p+1) = jmprel(p+5, target); + *(int32_t *)(p+1) = jmprel(as->J, p+5, target); target = p; cc ^= 1; if (as->realign) { @@ -131,7 +131,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) as->mrm.ofs = 0; if (irb->o == IR_FLOAD) { IRIns *ira = IR(irb->op1); - lua_assert(irb->op2 == IRFL_TAB_ARRAY); + lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY"); /* We can avoid the FLOAD of t->array for colocated arrays. 
*/ if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) { @@ -150,7 +150,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow) { IRIns *irx; - lua_assert(ir->o == IR_AREF); + lj_assertA(ir->o == IR_AREF, "expected AREF"); as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow); irx = IR(ir->op2); if (irref_isk(ir->op2)) { @@ -217,8 +217,9 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) } break; default: - lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO || - ir->o == IR_KKPTR); + lj_assertA(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO || + ir->o == IR_KKPTR, + "bad IR op %d", ir->o); break; } } @@ -230,9 +231,10 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) /* Fuse FLOAD/FREF reference into memory operand. */ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) { - lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); + lj_assertA(ir->o == IR_FLOAD || ir->o == IR_FREF, + "bad IR op %d", ir->o); as->mrm.idx = RID_NONE; - if (ir->op1 == REF_NIL) { + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ #if LJ_GC64 as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch); as->mrm.base = RID_DISPATCH; @@ -271,7 +273,7 @@ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) { IRIns *irr; - lua_assert(ir->o == IR_STRREF); + lj_assertA(ir->o == IR_STRREF, "bad IR op %d", ir->o); as->mrm.base = as->mrm.idx = RID_NONE; as->mrm.scale = XM_SCALE1; as->mrm.ofs = sizeof(GCstr); @@ -378,9 +380,10 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) { as->mrm.ofs = (int32_t)mcpofs(as, k); as->mrm.base = RID_RIP; - } else { + } else { /* Intern 64 bit constant at bottom of mcode. 
*/ if (ir->i) { - lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); + lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i), + "bad interned 64 bit constant"); } else { while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; *(uint64_t*)as->mcbot = *k; @@ -420,12 +423,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) } if (ir->o == IR_KNUM) { RegSet avail = as->freeset & ~as->modset & RSET_FPR; - lua_assert(allow != RSET_EMPTY); + lj_assertA(allow != RSET_EMPTY, "no register allowed"); if (!(avail & (avail-1))) /* Fuse if less than two regs available. */ return asm_fuseloadk64(as, ir); } else if (ref == REF_BASE || ir->o == IR_KINT64) { RegSet avail = as->freeset & ~as->modset & RSET_GPR; - lua_assert(allow != RSET_EMPTY); + lj_assertA(allow != RSET_EMPTY, "no register allowed"); if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ if (ref == REF_BASE) { #if LJ_GC64 @@ -606,7 +609,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) #endif emit_loadi(as, r, ir->i); } else { - lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ + /* Must have been evicted. */ + lj_assertA(rset_test(as->freeset, r), "reg %d not free", r); if (ra_hasreg(ir->r)) { ra_noweak(as, ir->r); emit_movrr(as, ir, r, ir->r); @@ -615,7 +619,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) } } } else if (irt_isfp(ir->t)) { /* FP argument is on stack. */ - lua_assert(!(irt_isfloat(ir->t) && irref_isk(ref))); /* No float k. */ + lj_assertA(!(irt_isfloat(ir->t) && irref_isk(ref)), + "unexpected float constant"); if (LJ_32 && (ofs & 4) && irref_isk(ref)) { /* Split stores for unaligned FP consts. 
*/ emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); @@ -691,7 +696,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) ra_destpair(as, ir); #endif } else { - lua_assert(!irt_ispri(ir->t)); + lj_assertA(!irt_ispri(ir->t), "PRI dest"); ra_destreg(as, ir, RID_RET); } } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) { @@ -810,8 +815,10 @@ static void asm_conv(ASMState *as, IRIns *ir) int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64)); int stfp = (st == IRT_NUM || st == IRT_FLOAT); IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); - lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64))); /* Handled by SPLIT. */ + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); + lj_assertA(!(LJ_32 && (irt_isint64(ir->t) || st64)), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -847,7 +854,8 @@ static void asm_conv(ASMState *as, IRIns *ir) } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -882,7 +890,7 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg left, dest = ra_dest(as, ir, RSET_GPR); RegSet allow = RSET_GPR; x86Op op; - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); if (st == IRT_I8) { op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX; } else if (st == IRT_U8) { @@ -953,7 +961,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir) emit_sjcc(as, CC_NS, l_end); emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. 
*/ } else { - lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64); + lj_assertA(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64, "bad type for CONV"); } emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0); /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */ @@ -967,8 +975,8 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); Reg lo, hi; - lua_assert(st == IRT_NUM || st == IRT_FLOAT); - lua_assert(dt == IRT_I64 || dt == IRT_U64); + lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV"); + lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV"); hi = ra_dest(as, ir, RSET_GPR); lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); @@ -1180,13 +1188,13 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64)); } } else { - lua_assert(irt_ispri(kt) && !irt_isnil(kt)); + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); emit_u32(as, (irt_toitype(kt)<<15)|0x7fff); emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); #else } else { if (!irt_ispri(kt)) { - lua_assert(irt_isaddr(kt)); + lj_assertA(irt_isaddr(kt), "bad HREF key type"); if (isk) emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr), ptr2addr(ir_kgc(irkey))); @@ -1194,7 +1202,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr)); emit_sjcc(as, CC_NE, l_next); } - lua_assert(!irt_isnil(kt)); + lj_assertA(!irt_isnil(kt), "bad HREF key type"); emit_i8(as, irt_toitype(kt)); emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); #endif @@ -1209,7 +1217,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) #endif /* Load main position relative to tab->node into dest. */ - khash = isk ? ir_khash(irkey) : 1; + khash = isk ? 
ir_khash(as, irkey) : 1; if (khash == 0) { emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); } else { @@ -1271,7 +1279,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) #if !LJ_64 MCLabel l_exit; #endif - lua_assert(ofs % sizeof(Node) == 0); + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (ra_hasreg(dest)) { if (ofs != 0) { if (dest == node) @@ -1288,7 +1296,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir) Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node)); emit_rmro(as, XO_CMP, key|REX_64, node, ofs + (int32_t)offsetof(Node, key.u64)); - lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); + lj_assertA(irt_isnum(irkey->t) || irt_isgcv(irkey->t), + "bad HREFK key type"); /* Assumes -0.0 is already canonicalized to +0.0. */ emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 : #if LJ_GC64 @@ -1299,7 +1308,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); #endif } else { - lua_assert(!irt_isnil(irkey->t)); + lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); #if LJ_GC64 emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff); emit_rmro(as, XO_ARITHi, XOg_CMP, node, @@ -1323,13 +1332,13 @@ static void asm_hrefk(ASMState *as, IRIns *ir) (int32_t)ir_knum(irkey)->u32.hi); } else { if (!irt_ispri(irkey->t)) { - lua_assert(irt_isgcv(irkey->t)); + lj_assertA(irt_isgcv(irkey->t), "bad HREFK key type"); emit_gmroi(as, XG_ARITHi(XOg_CMP), node, ofs + (int32_t)offsetof(Node, key.gcr), ptr2addr(ir_kgc(irkey))); emit_sjcc(as, CC_NE, l_exit); } - lua_assert(!irt_isnil(irkey->t)); + lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); emit_i8(as, irt_toitype(irkey->t)); emit_rmro(as, XO_ARITHi8, XOg_CMP, node, ofs + (int32_t)offsetof(Node, key.it)); @@ -1402,7 +1411,8 @@ static void asm_fxload(ASMState *as, IRIns *ir) if (LJ_64 && irt_is64(ir->t)) dest |= REX_64; else - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); + lj_assertA(irt_isint(ir->t) || 
irt_isu32(ir->t) || irt_isaddr(ir->t), + "unsplit 64 bit load"); xo = XO_MOV; break; } @@ -1447,13 +1457,16 @@ static void asm_fxstore(ASMState *as, IRIns *ir) case IRT_NUM: xo = XO_MOVSDto; break; case IRT_FLOAT: xo = XO_MOVSSto; break; #if LJ_64 && !LJ_GC64 - case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ + case IRT_LIGHTUD: + /* NYI: mask 64 bit lightuserdata. */ + lj_assertA(0, "store of lightuserdata"); #endif default: if (LJ_64 && irt_is64(ir->t)) src |= REX_64; else - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), + "unsplit 64 bit store"); xo = XO_MOVto; break; } @@ -1467,8 +1480,8 @@ static void asm_fxstore(ASMState *as, IRIns *ir) emit_i8(as, k); emit_mrm(as, XO_MOVmib, 0, RID_MRM); } else { - lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) || - irt_isaddr(ir->t)); + lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) || + irt_isaddr(ir->t), "bad store type"); emit_i32(as, k); emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM); } @@ -1503,8 +1516,9 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) #if LJ_GC64 Reg tmp = RID_NONE; #endif - lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || - (LJ_DUALNUM && irt_isint(ir->t))); + lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || + (LJ_DUALNUM && irt_isint(ir->t)), + "bad load type %d", irt_type(ir->t)); #if LJ_64 && !LJ_GC64 if (irt_islightud(ir->t)) { Reg dest = asm_load_lightud64(as, ir, 1); @@ -1551,7 +1565,8 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) as->mrm.ofs += 4; asm_guardcc(as, irt_isnum(ir->t) ? 
CC_AE : CC_NE); if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { - lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); + lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t), + "bad load type %d", irt_type(ir->t)); #if LJ_GC64 emit_u32(as, LJ_TISNUM << 15); #else @@ -1633,13 +1648,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir) #endif emit_mrm(as, XO_MOVto, src, RID_MRM); } else if (!irt_ispri(irr->t)) { - lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); + lj_assertA(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)), + "bad store type"); emit_i32(as, irr->i); emit_mrm(as, XO_MOVmi, 0, RID_MRM); } as->mrm.ofs += 4; #if LJ_GC64 - lua_assert(LJ_DUALNUM && irt_isinteger(ir->t)); + lj_assertA(LJ_DUALNUM && irt_isinteger(ir->t), "bad store type"); emit_i32(as, LJ_TNUMX << 15); #else emit_i32(as, (int32_t)irt_toitype(ir->t)); @@ -1654,10 +1670,13 @@ static void asm_sload(ASMState *as, IRIns *ir) (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); IRType1 t = ir->t; Reg base; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(LJ_DUALNUM || - !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); + lj_assertA(LJ_DUALNUM || + !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)), + "bad SLOAD type"); if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { Reg left = ra_scratch(as, RSET_FPR); asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ @@ -1677,7 +1696,8 @@ static void asm_sload(ASMState *as, IRIns *ir) RegSet allow = irt_isnum(t) ? 
RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); base = ra_alloc1(as, REF_BASE, RSET_GPR); - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); + lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t), + "bad SLOAD type %d", irt_type(t)); if ((ir->op2 & IRSLOAD_CONVERT)) { t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs); @@ -1723,7 +1743,8 @@ static void asm_sload(ASMState *as, IRIns *ir) /* Need type check, even if the load result is unused. */ asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); if (LJ_64 && irt_type(t) >= IRT_NUM) { - lua_assert(irt_isinteger(t) || irt_isnum(t)); + lj_assertA(irt_isinteger(t) || irt_isnum(t), + "bad SLOAD type %d", irt_type(t)); #if LJ_GC64 emit_u32(as, LJ_TISNUM << 15); #else @@ -1775,7 +1796,8 @@ static void asm_cnew(ASMState *as, IRIns *ir) CTInfo info = lj_ctype_info(cts, id, &sz); const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; IRRef args[4]; - lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); as->gcsteps++; asm_setupresult(as, ir, ci); /* GCcdata * */ @@ -1805,7 +1827,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) int32_t ofs = sizeof(GCcdata); if (sz == 8) { ofs += 4; ir++; - lua_assert(ir->o == IR_HIOP); + lj_assertA(ir->o == IR_HIOP, "missing CNEWI HIOP"); } do { if (irref_isk(ir->op2)) { @@ -1819,7 +1841,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) ofs -= 4; ir--; } while (1); #endif - lua_assert(sz == 4 || sz == 8); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. 
*/ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; args[0] = ASMREF_L; /* lua_State *L */ @@ -1869,7 +1891,7 @@ static void asm_obar(ASMState *as, IRIns *ir) MCLabel l_end; Reg obj; /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); args[0] = ASMREF_TMP1; /* global_State *g */ @@ -1986,7 +2008,7 @@ static int asm_swapops(ASMState *as, IRIns *ir) { IRIns *irl = IR(ir->op1); IRIns *irr = IR(ir->op2); - lua_assert(ra_noreg(irr->r)); + lj_assertA(ra_noreg(irr->r), "bad usage"); if (!irm_iscomm(lj_ir_mode[ir->o])) return 0; /* Can't swap non-commutative operations. */ if (irref_isk(ir->op2)) @@ -2376,8 +2398,9 @@ static void asm_comp(ASMState *as, IRIns *ir) IROp leftop = (IROp)(IR(lref)->o); Reg r64 = REX_64IR(ir, 0); int32_t imm = 0; - lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || - irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); + lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || + irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t), + "bad comparison data type %d", irt_type(ir->t)); /* Swap constants (only for ABC) and fusable loads to the right. */ if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { if ((cc & 0xc) == 0xc) cc ^= 0x53; /* L <-> G, LE <-> GE */ @@ -2459,7 +2482,7 @@ static void asm_comp(ASMState *as, IRIns *ir) /* Use test r,r instead of cmp r,0. */ x86Op xo = XO_TEST; if (irt_isu8(ir->t)) { - lua_assert(ir->o == IR_EQ || ir->o == IR_NE); + lj_assertA(ir->o == IR_EQ || ir->o == IR_NE, "bad usage"); xo = XO_TESTb; if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) { if (LJ_64) { @@ -2615,10 +2638,11 @@ static void asm_hiop(ASMState *as, IRIns *ir) case IR_CNEWI: /* Nothing to do here. Handled by CNEWI itself. 
*/ break; - default: lua_assert(0); break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; } #else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on x64 or without FFI. */ + /* Unused on x64 or without FFI. */ + UNUSED(as); UNUSED(ir); lj_assertA(0, "unexpected HIOP"); #endif } @@ -2684,8 +2708,9 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) Reg src = ra_alloc1(as, ref, RSET_FPR); emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); } else { - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || - (LJ_DUALNUM && irt_isinteger(ir->t))); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || + (LJ_DUALNUM && irt_isinteger(ir->t)), + "restore of IR type %d", irt_type(ir->t)); if (!irref_isk(ref)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); #if LJ_GC64 @@ -2730,7 +2755,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } checkmclim(as); } - lua_assert(map + nent == flinks); + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); } /* -- GC handling --------------------------------------------------------- */ @@ -2774,16 +2799,16 @@ static void asm_loop_fixup(ASMState *as) MCode *target = as->mcp; if (as->realign) { /* Realigned loops use short jumps. */ as->realign = NULL; /* Stop another retry. */ - lua_assert(((intptr_t)target & 15) == 0); + lj_assertA(((intptr_t)target & 15) == 0, "loop realign failed"); if (as->loopinv) { /* Inverted loop branch? */ p -= 5; p[0] = XI_JMP; - lua_assert(target - p >= -128); + lj_assertA(target - p >= -128, "loop realign failed"); p[-1] = (MCode)(target - p); /* Patch sjcc. */ if (as->loopinv == 2) p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */ } else { - lua_assert(target - p >= -128); + lj_assertA(target - p >= -128, "loop realign failed"); p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */ p[-2] = XI_JMPs; } @@ -2880,7 +2905,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) } /* Patch exit branch. */ target = lnk ? 
traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - *(int32_t *)(p-4) = jmprel(p, target); + *(int32_t *)(p-4) = jmprel(as->J, p, target); p[-5] = XI_JMP; /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ for (q = as->mctop-1; q >= p; q--) @@ -3053,17 +3078,17 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) uint32_t statei = u32ptr(&J2G(J)->vmstate); #endif if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) - *(int32_t *)(p+len-4) = jmprel(p+len, target); + *(int32_t *)(p+len-4) = jmprel(J, p+len, target); /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ for (; p < pe; p += asm_x86_inslen(p)) { intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64; if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi) break; } - lua_assert(p < pe); + lj_assertJ(p < pe, "instruction length decoder failed"); for (; p < pe; p += asm_x86_inslen(p)) if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) - *(int32_t *)(p+2) = jmprel(p+6, target); + *(int32_t *)(p+2) = jmprel(J, p+6, target); lj_mcode_sync(T->mcode, T->mcode + T->szmcode); lj_mcode_patch(J, mcarea, 1); } diff --git a/src/lj_assert.c b/src/lj_assert.c new file mode 100644 index 000000000..7989dbe6f --- /dev/null +++ b/src/lj_assert.c @@ -0,0 +1,28 @@ +/* +** Internal assertions. +** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_assert_c +#define LUA_CORE + +#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) + +#include <stdio.h> + +#include "lj_obj.h" + +void lj_assert_fail(global_State *g, const char *file, int line, + const char *func, const char *fmt, ...) +{ + va_list argp; + va_start(argp, fmt); + fprintf(stderr, "LuaJIT ASSERT %s:%d: %s: ", file, line, func); + vfprintf(stderr, fmt, argp); + fputc('\n', stderr); + va_end(argp); + UNUSED(g); /* May be NULL. TODO: optionally dump state. 
*/ + abort(); +} + +#endif diff --git a/src/lj_bcread.c b/src/lj_bcread.c index 1585272f0..1d9547bef 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c @@ -47,7 +47,7 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em) /* Refill buffer. */ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) { - lua_assert(len != 0); + lj_assertLS(len != 0, "empty refill"); if (len > LJ_MAX_BUF || ls->c < 0) bcread_error(ls, LJ_ERR_BCBAD); do { @@ -57,7 +57,7 @@ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) MSize n = (MSize)(ls->pe - ls->p); if (n) { /* Copy remainder to buffer. */ if (sbuflen(&ls->sb)) { /* Move down in buffer. */ - lua_assert(ls->pe == sbufP(&ls->sb)); + lj_assertLS(ls->pe == sbufP(&ls->sb), "bad buffer pointer"); if (ls->p != p) memmove(p, ls->p, n); } else { /* Copy from buffer provided by reader. */ p = lj_buf_need(&ls->sb, len); @@ -107,7 +107,7 @@ static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len) { uint8_t *p = (uint8_t *)ls->p; ls->p += len; - lua_assert(ls->p <= ls->pe); + lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); return p; } @@ -120,7 +120,7 @@ static void bcread_block(LexState *ls, void *q, MSize len) /* Read byte from buffer. 
*/ static LJ_AINLINE uint32_t bcread_byte(LexState *ls) { - lua_assert(ls->p < ls->pe); + lj_assertLS(ls->p < ls->pe, "buffer read overflow"); return (uint32_t)(uint8_t)*ls->p++; } @@ -128,7 +128,7 @@ static LJ_AINLINE uint32_t bcread_byte(LexState *ls) static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls) { uint32_t v = lj_buf_ruleb128(&ls->p); - lua_assert(ls->p <= ls->pe); + lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); return v; } @@ -145,7 +145,7 @@ static uint32_t bcread_uleb128_33(LexState *ls) } while (*p++ >= 0x80); } ls->p = (char *)p; - lua_assert(ls->p <= ls->pe); + lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); return v; } @@ -192,7 +192,7 @@ static void bcread_ktabk(LexState *ls, TValue *o) o->u32.lo = bcread_uleb128(ls); o->u32.hi = bcread_uleb128(ls); } else { - lua_assert(tp <= BCDUMP_KTAB_TRUE); + lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp); setpriV(o, ~tp); } } @@ -214,7 +214,7 @@ static GCtab *bcread_ktab(LexState *ls) for (i = 0; i < nhash; i++) { TValue key; bcread_ktabk(ls, &key); - lua_assert(!tvisnil(&key)); + lj_assertLS(!tvisnil(&key), "nil key"); bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); } } @@ -251,7 +251,7 @@ static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc) #endif } else { lua_State *L = ls->L; - lua_assert(tp == BCDUMP_KGC_CHILD); + lj_assertLS(tp == BCDUMP_KGC_CHILD, "bad constant type %d", tp); if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */ bcread_error(ls, LJ_ERR_BCBAD); L->top--; @@ -422,7 +422,7 @@ static int bcread_header(LexState *ls) GCproto *lj_bcread(LexState *ls) { lua_State *L = ls->L; - lua_assert(ls->c == BCDUMP_HEAD1); + lj_assertLS(ls->c == BCDUMP_HEAD1, "bad bytecode header"); bcread_savetop(L, ls, L->top); lj_buf_reset(&ls->sb); /* Check for a valid bytecode dump header. 
*/ diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index dd38289e3..a8c310b82 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c @@ -29,8 +29,17 @@ typedef struct BCWriteCtx { void *wdata; /* Writer callback data. */ int strip; /* Strip debug info. */ int status; /* Status from writer callback. */ +#ifdef LUA_USE_ASSERT + global_State *g; +#endif } BCWriteCtx; +#ifdef LUA_USE_ASSERT +#define lj_assertBCW(c, ...) lj_assertG_(ctx->g, (c), __VA_ARGS__) +#else +#define lj_assertBCW(c, ...) ((void)ctx) +#endif + /* -- Bytecode writer ----------------------------------------------------- */ /* Write a single constant key/value of a template table. */ @@ -61,7 +70,7 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) p = lj_strfmt_wuleb128(p, o->u32.lo); p = lj_strfmt_wuleb128(p, o->u32.hi); } else { - lua_assert(tvispri(o)); + lj_assertBCW(tvispri(o), "unhandled type %d", itype(o)); *p++ = BCDUMP_KTAB_NIL+~itype(o); } setsbufP(&ctx->sb, p); @@ -121,7 +130,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) tp = BCDUMP_KGC_STR + gco2str(o)->len; need = 5+gco2str(o)->len; } else if (o->gch.gct == ~LJ_TPROTO) { - lua_assert((pt->flags & PROTO_CHILD)); + lj_assertBCW((pt->flags & PROTO_CHILD), "prototype has unexpected child"); tp = BCDUMP_KGC_CHILD; #if LJ_HASFFI } else if (o->gch.gct == ~LJ_TCDATA) { @@ -132,12 +141,14 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) } else if (id == CTID_UINT64) { tp = BCDUMP_KGC_U64; } else { - lua_assert(id == CTID_COMPLEX_DOUBLE); + lj_assertBCW(id == CTID_COMPLEX_DOUBLE, + "bad cdata constant CTID %d", id); tp = BCDUMP_KGC_COMPLEX; } #endif } else { - lua_assert(o->gch.gct == ~LJ_TTAB); + lj_assertBCW(o->gch.gct == ~LJ_TTAB, + "bad constant GC type %d", o->gch.gct); tp = BCDUMP_KGC_TAB; need = 1+2*5; } @@ -289,7 +300,7 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) MSize nn = (lj_fls(n)+8)*9 >> 6; char *q = sbufB(&ctx->sb) + (5 - nn); p = lj_strfmt_wuleb128(q, n); /* Fill in final size. 
*/ - lua_assert(p == sbufB(&ctx->sb) + 5); + lj_assertBCW(p == sbufB(&ctx->sb) + 5, "bad ULEB128 write"); ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata); } } @@ -349,6 +360,9 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data, ctx.wdata = data; ctx.strip = strip; ctx.status = 0; +#ifdef LUA_USE_ASSERT + ctx.g = G(L); +#endif lj_buf_init(L, &ctx.sb); status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); if (status == 0) status = ctx.status; diff --git a/src/lj_buf.c b/src/lj_buf.c index c87780168..935ae4889 100644 --- a/src/lj_buf.c +++ b/src/lj_buf.c @@ -30,7 +30,7 @@ static void buf_grow(SBuf *sb, MSize sz) LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz) { - lua_assert(sz > sbufsz(sb)); + lj_assertG_(G(sbufL(sb)), sz > sbufsz(sb), "SBuf overflow"); if (LJ_UNLIKELY(sz > LJ_MAX_BUF)) lj_err_mem(sbufL(sb)); buf_grow(sb, sz); @@ -40,7 +40,7 @@ LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz) LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz) { MSize len = sbuflen(sb); - lua_assert(sz > sbufleft(sb)); + lj_assertG_(G(sbufL(sb)), sz > sbufleft(sb), "SBuf overflow"); if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF)) lj_err_mem(sbufL(sb)); buf_grow(sb, len + sz); diff --git a/src/lj_carith.c b/src/lj_carith.c index cf71aaf5b..65ad2c101 100644 --- a/src/lj_carith.c +++ b/src/lj_carith.c @@ -122,7 +122,7 @@ static int carith_ptr(lua_State *L, CTState *cts, CDArith *ca, MMS mm) setboolV(L->top-1, ((uintptr_t)pp < (uintptr_t)pp2)); return 1; } else { - lua_assert(mm == MM_le); + lj_assertL(mm == MM_le, "bad metamethod %d", mm); setboolV(L->top-1, ((uintptr_t)pp <= (uintptr_t)pp2)); return 1; } @@ -208,7 +208,9 @@ static int carith_int64(lua_State *L, CTState *cts, CDArith *ca, MMS mm) *up = lj_carith_powu64(u0, u1); break; case MM_unm: *up = (uint64_t)-(int64_t)u0; break; - default: lua_assert(0); break; + default: + lj_assertL(0, "bad metamethod %d", mm); + break; } lj_gc_check(L); return 1; 
@@ -301,7 +303,9 @@ uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op) case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break; case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break; case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break; - default: lua_assert(0); break; + default: + lj_assertX(0, "bad shift op %d", op); + break; } return x; } diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 499a01d85..5ac1b4dad 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -391,7 +391,8 @@ #define CCALL_HANDLE_GPR \ /* Try to pass argument in GPRs. */ \ if (n > 1) { \ - lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ + /* int64_t or complex (float). */ \ + lj_assertL(n == 2 || n == 4, "bad GPR size %d", n); \ if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ else if (ngpr + n > maxgpr) \ @@ -642,7 +643,8 @@ static void ccall_classify_ct(CTState *cts, CType *ct, int *rcl, CTSize ofs) ccall_classify_struct(cts, ct, rcl, ofs); } else { int cl = ctype_isfp(ct->info) ? CCALL_RCL_SSE : CCALL_RCL_INT; - lua_assert(ctype_hassize(ct->info)); + lj_assertCTS(ctype_hassize(ct->info), + "classify ctype %08x without size", ct->info); if ((ofs & (ct->size-1))) cl = CCALL_RCL_MEM; /* Unaligned. */ rcl[(ofs >= 8)] |= cl; } @@ -667,12 +669,13 @@ static int ccall_classify_struct(CTState *cts, CType *ct, int *rcl, CTSize ofs) } /* Try to split up a small struct into registers. */ -static int ccall_struct_reg(CCallState *cc, GPRArg *dp, int *rcl) +static int ccall_struct_reg(CCallState *cc, CTState *cts, GPRArg *dp, int *rcl) { MSize ngpr = cc->ngpr, nfpr = cc->nfpr; uint32_t i; + UNUSED(cts); for (i = 0; i < 2; i++) { - lua_assert(!(rcl[i] & CCALL_RCL_MEM)); + lj_assertCTS(!(rcl[i] & CCALL_RCL_MEM), "pass mem struct in reg"); if ((rcl[i] & CCALL_RCL_INT)) { /* Integer class takes precedence. */ if (ngpr >= CCALL_NARG_GPR) return 1; /* Register overflow. 
*/ cc->gpr[ngpr++] = dp[i]; @@ -693,7 +696,8 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl, dp[0] = dp[1] = 0; /* Convert to temp. struct. */ lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); - if (ccall_struct_reg(cc, dp, rcl)) { /* Register overflow? Pass on stack. */ + if (ccall_struct_reg(cc, cts, dp, rcl)) { + /* Register overflow? Pass on stack. */ MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1; if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */ cc->nsp = nsp + n; @@ -990,7 +994,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, if (fid) { /* Get argument type from field. */ CType *ctf = ctype_get(cts, fid); fid = ctf->sib; - lua_assert(ctype_isfield(ctf->info)); + lj_assertL(ctype_isfield(ctf->info), "field expected"); did = ctype_cid(ctf->info); } else { if (!(ct->info & CTF_VARARG)) @@ -1138,7 +1142,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct, CCALL_HANDLE_RET #endif /* No reference types end up here, so there's no need for the CTypeID. */ - lua_assert(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info))); + lj_assertL(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info)), + "unexpected reference ctype"); return lj_cconv_tv_ct(cts, ctr, 0, L->top-1, sp); } diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 4edd8a356..49775d2b9 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -107,9 +107,9 @@ MSize lj_ccallback_ptr2slot(CTState *cts, void *p) /* Initialize machine code for callback function pointers. */ #if LJ_OS_NOJIT /* Disabled callback support. 
*/ -#define callback_mcode_init(g, p) UNUSED(p) +#define callback_mcode_init(g, p) (p) #elif LJ_TARGET_X86ORX64 -static void callback_mcode_init(global_State *g, uint8_t *page) +static void *callback_mcode_init(global_State *g, uint8_t *page) { uint8_t *p = page; uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback; @@ -143,10 +143,10 @@ static void callback_mcode_init(global_State *g, uint8_t *page) *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2); } } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); + return p; } #elif LJ_TARGET_ARM -static void callback_mcode_init(global_State *g, uint32_t *page) +static void *callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; void *target = (void *)lj_vm_ffi_callback; @@ -165,10 +165,10 @@ static void callback_mcode_init(global_State *g, uint32_t *page) *p = ARMI_B | ((page-p-2) & 0x00ffffffu); p++; } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); + return p; } #elif LJ_TARGET_ARM64 -static void callback_mcode_init(global_State *g, uint32_t *page) +static void *callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; void *target = (void *)lj_vm_ffi_callback; @@ -185,10 +185,10 @@ static void callback_mcode_init(global_State *g, uint32_t *page) *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu)); p++; } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); + return p; } #elif LJ_TARGET_PPC -static void callback_mcode_init(global_State *g, uint32_t *page) +static void *callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; void *target = (void *)lj_vm_ffi_callback; @@ -204,10 +204,10 @@ static void callback_mcode_init(global_State *g, uint32_t *page) *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); p++; } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); + return p; } #elif LJ_TARGET_MIPS -static void callback_mcode_init(global_State *g, uint32_t *page) +static void *callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; uintptr_t target = 
(uintptr_t)(void *)lj_vm_ffi_callback; @@ -236,11 +236,11 @@ static void callback_mcode_init(global_State *g, uint32_t *page) p++; *p++ = MIPSI_LI | MIPSF_T(RID_R1) | slot; } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); + return p; } #else /* Missing support for this architecture. */ -#define callback_mcode_init(g, p) UNUSED(p) +#define callback_mcode_init(g, p) (p) #endif /* -- Machine code management --------------------------------------------- */ @@ -263,7 +263,7 @@ static void callback_mcode_init(global_State *g, uint32_t *page) static void callback_mcode_new(CTState *cts) { size_t sz = (size_t)CALLBACK_MCODE_SIZE; - void *p; + void *p, *pe; if (CALLBACK_MAX_SLOT == 0) lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); #if LJ_TARGET_WINDOWS @@ -280,7 +280,10 @@ static void callback_mcode_new(CTState *cts) p = lj_mem_new(cts->L, sz); #endif cts->cb.mcode = p; - callback_mcode_init(cts->g, p); + pe = callback_mcode_init(cts->g, p); + UNUSED(pe); + lj_assertCTS((size_t)((char *)pe - (char *)p) <= sz, + "miscalculated CALLBACK_MAX_SLOT"); lj_mcode_sync(p, (char *)p + sz); #if LJ_TARGET_WINDOWS { @@ -421,8 +424,9 @@ void lj_ccallback_mcode_free(CTState *cts) #define CALLBACK_HANDLE_GPR \ if (n > 1) { \ - lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \ - ctype_isinteger(cta->info)) && n == 2); /* int64_t. */ \ + lj_assertCTS(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \ + ctype_isinteger(cta->info)) && n == 2, /* int64_t. */ \ + "bad GPR type"); \ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. 
*/ \ } \ if (ngpr + n <= maxgpr) { \ @@ -579,7 +583,7 @@ static void callback_conv_args(CTState *cts, lua_State *L) CTSize sz; int isfp; MSize n; - lua_assert(ctype_isfield(ctf->info)); + lj_assertCTS(ctype_isfield(ctf->info), "field expected"); cta = ctype_rawchild(cts, ctf); isfp = ctype_isfp(cta->info); sz = (cta->size + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); @@ -671,7 +675,7 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf) { lua_State *L = cts->L; global_State *g = cts->g; - lua_assert(L != NULL); + lj_assertG(L != NULL, "uninitialized cts->L in callback"); if (tvref(g->jit_base)) { setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK)); if (g->panic) g->panic(L); @@ -756,7 +760,7 @@ static CType *callback_checkfunc(CTState *cts, CType *ct) CType *ctf = ctype_get(cts, fid); if (!ctype_isattrib(ctf->info)) { CType *cta; - lua_assert(ctype_isfield(ctf->info)); + lj_assertCTS(ctype_isfield(ctf->info), "field expected"); cta = ctype_rawchild(cts, ctf); if (!(ctype_isenum(cta->info) || ctype_isptr(cta->info) || (ctype_isnum(cta->info) && cta->size <= 8)) || diff --git a/src/lj_cconv.c b/src/lj_cconv.c index 03ed0ce22..400c2ae66 100644 --- a/src/lj_cconv.c +++ b/src/lj_cconv.c @@ -122,19 +122,25 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, CTInfo dinfo = d->info, sinfo = s->info; void *tmpptr; - lua_assert(!ctype_isenum(dinfo) && !ctype_isenum(sinfo)); - lua_assert(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo)); + lj_assertCTS(!ctype_isenum(dinfo) && !ctype_isenum(sinfo), + "unresolved enum"); + lj_assertCTS(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo), + "unstripped attribute"); if (ctype_type(dinfo) > CT_MAYCONVERT || ctype_type(sinfo) > CT_MAYCONVERT) goto err_conv; /* Some basic sanity checks. 
*/ - lua_assert(!ctype_isnum(dinfo) || dsize > 0); - lua_assert(!ctype_isnum(sinfo) || ssize > 0); - lua_assert(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4); - lua_assert(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4); - lua_assert(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize); - lua_assert(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize); + lj_assertCTS(!ctype_isnum(dinfo) || dsize > 0, "bad size for number type"); + lj_assertCTS(!ctype_isnum(sinfo) || ssize > 0, "bad size for number type"); + lj_assertCTS(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4, + "bad size for bool type"); + lj_assertCTS(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4, + "bad size for bool type"); + lj_assertCTS(!ctype_isinteger(dinfo) || (1u<info & CTF_VLA) || d != s) goto err_conv; /* Must be exact same type. */ copyval: /* Copy value. */ - lua_assert(dsize == ssize); + lj_assertCTS(dsize == ssize, "value copy with different sizes"); memcpy(dp, sp, dsize); break; @@ -389,7 +395,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid, lj_cconv_ct_ct(cts, ctype_get(cts, CTID_DOUBLE), s, (uint8_t *)&o->n, sp, 0); /* Numbers are NOT canonicalized here! Beware of uninitialized data. */ - lua_assert(tvisnum(o)); + lj_assertCTS(tvisnum(o), "non-canonical NaN passed"); } } else { uint32_t b = s->size == 1 ? (*sp != 0) : (*(int *)sp != 0); @@ -406,7 +412,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid, CTSize sz; copyval: /* Copy value. */ sz = s->size; - lua_assert(sz != CTSIZE_INVALID); + lj_assertCTS(sz != CTSIZE_INVALID, "value copy with invalid size"); /* Attributes are stripped, qualifiers are kept (but mostly ignored). */ cd = lj_cdata_new(cts, ctype_typeid(cts, s), sz); setcdataV(cts->L, o, cd); @@ -421,19 +427,22 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp) CTInfo info = s->info; CTSize pos, bsz; uint32_t val; - lua_assert(ctype_isbitfield(info)); + lj_assertCTS(ctype_isbitfield(info), "bitfield expected"); /* NYI: packed bitfields may cause misaligned reads. 
*/ switch (ctype_bitcsz(info)) { case 4: val = *(uint32_t *)sp; break; case 2: val = *(uint16_t *)sp; break; case 1: val = *(uint8_t *)sp; break; - default: lua_assert(0); val = 0; break; + default: + lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info)); + val = 0; + break; } /* Check if a packed bitfield crosses a container boundary. */ pos = ctype_bitpos(info); bsz = ctype_bitbsz(info); - lua_assert(pos < 8*ctype_bitcsz(info)); - lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info)); + lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position"); + lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size"); if (pos + bsz > 8*ctype_bitcsz(info)) lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); if (!(info & CTF_BOOL)) { @@ -449,7 +458,7 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp) } } else { uint32_t b = (val >> pos) & 1; - lua_assert(bsz == 1); + lj_assertCTS(bsz == 1, "bad bool bitfield size"); setboolV(o, b); setboolV(&cts->g->tmptv2, b); /* Remember for trace recorder. */ } @@ -553,7 +562,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, sid = cdataV(o)->ctypeid; s = ctype_get(cts, sid); if (ctype_isref(s->info)) { /* Resolve reference for value. */ - lua_assert(s->size == CTSIZE_PTR); + lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized"); sp = *(void **)sp; sid = ctype_cid(s->info); } @@ -571,7 +580,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, CType *cct = lj_ctype_getfield(cts, d, str, &ofs); if (!cct || !ctype_isconstval(cct->info)) goto err_conv; - lua_assert(d->size == 4); + lj_assertCTS(d->size == 4, "only 32 bit enum supported"); /* NYI */ sp = (uint8_t *)&cct->size; sid = ctype_cid(cct->info); } else if (ctype_isrefarray(d->info)) { /* Copy string to array. 
*/ @@ -635,10 +644,10 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) CTInfo info = d->info; CTSize pos, bsz; uint32_t val, mask; - lua_assert(ctype_isbitfield(info)); + lj_assertCTS(ctype_isbitfield(info), "bitfield expected"); if ((info & CTF_BOOL)) { uint8_t tmpbool; - lua_assert(ctype_bitbsz(info) == 1); + lj_assertCTS(ctype_bitbsz(info) == 1, "bad bool bitfield size"); lj_cconv_ct_tv(cts, ctype_get(cts, CTID_BOOL), &tmpbool, o, 0); val = tmpbool; } else { @@ -647,8 +656,8 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) } pos = ctype_bitpos(info); bsz = ctype_bitbsz(info); - lua_assert(pos < 8*ctype_bitcsz(info)); - lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info)); + lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position"); + lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size"); /* Check if a packed bitfield crosses a container boundary. */ if (pos + bsz > 8*ctype_bitcsz(info)) lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); @@ -659,7 +668,9 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) case 4: *(uint32_t *)dp = (*(uint32_t *)dp & ~mask) | (uint32_t)val; break; case 2: *(uint16_t *)dp = (*(uint16_t *)dp & ~mask) | (uint16_t)val; break; case 1: *(uint8_t *)dp = (*(uint8_t *)dp & ~mask) | (uint8_t)val; break; - default: lua_assert(0); break; + default: + lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info)); + break; } } diff --git a/src/lj_cconv.h b/src/lj_cconv.h index 2fd5a71c6..1f716d2a8 100644 --- a/src/lj_cconv.h +++ b/src/lj_cconv.h @@ -27,13 +27,14 @@ enum { static LJ_AINLINE uint32_t cconv_idx(CTInfo info) { uint32_t idx = ((info >> 26) & 15u); /* Dispatch bits. */ - lua_assert(ctype_type(info) <= CT_MAYCONVERT); + lj_assertX(ctype_type(info) <= CT_MAYCONVERT, + "cannot convert ctype %08x", info); #if LJ_64 idx = ((uint32_t)(U64x(f436fff5,fff7f021) >> 4*idx) & 15u); #else idx = (((idx < 8 ? 
0xfff7f021u : 0xf436fff5) >> 4*(idx & 7u)) & 15u); #endif - lua_assert(idx < 8); + lj_assertX(idx < 8, "cannot convert ctype %08x", info); return idx; } diff --git a/src/lj_cdata.c b/src/lj_cdata.c index 10d9423df..a827d1ec5 100644 --- a/src/lj_cdata.c +++ b/src/lj_cdata.c @@ -35,7 +35,7 @@ GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align) uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata); uintptr_t almask = (1u << align) - 1u; GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata)); - lua_assert((char *)cd - p < 65536); + lj_assertL((char *)cd - p < 65536, "excessive cdata alignment"); cdatav(cd)->offset = (uint16_t)((char *)cd - p); cdatav(cd)->extra = extra; cdatav(cd)->len = sz; @@ -76,8 +76,8 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) } else if (LJ_LIKELY(!cdataisv(cd))) { CType *ct = ctype_raw(ctype_ctsG(g), cd->ctypeid); CTSize sz = ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR; - lua_assert(ctype_hassize(ct->info) || ctype_isfunc(ct->info) || - ctype_isextern(ct->info)); + lj_assertG(ctype_hassize(ct->info) || ctype_isfunc(ct->info) || + ctype_isextern(ct->info), "free of ctype without a size"); lj_mem_free(g, cd, sizeof(GCcdata) + sz); } else { lj_mem_free(g, memcdatav(cd), sizecdatav(cd)); @@ -115,7 +115,7 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp, /* Resolve reference for cdata object. */ if (ctype_isref(ct->info)) { - lua_assert(ct->size == CTSIZE_PTR); + lj_assertCTS(ct->size == CTSIZE_PTR, "ref is not pointer-sized"); p = *(uint8_t **)p; ct = ctype_child(cts, ct); } @@ -126,7 +126,8 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp, if (ctype_attrib(ct->info) == CTA_QUAL) *qual |= ct->size; ct = ctype_child(cts, ct); } - lua_assert(!ctype_isref(ct->info)); /* Interning rejects refs to refs. */ + /* Interning rejects refs to refs. 
*/ + lj_assertCTS(!ctype_isref(ct->info), "bad ref of ref"); if (tvisint(key)) { idx = (ptrdiff_t)intV(key); @@ -212,7 +213,8 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp, static void cdata_getconst(CTState *cts, TValue *o, CType *ct) { CType *ctt = ctype_child(cts, ct); - lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4); + lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4, + "only 32 bit const supported"); /* NYI */ /* Constants are already zero-extended/sign-extended to 32 bits. */ if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) setnumV(o, (lua_Number)(uint32_t)ct->size); @@ -233,13 +235,14 @@ int lj_cdata_get(CTState *cts, CType *s, TValue *o, uint8_t *sp) } /* Get child type of pointer/array/field. */ - lua_assert(ctype_ispointer(s->info) || ctype_isfield(s->info)); + lj_assertCTS(ctype_ispointer(s->info) || ctype_isfield(s->info), + "pointer or field expected"); sid = ctype_cid(s->info); s = ctype_get(cts, sid); /* Resolve reference for field. */ if (ctype_isref(s->info)) { - lua_assert(s->size == CTSIZE_PTR); + lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized"); sp = *(uint8_t **)sp; sid = ctype_cid(s->info); s = ctype_get(cts, sid); @@ -266,12 +269,13 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual) } /* Get child type of pointer/array/field. */ - lua_assert(ctype_ispointer(d->info) || ctype_isfield(d->info)); + lj_assertCTS(ctype_ispointer(d->info) || ctype_isfield(d->info), + "pointer or field expected"); d = ctype_child(cts, d); /* Resolve reference for field. 
*/ if (ctype_isref(d->info)) { - lua_assert(d->size == CTSIZE_PTR); + lj_assertCTS(d->size == CTSIZE_PTR, "ref is not pointer-sized"); dp = *(uint8_t **)dp; d = ctype_child(cts, d); } @@ -286,7 +290,8 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual) d = ctype_child(cts, d); } - lua_assert(ctype_hassize(d->info) && !ctype_isvoid(d->info)); + lj_assertCTS(ctype_hassize(d->info), "store to ctype without size"); + lj_assertCTS(!ctype_isvoid(d->info), "store to void type"); if (((d->info|qual) & CTF_CONST)) { err_const: diff --git a/src/lj_cdata.h b/src/lj_cdata.h index c1089e64b..c3df8ba0d 100644 --- a/src/lj_cdata.h +++ b/src/lj_cdata.h @@ -18,7 +18,7 @@ static LJ_AINLINE void *cdata_getptr(void *p, CTSize sz) if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ return ((void *)(uintptr_t)*(uint32_t *)p); } else { - lua_assert(sz == CTSIZE_PTR); + lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz); return *(void **)p; } } @@ -29,7 +29,7 @@ static LJ_AINLINE void cdata_setptr(void *p, CTSize sz, const void *v) if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ *(uint32_t *)p = (uint32_t)(uintptr_t)v; } else { - lua_assert(sz == CTSIZE_PTR); + lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz); *(void **)p = (void *)v; } } @@ -40,7 +40,8 @@ static LJ_AINLINE GCcdata *lj_cdata_new(CTState *cts, CTypeID id, CTSize sz) GCcdata *cd; #ifdef LUA_USE_ASSERT CType *ct = ctype_raw(cts, id); - lua_assert((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz); + lj_assertCTS((ctype_hassize(ct->info) ? 
ct->size : CTSIZE_PTR) == sz, + "inconsistent size of fixed-size cdata alloc"); #endif cd = (GCcdata *)lj_mem_newgco(cts->L, sizeof(GCcdata) + sz); cd->gct = ~LJ_TCDATA; diff --git a/src/lj_clib.c b/src/lj_clib.c index 2ea6ff45b..8da41a837 100644 --- a/src/lj_clib.c +++ b/src/lj_clib.c @@ -350,7 +350,8 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) lj_err_callerv(L, LJ_ERR_FFI_NODECL, strdata(name)); if (ctype_isconstval(ct->info)) { CType *ctt = ctype_child(cts, ct); - lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4); + lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4, + "only 32 bit const supported"); /* NYI */ if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) setnumV(tv, (lua_Number)(uint32_t)ct->size); else @@ -362,7 +363,8 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) #endif void *p = clib_getsym(cl, sym); GCcdata *cd; - lua_assert(ctype_isfunc(ct->info) || ctype_isextern(ct->info)); + lj_assertCTS(ctype_isfunc(ct->info) || ctype_isextern(ct->info), + "unexpected ctype %08x in clib", ct->info); #if LJ_TARGET_X86 && LJ_ABI_WIN /* Retry with decorated name for fastcall/stdcall functions. */ if (!p && ctype_isfunc(ct->info)) { diff --git a/src/lj_cparse.c b/src/lj_cparse.c index a393965ed..78628bbaa 100644 --- a/src/lj_cparse.c +++ b/src/lj_cparse.c @@ -28,6 +28,12 @@ ** If in doubt, please check the input against your favorite C compiler. */ +#ifdef LUA_USE_ASSERT +#define lj_assertCP(c, ...) (lj_assertG_(G(cp->L), (c), __VA_ARGS__)) +#else +#define lj_assertCP(c, ...) ((void)cp) +#endif + /* -- Miscellaneous ------------------------------------------------------- */ /* Match string against a C literal. 
*/ @@ -61,7 +67,7 @@ LJ_NORET static void cp_err(CPState *cp, ErrMsg em); static const char *cp_tok2str(CPState *cp, CPToken tok) { - lua_assert(tok < CTOK_FIRSTDECL); + lj_assertCP(tok < CTOK_FIRSTDECL, "bad CPToken %d", tok); if (tok > CTOK_OFS) return ctoknames[tok-CTOK_OFS-1]; else if (!lj_char_iscntrl(tok)) @@ -392,7 +398,7 @@ static void cp_init(CPState *cp) cp->curpack = 0; cp->packstack[0] = 255; lj_buf_init(cp->L, &cp->sb); - lua_assert(cp->p != NULL); + lj_assertCP(cp->p != NULL, "uninitialized cp->p"); cp_get(cp); /* Read-ahead first char. */ cp->tok = 0; cp->tmask = CPNS_DEFAULT; @@ -853,12 +859,13 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) /* The cid is already part of info for copies of pointers/functions. */ idx = ct->next; if (ctype_istypedef(info)) { - lua_assert(id == 0); + lj_assertCP(id == 0, "typedef not at toplevel"); id = ctype_cid(info); /* Always refetch info/size, since struct/enum may have been completed. */ cinfo = ctype_get(cp->cts, id)->info; csize = ctype_get(cp->cts, id)->size; - lua_assert(ctype_isstruct(cinfo) || ctype_isenum(cinfo)); + lj_assertCP(ctype_isstruct(cinfo) || ctype_isenum(cinfo), + "typedef of bad type"); } else if (ctype_isfunc(info)) { /* Intern function. */ CType *fct; CTypeID fid; @@ -891,7 +898,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) /* Inherit csize/cinfo from original type. */ } else { if (ctype_isnum(info)) { /* Handle mode/vector-size attributes. */ - lua_assert(id == 0); + lj_assertCP(id == 0, "number not at toplevel"); if (!(info & CTF_BOOL)) { CTSize msize = ctype_msizeP(decl->attr); CTSize vsize = ctype_vsizeP(decl->attr); @@ -946,7 +953,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) info = (info & ~CTF_ALIGN) | (cinfo & CTF_ALIGN); info |= (cinfo & CTF_QUAL); /* Inherit qual. 
*/ } else { - lua_assert(ctype_isvoid(info)); + lj_assertCP(ctype_isvoid(info), "bad ctype %08x", info); } csize = size; cinfo = info+id; @@ -1585,7 +1592,7 @@ static CPscl cp_decl_spec(CPState *cp, CPDecl *decl, CPscl scl) cp_errmsg(cp, cp->tok, LJ_ERR_FFI_DECLSPEC); sz = sizeof(int); } - lua_assert(sz != 0); + lj_assertCP(sz != 0, "basic ctype with zero size"); info += CTALIGN(lj_fls(sz)); /* Use natural alignment. */ info += (decl->attr & CTF_QUAL); /* Merge qualifiers. */ cp_push(decl, info, sz); @@ -1845,7 +1852,7 @@ static void cp_decl_multi(CPState *cp) /* Treat both static and extern function declarations as extern. */ ct = ctype_get(cp->cts, ctypeid); /* We always get new anonymous functions (typedefs are copied). */ - lua_assert(gcref(ct->name) == NULL); + lj_assertCP(gcref(ct->name) == NULL, "unexpected named function"); id = ctypeid; /* Just name it. */ } else if ((scl & CDF_STATIC)) { /* Accept static constants. */ id = cp_decl_constinit(cp, &ct, ctypeid); @@ -1902,7 +1909,7 @@ static TValue *cpcparser(lua_State *L, lua_CFunction dummy, void *ud) cp_decl_single(cp); if (cp->param && cp->param != cp->L->top) cp_err(cp, LJ_ERR_FFI_NUMPARAM); - lua_assert(cp->depth == 0); + lj_assertCP(cp->depth == 0, "unbalanced cparser declaration depth"); return NULL; } diff --git a/src/lj_crecord.c b/src/lj_crecord.c index e50fdbf74..7ae1479ef 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -61,7 +61,8 @@ static GCcdata *argv2cdata(jit_State *J, TRef tr, cTValue *o) static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr) { CTypeID id; - lua_assert(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID); + lj_assertJ(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID, + "expected CTypeID cdata"); id = *(CTypeID *)cdataptr(cd); tr = emitir(IRT(IR_FLOAD, IRT_INT), tr, IRFL_CDATA_INT); emitir(IRTG(IR_EQ, IRT_INT), tr, lj_ir_kint(J, (int32_t)id)); @@ -237,13 +238,14 @@ static void crec_copy(jit_State *J, TRef trdst, TRef trsrc, TRef trlen, if (len > 
CREC_COPY_MAXLEN) goto fallback; if (ct) { CTState *cts = ctype_ctsG(J2G(J)); - lua_assert(ctype_isarray(ct->info) || ctype_isstruct(ct->info)); + lj_assertJ(ctype_isarray(ct->info) || ctype_isstruct(ct->info), + "copy of non-aggregate"); if (ctype_isarray(ct->info)) { CType *cct = ctype_rawchild(cts, ct); tp = crec_ct2irt(cts, cct); if (tp == IRT_CDATA) goto rawcopy; step = lj_ir_type_size[tp]; - lua_assert((len & (step-1)) == 0); + lj_assertJ((len & (step-1)) == 0, "copy of fractional size"); } else if ((ct->info & CTF_UNION)) { step = (1u << ctype_align(ct->info)); goto rawcopy; @@ -629,7 +631,8 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) /* Specialize to the name of the enum constant. */ emitir(IRTG(IR_EQ, IRT_STR), sp, lj_ir_kstr(J, str)); if (cct && ctype_isconstval(cct->info)) { - lua_assert(ctype_child(cts, cct)->size == 4); + lj_assertJ(ctype_child(cts, cct)->size == 4, + "only 32 bit const supported"); /* NYI */ svisnz = (void *)(intptr_t)(ofs != 0); sp = lj_ir_kint(J, (int32_t)ofs); sid = ctype_cid(cct->info); @@ -757,7 +760,7 @@ static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info) IRType t = IRT_I8 + 2*lj_fls(ctype_bitcsz(info)) + ((info&CTF_UNSIGNED)?1:0); TRef tr = emitir(IRT(IR_XLOAD, t), ptr, 0); CTSize pos = ctype_bitpos(info), bsz = ctype_bitbsz(info), shift = 32 - bsz; - lua_assert(t <= IRT_U32); /* NYI: 64 bit bitfields. */ + lj_assertJ(t <= IRT_U32, "only 32 bit bitfields supported"); /* NYI */ if (rd->data == 0) { /* __index metamethod. */ if ((info & CTF_BOOL)) { tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << pos)))); @@ -769,7 +772,7 @@ static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info) tr = emitir(IRTI(IR_BSHL), tr, lj_ir_kint(J, shift - pos)); tr = emitir(IRTI(IR_BSAR), tr, lj_ir_kint(J, shift)); } else { - lua_assert(bsz < 32); /* Full-size fields cannot end up here. 
*/ + lj_assertJ(bsz < 32, "unexpected full bitfield index"); tr = emitir(IRTI(IR_BSHR), tr, lj_ir_kint(J, pos)); tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << bsz)-1))); /* We can omit the U32 to NUM conversion, since bsz < 32. */ @@ -884,7 +887,7 @@ void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) crec_index_bf(J, rd, ptr, fct->info); return; } else { - lua_assert(ctype_isfield(fct->info)); + lj_assertJ(ctype_isfield(fct->info), "field expected"); sid = ctype_cid(fct->info); } } @@ -1111,7 +1114,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, if (fid) { /* Get argument type from field. */ CType *ctf = ctype_get(cts, fid); fid = ctf->sib; - lua_assert(ctype_isfield(ctf->info)); + lj_assertJ(ctype_isfield(ctf->info), "field expected"); did = ctype_cid(ctf->info); } else { if (!(ct->info & CTF_VARARG)) diff --git a/src/lj_ctype.c b/src/lj_ctype.c index 7e96e1bcd..0e3f88552 100644 --- a/src/lj_ctype.c +++ b/src/lj_ctype.c @@ -153,7 +153,7 @@ CTypeID lj_ctype_new(CTState *cts, CType **ctp) { CTypeID id = cts->top; CType *ct; - lua_assert(cts->L); + lj_assertCTS(cts->L, "uninitialized cts->L"); if (LJ_UNLIKELY(id >= cts->sizetab)) { if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV); #ifdef LUAJIT_CTYPE_CHECK_ANCHOR @@ -182,7 +182,7 @@ CTypeID lj_ctype_intern(CTState *cts, CTInfo info, CTSize size) { uint32_t h = ct_hashtype(info, size); CTypeID id = cts->hash[h]; - lua_assert(cts->L); + lj_assertCTS(cts->L, "uninitialized cts->L"); while (id) { CType *ct = ctype_get(cts, id); if (ct->info == info && ct->size == size) @@ -298,9 +298,9 @@ CTSize lj_ctype_vlsize(CTState *cts, CType *ct, CTSize nelem) } ct = ctype_raw(cts, arrid); } - lua_assert(ctype_isvlarray(ct->info)); /* Must be a VLA. */ + lj_assertCTS(ctype_isvlarray(ct->info), "VLA expected"); ct = ctype_rawchild(cts, ct); /* Get array element. 
*/ - lua_assert(ctype_hassize(ct->info)); + lj_assertCTS(ctype_hassize(ct->info), "bad VLA without size"); /* Calculate actual size of VLA and check for overflow. */ xsz += (uint64_t)ct->size * nelem; return xsz < 0x80000000u ? (CTSize)xsz : CTSIZE_INVALID; @@ -323,7 +323,8 @@ CTInfo lj_ctype_info(CTState *cts, CTypeID id, CTSize *szp) } else { if (!(qual & CTFP_ALIGNED)) qual |= (info & CTF_ALIGN); qual |= (info & ~(CTF_ALIGN|CTMASK_CID)); - lua_assert(ctype_hassize(info) || ctype_isfunc(info)); + lj_assertCTS(ctype_hassize(info) || ctype_isfunc(info), + "ctype without size"); *szp = ctype_isfunc(info) ? CTSIZE_INVALID : ct->size; break; } @@ -528,7 +529,7 @@ static void ctype_repr(CTRepr *ctr, CTypeID id) ctype_appc(ctr, ')'); break; default: - lua_assert(0); + lj_assertG_(ctr->cts->g, 0, "bad ctype %08x", info); break; } ct = ctype_get(ctr->cts, ctype_cid(info)); diff --git a/src/lj_ctype.h b/src/lj_ctype.h index 73cefef86..8c8fc7904 100644 --- a/src/lj_ctype.h +++ b/src/lj_ctype.h @@ -260,6 +260,12 @@ typedef struct CTState { #define CT_MEMALIGN 3 /* Alignment guaranteed by memory allocator. */ +#ifdef LUA_USE_ASSERT +#define lj_assertCTS(c, ...) (lj_assertG_(cts->g, (c), __VA_ARGS__)) +#else +#define lj_assertCTS(c, ...) ((void)cts) +#endif + /* -- Predefined types ---------------------------------------------------- */ /* Target-dependent types. */ @@ -392,7 +398,8 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L) /* Check C type ID for validity when assertions are enabled. */ static LJ_AINLINE CTypeID ctype_check(CTState *cts, CTypeID id) { - lua_assert(id > 0 && id < cts->top); UNUSED(cts); + UNUSED(cts); + lj_assertCTS(id > 0 && id < cts->top, "bad CTID %d", id); return id; } @@ -408,8 +415,9 @@ static LJ_AINLINE CType *ctype_get(CTState *cts, CTypeID id) /* Get child C type. 
*/ static LJ_AINLINE CType *ctype_child(CTState *cts, CType *ct) { - lua_assert(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) || - ctype_isbitfield(ct->info))); /* These don't have children. */ + lj_assertCTS(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) || + ctype_isbitfield(ct->info)), + "ctype %08x has no children", ct->info); return ctype_get(cts, ctype_cid(ct->info)); } diff --git a/src/lj_debug.c b/src/lj_debug.c index 2f2ea9f0b..c1f0f314e 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c @@ -55,7 +55,8 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) const BCIns *ins; GCproto *pt; BCPos pos; - lua_assert(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD); + lj_assertL(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD, + "function or frame expected"); if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */ return NO_BCPOS; } else if (nextframe == NULL) { /* Lua function on top. */ @@ -100,7 +101,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) #if LJ_HASJIT if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. 
*/ GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins)); - lua_assert(bc_isret(bc_op(ins[-1]))); + lj_assertL(bc_isret(bc_op(ins[-1])), "return bytecode expected"); pos = proto_bcpos(pt, mref(T->startpc, const BCIns)); } #endif @@ -133,7 +134,7 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe) BCPos pc = debug_framepc(L, fn, nextframe); if (pc != NO_BCPOS) { GCproto *pt = funcproto(fn); - lua_assert(pc <= pt->sizebc); + lj_assertL(pc <= pt->sizebc, "PC out of range"); return lj_debug_line(pt, pc); } return -1; @@ -214,7 +215,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar, const char *lj_debug_uvname(GCproto *pt, uint32_t idx) { const uint8_t *p = proto_uvinfo(pt); - lua_assert(idx < pt->sizeuv); + lj_assertX(idx < pt->sizeuv, "bad upvalue index"); if (!p) return ""; if (idx) while (*p++ || --idx) ; return (const char *)p; @@ -439,13 +440,14 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext) } else { uint32_t offset = (uint32_t)ar->i_ci & 0xffff; uint32_t size = (uint32_t)ar->i_ci >> 16; - lua_assert(offset != 0); + lj_assertL(offset != 0, "bad frame offset"); frame = tvref(L->stack) + offset; if (size) nextframe = frame + size; - lua_assert(frame <= tvref(L->maxstack) && - (!nextframe || nextframe <= tvref(L->maxstack))); + lj_assertL(frame <= tvref(L->maxstack) && + (!nextframe || nextframe <= tvref(L->maxstack)), + "broken frame chain"); fn = frame_func(frame); - lua_assert(fn->c.gct == ~LJ_TFUNC); + lj_assertL(fn->c.gct == ~LJ_TFUNC, "bad frame function"); } for (; *what; what++) { if (*what == 'S') { diff --git a/src/lj_def.h b/src/lj_def.h index 75aaeb797..af0687c4f 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -337,14 +337,28 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v) #define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET #define LJ_ASMF_NORET LJ_ASMF LJ_NORET -/* Runtime assertions. 
*/ -#ifdef lua_assert -#define check_exp(c, e) (lua_assert(c), (e)) -#define api_check(l, e) lua_assert(e) +/* Internal assertions. */ +#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) +#define lj_assert_check(g, c, ...) \ + ((c) ? (void)0 : \ + (lj_assert_fail((g), __FILE__, __LINE__, __func__, __VA_ARGS__), 0)) +#define lj_checkapi(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__) #else -#define lua_assert(c) ((void)0) +#define lj_checkapi(c, ...) ((void)L) +#endif + +#ifdef LUA_USE_ASSERT +#define lj_assertG_(g, c, ...) lj_assert_check((g), (c), __VA_ARGS__) +#define lj_assertG(c, ...) lj_assert_check(g, (c), __VA_ARGS__) +#define lj_assertL(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__) +#define lj_assertX(c, ...) lj_assert_check(NULL, (c), __VA_ARGS__) +#define check_exp(c, e) (lj_assertX((c), #c), (e)) +#else +#define lj_assertG_(g, c, ...) ((void)0) +#define lj_assertG(c, ...) ((void)g) +#define lj_assertL(c, ...) ((void)L) +#define lj_assertX(c, ...) ((void)0) #define check_exp(c, e) (e) -#define api_check luai_apicheck #endif /* Static assertions. */ diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 39416d007..c608e2237 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -367,7 +367,7 @@ static void callhook(lua_State *L, int event, BCLine line) hook_enter(g); #endif hookf(L, &ar); - lua_assert(hook_active(g)); + lj_assertG(hook_active(g), "active hook flag removed"); setgcref(g->cur_L, obj2gco(L)); #if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF lj_profile_hook_leave(g); @@ -415,7 +415,8 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc) #endif J->L = L; lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. 
*/ - lua_assert(L->top - L->base == delta); + lj_assertG(L->top - L->base == delta, + "unbalanced stack after tracing of instruction"); } } #endif @@ -475,7 +476,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) #endif pc = (const BCIns *)((uintptr_t)pc & ~(uintptr_t)1); lj_trace_hot(J, pc); - lua_assert(L->top - L->base == delta); + lj_assertG(L->top - L->base == delta, + "unbalanced stack after hot call"); goto out; } else if (J->state != LJ_TRACE_IDLE && !(g->hookmask & (HOOK_GC|HOOK_VMEVENT))) { @@ -484,7 +486,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) #endif /* Record the FUNC* bytecodes, too. */ lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ - lua_assert(L->top - L->base == delta); + lj_assertG(L->top - L->base == delta, + "unbalanced stack after hot instruction"); } #endif if ((g->hookmask & LUA_MASKCALL)) { diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h index 25561549d..165d546d2 100644 --- a/src/lj_emit_arm.h +++ b/src/lj_emit_arm.h @@ -81,7 +81,8 @@ static void emit_m(ASMState *as, ARMIns ai, Reg rm) static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) { - lua_assert(ofs >= -255 && ofs <= 255); + lj_assertA(ofs >= -255 && ofs <= 255, + "load/store offset %d out of range", ofs); if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) | ((ofs & 0xf0) << 4) | (ofs & 0x0f); @@ -89,7 +90,8 @@ static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) { - lua_assert(ofs >= -4095 && ofs <= 4095); + lj_assertA(ofs >= -4095 && ofs <= 4095, + "load/store offset %d out of range", ofs); /* Combine LDR/STR pairs to LDRD/STRD. 
*/ if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) && (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn && @@ -106,7 +108,8 @@ static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) #if !LJ_SOFTFP static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) { - lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0); + lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0, + "load/store offset %d out of range", ofs); if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2); } @@ -124,7 +127,7 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i) while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != d); + lj_assertA(r != d, "dest reg not free"); if (emit_canremat(ref)) { int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); uint32_t k = emit_isk12(ARMI_ADD, delta); @@ -142,13 +145,13 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i) } /* Try to find a two step delta relative to another constant. */ -static int emit_kdelta2(ASMState *as, Reg d, int32_t i) +static int emit_kdelta2(ASMState *as, Reg rd, int32_t i) { RegSet work = ~as->freeset & RSET_GPR; while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != d); + lj_assertA(r != rd, "dest reg %d not free", rd); if (emit_canremat(ref)) { int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i; if (other) { @@ -159,8 +162,8 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i) k2 = emit_isk12(0, delta & (255 << sh)); k = emit_isk12(0, delta & ~(255 << sh)); if (k) { - emit_dn(as, ARMI_ADD^k2^inv, d, d); - emit_dn(as, ARMI_ADD^k^inv, d, r); + emit_dn(as, ARMI_ADD^k2^inv, rd, rd); + emit_dn(as, ARMI_ADD^k^inv, rd, r); return 1; } } @@ -171,23 +174,24 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i) } /* Load a 32 bit constant into a GPR. 
*/ -static void emit_loadi(ASMState *as, Reg r, int32_t i) +static void emit_loadi(ASMState *as, Reg rd, int32_t i) { uint32_t k = emit_isk12(ARMI_MOV, i); - lua_assert(rset_test(as->freeset, r) || r == RID_TMP); + lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP, + "dest reg %d not free", rd); if (k) { /* Standard K12 constant. */ - emit_d(as, ARMI_MOV^k, r); + emit_d(as, ARMI_MOV^k, rd); } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { /* 16 bit loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); - } else if (emit_kdelta1(as, r, i)) { + emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); + } else if (emit_kdelta1(as, rd, i)) { /* One step delta relative to another constant. */ } else if ((as->flags & JIT_F_ARMV6T2)) { /* 32 bit hiword/loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), r); - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); - } else if (emit_kdelta2(as, r, i)) { + emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd); + emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); + } else if (emit_kdelta2(as, rd, i)) { /* Two step delta relative to another constant. */ } else { /* Otherwise construct the constant with up to 4 instructions. 
*/ @@ -197,15 +201,15 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) int32_t m = i & (255 << sh); i &= ~(255 << sh); if (i == 0) { - emit_d(as, ARMI_MOV ^ emit_isk12(0, m), r); + emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd); break; } - emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), r, r); + emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd); } } } -#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) +#define emit_loada(as, rd, addr) emit_loadi(as, (rd), i32ptr((addr))) static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); @@ -261,7 +265,7 @@ static void emit_branch(ASMState *as, ARMIns ai, MCode *target) { MCode *p = as->mcp; ptrdiff_t delta = (target - p) - 1; - lua_assert(((delta + 0x00800000) >> 24) == 0); + lj_assertA(((delta + 0x00800000) >> 24) == 0, "branch target out of range"); *--p = ai | ((uint32_t)delta & 0x00ffffffu); as->mcp = p; } @@ -289,7 +293,7 @@ static void emit_call(ASMState *as, void *target) static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) { #if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); + lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); #else if (dst >= RID_MAX_GPR) { emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S, @@ -313,7 +317,7 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { #if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); + lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); #else if (r >= RID_MAX_GPR) emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs); @@ -326,7 +330,7 @@ static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { #if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); + lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); #else if (r >= RID_MAX_GPR) emit_vlso(as, irt_isnum(ir->t) ? 
ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs); diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index f09c0f3ab..61a2df82f 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h @@ -8,8 +8,9 @@ /* -- Constant encoding --------------------------------------------------- */ -static uint64_t get_k64val(IRIns *ir) +static uint64_t get_k64val(ASMState *as, IRRef ref) { + IRIns *ir = IR(ref); if (ir->o == IR_KINT64) { return ir_kint64(ir)->u64; } else if (ir->o == IR_KGC) { @@ -17,7 +18,8 @@ static uint64_t get_k64val(IRIns *ir) } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { return (uint64_t)ir_kptr(ir); } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); + lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, + "bad 64 bit const IR op %d", ir->o); return ir->i; /* Sign-extended. */ } } @@ -122,7 +124,7 @@ static int emit_checkofs(A64Ins ai, int64_t ofs) static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) { int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3; - lua_assert(ot); + lj_assertA(ot, "load/store offset %d out of range", ofs); /* Combine LDR/STR pairs to LDP/STP. */ if ((sc == 2 || sc == 3) && (!(ai & 0x400000) || rd != rn) && @@ -166,10 +168,10 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != rd); + lj_assertA(r != rd, "dest reg %d not free", rd); if (ref < REF_TRUE) { uint64_t kx = ra_iskref(ref) ? 
(uint64_t)ra_krefk(as, ref) : - get_k64val(IR(ref)); + get_k64val(as, ref); int64_t delta = (int64_t)(k - kx); if (delta == 0) { emit_dm(as, A64I_MOVx, rd, r); @@ -312,7 +314,7 @@ static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) { MCode *p = --as->mcp; ptrdiff_t delta = target - p; - lua_assert(A64F_S_OK(delta, 19)); + lj_assertA(A64F_S_OK(delta, 19), "branch target out of range"); *p = A64I_BCC | A64F_S19(delta) | cond; } @@ -320,7 +322,7 @@ static void emit_branch(ASMState *as, A64Ins ai, MCode *target) { MCode *p = --as->mcp; ptrdiff_t delta = target - p; - lua_assert(A64F_S_OK(delta, 26)); + lj_assertA(A64F_S_OK(delta, 26), "branch target out of range"); *p = ai | A64F_S26(delta); } @@ -328,7 +330,8 @@ static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target { MCode *p = --as->mcp; ptrdiff_t delta = target - p; - lua_assert(bit < 63 && A64F_S_OK(delta, 14)); + lj_assertA(bit < 63, "bit number out of range"); + lj_assertA(A64F_S_OK(delta, 14), "branch target out of range"); if (bit > 31) ai |= A64I_X; *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r; } @@ -337,7 +340,7 @@ static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target) { MCode *p = --as->mcp; ptrdiff_t delta = target - p; - lua_assert(A64F_S_OK(delta, 19)); + lj_assertA(A64F_S_OK(delta, 19), "branch target out of range"); *p = ai | A64F_S19(delta) | r; } diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h index bdabcf162..3de5ff182 100644 --- a/src/lj_emit_mips.h +++ b/src/lj_emit_mips.h @@ -4,8 +4,9 @@ */ #if LJ_64 -static intptr_t get_k64val(IRIns *ir) +static intptr_t get_k64val(ASMState *as, IRRef ref) { + IRIns *ir = IR(ref); if (ir->o == IR_KINT64) { return (intptr_t)ir_kint64(ir)->u64; } else if (ir->o == IR_KGC) { @@ -15,16 +16,17 @@ static intptr_t get_k64val(IRIns *ir) } else if (LJ_SOFTFP && ir->o == IR_KNUM) { return (intptr_t)ir_knum(ir)->u64; } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); + lj_assertA(ir->o == 
IR_KINT || ir->o == IR_KNULL, + "bad 64 bit const IR op %d", ir->o); return ir->i; /* Sign-extended. */ } } #endif #if LJ_64 -#define get_kval(ir) get_k64val(ir) +#define get_kval(as, ref) get_k64val(as, ref) #else -#define get_kval(ir) ((ir)->i) +#define get_kval(as, ref) (IR((ref))->i) #endif /* -- Emit basic instructions --------------------------------------------- */ @@ -82,18 +84,18 @@ static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, #define emit_canremat(ref) ((ref) <= REF_BASE) /* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg t, intptr_t i) +static int emit_kdelta1(ASMState *as, Reg rd, intptr_t i) { RegSet work = ~as->freeset & RSET_GPR; while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != t); + lj_assertA(r != rd, "dest reg %d not free", rd); if (ref < ASMREF_L) { intptr_t delta = (intptr_t)((uintptr_t)i - - (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(IR(ref)))); + (uintptr_t)(ra_iskref(ref) ? 
ra_krefk(as, ref) : get_kval(as, ref))); if (checki16(delta)) { - emit_tsi(as, MIPSI_AADDIU, t, r, delta); + emit_tsi(as, MIPSI_AADDIU, rd, r, delta); return 1; } } @@ -223,7 +225,7 @@ static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target) { MCode *p = as->mcp; ptrdiff_t delta = target - p; - lua_assert(((delta + 0x8000) >> 16) == 0); + lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range"); *--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu); as->mcp = p; } @@ -299,7 +301,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) static void emit_addptr(ASMState *as, Reg r, int32_t ofs) { if (ofs) { - lua_assert(checki16(ofs)); + lj_assertA(checki16(ofs), "offset %d out of range", ofs); emit_tsi(as, MIPSI_AADDIU, r, r, ofs); } } diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h index 69765528b..6bc74c048 100644 --- a/src/lj_emit_ppc.h +++ b/src/lj_emit_ppc.h @@ -41,13 +41,13 @@ static void emit_rot(ASMState *as, PPCIns pi, Reg ra, Reg rs, static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n) { - lua_assert(n >= 0 && n < 32); + lj_assertA(n >= 0 && n < 32, "shift out or range"); emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n); } static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) { - lua_assert(n >= 0 && n < 32); + lj_assertA(n >= 0 && n < 32, "shift out or range"); emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31); } @@ -57,17 +57,17 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) #define emit_canremat(ref) ((ref) <= REF_BASE) /* Try to find a one step delta relative to another constant. 
*/ -static int emit_kdelta1(ASMState *as, Reg t, int32_t i) +static int emit_kdelta1(ASMState *as, Reg rd, int32_t i) { RegSet work = ~as->freeset & RSET_GPR; while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != t); + lj_assertA(r != rd, "dest reg %d not free", rd); if (ref < ASMREF_L) { int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); if (checki16(delta)) { - emit_tai(as, PPCI_ADDI, t, r, delta); + emit_tai(as, PPCI_ADDI, rd, r, delta); return 1; } } @@ -144,7 +144,7 @@ static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target) { MCode *p = --as->mcp; ptrdiff_t delta = (char *)target - (char *)p; - lua_assert(((delta + 0x8000) >> 16) == 0); + lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range"); pi ^= (delta & 0x8000) * (PPCF_Y/0x8000); *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu); } diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index b17e28a57..66750a96c 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -92,7 +92,7 @@ static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2) /* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). 
*/ static int32_t ptr2addr(const void *p) { - lua_assert((uintptr_t)p < (uintptr_t)0x80000000); + lj_assertX((uintptr_t)p < (uintptr_t)0x80000000, "pointer outside 2G range"); return i32ptr(p); } #else @@ -208,7 +208,7 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb) rb = RID_ESP; #endif } else if (LJ_GC64 && rb == RID_RIP) { - lua_assert(as->mrm.idx == RID_NONE); + lj_assertA(as->mrm.idx == RID_NONE, "RIP-rel mrm cannot have index"); mode = XM_OFS0; p -= 4; *(int32_t *)p = as->mrm.ofs; @@ -401,7 +401,8 @@ static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) emit_rma(as, xo, r64, k); } else { if (ir->i) { - lua_assert(*k == *(uint64_t*)(as->mctop - ir->i)); + lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i), + "bad interned 64 bit constant"); } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) { emit_loadu64(as, r, *k); return; @@ -433,7 +434,7 @@ static void emit_sjmp(ASMState *as, MCLabel target) { MCode *p = as->mcp; ptrdiff_t delta = target - p; - lua_assert(delta == (int8_t)delta); + lj_assertA(delta == (int8_t)delta, "short jump target out of range"); p[-1] = (MCode)(int8_t)delta; p[-2] = XI_JMPs; as->mcp = p - 2; @@ -445,7 +446,7 @@ static void emit_sjcc(ASMState *as, int cc, MCLabel target) { MCode *p = as->mcp; ptrdiff_t delta = target - p; - lua_assert(delta == (int8_t)delta); + lj_assertA(delta == (int8_t)delta, "short jump target out of range"); p[-1] = (MCode)(int8_t)delta; p[-2] = (MCode)(XI_JCCs+(cc&15)); as->mcp = p - 2; @@ -471,10 +472,11 @@ static void emit_sfixup(ASMState *as, MCLabel source) #define emit_label(as) ((as)->mcp) /* Compute relative 32 bit offset for jump and call instructions. 
*/ -static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target) +static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target) { ptrdiff_t delta = target - p; - lua_assert(delta == (int32_t)delta); + UNUSED(J); + lj_assertJ(delta == (int32_t)delta, "jump target out of range"); return (int32_t)delta; } @@ -482,7 +484,7 @@ static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target) static void emit_jcc(ASMState *as, int cc, MCode *target) { MCode *p = as->mcp; - *(int32_t *)(p-4) = jmprel(p, target); + *(int32_t *)(p-4) = jmprel(as->J, p, target); p[-5] = (MCode)(XI_JCCn+(cc&15)); p[-6] = 0x0f; as->mcp = p - 6; @@ -492,7 +494,7 @@ static void emit_jcc(ASMState *as, int cc, MCode *target) static void emit_jmp(ASMState *as, MCode *target) { MCode *p = as->mcp; - *(int32_t *)(p-4) = jmprel(p, target); + *(int32_t *)(p-4) = jmprel(as->J, p, target); p[-5] = XI_JMP; as->mcp = p - 5; } @@ -509,7 +511,7 @@ static void emit_call_(ASMState *as, MCode *target) return; } #endif - *(int32_t *)(p-4) = jmprel(p, target); + *(int32_t *)(p-4) = jmprel(as->J, p, target); p[-5] = XI_CALL; as->mcp = p - 5; } diff --git a/src/lj_err.c b/src/lj_err.c index 524989320..41fbf5c7f 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -589,7 +589,7 @@ static ptrdiff_t finderrfunc(lua_State *L) return savestack(L, frame_prevd(frame)+1); /* xpcall's errorfunc. 
*/ return 0; default: - lua_assert(0); + lj_assertL(0, "bad frame type"); return 0; } } diff --git a/src/lj_func.c b/src/lj_func.c index 9afdb638e..fb2678859 100644 --- a/src/lj_func.c +++ b/src/lj_func.c @@ -24,9 +24,11 @@ void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt) /* -- Upvalues ------------------------------------------------------------ */ -static void unlinkuv(GCupval *uv) +static void unlinkuv(global_State *g, GCupval *uv) { - lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); + UNUSED(g); + lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, + "broken upvalue chain"); setgcrefr(uvnext(uv)->prev, uv->prev); setgcrefr(uvprev(uv)->next, uv->next); } @@ -40,7 +42,7 @@ static GCupval *func_finduv(lua_State *L, TValue *slot) GCupval *uv; /* Search the sorted list of open upvalues. */ while (gcref(*pp) != NULL && uvval((p = gco2uv(gcref(*pp)))) >= slot) { - lua_assert(!p->closed && uvval(p) != &p->tv); + lj_assertG(!p->closed && uvval(p) != &p->tv, "closed upvalue in chain"); if (uvval(p) == slot) { /* Found open upvalue pointing to same slot? */ if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. 
*/ flipwhite(obj2gco(p)); @@ -61,7 +63,8 @@ static GCupval *func_finduv(lua_State *L, TValue *slot) setgcrefr(uv->next, g->uvhead.next); setgcref(uvnext(uv)->prev, obj2gco(uv)); setgcref(g->uvhead.next, obj2gco(uv)); - lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); + lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, + "broken upvalue chain"); return uv; } @@ -84,12 +87,13 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level) while (gcref(L->openupval) != NULL && uvval((uv = gco2uv(gcref(L->openupval)))) >= level) { GCobj *o = obj2gco(uv); - lua_assert(!isblack(o) && !uv->closed && uvval(uv) != &uv->tv); + lj_assertG(!isblack(o), "bad black upvalue"); + lj_assertG(!uv->closed && uvval(uv) != &uv->tv, "closed upvalue in chain"); setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */ if (isdead(g, o)) { lj_func_freeuv(g, uv); } else { - unlinkuv(uv); + unlinkuv(g, uv); lj_gc_closeuv(g, uv); } } @@ -98,7 +102,7 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level) void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv) { if (!uv->closed) - unlinkuv(uv); + unlinkuv(g, uv); lj_mem_freet(g, uv); } diff --git a/src/lj_gc.c b/src/lj_gc.c index 81439aabe..671b59832 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c @@ -42,7 +42,8 @@ /* Mark a TValue (if needed). */ #define gc_marktv(g, tv) \ - { lua_assert(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct)); \ + { lj_assertG(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct), \ + "TValue and GC type mismatch"); \ if (tviswhite(tv)) gc_mark(g, gcV(tv)); } /* Mark a GCobj (if needed). 
*/ @@ -56,7 +57,8 @@ static void gc_mark(global_State *g, GCobj *o) { int gct = o->gch.gct; - lua_assert(iswhite(o) && !isdead(g, o)); + lj_assertG(iswhite(o), "mark of non-white object"); + lj_assertG(!isdead(g, o), "mark of dead object"); white2gray(o); if (LJ_UNLIKELY(gct == ~LJ_TUDATA)) { GCtab *mt = tabref(gco2ud(o)->metatable); @@ -69,8 +71,9 @@ static void gc_mark(global_State *g, GCobj *o) if (uv->closed) gray2black(o); /* Closed upvalues are never gray. */ } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) { - lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || - gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE); + lj_assertG(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || + gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE, + "bad GC type %d", gct); setgcrefr(o->gch.gclist, g->gc.gray); setgcref(g->gc.gray, o); } @@ -103,7 +106,8 @@ static void gc_mark_uv(global_State *g) { GCupval *uv; for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) { - lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); + lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, + "broken upvalue chain"); if (isgray(obj2gco(uv))) gc_marktv(g, uvval(uv)); } @@ -198,7 +202,7 @@ static int gc_traverse_tab(global_State *g, GCtab *t) for (i = 0; i <= hmask; i++) { Node *n = &node[i]; if (!tvisnil(&n->val)) { /* Mark non-empty slot. */ - lua_assert(!tvisnil(&n->key)); + lj_assertG(!tvisnil(&n->key), "mark of nil key in non-empty slot"); if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key); if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val); } @@ -213,7 +217,8 @@ static void gc_traverse_func(global_State *g, GCfunc *fn) gc_markobj(g, tabref(fn->c.env)); if (isluafunc(fn)) { uint32_t i; - lua_assert(fn->l.nupvalues <= funcproto(fn)->sizeuv); + lj_assertG(fn->l.nupvalues <= funcproto(fn)->sizeuv, + "function upvalues out of range"); gc_markobj(g, funcproto(fn)); for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. 
*/ gc_markobj(g, &gcref(fn->l.uvptr[i])->uv); @@ -229,7 +234,7 @@ static void gc_traverse_func(global_State *g, GCfunc *fn) static void gc_marktrace(global_State *g, TraceNo traceno) { GCobj *o = obj2gco(traceref(G2J(g), traceno)); - lua_assert(traceno != G2J(g)->cur.traceno); + lj_assertG(traceno != G2J(g)->cur.traceno, "active trace escaped"); if (iswhite(o)) { white2gray(o); setgcrefr(o->gch.gclist, g->gc.gray); @@ -310,7 +315,7 @@ static size_t propagatemark(global_State *g) { GCobj *o = gcref(g->gc.gray); int gct = o->gch.gct; - lua_assert(isgray(o)); + lj_assertG(isgray(o), "propagation of non-gray object"); gray2black(o); setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */ if (LJ_LIKELY(gct == ~LJ_TTAB)) { @@ -342,7 +347,7 @@ static size_t propagatemark(global_State *g) return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) + T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry); #else - lua_assert(0); + lj_assertG(0, "bad GC type %d", gct); return 0; #endif } @@ -396,11 +401,13 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */ gc_fullsweep(g, &gco2th(o)->openupval); if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ - lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED)); + lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), + "sweep of undead object"); makewhite(g, o); /* Value is alive, change to the current white. */ p = &o->gch.nextgc; } else { /* Otherwise value is dead, free it. */ - lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED); + lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, + "sweep of unlive object"); setgcrefr(*p, o->gch.nextgc); if (o == gcref(g->gc.root)) setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */ @@ -427,11 +434,12 @@ static int gc_mayclear(cTValue *o, int val) } /* Clear collected entries from weak tables. 
*/ -static void gc_clearweak(GCobj *o) +static void gc_clearweak(global_State *g, GCobj *o) { + UNUSED(g); while (o) { GCtab *t = gco2tab(o); - lua_assert((t->marked & LJ_GC_WEAK)); + lj_assertG((t->marked & LJ_GC_WEAK), "clear of non-weak table"); if ((t->marked & LJ_GC_WEAKVAL)) { MSize i, asize = t->asize; for (i = 0; i < asize; i++) { @@ -488,7 +496,7 @@ static void gc_finalize(lua_State *L) global_State *g = G(L); GCobj *o = gcnext(gcref(g->gc.mmudata)); cTValue *mo; - lua_assert(tvref(g->jit_base) == NULL); /* Must not be called on trace. */ + lj_assertG(tvref(g->jit_base) == NULL, "finalizer called on trace"); /* Unchain from list of userdata to be finalized. */ if (o == gcref(g->gc.mmudata)) setgcrefnull(g->gc.mmudata); @@ -580,7 +588,7 @@ static void atomic(global_State *g, lua_State *L) setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */ setgcrefnull(g->gc.weak); - lua_assert(!iswhite(obj2gco(mainthread(g)))); + lj_assertG(!iswhite(obj2gco(mainthread(g))), "main thread turned white"); gc_markobj(g, L); /* Mark running thread. */ gc_traverse_curtrace(g); /* Traverse current trace. */ gc_mark_gcroot(g); /* Mark GC roots (again). */ @@ -595,7 +603,7 @@ static void atomic(global_State *g, lua_State *L) udsize += gc_propagate_gray(g); /* And propagate the marks. */ /* All marking done, clear weak tables. */ - gc_clearweak(gcref(g->gc.weak)); + gc_clearweak(g, gcref(g->gc.weak)); lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */ @@ -631,14 +639,14 @@ static size_t gc_onestep(lua_State *L) gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ if (g->gc.sweepstr > g->strmask) g->gc.state = GCSsweep; /* All string hash chains sweeped. 
*/ - lua_assert(old >= g->gc.total); + lj_assertG(old >= g->gc.total, "sweep increased memory"); g->gc.estimate -= old - g->gc.total; return GCSWEEPCOST; } case GCSsweep: { GCSize old = g->gc.total; setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX)); - lua_assert(old >= g->gc.total); + lj_assertG(old >= g->gc.total, "sweep increased memory"); g->gc.estimate -= old - g->gc.total; if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1) @@ -671,7 +679,7 @@ static size_t gc_onestep(lua_State *L) g->gc.debt = 0; return 0; default: - lua_assert(0); + lj_assertG(0, "bad GC state"); return 0; } } @@ -745,7 +753,8 @@ void lj_gc_fullgc(lua_State *L) } while (g->gc.state == GCSsweepstring || g->gc.state == GCSsweep) gc_onestep(L); /* Finish sweep. */ - lua_assert(g->gc.state == GCSfinalize || g->gc.state == GCSpause); + lj_assertG(g->gc.state == GCSfinalize || g->gc.state == GCSpause, + "bad GC state"); /* Now perform a full GC. */ g->gc.state = GCSpause; do { gc_onestep(L); } while (g->gc.state != GCSpause); @@ -758,9 +767,11 @@ void lj_gc_fullgc(lua_State *L) /* Move the GC propagation frontier forward. */ void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v) { - lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); - lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); - lua_assert(o->gch.gct != ~LJ_TTAB); + lj_assertG(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o), + "bad object states for forward barrier"); + lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, + "bad GC state"); + lj_assertG(o->gch.gct != ~LJ_TTAB, "barrier object is not a table"); /* Preserve invariant during propagation. Otherwise it doesn't matter. */ if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic) gc_mark(g, v); /* Move frontier forward. 
*/ @@ -797,7 +808,8 @@ void lj_gc_closeuv(global_State *g, GCupval *uv) lj_gc_barrierf(g, o, gcV(&uv->tv)); } else { makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */ - lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); + lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, + "bad GC state"); } } } @@ -817,12 +829,13 @@ void lj_gc_barriertrace(global_State *g, uint32_t traceno) void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz) { global_State *g = G(L); - lua_assert((osz == 0) == (p == NULL)); + lj_assertG((osz == 0) == (p == NULL), "realloc API violation"); p = g->allocf(g->allocd, p, osz, nsz); if (p == NULL && nsz > 0) lj_err_mem(L); - lua_assert((nsz == 0) == (p == NULL)); - lua_assert(checkptrGC(p)); + lj_assertG((nsz == 0) == (p == NULL), "allocf API violation"); + lj_assertG(checkptrGC(p), + "allocated memory address %p outside required range", p); g->gc.total = (g->gc.total - osz) + nsz; return p; } @@ -834,7 +847,8 @@ void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size) GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); if (o == NULL) lj_err_mem(L); - lua_assert(checkptrGC(o)); + lj_assertG(checkptrGC(o), + "allocated memory address %p outside required range", o); g->gc.total += size; setgcrefr(o->gch.nextgc, g->gc.root); setgcref(g->gc.root, o); diff --git a/src/lj_gc.h b/src/lj_gc.h index 1725c6391..6fc88cf9f 100644 --- a/src/lj_gc.h +++ b/src/lj_gc.h @@ -81,8 +81,10 @@ LJ_FUNC void lj_gc_barriertrace(global_State *g, uint32_t traceno); static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t) { GCobj *o = obj2gco(t); - lua_assert(isblack(o) && !isdead(g, o)); - lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); + lj_assertG(isblack(o) && !isdead(g, o), + "bad object states for backward barrier"); + lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, + "bad GC state"); black2gray(o); setgcrefr(t->gclist, g->gc.grayagain); 
setgcref(g->gc.grayagain, o); diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index a20d9ae2a..69585e512 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c @@ -724,7 +724,7 @@ static void gdbjit_buildobj(GDBJITctx *ctx) SECTALIGN(ctx->p, sizeof(uintptr_t)); gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe); ctx->objsize = (size_t)((char *)ctx->p - (char *)obj); - lua_assert(ctx->objsize < sizeof(GDBJITobj)); + lj_assertX(ctx->objsize < sizeof(GDBJITobj), "GDBJITobj overflow"); } #undef SECTALIGN @@ -782,7 +782,8 @@ void lj_gdbjit_addtrace(jit_State *J, GCtrace *T) ctx.spadjp = CFRAME_SIZE_JIT + (MSize)(parent ? traceref(J, parent)->spadjust : 0); ctx.spadj = CFRAME_SIZE_JIT + T->spadjust; - lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); + lj_assertJ(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc, + "start PC out of range"); ctx.lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); ctx.filename = proto_chunknamestr(pt); if (*ctx.filename == '@' || *ctx.filename == '=') diff --git a/src/lj_ir.c b/src/lj_ir.c index 1dd25f234..600e432c9 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -38,7 +38,7 @@ #define fins (&J->fold.ins) /* Pass IR on to next optimization in chain (FOLD). */ -#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) +#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) /* -- IR tables ----------------------------------------------------------- */ @@ -90,8 +90,9 @@ static void lj_ir_growbot(jit_State *J) { IRIns *baseir = J->irbuf + J->irbotlim; MSize szins = J->irtoplim - J->irbotlim; - lua_assert(szins != 0); - lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim); + lj_assertJ(szins != 0, "zero IR size"); + lj_assertJ(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim, + "unexpected IR growth"); if (J->cur.nins + (szins >> 1) < J->irtoplim) { /* More than half of the buffer is free on top: shift up by a quarter. 
*/ MSize ofs = szins >> 2; @@ -148,9 +149,10 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...) /* Load field of type t from GG_State + offset. Must be 32 bit aligned. */ LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs) { - lua_assert((ofs & 3) == 0); + lj_assertJ((ofs & 3) == 0, "unaligned GG_State field offset"); ofs >>= 2; - lua_assert(ofs >= IRFL__MAX && ofs <= 0x3ff); /* 10 bit FOLD key limit. */ + lj_assertJ(ofs >= IRFL__MAX && ofs <= 0x3ff, + "GG_State field offset breaks 10 bit FOLD key limit"); lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs); return lj_opt_fold(J); } @@ -181,7 +183,7 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J) static LJ_AINLINE IRRef ir_nextk64(jit_State *J) { IRRef ref = J->cur.nk - 2; - lua_assert(J->state != LJ_TRACE_ASM); + lj_assertJ(J->state != LJ_TRACE_ASM, "bad JIT state"); if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J); J->cur.nk = ref; return ref; @@ -277,7 +279,7 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t) { IRIns *ir, *cir = J->cur.ir; IRRef ref; - lua_assert(!isdead(J2G(J), o)); + lj_assertJ(!isdead(J2G(J), o), "interning of dead GC object"); for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) if (ir_kgc(&cir[ref]) == o) goto found; @@ -299,7 +301,7 @@ TRef lj_ir_ktrace(jit_State *J) { IRRef ref = ir_nextkgc(J); IRIns *ir = IR(ref); - lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE); + lj_assertJ(irt_toitype_(IRT_P64) == LJ_TTRACE, "mismatched type mapping"); ir->t.irt = IRT_P64; ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. 
*/ ir->op12 = 0; @@ -313,7 +315,7 @@ TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) IRIns *ir, *cir = J->cur.ir; IRRef ref; #if LJ_64 && !LJ_GC64 - lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr); + lj_assertJ((void *)(uintptr_t)u32ptr(ptr) == ptr, "out-of-range GC pointer"); #endif for (ref = J->chain[op]; ref; ref = cir[ref].prev) if (ir_kptr(&cir[ref]) == ptr) @@ -360,7 +362,8 @@ TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot) IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot); IRRef ref; /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */ - lua_assert(tref_isk(key) && slot == (IRRef)(IRRef1)slot); + lj_assertJ(tref_isk(key) && slot == (IRRef)(IRRef1)slot, + "out-of-range key/slot"); for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev) if (cir[ref].op12 == op12) goto found; @@ -381,7 +384,7 @@ TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot) void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) { UNUSED(L); - lua_assert(ir->o != IR_KSLOT); /* Common mistake. */ + lj_assertL(ir->o != IR_KSLOT, "unexpected KSLOT"); /* Common mistake. 
*/ switch (ir->o) { case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break; case IR_KINT: setintV(tv, ir->i); break; @@ -397,7 +400,7 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) break; } #endif - default: lua_assert(0); break; + default: lj_assertL(0, "bad IR constant op %d", ir->o); break; } } @@ -457,7 +460,7 @@ int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op) case IR_UGE: return !(a < b); case IR_ULE: return !(a > b); case IR_UGT: return !(a <= b); - default: lua_assert(0); return 0; + default: lj_assertX(0, "bad IR op %d", op); return 0; } } @@ -470,7 +473,7 @@ int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op) case IR_GE: return (res >= 0); case IR_LE: return (res <= 0); case IR_GT: return (res > 0); - default: lua_assert(0); return 0; + default: lj_assertX(0, "bad IR op %d", op); return 0; } } diff --git a/src/lj_ir.h b/src/lj_ir.h index a801d5d0e..6116f7e52 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -413,11 +413,12 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv) static LJ_AINLINE uint32_t irt_toitype_(IRType t) { - lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD); + lj_assertX(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD, + "no plain type tag for lightuserdata"); if (LJ_DUALNUM && t > IRT_NUM) { return LJ_TISNUM; } else { - lua_assert(t <= IRT_NUM); + lj_assertX(t <= IRT_NUM, "no plain type tag for IR type %d", t); return ~(uint32_t)t; } } diff --git a/src/lj_jit.h b/src/lj_jit.h index a9c602f07..fa754b64a 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -510,6 +510,12 @@ LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ #endif jit_State; +#ifdef LUA_USE_ASSERT +#define lj_assertJ(c, ...) lj_assertG_(J2G(J), (c), __VA_ARGS__) +#else +#define lj_assertJ(c, ...) ((void)J) +#endif + /* Trivial PRNG e.g. used for penalty randomization. 
*/ static LJ_AINLINE uint32_t LJ_PRNG_BITS(jit_State *J, int bits) { diff --git a/src/lj_lex.c b/src/lj_lex.c index ada0876ec..61c7ff434 100644 --- a/src/lj_lex.c +++ b/src/lj_lex.c @@ -82,7 +82,7 @@ static LJ_AINLINE LexChar lex_savenext(LexState *ls) static void lex_newline(LexState *ls) { LexChar old = ls->c; - lua_assert(lex_iseol(ls)); + lj_assertLS(lex_iseol(ls), "bad usage"); lex_next(ls); /* Skip "\n" or "\r". */ if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */ if (++ls->linenumber >= LJ_MAX_LINE) @@ -96,7 +96,7 @@ static void lex_number(LexState *ls, TValue *tv) { StrScanFmt fmt; LexChar c, xp = 'e'; - lua_assert(lj_char_isdigit(ls->c)); + lj_assertLS(lj_char_isdigit(ls->c), "bad usage"); if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x') xp = 'p'; while (lj_char_isident(ls->c) || ls->c == '.' || @@ -116,7 +116,8 @@ static void lex_number(LexState *ls, TValue *tv) } else if (fmt != STRSCAN_ERROR) { lua_State *L = ls->L; GCcdata *cd; - lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG); + lj_assertLS(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG, + "unexpected number format %d", fmt); if (!ctype_ctsG(G(L))) { ptrdiff_t oldtop = savestack(L, L->top); luaopen_ffi(L); /* Load FFI library on-demand. */ @@ -133,7 +134,8 @@ static void lex_number(LexState *ls, TValue *tv) lj_parse_keepcdata(ls, tv, cd); #endif } else { - lua_assert(fmt == STRSCAN_ERROR); + lj_assertLS(fmt == STRSCAN_ERROR, + "unexpected number format %d", fmt); lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); } } @@ -143,7 +145,7 @@ static int lex_skipeq(LexState *ls) { int count = 0; LexChar s = ls->c; - lua_assert(s == '[' || s == ']'); + lj_assertLS(s == '[' || s == ']', "bad usage"); while (lex_savenext(ls) == '=' && count < 0x20000000) count++; return (ls->c == s) ? count : (-count) - 1; @@ -469,7 +471,7 @@ void lj_lex_next(LexState *ls) /* Look ahead for the next token. 
*/ LexToken lj_lex_lookahead(LexState *ls) { - lua_assert(ls->lookahead == TK_eof); + lj_assertLS(ls->lookahead == TK_eof, "double lookahead"); ls->lookahead = lex_scan(ls, &ls->lookaheadval); return ls->lookahead; } diff --git a/src/lj_lex.h b/src/lj_lex.h index 8665aa2a5..e1b5610ba 100644 --- a/src/lj_lex.h +++ b/src/lj_lex.h @@ -84,4 +84,10 @@ LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok); LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...); LJ_FUNC void lj_lex_init(lua_State *L); +#ifdef LUA_USE_ASSERT +#define lj_assertLS(c, ...) (lj_assertG_(G(ls->L), (c), __VA_ARGS__)) +#else +#define lj_assertLS(c, ...) ((void)ls) +#endif + #endif diff --git a/src/lj_load.c b/src/lj_load.c index 746bf428c..e5918c04e 100644 --- a/src/lj_load.c +++ b/src/lj_load.c @@ -159,7 +159,7 @@ LUALIB_API int luaL_loadstring(lua_State *L, const char *s) LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data) { cTValue *o = L->top-1; - api_check(L, L->top > L->base); + lj_checkapi(L->top > L->base, "top slot empty"); if (tvisfunc(o) && isluafunc(funcV(o))) return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0); else diff --git a/src/lj_mcode.c b/src/lj_mcode.c index bc3e922fc..e64c5878f 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -351,7 +351,7 @@ MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) /* Otherwise search through the list of MCode areas. 
*/ for (;;) { mc = ((MCLink *)mc)->next; - lua_assert(mc != NULL); + lj_assertJ(mc != NULL, "broken MCode area chain"); if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) { if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN))) mcode_protfail(J); diff --git a/src/lj_meta.c b/src/lj_meta.c index 7391ff00f..2cdb6a0f0 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c @@ -47,7 +47,7 @@ void lj_meta_init(lua_State *L) cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name) { cTValue *mo = lj_tab_getstr(mt, name); - lua_assert(mm <= MM_FAST); + lj_assertX(mm <= MM_FAST, "bad metamethod %d", mm); if (!mo || tvisnil(mo)) { /* No metamethod? */ mt->nomm |= (uint8_t)(1u<k, cTValue)[bc_d(ins)]; } else { - lua_assert(op == BC_ISEQP); + lj_assertL(op == BC_ISEQP, "bad bytecode op %d", op); setpriV(&tv, ~bc_d(ins)); o2 = &tv; } @@ -426,7 +426,7 @@ void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp) { L->top = curr_topL(L); ra++; tp--; - lua_assert(LJ_DUALNUM || tp != ~LJ_TNUMX); /* ISTYPE -> ISNUM broken. */ + lj_assertL(LJ_DUALNUM || tp != ~LJ_TNUMX, "bad type for ISTYPE"); if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra); else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra); else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra); diff --git a/src/lj_obj.h b/src/lj_obj.h index 7d5829491..c0ae68924 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -679,6 +679,11 @@ struct lua_State { #define curr_topL(L) (L->base + curr_proto(L)->framesize) #define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top) +#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) +LJ_FUNC_NORET void lj_assert_fail(global_State *g, const char *file, int line, + const char *func, const char *fmt, ...); +#endif + /* -- GC object definition and conversions -------------------------------- */ /* GC header for generic access to common fields of GC objects. 
*/ @@ -732,10 +737,6 @@ typedef union GCobj { /* -- TValue getters/setters ---------------------------------------------- */ -#ifdef LUA_USE_ASSERT -#include "lj_gc.h" -#endif - /* Macros to test types. */ #if LJ_GC64 #define itype(o) ((uint32_t)((o)->it64 >> 47)) @@ -856,9 +857,19 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p) #define setcont(o, f) setlightudV((o), contptr(f)) #endif -#define tvchecklive(L, o) \ - UNUSED(L), lua_assert(!tvisgcv(o) || \ - ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o)))) +static LJ_AINLINE void checklivetv(lua_State *L, TValue *o, const char *msg) +{ + UNUSED(L); UNUSED(o); UNUSED(msg); +#if LUA_USE_ASSERT + if (tvisgcv(o)) { + lj_assertL(~itype(o) == gcval(o)->gch.gct, + "mismatch of TValue type %d vs GC type %d", + ~itype(o), gcval(o)->gch.gct); + /* Copy of isdead check from lj_gc.h to avoid circular include. */ + lj_assertL(!(gcval(o)->gch.marked & (G(L)->gc.currentwhite ^ 3) & 3), msg); + } +#endif +} static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype) { @@ -871,7 +882,8 @@ static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype) static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it) { - setgcVraw(o, v, it); tvchecklive(L, o); + setgcVraw(o, v, it); + checklivetv(L, o, "store to dead GC object"); } #define define_setV(name, type, tag) \ @@ -918,7 +930,8 @@ static LJ_AINLINE void setint64V(TValue *o, int64_t i) /* Copy tagged values. 
*/ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) { - *o1 = *o2; tvchecklive(L, o1); + *o1 = *o2; + checklivetv(L, o1, "copy of dead GC object"); } /* -- Number to integer conversion ---------------------------------------- */ diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 42c57c9b0..96f272b84 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -282,7 +282,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; case IR_MAX: k1 = k1 > k2 ? k1 : k2; break; - default: lua_assert(0); break; + default: lj_assertX(0, "bad IR op %d", op); break; } return k1; } @@ -354,7 +354,7 @@ LJFOLDF(kfold_intcomp) case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); case IR_ABC: case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); - default: lua_assert(0); return FAILFOLD; + default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD; } } @@ -368,10 +368,12 @@ LJFOLDF(kfold_intcomp0) /* -- Constant folding for 64 bit integers -------------------------------- */ -static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) +static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, + IROp op) { - switch (op) { + UNUSED(J); #if LJ_HASFFI + switch (op) { case IR_ADD: k1 += k2; break; case IR_SUB: k1 -= k2; break; case IR_MUL: k1 *= k2; break; @@ -383,9 +385,12 @@ static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) case IR_BSAR: k1 >>= (k2 & 63); break; case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break; case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break; -#endif - default: UNUSED(k2); lua_assert(0); break; + default: lj_assertJ(0, "bad IR op %d", op); break; } +#else + UNUSED(k2); UNUSED(op); + lj_assertJ(0, "FFI IR op without FFI"); +#endif return k1; } @@ -397,7 +402,7 @@ LJFOLD(BOR KINT64 KINT64) LJFOLD(BXOR KINT64 KINT64) 
LJFOLDF(kfold_int64arith) { - return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64, + return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64, ir_k64(fright)->u64, (IROp)fins->o)); } @@ -419,7 +424,7 @@ LJFOLDF(kfold_int64arith2) } return INT64FOLD(k1); #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -435,7 +440,7 @@ LJFOLDF(kfold_int64shift) int32_t sh = (fright->i & 63); return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL)); #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -445,7 +450,7 @@ LJFOLDF(kfold_bnot64) #if LJ_HASFFI return INT64FOLD(~ir_k64(fleft)->u64); #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -455,7 +460,7 @@ LJFOLDF(kfold_bswap64) #if LJ_HASFFI return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -480,10 +485,10 @@ LJFOLDF(kfold_int64comp) case IR_UGE: return CONDFOLD(a >= b); case IR_ULE: return CONDFOLD(a <= b); case IR_UGT: return CONDFOLD(a > b); - default: lua_assert(0); return FAILFOLD; + default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD; } #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -495,7 +500,7 @@ LJFOLDF(kfold_int64comp0) return DROPFOLD; return NEXTFOLD; #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -520,7 +525,7 @@ LJFOLD(STRREF KGC KINT) LJFOLDF(kfold_strref) { GCstr *str = ir_kstr(fleft); - lua_assert((MSize)fright->i <= str->len); + lj_assertJ((MSize)fright->i <= str->len, "bad string ref"); return lj_ir_kkptr(J, (char *)strdata(str) + fright->i); } @@ 
-616,8 +621,9 @@ LJFOLDF(bufput_kgc) LJFOLD(BUFSTR any any) LJFOLDF(bufstr_kfold_cse) { - lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT || - fleft->o == IR_CALLL); + lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT || + fleft->o == IR_CALLL, + "bad buffer constructor IR op %d", fleft->o); if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { if (fleft->o == IR_BUFHDR) { /* No put operations? */ if (!(fleft->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */ @@ -637,8 +643,9 @@ LJFOLDF(bufstr_kfold_cse) while (ref) { IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1); while (ira->o == irb->o && ira->op2 == irb->op2) { - lua_assert(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT || - ira->o == IR_CALLL || ira->o == IR_CARG); + lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT || + ira->o == IR_CALLL || ira->o == IR_CARG, + "bad buffer constructor IR op %d", ira->o); if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND)) return ref; /* CSE succeeded. */ if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab) @@ -697,7 +704,7 @@ LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar) LJFOLDF(bufput_kfold_fmt) { IRIns *irc = IR(fleft->op1); - lua_assert(irref_isk(irc->op2)); /* SFormat must be const. */ + lj_assertJ(irref_isk(irc->op2), "SFormat must be const"); if (irref_isk(fleft->op2)) { SFormat sf = (SFormat)IR(irc->op2)->i; IRIns *ira = IR(fleft->op2); @@ -1216,10 +1223,10 @@ LJFOLDF(simplify_tobit_conv) { /* Fold even across PHI to avoid expensive num->int conversions in loop. 
*/ if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { - lua_assert(irt_isnum(fleft->t)); + lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg"); return fleft->op1; } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { - lua_assert(irt_isnum(fleft->t)); + lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg"); fins->o = IR_CONV; fins->op1 = fleft->op1; fins->op2 = (IRT_INT<<5)|IRT_U32; @@ -1259,7 +1266,7 @@ LJFOLDF(simplify_conv_sext) /* Use scalar evolution analysis results to strength-reduce sign-extension. */ if (ref == J->scev.idx) { IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; - lua_assert(irt_isint(J->scev.t)); + lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported"); if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { ok_reduce: #if LJ_TARGET_X64 @@ -1335,7 +1342,8 @@ LJFOLDF(narrow_convert) /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ if (J->chain[IR_LOOP]) return NEXTFOLD; - lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT); + lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT, + "unexpected CONV TOBIT"); return lj_opt_narrow_convert(J); } @@ -1441,7 +1449,7 @@ LJFOLDF(simplify_intmul_k64) return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); return NEXTFOLD; #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -1449,7 +1457,7 @@ LJFOLD(MOD any KINT) LJFOLDF(simplify_intmod_k) { int32_t k = fright->i; - lua_assert(k != 0); + lj_assertJ(k != 0, "integer mod 0"); if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ fins->o = IR_BAND; fins->op2 = lj_ir_kint(J, k-1); @@ -1699,7 +1707,8 @@ LJFOLDF(simplify_shiftk_andk) fins->ot = IRTI(IR_BAND); return RETRYFOLD; } else if (irk->o == IR_KINT64) { - uint64_t k = kfold_int64arith(ir_k64(irk)->u64, fright->i, (IROp)fins->o); + uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i, + 
(IROp)fins->o); IROpT ot = fleft->ot; fins->op1 = fleft->op1; fins->op1 = (IRRef1)lj_opt_fold(J); @@ -1747,8 +1756,8 @@ LJFOLDF(simplify_andor_k64) IRIns *irk = IR(fleft->op2); PHIBARRIER(fleft); if (irk->o == IR_KINT64) { - uint64_t k = kfold_int64arith(ir_k64(irk)->u64, - ir_k64(fright)->u64, (IROp)fins->o); + uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64, + (IROp)fins->o); /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) { @@ -1758,7 +1767,7 @@ LJFOLDF(simplify_andor_k64) } return NEXTFOLD; #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -1794,8 +1803,8 @@ LJFOLDF(reassoc_intarith_k64) #if LJ_HASFFI IRIns *irk = IR(fleft->op2); if (irk->o == IR_KINT64) { - uint64_t k = kfold_int64arith(ir_k64(irk)->u64, - ir_k64(fright)->u64, (IROp)fins->o); + uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64, + (IROp)fins->o); PHIBARRIER(fleft); fins->op1 = fleft->op1; fins->op2 = (IRRef1)lj_ir_kint64(J, k); @@ -1803,7 +1812,7 @@ LJFOLDF(reassoc_intarith_k64) } return NEXTFOLD; #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -2058,7 +2067,7 @@ LJFOLDF(merge_eqne_snew_kgc) { GCstr *kstr = ir_kstr(fright); int32_t len = (int32_t)kstr->len; - lua_assert(irt_isstr(fins->t)); + lj_assertJ(irt_isstr(fins->t), "bad equality IR type"); #if LJ_TARGET_UNALIGNED #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */ @@ -2122,7 +2131,7 @@ LJFOLD(HLOAD KKPTR) LJFOLDF(kfold_hload_kkptr) { UNUSED(J); - lua_assert(ir_kptr(fleft) == niltvg(J2G(J))); + lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv"); return TREF_NIL; } @@ -2333,7 +2342,7 @@ LJFOLDF(fwd_sload) TRef tr = lj_opt_cse(J); return tref_ref(tr) < J->chain[IR_RETF] ? 
EMITFOLD : tr; } else { - lua_assert(J->slot[fins->op1] != 0); + lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed"); return J->slot[fins->op1]; } } @@ -2448,8 +2457,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J) IRRef ref; if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { - lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | - JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT); + lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | + JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT, + "bad JIT_F_OPT_DEFAULT"); /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) return lj_opt_cse(J); @@ -2511,7 +2521,7 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J) return lj_ir_kint(J, fins->i); if (ref == FAILFOLD) lj_trace_err(J, LJ_TRERR_GFAIL); - lua_assert(ref == DROPFOLD); + lj_assertJ(ref == DROPFOLD, "bad fold result"); return REF_DROP; } diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index 2eacb7d7b..0e5189cdc 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c @@ -299,7 +299,8 @@ static void loop_unroll(LoopState *lps) loopmap = &J->cur.snapmap[loopsnap->mapofs]; /* The PC of snapshot #0 and the loop snapshot must match. */ psentinel = &loopmap[loopsnap->nent]; - lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]); + lj_assertJ(*psentinel == J->cur.snapmap[J->cur.snap[0].nent], + "mismatched PC for loop snapshot"); *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ /* Start substitution with snapshot #1 (#0 is empty for root traces). */ @@ -372,7 +373,7 @@ static void loop_unroll(LoopState *lps) } if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. 
*/ J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs; - lua_assert(J->cur.nsnapmap <= J->sizesnapmap); + lj_assertJ(J->cur.nsnapmap <= J->sizesnapmap, "bad snapshot map index"); *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ loop_emit_phi(J, subst, phi, nphi, onsnap); diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 4c2c05fe9..80517f163 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -18,6 +18,7 @@ #include "lj_jit.h" #include "lj_iropt.h" #include "lj_ircall.h" +#include "lj_dispatch.h" /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) @@ -56,8 +57,8 @@ static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb) { IRIns *taba = IR(ta), *tabb = IR(tb); int newa, newb; - lua_assert(ta != tb); - lua_assert(irt_istab(taba->t) && irt_istab(tabb->t)); + lj_assertJ(ta != tb, "bad usage"); + lj_assertJ(irt_istab(taba->t) && irt_istab(tabb->t), "bad usage"); /* Disambiguate new allocations. */ newa = (taba->o == IR_TNEW || taba->o == IR_TDUP); newb = (tabb->o == IR_TNEW || tabb->o == IR_TDUP); @@ -99,7 +100,7 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) /* Disambiguate array references based on index arithmetic. */ int32_t ofsa = 0, ofsb = 0; IRRef basea = ka, baseb = kb; - lua_assert(refb->o == IR_AREF); + lj_assertJ(refb->o == IR_AREF, "expected AREF"); /* Gather base and offset from t[base] or t[base+-ofs]. */ if (keya->o == IR_ADD && irref_isk(keya->op2)) { basea = keya->op1; @@ -117,8 +118,9 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */ } else { /* Disambiguate hash references based on the type of their keys. 
*/ - lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && - (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF)); + lj_assertJ((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && + (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF), + "bad xREF IR op %d or %d", refa->o, refb->o); if (!irt_sametype(keya->t, keyb->t)) return ALIAS_NO; /* Different key types. */ } @@ -192,7 +194,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) if (key->o == IR_KSLOT) key = IR(key->op1); lj_ir_kvalue(J->L, &keyv, key); tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv); - lua_assert(itype2irt(tv) == irt_type(fins->t)); + lj_assertJ(itype2irt(tv) == irt_type(fins->t), + "mismatched type in constant table"); if (irt_isnum(fins->t)) return lj_ir_knum_u64(J, tv->u64); else if (LJ_DUALNUM && irt_isint(fins->t)) diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 94cce5827..a381d8d85 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -372,17 +372,17 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) } else if (op == NARROW_CONV) { *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ } else if (op == NARROW_SEXT) { - lua_assert(sp >= nc->stack+1); + lj_assertJ(sp >= nc->stack+1, "stack underflow"); sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); } else if (op == NARROW_INT) { - lua_assert(next < last); + lj_assertJ(next < last, "missing arg to NARROW_INT"); *sp++ = nc->t == IRT_I64 ? lj_ir_kint64(J, (int64_t)(int32_t)*next++) : lj_ir_kint(J, *next++); } else { /* Regular IROpT. Pops two operands and pushes one result. */ IRRef mode = nc->mode; - lua_assert(sp >= nc->stack+2); + lj_assertJ(sp >= nc->stack+2, "stack underflow"); sp--; /* Omit some overflow checks for array indexing. See comments above. 
*/ if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { @@ -398,7 +398,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode); } } - lua_assert(sp == nc->stack+1); + lj_assertJ(sp == nc->stack+1, "stack misalignment"); return nc->stack[0]; } @@ -452,7 +452,7 @@ static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode) TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) { IRIns *ir; - lua_assert(tref_isnumber(tr)); + lj_assertJ(tref_isnumber(tr), "expected number type"); if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); /* Omit some overflow checks for array indexing. See comments above. */ @@ -499,7 +499,7 @@ TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr) /* Narrow C array index (overflow undefined). */ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) { - lua_assert(tref_isnumber(tr)); + lj_assertJ(tref_isnumber(tr), "expected number type"); if (tref_isnum(tr)) return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY); /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ @@ -627,9 +627,10 @@ static int narrow_forl(jit_State *J, cTValue *o) /* Narrow the FORL index type by looking at the runtime values. */ IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) { - lua_assert(tvisnumber(&tv[FORL_IDX]) && + lj_assertJ(tvisnumber(&tv[FORL_IDX]) && tvisnumber(&tv[FORL_STOP]) && - tvisnumber(&tv[FORL_STEP])); + tvisnumber(&tv[FORL_STEP]), + "expected number types"); /* Narrow only if the runtime values of start/stop/step are all integers. 
*/ if (narrow_forl(J, &tv[FORL_IDX]) && narrow_forl(J, &tv[FORL_STOP]) && diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index 7925cfa5e..798a02cc6 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -235,7 +235,7 @@ static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst, return split_emit(J, IRTI(IR_BOR), t1, t2); } else { IRRef t1 = ir->prev, t2; - lua_assert(op == IR_BSHR || op == IR_BSAR); + lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage"); nir->o = IR_BSHR; t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31))); ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2); @@ -250,7 +250,7 @@ static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst, ir->prev = lj_ir_kint(J, 0); return lo; } else { - lua_assert(op == IR_BSHR || op == IR_BSAR); + lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage"); if (k == 32) { J->cur.nins--; ir->prev = hi; @@ -429,7 +429,7 @@ static void split_ir(jit_State *J) hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; case IR_FLOAD: - lua_assert(ir->op1 == REF_NIL); + lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State"); hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4)); nir->op2 += LJ_BE*4; break; @@ -465,8 +465,9 @@ static void split_ir(jit_State *J) break; } #endif - lua_assert(st == IRT_INT || - (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); + lj_assertJ(st == IRT_INT || + (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)), + "bad source type for CONV"); nir->o = IR_CALLN; #if LJ_32 && LJ_HASFFI nir->op2 = st == IRT_INT ? 
IRCALL_softfp_i2d : @@ -496,7 +497,8 @@ static void split_ir(jit_State *J) hi = nir->op2; break; default: - lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); + lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX, + "bad IR op %d", ir->o); hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; @@ -553,7 +555,7 @@ static void split_ir(jit_State *J) hi = split_bitshift(J, hisubst, oir, nir, ir); break; case IR_FLOAD: - lua_assert(ir->op2 == IRFL_CDATA_INT64); + lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported"); hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); #if LJ_BE ir->prev = hi; hi = nref; @@ -619,7 +621,7 @@ static void split_ir(jit_State *J) hi = nir->op2; break; default: - lua_assert(ir->o <= IR_NE); /* Comparisons. */ + lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o); /* Comparisons. */ split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); break; } @@ -697,7 +699,7 @@ static void split_ir(jit_State *J) #if LJ_SOFTFP if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { if (irt_isguard(ir->t)) { - lua_assert(st == IRT_NUM && irt_isint(ir->t)); + lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types"); J->cur.nins--; ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); } else { @@ -828,7 +830,7 @@ void lj_opt_split(jit_State *J) if (!J->needsplit) J->needsplit = split_needsplit(J); #else - lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. 
*/ + lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state"); #endif if (J->needsplit) { int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); diff --git a/src/lj_parse.c b/src/lj_parse.c index 33955ab8a..3ae05446e 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -163,6 +163,12 @@ LJ_STATIC_ASSERT((int)BC_MULVV-(int)BC_ADDVV == (int)OPR_MUL-(int)OPR_ADD); LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD); LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD); +#ifdef LUA_USE_ASSERT +#define lj_assertFS(c, ...) (lj_assertG_(G(fs->L), (c), __VA_ARGS__)) +#else +#define lj_assertFS(c, ...) ((void)fs) +#endif + /* -- Error handling ------------------------------------------------------ */ LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) @@ -200,7 +206,7 @@ static BCReg const_num(FuncState *fs, ExpDesc *e) { lua_State *L = fs->L; TValue *o; - lua_assert(expr_isnumk(e)); + lj_assertFS(expr_isnumk(e), "bad usage"); o = lj_tab_set(L, fs->kt, &e->u.nval); if (tvhaskslot(o)) return tvkslot(o); @@ -225,7 +231,7 @@ static BCReg const_gc(FuncState *fs, GCobj *gc, uint32_t itype) /* Add a string constant. 
*/ static BCReg const_str(FuncState *fs, ExpDesc *e) { - lua_assert(expr_isstrk(e) || e->k == VGLOBAL); + lj_assertFS(expr_isstrk(e) || e->k == VGLOBAL, "bad usage"); return const_gc(fs, obj2gco(e->u.sval), LJ_TSTR); } @@ -313,7 +319,7 @@ static void jmp_patchins(FuncState *fs, BCPos pc, BCPos dest) { BCIns *jmp = &fs->bcbase[pc].ins; BCPos offset = dest-(pc+1)+BCBIAS_J; - lua_assert(dest != NO_JMP); + lj_assertFS(dest != NO_JMP, "uninitialized jump target"); if (offset > BCMAX_D) err_syntax(fs->ls, LJ_ERR_XJUMP); setbc_d(jmp, offset); @@ -362,7 +368,7 @@ static void jmp_patch(FuncState *fs, BCPos list, BCPos target) if (target == fs->pc) { jmp_tohere(fs, list); } else { - lua_assert(target < fs->pc); + lj_assertFS(target < fs->pc, "bad jump target"); jmp_patchval(fs, list, target, NO_REG, target); } } @@ -392,7 +398,7 @@ static void bcreg_free(FuncState *fs, BCReg reg) { if (reg >= fs->nactvar) { fs->freereg--; - lua_assert(reg == fs->freereg); + lj_assertFS(reg == fs->freereg, "bad regfree"); } } @@ -542,7 +548,7 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg) } else if (e->k <= VKTRUE) { ins = BCINS_AD(BC_KPRI, reg, const_pri(e)); } else { - lua_assert(e->k == VVOID || e->k == VJMP); + lj_assertFS(e->k == VVOID || e->k == VJMP, "bad expr type %d", e->k); return; } bcemit_INS(fs, ins); @@ -637,7 +643,7 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e) ins = BCINS_AD(BC_GSET, ra, const_str(fs, var)); } else { BCReg ra, rc; - lua_assert(var->k == VINDEXED); + lj_assertFS(var->k == VINDEXED, "bad expr type %d", var->k); ra = expr_toanyreg(fs, e); rc = var->u.s.aux; if ((int32_t)rc < 0) { @@ -645,10 +651,12 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e) } else if (rc > BCMAX_C) { ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1)); } else { +#ifdef LUA_USE_ASSERT /* Free late alloced key reg to avoid assert on free of value reg. */ /* This can only happen when called from expr_table(). 
*/ - lua_assert(e->k != VNONRELOC || ra < fs->nactvar || - rc < ra || (bcreg_free(fs, rc),1)); + if (e->k == VNONRELOC && ra >= fs->nactvar && rc >= ra) + bcreg_free(fs, rc); +#endif ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc); } } @@ -663,7 +671,7 @@ static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key) expr_free(fs, e); func = fs->freereg; bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj); /* Copy object to 1st argument. */ - lua_assert(expr_isstrk(key)); + lj_assertFS(expr_isstrk(key), "bad usage"); idx = const_str(fs, key); if (idx <= BCMAX_C) { bcreg_reserve(fs, 2+LJ_FR2); @@ -803,7 +811,8 @@ static void bcemit_arith(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2) else rc = expr_toanyreg(fs, e2); /* 1st operand discharged by bcemit_binop_left, but need KNUM/KSHORT. */ - lua_assert(expr_isnumk(e1) || e1->k == VNONRELOC); + lj_assertFS(expr_isnumk(e1) || e1->k == VNONRELOC, + "bad expr type %d", e1->k); expr_toval(fs, e1); /* Avoid two consts to satisfy bytecode constraints. */ if (expr_isnumk(e1) && !expr_isnumk(e2) && @@ -891,19 +900,20 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2) if (op <= OPR_POW) { bcemit_arith(fs, op, e1, e2); } else if (op == OPR_AND) { - lua_assert(e1->t == NO_JMP); /* List must be closed. */ + lj_assertFS(e1->t == NO_JMP, "jump list not closed"); expr_discharge(fs, e2); jmp_append(fs, &e2->f, e1->f); *e1 = *e2; } else if (op == OPR_OR) { - lua_assert(e1->f == NO_JMP); /* List must be closed. 
*/ + lj_assertFS(e1->f == NO_JMP, "jump list not closed"); expr_discharge(fs, e2); jmp_append(fs, &e2->t, e1->t); *e1 = *e2; } else if (op == OPR_CONCAT) { expr_toval(fs, e2); if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) { - lua_assert(e1->u.s.info == bc_b(*bcptr(fs, e2))-1); + lj_assertFS(e1->u.s.info == bc_b(*bcptr(fs, e2))-1, + "bad CAT stack layout"); expr_free(fs, e1); setbc_b(bcptr(fs, e2), e1->u.s.info); e1->u.s.info = e2->u.s.info; @@ -915,8 +925,9 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2) } e1->k = VRELOCABLE; } else { - lua_assert(op == OPR_NE || op == OPR_EQ || - op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT); + lj_assertFS(op == OPR_NE || op == OPR_EQ || + op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT, + "bad binop %d", op); bcemit_comp(fs, op, e1, e2); } } @@ -945,10 +956,10 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e) e->u.s.info = fs->freereg-1; e->k = VNONRELOC; } else { - lua_assert(e->k == VNONRELOC); + lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k); } } else { - lua_assert(op == BC_UNM || op == BC_LEN); + lj_assertFS(op == BC_UNM || op == BC_LEN, "bad unop %d", op); if (op == BC_UNM && !expr_hasjump(e)) { /* Constant-fold negations. */ #if LJ_HASFFI if (e->k == VKCDATA) { /* Fold in-place since cdata is not interned. */ @@ -1043,8 +1054,9 @@ static void var_new(LexState *ls, BCReg n, GCstr *name) lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); } - lua_assert((uintptr_t)name < VARNAME__MAX || - lj_tab_getstr(fs->kt, name) != NULL); + lj_assertFS((uintptr_t)name < VARNAME__MAX || + lj_tab_getstr(fs->kt, name) != NULL, + "unanchored variable name"); /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. 
*/ setgcref(ls->vstack[vtop].name, obj2gco(name)); fs->varmap[fs->nactvar+n] = (uint16_t)vtop; @@ -1099,7 +1111,7 @@ static MSize var_lookup_uv(FuncState *fs, MSize vidx, ExpDesc *e) return i; /* Already exists. */ /* Otherwise create a new one. */ checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues"); - lua_assert(e->k == VLOCAL || e->k == VUPVAL); + lj_assertFS(e->k == VLOCAL || e->k == VUPVAL, "bad expr type %d", e->k); fs->uvmap[n] = (uint16_t)vidx; fs->uvtmp[n] = (uint16_t)(e->k == VLOCAL ? vidx : LJ_MAX_VSTACK+e->u.s.info); fs->nuv = n+1; @@ -1150,7 +1162,8 @@ static MSize gola_new(LexState *ls, GCstr *name, uint8_t info, BCPos pc) lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); } - lua_assert(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL); + lj_assertFS(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL, + "unanchored label name"); /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. 
*/ setgcref(ls->vstack[vtop].name, obj2gco(name)); ls->vstack[vtop].startpc = pc; @@ -1180,8 +1193,9 @@ static void gola_close(LexState *ls, VarInfo *vg) FuncState *fs = ls->fs; BCPos pc = vg->startpc; BCIns *ip = &fs->bcbase[pc].ins; - lua_assert(gola_isgoto(vg)); - lua_assert(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO); + lj_assertFS(gola_isgoto(vg), "expected goto"); + lj_assertFS(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO, + "bad bytecode op %d", bc_op(*ip)); setbc_a(ip, vg->slot); if (bc_op(*ip) == BC_JMP) { BCPos next = jmp_next(fs, pc); @@ -1200,9 +1214,9 @@ static void gola_resolve(LexState *ls, FuncScope *bl, MSize idx) if (gcrefeq(vg->name, vl->name) && gola_isgoto(vg)) { if (vg->slot < vl->slot) { GCstr *name = strref(var_get(ls, ls->fs, vg->slot).name); - lua_assert((uintptr_t)name >= VARNAME__MAX); + lj_assertLS((uintptr_t)name >= VARNAME__MAX, "expected goto name"); ls->linenumber = ls->fs->bcbase[vg->startpc].line; - lua_assert(strref(vg->name) != NAME_BREAK); + lj_assertLS(strref(vg->name) != NAME_BREAK, "unexpected break"); lj_lex_error(ls, 0, LJ_ERR_XGSCOPE, strdata(strref(vg->name)), strdata(name)); } @@ -1266,7 +1280,7 @@ static void fscope_begin(FuncState *fs, FuncScope *bl, int flags) bl->vstart = fs->ls->vtop; bl->prev = fs->bl; fs->bl = bl; - lua_assert(fs->freereg == fs->nactvar); + lj_assertFS(fs->freereg == fs->nactvar, "bad regalloc"); } /* End a scope. 
*/ @@ -1277,7 +1291,7 @@ static void fscope_end(FuncState *fs) fs->bl = bl->prev; var_remove(ls, bl->nactvar); fs->freereg = fs->nactvar; - lua_assert(bl->nactvar == fs->nactvar); + lj_assertFS(bl->nactvar == fs->nactvar, "bad regalloc"); if ((bl->flags & (FSCOPE_UPVAL|FSCOPE_NOCLOSE)) == FSCOPE_UPVAL) bcemit_AJ(fs, BC_UCLO, bl->nactvar, 0); if ((bl->flags & FSCOPE_BREAK)) { @@ -1364,13 +1378,13 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr) Node *n = &node[i]; if (tvhaskslot(&n->val)) { ptrdiff_t kidx = (ptrdiff_t)tvkslot(&n->val); - lua_assert(!tvisint(&n->key)); + lj_assertFS(!tvisint(&n->key), "unexpected integer key"); if (tvisnum(&n->key)) { TValue *tv = &((TValue *)kptr)[kidx]; if (LJ_DUALNUM) { lua_Number nn = numV(&n->key); int32_t k = lj_num2int(nn); - lua_assert(!tvismzero(&n->key)); + lj_assertFS(!tvismzero(&n->key), "unexpected -0 key"); if ((lua_Number)k == nn) setintV(tv, k); else @@ -1418,21 +1432,21 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt, uint8_t *li = (uint8_t *)lineinfo; do { BCLine delta = base[i].line - first; - lua_assert(delta >= 0 && delta < 256); + lj_assertFS(delta >= 0 && delta < 256, "bad line delta"); li[i] = (uint8_t)delta; } while (++i < n); } else if (LJ_LIKELY(numline < 65536)) { uint16_t *li = (uint16_t *)lineinfo; do { BCLine delta = base[i].line - first; - lua_assert(delta >= 0 && delta < 65536); + lj_assertFS(delta >= 0 && delta < 65536, "bad line delta"); li[i] = (uint16_t)delta; } while (++i < n); } else { uint32_t *li = (uint32_t *)lineinfo; do { BCLine delta = base[i].line - first; - lua_assert(delta >= 0); + lj_assertFS(delta >= 0, "bad line delta"); li[i] = (uint32_t)delta; } while (++i < n); } @@ -1522,7 +1536,7 @@ static void fs_fixup_ret(FuncState *fs) } fs->bl->flags |= FSCOPE_NOCLOSE; /* Handled above. */ fscope_end(fs); - lua_assert(fs->bl == NULL); + lj_assertFS(fs->bl == NULL, "bad scope nesting"); /* May need to fixup returns encoded before first function was created. 
*/ if (fs->flags & PROTO_FIXUP_RETURN) { BCPos pc; @@ -1594,7 +1608,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line) L->top--; /* Pop table of constants. */ ls->vtop = fs->vbase; /* Reset variable stack. */ ls->fs = fs->prev; - lua_assert(ls->fs != NULL || ls->tok == TK_eof); + lj_assertL(ls->fs != NULL || ls->tok == TK_eof, "bad parser state"); return pt; } @@ -1688,14 +1702,15 @@ static void expr_bracket(LexState *ls, ExpDesc *v) } /* Get value of constant expression. */ -static void expr_kvalue(TValue *v, ExpDesc *e) +static void expr_kvalue(FuncState *fs, TValue *v, ExpDesc *e) { + UNUSED(fs); if (e->k <= VKTRUE) { setpriV(v, ~(uint32_t)e->k); } else if (e->k == VKSTR) { setgcVraw(v, obj2gco(e->u.sval), LJ_TSTR); } else { - lua_assert(tvisnumber(expr_numtv(e))); + lj_assertFS(tvisnumber(expr_numtv(e)), "bad number constant"); *v = *expr_numtv(e); } } @@ -1745,11 +1760,11 @@ static void expr_table(LexState *ls, ExpDesc *e) fs->bcbase[pc].ins = BCINS_AD(BC_TDUP, freg-1, kidx); } vcall = 0; - expr_kvalue(&k, &key); + expr_kvalue(fs, &k, &key); v = lj_tab_set(fs->L, t, &k); lj_gc_anybarriert(fs->L, t); if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */ - expr_kvalue(v, &val); + expr_kvalue(fs, v, &val); } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */ settabV(fs->L, v, t); /* Preserve key with table itself as value. */ fixt = 1; /* Fix this later, after all resizes. */ @@ -1768,8 +1783,9 @@ static void expr_table(LexState *ls, ExpDesc *e) if (vcall) { BCInsLine *ilp = &fs->bcbase[fs->pc-1]; ExpDesc en; - lua_assert(bc_a(ilp->ins) == freg && - bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB)); + lj_assertFS(bc_a(ilp->ins) == freg && + bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB), + "bad CALL code generation"); expr_init(&en, VKNUM, 0); en.u.nval.u32.lo = narr-1; en.u.nval.u32.hi = 0x43300000; /* Biased integer to avoid denormals. 
*/ @@ -1799,7 +1815,7 @@ static void expr_table(LexState *ls, ExpDesc *e) for (i = 0; i <= hmask; i++) { Node *n = &node[i]; if (tvistab(&n->val)) { - lua_assert(tabV(&n->val) == t); + lj_assertFS(tabV(&n->val) == t, "bad dummy key in template table"); setnilV(&n->val); /* Turn value into nil. */ } } @@ -1830,7 +1846,7 @@ static BCReg parse_params(LexState *ls, int needself) } while (lex_opt(ls, ',')); } var_add(ls, nparams); - lua_assert(fs->nactvar == nparams); + lj_assertFS(fs->nactvar == nparams, "bad regalloc"); bcreg_reserve(fs, nparams); lex_check(ls, ')'); return nparams; @@ -1917,7 +1933,7 @@ static void parse_args(LexState *ls, ExpDesc *e) err_syntax(ls, LJ_ERR_XFUNARG); return; /* Silence compiler. */ } - lua_assert(e->k == VNONRELOC); + lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k); base = e->u.s.info; /* Base register for call. */ if (args.k == VCALL) { ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2); @@ -2687,8 +2703,9 @@ static void parse_chunk(LexState *ls) while (!islast && !parse_isend(ls->tok)) { islast = parse_stmt(ls); lex_opt(ls, ';'); - lua_assert(ls->fs->framesize >= ls->fs->freereg && - ls->fs->freereg >= ls->fs->nactvar); + lj_assertLS(ls->fs->framesize >= ls->fs->freereg && + ls->fs->freereg >= ls->fs->nactvar, + "bad regalloc"); ls->fs->freereg = ls->fs->nactvar; /* Free registers after each stmt. */ } synlevel_end(ls); @@ -2723,9 +2740,8 @@ GCproto *lj_parse(LexState *ls) err_token(ls, TK_eof); pt = fs_finish(ls, ls->linenumber); L->top--; /* Drop chunkname. 
*/ - lua_assert(fs.prev == NULL); - lua_assert(ls->fs == NULL); - lua_assert(pt->sizeuv == 0); + lj_assertL(fs.prev == NULL && ls->fs == NULL, "mismatched frame nesting"); + lj_assertL(pt->sizeuv == 0, "toplevel proto has upvalues"); return pt; } diff --git a/src/lj_record.c b/src/lj_record.c index 4fc22742e..2a4a766e5 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -50,34 +50,52 @@ static void rec_check_ir(jit_State *J) { IRRef i, nins = J->cur.nins, nk = J->cur.nk; - lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); + lj_assertJ(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536, + "inconsistent IR layout"); for (i = nk; i < nins; i++) { IRIns *ir = IR(i); uint32_t mode = lj_ir_mode[ir->o]; IRRef op1 = ir->op1; IRRef op2 = ir->op2; + const char *err = NULL; switch (irm_op1(mode)) { - case IRMnone: lua_assert(op1 == 0); break; - case IRMref: lua_assert(op1 >= nk); - lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; + case IRMnone: + if (op1 != 0) err = "IRMnone op1 used"; + break; + case IRMref: + if (op1 < nk || (i >= REF_BIAS ? op1 >= i : op1 <= i)) + err = "IRMref op1 out of range"; + break; case IRMlit: break; - case IRMcst: lua_assert(i < REF_BIAS); + case IRMcst: + if (i >= REF_BIAS) { err = "constant in IR range"; break; } if (irt_is64(ir->t) && ir->o != IR_KNULL) i++; continue; } switch (irm_op2(mode)) { - case IRMnone: lua_assert(op2 == 0); break; - case IRMref: lua_assert(op2 >= nk); - lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break; + case IRMnone: + if (op2) err = "IRMnone op2 used"; + break; + case IRMref: + if (op2 < nk || (i >= REF_BIAS ? op2 >= i : op2 <= i)) + err = "IRMref op2 out of range"; + break; case IRMlit: break; - case IRMcst: lua_assert(0); break; + case IRMcst: err = "IRMcst op2"; break; } - if (ir->prev) { - lua_assert(ir->prev >= nk); - lua_assert(i >= REF_BIAS ? 
ir->prev < i : ir->prev > i); - lua_assert(ir->o == IR_NOP || IR(ir->prev)->o == ir->o); + if (!err && ir->prev) { + if (ir->prev < nk || (i >= REF_BIAS ? ir->prev >= i : ir->prev <= i)) + err = "chain out of range"; + else if (ir->o != IR_NOP && IR(ir->prev)->o != ir->o) + err = "chain to different op"; } + lj_assertJ(!err, "bad IR %04d op %d(%04d,%04d): %s", + i-REF_BIAS, + ir->o, + irm_op1(mode) == IRMref ? op1-REF_BIAS : op1, + irm_op2(mode) == IRMref ? op2-REF_BIAS : op2, + err); } } @@ -87,9 +105,10 @@ static void rec_check_slots(jit_State *J) BCReg s, nslots = J->baseslot + J->maxslot; int32_t depth = 0; cTValue *base = J->L->base - J->baseslot; - lua_assert(J->baseslot >= 1+LJ_FR2); - lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME)); - lua_assert(nslots <= LJ_MAX_JSLOTS); + lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot"); + lj_assertJ(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME), + "baseslot does not point to frame"); + lj_assertJ(nslots <= LJ_MAX_JSLOTS, "slot overflow"); for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; if (tr) { @@ -97,56 +116,65 @@ static void rec_check_slots(jit_State *J) IRRef ref = tref_ref(tr); IRIns *ir = NULL; /* Silence compiler. 
*/ if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) { - lua_assert(ref >= J->cur.nk && ref < J->cur.nins); + lj_assertJ(ref >= J->cur.nk && ref < J->cur.nins, + "slot %d ref %04d out of range", s, ref - REF_BIAS); ir = IR(ref); - lua_assert(irt_t(ir->t) == tref_t(tr)); + lj_assertJ(irt_t(ir->t) == tref_t(tr), "slot %d IR type mismatch", s); } if (s == 0) { - lua_assert(tref_isfunc(tr)); + lj_assertJ(tref_isfunc(tr), "frame slot 0 is not a function"); #if LJ_FR2 } else if (s == 1) { - lua_assert((tr & ~TREF_FRAME) == 0); + lj_assertJ((tr & ~TREF_FRAME) == 0, "bad frame slot 1"); #endif } else if ((tr & TREF_FRAME)) { GCfunc *fn = gco2func(frame_gc(tv)); BCReg delta = (BCReg)(tv - frame_prev(tv)); #if LJ_FR2 - if (ref) - lua_assert(ir_knum(ir)->u64 == tv->u64); + lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64, + "frame slot %d PC mismatch", s); tr = J->slot[s-1]; ir = IR(tref_ref(tr)); #endif - lua_assert(tref_isfunc(tr)); - if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); - lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME) - : (s == delta + LJ_FR2)); + lj_assertJ(tref_isfunc(tr), + "frame slot %d is not a function", s-LJ_FR2); + lj_assertJ(!tref_isk(tr) || fn == ir_kfunc(ir), + "frame slot %d function mismatch", s-LJ_FR2); + lj_assertJ(s > delta + LJ_FR2 ? 
(J->slot[s-delta] & TREF_FRAME) + : (s == delta + LJ_FR2), + "frame slot %d broken chain", s-LJ_FR2); depth++; } else if ((tr & TREF_CONT)) { #if LJ_FR2 - if (ref) - lua_assert(ir_knum(ir)->u64 == tv->u64); + lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64, + "cont slot %d continuation mismatch", s); #else - lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); + lj_assertJ(ir_kptr(ir) == gcrefp(tv->gcr, void), + "cont slot %d continuation mismatch", s); #endif - lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME)); + lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME), + "cont slot %d not followed by frame", s); depth++; } else { - if (tvisnumber(tv)) - lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ - else - lua_assert(itype2irt(tv) == tref_type(tr)); + /* Number repr. may differ, but other types must be the same. */ + lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) : + itype2irt(tv) == tref_type(tr), + "slot %d type mismatch: stack type %d vs IR type %d", + s, itypemap(tv), tref_type(tr)); if (tref_isk(tr)) { /* Compare constants. */ TValue tvk; lj_ir_kvalue(J->L, &tvk, ir); - if (!(tvisnum(&tvk) && tvisnan(&tvk))) - lua_assert(lj_obj_equal(tv, &tvk)); - else - lua_assert(tvisnum(tv) && tvisnan(tv)); + lj_assertJ((tvisnum(&tvk) && tvisnan(&tvk)) ? 
+ (tvisnum(tv) && tvisnan(tv)) : + lj_obj_equal(tv, &tvk), + "slot %d const mismatch: stack %016llx vs IR %016llx", + s, tv->u64, tvk.u64); } } } } - lua_assert(J->framedepth == depth); + lj_assertJ(J->framedepth == depth, + "frame depth mismatch %d vs %d", J->framedepth, depth); } #endif @@ -182,7 +210,7 @@ static TRef getcurrf(jit_State *J) { if (J->base[-1-LJ_FR2]) return J->base[-1-LJ_FR2]; - lua_assert(J->baseslot == 1+LJ_FR2); + lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot"); return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY); } @@ -427,7 +455,8 @@ static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev, TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); int tc, dir = rec_for_direction(&tv[FORL_STEP]); - lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); + lj_assertJ(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI, + "bad bytecode %d instead of FORI/JFORI", bc_op(*fori)); scev->t.irt = t; scev->dir = dir; scev->stop = tref_ref(stop); @@ -483,7 +512,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) IRT_NUM; for (i = FORL_IDX; i <= FORL_STEP; i++) { if (!tr[i]) sload(J, ra+i); - lua_assert(tref_isnumber_str(tr[i])); + lj_assertJ(tref_isnumber_str(tr[i]), "bad FORI argument type"); if (tref_isstr(tr[i])) tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); if (t == IRT_INT) { @@ -615,7 +644,8 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc) { GCproto *ppt; - lua_assert(J->prof_mode == 'f' || J->prof_mode == 'l'); + lj_assertJ(J->prof_mode == 'f' || J->prof_mode == 'l', + "bad profiler mode %c", J->prof_mode); if (!pt) return 0; ppt = J->prev_pt; @@ -793,7 +823,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCReg cbase = (BCReg)frame_delta(frame); if (--J->framedepth <= 0) lj_trace_err(J, LJ_TRERR_NYIRETL); - 
lua_assert(J->baseslot > 1+LJ_FR2); + lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return"); gotresults++; rbase += cbase; J->baseslot -= (BCReg)cbase; @@ -817,7 +847,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCReg cbase = (BCReg)frame_delta(frame); if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ lj_trace_err(J, LJ_TRERR_NYIRETL); - lua_assert(J->baseslot > 1+LJ_FR2); + lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return"); rbase += cbase; J->baseslot -= (BCReg)cbase; J->base -= cbase; @@ -844,7 +874,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) J->maxslot = cbase+(BCReg)nresults; if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ J->framedepth--; - lua_assert(J->baseslot > cbase+1+LJ_FR2); + lj_assertJ(J->baseslot > cbase+1+LJ_FR2, "bad baseslot for return"); J->baseslot -= cbase+1+LJ_FR2; J->base -= cbase+1+LJ_FR2; } else if (J->parent == 0 && J->exitno == 0 && @@ -859,7 +889,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc); J->retdepth++; J->needsnap = 1; - lua_assert(J->baseslot == 1+LJ_FR2); + lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return"); /* Shift result slots up and clear the slots of the new frame below. */ memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults); memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2)); @@ -907,12 +937,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) } /* Otherwise continue with another __concat call. */ } else { /* Result type already specialized. */ - lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); + lj_assertJ(cont == lj_cont_condf || cont == lj_cont_condt, + "bad continuation type"); } } else { lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. 
*/ } - lua_assert(J->baseslot >= 1+LJ_FR2); + lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot for return"); } /* -- Metamethod handling ------------------------------------------------- */ @@ -1167,7 +1198,7 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) ix->tab = ix->val; copyTV(J->L, &ix->tabv, &ix->valv); } else { - lua_assert(tref_iscdata(ix->key)); + lj_assertJ(tref_iscdata(ix->key), "cdata expected"); ix->tab = ix->key; copyTV(J->L, &ix->tabv, &ix->keyv); } @@ -1264,7 +1295,8 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) /* Got scalar evolution analysis results for this reference? */ if (ref == J->scev.idx) { int32_t stop; - lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); + lj_assertJ(irt_isint(J->scev.t) && ir->o == IR_SLOAD, + "only int SCEV supported"); stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]); /* Runtime value for stop of loop is within bounds? */ if ((uint64_t)stop + ofs < (uint64_t)asize) { @@ -1382,7 +1414,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ /* Never call raw lj_record_idx() on non-table. */ - lua_assert(ix->idxchain != 0); + lj_assertJ(ix->idxchain != 0, "bad usage"); if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index)) lj_trace_err(J, LJ_TRERR_NOMM); handlemm: @@ -1466,10 +1498,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { - lua_assert(hasmm); + lj_assertJ(hasmm, "inconsistent metamethod handling"); goto handlemm; } - lua_assert(!hasmm); + lj_assertJ(!hasmm, "inconsistent metamethod handling"); if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ TRef key = ix->key; if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. 
*/ @@ -1575,7 +1607,7 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) int needbarrier = 0; if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */ TRef tr, kfunc; - lua_assert(val == 0); + lj_assertJ(val == 0, "bad usage"); if (!tref_isk(fn)) { /* Late specialization of current function. */ if (J->pt->flags >= PROTO_CLC_POLY) goto noconstify; @@ -1697,7 +1729,7 @@ static void rec_func_vararg(jit_State *J) { GCproto *pt = J->pt; BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2; - lua_assert((pt->flags & PROTO_VARARG)); + lj_assertJ((pt->flags & PROTO_VARARG), "FUNCV in non-vararg function"); if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */ @@ -1766,7 +1798,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) { int32_t numparams = J->pt->numparams; ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2; - lua_assert(frame_isvarg(J->L->base-1)); + lj_assertJ(frame_isvarg(J->L->base-1), "VARG in non-vararg frame"); if (LJ_FR2 && dst > J->maxslot) J->base[dst-1] = 0; /* Prevent resurrection of unrelated slot. */ if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ @@ -1889,7 +1921,7 @@ static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) TValue savetv[5]; BCReg s; RecordIndex ix; - lua_assert(baseslot < topslot); + lj_assertJ(baseslot < topslot, "bad CAT arg"); for (s = baseslot; s <= topslot; s++) (void)getslot(J, s); /* Ensure all arguments have a reference. 
*/ if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) { @@ -2013,7 +2045,7 @@ void lj_record_ins(jit_State *J) if (bc_op(*J->pc) >= BC__MAX) return; break; - default: lua_assert(0); break; + default: lj_assertJ(0, "bad post-processing mode"); break; } J->postproc = LJ_POST_NONE; } @@ -2381,7 +2413,8 @@ void lj_record_ins(jit_State *J) J->loopref = J->cur.nins; break; case BC_JFORI: - lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); + lj_assertJ(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL, + "JFORI does not point to JFORL"); if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); /* Continue tracing if the loop is not entered. */ @@ -2434,7 +2467,8 @@ void lj_record_ins(jit_State *J) rec_func_lua(J); break; case BC_JFUNCV: - lua_assert(0); /* Cannot happen. No hotcall counting for varag funcs. */ + /* Cannot happen. No hotcall counting for varag funcs. */ + lj_assertJ(0, "unsupported vararg hotcall"); break; case BC_FUNCC: @@ -2494,11 +2528,11 @@ static const BCIns *rec_setup_root(jit_State *J) J->bc_min = pc; break; case BC_ITERL: - lua_assert(bc_op(pc[-1]) == BC_ITERC); + lj_assertJ(bc_op(pc[-1]) == BC_ITERC, "no ITERC before ITERL"); J->maxslot = ra + bc_b(pc[-1]) - 1; J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); pc += 1+bc_j(ins); - lua_assert(bc_op(pc[-1]) == BC_JMP); + lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1"); J->bc_min = pc; break; case BC_LOOP: @@ -2530,7 +2564,7 @@ static const BCIns *rec_setup_root(jit_State *J) pc++; break; default: - lua_assert(0); + lj_assertJ(0, "bad root trace start bytecode %d", bc_op(ins)); break; } return pc; diff --git a/src/lj_snap.c b/src/lj_snap.c index a47c0e3e4..a21894f60 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -110,7 +110,7 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) cTValue *ftop = isluafunc(fn) ? 
(frame+funcproto(fn)->framesize) : J->L->top; #if LJ_FR2 uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2); - lua_assert(2 <= J->baseslot && J->baseslot <= 257); + lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad baseslot"); memcpy(map, &pcbase, sizeof(uint64_t)); #else MSize f = 0; @@ -129,7 +129,7 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) #endif frame = frame_prevd(frame); } else { - lua_assert(!frame_isc(frame)); + lj_assertJ(!frame_isc(frame), "broken frame chain"); #if !LJ_FR2 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); #endif @@ -141,10 +141,10 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) } *topslot = (uint8_t)(ftop - lim); #if LJ_FR2 - lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t)); + lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def"); return 2; #else - lua_assert(f == (MSize)(1 + J->framedepth)); + lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot size"); return f; #endif } @@ -222,7 +222,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, #define DEF_SLOT(s) udf[(s)] *= 3 /* Scan through following bytecode and check for uses/defs. 
*/ - lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); + lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc, + "snapshot PC out of range"); for (;;) { BCIns ins = *pc++; BCOp op = bc_op(ins); @@ -233,7 +234,7 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, switch (bcmode_c(op)) { case BCMvar: USE_SLOT(bc_c(ins)); break; case BCMrbase: - lua_assert(op == BC_CAT); + lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op); for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s); for (; s < maxslot; s++) DEF_SLOT(s); break; @@ -285,7 +286,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, break; default: break; } - lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); + lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc, + "use/def analysis PC out of range"); } #undef USE_SLOT @@ -356,19 +358,20 @@ static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs) } /* Copy RegSP from parent snapshot to the parent links of the IR. 
*/ -IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) +IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir) { SnapShot *snap = &T->snap[snapno]; SnapEntry *map = &T->snapmap[snap->mapofs]; BloomFilter rfilt = snap_renamefilter(T, snapno); MSize n = 0; IRRef ref = 0; + UNUSED(J); for ( ; ; ir++) { uint32_t rs; if (ir->o == IR_SLOAD) { if (!(ir->op2 & IRSLOAD_PARENT)) break; for ( ; ; n++) { - lua_assert(n < snap->nent); + lj_assertJ(n < snap->nent, "slot %d not found in snapshot", ir->op1); if (snap_slot(map[n]) == ir->op1) { ref = snap_ref(map[n++]); break; @@ -385,7 +388,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) if (bloomtest(rfilt, ref)) rs = snap_renameref(T, snapno, ref, rs); ir->prev = (uint16_t)rs; - lua_assert(regsp_used(rs)); + lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS); } return ir; } @@ -403,7 +406,7 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir) case IR_KNUM: case IR_KINT64: return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64); case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. 
*/ - default: lua_assert(0); return TREF_NIL; break; + default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL; } } @@ -481,7 +484,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) tr = snap_replay_const(J, ir); } else if (!regsp_used(ir->prev)) { pass23 = 1; - lua_assert(s != 0); + lj_assertJ(s != 0, "unused slot 0 in snapshot"); tr = s; } else { IRType t = irt_type(ir->t); @@ -507,8 +510,9 @@ void lj_snap_replay(jit_State *J, GCtrace *T) if (regsp_reg(ir->r) == RID_SUNK) { if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; pass23 = 1; - lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || - ir->o == IR_CNEW || ir->o == IR_CNEWI); + lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP || + ir->o == IR_CNEW || ir->o == IR_CNEWI, + "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o); if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); if (LJ_HASFFI && ir->o == IR_CNEWI) { @@ -526,7 +530,8 @@ void lj_snap_replay(jit_State *J, GCtrace *T) } } } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { - lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); + lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, + "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o); J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1); } } @@ -576,7 +581,9 @@ void lj_snap_replay(jit_State *J, GCtrace *T) val = snap_pref(J, T, map, nent, seen, irs->op2); if (val == 0) { IRIns *irc = &T->ir[irs->op2]; - lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); + lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT, + "sunk store for parent IR %04d with bad op %d", + refp - REF_BIAS, irc->o); val = snap_pref(J, T, map, nent, seen, irc->op1); val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && @@ -645,13 +652,14 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, o->u64 = *(uint64_t 
*)sps; #endif } else { - lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ + lj_assertJ(!irt_ispri(t), "PRI ref with spill slot"); setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); } } else { /* Restore from register. */ Reg r = regsp_reg(rs); if (ra_noreg(r)) { - lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); + lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, + "restore from IR %04d has no reg", ref - REF_BIAS); snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o); if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o)); return; @@ -679,7 +687,7 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, #if LJ_HASFFI /* Restore raw data from the trace exit state. */ -static void snap_restoredata(GCtrace *T, ExitState *ex, +static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex, SnapNo snapno, BloomFilter rfilt, IRRef ref, void *dst, CTSize sz) { @@ -687,6 +695,7 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, RegSP rs = ir->prev; int32_t *src; uint64_t tmp; + UNUSED(J); if (irref_isk(ref)) { if (ir_isk64(ir)) { src = (int32_t *)&ir[1]; @@ -709,8 +718,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, Reg r = regsp_reg(rs); if (ra_noreg(r)) { /* Note: this assumes CNEWI is never used for SOFTFP split numbers. 
*/ - lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); - snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4); + lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, + "restore from IR %04d has no reg", ref - REF_BIAS); + snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4); *(lua_Number *)dst = (lua_Number)*(int32_t *)dst; return; } @@ -731,7 +741,8 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, if (LJ_64 && LJ_BE && sz == 4) src++; } } - lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); + lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8, + "restore from IR %04d with bad size %d", ref - REF_BIAS, sz); if (sz == 4) *(int32_t *)dst = *src; else if (sz == 8) *(int64_t *)dst = *(int64_t *)src; else if (sz == 1) *(int8_t *)dst = (int8_t)*src; @@ -744,8 +755,9 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, SnapNo snapno, BloomFilter rfilt, IRIns *ir, TValue *o) { - lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || - ir->o == IR_CNEW || ir->o == IR_CNEWI); + lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP || + ir->o == IR_CNEW || ir->o == IR_CNEWI, + "sunk allocation with bad op %d", ir->o); #if LJ_HASFFI if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { CTState *cts = ctype_cts(J->L); @@ -756,13 +768,14 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, setcdataV(J->L, o, cd); if (ir->o == IR_CNEWI) { uint8_t *p = (uint8_t *)cdataptr(cd); - lua_assert(sz == 4 || sz == 8); + lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz); if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { - snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4); + snap_restoredata(J, T, ex, snapno, rfilt, (ir+1)->op2, + LJ_LE ? 
p+4 : p, 4); if (LJ_BE) p += 4; sz = 4; } - snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz); + snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz); } else { IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; for (irs = ir+1; irs < irlast; irs++) @@ -770,8 +783,11 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, IRIns *iro = &T->ir[T->ir[irs->op1].op2]; uint8_t *p = (uint8_t *)cd; CTSize szs; - lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD); - lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64); + lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d", irs->o); + lj_assertJ(T->ir[irs->op1].o == IR_ADD, + "sunk store with bad add op %d", T->ir[irs->op1].o); + lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64, + "sunk store with bad const offset op %d", iro->o); if (irt_is64(irs->t)) szs = 8; else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1; else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2; @@ -780,14 +796,16 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, p += (int64_t)ir_k64(iro)->u64; else p += iro->i; - lua_assert(p >= (uint8_t *)cdataptr(cd) && - p + szs <= (uint8_t *)cdataptr(cd) + sz); + lj_assertJ(p >= (uint8_t *)cdataptr(cd) && + p + szs <= (uint8_t *)cdataptr(cd) + sz, + "sunk store with offset out of range"); if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { - lua_assert(szs == 4); - snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4); + lj_assertJ(szs == 4, "sunk store with bad size %d", szs); + snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2, + LJ_LE ? 
p+4 : p, 4); if (LJ_BE) p += 4; } - snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs); + snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs); } } } else @@ -802,10 +820,12 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { IRIns *irk = &T->ir[irs->op1]; TValue tmp, *val; - lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || - irs->o == IR_FSTORE); + lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE || + irs->o == IR_FSTORE, + "sunk store with bad op %d", irs->o); if (irk->o == IR_FREF) { - lua_assert(irk->op2 == IRFL_TAB_META); + lj_assertJ(irk->op2 == IRFL_TAB_META, + "sunk store with bad field %d", irk->op2); snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); /* NOBARRIER: The table is new (marked white). */ setgcref(t->metatable, obj2gco(tabV(&tmp))); @@ -893,7 +913,7 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) #if LJ_FR2 L->base += (map[nent+LJ_BE] & 0xff); #endif - lua_assert(map + nent == flinks); + lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot"); /* Compute current stack top. 
*/ switch (bc_op(*pc)) { diff --git a/src/lj_snap.h b/src/lj_snap.h index 816a9b79c..f1760b05f 100644 --- a/src/lj_snap.h +++ b/src/lj_snap.h @@ -13,7 +13,8 @@ LJ_FUNC void lj_snap_add(jit_State *J); LJ_FUNC void lj_snap_purge(jit_State *J); LJ_FUNC void lj_snap_shrink(jit_State *J); -LJ_FUNC IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir); +LJ_FUNC IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, + IRIns *ir); LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T); LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr); LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); diff --git a/src/lj_state.c b/src/lj_state.c index dc82e2606..7081a474a 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -60,7 +60,8 @@ static void resizestack(lua_State *L, MSize n) MSize oldsize = L->stacksize; MSize realsize = n + 1 + LJ_STACK_EXTRA; GCobj *up; - lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1); + lj_assertL((MSize)(tvref(L->maxstack)-oldst) == L->stacksize-LJ_STACK_EXTRA-1, + "inconsistent stack size"); st = (TValue *)lj_mem_realloc(L, tvref(L->stack), (MSize)(oldsize*sizeof(TValue)), (MSize)(realsize*sizeof(TValue))); @@ -162,8 +163,9 @@ static void close_state(lua_State *L) global_State *g = G(L); lj_func_closeuv(L, tvref(L->stack)); lj_gc_freeall(g); - lua_assert(gcref(g->gc.root) == obj2gco(L)); - lua_assert(g->strnum == 0); + lj_assertG(gcref(g->gc.root) == obj2gco(L), + "main thread is not first GC object"); + lj_assertG(g->strnum == 0, "leaked %d strings", g->strnum); lj_trace_freestate(g); #if LJ_HASFFI lj_ctype_freestate(g); @@ -171,7 +173,9 @@ static void close_state(lua_State *L) lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); lj_buf_free(g, &g->tmpbuf); lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); - lua_assert(g->gc.total == sizeof(GG_State)); + lj_assertG(g->gc.total == sizeof(GG_State), + "memory leak of %lld bytes", + (long long)(g->gc.total - sizeof(GG_State))); #ifndef 
LUAJIT_USE_SYSMALLOC if (g->allocf == lj_alloc_f) lj_alloc_destroy(g->allocd); @@ -283,17 +287,17 @@ lua_State *lj_state_new(lua_State *L) setmrefr(L1->glref, L->glref); setgcrefr(L1->env, L->env); stack_init(L1, L); /* init stack */ - lua_assert(iswhite(obj2gco(L1))); + lj_assertL(iswhite(obj2gco(L1)), "new thread object is not white"); return L1; } void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) { - lua_assert(L != mainthread(g)); + lj_assertG(L != mainthread(g), "free of main thread"); if (obj2gco(L) == gcref(g->cur_L)) setgcrefnull(g->cur_L); lj_func_closeuv(L, tvref(L->stack)); - lua_assert(gcref(L->openupval) == NULL); + lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues"); lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); lj_mem_freet(g, L); } diff --git a/src/lj_str.c b/src/lj_str.c index ec74afa5b..0253c15e0 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -41,8 +41,9 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) { MSize i = 0; - lua_assert(len > 0); - lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4); + lj_assertX(len > 0, "fast string compare with zero length"); + lj_assertX((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4, + "fast string compare crossing page boundary"); do { /* Note: innocuous access up to end of string + 3. 
*/ uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i); if (v) { diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c index 8f968d32e..331d9474c 100644 --- a/src/lj_strfmt.c +++ b/src/lj_strfmt.c @@ -320,7 +320,7 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) if ((sf & STRFMT_F_LEFT)) while (width-- > pprec) *p++ = ' '; - lua_assert(need == (MSize)(p - ps)); + lj_assertX(need == (MSize)(p - ps), "miscalculated format size"); setsbufP(sb, p); return sb; } @@ -449,7 +449,7 @@ const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp) case STRFMT_ERR: default: lj_buf_putb(sb, '?'); - lua_assert(0); + lj_assertL(0, "bad string format near offset %d", fs.len); break; } } diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h index 339f8e15a..b4fbbb94f 100644 --- a/src/lj_strfmt.h +++ b/src/lj_strfmt.h @@ -79,7 +79,8 @@ static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len) { fs->p = (const uint8_t *)p; fs->e = (const uint8_t *)p + len; - lua_assert(*fs->e == 0); /* Must be NUL-terminated (may have NULs inside). */ + /* Must be NUL-terminated. May have NULs inside, too. */ + lj_assertX(*fs->e == 0, "format not NUL-terminated"); } /* Raw conversions. */ diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c index 36b11dc01..8cb5d47f7 100644 --- a/src/lj_strfmt_num.c +++ b/src/lj_strfmt_num.c @@ -257,7 +257,7 @@ static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen, } else { prec -= hilen - 9; } - lua_assert(prec < 9); + lj_assertX(prec < 9, "bad precision %d", prec); lj_strfmt_wuint9(nd9, nd[ndhi]); lj_strfmt_wuint9(ref9, *ref); return !memcmp(nd9, ref9, prec) && (nd9[prec] < '5') == (ref9[prec] < '5'); @@ -414,14 +414,14 @@ static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p) ** Rescaling was performed, but this introduced some error, and might ** have pushed us across a rounding boundary. 
We check whether this ** error affected the result by introducing even more error (2ulp in - ** either direction), and seeing whether a roundary boundary was + ** either direction), and seeing whether a rounding boundary was ** crossed. Having already converted the -2ulp case, we save off its ** most significant digits, convert the +2ulp case, and compare them. */ int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29) + (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12)); const int8_t *m_e = four_ulp_m_e + eidx * 2; - lua_assert(0 <= eidx && eidx < 128); + lj_assertG_(G(sbufL(sb)), 0 <= eidx && eidx < 128, "bad eidx %d", eidx); nd[33] = nd[ndhi]; nd[32] = nd[(ndhi - 1) & 0x3f]; nd[31] = nd[(ndhi - 2) & 0x3f]; diff --git a/src/lj_strscan.c b/src/lj_strscan.c index 433b33a38..0e37a4f6f 100644 --- a/src/lj_strscan.c +++ b/src/lj_strscan.c @@ -93,7 +93,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg) } /* Convert to double using a signed int64_t conversion, then rescale. */ - lua_assert((int64_t)x >= 0); + lj_assertX((int64_t)x >= 0, "bad double conversion"); n = (double)(int64_t)x; if (neg) n = -n; if (ex2) n = ldexp(n, ex2); @@ -262,7 +262,7 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o, uint32_t hi = 0, lo = (uint32_t)(xip-xi); int32_t ex2 = 0, idig = (int32_t)lo + (ex10 >> 1); - lua_assert(lo > 0 && (ex10 & 1) == 0); + lj_assertX(lo > 0 && (ex10 & 1) == 0, "bad lo %d ex10 %d", lo, ex10); /* Handle simple overflow/underflow. 
*/ if (idig > 310/2) { if (neg) setminfV(o); else setpinfV(o); return fmt; } @@ -528,7 +528,7 @@ int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o) { StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o, STRSCAN_OPT_TONUM); - lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM); + lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM, "bad scan format"); return (fmt != STRSCAN_ERROR); } @@ -537,7 +537,8 @@ int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o) { StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o, STRSCAN_OPT_TOINT); - lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT); + lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT, + "bad scan format"); if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM); return (fmt != STRSCAN_ERROR); } diff --git a/src/lj_tab.c b/src/lj_tab.c index eb9ef4af9..efc423cb9 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -38,7 +38,7 @@ static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) /* Hash an arbitrary key and return its anchor position in the hash table. 
*/ static Node *hashkey(const GCtab *t, cTValue *key) { - lua_assert(!tvisint(key)); + lj_assertX(!tvisint(key), "attempt to hash integer"); if (tvisstr(key)) return hashstr(t, strV(key)); else if (tvisnum(key)) @@ -57,7 +57,7 @@ static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits) { uint32_t hsize; Node *node; - lua_assert(hbits != 0); + lj_assertL(hbits != 0, "zero hash size"); if (hbits > LJ_MAX_HBITS) lj_err_msg(L, LJ_ERR_TABOV); hsize = 1u << hbits; @@ -78,7 +78,7 @@ static LJ_AINLINE void clearhpart(GCtab *t) { uint32_t i, hmask = t->hmask; Node *node = noderef(t->node); - lua_assert(t->hmask != 0); + lj_assertX(t->hmask != 0, "empty hash part"); for (i = 0; i <= hmask; i++) { Node *n = &node[i]; setmref(n->next, NULL); @@ -103,7 +103,7 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) /* First try to colocate the array part. */ if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) { Node *nilnode; - lua_assert((sizeof(GCtab) & 7) == 0); + lj_assertL((sizeof(GCtab) & 7) == 0, "bad GCtab size"); t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize)); t->gct = ~LJ_TTAB; t->nomm = (uint8_t)~0; @@ -185,7 +185,8 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) GCtab *t; uint32_t asize, hmask; t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0); - lua_assert(kt->asize == t->asize && kt->hmask == t->hmask); + lj_assertL(kt->asize == t->asize && kt->hmask == t->hmask, + "mismatched size of table and template"); t->nomm = 0; /* Keys with metamethod names may be present. 
*/ asize = kt->asize; if (asize > 0) { @@ -310,7 +311,7 @@ void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) static uint32_t countint(cTValue *key, uint32_t *bins) { - lua_assert(!tvisint(key)); + lj_assertX(!tvisint(key), "bad integer key"); if (tvisnum(key)) { lua_Number nk = numV(key); int32_t k = lj_num2int(nk); @@ -463,7 +464,8 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) if (!tvisnil(&n->val) || t->hmask == 0) { Node *nodebase = noderef(t->node); Node *collide, *freenode = getfreetop(t, nodebase); - lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1); + lj_assertL(freenode >= nodebase && freenode <= nodebase+t->hmask+1, + "bad freenode"); do { if (freenode == nodebase) { /* No free node found? */ rehashtab(L, t, key); /* Rehash table. */ @@ -471,7 +473,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) } } while (!tvisnil(&(--freenode)->key)); setfreetop(t, nodebase, freenode); - lua_assert(freenode != &G(L)->nilnode); + lj_assertL(freenode != &G(L)->nilnode, "store to fallback hash"); collide = hashkey(t, &n->key); if (collide != n) { /* Colliding node not the main node? */ while (noderef(collide->next) != n) /* Find predecessor. */ @@ -527,7 +529,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) if (LJ_UNLIKELY(tvismzero(&n->key))) n->key.u64 = 0; lj_gc_anybarriert(L, t); - lua_assert(tvisnil(&n->val)); + lj_assertL(tvisnil(&n->val), "new hash slot is not empty"); return &n->val; } diff --git a/src/lj_target.h b/src/lj_target.h index 47c960bc9..ce67d000c 100644 --- a/src/lj_target.h +++ b/src/lj_target.h @@ -152,7 +152,8 @@ typedef uint32_t RegCost; /* Return the address of an exit stub. 
*/ static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno) { - lua_assert(group[exitno / EXITSTUBS_PER_GROUP] != NULL); + lj_assertX(group[exitno / EXITSTUBS_PER_GROUP] != NULL, + "exit stub group for exit %d uninitialized", exitno); return (char *)group[exitno / EXITSTUBS_PER_GROUP] + EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP); } diff --git a/src/lj_trace.c b/src/lj_trace.c index a43c8c4ef..c4e728c64 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -104,7 +104,8 @@ static void perftools_addtrace(GCtrace *T) name++; else name = "(string)"; - lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); + lj_assertX(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc, + "trace PC out of range"); lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); if (!fp) { char fname[40]; @@ -183,7 +184,7 @@ void lj_trace_reenableproto(GCproto *pt) { if ((pt->flags & PROTO_ILOOP)) { BCIns *bc = proto_bc(pt); - BCPos i, sizebc = pt->sizebc;; + BCPos i, sizebc = pt->sizebc; pt->flags &= ~PROTO_ILOOP; if (bc_op(bc[0]) == BC_IFUNCF) setbc_op(&bc[0], BC_FUNCF); @@ -205,27 +206,28 @@ static void trace_unpatch(jit_State *J, GCtrace *T) return; /* No need to unpatch branches in parent traces (yet). 
*/ switch (bc_op(*pc)) { case BC_JFORL: - lua_assert(traceref(J, bc_d(*pc)) == T); + lj_assertJ(traceref(J, bc_d(*pc)) == T, "JFORL references other trace"); *pc = T->startins; pc += bc_j(T->startins); - lua_assert(bc_op(*pc) == BC_JFORI); + lj_assertJ(bc_op(*pc) == BC_JFORI, "FORL does not point to JFORI"); setbc_op(pc, BC_FORI); break; case BC_JITERL: case BC_JLOOP: - lua_assert(op == BC_ITERL || op == BC_LOOP || bc_isret(op)); + lj_assertJ(op == BC_ITERL || op == BC_LOOP || bc_isret(op), + "bad original bytecode %d", op); *pc = T->startins; break; case BC_JMP: - lua_assert(op == BC_ITERL); + lj_assertJ(op == BC_ITERL, "bad original bytecode %d", op); pc += bc_j(*pc)+2; if (bc_op(*pc) == BC_JITERL) { - lua_assert(traceref(J, bc_d(*pc)) == T); + lj_assertJ(traceref(J, bc_d(*pc)) == T, "JITERL references other trace"); *pc = T->startins; } break; case BC_JFUNCF: - lua_assert(op == BC_FUNCF); + lj_assertJ(op == BC_FUNCF, "bad original bytecode %d", op); *pc = T->startins; break; default: /* Already unpatched. */ @@ -237,7 +239,8 @@ static void trace_unpatch(jit_State *J, GCtrace *T) static void trace_flushroot(jit_State *J, GCtrace *T) { GCproto *pt = &gcref(T->startpt)->pt; - lua_assert(T->root == 0 && pt != NULL); + lj_assertJ(T->root == 0, "not a root trace"); + lj_assertJ(pt != NULL, "trace has no prototype"); /* First unpatch any modified bytecode. */ trace_unpatch(J, T); /* Unlink root trace from chain anchored in prototype. */ @@ -353,7 +356,8 @@ void lj_trace_freestate(global_State *g) { /* This assumes all traces have already been freed. */ ptrdiff_t i; for (i = 1; i < (ptrdiff_t)J->sizetrace; i++) - lua_assert(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL); + lj_assertG(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL, + "trace still allocated"); } #endif lj_mcode_free(J); @@ -408,8 +412,9 @@ static void trace_start(jit_State *J) if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? 
*/ if (J->parent == 0 && J->exitno == 0) { /* Lazy bytecode patching to disable hotcount events. */ - lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || - bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF); + lj_assertJ(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || + bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF, + "bad hot bytecode %d", bc_op(*J->pc)); setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP); J->pt->flags |= PROTO_ILOOP; } @@ -420,7 +425,8 @@ static void trace_start(jit_State *J) /* Get a new trace number. */ traceno = trace_findfree(J); if (LJ_UNLIKELY(traceno == 0)) { /* No free trace? */ - lua_assert((J2G(J)->hookmask & HOOK_GC) == 0); + lj_assertJ((J2G(J)->hookmask & HOOK_GC) == 0, + "recorder called from GC hook"); lj_trace_flushall(J->L); J->state = LJ_TRACE_IDLE; /* Silently ignored. */ return; @@ -496,7 +502,7 @@ static void trace_stop(jit_State *J) goto addroot; case BC_JMP: /* Patch exit branch in parent to side trace entry. */ - lua_assert(J->parent != 0 && J->cur.root != 0); + lj_assertJ(J->parent != 0 && J->cur.root != 0, "not a side trace"); lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode); /* Avoid compiling a side trace twice (stack resizing uses parent exit). */ traceref(J, J->parent)->snap[J->exitno].count = SNAPCOUNT_DONE; @@ -515,7 +521,7 @@ static void trace_stop(jit_State *J) traceref(J, J->exitno)->link = traceno; break; default: - lua_assert(0); + lj_assertJ(0, "bad stop bytecode %d", op); break; } @@ -536,8 +542,8 @@ static void trace_stop(jit_State *J) static int trace_downrec(jit_State *J) { /* Restart recording at the return instruction. */ - lua_assert(J->pt != NULL); - lua_assert(bc_isret(bc_op(*J->pc))); + lj_assertJ(J->pt != NULL, "no active prototype"); + lj_assertJ(bc_isret(bc_op(*J->pc)), "not at a return bytecode"); if (bc_op(*J->pc) == BC_RETM) return 0; /* NYI: down-recursion with RETM. 
*/ J->parent = 0; @@ -750,7 +756,7 @@ static void trace_hotside(jit_State *J, const BCIns *pc) isluafunc(curr_func(J->L)) && snap->count != SNAPCOUNT_DONE && ++snap->count >= J->param[JIT_P_hotexit]) { - lua_assert(J->state == LJ_TRACE_IDLE); + lj_assertJ(J->state == LJ_TRACE_IDLE, "hot side exit while recording"); /* J->parent is non-zero for a side trace. */ J->state = LJ_TRACE_START; lj_trace_ins(J, pc); @@ -822,7 +828,7 @@ static TraceNo trace_exit_find(jit_State *J, MCode *pc) if (T && pc >= T->mcode && pc < (MCode *)((char *)T->mcode + T->szmcode)) return traceno; } - lua_assert(0); + lj_assertJ(0, "bad exit pc"); return 0; } #endif @@ -844,13 +850,13 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) T = traceref(J, J->parent); UNUSED(T); #ifdef EXITSTATE_CHECKEXIT if (J->exitno == T->nsnap) { /* Treat stack check like a parent exit. */ - lua_assert(T->root != 0); + lj_assertJ(T->root != 0, "stack check in root trace"); J->exitno = T->ir[REF_BASE].op2; J->parent = T->ir[REF_BASE].op1; T = traceref(J, J->parent); } #endif - lua_assert(T != NULL && J->exitno < T->nsnap); + lj_assertJ(T != NULL && J->exitno < T->nsnap, "bad trace or exit number"); exd.J = J; exd.exptr = exptr; errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp); diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 623a686d8..9ed37bf2b 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -60,7 +60,8 @@ double lj_vm_foldarith(double x, double y, int op) int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) { uint32_t y, ua, ub; - lua_assert(b != 0); /* This must be checked before using this function. */ + /* This must be checked before using this function. */ + lj_assertX(b != 0, "modulo with zero divisor"); ua = a < 0 ? (uint32_t)-a : (uint32_t)a; ub = b < 0 ? 
(uint32_t)-b : (uint32_t)b; y = ua % ub; @@ -84,7 +85,7 @@ double lj_vm_log2(double a) static double lj_vm_powui(double x, uint32_t k) { double y; - lua_assert(k != 0); + lj_assertX(k != 0, "pow with zero exponent"); for (; (k & 1) == 0; k >>= 1) x *= x; y = x; if ((k >>= 1) != 0) { @@ -123,7 +124,7 @@ double lj_vm_foldfpm(double x, int fpm) case IRFPM_SQRT: return sqrt(x); case IRFPM_LOG: return log(x); case IRFPM_LOG2: return lj_vm_log2(x); - default: lua_assert(0); + default: lj_assertX(0, "bad fpm %d", fpm); } return 0; } diff --git a/src/ljamalg.c b/src/ljamalg.c index 6712d4354..19980241b 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c @@ -18,6 +18,7 @@ #include "lua.h" #include "lauxlib.h" +#include "lj_assert.c" #include "lj_gc.c" #include "lj_err.c" #include "lj_char.c" diff --git a/src/luaconf.h b/src/luaconf.h index d422827a7..18fb961da 100644 --- a/src/luaconf.h +++ b/src/luaconf.h @@ -136,7 +136,7 @@ #define LUALIB_API LUA_API -/* Support for internal assertions. */ +/* Compatibility support for assertions. */ #if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) #include <assert.h> #endif From d333b1a6febf35f891fcd6e121876c9c3efee1e0 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 13 Jun 2020 01:12:14 +0200 Subject: [PATCH 13/47] ARM: Implement FLOAD from GG_State. --- src/lj_asm_arm.h | 21 +++++++++++---------- src/lj_ffrecord.c | 6 ------ src/lj_record.c | 5 ----- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 56ce4a079..d2fad1418 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -1007,14 +1007,15 @@ static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir) { + Reg dest = ra_dest(as, ir, RSET_GPR); + ARMIns ai = asm_fxloadins(as, ir); + Reg idx; + int32_t ofs; if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ - /* We can end up here if DCE is turned off. 
*/ - lj_assertA(!ra_used(ir), "NYI FLOAD GG_State"); + idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR); + ofs = 0; } else { - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); - ARMIns ai = asm_fxloadins(as, ir); - int32_t ofs; + idx = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->op2 == IRFL_TAB_ARRAY) { ofs = asm_fuseabase(as, ir->op1); if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ @@ -1023,11 +1024,11 @@ static void asm_fload(ASMState *as, IRIns *ir) } } ofs = field_ofs[ir->op2]; - if ((ai & 0x04000000)) - emit_lso(as, ai, dest, idx, ofs); - else - emit_lsox(as, ai, dest, idx, ofs); } + if ((ai & 0x04000000)) + emit_lso(as, ai, dest, idx, ofs); + else + emit_lsox(as, ai, dest, idx, ofs); } static void asm_fstore(ASMState *as, IRIns *ir) diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 2557cadff..d34340cef 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -1086,13 +1086,7 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id) { TRef tr, ud, fp; if (id) { /* io.func() */ -#if LJ_GC64 - /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */ ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id])); -#else - tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); - ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); -#endif } else { /* fp:method() */ ud = J->base[0]; if (!tref_isudata(ud)) diff --git a/src/lj_record.c b/src/lj_record.c index 2a4a766e5..cfa48ecf6 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1009,13 +1009,8 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) } /* The cdata metatable is treated as immutable. */ if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; -#if LJ_GC64 - /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. 
*/ ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB, GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)])); -#else - ix->mt = mix.tab = lj_ir_ktab(J, mt); -#endif goto nocheck; } ix->mt = mt ? mix.tab : TREF_NIL; From 34e53736c6ed90dc56357aff22009e88b443ecfd Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 15 Jun 2020 11:23:04 +0200 Subject: [PATCH 14/47] Cleanup some arch defines and fix builds. --- src/Makefile | 3 +-- src/lj_alloc.c | 2 +- src/lj_arch.h | 31 +++++++++++++++++++++++-------- src/lj_gdbjit.c | 2 +- src/ps4build.bat | 4 ++-- src/vm_x64.dasc | 2 +- src/vm_x86.dasc | 2 +- src/xb1build.bat | 2 +- 8 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/Makefile b/src/Makefile index a96c19972..667fff563 100644 --- a/src/Makefile +++ b/src/Makefile @@ -212,7 +212,7 @@ TARGET_CC= $(STATIC_CC) TARGET_STCC= $(STATIC_CC) TARGET_DYNCC= $(DYNAMIC_CC) TARGET_LD= $(CROSS)$(CC) -TARGET_AR= $(CROSS)ar rcus +TARGET_AR= $(CROSS)ar rcus 2>/dev/null TARGET_STRIP= $(CROSS)strip TARGET_LIBPATH= $(or $(PREFIX),/usr/local)/$(or $(MULTILIB),lib) @@ -312,7 +312,6 @@ ifeq (Windows,$(TARGET_SYS)) TARGET_XSHLDFLAGS= -shared -Wl,--out-implib,$(TARGET_DLLDOTANAME) TARGET_DYNXLDOPTS= else - TARGET_AR+= 2>/dev/null ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-stack-protector 2>/dev/null || echo 1)) TARGET_XCFLAGS+= -fno-stack-protector endif diff --git a/src/lj_alloc.c b/src/lj_alloc.c index a12ec8b46..70ca1e3bb 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c @@ -302,7 +302,7 @@ static void *mmap_probe(size_t size) #if LJ_ALLOC_MMAP32 -#if defined(__sun__) +#if LJ_TARGET_SOLARIS #define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000) #else #define LJ_ALLOC_MMAP32_START ((uintptr_t)0) diff --git a/src/lj_arch.h b/src/lj_arch.h index d65bc551e..3e3581c21 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -8,6 +8,8 @@ #include "lua.h" +/* -- Target definitions -------------------------------------------------- */ + /* Target endianess. 
*/ #define LUAJIT_LE 0 #define LUAJIT_BE 1 @@ -38,6 +40,14 @@ #define LUAJIT_OS_BSD 4 #define LUAJIT_OS_POSIX 5 +/* Number mode. */ +#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ +#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ +#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ +#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */ + +/* -- Target detection ---------------------------------------------------- */ + /* Select native target if no target defined. */ #ifndef LUAJIT_TARGET @@ -74,7 +84,10 @@ defined(__NetBSD__) || defined(__OpenBSD__) || \ defined(__DragonFly__)) && !defined(__ORBIS__) #define LUAJIT_OS LUAJIT_OS_BSD -#elif (defined(__sun__) && defined(__svr4__)) || defined(__HAIKU__) +#elif (defined(__sun__) && defined(__svr4__)) +#define LJ_TARGET_SOLARIS 1 +#define LUAJIT_OS LUAJIT_OS_POSIX +#elif defined(__HAIKU__) #define LUAJIT_OS LUAJIT_OS_POSIX #elif defined(__CYGWIN__) #define LJ_TARGET_CYGWIN 1 @@ -103,6 +116,7 @@ #define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS) #define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) #define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX) +#define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD) #define LJ_TARGET_IOS (LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64)) #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX @@ -142,10 +156,7 @@ #endif #endif -#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ -#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ -#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ -#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */ +/* -- Arch-specific settings ---------------------------------------------- */ /* Set target architecture properties. 
*/ #if LUAJIT_TARGET == LUAJIT_ARCH_X86 @@ -407,9 +418,7 @@ #error "No target architecture defined" #endif -#ifndef LJ_PAGESIZE -#define LJ_PAGESIZE 4096 -#endif +/* -- Checks for requirements --------------------------------------------- */ /* Check for minimum required compiler versions. */ #if defined(__GNUC__) @@ -485,6 +494,8 @@ #endif #endif +/* -- Derived defines ----------------------------------------------------- */ + /* Enable or disable the dual-number mode for the VM. */ #if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \ (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1) @@ -582,6 +593,10 @@ #define LJ_TARGET_UNALIGNED 0 #endif +#ifndef LJ_PAGESIZE +#define LJ_PAGESIZE 4096 +#endif + /* Various workarounds for embedded operating systems or weak C runtimes. */ #if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS #define LUAJIT_NO_LOG2 diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index 69585e512..5b9fe0ad8 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c @@ -363,7 +363,7 @@ static const ELFheader elfhdr_template = { .eosabi = 12, #elif defined(__DragonFly__) .eosabi = 0, -#elif (defined(__sun__) && defined(__svr4__)) +#elif LJ_TARGET_SOLARIS .eosabi = 6, #else .eosabi = 0, diff --git a/src/ps4build.bat b/src/ps4build.bat index e4a7defe7..e83c674ab 100644 --- a/src/ps4build.bat +++ b/src/ps4build.bat @@ -27,12 +27,12 @@ @set DASMDIR=..\dynasm @set DASM=%DASMDIR%\dynasm.lua @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c -@set GC64=-DLUAJIT_ENABLE_GC64 +@set GC64= @set DASC=vm_x64.dasc @if "%1" neq "gc32" goto :NOGC32 @shift -@set GC64= +@set GC64=-DLUAJIT_DISABLE_GC64 @set DASC=vm_x86.dasc :NOGC32 diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index c714f4c7b..77a579d57 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -4734,7 +4734,7 @@ static void emit_asm_debug(BuildCtx *ctx) ".LEFDE1:\n\n", 
(int)ctx->codesz - fcofs); #endif #if !LJ_NO_UNWIND -#if (defined(__sun__) && defined(__svr4__)) +#if LJ_TARGET_SOLARIS fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); #else fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index c3999e7c7..57c8e4fcb 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -5548,7 +5548,7 @@ static void emit_asm_debug(BuildCtx *ctx) ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); #endif #if !LJ_NO_UNWIND -#if (defined(__sun__) && defined(__svr4__)) +#if LJ_TARGET_SOLARIS #if LJ_64 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); #else diff --git a/src/xb1build.bat b/src/xb1build.bat index 847e84a55..12c73dd66 100644 --- a/src/xb1build.bat +++ b/src/xb1build.bat @@ -9,7 +9,7 @@ @setlocal @echo ---- Host compiler ---- -@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /DLUAJIT_ENABLE_GC64 +@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE @set LJLINK=link /nologo @set LJMT=mt /nologo @set DASMDIR=..\dynasm From a44f53acf53603e7d9b88352de035b1804be4e88 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 15 Jun 2020 12:21:05 +0200 Subject: [PATCH 15/47] Use a securely seeded global PRNG for the VM. It's not 2005 anymore. 
--- src/Makefile | 4 +- src/Makefile.dep | 50 ++++++----- src/lib_aux.c | 8 +- src/lib_math.c | 53 +++-------- src/lj_alloc.c | 83 ++++++++--------- src/lj_alloc.h | 3 +- src/lj_arch.h | 15 ++++ src/lj_def.h | 5 ++ src/lj_ffrecord.c | 2 +- src/lj_ir.c | 2 +- src/lj_ircall.h | 2 +- src/lj_jit.h | 13 +-- src/lj_lib.h | 5 -- src/lj_mcode.c | 41 +++------ src/lj_obj.h | 1 + src/lj_prng.c | 225 ++++++++++++++++++++++++++++++++++++++++++++++ src/lj_prng.h | 24 +++++ src/lj_record.c | 3 +- src/lj_state.c | 38 ++++++-- src/lj_state.h | 2 + src/lj_trace.c | 3 +- src/ljamalg.c | 1 + 22 files changed, 410 insertions(+), 173 deletions(-) create mode 100644 src/lj_prng.c create mode 100644 src/lj_prng.h diff --git a/src/Makefile b/src/Makefile index 667fff563..6a9de5db1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -485,8 +485,8 @@ LJLIB_C= $(LJLIB_O:.o=.c) LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ - lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ - lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \ + lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \ + lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \ lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ diff --git a/src/Makefile.dep b/src/Makefile.dep index 03dba96b5..3f26599ed 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -1,6 +1,6 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \ - lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h + lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h 
lj_ctype.h lj_cconv.h \ @@ -28,7 +28,7 @@ lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \ lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ - lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h + lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_prng.h lj_libdef.h lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \ lj_libdef.h @@ -41,7 +41,8 @@ lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h -lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h +lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h \ + lj_prng.h lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ @@ -126,7 +127,7 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \ - lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h + lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_prng.h lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \ lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \ @@ -140,7 +141,7 @@ lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h 
lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ - lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h + lj_dispatch.h lj_bc.h lj_traceerr.h lj_prng.h lj_vm.h lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \ lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h @@ -172,11 +173,12 @@ lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \ lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h +lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \ lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \ - lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h + lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ @@ -184,7 +186,8 @@ lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \ - lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h + lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \ + lj_alloc.h luajit.h lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_strfmt.o: lj_strfmt.c 
lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ @@ -199,7 +202,7 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \ lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \ - lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h + lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_prng.h lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_udata.h lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ @@ -214,21 +217,22 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \ lj_traceerr.h lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h \ lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c \ lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \ - lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \ - lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \ - lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \ - lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \ - lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \ - lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \ - lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \ - lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \ - lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \ - lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \ - lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \ - lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \ - lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \ - lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \ - lib_ffi.c lib_init.c + lj_prng.c lj_prng.h lj_state.c lj_lex.h 
lj_alloc.h luajit.h \ + lj_dispatch.c lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h \ + lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_api.c \ + lj_profile.c lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c \ + lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h \ + lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h \ + lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h \ + lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h \ + lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \ + lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \ + lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ + lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ + lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ + lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ + lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ + lib_init.c luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ diff --git a/src/lib_aux.c b/src/lib_aux.c index 8f10e23cb..35866f8d7 100644 --- a/src/lib_aux.c +++ b/src/lib_aux.c @@ -345,17 +345,13 @@ LUALIB_API lua_State *luaL_newstate(void) #else -#include "lj_alloc.h" - LUALIB_API lua_State *luaL_newstate(void) { lua_State *L; - void *ud = lj_alloc_create(); - if (ud == NULL) return NULL; #if LJ_64 && !LJ_GC64 - L = lj_state_newstate(lj_alloc_f, ud); + L = lj_state_newstate(LJ_ALLOCF_INTERNAL, NULL); #else - L = lua_newstate(lj_alloc_f, ud); + L = lua_newstate(LJ_ALLOCF_INTERNAL, NULL); #endif if (L) G(L)->panic = panic; return L; diff --git a/src/lib_math.c b/src/lib_math.c index 4cc2ba6e2..95b7d4609 100644 --- a/src/lib_math.c +++ b/src/lib_math.c @@ -15,6 +15,7 @@ #include "lj_obj.h" #include "lj_lib.h" #include 
"lj_vm.h" +#include "lj_prng.h" /* ------------------------------------------------------------------------ */ @@ -105,34 +106,11 @@ LJLIB_PUSH(1e310) LJLIB_SET(huge) ** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. */ -/* PRNG state. */ -struct RandomState { - uint64_t gen[4]; /* State of the 4 LFSR generators. */ - int valid; /* State is valid. */ -}; - /* Union needed for bit-pattern conversion between uint64_t and double. */ typedef union { uint64_t u64; double d; } U64double; -/* Update generator i and compute a running xor of all states. */ -#define TW223_GEN(i, k, q, s) \ - z = rs->gen[i]; \ - z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ - r ^= z; rs->gen[i] = z; - -/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */ -LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs) -{ - uint64_t z, r = 0; - TW223_GEN(0, 63, 31, 18) - TW223_GEN(1, 58, 19, 28) - TW223_GEN(2, 55, 24, 7) - TW223_GEN(3, 47, 21, 8) - return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000); -} - -/* PRNG initialization function. */ -static void random_init(RandomState *rs, double d) +/* PRNG seeding function. */ +static void random_seed(PRNGState *rs, double d) { uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ int i; @@ -141,24 +119,22 @@ static void random_init(RandomState *rs, double d) uint32_t m = 1u << (r&255); r >>= 8; u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; - if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ - rs->gen[i] = u.u64; + if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of u[i] are non-zero. */ + rs->u[i] = u.u64; } - rs->valid = 1; for (i = 0; i < 10; i++) - lj_math_random_step(rs); + (void)lj_prng_u64(rs); } /* PRNG extract function. */ -LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ +LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */ LJLIB_CF(math_random) LJLIB_REC(.) 
{ int n = (int)(L->top - L->base); - RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); + PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); U64double u; double d; - if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0); - u.u64 = lj_math_random_step(rs); + u.u64 = lj_prng_u64d(rs); d = u.d - 1.0; if (n > 0) { #if LJ_DUALNUM @@ -203,11 +179,11 @@ LJLIB_CF(math_random) LJLIB_REC(.) } /* PRNG seed function. */ -LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ +LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */ LJLIB_CF(math_randomseed) { - RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); - random_init(rs, lj_lib_checknum(L, 1)); + PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); + random_seed(rs, lj_lib_checknum(L, 1)); return 0; } @@ -217,9 +193,8 @@ LJLIB_CF(math_randomseed) LUALIB_API int luaopen_math(lua_State *L) { - RandomState *rs; - rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); - rs->valid = 0; /* Use lazy initialization to save some time on startup. */ + PRNGState *rs = (PRNGState *)lua_newuserdata(L, sizeof(PRNGState)); + lj_prng_seed_fixed(rs); LJ_LIB_REG(L, LUA_MATHLIBNAME, math); return 1; } diff --git a/src/lj_alloc.c b/src/lj_alloc.c index 70ca1e3bb..bf2ae8477 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c @@ -31,6 +31,7 @@ #include "lj_def.h" #include "lj_arch.h" #include "lj_alloc.h" +#include "lj_prng.h" #ifndef LUAJIT_USE_SYSMALLOC @@ -140,7 +141,7 @@ static void init_mmap(void) #define INIT_MMAP() init_mmap() /* Win64 32 bit MMAP via NtAllocateVirtualMemory. 
*/ -static void *CALL_MMAP(size_t size) +static void *mmap_plain(size_t size) { DWORD olderr = GetLastError(); void *ptr = NULL; @@ -164,7 +165,7 @@ static void *direct_mmap(size_t size) #else /* Win32 MMAP via VirtualAlloc */ -static void *CALL_MMAP(size_t size) +static void *mmap_plain(size_t size) { DWORD olderr = GetLastError(); void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); @@ -184,7 +185,8 @@ static void *direct_mmap(size_t size) #endif -#define DIRECT_MMAP(size) direct_mmap(size) +#define CALL_MMAP(prng, size) mmap_plain(size) +#define DIRECT_MMAP(prng, size) direct_mmap(size) /* This function supports releasing coalesed segments */ static int CALL_MUNMAP(void *ptr, size_t size) @@ -228,30 +230,10 @@ static int CALL_MUNMAP(void *ptr, size_t size) #define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000) -/* No point in a giant ifdef mess. Just try to open /dev/urandom. -** It doesn't really matter if this fails, since we get some ASLR bits from -** every unsuitable allocation, too. And we prefer linear allocation, anyway. -*/ -#include <fcntl.h> -#include <unistd.h> - -static uintptr_t mmap_probe_seed(void) -{ - uintptr_t val; - int fd = open("/dev/urandom", O_RDONLY); - if (fd != -1) { - int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val)); - (void)close(fd); - if (ok) return val; - } - return 1; /* Punt. */ -} - -static void *mmap_probe(size_t size) +static void *mmap_probe(PRNGState *rs, size_t size) { /* Hint for next allocation. Doesn't need to be thread-safe. */ static uintptr_t hint_addr = 0; - static uintptr_t hint_prng = 0; int olderr = errno; int retry; for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) { @@ -283,15 +265,8 @@ static void *mmap_probe(size_t size) } } /* Finally, try pseudo-random probing. */ - if (LJ_UNLIKELY(hint_prng == 0)) { - hint_prng = mmap_probe_seed(); - } - /* The unsuitable address we got has some ASLR PRNG bits. 
*/ - hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1)); - do { /* The PRNG itself is very weak, but see above. */ - hint_prng = hint_prng * 1103515245 + 12345; - hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE; - hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1); + do { + hint_addr = lj_prng_u64(rs) & (((uintptr_t)1< nb)) { /* Check for wrap around 0 */ - char *mm = (char *)(DIRECT_MMAP(mmsize)); + char *mm = (char *)(DIRECT_MMAP(m->prng, mmsize)); if (mm != CMFAIL) { size_t offset = align_offset(chunk2mem(mm)); size_t psize = mmsize - offset - DIRECT_FOOT_PAD; @@ -853,6 +838,7 @@ static void *direct_alloc(size_t nb) return chunk2mem(p); } } + UNUSED(m); return NULL; } @@ -1001,7 +987,7 @@ static void *alloc_sys(mstate m, size_t nb) /* Directly map large chunks */ if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) { - void *mem = direct_alloc(nb); + void *mem = direct_alloc(m, nb); if (mem != 0) return mem; } @@ -1010,7 +996,7 @@ static void *alloc_sys(mstate m, size_t nb) size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE; size_t rsize = granularity_align(req); if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */ - char *mp = (char *)(CALL_MMAP(rsize)); + char *mp = (char *)(CALL_MMAP(m->prng, rsize)); if (mp != CMFAIL) { tbase = mp; tsize = rsize; @@ -1237,12 +1223,13 @@ static void *tmalloc_small(mstate m, size_t nb) /* ----------------------------------------------------------------------- */ -void *lj_alloc_create(void) +void *lj_alloc_create(PRNGState *rs) { size_t tsize = DEFAULT_GRANULARITY; char *tbase; INIT_MMAP(); - tbase = (char *)(CALL_MMAP(tsize)); + UNUSED(rs); + tbase = (char *)(CALL_MMAP(rs, tsize)); if (tbase != CMFAIL) { size_t msize = pad_request(sizeof(struct malloc_state)); mchunkptr mn; @@ -1261,6 +1248,12 @@ void *lj_alloc_create(void) return NULL; } +void lj_alloc_setprng(void *msp, PRNGState *rs) +{ + mstate ms = (mstate)msp; + ms->prng = rs; +} + void lj_alloc_destroy(void *msp) { mstate ms = (mstate)msp; diff --git a/src/lj_alloc.h 
b/src/lj_alloc.h index f87a7cf34..669f50b79 100644 --- a/src/lj_alloc.h +++ b/src/lj_alloc.h @@ -9,7 +9,8 @@ #include "lj_def.h" #ifndef LUAJIT_USE_SYSMALLOC -LJ_FUNC void *lj_alloc_create(void); +LJ_FUNC void *lj_alloc_create(PRNGState *rs); +LJ_FUNC void lj_alloc_setprng(void *msp, PRNGState *rs); LJ_FUNC void lj_alloc_destroy(void *msp); LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize); #endif diff --git a/src/lj_arch.h b/src/lj_arch.h index 3e3581c21..626f6c13e 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -633,4 +633,19 @@ extern void *LJ_WIN_LOADLIBA(const char *path); #define LJ_52 0 #endif +/* -- VM security --------------------------------------------------------- */ + +/* Don't make any changes here. Instead build with: +** make "XCFLAGS=-DLUAJIT_SECURITY_flag=value" +*/ + +/* Security defaults. */ +#ifndef LUAJIT_SECURITY_PRNG +#define LUAJIT_SECURITY_PRNG 1 +#endif + +#ifndef LUAJIT_SECURITY_MCODE +#define LUAJIT_SECURITY_MCODE 1 +#endif + #endif diff --git a/src/lj_def.h b/src/lj_def.h index af0687c4f..e458c89f4 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -372,4 +372,9 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v) extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1]) #endif +/* PRNG state. Need this here, details in lj_prng.h. */ +typedef struct PRNGState { + uint64_t u[4]; +} PRNGState; + #endif diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index d34340cef..19da15a29 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -616,7 +616,7 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd) GCudata *ud = udataV(&J->fn->c.upvalue[0]); TRef tr, one; lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. 
*/ - tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud))); + tr = lj_ir_call(J, IRCALL_lj_prng_u64d, lj_ir_kptr(J, uddata(ud))); one = lj_ir_knum_one(J); tr = emitir(IRTN(IR_SUB), tr, one); if (J->base[0]) { diff --git a/src/lj_ir.c b/src/lj_ir.c index 600e432c9..b5e94eb84 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -31,7 +31,7 @@ #include "lj_vm.h" #include "lj_strscan.h" #include "lj_strfmt.h" -#include "lj_lib.h" +#include "lj_prng.h" /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) diff --git a/src/lj_ircall.h b/src/lj_ircall.h index dbc8c0dbb..58cebc5dd 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -172,7 +172,7 @@ typedef struct CCallInfo { _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \ - _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \ + _(ANY, lj_prng_u64d, 1, FS, NUM, CCI_CASTU64) \ _(ANY, lj_vm_modi, 2, FN, INT, 0) \ _(ANY, log10, 1, N, NUM, XA_FP) \ _(ANY, exp, 1, N, NUM, XA_FP) \ diff --git a/src/lj_jit.h b/src/lj_jit.h index fa754b64a..b0d90fcd1 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -438,9 +438,9 @@ typedef struct jit_State { int32_t framedepth; /* Current frame depth. */ int32_t retdepth; /* Return frame depth (count of RETF). */ + uint32_t k32[LJ_K32__MAX]; /* Common 4 byte constants used by backends. */ TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ - TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */ - uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */ + TValue k64[LJ_K64__MAX]; /* Common 8 byte constants. */ IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ @@ -472,7 +472,6 @@ typedef struct jit_State { HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */ uint32_t penaltyslot; /* Round-robin index into penalty slots. 
*/ - uint32_t prngstate; /* PRNG state. */ #ifdef LUAJIT_ENABLE_TABLE_BUMP RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */ @@ -516,12 +515,4 @@ jit_State; #define lj_assertJ(c, ...) ((void)J) #endif -/* Trivial PRNG e.g. used for penalty randomization. */ -static LJ_AINLINE uint32_t LJ_PRNG_BITS(jit_State *J, int bits) -{ - /* Yes, this LCG is very weak, but that doesn't matter for our use case. */ - J->prngstate = J->prngstate * 1103515245 + 12345; - return J->prngstate >> (32-bits); -} - #endif diff --git a/src/lj_lib.h b/src/lj_lib.h index 83778b83c..496bdb2a8 100644 --- a/src/lj_lib.h +++ b/src/lj_lib.h @@ -107,9 +107,4 @@ LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, #define LIBINIT_FFID 0xfe #define LIBINIT_END 0xff -/* Exported library functions. */ - -typedef struct RandomState RandomState; -LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs); - #endif diff --git a/src/lj_mcode.c b/src/lj_mcode.c index e64c5878f..b2d121188 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -14,6 +14,7 @@ #include "lj_mcode.h" #include "lj_trace.h" #include "lj_dispatch.h" +#include "lj_prng.h" #endif #if LJ_HASJIT || LJ_HASFFI #include "lj_vm.h" @@ -118,52 +119,34 @@ static int mcode_setprot(void *p, size_t sz, int prot) return mprotect(p, sz, prot); } -#elif LJ_64 - -#error "Missing OS support for explicit placement of executable memory" - #else -/* Fallback allocator. This will fail if memory is not executable by default. 
*/ -#define LUAJIT_UNPROTECT_MCODE -#define MCPROT_RW 0 -#define MCPROT_RX 0 -#define MCPROT_RWX 0 - -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) -{ - UNUSED(hint); UNUSED(prot); - return lj_mem_new(J->L, sz); -} - -static void mcode_free(jit_State *J, void *p, size_t sz) -{ - lj_mem_free(J2G(J), p, sz); -} +#error "Missing OS support for explicit placement of executable memory" #endif /* -- MCode area protection ----------------------------------------------- */ -/* Define this ONLY if page protection twiddling becomes a bottleneck. */ -#ifdef LUAJIT_UNPROTECT_MCODE +#if LUAJIT_SECURITY_MCODE == 0 -/* It's generally considered to be a potential security risk to have +/* Define this ONLY if page protection twiddling becomes a bottleneck. +** +** It's generally considered to be a potential security risk to have ** pages with simultaneous write *and* execute access in a process. ** ** Do not even think about using this mode for server processes or -** apps handling untrusted external data (such as a browser). +** apps handling untrusted external data. ** ** The security risk is not in LuaJIT itself -- but if an adversary finds -** any *other* flaw in your C application logic, then any RWX memory page -** simplifies writing an exploit considerably. +** any *other* flaw in your C application logic, then any RWX memory pages +** simplify writing an exploit considerably. */ #define MCPROT_GEN MCPROT_RWX #define MCPROT_RUN MCPROT_RWX static void mcode_protect(jit_State *J, int prot) { - UNUSED(J); UNUSED(prot); + UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot); } #else @@ -242,7 +225,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) } /* Next try probing 64K-aligned pseudo-random addresses. */ do { - hint = LJ_PRNG_BITS(J, LJ_TARGET_JUMPRANGE-16) << 16; + hint = lj_prng_u64(&J2G(J)->prng) & ((1u<base or NULL. */ MRef ctype_state; /* Pointer to C type state. */ + PRNGState prng; /* Global PRNG state. 
*/ GCRef gcroot[GCROOT_MAX]; /* GC roots. */ } global_State; diff --git a/src/lj_prng.c b/src/lj_prng.c new file mode 100644 index 000000000..62a6bbb7f --- /dev/null +++ b/src/lj_prng.c @@ -0,0 +1,225 @@ +/* +** Pseudo-random number generation. +** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_prng_c +#define LUA_CORE + +/* To get the syscall prototype. */ +#if defined(__linux__) && !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif + +#include "lj_def.h" +#include "lj_arch.h" +#include "lj_prng.h" + +/* -- PRNG step function -------------------------------------------------- */ + +/* This implements a Tausworthe PRNG with period 2^223. Based on: +** Tables of maximally-equidistributed combined LFSR generators, +** Pierre L'Ecuyer, 1991, table 3, 1st entry. +** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. +** +** Important note: This PRNG is NOT suitable for cryptographic use! +** +** But it works fine for math.random(), which has an API that's not +** suitable for cryptography, anyway. +** +** When used as a securely seeded global PRNG, it substantially raises +** the difficulty for various attacks on the VM. +*/ + +/* Update generator i and compute a running xor of all states. */ +#define TW223_GEN(rs, z, r, i, k, q, s) \ + z = rs->u[i]; \ + z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ + r ^= z; rs->u[i] = z; + +#define TW223_STEP(rs, z, r) \ + TW223_GEN(rs, z, r, 0, 63, 31, 18) \ + TW223_GEN(rs, z, r, 1, 58, 19, 28) \ + TW223_GEN(rs, z, r, 2, 55, 24, 7) \ + TW223_GEN(rs, z, r, 3, 47, 21, 8) + +/* PRNG step function with uint64_t result. */ +LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs) +{ + uint64_t z, r = 0; + TW223_STEP(rs, z, r) + return r; +} + +/* PRNG step function with double in uint64_t result. */ +LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs) +{ + uint64_t z, r = 0; + TW223_STEP(rs, z, r) + /* Returns a double bit pattern in the range 1.0 <= d < 2.0.
*/ + return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000); +} + +/* Condition seed: ensure k[i] MSB of u[i] are non-zero. */ +static LJ_AINLINE void lj_prng_condition(PRNGState *rs) +{ + if (rs->u[0] < (1u << 1)) rs->u[0] += (1u << 1); + if (rs->u[1] < (1u << 6)) rs->u[1] += (1u << 6); + if (rs->u[2] < (1u << 9)) rs->u[2] += (1u << 9); + if (rs->u[3] < (1u << 17)) rs->u[3] += (1u << 17); +} + +/* -- PRNG seeding from OS ------------------------------------------------ */ + +#if LUAJIT_SECURITY_PRNG == 0 + +/* Nothing to define. */ + +#elif LJ_TARGET_XBOX360 + +extern int XNetRandom(void *buf, unsigned int len); + +#elif LJ_TARGET_PS3 + +extern int sys_get_random_number(void *buf, uint64_t len); + +#elif LJ_TARGET_PS4 || LJ_TARGET_PSVITA + +extern int sceRandomGetRandomNumber(void *buf, size_t len); + +#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOXONE + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +#if LJ_TARGET_UWP || LJ_TARGET_XBOXONE +/* Must use BCryptGenRandom. */ +#include <bcrypt.h> +#pragma comment(lib, "bcrypt.lib") +#else +/* If you wonder about this mess, then search online for RtlGenRandom. */ +typedef BOOLEAN (WINAPI *PRGR)(void *buf, ULONG len); +static PRGR libfunc_rgr; +#endif + +#elif LJ_TARGET_POSIX + +#if LJ_TARGET_LINUX +/* Avoid a dependency on glibc 2.25+ and use the getrandom syscall instead. */ +#include <sys/syscall.h> +#elif LJ_TARGET_OSX || LJ_TARGET_BSD || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN +extern int getentropy(void *buf, size_t len) +#ifdef __ELF__ + __attribute__((weak)) +#endif +; +#endif + +/* For the /dev/urandom fallback. */ +#include <fcntl.h> +#include <unistd.h> + +#endif + +#if LUAJIT_SECURITY_PRNG == 0 + +/* If you really don't care about security, then define +** LUAJIT_SECURITY_PRNG=0. This yields a predictable seed +** and provides NO SECURITY against various attacks on the VM. +** +** BTW: This is NOT the way to get predictable table iteration, +** predictable trace generation, predictable bytecode generation, etc.
+*/ +int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs) +{ + lj_prng_seed_fixed(rs); /* The fixed seed is already conditioned. */ + return 1; +} + +#else + +/* Securely seed PRNG from system entropy. Returns 0 on failure. */ +int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs) +{ +#if LJ_TARGET_XBOX360 + + if (XNetRandom(rs->u, (unsigned int)sizeof(rs->u)) == 0) + goto ok; + +#elif LJ_TARGET_PS3 + + if (sys_get_random_number(rs->u, sizeof(rs->u)) == 0) + goto ok; + +#elif LJ_TARGET_PS4 || LJ_TARGET_PSVITA + + if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u)) == 0) + goto ok; + +#elif LJ_TARGET_UWP || LJ_TARGET_XBOXONE + + if (BCryptGenRandom(NULL, (PUCHAR)(rs->u), (ULONG)sizeof(rs->u), + BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0) + goto ok; + +#elif LJ_TARGET_WINDOWS + + /* Keep the library loaded in case multiple VMs are started. */ + if (!libfunc_rgr) { + HMODULE lib = LJ_WIN_LOADLIBA("advapi32.dll"); + if (!lib) return 0; + libfunc_rgr = (PRGR)GetProcAddress(lib, "SystemFunction036"); + if (!libfunc_rgr) return 0; + } + if (libfunc_rgr(rs->u, (ULONG)sizeof(rs->u))) + goto ok; + +#elif LJ_TARGET_POSIX + +#if LJ_TARGET_LINUX && defined(SYS_getrandom) + + if (syscall(SYS_getrandom, rs->u, sizeof(rs->u), 0) == (long)sizeof(rs->u)) + goto ok; + +#elif LJ_TARGET_OSX || LJ_TARGET_BSD || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN + + if ((!__ELF__ || getentropy) && getentropy(rs->u, sizeof(rs->u)) == 0) + goto ok; + +#endif + + /* Fallback to /dev/urandom. This may fail if the device is not + ** existent or accessible in a chroot or container, or if the process + ** or the OS ran out of file descriptors. + */ + { + int fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC); + if (fd != -1) { + ssize_t n = read(fd, rs->u, sizeof(rs->u)); + (void)close(fd); + if (n == (ssize_t)sizeof(rs->u)) + goto ok; + } + } + +#else + + /* Add an elif above for your OS with a secure PRNG seed.
+ ** Note that fiddling around with rand(), getpid(), time() or coercing + ** ASLR to yield a few bits of randomness is not helpful. + ** If you don't want any security, then don't pretend you have any + ** and simply define LUAJIT_SECURITY_PRNG=0 for the build. + */ +#error "Missing secure PRNG seed for this OS" + +#endif + return 0; /* Fail. */ + +ok: + lj_prng_condition(rs); + (void)lj_prng_u64(rs); + return 1; /* Success. */ +} + +#endif + diff --git a/src/lj_prng.h b/src/lj_prng.h new file mode 100644 index 000000000..40c34a717 --- /dev/null +++ b/src/lj_prng.h @@ -0,0 +1,24 @@ +/* +** Pseudo-random number generation. +** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_PRNG_H +#define _LJ_PRNG_H + +#include "lj_def.h" + +LJ_FUNC int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs); +LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs); +LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs); + +/* This is just the precomputed result of lib_math.c:random_seed(rs, 0.0). */ +static LJ_AINLINE void lj_prng_seed_fixed(PRNGState *rs) +{ + rs->u[0] = U64x(a0d27757,0a345b8c); + rs->u[1] = U64x(764a296c,5d4aa64f); + rs->u[2] = U64x(51220704,070adeaa); + rs->u[3] = U64x(2a2717b5,a7b7b927); +} + +#endif diff --git a/src/lj_record.c b/src/lj_record.c index cfa48ecf6..df4288184 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -33,6 +33,7 @@ #include "lj_snap.h" #include "lj_dispatch.h" #include "lj_vm.h" +#include "lj_prng.h" /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) @@ -1696,7 +1697,7 @@ static void check_call_unroll(jit_State *J, TraceNo lnk) if (lnk) { /* Possible tail- or up-recursion. */ lj_trace_flush(J, lnk); /* Flush trace that only returns. */ /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. 
*/ - hotcount_set(J2GG(J), J->pc+1, LJ_PRNG_BITS(J, 4)); + hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J2G(J)->prng) & 15u); } lj_trace_err(J, LJ_TRERR_CUNROLL); } diff --git a/src/lj_state.c b/src/lj_state.c index 7081a474a..a4d072be1 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -25,6 +25,7 @@ #include "lj_trace.h" #include "lj_dispatch.h" #include "lj_vm.h" +#include "lj_prng.h" #include "lj_lex.h" #include "lj_alloc.h" #include "luajit.h" @@ -185,16 +186,33 @@ static void close_state(lua_State *L) } #if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) -lua_State *lj_state_newstate(lua_Alloc f, void *ud) +lua_State *lj_state_newstate(lua_Alloc allocf, void *allocd) #else -LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) +LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd) #endif { - GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); - lua_State *L = &GG->L; - global_State *g = &GG->g; + PRNGState prng; + GG_State *GG; + lua_State *L; + global_State *g; + /* We need the PRNG for the memory allocator, so initialize this first. */ + if (!lj_prng_seed_secure(&prng)) { + lj_assertX(0, "secure PRNG seeding failed"); + /* Can only return NULL here, so this errors with "not enough memory". */ + return NULL; + } +#ifndef LUAJIT_USE_SYSMALLOC + if (allocf == LJ_ALLOCF_INTERNAL) { + allocd = lj_alloc_create(&prng); + if (!allocd) return NULL; + allocf = lj_alloc_f; + } +#endif + GG = (GG_State *)allocf(allocd, NULL, 0, sizeof(GG_State)); if (GG == NULL || !checkptrGC(GG)) return NULL; memset(GG, 0, sizeof(GG_State)); + L = &GG->L; + g = &GG->g; L->gct = ~LJ_TTHREAD; L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. 
*/ L->dummy_ffid = FF_C; @@ -202,8 +220,14 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED; g->strempty.marked = LJ_GC_WHITE0; g->strempty.gct = ~LJ_TSTR; - g->allocf = f; - g->allocd = ud; + g->allocf = allocf; + g->allocd = allocd; + g->prng = prng; +#ifndef LUAJIT_USE_SYSMALLOC + if (allocf == lj_alloc_f) { + lj_alloc_setprng(allocd, &g->prng); + } +#endif setgcref(g->mainthref, obj2gco(L)); setgcref(g->uvhead.prev, obj2gco(&g->uvhead)); setgcref(g->uvhead.next, obj2gco(&g->uvhead)); diff --git a/src/lj_state.h b/src/lj_state.h index 9a8c7d93b..50fe9000f 100644 --- a/src/lj_state.h +++ b/src/lj_state.h @@ -32,4 +32,6 @@ LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); #endif +#define LJ_ALLOCF_INTERNAL ((lua_Alloc)(void *)(uintptr_t)(1237<<4)) + #endif diff --git a/src/lj_trace.c b/src/lj_trace.c index c4e728c64..77e9d05bb 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -30,6 +30,7 @@ #include "lj_vm.h" #include "lj_vmevent.h" #include "lj_target.h" +#include "lj_prng.h" /* -- Error handling ------------------------------------------------------ */ @@ -384,7 +385,7 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */ /* First try to bump its hotcount several times. */ val = ((uint32_t)J->penalty[i].val << 1) + - LJ_PRNG_BITS(J, PENALTY_RNDBITS); + (lj_prng_u64(&J2G(J)->prng) & ((1u< PENALTY_MAX) { blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. 
*/ return; diff --git a/src/ljamalg.c b/src/ljamalg.c index 19980241b..56585e6dd 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c @@ -31,6 +31,7 @@ #include "lj_udata.c" #include "lj_meta.c" #include "lj_debug.c" +#include "lj_prng.c" #include "lj_state.c" #include "lj_dispatch.c" #include "lj_vmevent.c" From ff34b48ddd6f2b3bdd26d6088662a214ba6b0288 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 23 Jun 2020 03:06:45 +0200 Subject: [PATCH 16/47] Redesign and harden string interning. Up to 40% faster on hash-intensive benchmarks. With some ideas from Sokolov Yura. --- src/Makefile | 1 - src/lj.supp | 41 ------ src/lj_arch.h | 17 +++ src/lj_asm.c | 2 +- src/lj_asm_arm.h | 4 +- src/lj_asm_arm64.h | 4 +- src/lj_asm_mips.h | 2 +- src/lj_asm_ppc.h | 2 +- src/lj_asm_x86.h | 2 +- src/lj_gc.c | 38 ++++- src/lj_obj.h | 37 +++-- src/lj_state.c | 8 +- src/lj_str.c | 358 +++++++++++++++++++++++++++++++++------------ src/lj_str.h | 4 + src/lj_tab.c | 4 +- src/vm_arm.dasc | 12 +- src/vm_arm64.dasc | 12 +- src/vm_mips.dasc | 12 +- src/vm_mips64.dasc | 12 +- src/vm_ppc.dasc | 12 +- src/vm_x64.dasc | 6 +- src/vm_x86.dasc | 6 +- 22 files changed, 394 insertions(+), 202 deletions(-) delete mode 100644 src/lj.supp diff --git a/src/Makefile b/src/Makefile index 6a9de5db1..178a5acd3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -132,7 +132,6 @@ XCFLAGS= # # This define is required to run LuaJIT under Valgrind. The Valgrind # header files must be installed. You should enable debug information, too. -# Use --suppressions=lj.supp to avoid some false positives. #XCFLAGS+= -DLUAJIT_USE_VALGRIND # # This is the client for the GDB JIT API. GDB 7.0 or higher is required diff --git a/src/lj.supp b/src/lj.supp deleted file mode 100644 index 217f7c890..000000000 --- a/src/lj.supp +++ /dev/null @@ -1,41 +0,0 @@ -# Valgrind suppression file for LuaJIT 2.0. 
-{ - Optimized string compare - Memcheck:Addr4 - fun:lj_str_cmp -} -{ - Optimized string compare - Memcheck:Addr1 - fun:lj_str_cmp -} -{ - Optimized string compare - Memcheck:Addr4 - fun:lj_str_new -} -{ - Optimized string compare - Memcheck:Addr1 - fun:lj_str_new -} -{ - Optimized string compare - Memcheck:Cond - fun:lj_str_new -} -{ - Optimized string compare - Memcheck:Addr4 - fun:str_fastcmp -} -{ - Optimized string compare - Memcheck:Addr1 - fun:str_fastcmp -} -{ - Optimized string compare - Memcheck:Cond - fun:str_fastcmp -} diff --git a/src/lj_arch.h b/src/lj_arch.h index 626f6c13e..f148b3f7e 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -637,14 +637,31 @@ extern void *LJ_WIN_LOADLIBA(const char *path); /* Don't make any changes here. Instead build with: ** make "XCFLAGS=-DLUAJIT_SECURITY_flag=value" +** +** Important note to distro maintainers: DO NOT change the defaults for a +** regular distro build -- neither upwards, nor downwards! +** These build-time configurable security flags are intended for embedders +** who may have specific needs wrt. security vs. performance. */ /* Security defaults. */ #ifndef LUAJIT_SECURITY_PRNG +/* PRNG init: 0 = fixed/insecure, 1 = secure from OS. */ #define LUAJIT_SECURITY_PRNG 1 #endif +#ifndef LUAJIT_SECURITY_STRHASH +/* String hash: 0 = sparse only, 1 = sparse + dense. */ +#define LUAJIT_SECURITY_STRHASH 1 +#endif + +#ifndef LUAJIT_SECURITY_STRID +/* String IDs: 0 = linear, 1 = reseed < 255, 2 = reseed < 15, 3 = random. */ +#define LUAJIT_SECURITY_STRID 1 +#endif + #ifndef LUAJIT_SECURITY_MCODE +/* Machine code page protection: 0 = insecure RWX, 1 = secure RW^X. 
*/ #define LUAJIT_SECURITY_MCODE 1 #endif diff --git a/src/lj_asm.c b/src/lj_asm.c index 2659c8a2d..cc7841c02 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1029,7 +1029,7 @@ static uint32_t ir_khash(ASMState *as, IRIns *ir) uint32_t lo, hi; UNUSED(as); if (irt_isstr(ir->t)) { - return ir_kstr(ir)->hash; + return ir_kstr(ir)->sid; } else if (irt_isnum(ir->t)) { lo = ir_knum(ir)->u32.lo; hi = ir_knum(ir)->u32.hi << 1; diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index d2fad1418..e7d2bf174 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -825,10 +825,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) } else { emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); - if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */ + if (irt_isstr(kt)) { /* Fetch of str->sid is cheaper than ra_allock. */ emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP); emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash)); + emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, sid)); emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); } else if (irref_isk(refkey)) { emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash, diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 0729a3a5f..b1fd3accb 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -847,9 +847,9 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_dnm(as, A64I_ANDw, dest, dest, tmphash); emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); } else if (irt_isstr(kt)) { - /* Fetch of str->hash is cheaper than ra_allock. */ + /* Fetch of str->sid is cheaper than ra_allock. 
*/ emit_dnm(as, A64I_ANDw, dest, dest, tmp); - emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash)); + emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid)); emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); } else { /* Must match with hash*() in lj_tab.c. */ emit_dnm(as, A64I_ANDw, dest, dest, tmp); diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index a2b8d8e05..513bd5ca4 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -1041,7 +1041,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) if (isk) { /* Nothing to do. */ } else if (irt_isstr(kt)) { - emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); + emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid)); } else { /* Must match with hash*() in lj_tab.c. */ emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2); emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31); diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 498fdac3d..77ab09d60 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -721,7 +721,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) if (isk) { /* Nothing to do. */ } else if (irt_isstr(kt)) { - emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); + emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, sid)); } else { /* Must match with hash*() in lj_tab.c. 
*/ emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1); emit_rotlwi(as, tmp2, tmp2, HASH_ROT3); diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index a3adee14d..e40b5e54d 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1228,7 +1228,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); } else if (irt_isstr(kt)) { - emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash)); + emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, sid)); emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); } else { /* Must match with hashrot() in lj_tab.c. */ emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask)); diff --git a/src/lj_gc.c b/src/lj_gc.c index 671b59832..cc4232a6e 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c @@ -417,6 +417,32 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) return p; } +/* Sweep one string interning table chain. Preserves hashalg bit. */ +static void gc_sweepstr(global_State *g, GCRef *chain) +{ + /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */ + int ow = otherwhite(g); + uintptr_t u = gcrefu(*chain); + GCRef q; + GCRef *p = &q; + GCobj *o; + setgcrefp(q, (u & ~(uintptr_t)1)); + while ((o = gcref(*p)) != NULL) { + if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ + lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), + "sweep of undead string"); + makewhite(g, o); /* String is alive, change to the current white. */ + p = &o->gch.nextgc; + } else { /* Otherwise string is dead, free it. */ + lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, + "sweep of unlive string"); + setgcrefr(*p, o->gch.nextgc); + lj_str_free(g, gco2str(o)); + } + } + setgcrefp(*chain, (gcrefu(q) | (u & 1))); +} + /* Check whether we can clear a key or a value slot from a table. 
*/ static int gc_mayclear(cTValue *o, int val) { @@ -571,9 +597,9 @@ void lj_gc_freeall(global_State *g) /* Free everything, except super-fixed objects (the main thread). */ g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED; gc_fullsweep(g, &g->gc.root); - strmask = g->strmask; + strmask = g->str.mask; for (i = 0; i <= strmask; i++) /* Free all string hash chains. */ - gc_fullsweep(g, &g->strhash[i]); + gc_sweepstr(g, &g->str.tab[i]); } /* -- Collector ----------------------------------------------------------- */ @@ -636,8 +662,8 @@ static size_t gc_onestep(lua_State *L) return 0; case GCSsweepstring: { GCSize old = g->gc.total; - gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ - if (g->gc.sweepstr > g->strmask) + gc_sweepstr(g, &g->str.tab[g->gc.sweepstr++]); /* Sweep one chain. */ + if (g->gc.sweepstr > g->str.mask) g->gc.state = GCSsweep; /* All string hash chains sweeped. */ lj_assertG(old >= g->gc.total, "sweep increased memory"); g->gc.estimate -= old - g->gc.total; @@ -649,8 +675,8 @@ static size_t gc_onestep(lua_State *L) lj_assertG(old >= g->gc.total, "sweep increased memory"); g->gc.estimate -= old - g->gc.total; if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { - if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1) - lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */ + if (g->str.num <= (g->str.mask >> 2) && g->str.mask > LJ_MIN_STRTAB*2-1) + lj_str_resize(L, g->str.mask >> 1); /* Shrink string table. */ if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ g->gc.state = GCSfinalize; #if LJ_HASFFI diff --git a/src/lj_obj.h b/src/lj_obj.h index 6c974812a..9d4bec083 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -13,7 +13,7 @@ #include "lj_def.h" #include "lj_arch.h" -/* -- Memory references (32 bit address space) ---------------------------- */ +/* -- Memory references --------------------------------------------------- */ /* Memory and GC object sizes. 
*/ typedef uint32_t MSize; @@ -44,7 +44,7 @@ typedef struct MRef { #define setmrefr(r, v) ((r).ptr32 = (v).ptr32) #endif -/* -- GC object references (32 bit address space) ------------------------- */ +/* -- GC object references ------------------------------------------------ */ /* GCobj reference */ typedef struct GCRef { @@ -287,12 +287,16 @@ typedef const TValue cTValue; /* -- String object ------------------------------------------------------- */ +typedef uint32_t StrHash; /* String hash value. */ +typedef uint32_t StrID; /* String ID. */ + /* String object header. String payload follows. */ typedef struct GCstr { GCHeader; uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */ - uint8_t unused; - MSize hash; /* Hash of string. */ + uint8_t hashalg; /* Hash algorithm. */ + StrID sid; /* Interned string ID. */ + StrHash hash; /* Hash of string. */ MSize len; /* Size of string. */ } GCstr; @@ -300,7 +304,6 @@ typedef struct GCstr { #define strdata(s) ((const char *)((s)+1)) #define strdatawr(s) ((char *)((s)+1)) #define strVdata(o) strdata(strV(o)) -#define sizestring(s) (sizeof(struct GCstr)+(s)->len+1) /* -- Userdata object ----------------------------------------------------- */ @@ -570,6 +573,7 @@ typedef enum { #define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)]) #define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)])) +/* Garbage collector state. */ typedef struct GCState { GCSize total; /* Memory currently allocated. */ GCSize threshold; /* Memory threshold. */ @@ -590,25 +594,36 @@ typedef struct GCState { MSize pause; /* Pause between successive GC cycles. */ } GCState; +/* String interning state. */ +typedef struct StrInternState { + GCRef *tab; /* String hash table anchors. */ + MSize mask; /* String hash mask (size of hash table - 1). */ + MSize num; /* Number of strings in hash table. */ + StrID id; /* Next string ID. */ + uint8_t idreseed; /* String ID reseed counter. 
*/ + uint8_t second; /* String interning table uses secondary hashing. */ + uint8_t unused1; + uint8_t unused2; + LJ_ALIGN(8) uint64_t seed; /* Random string seed. */ +} StrInternState; + /* Global state, shared by all threads of a Lua universe. */ typedef struct global_State { - GCRef *strhash; /* String hash table (hash chain anchors). */ - MSize strmask; /* String hash mask (size of hash table - 1). */ - MSize strnum; /* Number of strings in hash table. */ lua_Alloc allocf; /* Memory allocator. */ void *allocd; /* Memory allocator data. */ GCState gc; /* Garbage collector. */ - volatile int32_t vmstate; /* VM state or current JIT code trace number. */ - SBuf tmpbuf; /* Temporary string buffer. */ GCstr strempty; /* Empty string. */ uint8_t stremptyz; /* Zero terminator of empty string. */ uint8_t hookmask; /* Hook mask. */ uint8_t dispatchmode; /* Dispatch mode. */ uint8_t vmevmask; /* VM event mask. */ + StrInternState str; /* String interning. */ + volatile int32_t vmstate; /* VM state or current JIT code trace number. */ GCRef mainthref; /* Link to main thread. */ - TValue registrytv; /* Anchor for registry. */ + SBuf tmpbuf; /* Temporary string buffer. */ TValue tmptv, tmptv2; /* Temporary TValues. */ Node nilnode; /* Fallback 1-element hash part (nil key and value). */ + TValue registrytv; /* Anchor for registry. */ GCupval uvhead; /* Head of double-linked list of all open upvalues. */ int32_t hookcount; /* Instruction hook countdown. */ int32_t hookcstart; /* Start count for instruction hook counter. */ diff --git a/src/lj_state.c b/src/lj_state.c index a4d072be1..4f77e71f1 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -150,7 +150,7 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud) /* NOBARRIER: State initialization, all objects are white. 
*/ setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL))); settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY)); - lj_str_resize(L, LJ_MIN_STRTAB-1); + lj_str_init(L); lj_meta_init(L); lj_lex_init(L); fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */ @@ -166,12 +166,12 @@ static void close_state(lua_State *L) lj_gc_freeall(g); lj_assertG(gcref(g->gc.root) == obj2gco(L), "main thread is not first GC object"); - lj_assertG(g->strnum == 0, "leaked %d strings", g->strnum); + lj_assertG(g->str.num == 0, "leaked %d strings", g->str.num); lj_trace_freestate(g); #if LJ_HASFFI lj_ctype_freestate(g); #endif - lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); + lj_str_freetab(g); lj_buf_free(g, &g->tmpbuf); lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); lj_assertG(g->gc.total == sizeof(GG_State), @@ -231,7 +231,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd) setgcref(g->mainthref, obj2gco(L)); setgcref(g->uvhead.prev, obj2gco(&g->uvhead)); setgcref(g->uvhead.next, obj2gco(&g->uvhead)); - g->strmask = ~(MSize)0; + g->str.mask = ~(MSize)0; setnilV(registry(L)); setnilV(&g->nilnode.val); setnilV(&g->nilnode.key); diff --git a/src/lj_str.c b/src/lj_str.c index 0253c15e0..5bf8426ca 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -11,6 +11,7 @@ #include "lj_err.h" #include "lj_str.h" #include "lj_char.h" +#include "lj_prng.h" /* -- String helpers ------------------------------------------------------ */ @@ -37,28 +38,6 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) return (int32_t)(a->len - b->len); } -/* Fast string data comparison. Caveat: unaligned access to 1st string! */ -static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) -{ - MSize i = 0; - lj_assertX(len > 0, "fast string compare with zero length"); - lj_assertX((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4, - "fast string compare crossing page boundary"); - do { /* Note: innocuous access up to end of string + 3. 
*/ - uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i); - if (v) { - i -= len; -#if LJ_LE - return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1; -#else - return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1; -#endif - } - i += 4; - } while (i < len); - return 0; -} - /* Find fixed string p inside string s. */ const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen) { @@ -91,108 +70,301 @@ int lj_str_haspattern(GCstr *s) return 0; /* No pattern matching chars found. */ } -/* -- String interning ---------------------------------------------------- */ - -/* Resize the string hash table (grow and shrink). */ -void lj_str_resize(lua_State *L, MSize newmask) -{ - global_State *g = G(L); - GCRef *newhash; - MSize i; - if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) - return; /* No resizing during GC traversal or if already too big. */ - newhash = lj_mem_newvec(L, newmask+1, GCRef); - memset(newhash, 0, (newmask+1)*sizeof(GCRef)); - for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */ - GCobj *p = gcref(g->strhash[i]); - while (p) { /* Follow each hash chain and reinsert all strings. */ - MSize h = gco2str(p)->hash & newmask; - GCobj *next = gcnext(p); - /* NOBARRIER: The string table is a GC root. */ - setgcrefr(p->gch.nextgc, newhash[h]); - setgcref(newhash[h], p); - p = next; - } - } - lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); - g->strmask = newmask; - g->strhash = newhash; -} +/* -- String hashing ------------------------------------------------------ */ -/* Intern a string and return string object. */ -GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) +/* Keyed sparse ARX string hash. Constant time. */ +static StrHash hash_sparse(uint64_t seed, const char *str, MSize len) { - global_State *g; - GCstr *s; - GCobj *o; - MSize len = (MSize)lenx; - MSize a, b, h = len; - if (lenx >= LJ_MAX_STR) - lj_err_msg(L, LJ_ERR_STROV); - g = G(L); - /* Compute string hash. 
Constants taken from lookup3 hash by Bob Jenkins. */ + /* Constants taken from lookup3 hash by Bob Jenkins. */ + StrHash a, b, h = len ^ (StrHash)seed; if (len >= 4) { /* Caveat: unaligned access! */ a = lj_getu32(str); h ^= lj_getu32(str+len-4); b = lj_getu32(str+(len>>1)-2); h ^= b; h -= lj_rol(b, 14); b += lj_getu32(str+(len>>2)-1); - } else if (len > 0) { + } else { a = *(const uint8_t *)str; h ^= *(const uint8_t *)(str+len-1); b = *(const uint8_t *)(str+(len>>1)); h ^= b; h -= lj_rol(b, 14); - } else { - return &g->strempty; } a ^= h; a -= lj_rol(h, 11); b ^= a; b -= lj_rol(a, 25); h ^= b; h -= lj_rol(b, 16); - /* Check if the string has already been interned. */ - o = gcref(g->strhash[h & g->strmask]); - if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { - while (o != NULL) { - GCstr *sx = gco2str(o); - if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { - /* Resurrect if dead. Can only happen with fixstring() (keywords). */ - if (isdead(g, o)) flipwhite(o); - return sx; /* Return existing string. */ + return h; +} + +#if LUAJIT_SECURITY_STRHASH +/* Keyed dense ARX string hash. Linear time. */ +static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h, + const char *str, MSize len) +{ + StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4)); + if (len > 12) { + StrHash a = (StrHash)seed; + const char *pe = str+len-12, *p = pe, *q = str; + do { + a += lj_getu32(p); + b += lj_getu32(p+4); + h += lj_getu32(p+8); + p = q; q += 12; + h ^= b; h -= lj_rol(b, 14); + a ^= h; a -= lj_rol(h, 11); + b ^= a; b -= lj_rol(a, 25); + } while (p < pe); + h ^= b; h -= lj_rol(b, 16); + a ^= h; a -= lj_rol(h, 4); + b ^= a; b -= lj_rol(a, 14); + } + return b; +} +#endif + +/* -- String interning ---------------------------------------------------- */ + +#define LJ_STR_MAXCOLL 32 + +/* Resize the string interning hash table (grow and shrink). 
*/ +void lj_str_resize(lua_State *L, MSize newmask) +{ + global_State *g = G(L); + GCRef *newtab, *oldtab = g->str.tab; + MSize i; + + /* No resizing during GC traversal or if already too big. */ + if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) + return; + + newtab = lj_mem_newvec(L, newmask+1, GCRef); + memset(newtab, 0, (newmask+1)*sizeof(GCRef)); + +#if LUAJIT_SECURITY_STRHASH + /* Check which chains need secondary hashes. */ + if (g->str.second) { + int newsecond = 0; + /* Compute primary chain lengths. */ + for (i = g->str.mask; i != ~(MSize)0; i--) { + GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1); + while (o) { + GCstr *s = gco2str(o); + MSize hash = s->hashalg ? hash_sparse(g->str.seed, strdata(s), s->len) : + s->hash; + hash &= newmask; + setgcrefp(newtab[hash], gcrefu(newtab[hash]) + 1); + o = gcnext(o); } - o = gcnext(o); } - } else { /* Slow path: end of string is too close to a page boundary. */ - while (o != NULL) { - GCstr *sx = gco2str(o); - if (sx->len == len && memcmp(str, strdata(sx), len) == 0) { - /* Resurrect if dead. Can only happen with fixstring() (keywords). */ - if (isdead(g, o)) flipwhite(o); - return sx; /* Return existing string. */ + /* Mark secondary chains. */ + for (i = newmask; i != ~(MSize)0; i--) { + int secondary = gcrefu(newtab[i]) > LJ_STR_MAXCOLL; + newsecond |= secondary; + setgcrefp(newtab[i], secondary); + } + g->str.second = newsecond; + } +#endif + + /* Reinsert all strings from the old table into the new table. */ + for (i = g->str.mask; i != ~(MSize)0; i--) { + GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1); + while (o) { + GCobj *next = gcnext(o); + GCstr *s = gco2str(o); + MSize hash = s->hash; +#if LUAJIT_SECURITY_STRHASH + uintptr_t u; + if (LJ_LIKELY(!s->hashalg)) { /* String hashed with primary hash. */ + hash &= newmask; + u = gcrefu(newtab[hash]); + if (LJ_UNLIKELY(u & 1)) { /* Switch string to secondary hash. 
*/ + s->hash = hash = hash_dense(g->str.seed, s->hash, strdata(s), s->len); + s->hashalg = 1; + hash &= newmask; + u = gcrefu(newtab[hash]); + } + } else { /* String hashed with secondary hash. */ + MSize shash = hash_sparse(g->str.seed, strdata(s), s->len); + u = gcrefu(newtab[shash & newmask]); + if (u & 1) { + hash &= newmask; + u = gcrefu(newtab[hash]); + } else { /* Revert string back to primary hash. */ + s->hash = shash; + s->hashalg = 0; + hash = (shash & newmask); + } + } + /* NOBARRIER: The string table is a GC root. */ + setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1)); + setgcrefp(newtab[hash], ((uintptr_t)o | (u & 1))); +#else + hash &= newmask; + /* NOBARRIER: The string table is a GC root. */ + setgcrefr(o->gch.nextgc, newtab[hash]); + setgcref(newtab[hash], o); +#endif + o = next; + } + } + + /* Free old table and replace with new table. */ + lj_str_freetab(g); + g->str.tab = newtab; + g->str.mask = newmask; +} + +#if LUAJIT_SECURITY_STRHASH +/* Rehash and rechain all strings in a chain. */ +static LJ_NOINLINE GCstr *lj_str_rehash_chain(lua_State *L, StrHash hashc, + const char *str, MSize len) +{ + global_State *g = G(L); + int ow = g->gc.state == GCSsweepstring ? otherwhite(g) : 0; /* Sweeping? */ + GCRef *strtab = g->str.tab; + MSize strmask = g->str.mask; + GCobj *o = gcref(strtab[hashc & strmask]); + setgcrefp(strtab[hashc & strmask], (void *)((uintptr_t)1)); + g->str.second = 1; + while (o) { + uintptr_t u; + GCobj *next = gcnext(o); + GCstr *s = gco2str(o); + StrHash hash; + if (ow) { /* Must sweep while rechaining. */ + if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* String alive? */ + lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), + "sweep of undead string"); + makewhite(g, o); + } else { /* Free dead string. */ + lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, + "sweep of unlive string"); + lj_str_free(g, s); + o = next; + continue; } - o = gcnext(o); } + hash = s->hash; + if (!s->hashalg) { /* Rehash with secondary hash. 
*/ + hash = hash_dense(g->str.seed, hash, strdata(s), s->len); + s->hash = hash; + s->hashalg = 1; + } + /* Rechain. */ + hash &= strmask; + u = gcrefu(strtab[hash]); + setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1)); + setgcrefp(strtab[hash], ((uintptr_t)o | (u & 1))); + o = next; } - /* Nope, create a new string. */ - s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); + /* Try to insert the pending string again. */ + return lj_str_new(L, str, len); +} +#endif + +/* Reseed String ID from PRNG after random interval < 2^bits. */ +#if LUAJIT_SECURITY_STRID == 1 +#define STRID_RESEED_INTERVAL 8 +#elif LUAJIT_SECURITY_STRID == 2 +#define STRID_RESEED_INTERVAL 4 +#elif LUAJIT_SECURITY_STRID >= 3 +#define STRID_RESEED_INTERVAL 0 +#endif + +/* Allocate a new string and add to string interning table. */ +static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len, + StrHash hash, int hashalg) +{ + GCstr *s = lj_mem_newt(L, lj_str_size(len), GCstr); + global_State *g = G(L); + uintptr_t u; newwhite(g, s); s->gct = ~LJ_TSTR; s->len = len; - s->hash = h; + s->hash = hash; +#ifndef STRID_RESEED_INTERVAL + s->sid = g->str.id++; +#elif STRID_RESEED_INTERVAL + if (!g->str.idreseed--) { + uint64_t r = lj_prng_u64(&g->prng); + g->str.id = (StrID)r; + g->str.idreseed = (uint8_t)(r >> (64 - STRID_RESEED_INTERVAL)); + } + s->sid = g->str.id++; +#else + s->sid = (StrID)lj_prng_u64(&g->prng); +#endif s->reserved = 0; + s->hashalg = (uint8_t)hashalg; + /* Clear last 4 bytes of allocated memory. Implies zero-termination, too. */ + *(uint32_t *)(strdatawr(s)+(len & ~(MSize)3)) = 0; memcpy(strdatawr(s), str, len); - strdatawr(s)[len] = '\0'; /* Zero-terminate string. */ - /* Add it to string hash table. */ - h &= g->strmask; - s->nextgc = g->strhash[h]; + /* Add to string hash table. */ + hash &= g->str.mask; + u = gcrefu(g->str.tab[hash]); + setgcrefp(s->nextgc, (u & ~(uintptr_t)1)); /* NOBARRIER: The string table is a GC root. 
*/ - setgcref(g->strhash[h], obj2gco(s)); - if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */ - lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */ + setgcrefp(g->str.tab[hash], ((uintptr_t)s | (u & 1))); + if (g->str.num++ > g->str.mask) /* Allow a 100% load factor. */ + lj_str_resize(L, (g->str.mask<<1)+1); /* Grow string table. */ return s; /* Return newly interned string. */ } +/* Intern a string and return string object. */ +GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) +{ + global_State *g = G(L); + if (lenx-1 < LJ_MAX_STR-1) { + MSize len = (MSize)lenx; + StrHash hash = hash_sparse(g->str.seed, str, len); + MSize coll = 0; + int hashalg = 0; + /* Check if the string has already been interned. */ + GCobj *o = gcref(g->str.tab[hash & g->str.mask]); +#if LUAJIT_SECURITY_STRHASH + if (LJ_UNLIKELY((uintptr_t)o & 1)) { /* Secondary hash for this chain? */ + hashalg = 1; + hash = hash_dense(g->str.seed, hash, str, len); + o = (GCobj *)(gcrefu(g->str.tab[hash & g->str.mask]) & ~(uintptr_t)1); + } +#endif + while (o != NULL) { + GCstr *sx = gco2str(o); + if (sx->hash == hash && sx->len == len) { + if (memcmp(str, strdata(sx), len) == 0) { + if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */ + return sx; /* Return existing string. */ + } + coll++; + } + coll++; + o = gcnext(o); + } +#if LUAJIT_SECURITY_STRHASH + /* Rehash chain if there are too many collisions. */ + if (LJ_UNLIKELY(coll > LJ_STR_MAXCOLL) && !hashalg) { + return lj_str_rehash_chain(L, hash, str, len); + } +#endif + /* Otherwise allocate a new string. 
*/ + return lj_str_alloc(L, str, len, hash, hashalg); + } else { + if (lenx) + lj_err_msg(L, LJ_ERR_STROV); + return &g->strempty; + } +} + void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) { - g->strnum--; - lj_mem_free(g, s, sizestring(s)); + g->str.num--; + lj_mem_free(g, s, lj_str_size(s->len)); +} + +void LJ_FASTCALL lj_str_init(lua_State *L) +{ + global_State *g = G(L); + g->str.seed = lj_prng_u64(&g->prng); + lj_str_resize(L, LJ_MIN_STRTAB-1); } diff --git a/src/lj_str.h b/src/lj_str.h index 2e9bfc1df..01c6ba6b5 100644 --- a/src/lj_str.h +++ b/src/lj_str.h @@ -20,8 +20,12 @@ LJ_FUNC int lj_str_haspattern(GCstr *s); LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); +LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L); +#define lj_str_freetab(g) \ + (lj_mem_freevec(g, g->str.tab, g->str.mask+1, GCRef)) #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) +#define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3)) #endif diff --git a/src/lj_tab.c b/src/lj_tab.c index efc423cb9..982b07639 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -23,8 +23,8 @@ static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) return &n[hash & t->hmask]; } -/* String hashes are precomputed when they are interned. */ -#define hashstr(t, s) hashmask(t, (s)->hash) +/* String IDs are generated when a string is interned. 
*/ +#define hashstr(t, s) hashmask(t, (s)->sid) #define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) #define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 013688fbe..770a8e219 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -1012,9 +1012,9 @@ static void build_subroutines(BuildCtx *ctx) | cmp TAB:RB, #0 | beq ->fff_restv | ldr CARG3, TAB:RB->hmask - | ldr CARG4, STR:RC->hash + | ldr CARG4, STR:RC->sid | ldr NODE:INS, TAB:RB->node - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask + | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask | add CARG3, CARG3, CARG3, lsl #1 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 |3: // Rearranged logic, because we expect _not_ to find the key. @@ -3500,10 +3500,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TGETS_Z: | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 | ldr CARG3, TAB:CARG1->hmask - | ldr CARG4, STR:RC->hash + | ldr CARG4, STR:RC->sid | ldr NODE:INS, TAB:CARG1->node | mov TAB:RB, TAB:CARG1 - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask + | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask | add CARG3, CARG3, CARG3, lsl #1 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 |1: @@ -3647,10 +3647,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TSETS_Z: | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 | ldr CARG3, TAB:CARG1->hmask - | ldr CARG4, STR:RC->hash + | ldr CARG4, STR:RC->sid | ldr NODE:INS, TAB:CARG1->node | mov TAB:RB, TAB:CARG1 - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask + | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask | add CARG3, CARG3, CARG3, lsl #1 | mov CARG4, #0 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index c157696ca..4a729f650 100644 --- a/src/vm_arm64.dasc +++ 
b/src/vm_arm64.dasc @@ -993,9 +993,9 @@ static void build_subroutines(BuildCtx *ctx) | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] | cbz TAB:RB, ->fff_restv | ldr TMP1w, TAB:RB->hmask - | ldr TMP2w, STR:RC->hash + | ldr TMP2w, STR:RC->sid | ldr NODE:CARG3, TAB:RB->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask + | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask | add TMP1, TMP1, TMP1, lsl #1 | movn CARG4, #~LJ_TSTR | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 @@ -2943,9 +2943,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TGETS_Z: | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst | ldr TMP1w, TAB:CARG2->hmask - | ldr TMP2w, STR:RC->hash + | ldr TMP2w, STR:RC->sid | ldr NODE:CARG3, TAB:CARG2->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask + | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask | add TMP1, TMP1, TMP1, lsl #1 | movn CARG4, #~LJ_TSTR | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 @@ -3069,9 +3069,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TSETS_Z: | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src | ldr TMP1w, TAB:CARG2->hmask - | ldr TMP2w, STR:RC->hash + | ldr TMP2w, STR:RC->sid | ldr NODE:CARG3, TAB:CARG2->node - | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask + | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask | add TMP1, TMP1, TMP1, lsl #1 | movn CARG4, #~LJ_TSTR | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 0c84c13b6..91de4b5c2 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -1152,9 +1152,9 @@ static void build_subroutines(BuildCtx *ctx) |. li SFARG1HI, LJ_TNIL | lw TMP0, TAB:SFARG1LO->hmask | li SFARG1HI, LJ_TTAB // Use metatable as default result. 
- | lw TMP1, STR:RC->hash + | lw TMP1, STR:RC->sid | lw NODE:TMP2, TAB:SFARG1LO->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | sll TMP0, TMP1, 5 | sll TMP1, TMP1, 3 | subu TMP1, TMP0, TMP1 @@ -4029,9 +4029,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TGETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash + | lw TMP1, STR:RC->sid | lw NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | sll TMP0, TMP1, 5 | sll TMP1, TMP1, 3 | subu TMP1, TMP0, TMP1 @@ -4203,10 +4203,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TSETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash + | lw TMP1, STR:RC->sid | lw NODE:TMP2, TAB:RB->node | sb r0, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | sll TMP0, TMP1, 5 | sll TMP1, TMP1, 3 | subu TMP1, TMP0, TMP1 diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc index dac143a43..71acf9ed1 100644 --- a/src/vm_mips64.dasc +++ b/src/vm_mips64.dasc @@ -1201,9 +1201,9 @@ static void build_subroutines(BuildCtx *ctx) | beqz TAB:RB, ->fff_restv |. 
li CARG1, LJ_TNIL | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash + | lw TMP1, STR:RC->sid | ld NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | dsll TMP0, TMP1, 5 | dsll TMP1, TMP1, 3 | dsubu TMP1, TMP0, TMP1 @@ -4239,9 +4239,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TGETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash + | lw TMP1, STR:RC->sid | ld NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | sll TMP0, TMP1, 5 | sll TMP1, TMP1, 3 | subu TMP1, TMP0, TMP1 @@ -4402,10 +4402,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TSETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash + | lw TMP1, STR:RC->sid | ld NODE:TMP2, TAB:RB->node | sb r0, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | sll TMP0, TMP1, 5 | sll TMP1, TMP1, 3 | subu TMP1, TMP0, TMP1 diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 7a2d321ed..18fc6f935 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -1447,9 +1447,9 @@ static void build_subroutines(BuildCtx *ctx) | beq ->fff_restv | lwz TMP0, TAB:CARG1->hmask | li CARG3, LJ_TTAB // Use metatable as default result. 
- | lwz TMP1, STR:RC->hash + | lwz TMP1, STR:RC->sid | lwz NODE:TMP2, TAB:CARG1->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | slwi TMP0, TMP1, 5 | slwi TMP1, TMP1, 3 | sub TMP1, TMP0, TMP1 @@ -4588,9 +4588,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TGETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 | lwz TMP0, TAB:RB->hmask - | lwz TMP1, STR:RC->hash + | lwz TMP1, STR:RC->sid | lwz NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | slwi TMP0, TMP1, 5 | slwi TMP1, TMP1, 3 | sub TMP1, TMP0, TMP1 @@ -4784,10 +4784,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TSETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 | lwz TMP0, TAB:RB->hmask - | lwz TMP1, STR:RC->hash + | lwz TMP1, STR:RC->sid | lwz NODE:TMP2, TAB:RB->node | stb ZERO, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask |.if FPU | lfdx f14, BASE, RA |.else diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 77a579d57..14a54e343 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -1230,7 +1230,7 @@ static void build_subroutines(BuildCtx *ctx) | mov [BASE-16], TAB:RC // Store metatable as default result. 
| mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)] | mov RAd, TAB:RB->hmask - | and RAd, STR:RC->hash + | and RAd, STR:RC->sid | settp STR:RC, LJ_TSTR | imul RAd, #NODE | add NODE:RA, TAB:RB->node @@ -3674,7 +3674,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | checktab TAB:RB, ->vmeta_tgets |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr * | mov TMPRd, TAB:RB->hmask - | and TMPRd, STR:RC->hash + | and TMPRd, STR:RC->sid | imul TMPRd, #NODE | add NODE:TMPR, TAB:RB->node | settp ITYPE, STR:RC, LJ_TSTR @@ -3806,7 +3806,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | checktab TAB:RB, ->vmeta_tsets |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr * | mov TMPRd, TAB:RB->hmask - | and TMPRd, STR:RC->hash + | and TMPRd, STR:RC->sid | imul TMPRd, #NODE | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. | add NODE:TMPR, TAB:RB->node diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 57c8e4fcb..f9bea4261 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -1522,7 +1522,7 @@ static void build_subroutines(BuildCtx *ctx) | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. | mov [BASE-8], TAB:RB | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash + | and RA, STR:RC->sid | imul RA, #NODE | add NODE:RA, TAB:RB->node |3: // Rearranged logic, because we expect _not_ to find the key. @@ -4286,7 +4286,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov TAB:RB, [BASE+RB*8] |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash + | and RA, STR:RC->sid | imul RA, #NODE | add NODE:RA, TAB:RB->node |1: @@ -4457,7 +4457,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov TAB:RB, [BASE+RB*8] |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash + | and RA, STR:RC->sid | imul RA, #NODE | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. 
| add NODE:RA, TAB:RB->node From 2e68e1fc1897bfd83f7498b4df04d0bde256663d Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 23 Jun 2020 03:12:32 +0200 Subject: [PATCH 17/47] Add jit.security(). --- src/lib_jit.c | 7 +++++++ src/lj_arch.h | 10 ++++++++++ 2 files changed, 17 insertions(+) diff --git a/src/lib_jit.c b/src/lib_jit.c index 7348ef210..21e01d3ed 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -113,6 +113,13 @@ LJLIB_CF(jit_status) #endif } +LJLIB_CF(jit_security) +{ + int idx = lj_lib_checkopt(L, 1, -1, LJ_SECURITY_MODESTRING); + setintV(L->top++, ((LJ_SECURITY_MODE >> (2*idx)) & 3)); + return 1; +} + LJLIB_CF(jit_attach) { #ifdef LUAJIT_DISABLE_VMEVENT diff --git a/src/lj_arch.h b/src/lj_arch.h index f148b3f7e..240d0d161 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -665,4 +665,14 @@ extern void *LJ_WIN_LOADLIBA(const char *path); #define LUAJIT_SECURITY_MCODE 1 #endif +#define LJ_SECURITY_MODE \ + ( 0u \ + | ((LUAJIT_SECURITY_PRNG & 3) << 0) \ + | ((LUAJIT_SECURITY_STRHASH & 3) << 2) \ + | ((LUAJIT_SECURITY_STRID & 3) << 4) \ + | ((LUAJIT_SECURITY_MCODE & 3) << 6) \ + ) +#define LJ_SECURITY_MODESTRING \ + "\004prng\007strhash\005strid\005mcode" + #endif From 17fb96d904d49126bebce78fd0dd14019668998a Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 23 Jun 2020 03:13:54 +0200 Subject: [PATCH 18/47] Windows: Fix NtAllocateVirtualMemory prototype. --- src/lj_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_alloc.c b/src/lj_alloc.c index bf2ae8477..2d41481d7 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c @@ -124,7 +124,7 @@ #if LJ_ALLOC_NTAVM /* Undocumented, but hey, that's what we all love so much about Windows. 
*/ -typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, +typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG_PTR zbits, size_t *size, ULONG alloctype, ULONG prot); static PNTAVM ntavm; From 471f8936cbd6aa80a937e375fe53ecadab93696a Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 23 Jun 2020 03:14:35 +0200 Subject: [PATCH 19/47] Fix pointer check for non-GC64 mode. Thanks to Stefan Hett. --- src/lj_def.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lj_def.h b/src/lj_def.h index e458c89f4..5e63da3e8 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -104,9 +104,10 @@ typedef unsigned int uintptr_t; #define checku16(x) ((x) == (int32_t)(uint16_t)(x)) #define checki32(x) ((x) == (int32_t)(x)) #define checku32(x) ((x) == (uint32_t)(x)) +#define checkptr31(x) (((uint64_t)(uintptr_t)(x) >> 31) == 0) #define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) #define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0) -#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr32((x)) :1) +#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr31((x)) :1) /* Every half-decent C compiler transforms this into a rotate instruction. */ #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) From 2e2fb8f6b5118e1a7996b76600c6ee98bfd5f203 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 23 Jun 2020 03:24:40 +0200 Subject: [PATCH 20/47] OSX/iOS: Handle iOS simulator and ARM64 Macs. 
--- src/lj_arch.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/lj_arch.h b/src/lj_arch.h index 240d0d161..baa7b6637 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -79,6 +79,7 @@ #elif defined(__linux__) #define LUAJIT_OS LUAJIT_OS_LINUX #elif defined(__MACH__) && defined(__APPLE__) +#include "TargetConditionals.h" #define LUAJIT_OS LUAJIT_OS_OSX #elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ defined(__NetBSD__) || defined(__OpenBSD__) || \ @@ -117,10 +118,15 @@ #define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) #define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX) #define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD) -#define LJ_TARGET_IOS (LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64)) #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX +#if TARGET_OS_IPHONE +#define LJ_TARGET_IOS 1 +#else +#define LJ_TARGET_IOS 0 +#endif + #ifdef __CELLOS_LV2__ #define LJ_TARGET_PS3 1 #define LJ_TARGET_CONSOLE 1 From 67654be56d0a5d644d7db04ad82d0383f21832ac Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 23 Jun 2020 11:28:46 +0200 Subject: [PATCH 21/47] Follow-up fix for iOS build. --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 178a5acd3..702207cc9 100644 --- a/src/Makefile +++ b/src/Makefile @@ -362,7 +362,7 @@ ifneq ($(HOST_SYS),$(TARGET_SYS)) HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX else ifeq (iOS,$(TARGET_SYS)) - HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX + HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX -DTARGET_OS_IPHONE=1 else HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER endif From 5ef9e45238ff247b6f2dec572a89945e777b5abe Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 23 Jun 2020 19:17:13 +0200 Subject: [PATCH 22/47] Fix OSX build. Reported by jnozsc. 
--- src/lj_prng.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/lj_prng.c b/src/lj_prng.c index 62a6bbb7f..c24fe6301 100644 --- a/src/lj_prng.c +++ b/src/lj_prng.c @@ -183,8 +183,13 @@ int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs) #elif LJ_TARGET_OSX || LJ_TARGET_BSD || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN - if ((!__ELF__ || getentropy) && getentropy(rs->u, sizeof(rs->u)) == 0) +#ifdef __ELF__ + if (getentropy && getentropy(rs->u, sizeof(rs->u)) == 0) + goto ok; +#else + if (getentropy(rs->u, sizeof(rs->u)) == 0) goto ok; +#endif #endif From e1e3034cf613f6913285fea41bbdac1cbeb2a9a8 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 25 Jun 2020 15:19:48 +0200 Subject: [PATCH 23/47] Fix compiler warning. --- src/lib_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib_base.c b/src/lib_base.c index 8c1e98a6d..eb604538c 100644 --- a/src/lib_base.c +++ b/src/lib_base.c @@ -301,7 +301,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) while (lj_char_isspace((unsigned char)(*ep))) ep++; if (*ep == '\0') { if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) { - if (neg) ul = -ul; + if (neg) ul = (unsigned long)-(long)ul; setintV(L->base-1-LJ_FR2, (int32_t)ul); } else { lua_Number n = (lua_Number)ul; From 18eef08fb8df11dd887b1e7e3e6f7919c6f56720 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 2 Jul 2020 01:23:41 +0200 Subject: [PATCH 24/47] Android/ARM: Fix build with recent NDK. --- src/lib_io.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/lib_io.c b/src/lib_io.c index c7d1bb311..c889a6b03 100644 --- a/src/lib_io.c +++ b/src/lib_io.c @@ -304,6 +304,14 @@ LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0) return luaL_fileresult(L, fflush(io_tofile(L)->fp) == 0, NULL); } +#if LJ_32 && defined(__ANDROID__) && __ANDROID_API__ < 24 +/* The Android NDK is such an unmatched marvel of engineering. 
*/ +extern int fseeko32(FILE *, long int, int) __asm__("fseeko"); +extern long int ftello32(FILE *) __asm__("ftello"); +#define fseeko(fp, pos, whence) (fseeko32((fp), (pos), (whence))) +#define ftello(fp) (ftello32((fp))) +#endif + LJLIB_CF(io_method_seek) { FILE *fp = io_tofile(L)->fp; From 384d6d56f4a3841fdef607a511dda92a579af2ff Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 2 Jul 2020 01:24:39 +0200 Subject: [PATCH 25/47] Fix Clang build. --- src/lj_alloc.c | 2 +- src/lj_def.h | 4 ++-- src/lj_emit_x86.h | 2 +- src/lj_err.c | 4 ++-- src/lj_ircall.h | 2 +- src/lj_mcode.c | 2 +- src/lj_strfmt.h | 2 +- src/lj_strscan.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/lj_alloc.c b/src/lj_alloc.c index 2d41481d7..5de60b82b 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c @@ -597,7 +597,7 @@ static int has_segment_link(mstate m, msegmentptr ss) noncontiguous segments are added. */ #define TOP_FOOT_SIZE\ - (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) + (align_offset(TWO_SIZE_T_SIZES)+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) /* ---------------------------- Indexing Bins ---------------------------- */ diff --git a/src/lj_def.h b/src/lj_def.h index 5e63da3e8..cfe18c48c 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -120,7 +120,7 @@ typedef uintptr_t BloomFilter; #define bloomset(b, x) ((b) |= bloombit((x))) #define bloomtest(b, x) ((b) & bloombit((x))) -#if defined(__GNUC__) || defined(__psp2__) +#if defined(__GNUC__) || defined(__clang__) || defined(__psp2__) #define LJ_NORET __attribute__((noreturn)) #define LJ_ALIGN(n) __attribute__((aligned(n))) @@ -182,7 +182,7 @@ static LJ_AINLINE uint64_t lj_bswap64(uint64_t x) { return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32)); } -#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) +#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __clang__ static LJ_AINLINE uint32_t lj_bswap(uint32_t 
x) { return (uint32_t)__builtin_bswap32((int32_t)x); diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index 66750a96c..9173a299c 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -45,7 +45,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, *(uint32_t *)(p+delta-5) = (uint32_t)xo; return p+delta-5; } -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__clang__) if (__builtin_constant_p(xo) && n == -2) p[delta-2] = (MCode)(xo >> 24); else if (__builtin_constant_p(xo) && n == -3) diff --git a/src/lj_err.c b/src/lj_err.c index 41fbf5c7f..39339b108 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -61,7 +61,7 @@ ** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4). */ -#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND +#if (defined(__GNUC__) || defined(__clang__)) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND #define LJ_UNWIND_EXT 1 #elif LJ_TARGET_WINDOWS #define LJ_UNWIND_EXT 1 @@ -184,7 +184,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) /* -- External frame unwinding -------------------------------------------- */ -#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN +#if (defined(__GNUC__) || defined(__clang__)) && !LJ_NO_UNWIND && !LJ_ABI_WIN /* ** We have to use our own definitions instead of the mandatory (!) 
unwind.h, diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 58cebc5dd..a45dde343 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -334,7 +334,7 @@ extern double lj_vm_sfmax(double a, double b); #endif #if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) -#ifdef __GNUC__ +#if defined(__GNUC__) || defined(__clang__) #define fp64_l2d __floatdidf #define fp64_ul2d __floatundidf #define fp64_l2f __floatdisf diff --git a/src/lj_mcode.c b/src/lj_mcode.c index b2d121188..a5153b25b 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -45,7 +45,7 @@ void lj_mcode_sync(void *start, void *end) sys_icache_invalidate(start, (char *)end-(char *)start); #elif LJ_TARGET_PPC lj_vm_cachesync(start, end); -#elif defined(__GNUC__) +#elif defined(__GNUC__) || defined(__clang__) __clear_cache(start, end); #else #error "Missing builtin to flush instruction cache" diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h index b4fbbb94f..9fe46d670 100644 --- a/src/lj_strfmt.h +++ b/src/lj_strfmt.h @@ -118,7 +118,7 @@ LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o); LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp); LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...) -#ifdef __GNUC__ +#if defined(__GNUC__) || defined(__clang__) __attribute__ ((format (printf, 2, 3))) #endif ; diff --git a/src/lj_strscan.c b/src/lj_strscan.c index 0e37a4f6f..11abd5266 100644 --- a/src/lj_strscan.c +++ b/src/lj_strscan.c @@ -79,7 +79,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg) /* Avoid double rounding for denormals. */ if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) { /* NYI: all of this generates way too much code on 32 bit CPUs. */ -#if defined(__GNUC__) && LJ_64 +#if (defined(__GNUC__) || defined(__clang__)) && LJ_64 int32_t b = (int32_t)(__builtin_clzll(x)^63); #else int32_t b = (x>>32) ? 
32+(int32_t)lj_fls((uint32_t)(x>>32)) : From 53f82e6e2e858a0a62fd1a2ff47e9866693382e6 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 12 Jul 2020 14:30:34 +0200 Subject: [PATCH 26/47] Fix frame traversal for __gc handler frames. Reported by Changochen. --- src/lj_err.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lj_err.c b/src/lj_err.c index caa7487f2..e3e0c2eb7 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -529,6 +529,7 @@ static ptrdiff_t finderrfunc(lua_State *L) if (cframe_canyield(cf)) return 0; if (cframe_errfunc(cf) >= 0) return cframe_errfunc(cf); + cf = cframe_prev(cf); frame = frame_prevd(frame); break; case FRAME_PCALL: From 7eb96843ff9d4bed019e8cd7c17727557e39e89c Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 12 Jul 2020 15:09:55 +0200 Subject: [PATCH 27/47] Add FAQ about sandboxing. Minor fixes. --- doc/extensions.html | 2 +- doc/faq.html | 40 ++++++++++++++++++++++++++++++++-------- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/doc/extensions.html b/doc/extensions.html index b36e2855d..0fc65330d 100644 --- a/doc/extensions.html +++ b/doc/extensions.html @@ -326,7 +326,7 @@

C++ Exception Interoperability

Windows/x64 -MSVC or WinSDK +MSVC Full diff --git a/doc/faq.html b/doc/faq.html index 6208882b8..65b0d8421 100644 --- a/doc/faq.html +++ b/doc/faq.html @@ -55,7 +55,7 @@

Frequently Asked Questions (FAQ)

-
+
Q: Where can I learn more about LuaJIT and Lua?
    @@ -75,7 +75,7 @@

    Frequently Asked Questions (FAQ)

-
+
Q: Where can I learn more about the compiler technology used by LuaJIT?
I'm planning to write more documentation about the internals of LuaJIT. @@ -91,7 +91,7 @@

Frequently Asked Questions (FAQ)

-
+
Q: Why do I get this error: "attempt to index global 'arg' (a nil value)"?
Q: My vararg functions fail after switching to LuaJIT!
LuaJIT is compatible to the Lua 5.1 language standard. It doesn't @@ -101,7 +101,7 @@

Frequently Asked Questions (FAQ)

vararg syntax.
-
+
Q: Why do I get this error: "bad FPU precision"?
Q: I get weird behavior after initializing Direct3D.
Q: Some FPU operations crash after I load a Delphi DLL.
@@ -123,7 +123,7 @@

Frequently Asked Questions (FAQ)

-
+
Q: Sometimes Ctrl-C fails to stop my Lua program. Why?
The interrupt signal handler sets a Lua debug hook. But this is currently ignored by compiled code (this will eventually be fixed). If @@ -134,7 +134,31 @@

Frequently Asked Questions (FAQ)

running inside a C function under the Lua interpreter.
-
+
+
Q: Can Lua code be safely sandboxed?
+
Maybe for an extremely restricted subset of Lua and if you relentlessly +scrutinize every single interface function you offer to the untrusted code.
+ +Although Lua provides some sandboxing functionality (setfenv(), hooks), +it's very hard to get this right even for the Lua core libraries. Of course, +you'll need to inspect any extension library, too. And there are libraries +that are inherently unsafe, e.g. the FFI library.
+ +Relatedly, loading untrusted bytecode is not safe! It's trivial +to crash the Lua or LuaJIT VM with maliciously crafted bytecode. This is +well known and there's no bytecode verification on purpose, so please +don't report a bug about it. Check the mode parameter for the +load*() functions to disable loading of bytecode.
+ +In general, the only promising approach is to sandbox Lua code at the +process level and not the VM level.
+ +More reading material at the » Lua Wiki and Wikipedia. +
+
+ +
Q: Why doesn't my favorite power-patch for Lua apply against LuaJIT?
Because it's a completely redesigned VM and has very little code in common with Lua anymore. Also, if the patch introduces changes to @@ -145,7 +169,7 @@

Frequently Asked Questions (FAQ)

The compiler will happily optimize away such indirections.
-
+
Q: Lua runs everywhere. Why doesn't LuaJIT support my CPU?
Because it's a compiler — it needs to generate native machine code. This means the code generator must be ported to each @@ -156,7 +180,7 @@

Frequently Asked Questions (FAQ)

demand and/or sponsoring.
-
+
Q: When will feature X be added? When will the next version be released?
When it's ready.
C'mon, it's open source — I'm doing it on my own time and you're From 570e758ca7dd14f93efdd43d68cf8979c1d7f984 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 13 Jul 2020 11:54:08 +0200 Subject: [PATCH 28/47] Handle old OSX/iOS without getentropy(). --- src/lj_prng.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/lj_prng.c b/src/lj_prng.c index c24fe6301..a8b8b6de2 100644 --- a/src/lj_prng.c +++ b/src/lj_prng.c @@ -107,7 +107,19 @@ static PRGR libfunc_rgr; #if LJ_TARGET_LINUX /* Avoid a dependency on glibc 2.25+ and use the getrandom syscall instead. */ #include -#elif LJ_TARGET_OSX || LJ_TARGET_BSD || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN +#else + +#if LJ_TARGET_OSX +#include +#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200 || \ + __IPHONE_OS_VERSION_MIN_REQUIRED >= 100000 +#define LJ_TARGET_HAS_GETENTROPY 1 +#endif +#elif LJ_TARGET_BSD || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN +#define LJ_TARGET_HAS_GETENTROPY 1 +#endif + +#if LJ_TARGET_HAS_GETENTROPY extern int getentropy(void *buf, size_t len); #ifdef __ELF__ __attribute__((weak)) @@ -115,6 +127,8 @@ extern int getentropy(void *buf, size_t len); ; #endif +#endif + /* For the /dev/urandom fallback. */ #include #include @@ -181,7 +195,7 @@ int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs) if (syscall(SYS_getrandom, rs->u, sizeof(rs->u), 0) == (long)sizeof(rs->u)) goto ok; -#elif LJ_TARGET_OSX || LJ_TARGET_BSD || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN +#elif LJ_TARGET_HAS_GETENTROPY #ifdef __ELF__ if (getentropy && getentropy(rs->u, sizeof(rs->u)) == 0) From c4b1e0feae75248f37945b1993f469df0d9ead53 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 5 Aug 2020 14:14:58 +0200 Subject: [PATCH 29/47] Fix Makefile dependencies. Thanks to Stefanos Chaliasos. 
--- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 5cf1f0be6..ce1537a8b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -610,7 +610,7 @@ $(MINILUA_T): $(MINILUA_O) $(E) "HOSTLINK $@" $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) -host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) +host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) lj_arch.h lua.h luaconf.h $(E) "DYNASM $@" $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) From 2211f6f960b65d200a3142798cf86576405c24cb Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 5 Aug 2020 15:21:00 +0200 Subject: [PATCH 30/47] ARM: Ensure relative GG_State element alignment differently. Thanks to jojo59516 and dwing4g. --- src/lj_dispatch.h | 8 ++++++++ src/lj_jit.h | 6 +----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 17bf93dad..6a3bc2bd1 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h @@ -70,12 +70,20 @@ typedef uint16_t HotCount; typedef struct GG_State { lua_State L; /* Main thread. */ global_State g; /* Global state. */ +#if LJ_TARGET_ARM + /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ + uint8_t align1[(16-sizeof(global_State))&15]; +#endif #if LJ_TARGET_MIPS ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ #endif #if LJ_HASJIT jit_State J; /* JIT state. */ HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */ +#if LJ_TARGET_ARM + /* Ditto for J. */ + uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15]; +#endif #endif ASMFunction dispatch[GG_LEN_DISP]; /* Instruction dispatch tables. */ BCIns bcff[GG_NUM_ASMFF]; /* Bytecode for ASM fast functions. */ diff --git a/src/lj_jit.h b/src/lj_jit.h index 0e1c48275..4a4b0b1b4 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -406,11 +406,7 @@ typedef struct jit_State { size_t szallmcarea; /* Total size of all allocated mcode areas. 
*/ TValue errinfo; /* Additional info element for trace errors. */ -} -#if LJ_TARGET_ARM -LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ -#endif -jit_State; +} jit_State; /* Trivial PRNG e.g. used for penalty randomization. */ static LJ_AINLINE uint32_t LJ_PRNG_BITS(jit_State *J, int bits) From 12ab596997b9cb27846a5b254d11230c3f9c50c8 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 9 Aug 2020 18:08:38 +0200 Subject: [PATCH 31/47] Fix handling of errors during snapshot restore. --- src/lj_trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lj_trace.c b/src/lj_trace.c index 311baa73c..123e6eb83 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -701,6 +701,8 @@ static TValue *trace_exit_cp(lua_State *L, lua_CFunction dummy, void *ud) { ExitDataCP *exd = (ExitDataCP *)ud; cframe_errfunc(L->cframe) = -1; /* Inherit error function. */ + /* Always catch error here. */ + cframe_nres(L->cframe) = -2*LUAI_MAXSTACK*(int)sizeof(TValue); exd->pc = lj_snap_restore(exd->J, exd->exptr); UNUSED(dummy); return NULL; From e296f56b825c688c3530a981dc6b495d972f3d01 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 9 Aug 2020 22:50:31 +0200 Subject: [PATCH 32/47] Call error function on rethrow after trace exit. 
--- src/lj_debug.c | 1 + src/lj_dispatch.h | 2 +- src/lj_err.c | 2 +- src/lj_err.h | 2 +- src/lj_trace.c | 4 ++-- src/vm_arm.dasc | 3 +-- src/vm_mips.dasc | 5 ++--- src/vm_ppc.dasc | 3 +-- src/vm_x86.dasc | 4 +--- 9 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/lj_debug.c b/src/lj_debug.c index 1d73da7ea..6863cffdb 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c @@ -94,6 +94,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) } } ins = cframe_pc(cf); + if (!ins) return NO_BCPOS; } } pt = funcproto(fn); diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 6a3bc2bd1..372de0144 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h @@ -29,7 +29,7 @@ _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ _(pow) _(fmod) _(ldexp) \ - _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) \ + _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) _(lj_err_run) \ _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ diff --git a/src/lj_err.c b/src/lj_err.c index e3e0c2eb7..ad3394dfe 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -546,7 +546,7 @@ static ptrdiff_t finderrfunc(lua_State *L) } /* Runtime error. 
*/ -LJ_NOINLINE void lj_err_run(lua_State *L) +LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) { ptrdiff_t ef = finderrfunc(L); if (ef) { diff --git a/src/lj_err.h b/src/lj_err.h index ed148d797..4ae637d46 100644 --- a/src/lj_err.h +++ b/src/lj_err.h @@ -23,7 +23,7 @@ LJ_DATA const char *lj_err_allmsg; LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em); LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode); LJ_FUNC_NORET void lj_err_mem(lua_State *L); -LJ_FUNC_NORET void lj_err_run(lua_State *L); +LJ_FUNCA_NORET void LJ_FASTCALL lj_err_run(lua_State *L); LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em); LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok, BCLine line, ErrMsg em, va_list argp); diff --git a/src/lj_trace.c b/src/lj_trace.c index 123e6eb83..c7f3f52db 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -700,8 +700,8 @@ typedef struct ExitDataCP { static TValue *trace_exit_cp(lua_State *L, lua_CFunction dummy, void *ud) { ExitDataCP *exd = (ExitDataCP *)ud; - cframe_errfunc(L->cframe) = -1; /* Inherit error function. */ - /* Always catch error here. */ + /* Always catch error here and don't call error function. */ + cframe_errfunc(L->cframe) = 0; cframe_nres(L->cframe) = -2*LUAI_MAXSTACK*(int)sizeof(TValue); exd->pc = lj_snap_restore(exd->J, exd->exptr); UNUSED(dummy); diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index c5e0498e4..dcfb10b3e 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -2201,9 +2201,8 @@ static void build_subroutines(BuildCtx *ctx) | bx OP | |3: // Rethrow error from the right C frame. 
- | rsb CARG2, CARG1, #0 | mov CARG1, L - | bl extern lj_err_throw // (lua_State *L, int errcode) + | bl extern lj_err_run // (lua_State *L) |.endif | |//----------------------------------------------------------------------- diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index e6b53e0df..6bbad37b7 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -2163,9 +2163,8 @@ static void build_subroutines(BuildCtx *ctx) |. addu RA, RA, BASE | |3: // Rethrow error from the right C frame. - | load_got lj_err_throw - | negu CARG2, CRET1 - | call_intern lj_err_throw // (lua_State *L, int errcode) + | load_got lj_err_run + | call_intern lj_err_run // (lua_State *L) |. move CARG1, L |.endif | diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 6b973d4ec..de44027b1 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -2699,9 +2699,8 @@ static void build_subroutines(BuildCtx *ctx) | bctr | |3: // Rethrow error from the right C frame. - | neg CARG2, CARG1 | mr CARG1, L - | bl extern lj_err_throw // (lua_State *L, int errcode) + | bl extern lj_err_run // (lua_State *L) |.endif | |//----------------------------------------------------------------------- diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 2ccc671fd..b23d046b7 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -3060,10 +3060,8 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |3: // Rethrow error from the right C frame. - | neg RD | mov FCARG1, L:RB - | mov FCARG2, RD - | call extern lj_err_throw@8 // (lua_State *L, int errcode) + | call extern lj_err_run@4 // (lua_State *L) |.endif | |//----------------------------------------------------------------------- From ff1e72acead01df7d8ed0fbb31efd32f57953618 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 27 Aug 2020 18:05:32 +0200 Subject: [PATCH 33/47] LJ_GC64: Always snapshot functions for non-base frames. Reported by Arseny Vakhrushev. Analysis and fix contributed by Peter Cawley. 
--- src/lj_record.c | 1 + src/lj_snap.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/lj_record.c b/src/lj_record.c index df4288184..9e41ce056 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -211,6 +211,7 @@ static TRef getcurrf(jit_State *J) { if (J->base[-1-LJ_FR2]) return J->base[-1-LJ_FR2]; + /* Non-base frame functions ought to be loaded already. */ lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot"); return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY); } diff --git a/src/lj_snap.c b/src/lj_snap.c index a21894f60..36f815285 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -85,8 +85,13 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) IRIns *ir = &J->cur.ir[ref]; if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) && ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { - /* No need to snapshot unmodified non-inherited slots. */ - if (!(ir->op2 & IRSLOAD_INHERIT)) + /* + ** No need to snapshot unmodified non-inherited slots. + ** But always snapshot the function below a frame in LJ_FR2 mode. + */ + if (!(ir->op2 & IRSLOAD_INHERIT) && + (!LJ_FR2 || s == 0 || s+1 == nslots || + !(J->slot[s+1] & (TREF_CONT|TREF_FRAME)))) continue; /* No need to restore readonly slots and unmodified non-parent slots. */ if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && From 90e65514dda3994253c1e3007f63da7ace8f6b7b Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 5 Sep 2020 20:02:54 +0200 Subject: [PATCH 34/47] Limit path length passed to C library loader. 
--- src/lib_package.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/lib_package.c b/src/lib_package.c index a8bdcf17a..f5ba3cbc9 100644 --- a/src/lib_package.c +++ b/src/lib_package.c @@ -208,7 +208,12 @@ static const char *mksymname(lua_State *L, const char *modname, static int ll_loadfunc(lua_State *L, const char *path, const char *name, int r) { - void **reg = ll_register(L, path); + void **reg; + if (strlen(path) >= 4096) { + lua_pushliteral(L, "path too long"); + return PACKAGE_ERR_LIB; + } + reg = ll_register(L, path); if (*reg == NULL) *reg = ll_load(L, path, (*name == '*')); if (*reg == NULL) { return PACKAGE_ERR_LIB; /* Unable to load library. */ From 412d5a4039cf2cf9c3817af57fae4df11a38d070 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 12 Sep 2020 17:32:30 +0200 Subject: [PATCH 35/47] DynASM/x86: Fix VREG support. --- dynasm/dasm_x86.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h index 84b9d17fa..feb84c142 100644 --- a/dynasm/dasm_x86.h +++ b/dynasm/dasm_x86.h @@ -299,11 +299,13 @@ int dasm_link(Dst_DECL, size_t *szp) while (pos != lastpos) { dasm_ActList p = D->actionlist + b[pos++]; + int op = 0; while (1) { - int op, action = *p++; + int action = *p++; switch (action) { - case DASM_REL_LG: p++; op = p[-3]; goto rel_pc; - case DASM_REL_PC: op = p[-2]; rel_pc: { + case DASM_REL_LG: p++; + /* fallthrough */ + case DASM_REL_PC: { int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0); if (shrink) { /* Shrinkable branch opcode? */ int lofs, lpos = b[pos]; @@ -335,9 +337,10 @@ int dasm_link(Dst_DECL, size_t *szp) case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */ case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. 
*/ case DASM_EXTERN: p += 2; break; - case DASM_ESC: p++; break; + case DASM_ESC: op = *p++; break; case DASM_MARK: break; case DASM_SECTION: case DASM_STOP: goto stop; + default: op = action; break; } } stop: (void)0; From dd5032ed844c56964347c7916db66b0eb11d8091 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 15 Sep 2020 01:02:24 +0200 Subject: [PATCH 36/47] Fix lua_yield() from C hook. Reported by Jason Carr. --- src/lj_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/lj_api.c b/src/lj_api.c index f1cfebbcc..f53ecc35a 100644 --- a/src/lj_api.c +++ b/src/lj_api.c @@ -1210,11 +1210,12 @@ LUA_API int lua_yield(lua_State *L, int nresults) setcont(top, lj_cont_hook); if (LJ_FR2) top++; setframe_pc(top, cframe_pc(cf)-1); - if (LJ_FR2) top++; + top++; setframe_gc(top, obj2gco(L), LJ_TTHREAD); + if (LJ_FR2) top++; setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT); L->top = L->base = top+1; -#if LJ_TARGET_X64 +#if ((defined(__GNUC__) || defined(__clang__)) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND) || LJ_TARGET_WINDOWS lj_err_throw(L, LUA_YIELD); #else L->cframe = NULL; From 881d02d3117838acaf4fb844332c8e33cc95c8c5 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 22 Sep 2020 11:56:06 +0200 Subject: [PATCH 37/47] Mark CONV as non-weak, to prevent elimination of its side-effect. An unused guarded CONV int.num cannot be omitted in general. --- src/lj_ir.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_ir.h b/src/lj_ir.h index f91d6d0eb..3a154a053 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -126,7 +126,7 @@ _(XBAR, S , ___, ___) \ \ /* Type conversions. 
*/ \ - _(CONV, NW, ref, lit) \ + _(CONV, N , ref, lit) \ _(TOBIT, N , ref, ref) \ _(TOSTR, N , ref, ___) \ _(STRTO, N , ref, ___) \ From dd0f09f95f36caf1f2111c10fec02748116003bb Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 22 Sep 2020 23:37:43 +0200 Subject: [PATCH 38/47] Another fix for lua_yield() from C hook. Reported by Jason Carr. --- src/lj_ccallback.c | 2 +- src/lj_err.c | 2 +- src/lj_frame.h | 2 +- src/lj_meta.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 49775d2b9..4c71f1a64 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -549,13 +549,13 @@ static void callback_conv_args(CTState *cts, lua_State *L) if (LJ_FR2) { (o++)->u64 = LJ_CONT_FFI_CALLBACK; (o++)->u64 = rid; - o++; } else { o->u32.lo = LJ_CONT_FFI_CALLBACK; o->u32.hi = rid; o++; } setframe_gc(o, obj2gco(fn), fntp); + if (LJ_FR2) o++; setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT); L->top = L->base = ++o; if (!ct) diff --git a/src/lj_err.c b/src/lj_err.c index 47f88740b..656631a94 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -687,9 +687,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o) const BCIns *pc = cframe_Lpc(L); if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) { const char *tname = lj_typename(o); + setframe_gc(o, obj2gco(L), LJ_TTHREAD); if (LJ_FR2) o++; setframe_pc(o, pc); - setframe_gc(o, obj2gco(L), LJ_TTHREAD); L->top = L->base = o+1; err_msgv(L, LJ_ERR_BADCALL, tname); } diff --git a/src/lj_frame.h b/src/lj_frame.h index 599a2d1cf..46a5c7827 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -46,7 +46,7 @@ enum { #define frame_gc(f) (gcval((f)-1)) #define frame_ftsz(f) ((ptrdiff_t)(f)->ftsz) #define frame_pc(f) ((const BCIns *)frame_ftsz(f)) -#define setframe_gc(f, p, tp) (setgcVraw((f)-1, (p), (tp))) +#define setframe_gc(f, p, tp) (setgcVraw((f), (p), (tp))) #define setframe_ftsz(f, sz) ((f)->ftsz = (sz)) #define setframe_pc(f, pc) ((f)->ftsz = 
(int64_t)(intptr_t)(pc)) #else diff --git a/src/lj_meta.c b/src/lj_meta.c index 2cdb6a0f0..f6e6d46a1 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c @@ -86,8 +86,8 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv) else top->u32.lo = LJ_CONT_TAILCALL; setframe_pc(top++, pc); - if (LJ_FR2) top++; setframe_gc(top, obj2gco(L), LJ_TTHREAD); /* Dummy frame object. */ + if (LJ_FR2) top++; setframe_ftsz(top, ((char *)(top+1) - (char *)base) + FRAME_CONT); L->base = L->top = top+1; /* From c59e2a21b0c39695d8d7b3d38208c3637728cceb Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Fri, 25 Sep 2020 08:03:42 +0530 Subject: [PATCH 39/47] Revert "Fix arm64 register allocation issue for XLOAD." This reverts commit 3ad7e2eb9b46006f9b44a0da7979e71880324598. This issue has already been fixed in LuaJIT/LuaJIT. --- src/lj_asm_arm64.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 6f939eede..819983068 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -1049,8 +1049,7 @@ static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, - rset_exclude(RSET_GPR, dest)); + asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); } static int maybe_zero_val(ASMState *as, IRRef ref) From 20c657915fad2daffe04dd54dc9295aa3dfc82b0 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Fri, 25 Sep 2020 08:10:08 +0530 Subject: [PATCH 40/47] Revert "bugfix: fixed a clang warning in lj_str.c regarding unused str_fastcmp() when macro LUAJIT_USE_VALGRIND is defined." This reverts commit 4e9ce0fb0a047764b8a302360e8a3fa114cc26df. The fuction str_fastcmp has been dropped in LuaJIT/LuaJIT. Also remove note about this fix from README.md. 
--- README.md | 3 --- src/lj_str.c | 2 -- 2 files changed, 5 deletions(-) diff --git a/README.md b/README.md index 3e1b59a85..e865990ce 100644 --- a/README.md +++ b/README.md @@ -288,9 +288,6 @@ KN 2 1.390671161567e-309 even for alpha or beta versions. * Applied a patch to fix DragonFlyBSD compatibility. Note: this is not an officially supported target. -* Turned off string comparison optimizations for 64-bit architectures when the - build option `LUAJIT_USE_VALGRIND` is specified. LuaJIT is now (almost) - valgrind clean. * feature: jit.dump: output Lua source location after every BC. * feature: added internal memory-buffer-based trace entry/exit/start-recording event logging, mainly for debugging bugs in the JIT compiler. it requires diff --git a/src/lj_str.c b/src/lj_str.c index 0c7108fe9..84e76e6da 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -47,7 +47,6 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) return (int32_t)(a->len - b->len); } -#ifndef LUAJIT_USE_VALGRIND /* Fast string data comparison. Caveat: unaligned access to 1st string! */ static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) { @@ -68,7 +67,6 @@ static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) } while (i < len); return 0; } -#endif /* Find fixed string p inside string s. */ const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen) From 2368755d4d8bac364a4dfcc35eb3accc243915a0 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Fri, 25 Sep 2020 09:50:09 +0530 Subject: [PATCH 41/47] Revert "style: minor coding style fixes." This reverts commit 4589430e611042980368f5f1ea0ac4114def6510. 
--- src/lj_str.c | 7 +++---- src/x64/src/lj_str_hash_x64.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/lj_str.c b/src/lj_str.c index 84e76e6da..bbd71ef46 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -129,8 +129,7 @@ void lj_str_resize(lua_State *L, MSize newmask) } static MSize -lj_str_original_hash(const char *str, size_t lenx) -{ +lj_str_original_hash(const char *str, size_t lenx) { MSize len = (MSize)lenx; MSize a, b, h = len; @@ -158,8 +157,7 @@ lj_str_original_hash(const char *str, size_t lenx) } MSize -lj_str_indep_hash(GCstr *str) -{ +lj_str_indep_hash(GCstr *str) { return lj_str_original_hash(strdata(str), str->len); } @@ -240,3 +238,4 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) g->strnum--; lj_mem_free(g, s, sizestring(s)); } + diff --git a/src/x64/src/lj_str_hash_x64.h b/src/x64/src/lj_str_hash_x64.h index cf37a2d20..063f631c7 100644 --- a/src/x64/src/lj_str_hash_x64.h +++ b/src/x64/src/lj_str_hash_x64.h @@ -203,7 +203,7 @@ static LJ_AINLINE uint32_t get_random_pos_unsafe(uint32_t chunk_sz_order, } static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str, - uint32_t len) + uint32_t len) { uint32_t chunk_num, chunk_sz, chunk_sz_log2, i, pos1, pos2; uint64_t h1, h2, v; From 07e8c40cdac783399f999c96bab12b2c04bebdbc Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Fri, 25 Sep 2020 14:19:18 +0530 Subject: [PATCH 42/47] Revert "bugfix: FFI C parsers could not parse some C constructs like `__attribute((aligned(N)))` and `#pragma`." This reverts commit 2d339277106d41380a4dd3697f70e352d5c2d6c4. Reverts state back to LuaJIT/v2.1 way of parsing attributes. 
--- src/lj_str.c | 16 +++++--------- src/lj_str.h | 2 -- src/x64/test/Makefile | 1 - src/x64/test/unit/ffi/test_abi.lua | 10 --------- src/x64/test/unit/ffi/test_line_directive.lua | 15 ------------- .../unit/ffi/test_pragma_pack_pushpop.lua | 12 ---------- src/x64/test/unit/ffi/test_var_attribute.lua | 22 ------------------- src/x64/test/unit_test.sh | 22 ------------------- 8 files changed, 5 insertions(+), 95 deletions(-) delete mode 100644 src/x64/test/unit/ffi/test_abi.lua delete mode 100644 src/x64/test/unit/ffi/test_line_directive.lua delete mode 100644 src/x64/test/unit/ffi/test_pragma_pack_pushpop.lua delete mode 100644 src/x64/test/unit/ffi/test_var_attribute.lua delete mode 100644 src/x64/test/unit_test.sh diff --git a/src/lj_str.c b/src/lj_str.c index bbd71ef46..fb1cd4f72 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -128,6 +128,11 @@ void lj_str_resize(lua_State *L, MSize newmask) g->strhash = newhash; } +#include "x64/src/lj_str_hash_x64.h" + +#if defined(LJ_ARCH_STR_HASH) +#define LJ_STR_HASH LJ_ARCH_STR_HASH +#else static MSize lj_str_original_hash(const char *str, size_t lenx) { MSize len = (MSize)lenx; @@ -155,17 +160,6 @@ lj_str_original_hash(const char *str, size_t lenx) { return h; } - -MSize -lj_str_indep_hash(GCstr *str) { - return lj_str_original_hash(strdata(str), str->len); -} - -#include "x64/src/lj_str_hash_x64.h" - -#if defined(LJ_ARCH_STR_HASH) -#define LJ_STR_HASH LJ_ARCH_STR_HASH -#else #define LJ_STR_HASH lj_str_original_hash #endif diff --git a/src/lj_str.h b/src/lj_str.h index 34378a368..2e9bfc1df 100644 --- a/src/lj_str.h +++ b/src/lj_str.h @@ -24,6 +24,4 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) -MSize lj_str_indep_hash(GCstr *str); - #endif diff --git a/src/x64/test/Makefile b/src/x64/test/Makefile index 3ec44eae8..4326ab3db 100644 --- a/src/x64/test/Makefile +++ 
b/src/x64/test/Makefile @@ -24,7 +24,6 @@ CXXFLAGS := -O3 -MD -g -msse4.2 -Wall -I../src -I../../../src test: $(TEST_PROGRAM) @echo "some unit test" $(VALGRIND) ./$(TEST_PROGRAM) - ./unit_test.sh @echo "smoke test" ../../luajit test_str_comp.lua diff --git a/src/x64/test/unit/ffi/test_abi.lua b/src/x64/test/unit/ffi/test_abi.lua deleted file mode 100644 index 9fafcf55d..000000000 --- a/src/x64/test/unit/ffi/test_abi.lua +++ /dev/null @@ -1,10 +0,0 @@ -local ffi = require "ffi" - --- TODO: test "gc64" and "win" parameters -assert((ffi.abi("32bit") or ffi.abi("64bit")) - and ffi.abi("le") - and not ffi.abi("be") - and ffi.abi("fpu") - and not ffi.abi("softfp") - and ffi.abi("hardfp") - and not ffi.abi("eabi")) diff --git a/src/x64/test/unit/ffi/test_line_directive.lua b/src/x64/test/unit/ffi/test_line_directive.lua deleted file mode 100644 index a8b0403cb..000000000 --- a/src/x64/test/unit/ffi/test_line_directive.lua +++ /dev/null @@ -1,15 +0,0 @@ -local x = [=[ -local ffi = require "ffi" - -ffi.cdef [[ - #line 100 - typedef Int xxx -]] -]=] - -local function foo() - loadstring(x)() -end - -local r, e = pcall(foo) -assert(string.find(e, "declaration specifier expected near 'Int' at line 100") ~= nil) diff --git a/src/x64/test/unit/ffi/test_pragma_pack_pushpop.lua b/src/x64/test/unit/ffi/test_pragma_pack_pushpop.lua deleted file mode 100644 index 5f1bdd30b..000000000 --- a/src/x64/test/unit/ffi/test_pragma_pack_pushpop.lua +++ /dev/null @@ -1,12 +0,0 @@ -local ffi = require "ffi" - -ffi.cdef[[ -#pragma pack(push, 1) -typedef struct { - char x; - double y; -} foo; -#pragma pack(pop) -]] - -assert(ffi.sizeof("foo") == 9) diff --git a/src/x64/test/unit/ffi/test_var_attribute.lua b/src/x64/test/unit/ffi/test_var_attribute.lua deleted file mode 100644 index 11252bbaf..000000000 --- a/src/x64/test/unit/ffi/test_var_attribute.lua +++ /dev/null @@ -1,22 +0,0 @@ -local ffi = require "ffi" - -ffi.cdef[[ -typedef struct { int a; char b; } __attribute__((packed)) myty1; -typedef 
struct { int a; char b; } __attribute__((__packed__)) myty1_a; - -typedef struct { int a; char b; } __attribute__((aligned(16))) myty2_a; -typedef struct { int a; char b; } __attribute__((__aligned__(16))) myty2; - -typedef int __attribute__ ((vector_size (32))) myty3; -typedef int __attribute__ ((__vector_size__ (32))) myty3_a; - -typedef int __attribute__ ((mode(DI))) myty4; -]] - -assert(ffi.sizeof("myty1") == 5 and - ffi.sizeof("myty1_a") == 5 and - ffi.alignof("myty2") == 16 and - ffi.alignof("myty2_a") == 16 and - ffi.sizeof("myty3") == 32 and - ffi.sizeof("myty3_a") == 32 and - ffi.sizeof("myty4") == 8) diff --git a/src/x64/test/unit_test.sh b/src/x64/test/unit_test.sh deleted file mode 100644 index c6633ca2e..000000000 --- a/src/x64/test/unit_test.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/sh -DIR=$(cd $(dirname $0); pwd) -cd $DIR - -LUAJIT=$DIR/../../luajit -HASERR=0 - -find $DIR/unit -name "*.lua" -print | while read x; do - $LUAJIT $x >/dev/null 2>/dev/null - if [ $? -eq 0 ]; then - echo "$x ok" - else - HASERR=1 - echo "$x failed" - fi -done - -if [ $HASERR -eq 0 ]; then - exit 0 -fi - -exit 1 From b23949ab702fc214c8235ec4934febeb2aa31f50 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Fri, 25 Sep 2020 14:23:04 +0530 Subject: [PATCH 43/47] Revert "bugfix: fixed compatibility regression with MinGW gcc. this bug had appeared in commit 7923c63." This reverts commit b982ebb5d52b48bce0d101f6621b1ea97b7ae130. 
--- src/x64/src/lj_str_hash_x64.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/x64/src/lj_str_hash_x64.h b/src/x64/src/lj_str_hash_x64.h index 063f631c7..b783a3949 100644 --- a/src/x64/src/lj_str_hash_x64.h +++ b/src/x64/src/lj_str_hash_x64.h @@ -21,11 +21,6 @@ #undef LJ_AINLINE #define LJ_AINLINE -#ifdef __MINGW32__ -#define random() ((long) rand()) -#define srandom(seed) srand(seed) -#endif - static const uint64_t* cast_uint64p(const char* str) { return (const uint64_t*)(void*)str; From cb73afbe1bdfe8fe7014647ad9dd8abe8ae87855 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Fri, 25 Sep 2020 14:23:13 +0530 Subject: [PATCH 44/47] Revert "optimize: lj_str_new: uses randomized hash functions based on crc32 when -msse4.2 is specified." This reverts commit 7923c6343c26eb6dd3ffcc8709ff6783ff0ff80f. --- src/lj_str.c | 44 ++---- src/x64/Makefile | 13 -- src/x64/src/lj_str_hash_x64.h | 266 ------------------------------- src/x64/test/Makefile | 47 ------ src/x64/test/benchmark.cxx | 278 --------------------------------- src/x64/test/test.cpp | 73 --------- src/x64/test/test_str_comp.lua | 67 -------- src/x64/test/test_util.cxx | 21 --- src/x64/test/test_util.hpp | 57 ------- 9 files changed, 10 insertions(+), 856 deletions(-) delete mode 100644 src/x64/Makefile delete mode 100644 src/x64/src/lj_str_hash_x64.h delete mode 100644 src/x64/test/Makefile delete mode 100644 src/x64/test/benchmark.cxx delete mode 100644 src/x64/test/test.cpp delete mode 100644 src/x64/test/test_str_comp.lua delete mode 100644 src/x64/test/test_util.cxx delete mode 100644 src/x64/test/test_util.hpp diff --git a/src/lj_str.c b/src/lj_str.c index fb1cd4f72..4fb9c2d3a 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -128,16 +128,17 @@ void lj_str_resize(lua_State *L, MSize newmask) g->strhash = newhash; } -#include "x64/src/lj_str_hash_x64.h" - -#if defined(LJ_ARCH_STR_HASH) -#define LJ_STR_HASH LJ_ARCH_STR_HASH -#else -static MSize -lj_str_original_hash(const char *str, size_t 
lenx) { +/* Intern a string and return string object. */ +GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) +{ + global_State *g; + GCstr *s; + GCobj *o; MSize len = (MSize)lenx; MSize a, b, h = len; - + if (lenx >= LJ_MAX_STR) + lj_err_msg(L, LJ_ERR_STROV); + g = G(L); /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */ if (len >= 4) { /* Caveat: unaligned access! */ a = lj_getu32(str); @@ -151,36 +152,11 @@ lj_str_original_hash(const char *str, size_t lenx) { b = *(const uint8_t *)(str+(len>>1)); h ^= b; h -= lj_rol(b, 14); } else { - return 0; + return &g->strempty; } - a ^= h; a -= lj_rol(h, 11); b ^= a; b -= lj_rol(a, 25); h ^= b; h -= lj_rol(b, 16); - - return h; -} -#define LJ_STR_HASH lj_str_original_hash -#endif - -/* Intern a string and return string object. */ -GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) -{ - global_State *g; - GCstr *s; - GCobj *o; - MSize len = (MSize)lenx; - MSize h; - - if (lenx >= LJ_MAX_STR) - lj_err_msg(L, LJ_ERR_STROV); - g = G(L); - if (LJ_UNLIKELY(lenx == 0)) { - return &g->strempty; - } - - h = LJ_STR_HASH(str, lenx); - /* Check if the string has already been interned. */ o = gcref(g->strhash[h & g->strmask]); #ifndef LUAJIT_USE_VALGRIND diff --git a/src/x64/Makefile b/src/x64/Makefile deleted file mode 100644 index 27277140d..000000000 --- a/src/x64/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -.PHONY: default test benchmark clean - -default: - @echo "make target include: test bechmark clean" - -test: - $(MAKE) -C test test - -benchmark: - $(MAKE) -C test benchmark - -clean: - $(MAKE) -C test clean diff --git a/src/x64/src/lj_str_hash_x64.h b/src/x64/src/lj_str_hash_x64.h deleted file mode 100644 index b783a3949..000000000 --- a/src/x64/src/lj_str_hash_x64.h +++ /dev/null @@ -1,266 +0,0 @@ -/* - * This file defines string hash function using CRC32. It takes advantage of - * Intel hardware support (crc32 instruction, SSE 4.2) to speedup the CRC32 - * computation. 
The hash functions try to compute CRC32 of length and up - * to 128 bytes of given string. - */ - -#ifndef _LJ_STR_HASH_X64_H_ -#define _LJ_STR_HASH_X64_H_ - -#if defined(__SSE4_2__) && defined(__x86_64) && defined(__GNUC__) - -#include -#include -#include -#include -#include - -#include "../../lj_def.h" - -#undef LJ_AINLINE -#define LJ_AINLINE - -static const uint64_t* cast_uint64p(const char* str) -{ - return (const uint64_t*)(void*)str; -} - -static const uint32_t* cast_uint32p(const char* str) -{ - return (const uint32_t*)(void*)str; -} - -/* hash string with len in [1, 4) */ -static LJ_AINLINE uint32_t lj_str_hash_1_4(const char* str, uint32_t len) -{ -#if 0 - /* TODO: The if-1 part (i.e the original algorithm) is working better when - * the load-factor is high, as revealed by conflict benchmark (via - * 'make benchmark' command); need to understand why it's so. - */ - uint32_t v = str[0]; - v = (v << 8) | str[len >> 1]; - v = (v << 8) | str[len - 1]; - v = (v << 8) | len; - return _mm_crc32_u32(0, v); -#else - uint32_t a, b, h = len; - - a = *(const uint8_t *)str; - h ^= *(const uint8_t *)(str+len-1); - b = *(const uint8_t *)(str+(len>>1)); - h ^= b; h -= lj_rol(b, 14); - - a ^= h; a -= lj_rol(h, 11); - b ^= a; b -= lj_rol(a, 25); - h ^= b; h -= lj_rol(b, 16); - - return h; -#endif -} - -/* hash string with len in [4, 16) */ -static LJ_AINLINE uint32_t lj_str_hash_4_16(const char* str, uint32_t len) -{ - uint64_t v1, v2, h; - - if (len >= 8) { - v1 = *cast_uint64p(str); - v2 = *cast_uint64p(str + len - 8); - } else { - v1 = *cast_uint32p(str); - v2 = *cast_uint32p(str + len - 4); - } - - h = _mm_crc32_u32(0, len); - h = _mm_crc32_u64(h, v1); - h = _mm_crc32_u64(h, v2); - return h; -} - -/* hash string with length in [16, 128) */ -static uint32_t lj_str_hash_16_128(const char* str, uint32_t len) -{ - uint64_t h1, h2; - uint32_t i; - - h1 = _mm_crc32_u32(0, len); - h2 = 0; - - for (i = 0; i < len - 16; i += 16) { - h1 += _mm_crc32_u64(h1, *cast_uint64p(str + 
i)); - h2 += _mm_crc32_u64(h2, *cast_uint64p(str + i + 8)); - }; - - h1 = _mm_crc32_u64(h1, *cast_uint64p(str + len - 16)); - h2 = _mm_crc32_u64(h2, *cast_uint64p(str + len - 8)); - - return _mm_crc32_u32(h1, h2); -} - -/* ************************************************************************** - * - * Following is code about hashing string with length >= 128 - * - * ************************************************************************** - */ -static uint32_t random_pos[32][2]; -static const int8_t log2_tab[128] = { -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, - 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, - 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 }; - -/* return floor(log2(n)) */ -static LJ_AINLINE uint32_t log2_floor(uint32_t n) -{ - if (n <= 127) { - return log2_tab[n]; - } - - if ((n >> 8) <= 127) { - return log2_tab[n >> 8] + 8; - } - - if ((n >> 16) <= 127) { - return log2_tab[n >> 16] + 16; - } - - if ((n >> 24) <= 127) { - return log2_tab[n >> 24] + 24; - } - - return 31; -} - -#define POW2_MASK(n) ((1L << (n)) - 1) - -/* This function is to populate `random_pos` such that random_pos[i][*] - * contains random value in the range of [2**i, 2**(i+1)). - */ -static void x64_init_random(void) -{ - int i, seed, rml; - - /* Calculate the ceil(log2(RAND_MAX)) */ - rml = log2_floor(RAND_MAX); - if (RAND_MAX & (RAND_MAX - 1)) { - rml += 1; - } - - /* Init seed */ - seed = _mm_crc32_u32(0, getpid()); - seed = _mm_crc32_u32(seed, time(NULL)); - srandom(seed); - - /* Now start to populate the random_pos[][]. 
*/ - for (i = 0; i < 3; i++) { - /* No need to provide random value for chunk smaller than 8 bytes */ - random_pos[i][0] = random_pos[i][1] = 0; - } - - for (; i < rml; i++) { - random_pos[i][0] = random() & POW2_MASK(i+1); - random_pos[i][1] = random() & POW2_MASK(i+1); - } - - for (; i < 31; i++) { - int j; - for (j = 0; j < 2; j++) { - uint32_t v, scale; - scale = random_pos[i - rml][0]; - if (scale == 0) { - scale = 1; - } - v = (random() * scale) & POW2_MASK(i+1); - random_pos[i][j] = v; - } - } -} -#undef POW2_MASK - -void __attribute__((constructor)) x64_init_random_constructor() -{ - x64_init_random(); -} - -/* Return a pre-computed random number in the range of [1**chunk_sz_order, - * 1**(chunk_sz_order+1)). It is "unsafe" in the sense that the return value - * may be greater than chunk-size; it is up to the caller to make sure - * "chunk-base + return-value-of-this-func" has valid virtual address. - */ -static LJ_AINLINE uint32_t get_random_pos_unsafe(uint32_t chunk_sz_order, - uint32_t idx) -{ - uint32_t pos = random_pos[chunk_sz_order][idx & 1]; - return pos; -} - -static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str, - uint32_t len) -{ - uint32_t chunk_num, chunk_sz, chunk_sz_log2, i, pos1, pos2; - uint64_t h1, h2, v; - const char* chunk_ptr; - - chunk_num = 16; - chunk_sz = len / chunk_num; - chunk_sz_log2 = log2_floor(chunk_sz); - - pos1 = get_random_pos_unsafe(chunk_sz_log2, 0); - pos2 = get_random_pos_unsafe(chunk_sz_log2, 1); - - h1 = _mm_crc32_u32(0, len); - h2 = 0; - - /* loop over 14 chunks, 2 chunks at a time */ - for (i = 0, chunk_ptr = str; i < (chunk_num / 2 - 1); - chunk_ptr += chunk_sz, i++) { - - v = *cast_uint64p(chunk_ptr + pos1); - h1 = _mm_crc32_u64(h1, v); - - v = *cast_uint64p(chunk_ptr + chunk_sz + pos2); - h2 = _mm_crc32_u64(h2, v); - } - - /* the last two chunks */ - v = *cast_uint64p(chunk_ptr + pos1); - h1 = _mm_crc32_u64(h1, v); - - v = *cast_uint64p(chunk_ptr + chunk_sz - 8 - pos2); - h2 = _mm_crc32_u64(h2, v); 
- - /* process the trailing part */ - h1 = _mm_crc32_u64(h1, *cast_uint64p(str)); - h2 = _mm_crc32_u64(h2, *cast_uint64p(str + len - 8)); - - h1 = _mm_crc32_u32(h1, h2); - return h1; -} - -/* NOTE: the "len" should not be zero */ -static LJ_AINLINE uint32_t lj_str_hash(const char* str, size_t len) -{ - if (len < 128) { - if (len >= 16) { /* [16, 128) */ - return lj_str_hash_16_128(str, len); - } - - if (len >= 4) { /* [4, 16) */ - return lj_str_hash_4_16(str, len); - } - - /* [0, 4) */ - return lj_str_hash_1_4(str, len); - } - /* [128, inf) */ - return lj_str_hash_128_above(str, len); -} - -#define LJ_ARCH_STR_HASH lj_str_hash -#else -#undef LJ_ARCH_STR_HASH -#endif -#endif /*_LJ_STR_HASH_X64_H_*/ diff --git a/src/x64/test/Makefile b/src/x64/test/Makefile deleted file mode 100644 index 4326ab3db..000000000 --- a/src/x64/test/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -.PHONY: default test benchmark - -default: test benchmark - -COMMON_OBJ := test_util.o - -TEST_PROGRAM := ht_test -BENCHMARK_PROGRAM := ht_benchmark - -TEST_PROGRAM_OBJ := $(COMMON_OBJ) test.o -BENCHMARK_PROGRAM_OBJ := $(COMMON_OBJ) benchmark.o - -ifeq ($(WITH_VALGRIND), 1) - VALGRIND := valgrind --leak-check=full -else - VALGRIND := -endif - -CXXFLAGS := -O3 -MD -g -msse4.2 -Wall -I../src -I../../../src - -%.o: %.cxx - $(CXX) $(CXXFLAGS) -MD -c $< - -test: $(TEST_PROGRAM) - @echo "some unit test" - $(VALGRIND) ./$(TEST_PROGRAM) - - @echo "smoke test" - ../../luajit test_str_comp.lua - -benchmark: $(BENCHMARK_PROGRAM) - # micro benchmark - ./$(BENCHMARK_PROGRAM) - -$(TEST_PROGRAM) : $(TEST_PROGRAM_OBJ) - cat $(TEST_PROGRAM_OBJ:.o=.d) > dep1.txt - $(CXX) $+ $(CXXFLAGS) -lm -o $@ - -$(BENCHMARK_PROGRAM): $(BENCHMARK_PROGRAM_OBJ) - cat $(BENCHMARK_PROGRAM_OBJ:.o=.d) > dep2.txt - $(CXX) $+ $(CXXFLAGS) -o $@ - --include dep1.txt --include dep2.txt - -clean: - -rm -f *.o *.d dep*.txt $(BENCHMARK_PROGRAM) $(TEST_PROGRAM) diff --git a/src/x64/test/benchmark.cxx b/src/x64/test/benchmark.cxx deleted file mode 
100644 index e37edb032..000000000 --- a/src/x64/test/benchmark.cxx +++ /dev/null @@ -1,278 +0,0 @@ -#include // for gettimeofday() -extern "C" { -#include "lj_str_hash_x64.h" -} -#include -#include -#include -#include -#include "test_util.hpp" -#include -#include - -using namespace std; - -#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) -#define lj_ror(x, n) (((x)<<(-(int)(n)&(8*sizeof(x)-1))) | ((x)>>(n))) - -const char* separator = "-------------------------------------------"; - -static uint32_t LJ_AINLINE -lj_original_hash(const char *str, size_t len) -{ - uint32_t a, b, h = len; - if (len >= 4) { - a = lj_getu32(str); h ^= lj_getu32(str+len-4); - b = lj_getu32(str+(len>>1)-2); - h ^= b; h -= lj_rol(b, 14); - b += lj_getu32(str+(len>>2)-1); - a ^= h; a -= lj_rol(h, 11); - b ^= a; b -= lj_rol(a, 25); - h ^= b; h -= lj_rol(b, 16); - } else { - a = *(const uint8_t *)str; - h ^= *(const uint8_t *)(str+len-1); - b = *(const uint8_t *)(str+(len>>1)); - h ^= b; h -= lj_rol(b, 14); - } - - a ^= h; a -= lj_rol(h, 11); - b ^= a; b -= lj_rol(a, 25); - h ^= b; h -= lj_rol(b, 16); - - return h; -} - -template double -BenchmarkHashTmpl(T func, char* buf, size_t len) -{ - TestClock timer; - uint32_t h = 0; - - timer.start(); - for(int i = 1; i < 1000000 * 100; i++) { - // So the buf is not loop invariant, hence the F(...) 
- buf[i % 4096] = i; - h += func(buf, len) ^ i; - } - timer.stop(); - - // make h alive - test_printf("%x", h); - return timer.getElapseInSecond(); -} - -struct TestFuncWas -{ - uint32_t operator()(const char* buf, uint32_t len) { - return lj_original_hash(buf, len); - } -}; - -struct TestFuncIs -{ - uint32_t operator()(const char* buf, uint32_t len) { - return lj_str_hash(buf, len); - } -}; - -static void -benchmarkIndividual(char* buf) -{ - fprintf(stdout,"\n\nCompare performance of particular len (in second)\n"); - fprintf(stdout, "%-12s%-8s%-8s%s\n", "len", "was", "is", "diff"); - fprintf(stdout, "-------------------------------------------\n"); - - uint32_t lens[] = {3, 4, 7, 10, 15, 16, 20, 32, 36, 63, 80, 100, - 120, 127, 280, 290, 400}; - for (unsigned i = 0; i < sizeof(lens)/sizeof(lens[0]); i++) { - uint32_t len = lens[i]; - double e1 = BenchmarkHashTmpl(TestFuncWas(), buf, len); - double e2 = BenchmarkHashTmpl(TestFuncIs(), buf, len); - fprintf(stdout, "len = %4d: %-7.3lf %-7.3lf %.2f\n", len, e1, e2, (e1-e2)/e1); - } -} - -template double -BenchmarkChangeLenTmpl(T func, char* buf, uint32_t* len_vect, uint32_t len_num) -{ - TestClock timer; - uint32_t h = 0; - - timer.start(); - for(int i = 1; i < 1000000 * 100; i++) { - for (int j = 0; j < (int)len_num; j++) { - // So the buf is not loop invariant, hence the F(...) - buf[(i + j) % 4096] = i; - h += func(buf, len_vect[j]) ^ j; - } - } - timer.stop(); - - // make h alive - test_printf("%x", h); - return timer.getElapseInSecond(); -} - -// It is to measure the performance when length is changing. -// The purpose is to see how balanced branches impact the performance. 
-// -static void -benchmarkToggleLens(char* buf) -{ - double e1, e2; - fprintf(stdout,"\nChanging length (in second):"); - fprintf(stdout, "\n%-20s%-8s%-8s%s\n%s\n", "len", "was", "is", "diff", - separator); - - uint32_t lens1[] = {4, 9}; - e1 = BenchmarkChangeLenTmpl(TestFuncWas(), buf, lens1, 2); - e2 = BenchmarkChangeLenTmpl(TestFuncIs(), buf, lens1, 2); - fprintf(stdout, "%-20s%-7.3lf %-7.3lf %.2f\n", "4,9", e1, e2, (e1-e2)/e1); - - uint32_t lens2[] = {1, 4, 9}; - e1 = BenchmarkChangeLenTmpl(TestFuncWas(), buf, lens2, 3); - e2 = BenchmarkChangeLenTmpl(TestFuncIs(), buf, lens2, 3); - fprintf(stdout, "%-20s%-7.3lf %-7.3lf %.2f\n", "1,4,9", e1, e2, (e1-e2)/e1); - - uint32_t lens3[] = {1, 33, 4, 9}; - e1 = BenchmarkChangeLenTmpl(TestFuncWas(), buf, lens3, 4); - e2 = BenchmarkChangeLenTmpl(TestFuncIs(), buf, lens3, 4); - fprintf(stdout, "%-20s%-7.3lf %-7.3lf %.2f\n", "1,33,4,9", - e1, e2, (e1-e2)/e1); -} - -static void -genRandomString(uint32_t min, uint32_t max, - uint32_t num, vector& result) -{ - double scale = (max - min) / (RAND_MAX + 1.0); - result.clear(); - result.reserve(num); - for (uint32_t i = 0; i < num; i++) { - uint32_t len = (rand() * scale) + min; - - char* buf = new char[len]; - for (uint32_t l = 0; l < len; l++) { - buf[l] = rand() % 255; - } - result.push_back(string(buf, len)); - delete[] buf; - } -} - -// Return the standard deviation of given array of number -static double -standarDeviation(const vector& v) -{ - uint64_t total = 0; - for (vector::const_iterator i = v.begin(), e = v.end(); - i != e; ++i) { - total += *i; - } - - double avg = total / (double)v.size(); - double sd = 0; - - for (vector::const_iterator i = v.begin(), e = v.end(); - i != e; ++i) { - double t = avg - *i; - sd = sd + t*t; - } - - return sqrt(sd/v.size()); -} - -static pair -benchmarkConflictHelper(uint32_t bucketNum, const vector& strs) -{ - if (bucketNum & (bucketNum - 1)) { - bucketNum = (1L << (log2_floor(bucketNum) + 1)); - } - uint32_t mask = bucketNum - 1; - - 
vector conflictWas(bucketNum); - vector conflictIs(bucketNum); - - conflictWas.resize(bucketNum); - conflictIs.resize(bucketNum); - - for (vector::const_iterator i = strs.begin(), e = strs.end(); - i != e; ++i) { - uint32_t h1 = lj_original_hash(i->c_str(), i->size()); - uint32_t h2 = lj_str_hash(i->c_str(), i->size()); - - conflictWas[h1 & mask]++; - conflictIs[h2 & mask]++; - } - -#if 0 - std::sort(conflictWas.begin(), conflictWas.end(), std::greater()); - std::sort(conflictIs.begin(), conflictIs.end(), std::greater()); - - fprintf(stderr, "%d %d %d %d vs %d %d %d %d\n", - conflictWas[0], conflictWas[1], conflictWas[2], conflictWas[3], - conflictIs[0], conflictIs[1], conflictIs[2], conflictIs[3]); -#endif - - return pair(standarDeviation(conflictWas), - standarDeviation(conflictIs)); -} - -static void -benchmarkConflict() -{ - srand(time(0)); - - float loadFactor[] = { 0.5f, 1.0f, 2.0f, 4.0f, 8.0f }; - int bucketNum[] = { 512, 1024, 2048, 4096, 8192, 16384}; - int lenRange[][2] = { {1,3}, {4, 15}, {16, 127}, {128, 1024}, {1, 1024}}; - - fprintf(stdout, - "\nBechmarking conflict (stand deviation of conflict)\n%s\n", - separator); - - for (uint32_t k = 0; k < sizeof(lenRange)/sizeof(lenRange[0]); k++) { - fprintf(stdout, "\nlen range from %d - %d\n", lenRange[k][0], - lenRange[k][1]); - fprintf(stdout, "%-10s %-12s %-10s %-10s diff\n%s\n", - "bucket", "load-factor", "was", "is", separator); - for (uint32_t i = 0; i < sizeof(bucketNum)/sizeof(bucketNum[0]); ++i) { - for (uint32_t j = 0; - j < sizeof(loadFactor)/sizeof(loadFactor[0]); - ++j) { - int strNum = bucketNum[i] * loadFactor[j]; - vector strs(strNum); - genRandomString(lenRange[k][0], lenRange[k][1], strNum, strs); - - pair p; - p = benchmarkConflictHelper(bucketNum[i], strs); - fprintf(stdout, "%-10d %-12.2f %-10.2f %-10.2f %.2f\n", - bucketNum[i], loadFactor[j], p.first, p.second, - p.first - p.second); - } - } - } -} - -static void -benchmarkHashFunc() -{ - char buf[4096]; - char c = getpid() % 'a'; - for 
(int i = 0; i < (int)sizeof(buf); i++) { - buf[i] = (c + i) % 255; - } - - benchmarkConflict(); - benchmarkIndividual(buf); - benchmarkToggleLens(buf); -} - -int -main(int argc, char** argv) -{ - fprintf(stdout, "========================\nMicro benchmark...\n"); - benchmarkHashFunc(); - return 0; -} diff --git a/src/x64/test/test.cpp b/src/x64/test/test.cpp deleted file mode 100644 index bc92acbbf..000000000 --- a/src/x64/test/test.cpp +++ /dev/null @@ -1,73 +0,0 @@ -#include -#include -#include -#include -#include "test_util.hpp" -#include "lj_str_hash_x64.h" - -using namespace std; - -static bool -smoke_test() -{ - fprintf(stdout, "running smoke tests...\n"); - char buf[1024]; - char c = getpid() % 'a'; - - for (int i = 0; i < (int)sizeof(buf); i++) { - buf[i] = (c + i) % 255; - } - - uint32_t lens[] = {3, 4, 5, 7, 8, 16, 17, 24, 25, 32, 33, 127, 128, - 255, 256, 257}; - for (unsigned i = 0; i < sizeof(lens)/sizeof(lens[0]); i++) { - string s(buf, lens[i]); - test_printf("%d", lj_str_hash(s.c_str(), lens[i])); - } - - return true; -} - -static bool -verify_log2() -{ - fprintf(stdout, "verify log2...\n"); - bool err = false; - std::map lm; - lm[0] =(uint32_t)-1; - lm[1] = 0; - lm[2] = 1; - for (int i = 2; i < 31; i++) { - lm[(1<::iterator iter = lm.begin(), iter_e = lm.end(); - iter != iter_e; ++iter) { - uint32_t v = (*iter).first; - uint32_t log2_expect = (*iter).second; - uint32_t log2_get = log2_floor(v); - if (log2_expect != log2_get) { - err = true; - fprintf(stderr, "log2(%u) expect %u, get %u\n", v, log2_expect, log2_get); - exit(1); - } - } - return !err; -} - -int -main(int argc, char** argv) -{ - fprintf(stdout, "=======================\nRun unit testing...\n"); - - ASSERT(smoke_test(), "smoke_test test failed"); - ASSERT(verify_log2(), "log2 failed"); - - fprintf(stdout, TestErrMsgMgr::noError() ? "succ\n\n" : "fail\n\n"); - - return TestErrMsgMgr::noError() ? 
0 : -1; -} diff --git a/src/x64/test/test_str_comp.lua b/src/x64/test/test_str_comp.lua deleted file mode 100644 index 3a5c3e676..000000000 --- a/src/x64/test/test_str_comp.lua +++ /dev/null @@ -1,67 +0,0 @@ ---[[ - Given two content-idental string s1, s2, test if they end up to be the - same string object. The purpose of this test is to make sure hash function - do not accidently include extraneous bytes before and after the string in - question. -]] - -local ffi = require("ffi") -local C = ffi.C - -ffi.cdef[[ - void free(void*); - char* malloc(size_t); - void *memset(void*, int, size_t); - void *memcpy(void*, void*, size_t); - long time(void*); - void srandom(unsigned); - long random(void); -]] - - -local function test_equal(len_min, len_max) - -- source string is wrapped by 16-byte-junk both before and after the - -- string - local x = C.random() - local l = len_min + x % (len_max - len_min); - local buf_len = tonumber(l + 16 * 2) - - local src_buf = C.malloc(buf_len) - for i = 0, buf_len - 1 do - src_buf[i] = C.random() % 255 - end - - -- dest string is the clone of the source string, but it is sandwiched - -- by different junk bytes - local dest_buf = C.malloc(buf_len) - C.memset(dest_buf, 0x5a, buf_len) - - local ofst = 8 + (C.random() % 8) - C.memcpy(dest_buf + ofst, src_buf + 16, l); - - local str1 = ffi.string(src_buf + 16, l) - local str2 = ffi.string(dest_buf + ofst, l) - - C.free(src_buf) - C.free(dest_buf) - - if str1 ~= str2 then - -- Oops, look like hash function mistakenly include extraneous bytes - -- close to the string - return 1 -- wtf - end -end - ---local lens = {1, 4, 16, 128, 1024} -local lens = {128, 1024} -local iter = 1000 - -for i = 1, #lens - 1 do - for j = 1, iter do - if test_equal(lens[i], lens[i+1]) ~= nil then - os.exit(1) - end - end -end - -os.exit(0) diff --git a/src/x64/test/test_util.cxx b/src/x64/test/test_util.cxx deleted file mode 100644 index 34b7d6751..000000000 --- a/src/x64/test/test_util.cxx +++ /dev/null @@ -1,21 +0,0 
@@ -#include -#include -#include "test_util.hpp" - -using namespace std; - -std::vector TestErrMsgMgr::_errMsg; - -void -test_printf(const char* format, ...) -{ - va_list args; - va_start (args, format); - - FILE* devNull = fopen("/dev/null", "w"); - if (devNull != 0) { - (void)vfprintf (devNull, format, args); - } - fclose(devNull); - va_end (args); -} diff --git a/src/x64/test/test_util.hpp b/src/x64/test/test_util.hpp deleted file mode 100644 index 6cc2ea2cf..000000000 --- a/src/x64/test/test_util.hpp +++ /dev/null @@ -1,57 +0,0 @@ -#ifndef _TEST_UTIL_HPP_ -#define _TEST_UTIL_HPP_ - -#include // gettimeofday() -#include -#include - -struct TestErrMsg -{ - const char* fileName; - unsigned lineNo; - std::string errMsg; - - TestErrMsg(const char* FN, unsigned LN, const char* Err): - fileName(FN), lineNo(LN), errMsg(Err) {} -}; - -class TestErrMsgMgr -{ -public: - static std::vector getError(); - static void - addError(const char* fileName, unsigned lineNo, const char* Err) { - _errMsg.push_back(TestErrMsg(fileName, lineNo, Err)); - } - - static bool noError() { - return _errMsg.empty(); - } - -private: - static std::vector _errMsg; -}; - -#define ASSERT(c, e) \ - if (!(c)) { TestErrMsgMgr::addError(__FILE__, __LINE__, (e)); } - -class TestClock -{ -public: - void start() { gettimeofday(&_start, 0); } - void stop() { gettimeofday(&_end, 0); } - double getElapseInSecond() { - return (_end.tv_sec - _start.tv_sec) - + ((long)_end.tv_usec - (long)_start.tv_usec) / 1000000.0; - } - -private: - struct timeval _start, _end; -}; - -// write to /dev/null, the only purpose is to make the data fed to the -// function alive. -extern void test_printf(const char* format, ...) 
- __attribute__ ((format (printf, 1, 2))); - -#endif //_TEST_UTIL_HPP_ From 4af86b2db5eb5ae3dfe01be89c1ea63291965d64 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Fri, 25 Sep 2020 14:28:14 +0530 Subject: [PATCH 45/47] Revert "optimize: lj_str_new: tests the full hash value before doing the full string comparison on hash collisions. thanks Shuxin Yang for the patch." This reverts commit 00895099c964b4713cad1492839a95fa79ea54d6. --- src/lj_str.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lj_str.c b/src/lj_str.c index 4fb9c2d3a..59357f15c 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -163,7 +163,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { while (o != NULL) { GCstr *sx = gco2str(o); - if (sx->len == len && sx->hash == h && str_fastcmp(str, strdata(sx), len) == 0) { + if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { /* Resurrect if dead. Can only happen with fixstring() (keywords). */ if (isdead(g, o)) flipwhite(o); return sx; /* Return existing string. */ @@ -174,7 +174,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) #endif while (o != NULL) { GCstr *sx = gco2str(o); - if (sx->len == len && sx->hash == h && memcmp(str, strdata(sx), len) == 0) { + if (sx->len == len && memcmp(str, strdata(sx), len) == 0) { /* Resurrect if dead. Can only happen with fixstring() (keywords). */ if (isdead(g, o)) flipwhite(o); return sx; /* Return existing string. */ From 3d9a5481f6b66469747970419ffb52c309966d95 Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Fri, 25 Sep 2020 14:26:39 +0530 Subject: [PATCH 46/47] Revert "no clever memory over-reads when LUAJIT_USE_VALGRIND is defined." This partially reverts commit 6315a752274f3a4db6827b64788173f40733204e since lj_str_new no longer has the fast path. 
--- src/lj_str.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/lj_str.c b/src/lj_str.c index 59357f15c..86a47a0ea 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -159,7 +159,6 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) h ^= b; h -= lj_rol(b, 16); /* Check if the string has already been interned. */ o = gcref(g->strhash[h & g->strmask]); -#ifndef LUAJIT_USE_VALGRIND if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { while (o != NULL) { GCstr *sx = gco2str(o); @@ -171,7 +170,6 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) o = gcnext(o); } } else { /* Slow path: end of string is too close to a page boundary. */ -#endif while (o != NULL) { GCstr *sx = gco2str(o); if (sx->len == len && memcmp(str, strdata(sx), len) == 0) { @@ -181,9 +179,7 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) } o = gcnext(o); } -#ifndef LUAJIT_USE_VALGRIND } -#endif /* Nope, create a new string. */ s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); newwhite(g, s); From 7c03b051b2d8bd2dbe668332ffb8cddb71685b1b Mon Sep 17 00:00:00 2001 From: Siddhesh Poyarekar Date: Mon, 28 Sep 2020 17:28:04 +0530 Subject: [PATCH 47/47] Make jit.prngstate use the new PRNG code This change splits out the PRNG so that the PRNG used by the JIT compiler can be overridden without affecting the PRNG for the allocator. The API is changed to reflect the wider PRNG data, which is now 256 bits. jit.prngstate returns an array with 8 32-bit integers and similarly, it can be overridden with an array of up to 8 32-bit integers. Tests and README.md have been updated to reflect this change. 
--- README.md | 13 +++++++------ src/lib_jit.c | 47 ++++++++++++++++++++++++++++++++++++++++++++--- src/lj_errmsg.h | 1 + src/lj_jit.h | 2 ++ src/lj_mcode.c | 2 +- src/lj_record.c | 2 +- src/lj_trace.c | 4 +++- t/prngstate.t | 28 +++++++++++++++++++--------- 8 files changed, 78 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index e865990ce..63e0bb6b5 100644 --- a/README.md +++ b/README.md @@ -137,21 +137,22 @@ second argument. **syntax:** *state = jit.prngstate(state?)* -Returns (and optionally sets) the current PRNG state (a Lua number) currently -used by the JIT compiler. +Returns (and optionally sets) the current PRNG state (an array of 8 Lua +numbers with 32-bit integer values) currently used by the JIT compiler. -When the `state` argument is non-nil, it is expected to be a number, and will -override the current PRNG state. +When the `state` argument is non-nil, it is expected to be an array of up to 8 +unsigned Lua numbers, each with value less than 2\*\*32. This will set the +current PRNG state and return the state that was overridden. Usage: ```lua local state = jit.prngstate() -local newstate = jit.prngstate(123456) +local oldstate = jit.prngstate({123456}) ``` **Note:** This API has no effect if LuaJIT is compiled with -`-DLUAJIT_DISABLE_JIT`, and will return `0`. +`-DLUAJIT_DISABLE_JIT`, and will return a table of eight `0` values. [Back to TOC](#table-of-contents) diff --git a/src/lib_jit.c b/src/lib_jit.c index d0480b7b4..201b96f0a 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -150,9 +150,50 @@ LJLIB_CF(jit_attach) LJLIB_CF(jit_prngstate) { - /* XXX Adapt to new pRNG algorithm. */ - int32_t cur = 0; - setintV(L->top++, cur); + GCtab *cur = lj_tab_new(L, 8, 0); + +#if LJ_HASJIT + jit_State *J = L2J(L); + + /* The old state.
*/ + for (int i = 1; i <= 4; i++) { + setintV(lj_tab_setint(L, cur, i*2-1), J->prng.u[i-1] & 0xffffffff); + setintV(lj_tab_setint(L, cur, i*2), J->prng.u[i-1] >> 32); + } + + /* We need to set new state using the input array. */ + if (L->base < L->top && !tvisnil(L->base)) { + GCtab *t = lj_lib_checktab(L, 1); + PRNGState prng; + int i = 1, len = lj_tab_len(t); + + /* The input array must have at most 8 elements. */ + if (len > 8) + lj_err_arg(L, 1, LJ_ERR_PRNGSTATE); + + for (i = 1; i <= len; i++) { + cTValue *v = lj_tab_getint(t, i); + + if (!tvisint(v) && (!tvisnum(v) || (double)(uint32_t)numV(v) != numV(v))) + lj_err_arg(L, 1, LJ_ERR_PRNGSTATE); + + if (i & 1) + prng.u[(i-1)/2] = numberVint(v); + else + prng.u[(i-1)/2] = prng.u[(i-1)/2] | ((uint64_t)numberVint(v) << 32); + } + + for (i /= 2; i < 4; i++) + prng.u[i] = 0; + + /* Re-initialize the JIT prng. */ + J->prng = prng; + } +#else + for (int i = 1; i <= 8; i++) + setintV(lj_tab_setint(L, cur, i), 0); +#endif + settabV(L, L->top++, cur); return 1; } diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index 9ac870ab5..065ae2909 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h @@ -108,6 +108,7 @@ ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") ERRDEF(NOJIT, "JIT compiler permanently disabled by build option") #endif ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS) +ERRDEF(PRNGSTATE, "PRNG state must be an array with up to 8 integers") /* Lexer/parser errors. */ ERRDEF(XMODE, "attempt to load chunk with wrong mode") diff --git a/src/lj_jit.h b/src/lj_jit.h index 9e0e49d16..974094c05 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -472,6 +472,8 @@ typedef struct jit_State { HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */ uint32_t penaltyslot; /* Round-robin index into penalty slots. */ + PRNGState prng; /* PRNG state for the JIT compiler, defaults to prng in + global_State. */ #ifdef LUAJIT_ENABLE_TABLE_BUMP RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. 
*/ diff --git a/src/lj_mcode.c b/src/lj_mcode.c index a5153b25b..e9ebd168b 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -225,7 +225,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) } /* Next try probing 64K-aligned pseudo-random addresses. */ do { - hint = lj_prng_u64(&J2G(J)->prng) & ((1u<prng) & ((1u<pc+1, lj_prng_u64(&J2G(J)->prng) & 15u); + hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J->prng) & 15u); } lj_trace_err(J, LJ_TRERR_CUNROLL); } diff --git a/src/lj_trace.c b/src/lj_trace.c index 80dd4279c..ba4c95daa 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -312,6 +312,8 @@ void lj_trace_initstate(global_State *g) jit_State *J = G2J(g); TValue *tv; + J->prng = g->prng; + /* Initialize aligned SIMD constants. */ tv = LJ_KSIMD(J, LJ_KSIMD_ABS); tv[0].u64 = U64x(7fffffff,ffffffff); @@ -385,7 +387,7 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */ /* First try to bump its hotcount several times. */ val = ((uint32_t)J->penalty[i].val << 1) + - (lj_prng_u64(&J2G(J)->prng) & ((1u<prng) & ((1u< PENALTY_MAX) { blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */ return; diff --git a/t/prngstate.t b/t/prngstate.t index b2c5ec5c3..e28628a51 100644 --- a/t/prngstate.t +++ b/t/prngstate.t @@ -13,17 +13,27 @@ __DATA__ --- lua jit.off() -print(jit.prngstate()) -print(jit.prngstate(32)) -print(jit.prngstate(5617)) -print(jit.prngstate()) +function print_array(a) + local out = a[1] + for i=2,#a do + out = out.." 
"..tostring(a[i]) + end + print(out) +end + +jit.prngstate({32}) +print_array(jit.prngstate({56,1,7})) +print_array(jit.prngstate({423,432,432,423,56,867,35,5347})) +print_array(jit.prngstate()) +print_array(jit.prngstate({423,432,432,423,56,867,35,5347,452})) --- out -0 -0 -32 -5617 +32 0 0 0 0 0 0 0 +56 1 7 0 0 0 0 0 +423 432 432 423 56 867 35 5347 --- jv --- err +bad argument #1 to 'prngstate' (PRNG state must be an array with up to 8 integers) +--- exit: 1 @@ -32,7 +42,7 @@ print(jit.prngstate()) jit.opt.start("minstitch=100000", "hotloop=2") for i = 1, 50 do - jit.prngstate(i) + jit.prngstate({i}) end print('ok') --- out