diff --git a/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.cpp index 436aac4031ddd..7adcad865f578 100644 --- a/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.cpp +++ b/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.cpp @@ -1129,213 +1129,125 @@ void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, } // Compare strings, used for char[] and byte[]. -void C2_MacroAssembler::string_compare(Register str1, Register str2, - Register cnt1, Register cnt2, Register result, - int ae, Register tmp1, Register tmp2, - FloatRegister vtmp1, FloatRegister vtmp2) { - Label L, Loop, LoopEnd, HaveResult, Done, Loop_Start, - V_L, V_Loop, V_Result, V_Start; - - bool isLL = ae == StrIntrinsicNode::LL; - bool isLU = ae == StrIntrinsicNode::LU; - bool isUL = ae == StrIntrinsicNode::UL; - bool isUU = ae == StrIntrinsicNode::UU; - - bool str1_isL = isLL || isLU; - bool str2_isL = isLL || isUL; - - int charsInWord = isLL ? wordSize : wordSize/2; - int charsInFloatRegister = (UseLASX && (isLL||isUU))?(isLL? 32 : 16):(isLL? 
16 : 8); - - if (!str1_isL) srli_w(cnt1, cnt1, 1); - if (!str2_isL) srli_w(cnt2, cnt2, 1); +void C2_MacroAssembler::string_compareL(Register str1, Register str2, + Register cnt1, Register cnt2, + Register result, + Register tmp1, Register tmp2, + FloatRegister vtmp1, + FloatRegister vtmp2) { + Label L, Loop, LoopEnd, HaveResult, Done, XV_Start, V_Start; // compute the difference of lengths (in result) - sub_d(result, cnt1, cnt2); // result holds the difference of two lengths + sub_d(result, cnt1, cnt2); // compute the shorter length (in cnt1) - bge(cnt2, cnt1, V_Start); + bge(cnt2, cnt1, XV_Start); move(cnt1, cnt2); + bind(XV_Start); + + // tiny string + li(AT, wordSize); + blt(cnt1, AT, LoopEnd); + + if (UseLSX) { + slli_d(tmp1, AT, 1); // less than 16 + blt(cnt1, tmp1, Loop); + } - bind(V_Start); // it is hard to apply the xvilvl to flate 16 bytes into 32 bytes, // so we employ the LASX only for the LL or UU StrIntrinsicNode. - if (UseLASX && (isLL || isUU)) { - ori(AT, R0, charsInFloatRegister); - addi_d(tmp1, R0, 16); - xvinsgr2vr_d(fscratch, R0, 0); + if (UseLASX) { + Label XV_L, XV_Loop, XV_Result; + + slli_d(tmp2, tmp1, 1); // less than 32 + blt(cnt1, tmp2, V_Start); + + li(tmp1, 16); + xvxor_v(fscratch, fscratch, fscratch); xvinsgr2vr_d(fscratch, tmp1, 2); + + bind(XV_Loop); + xvld(vtmp1, str1, 0); + xvld(vtmp2, str2, 0); + xvxor_v(vtmp1, vtmp1, vtmp2); + xvseteqz_v(FCC0, vtmp1); + bceqz(FCC0, XV_L); + + addi_d(cnt1, cnt1, -32); + addi_d(str1, str1, 32); + addi_d(str2, str2, 32); + bge(cnt1, tmp2, XV_Loop); + + // deal with the last loop + beqz(cnt1, Done); + addi_d(cnt1, cnt1, -32); + add_d(str1, str1, cnt1); + xvld(vtmp1, str1, 0); + add_d(str2, str2, cnt1); + xvld(vtmp2, str2, 0); + xvxor_v(vtmp1, vtmp1, vtmp2); + xvseteqz_v(FCC0, vtmp1); + bcnez(FCC0, Done); + + bind(XV_L); + xvxor_v(vtmp2, vtmp2, vtmp2); + xvabsd_b(vtmp1, vtmp1, vtmp2); + xvneg_b(vtmp1, vtmp1); + xvfrstp_b(vtmp2, vtmp1, fscratch); + xvpickve2gr_du(tmp1, vtmp2, 0); + addi_d(cnt2, R0, 
16); + bne(tmp1, cnt2, XV_Result); + + xvpickve2gr_du(tmp1, vtmp2, 2); + addi_d(tmp1, tmp1, 16); + + // the index value was stored in tmp1 + bind(XV_Result); + ldx_bu(result, str1, tmp1); + ldx_bu(tmp2, str2, tmp1); + sub_d(result, result, tmp2); + b(Done); + } + + bind(V_Start); + if (UseLSX) { + Label V_L, V_Loop, V_Result; + bind(V_Loop); - blt(cnt1, AT, Loop_Start); - if (isLL) { - xvld(vtmp1, str1, 0); - xvld(vtmp2, str2, 0); - xvxor_v(vtmp1, vtmp1, vtmp2); - xvseteqz_v(FCC0, vtmp1); - bceqz(FCC0, V_L); - - addi_d(str1, str1, 32); - addi_d(str2, str2, 32); - addi_d(cnt1, cnt1, -charsInFloatRegister); - b(V_Loop); - - bind(V_L); - xvxor_v(vtmp2, vtmp2, vtmp2); - xvabsd_b(vtmp1, vtmp1, vtmp2); - xvneg_b(vtmp1, vtmp1); - xvfrstp_b(vtmp2, vtmp1, fscratch); - xvpickve2gr_du(tmp1, vtmp2, 0); - addi_d(cnt2, R0, 16); - bne(tmp1, cnt2, V_Result); - - xvpickve2gr_du(tmp1, vtmp2, 2); - addi_d(tmp1, tmp1, 16); - - // the index value was stored in tmp1 - bind(V_Result); - ldx_bu(result, str1, tmp1); - ldx_bu(tmp2, str2, tmp1); - sub_d(result, result, tmp2); - b(Done); - } else if (isUU) { - xvld(vtmp1, str1, 0); - xvld(vtmp2, str2, 0); - xvxor_v(vtmp1, vtmp1, vtmp2); - xvseteqz_v(FCC0, vtmp1); - bceqz(FCC0, V_L); - - addi_d(str1, str1, 32); - addi_d(str2, str2, 32); - addi_d(cnt1, cnt1, -charsInFloatRegister); - b(V_Loop); - - bind(V_L); - xvxor_v(vtmp2, vtmp2, vtmp2); - xvabsd_h(vtmp1, vtmp1, vtmp2); - xvneg_h(vtmp1, vtmp1); - xvfrstp_h(vtmp2, vtmp1, fscratch); - xvpickve2gr_du(tmp1, vtmp2, 0); - addi_d(cnt2, R0, 8); - bne(tmp1, cnt2, V_Result); - - xvpickve2gr_du(tmp1, vtmp2, 2); - addi_d(tmp1, tmp1, 8); - - // the index value was stored in tmp1 - bind(V_Result); - slli_d(tmp1, tmp1, 1); - ldx_hu(result, str1, tmp1); - ldx_hu(tmp2, str2, tmp1); - sub_d(result, result, tmp2); - b(Done); - } - } else if (UseLSX) { - ori(AT, R0, charsInFloatRegister); - vxor_v(fscratch, fscratch, fscratch); - bind(V_Loop); - blt(cnt1, AT, Loop_Start); - if (isLL) { - vld(vtmp1, str1, 0); - 
vld(vtmp2, str2, 0); - vxor_v(vtmp1, vtmp1, vtmp2); - vseteqz_v(FCC0, vtmp1); - bceqz(FCC0, V_L); - - addi_d(str1, str1, 16); - addi_d(str2, str2, 16); - addi_d(cnt1, cnt1, -charsInFloatRegister); - b(V_Loop); - - bind(V_L); - vxor_v(vtmp2, vtmp2, vtmp2); - vabsd_b(vtmp1, vtmp1, vtmp2); - vneg_b(vtmp1, vtmp1); - vfrstpi_b(vtmp2, vtmp1, 0); - vpickve2gr_bu(tmp1, vtmp2, 0); - - // the index value was stored in tmp1 - ldx_bu(result, str1, tmp1); - ldx_bu(tmp2, str2, tmp1); - sub_d(result, result, tmp2); - b(Done); - } else if (isLU) { - vld(vtmp1, str1, 0); - vld(vtmp2, str2, 0); - vilvl_b(vtmp1, fscratch, vtmp1); - vxor_v(vtmp1, vtmp1, vtmp2); - vseteqz_v(FCC0, vtmp1); - bceqz(FCC0, V_L); - - addi_d(str1, str1, 8); - addi_d(str2, str2, 16); - addi_d(cnt1, cnt1, -charsInFloatRegister); - b(V_Loop); - - bind(V_L); - vxor_v(vtmp2, vtmp2, vtmp2); - vabsd_h(vtmp1, vtmp1, vtmp2); - vneg_h(vtmp1, vtmp1); - vfrstpi_h(vtmp2, vtmp1, 0); - vpickve2gr_bu(tmp1, vtmp2, 0); - - // the index value was stored in tmp1 - ldx_bu(result, str1, tmp1); - slli_d(tmp1, tmp1, 1); - ldx_hu(tmp2, str2, tmp1); - sub_d(result, result, tmp2); - b(Done); - } else if (isUL) { - vld(vtmp1, str1, 0); - vld(vtmp2, str2, 0); - vilvl_b(vtmp2, fscratch, vtmp2); - vxor_v(vtmp1, vtmp1, vtmp2); - vseteqz_v(FCC0, vtmp1); - bceqz(FCC0, V_L); - - addi_d(str1, str1, 16); - addi_d(str2, str2, 8); - addi_d(cnt1, cnt1, -charsInFloatRegister); - b(V_Loop); - - bind(V_L); - vxor_v(vtmp2, vtmp2, vtmp2); - vabsd_h(vtmp1, vtmp1, vtmp2); - vneg_h(vtmp1, vtmp1); - vfrstpi_h(vtmp2, vtmp1, 0); - vpickve2gr_bu(tmp1, vtmp2, 0); - - // the index value was stored in tmp1 - ldx_bu(tmp2, str2, tmp1); - slli_d(tmp1, tmp1, 1); - ldx_hu(result, str1, tmp1); - sub_d(result, result, tmp2); - b(Done); - } else if (isUU) { - vld(vtmp1, str1, 0); - vld(vtmp2, str2, 0); - vxor_v(vtmp1, vtmp1, vtmp2); - vseteqz_v(FCC0, vtmp1); - bceqz(FCC0, V_L); - - addi_d(str1, str1, 16); - addi_d(str2, str2, 16); - addi_d(cnt1, cnt1, 
-charsInFloatRegister); - b(V_Loop); - - bind(V_L); - vxor_v(vtmp2, vtmp2, vtmp2); - vabsd_h(vtmp1, vtmp1, vtmp2); - vneg_h(vtmp1, vtmp1); - vfrstpi_h(vtmp2, vtmp1, 0); - vpickve2gr_bu(tmp1, vtmp2, 0); - - // the index value was stored in tmp1 - slli_d(tmp1, tmp1, 1); - ldx_hu(result, str1, tmp1); - ldx_hu(tmp2, str2, tmp1); - sub_d(result, result, tmp2); - b(Done); - } + vld(vtmp1, str1, 0); + vld(vtmp2, str2, 0); + vxor_v(vtmp1, vtmp1, vtmp2); + vseteqz_v(FCC0, vtmp1); + bceqz(FCC0, V_L); + + addi_d(cnt1, cnt1, -16); + addi_d(str1, str1, 16); + addi_d(str2, str2, 16); + bge(cnt1, tmp1, V_Loop); + + // deal with the last loop + beqz(cnt1, Done); + addi_d(cnt1, cnt1, -16); + add_d(str1, str1, cnt1); + vld(vtmp1, str1, 0); + add_d(str2, str2, cnt1); + vld(vtmp2, str2, 0); + vxor_v(vtmp1, vtmp1, vtmp2); + vseteqz_v(FCC0, vtmp1); + bcnez(FCC0, Done); + + bind(V_L); + vxor_v(vtmp2, vtmp2, vtmp2); + vabsd_b(vtmp1, vtmp1, vtmp2); + vneg_b(vtmp1, vtmp1); + vfrstpi_b(vtmp2, vtmp1, 0); + vpickve2gr_bu(tmp1, vtmp2, 0); + + // the index value was stored in tmp1 + ldx_bu(result, str1, tmp1); + ldx_bu(tmp2, str2, tmp1); + sub_d(result, result, tmp2); + b(Done); } // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register @@ -1352,118 +1264,424 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2, // Fetch 0 to 7 bits of tmp1 and tmp2, subtract to get the result. // Other types are similar to isLL. 
- bind(Loop_Start); - ori(AT, R0, charsInWord); bind(Loop); + ld_d(tmp1, str1, 0); + ld_d(tmp2, str2, 0); + beq(tmp1, tmp2, L); + xorr(cnt2, tmp1, tmp2); + ctz_d(cnt2, cnt2); + andi(cnt2, cnt2, 0x38); + srl_d(tmp1, tmp1, cnt2); + srl_d(tmp2, tmp2, cnt2); + bstrpick_d(tmp1, tmp1, 7, 0); + bstrpick_d(tmp2, tmp2, 7, 0); + sub_d(result, tmp1, tmp2); + b(Done); + + bind(L); + addi_d(cnt1, cnt1, -8); + addi_d(str1, str1, 8); + addi_d(str2, str2, 8); + bge(cnt1, AT, Loop); + + // compare current character + bind(LoopEnd); + beqz(cnt1, Done); + ld_bu(tmp1, str1, 0); + ld_bu(tmp2, str2, 0); + bne(tmp1, tmp2, HaveResult); + addi_d(cnt1, cnt1, -1); + addi_d(str1, str1, 1); + addi_d(str2, str2, 1); + b(LoopEnd); + + bind(HaveResult); + sub_d(result, tmp1, tmp2); + + bind(Done); +} + +void C2_MacroAssembler::string_compareU(Register str1, Register str2, + Register cnt1, Register cnt2, + Register result, + Register tmp1, Register tmp2, + FloatRegister vtmp1, + FloatRegister vtmp2) { + Label L, Loop, LoopEnd, HaveResult, Done, XV_Start, V_Start; + + // compute the difference of lengths (in result) + srai_w(cnt1, cnt1, 1); + srai_w(cnt2, cnt2, 1); + sub_d(result, cnt1, cnt2); + + // compute the shorter length (in cnt1) + bge(cnt2, cnt1, XV_Start); + move(cnt1, cnt2); + bind(XV_Start); + + // tiny string + li(AT, wordSize/2); blt(cnt1, AT, LoopEnd); - if (isLL) { - ld_d(tmp1, str1, 0); - ld_d(tmp2, str2, 0); - beq(tmp1, tmp2, L); - xorr(cnt2, tmp1, tmp2); - ctz_d(cnt2, cnt2); - andi(cnt2, cnt2, 0x38); - srl_d(tmp1, tmp1, cnt2); - srl_d(tmp2, tmp2, cnt2); - bstrpick_d(tmp1, tmp1, 7, 0); - bstrpick_d(tmp2, tmp2, 7, 0); - sub_d(result, tmp1, tmp2); - b(Done); - bind(L); - addi_d(str1, str1, 8); - addi_d(str2, str2, 8); - addi_d(cnt1, cnt1, -charsInWord); - b(Loop); - } else if (isLU) { - ld_wu(cnt2, str1, 0); - andr(tmp1, R0, R0); - bstrins_d(tmp1, cnt2, 7, 0); - srli_d(cnt2, cnt2, 8); - bstrins_d(tmp1, cnt2, 23, 16); - srli_d(cnt2, cnt2, 8); - bstrins_d(tmp1, cnt2, 39, 32); - 
srli_d(cnt2, cnt2, 8); - bstrins_d(tmp1, cnt2, 55, 48); - ld_d(tmp2, str2, 0); - beq(tmp1, tmp2, L); - xorr(cnt2, tmp1, tmp2); - ctz_d(cnt2, cnt2); - andi(cnt2, cnt2, 0x30); - srl_d(tmp1, tmp1, cnt2); - srl_d(tmp2, tmp2, cnt2); - bstrpick_d(tmp1, tmp1, 15, 0); - bstrpick_d(tmp2, tmp2, 15, 0); - sub_d(result, tmp1, tmp2); + + if (UseLSX) { + slli_d(tmp1, AT, 1); // less than 8 + blt(cnt1, tmp1, Loop); + } + + // it is hard to apply the xvilvl to inflate 16 bytes into 32 bytes, + // so we employ the LASX only for the LL or UU StrIntrinsicNode. + if (UseLASX) { + Label XV_L, XV_Loop, XV_Result; + + slli_d(tmp2, tmp1, 1); // less than 16 + blt(cnt1, tmp2, V_Start); + + li(tmp1, 16); + xvxor_v(fscratch, fscratch, fscratch); + xvinsgr2vr_d(fscratch, tmp1, 2); + + bind(XV_Loop); + xvld(vtmp1, str1, 0); + xvld(vtmp2, str2, 0); + xvxor_v(vtmp1, vtmp1, vtmp2); + xvseteqz_v(FCC0, vtmp1); + bceqz(FCC0, XV_L); + + addi_d(cnt1, cnt1, -16); + addi_d(str1, str1, 32); + addi_d(str2, str2, 32); + bge(cnt1, tmp2, XV_Loop); + + // deal with the last loop + beqz(cnt1, Done); + addi_d(cnt1, cnt1, -16); + alsl_d(str1, cnt1, str1, 0); + xvld(vtmp1, str1, 0); + alsl_d(str2, cnt1, str2, 0); + xvld(vtmp2, str2, 0); + xvxor_v(vtmp1, vtmp1, vtmp2); + xvseteqz_v(FCC0, vtmp1); + bcnez(FCC0, Done); + + bind(XV_L); + xvxor_v(vtmp2, vtmp2, vtmp2); + xvabsd_h(vtmp1, vtmp1, vtmp2); + xvneg_h(vtmp1, vtmp1); + xvfrstp_h(vtmp2, vtmp1, fscratch); + xvpickve2gr_du(tmp1, vtmp2, 0); + addi_d(cnt2, R0, 8); + bne(tmp1, cnt2, XV_Result); + + xvpickve2gr_du(tmp1, vtmp2, 2); + addi_d(tmp1, tmp1, 8); + + // the index value was stored in tmp1 + bind(XV_Result); + slli_d(tmp1, tmp1, 1); + ldx_hu(result, str1, tmp1); + ldx_hu(tmp2, str2, tmp1); + sub_d(result, result, tmp2); b(Done); - bind(L); - addi_d(str1, str1, 4); - addi_d(str2, str2, 8); - addi_d(cnt1, cnt1, -charsInWord); - b(Loop); - } else if (isUL) { - ld_wu(cnt2, str2, 0); - andr(tmp2, R0, R0); - bstrins_d(tmp2, cnt2, 7, 0); - srli_d(cnt2, cnt2, 8); -
bstrins_d(tmp2, cnt2, 23, 16); - srli_d(cnt2, cnt2, 8); - bstrins_d(tmp2, cnt2, 39, 32); - srli_d(cnt2, cnt2, 8); - bstrins_d(tmp2, cnt2, 55, 48); - ld_d(tmp1, str1, 0); - beq(tmp1, tmp2, L); - xorr(cnt2, tmp1, tmp2); - ctz_d(cnt2, cnt2); - andi(cnt2, cnt2, 0x30); - srl_d(tmp1, tmp1, cnt2); - srl_d(tmp2, tmp2, cnt2); - bstrpick_d(tmp1, tmp1, 15, 0); - bstrpick_d(tmp2, tmp2, 15, 0); - sub_d(result, tmp1, tmp2); + } + + bind(V_Start); + if (UseLSX) { + Label V_L, V_Loop, V_Result; + + bind(V_Loop); + vld(vtmp1, str1, 0); + vld(vtmp2, str2, 0); + vxor_v(vtmp1, vtmp1, vtmp2); + vseteqz_v(FCC0, vtmp1); + bceqz(FCC0, V_L); + + addi_d(cnt1, cnt1, -8); + addi_d(str1, str1, 16); + addi_d(str2, str2, 16); + bge(cnt1, tmp1, V_Loop); + + // deal with the last loop + beqz(cnt1, Done); + addi_d(cnt1, cnt1, -8); + alsl_d(str1, cnt1, str1, 0); + vld(vtmp1, str1, 0); + alsl_d(str2, cnt1, str2, 0); + vld(vtmp2, str2, 0); + vxor_v(vtmp1, vtmp1, vtmp2); + vseteqz_v(FCC0, vtmp1); + bcnez(FCC0, Done); + + bind(V_L); + vxor_v(vtmp2, vtmp2, vtmp2); + vabsd_h(vtmp1, vtmp1, vtmp2); + vneg_h(vtmp1, vtmp1); + vfrstpi_h(vtmp2, vtmp1, 0); + vpickve2gr_bu(tmp1, vtmp2, 0); + + // the index value was stored in tmp1 + slli_d(tmp1, tmp1, 1); + ldx_hu(result, str1, tmp1); + ldx_hu(tmp2, str2, tmp1); + sub_d(result, result, tmp2); b(Done); - bind(L); + } + + bind(Loop); + ld_d(tmp1, str1, 0); + ld_d(tmp2, str2, 0); + beq(tmp1, tmp2, L); + xorr(cnt2, tmp1, tmp2); + ctz_d(cnt2, cnt2); + andi(cnt2, cnt2, 0x30); + srl_d(tmp1, tmp1, cnt2); + srl_d(tmp2, tmp2, cnt2); + bstrpick_d(tmp1, tmp1, 15, 0); + bstrpick_d(tmp2, tmp2, 15, 0); + sub_d(result, tmp1, tmp2); + b(Done); + + bind(L); + addi_d(cnt1, cnt1, -4); + addi_d(str1, str1, 8); + addi_d(str2, str2, 8); + bge(cnt1, AT, Loop); + + // compare current character + bind(LoopEnd); + beqz(cnt1, Done); + ld_hu(tmp1, str1, 0); + ld_hu(tmp2, str2, 0); + bne(tmp1, tmp2, HaveResult); + addi_d(cnt1, cnt1, -1); + addi_d(str1, str1, 2); + addi_d(str2, str2, 2); + 
b(LoopEnd); + + bind(HaveResult); + sub_d(result, tmp1, tmp2); + + bind(Done); +} + +void C2_MacroAssembler::string_compareLU(Register str1, Register str2, + Register cnt1, Register cnt2, + Register result, + Register tmp1, Register tmp2, + FloatRegister vtmp1, + FloatRegister vtmp2) { + Label L, Loop, LoopEnd, HaveResult, Done, V_Start; + + + // compute the difference of lengths (in result) + srai_w(cnt2, cnt2, 1); + sub_d(result, cnt1, cnt2); + + // compute the shorter length (in cnt1) + bge(cnt2, cnt1, V_Start); + move(cnt1, cnt2); + bind(V_Start); + + // tiny string + li(AT, wordSize/2); + blt(cnt1, AT, LoopEnd); + + if (UseLSX) { + slli_d(tmp1, AT, 1); // less than 8 + blt(cnt1, tmp1, Loop); + + Label V_L, V_Loop, V_Result; + + vxor_v(fscratch, fscratch, fscratch); + + bind(V_Loop); + vld(vtmp1, str1, 0); + vld(vtmp2, str2, 0); + vilvl_b(vtmp1, fscratch, vtmp1); + vxor_v(vtmp1, vtmp1, vtmp2); + vseteqz_v(FCC0, vtmp1); + bceqz(FCC0, V_L); + + addi_d(cnt1, cnt1, -8); addi_d(str1, str1, 8); - addi_d(str2, str2, 4); - addi_d(cnt1, cnt1, -charsInWord); - b(Loop); - } else { // isUU - ld_d(tmp1, str1, 0); - ld_d(tmp2, str2, 0); - beq(tmp1, tmp2, L); - xorr(cnt2, tmp1, tmp2); - ctz_d(cnt2, cnt2); - andi(cnt2, cnt2, 0x30); - srl_d(tmp1, tmp1, cnt2); - srl_d(tmp2, tmp2, cnt2); - bstrpick_d(tmp1, tmp1, 15, 0); - bstrpick_d(tmp2, tmp2, 15, 0); - sub_d(result, tmp1, tmp2); + addi_d(str2, str2, 16); + bge(cnt1, tmp1, V_Loop); + + // deal with the last loop + beqz(cnt1, Done); + addi_d(cnt1, cnt1, -8); + add_d(str1, str1, cnt1); + vld(vtmp1, str1, 0); + alsl_d(str2, cnt1, str2, 0); + vld(vtmp2, str2, 0); + vilvl_b(vtmp1, fscratch, vtmp1); + vxor_v(vtmp1, vtmp1, vtmp2); + vseteqz_v(FCC0, vtmp1); + bcnez(FCC0, Done); + + bind(V_L); + vxor_v(vtmp2, vtmp2, vtmp2); + vabsd_h(vtmp1, vtmp1, vtmp2); + vneg_h(vtmp1, vtmp1); + vfrstpi_h(vtmp2, vtmp1, 0); + vpickve2gr_bu(tmp1, vtmp2, 0); + + // the index value was stored in tmp1 + ldx_bu(result, str1, tmp1); + slli_d(tmp1, tmp1, 1); + 
ldx_hu(tmp2, str2, tmp1); + sub_d(result, result, tmp2); b(Done); - bind(L); - addi_d(str1, str1, 8); - addi_d(str2, str2, 8); - addi_d(cnt1, cnt1, -charsInWord); - b(Loop); } + bind(Loop); + ld_wu(cnt2, str1, 0); + andr(tmp1, R0, R0); + bstrins_d(tmp1, cnt2, 7, 0); + srli_d(cnt2, cnt2, 8); + bstrins_d(tmp1, cnt2, 23, 16); + srli_d(cnt2, cnt2, 8); + bstrins_d(tmp1, cnt2, 39, 32); + srli_d(cnt2, cnt2, 8); + bstrins_d(tmp1, cnt2, 55, 48); + ld_d(tmp2, str2, 0); + beq(tmp1, tmp2, L); + xorr(cnt2, tmp1, tmp2); + ctz_d(cnt2, cnt2); + andi(cnt2, cnt2, 0x30); + srl_d(tmp1, tmp1, cnt2); + srl_d(tmp2, tmp2, cnt2); + bstrpick_d(tmp1, tmp1, 15, 0); + bstrpick_d(tmp2, tmp2, 15, 0); + sub_d(result, tmp1, tmp2); + b(Done); + bind(L); + addi_d(cnt1, cnt1, -4); + addi_d(str1, str1, 4); + addi_d(str2, str2, 8); + bge(cnt1, AT, Loop); + + // compare current character bind(LoopEnd); beqz(cnt1, Done); - if (str1_isL) { - ld_bu(tmp1, str1, 0); - } else { - ld_hu(tmp1, str1, 0); + ld_bu(tmp1, str1, 0); + ld_hu(tmp2, str2, 0); + bne(tmp1, tmp2, HaveResult); + addi_d(cnt1, cnt1, -1); + addi_d(str1, str1, 1); + addi_d(str2, str2, 2); + b(LoopEnd); + + bind(HaveResult); + sub_d(result, tmp1, tmp2); + + bind(Done); +} + +void C2_MacroAssembler::string_compareUL(Register str1, Register str2, + Register cnt1, Register cnt2, + Register result, + Register tmp1, Register tmp2, + FloatRegister vtmp1, + FloatRegister vtmp2) { + Label L, Loop, LoopEnd, HaveResult, Done, V_Start; + + // compute the difference of lengths (in result) + srai_w(cnt1, cnt1, 1); + sub_d(result, cnt1, cnt2); + + // compute the shorter length (in cnt1) + bge(cnt2, cnt1, V_Start); + move(cnt1, cnt2); + bind(V_Start); + + // tiny string + li(AT, wordSize/2); + blt(cnt1, AT, LoopEnd); + + if (UseLSX) { + slli_d(tmp1, AT, 1); // less than 8 + blt(cnt1, tmp1, Loop); + + Label V_L, V_Loop, V_Result; + + vxor_v(fscratch, fscratch, fscratch); + + bind(V_Loop); + vld(vtmp2, str2, 0); + vld(vtmp1, str1, 0); + vilvl_b(vtmp2, fscratch, 
vtmp2); + vxor_v(vtmp1, vtmp1, vtmp2); + vseteqz_v(FCC0, vtmp1); + bceqz(FCC0, V_L); + + addi_d(cnt1, cnt1, -8); + addi_d(str1, str1, 16); + addi_d(str2, str2, 8); + bge(cnt1, tmp1, V_Loop); + + // deal with the last loop + beqz(cnt1, Done); + addi_d(cnt1, cnt1, -8); + add_d(str2, str2, cnt1); + vld(vtmp2, str2, 0); + alsl_d(str1, cnt1, str1, 0); + vld(vtmp1, str1, 0); + vilvl_b(vtmp2, fscratch, vtmp2); + vxor_v(vtmp1, vtmp1, vtmp2); + vseteqz_v(FCC0, vtmp1); + bcnez(FCC0, Done); + + bind(V_L); + vxor_v(vtmp2, vtmp2, vtmp2); + vabsd_h(vtmp1, vtmp1, vtmp2); + vneg_h(vtmp1, vtmp1); + vfrstpi_h(vtmp2, vtmp1, 0); + vpickve2gr_bu(tmp1, vtmp2, 0); + + // the index value was stored in tmp1 + ldx_bu(tmp2, str2, tmp1); + slli_d(tmp1, tmp1, 1); + ldx_hu(result, str1, tmp1); + sub_d(result, result, tmp2); + b(Done); } + bind(Loop); + ld_wu(cnt2, str2, 0); + andr(tmp2, R0, R0); + bstrins_d(tmp2, cnt2, 7, 0); + srli_d(cnt2, cnt2, 8); + bstrins_d(tmp2, cnt2, 23, 16); + srli_d(cnt2, cnt2, 8); + bstrins_d(tmp2, cnt2, 39, 32); + srli_d(cnt2, cnt2, 8); + bstrins_d(tmp2, cnt2, 55, 48); + ld_d(tmp1, str1, 0); + beq(tmp1, tmp2, L); + xorr(cnt2, tmp1, tmp2); + ctz_d(cnt2, cnt2); + andi(cnt2, cnt2, 0x30); + srl_d(tmp1, tmp1, cnt2); + srl_d(tmp2, tmp2, cnt2); + bstrpick_d(tmp1, tmp1, 15, 0); + bstrpick_d(tmp2, tmp2, 15, 0); + sub_d(result, tmp1, tmp2); + b(Done); + bind(L); + addi_d(cnt1, cnt1, -4); + addi_d(str1, str1, 8); + addi_d(str2, str2, 4); + bge(cnt1, AT, Loop); + // compare current character - if (str2_isL) { - ld_bu(tmp2, str2, 0); - } else { - ld_hu(tmp2, str2, 0); - } + bind(LoopEnd); + beqz(cnt1, Done); + ld_hu(tmp1, str1, 0); + ld_bu(tmp2, str2, 0); bne(tmp1, tmp2, HaveResult); - addi_d(str1, str1, str1_isL ? 1 : 2); - addi_d(str2, str2, str2_isL ? 
1 : 2); addi_d(cnt1, cnt1, -1); + addi_d(str1, str1, 2); + addi_d(str2, str2, 1); b(LoopEnd); bind(HaveResult); diff --git a/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.hpp index 41f814b787ae2..8e4f0150897e7 100644 --- a/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.hpp +++ b/src/hotspot/cpu/loongarch/c2_MacroAssembler_loongarch.hpp @@ -44,10 +44,26 @@ Register tmp1, Register tmp2, Register tmp3); // Compare strings. - void string_compare(Register str1, Register str2, - Register cnt1, Register cnt2, Register result, - int ae, Register tmp1, Register tmp2, - FloatRegister vtmp1, FloatRegister vtmp2); + void string_compareL(Register str1, Register str2, + Register cnt1, Register cnt2, + Register result, + Register tmp1, Register tmp2, + FloatRegister vtmp1, FloatRegister vtmp2); + void string_compareU(Register str1, Register str2, + Register cnt1, Register cnt2, + Register result, + Register tmp1, Register tmp2, + FloatRegister vtmp1, FloatRegister vtmp2); + void string_compareLU(Register str1, Register str2, + Register cnt1, Register cnt2, + Register result, + Register tmp1, Register tmp2, + FloatRegister vtmp1, FloatRegister vtmp2); + void string_compareUL(Register str1, Register str2, + Register cnt1, Register cnt2, + Register result, + Register tmp1, Register tmp2, + FloatRegister vtmp1, FloatRegister vtmp2); // Find index of char in Latin-1 string void stringL_indexof_char(Register str1, Register cnt1, diff --git a/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp index 8679369b78f99..9235eda44c599 100644 --- a/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp +++ b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2022, 2023, Loongson Technology. All rights reserved. 
+ * Copyright (c) 2022, 2024, Loongson Technology. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -92,7 +92,7 @@ void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, HotSpotCompiledCod void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, JVMCI_TRAPS) { address pc = _instructions->start() + pc_offset; NativeInstruction* inst = nativeInstruction_at(pc); - if (inst->is_pcaddu12i_add()) { + if (inst->is_pcaddi()) { address dest = _constants->start() + data_offset; _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS)); JVMCI_event_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset); diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad index 1825999d11b89..a9275ba04c3f8 100644 --- a/src/hotspot/cpu/loongarch/loongarch_64.ad +++ b/src/hotspot/cpu/loongarch/loongarch_64.ad @@ -3230,6 +3230,15 @@ operand mA2RegI() %{ interface(REG_INTER); %} +operand mA4RegI() %{ + constraint(ALLOC_IN_RC(a4_reg)); + match(RegI); + match(mRegI); + + format %{ "A4" %} + interface(REG_INTER); +%} + operand mA5RegI() %{ constraint(ALLOC_IN_RC(a5_reg)); match(RegI); @@ -3355,6 +3364,16 @@ operand a2_RegP() interface(REG_INTER); %} +operand a3_RegP() +%{ + constraint(ALLOC_IN_RC(a3_long_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + operand a4_RegP() %{ constraint(ALLOC_IN_RC(a4_long_reg)); @@ -8008,65 +8027,93 @@ instruct clear_array(a2RegL cnt, a0_RegP base, Universe dummy, a1RegL value) %{ ins_pipe( pipe_slow ); %} -instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, mRegI result, mRegL tmp1, mRegL tmp2, regF vtmp1, regF vtmp2) %{ +instruct string_compareL(a1_RegP str1, mA2RegI cnt1, a3_RegP str2, mA4RegI cnt2, + mA0RegI result, mRegL tmp1, 
mRegL tmp2, + regF vtmp1, regF vtmp2) %{ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, + USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] " + "tmp1:$tmp1, tmp2:$tmp2, vtmp1:$vtmp1, vtmp2:$vtmp2 -> " + "$result @ string_compareL" %} - format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2, vtmp1:$vtmp1, vtmp2:$vtmp2 -> $result @ string_compareL" %} ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - StrIntrinsicNode::LL, $tmp1$$Register, $tmp2$$Register, - $vtmp1$$FloatRegister, $vtmp2$$FloatRegister); + __ string_compareL($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $result$$Register, + $tmp1$$Register, $tmp2$$Register, + $vtmp1$$FloatRegister, $vtmp2$$FloatRegister); %} ins_pipe( pipe_slow ); %} -instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, mRegI result, mRegL tmp1, mRegL tmp2, regF vtmp1, regF vtmp2) %{ +instruct string_compareU(a1_RegP str1, mA2RegI cnt1, a3_RegP str2, mA4RegI cnt2, + mA0RegI result, mRegL tmp1, mRegL tmp2, + regF vtmp1, regF vtmp2) %{ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, + USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] " 
+ "tmp1:$tmp1, tmp2:$tmp2, vtmp1:$vtmp1, vtmp2:$vtmp2 -> " + "$result @ string_compareU" %} - format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2, vtmp1:$vtmp1, vtmp2:$vtmp2 -> $result @ string_compareU" %} ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - StrIntrinsicNode::UU, $tmp1$$Register, $tmp2$$Register, - $vtmp1$$FloatRegister, $vtmp2$$FloatRegister); + __ string_compareU($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $result$$Register, + $tmp1$$Register, $tmp2$$Register, + $vtmp1$$FloatRegister, $vtmp2$$FloatRegister); %} ins_pipe( pipe_slow ); %} -instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, mRegI result, mRegL tmp1, mRegL tmp2, regF vtmp1, regF vtmp2) %{ +instruct string_compareLU(a1_RegP str1, mA2RegI cnt1, a3_RegP str2, mA4RegI cnt2, + mA0RegI result, mRegL tmp1, mRegL tmp2, + regF vtmp1, regF vtmp2) %{ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, + USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] " + "tmp1:$tmp1, tmp2:$tmp2, vtmp1:$vtmp1, vtmp2:$vtmp2 -> " + "$result @ string_compareLU" %} - format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2, vtmp1:$vtmp1, vtmp2:$vtmp2 -> $result @ string_compareLU" %} ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - StrIntrinsicNode::LU, $tmp1$$Register, $tmp2$$Register, - $vtmp1$$FloatRegister, $vtmp2$$FloatRegister); + __ 
string_compareLU($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $result$$Register, + $tmp1$$Register, $tmp2$$Register, + $vtmp1$$FloatRegister, $vtmp2$$FloatRegister); %} ins_pipe( pipe_slow ); %} -instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, mRegI result, mRegL tmp1, mRegL tmp2, regF vtmp1, regF vtmp2) %{ +instruct string_compareUL(a1_RegP str1, mA2RegI cnt1, a3_RegP str2, mA4RegI cnt2, + mA0RegI result, mRegL tmp1, mRegL tmp2, + regF vtmp1, regF vtmp2) %{ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP vtmp1, TEMP vtmp2, + USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] " + "tmp1:$tmp1, tmp2:$tmp2, vtmp1:$vtmp1, vtmp2:$vtmp2 -> " + "$result @ string_compareUL" %} - format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] tmp1:$tmp1, tmp2:$tmp2, vtmp1:$vtmp1, vtmp2:$vtmp2 -> $result @ string_compareUL" %} ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - StrIntrinsicNode::UL, $tmp1$$Register, $tmp2$$Register, - $vtmp1$$FloatRegister, $vtmp2$$FloatRegister); + __ string_compareUL($str1$$Register, $str2$$Register, + $cnt1$$Register, $cnt2$$Register, + $result$$Register, + $tmp1$$Register, $tmp2$$Register, + $vtmp1$$FloatRegister, $vtmp2$$FloatRegister); %} ins_pipe( pipe_slow ); diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp index cc64142a1783a..c67984e5c2ff1 100644 --- a/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp +++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp @@ -66,6 +66,10 @@ 
bool NativeInstruction::is_lu12iw_lu32id() const { Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op; } +bool NativeInstruction::is_pcaddi() const { + return Assembler::high(int_at(0), 7) == Assembler::pcaddi_op; +} + bool NativeInstruction::is_pcaddu12i_add() const { return Assembler::high(int_at(0), 7) == Assembler::pcaddu12i_op && Assembler::high(int_at(4), 10) == Assembler::addi_d_op; diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp index 1246c7c0aa52e..e8fc94a1f5832 100644 --- a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp +++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp @@ -75,6 +75,7 @@ class NativeInstruction { // Helper func for jvmci bool is_lu12iw_lu32id() const; bool is_pcaddu12i_add() const; + bool is_pcaddi() const; // LoongArch has no instruction to generate a illegal instruction exception? // But `break 11` is not illegal instruction for LoongArch. diff --git a/src/hotspot/os_cpu/linux_loongarch/amcas_asm.h b/src/hotspot/os_cpu/linux_loongarch/amcas_asm.h deleted file mode 100644 index 305974a178f94..0000000000000 --- a/src/hotspot/os_cpu/linux_loongarch/amcas_asm.h +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2023, Loongson Technology. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). 
- * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef __AMCAS_ASM_H__ -#define __AMCAS_ASM_H__ - asm( - ".macro parse_r var r \n\t" - "\\var = -1 \n\t" - ".ifc \\r, $r0 \n\t" - "\\var = 0 \n\t" - ".endif \n\t" - ".ifc \\r, $r1 \n\t" - "\\var = 1 \n\t" - ".endif \n\t" - ".ifc \\r, $r2 \n\t" - "\\var = 2 \n\t" - ".endif \n\t" - ".ifc \\r, $r3 \n\t" - "\\var = 3 \n\t" - ".endif \n\t" - ".ifc \\r, $r4 \n\t" - "\\var = 4 \n\t" - ".endif \n\t" - ".ifc \\r, $r5 \n\t" - "\\var = 5 \n\t" - ".endif \n\t" - ".ifc \\r, $r6 \n\t" - "\\var = 6 \n\t" - ".endif \n\t" - ".ifc \\r, $r7 \n\t" - "\\var = 7 \n\t" - ".endif \n\t" - ".ifc \\r, $r8 \n\t" - "\\var = 8 \n\t" - ".endif \n\t" - ".ifc \\r, $r9 \n\t" - "\\var = 9 \n\t" - ".endif \n\t" - ".ifc \\r, $r10 \n\t" - "\\var = 10 \n\t" - ".endif \n\t" - ".ifc \\r, $r11 \n\t" - "\\var = 11 \n\t" - ".endif \n\t" - ".ifc \\r, $r12 \n\t" - "\\var = 12 \n\t" - ".endif \n\t" - ".ifc \\r, $r13 \n\t" - "\\var = 13 \n\t" - ".endif \n\t" - ".ifc \\r, $r14 \n\t" - "\\var = 14 \n\t" - ".endif \n\t" - ".ifc \\r, $r15 \n\t" - "\\var = 15 \n\t" - ".endif \n\t" - ".ifc \\r, $r16 \n\t" - "\\var = 16 \n\t" - ".endif \n\t" - ".ifc \\r, $r17 \n\t" - "\\var = 17 \n\t" - ".endif \n\t" - ".ifc \\r, $r18 \n\t" - "\\var = 18 \n\t" - ".endif \n\t" - ".ifc \\r, $r19 \n\t" - "\\var = 19 \n\t" - ".endif \n\t" - ".ifc \\r, $r20 \n\t" - "\\var = 20 \n\t" - ".endif \n\t" - ".ifc \\r, $r21 \n\t" - "\\var = 21 \n\t" - ".endif \n\t" - ".ifc \\r, $r22 \n\t" - "\\var = 22 \n\t" - ".endif \n\t" - ".ifc \\r, $r23 \n\t" - "\\var = 23 \n\t" - ".endif \n\t" - ".ifc \\r, $r24 \n\t" - "\\var = 24 \n\t" - ".endif \n\t" - ".ifc 
\\r, $r25 \n\t" - "\\var = 25 \n\t" - ".endif \n\t" - ".ifc \\r, $r26 \n\t" - "\\var = 26 \n\t" - ".endif \n\t" - ".ifc \\r, $r27 \n\t" - "\\var = 27 \n\t" - ".endif \n\t" - ".ifc \\r, $r28 \n\t" - "\\var = 28 \n\t" - ".endif \n\t" - ".ifc \\r, $r29 \n\t" - "\\var = 29 \n\t" - ".endif \n\t" - ".ifc \\r, $r30 \n\t" - "\\var = 30 \n\t" - ".endif \n\t" - ".ifc \\r, $r31 \n\t" - "\\var = 31 \n\t" - ".endif \n\t" - ".iflt \\var \n\t" - ".error \n\t" - ".endif \n\t" - ".endm \n\t" - - ".macro amcas_w rd, rk, rj \n\t" - "parse_r d, \\rd \n\t" - "parse_r j, \\rj \n\t" - "parse_r k, \\rk \n\t" - ".word ((0b00111000010110010 << 15) | (k << 10) | (j << 5) | d) \n\t" - ".endm \n\t" - - ".macro amcas_d rd, rk, rj \n\t" - "parse_r d, \\rd \n\t" - "parse_r j, \\rj \n\t" - "parse_r k, \\rk \n\t" - ".word ((0b00111000010110011 << 15) | (k << 10) | (j << 5) | d) \n\t" - ".endm \n\t" - - ".macro amcas_db_b rd, rk, rj \n\t" - "parse_r d, \\rd \n\t" - "parse_r j, \\rj \n\t" - "parse_r k, \\rk \n\t" - ".word ((0b00111000010110100 << 15) | (k << 10) | (j << 5) | d) \n\t" - ".endm \n\t" - - ".macro amcas_db_w rd, rk, rj \n\t" - "parse_r d, \\rd \n\t" - "parse_r j, \\rj \n\t" - "parse_r k, \\rk \n\t" - ".word ((0b00111000010110110 << 15) | (k << 10) | (j << 5) | d) \n\t" - ".endm \n\t" - - ".macro amcas_db_d rd, rk, rj \n\t" - "parse_r d, \\rd \n\t" - "parse_r j, \\rj \n\t" - "parse_r k, \\rk \n\t" - ".word ((0b00111000010110111 << 15) | (k << 10) | (j << 5) | d) \n\t" - ".endm \n\t" - ); -#endif /* __AMCAS_ASM_H__ */ diff --git a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp index bb820b5e5ae81..f0c7a7e3a5da6 100644 --- a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp +++ b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2015, 2023, Loongson Technology. 
All rights reserved. + * Copyright (c) 2015, 2024, Loongson Technology. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,10 +27,146 @@ #define OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP #include "runtime/vm_version.hpp" -#include "amcas_asm.h" // Implementation of class atomic +#define AMCAS_MACRO asm volatile ( \ + ".ifndef _ASM_ASMMACRO_ \n\t" \ + ".set _ASM_ASMMACRO_, 1 \n\t" \ + ".macro parse_r var r \n\t" \ + "\\var = -1 \n\t" \ + ".ifc \\r, $r0 \n\t" \ + "\\var = 0 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r1 \n\t" \ + "\\var = 1 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r2 \n\t" \ + "\\var = 2 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r3 \n\t" \ + "\\var = 3 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r4 \n\t" \ + "\\var = 4 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r5 \n\t" \ + "\\var = 5 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r6 \n\t" \ + "\\var = 6 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r7 \n\t" \ + "\\var = 7 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r8 \n\t" \ + "\\var = 8 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r9 \n\t" \ + "\\var = 9 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r10 \n\t" \ + "\\var = 10 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r11 \n\t" \ + "\\var = 11 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r12 \n\t" \ + "\\var = 12 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r13 \n\t" \ + "\\var = 13 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r14 \n\t" \ + "\\var = 14 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r15 \n\t" \ + "\\var = 15 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r16 \n\t" \ + "\\var = 16 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r17 \n\t" \ + "\\var = 17 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r18 \n\t" \ + "\\var = 18 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r19 \n\t" \ + "\\var = 19 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r20 \n\t" \ + "\\var = 20 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r21 \n\t" \ + "\\var = 21 \n\t" \ + ".endif \n\t" \ + 
".ifc \\r, $r22 \n\t" \ + "\\var = 22 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r23 \n\t" \ + "\\var = 23 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r24 \n\t" \ + "\\var = 24 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r25 \n\t" \ + "\\var = 25 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r26 \n\t" \ + "\\var = 26 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r27 \n\t" \ + "\\var = 27 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r28 \n\t" \ + "\\var = 28 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r29 \n\t" \ + "\\var = 29 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r30 \n\t" \ + "\\var = 30 \n\t" \ + ".endif \n\t" \ + ".ifc \\r, $r31 \n\t" \ + "\\var = 31 \n\t" \ + ".endif \n\t" \ + ".iflt \\var \n\t" \ + ".endif \n\t" \ + ".endm \n\t" \ + ".macro amcas_w rd, rk, rj \n\t" \ + "parse_r d, \\rd \n\t" \ + "parse_r j, \\rj \n\t" \ + "parse_r k, \\rk \n\t" \ + ".word ((0b00111000010110010 << 15) | (k << 10) | (j << 5) | d) \n\t" \ + ".endm \n\t" \ + ".macro amcas_d rd, rk, rj \n\t" \ + "parse_r d, \\rd \n\t" \ + "parse_r j, \\rj \n\t" \ + "parse_r k, \\rk \n\t" \ + ".word ((0b00111000010110011 << 15) | (k << 10) | (j << 5) | d) \n\t" \ + ".endm \n\t" \ + ".macro amcas_db_b rd, rk, rj \n\t" \ + "parse_r d, \\rd \n\t" \ + "parse_r j, \\rj \n\t" \ + "parse_r k, \\rk \n\t" \ + ".word ((0b00111000010110100 << 15) | (k << 10) | (j << 5) | d) \n\t" \ + ".endm \n\t" \ + ".macro amcas_db_w rd, rk, rj \n\t" \ + "parse_r d, \\rd \n\t" \ + "parse_r j, \\rj \n\t" \ + "parse_r k, \\rk \n\t" \ + ".word ((0b00111000010110110 << 15) | (k << 10) | (j << 5) | d) \n\t" \ + ".endm \n\t" \ + ".macro amcas_db_d rd, rk, rj \n\t" \ + "parse_r d, \\rd \n\t" \ + "parse_r j, \\rj \n\t" \ + "parse_r k, \\rk \n\t" \ + ".word ((0b00111000010110111 << 15) | (k << 10) | (j << 5) | d) \n\t" \ + ".endm \n\t" \ + ".endif \n\t" \ + ); + template struct Atomic::PlatformAdd { template @@ -167,6 +303,7 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, T prev, temp; if (UseAMCAS) { + AMCAS_MACRO switch (order) { case 
memory_order_relaxed: asm volatile ( @@ -250,6 +387,7 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, T prev, temp; if (UseAMCAS) { + AMCAS_MACRO switch (order) { case memory_order_relaxed: asm volatile ( diff --git a/src/hotspot/share/compiler/compilerDirectives.cpp b/src/hotspot/share/compiler/compilerDirectives.cpp index f5ccd9e69e929..c3d19f0f88c44 100644 --- a/src/hotspot/share/compiler/compilerDirectives.cpp +++ b/src/hotspot/share/compiler/compilerDirectives.cpp @@ -780,7 +780,7 @@ DirectiveSet* DirectivesStack::getMatchingDirective(const methodHandle& method, if (dir->is_default_directive() || dir->match(method)) { match = dir->get_for(comp); assert(match != nullptr, "Consistency"); - if (match->EnableOption) { + if (match->EnableOption || dir->is_default_directive()) { // The directiveSet for this compile is also enabled -> success dir->inc_refcount(); break; diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java index 4c76868453a80..d8d701a8c81eb 100644 --- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java @@ -1,6 +1,6 @@ /* * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2022, Loongson Technology. All rights reserved. + * Copyright (c) 2022, 2024, Loongson Technology. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -47,6 +47,7 @@ public class LoongArch64TestAssembler extends TestAssembler { private static final Register scratchRegister = LoongArch64.SCR1; + private static final Register scratchRegister2 = LoongArch64.SCR2; private static final Register doubleScratch = LoongArch64.f23; private static final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(LoongArch64.a0, LoongArch64.a1, LoongArch64.a2, @@ -60,6 +61,22 @@ public class LoongArch64TestAssembler extends TestAssembler { LoongArch64.f7); private static int currentGeneral = 0; private static int currentFloat = 0; + + public enum ConditionFlag { + EQ(0b010110), + NE(0b010111), + LT(0b011000), + GE(0b011001), + LTU(0b011010), + GEU(0b011011); + + public final int encoding; + + ConditionFlag(int encoding) { + this.encoding = encoding; + } + } + public LoongArch64TestAssembler(CodeCacheProvider codeCache, TestHotSpotVMConfig config) { super(codeCache, config, 16 /* initialFrameSize */, 16 /* stackAlignment */, @@ -82,6 +99,13 @@ private void emitNop() { code.emitInt(0x3400000); } + private void emitPcaddi(Register rd, int si20) { + // pcaddi + code.emitInt((0b0001100 << 25) + | ((low(si20, 20) >> 2) << 5) + | rd.encoding); + } + private void emitPcaddu12i(Register rj, int si20) { // pcaddu12i code.emitInt((0b0001110 << 25) @@ -242,6 +266,16 @@ private void emitJirl(Register rd, Register rj, int offs) { | rd.encoding); } + protected void emitBranch(Register rj, Register rd, ConditionFlag condition, int offs) { + // B.cond + check(isSignedNbit(18, offs) && (offs & 0b11) == 0, + "0x%x must be a 18-bit signed number and 4-byte aligned", offs); + code.emitInt((condition.encoding << 26) + | (low16(offs >> 2) << 10) + | (rj.encoding << 5) + | rd.encoding); + } + @Override public void emitGrowStack(int size) { assert size % 16 == 0; @@ -268,9 +302,21 @@ public void emitPrologue() { emitStoreRegister(LoongArch64.fp, LoongArch64Kind.QWORD, 
LoongArch64.sp, 16); emitGrowStack(-16); emitMove(LoongArch64.fp, LoongArch64.sp); + emitNMethodEntryBarrier(); setDeoptRescueSlot(newStackSlot(LoongArch64Kind.QWORD)); } + private void emitNMethodEntryBarrier() { + recordMark(config.MARKID_ENTRY_BARRIER_PATCH); + DataSectionReference ref = emitDataItem(0); + emitLoadPointer(scratchRegister, LoongArch64Kind.UDWORD, ref); + code.emitInt(0x38720014); // dbar 0x14 loadload|loadstore + Register thread = LoongArch64.s6; + emitLoadPointer(scratchRegister2, LoongArch64Kind.UDWORD, thread, config.threadDisarmedOffset); + emitBranch(scratchRegister, scratchRegister2, ConditionFlag.EQ, 4*4); // jump over slow path, runtime call + emitCall(config.nmethodEntryBarrier); + } + @Override public void emitEpilogue() { recordMark(config.MARKID_DEOPT_HANDLER_ENTRY); @@ -365,8 +411,11 @@ public Register emitLoadPointer(HotSpotConstant c) { @Override public Register emitLoadPointer(Register b, int offset) { - Register ret = newRegister(); - emitLoadRegister(ret, LoongArch64Kind.QWORD, b, offset); + return emitLoadPointer(newRegister(), LoongArch64Kind.QWORD, b, offset); + } + + public Register emitLoadPointer(Register ret, LoongArch64Kind kind, Register b, int offset) { + emitLoadRegister(ret, kind, b, offset); return ret; } @@ -375,20 +424,21 @@ public Register emitLoadNarrowPointer(DataSectionReference ref) { recordDataPatchInCode(ref); Register ret = newRegister(); - emitPcaddu12i(ret, 0xdead >> 12); - emitAdd(ret, ret, 0xdead & 0xfff); + emitPcaddi(ret, 0xdead); emitLoadRegister(ret, LoongArch64Kind.UDWORD, ret, 0); return ret; } @Override public Register emitLoadPointer(DataSectionReference ref) { + return emitLoadPointer(newRegister(), LoongArch64Kind.QWORD, ref); + } + + public Register emitLoadPointer(Register ret, LoongArch64Kind kind, DataSectionReference ref) { recordDataPatchInCode(ref); - Register ret = newRegister(); - emitPcaddu12i(ret, 0xdead >> 12); - emitAdd(ret, ret, 0xdead & 0xfff); - emitLoadRegister(ret, 
LoongArch64Kind.QWORD, ret, 0); + emitPcaddi(ret, 0xdead); + emitLoadRegister(ret, kind, ret, 0); return ret; } @@ -398,8 +448,7 @@ private Register emitLoadDouble(Register reg, double c) { data.emitDouble(c); recordDataPatchInCode(ref); - emitPcaddu12i(scratchRegister, 0xdead >> 12); - emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); + emitPcaddi(scratchRegister, 0xdead); emitLoadRegister(reg, LoongArch64Kind.DOUBLE, scratchRegister, 0); return reg; } @@ -410,8 +459,7 @@ private Register emitLoadFloat(Register reg, float c) { data.emitFloat(c); recordDataPatchInCode(ref); - emitPcaddu12i(scratchRegister, 0xdead >> 12); - emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); + emitPcaddi(scratchRegister, 0xdead); emitLoadRegister(reg, LoongArch64Kind.SINGLE, scratchRegister, 0); return reg; }