From 74359a49b7a2f87c88a78915b1fa0f12681c55cd Mon Sep 17 00:00:00 2001 From: loongson-jvm Date: Thu, 16 May 2024 23:59:24 +0800 Subject: [PATCH] Update (2024.05.16, 3rd) 28100: Remove cbuf parameter from trampoline_call 34106: merge three or more continuous membars to one 34070: Fix stack_offset exceeding 12 bits limit 34093: Occasional SIGSEGV in markWord::displaced_mark_helper() for SPECjvm2008 sunflow 33980: Fix generate__kernel_rem_pio2 34058: LA port of 8322122: Enhance generation of addresses --- .../c1_LIRGenerator_loongarch_64.cpp | 8 +-- src/hotspot/cpu/loongarch/loongarch_64.ad | 50 ++++++++++++------- .../loongarch/macroAssembler_loongarch.cpp | 15 +++--- .../loongarch/macroAssembler_loongarch.hpp | 3 +- .../macroAssembler_loongarch_trig.cpp | 5 +- .../loongarch/templateTable_loongarch_64.cpp | 6 +-- 6 files changed, 54 insertions(+), 33 deletions(-) diff --git a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp index c11f031841592..bf5fba7a100fc 100644 --- a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp +++ b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2021, 2023, Loongson Technology. All rights reserved. + * Copyright (c) 2021, 2024, Loongson Technology. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -157,8 +157,10 @@ LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, if (index->is_register()) { // apply the shift and accumulate the displacement if (shift > 0) { - LIR_Opr tmp = new_pointer_register(); - __ shift_left(index, shift, tmp); + // Use long register to avoid overflow when shifting large index values left. 
+ LIR_Opr tmp = new_register(T_LONG); + __ convert(Bytecodes::_i2l, index, tmp); + __ shift_left(tmp, shift, tmp); index = tmp; } if (large_disp != 0) { diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad index ac51e3eaf7d3a..3ea3cff523ea0 100644 --- a/src/hotspot/cpu/loongarch/loongarch_64.ad +++ b/src/hotspot/cpu/loongarch/loongarch_64.ad @@ -1404,20 +1404,29 @@ static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, int size = 0; if (cbuf) { MacroAssembler _masm(cbuf); - int offset = __ offset(); if (is_load) { switch (ireg) { case Op_VecS: - __ fld_s(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + __ fld_s(as_FloatRegister(Matcher::_regEncode[reg]), Address(SP, stack_offset)); break; case Op_VecD: - __ fld_d(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + __ fld_d(as_FloatRegister(Matcher::_regEncode[reg]), Address(SP, stack_offset)); break; case Op_VecX: - __ vld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + if (Assembler::is_simm(stack_offset, 12)) { + __ vld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + } else { + __ li(AT, stack_offset); + __ vldx(as_FloatRegister(Matcher::_regEncode[reg]), SP, AT); + } break; case Op_VecY: - __ xvld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + if (Assembler::is_simm(stack_offset, 12)) { + __ xvld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + } else { + __ li(AT, stack_offset); + __ xvldx(as_FloatRegister(Matcher::_regEncode[reg]), SP, AT); + } break; default: ShouldNotReachHere(); @@ -1425,16 +1434,26 @@ static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, } else { // store switch (ireg) { case Op_VecS: - __ fst_s(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + __ fst_s(as_FloatRegister(Matcher::_regEncode[reg]), Address(SP, stack_offset)); break; case Op_VecD: - __ fst_d(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + __ fst_d(as_FloatRegister(Matcher::_regEncode[reg]), Address(SP, stack_offset)); break; case Op_VecX: - __ vst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + if (Assembler::is_simm(stack_offset, 12)) { + __ vst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + } else { + __ li(AT, stack_offset); + __ vstx(as_FloatRegister(Matcher::_regEncode[reg]), SP, AT); + } break; case Op_VecY: - __ xvst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + if (Assembler::is_simm(stack_offset, 12)) { + __ xvst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + } else { + __ li(AT, stack_offset); + __ xvstx(as_FloatRegister(Matcher::_regEncode[reg]), SP, AT); + } break; default: ShouldNotReachHere(); @@ -2413,10 +2432,8 @@ encode %{ C2_MacroAssembler _masm(&cbuf); // This is the instruction starting address for relocation info. __ block_comment("Java_To_Runtime"); - cbuf.set_insts_mark(); __ relocate(relocInfo::runtime_call_type); __ patchable_call((address)$meth$$method); - _masm.clear_inst_mark(); __ post_call_nop(); %} @@ -2424,14 +2441,13 @@ encode %{ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine // who we intended to call. C2_MacroAssembler _masm(&cbuf); - cbuf.set_insts_mark(); address addr = (address)$meth$$method; address call; __ block_comment("Java_Static_Call"); if ( !_method ) { // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. 
- call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); + call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type)); if (call == nullptr) { ciEnv::current()->record_failure("CodeCache is full"); return; @@ -2445,7 +2461,7 @@ encode %{ int method_index = resolved_method_index(cbuf); RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) : static_call_Relocation::spec(method_index); - call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); + call = __ trampoline_call(AddressLiteral(addr, rspec)); if (call == nullptr) { ciEnv::current()->record_failure("CodeCache is full"); return; @@ -2453,17 +2469,16 @@ encode %{ if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) { // Calls of the same statically bound method can share // a stub to the interpreter. - cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off()); + cbuf.shared_stub_to_interp_for(_method, call - cbuf.insts_begin()); } else { // Emit stub for static call - address stub = CompiledDirectCall::emit_to_interp_stub(cbuf); + address stub = CompiledDirectCall::emit_to_interp_stub(cbuf, call); if (stub == nullptr) { ciEnv::current()->record_failure("CodeCache is full"); return; } } } - _masm.clear_inst_mark(); __ post_call_nop(); %} @@ -2479,7 +2494,6 @@ encode %{ ciEnv::current()->record_failure("CodeCache is full"); return; } - _masm.clear_inst_mark(); __ post_call_nop(); %} diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp index adcf1c8ac3bb9..95887d30aa3c1 100644 --- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp @@ -164,7 +164,7 @@ void MacroAssembler::patchable_call(address target, address call_site) { // Maybe emit a call via a trampoline. If the code cache is small // trampolines won't be emitted. -address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer* cbuf) { +address MacroAssembler::trampoline_call(AddressLiteral entry) { assert(entry.rspec().type() == relocInfo::runtime_call_type || entry.rspec().type() == relocInfo::opt_virtual_call_type || entry.rspec().type() == relocInfo::static_call_type || @@ -184,13 +184,12 @@ address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer* cbuf) target = pc(); } - if (cbuf != nullptr) { cbuf->set_insts_mark(); } + address call_pc = pc(); relocate(entry.rspec()); bl(target); - // just need to return a non-null address postcond(pc() != badAddress); - return pc(); + return call_pc; } // Emit a trampoline stub for a call to a target which is too far away. @@ -3370,12 +3369,16 @@ void MacroAssembler::membar(Membar_mask_bits hint){ address prev = pc() - NativeInstruction::sync_instruction_size; address last = code()->last_insn(); if (last != nullptr && ((NativeInstruction*)last)->is_sync() && prev == last) { - code()->set_last_insn(nullptr); NativeMembar *membar = (NativeMembar*)prev; +#ifndef PRODUCT + char buf[50]; + snprintf(buf, sizeof(buf), "merged membar 0x%x 0x%x => 0x%x", + (Ordering | membar->get_hint()), (Ordering | (~hint & 0xF)), (Ordering | (membar->get_hint() & (~hint & 0xF)))); + block_comment(buf); +#endif // merged membar // e.g. 
LoadLoad and LoadLoad|LoadStore to LoadLoad|LoadStore membar->set_hint(membar->get_hint() & (~hint & 0xF)); - block_comment("merged membar"); } else { code()->set_last_insn(pc()); Assembler::membar(hint); diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp index 9f23fd2b26e4b..5f9addc0a371f 100644 --- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp @@ -479,7 +479,8 @@ class MacroAssembler: public Assembler { void call(address entry, RelocationHolder& rh); void call_long(address entry); - address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = nullptr); + // Return: the call PC or null if CodeCache is full. + address trampoline_call(AddressLiteral entry); static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp index b4a1b09b375eb..0c31904a36b80 100644 --- a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp @@ -1,6 +1,6 @@ /* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) - * Copyright (c) 2022, Loongson Technology. All rights reserved. + * Copyright (c) 2022, 2024, Loongson Technology. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -894,7 +894,7 @@ void MacroAssembler::generate__kernel_rem_pio2(address two_over_pi, address pio2 b(Q_DONE); bind(JX_IS_0); if (UseLASX) { - xvfmul_d(v28, v18, v6); // f[0,1] * x[0] + xvfmul_d(v28, v18, v6); // f[0,3] * x[0] fmul_d(v30, v19, v6); // f[4] * x[0] } else { vfmul_d(v28, v18, v6); // f[0,1] * x[0] @@ -1128,6 +1128,7 @@ void MacroAssembler::generate__kernel_rem_pio2(address two_over_pi, address pio2 st_w(tmp2, SCR2, 0); addi_w(SCR1, SCR1, 24); addi_w(jz, jz, 1); + alsl_d(SCR2, jz, iqBase, 2 - 1); st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw b(Z_ZERO_CHECK_DONE); bind(Z_IS_LESS_THAN_TWO24B); diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp index 515a9ec813d30..f0955038c144f 100644 --- a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp +++ b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp @@ -3044,14 +3044,14 @@ void TemplateTable::fast_storefield(TosState state) { // access constant pool cache __ load_field_entry(T3, T2); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ membar(__ LoadLoad); __ push(T0); // T2: field offset, T0: TOS, T1: flags load_resolved_field_entry(T3, T3, T0, T2, T1); __ pop(T0); - // Must prevent reordering of the following cp cache loads with bytecode load - __ membar(__ LoadLoad); - Label Done; { Label notVolatile;
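Note on the generate_address() change (34093/34058): the shift used to be applied to the 32-bit index before it was widened, so a large index could wrap and produce a bogus displacement; the patch widens with i2l first and shifts in a 64-bit register. A minimal standalone sketch of the failure mode, in plain C++ with arbitrary values (this is not HotSpot code):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t index = 0x30000000;  // a large 32-bit index
      int     shift = 3;           // scaling for an 8-byte element

      // Shifting in 32 bits first truncates (modelled here with an explicit
      // two's-complement truncation), and the wrapped value is then sign-extended.
      int64_t wrapped = static_cast<int32_t>(static_cast<uint32_t>(index) << shift);

      // Widening first -- the i2l plus 64-bit shift the patch now emits -- keeps the value exact.
      int64_t exact = static_cast<int64_t>(index) << shift;

      std::printf("shift then widen: %lld\n", static_cast<long long>(wrapped)); // -2147483648
      std::printf("widen then shift: %lld\n", static_cast<long long>(exact));   // 6442450944
      return 0;
    }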
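Note on the vec_spill_helper() change (34070): LoongArch vld/vst and xvld/xvst only take a signed 12-bit displacement, so once the spill-slot offset no longer fits, the patch materializes it into AT and switches to the register-indexed vldx/vstx and xvldx/xvstx forms. The range check below is a standalone restatement of what Assembler::is_simm(value, 12) decides, for illustration only (not the HotSpot implementation):

    #include <cassert>
    #include <cstdint>

    // True if 'value' fits a signed immediate of 'bits' width,
    // i.e. lies in [-2^(bits-1), 2^(bits-1) - 1].
    static bool fits_simm(int64_t value, unsigned bits) {
      int64_t lo = -(int64_t(1) << (bits - 1));
      int64_t hi =  (int64_t(1) << (bits - 1)) - 1;
      return value >= lo && value <= hi;
    }

    int main() {
      assert( fits_simm(2040, 12));   // small frame: vld/vst with an immediate offset
      assert(!fits_simm(4096, 12));   // large frame: li AT, offset; then vldx/vstx
      return 0;
    }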
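Note on the trampoline_call() change (28100): instead of taking a CodeBuffer* so the callee can set an insts mark, the routine now records pc() before emitting the relocation and the bl, and returns that address; callers such as the Java_Static_Call encoder then derive the stub offset themselves (call - cbuf.insts_begin()). A toy illustration of the pattern, with an invented ToyBuffer type (not HotSpot code):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct ToyBuffer {
      std::vector<uint8_t> insts;

      // Emit a fake 4-byte call and return its start offset -- the analogue of
      // 'address call_pc = pc(); ... return call_pc;' in the patch.
      size_t emit_call() {
        size_t call_pos = insts.size();
        insts.resize(insts.size() + 4);
        return call_pos;
      }
    };

    int main() {
      ToyBuffer cb;
      cb.insts.resize(16);   // pretend some code was already emitted
      size_t call = cb.emit_call();
      assert(call == 16);    // the caller computes offsets from the returned
                             // position; no buffer mark is needed
      return 0;
    }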
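Note on the membar() change (34106): when the previously emitted instruction is also a dbar, the new request is folded into it instead of emitting a second barrier, and because last_insn is no longer cleared afterwards, a third or fourth consecutive membar keeps folding into the same instruction. The stored hint encodes which orderings the dbar may relax (the complement of the requested Membar_mask_bits), so the merge intersects the stored hints, which amounts to taking the union of the required orderings. A standalone model of that arithmetic; the enum values and helper names are placeholders, not HotSpot's definitions:

    #include <cassert>

    enum MembarBits { LoadLoad = 1, LoadStore = 2, StoreLoad = 4, StoreStore = 8 };

    // requested ordering mask -> stored dbar hint (orderings that may be relaxed)
    static unsigned to_hint(unsigned mask) { return ~mask & 0xF; }
    // stored dbar hint -> enforced ordering mask
    static unsigned to_mask(unsigned hint) { return ~hint & 0xF; }

    // Same expression as the patch: new_hint = old_hint & (~requested & 0xF).
    static unsigned merge(unsigned stored_hint, unsigned requested_mask) {
      return stored_hint & to_hint(requested_mask);
    }

    int main() {
      unsigned first  = to_hint(LoadLoad);                  // earlier dbar: LoadLoad
      unsigned merged = merge(first, LoadLoad | LoadStore); // follow-up request
      // "LoadLoad and LoadLoad|LoadStore to LoadLoad|LoadStore", as in the patch comment.
      assert(to_mask(merged) == (LoadLoad | LoadStore));
      return 0;
    }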
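Note on the generate__kernel_rem_pio2() change (33980): after jz is incremented, SCR2 still points at the previous iq slot, so the patch recomputes &iq[jz] with an alsl.d, the shift-and-add form base + (index << scale); the '2 - 1' argument is read here as the encoded field for a shift of 2, i.e. a 4-byte int stride, and that reading is an assumption. The address arithmetic itself, as a standalone sketch (not HotSpot code):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t   iq[32] = {0};
      uintptr_t iqBase = reinterpret_cast<uintptr_t>(iq);
      int       jz     = 5;

      // The shift-and-add form computed by alsl.d: base + (index << scale).
      uintptr_t slot = iqBase + (static_cast<uintptr_t>(jz) << 2);

      assert(reinterpret_cast<int32_t*>(slot) == &iq[jz]);  // points at iq[jz]
      return 0;
    }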