diff --git a/Backport-JDK-8315743-8315856-8344010-8344382-RISC-V-Use-Zacas-extension-for-cmpxchg.patch b/Backport-JDK-8315743-8315856-8344010-8344382-RISC-V-Use-Zacas-extension-for-cmpxchg.patch new file mode 100644 index 0000000000000000000000000000000000000000..411f0e38b82c2cbc7b413f0d8f5674650274ab75 --- /dev/null +++ b/Backport-JDK-8315743-8315856-8344010-8344382-RISC-V-Use-Zacas-extension-for-cmpxchg.patch @@ -0,0 +1,419 @@ +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 44c60de74..522550a07 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -851,6 +851,8 @@ enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; + INSN(amomax_d , 0b0101111, 0b011, 0b10100); + INSN(amominu_d, 0b0101111, 0b011, 0b11000); + INSN(amomaxu_d, 0b0101111, 0b011, 0b11100); ++ INSN(amocas_w, 0b0101111, 0b010, 0b00101); ++ INSN(amocas_d, 0b0101111, 0b011, 0b00101); + #undef INSN + + enum operand_size { int8, int16, int32, uint32, int64 }; +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index cae52c8de..2c18805ec 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -106,6 +106,7 @@ define_pd_global(intx, InlineSmallCode, 1000); + product(bool, UseZba, false, DIAGNOSTIC, "Use Zba instructions") \ + product(bool, UseZbb, false, DIAGNOSTIC, "Use Zbb instructions") \ + product(bool, UseZbs, false, DIAGNOSTIC, "Use Zbs instructions") \ ++ product(bool, UseZacas, false, EXPERIMENTAL, "Use Zacas instructions") \ + product(bool, UseZfa, false, EXPERIMENTAL, "Use Zfa instructions") \ + product(bool, UseZic64b, false, EXPERIMENTAL, "Use Zic64b instructions") \ + product(bool, UseZicbom, false, EXPERIMENTAL, "Use Zicbom instructions") \ +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 419783e16..e411b8956 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -3246,20 +3246,29 @@ void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Reg + // oldv holds comparison value + // newv holds value to write in exchange + // addr identifies memory word to compare against/update +- Label retry_load, nope; +- bind(retry_load); +- // Load reserved from the memory location +- lr_d(tmp, addr, Assembler::aqrl); +- // Fail and exit if it is not what we expect +- bne(tmp, oldv, nope); +- // If the store conditional succeeds, tmp will be zero +- sc_d(tmp, newv, addr, Assembler::rl); +- beqz(tmp, succeed); +- // Retry only when the store conditional failed +- j(retry_load); +- +- bind(nope); ++ if (UseZacas) { ++ mv(tmp, oldv); ++ atomic_cas(tmp, newv, addr, Assembler::int64, Assembler::aq, Assembler::rl); ++ beq(tmp, oldv, succeed); ++ } else { ++ Label retry_load, nope; ++ bind(retry_load); ++ // Load reserved from the memory location ++ load_reserved(tmp, addr, int64, Assembler::aqrl); ++ // Fail and exit if it is not what we expect ++ bne(tmp, oldv, nope); ++ // If the store conditional succeeds, tmp will be zero ++ store_conditional(tmp, newv, addr, int64, Assembler::rl); ++ beqz(tmp, succeed); ++ // Retry only when the store conditional failed ++ j(retry_load); ++ ++ bind(nope); ++ } ++ ++ // neither amocas nor lr/sc have an implied barrier in the failing case + membar(AnyAny); ++ + mv(oldv, tmp); + if (fail != nullptr) { + j(*fail); +@@ -3272,18 +3281,19 @@ void 
MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register o + cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); + } + +-void MacroAssembler::load_reserved(Register addr, ++void MacroAssembler::load_reserved(Register dst, ++ Register addr, + enum operand_size size, + Assembler::Aqrl acquire) { + switch (size) { + case int64: +- lr_d(t0, addr, acquire); ++ lr_d(dst, addr, acquire); + break; + case int32: +- lr_w(t0, addr, acquire); ++ lr_w(dst, addr, acquire); + break; + case uint32: +- lr_w(t0, addr, acquire); ++ lr_w(dst, addr, acquire); + zero_extend(t0, t0, 32); + break; + default: +@@ -3291,17 +3301,18 @@ void MacroAssembler::load_reserved(Register addr, + } + } + +-void MacroAssembler::store_conditional(Register addr, ++void MacroAssembler::store_conditional(Register dst, + Register new_val, ++ Register addr, + enum operand_size size, + Assembler::Aqrl release) { + switch (size) { + case int64: +- sc_d(t0, new_val, addr, release); ++ sc_d(dst, new_val, addr, release); + break; + case int32: + case uint32: +- sc_w(t0, new_val, addr, release); ++ sc_w(dst, new_val, addr, release); + break; + default: + ShouldNotReachHere(); +@@ -3309,14 +3320,11 @@ void MacroAssembler::store_conditional(Register addr, + } + + +-void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, +- Register new_val, ++void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, Register new_val, + enum operand_size size, +- Register tmp1, Register tmp2, Register tmp3) { ++ Register shift, Register mask, Register aligned_addr) { + assert(size == int8 || size == int16, "unsupported operand size"); + +- Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; +- + andi(shift, addr, 3); + slli(shift, shift, 3); + +@@ -3331,8 +3339,6 @@ void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expecte + } + sll(mask, mask, shift); + +- xori(not_mask, mask, -1); +- + sll(expected, expected, shift); + andr(expected, expected, mask); + +@@ -3341,7 +3347,7 @@ void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expecte + } + + // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps. +-// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, ++// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w or amocas.w, + // which are forced to work with 4-byte aligned address. 
+ void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, + Register new_val, +@@ -3349,21 +3355,47 @@ void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result, bool result_as_bool, + Register tmp1, Register tmp2, Register tmp3) { +- Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; +- assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); +- cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); ++ assert_different_registers(addr, expected, new_val, result, tmp1, tmp2, tmp3, t0, t1); ++ ++ Register scratch0 = t0, aligned_addr = t1; ++ Register shift = tmp1, mask = tmp2, scratch1 = tmp3; ++ ++ cmpxchg_narrow_value_helper(addr, expected, new_val, size, shift, mask, aligned_addr); + + Label retry, fail, done; + +- bind(retry); +- lr_w(old, aligned_addr, acquire); +- andr(tmp, old, mask); +- bne(tmp, expected, fail); ++ if (UseZacas) { ++ lw(result, aligned_addr); ++ ++ bind(retry); // amocas loads the current value into result ++ notr(scratch1, mask); ++ ++ andr(scratch0, result, scratch1); // scratch0 = word - cas bits ++ orr(scratch1, expected, scratch0); // scratch1 = non-cas bits + cas bits ++ bne(result, scratch1, fail); // cas bits differ, cas failed ++ ++ // result is the same as expected, use as expected value. ++ ++ // scratch0 is still = word - cas bits ++ // Or in the new value to create complete new value. ++ orr(scratch0, scratch0, new_val); ++ ++ mv(scratch1, result); // save our expected value ++ atomic_cas(result, scratch0, aligned_addr, operand_size::int32, acquire, release); ++ bne(scratch1, result, retry); ++ } else { ++ notr(scratch1, mask); ++ bind(retry); ++ ++ lr_w(result, aligned_addr, acquire); ++ andr(scratch0, result, mask); ++ bne(scratch0, expected, fail); + +- andr(tmp, old, not_mask); +- orr(tmp, tmp, new_val); +- sc_w(tmp, tmp, aligned_addr, release); +- bnez(tmp, retry); ++ andr(scratch0, result, scratch1); // scratch1 is ~mask ++ orr(scratch0, scratch0, new_val); ++ sc_w(scratch0, scratch0, aligned_addr, release); ++ bnez(scratch0, retry); ++ } + + if (result_as_bool) { + mv(result, 1); +@@ -3374,10 +3406,10 @@ void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, + + bind(done); + } else { +- andr(tmp, old, mask); +- + bind(fail); +- srl(result, tmp, shift); ++ ++ andr(scratch0, result, mask); ++ srl(result, scratch0, shift); + + if (size == int8) { + sign_extend(result, result, 8); +@@ -3397,20 +3429,45 @@ void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result, + Register tmp1, Register tmp2, Register tmp3) { +- Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; +- assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); +- cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); ++ assert_different_registers(addr, expected, new_val, result, tmp1, tmp2, tmp3, t0, t1); ++ ++ Register scratch0 = t0, aligned_addr = t1; ++ Register shift = tmp1, mask = tmp2, scratch1 = tmp3; ++ ++ cmpxchg_narrow_value_helper(addr, expected, new_val, size, shift, mask, aligned_addr); + + Label fail, done; + +- lr_w(old, aligned_addr, acquire); +- andr(tmp, old, mask); +- bne(tmp, expected, fail); ++ if (UseZacas) { ++ lw(result, aligned_addr); ++ ++ notr(scratch1, 
mask); + +- andr(tmp, old, not_mask); +- orr(tmp, tmp, new_val); +- sc_w(tmp, tmp, aligned_addr, release); +- bnez(tmp, fail); ++ andr(scratch0, result, scratch1); // scratch0 = word - cas bits ++ orr(scratch1, expected, scratch0); // scratch1 = non-cas bits + cas bits ++ bne(result, scratch1, fail); // cas bits differ, cas failed ++ ++ // result is the same as expected, use as expected value. ++ ++ // scratch0 is still = word - cas bits ++ // Or in the new value to create complete new value. ++ orr(scratch0, scratch0, new_val); ++ ++ mv(scratch1, result); // save our expected value ++ atomic_cas(result, scratch0, aligned_addr, operand_size::int32, acquire, release); ++ bne(scratch1, result, fail); // This weak, so just bail-out. ++ } else { ++ notr(scratch1, mask); ++ ++ lr_w(result, aligned_addr, acquire); ++ andr(scratch0, result, mask); ++ bne(scratch0, expected, fail); ++ ++ andr(scratch0, result, scratch1); // scratch1 is ~mask ++ orr(scratch0, scratch0, new_val); ++ sc_w(scratch0, scratch0, aligned_addr, release); ++ bnez(scratch0, fail); ++ } + + // Success + mv(result, 1); +@@ -3433,11 +3490,36 @@ void MacroAssembler::cmpxchg(Register addr, Register expected, + assert_different_registers(expected, t0); + assert_different_registers(new_val, t0); + ++ // NOTE: ++ // Register _result_ may be the same register as _new_val_ or _expected_. ++ // Hence do NOT use _result_ until after 'cas'. ++ // ++ // Register _expected_ may be the same register as _new_val_ and is assumed to be preserved. ++ // Hence do NOT change _expected_ or _new_val_. ++ // ++ // Having _expected_ and _new_val_ being the same register is a very puzzling cas. ++ // ++ // TODO: Address these issues. ++ ++ if (UseZacas) { ++ if (result_as_bool) { ++ mv(t0, expected); ++ atomic_cas(t0, new_val, addr, size, acquire, release); ++ xorr(t0, t0, expected); ++ seqz(result, t0); ++ } else { ++ mv(t0, expected); ++ atomic_cas(t0, new_val, addr, size, acquire, release); ++ mv(result, t0); ++ } ++ return; ++ } ++ + Label retry_load, done, ne_done; + bind(retry_load); +- load_reserved(addr, size, acquire); ++ load_reserved(t0, addr, size, acquire); + bne(t0, expected, ne_done); +- store_conditional(addr, new_val, size, release); ++ store_conditional(t0, new_val, addr, size, release); + bnez(t0, retry_load); + + // equal, succeed +@@ -3464,14 +3546,20 @@ void MacroAssembler::cmpxchg_weak(Register addr, Register expected, + enum operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result) { ++ + assert_different_registers(addr, t0); + assert_different_registers(expected, t0); + assert_different_registers(new_val, t0); + ++ if (UseZacas) { ++ cmpxchg(addr, expected, new_val, size, acquire, release, result, true); ++ return; ++ } ++ + Label fail, done; +- load_reserved(addr, size, acquire); ++ load_reserved(t0, addr, size, acquire); + bne(t0, expected, fail); +- store_conditional(addr, new_val, size, release); ++ store_conditional(t0, new_val, addr, size, release); + bnez(t0, fail); + + // Success +@@ -3530,6 +3618,24 @@ ATOMIC_XCHGU(xchgalwu, xchgalw) + + #undef ATOMIC_XCHGU + ++void MacroAssembler::atomic_cas(Register prev, Register newv, Register addr, ++ enum operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release) { ++ switch (size) { ++ case int64: ++ amocas_d(prev, addr, newv, (Assembler::Aqrl)(acquire | release)); ++ break; ++ case int32: ++ amocas_w(prev, addr, newv, (Assembler::Aqrl)(acquire | release)); ++ break; ++ case uint32: ++ amocas_w(prev, addr, newv, 
(Assembler::Aqrl)(acquire | release)); ++ zero_extend(prev, prev, 32); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ + void MacroAssembler::far_jump(Address entry, Register tmp) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(entry.target()) != nullptr, +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index c8b34c5ea..692306a73 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -1025,10 +1025,9 @@ public: + enum operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result); +- void cmpxchg_narrow_value_helper(Register addr, Register expected, +- Register new_val, ++ void cmpxchg_narrow_value_helper(Register addr, Register expected, Register new_val, + enum operand_size size, +- Register tmp1, Register tmp2, Register tmp3); ++ Register shift, Register mask, Register aligned_addr); + void cmpxchg_narrow_value(Register addr, Register expected, + Register new_val, + enum operand_size size, +@@ -1054,6 +1053,9 @@ public: + void atomic_xchgwu(Register prev, Register newv, Register addr); + void atomic_xchgalwu(Register prev, Register newv, Register addr); + ++ void atomic_cas(Register prev, Register newv, Register addr, enum operand_size size, ++ Assembler::Aqrl acquire = Assembler::relaxed, Assembler::Aqrl release = Assembler::relaxed); ++ + static bool far_branches() { + return ReservedCodeCacheSize > branch_range; + } +@@ -1506,8 +1508,8 @@ private: + int bitset_to_regs(unsigned int bitset, unsigned char* regs); + Address add_memory_helper(const Address dst, Register tmp); + +- void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); +- void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); ++ void load_reserved(Register dst, Register addr, enum operand_size size, Assembler::Aqrl acquire); ++ void store_conditional(Register dst, Register new_val, Register addr, enum operand_size size, Assembler::Aqrl release); + + public: + void lightweight_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow); +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +index e99b4be9c..20fd260ee 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +@@ -145,6 +145,7 @@ class VM_Version : public Abstract_VM_Version { + decl(ext_Zifencei , "Zifencei" , RV_NO_FLAG_BIT, true , NO_UPDATE_DEFAULT) \ + decl(ext_Zic64b , "Zic64b" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZic64b)) \ + decl(ext_Zihintpause , "Zihintpause" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZihintpause)) \ ++ decl(ext_Zacas , "Zacas" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZacas)) \ + decl(ext_Zvbc , "Zvbc" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZvbc)) \ + decl(ext_Zvkn , "Zvkn" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZvkn)) \ + decl(mvendorid , "VendorId" , RV_NO_FLAG_BIT, false, NO_UPDATE_DEFAULT) \ diff --git a/Backport-JDK-8319778-8324881-8319797-8319900-Recursive-lightweight-locking-riscv64-implementation.patch b/Backport-JDK-8319778-8324881-8319797-8319900-Recursive-lightweight-locking-riscv64-implementation.patch new file mode 100644 index 0000000000000000000000000000000000000000..c8deb1fda51db4a8063bad128cf016de340314f6 --- /dev/null +++ 
b/Backport-JDK-8319778-8324881-8319797-8319900-Recursive-lightweight-locking-riscv64-implementation.patch @@ -0,0 +1,2881 @@ +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +index 6c1dce0de..702bae688 100644 +--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -69,13 +69,12 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr + bnez(temp, slow_case, true /* is_far */); + } + +- // Load object header +- ld(hdr, Address(obj, hdr_offset)); +- + if (LockingMode == LM_LIGHTWEIGHT) { + lightweight_lock(obj, hdr, temp, t1, slow_case); + } else if (LockingMode == LM_LEGACY) { + Label done; ++ // Load object header ++ ld(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked + ori(hdr, hdr, markWord::unlocked_value); + // save unlocked object header into the displaced header location on the stack +@@ -134,9 +133,6 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ + verify_oop(obj); + + if (LockingMode == LM_LIGHTWEIGHT) { +- ld(hdr, Address(obj, oopDesc::mark_offset_in_bytes())); +- test_bit(temp, hdr, exact_log2(markWord::monitor_value)); +- bnez(temp, slow_case, /* is_far */ true); + lightweight_unlock(obj, hdr, temp, t1, slow_case); + } else if (LockingMode == LM_LEGACY) { + // test if object header is pointing to the displaced header, and if so, restore +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +index 1aa2bb778..8fa218ae9 100644 +--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +@@ -32,6 +32,7 @@ + #include "opto/output.hpp" + #include "opto/subnode.hpp" + #include "runtime/stubRoutines.hpp" ++#include "utilities/globalDefinitions.hpp" + + #ifdef PRODUCT + #define BLOCK_COMMENT(str) /* nothing */ +@@ -51,30 +52,35 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, + Register box = boxReg; + Register disp_hdr = tmp1Reg; + Register tmp = tmp2Reg; +- Label cont; + Label object_has_monitor; +- Label count, no_count; ++ // Finish fast lock successfully. MUST branch to with flag == 0 ++ Label locked; ++ // Finish fast lock unsuccessfully. slow_path MUST branch to with flag != 0 ++ Label slow_path; + ++ assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight"); + assert_different_registers(oop, box, tmp, disp_hdr, flag, tmp3Reg, t0); + ++ mv(flag, 1); ++ + // Load markWord from object into displaced_header. 
+ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + + if (DiagnoseSyncOnValueBasedClasses != 0) { +- load_klass(flag, oop); +- lwu(flag, Address(flag, Klass::access_flags_offset())); +- test_bit(flag, flag, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS)); +- bnez(flag, cont, true /* is_far */); ++ load_klass(tmp, oop); ++ lwu(tmp, Address(tmp, Klass::access_flags_offset())); ++ test_bit(tmp, tmp, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS)); ++ bnez(tmp, slow_path); + } + + // Check for existing monitor +- test_bit(t0, disp_hdr, exact_log2(markWord::monitor_value)); +- bnez(t0, object_has_monitor); ++ test_bit(tmp, disp_hdr, exact_log2(markWord::monitor_value)); ++ bnez(tmp, object_has_monitor); + + if (LockingMode == LM_MONITOR) { +- mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path +- j(cont); +- } else if (LockingMode == LM_LEGACY) { ++ j(slow_path); ++ } else { ++ assert(LockingMode == LM_LEGACY, "must be"); + // Set tmp to be (markWord of object | UNLOCK_VALUE). + ori(tmp, disp_hdr, markWord::unlocked_value); + +@@ -84,39 +90,27 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, + // Compare object markWord with an unlocked value (tmp) and if + // equal exchange the stack address of our box with object markWord. + // On failure disp_hdr contains the possibly locked markWord. +- cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, +- Assembler::rl, /*result*/disp_hdr); +- mv(flag, zr); +- beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas ++ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, ++ Assembler::aq, Assembler::rl, /*result*/disp_hdr); ++ beq(disp_hdr, tmp, locked); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + // If the compare-and-exchange succeeded, then we found an unlocked +- // object, will have now locked it will continue at label cont ++ // object, will have now locked it will continue at label locked + // We did not see an unlocked object so try the fast recursive case. + + // Check if the owner is self by comparing the value in the + // markWord of object (disp_hdr) with the stack pointer. + sub(disp_hdr, disp_hdr, sp); + mv(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markWord::lock_mask_in_place)); +- // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, ++ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto label locked, + // hence we can store 0 as the displaced header in the box, which indicates that it is a + // recursive lock. + andr(tmp/*==0?*/, disp_hdr, tmp); + sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); +- mv(flag, tmp); // we can use the value of tmp as the result here +- j(cont); +- } else { +- assert(LockingMode == LM_LIGHTWEIGHT, ""); +- Label slow; +- lightweight_lock(oop, disp_hdr, tmp, tmp3Reg, slow); +- +- // Indicate success on completion. +- mv(flag, zr); +- j(count); +- bind(slow); +- mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path +- j(no_count); ++ beqz(tmp, locked); ++ j(slow_path); + } + + // Handle existing monitor. +@@ -126,35 +120,42 @@ void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, + // + // Try to CAS m->owner from NULL to current thread. 
+ add(tmp, disp_hdr, (in_bytes(ObjectMonitor::owner_offset()) - markWord::monitor_value)); +- cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, +- Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) +- +- if (LockingMode != LM_LIGHTWEIGHT) { +- // Store a non-null value into the box to avoid looking like a re-entrant +- // lock. The fast-path monitor unlock code checks for +- // markWord::monitor_value so use markWord::unused_mark which has the +- // relevant bit set, and also matches ObjectSynchronizer::slow_enter. +- mv(tmp, (address)markWord::unused_mark().value()); +- sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); +- } ++ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, ++ Assembler::aq, Assembler::rl, /*result*/tmp3Reg); // cas succeeds if tmp3Reg == zr(expected) + +- beqz(flag, cont); // CAS success means locking succeeded ++ // Store a non-null value into the box to avoid looking like a re-entrant ++ // lock. The fast-path monitor unlock code checks for ++ // markWord::monitor_value so use markWord::unused_mark which has the ++ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. ++ mv(tmp, (address)markWord::unused_mark().value()); ++ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + +- bne(flag, xthread, cont); // Check for recursive locking ++ beqz(tmp3Reg, locked); // CAS success means locking succeeded ++ ++ bne(tmp3Reg, xthread, slow_path); // Check for recursive locking + + // Recursive lock case +- mv(flag, zr); +- increment(Address(disp_hdr, in_bytes(ObjectMonitor::recursions_offset()) - markWord::monitor_value), 1, t0, tmp); ++ increment(Address(disp_hdr, in_bytes(ObjectMonitor::recursions_offset()) - markWord::monitor_value), 1, tmp2Reg, tmp3Reg); + +- bind(cont); +- // zero flag indicates success +- // non-zero flag indicates failure +- bnez(flag, no_count); ++ bind(locked); ++ mv(flag, zr); ++ increment(Address(xthread, JavaThread::held_monitor_count_offset()), 1, tmp2Reg, tmp3Reg); + +- bind(count); +- increment(Address(xthread, JavaThread::held_monitor_count_offset()), 1, t0, tmp); ++#ifdef ASSERT ++ // Check that locked label is reached with flag == 0. ++ Label flag_correct; ++ beqz(flag, flag_correct); ++ stop("Fast Lock Flag != 0"); ++#endif + +- bind(no_count); ++ bind(slow_path); ++#ifdef ASSERT ++ // Check that slow_path label is reached with flag != 0. ++ bnez(flag, flag_correct); ++ stop("Fast Lock Flag == 0"); ++ bind(flag_correct); ++#endif ++ // C2 uses the value of flag (0 vs !0) to determine the continuation. + } + + void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, +@@ -165,19 +166,23 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, + Register box = boxReg; + Register disp_hdr = tmp1Reg; + Register tmp = tmp2Reg; +- Label cont; + Label object_has_monitor; +- Label count, no_count; ++ // Finish fast lock successfully. MUST branch to with flag == 0 ++ Label unlocked; ++ // Finish fast lock unsuccessfully. slow_path MUST branch to with flag != 0 ++ Label slow_path; + ++ assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight"); + assert_different_registers(oop, box, tmp, disp_hdr, flag, t0); + ++ mv(flag, 1); ++ + if (LockingMode == LM_LEGACY) { + // Find the lock address and load the displaced header from the stack. 
+ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + + // If the displaced header is 0, we have a recursive unlock. +- mv(flag, disp_hdr); +- beqz(disp_hdr, cont); ++ beqz(disp_hdr, unlocked); + } + + // Handle existing monitor. +@@ -186,28 +191,17 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, + bnez(t0, object_has_monitor); + + if (LockingMode == LM_MONITOR) { +- mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path +- j(cont); +- } else if (LockingMode == LM_LEGACY) { ++ j(slow_path); ++ } else { ++ assert(LockingMode == LM_LEGACY, "must be"); + // Check if it is still a light weight lock, this is true if we + // see the stack address of the basicLock in the markWord of the + // object. + +- cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, +- Assembler::rl, /*result*/tmp); +- xorr(flag, box, tmp); // box == tmp if cas succeeds +- j(cont); +- } else { +- assert(LockingMode == LM_LIGHTWEIGHT, ""); +- Label slow; +- lightweight_unlock(oop, tmp, box, disp_hdr, slow); +- +- // Indicate success on completion. +- mv(flag, zr); +- j(count); +- bind(slow); +- mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path +- j(no_count); ++ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, ++ Assembler::relaxed, Assembler::rl, /*result*/tmp); ++ beq(box, tmp, unlocked); // box == tmp if cas succeeds ++ j(slow_path); + } + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); +@@ -217,17 +211,6 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, + STATIC_ASSERT(markWord::monitor_value <= INT_MAX); + add(tmp, tmp, -(int)markWord::monitor_value); // monitor + +- if (LockingMode == LM_LIGHTWEIGHT) { +- // If the owner is anonymous, we need to fix it -- in an outline stub. +- Register tmp2 = disp_hdr; +- ld(tmp2, Address(tmp, ObjectMonitor::owner_offset())); +- test_bit(t0, tmp2, exact_log2(ObjectMonitor::ANONYMOUS_OWNER)); +- C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmp, tmp2); +- Compile::current()->output()->add_stub(stub); +- bnez(t0, stub->entry(), /* is_far */ true); +- bind(stub->continuation()); +- } +- + ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset())); + + Label notRecursive; +@@ -236,28 +219,304 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, + // Recursive lock + addi(disp_hdr, disp_hdr, -1); + sd(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset())); +- mv(flag, zr); +- j(cont); ++ j(unlocked); + + bind(notRecursive); +- ld(flag, Address(tmp, ObjectMonitor::EntryList_offset())); ++ ld(t0, Address(tmp, ObjectMonitor::EntryList_offset())); + ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset())); +- orr(flag, flag, disp_hdr); // Will be 0 if both are 0. +- bnez(flag, cont); ++ orr(t0, t0, disp_hdr); // Will be 0 if both are 0. 
++ bnez(t0, slow_path); ++ + // need a release store here + la(tmp, Address(tmp, ObjectMonitor::owner_offset())); + membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + sd(zr, Address(tmp)); // set unowned + +- bind(cont); +- // zero flag indicates success +- // non-zero flag indicates failure +- bnez(flag, no_count); ++ bind(unlocked); ++ mv(flag, zr); ++ decrement(Address(xthread, JavaThread::held_monitor_count_offset()), 1, tmp1Reg, tmp2Reg); + +- bind(count); +- decrement(Address(xthread, JavaThread::held_monitor_count_offset()), 1, t0, tmp); ++#ifdef ASSERT ++ // Check that unlocked label is reached with flag == 0. ++ Label flag_correct; ++ beqz(flag, flag_correct); ++ stop("Fast Lock Flag != 0"); ++#endif + +- bind(no_count); ++ bind(slow_path); ++#ifdef ASSERT ++ // Check that slow_path label is reached with flag != 0. ++ bnez(flag, flag_correct); ++ stop("Fast Lock Flag == 0"); ++ bind(flag_correct); ++#endif ++ // C2 uses the value of flag (0 vs !0) to determine the continuation. ++} ++ ++void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register tmp1, Register tmp2, Register tmp3) { ++ // Flag register, zero for success; non-zero for failure. ++ Register flag = t1; ++ ++ assert(LockingMode == LM_LIGHTWEIGHT, "must be"); ++ assert_different_registers(obj, tmp1, tmp2, tmp3, flag, t0); ++ ++ mv(flag, 1); ++ ++ // Handle inflated monitor. ++ Label inflated; ++ // Finish fast lock successfully. MUST branch to with flag == 0 ++ Label locked; ++ // Finish fast lock unsuccessfully. slow_path MUST branch to with flag != 0 ++ Label slow_path; ++ ++ if (DiagnoseSyncOnValueBasedClasses != 0) { ++ load_klass(tmp1, obj); ++ lwu(tmp1, Address(tmp1, Klass::access_flags_offset())); ++ test_bit(tmp1, tmp1, exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS)); ++ bnez(tmp1, slow_path); ++ } ++ ++ const Register tmp1_mark = tmp1; ++ ++ { // Lightweight locking ++ ++ // Push lock to the lock stack and finish successfully. MUST branch to with flag == 0 ++ Label push; ++ ++ const Register tmp2_top = tmp2; ++ const Register tmp3_t = tmp3; ++ ++ // Check if lock-stack is full. ++ lwu(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset())); ++ mv(tmp3_t, (unsigned)LockStack::end_offset()); ++ bge(tmp2_top, tmp3_t, slow_path); ++ ++ // Check if recursive. ++ add(tmp3_t, xthread, tmp2_top); ++ ld(tmp3_t, Address(tmp3_t, -oopSize)); ++ beq(obj, tmp3_t, push); ++ ++ // Relaxed normal load to check for monitor. Optimization for monitor case. ++ ld(tmp1_mark, Address(obj, oopDesc::mark_offset_in_bytes())); ++ test_bit(tmp3_t, tmp1_mark, exact_log2(markWord::monitor_value)); ++ bnez(tmp3_t, inflated); ++ ++ // Not inflated ++ assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a la"); ++ ++ // Try to lock. Transition lock-bits 0b01 => 0b00 ++ ori(tmp1_mark, tmp1_mark, markWord::unlocked_value); ++ xori(tmp3_t, tmp1_mark, markWord::unlocked_value); ++ cmpxchg(/*addr*/ obj, /*expected*/ tmp1_mark, /*new*/ tmp3_t, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ tmp3_t); ++ bne(tmp1_mark, tmp3_t, slow_path); ++ ++ bind(push); ++ // After successful lock, push object on lock-stack. ++ add(tmp3_t, xthread, tmp2_top); ++ sd(obj, Address(tmp3_t)); ++ addw(tmp2_top, tmp2_top, oopSize); ++ sw(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset())); ++ j(locked); ++ } ++ ++ { // Handle inflated monitor. ++ bind(inflated); ++ ++ // mark contains the tagged ObjectMonitor*. 
++ const Register tmp1_tagged_monitor = tmp1_mark; ++ const uintptr_t monitor_tag = markWord::monitor_value; ++ const Register tmp2_owner_addr = tmp2; ++ const Register tmp3_owner = tmp3; ++ ++ // Compute owner address. ++ la(tmp2_owner_addr, Address(tmp1_tagged_monitor, (in_bytes(ObjectMonitor::owner_offset()) - monitor_tag))); ++ ++ // CAS owner (null => current thread). ++ cmpxchg(/*addr*/ tmp2_owner_addr, /*expected*/ zr, /*new*/ xthread, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ tmp3_owner); ++ beqz(tmp3_owner, locked); ++ ++ // Check if recursive. ++ bne(tmp3_owner, xthread, slow_path); ++ ++ // Recursive. ++ increment(Address(tmp1_tagged_monitor, in_bytes(ObjectMonitor::recursions_offset()) - monitor_tag), 1, tmp2, tmp3); ++ } ++ ++ bind(locked); ++ mv(flag, zr); ++ increment(Address(xthread, JavaThread::held_monitor_count_offset()), 1, tmp2, tmp3); ++ ++#ifdef ASSERT ++ // Check that locked label is reached with flag == 0. ++ Label flag_correct; ++ beqz(flag, flag_correct); ++ stop("Fast Lock Flag != 0"); ++#endif ++ ++ bind(slow_path); ++#ifdef ASSERT ++ // Check that slow_path label is reached with flag != 0. ++ bnez(flag, flag_correct); ++ stop("Fast Lock Flag == 0"); ++ bind(flag_correct); ++#endif ++ // C2 uses the value of flag (0 vs !0) to determine the continuation. ++} ++ ++void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Register tmp2, ++ Register tmp3) { ++ // Flag register, zero for success; non-zero for failure. ++ Register flag = t1; ++ ++ assert(LockingMode == LM_LIGHTWEIGHT, "must be"); ++ assert_different_registers(obj, tmp1, tmp2, tmp3, flag, t0); ++ ++ mv(flag, 1); ++ ++ // Handle inflated monitor. ++ Label inflated, inflated_load_monitor; ++ // Finish fast unlock successfully. unlocked MUST branch to with flag == 0 ++ Label unlocked; ++ // Finish fast unlock unsuccessfully. MUST branch to with flag != 0 ++ Label slow_path; ++ ++ const Register tmp1_mark = tmp1; ++ const Register tmp2_top = tmp2; ++ const Register tmp3_t = tmp3; ++ ++ { // Lightweight unlock ++ ++ // Check if obj is top of lock-stack. ++ lwu(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset())); ++ subw(tmp2_top, tmp2_top, oopSize); ++ add(tmp3_t, xthread, tmp2_top); ++ ld(tmp3_t, Address(tmp3_t)); ++ // Top of lock stack was not obj. Must be monitor. ++ bne(obj, tmp3_t, inflated_load_monitor); ++ ++ // Pop lock-stack. ++ DEBUG_ONLY(add(tmp3_t, xthread, tmp2_top);) ++ DEBUG_ONLY(sd(zr, Address(tmp3_t));) ++ sw(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset())); ++ ++ // Check if recursive. ++ add(tmp3_t, xthread, tmp2_top); ++ ld(tmp3_t, Address(tmp3_t, -oopSize)); ++ beq(obj, tmp3_t, unlocked); ++ ++ // Not recursive. ++ // Load Mark. ++ ld(tmp1_mark, Address(obj, oopDesc::mark_offset_in_bytes())); ++ ++ // Check header for monitor (0b10). ++ test_bit(tmp3_t, tmp1_mark, exact_log2(markWord::monitor_value)); ++ bnez(tmp3_t, inflated); ++ ++ // Try to unlock. Transition lock bits 0b00 => 0b01 ++ assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea"); ++ ori(tmp3_t, tmp1_mark, markWord::unlocked_value); ++ cmpxchg(/*addr*/ obj, /*expected*/ tmp1_mark, /*new*/ tmp3_t, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ tmp3_t); ++ beq(tmp1_mark, tmp3_t, unlocked); ++ ++ // Compare and exchange failed. ++ // Restore lock-stack and handle the unlock in runtime. 
++ DEBUG_ONLY(add(tmp3_t, xthread, tmp2_top);) ++ DEBUG_ONLY(sd(obj, Address(tmp3_t));) ++ addw(tmp2_top, tmp2_top, oopSize); ++ sd(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset())); ++ j(slow_path); ++ } ++ ++ { // Handle inflated monitor. ++ bind(inflated_load_monitor); ++ ld(tmp1_mark, Address(obj, oopDesc::mark_offset_in_bytes())); ++#ifdef ASSERT ++ test_bit(tmp3_t, tmp1_mark, exact_log2(markWord::monitor_value)); ++ bnez(tmp3_t, inflated); ++ stop("Fast Unlock not monitor"); ++#endif ++ ++ bind(inflated); ++ ++#ifdef ASSERT ++ Label check_done; ++ subw(tmp2_top, tmp2_top, oopSize); ++ mv(tmp3_t, in_bytes(JavaThread::lock_stack_base_offset())); ++ blt(tmp2_top, tmp3_t, check_done); ++ add(tmp3_t, xthread, tmp2_top); ++ ld(tmp3_t, Address(tmp3_t)); ++ bne(obj, tmp3_t, inflated); ++ stop("Fast Unlock lock on stack"); ++ bind(check_done); ++#endif ++ ++ // mark contains the tagged ObjectMonitor*. ++ const Register tmp1_monitor = tmp1_mark; ++ const uintptr_t monitor_tag = markWord::monitor_value; ++ ++ // Untag the monitor. ++ sub(tmp1_monitor, tmp1_mark, monitor_tag); ++ ++ const Register tmp2_recursions = tmp2; ++ Label not_recursive; ++ ++ // Check if recursive. ++ ld(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset())); ++ beqz(tmp2_recursions, not_recursive); ++ ++ // Recursive unlock. ++ addi(tmp2_recursions, tmp2_recursions, -1); ++ sd(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset())); ++ j(unlocked); ++ ++ bind(not_recursive); ++ ++ Label release; ++ const Register tmp2_owner_addr = tmp2; ++ ++ // Compute owner address. ++ la(tmp2_owner_addr, Address(tmp1_monitor, ObjectMonitor::owner_offset())); ++ ++ // Check if the entry lists are empty. ++ ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset())); ++ ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset())); ++ orr(t0, t0, tmp3_t); ++ beqz(t0, release); ++ ++ // The owner may be anonymous and we removed the last obj entry in ++ // the lock-stack. This loses the information about the owner. ++ // Write the thread to the owner field so the runtime knows the owner. ++ sd(xthread, Address(tmp2_owner_addr)); ++ j(slow_path); ++ ++ bind(release); ++ // Set owner to null. ++ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ sd(zr, Address(tmp2_owner_addr)); ++ } ++ ++ bind(unlocked); ++ mv(flag, zr); ++ decrement(Address(xthread, JavaThread::held_monitor_count_offset()), 1, tmp2, tmp3); ++ ++#ifdef ASSERT ++ // Check that unlocked label is reached with flag == 0. ++ Label flag_correct; ++ beqz(flag, flag_correct); ++ stop("Fast Lock Flag != 0"); ++#endif ++ ++ bind(slow_path); ++#ifdef ASSERT ++ // Check that slow_path label is reached with flag != 0. ++ bnez(flag, flag_correct); ++ stop("Fast Lock Flag == 0"); ++ bind(flag_correct); ++#endif ++ // C2 uses the value of flag (0 vs !0) to determine the continuation. + } + + // short string +diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +index 94d9ee791..c40b96998 100644 +--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +@@ -40,9 +40,11 @@ + bool is_latin, Label& DONE); + public: + // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. +- // See full description in macroAssembler_riscv.cpp. 
+ void fast_lock(Register object, Register box, Register tmp1, Register tmp2, Register tmp3); + void fast_unlock(Register object, Register box, Register tmp1, Register tmp2); ++ // Code used by cmpFastLockLightweight and cmpFastUnlockLightweight mach instructions in .ad file. ++ void fast_lock_lightweight(Register object, Register tmp1, Register tmp2, Register tmp3); ++ void fast_unlock_lightweight(Register object, Register tmp1, Register tmp2, Register tmp3); + + void string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +index 458c5689c..e2c7b17e0 100644 +--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -835,7 +835,6 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) + } + + if (LockingMode == LM_LIGHTWEIGHT) { +- ld(tmp, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + lightweight_lock(obj_reg, tmp, tmp2, tmp3, slow_case); + j(count); + } else if (LockingMode == LM_LEGACY) { +@@ -932,24 +931,6 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) + + if (LockingMode == LM_LIGHTWEIGHT) { + Label slow_case; +- +- // Check for non-symmetric locking. This is allowed by the spec and the interpreter +- // must handle it. +- Register tmp1 = t0; +- Register tmp2 = header_reg; +- // First check for lock-stack underflow. +- lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); +- mv(tmp2, (unsigned)LockStack::start_offset()); +- ble(tmp1, tmp2, slow_case); +- // Then check if the top of the lock-stack matches the unlocked object. +- subw(tmp1, tmp1, oopSize); +- add(tmp1, xthread, tmp1); +- ld(tmp1, Address(tmp1, 0)); +- bne(tmp1, obj_reg, slow_case); +- +- ld(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); +- test_bit(t0, header_reg, exact_log2(markWord::monitor_value)); +- bnez(t0, slow_case); + lightweight_unlock(obj_reg, header_reg, swap_reg, tmp_reg, slow_case); + j(count); + +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index e411b8956..17bf4314c 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -47,6 +47,7 @@ + #include "runtime/jniHandles.inline.hpp" + #include "runtime/sharedRuntime.hpp" + #include "runtime/stubRoutines.hpp" ++#include "utilities/globalDefinitions.hpp" + #include "utilities/powerOfTwo.hpp" + #ifdef COMPILER2 + #include "opto/compile.hpp" +@@ -5383,98 +5384,124 @@ void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) { + } + + // Implements lightweight-locking. +-// Branches to slow upon failure to lock the object. +-// Falls through upon success. + // + // - obj: the object to be locked +-// - hdr: the header, already loaded from obj, will be destroyed +-// - tmp1, tmp2: temporary registers, will be destroyed +-void MacroAssembler::lightweight_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) { ++// - tmp1, tmp2, tmp3: temporary registers, will be destroyed ++// - slow: branched to if locking fails ++void MacroAssembler::lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) { + assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); +- assert_different_registers(obj, hdr, tmp1, tmp2, t0); +- +- // Check if we would have space on lock-stack for the object. 
+- lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); +- mv(tmp2, (unsigned)LockStack::end_offset()); +- bge(tmp1, tmp2, slow, /* is_far */ true); +- +- // Load (object->mark() | 1) into hdr +- ori(hdr, hdr, markWord::unlocked_value); +- // Clear lock-bits, into tmp2 +- xori(tmp2, hdr, markWord::unlocked_value); +- +- // Try to swing header from unlocked to locked +- Label success; +- cmpxchgptr(hdr, tmp2, obj, tmp1, success, &slow); +- bind(success); +- +- // After successful lock, push object on lock-stack +- lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); +- add(tmp2, xthread, tmp1); +- sd(obj, Address(tmp2, 0)); +- addw(tmp1, tmp1, oopSize); +- sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); ++ assert_different_registers(obj, tmp1, tmp2, tmp3, t0); ++ ++ Label push; ++ const Register top = tmp1; ++ const Register mark = tmp2; ++ const Register t = tmp3; ++ ++ // Preload the markWord. It is important that this is the first ++ // instruction emitted as it is part of C1's null check semantics. ++ ld(mark, Address(obj, oopDesc::mark_offset_in_bytes())); ++ ++ // Check if the lock-stack is full. ++ lwu(top, Address(xthread, JavaThread::lock_stack_top_offset())); ++ mv(t, (unsigned)LockStack::end_offset()); ++ bge(top, t, slow, /* is_far */ true); ++ ++ // Check for recursion. ++ add(t, xthread, top); ++ ld(t, Address(t, -oopSize)); ++ beq(obj, t, push); ++ ++ // Check header for monitor (0b10). ++ test_bit(t, mark, exact_log2(markWord::monitor_value)); ++ bnez(t, slow, /* is_far */ true); ++ ++ // Try to lock. Transition lock-bits 0b01 => 0b00 ++ assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a la"); ++ ori(mark, mark, markWord::unlocked_value); ++ xori(t, mark, markWord::unlocked_value); ++ cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ t); ++ bne(mark, t, slow, /* is_far */ true); ++ ++ bind(push); ++ // After successful lock, push object on lock-stack. ++ add(t, xthread, top); ++ sd(obj, Address(t)); ++ addw(top, top, oopSize); ++ sw(top, Address(xthread, JavaThread::lock_stack_top_offset())); + } + + // Implements ligthweight-unlocking. +-// Branches to slow upon failure. +-// Falls through upon success. + // + // - obj: the object to be unlocked +-// - hdr: the (pre-loaded) header of the object +-// - tmp1, tmp2: temporary registers +-void MacroAssembler::lightweight_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) { ++// - tmp1, tmp2, tmp3: temporary registers ++// - slow: branched to if unlocking fails ++void MacroAssembler::lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) { + assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); +- assert_different_registers(obj, hdr, tmp1, tmp2, t0); ++ assert_different_registers(obj, tmp1, tmp2, tmp3, t0); + + #ifdef ASSERT + { +- // The following checks rely on the fact that LockStack is only ever modified by +- // its owning thread, even if the lock got inflated concurrently; removal of LockStack +- // entries after inflation will happen delayed in that case. +- + // Check for lock-stack underflow. 
+ Label stack_ok; + lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); + mv(tmp2, (unsigned)LockStack::start_offset()); +- bgt(tmp1, tmp2, stack_ok); ++ bge(tmp1, tmp2, stack_ok); + STOP("Lock-stack underflow"); + bind(stack_ok); + } +- { +- // Check if the top of the lock-stack matches the unlocked object. +- Label tos_ok; +- subw(tmp1, tmp1, oopSize); +- add(tmp1, xthread, tmp1); +- ld(tmp1, Address(tmp1, 0)); +- beq(tmp1, obj, tos_ok); +- STOP("Top of lock-stack does not match the unlocked object"); +- bind(tos_ok); +- } +- { +- // Check that hdr is fast-locked. +- Label hdr_ok; +- andi(tmp1, hdr, markWord::lock_mask_in_place); +- beqz(tmp1, hdr_ok); +- STOP("Header is not fast-locked"); +- bind(hdr_ok); +- } + #endif + +- // Load the new header (unlocked) into tmp1 +- ori(tmp1, hdr, markWord::unlocked_value); ++ Label unlocked, push_and_slow; ++ const Register top = tmp1; ++ const Register mark = tmp2; ++ const Register t = tmp3; ++ ++ // Check if obj is top of lock-stack. ++ lwu(top, Address(xthread, JavaThread::lock_stack_top_offset())); ++ subw(top, top, oopSize); ++ add(t, xthread, top); ++ ld(t, Address(t)); ++ bne(obj, t, slow, /* is_far */ true); ++ ++ // Pop lock-stack. ++ DEBUG_ONLY(add(t, xthread, top);) ++ DEBUG_ONLY(sd(zr, Address(t));) ++ sw(top, Address(xthread, JavaThread::lock_stack_top_offset())); ++ ++ // Check if recursive. ++ add(t, xthread, top); ++ ld(t, Address(t, -oopSize)); ++ beq(obj, t, unlocked); ++ ++ // Not recursive. Check header for monitor (0b10). ++ ld(mark, Address(obj, oopDesc::mark_offset_in_bytes())); ++ test_bit(t, mark, exact_log2(markWord::monitor_value)); ++ bnez(t, push_and_slow); + +- // Try to swing header from locked to unlocked +- Label success; +- cmpxchgptr(hdr, tmp1, obj, tmp2, success, &slow); +- bind(success); +- +- // After successful unlock, pop object from lock-stack +- lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); +- subw(tmp1, tmp1, oopSize); + #ifdef ASSERT +- add(tmp2, xthread, tmp1); +- sd(zr, Address(tmp2, 0)); ++ // Check header not unlocked (0b01). ++ Label not_unlocked; ++ test_bit(t, mark, exact_log2(markWord::unlocked_value)); ++ beqz(t, not_unlocked); ++ stop("lightweight_unlock already unlocked"); ++ bind(not_unlocked); + #endif +- sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); ++ ++ // Try to unlock. Transition lock bits 0b00 => 0b01 ++ assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea"); ++ ori(t, mark, markWord::unlocked_value); ++ cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ t); ++ beq(mark, t, unlocked); ++ ++ bind(push_and_slow); ++ // Restore lock-stack and handle the unlock in runtime. 
++ DEBUG_ONLY(add(t, xthread, top);) ++ DEBUG_ONLY(sd(obj, Address(t));) ++ addw(top, top, oopSize); ++ sw(top, Address(xthread, JavaThread::lock_stack_top_offset())); ++ j(slow); ++ ++ bind(unlocked); + } +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 692306a73..479d8d1a6 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -1512,8 +1512,8 @@ private: + void store_conditional(Register dst, Register new_val, Register addr, enum operand_size size, Assembler::Aqrl release); + + public: +- void lightweight_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow); +- void lightweight_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow); ++ void lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow); ++ void lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow); + }; + + #ifdef ASSERT +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index b07713b95..ac22dc536 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -10279,10 +10279,11 @@ instruct tlsLoadP(javaThread_RegP dst) + // using t1 as the 'flag' register to bridge the BoolNode producers and consumers + instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3) + %{ ++ predicate(LockingMode != LM_LIGHTWEIGHT); + match(Set cr (FastLock object box)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); + +- ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); ++ ins_cost(10 * DEFAULT_COST); + format %{ "fastlock $object,$box\t! kills $tmp1,$tmp2,$tmp3, #@cmpFastLock" %} + + ins_encode %{ +@@ -10295,10 +10296,11 @@ instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iReg + // using t1 as the 'flag' register to bridge the BoolNode producers and consumers + instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) + %{ ++ predicate(LockingMode != LM_LIGHTWEIGHT); + match(Set cr (FastUnlock object box)); + effect(TEMP tmp1, TEMP tmp2); + +- ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); ++ ins_cost(10 * DEFAULT_COST); + format %{ "fastunlock $object,$box\t! kills $tmp1, $tmp2, #@cmpFastUnlock" %} + + ins_encode %{ +@@ -10308,6 +10310,38 @@ instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iR + ins_pipe(pipe_serial); + %} + ++instruct cmpFastLockLightweight(rFlagsReg cr, iRegP object, iRegP_R10 box, iRegPNoSp tmp1, iRegPNoSp tmp2) ++%{ ++ predicate(LockingMode == LM_LIGHTWEIGHT); ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp1, TEMP tmp2, USE_KILL box); ++ ++ ins_cost(10 * DEFAULT_COST); ++ format %{ "fastlock $object,$box\t! kills $box,$tmp1,$tmp2 #@cmpFastLockLightweight" %} ++ ++ ins_encode %{ ++ __ fast_lock_lightweight($object$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct cmpFastUnlockLightweight(rFlagsReg cr, iRegP object, iRegP_R10 box, iRegPNoSp tmp1, iRegPNoSp tmp2) ++%{ ++ predicate(LockingMode == LM_LIGHTWEIGHT); ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp1, TEMP tmp2, USE_KILL box); ++ ++ ins_cost(10 * DEFAULT_COST); ++ format %{ "fastunlock $object,$box\t! 
kills $box,$tmp1,$tmp2, #@cmpFastUnlockLightweight" %} ++ ++ ins_encode %{ ++ __ fast_unlock_lightweight($object$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ + // Tail Call; Jump from runtime stub to Java code. + // Also known as an 'interprocedural jump'. + // Target of jump will eventually return to caller. +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +index d900e8732..a38c8ec12 100644 +--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -1704,8 +1704,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + __ sd(swap_reg, Address(lock_reg, mark_word_offset)); + __ bnez(swap_reg, slow_path_lock); + } else { +- assert(LockingMode == LM_LIGHTWEIGHT, ""); +- __ ld(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + __ lightweight_lock(obj_reg, swap_reg, tmp, lock_tmp, slow_path_lock); + } + +@@ -1831,9 +1830,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + __ decrement(Address(xthread, JavaThread::held_monitor_count_offset())); + } else { + assert(LockingMode == LM_LIGHTWEIGHT, ""); +- __ ld(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); +- __ test_bit(t0, old_hdr, exact_log2(markWord::monitor_value)); +- __ bnez(t0, slow_path_unlock); + __ lightweight_unlock(obj_reg, old_hdr, swap_reg, lock_tmp, slow_path_unlock); + __ decrement(Address(xthread, JavaThread::held_monitor_count_offset())); + } +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +index 20fd260ee..99a4c3dd4 100644 +--- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +@@ -259,6 +259,8 @@ class VM_Version : public Abstract_VM_Version { + + constexpr static bool supports_stack_watermark_barrier() { return true; } + ++ constexpr static bool supports_recursive_lightweight_locking() { return true; } ++ + static bool supports_on_spin_wait() { return UseZihintpause; } + + // RISCV64 supports fast class initialization checks +diff --git a/src/hotspot/share/prims/whitebox.cpp b/src/hotspot/share/prims/whitebox.cpp +index 50410d1fa..9271f5c60 100644 +--- a/src/hotspot/share/prims/whitebox.cpp ++++ b/src/hotspot/share/prims/whitebox.cpp +@@ -82,6 +82,7 @@ + #include "runtime/javaCalls.hpp" + #include "runtime/javaThread.inline.hpp" + #include "runtime/jniHandles.inline.hpp" ++#include "runtime/lockStack.hpp" + #include "runtime/os.hpp" + #include "runtime/stackFrameStream.inline.hpp" + #include "runtime/synchronizer.hpp" +@@ -1856,6 +1857,14 @@ WB_ENTRY(jboolean, WB_IsUbsanEnabled(JNIEnv* env)) + return (jboolean) WhiteBox::is_ubsan_enabled(); + WB_END + ++WB_ENTRY(jint, WB_getLockStackCapacity(JNIEnv* env)) ++ return (jint) LockStack::CAPACITY; ++WB_END ++ ++WB_ENTRY(jboolean, WB_supportsRecursiveLightweightLocking(JNIEnv* env)) ++ return (jboolean) VM_Version::supports_recursive_lightweight_locking(); ++WB_END ++ + WB_ENTRY(jboolean, WB_DeflateIdleMonitors(JNIEnv* env, jobject wb)) + log_info(monitorinflation)("WhiteBox initiated DeflateIdleMonitors"); + return ObjectSynchronizer::request_deflate_idle_monitors_from_wb(); +@@ -2782,6 +2791,8 @@ static JNINativeMethod methods[] = { + (void*)&WB_AddModuleExportsToAll }, + {CC"deflateIdleMonitors", CC"()Z", (void*)&WB_DeflateIdleMonitors }, + {CC"isMonitorInflated0", CC"(Ljava/lang/Object;)Z", 
(void*)&WB_IsMonitorInflated }, ++ {CC"getLockStackCapacity", CC"()I", (void*)&WB_getLockStackCapacity }, ++ {CC"supportsRecursiveLightweightLocking", CC"()Z", (void*)&WB_supportsRecursiveLightweightLocking }, + {CC"isAsanEnabled", CC"()Z", (void*)&WB_IsAsanEnabled }, + {CC"isUbsanEnabled", CC"()Z", (void*)&WB_IsUbsanEnabled }, + {CC"forceSafepoint", CC"()V", (void*)&WB_ForceSafepoint }, +diff --git a/src/hotspot/share/runtime/abstract_vm_version.hpp b/src/hotspot/share/runtime/abstract_vm_version.hpp +index 4bf0741a2..fb5db3f47 100644 +--- a/src/hotspot/share/runtime/abstract_vm_version.hpp ++++ b/src/hotspot/share/runtime/abstract_vm_version.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -183,6 +183,9 @@ class Abstract_VM_Version: AllStatic { + // Does platform support secondary supers table lookup? + constexpr static bool supports_secondary_supers_table() { return false; } + ++ // Is recursive lightweight locking implemented for this platform? ++ constexpr static bool supports_recursive_lightweight_locking() { return false; } ++ + // Does platform support float16 instructions? + static bool supports_float16() { return false; } + +diff --git a/src/hotspot/share/runtime/deoptimization.cpp b/src/hotspot/share/runtime/deoptimization.cpp +index 2058da0ff..4f82836a4 100644 +--- a/src/hotspot/share/runtime/deoptimization.cpp ++++ b/src/hotspot/share/runtime/deoptimization.cpp +@@ -1665,13 +1665,13 @@ bool Deoptimization::relock_objects(JavaThread* thread, GrowableArrayowner()->is_locked(), "object must be locked now"); +- ObjectMonitor* mon = ObjectSynchronizer::inflate(deoptee_thread, obj(), ObjectSynchronizer::inflate_cause_vm_internal); ++ ObjectMonitor* mon = ObjectSynchronizer::inflate_for(deoptee_thread, obj(), ObjectSynchronizer::inflate_cause_vm_internal); + assert(mon->owner() == deoptee_thread, "must be"); + } else { + BasicLock* lock = mon_info->lock(); +- ObjectSynchronizer::enter(obj, lock, deoptee_thread); ++ ObjectSynchronizer::enter_for(obj, lock, deoptee_thread); + assert(mon_info->owner()->is_locked(), "object must be locked now"); + } + } +diff --git a/src/hotspot/share/runtime/lockStack.cpp b/src/hotspot/share/runtime/lockStack.cpp +index b4a3bf1e8..d7dcbdda7 100644 +--- a/src/hotspot/share/runtime/lockStack.cpp ++++ b/src/hotspot/share/runtime/lockStack.cpp +@@ -1,6 +1,7 @@ + /* + * Copyright (c) 2022, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. ++ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+  *
+  * This code is free software; you can redistribute it and/or modify it
+@@ -25,20 +26,30 @@
+ 
+ #include "precompiled.hpp"
+ #include "memory/allocation.hpp"
++#include "runtime/globals.hpp"
+ #include "runtime/lockStack.inline.hpp"
+ #include "runtime/safepoint.hpp"
+ #include "runtime/stackWatermark.hpp"
+ #include "runtime/stackWatermarkSet.inline.hpp"
+ #include "runtime/thread.hpp"
+ #include "utilities/copy.hpp"
++#include "utilities/debug.hpp"
++#include "utilities/globalDefinitions.hpp"
+ #include "utilities/ostream.hpp"
+ 
++#include <type_traits>
++
+ const int LockStack::lock_stack_offset = in_bytes(JavaThread::lock_stack_offset());
+ const int LockStack::lock_stack_top_offset = in_bytes(JavaThread::lock_stack_top_offset());
+ const int LockStack::lock_stack_base_offset = in_bytes(JavaThread::lock_stack_base_offset());
+ 
+ LockStack::LockStack(JavaThread* jt) :
+   _top(lock_stack_base_offset), _base() {
++  // Make sure the layout of the object is compatible with the emitted code's assumptions.
++  STATIC_ASSERT(sizeof(_bad_oop_sentinel) == oopSize);
++  STATIC_ASSERT(sizeof(_base[0]) == oopSize);
++  STATIC_ASSERT(std::is_standard_layout<LockStack>::value);
++  STATIC_ASSERT(offsetof(LockStack, _bad_oop_sentinel) == offsetof(LockStack, _base) - oopSize);
+ #ifdef ASSERT
+   for (int i = 0; i < CAPACITY; i++) {
+     _base[i] = nullptr;
+@@ -62,11 +73,21 @@ uint32_t LockStack::end_offset() {
+ void LockStack::verify(const char* msg) const {
+   assert(LockingMode == LM_LIGHTWEIGHT, "never use lock-stack when light weight locking is disabled");
+   assert((_top <= end_offset()), "lockstack overflow: _top %d end_offset %d", _top, end_offset());
+-  assert((_top >= start_offset()), "lockstack underflow: _top %d end_offset %d", _top, start_offset());
++  assert((_top >= start_offset()), "lockstack underflow: _top %d start_offset %d", _top, start_offset());
+   if (SafepointSynchronize::is_at_safepoint() || (Thread::current()->is_Java_thread() && is_owning_thread())) {
+     int top = to_index(_top);
+     for (int i = 0; i < top; i++) {
+       assert(_base[i] != nullptr, "no zapped before top");
++      if (VM_Version::supports_recursive_lightweight_locking()) {
++        oop o = _base[i];
++        for (; i < top - 1; i++) {
++          // Consecutive entries may be the same
++          if (_base[i + 1] != o) {
++            break;
++          }
++        }
++      }
++
+       for (int j = i + 1; j < top; j++) {
+         assert(_base[i] != _base[j], "entries must be unique: %s", msg);
+       }
+diff --git a/src/hotspot/share/runtime/lockStack.hpp b/src/hotspot/share/runtime/lockStack.hpp
+index 25ab7a8de..45649b86a 100644
+--- a/src/hotspot/share/runtime/lockStack.hpp
++++ b/src/hotspot/share/runtime/lockStack.hpp
+@@ -1,6 +1,7 @@
+ /*
+  * Copyright (c) 2022, Red Hat, Inc. All rights reserved.
+  * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
++ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -35,9 +36,11 @@ class OopClosure; + class outputStream; + + class LockStack { ++ friend class LockStackTest; + friend class VMStructs; +-private: ++public: + static const int CAPACITY = 8; ++private: + + // TODO: It would be very useful if JavaThread::lock_stack_offset() and friends were constexpr, + // but this is currently not the case because we're using offset_of() which is non-constexpr, +@@ -50,6 +53,9 @@ private: + // We do this instead of a simple index into the array because this allows for + // efficient addressing in generated code. + uint32_t _top; ++ // The _bad_oop_sentinel acts as a sentinel value to elide underflow checks in generated code. ++ // The correct layout is statically asserted in the constructor. ++ const uintptr_t _bad_oop_sentinel = badOopVal; + oop _base[CAPACITY]; + + // Get the owning thread of this lock-stack. +@@ -74,17 +80,35 @@ public: + static uint32_t start_offset(); + static uint32_t end_offset(); + +- // Return true if we have room to push onto this lock-stack, false otherwise. +- inline bool can_push() const; ++ // Returns true if the lock-stack is full. False otherwise. ++ inline bool is_full() const; + + // Pushes an oop on this lock-stack. + inline void push(oop o); + +- // Pops an oop from this lock-stack. +- inline oop pop(); ++ // Get the oldest oop from this lock-stack. ++ // Precondition: This lock-stack must not be empty. ++ inline oop bottom() const; ++ ++ // Is the lock-stack empty. ++ inline bool is_empty() const; ++ ++ // Check if object is recursive. ++ // Precondition: This lock-stack must contain the oop. ++ inline bool is_recursive(oop o) const; ++ ++ // Try recursive enter. ++ // Precondition: This lock-stack must not be full. ++ inline bool try_recursive_enter(oop o); ++ ++ // Try recursive exit. ++ // Precondition: This lock-stack must contain the oop. ++ inline bool try_recursive_exit(oop o); + + // Removes an oop from an arbitrary location of this lock-stack. +- inline void remove(oop o); ++ // Precondition: This lock-stack must contain the oop. ++ // Returns the number of oops removed. ++ inline size_t remove(oop o); + + // Tests whether the oop is on this lock-stack. + inline bool contains(oop o) const; +diff --git a/src/hotspot/share/runtime/lockStack.inline.hpp b/src/hotspot/share/runtime/lockStack.inline.hpp +index b36be2f72..7a9874a92 100644 +--- a/src/hotspot/share/runtime/lockStack.inline.hpp ++++ b/src/hotspot/share/runtime/lockStack.inline.hpp +@@ -1,6 +1,7 @@ + /* + * Copyright (c) 2022, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. ++ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
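Illustrative aside, not part of the backported patch: the _bad_oop_sentinel introduced above only works because it sits exactly one word below _base, which is what the constructor's STATIC_ASSERTs pin down. Below is a minimal standalone sketch of the same layout trick, with made-up names (MiniLockStack, Slot, kBadSlot) standing in for the real LockStack, oop and badOopVal, and a plain index in place of the byte-offset _top.

    #include <cassert>
    #include <cstdint>

    // Illustration only: Slot stands in for oop, kBadSlot for badOopVal.
    using Slot = uintptr_t;
    constexpr Slot kBadSlot = 0xbadbeef;

    struct MiniLockStack {
      static const int CAPACITY = 8;
      // One backing array: slot 0 is the poison sentinel, slots 1..CAPACITY hold entries.
      // This models "_bad_oop_sentinel placed exactly one word below _base".
      Slot _slots[1 + CAPACITY] = { kBadSlot };
      int  _top = 0;                       // number of valid entries (simplified)

      Slot* base() { return _slots + 1; }  // what the real code calls _base

      // "Does the top entry equal s?" with no explicit emptiness check:
      // when _top == 0 this reads the sentinel, which never matches a real entry.
      bool top_matches(Slot s) { return base()[_top - 1] == s; }

      void push(Slot s) { base()[_top++] = s; }
    };

    int main() {
      MiniLockStack ls;
      assert(!ls.top_matches(0x1000));  // empty: the comparison hits kBadSlot
      ls.push(0x1000);
      assert(ls.top_matches(0x1000));
      return 0;
    }

Because an empty stack resolves the "one below top" read to the poison word, emitted code can test "top entry == obj" without a separate underflow branch, which is the stated purpose of the sentinel.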
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -26,14 +27,20 @@ + #ifndef SHARE_RUNTIME_LOCKSTACK_INLINE_HPP + #define SHARE_RUNTIME_LOCKSTACK_INLINE_HPP + ++#include "runtime/lockStack.hpp" ++ + #include "memory/iterator.hpp" + #include "runtime/javaThread.hpp" +-#include "runtime/lockStack.hpp" + #include "runtime/safepoint.hpp" + #include "runtime/stackWatermark.hpp" + #include "runtime/stackWatermarkSet.inline.hpp" ++#include "utilities/align.hpp" ++#include "utilities/globalDefinitions.hpp" + + inline int LockStack::to_index(uint32_t offset) { ++ assert(is_aligned(offset, oopSize), "Bad alignment: %u", offset); ++ assert((offset <= end_offset()), "lockstack overflow: offset %d end_offset %d", offset, end_offset()); ++ assert((offset >= start_offset()), "lockstack underflow: offset %d start_offset %d", offset, start_offset()); + return (offset - lock_stack_base_offset) / oopSize; + } + +@@ -42,8 +49,8 @@ JavaThread* LockStack::get_thread() const { + return reinterpret_cast(addr - lock_stack_offset); + } + +-inline bool LockStack::can_push() const { +- return to_index(_top) < CAPACITY; ++inline bool LockStack::is_full() const { ++ return to_index(_top) == CAPACITY; + } + + inline bool LockStack::is_owning_thread() const { +@@ -61,45 +68,132 @@ inline void LockStack::push(oop o) { + verify("pre-push"); + assert(oopDesc::is_oop(o), "must be"); + assert(!contains(o), "entries must be unique"); +- assert(can_push(), "must have room"); ++ assert(!is_full(), "must have room"); + assert(_base[to_index(_top)] == nullptr, "expect zapped entry"); + _base[to_index(_top)] = o; + _top += oopSize; + verify("post-push"); + } + +-inline oop LockStack::pop() { +- verify("pre-pop"); +- assert(to_index(_top) > 0, "underflow, probably unbalanced push/pop"); ++inline oop LockStack::bottom() const { ++ assert(to_index(_top) > 0, "must contain an oop"); ++ return _base[0]; ++} ++ ++inline bool LockStack::is_empty() const { ++ return to_index(_top) == 0; ++} ++ ++inline bool LockStack::is_recursive(oop o) const { ++ if (!VM_Version::supports_recursive_lightweight_locking()) { ++ return false; ++ } ++ verify("pre-is_recursive"); ++ ++ // This will succeed iff there is a consecutive run of oops on the ++ // lock-stack with a length of at least 2. ++ ++ assert(contains(o), "at least one entry must exist"); ++ int end = to_index(_top); ++ // Start iterating from the top because the runtime code is more ++ // interested in the balanced locking case when the top oop on the ++ // lock-stack matches o. This will cause the for loop to break out ++ // in the first loop iteration if it is non-recursive. ++ for (int i = end - 1; i > 0; i--) { ++ if (_base[i - 1] == o && _base[i] == o) { ++ verify("post-is_recursive"); ++ return true; ++ } ++ if (_base[i] == o) { ++ // o can only occur in one consecutive run on the lock-stack. ++ // Only one of the two oops checked matched o, so this run ++ // must be of length 1 and thus not be recursive. Stop the search. ++ break; ++ } ++ } ++ ++ verify("post-is_recursive"); ++ return false; ++} ++ ++inline bool LockStack::try_recursive_enter(oop o) { ++ if (!VM_Version::supports_recursive_lightweight_locking()) { ++ return false; ++ } ++ verify("pre-try_recursive_enter"); ++ ++ // This will succeed iff the top oop on the stack matches o. ++ // When successful o will be pushed to the lock-stack creating ++ // a consecutive run at least 2 oops that matches o on top of ++ // the lock-stack. 
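Illustrative aside, not part of the backported patch: is_recursive, try_recursive_enter and try_recursive_exit above all rely on one invariant, namely that an object occurs on the lock-stack as at most one consecutive run, and the run length is the recursion depth. A small self-contained model of that bookkeeping follows; RunStack and Obj are made-up types, with plain pointers instead of oops and none of the verify()/VM_Version gating.

    #include <cassert>

    struct Obj {};

    struct RunStack {
      static const int CAPACITY = 8;
      Obj* slots[CAPACITY] = {};
      int  top = 0;   // number of entries

      bool is_full()  const { return top == CAPACITY; }

      void push(Obj* o) { assert(!is_full()); slots[top++] = o; }

      // Succeeds only if o is already the topmost entry: the run on top grows by one.
      bool try_recursive_enter(Obj* o) {
        if (is_full() || top == 0 || slots[top - 1] != o) return false;
        slots[top++] = o;
        return true;
      }

      // Succeeds only if the two topmost entries are both o: the run shrinks by one.
      bool try_recursive_exit(Obj* o) {
        if (top < 2 || slots[top - 1] != o || slots[top - 2] != o) return false;
        slots[--top] = nullptr;
        return true;
      }

      // True iff o occurs in a consecutive run of length >= 2 somewhere on the stack.
      bool is_recursive(Obj* o) const {
        for (int i = top - 1; i > 0; i--) {
          if (slots[i] == o && slots[i - 1] == o) return true;
          if (slots[i] == o) break;  // only one run per object, and this run has length 1
        }
        return false;
      }
    };

    int main() {
      Obj a, b;
      RunStack ls;
      ls.push(&a);                              // [a]
      assert(!ls.is_recursive(&a));
      assert(ls.try_recursive_enter(&a));       // [a, a]  recursion depth 2
      assert(ls.is_recursive(&a));
      ls.push(&b);                              // [a, a, b]
      assert(!ls.try_recursive_exit(&a));       // a is recursive but not on top: unbalanced
      assert(ls.try_recursive_enter(&b));       // [a, a, b, b]
      assert(ls.try_recursive_exit(&b));        // [a, a, b]
      return 0;
    }

The unbalanced case in main() (exiting a while b is on top) is exactly the situation the runtime hands over to monitor inflation rather than handling on the lock-stack.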
++ ++ assert(!is_full(), "precond"); ++ ++ int end = to_index(_top); ++ if (end == 0 || _base[end - 1] != o) { ++ // Topmost oop does not match o. ++ verify("post-try_recursive_enter"); ++ return false; ++ } ++ ++ _base[end] = o; ++ _top += oopSize; ++ verify("post-try_recursive_enter"); ++ return true; ++} ++ ++inline bool LockStack::try_recursive_exit(oop o) { ++ if (!VM_Version::supports_recursive_lightweight_locking()) { ++ return false; ++ } ++ verify("pre-try_recursive_exit"); ++ ++ // This will succeed iff the top two oops on the stack matches o. ++ // When successful the top oop will be popped of the lock-stack. ++ // When unsuccessful the lock may still be recursive, in which ++ // case the locking is unbalanced. This case is handled externally. ++ ++ assert(contains(o), "entries must exist"); ++ ++ int end = to_index(_top); ++ if (end <= 1 || _base[end - 1] != o || _base[end - 2] != o) { ++ // The two topmost oops do not match o. ++ verify("post-try_recursive_exit"); ++ return false; ++ } ++ + _top -= oopSize; +- oop o = _base[to_index(_top)]; +-#ifdef ASSERT +- _base[to_index(_top)] = nullptr; +-#endif +- assert(!contains(o), "entries must be unique: " PTR_FORMAT, p2i(o)); +- verify("post-pop"); +- return o; ++ DEBUG_ONLY(_base[to_index(_top)] = nullptr;) ++ verify("post-try_recursive_exit"); ++ return true; + } + +-inline void LockStack::remove(oop o) { ++inline size_t LockStack::remove(oop o) { + verify("pre-remove"); + assert(contains(o), "entry must be present: " PTR_FORMAT, p2i(o)); ++ + int end = to_index(_top); ++ int inserted = 0; + for (int i = 0; i < end; i++) { +- if (_base[i] == o) { +- int last = end - 1; +- for (; i < last; i++) { +- _base[i] = _base[i + 1]; ++ if (_base[i] != o) { ++ if (inserted != i) { ++ _base[inserted] = _base[i]; + } +- _top -= oopSize; +-#ifdef ASSERT +- _base[to_index(_top)] = nullptr; +-#endif +- break; ++ inserted++; + } + } +- assert(!contains(o), "entries must be unique: " PTR_FORMAT, p2i(o)); ++ ++#ifdef ASSERT ++ for (int i = inserted; i < end; i++) { ++ _base[i] = nullptr; ++ } ++#endif ++ ++ uint32_t removed = end - inserted; ++ _top -= removed * oopSize; ++ assert(!contains(o), "entry must have been removed: " PTR_FORMAT, p2i(o)); + verify("post-remove"); ++ return removed; + } + + inline bool LockStack::contains(oop o) const { +diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp +index ee0f754b8..696803bbe 100644 +--- a/src/hotspot/share/runtime/objectMonitor.cpp ++++ b/src/hotspot/share/runtime/objectMonitor.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1998, 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -39,6 +39,7 @@ + #include "prims/jvmtiDeferredUpdates.hpp" + #include "prims/jvmtiExport.hpp" + #include "runtime/atomic.hpp" ++#include "runtime/globals.hpp" + #include "runtime/handles.inline.hpp" + #include "runtime/interfaceSupport.inline.hpp" + #include "runtime/javaThread.inline.hpp" +@@ -53,6 +54,7 @@ + #include "runtime/sharedRuntime.hpp" + #include "services/threadService.hpp" + #include "utilities/dtrace.hpp" ++#include "utilities/globalDefinitions.hpp" + #include "utilities/macros.hpp" + #include "utilities/preserveException.hpp" + #if INCLUDE_JFR +@@ -312,7 +314,70 @@ void ObjectMonitor::ClearSuccOnSuspend::operator()(JavaThread* current) { + // ----------------------------------------------------------------------------- + // Enter support + ++bool ObjectMonitor::enter_for(JavaThread* locking_thread) { ++ // Used by ObjectSynchronizer::enter_for to enter for another thread. ++ // The monitor is private to or already owned by locking_thread which must be suspended. ++ // So this code may only contend with deflation. ++ assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); ++ ++ // Block out deflation as soon as possible. ++ add_to_contentions(1); ++ ++ bool success = false; ++ if (!is_being_async_deflated()) { ++ void* prev_owner = try_set_owner_from(nullptr, locking_thread); ++ ++ if (prev_owner == nullptr) { ++ assert(_recursions == 0, "invariant"); ++ success = true; ++ } else if (prev_owner == locking_thread) { ++ _recursions++; ++ success = true; ++ } else if (prev_owner == DEFLATER_MARKER) { ++ // Racing with deflation. ++ prev_owner = try_set_owner_from(DEFLATER_MARKER, locking_thread); ++ if (prev_owner == DEFLATER_MARKER) { ++ // Cancelled deflation. Increment contentions as part of the deflation protocol. ++ add_to_contentions(1); ++ success = true; ++ } else if (prev_owner == nullptr) { ++ // At this point we cannot race with deflation as we have both incremented ++ // contentions, seen contention > 0 and seen a DEFLATER_MARKER. ++ // success will only be false if this races with something other than ++ // deflation. ++ prev_owner = try_set_owner_from(nullptr, locking_thread); ++ success = prev_owner == nullptr; ++ } ++ } else if (LockingMode == LM_LEGACY && locking_thread->is_lock_owned((address)prev_owner)) { ++ assert(_recursions == 0, "must be"); ++ _recursions = 1; ++ set_owner_from_BasicLock(prev_owner, locking_thread); ++ success = true; ++ } ++ assert(success, "Failed to enter_for: locking_thread=" INTPTR_FORMAT ++ ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}, observed owner: " INTPTR_FORMAT, ++ p2i(locking_thread), p2i(this), p2i(owner_raw()), p2i(prev_owner)); ++ } else { ++ // Async deflation is in progress and our contentions increment ++ // above lost the race to async deflation. Undo the work and ++ // force the caller to retry. ++ const oop l_object = object(); ++ if (l_object != nullptr) { ++ // Attempt to restore the header/dmw to the object's header so that ++ // we only retry once if the deflater thread happens to be slow. 
++      install_displaced_markword_in_object(l_object);
++    }
++  }
++
++  add_to_contentions(-1);
++
++  assert(!success || owner_raw() == locking_thread, "must be");
++
++  return success;
++}
++
+ bool ObjectMonitor::enter(JavaThread* current) {
++  assert(current == JavaThread::current(), "must be");
+   // The following code is ordered to check the most common cases first
+   // and to reduce RTS->RTO cache line upgrades on SPARC and IA32 processors.
+ 
+diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp
+index d6c0e31f7..a56b6f8fb 100644
+--- a/src/hotspot/share/runtime/objectMonitor.hpp
++++ b/src/hotspot/share/runtime/objectMonitor.hpp
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 1998, 2023, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved.
+  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+  *
+  * This code is free software; you can redistribute it and/or modify it
+@@ -298,6 +298,7 @@ private:
+   int contentions() const;
+   void add_to_contentions(int value);
+   intx recursions() const { return _recursions; }
++  void set_recursions(size_t recursions);
+ 
+   // JVM/TI GetObjectMonitorUsage() needs this:
+   ObjectWaiter* first_waiter() { return _WaitSet; }
+@@ -332,6 +333,7 @@ private:
+     void operator()(JavaThread* current);
+   };
+  public:
++  bool enter_for(JavaThread* locking_thread);
+   bool enter(JavaThread* current);
+   void exit(JavaThread* current, bool not_suspended = true);
+   void wait(jlong millis, bool interruptible, TRAPS);
+diff --git a/src/hotspot/share/runtime/objectMonitor.inline.hpp b/src/hotspot/share/runtime/objectMonitor.inline.hpp
+index 36790925b..b371663ee 100644
+--- a/src/hotspot/share/runtime/objectMonitor.inline.hpp
++++ b/src/hotspot/share/runtime/objectMonitor.inline.hpp
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 1998, 2023, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved.
+  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+  *
+  * This code is free software; you can redistribute it and/or modify it
+@@ -102,6 +102,12 @@ inline void ObjectMonitor::add_to_contentions(int value) {
+   Atomic::add(&_contentions, value);
+ }
+ 
++inline void ObjectMonitor::set_recursions(size_t recursions) {
++  assert(_recursions == 0, "must be");
++  assert(has_owner(), "must be owned");
++  _recursions = checked_cast<intx>(recursions);
++}
++
+ // Clear _owner field; current value must match old_value.
+ inline void ObjectMonitor::release_clear_owner(void* old_value) { + #ifdef ASSERT +diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp +index cc73082ed..cdcea5436 100644 +--- a/src/hotspot/share/runtime/synchronizer.cpp ++++ b/src/hotspot/share/runtime/synchronizer.cpp +@@ -36,6 +36,7 @@ + #include "oops/oop.inline.hpp" + #include "runtime/atomic.hpp" + #include "runtime/frame.inline.hpp" ++#include "runtime/globals.hpp" + #include "runtime/handles.inline.hpp" + #include "runtime/handshake.hpp" + #include "runtime/interfaceSupport.inline.hpp" +@@ -60,6 +61,7 @@ + #include "utilities/align.hpp" + #include "utilities/dtrace.hpp" + #include "utilities/events.hpp" ++#include "utilities/globalDefinitions.hpp" + #include "utilities/linkedlist.hpp" + #include "utilities/preserveException.hpp" + +@@ -384,6 +386,19 @@ bool ObjectSynchronizer::quick_enter(oop obj, JavaThread* current, + return false; + } + ++ if (LockingMode == LM_LIGHTWEIGHT) { ++ LockStack& lock_stack = current->lock_stack(); ++ if (lock_stack.is_full()) { ++ // Always go into runtime if the lock stack is full. ++ return false; ++ } ++ if (lock_stack.try_recursive_enter(obj)) { ++ // Recursive lock successful. ++ current->inc_held_monitor_count(); ++ return true; ++ } ++ } ++ + const markWord mark = obj->mark(); + + if (mark.has_monitor()) { +@@ -437,8 +452,9 @@ bool ObjectSynchronizer::quick_enter(oop obj, JavaThread* current, + } + + // Handle notifications when synchronizing on value based classes +-void ObjectSynchronizer::handle_sync_on_value_based_class(Handle obj, JavaThread* current) { +- frame last_frame = current->last_frame(); ++void ObjectSynchronizer::handle_sync_on_value_based_class(Handle obj, JavaThread* locking_thread) { ++ assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); ++ frame last_frame = locking_thread->last_frame(); + bool bcp_was_adjusted = false; + // Don't decrement bcp if it points to the frame's first instruction. This happens when + // handle_sync_on_value_based_class() is called because of a synchronized method. 
There +@@ -451,9 +467,9 @@ void ObjectSynchronizer::handle_sync_on_value_based_class(Handle obj, JavaThread + } + + if (DiagnoseSyncOnValueBasedClasses == FATAL_EXIT) { +- ResourceMark rm(current); ++ ResourceMark rm; + stringStream ss; +- current->print_active_stack_on(&ss); ++ locking_thread->print_active_stack_on(&ss); + char* base = (char*)strstr(ss.base(), "at"); + char* newline = (char*)strchr(ss.base(), '\n'); + if (newline != nullptr) { +@@ -462,13 +478,13 @@ void ObjectSynchronizer::handle_sync_on_value_based_class(Handle obj, JavaThread + fatal("Synchronizing on object " INTPTR_FORMAT " of klass %s %s", p2i(obj()), obj->klass()->external_name(), base); + } else { + assert(DiagnoseSyncOnValueBasedClasses == LOG_WARNING, "invalid value for DiagnoseSyncOnValueBasedClasses"); +- ResourceMark rm(current); ++ ResourceMark rm; + Log(valuebasedclasses) vblog; + + vblog.info("Synchronizing on object " INTPTR_FORMAT " of klass %s", p2i(obj()), obj->klass()->external_name()); +- if (current->has_last_Java_frame()) { ++ if (locking_thread->has_last_Java_frame()) { + LogStream info_stream(vblog.info()); +- current->print_active_stack_on(&info_stream); ++ locking_thread->print_active_stack_on(&info_stream); + } else { + vblog.info("Cannot find the last Java frame"); + } +@@ -495,38 +511,111 @@ static bool useHeavyMonitors() { + + // ----------------------------------------------------------------------------- + // Monitor Enter/Exit ++ ++void ObjectSynchronizer::enter_for(Handle obj, BasicLock* lock, JavaThread* locking_thread) { ++ // When called with locking_thread != Thread::current() some mechanism must synchronize ++ // the locking_thread with respect to the current thread. Currently only used when ++ // deoptimizing and re-locking locks. See Deoptimization::relock_objects ++ assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); ++ if (!enter_fast_impl(obj, lock, locking_thread)) { ++ // Inflated ObjectMonitor::enter_for is required ++ ++ // An async deflation can race after the inflate_for() call and before ++ // enter_for() can make the ObjectMonitor busy. enter_for() returns false ++ // if we have lost the race to async deflation and we simply try again. ++ while (true) { ++ ObjectMonitor* monitor = inflate_for(locking_thread, obj(), inflate_cause_monitor_enter); ++ if (monitor->enter_for(locking_thread)) { ++ return; ++ } ++ assert(monitor->is_being_async_deflated(), "must be"); ++ } ++ } ++} ++ ++void ObjectSynchronizer::enter(Handle obj, BasicLock* lock, JavaThread* current) { ++ assert(current == Thread::current(), "must be"); ++ if (!enter_fast_impl(obj, lock, current)) { ++ // Inflated ObjectMonitor::enter is required ++ ++ // An async deflation can race after the inflate() call and before ++ // enter() can make the ObjectMonitor busy. enter() returns false if ++ // we have lost the race to async deflation and we simply try again. ++ while (true) { ++ ObjectMonitor* monitor = inflate(current, obj(), inflate_cause_monitor_enter); ++ if (monitor->enter(current)) { ++ return; ++ } ++ } ++ } ++} ++ + // The interpreter and compiler assembly code tries to lock using the fast path + // of this algorithm. Make sure to update that code if the following function is + // changed. The implementation is extremely sensitive to race condition. Be careful. 
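Illustrative aside, not part of the backported patch: the retry loops in enter() and enter_for() above only work because the locking side announces itself (by incrementing contentions) before checking whether async deflation has already claimed the monitor, while the deflater marks the monitor before checking for contenders. A deliberately simplified model of that ordering follows; ToyMonitor is made up, and a separate being_deflated flag stands in for the real DEFLATER_MARKER/negative-contentions encoding.

    #include <atomic>
    #include <cassert>

    struct ToyMonitor {
      std::atomic<int>  contentions{0};
      std::atomic<bool> being_deflated{false};
    };

    // Locker side: announce interest first, then check whether deflation already won.
    bool try_enter(ToyMonitor& m) {
      m.contentions.fetch_add(1);        // block out deflation as soon as possible
      if (m.being_deflated.load()) {
        m.contentions.fetch_sub(1);      // lost the race: undo and let the caller retry
        return false;                    // caller re-inflates and tries again
      }
      // ... acquire ownership here (ownership, not the count, keeps the monitor busy) ...
      m.contentions.fetch_sub(1);
      return true;
    }

    // Deflater side: mark first, then back off if anyone has already announced interest.
    bool try_deflate(ToyMonitor& m) {
      m.being_deflated.store(true);
      if (m.contentions.load() > 0) {
        m.being_deflated.store(false);   // deflation cancelled
        return false;
      }
      return true;                       // safe to free the monitor
    }

    int main() {
      ToyMonitor m;
      assert(try_enter(m));              // no deflation in flight: enter succeeds

      m.contentions.fetch_add(1);        // a locker has announced interest...
      assert(!try_deflate(m));           // ...so deflation must back off
      m.contentions.fetch_sub(1);

      assert(try_deflate(m));            // idle monitor: deflation wins
      assert(!try_enter(m));             // late locker sees the marker and retries elsewhere
      return 0;
    }

With sequentially consistent atomics at most one side can win a genuine race: whichever check runs second observes the other side's earlier write, so the loser either retries on a fresh monitor (locker) or cancels deflation (deflater).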
++bool ObjectSynchronizer::enter_fast_impl(Handle obj, BasicLock* lock, JavaThread* locking_thread) { + +-void ObjectSynchronizer::enter(Handle obj, BasicLock* lock, JavaThread* current) { + if (obj->klass()->is_value_based()) { +- handle_sync_on_value_based_class(obj, current); ++ handle_sync_on_value_based_class(obj, locking_thread); + } + +- current->inc_held_monitor_count(); ++ locking_thread->inc_held_monitor_count(); + + if (!useHeavyMonitors()) { + if (LockingMode == LM_LIGHTWEIGHT) { + // Fast-locking does not use the 'lock' argument. +- LockStack& lock_stack = current->lock_stack(); +- if (lock_stack.can_push()) { +- markWord mark = obj()->mark_acquire(); +- while (mark.is_neutral()) { +- // Retry until a lock state change has been observed. cas_set_mark() may collide with non lock bits modifications. +- // Try to swing into 'fast-locked' state. +- assert(!lock_stack.contains(obj()), "thread must not already hold the lock"); +- const markWord locked_mark = mark.set_fast_locked(); +- const markWord old_mark = obj()->cas_set_mark(locked_mark, mark); +- if (old_mark == mark) { +- // Successfully fast-locked, push object to lock-stack and return. +- lock_stack.push(obj()); +- return; +- } +- mark = old_mark; ++ LockStack& lock_stack = locking_thread->lock_stack(); ++ if (lock_stack.is_full()) { ++ // We unconditionally make room on the lock stack by inflating ++ // the least recently locked object on the lock stack. ++ ++ // About the choice to inflate least recently locked object. ++ // First we must chose to inflate a lock, either some lock on ++ // the lock-stack or the lock that is currently being entered ++ // (which may or may not be on the lock-stack). ++ // Second the best lock to inflate is a lock which is entered ++ // in a control flow where there are only a very few locks being ++ // used, as the costly part of inflated locking is inflation, ++ // not locking. But this property is entirely program dependent. ++ // Third inflating the lock currently being entered on when it ++ // is not present on the lock-stack will result in a still full ++ // lock-stack. This creates a scenario where every deeper nested ++ // monitorenter must call into the runtime. ++ // The rational here is as follows: ++ // Because we cannot (currently) figure out the second, and want ++ // to avoid the third, we inflate a lock on the lock-stack. ++ // The least recently locked lock is chosen as it is the lock ++ // with the longest critical section. ++ ++ log_info(monitorinflation)("LockStack capacity exceeded, inflating."); ++ ObjectMonitor* monitor = inflate_for(locking_thread, lock_stack.bottom(), inflate_cause_vm_internal); ++ assert(monitor->owner() == Thread::current(), "must be owner=" PTR_FORMAT " current=" PTR_FORMAT " mark=" PTR_FORMAT, ++ p2i(monitor->owner()), p2i(Thread::current()), monitor->object()->mark_acquire().value()); ++ assert(!lock_stack.is_full(), "must have made room here"); ++ } ++ ++ markWord mark = obj()->mark_acquire(); ++ while (mark.is_neutral()) { ++ // Retry until a lock state change has been observed. cas_set_mark() may collide with non lock bits modifications. ++ // Try to swing into 'fast-locked' state. ++ assert(!lock_stack.contains(obj()), "thread must not already hold the lock"); ++ const markWord locked_mark = mark.set_fast_locked(); ++ const markWord old_mark = obj()->cas_set_mark(locked_mark, mark); ++ if (old_mark == mark) { ++ // Successfully fast-locked, push object to lock-stack and return. 
++ lock_stack.push(obj()); ++ return true; + } ++ mark = old_mark; + } +- // All other paths fall-through to inflate-enter. ++ ++ if (mark.is_fast_locked() && lock_stack.try_recursive_enter(obj())) { ++ // Recursive lock successful. ++ return true; ++ } ++ ++ // Failed to fast lock. ++ return false; + } else if (LockingMode == LM_LEGACY) { + markWord mark = obj->mark(); + if (mark.is_neutral()) { +@@ -534,15 +623,14 @@ void ObjectSynchronizer::enter(Handle obj, BasicLock* lock, JavaThread* current) + // be visible <= the ST performed by the CAS. + lock->set_displaced_header(mark); + if (mark == obj()->cas_set_mark(markWord::from_pointer(lock), mark)) { +- return; ++ return true; + } +- // Fall through to inflate() ... + } else if (mark.has_locker() && +- current->is_lock_owned((address) mark.locker())) { ++ locking_thread->is_lock_owned((address) mark.locker())) { + assert(lock != mark.locker(), "must not re-lock the same lock"); + assert(lock != (BasicLock*) obj->mark().value(), "don't relock with same BasicLock"); + lock->set_displaced_header(markWord::from_pointer(nullptr)); +- return; ++ return true; + } + + // The object header will never be displaced to this lock, +@@ -550,20 +638,15 @@ void ObjectSynchronizer::enter(Handle obj, BasicLock* lock, JavaThread* current) + // must be non-zero to avoid looking like a re-entrant lock, + // and must not look locked either. + lock->set_displaced_header(markWord::unused_mark()); ++ ++ // Failed to fast lock. ++ return false; + } + } else if (VerifyHeavyMonitors) { + guarantee((obj->mark().value() & markWord::lock_mask_in_place) != markWord::locked_value, "must not be lightweight/stack-locked"); + } + +- // An async deflation can race after the inflate() call and before +- // enter() can make the ObjectMonitor busy. enter() returns false if +- // we have lost the race to async deflation and we simply try again. +- while (true) { +- ObjectMonitor* monitor = inflate(current, obj(), inflate_cause_monitor_enter); +- if (monitor->enter(current)) { +- return; +- } +- } ++ return false; + } + + void ObjectSynchronizer::exit(oop object, BasicLock* lock, JavaThread* current) { +@@ -573,15 +656,28 @@ void ObjectSynchronizer::exit(oop object, BasicLock* lock, JavaThread* current) + markWord mark = object->mark(); + if (LockingMode == LM_LIGHTWEIGHT) { + // Fast-locking does not use the 'lock' argument. +- while (mark.is_fast_locked()) { +- // Retry until a lock state change has been observed. cas_set_mark() may collide with non lock bits modifications. +- const markWord unlocked_mark = mark.set_unlocked(); +- const markWord old_mark = object->cas_set_mark(unlocked_mark, mark); +- if (old_mark == mark) { +- current->lock_stack().remove(object); +- return; ++ LockStack& lock_stack = current->lock_stack(); ++ if (mark.is_fast_locked() && lock_stack.try_recursive_exit(object)) { ++ // Recursively unlocked. ++ return; ++ } ++ ++ if (mark.is_fast_locked() && lock_stack.is_recursive(object)) { ++ // This lock is recursive but is not at the top of the lock stack so we're ++ // doing an unbalanced exit. We have to fall thru to inflation below and ++ // let ObjectMonitor::exit() do the unlock. ++ } else { ++ while (mark.is_fast_locked()) { ++ // Retry until a lock state change has been observed. cas_set_mark() may collide with non lock bits modifications. 
++ const markWord unlocked_mark = mark.set_unlocked(); ++ const markWord old_mark = object->cas_set_mark(unlocked_mark, mark); ++ if (old_mark == mark) { ++ size_t recursions = lock_stack.remove(object) - 1; ++ assert(recursions == 0, "must not be recursive here"); ++ return; ++ } ++ mark = old_mark; + } +- mark = old_mark; + } + } else if (LockingMode == LM_LEGACY) { + markWord dhw = lock->displaced_header(); +@@ -631,13 +727,7 @@ void ObjectSynchronizer::exit(oop object, BasicLock* lock, JavaThread* current) + // The ObjectMonitor* can't be async deflated until ownership is + // dropped inside exit() and the ObjectMonitor* must be !is_busy(). + ObjectMonitor* monitor = inflate(current, object, inflate_cause_vm_internal); +- if (LockingMode == LM_LIGHTWEIGHT && monitor->is_owner_anonymous()) { +- // It must be owned by us. Pop lock object from lock stack. +- LockStack& lock_stack = current->lock_stack(); +- oop popped = lock_stack.pop(); +- assert(popped == object, "must be owned by this thread"); +- monitor->set_owner_from_anonymous(current); +- } ++ assert(!monitor->is_owner_anonymous(), "must not be"); + monitor->exit(current); + } + +@@ -1313,15 +1403,28 @@ void ObjectSynchronizer::inflate_helper(oop obj) { + (void)inflate(Thread::current(), obj, inflate_cause_vm_internal); + } + +-// Can be called from non JavaThreads (e.g., VMThread) for FastHashCode +-// calculations as part of JVM/TI tagging. +-static bool is_lock_owned(Thread* thread, oop obj) { +- assert(LockingMode == LM_LIGHTWEIGHT, "only call this with new lightweight locking enabled"); +- return thread->is_Java_thread() ? JavaThread::cast(thread)->lock_stack().contains(obj) : false; ++ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop obj, const InflateCause cause) { ++ assert(current == Thread::current(), "must be"); ++ if (LockingMode == LM_LIGHTWEIGHT && current->is_Java_thread()) { ++ return inflate_impl(JavaThread::cast(current), obj, cause); ++ } ++ return inflate_impl(nullptr, obj, cause); ++} ++ ++ObjectMonitor* ObjectSynchronizer::inflate_for(JavaThread* thread, oop obj, const InflateCause cause) { ++ assert(thread == Thread::current() || thread->is_obj_deopt_suspend(), "must be"); ++ return inflate_impl(thread, obj, cause); + } + +-ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop object, +- const InflateCause cause) { ++ObjectMonitor* ObjectSynchronizer::inflate_impl(JavaThread* inflating_thread, oop object, const InflateCause cause) { ++ // The JavaThread* inflating_thread parameter is only used by LM_LIGHTWEIGHT and requires ++ // that the inflating_thread == Thread::current() or is suspended throughout the call by ++ // some other mechanism. ++ // Even with LM_LIGHTWEIGHT the thread might be nullptr when called from a non ++ // JavaThread. (As may still be the case from FastHashCode). However it is only ++ // important for the correctness of the LM_LIGHTWEIGHT algorithm that the thread ++ // is set when called from ObjectSynchronizer::enter from the owning thread, ++ // ObjectSynchronizer::enter_for from any thread, or ObjectSynchronizer::exit. + EventJavaMonitorInflate event; + + for (;;) { +@@ -1330,10 +1433,10 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop object, + // The mark can be in one of the following states: + // * inflated - Just return if using stack-locking. 
+ // If using fast-locking and the ObjectMonitor owner +- // is anonymous and the current thread owns the +- // object lock, then we make the current thread the +- // ObjectMonitor owner and remove the lock from the +- // current thread's lock stack. ++ // is anonymous and the inflating_thread owns the ++ // object lock, then we make the inflating_thread ++ // the ObjectMonitor owner and remove the lock from ++ // the inflating_thread's lock stack. + // * fast-locked - Coerce it to inflated from fast-locked. + // * stack-locked - Coerce it to inflated from stack-locked. + // * INFLATING - Busy wait for conversion from stack-locked to +@@ -1345,9 +1448,11 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop object, + ObjectMonitor* inf = mark.monitor(); + markWord dmw = inf->header(); + assert(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); +- if (LockingMode == LM_LIGHTWEIGHT && inf->is_owner_anonymous() && is_lock_owned(current, object)) { +- inf->set_owner_from_anonymous(current); +- JavaThread::cast(current)->lock_stack().remove(object); ++ if (LockingMode == LM_LIGHTWEIGHT && inf->is_owner_anonymous() && ++ inflating_thread != nullptr && inflating_thread->lock_stack().contains(object)) { ++ inf->set_owner_from_anonymous(inflating_thread); ++ size_t removed = inflating_thread->lock_stack().remove(object); ++ inf->set_recursions(removed - 1); + } + return inf; + } +@@ -1367,12 +1472,12 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop object, + } + + // CASE: fast-locked +- // Could be fast-locked either by current or by some other thread. ++ // Could be fast-locked either by the inflating_thread or by some other thread. + // + // Note that we allocate the ObjectMonitor speculatively, _before_ + // attempting to set the object's mark to the new ObjectMonitor. If +- // this thread owns the monitor, then we set the ObjectMonitor's +- // owner to this thread. Otherwise, we set the ObjectMonitor's owner ++ // the inflating_thread owns the monitor, then we set the ObjectMonitor's ++ // owner to the inflating_thread. Otherwise, we set the ObjectMonitor's owner + // to anonymous. If we lose the race to set the object's mark to the + // new ObjectMonitor, then we just delete it and loop around again. + // +@@ -1380,10 +1485,10 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop object, + if (LockingMode == LM_LIGHTWEIGHT && mark.is_fast_locked()) { + ObjectMonitor* monitor = new ObjectMonitor(object); + monitor->set_header(mark.set_unlocked()); +- bool own = is_lock_owned(current, object); ++ bool own = inflating_thread != nullptr && inflating_thread->lock_stack().contains(object); + if (own) { +- // Owned by us. +- monitor->set_owner_from(nullptr, current); ++ // Owned by inflating_thread. ++ monitor->set_owner_from(nullptr, inflating_thread); + } else { + // Owned by somebody else. + monitor->set_owner_anonymous(); +@@ -1393,7 +1498,8 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop object, + if (old_mark == mark) { + // Success! Return inflated monitor. 
+ if (own) { +- JavaThread::cast(current)->lock_stack().remove(object); ++ size_t removed = inflating_thread->lock_stack().remove(object); ++ monitor->set_recursions(removed - 1); + } + // Once the ObjectMonitor is configured and object is associated + // with the ObjectMonitor, it is safe to allow async deflation: +@@ -1403,7 +1509,7 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop object, + // cache lines to avoid false sharing on MP systems ... + OM_PERFDATA_OP(Inflations, inc()); + if (log_is_enabled(Trace, monitorinflation)) { +- ResourceMark rm(current); ++ ResourceMark rm; + lsh.print_cr("inflate(has_locker): object=" INTPTR_FORMAT ", mark=" + INTPTR_FORMAT ", type='%s'", p2i(object), + object->mark().value(), object->klass()->external_name()); +@@ -1502,7 +1608,7 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop object, + // to avoid false sharing on MP systems ... + OM_PERFDATA_OP(Inflations, inc()); + if (log_is_enabled(Trace, monitorinflation)) { +- ResourceMark rm(current); ++ ResourceMark rm; + lsh.print_cr("inflate(has_locker): object=" INTPTR_FORMAT ", mark=" + INTPTR_FORMAT ", type='%s'", p2i(object), + object->mark().value(), object->klass()->external_name()); +@@ -1546,7 +1652,7 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop object, + // cache lines to avoid false sharing on MP systems ... + OM_PERFDATA_OP(Inflations, inc()); + if (log_is_enabled(Trace, monitorinflation)) { +- ResourceMark rm(current); ++ ResourceMark rm; + lsh.print_cr("inflate(neutral): object=" INTPTR_FORMAT ", mark=" + INTPTR_FORMAT ", type='%s'", p2i(object), + object->mark().value(), object->klass()->external_name()); +diff --git a/src/hotspot/share/runtime/synchronizer.hpp b/src/hotspot/share/runtime/synchronizer.hpp +index e983aeb9d..f1a14e362 100644 +--- a/src/hotspot/share/runtime/synchronizer.hpp ++++ b/src/hotspot/share/runtime/synchronizer.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1998, 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -92,7 +92,18 @@ class ObjectSynchronizer : AllStatic { + // This is the "slow path" version of monitor enter and exit. + static void enter(Handle obj, BasicLock* lock, JavaThread* current); + static void exit(oop obj, BasicLock* lock, JavaThread* current); ++ // Used to enter a monitor for another thread. This requires that the ++ // locking_thread is suspended, and that entering on a potential ++ // inflated monitor may only contend with deflation. That is the obj being ++ // locked on is either already locked by the locking_thread or cannot ++ // escape the locking_thread. ++ static void enter_for(Handle obj, BasicLock* lock, JavaThread* locking_thread); ++private: ++ // Shared implementation for enter and enter_for. Performs all but ++ // inflated monitor enter. ++ static bool enter_fast_impl(Handle obj, BasicLock* lock, JavaThread* locking_thread); + ++public: + // Used only to handle jni locks or other unmatched monitor enter/exit + // Internally they will use heavy weight monitor. 
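Illustrative aside, not part of the backported patch: when inflate_impl above finds that the inflating thread already fast-locked the object, it removes the object's whole run from the lock-stack and seeds the monitor with removed - 1 recursions. A tiny sketch of that accounting follows; ToyInflatedMonitor is made up, and the free-standing remove_all() merely stands in for the compacting LockStack::remove() that returns a count.

    #include <cassert>
    #include <cstddef>

    // A lock-stack run of length N for one object maps to a monitor that is
    // "held" once with N - 1 recursions.
    struct ToyInflatedMonitor {
      bool   owned = false;
      size_t recursions = 0;   // extra acquisitions beyond the first
    };

    // Compacts the stack, dropping every entry equal to o, and returns how many
    // entries were removed.
    size_t remove_all(const void* stack[], size_t& top, const void* o) {
      size_t kept = 0;
      for (size_t i = 0; i < top; i++) {
        if (stack[i] != o) stack[kept++] = stack[i];
      }
      size_t removed = top - kept;
      top = kept;
      return removed;
    }

    int main() {
      int a, b;
      const void* stack[8] = { &a, &b, &b, &b };   // b locked recursively, depth 3
      size_t top = 4;

      ToyInflatedMonitor mon;
      size_t removed = remove_all(stack, top, &b);
      assert(removed == 3);
      mon.owned = true;
      mon.recursions = removed - 1;                // depth 3 == owned once + 2 recursions
      assert(mon.recursions == 2);
      assert(top == 1 && stack[0] == &a);          // the rest of the stack is compacted
      return 0;
    }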
+ static void jni_enter(Handle obj, JavaThread* current); +@@ -113,6 +124,14 @@ class ObjectSynchronizer : AllStatic { + + // Inflate light weight monitor to heavy weight monitor + static ObjectMonitor* inflate(Thread* current, oop obj, const InflateCause cause); ++ // Used to inflate a monitor as if it was done from the thread JavaThread. ++ static ObjectMonitor* inflate_for(JavaThread* thread, oop obj, const InflateCause cause); ++ ++private: ++ // Shared implementation between the different LockingMode. ++ static ObjectMonitor* inflate_impl(JavaThread* thread, oop obj, const InflateCause cause); ++ ++public: + // This version is only for internal use + static void inflate_helper(oop obj); + static const char* inflate_cause_name(const InflateCause cause); +@@ -193,7 +212,7 @@ class ObjectSynchronizer : AllStatic { + static size_t get_gvars_size(); + static u_char* get_gvars_stw_random_addr(); + +- static void handle_sync_on_value_based_class(Handle obj, JavaThread* current); ++ static void handle_sync_on_value_based_class(Handle obj, JavaThread* locking_thread); + }; + + // ObjectLocker enforces balanced locking and can never throw an +diff --git a/test/hotspot/gtest/runtime/test_lockStack.cpp b/test/hotspot/gtest/runtime/test_lockStack.cpp +new file mode 100644 +index 000000000..43e8959ed +--- /dev/null ++++ b/test/hotspot/gtest/runtime/test_lockStack.cpp +@@ -0,0 +1,427 @@ ++/* ++ * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/lockStack.inline.hpp" ++#include "runtime/os.hpp" ++#include "unittest.hpp" ++#include "utilities/globalDefinitions.hpp" ++ ++class LockStackTest : public ::testing::Test { ++public: ++ static void push_raw(LockStack& ls, oop obj) { ++ ls._base[ls.to_index(ls._top)] = obj; ++ ls._top += oopSize; ++ } ++ ++ static void pop_raw(LockStack& ls) { ++ ls._top -= oopSize; ++#ifdef ASSERT ++ ls._base[ls.to_index(ls._top)] = nullptr; ++#endif ++ } ++ ++ static oop at(LockStack& ls, int index) { ++ return ls._base[index]; ++ } ++ ++ static size_t size(LockStack& ls) { ++ return ls.to_index(ls._top); ++ } ++}; ++ ++#define recursive_enter(ls, obj) \ ++ do { \ ++ bool ret = ls.try_recursive_enter(obj); \ ++ EXPECT_TRUE(ret); \ ++ } while (false) ++ ++#define recursive_exit(ls, obj) \ ++ do { \ ++ bool ret = ls.try_recursive_exit(obj); \ ++ EXPECT_TRUE(ret); \ ++ } while (false) ++ ++TEST_VM_F(LockStackTest, is_recursive) { ++ if (LockingMode != LM_LIGHTWEIGHT || !VM_Version::supports_recursive_lightweight_locking()) { ++ return; ++ } ++ ++ JavaThread* THREAD = JavaThread::current(); ++ // the thread should be in vm to use locks ++ ThreadInVMfromNative ThreadInVMfromNative(THREAD); ++ ++ LockStack& ls = THREAD->lock_stack(); ++ ++ EXPECT_TRUE(ls.is_empty()); ++ ++ oop obj0 = Universe::int_mirror(); ++ oop obj1 = Universe::float_mirror(); ++ ++ push_raw(ls, obj0); ++ ++ // 0 ++ EXPECT_FALSE(ls.is_recursive(obj0)); ++ ++ push_raw(ls, obj1); ++ ++ // 0, 1 ++ EXPECT_FALSE(ls.is_recursive(obj0)); ++ EXPECT_FALSE(ls.is_recursive(obj1)); ++ ++ push_raw(ls, obj1); ++ ++ // 0, 1, 1 ++ EXPECT_FALSE(ls.is_recursive(obj0)); ++ EXPECT_TRUE(ls.is_recursive(obj1)); ++ ++ pop_raw(ls); ++ pop_raw(ls); ++ push_raw(ls, obj0); ++ ++ // 0, 0 ++ EXPECT_TRUE(ls.is_recursive(obj0)); ++ ++ push_raw(ls, obj0); ++ ++ // 0, 0, 0 ++ EXPECT_TRUE(ls.is_recursive(obj0)); ++ ++ pop_raw(ls); ++ push_raw(ls, obj1); ++ ++ // 0, 0, 1 ++ EXPECT_TRUE(ls.is_recursive(obj0)); ++ EXPECT_FALSE(ls.is_recursive(obj1)); ++ ++ push_raw(ls, obj1); ++ ++ // 0, 0, 1, 1 ++ EXPECT_TRUE(ls.is_recursive(obj0)); ++ EXPECT_TRUE(ls.is_recursive(obj1)); ++ ++ // Clear stack ++ pop_raw(ls); ++ pop_raw(ls); ++ pop_raw(ls); ++ pop_raw(ls); ++ ++ EXPECT_TRUE(ls.is_empty()); ++} ++ ++TEST_VM_F(LockStackTest, try_recursive_enter) { ++ if (LockingMode != LM_LIGHTWEIGHT || !VM_Version::supports_recursive_lightweight_locking()) { ++ return; ++ } ++ ++ JavaThread* THREAD = JavaThread::current(); ++ // the thread should be in vm to use locks ++ ThreadInVMfromNative ThreadInVMfromNative(THREAD); ++ ++ LockStack& ls = THREAD->lock_stack(); ++ ++ EXPECT_TRUE(ls.is_empty()); ++ ++ oop obj0 = Universe::int_mirror(); ++ oop obj1 = Universe::float_mirror(); ++ ++ ls.push(obj0); ++ ++ // 0 ++ EXPECT_FALSE(ls.is_recursive(obj0)); ++ ++ ls.push(obj1); ++ ++ // 0, 1 ++ EXPECT_FALSE(ls.is_recursive(obj0)); ++ EXPECT_FALSE(ls.is_recursive(obj1)); ++ ++ recursive_enter(ls, obj1); ++ ++ // 0, 1, 1 ++ EXPECT_FALSE(ls.is_recursive(obj0)); ++ EXPECT_TRUE(ls.is_recursive(obj1)); ++ ++ recursive_exit(ls, obj1); ++ pop_raw(ls); ++ recursive_enter(ls, obj0); ++ ++ // 0, 0 ++ EXPECT_TRUE(ls.is_recursive(obj0)); ++ ++ recursive_enter(ls, obj0); ++ ++ // 0, 0, 0 ++ EXPECT_TRUE(ls.is_recursive(obj0)); ++ ++ recursive_exit(ls, obj0); ++ push_raw(ls, obj1); ++ ++ // 0, 0, 1 ++ EXPECT_TRUE(ls.is_recursive(obj0)); ++ EXPECT_FALSE(ls.is_recursive(obj1)); ++ ++ recursive_enter(ls, obj1); ++ ++ // 
0, 0, 1, 1 ++ EXPECT_TRUE(ls.is_recursive(obj0)); ++ EXPECT_TRUE(ls.is_recursive(obj1)); ++ ++ // Clear stack ++ pop_raw(ls); ++ pop_raw(ls); ++ pop_raw(ls); ++ pop_raw(ls); ++ ++ EXPECT_TRUE(ls.is_empty()); ++} ++ ++TEST_VM_F(LockStackTest, contains) { ++ if (LockingMode != LM_LIGHTWEIGHT) { ++ return; ++ } ++ ++ const bool test_recursive = VM_Version::supports_recursive_lightweight_locking(); ++ ++ JavaThread* THREAD = JavaThread::current(); ++ // the thread should be in vm to use locks ++ ThreadInVMfromNative ThreadInVMfromNative(THREAD); ++ ++ LockStack& ls = THREAD->lock_stack(); ++ ++ EXPECT_TRUE(ls.is_empty()); ++ ++ oop obj0 = Universe::int_mirror(); ++ oop obj1 = Universe::float_mirror(); ++ ++ EXPECT_FALSE(ls.contains(obj0)); ++ ++ ls.push(obj0); ++ ++ // 0 ++ EXPECT_TRUE(ls.contains(obj0)); ++ EXPECT_FALSE(ls.contains(obj1)); ++ ++ if (test_recursive) { ++ push_raw(ls, obj0); ++ ++ // 0, 0 ++ EXPECT_TRUE(ls.contains(obj0)); ++ EXPECT_FALSE(ls.contains(obj1)); ++ } ++ ++ push_raw(ls, obj1); ++ ++ // 0, 0, 1 ++ EXPECT_TRUE(ls.contains(obj0)); ++ EXPECT_TRUE(ls.contains(obj1)); ++ ++ if (test_recursive) { ++ push_raw(ls, obj1); ++ ++ // 0, 0, 1, 1 ++ EXPECT_TRUE(ls.contains(obj0)); ++ EXPECT_TRUE(ls.contains(obj1)); ++ } ++ ++ pop_raw(ls); ++ if (test_recursive) { ++ pop_raw(ls); ++ pop_raw(ls); ++ } ++ push_raw(ls, obj1); ++ ++ // 0, 1 ++ EXPECT_TRUE(ls.contains(obj0)); ++ EXPECT_TRUE(ls.contains(obj1)); ++ ++ // Clear stack ++ pop_raw(ls); ++ pop_raw(ls); ++ ++ EXPECT_TRUE(ls.is_empty()); ++} ++ ++TEST_VM_F(LockStackTest, remove) { ++ if (LockingMode != LM_LIGHTWEIGHT) { ++ return; ++ } ++ ++ const bool test_recursive = VM_Version::supports_recursive_lightweight_locking(); ++ ++ JavaThread* THREAD = JavaThread::current(); ++ // the thread should be in vm to use locks ++ ThreadInVMfromNative ThreadInVMfromNative(THREAD); ++ ++ LockStack& ls = THREAD->lock_stack(); ++ ++ EXPECT_TRUE(ls.is_empty()); ++ ++ oop obj0 = Universe::int_mirror(); ++ oop obj1 = Universe::float_mirror(); ++ oop obj2 = Universe::short_mirror(); ++ oop obj3 = Universe::long_mirror(); ++ ++ push_raw(ls, obj0); ++ ++ // 0 ++ { ++ size_t removed = ls.remove(obj0); ++ EXPECT_EQ(removed, 1u); ++ EXPECT_FALSE(ls.contains(obj0)); ++ } ++ ++ if (test_recursive) { ++ push_raw(ls, obj0); ++ push_raw(ls, obj0); ++ ++ // 0, 0 ++ { ++ size_t removed = ls.remove(obj0); ++ EXPECT_EQ(removed, 2u); ++ EXPECT_FALSE(ls.contains(obj0)); ++ } ++ } ++ ++ push_raw(ls, obj0); ++ push_raw(ls, obj1); ++ ++ // 0, 1 ++ { ++ size_t removed = ls.remove(obj0); ++ EXPECT_EQ(removed, 1u); ++ EXPECT_FALSE(ls.contains(obj0)); ++ EXPECT_TRUE(ls.contains(obj1)); ++ ++ ls.remove(obj1); ++ EXPECT_TRUE(ls.is_empty()); ++ } ++ ++ push_raw(ls, obj0); ++ push_raw(ls, obj1); ++ ++ // 0, 1 ++ { ++ size_t removed = ls.remove(obj1); ++ EXPECT_EQ(removed, 1u); ++ EXPECT_FALSE(ls.contains(obj1)); ++ EXPECT_TRUE(ls.contains(obj0)); ++ ++ ls.remove(obj0); ++ EXPECT_TRUE(ls.is_empty()); ++ } ++ ++ if (test_recursive) { ++ push_raw(ls, obj0); ++ push_raw(ls, obj0); ++ push_raw(ls, obj1); ++ ++ // 0, 0, 1 ++ { ++ size_t removed = ls.remove(obj0); ++ EXPECT_EQ(removed, 2u); ++ EXPECT_FALSE(ls.contains(obj0)); ++ EXPECT_TRUE(ls.contains(obj1)); ++ ++ ls.remove(obj1); ++ EXPECT_TRUE(ls.is_empty()); ++ } ++ ++ push_raw(ls, obj0); ++ push_raw(ls, obj1); ++ push_raw(ls, obj1); ++ ++ // 0, 1, 1 ++ { ++ size_t removed = ls.remove(obj1); ++ EXPECT_EQ(removed, 2u); ++ EXPECT_FALSE(ls.contains(obj1)); ++ EXPECT_TRUE(ls.contains(obj0)); ++ ++ ls.remove(obj0); ++ 
EXPECT_TRUE(ls.is_empty()); ++ } ++ ++ push_raw(ls, obj0); ++ push_raw(ls, obj1); ++ push_raw(ls, obj1); ++ push_raw(ls, obj2); ++ push_raw(ls, obj2); ++ push_raw(ls, obj2); ++ push_raw(ls, obj2); ++ push_raw(ls, obj3); ++ ++ // 0, 1, 1, 2, 2, 2, 2, 3 ++ { ++ EXPECT_EQ(size(ls), 8u); ++ ++ size_t removed = ls.remove(obj1); ++ EXPECT_EQ(removed, 2u); ++ ++ EXPECT_TRUE(ls.contains(obj0)); ++ EXPECT_FALSE(ls.contains(obj1)); ++ EXPECT_TRUE(ls.contains(obj2)); ++ EXPECT_TRUE(ls.contains(obj3)); ++ ++ EXPECT_EQ(at(ls, 0), obj0); ++ EXPECT_EQ(at(ls, 1), obj2); ++ EXPECT_EQ(at(ls, 2), obj2); ++ EXPECT_EQ(at(ls, 3), obj2); ++ EXPECT_EQ(at(ls, 4), obj2); ++ EXPECT_EQ(at(ls, 5), obj3); ++ EXPECT_EQ(size(ls), 6u); ++ ++ removed = ls.remove(obj2); ++ EXPECT_EQ(removed, 4u); ++ ++ EXPECT_TRUE(ls.contains(obj0)); ++ EXPECT_FALSE(ls.contains(obj1)); ++ EXPECT_FALSE(ls.contains(obj2)); ++ EXPECT_TRUE(ls.contains(obj3)); ++ ++ EXPECT_EQ(at(ls, 0), obj0); ++ EXPECT_EQ(at(ls, 1), obj3); ++ EXPECT_EQ(size(ls), 2u); ++ ++ removed = ls.remove(obj0); ++ EXPECT_EQ(removed, 1u); ++ ++ EXPECT_FALSE(ls.contains(obj0)); ++ EXPECT_FALSE(ls.contains(obj1)); ++ EXPECT_FALSE(ls.contains(obj2)); ++ EXPECT_TRUE(ls.contains(obj3)); ++ ++ EXPECT_EQ(at(ls, 0), obj3); ++ EXPECT_EQ(size(ls), 1u); ++ ++ removed = ls.remove(obj3); ++ EXPECT_EQ(removed, 1u); ++ ++ EXPECT_TRUE(ls.is_empty()); ++ EXPECT_EQ(size(ls), 0u); ++ } ++ } ++ ++ EXPECT_TRUE(ls.is_empty()); ++} +diff --git a/test/hotspot/jtreg/TEST.groups b/test/hotspot/jtreg/TEST.groups +index 6fb2e2b0b..ff2b0cf00 100644 +--- a/test/hotspot/jtreg/TEST.groups ++++ b/test/hotspot/jtreg/TEST.groups +@@ -1,5 +1,5 @@ + # +-# Copyright (c) 2013, 2023, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2013, 2024, Oracle and/or its affiliates. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # + # This code is free software; you can redistribute it and/or modify it +@@ -149,6 +149,7 @@ serviceability_ttf_virtual = \ + tier1_common = \ + sanity/BasicVMTest.java \ + gtest/GTestWrapper.java \ ++ gtest/LockStackGtests.java \ + gtest/MetaspaceGtests.java \ + gtest/LargePageGtests.java \ + gtest/NMTGtests.java \ +diff --git a/test/hotspot/jtreg/gtest/LockStackGtests.java b/test/hotspot/jtreg/gtest/LockStackGtests.java +new file mode 100644 +index 000000000..e426b2c56 +--- /dev/null ++++ b/test/hotspot/jtreg/gtest/LockStackGtests.java +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++/* @test ++ * @summary Run LockStack gtests with LockingMode=2 ++ * @library /test/lib ++ * @modules java.base/jdk.internal.misc ++ * java.xml ++ * @requires vm.flagless ++ * @run main/native GTestWrapper --gtest_filter=LockStackTest* -XX:LockingMode=2 ++ */ +diff --git a/test/hotspot/jtreg/runtime/lockStack/TestLockStackCapacity.java b/test/hotspot/jtreg/runtime/lockStack/TestLockStackCapacity.java +new file mode 100644 +index 000000000..01ba1f4f1 +--- /dev/null ++++ b/test/hotspot/jtreg/runtime/lockStack/TestLockStackCapacity.java +@@ -0,0 +1,108 @@ ++/* ++ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++/* ++ * @test TestLockStackCapacity ++ * @summary Tests the interaction between recursive lightweight locking and ++ * when the lock stack capacity is exceeded. ++ * @requires vm.flagless ++ * @library /testlibrary /test/lib ++ * @build jdk.test.whitebox.WhiteBox ++ * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox ++ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xint -XX:LockingMode=2 TestLockStackCapacity ++ */ ++ ++import jdk.test.lib.Asserts; ++import jdk.test.whitebox.WhiteBox; ++import jtreg.SkippedException; ++ ++public class TestLockStackCapacity { ++ static final WhiteBox WB = WhiteBox.getWhiteBox(); ++ static final int LockingMode = WB.getIntVMFlag("LockingMode").intValue(); ++ static final int LM_LIGHTWEIGHT = 2; ++ ++ static class SynchronizedObject { ++ static final SynchronizedObject OUTER = new SynchronizedObject(); ++ static final SynchronizedObject INNER = new SynchronizedObject(); ++ static final int LockStackCapacity = WB.getLockStackCapacity(); ++ ++ synchronized void runInner(int depth) { ++ assertNotInflated(); ++ if (depth == 1) { ++ return; ++ } else { ++ runInner(depth - 1); ++ } ++ assertNotInflated(); ++ } ++ ++ synchronized void runOuter(int depth, SynchronizedObject inner) { ++ assertNotInflated(); ++ if (depth == 1) { ++ inner.runInner(LockStackCapacity); ++ } else { ++ runOuter(depth - 1, inner); ++ } ++ assertInflated(); ++ } ++ ++ public static void runTest() { ++ // Test Requires a capacity of at least 2. 
++ Asserts.assertGTE(LockStackCapacity, 2); ++ ++ // Just checking ++ OUTER.assertNotInflated(); ++ INNER.assertNotInflated(); ++ ++ synchronized(OUTER) { ++ OUTER.assertNotInflated(); ++ INNER.assertNotInflated(); ++ OUTER.runOuter(LockStackCapacity - 1, INNER); ++ ++ OUTER.assertInflated(); ++ INNER.assertNotInflated(); ++ } ++ } ++ ++ void assertNotInflated() { ++ Asserts.assertFalse(WB.isMonitorInflated(this)); ++ } ++ ++ void assertInflated() { ++ Asserts.assertTrue(WB.isMonitorInflated(this)); ++ } ++ } ++ ++ public static void main(String... args) throws Exception { ++ if (LockingMode != LM_LIGHTWEIGHT) { ++ throw new SkippedException("Test only valid for LM_LIGHTWEIGHT"); ++ } ++ ++ if (!WB.supportsRecursiveLightweightLocking()) { ++ throw new SkippedException("Test only valid if LM_LIGHTWEIGHT supports recursion"); ++ } ++ ++ SynchronizedObject.runTest(); ++ } ++} +diff --git a/test/jdk/com/sun/jdi/EATests.java b/test/jdk/com/sun/jdi/EATests.java +index 8f0a8fabd..70adc9d7f 100644 +--- a/test/jdk/com/sun/jdi/EATests.java ++++ b/test/jdk/com/sun/jdi/EATests.java +@@ -120,7 +120,46 @@ + * -XX:-DoEscapeAnalysis -XX:-EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks + * -XX:+IgnoreUnrecognizedVMOptions -XX:+DeoptimizeObjectsALot + * ++ * @bug 8324881 ++ * @comment Regression test for using the wrong thread when logging during re-locking from deoptimization. ++ * ++ * @comment DiagnoseSyncOnValueBasedClasses=2 will cause logging when locking on \@ValueBased objects. ++ * @run driver EATests ++ * -XX:+UnlockDiagnosticVMOptions ++ * -Xms256m -Xmx256m ++ * -Xbootclasspath/a:. ++ * -XX:CompileCommand=dontinline,*::dontinline_* ++ * -XX:+WhiteBoxAPI ++ * -Xbatch ++ * -XX:+DoEscapeAnalysis -XX:+EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks ++ * -XX:LockingMode=1 ++ * -XX:DiagnoseSyncOnValueBasedClasses=2 ++ * ++ * @comment Re-lock may inflate monitors when re-locking, which cause monitorinflation trace logging. ++ * @run driver EATests ++ * -XX:+UnlockDiagnosticVMOptions ++ * -Xms256m -Xmx256m ++ * -Xbootclasspath/a:. ++ * -XX:CompileCommand=dontinline,*::dontinline_* ++ * -XX:+WhiteBoxAPI ++ * -Xbatch ++ * -XX:+DoEscapeAnalysis -XX:+EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks ++ * -XX:LockingMode=2 ++ * -Xlog:monitorinflation=trace:file=monitorinflation.log ++ * ++ * @comment Re-lock may race with deflation. ++ * @run driver EATests ++ * -XX:+UnlockDiagnosticVMOptions ++ * -Xms256m -Xmx256m ++ * -Xbootclasspath/a:. 
++ * -XX:CompileCommand=dontinline,*::dontinline_* ++ * -XX:+WhiteBoxAPI ++ * -Xbatch ++ * -XX:+DoEscapeAnalysis -XX:+EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks ++ * -XX:LockingMode=0 ++ * -XX:GuaranteedAsyncDeflationInterval=1000 + */ ++ + /** + * @test + * @bug 8227745 +@@ -254,12 +293,14 @@ class EATestsTarget { + new EARelockingRecursiveTarget() .run(); + new EARelockingNestedInflatedTarget() .run(); + new EARelockingNestedInflated_02Target() .run(); ++ new EARelockingNestedInflated_03Target() .run(); + new EARelockingArgEscapeLWLockedInCalleeFrameTarget() .run(); + new EARelockingArgEscapeLWLockedInCalleeFrame_2Target() .run(); + new EARelockingArgEscapeLWLockedInCalleeFrameNoRecursiveTarget() .run(); + new EAGetOwnedMonitorsTarget() .run(); + new EAEntryCountTarget() .run(); + new EARelockingObjectCurrentlyWaitingOnTarget() .run(); ++ new EARelockingValueBasedTarget() .run(); + + // Test cases that require deoptimization even though neither + // locks nor allocations are eliminated at the point where +@@ -374,12 +415,14 @@ public class EATests extends TestScaffold { + new EARelockingRecursive() .run(this); + new EARelockingNestedInflated() .run(this); + new EARelockingNestedInflated_02() .run(this); ++ new EARelockingNestedInflated_03() .run(this); + new EARelockingArgEscapeLWLockedInCalleeFrame() .run(this); + new EARelockingArgEscapeLWLockedInCalleeFrame_2() .run(this); + new EARelockingArgEscapeLWLockedInCalleeFrameNoRecursive() .run(this); + new EAGetOwnedMonitors() .run(this); + new EAEntryCount() .run(this); + new EARelockingObjectCurrentlyWaitingOn() .run(this); ++ new EARelockingValueBased() .run(this); + + // Test cases that require deoptimization even though neither + // locks nor allocations are eliminated at the point where +@@ -2013,6 +2056,94 @@ class EARelockingNestedInflated_02Target extends EATestCaseBaseTarget { + + ///////////////////////////////////////////////////////////////////////////// + ++/** ++ * Like {@link EARelockingNestedInflated_02} with the difference that the ++ * inflation of the lock happens because of contention. ++ */ ++class EARelockingNestedInflated_03 extends EATestCaseBaseDebugger { ++ ++ public void runTestCase() throws Exception { ++ BreakpointEvent bpe = resumeTo(TARGET_TESTCASE_BASE_NAME, "dontinline_brkpt", "()V"); ++ printStack(bpe.thread()); ++ @SuppressWarnings("unused") ++ ObjectReference o = getLocalRef(bpe.thread().frame(2), XYVAL_NAME, "l1"); ++ } ++} ++ ++class EARelockingNestedInflated_03Target extends EATestCaseBaseTarget { ++ ++ public XYVal lockInflatedByContention; ++ public boolean doLockNow; ++ public EATestCaseBaseTarget testCase; ++ ++ @Override ++ public void setUp() { ++ super.setUp(); ++ testMethodDepth = 2; ++ lockInflatedByContention = new XYVal(1, 1); ++ testCase = this; ++ } ++ ++ @Override ++ public void warmupDone() { ++ super.warmupDone(); ++ // Use new lock. lockInflatedByContention might have been inflated because of recursion. 
++ lockInflatedByContention = new XYVal(1, 1); ++ // Start thread that tries to enter lockInflatedByContention while the main thread owns it -> inflation ++ DebuggeeWrapper.newThread(() -> { ++ while (true) { ++ synchronized (testCase) { ++ try { ++ if (doLockNow) { ++ doLockNow = false; // reset for main thread ++ testCase.notify(); ++ break; ++ } ++ testCase.wait(); ++ } catch (InterruptedException e) { /* ignored */ } ++ } ++ } ++ synchronized (lockInflatedByContention) { // will block and trigger inflation ++ msg(Thread.currentThread().getName() + ": acquired lockInflatedByContention"); ++ } ++ }, testCaseName + ": Lock Contender (test thread)").start(); ++ } ++ ++ public void dontinline_testMethod() { ++ @SuppressWarnings("unused") ++ XYVal xy = new XYVal(1, 1); // scalar replaced ++ XYVal l1 = lockInflatedByContention; // read by debugger ++ synchronized (l1) { ++ testMethod_inlined(l1); ++ } ++ } ++ ++ public void testMethod_inlined(XYVal l2) { ++ synchronized (l2) { // eliminated nested locking ++ dontinline_notifyOtherThread(); ++ dontinline_brkpt(); ++ } ++ } ++ ++ public void dontinline_notifyOtherThread() { ++ if (!warmupDone) { ++ return; ++ } ++ synchronized (testCase) { ++ doLockNow = true; ++ testCase.notify(); ++ // wait for other thread to reset doLockNow again ++ while (doLockNow) { ++ try { ++ testCase.wait(); ++ } catch (InterruptedException e) { /* ignored */ } ++ } ++ } ++ } ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++ + /** + * Checks if an eliminated lock of an ArgEscape object l1 can be relocked if + * l1 is locked in a callee frame. +@@ -2228,6 +2359,32 @@ class EARelockingObjectCurrentlyWaitingOnTarget extends EATestCaseBaseTarget { + } + } + ++ ++///////////////////////////////////////////////////////////////////////////// ++ ++/** ++ * Test relocking eliminated @ValueBased object. ++ */ ++class EARelockingValueBased extends EATestCaseBaseDebugger { ++ ++ public void runTestCase() throws Exception { ++ BreakpointEvent bpe = resumeTo(TARGET_TESTCASE_BASE_NAME, "dontinline_brkpt", "()V"); ++ printStack(bpe.thread()); ++ @SuppressWarnings("unused") ++ ObjectReference o = getLocalRef(bpe.thread().frame(1), Integer.class.getName(), "l1"); ++ } ++} ++ ++class EARelockingValueBasedTarget extends EATestCaseBaseTarget { ++ ++ public void dontinline_testMethod() { ++ Integer l1 = new Integer(255); ++ synchronized (l1) { ++ dontinline_brkpt(); ++ } ++ } ++} ++ + ///////////////////////////////////////////////////////////////////////////// + // + // Test cases that require deoptimization even though neither locks +diff --git a/test/lib/jdk/test/whitebox/WhiteBox.java b/test/lib/jdk/test/whitebox/WhiteBox.java +index b0e2530f7..9d905b684 100644 +--- a/test/lib/jdk/test/whitebox/WhiteBox.java ++++ b/test/lib/jdk/test/whitebox/WhiteBox.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -119,6 +119,10 @@ public class WhiteBox { + return isMonitorInflated0(obj); + } + ++ public native int getLockStackCapacity(); ++ ++ public native boolean supportsRecursiveLightweightLocking(); ++ + public native void forceSafepoint(); + + public native void forceClassLoaderStatsSafepoint(); diff --git a/Backport-JDK-8345351-8356159-RISC-V-Add-Zabha.patch b/Backport-JDK-8345351-8356159-RISC-V-Add-Zabha.patch new file mode 100644 index 0000000000000000000000000000000000000000..0520205d9205245d68b51a3c5e21164bdf65e7c3 --- /dev/null +++ b/Backport-JDK-8345351-8356159-RISC-V-Add-Zabha.patch @@ -0,0 +1,1601 @@ +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +index 522550a07..4c167073a 100644 +--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -818,81 +818,239 @@ public: + + #undef INSN + +-enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; +- +-#define INSN(NAME, op, funct3, funct7) \ +- void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { \ +- unsigned insn = 0; \ +- patch((address)&insn, 6, 0, op); \ +- patch((address)&insn, 14, 12, funct3); \ +- patch_reg((address)&insn, 7, Rd); \ +- patch_reg((address)&insn, 15, Rs1); \ +- patch_reg((address)&insn, 20, Rs2); \ +- patch((address)&insn, 31, 27, funct7); \ +- patch((address)&insn, 26, 25, memory_order); \ +- emit(insn); \ +- } +- +- INSN(amoswap_w, 0b0101111, 0b010, 0b00001); +- INSN(amoadd_w, 0b0101111, 0b010, 0b00000); +- INSN(amoxor_w, 0b0101111, 0b010, 0b00100); +- INSN(amoand_w, 0b0101111, 0b010, 0b01100); +- INSN(amoor_w, 0b0101111, 0b010, 0b01000); +- INSN(amomin_w, 0b0101111, 0b010, 0b10000); +- INSN(amomax_w, 0b0101111, 0b010, 0b10100); +- INSN(amominu_w, 0b0101111, 0b010, 0b11000); +- INSN(amomaxu_w, 0b0101111, 0b010, 0b11100); +- INSN(amoswap_d, 0b0101111, 0b011, 0b00001); +- INSN(amoadd_d, 0b0101111, 0b011, 0b00000); +- INSN(amoxor_d, 0b0101111, 0b011, 0b00100); +- INSN(amoand_d, 0b0101111, 0b011, 0b01100); +- INSN(amoor_d, 0b0101111, 0b011, 0b01000); +- INSN(amomin_d, 0b0101111, 0b011, 0b10000); +- INSN(amomax_d , 0b0101111, 0b011, 0b10100); +- INSN(amominu_d, 0b0101111, 0b011, 0b11000); +- INSN(amomaxu_d, 0b0101111, 0b011, 0b11100); +- INSN(amocas_w, 0b0101111, 0b010, 0b00101); +- INSN(amocas_d, 0b0101111, 0b011, 0b00101); +-#undef INSN +- +-enum operand_size { int8, int16, int32, uint32, int64 }; +- +-#define INSN(NAME, op, funct3, funct7) \ +- void NAME(Register Rd, Register Rs1, Aqrl memory_order = relaxed) { \ +- unsigned insn = 0; \ +- uint32_t val = memory_order & 0x3; \ +- patch((address)&insn, 6, 0, op); \ +- patch((address)&insn, 14, 12, funct3); \ +- patch_reg((address)&insn, 7, Rd); \ +- patch_reg((address)&insn, 15, Rs1); \ +- patch((address)&insn, 25, 20, 0b00000); \ +- patch((address)&insn, 31, 27, funct7); \ +- patch((address)&insn, 26, 25, val); \ +- emit(insn); \ +- } +- +- INSN(lr_w, 0b0101111, 0b010, 0b00010); +- INSN(lr_d, 0b0101111, 0b011, 0b00010); +- +-#undef INSN +- +-#define INSN(NAME, op, funct3, funct7) \ +- void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = relaxed) { \ +- unsigned insn = 0; \ +- uint32_t val = memory_order & 0x3; \ +- patch((address)&insn, 6, 0, op); \ +- patch((address)&insn, 14, 12, funct3); \ +- patch_reg((address)&insn, 7, Rd); \ +- patch_reg((address)&insn, 15, Rs2); \ +- patch_reg((address)&insn, 20, Rs1); \ +- patch((address)&insn, 31, 27, 
funct7); \ +- patch((address)&insn, 26, 25, val); \ +- emit(insn); \ ++ enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; ++ ++ private: ++ ++ enum AmoWidthFunct3 : uint8_t { ++ AMO_WIDTH_BYTE = 0b000, // Zabha extension ++ AMO_WIDTH_HALFWORD = 0b001, // Zabha extension ++ AMO_WIDTH_WORD = 0b010, ++ AMO_WIDTH_DOUBLEWORD = 0b011, ++ AMO_WIDTH_QUADWORD = 0b100, ++ // 0b101 to 0b111 are reserved ++ }; ++ ++ enum AmoOperationFunct5 : uint8_t { ++ AMO_ADD = 0b00000, ++ AMO_SWAP = 0b00001, ++ AMO_LR = 0b00010, ++ AMO_SC = 0b00011, ++ AMO_XOR = 0b00100, ++ AMO_OR = 0b01000, ++ AMO_AND = 0b01100, ++ AMO_MIN = 0b10000, ++ AMO_MAX = 0b10100, ++ AMO_MINU = 0b11000, ++ AMO_MAXU = 0b11100, ++ AMO_CAS = 0b00101 // Zacas ++ }; ++ ++ static constexpr uint32_t OP_AMO_MAJOR = 0b0101111; ++ ++ template ++ void amo_base(Register Rd, Register Rs1, uint8_t Rs2, Aqrl memory_order = aqrl) { ++ assert(width > AMO_WIDTH_HALFWORD || UseZabha, "Must be"); ++ assert(funct5 != AMO_CAS || UseZacas, "Must be"); ++ unsigned insn = 0; ++ patch((address)&insn, 6, 0, OP_AMO_MAJOR); ++ patch_reg((address)&insn, 7, Rd); ++ patch((address)&insn, 14, 12, width); ++ patch_reg((address)&insn, 15, Rs1); ++ patch((address)&insn, 24, 20, Rs2); ++ patch((address)&insn, 26, 25, memory_order); ++ patch((address)&insn, 31, 27, funct5); ++ emit(insn); + } + +- INSN(sc_w, 0b0101111, 0b010, 0b00011); +- INSN(sc_d, 0b0101111, 0b011, 0b00011); +-#undef INSN ++ template ++ void amo_base(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2->raw_encoding(), memory_order); ++ } ++ ++ public: ++ ++ void amoadd_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoadd_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoadd_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoadd_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoswap_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoswap_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoswap_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoswap_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoxor_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoxor_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoxor_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoxor_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoor_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoor_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoor_w(Register Rd, Register Rs1, 
Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoor_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoand_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoand_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoand_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amoand_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amomin_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amomin_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amomin_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amomin_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amominu_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amominu_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amominu_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amominu_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amomax_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amomax_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amomax_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amomax_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amomaxu_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amomaxu_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amomaxu_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amomaxu_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ protected: ++ ++ void lr_w(Register Rd, Register Rs1, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, 0, memory_order); ++ } ++ ++ void lr_d(Register Rd, Register Rs1, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, 0, memory_order); ++ } ++ ++ void sc_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void sc_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amocas_b(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, 
memory_order); ++ } ++ ++ void amocas_h(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amocas_w(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ void amocas_d(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { ++ amo_base(Rd, Rs1, Rs2, memory_order); ++ } ++ ++ public: ++ ++ enum operand_size { int8, int16, int32, uint32, int64 }; + + #define INSN(NAME, op, funct5, funct7) \ + void NAME(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) { \ +diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp +index d0a281442..3738f2953 100644 +--- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp +@@ -286,7 +286,7 @@ void ZBarrierSetAssembler::store_barrier_medium(MacroAssembler* masm, + __ relocate(barrier_Relocation::spec(), [&] { + __ li16u(rtmp1, barrier_Relocation::unpatched); + }, ZBarrierRelocationFormatStoreGoodBits); +- __ cmpxchg_weak(rtmp2, zr, rtmp1, ++ __ weak_cmpxchg(rtmp2, zr, rtmp1, + Assembler::int64, + Assembler::relaxed /* acquire */, Assembler::relaxed /* release */, + rtmp3); +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +index 2c18805ec..824d36f0f 100644 +--- a/src/hotspot/cpu/riscv/globals_riscv.hpp ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -107,6 +107,7 @@ define_pd_global(intx, InlineSmallCode, 1000); + product(bool, UseZbb, false, DIAGNOSTIC, "Use Zbb instructions") \ + product(bool, UseZbs, false, DIAGNOSTIC, "Use Zbs instructions") \ + product(bool, UseZacas, false, EXPERIMENTAL, "Use Zacas instructions") \ ++ product(bool, UseZabha, false, EXPERIMENTAL, "Use UseZabha instructions") \ + product(bool, UseZfa, false, EXPERIMENTAL, "Use Zfa instructions") \ + product(bool, UseZic64b, false, EXPERIMENTAL, "Use Zic64b instructions") \ + product(bool, UseZicbom, false, EXPERIMENTAL, "Use Zicbom instructions") \ +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +index 17bf4314c..e2cc6cd92 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -3284,7 +3284,7 @@ void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register o + + void MacroAssembler::load_reserved(Register dst, + Register addr, +- enum operand_size size, ++ Assembler::operand_size size, + Assembler::Aqrl acquire) { + switch (size) { + case int64: +@@ -3305,15 +3305,15 @@ void MacroAssembler::load_reserved(Register dst, + void MacroAssembler::store_conditional(Register dst, + Register new_val, + Register addr, +- enum operand_size size, ++ Assembler::operand_size size, + Assembler::Aqrl release) { + switch (size) { + case int64: +- sc_d(dst, new_val, addr, release); ++ sc_d(dst, addr, new_val, release); + break; + case int32: + case uint32: +- sc_w(dst, new_val, addr, release); ++ sc_w(dst, addr, new_val, release); + break; + default: + ShouldNotReachHere(); +@@ -3322,7 +3322,7 @@ void MacroAssembler::store_conditional(Register dst, + + + void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, Register new_val, +- enum operand_size size, ++ Assembler::operand_size size, + Register shift, Register mask, Register aligned_addr) { + assert(size == int8 || size == int16, 
"unsupported operand size"); + +@@ -3352,10 +3352,11 @@ void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expecte + // which are forced to work with 4-byte aligned address. + void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, + Register new_val, +- enum operand_size size, ++ Assembler::operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result, bool result_as_bool, + Register tmp1, Register tmp2, Register tmp3) { ++ assert(!(UseZacas && UseZabha), "Use amocas"); + assert_different_registers(addr, expected, new_val, result, tmp1, tmp2, tmp3, t0, t1); + + Register scratch0 = t0, aligned_addr = t1; +@@ -3388,13 +3389,13 @@ void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, + notr(scratch1, mask); + bind(retry); + +- lr_w(result, aligned_addr, acquire); ++ load_reserved(result, aligned_addr, operand_size::int32, acquire); + andr(scratch0, result, mask); + bne(scratch0, expected, fail); + + andr(scratch0, result, scratch1); // scratch1 is ~mask + orr(scratch0, scratch0, new_val); +- sc_w(scratch0, scratch0, aligned_addr, release); ++ store_conditional(scratch0, scratch0, aligned_addr, operand_size::int32, release); + bnez(scratch0, retry); + } + +@@ -3426,10 +3427,11 @@ void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, + // failed. + void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, + Register new_val, +- enum operand_size size, ++ Assembler::operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result, + Register tmp1, Register tmp2, Register tmp3) { ++ assert(!(UseZacas && UseZabha), "Use amocas"); + assert_different_registers(addr, expected, new_val, result, tmp1, tmp2, tmp3, t0, t1); + + Register scratch0 = t0, aligned_addr = t1; +@@ -3460,13 +3462,13 @@ void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, + } else { + notr(scratch1, mask); + +- lr_w(result, aligned_addr, acquire); ++ load_reserved(result, aligned_addr, operand_size::int32, acquire); + andr(scratch0, result, mask); + bne(scratch0, expected, fail); + + andr(scratch0, result, scratch1); // scratch1 is ~mask + orr(scratch0, scratch0, new_val); +- sc_w(scratch0, scratch0, aligned_addr, release); ++ store_conditional(scratch0, scratch0, aligned_addr, operand_size::int32, release); + bnez(scratch0, fail); + } + +@@ -3483,10 +3485,10 @@ void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, + + void MacroAssembler::cmpxchg(Register addr, Register expected, + Register new_val, +- enum operand_size size, ++ Assembler::operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result, bool result_as_bool) { +- assert(size != int8 && size != int16, "unsupported operand size"); ++ assert((UseZacas && UseZabha) || (size != int8 && size != int16), "unsupported operand size"); + assert_different_registers(addr, t0); + assert_different_registers(expected, t0); + assert_different_registers(new_val, t0); +@@ -3542,12 +3544,12 @@ void MacroAssembler::cmpxchg(Register addr, Register expected, + bind(done); + } + +-void MacroAssembler::cmpxchg_weak(Register addr, Register expected, ++void MacroAssembler::weak_cmpxchg(Register addr, Register expected, + Register new_val, +- enum operand_size size, ++ Assembler::operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result) { +- ++ assert((UseZacas && UseZabha) || (size != int8 && size != int16), "unsupported 
operand size"); + assert_different_registers(addr, t0); + assert_different_registers(expected, t0); + assert_different_registers(new_val, t0); +@@ -3620,7 +3622,7 @@ ATOMIC_XCHGU(xchgalwu, xchgalw) + #undef ATOMIC_XCHGU + + void MacroAssembler::atomic_cas(Register prev, Register newv, Register addr, +- enum operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release) { ++ Assembler::operand_size size, Assembler::Aqrl acquire, Assembler::Aqrl release) { + switch (size) { + case int64: + amocas_d(prev, addr, newv, (Assembler::Aqrl)(acquire | release)); +@@ -3632,6 +3634,12 @@ void MacroAssembler::atomic_cas(Register prev, Register newv, Register addr, + amocas_w(prev, addr, newv, (Assembler::Aqrl)(acquire | release)); + zero_extend(prev, prev, 32); + break; ++ case int16: ++ amocas_h(prev, addr, newv, (Assembler::Aqrl)(acquire | release)); ++ break; ++ case int8: ++ amocas_b(prev, addr, newv, (Assembler::Aqrl)(acquire | release)); ++ break; + default: + ShouldNotReachHere(); + } +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +index 479d8d1a6..0be049b1b 100644 +--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -1017,26 +1017,26 @@ public: + void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); + void cmpxchg(Register addr, Register expected, + Register new_val, +- enum operand_size size, ++ Assembler::operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result, bool result_as_bool = false); +- void cmpxchg_weak(Register addr, Register expected, ++ void weak_cmpxchg(Register addr, Register expected, + Register new_val, +- enum operand_size size, ++ Assembler::operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result); + void cmpxchg_narrow_value_helper(Register addr, Register expected, Register new_val, +- enum operand_size size, ++ Assembler::operand_size size, + Register shift, Register mask, Register aligned_addr); + void cmpxchg_narrow_value(Register addr, Register expected, + Register new_val, +- enum operand_size size, ++ Assembler::operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result, bool result_as_bool, + Register tmp1, Register tmp2, Register tmp3); + void weak_cmpxchg_narrow_value(Register addr, Register expected, + Register new_val, +- enum operand_size size, ++ Assembler::operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result, + Register tmp1, Register tmp2, Register tmp3); +@@ -1053,7 +1053,7 @@ public: + void atomic_xchgwu(Register prev, Register newv, Register addr); + void atomic_xchgalwu(Register prev, Register newv, Register addr); + +- void atomic_cas(Register prev, Register newv, Register addr, enum operand_size size, ++ void atomic_cas(Register prev, Register newv, Register addr, Assembler::operand_size size, + Assembler::Aqrl acquire = Assembler::relaxed, Assembler::Aqrl release = Assembler::relaxed); + + static bool far_branches() { +@@ -1508,8 +1508,8 @@ private: + int bitset_to_regs(unsigned int bitset, unsigned char* regs); + Address add_memory_helper(const Address dst, Register tmp); + +- void load_reserved(Register dst, Register addr, enum operand_size size, Assembler::Aqrl acquire); +- void store_conditional(Register dst, Register new_val, Register addr, enum operand_size size, Assembler::Aqrl release); ++ void load_reserved(Register dst, Register addr, 
Assembler::operand_size size, Assembler::Aqrl acquire); ++ void store_conditional(Register dst, Register new_val, Register addr, Assembler::operand_size size, Assembler::Aqrl release); + + public: + void lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow); +diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad +index ac22dc536..6e381d3f0 100644 +--- a/src/hotspot/cpu/riscv/riscv.ad ++++ b/src/hotspot/cpu/riscv/riscv.ad +@@ -2256,48 +2256,6 @@ encode %{ + } + %} + +- enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ +- C2_MacroAssembler _masm(&cbuf); +- __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, +- /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, +- /*result as bool*/ true); +- %} +- +- enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ +- C2_MacroAssembler _masm(&cbuf); +- __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, +- /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, +- /*result as bool*/ true); +- %} +- +- enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{ +- C2_MacroAssembler _masm(&cbuf); +- __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, +- /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, +- /*result as bool*/ true); +- %} +- +- enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ +- C2_MacroAssembler _masm(&cbuf); +- __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, +- /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, +- /*result as bool*/ true); +- %} +- +- enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ +- C2_MacroAssembler _masm(&cbuf); +- __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, +- /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, +- /*result as bool*/ true); +- %} +- +- enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{ +- C2_MacroAssembler _masm(&cbuf); +- __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, +- /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, +- /*result as bool*/ true); +- %} +- + // compare and branch instruction encodings + + enc_class riscv_enc_j(label lbl) %{ +@@ -5221,18 +5179,20 @@ instruct prefetchalloc( memory mem ) %{ + + // standard CompareAndSwapX when we are using barriers + // these have higher priority than the rules selected by a predicate +-instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, +- iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++instruct compareAndSwapB_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) + %{ ++ predicate(!UseZabha || !UseZacas); ++ + match(Set res (CompareAndSwapB mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ ins_cost(2 * VOLATILE_REF_COST); + + effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- 
$newval\n\t" +- "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB_narrow" + %} + + ins_encode %{ +@@ -5244,18 +5204,42 @@ instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R1 + ins_pipe(pipe_slow); + %} + +-instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, +- iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) + %{ ++ predicate(UseZabha && UseZacas); ++ ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapS_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ predicate(!UseZabha || !UseZacas); ++ + match(Set res (CompareAndSwapS mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ ins_cost(2 * VOLATILE_REF_COST); + + effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" +- "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS_narrow" + %} + + ins_encode %{ +@@ -5267,18 +5251,44 @@ instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R1 + ins_pipe(pipe_slow); + %} + ++instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(UseZabha && UseZacas); ++ ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ + instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) + %{ + match(Set res (CompareAndSwapI mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapI" + %} + +- ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + + ins_pipe(pipe_slow); + %} +@@ -5287,14 +5297,18 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval + %{ + match(Set res (CompareAndSwapL mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL" + %} + +- ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + + ins_pipe(pipe_slow); + %} +@@ -5305,14 +5319,18 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval + + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP" + %} + +- ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + + ins_pipe(pipe_slow); + %} +@@ -5321,33 +5339,37 @@ instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval + %{ + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapN" + %} + +- ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + + ins_pipe(pipe_slow); + %} + + // alternative CompareAndSwapX when we are eliding barriers +-instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, +- iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++instruct compareAndSwapBAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) + %{ +- predicate(needs_acquiring_load_reserved(n)); ++ predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapB mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ ins_cost(2 * VOLATILE_REF_COST); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" +- "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq_narrow" + %} + + ins_encode %{ +@@ -5359,20 +5381,42 @@ instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI + ins_pipe(pipe_slow); + %} + +-instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, +- iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) + %{ +- predicate(needs_acquiring_load_reserved(n)); ++ predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n)); ++ ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapSAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapS mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ ins_cost(2 * VOLATILE_REF_COST); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" +- "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapSAcq_narrow" + %} + + ins_encode %{ +@@ -5384,20 +5428,46 @@ instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI + ins_pipe(pipe_slow); + %} + ++instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n)); ++ ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ + instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) + %{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndSwapI mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq" + %} + +- ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + + ins_pipe(pipe_slow); + %} +@@ -5408,14 +5478,18 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL new + + match(Set res (CompareAndSwapL mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq" + %} + +- ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + + ins_pipe(pipe_slow); + %} +@@ -5426,14 +5500,18 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP new + + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapPAcq" + %} + +- ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + + ins_pipe(pipe_slow); + %} +@@ -5444,14 +5522,18 @@ instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN new + + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" + "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq" + %} + +- ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} + + ins_pipe(pipe_slow); + %} +@@ -5462,17 +5544,19 @@ instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN new + // no trailing StoreLoad barrier emitted by C2. Unfortunately we + // can't check the type of memory ordering here, so we always emit a + // sc_d(w) with rl bit set. +-instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, +- iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++instruct compareAndExchangeB_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) + %{ ++ predicate(!UseZabha || !UseZacas); ++ + match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++ ins_cost(2 * VOLATILE_REF_COST); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ +- "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" ++ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB_narrow" + %} + + ins_encode %{ +@@ -5484,17 +5568,39 @@ instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iReg + ins_pipe(pipe_slow); + %} + +-instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, +- iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(UseZabha && UseZacas); ++ ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeS_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) + %{ ++ predicate(!UseZabha || !UseZacas); ++ + match(Set res (CompareAndExchangeS mem (Binary 
oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); ++ ins_cost(2 * VOLATILE_REF_COST); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ +- "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" ++ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS_narrow" + %} + + ins_encode %{ +@@ -5506,13 +5612,31 @@ instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iReg + ins_pipe(pipe_slow); + %} + ++instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(UseZabha && UseZacas); ++ ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ + instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) + %{ + match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); +- +- effect(TEMP_DEF res); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI" +@@ -5530,9 +5654,7 @@ instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL ne + %{ + match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); +- +- effect(TEMP_DEF res); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" +@@ -5550,9 +5672,7 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne + %{ + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); +- +- effect(TEMP_DEF res); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" +@@ -5569,11 +5689,10 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne + instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ + predicate(n->as_LoadStore()->barrier_data() == 0); +- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + +- effect(TEMP_DEF res); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" +@@ -5587,19 +5706,19 @@ instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP ne + ins_pipe(pipe_slow); + %} + +-instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, +- iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) 
++instruct compareAndExchangeBAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) + %{ +- predicate(needs_acquiring_load_reserved(n)); ++ predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++ ins_cost(2 * VOLATILE_REF_COST); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ +- "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq_narrow" + %} + + ins_encode %{ +@@ -5611,19 +5730,39 @@ instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i + ins_pipe(pipe_slow); + %} + +-instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, +- iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) + %{ +- predicate(needs_acquiring_load_reserved(n)); ++ predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n)); ++ ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeSAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); ++ ins_cost(2 * VOLATILE_REF_COST); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ +- "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq_narrow" + %} + + ins_encode %{ +@@ -5635,15 +5774,33 @@ instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i + ins_pipe(pipe_slow); + %} + ++instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n)); ++ ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ + instruct 
compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) + %{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); +- +- effect(TEMP_DEF res); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq" +@@ -5663,9 +5820,7 @@ instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL + + match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); +- +- effect(TEMP_DEF res); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" +@@ -5685,9 +5840,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN + + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); +- +- effect(TEMP_DEF res); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" +@@ -5707,9 +5860,7 @@ instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP + + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); +- +- effect(TEMP_DEF res); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ + "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" +@@ -5723,18 +5874,20 @@ instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP + ins_pipe(pipe_slow); + %} + +-instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, +- iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++instruct weakCompareAndSwapB_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) + %{ ++ predicate(!UseZabha || !UseZacas); ++ + match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++ ins_cost(2 * VOLATILE_REF_COST); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ +- "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" +- "# $res == 1 when success, #@weakCompareAndSwapB" ++ "weak_cmpxchg $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapB_narrow" + %} + + ins_encode %{ +@@ -5746,18 +5899,41 @@ instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iReg + ins_pipe(pipe_slow); + %} + +-instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, +- iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(UseZabha && UseZacas); ++ ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ ++ "weak_cmpxchg $mem, $oldval, 
$newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapB" ++ %} ++ ++ ins_encode %{ ++ __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapS_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) + %{ ++ predicate(!UseZabha || !UseZacas); ++ + match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ ins_cost(2 * VOLATILE_REF_COST); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ +- "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" +- "# $res == 1 when success, #@weakCompareAndSwapS" ++ "weak_cmpxchg $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapS_narrow" + %} + + ins_encode %{ +@@ -5769,19 +5945,40 @@ instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iReg + ins_pipe(pipe_slow); + %} + ++instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(UseZabha && UseZacas); ++ ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ ++ "weak_cmpxchg $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapS" ++ %} ++ ++ ins_encode %{ ++ __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ + instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) + %{ + match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ +- "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "weak_cmpxchg $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "# $res == 1 when success, #@weakCompareAndSwapI" + %} + + ins_encode %{ +- __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + +@@ -5792,15 +5989,15 @@ instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL ne + %{ + match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ +- "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "weak_cmpxchg $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "# $res == 1 when success, #@weakCompareAndSwapL" + %} + + ins_encode %{ +- __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ __ 
weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + +@@ -5811,15 +6008,15 @@ instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne + %{ + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ +- "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "weak_cmpxchg $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "# $res == 1 when success, #@weakCompareAndSwapN" + %} + + ins_encode %{ +- __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + +@@ -5829,37 +6026,38 @@ instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne + instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) + %{ + predicate(n->as_LoadStore()->barrier_data() == 0); ++ + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ +- "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "weak_cmpxchg $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "# $res == 1 when success, #@weakCompareAndSwapP" + %} + + ins_encode %{ +- __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + + ins_pipe(pipe_slow); + %} + +-instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, +- iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++instruct weakCompareAndSwapBAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) + %{ +- predicate(needs_acquiring_load_reserved(n)); ++ predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++ ins_cost(2 * VOLATILE_REF_COST); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ +- "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" +- "# $res == 1 when success, #@weakCompareAndSwapBAcq" ++ "weak_cmpxchg_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapBAcq_narrow" + %} + + ins_encode %{ +@@ -5871,20 +6069,41 @@ instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i + ins_pipe(pipe_slow); + %} + +-instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, +- iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++instruct weakCompareAndSwapBAcq(iRegINoSp res, 
indirect mem, iRegI oldval, iRegI newval) + %{ +- predicate(needs_acquiring_load_reserved(n)); ++ predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n)); ++ ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ ++ "weak_cmpxchg_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapBAcq" ++ %} ++ ++ ins_encode %{ ++ __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapSAcq_narrow(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ predicate((!UseZabha || !UseZacas) && needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ ins_cost(2 * VOLATILE_REF_COST); + + effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + + format %{ +- "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" +- "# $res == 1 when success, #@weakCompareAndSwapSAcq" ++ "weak_cmpxchg_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapSAcq_narrow" + %} + + ins_encode %{ +@@ -5896,21 +6115,42 @@ instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, i + ins_pipe(pipe_slow); + %} + ++instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate((UseZabha && UseZacas) && needs_acquiring_load_reserved(n)); ++ ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ ++ "weak_cmpxchg_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapSAcq" ++ %} ++ ++ ins_encode %{ ++ __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ + instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) + %{ + predicate(needs_acquiring_load_reserved(n)); + + match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ +- "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "weak_cmpxchg_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "# $res == 1 when success, #@weakCompareAndSwapIAcq" + %} + + ins_encode %{ +- __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + +@@ -5923,15 +6163,15 @@ instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL + + match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + 
BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ +- "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "weak_cmpxchg_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "# $res == 1 when success, #@weakCompareAndSwapLAcq" + %} + + ins_encode %{ +- __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + +@@ -5944,15 +6184,15 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN + + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ +- "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "weak_cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "# $res == 1 when success, #@weakCompareAndSwapNAcq" + %} + + ins_encode %{ +- __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + +@@ -5965,15 +6205,15 @@ instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP + + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + +- ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(2 * VOLATILE_REF_COST); + + format %{ +- "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "weak_cmpxchg_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" + "\t# $res == 1 when success, #@weakCompareAndSwapPAcq" + %} + + ins_encode %{ +- __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ __ weak_cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + diff --git a/openjdk-21.spec b/openjdk-21.spec index 8a29824872345be6251dc19ecdca45d2a625735b..53c88563df80da88513f1f6f2fc261958b09ef4f 100644 --- a/openjdk-21.spec +++ b/openjdk-21.spec @@ -905,7 +905,7 @@ Name: java-21-%{origin} Version: %{newjavaver}.%{buildver} # This package needs `.rolling` as part of Release so as to not conflict on install with # java-X-openjdk. I.e. 
when latest rolling release is also an LTS release packaged as
-Release: 3
+Release: 4
 
 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons
@@ -1074,6 +1074,9 @@ Patch3017: Backport-JDK-8314125-RISC-V-implement-Base64-intrinsic.patch
 Patch3018: Backport-JDK-8318217-RISC-V-C2-VectorizedHashCode.patch
 Patch3019: Backport-JDK-8317971-RISC-V-implement-copySignF-D-and-signumF-D-intrinsics.patch
 Patch3020: Backport-JDK-8327964-8360179-RISC-V-Only-enable-BigInteger-intrinsics-when-AvoidUnalignedAccess-false.patch
+Patch3021: Backport-JDK-8315743-8315856-8344010-8344382-RISC-V-Use-Zacas-extension-for-cmpxchg.patch
+Patch3022: Backport-JDK-8319778-8324881-8319797-8319900-Recursive-lightweight-locking-riscv64-implementation.patch
+Patch3023: Backport-JDK-8345351-8356159-RISC-V-Add-Zabha.patch
 
 BuildRequires: autoconf
 BuildRequires: automake
@@ -1388,6 +1391,9 @@ pushd %{top_level_dir_name}
 %patch3018 -p1
 %patch3019 -p1
 %patch3020 -p1
+%patch3021 -p1
+%patch3022 -p1
+%patch3023 -p1
 popd
 %endif
 
@@ -1945,6 +1951,9 @@ cjc.mainProgram(args) -- the returns from copy_jdk_configs.lua should not affect
 
 %changelog
+* Fri Nov 14 2025 zhangshihui - 1:21.0.9.10-4
+- RISC-V: add Zacas and Zabha extension support and implement recursive lightweight locking
+
 * Tue Nov 04 2025 panxuefeng - 1:21.0.9.10-3
 - update LoongArch64 port to 21.0.9