PPC64: better vthread support for object monitors
reinrich committed Oct 18, 2024
1 parent 9baaff9 commit 1244994
Showing 29 changed files with 462 additions and 174 deletions.
1 change: 1 addition & 0 deletions src/hotspot/cpu/ppc/assembler_ppc.hpp
@@ -1816,6 +1816,7 @@ class Assembler : public AbstractAssembler {
                  relocInfo::relocType rt = relocInfo::none);

   // helper function for b, bcxx
+  inline bool is_branch(address a);
   inline bool is_within_range_of_b(address a, address pc);
   inline bool is_within_range_of_bcxx(address a, address pc);

6 changes: 6 additions & 0 deletions src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
@@ -454,6 +454,12 @@ inline void Assembler::bclrl( int boint, int biint, int bhint, relocInfo::relocT
 inline void Assembler::bcctr( int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCCTR_OPCODE| bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(0), rt); }
 inline void Assembler::bcctrl(int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCCTR_OPCODE| bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(1), rt); }

+inline bool Assembler::is_branch(address a) {
+  int32_t instr = *(int32_t*) a;
+  int op = inv_op_ppc(instr);
+  return op == b_op || op == bc_op;
+}
+
 // helper function for b
 inline bool Assembler::is_within_range_of_b(address a, address pc) {
   // Guard against illegal branch targets, e.g. -1 (see CompiledDirectCall and ad-file).
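The new Assembler::is_branch() tests only the primary opcode of the instruction word. On Power the primary opcode sits in the 6 most significant bits, with opcode 18 for b/ba/bl/bla and opcode 16 for the bc family; inv_op_ppc() with the b_op/bc_op constants performs this extraction in the real assembler. A minimal standalone sketch of the same decoding:

#include <cstdint>

// Sketch of the check in Assembler::is_branch(): extract the primary
// opcode (top 6 bits of the 32-bit instruction word) and compare it
// against the Power ISA opcodes for b (18) and bc (16).
static bool is_branch_instr(uint32_t instr) {
  uint32_t primary_opcode = instr >> 26;
  return primary_opcode == 18 || primary_opcode == 16;
}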
8 changes: 6 additions & 2 deletions src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
@@ -133,7 +133,9 @@ void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox
   }

   bind(done);
-  inc_held_monitor_count(Rmark /*tmp*/);
+  if (LockingMode == LM_LEGACY) {
+    inc_held_monitor_count(Rmark /*tmp*/);
+  }
 }


@@ -179,7 +181,9 @@ void C1_MacroAssembler::unlock_object(Register Rmark, Register Roop, Register Rb

   // Done
   bind(done);
-  dec_held_monitor_count(Rmark /*tmp*/);
+  if (LockingMode == LM_LEGACY) {
+    dec_held_monitor_count(Rmark /*tmp*/);
+  }
 }


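In both C1 paths the held monitor count is now updated only under LM_LEGACY. The rationale is an assumption based on the surrounding vthread work: legacy stack locking keeps the lock record in a stack frame that cannot be frozen, so the per-thread counter is still needed to pin the virtual thread, while the other locking modes stop maintaining it here. A minimal model of the resulting bookkeeping (hypothetical names, not JDK API):

#include <cstdint>

// Hypothetical model of the counter behind inc/dec_held_monitor_count.
struct ThreadModel {
  intptr_t held_monitor_count = 0;
};

void lock_epilogue(ThreadModel* t, bool legacy_locking) {
  if (legacy_locking) t->held_monitor_count++;  // keeps the vthread pinned
}

void unlock_epilogue(ThreadModel* t, bool legacy_locking) {
  if (legacy_locking) t->held_monitor_count--;
}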
6 changes: 4 additions & 2 deletions src/hotspot/cpu/ppc/c1_Runtime1_ppc.cpp
@@ -64,7 +64,8 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result,

   address return_pc = call_c(entry_point);

-  reset_last_Java_frame();
+  // Last java sp can be null when the RT call was preempted
+  reset_last_Java_frame(false /* check_last_java_sp */);

   // Check for pending exceptions.
   {
@@ -258,7 +259,8 @@ void Runtime1::initialize_pd() {
 }

 uint Runtime1::runtime_blob_current_thread_offset(frame f) {
-  Unimplemented();
+  // On PPC virtual threads don't save the JavaThread* in their context (e.g. C1 stub frames).
+  ShouldNotCallThis();
   return 0;
 }

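The check_last_java_sp flag is skipped here on an assumption about the assertion inside MacroAssembler::reset_last_Java_frame: if the runtime call was preempted and the virtual thread unmounted, the frame anchor can already be clear when the stub resumes, so a sanity check that it is still set would fire. A sketch of the assumed semantics, with hypothetical types:

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for the frame-anchor reset semantics.
struct FrameAnchorModel {
  intptr_t* last_Java_sp = nullptr;
};

void reset_last_Java_frame_model(FrameAnchorModel* anchor, bool check_last_java_sp) {
  if (check_last_java_sp) {
    assert(anchor->last_Java_sp != nullptr && "anchor must be set");
  }
  anchor->last_Java_sp = nullptr;  // clear the anchor unconditionally
}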
29 changes: 20 additions & 9 deletions src/hotspot/cpu/ppc/continuationFreezeThaw_ppc.inline.hpp
@@ -72,7 +72,9 @@ void FreezeBase::adjust_interpreted_frame_unextended_sp(frame& f) {
 }

 inline void FreezeBase::prepare_freeze_interpreted_top_frame(const frame& f) {
-  Unimplemented();
+  // nothing to do
+  DEBUG_ONLY( intptr_t* lspp = (intptr_t*) &(f.get_ijava_state()->top_frame_sp); )
+  assert(*lspp == f.unextended_sp() - f.fp(), "should be " INTPTR_FORMAT " usp:" INTPTR_FORMAT " fp:" INTPTR_FORMAT, *lspp, p2i(f.unextended_sp()), p2i(f.fp()));
 }

 inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, const frame& hf) {
@@ -354,6 +356,7 @@ inline void Thaw<ConfigT>::patch_caller_links(intptr_t* sp, intptr_t* bottom) {
   if (is_entry_frame) {
     callers_sp = _cont.entryFP();
   } else {
+    assert(!Interpreter::contains(pc), "sp:" PTR_FORMAT " pc:" PTR_FORMAT, p2i(sp), p2i(pc));
     CodeBlob* cb = CodeCache::find_blob_fast(pc);
     callers_sp = sp + cb->frame_size();
   }
@@ -484,8 +487,8 @@ inline frame ThawBase::new_entry_frame() {
 template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom) {
   assert(FKind::is_instance(hf), "");

-  assert(is_aligned(caller.fp(), frame::frame_alignment), "");
-  assert(is_aligned(caller.sp(), frame::frame_alignment), "");
+  assert(is_aligned(caller.fp(), frame::frame_alignment), PTR_FORMAT, p2i(caller.fp()));
+  // caller.sp() can be unaligned. This is fixed below.
   if (FKind::interpreted) {
     // Note: we have to overlap with the caller, at least if it is interpreted, to match the
     // max_thawing_size calculation during freeze. See also comment above.
@@ -514,7 +517,7 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
     return f;
   } else {
     int fsize = FKind::size(hf);
-    int argsize = hf.compiled_frame_stack_argsize();
+    int argsize = FKind::stack_argsize(hf);
     intptr_t* frame_sp = caller.sp() - fsize;

     if ((bottom && argsize > 0) || caller.is_interpreted_frame()) {
@@ -548,13 +551,21 @@ inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, c
 }

 inline intptr_t* ThawBase::possibly_adjust_frame(frame& top) {
-  Unimplemented();
-  return nullptr;
+  // Nothing to do
+  return top.sp();
 }

 inline intptr_t* ThawBase::push_cleanup_continuation() {
-  Unimplemented();
-  return nullptr;
+  frame enterSpecial = new_entry_frame();
+  frame::common_abi* enterSpecial_abi = (frame::common_abi*)enterSpecial.sp();
+
+  enterSpecial_abi->lr = (intptr_t)ContinuationEntry::cleanup_pc();
+
+  log_develop_trace(continuations, preempt)("push_cleanup_continuation enterSpecial sp: " INTPTR_FORMAT " cleanup pc: " INTPTR_FORMAT,
+                                            p2i(enterSpecial_abi),
+                                            p2i(ContinuationEntry::cleanup_pc()));
+
+  return enterSpecial.sp();
 }

 inline void ThawBase::patch_pd(frame& f, const frame& caller) {
@@ -564,7 +575,7 @@ inline void ThawBase::patch_pd(frame& f, const frame& caller) {
 }

 inline void ThawBase::patch_pd(frame& f, intptr_t* caller_sp) {
-  Unimplemented();
+  assert(f.own_abi()->callers_sp == (uint64_t)caller_sp, "should have been fixed by patch_caller_links");
 }

 //
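push_cleanup_continuation() plants ContinuationEntry::cleanup_pc() in the LR save slot of the enterSpecial frame, so that returning from that frame runs the cleanup code. A standalone sketch of the frame-header layout this relies on (a simplified stand-in for frame::common_abi; the real struct in frame_ppc.hpp has more fields):

#include <cstdint>

// Simplified model of the PPC64 ABI frame header: a back link to the
// caller's SP followed by the CR and LR save slots.
struct common_abi_model {
  uint64_t callers_sp;  // back chain
  uint64_t cr;          // condition register save slot
  uint64_t lr;          // link register save slot
};

// What push_cleanup_continuation() effectively does: write the cleanup pc
// into the LR slot of the topmost (enterSpecial) frame.
void plant_cleanup_pc(uint64_t* frame_sp, uint64_t cleanup_pc) {
  common_abi_model* abi = reinterpret_cast<common_abi_model*>(frame_sp);
  abi->lr = cleanup_pc;
}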
18 changes: 11 additions & 7 deletions src/hotspot/cpu/ppc/continuationHelper_ppc.inline.hpp
@@ -27,14 +27,18 @@

 #include "runtime/continuationHelper.hpp"

-template<typename FKind>
-static inline intptr_t** link_address(const frame& f) {
-  Unimplemented();
-  return nullptr;
-}
-
 static inline void patch_return_pc_with_preempt_stub(frame& f) {
-  Unimplemented();
+  if (f.is_runtime_frame()) {
+    // Patch the pc of the now old last Java frame (we already set the anchor to enterSpecial)
+    // so that when target goes back to Java it will actually return to the preempt cleanup stub.
+    frame::common_abi* abi = (frame::common_abi*)f.sp();
+    abi->lr = (uint64_t)StubRoutines::cont_preempt_stub();
+  } else {
+    // The target will check for preemption once it returns to the interpreter
+    // or the native wrapper code and will manually jump to the preempt stub.
+    JavaThread *thread = JavaThread::current();
+    thread->set_preempt_alternate_return(StubRoutines::cont_preempt_stub());
+  }
 }

 inline int ContinuationHelper::frame_align_words(int size) {
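For non-runtime frames the patching goes through a per-thread one-shot slot instead of a stack slot. A standalone model of that handshake (JavaThreadModel is a stand-in; the checking side corresponds to the beq/mtlr/blr sequence emitted in call_VM_preemptable() further down):

using address = unsigned char*;

// Stand-in for the preempt_alternate_return slot in JavaThread.
struct JavaThreadModel {
  address preempt_alternate_return = nullptr;
};

// The returning side: code in the interpreter or native wrapper checks the
// slot and, if set, clears it and jumps there instead of continuing inline.
address next_pc(JavaThreadModel* thread, address inline_continuation) {
  if (address alt = thread->preempt_alternate_return) {
    thread->preempt_alternate_return = nullptr;  // one-shot
    return alt;  // StubRoutines::cont_preempt_stub()
  }
  return inline_continuation;
}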
5 changes: 4 additions & 1 deletion src/hotspot/cpu/ppc/frame_ppc.inline.hpp
@@ -53,7 +53,10 @@ inline void frame::setup(kind knd) {
   // The back link for compiled frames on the heap is not valid
   if (is_heap_frame()) {
     // fp for interpreted frames should have been derelativized and passed to the constructor
-    assert(is_compiled_frame(), "");
+    assert(is_compiled_frame()
+           || is_native_frame()   // native wrapper (nmethod) for j.l.Object::wait0
+           || is_runtime_frame(), // e.g. Runtime1::monitorenter, SharedRuntime::complete_monitor_locking_C
+           "sp:" PTR_FORMAT " fp:" PTR_FORMAT " name:%s", p2i(_sp), p2i(_unextended_sp + _cb->frame_size()), _cb->name());
     // The back link for compiled frames on the heap is invalid.
     _fp = _unextended_sp + _cb->frame_size();
   } else {
10 changes: 9 additions & 1 deletion src/hotspot/cpu/ppc/interp_masm_ppc.hpp
@@ -49,6 +49,14 @@ class InterpreterMacroAssembler: public MacroAssembler {
   virtual void check_and_handle_popframe(Register scratch_reg);
   virtual void check_and_handle_earlyret(Register scratch_reg);

+  void call_VM_preemptable(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true);
+  void restore_after_resume(Register fp);
+  // R22 and R31 are preserved when a vthread gets preempted in the interpreter.
+  // The interpreter already assumes that these registers are nonvolatile across native calls.
+  bool nonvolatile_accross_vthread_preemtion(Register r) const {
+    return r->is_nonvolatile() && ((r == R22) || (r == R31));
+  }
+
   // Base routine for all dispatches.
   void dispatch_base(TosState state, address* table);

@@ -182,7 +190,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
   // Special call VM versions that check for exceptions and forward exception
   // via short cut (not via expensive forward exception stub).
   void check_and_forward_exception(Register Rscratch1, Register Rscratch2);
-  void call_VM(Register oop_result, address entry_point, bool check_exceptions = true);
+  void call_VM(Register oop_result, address entry_point, bool check_exceptions = true, Label* last_java_pc = nullptr);
   void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true);
   void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
   void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
96 changes: 84 additions & 12 deletions src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
@@ -932,7 +932,7 @@ void InterpreterMacroAssembler::remove_activation(TosState state,
 //
 void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
   if (LockingMode == LM_MONITOR) {
-    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
+    call_VM_preemptable(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
   } else {
     // template code (for LM_LEGACY):
     //
@@ -953,8 +953,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
     const Register current_header = R9_ARG7;
     const Register tmp = R10_ARG8;

-    Label count_locking, done;
-    Label cas_failed, slow_case;
+    Label count_locking, done, slow_case, cas_failed;

     assert_different_registers(header, object_mark_addr, current_header, tmp);

@@ -969,7 +968,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {

     if (LockingMode == LM_LIGHTWEIGHT) {
       lightweight_lock(monitor, object, header, tmp, slow_case);
-      b(count_locking);
+      b(done);
     } else if (LockingMode == LM_LEGACY) {
       // Load markWord from object into header.
       ld(header, oopDesc::mark_offset_in_bytes(), object);
Expand Down Expand Up @@ -1035,12 +1034,15 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// None of the above fast optimizations worked so we have to get into the
// slow case of monitor enter.
bind(slow_case);
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
b(done);
call_VM_preemptable(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
// }
align(32, 12);
bind(count_locking);
inc_held_monitor_count(current_header /*tmp*/);

if (LockingMode == LM_LEGACY) {
b(done);
align(32, 12);
bind(count_locking);
inc_held_monitor_count(current_header /*tmp*/);
}
bind(done);
}
}
Expand Down Expand Up @@ -1137,7 +1139,9 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
bind(free_slot);
li(R0, 0);
std(R0, in_bytes(BasicObjectLock::obj_offset()), monitor);
dec_held_monitor_count(current_header /*tmp*/);
if (LockingMode == LM_LEGACY) {
dec_held_monitor_count(current_header /*tmp*/);
}
bind(done);
}
}
Expand Down Expand Up @@ -2133,10 +2137,10 @@ void InterpreterMacroAssembler::check_and_forward_exception(Register Rscratch1,
bind(Ldone);
}

void InterpreterMacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
void InterpreterMacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions, Label* last_java_pc) {
save_interpreter_state(R11_scratch1);

MacroAssembler::call_VM(oop_result, entry_point, false);
MacroAssembler::call_VM(oop_result, entry_point, false /*check_exceptions*/, last_java_pc);

restore_interpreter_state(R11_scratch1, /*bcp_and_mdx_only*/ true);

@@ -2155,6 +2159,74 @@ void InterpreterMacroAssembler::call_VM(Register oop_result, address entry_point
   call_VM(oop_result, entry_point, check_exceptions);
 }

+void InterpreterMacroAssembler::call_VM_preemptable(Register oop_result, address entry_point,
+                                                    Register arg_1, bool check_exceptions) {
+  if (!Continuations::enabled()) {
+    call_VM(oop_result, entry_point, arg_1, check_exceptions);
+    return;
+  }
+
+  Label resume_pc, not_preempted;
+
+  DEBUG_ONLY(ld(R0, in_bytes(JavaThread::preempt_alternate_return_offset()), R16_thread));
+  DEBUG_ONLY(cmpdi(CCR0, R0, 0));
+  asm_assert_eq("Should not have alternate return address set");
+
+  // Preserve 2 registers
+  assert(nonvolatile_accross_vthread_preemtion(R31) && nonvolatile_accross_vthread_preemtion(R22), "");
+  ld(R3_ARG1, _abi0(callers_sp), R1_SP); // load FP
+  std(R31, _ijava_state_neg(lresult), R3_ARG1);
+  std(R22, _ijava_state_neg(fresult), R3_ARG1);
+
+  // We set resume_pc as last java pc. It will be saved if the vthread gets preempted.
+  // Later execution will continue right there.
+  mr_if_needed(R4_ARG2, arg_1);
+  push_cont_fastpath();
+  call_VM(oop_result, entry_point, false /*check_exceptions*/, &resume_pc /* last_java_pc */);
+  pop_cont_fastpath();
+
+  // Jump to handler if the call was preempted
+  ld(R0, in_bytes(JavaThread::preempt_alternate_return_offset()), R16_thread);
+  cmpdi(CCR0, R0, 0);
+  beq(CCR0, not_preempted);
+  mtlr(R0);
+  li(R0, 0);
+  std(R0, in_bytes(JavaThread::preempt_alternate_return_offset()), R16_thread);
+  blr();
+
+  bind(resume_pc); // Location to resume execution
+  restore_after_resume(noreg /* fp */);
+  bind(not_preempted);
+}
+
+void InterpreterMacroAssembler::restore_after_resume(Register fp) {
+  if (!Continuations::enabled()) return;
+
+  const address resume_adapter = TemplateInterpreter::cont_resume_interpreter_adapter();
+  add_const_optimized(R31, R29_TOC, MacroAssembler::offset_to_global_toc(resume_adapter));
+  mtctr(R31);
+  bctrl();
+  // Restore registers that are preserved across vthread preemption
+  assert(nonvolatile_accross_vthread_preemtion(R31) && nonvolatile_accross_vthread_preemtion(R22), "");
+  ld(R3_ARG1, _abi0(callers_sp), R1_SP); // load FP
+  ld(R31, _ijava_state_neg(lresult), R3_ARG1);
+  ld(R22, _ijava_state_neg(fresult), R3_ARG1);
+#ifdef ASSERT
+  // Assert FP is in R11_scratch1 (see generate_cont_resume_interpreter_adapter())
+  {
+    Label ok;
+    ld(R12_scratch2, 0, R1_SP); // load fp
+    cmpd(CCR0, R12_scratch2, R11_scratch1);
+    beq(CCR0, ok);
+    stop(FILE_AND_LINE ": FP is expected in R11_scratch1");
+    bind(ok);
+  }
+#endif
+  if (fp != noreg && fp != R11_scratch1) {
+    mr(fp, R11_scratch1);
+  }
+}
+
 void InterpreterMacroAssembler::call_VM(Register oop_result, address entry_point,
                                         Register arg_1, Register arg_2,
                                         bool check_exceptions) {
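A note on the R31/R22 handling in call_VM_preemptable() and restore_after_resume(): only these two nonvolatile registers are guaranteed to survive a preemption, so they are spilled into the caller's ijava_state before the call (reusing the lresult/fresult slots, which are free at this point) and reloaded after the thaw. A standalone sketch of that round trip (the struct is a simplified stand-in for the interpreter state area):

#include <cstdint>

// Simplified stand-in for the interpreter's ijava_state save area.
struct ijava_state_model {
  uint64_t lresult;  // holds R31 while the vthread may be unmounted
  uint64_t fresult;  // holds R22 while the vthread may be unmounted
};

void spill_before_vm_call(ijava_state_model* st, uint64_t r31, uint64_t r22) {
  st->lresult = r31;
  st->fresult = r22;
}

void reload_after_resume(const ijava_state_model* st, uint64_t* r31, uint64_t* r22) {
  *r31 = st->lresult;
  *r22 = st->fresult;
}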