Skip to content

Commit c5bb023

Browse files
committedJul 7, 2020
8232782: Shenandoah: streamline post-LRB CAS barrier (aarch64)
Reviewed-by: rkennke
1 parent 485194c commit c5bb023

4 files changed

+184
-49
lines changed
 

‎src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) {
4848
newval = tmp2;
4949
}
5050

51-
ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, result);
51+
ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /*acquire*/ false, /*release*/ true, /*is_cae*/ false, result);
5252
}
5353

5454
#undef __

‎src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp

+166-35
Original file line numberDiff line numberDiff line change
@@ -449,9 +449,64 @@ void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler
449449
__ bind(done);
450450
}
451451

452-
453-
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val,
454-
bool acquire, bool release, bool weak, bool is_cae,
452+
// Special Shenandoah CAS implementation that handles false negatives due
453+
// to concurrent evacuation. The service is more complex than a
454+
// traditional CAS operation because the CAS operation is intended to
455+
// succeed if the reference at addr exactly matches expected or if the
456+
// reference at addr holds a pointer to a from-space object that has
457+
// been relocated to the location named by expected. There are two
458+
// races that must be addressed:
459+
// a) A parallel thread may mutate the contents of addr so that it points
460+
// to a different object. In this case, the CAS operation should fail.
461+
// b) A parallel thread may heal the contents of addr, replacing a
462+
// from-space pointer held in addr with the to-space pointer
463+
// representing the new location of the object.
464+
// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL
465+
// or it refers to an object that is not being evacuated out of
466+
// from-space, or it refers to the to-space version of an object that
467+
// is being evacuated out of from-space.
468+
//
469+
// By default, this operation implements sequential consistency and the
470+
// value held in the result register following execution of the
471+
// generated code sequence is 0 to indicate failure of CAS, non-zero
472+
// to indicate success. Arguments support variations on this theme:
473+
//
474+
// acquire: Allow relaxation of the memory ordering on CAS from
475+
// sequential consistency. This can be useful when
476+
// sequential consistency is not required, such as when
477+
// another sequentially consistent operation is already
478+
// present in the execution stream. If acquire, successful
479+
// execution has the side effect of assuring that memory
480+
// values updated by other threads and "released" will be
481+
// visible to any read operations performed by this thread
482+
// which follow this operation in program order. This is a
483+
// special optimization that should not be enabled by default.
484+
// release: Allow relaxation of the memory ordering on CAS from
485+
// sequential consistency. This can be useful when
486+
// sequential consistency is not required, such as when
487+
// another sequentially consistent operation is already
488+
// present in the execution stream. If release, successful
489+
// completion of this operation has the side effect of
490+
// assuring that all writes to memory performed by this
491+
// thread that precede this operation in program order are
492+
// visible to all other threads that subsequently "acquire"
493+
// before reading the respective memory values. This is a
494+
// special optimization that should not be enabled by default.
495+
// is_cae: This turns CAS (compare and swap) into CAE (compare and
496+
// exchange). The HotSpot convention is that CAE makes
497+
// available to the caller the "failure witness", which is
498+
// the value that was stored in memory which did not match
499+
// the expected value. If is_cae, the result is the value
500+
// most recently fetched from addr rather than a boolean
501+
// success indicator.
502+
//
503+
// Clobbers rscratch1, rscratch2
504+
void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
505+
Register addr,
506+
Register expected,
507+
Register new_val,
508+
bool acquire, bool release,
509+
bool is_cae,
455510
Register result) {
456511
Register tmp1 = rscratch1;
457512
Register tmp2 = rscratch2;
@@ -460,48 +515,124 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, Register a
460515

461516
assert_different_registers(addr, expected, new_val, tmp1, tmp2);
462517

463-
Label retry, done, fail;
518+
Label step4, done;
464519

465-
// CAS, using LL/SC pair.
466-
__ bind(retry);
467-
__ load_exclusive(tmp1, addr, size, acquire);
468-
if (is_narrow) {
469-
__ cmpw(tmp1, expected);
470-
} else {
471-
__ cmp(tmp1, expected);
472-
}
473-
__ br(Assembler::NE, fail);
474-
__ store_exclusive(tmp2, new_val, addr, size, release);
475-
if (weak) {
476-
__ cmpw(tmp2, 0u); // If the store fails, return NE to our caller
477-
} else {
478-
__ cbnzw(tmp2, retry);
479-
}
480-
__ b(done);
520+
// There are two ways to reach this label. Initial entry into the
521+
// cmpxchg_oop code expansion starts at step1 (which is equivalent
522+
// to label step4). Additionally, in the rare case that four steps
523+
// are required to perform the requested operation, the fourth step
524+
// is the same as the first. On a second pass through step 1,
525+
// control may flow through step 2 on its way to failure. It will
526+
// not flow from step 2 to step 3 since we are assured that the
527+
// memory at addr no longer holds a from-space pointer.
528+
//
529+
// The comments that immediately follow the step4 label apply only
530+
// to the case in which control reaches this label by branch from
531+
// step 3.
532+
533+
__ bind (step4);
534+
535+
// Step 4. CAS has failed because the value most recently fetched
536+
// from addr (which is now held in tmp1) is no longer the from-space
537+
// pointer held in tmp2. If a different thread replaced the
538+
// in-memory value with its equivalent to-space pointer, then CAS
539+
// may still be able to succeed. The value held in the expected
540+
// register has not changed.
541+
//
542+
// It is extremely rare we reach this point. For this reason, the
543+
// implementation opts for smaller rather than potentially faster
544+
// code. Ultimately, smaller code for this rare case most likely
545+
// delivers higher overall throughput by enabling improved icache
546+
// performance.
547+
548+
// Step 1. Fast-path.
549+
//
550+
// Try to CAS with given arguments. If successful, then we are done.
551+
//
552+
// No label required for step 1.
553+
554+
__ cmpxchg(addr, expected, new_val, size, acquire, release, false, tmp2);
555+
// EQ flag set iff success. tmp2 holds value fetched.
556+
557+
// If expected equals null but tmp2 does not equal null, the
558+
// following branches to done to report failure of CAS. If both
559+
// expected and tmp2 equal null, the following branches to done to
560+
// report success of CAS. There's no need for a special test of
561+
// expected equal to null.
562+
563+
__ br(Assembler::EQ, done);
564+
// if CAS failed, fall through to step 2
565+
566+
// Step 2. CAS has failed because the value held at addr does not
567+
// match expected. This may be a false negative because the value fetched
568+
// from addr (now held in tmp2) may be a from-space pointer to the
569+
// original copy of same object referenced by to-space pointer expected.
570+
//
571+
// To resolve this, it suffices to find the forward pointer associated
572+
// with fetched value. If this matches expected, retry CAS with new
573+
// parameters. If this mismatches, then we have a legitimate
574+
// failure, and we're done.
575+
//
576+
// No need for step2 label.
577+
578+
// overwrite tmp1 with from-space pointer fetched from memory
579+
__ mov(tmp1, tmp2);
481580

482-
__ bind(fail);
483-
// Check if rb(expected)==rb(tmp1)
484-
// Shuffle registers so that we have memory value ready for next expected.
485-
__ mov(tmp2, expected);
486-
__ mov(expected, tmp1);
487581
if (is_narrow) {
582+
// Decode tmp1 in order to resolve its forward pointer
488583
__ decode_heap_oop(tmp1, tmp1);
489-
__ decode_heap_oop(tmp2, tmp2);
490584
}
491585
resolve_forward_pointer(masm, tmp1);
492-
resolve_forward_pointer(masm, tmp2);
493-
__ cmp(tmp1, tmp2);
494-
// Retry with expected now being the value we just loaded from addr.
495-
__ br(Assembler::EQ, retry);
496-
if (is_cae && is_narrow) {
497-
// For cmp-and-exchange and narrow oops, we need to restore
498-
// the compressed old-value. We moved it to 'expected' a few lines up.
499-
__ mov(tmp1, expected);
586+
// Encode tmp1 to compare against expected.
587+
__ encode_heap_oop(tmp1, tmp1);
588+
589+
// Does forwarded value of fetched from-space pointer match original
590+
// value of expected? If tmp1 holds null, this comparison will fail
591+
// because we know from step1 that expected is not null. There is
592+
// no need for a separate test for tmp1 (the value originally held
593+
// in memory) equal to null.
594+
__ cmp(tmp1, expected);
595+
596+
// If not, then the failure was legitimate and we're done.
597+
// Branching to done with NE condition denotes failure.
598+
__ br(Assembler::NE, done);
599+
600+
// Fall through to step 3. No need for step3 label.
601+
602+
// Step 3. We've confirmed that the value originally held in memory
603+
// (now held in tmp2) pointed to from-space version of original
604+
// expected value. Try the CAS again with the from-space expected
605+
// value. If it now succeeds, we're good.
606+
//
607+
// Note: tmp2 holds encoded from-space pointer that matches to-space
608+
// object residing at expected. tmp2 is the new "expected".
609+
610+
// Note that macro implementation of __cmpxchg cannot use same register
611+
// tmp2 for result and expected since it overwrites result before it
612+
// compares result with expected.
613+
__ cmpxchg(addr, tmp2, new_val, size, acquire, release, false, tmp1);
614+
// EQ flag set iff success. tmp1 holds value fetched.
615+
616+
// If fetched value did not equal the new expected, this could
617+
// still be a false negative because some other thread may have
618+
// newly overwritten the memory value with its to-space equivalent.
619+
__ br(Assembler::NE, step4);
620+
621+
if (is_cae) {
622+
// We're falling through to done to indicate success. Success
623+
// with is_cae is denoted by returning the value of expected as
624+
// result.
625+
__ mov(tmp2, expected);
500626
}
627+
501628
__ bind(done);
629+
// At entry to done, the Z (EQ) flag is on iff the CAS
630+
// operation was successful. Additionally, if is_cae, tmp2 holds
631+
// the value most recently fetched from addr. In this case, success
632+
// is denoted by tmp2 matching expected.
502633

503634
if (is_cae) {
504-
__ mov(result, tmp1);
635+
__ mov(result, tmp2);
505636
} else {
506637
__ cset(result, Assembler::EQ);
507638
}

‎src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
8484
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
8585
Register obj, Register tmp, Label& slowpath);
8686
void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val,
87-
bool acquire, bool release, bool weak, bool is_cae, Register result);
87+
bool acquire, bool release, bool is_cae, Register result);
8888

8989
virtual void barrier_stubs_init();
9090
};

‎src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad

+16-12
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ encode %{
3333
Register tmp = $tmp$$Register;
3434
__ mov(tmp, $oldval$$Register); // Must not clobber oldval.
3535
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
36-
/*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
36+
/*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register);
3737
%}
3838

3939
enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{
@@ -42,7 +42,7 @@ encode %{
4242
Register tmp = $tmp$$Register;
4343
__ mov(tmp, $oldval$$Register); // Must not clobber oldval.
4444
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
45-
/*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
45+
/*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register);
4646
%}
4747
%}
4848

@@ -76,7 +76,7 @@ instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, i
7676
ins_encode %{
7777
Register tmp = $tmp$$Register;
7878
__ mov(tmp, $oldval$$Register); // Must not clobber oldval.
79-
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
79+
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register);
8080
%}
8181

8282
ins_pipe(pipe_slow);
@@ -114,7 +114,7 @@ instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval
114114
ins_encode %{
115115
Register tmp = $tmp$$Register;
116116
__ mov(tmp, $oldval$$Register); // Must not clobber oldval.
117-
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register);
117+
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register);
118118
%}
119119

120120
ins_pipe(pipe_slow);
@@ -131,7 +131,7 @@ instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldva
131131
Register tmp = $tmp$$Register;
132132
__ mov(tmp, $oldval$$Register); // Must not clobber oldval.
133133
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
134-
/*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true, $res$$Register);
134+
/*acquire*/ false, /*release*/ true, /*is_cae*/ true, $res$$Register);
135135
%}
136136
ins_pipe(pipe_slow);
137137
%}
@@ -147,7 +147,7 @@ instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldva
147147
Register tmp = $tmp$$Register;
148148
__ mov(tmp, $oldval$$Register); // Must not clobber oldval.
149149
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
150-
/*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true, $res$$Register);
150+
/*acquire*/ false, /*release*/ true, /*is_cae*/ true, $res$$Register);
151151
%}
152152
ins_pipe(pipe_slow);
153153
%}
@@ -164,7 +164,7 @@ instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN ol
164164
Register tmp = $tmp$$Register;
165165
__ mov(tmp, $oldval$$Register); // Must not clobber oldval.
166166
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
167-
/*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true, $res$$Register);
167+
/*acquire*/ true, /*release*/ true, /*is_cae*/ true, $res$$Register);
168168
%}
169169
ins_pipe(pipe_slow);
170170
%}
@@ -181,7 +181,7 @@ instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP ol
181181
Register tmp = $tmp$$Register;
182182
__ mov(tmp, $oldval$$Register); // Must not clobber oldval.
183183
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
184-
/*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true, $res$$Register);
184+
/*acquire*/ true, /*release*/ true, /*is_cae*/ true, $res$$Register);
185185
%}
186186
ins_pipe(pipe_slow);
187187
%}
@@ -197,8 +197,9 @@ instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldva
197197
ins_encode %{
198198
Register tmp = $tmp$$Register;
199199
__ mov(tmp, $oldval$$Register); // Must not clobber oldval.
200+
// Weak is not currently supported by ShenandoahBarrierSetAssembler::cmpxchg_oop
200201
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
201-
/*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false, $res$$Register);
202+
/*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register);
202203
%}
203204
ins_pipe(pipe_slow);
204205
%}
@@ -213,8 +214,9 @@ instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldva
213214
ins_encode %{
214215
Register tmp = $tmp$$Register;
215216
__ mov(tmp, $oldval$$Register); // Must not clobber oldval.
217+
// Weak is not currently supported by ShenandoahBarrierSetAssembler::cmpxchg_oop
216218
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
217-
/*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false, $res$$Register);
219+
/*acquire*/ false, /*release*/ true, /*is_cae*/ false, $res$$Register);
218220
%}
219221
ins_pipe(pipe_slow);
220222
%}
@@ -231,8 +233,9 @@ instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN ol
231233
ins_encode %{
232234
Register tmp = $tmp$$Register;
233235
__ mov(tmp, $oldval$$Register); // Must not clobber oldval.
236+
// Weak is not currently supported by ShenandoahBarrierSetAssembler::cmpxchg_oop
234237
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
235-
/*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false, $res$$Register);
238+
/*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register);
236239
%}
237240
ins_pipe(pipe_slow);
238241
%}
@@ -249,8 +252,9 @@ instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP ol
249252
ins_encode %{
250253
Register tmp = $tmp$$Register;
251254
__ mov(tmp, $oldval$$Register); // Must not clobber oldval.
255+
// Weak is not currently supported by ShenandoahBarrierSetAssembler::cmpxchg_oop
252256
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register,
253-
/*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false, $res$$Register);
257+
/*acquire*/ true, /*release*/ true, /*is_cae*/ false, $res$$Register);
254258
%}
255259
ins_pipe(pipe_slow);
256260
%}

0 commit comments

Comments
 (0)
Please sign in to comment.