@@ -449,9 +449,64 @@ void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler
449
449
__ bind (done);
450
450
}
451
451
452
-
453
- void ShenandoahBarrierSetAssembler::cmpxchg_oop (MacroAssembler* masm, Register addr, Register expected, Register new_val,
454
- bool acquire, bool release, bool weak, bool is_cae,
452
+ // Special Shenandoah CAS implementation that handles false negatives due
453
+ // to concurrent evacuation. The service is more complex than a
454
+ // traditional CAS operation because the CAS operation is intended to
455
+ // succeed if the reference at addr exactly matches expected or if the
456
+ // reference at addr holds a pointer to a from-space object that has
457
+ // been relocated to the location named by expected. There are two
458
+ // races that must be addressed:
459
+ // a) A parallel thread may mutate the contents of addr so that it points
460
+ // to a different object. In this case, the CAS operation should fail.
461
+ // b) A parallel thread may heal the contents of addr, replacing a
462
+ // from-space pointer held in addr with the to-space pointer
463
+ // representing the new location of the object. In this case, the CAS operation should still be able to succeed.
464
+ // Upon entry to cmpxchg_oop, it is assured that new_val equals NULL
465
+ // or it refers to an object that is not being evacuated out of
466
+ // from-space, or it refers to the to-space version of an object that
467
+ // is being evacuated out of from-space.
468
+ //
469
+ // By default, this operation implements sequential consistency and the
470
+ // value held in the result register following execution of the
471
+ // generated code sequence is 0 to indicate failure of CAS, non-zero
472
+ // to indicate success. Arguments support variations on this theme:
473
+ //
474
+ // acquire: Allow relaxation of the memory ordering on CAS from
475
+ // sequential consistency. This can be useful when
476
+ // sequential consistency is not required, such as when
477
+ // another sequentially consistent operation is already
478
+ // present in the execution stream. If acquire, successful
479
+ // execution has the side effect of assuring that memory
480
+ // values updated by other threads and "released" will be
481
+ // visible to any read operations performed by this thread
482
+ // which follow this operation in program order. This is a
483
+ // special optimization that should not be enabled by default.
484
+ // release: Allow relaxation of the memory ordering on CAS from
485
+ // sequential consistency. This can be useful when
486
+ // sequential consistency is not required, such as when
487
+ // another sequentially consistent operation is already
488
+ // present in the execution stream. If release, successful
489
+ // completion of this operation has the side effect of
490
+ // assuring that all writes to memory performed by this
491
+ // thread that precede this operation in program order are
492
+ // visible to all other threads that subsequently "acquire"
493
+ // before reading the respective memory values. This is a
494
+ // special optimization that should not be enabled by default.
495
+ // is_cae: This turns CAS (compare and swap) into CAE (compare and
496
+ // exchange). The HotSpot convention is that CAE makes
497
+ // available to the caller the "failure witness", which is
498
+ // the value that was stored in memory which did not match
499
+ // the expected value. If is_cae, the result is the value
500
+ // most recently fetched from addr rather than a boolean
501
+ // success indicator.
502
+ //
503
+ // Clobbers rscratch1, rscratch2
504
+ void ShenandoahBarrierSetAssembler::cmpxchg_oop (MacroAssembler* masm,
505
+ Register addr,
506
+ Register expected,
507
+ Register new_val,
508
+ bool acquire, bool release,
509
+ bool is_cae,
455
510
Register result) {
456
511
Register tmp1 = rscratch1;
457
512
Register tmp2 = rscratch2;
@@ -460,48 +515,124 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, Register a
460
515
461
516
assert_different_registers (addr, expected, new_val, tmp1, tmp2);
462
517
463
- Label retry , done, fail ;
518
+ Label step4 , done;
464
519
465
- // CAS, using LL/SC pair.
466
- __ bind (retry);
467
- __ load_exclusive (tmp1, addr, size, acquire);
468
- if (is_narrow) {
469
- __ cmpw (tmp1, expected);
470
- } else {
471
- __ cmp (tmp1, expected);
472
- }
473
- __ br (Assembler::NE, fail);
474
- __ store_exclusive (tmp2, new_val, addr, size, release);
475
- if (weak) {
476
- __ cmpw (tmp2, 0u ); // If the store fails, return NE to our caller
477
- } else {
478
- __ cbnzw (tmp2, retry);
479
- }
480
- __ b (done);
520
+ // There are two ways to reach this label. Initial entry into the
521
+ // cmpxchg_oop code expansion starts at step1 (which is equivalent
522
+ // to label step4). Additionally, in the rare case that four steps
523
+ // are required to perform the requested operation, the fourth step
524
+ // is the same as the first. On a second pass through step 1,
525
+ // control may flow through step 2 on its way to failure. It will
526
+ // not flow from step 2 to step 3 since we are assured that the
527
+ // memory at addr no longer holds a from-space pointer.
528
+ //
529
+ // The comments that immediately follow the step4 label apply only
530
+ // to the case in which control reaches this label by branch from
531
+ // step 3.
532
+
533
+ __ bind (step4);
534
+
535
+ // Step 4. CAS has failed because the value most recently fetched
536
+ // from addr (which is now held in tmp1) is no longer the from-space
537
+ // pointer held in tmp2. If a different thread replaced the
538
+ // in-memory value with its equivalent to-space pointer, then CAS
539
+ // may still be able to succeed. The value held in the expected
540
+ // register has not changed.
541
+ //
542
+ // It is extremely rare we reach this point. For this reason, the
543
+ // implementation opts for smaller rather than potentially faster
544
+ // code. Ultimately, smaller code for this rare case most likely
545
+ // delivers higher overall throughput by enabling improved icache
546
+ // performance.
547
+
548
+ // Step 1. Fast-path.
549
+ //
550
+ // Try to CAS with given arguments. If successful, then we are done.
551
+ //
552
+ // No label required for step 1.
553
+
554
+ __ cmpxchg (addr, expected, new_val, size, acquire, release, false , tmp2);
555
+ // EQ flag set iff success. tmp2 holds value fetched.
556
+
557
+ // If expected equals null but tmp2 does not equal null, the
558
+ // following branches to done to report failure of CAS. If both
559
+ // expected and tmp2 equal null, the following branches to done to
560
+ // report success of CAS. There's no need for a special test of
561
+ // expected equal to null.
562
+
563
+ __ br (Assembler::EQ, done);
564
+ // if CAS failed, fall through to step 2
565
+
566
+ // Step 2. CAS has failed because the value held at addr does not
567
+ // match expected. This may be a false negative because the value fetched
568
+ // from addr (now held in tmp2) may be a from-space pointer to the
569
+ // original copy of same object referenced by to-space pointer expected.
570
+ //
571
+ // To resolve this, it suffices to find the forward pointer associated
572
+ // with fetched value. If this matches expected, retry CAS with new
573
+ // parameters. If this mismatches, then we have a legitimate
574
+ // failure, and we're done.
575
+ //
576
+ // No need for step2 label.
577
+
578
+ // overwrite tmp1 with from-space pointer fetched from memory
579
+ __ mov (tmp1, tmp2);
481
580
482
- __ bind (fail);
483
- // Check if rb(expected)==rb(tmp1)
484
- // Shuffle registers so that we have memory value ready for next expected.
485
- __ mov (tmp2, expected);
486
- __ mov (expected, tmp1);
487
581
if (is_narrow) {
582
+ // Decode tmp1 in order to resolve its forward pointer
488
583
__ decode_heap_oop (tmp1, tmp1);
489
- __ decode_heap_oop (tmp2, tmp2);
490
584
}
491
585
resolve_forward_pointer (masm, tmp1);
492
- resolve_forward_pointer (masm, tmp2);
493
- __ cmp (tmp1, tmp2);
494
- // Retry with expected now being the value we just loaded from addr.
495
- __ br (Assembler::EQ, retry);
496
- if (is_cae && is_narrow) {
497
- // For cmp-and-exchange and narrow oops, we need to restore
498
- // the compressed old-value. We moved it to 'expected' a few lines up.
499
- __ mov (tmp1, expected);
586
+ // Encode tmp1 to compare against expected.
587
+ __ encode_heap_oop (tmp1, tmp1);
588
+
589
+ // Does forwarded value of fetched from-space pointer match original
590
+ // value of expected? If tmp1 holds null, this comparison will fail
591
+ // because we know from step1 that expected is not null. There is
592
+ // no need for a separate test for tmp1 (the value originally held
593
+ // in memory) equal to null.
594
+ __ cmp (tmp1, expected);
595
+
596
+ // If not, then the failure was legitimate and we're done.
597
+ // Branching to done with NE condition denotes failure.
598
+ __ br (Assembler::NE, done);
599
+
600
+ // Fall through to step 3. No need for step3 label.
601
+
602
+ // Step 3. We've confirmed that the value originally held in memory
603
+ // (now held in tmp2) pointed to from-space version of original
604
+ // expected value. Try the CAS again with the from-space expected
605
+ // value. If it now succeeds, we're good.
606
+ //
607
+ // Note: tmp2 holds encoded from-space pointer that matches to-space
608
+ // object residing at expected. tmp2 is the new "expected".
609
+
610
+ // Note that macro implementation of __cmpxchg cannot use same register
611
+ // tmp2 for result and expected since it overwrites result before it
612
+ // compares result with expected.
613
+ __ cmpxchg (addr, tmp2, new_val, size, acquire, release, false , tmp1);
614
+ // EQ flag set iff success. tmp1 holds value fetched.
615
+
616
+ // If fetched value did not equal the new expected, this could
617
+ // still be a false negative because some other thread may have
618
+ // newly overwritten the memory value with its to-space equivalent.
619
+ __ br (Assembler::NE, step4);
620
+
621
+ if (is_cae) {
622
+ // We're falling through to done to indicate success. Success
623
+ // with is_cae is denoted by returning the value of expected as
624
+ // result.
625
+ __ mov (tmp2, expected);
500
626
}
627
+
501
628
__ bind (done);
629
+ // At entry to done, the Z (EQ) flag is on iff the CAS
630
+ // operation was successful. Additionally, if is_cae, tmp2 holds
631
+ // the value most recently fetched from addr. In this case, success
632
+ // is denoted by tmp2 matching expected.
502
633
503
634
if (is_cae) {
504
- __ mov (result, tmp1 );
635
+ __ mov (result, tmp2 );
505
636
} else {
506
637
__ cset (result, Assembler::EQ);
507
638
}
0 commit comments