@@ -5574,87 +5574,167 @@ class StubGenerator: public StubCodeGenerator {
   }

 #ifdef LINUX
+
   // ARMv8.1 LSE versions of the atomic stubs used by Atomic::PlatformXX.
   //
   // If LSE is in use, generate LSE versions of all the stubs. The
   // non-LSE versions are in atomic_aarch64.S.
-  void generate_atomic_entry_points() {

-    if (!UseLSE) {
-      return;
+  // class AtomicStubMark records the entry point of a stub and the
+  // stub pointer which will point to it. The stub pointer is set to
+  // the entry point when ~AtomicStubMark() is called, which must be
+  // after ICache::invalidate_range. This ensures safe publication of
+  // the generated code.
+  class AtomicStubMark {
+    address _entry_point;
+    aarch64_atomic_stub_t *_stub;
+    MacroAssembler *_masm;
+  public:
+    AtomicStubMark(MacroAssembler *masm, aarch64_atomic_stub_t *stub) {
+      _masm = masm;
+      __ align(32);
+      _entry_point = __ pc();
+      _stub = stub;
     }
+    ~AtomicStubMark() {
+      *_stub = (aarch64_atomic_stub_t)_entry_point;
+    }
+  };

-    __ align(CodeEntryAlignment);
-    StubCodeMark mark(this, "StubRoutines", "atomic entry points");
-
-    __ align(32);
-    aarch64_atomic_fetch_add_8_impl = (aarch64_atomic_stub_t)__ pc();
-    {
-      Register prev = r2, addr = c_rarg0, incr = c_rarg1;
-      __ atomic_addal(prev, incr, addr);
-      __ mov(r0, prev);
-      __ ret(lr);
+  // NB: For memory_order_conservative we need a trailing membar after
+  // LSE atomic operations but not a leading membar.
+  //
+  // We don't need a leading membar because a clause in the Arm ARM
+  // says:
+  //
+  //   Barrier-ordered-before
+  //
+  //   Barrier instructions order prior Memory effects before subsequent
+  //   Memory effects generated by the same Observer. A read or a write
+  //   RW1 is Barrier-ordered-before a read or a write RW2 from the same
+  //   Observer if and only if RW1 appears in program order before RW2
+  //   and [ ... ] at least one of RW1 and RW2 is generated by an atomic
+  //   instruction with both Acquire and Release semantics.
+  //
+  // All the atomic instructions {ldaddal, swapal, casal} have Acquire
+  // and Release semantics, therefore we don't need a leading
+  // barrier. However, there is no corresponding Barrier-ordered-after
+  // relationship, therefore we need a trailing membar to prevent a
+  // later store or load from being reordered with the store in an
+  // atomic instruction.
+  //
+  // This was checked by using the herd7 consistency model simulator
+  // (http://diy.inria.fr/) with this test case:
+  //
+  // AArch64 LseCas
+  // { 0:X1=x; 0:X2=y; 1:X1=x; 1:X2=y; }
+  // P0           | P1;
+  // LDR W4, [X2] | MOV W3, #0;
+  // DMB LD       | MOV W4, #1;
+  // LDR W3, [X1] | CASAL W3, W4, [X1];
+  //              | DMB ISH;
+  //              | STR W4, [X2];
+  // exists
+  // (0:X3=0 /\ 0:X4=1)
+  //
+  // If X3 == 0 && X4 == 1, the store to y in P1 has been reordered
+  // with the store to x in P1. Without the DMB in P1 this may happen.
+  //
+  // At the time of writing we don't know of any AArch64 hardware that
+  // reorders stores in this way, but the Reference Manual permits it.
+
+  void gen_cas_entry(Assembler::operand_size size,
+                     atomic_memory_order order) {
+    Register prev = r3, ptr = c_rarg0, compare_val = c_rarg1,
+      exchange_val = c_rarg2;
+    bool acquire, release;
+    switch (order) {
+      case memory_order_relaxed:
+        acquire = false;
+        release = false;
+        break;
+      default:
+        acquire = true;
+        release = true;
+        break;
     }
-    __ align(32);
-    aarch64_atomic_fetch_add_4_impl = (aarch64_atomic_stub_t)__ pc();
-    {
-      Register prev = r2, addr = c_rarg0, incr = c_rarg1;
-      __ atomic_addalw(prev, incr, addr);
-      __ movw(r0, prev);
-      __ ret(lr);
+    __ mov(prev, compare_val);
+    __ lse_cas(prev, exchange_val, ptr, size, acquire, release, /*not_pair*/ true);
+    if (order == memory_order_conservative) {
+      __ membar(Assembler::StoreStore|Assembler::StoreLoad);
     }
-    __ align(32);
-    aarch64_atomic_xchg_4_impl = (aarch64_atomic_stub_t)__ pc();
-    {
-      Register prev = r2, addr = c_rarg0, newv = c_rarg1;
-      __ atomic_xchglw(prev, newv, addr);
+    if (size == Assembler::xword) {
+      __ mov(r0, prev);
+    } else {
       __ movw(r0, prev);
-      __ ret(lr);
     }
-    __ align(32);
-    aarch64_atomic_xchg_8_impl = (aarch64_atomic_stub_t)__ pc();
-    {
-      Register prev = r2, addr = c_rarg0, newv = c_rarg1;
-      __ atomic_xchgl(prev, newv, addr);
+    __ ret(lr);
+  }
+
+  void gen_ldaddal_entry(Assembler::operand_size size) {
+    Register prev = r2, addr = c_rarg0, incr = c_rarg1;
+    __ ldaddal(size, incr, prev, addr);
+    __ membar(Assembler::StoreStore|Assembler::StoreLoad);
+    if (size == Assembler::xword) {
       __ mov(r0, prev);
-      __ ret(lr);
-    }
-    __ align(32);
-    aarch64_atomic_cmpxchg_1_impl = (aarch64_atomic_stub_t)__ pc();
-    {
-      Register prev = r3, ptr = c_rarg0, compare_val = c_rarg1,
-        exchange_val = c_rarg2;
-      __ cmpxchg(ptr, compare_val, exchange_val,
-                 MacroAssembler::byte,
-                 /*acquire*/ false, /*release*/ false, /*weak*/ false,
-                 prev);
+    } else {
       __ movw(r0, prev);
-      __ ret(lr);
     }
-    __ align(32);
-    aarch64_atomic_cmpxchg_4_impl = (aarch64_atomic_stub_t)__ pc();
-    {
-      Register prev = r3, ptr = c_rarg0, compare_val = c_rarg1,
-        exchange_val = c_rarg2;
-      __ cmpxchg(ptr, compare_val, exchange_val,
-                 MacroAssembler::word,
-                 /*acquire*/ false, /*release*/ false, /*weak*/ false,
-                 prev);
+    __ ret(lr);
+  }
+
+  void gen_swpal_entry(Assembler::operand_size size) {
+    Register prev = r2, addr = c_rarg0, incr = c_rarg1;
+    __ swpal(size, incr, prev, addr);
+    __ membar(Assembler::StoreStore|Assembler::StoreLoad);
+    if (size == Assembler::xword) {
+      __ mov(r0, prev);
+    } else {
       __ movw(r0, prev);
-      __ ret(lr);
     }
-    __ align(32);
-    aarch64_atomic_cmpxchg_8_impl = (aarch64_atomic_stub_t)__ pc();
-    {
-      Register prev = r3, ptr = c_rarg0, compare_val = c_rarg1,
-        exchange_val = c_rarg2;
-      __ cmpxchg(ptr, compare_val, exchange_val,
-                 MacroAssembler::xword,
-                 /*acquire*/ false, /*release*/ false, /*weak*/ false,
-                 prev);
-      __ mov(r0, prev);
-      __ ret(lr);
+    __ ret(lr);
+  }
+
+  void generate_atomic_entry_points() {
+    if (!UseLSE) {
+      return;
     }
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "atomic entry points");
+    address first_entry = __ pc();
+
+    // All memory_order_conservative
+    AtomicStubMark mark_fetch_add_4(_masm, &aarch64_atomic_fetch_add_4_impl);
+    gen_ldaddal_entry(Assembler::word);
+    AtomicStubMark mark_fetch_add_8(_masm, &aarch64_atomic_fetch_add_8_impl);
+    gen_ldaddal_entry(Assembler::xword);
+
+    AtomicStubMark mark_xchg_4(_masm, &aarch64_atomic_xchg_4_impl);
+    gen_swpal_entry(Assembler::word);
+    AtomicStubMark mark_xchg_8_impl(_masm, &aarch64_atomic_xchg_8_impl);
+    gen_swpal_entry(Assembler::xword);
+
+    // CAS, memory_order_conservative
+    AtomicStubMark mark_cmpxchg_1(_masm, &aarch64_atomic_cmpxchg_1_impl);
+    gen_cas_entry(MacroAssembler::byte, memory_order_conservative);
+    AtomicStubMark mark_cmpxchg_4(_masm, &aarch64_atomic_cmpxchg_4_impl);
+    gen_cas_entry(MacroAssembler::word, memory_order_conservative);
+    AtomicStubMark mark_cmpxchg_8(_masm, &aarch64_atomic_cmpxchg_8_impl);
+    gen_cas_entry(MacroAssembler::xword, memory_order_conservative);
+
+    // CAS, memory_order_relaxed
+    AtomicStubMark mark_cmpxchg_1_relaxed
+      (_masm, &aarch64_atomic_cmpxchg_1_relaxed_impl);
+    gen_cas_entry(MacroAssembler::byte, memory_order_relaxed);
+    AtomicStubMark mark_cmpxchg_4_relaxed
+      (_masm, &aarch64_atomic_cmpxchg_4_relaxed_impl);
+    gen_cas_entry(MacroAssembler::word, memory_order_relaxed);
+    AtomicStubMark mark_cmpxchg_8_relaxed
+      (_masm, &aarch64_atomic_cmpxchg_8_relaxed_impl);
+    gen_cas_entry(MacroAssembler::xword, memory_order_relaxed);
+
+    ICache::invalidate_range(first_entry, __ pc() - first_entry);
   }

 #endif // LINUX
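Note (not part of the patch): once ICache::invalidate_range has run and the AtomicStubMark destructors have published the entry points, the generated code is reached through the aarch64_atomic_*_impl function pointers. Below is a minimal sketch of a call through one such pointer, assuming only the three-argument aarch64_atomic_stub_t signature declared by DEFAULT_ATOMIC_OP further down; the variable names are illustrative.

  // Hedged sketch: call the published 4-byte CAS stub through its pointer.
  // aarch64_atomic_cmpxchg_4_impl has the stub signature
  //   uint64_t (*)(volatile void *ptr, uint64_t arg1, uint64_t arg2),
  // where arg1 is the compare value and arg2 the exchange value
  // (c_rarg1 and c_rarg2 in gen_cas_entry above).
  volatile uint32_t cell = 0;
  uint64_t observed = aarch64_atomic_cmpxchg_4_impl(&cell, /*compare*/ 0, /*exchange*/ 1);
  // 'observed' is the previous value of 'cell'; the CAS succeeded iff observed == 0.

In the VM these pointers are reached via the Atomic::PlatformXX layer mentioned in the comment at the top of the hunk, not called directly like this.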
@@ -6772,9 +6852,7 @@ class StubGenerator: public StubCodeGenerator {

 #ifdef LINUX

-#if 0 // JDK-8261660: disabled for now.
     generate_atomic_entry_points();
-#endif

 #endif // LINUX

@@ -6805,19 +6883,22 @@ void StubGenerator_generate(CodeBuffer* code, bool all) {
 // Define pointers to atomic stubs and initialize them to point to the
 // code in atomic_aarch64.S.

-#define DEFAULT_ATOMIC_OP(OPNAME, SIZE)                                        \
-  extern "C" uint64_t aarch64_atomic_ ## OPNAME ## _ ## SIZE ## _default_impl  \
+#define DEFAULT_ATOMIC_OP(OPNAME, SIZE, RELAXED)                               \
+  extern "C" uint64_t aarch64_atomic_ ## OPNAME ## _ ## SIZE ## RELAXED ## _default_impl \
     (volatile void *ptr, uint64_t arg1, uint64_t arg2);                        \
-  aarch64_atomic_stub_t aarch64_atomic_ ## OPNAME ## _ ## SIZE ## _impl        \
-    = aarch64_atomic_ ## OPNAME ## _ ## SIZE ## _default_impl;
-
-DEFAULT_ATOMIC_OP(fetch_add, 4)
-DEFAULT_ATOMIC_OP(fetch_add, 8)
-DEFAULT_ATOMIC_OP(xchg, 4)
-DEFAULT_ATOMIC_OP(xchg, 8)
-DEFAULT_ATOMIC_OP(cmpxchg, 1)
-DEFAULT_ATOMIC_OP(cmpxchg, 4)
-DEFAULT_ATOMIC_OP(cmpxchg, 8)
+  aarch64_atomic_stub_t aarch64_atomic_ ## OPNAME ## _ ## SIZE ## RELAXED ## _impl \
+    = aarch64_atomic_ ## OPNAME ## _ ## SIZE ## RELAXED ## _default_impl;
+
+DEFAULT_ATOMIC_OP(fetch_add, 4, )
+DEFAULT_ATOMIC_OP(fetch_add, 8, )
+DEFAULT_ATOMIC_OP(xchg, 4, )
+DEFAULT_ATOMIC_OP(xchg, 8, )
+DEFAULT_ATOMIC_OP(cmpxchg, 1, )
+DEFAULT_ATOMIC_OP(cmpxchg, 4, )
+DEFAULT_ATOMIC_OP(cmpxchg, 8, )
+DEFAULT_ATOMIC_OP(cmpxchg, 1, _relaxed)
+DEFAULT_ATOMIC_OP(cmpxchg, 4, _relaxed)
+DEFAULT_ATOMIC_OP(cmpxchg, 8, _relaxed)

 #undef DEFAULT_ATOMIC_OP

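For reference, a mechanical expansion of one invocation of the revised macro (illustration only, not part of the patch); the empty third argument in the first seven invocations simply leaves RELAXED blank:

  // DEFAULT_ATOMIC_OP(cmpxchg, 4, _relaxed) expands to roughly:
  extern "C" uint64_t aarch64_atomic_cmpxchg_4_relaxed_default_impl
    (volatile void *ptr, uint64_t arg1, uint64_t arg2);
  aarch64_atomic_stub_t aarch64_atomic_cmpxchg_4_relaxed_impl
    = aarch64_atomic_cmpxchg_4_relaxed_default_impl;

Each _impl pointer starts out at the generic implementation in atomic_aarch64.S and is overwritten with the LSE stub's entry point by ~AtomicStubMark() when UseLSE is set.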