Skip to content

Commit 1b0c36b

Browse files
author
Andrew Haley
committed Feb 19, 2021
8261649: AArch64: Optimize LSE atomics in C++ code
Reviewed-by: adinn
1 parent 61820b7 commit 1b0c36b

File tree

4 files changed

+240
-115
lines changed

4 files changed

+240
-115
lines changed
 

src/hotspot/cpu/aarch64/atomic_aarch64.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,8 @@ extern aarch64_atomic_stub_t aarch64_atomic_xchg_8_impl;
4242
extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_1_impl;
4343
extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_4_impl;
4444
extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_8_impl;
45+
extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_1_relaxed_impl;
46+
extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_4_relaxed_impl;
47+
extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_8_relaxed_impl;
4548

4649
#endif // CPU_AARCH64_ATOMIC_AARCH64_HPP

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp

+159-78
Original file line numberDiff line numberDiff line change
@@ -5574,87 +5574,167 @@ class StubGenerator: public StubCodeGenerator {
55745574
}
55755575

55765576
#ifdef LINUX
5577+
55775578
// ARMv8.1 LSE versions of the atomic stubs used by Atomic::PlatformXX.
55785579
//
55795580
// If LSE is in use, generate LSE versions of all the stubs. The
55805581
// non-LSE versions are in atomic_aarch64.S.
5581-
void generate_atomic_entry_points() {
55825582

5583-
if (! UseLSE) {
5584-
return;
5583+
// class AtomicStubMark records the entry point of a stub and the
5584+
// stub pointer which will point to it. The stub pointer is set to
5585+
// the entry point when ~AtomicStubMark() is called, which must be
5586+
// after ICache::invalidate_range. This ensures safe publication of
5587+
// the generated code.
5588+
class AtomicStubMark {
5589+
address _entry_point;
5590+
aarch64_atomic_stub_t *_stub;
5591+
MacroAssembler *_masm;
5592+
public:
5593+
AtomicStubMark(MacroAssembler *masm, aarch64_atomic_stub_t *stub) {
5594+
_masm = masm;
5595+
__ align(32);
5596+
_entry_point = __ pc();
5597+
_stub = stub;
55855598
}
5599+
~AtomicStubMark() {
5600+
*_stub = (aarch64_atomic_stub_t)_entry_point;
5601+
}
5602+
};
55865603

5587-
__ align(CodeEntryAlignment);
5588-
StubCodeMark mark(this, "StubRoutines", "atomic entry points");
5589-
5590-
__ align(32);
5591-
aarch64_atomic_fetch_add_8_impl = (aarch64_atomic_stub_t)__ pc();
5592-
{
5593-
Register prev = r2, addr = c_rarg0, incr = c_rarg1;
5594-
__ atomic_addal(prev, incr, addr);
5595-
__ mov(r0, prev);
5596-
__ ret(lr);
5604+
// NB: For memory_order_conservative we need a trailing membar after
5605+
// LSE atomic operations but not a leading membar.
5606+
//
5607+
// We don't need a leading membar because a clause in the Arm ARM
5608+
// says:
5609+
//
5610+
// Barrier-ordered-before
5611+
//
5612+
// Barrier instructions order prior Memory effects before subsequent
5613+
// Memory effects generated by the same Observer. A read or a write
5614+
// RW1 is Barrier-ordered-before a read or a write RW 2 from the same
5615+
// Observer if and only if RW1 appears in program order before RW 2
5616+
// and [ ... ] at least one of RW 1 and RW 2 is generated by an atomic
5617+
// instruction with both Acquire and Release semantics.
5618+
//
5619+
// All the atomic instructions {ldaddal, swapal, casal} have Acquire
5620+
// and Release semantics, therefore we don't need a leading
5621+
// barrier. However, there is no corresponding Barrier-ordered-after
5622+
// relationship, therefore we need a trailing membar to prevent a
5623+
// later store or load from being reordered with the store in an
5624+
// atomic instruction.
5625+
//
5626+
// This was checked by using the herd7 consistency model simulator
5627+
// (http://diy.inria.fr/) with this test case:
5628+
//
5629+
// AArch64 LseCas
5630+
// { 0:X1=x; 0:X2=y; 1:X1=x; 1:X2=y; }
5631+
// P0 | P1;
5632+
// LDR W4, [X2] | MOV W3, #0;
5633+
// DMB LD | MOV W4, #1;
5634+
// LDR W3, [X1] | CASAL W3, W4, [X1];
5635+
// | DMB ISH;
5636+
// | STR W4, [X2];
5637+
// exists
5638+
// (0:X3=0 /\ 0:X4=1)
5639+
//
5640+
// If X3 == 0 && X4 == 1, the store to y in P1 has been reordered
5641+
// with the store to x in P1. Without the DMB in P1 this may happen.
5642+
//
5643+
// At the time of writing we don't know of any AArch64 hardware that
5644+
// reorders stores in this way, but the Reference Manual permits it.
5645+
5646+
void gen_cas_entry(Assembler::operand_size size,
5647+
atomic_memory_order order) {
5648+
Register prev = r3, ptr = c_rarg0, compare_val = c_rarg1,
5649+
exchange_val = c_rarg2;
5650+
bool acquire, release;
5651+
switch (order) {
5652+
case memory_order_relaxed:
5653+
acquire = false;
5654+
release = false;
5655+
break;
5656+
default:
5657+
acquire = true;
5658+
release = true;
5659+
break;
55975660
}
5598-
__ align(32);
5599-
aarch64_atomic_fetch_add_4_impl = (aarch64_atomic_stub_t)__ pc();
5600-
{
5601-
Register prev = r2, addr = c_rarg0, incr = c_rarg1;
5602-
__ atomic_addalw(prev, incr, addr);
5603-
__ movw(r0, prev);
5604-
__ ret(lr);
5661+
__ mov(prev, compare_val);
5662+
__ lse_cas(prev, exchange_val, ptr, size, acquire, release, /*not_pair*/true);
5663+
if (order == memory_order_conservative) {
5664+
__ membar(Assembler::StoreStore|Assembler::StoreLoad);
56055665
}
5606-
__ align(32);
5607-
aarch64_atomic_xchg_4_impl = (aarch64_atomic_stub_t)__ pc();
5608-
{
5609-
Register prev = r2, addr = c_rarg0, newv = c_rarg1;
5610-
__ atomic_xchglw(prev, newv, addr);
5666+
if (size == Assembler::xword) {
5667+
__ mov(r0, prev);
5668+
} else {
56115669
__ movw(r0, prev);
5612-
__ ret(lr);
56135670
}
5614-
__ align(32);
5615-
aarch64_atomic_xchg_8_impl = (aarch64_atomic_stub_t)__ pc();
5616-
{
5617-
Register prev = r2, addr = c_rarg0, newv = c_rarg1;
5618-
__ atomic_xchgl(prev, newv, addr);
5671+
__ ret(lr);
5672+
}
5673+
5674+
void gen_ldaddal_entry(Assembler::operand_size size) {
5675+
Register prev = r2, addr = c_rarg0, incr = c_rarg1;
5676+
__ ldaddal(size, incr, prev, addr);
5677+
__ membar(Assembler::StoreStore|Assembler::StoreLoad);
5678+
if (size == Assembler::xword) {
56195679
__ mov(r0, prev);
5620-
__ ret(lr);
5621-
}
5622-
__ align(32);
5623-
aarch64_atomic_cmpxchg_1_impl = (aarch64_atomic_stub_t)__ pc();
5624-
{
5625-
Register prev = r3, ptr = c_rarg0, compare_val = c_rarg1,
5626-
exchange_val = c_rarg2;
5627-
__ cmpxchg(ptr, compare_val, exchange_val,
5628-
MacroAssembler::byte,
5629-
/*acquire*/false, /*release*/false, /*weak*/false,
5630-
prev);
5680+
} else {
56315681
__ movw(r0, prev);
5632-
__ ret(lr);
56335682
}
5634-
__ align(32);
5635-
aarch64_atomic_cmpxchg_4_impl = (aarch64_atomic_stub_t)__ pc();
5636-
{
5637-
Register prev = r3, ptr = c_rarg0, compare_val = c_rarg1,
5638-
exchange_val = c_rarg2;
5639-
__ cmpxchg(ptr, compare_val, exchange_val,
5640-
MacroAssembler::word,
5641-
/*acquire*/false, /*release*/false, /*weak*/false,
5642-
prev);
5683+
__ ret(lr);
5684+
}
5685+
5686+
void gen_swpal_entry(Assembler::operand_size size) {
5687+
Register prev = r2, addr = c_rarg0, incr = c_rarg1;
5688+
__ swpal(size, incr, prev, addr);
5689+
__ membar(Assembler::StoreStore|Assembler::StoreLoad);
5690+
if (size == Assembler::xword) {
5691+
__ mov(r0, prev);
5692+
} else {
56435693
__ movw(r0, prev);
5644-
__ ret(lr);
56455694
}
5646-
__ align(32);
5647-
aarch64_atomic_cmpxchg_8_impl = (aarch64_atomic_stub_t)__ pc();
5648-
{
5649-
Register prev = r3, ptr = c_rarg0, compare_val = c_rarg1,
5650-
exchange_val = c_rarg2;
5651-
__ cmpxchg(ptr, compare_val, exchange_val,
5652-
MacroAssembler::xword,
5653-
/*acquire*/false, /*release*/false, /*weak*/false,
5654-
prev);
5655-
__ mov(r0, prev);
5656-
__ ret(lr);
5695+
__ ret(lr);
5696+
}
5697+
5698+
void generate_atomic_entry_points() {
5699+
if (! UseLSE) {
5700+
return;
56575701
}
5702+
5703+
__ align(CodeEntryAlignment);
5704+
StubCodeMark mark(this, "StubRoutines", "atomic entry points");
5705+
address first_entry = __ pc();
5706+
5707+
// All memory_order_conservative
5708+
AtomicStubMark mark_fetch_add_4(_masm, &aarch64_atomic_fetch_add_4_impl);
5709+
gen_ldaddal_entry(Assembler::word);
5710+
AtomicStubMark mark_fetch_add_8(_masm, &aarch64_atomic_fetch_add_8_impl);
5711+
gen_ldaddal_entry(Assembler::xword);
5712+
5713+
AtomicStubMark mark_xchg_4(_masm, &aarch64_atomic_xchg_4_impl);
5714+
gen_swpal_entry(Assembler::word);
5715+
AtomicStubMark mark_xchg_8_impl(_masm, &aarch64_atomic_xchg_8_impl);
5716+
gen_swpal_entry(Assembler::xword);
5717+
5718+
// CAS, memory_order_conservative
5719+
AtomicStubMark mark_cmpxchg_1(_masm, &aarch64_atomic_cmpxchg_1_impl);
5720+
gen_cas_entry(MacroAssembler::byte, memory_order_conservative);
5721+
AtomicStubMark mark_cmpxchg_4(_masm, &aarch64_atomic_cmpxchg_4_impl);
5722+
gen_cas_entry(MacroAssembler::word, memory_order_conservative);
5723+
AtomicStubMark mark_cmpxchg_8(_masm, &aarch64_atomic_cmpxchg_8_impl);
5724+
gen_cas_entry(MacroAssembler::xword, memory_order_conservative);
5725+
5726+
// CAS, memory_order_relaxed
5727+
AtomicStubMark mark_cmpxchg_1_relaxed
5728+
(_masm, &aarch64_atomic_cmpxchg_1_relaxed_impl);
5729+
gen_cas_entry(MacroAssembler::byte, memory_order_relaxed);
5730+
AtomicStubMark mark_cmpxchg_4_relaxed
5731+
(_masm, &aarch64_atomic_cmpxchg_4_relaxed_impl);
5732+
gen_cas_entry(MacroAssembler::word, memory_order_relaxed);
5733+
AtomicStubMark mark_cmpxchg_8_relaxed
5734+
(_masm, &aarch64_atomic_cmpxchg_8_relaxed_impl);
5735+
gen_cas_entry(MacroAssembler::xword, memory_order_relaxed);
5736+
5737+
ICache::invalidate_range(first_entry, __ pc() - first_entry);
56585738
}
56595739
#endif // LINUX
56605740

@@ -6772,9 +6852,7 @@ class StubGenerator: public StubCodeGenerator {
67726852

67736853
#ifdef LINUX
67746854

6775-
#if 0 // JDK-8261660: disabled for now.
67766855
generate_atomic_entry_points();
6777-
#endif
67786856

67796857
#endif // LINUX
67806858

@@ -6805,19 +6883,22 @@ void StubGenerator_generate(CodeBuffer* code, bool all) {
68056883
// Define pointers to atomic stubs and initialize them to point to the
68066884
// code in atomic_aarch64.S.
68076885

6808-
#define DEFAULT_ATOMIC_OP(OPNAME, SIZE) \
6809-
extern "C" uint64_t aarch64_atomic_ ## OPNAME ## _ ## SIZE ## _default_impl \
6886+
#define DEFAULT_ATOMIC_OP(OPNAME, SIZE, RELAXED) \
6887+
extern "C" uint64_t aarch64_atomic_ ## OPNAME ## _ ## SIZE ## RELAXED ## _default_impl \
68106888
(volatile void *ptr, uint64_t arg1, uint64_t arg2); \
6811-
aarch64_atomic_stub_t aarch64_atomic_ ## OPNAME ## _ ## SIZE ## _impl \
6812-
= aarch64_atomic_ ## OPNAME ## _ ## SIZE ## _default_impl;
6813-
6814-
DEFAULT_ATOMIC_OP(fetch_add, 4)
6815-
DEFAULT_ATOMIC_OP(fetch_add, 8)
6816-
DEFAULT_ATOMIC_OP(xchg, 4)
6817-
DEFAULT_ATOMIC_OP(xchg, 8)
6818-
DEFAULT_ATOMIC_OP(cmpxchg, 1)
6819-
DEFAULT_ATOMIC_OP(cmpxchg, 4)
6820-
DEFAULT_ATOMIC_OP(cmpxchg, 8)
6889+
aarch64_atomic_stub_t aarch64_atomic_ ## OPNAME ## _ ## SIZE ## RELAXED ## _impl \
6890+
= aarch64_atomic_ ## OPNAME ## _ ## SIZE ## RELAXED ## _default_impl;
6891+
6892+
DEFAULT_ATOMIC_OP(fetch_add, 4, )
6893+
DEFAULT_ATOMIC_OP(fetch_add, 8, )
6894+
DEFAULT_ATOMIC_OP(xchg, 4, )
6895+
DEFAULT_ATOMIC_OP(xchg, 8, )
6896+
DEFAULT_ATOMIC_OP(cmpxchg, 1, )
6897+
DEFAULT_ATOMIC_OP(cmpxchg, 4, )
6898+
DEFAULT_ATOMIC_OP(cmpxchg, 8, )
6899+
DEFAULT_ATOMIC_OP(cmpxchg, 1, _relaxed)
6900+
DEFAULT_ATOMIC_OP(cmpxchg, 4, _relaxed)
6901+
DEFAULT_ATOMIC_OP(cmpxchg, 8, _relaxed)
68216902

68226903
#undef DEFAULT_ATOMIC_OP
68236904

src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.S

+54
Original file line numberDiff line numberDiff line change
@@ -26,67 +26,121 @@
2626
.globl aarch64_atomic_fetch_add_8_default_impl
2727
.align 5
2828
aarch64_atomic_fetch_add_8_default_impl:
29+
prfm pstl1strm, [x0]
2930
0: ldaxr x2, [x0]
3031
add x8, x2, x1
3132
stlxr w9, x8, [x0]
3233
cbnz w9, 0b
34+
dmb ish
3335
mov x0, x2
3436
ret
3537

3638
.globl aarch64_atomic_fetch_add_4_default_impl
3739
.align 5
3840
aarch64_atomic_fetch_add_4_default_impl:
41+
prfm pstl1strm, [x0]
3942
0: ldaxr w2, [x0]
4043
add w8, w2, w1
4144
stlxr w9, w8, [x0]
4245
cbnz w9, 0b
46+
dmb ish
4347
mov w0, w2
4448
ret
4549

4650
.globl aarch64_atomic_xchg_4_default_impl
4751
.align 5
4852
aarch64_atomic_xchg_4_default_impl:
53+
prfm pstl1strm, [x0]
4954
0: ldaxr w2, [x0]
5055
stlxr w8, w1, [x0]
5156
cbnz w8, 0b
57+
dmb ish
5258
mov w0, w2
5359
ret
5460

5561
.globl aarch64_atomic_xchg_8_default_impl
5662
.align 5
5763
aarch64_atomic_xchg_8_default_impl:
64+
prfm pstl1strm, [x0]
5865
0: ldaxr x2, [x0]
5966
stlxr w8, x1, [x0]
6067
cbnz w8, 0b
68+
dmb ish
6169
mov x0, x2
6270
ret
6371

6472
.globl aarch64_atomic_cmpxchg_1_default_impl
6573
.align 5
6674
aarch64_atomic_cmpxchg_1_default_impl:
75+
dmb ish
76+
prfm pstl1strm, [x0]
6777
0: ldxrb w3, [x0]
6878
eor w8, w3, w1
6979
tst x8, #0xff
7080
b.ne 1f
7181
stxrb w8, w2, [x0]
7282
cbnz w8, 0b
7383
1: mov w0, w3
84+
dmb ish
7485
ret
7586

7687
.globl aarch64_atomic_cmpxchg_4_default_impl
7788
.align 5
7889
aarch64_atomic_cmpxchg_4_default_impl:
90+
dmb ish
91+
prfm pstl1strm, [x0]
7992
0: ldxr w3, [x0]
8093
cmp w3, w1
8194
b.ne 1f
8295
stxr w8, w2, [x0]
8396
cbnz w8, 0b
8497
1: mov w0, w3
98+
dmb ish
8599
ret
86100

87101
.globl aarch64_atomic_cmpxchg_8_default_impl
88102
.align 5
89103
aarch64_atomic_cmpxchg_8_default_impl:
104+
dmb ish
105+
prfm pstl1strm, [x0]
106+
0: ldxr x3, [x0]
107+
cmp x3, x1
108+
b.ne 1f
109+
stxr w8, x2, [x0]
110+
cbnz w8, 0b
111+
1: mov x0, x3
112+
dmb ish
113+
ret
114+
115+
.globl aarch64_atomic_cmpxchg_1_relaxed_default_impl
116+
.align 5
117+
aarch64_atomic_cmpxchg_1_relaxed_default_impl:
118+
prfm pstl1strm, [x0]
119+
0: ldxrb w3, [x0]
120+
eor w8, w3, w1
121+
tst x8, #0xff
122+
b.ne 1f
123+
stxrb w8, w2, [x0]
124+
cbnz w8, 0b
125+
1: mov w0, w3
126+
ret
127+
128+
.globl aarch64_atomic_cmpxchg_4_relaxed_default_impl
129+
.align 5
130+
aarch64_atomic_cmpxchg_4_relaxed_default_impl:
131+
prfm pstl1strm, [x0]
132+
0: ldxr w3, [x0]
133+
cmp w3, w1
134+
b.ne 1f
135+
stxr w8, w2, [x0]
136+
cbnz w8, 0b
137+
1: mov w0, w3
138+
ret
139+
140+
.globl aarch64_atomic_cmpxchg_8_relaxed_default_impl
141+
.align 5
142+
aarch64_atomic_cmpxchg_8_relaxed_default_impl:
143+
prfm pstl1strm, [x0]
90144
0: ldxr x3, [x0]
91145
cmp x3, x1
92146
b.ne 1f

src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp

+24-37
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ inline D Atomic::PlatformAdd<4>::fetch_and_add(D volatile* dest, I add_value,
8989
STATIC_ASSERT(4 == sizeof(D));
9090
D old_value
9191
= atomic_fastcall(aarch64_atomic_fetch_add_4_impl, dest, add_value);
92-
FULL_MEM_BARRIER;
9392
return old_value;
9493
}
9594

@@ -101,7 +100,6 @@ inline D Atomic::PlatformAdd<8>::fetch_and_add(D volatile* dest, I add_value,
101100
STATIC_ASSERT(8 == sizeof(D));
102101
D old_value
103102
= atomic_fastcall(aarch64_atomic_fetch_add_8_impl, dest, add_value);
104-
FULL_MEM_BARRIER;
105103
return old_value;
106104
}
107105

@@ -112,7 +110,6 @@ inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
112110
atomic_memory_order order) const {
113111
STATIC_ASSERT(4 == sizeof(T));
114112
T old_value = atomic_fastcall(aarch64_atomic_xchg_4_impl, dest, exchange_value);
115-
FULL_MEM_BARRIER;
116113
return old_value;
117114
}
118115

@@ -122,7 +119,6 @@ inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest, T exchange_value,
122119
atomic_memory_order order) const {
123120
STATIC_ASSERT(8 == sizeof(T));
124121
T old_value = atomic_fastcall(aarch64_atomic_xchg_8_impl, dest, exchange_value);
125-
FULL_MEM_BARRIER;
126122
return old_value;
127123
}
128124

@@ -133,18 +129,15 @@ inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest,
133129
T exchange_value,
134130
atomic_memory_order order) const {
135131
STATIC_ASSERT(1 == sizeof(T));
136-
aarch64_atomic_stub_t stub = aarch64_atomic_cmpxchg_1_impl;
137-
if (order == memory_order_relaxed) {
138-
T old_value = atomic_fastcall(stub, dest,
139-
compare_value, exchange_value);
140-
return old_value;
141-
} else {
142-
FULL_MEM_BARRIER;
143-
T old_value = atomic_fastcall(stub, dest,
144-
compare_value, exchange_value);
145-
FULL_MEM_BARRIER;
146-
return old_value;
132+
aarch64_atomic_stub_t stub;
133+
switch (order) {
134+
case memory_order_relaxed:
135+
stub = aarch64_atomic_cmpxchg_1_relaxed_impl; break;
136+
default:
137+
stub = aarch64_atomic_cmpxchg_1_impl; break;
147138
}
139+
140+
return atomic_fastcall(stub, dest, compare_value, exchange_value);
148141
}
149142

150143
template<>
@@ -154,18 +147,15 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
154147
T exchange_value,
155148
atomic_memory_order order) const {
156149
STATIC_ASSERT(4 == sizeof(T));
157-
aarch64_atomic_stub_t stub = aarch64_atomic_cmpxchg_4_impl;
158-
if (order == memory_order_relaxed) {
159-
T old_value = atomic_fastcall(stub, dest,
160-
compare_value, exchange_value);
161-
return old_value;
162-
} else {
163-
FULL_MEM_BARRIER;
164-
T old_value = atomic_fastcall(stub, dest,
165-
compare_value, exchange_value);
166-
FULL_MEM_BARRIER;
167-
return old_value;
150+
aarch64_atomic_stub_t stub;
151+
switch (order) {
152+
case memory_order_relaxed:
153+
stub = aarch64_atomic_cmpxchg_4_relaxed_impl; break;
154+
default:
155+
stub = aarch64_atomic_cmpxchg_4_impl; break;
168156
}
157+
158+
return atomic_fastcall(stub, dest, compare_value, exchange_value);
169159
}
170160

171161
template<>
@@ -175,18 +165,15 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
175165
T exchange_value,
176166
atomic_memory_order order) const {
177167
STATIC_ASSERT(8 == sizeof(T));
178-
aarch64_atomic_stub_t stub = aarch64_atomic_cmpxchg_8_impl;
179-
if (order == memory_order_relaxed) {
180-
T old_value = atomic_fastcall(stub, dest,
181-
compare_value, exchange_value);
182-
return old_value;
183-
} else {
184-
FULL_MEM_BARRIER;
185-
T old_value = atomic_fastcall(stub, dest,
186-
compare_value, exchange_value);
187-
FULL_MEM_BARRIER;
188-
return old_value;
168+
aarch64_atomic_stub_t stub;
169+
switch (order) {
170+
case memory_order_relaxed:
171+
stub = aarch64_atomic_cmpxchg_8_relaxed_impl; break;
172+
default:
173+
stub = aarch64_atomic_cmpxchg_8_impl; break;
189174
}
175+
176+
return atomic_fastcall(stub, dest, compare_value, exchange_value);
190177
}
191178

192179
template<size_t byte_size>

0 commit comments

Comments
 (0)
Please sign in to comment.