atomicops_internals_arm_gcc.h
// Copyright 2010 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation; use atomicops.h instead.
//
// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.

#ifndef V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
#define V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_

#if defined(__QNXNTO__)
#include <sys/cpuinline.h>
#endif

namespace v8 {
namespace base {

// Memory barriers on ARM are funky, but the kernel is here to help:
//
// * ARMv5 didn't support SMP; there is no memory barrier instruction at
//   all on this architecture, nor when targeting its machine code.
//
// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
//   writing a random value to a very specific coprocessor register.
//
// * On ARMv7, the "dmb" instruction is used to perform a full memory
//   barrier (though writing to the co-processor will still work).
//   However, on single-core devices (e.g. Nexus One, or Nexus S),
//   this instruction will take up to 200 ns, which is huge, even though
//   it's completely unneeded on these devices.
//
// * There is no easy way to determine at runtime if the device is
//   single- or multi-core. However, the kernel provides a useful helper
//   function at a fixed memory address (0xffff0fa0), which will always
//   perform a memory barrier in the most efficient way. I.e., on single-core
//   devices, this is an empty function that exits immediately.
//   On multi-core devices, it implements a full memory barrier.
//
// * This source could be compiled to ARMv5 machine code that runs on a
//   multi-core ARMv6 or ARMv7 device. In this case, memory barriers
//   are needed for correct execution. Always call the kernel helper, even
//   when targeting ARMv5TE.
//

inline void MemoryBarrier() {
#if defined(__linux__) || defined(__ANDROID__)
  // Note: This is a function call, which is also an implicit compiler barrier.
  typedef void (*KernelMemoryBarrierFunc)();
  ((KernelMemoryBarrierFunc)0xffff0fa0)();
#elif defined(__QNXNTO__)
  __cpu_membarrier();
#else
#error MemoryBarrier() is not implemented on this platform.
#endif
}
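
// For reference, a minimal sketch of what the kernel helper boils down to on
// each architecture (illustrative only; the exact instruction selection is the
// kernel's business, which is why this file always calls through 0xffff0fa0):
//
//   #if defined(__ARM_ARCH_7A__)
//     __asm__ __volatile__("dmb ish" ::: "memory");  // ARMv7 full barrier.
//   #else
//     // ARMv6: write to the CP15 "Data Memory Barrier" coprocessor register.
//     __asm__ __volatile__("mcr p15, 0, %0, c7, c10, 5" :: "r"(0) : "memory");
//   #endif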

// An ARM toolchain would only define one of these depending on which
// variant of the target architecture is being used. This tests against
// any known ARMv6 or ARMv7 variant, where it is possible to directly
// use ldrex/strex instructions to implement fast atomic operations.
#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
    defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
    defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6KZ__) || defined(__ARM_ARCH_6T2__)

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  int reloop;
  do {
    // The following is equivalent to:
    //
    //   prev_value = LDREX(ptr)
    //   reloop = 0
    //   if (prev_value == old_value)
    //     reloop = STREX(ptr, new_value)
    __asm__ __volatile__("  ldrex %0, [%3]\n"
                         "  mov %1, #0\n"
                         "  cmp %0, %4\n"
#ifdef __thumb2__
                         "  it eq\n"
#endif
                         "  strexeq %1, %5, [%3]\n"
                         : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(old_value), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return prev_value;
}
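
// A minimal usage sketch of the CAS primitive (illustrative only; the
// function and variable names below are hypothetical, not part of this
// file). Retry loops built on top of it follow this shape, e.g. an atomic
// "store maximum":
//
//   void AtomicStoreMax(volatile Atomic32* cell, Atomic32 candidate) {
//     Atomic32 observed = NoBarrier_Load(cell);
//     while (candidate > observed) {
//       Atomic32 prev = NoBarrier_CompareAndSwap(cell, observed, candidate);
//       if (prev == observed) break;  // Our value was stored.
//       observed = prev;              // Someone else raced us; re-check.
//     }
//   }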

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  MemoryBarrier();
  return result;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  MemoryBarrier();
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 value;
  int reloop;
  do {
    // Equivalent to:
    //
    //   value = LDREX(ptr)
    //   value += increment
    //   reloop = STREX(ptr, value)
    //
    __asm__ __volatile__("  ldrex %0, [%3]\n"
                         "  add %0, %0, %4\n"
                         "  strex %1, %0, [%3]\n"
                         : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(increment)
                         : "cc", "memory");
  } while (reloop);
  return value;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  // TODO(digit): Investigate if it's possible to implement this with
  // a single MemoryBarrier() operation between the LDREX and STREX.
  // See http://crbug.com/246514
  MemoryBarrier();
  Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
  MemoryBarrier();
  return result;
}
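
// Sketch of a typical client of Barrier_AtomicIncrement (illustrative only;
// the struct and its fields are hypothetical and not defined in this file):
//
//   struct RefCounted {
//     volatile Atomic32 ref_count_;  // Starts at 1.
//     void AddRef() { NoBarrier_AtomicIncrement(&ref_count_, 1); }
//     bool Release() {
//       // The barriers order prior writes to the object before the decrement,
//       // and the decrement before any deletion the caller performs when this
//       // returns true.
//       return Barrier_AtomicIncrement(&ref_count_, -1) == 0;
//     }
//   };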

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  int reloop;
  do {
    // old_value = LDREX(ptr)
    // reloop = STREX(ptr, new_value)
    __asm__ __volatile__("  ldrex %0, [%3]\n"
                         "  strex %1, %4, [%3]\n"
                         : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return old_value;
}

// This tests against any known ARMv5 variant.
#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
    defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)

// The kernel also provides a helper function to perform an atomic
// compare-and-swap operation at the hard-wired address 0xffff0fc0
// (the kernel's __kernel_cmpxchg user helper).
// On ARMv5, this is implemented by a special code path that the kernel
// detects and treats specially when thread pre-emption happens.
// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
//
// Note that this always performs a full memory barrier, so there is no
// need to add calls to MemoryBarrier() before or after it. It also
// returns 0 on success, and a non-zero value on failure.
//
// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
// use newer kernel revisions, so this should not be a concern.
namespace {

inline int LinuxKernelCmpxchg(Atomic32 old_value,
                              Atomic32 new_value,
                              volatile Atomic32* ptr) {
  typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*);
  return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
}

}  // namespace

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value)
      return prev_value;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  do {
    old_value = *ptr;
  } while (LinuxKernelCmpxchg(old_value, new_value, ptr));
  return old_value;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  for (;;) {
    // Atomically exchange the old value with an incremented one.
    Atomic32 old_value = *ptr;
    Atomic32 new_value = old_value + increment;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
      // The exchange took place as expected.
      return new_value;
    }
    // Otherwise, *ptr changed mid-loop and we need to retry.
  }
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value) {
      // Always ensure acquire semantics.
      MemoryBarrier();
      return prev_value;
    }
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  // This could be implemented as:
  //    MemoryBarrier();
  //    return NoBarrier_CompareAndSwap();
  //
  // But that would use 3 barriers per successful CAS. To improve performance,
  // use Acquire_CompareAndSwap(). Its implementation guarantees that:
  // - A successful swap uses only 2 barriers (in the kernel helper).
  // - An early return due to (prev_value != old_value) performs
  //   a memory barrier with no store, which is equivalent to the
  //   generic implementation above.
  return Acquire_CompareAndSwap(ptr, old_value, new_value);
}

#else
# error "Your CPU's ARM architecture is not supported yet"
#endif

// NOTE: Atomicity of the following load and store operations is only
// guaranteed when the |ptr| values are 32-bit aligned.

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  MemoryBarrier();
  *ptr = value;
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;
  MemoryBarrier();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
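
// Sketch of how the release/acquire pair is meant to be combined for a simple
// flag-protected handoff (illustrative only; |payload| and |ready| are
// hypothetical variables, not part of this file):
//
//   // Producer:
//   payload = ...;              // Plain writes to the shared data.
//   Release_Store(&ready, 1);   // Barrier, then store: data is visible first.
//
//   // Consumer:
//   if (Acquire_Load(&ready)) { // Load, then barrier: flag is read first.
//     use(payload);             // Safe to read the data published above.
//   }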

// Byte accessors.

inline void NoBarrier_Store(volatile Atomic8* ptr, Atomic8 value) {
  *ptr = value;
}

inline Atomic8 NoBarrier_Load(volatile const Atomic8* ptr) { return *ptr; }

} }  // namespace v8::base

#endif  // V8_BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_