changeset 57864:ad9548e8086b

8230392: Define AArch64 as MULTI_COPY_ATOMIC
Reviewed-by: adinn, dholmes
author aph
date Thu, 23 Jan 2020 11:44:04 -0500
parents eec468f180dd
children b986fd6a9173
files src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp src/hotspot/share/gc/shared/taskqueue.inline.hpp
diffstat 2 files changed, 21 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp	Wed Jan 29 14:34:40 2020 +0100
+++ b/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp	Thu Jan 23 11:44:04 2020 -0500
@@ -34,11 +34,10 @@
 
 #define SUPPORTS_NATIVE_CX8
 
-// Aarch64 was not originally defined as multi-copy-atomic, but now is.
-// See: "Simplifying ARM Concurrency: Multicopy-atomic Axiomatic and
-// Operational Models for ARMv8"
-// So we could #define CPU_MULTI_COPY_ATOMIC but historically we have
-// not done so.
+// Aarch64 was not originally defined to be multi-copy-atomic, but now
+// is.  See: "Simplifying ARM Concurrency: Multicopy-atomic Axiomatic
+// and Operational Models for ARMv8"
+#define CPU_MULTI_COPY_ATOMIC
 
 // According to the ARMv8 ARM, "Concurrent modification and execution
 // of instructions can lead to the resulting instruction performing
--- a/src/hotspot/share/gc/shared/taskqueue.inline.hpp	Wed Jan 29 14:34:40 2020 +0100
+++ b/src/hotspot/share/gc/shared/taskqueue.inline.hpp	Thu Jan 23 11:44:04 2020 -0500
@@ -204,11 +204,25 @@
 template<class E, MEMFLAGS F, unsigned int N>
 bool GenericTaskQueue<E, F, N>::pop_global(volatile E& t) {
   Age oldAge = _age.get();
-  // Architectures with weak memory model require a barrier here
-  // to guarantee that bottom is not older than age,
+#ifndef CPU_MULTI_COPY_ATOMIC
+  // Architectures with a non-multi-copy-atomic memory model require a
+  // full fence here to guarantee that bottom is not older than age,
   // which is crucial for the correctness of the algorithm.
-#ifndef CPU_MULTI_COPY_ATOMIC
+  //
+  // We need a full fence here for this case:
+  //
+  // Thread1: set bottom (push)
+  // Thread2: read age, read bottom, set age (pop_global)
+  // Thread3: read age, read bottom (pop_global)
+  //
+  // The requirement is that Thread3 must never read an older bottom
+  // value than Thread2 after Thread3 has seen the age value from
+  // Thread2.
   OrderAccess::fence();
+#else
+  // Everyone else can make do with a LoadLoad barrier to keep reads
+  // from _age and _bottom in order.
+  OrderAccess::loadload();
 #endif
   uint localBot = Atomic::load_acquire(&_bottom);
   uint n_elems = size(localBot, oldAge.top());