changeset 57887:e6e4b43ee3ce

8237649: ZGC: Improved NUMA support when using small pages Reviewed-by: eosterlund, smonteith
author pliden
date Thu, 30 Jan 2020 12:41:26 +0100
parents e53122a71a0c
children b576a0ead397
files src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.hpp src/hotspot/os/linux/os_linux.cpp src/hotspot/os/linux/os_linux.hpp src/hotspot/share/gc/z/zNUMA.cpp src/hotspot/share/gc/z/zPhysicalMemory.cpp
diffstat 6 files changed, 66 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp	Thu Jan 30 11:14:12 2020 +0100
+++ b/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp	Thu Jan 30 12:41:26 2020 +0100
@@ -27,6 +27,7 @@
 #include "gc/z/zGlobals.hpp"
 #include "gc/z/zLargePages.inline.hpp"
 #include "gc/z/zMountPoint_linux.hpp"
+#include "gc/z/zNUMA.inline.hpp"
 #include "gc/z/zPhysicalMemoryBacking_linux.hpp"
 #include "gc/z/zSyscall_linux.hpp"
 #include "logging/log.hpp"
@@ -34,6 +35,7 @@
 #include "runtime/os.hpp"
 #include "utilities/align.hpp"
 #include "utilities/debug.hpp"
+#include "utilities/growableArray.hpp"
 
 #include <fcntl.h>
 #include <stdio.h>
@@ -596,7 +598,38 @@
   return true;
 }
 
-size_t ZPhysicalMemoryBacking::commit(size_t offset, size_t length) {
+static int offset_to_node(size_t offset) {  // Selects the node id to prefer for the memory at 'offset'
+  const GrowableArray<int>* mapping = os::Linux::numa_nindex_to_node();  // NOTE(review): assumed non-NULL and non-empty (modulo by length below) - confirm
+  const size_t nindex = (offset >> ZGranuleSizeShift) % mapping->length();  // shifted offset wrapped around the table length, so successive shifted values cycle through the entries
+  return mapping->at((int)nindex);  // cast needed because at() is called with an int index
+}
+
+size_t ZPhysicalMemoryBacking::commit_numa_interleaved(size_t offset, size_t length) {  // Returns the number of bytes committed, counted from 'offset'
+  size_t committed = 0;
+
+  // Commit one granule at a time, so that each granule
+  // can be allocated from a different preferred node.
+  while (committed < length) {  // NOTE(review): advances by ZGranuleSize, so 'length' is presumably granule-aligned - confirm
+    const size_t granule_offset = offset + committed;
+
+    // Setup NUMA policy to allocate memory from a preferred node
+    os::Linux::numa_set_preferred(offset_to_node(granule_offset));
+
+    if (!commit_inner(granule_offset, ZGranuleSize)) {
+      // Failed; stop and report only what has been committed so far
+      break;
+    }
+
+    committed += ZGranuleSize;
+  }
+
+  // Restore NUMA policy; runs whether the loop completed or broke out early
+  os::Linux::numa_set_preferred(-1);
+
+  return committed;
+}
+
+size_t ZPhysicalMemoryBacking::commit_default(size_t offset, size_t length) {
   // Try to commit the whole region
   if (commit_inner(offset, length)) {
     // Success
@@ -624,6 +657,16 @@
   }
 }
 
+size_t ZPhysicalMemoryBacking::commit(size_t offset, size_t length) {  // Dispatches to the NUMA-interleaved or the default commit path
+  if (ZNUMA::is_enabled() && !ZLargePages::is_explicit()) {
+    // To get granule-level NUMA interleaving when using non-large pages,
+    // we must explicitly interleave the memory at commit/fallocate time.
+    return commit_numa_interleaved(offset, length);
+  }
+
+  return commit_default(offset, length);  // taken when NUMA is disabled or explicit large pages are in use
+}
+
 size_t ZPhysicalMemoryBacking::uncommit(size_t offset, size_t length) {
   log_trace(gc, heap)("Uncommitting memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)",
                       offset / M, (offset + length) / M, length / M);
--- a/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.hpp	Thu Jan 30 11:14:12 2020 +0100
+++ b/src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.hpp	Thu Jan 30 12:41:26 2020 +0100
@@ -57,6 +57,8 @@
   ZErrno fallocate(bool punch_hole, size_t offset, size_t length);
 
   bool commit_inner(size_t offset, size_t length);
+  size_t commit_numa_interleaved(size_t offset, size_t length);
+  size_t commit_default(size_t offset, size_t length);
 
 public:
   ZPhysicalMemoryBacking();
--- a/src/hotspot/os/linux/os_linux.cpp	Thu Jan 30 11:14:12 2020 +0100
+++ b/src/hotspot/os/linux/os_linux.cpp	Thu Jan 30 12:41:26 2020 +0100
@@ -3163,6 +3163,8 @@
                                                   libnuma_v2_dlsym(handle, "numa_get_interleave_mask")));
       set_numa_move_pages(CAST_TO_FN_PTR(numa_move_pages_func_t,
                                          libnuma_dlsym(handle, "numa_move_pages")));
+      set_numa_set_preferred(CAST_TO_FN_PTR(numa_set_preferred_func_t,
+                                            libnuma_dlsym(handle, "numa_set_preferred")));
 
       if (numa_available() != -1) {
         set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
@@ -3298,6 +3300,7 @@
 os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
 os::Linux::numa_get_interleave_mask_func_t os::Linux::_numa_get_interleave_mask;
 os::Linux::numa_move_pages_func_t os::Linux::_numa_move_pages;
+os::Linux::numa_set_preferred_func_t os::Linux::_numa_set_preferred;
 os::Linux::NumaAllocationPolicy os::Linux::_current_numa_policy;
 unsigned long* os::Linux::_numa_all_nodes;
 struct bitmask* os::Linux::_numa_all_nodes_ptr;
--- a/src/hotspot/os/linux/os_linux.hpp	Thu Jan 30 11:14:12 2020 +0100
+++ b/src/hotspot/os/linux/os_linux.hpp	Thu Jan 30 12:41:26 2020 +0100
@@ -219,7 +219,7 @@
   typedef struct bitmask* (*numa_get_membind_func_t)(void);
   typedef struct bitmask* (*numa_get_interleave_mask_func_t)(void);
   typedef long (*numa_move_pages_func_t)(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags);
-
+  typedef void (*numa_set_preferred_func_t)(int node);
   typedef void (*numa_set_bind_policy_func_t)(int policy);
   typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n);
   typedef int (*numa_distance_func_t)(int node1, int node2);
@@ -238,6 +238,7 @@
   static numa_get_membind_func_t _numa_get_membind;
   static numa_get_interleave_mask_func_t _numa_get_interleave_mask;
   static numa_move_pages_func_t _numa_move_pages;
+  static numa_set_preferred_func_t _numa_set_preferred;
   static unsigned long* _numa_all_nodes;
   static struct bitmask* _numa_all_nodes_ptr;
   static struct bitmask* _numa_nodes_ptr;
@@ -258,6 +259,7 @@
   static void set_numa_get_membind(numa_get_membind_func_t func) { _numa_get_membind = func; }
   static void set_numa_get_interleave_mask(numa_get_interleave_mask_func_t func) { _numa_get_interleave_mask = func; }
   static void set_numa_move_pages(numa_move_pages_func_t func) { _numa_move_pages = func; }
+  static void set_numa_set_preferred(numa_set_preferred_func_t func) { _numa_set_preferred = func; }  // stores the function pointer that numa_set_preferred() forwards to
   static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
   static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
   static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
@@ -315,6 +317,11 @@
       _numa_interleave_memory(start, size, _numa_all_nodes);
     }
   }
+  static void numa_set_preferred(int node) {  // Forwards 'node' to the _numa_set_preferred function pointer
+    if (_numa_set_preferred != NULL) {  // silently does nothing when the pointer is NULL
+      _numa_set_preferred(node);
+    }
+  }
   static void numa_set_bind_policy(int policy) {
     if (_numa_set_bind_policy != NULL) {
       _numa_set_bind_policy(policy);
@@ -392,6 +399,10 @@
       return false;
     }
   }
+
+  static const GrowableArray<int>* numa_nindex_to_node() {  // Read-only accessor for the _nindex_to_node table
+    return _nindex_to_node;  // returned as-is, without a NULL check
+  }
 };
 
 #endif // OS_LINUX_OS_LINUX_HPP
--- a/src/hotspot/share/gc/z/zNUMA.cpp	Thu Jan 30 11:14:12 2020 +0100
+++ b/src/hotspot/share/gc/z/zNUMA.cpp	Thu Jan 30 12:41:26 2020 +0100
@@ -24,7 +24,6 @@
 #include "precompiled.hpp"
 #include "gc/z/zNUMA.hpp"
 #include "logging/log.hpp"
-#include "runtime/os.hpp"
 
 bool ZNUMA::_enabled;
 
--- a/src/hotspot/share/gc/z/zPhysicalMemory.cpp	Thu Jan 30 11:14:12 2020 +0100
+++ b/src/hotspot/share/gc/z/zPhysicalMemory.cpp	Thu Jan 30 12:41:26 2020 +0100
@@ -277,8 +277,11 @@
     size += segment.size();
   }
 
-  // Setup NUMA interleaving
-  if (ZNUMA::is_enabled()) {
+  // Setup NUMA interleaving for large pages
+  if (ZNUMA::is_enabled() && ZLargePages::is_explicit()) {
+    // To get granule-level NUMA interleaving when using large pages,
+    // we simply let the kernel interleave the memory for us at page
+    // fault time.
     os::numa_make_global((char*)addr, size);
   }