changeset 55805:162f4f1c841c

Merge
author prr
date Mon, 01 Jul 2019 14:57:02 -0700
parents af678f2593e2 734e58d8477b
children 0e01b955bfd4
files
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.test/src/org/graalvm/compiler/core/test/GuardedIntrinsicTest.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/WriteBarrierVerificationTest.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/g1/G1ArrayRangePostWriteBarrier.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/g1/G1ArrayRangePreWriteBarrier.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/g1/G1BarrierSet.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/g1/G1PostWriteBarrier.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/g1/G1PreWriteBarrier.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/g1/G1ReferentFieldReadBarrier.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/shared/ArrayRangeWriteBarrier.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/shared/BarrierSet.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/shared/CardTableBarrierSet.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/shared/ObjectWriteBarrier.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/shared/SerialArrayRangeWriteBarrier.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/shared/SerialWriteBarrier.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/nodes/WriteBarrier.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/phases/WriteBarrierAdditionPhase.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/phases/WriteBarrierVerificationPhase.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/replacements/WriteBarrierSnippets.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.phases/src/org/graalvm/compiler/phases/tiers/PhaseContext.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/Log.java
      src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/DirectStoreNode.java
      test/hotspot/jtreg/compiler/codecache/stress/UnexpectedDeoptimizationAllTest.java
      test/hotspot/jtreg/compiler/jvmci/compilerToVM/ResolveConstantInPoolTest.java
      test/jdk/ProblemList.txt
      test/jdk/sun/security/tools/keytool/PSS.java
diffstat 985 files changed, 24612 insertions(+), 16409 deletions(-)
--- a/.hgtags	Wed Jun 26 15:34:13 2019 -0700
+++ b/.hgtags	Mon Jul 01 14:57:02 2019 -0700
@@ -567,3 +567,5 @@
 2f4e214781a1d597ed36bf5a36f20928c6c82996 jdk-14+1
 0692b67f54621991ba7afbf23e55b788f3555e69 jdk-13+26
 43627549a488b7d0b4df8fad436e36233df89877 jdk-14+2
+b7f68ddec66f996ae3aad03291d129ca9f02482d jdk-13+27
+e64383344f144217c36196c3c8a2df8f588a2af3 jdk-14+3
--- a/make/common/FindTests.gmk	Wed Jun 26 15:34:13 2019 -0700
+++ b/make/common/FindTests.gmk	Mon Jul 01 14:57:02 2019 -0700
@@ -62,10 +62,8 @@
 
 # If this file is deemed outdated, it will automatically get regenerated
 # by this rule before being included below.
-#
-# When calling TestMake.gmk, override the log level to avoid any kind of debug
-# output being captured into the generated makefile.
-$(FIND_TESTS_CACHE_FILE): $(JTREG_ROOT_FILES) $(JTREG_GROUP_FILES)
+$(FIND_TESTS_CACHE_FILE): $(JTREG_ROOT_FILES) $(JTREG_GROUP_FILES) \
+    $(TOPDIR)/test/make/TestMake.gmk
 	$(call MakeTargetDir)
 	( $(foreach root, $(JTREG_TESTROOTS), \
 	    $(PRINTF) "\n$(root)_JTREG_TEST_GROUPS := " ; \
@@ -73,10 +71,11 @@
 	      $($(root)_JTREG_GROUP_FILES) \
 	      | $(SORT) -u | $(TR) '\n' ' ' ; \
 	  ) \
-	  $(PRINTF) "\nMAKE_TEST_TARGETS := " ; \
-	  $(MAKE) -s --no-print-directory $(MAKE_ARGS) LOG_LEVEL=warn \
-	      SPEC=$(SPEC) -f $(TOPDIR)/test/make/TestMake.gmk print-targets \
 	) > $@
+	$(PRINTF) "\nMAKE_TEST_TARGETS := " >> $@
+	$(MAKE) -s --no-print-directory $(MAKE_ARGS) \
+	    SPEC=$(SPEC) -f $(TOPDIR)/test/make/TestMake.gmk print-targets \
+	    TARGETS_FILE=$@
 
 -include $(FIND_TESTS_CACHE_FILE)
 
--- a/make/common/ProcessMarkdown.gmk	Wed Jun 26 15:34:13 2019 -0700
+++ b/make/common/ProcessMarkdown.gmk	Mon Jul 01 14:57:02 2019 -0700
@@ -103,7 +103,7 @@
 	$$(call LogInfo, Post-processing markdown file $2)
 	$$(call MakeDir, $$(SUPPORT_OUTPUTDIR)/markdown $$($1_$2_TARGET_DIR))
 	$$(call ExecuteWithLog, $$(SUPPORT_OUTPUTDIR)/markdown/$$($1_$2_MARKER)_post, \
-	    $$($1_POST_PROCESS) < $$($1_$2_PANDOC_OUTPUT) > $$($1_$2_OUTPUT_FILE))
+	    ( $$($1_POST_PROCESS) < $$($1_$2_PANDOC_OUTPUT) > $$($1_$2_OUTPUT_FILE) ) )
   endif
 
   $1 += $$($1_$2_OUTPUT_FILE)
--- a/src/hotspot/cpu/aarch64/aarch64.ad	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/aarch64/aarch64.ad	Mon Jul 01 14:57:02 2019 -0700
@@ -1761,6 +1761,17 @@
   // branch if we need to invalidate the method later
   __ nop();
 
+  if (C->clinit_barrier_on_entry()) {
+    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
+
+    Label L_skip_barrier;
+
+    __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding());
+    __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
+    __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
+    __ bind(L_skip_barrier);
+  }
+  
   int bangsize = C->bang_size_in_bytes();
   if (C->need_stack_bang(bangsize) && UseStackBanging)
     __ generate_stack_overflow_check(bangsize);
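
Note: the hunk above adds a class-initialization entry barrier to the C2 prologue
on AArch64. As a hedged C++ model (all names below are illustrative assumptions,
not HotSpot code), a compiled entry may proceed only if the method's holder class
is fully initialized or is being initialized by the current thread; otherwise it
jumps to the handle-wrong-method stub, which re-dispatches through the runtime
and runs <clinit> first.

    #include <cstdio>

    // Hedged sketch of the entry-barrier predicate (names are assumptions).
    struct KlassSketch {
      bool        fully_initialized;
      const void* init_thread;   // thread currently running <clinit>, if any
    };

    static bool may_enter_compiled_code(const KlassSketch& k,
                                        const void* current_thread) {
      // Mirrors the two fast-path checks clinit_barrier emits (see
      // macroAssembler_aarch64.cpp below).
      return k.fully_initialized || k.init_thread == current_thread;
    }

    int main() {
      int t1 = 0, t2 = 0;              // stand-ins for thread identities
      KlassSketch k = { false, &t1 };  // class being initialized by t1
      std::printf("initializer enters: %d\n", may_enter_compiled_code(k, &t1));
      std::printf("other thread bails: %d\n", may_enter_compiled_code(k, &t2));
      return 0;
    }
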
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -317,7 +317,15 @@
 }
 
 void LIR_Assembler::clinit_barrier(ciMethod* method) {
-  ShouldNotReachHere(); // not implemented
+  assert(VM_Version::supports_fast_class_init_checks(), "sanity");
+  assert(!method->holder()->is_not_initialized(), "initialization should have been started");
+
+  Label L_skip_barrier;
+
+  __ mov_metadata(rscratch2, method->holder()->constant_encoding());
+  __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier /*L_fast_path*/);
+  __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
+  __ bind(L_skip_barrier);
 }
 
 void LIR_Assembler::jobject2reg(jobject o, Register reg) {
--- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -331,11 +331,6 @@
 
 
 void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) {
-  // If we have to make this method not-entrant we'll overwrite its
-  // first instruction with a jump.  For this action to be legal we
-  // must ensure that this first instruction is a B, BL, NOP, BKPT,
-  // SVC, HVC, or SMC.  Make it a NOP.
-  nop();
   assert(bang_size_in_bytes >= framesize, "stack bang size incorrect");
   // Make sure there is enough stack space for this method's activation.
   // Note that we do this before doing an enter().
@@ -355,6 +350,11 @@
 
 
 void C1_MacroAssembler::verified_entry() {
+  // If we have to make this method not-entrant we'll overwrite its
+  // first instruction with a jump.  For this action to be legal we
+  // must ensure that this first instruction is a B, BL, NOP, BKPT,
+  // SVC, HVC, or SMC.  Make it a NOP.
+  nop();
 }
 
 void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) {
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -63,27 +63,25 @@
     return;
   }
 
-  // rscratch1 can be passed as src or dst, so don't use it.
-  RegSet savedRegs = RegSet::of(rscratch2, rheapbase);
+  assert_different_registers(rscratch1, rscratch2, src.base());
+  assert_different_registers(rscratch1, rscratch2, dst);
+
+  RegSet savedRegs = RegSet::range(r0,r28) - RegSet::of(dst, rscratch1, rscratch2);
 
   Label done;
-  assert_different_registers(rheapbase, rscratch2, dst);
-  assert_different_registers(rheapbase, rscratch2, src.base());
-
-  __ push(savedRegs, sp);
 
   // Load bad mask into scratch register.
-  __ ldr(rheapbase, address_bad_mask_from_thread(rthread));
+  __ ldr(rscratch1, address_bad_mask_from_thread(rthread));
   __ lea(rscratch2, src);
   __ ldr(dst, src);
 
   // Test reference against bad mask. If mask bad, then we need to fix it up.
-  __ tst(dst, rheapbase);
+  __ tst(dst, rscratch1);
   __ br(Assembler::EQ, done);
 
   __ enter();
 
-  __ push(RegSet::range(r0,r28) - RegSet::of(dst), sp);
+  __ push(savedRegs, sp);
 
   if (c_rarg0 != dst) {
     __ mov(c_rarg0, dst);
@@ -91,13 +89,15 @@
   __ mov(c_rarg1, rscratch2);
 
   int step = 4 * wordSize;
-  __ mov(rscratch1, -step);
+  __ mov(rscratch2, -step);
   __ sub(sp, sp, step);
 
   for (int i = 28; i >= 4; i -= 4) {
     __ st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
-        as_FloatRegister(i+3), __ T1D, Address(__ post(sp, rscratch1)));
+        as_FloatRegister(i+3), __ T1D, Address(__ post(sp, rscratch2)));
   }
+  __ st1(as_FloatRegister(0), as_FloatRegister(1), as_FloatRegister(2),
+      as_FloatRegister(3), __ T1D, Address(sp));
 
   __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2);
 
@@ -111,13 +111,10 @@
     __ mov(dst, r0);
   }
 
-  __ pop(RegSet::range(r0,r28) - RegSet::of(dst), sp);
+  __ pop(savedRegs, sp);
   __ leave();
 
   __ bind(done);
-
-  // Restore tmps
-  __ pop(savedRegs, sp);
 }
 
 #ifdef ASSERT
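
Note: the rewrite above moves the register save/restore of the ZGC load-barrier
stub entirely onto the slow path (the full r0-r28 set minus dst and the
scratches) and uses rscratch1 instead of rheapbase for the bad mask.
Conceptually the barrier is the following check, shown as a minimal C++ sketch
under simplified assumptions (not HotSpot's implementation):

    #include <cstdint>
    #include <cstdio>

    static uintptr_t bad_mask = 0x1;  // per-thread "address bad mask" in ZGC

    // Stand-in for ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded:
    // heal the reference and store the good pointer back into the field.
    static void* heal(void* ref, void** field) {
      void* healed = (void*)((uintptr_t)ref & ~bad_mask);
      *field = healed;
      return healed;
    }

    static void* z_load_barrier(void** field) {
      void* ref = *field;                                // ldr dst, src
      if (((uintptr_t)ref & bad_mask) == 0) return ref;  // fast path, no spills
      return heal(ref, field);                           // slow path: runtime call
    }

    int main() {
      void* slot = (void*)0x1001;                        // "bad" colored pointer
      std::printf("healed: %p\n", z_load_barrier(&slot));
      return 0;
    }
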
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -288,6 +288,18 @@
   ldr(klass, Address(klass, Array<Klass*>::base_offset_in_bytes()));
 }
 
+void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no,
+                                                              Register method,
+                                                              Register cache) {
+  const int method_offset = in_bytes(
+    ConstantPoolCache::base_offset() +
+      ((byte_no == TemplateTable::f2_byte)
+       ? ConstantPoolCacheEntry::f2_offset()
+       : ConstantPoolCacheEntry::f1_offset()));
+
+  ldr(method, Address(cache, method_offset)); // get f1 Method*
+}
+
 // Generate a subtype check: branch to ok_is_subtype if sub_klass is a
 // subtype of super_klass.
 //
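
Note: load_resolved_method_at_index picks the Method* out of either the f1 or
the f2 slot of a constant-pool-cache entry, keyed by byte_no. The selection
reduces to the offset computation below (a sketch; the symbolic offsets stand in
for ConstantPoolCache::base_offset() and
ConstantPoolCacheEntry::f1_offset()/f2_offset()):

    #include <cstddef>

    // Hedged sketch of the offset selection (parameter names illustrative).
    static size_t resolved_method_offset(int byte_no, int f2_byte,
                                         size_t base_off,
                                         size_t f1_off, size_t f2_off) {
      // byte_no selects which slot of the cache entry holds the Method*.
      return base_off + (byte_no == f2_byte ? f2_off : f1_off);
    }
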
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -124,6 +124,8 @@
   // load cpool->resolved_klass_at(index);
   void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp);
 
+  void load_resolved_method_at_index(int byte_no, Register method, Register cache);
+
   void pop_ptr(Register r = r0);
   void pop_i(Register r = r0);
   void pop_l(Register r = r0);
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1307,6 +1307,35 @@
   bind(L_fallthrough);
 }
 
+void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_fast_path, Label* L_slow_path) {
+  assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required");
+  assert_different_registers(klass, rthread, scratch);
+
+  Label L_fallthrough, L_tmp;
+  if (L_fast_path == NULL) {
+    L_fast_path = &L_fallthrough;
+  } else if (L_slow_path == NULL) {
+    L_slow_path = &L_fallthrough;
+  }
+  // Fast path check: class is fully initialized
+  ldrb(scratch, Address(klass, InstanceKlass::init_state_offset()));
+  subs(zr, scratch, InstanceKlass::fully_initialized);
+  br(Assembler::EQ, *L_fast_path);
+
+  // Fast path check: current thread is initializer thread
+  ldr(scratch, Address(klass, InstanceKlass::init_thread_offset()));
+  cmp(rthread, scratch);
+
+  if (L_slow_path == &L_fallthrough) {
+    br(Assembler::EQ, *L_fast_path);
+    bind(*L_slow_path);
+  } else if (L_fast_path == &L_fallthrough) {
+    br(Assembler::NE, *L_slow_path);
+    bind(*L_fast_path);
+  } else {
+    Unimplemented();
+  }
+}
 
 void MacroAssembler::verify_oop(Register reg, const char* s) {
   if (!VerifyOops) return;
@@ -3683,6 +3712,12 @@
   bs->obj_equals(this, obj1, obj2);
 }
 
+void MacroAssembler::load_method_holder(Register holder, Register method) {
+  ldr(holder, Address(method, Method::const_offset()));                      // ConstMethod*
+  ldr(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
+  ldr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
+}
+
 void MacroAssembler::load_klass(Register dst, Register src) {
   if (UseCompressedClassPointers) {
     ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
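
Note: two helpers are added here. clinit_barrier emits the two fast-path checks
sketched after the aarch64.ad hunk above; load_method_holder chases three
dependent pointers from a Method* to its holder InstanceKlass*. The chain,
modeled in C++ (struct layout is illustrative; the real code loads through byte
offsets):

    // Hedged sketch of the pointer chain load_method_holder walks.
    struct InstanceKlassSketch { /* ... */ };
    struct ConstantPoolSketch  { InstanceKlassSketch* pool_holder;  };
    struct ConstMethodSketch   { ConstantPoolSketch*  constants;    };
    struct MethodSketch        { ConstMethodSketch*   const_method; };

    static InstanceKlassSketch* method_holder(const MethodSketch* m) {
      return m->const_method   // ConstMethod*
              ->constants      // ConstantPool*
              ->pool_holder;   // InstanceKlass*
    }
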
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -788,6 +788,8 @@
   // C 'boolean' to Java boolean: x == 0 ? 0 : 1
   void c2bool(Register x);
 
+  void load_method_holder(Register holder, Register method);
+
   // oop manipulations
   void load_klass(Register dst, Register src);
   void store_klass(Register dst, Register src);
@@ -926,6 +928,11 @@
                            Register temp_reg,
                            Label& L_success);
 
+  void clinit_barrier(Register klass,
+                      Register thread,
+                      Label* L_fast_path = NULL,
+                      Label* L_slow_path = NULL);
+
   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
 
 
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -799,6 +799,22 @@
   }
 #endif
 
+  // Class initialization barrier for static methods
+  if (VM_Version::supports_fast_class_init_checks()) {
+    Label L_skip_barrier;
+
+    { // Bypass the barrier for non-static methods
+      __ ldrw(rscratch1, Address(rmethod, Method::access_flags_offset()));
+      __ andsw(zr, rscratch1, JVM_ACC_STATIC);
+      __ br(Assembler::EQ, L_skip_barrier); // non-static
+    }
+
+    __ load_method_holder(rscratch2, rmethod);
+    __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
+    __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
+    __ bind(L_skip_barrier);
+  }
+
   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
 
   __ flush();
@@ -1580,6 +1596,15 @@
   // SVC, HVC, or SMC.  Make it a NOP.
   __ nop();
 
+  if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
+    Label L_skip_barrier;
+    __ mov_metadata(rscratch2, method->method_holder()); // InstanceKlass*
+    __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
+    __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
+
+    __ bind(L_skip_barrier);
+  }
+
   // Generate stack overflow check
   if (UseStackBanging) {
     __ bang_stack_with_offset(JavaThread::stack_shadow_zone_size());
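
Note: in the c2i adapter the barrier is emitted whenever fast checks are
supported, but bypassed at run time for non-static methods by testing ACC_STATIC
in the method's access flags; the native-wrapper path emits it only when
method->needs_clinit_barrier(). The run-time test reduces to the sketch below
(the flag value is the one defined by the JVM class-file specification):

    // Hedged sketch of the adapter's run-time bypass.
    static bool entry_barrier_applies(int access_flags) {
      const int JVM_ACC_STATIC = 0x0008;  // per the class-file specification
      return (access_flags & JVM_ACC_STATIC) != 0;  // non-statics skip it
    }
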
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1383,7 +1383,12 @@
       // save regs before copy_memory
       __ push(RegSet::of(d, count), sp);
     }
-    copy_memory(aligned, s, d, count, rscratch1, size);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
+      UnsafeCopyMemoryMark ucmm(this, add_entry, true);
+      copy_memory(aligned, s, d, count, rscratch1, size);
+    }
 
     if (is_oop) {
       __ pop(RegSet::of(d, count), sp);
@@ -1455,7 +1460,12 @@
       // save regs before copy_memory
       __ push(RegSet::of(d, count), sp);
     }
-    copy_memory(aligned, s, d, count, rscratch1, -size);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
+      UnsafeCopyMemoryMark ucmm(this, add_entry, true);
+      copy_memory(aligned, s, d, count, rscratch1, -size);
+    }
     if (is_oop) {
       __ pop(RegSet::of(d, count), sp);
       if (VerifyOops)
@@ -5816,6 +5826,10 @@
   }
 }; // end class declaration
 
+#define UCM_TABLE_MAX_ENTRIES 8
 void StubGenerator_generate(CodeBuffer* code, bool all) {
+  if (UnsafeCopyMemory::_table == NULL) {
+    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+  }
   StubGenerator g(code, all);
 }
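
Note: UnsafeCopyMemoryMark is an RAII guard. While it is live, the emitted
instructions form a copy region whose PC range is recorded in a fixed-size table
(sized by UCM_TABLE_MAX_ENTRIES above), so a page fault inside the region can be
resumed instead of crashing the VM. A hedged sketch of the table side (names
suffixed "Sketch" are assumptions, not the real classes in the shared stub
code):

    #include <cstddef>

    struct UCMEntrySketch { const void* start_pc; const void* end_pc; };

    struct UCMTableSketch {
      static const size_t MAX = 8;            // cf. UCM_TABLE_MAX_ENTRIES
      UCMEntrySketch entries[MAX];
      size_t count = 0;

      void add(const void* s, const void* e) {
        if (count < MAX) entries[count++] = UCMEntrySketch{ s, e };
      }
      bool contains(const void* pc) const {   // queried by the fault handler
        for (size_t i = 0; i < count; i++)
          if (pc >= entries[i].start_pc && pc < entries[i].end_pc) return true;
        return false;
      }
    };
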
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -886,8 +886,8 @@
   }
 
   // Get mirror and store it in the frame as GC root for this Method*
-  __ load_mirror(rscratch1, rmethod);
-  __ stp(rscratch1, zr, Address(sp, 4 * wordSize));
+  __ load_mirror(r10, rmethod);
+  __ stp(r10, zr, Address(sp, 4 * wordSize));
 
   __ ldr(rcpool, Address(rmethod, Method::const_offset()));
   __ ldr(rcpool, Address(rcpool, ConstMethod::constants_offset()));
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -2323,7 +2323,7 @@
   const Register temp = r19;
   assert_different_registers(Rcache, index, temp);
 
-  Label resolved;
+  Label resolved, clinit_barrier_slow;
 
   Bytecodes::Code code = bytecode();
   switch (code) {
@@ -2338,6 +2338,8 @@
   __ br(Assembler::EQ, resolved);
 
   // resolve first time through
+  // Class initialization barrier slow path lands here as well.
+  __ bind(clinit_barrier_slow);
   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
   __ mov(temp, (int) code);
   __ call_VM(noreg, entry, temp);
@@ -2347,6 +2349,13 @@
   // n.b. unlike x86 Rcache is now rcpool plus the indexed offset
  // so all clients of this method must be modified accordingly
   __ bind(resolved);
+
+  // Class initialization barrier for static methods
+  if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) {
+    __ load_resolved_method_at_index(byte_no, temp, Rcache);
+    __ load_method_holder(temp, temp);
+    __ clinit_barrier(temp, rscratch1, NULL, &clinit_barrier_slow);
+  }
 }
 
 // The Rcache and index registers must be set before call
@@ -3418,9 +3427,8 @@
   __ profile_virtual_call(r3, r13, r19);
 
   // Get declaring interface class from method, and itable index
-  __ ldr(r0, Address(rmethod, Method::const_offset()));
-  __ ldr(r0, Address(r0, ConstMethod::constants_offset()));
-  __ ldr(r0, Address(r0, ConstantPool::pool_holder_offset_in_bytes()));
+
+  __ load_method_holder(r0, rmethod);
   __ ldrw(rmethod, Address(rmethod, Method::itable_index_offset()));
   __ subw(rmethod, rmethod, Method::itable_index_max);
   __ negw(rmethod, rmethod);
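
Note: in the interpreter, the invokestatic barrier is folded into
constant-pool-cache resolution: once the entry is resolved, the holder of the
resolved method is checked, and if it fails both fast-path tests the code
branches back to clinit_barrier_slow, re-entering resolve_from_cache (which
triggers initialization in the runtime). The decision, sketched in C++ (field
names are hypothetical):

    // Hedged sketch of the re-resolve decision for invokestatic.
    struct HolderSketch { bool fully_initialized; const void* init_thread; };

    static bool must_take_slow_path(const HolderSketch& holder,
                                    const void* current_thread,
                                    bool is_invokestatic) {
      if (!is_invokestatic)         return false;  // barrier only for statics
      if (holder.fully_initialized) return false;  // fast path 1
      return holder.init_thread != current_thread; // fast path 2
    }
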
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -124,6 +124,7 @@
   static int dcache_line_size() {
     return (1 << ((_psr_info.ctr_el0 >> 16) & 0x0f)) * 4;
   }
+  static bool supports_fast_class_init_checks() { return true; }
 };
 
 #endif // CPU_AARCH64_VM_VERSION_AARCH64_HPP
--- a/src/hotspot/cpu/arm/stubGenerator_arm.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/arm/stubGenerator_arm.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -928,7 +928,7 @@
   // Scratches 'count', R3.
   // R4-R10 are preserved (saved/restored).
   //
-  int generate_forward_aligned_copy_loop(Register from, Register to, Register count, int bytes_per_count) {
+  int generate_forward_aligned_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool unsafe_copy = false) {
     assert (from == R0 && to == R1 && count == R2, "adjust the implementation below");
 
     const int bytes_per_loop = 8*wordSize; // 8 registers are read and written on every loop iteration
@@ -954,107 +954,111 @@
 
     Label L_skip_pld;
 
-    // predecrease to exit when there is less than count_per_loop
-    __ sub_32(count, count, count_per_loop);
-
-    if (pld_offset != 0) {
-      pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset;
-
-      prefetch(from, to, 0);
-
-      if (prefetch_before) {
-        // If prefetch is done ahead, final PLDs that overflow the
-        // copied area can be easily avoided. 'count' is predecreased
-        // by the prefetch distance to optimize the inner loop and the
-        // outer loop skips the PLD.
-        __ subs_32(count, count, (bytes_per_loop+pld_offset)/bytes_per_count);
-
-        // skip prefetch for small copies
-        __ b(L_skip_pld, lt);
-      }
-
-      int offset = ArmCopyCacheLineSize;
-      while (offset <= pld_offset) {
-        prefetch(from, to, offset);
-        offset += ArmCopyCacheLineSize;
-      };
-    }
-
     {
-      // 32-bit ARM note: we have tried implementing loop unrolling to skip one
-      // PLD with 64 bytes cache line but the gain was not significant.
-
-      Label L_copy_loop;
-      __ align(OptoLoopAlignment);
-      __ BIND(L_copy_loop);
-
-      if (prefetch_before) {
-        prefetch(from, to, bytes_per_loop + pld_offset);
-        __ BIND(L_skip_pld);
-      }
-
-      if (split_read) {
-        // Split the register set in two sets so that there is less
-        // latency between LDM and STM (R3-R6 available while R7-R10
-        // still loading) and less register locking issue when iterating
-        // on the first LDM.
-        __ ldmia(from, RegisterSet(R3, R6), writeback);
-        __ ldmia(from, RegisterSet(R7, R10), writeback);
-      } else {
-        __ ldmia(from, RegisterSet(R3, R10), writeback);
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true);
+      // predecrease to exit when there is less than count_per_loop
+      __ sub_32(count, count, count_per_loop);
+
+      if (pld_offset != 0) {
+        pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset;
+
+        prefetch(from, to, 0);
+
+        if (prefetch_before) {
+          // If prefetch is done ahead, final PLDs that overflow the
+          // copied area can be easily avoided. 'count' is predecreased
+          // by the prefetch distance to optimize the inner loop and the
+          // outer loop skips the PLD.
+          __ subs_32(count, count, (bytes_per_loop+pld_offset)/bytes_per_count);
+
+          // skip prefetch for small copies
+          __ b(L_skip_pld, lt);
+        }
+
+        int offset = ArmCopyCacheLineSize;
+        while (offset <= pld_offset) {
+          prefetch(from, to, offset);
+          offset += ArmCopyCacheLineSize;
+        };
       }
 
-      __ subs_32(count, count, count_per_loop);
-
-      if (prefetch_after) {
-        prefetch(from, to, pld_offset, bytes_per_loop);
-      }
-
-      if (split_write) {
-        __ stmia(to, RegisterSet(R3, R6), writeback);
-        __ stmia(to, RegisterSet(R7, R10), writeback);
-      } else {
-        __ stmia(to, RegisterSet(R3, R10), writeback);
-      }
-
-      __ b(L_copy_loop, ge);
-
-      if (prefetch_before) {
-        // the inner loop may end earlier, allowing to skip PLD for the last iterations
-        __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count);
-        __ b(L_skip_pld, ge);
+      {
+        // 32-bit ARM note: we have tried implementing loop unrolling to skip one
+        // PLD with 64 bytes cache line but the gain was not significant.
+
+        Label L_copy_loop;
+        __ align(OptoLoopAlignment);
+        __ BIND(L_copy_loop);
+
+        if (prefetch_before) {
+          prefetch(from, to, bytes_per_loop + pld_offset);
+          __ BIND(L_skip_pld);
+        }
+
+        if (split_read) {
+          // Split the register set in two sets so that there is less
+          // latency between LDM and STM (R3-R6 available while R7-R10
+          // still loading) and less register locking issue when iterating
+          // on the first LDM.
+          __ ldmia(from, RegisterSet(R3, R6), writeback);
+          __ ldmia(from, RegisterSet(R7, R10), writeback);
+        } else {
+          __ ldmia(from, RegisterSet(R3, R10), writeback);
+        }
+
+        __ subs_32(count, count, count_per_loop);
+
+        if (prefetch_after) {
+          prefetch(from, to, pld_offset, bytes_per_loop);
+        }
+
+        if (split_write) {
+          __ stmia(to, RegisterSet(R3, R6), writeback);
+          __ stmia(to, RegisterSet(R7, R10), writeback);
+        } else {
+          __ stmia(to, RegisterSet(R3, R10), writeback);
+        }
+
+        __ b(L_copy_loop, ge);
+
+        if (prefetch_before) {
+          // the inner loop may end earlier, allowing to skip PLD for the last iterations
+          __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count);
+          __ b(L_skip_pld, ge);
+        }
       }
-    }
-    BLOCK_COMMENT("Remaining bytes:");
-    // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes
-
-    // __ add(count, count, ...); // addition useless for the bit tests
-    assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits");
-
-    __ tst(count, 16 / bytes_per_count);
-    __ ldmia(from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes
-    __ stmia(to, RegisterSet(R3, R6), writeback, ne);
-
-    __ tst(count, 8 / bytes_per_count);
-    __ ldmia(from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes
-    __ stmia(to, RegisterSet(R3, R4), writeback, ne);
-
-    if (bytes_per_count <= 4) {
-      __ tst(count, 4 / bytes_per_count);
-      __ ldr(R3, Address(from, 4, post_indexed), ne); // copy 4 bytes
-      __ str(R3, Address(to, 4, post_indexed), ne);
-    }
-
-    if (bytes_per_count <= 2) {
-      __ tst(count, 2 / bytes_per_count);
-      __ ldrh(R3, Address(from, 2, post_indexed), ne); // copy 2 bytes
-      __ strh(R3, Address(to, 2, post_indexed), ne);
-    }
-
-    if (bytes_per_count == 1) {
-      __ tst(count, 1);
-      __ ldrb(R3, Address(from, 1, post_indexed), ne);
-      __ strb(R3, Address(to, 1, post_indexed), ne);
+      BLOCK_COMMENT("Remaining bytes:");
+      // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes
+
+      // __ add(count, count, ...); // addition useless for the bit tests
+      assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits");
+
+      __ tst(count, 16 / bytes_per_count);
+      __ ldmia(from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes
+      __ stmia(to, RegisterSet(R3, R6), writeback, ne);
+
+      __ tst(count, 8 / bytes_per_count);
+      __ ldmia(from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes
+      __ stmia(to, RegisterSet(R3, R4), writeback, ne);
+
+      if (bytes_per_count <= 4) {
+        __ tst(count, 4 / bytes_per_count);
+        __ ldr(R3, Address(from, 4, post_indexed), ne); // copy 4 bytes
+        __ str(R3, Address(to, 4, post_indexed), ne);
+      }
+
+      if (bytes_per_count <= 2) {
+        __ tst(count, 2 / bytes_per_count);
+        __ ldrh(R3, Address(from, 2, post_indexed), ne); // copy 2 bytes
+        __ strh(R3, Address(to, 2, post_indexed), ne);
+      }
+
+      if (bytes_per_count == 1) {
+        __ tst(count, 1);
+        __ ldrb(R3, Address(from, 1, post_indexed), ne);
+        __ strb(R3, Address(to, 1, post_indexed), ne);
+      }
     }
 
     __ pop(RegisterSet(R4,R10));
@@ -1083,7 +1087,7 @@
   // Scratches 'count', R3.
   // ARM R4-R10 are preserved (saved/restored).
   //
-  int generate_backward_aligned_copy_loop(Register end_from, Register end_to, Register count, int bytes_per_count) {
+  int generate_backward_aligned_copy_loop(Register end_from, Register end_to, Register count, int bytes_per_count, bool unsafe_copy = false) {
     assert (end_from == R0 && end_to == R1 && count == R2, "adjust the implementation below");
 
     const int bytes_per_loop = 8*wordSize; // 8 registers are read and written on every loop iteration
@@ -1099,102 +1103,105 @@
 
     __ push(RegisterSet(R4,R10));
 
-    __ sub_32(count, count, count_per_loop);
-
-    const bool prefetch_before = pld_offset < 0;
-    const bool prefetch_after = pld_offset > 0;
-
-    Label L_skip_pld;
-
-    if (pld_offset != 0) {
-      pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset;
-
-      prefetch(end_from, end_to, -wordSize);
-
-      if (prefetch_before) {
-        __ subs_32(count, count, (bytes_per_loop + pld_offset) / bytes_per_count);
-        __ b(L_skip_pld, lt);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true);
+      __ sub_32(count, count, count_per_loop);
+
+      const bool prefetch_before = pld_offset < 0;
+      const bool prefetch_after = pld_offset > 0;
+
+      Label L_skip_pld;
+
+      if (pld_offset != 0) {
+        pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset;
+
+        prefetch(end_from, end_to, -wordSize);
+
+        if (prefetch_before) {
+          __ subs_32(count, count, (bytes_per_loop + pld_offset) / bytes_per_count);
+          __ b(L_skip_pld, lt);
+        }
+
+        int offset = ArmCopyCacheLineSize;
+        while (offset <= pld_offset) {
+          prefetch(end_from, end_to, -(wordSize + offset));
+          offset += ArmCopyCacheLineSize;
+        };
       }
 
-      int offset = ArmCopyCacheLineSize;
-      while (offset <= pld_offset) {
-        prefetch(end_from, end_to, -(wordSize + offset));
-        offset += ArmCopyCacheLineSize;
-      };
-    }
-
-    {
-      // 32-bit ARM note: we have tried implementing loop unrolling to skip one
-      // PLD with 64 bytes cache line but the gain was not significant.
-
-      Label L_copy_loop;
-      __ align(OptoLoopAlignment);
-      __ BIND(L_copy_loop);
-
-      if (prefetch_before) {
-        prefetch(end_from, end_to, -(wordSize + bytes_per_loop + pld_offset));
-        __ BIND(L_skip_pld);
+      {
+        // 32-bit ARM note: we have tried implementing loop unrolling to skip one
+        // PLD with 64 bytes cache line but the gain was not significant.
+
+        Label L_copy_loop;
+        __ align(OptoLoopAlignment);
+        __ BIND(L_copy_loop);
+
+        if (prefetch_before) {
+          prefetch(end_from, end_to, -(wordSize + bytes_per_loop + pld_offset));
+          __ BIND(L_skip_pld);
+        }
+
+        if (split_read) {
+          __ ldmdb(end_from, RegisterSet(R7, R10), writeback);
+          __ ldmdb(end_from, RegisterSet(R3, R6), writeback);
+        } else {
+          __ ldmdb(end_from, RegisterSet(R3, R10), writeback);
+        }
+
+        __ subs_32(count, count, count_per_loop);
+
+        if (prefetch_after) {
+          prefetch(end_from, end_to, -(wordSize + pld_offset), -bytes_per_loop);
+        }
+
+        if (split_write) {
+          __ stmdb(end_to, RegisterSet(R7, R10), writeback);
+          __ stmdb(end_to, RegisterSet(R3, R6), writeback);
+        } else {
+          __ stmdb(end_to, RegisterSet(R3, R10), writeback);
+        }
+
+        __ b(L_copy_loop, ge);
+
+        if (prefetch_before) {
+          __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count);
+          __ b(L_skip_pld, ge);
+        }
       }
-
-      if (split_read) {
-        __ ldmdb(end_from, RegisterSet(R7, R10), writeback);
-        __ ldmdb(end_from, RegisterSet(R3, R6), writeback);
-      } else {
-        __ ldmdb(end_from, RegisterSet(R3, R10), writeback);
-      }
-
-      __ subs_32(count, count, count_per_loop);
-
-      if (prefetch_after) {
-        prefetch(end_from, end_to, -(wordSize + pld_offset), -bytes_per_loop);
+      BLOCK_COMMENT("Remaining bytes:");
+      // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes
+
+      // __ add(count, count, ...); // addition useless for the bit tests
+      assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits");
+
+      __ tst(count, 16 / bytes_per_count);
+      __ ldmdb(end_from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes
+      __ stmdb(end_to, RegisterSet(R3, R6), writeback, ne);
+
+      __ tst(count, 8 / bytes_per_count);
+      __ ldmdb(end_from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes
+      __ stmdb(end_to, RegisterSet(R3, R4), writeback, ne);
+
+      if (bytes_per_count <= 4) {
+        __ tst(count, 4 / bytes_per_count);
+        __ ldr(R3, Address(end_from, -4, pre_indexed), ne); // copy 4 bytes
+        __ str(R3, Address(end_to, -4, pre_indexed), ne);
       }
 
-      if (split_write) {
-        __ stmdb(end_to, RegisterSet(R7, R10), writeback);
-        __ stmdb(end_to, RegisterSet(R3, R6), writeback);
-      } else {
-        __ stmdb(end_to, RegisterSet(R3, R10), writeback);
+      if (bytes_per_count <= 2) {
+        __ tst(count, 2 / bytes_per_count);
+        __ ldrh(R3, Address(end_from, -2, pre_indexed), ne); // copy 2 bytes
+        __ strh(R3, Address(end_to, -2, pre_indexed), ne);
       }
 
-      __ b(L_copy_loop, ge);
-
-      if (prefetch_before) {
-        __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count);
-        __ b(L_skip_pld, ge);
+      if (bytes_per_count == 1) {
+        __ tst(count, 1);
+        __ ldrb(R3, Address(end_from, -1, pre_indexed), ne);
+        __ strb(R3, Address(end_to, -1, pre_indexed), ne);
       }
     }
-    BLOCK_COMMENT("Remaining bytes:");
-    // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes
-
-    // __ add(count, count, ...); // addition useless for the bit tests
-    assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits");
-
-    __ tst(count, 16 / bytes_per_count);
-    __ ldmdb(end_from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes
-    __ stmdb(end_to, RegisterSet(R3, R6), writeback, ne);
-
-    __ tst(count, 8 / bytes_per_count);
-    __ ldmdb(end_from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes
-    __ stmdb(end_to, RegisterSet(R3, R4), writeback, ne);
-
-    if (bytes_per_count <= 4) {
-      __ tst(count, 4 / bytes_per_count);
-      __ ldr(R3, Address(end_from, -4, pre_indexed), ne); // copy 4 bytes
-      __ str(R3, Address(end_to, -4, pre_indexed), ne);
-    }
-
-    if (bytes_per_count <= 2) {
-      __ tst(count, 2 / bytes_per_count);
-      __ ldrh(R3, Address(end_from, -2, pre_indexed), ne); // copy 2 bytes
-      __ strh(R3, Address(end_to, -2, pre_indexed), ne);
-    }
-
-    if (bytes_per_count == 1) {
-      __ tst(count, 1);
-      __ ldrb(R3, Address(end_from, -1, pre_indexed), ne);
-      __ strb(R3, Address(end_to, -1, pre_indexed), ne);
-    }
-
     __ pop(RegisterSet(R4,R10));
 
     return count_per_loop;
@@ -1749,17 +1756,21 @@
   //
   // Notes:
   //     shifts 'from' and 'to'
-  void copy_small_array(Register from, Register to, Register count, Register tmp, Register tmp2, int bytes_per_count, bool forward, Label & entry) {
+  void copy_small_array(Register from, Register to, Register count, Register tmp, Register tmp2, int bytes_per_count, bool forward, Label & entry, bool unsafe_copy = false) {
     assert_different_registers(from, to, count, tmp);
 
-    __ align(OptoLoopAlignment);
-    Label L_small_loop;
-    __ BIND(L_small_loop);
-    store_one(tmp, to, bytes_per_count, forward, al, tmp2);
-    __ BIND(entry); // entry point
-    __ subs(count, count, 1);
-    load_one(tmp, from, bytes_per_count, forward, ge, tmp2);
-    __ b(L_small_loop, ge);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true);
+      __ align(OptoLoopAlignment);
+      Label L_small_loop;
+      __ BIND(L_small_loop);
+      store_one(tmp, to, bytes_per_count, forward, al, tmp2);
+      __ BIND(entry); // entry point
+      __ subs(count, count, 1);
+      load_one(tmp, from, bytes_per_count, forward, ge, tmp2);
+      __ b(L_small_loop, ge);
+    }
   }
 
   // Aligns 'to' by reading one word from 'from' and writting its part to 'to'.
@@ -1876,7 +1887,7 @@
   //
   // Scratches 'from', 'count', R3 and R12.
   // R4-R10 saved for use.
-  int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool forward) {
+  int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool forward, bool unsafe_copy = false) {
 
     const Register Rval = forward ? R12 : R3; // as generate_{forward,backward}_shifted_copy_loop expect
 
@@ -1886,60 +1897,64 @@
     // then the remainder of 'to' divided by wordSize is one of elements of {seq}.
 
     __ push(RegisterSet(R4,R10));
-    load_one(Rval, from, wordSize, forward);
-
-    switch (bytes_per_count) {
-      case 2:
-        min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
-        break;
-      case 1:
-      {
-        Label L1, L2, L3;
-        int min_copy1, min_copy2, min_copy3;
-
-        Label L_loop_finished;
-
-        if (forward) {
-            __ tbz(to, 0, L2);
-            __ tbz(to, 1, L1);
-
-            __ BIND(L3);
-            min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
-            __ b(L_loop_finished);
-
-            __ BIND(L1);
-            min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
-            __ b(L_loop_finished);
-
-            __ BIND(L2);
-            min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
-        } else {
-            __ tbz(to, 0, L2);
-            __ tbnz(to, 1, L3);
-
-            __ BIND(L1);
-            min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
-            __ b(L_loop_finished);
-
-             __ BIND(L3);
-            min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
-            __ b(L_loop_finished);
-
-           __ BIND(L2);
-            min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
+
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true);
+      load_one(Rval, from, wordSize, forward);
+
+      switch (bytes_per_count) {
+        case 2:
+          min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
+          break;
+        case 1:
+        {
+          Label L1, L2, L3;
+          int min_copy1, min_copy2, min_copy3;
+
+          Label L_loop_finished;
+
+          if (forward) {
+              __ tbz(to, 0, L2);
+              __ tbz(to, 1, L1);
+
+              __ BIND(L3);
+              min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
+              __ b(L_loop_finished);
+
+              __ BIND(L1);
+              min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
+              __ b(L_loop_finished);
+
+              __ BIND(L2);
+              min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
+          } else {
+              __ tbz(to, 0, L2);
+              __ tbnz(to, 1, L3);
+
+              __ BIND(L1);
+              min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
+              __ b(L_loop_finished);
+
+               __ BIND(L3);
+              min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
+              __ b(L_loop_finished);
+
+             __ BIND(L2);
+              min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
+          }
+
+          min_copy = MAX2(MAX2(min_copy1, min_copy2), min_copy3);
+
+          __ BIND(L_loop_finished);
+
+          break;
         }
-
-        min_copy = MAX2(MAX2(min_copy1, min_copy2), min_copy3);
-
-        __ BIND(L_loop_finished);
-
-        break;
+        default:
+          ShouldNotReachHere();
+          break;
       }
-      default:
-        ShouldNotReachHere();
-        break;
     }
-
     __ pop(RegisterSet(R4,R10));
 
     return min_copy;
@@ -1963,6 +1978,13 @@
   }
 #endif // !PRODUCT
 
+  address generate_unsafecopy_common_error_exit() {
+    address start_pc = __ pc();
+    __ mov(R0, 0);
+    __ ret();
+    return start_pc;
+  }
+
   //
   //  Generate stub for primitive array copy.  If "aligned" is true, the
   //  "from" and "to" addresses are assumed to be heapword aligned.
@@ -2033,8 +2055,13 @@
         from_is_aligned = true;
     }
 
-    int count_required_to_align = from_is_aligned ? 0 : align_src(from, to, count, tmp1, bytes_per_count, forward);
-    assert (small_copy_limit >= count_required_to_align, "alignment could exhaust count");
+    int count_required_to_align = 0;
+    {
+      // UnsafeCopyMemoryMark page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      count_required_to_align = from_is_aligned ? 0 : align_src(from, to, count, tmp1, bytes_per_count, forward);
+      assert (small_copy_limit >= count_required_to_align, "alignment could exhaust count");
+    }
 
     // now 'from' is aligned
 
@@ -2064,9 +2091,9 @@
 
     int min_copy;
     if (forward) {
-      min_copy = generate_forward_aligned_copy_loop (from, to, count, bytes_per_count);
+      min_copy = generate_forward_aligned_copy_loop(from, to, count, bytes_per_count, !aligned /*add UnsafeCopyMemory entry*/);
     } else {
-      min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count);
+      min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count, !aligned /*add UnsafeCopyMemory entry*/);
     }
     assert(small_copy_limit >= count_required_to_align + min_copy, "first loop might exhaust count");
 
@@ -2077,7 +2104,7 @@
     __ ret();
 
     {
-      copy_small_array(from, to, count, tmp1, tmp2, bytes_per_count, forward, L_small_array /* entry */);
+      copy_small_array(from, to, count, tmp1, tmp2, bytes_per_count, forward, L_small_array /* entry */, !aligned /*add UnsafeCopyMemory entry*/);
 
       if (status) {
         __ mov(R0, 0); // OK
@@ -2088,7 +2115,7 @@
 
     if (! to_is_aligned) {
       __ BIND(L_unaligned_dst);
-      int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
+      int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward, !aligned /*add UnsafeCopyMemory entry*/);
       assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
 
       if (status) {
@@ -2873,6 +2900,9 @@
     status = true; // generate a status compatible with C1 calls
 #endif
 
+    address ucm_common_error_exit       =  generate_unsafecopy_common_error_exit();
+    UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit);
+
     // these need always status in case they are called from generic_arraycopy
     StubRoutines::_jbyte_disjoint_arraycopy  = generate_primitive_copy(false, "jbyte_disjoint_arraycopy",  true, 1, true);
     StubRoutines::_jshort_disjoint_arraycopy = generate_primitive_copy(false, "jshort_disjoint_arraycopy", true, 2, true);
@@ -3055,6 +3085,10 @@
   }
 }; // end class declaration
 
+#define UCM_TABLE_MAX_ENTRIES 32
 void StubGenerator_generate(CodeBuffer* code, bool all) {
+  if (UnsafeCopyMemory::_table == NULL) {
+    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+  }
   StubGenerator g(code, all);
 }
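
Note: on ARM the copy stubs additionally install a common error exit
(generate_unsafecopy_common_error_exit above), which just returns 0. When a
fault lands inside a registered unsafe-copy range, the signal handler redirects
execution there instead of aborting. The handler-side lookup amounts to the
following hypothetical sketch (not HotSpot's actual signal-handling code):

    #include <cstdint>

    struct RangeSketch { uintptr_t start_pc; uintptr_t end_pc; };

    // Returns the continuation PC for a fault, or nullptr if the fault is
    // outside every registered copy range.
    static const void* ucm_continuation(const RangeSketch* ranges, int n,
                                        uintptr_t fault_pc,
                                        const void* common_error_exit) {
      for (int i = 0; i < n; i++)
        if (fault_pc >= ranges[i].start_pc && fault_pc < ranges[i].end_pc)
          return common_error_exit;  // resume here; the stub returns 0
      return nullptr;
    }
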
--- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -952,6 +952,20 @@
     // need to copy backwards
   }
 
+  // This is the common error exit stub for UnsafeCopyMemory.
+  address generate_unsafecopy_common_error_exit() {
+    address start_pc = __ pc();
+    Register tmp1 = R6_ARG4;
+    // The copy stub has probably changed the DSCR value; reset it.
+    if (VM_Version::has_mfdscr()) {
+      __ load_const_optimized(tmp1, VM_Version::_dscr_val);
+      __ mtdscr(tmp1);
+    }
+    __ li(R3_RET, 0); // return 0
+    __ blr();
+    return start_pc;
+  }
+
   // The guideline in the implementations of generate_disjoint_xxx_copy
   // (xxx=byte,short,int,long,oop) is to copy as many elements as possible with
   // single instructions, but to avoid alignment interrupts (see subsequent
@@ -989,150 +1003,154 @@
     VectorSRegister tmp_vsr2  = VSR2;
 
     Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10;
-
-    // Don't try anything fancy if arrays don't have many elements.
-    __ li(tmp3, 0);
-    __ cmpwi(CCR0, R5_ARG3, 17);
-    __ ble(CCR0, l_6); // copy 4 at a time
-
-    if (!aligned) {
-      __ xorr(tmp1, R3_ARG1, R4_ARG2);
-      __ andi_(tmp1, tmp1, 3);
-      __ bne(CCR0, l_6); // If arrays don't have the same alignment mod 4, do 4 element copy.
-
-      // Copy elements if necessary to align to 4 bytes.
-      __ neg(tmp1, R3_ARG1); // Compute distance to alignment boundary.
-      __ andi_(tmp1, tmp1, 3);
-      __ beq(CCR0, l_2);
-
-      __ subf(R5_ARG3, tmp1, R5_ARG3);
-      __ bind(l_9);
-      __ lbz(tmp2, 0, R3_ARG1);
-      __ addic_(tmp1, tmp1, -1);
-      __ stb(tmp2, 0, R4_ARG2);
-      __ addi(R3_ARG1, R3_ARG1, 1);
-      __ addi(R4_ARG2, R4_ARG2, 1);
-      __ bne(CCR0, l_9);
-
-      __ bind(l_2);
-    }
-
-    // copy 8 elements at a time
-    __ xorr(tmp2, R3_ARG1, R4_ARG2); // skip if src & dest have differing alignment mod 8
-    __ andi_(tmp1, tmp2, 7);
-    __ bne(CCR0, l_7); // not same alignment -> to or from is aligned -> copy 8
-
-    // copy a 2-element word if necessary to align to 8 bytes
-    __ andi_(R0, R3_ARG1, 7);
-    __ beq(CCR0, l_7);
-
-    __ lwzx(tmp2, R3_ARG1, tmp3);
-    __ addi(R5_ARG3, R5_ARG3, -4);
-    __ stwx(tmp2, R4_ARG2, tmp3);
-    { // FasterArrayCopy
-      __ addi(R3_ARG1, R3_ARG1, 4);
-      __ addi(R4_ARG2, R4_ARG2, 4);
-    }
-    __ bind(l_7);
-
-    { // FasterArrayCopy
-      __ cmpwi(CCR0, R5_ARG3, 31);
-      __ ble(CCR0, l_6); // copy 2 at a time if less than 32 elements remain
-
-      __ srdi(tmp1, R5_ARG3, 5);
-      __ andi_(R5_ARG3, R5_ARG3, 31);
-      __ mtctr(tmp1);
-
-     if (!VM_Version::has_vsx()) {
-
-      __ bind(l_8);
-      // Use unrolled version for mass copying (copy 32 elements a time)
-      // Load feeding store gets zero latency on Power6, however not on Power5.
-      // Therefore, the following sequence is made for the good of both.
-      __ ld(tmp1, 0, R3_ARG1);
-      __ ld(tmp2, 8, R3_ARG1);
-      __ ld(tmp3, 16, R3_ARG1);
-      __ ld(tmp4, 24, R3_ARG1);
-      __ std(tmp1, 0, R4_ARG2);
-      __ std(tmp2, 8, R4_ARG2);
-      __ std(tmp3, 16, R4_ARG2);
-      __ std(tmp4, 24, R4_ARG2);
-      __ addi(R3_ARG1, R3_ARG1, 32);
-      __ addi(R4_ARG2, R4_ARG2, 32);
-      __ bdnz(l_8);
-
-    } else { // Processor supports VSX, so use it to mass copy.
-
-      // Prefetch the data into the L2 cache.
-      __ dcbt(R3_ARG1, 0);
-
-      // If supported set DSCR pre-fetch to deepest.
-      if (VM_Version::has_mfdscr()) {
-        __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7);
-        __ mtdscr(tmp2);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+
+      // Don't try anything fancy if arrays don't have many elements.
+      __ li(tmp3, 0);
+      __ cmpwi(CCR0, R5_ARG3, 17);
+      __ ble(CCR0, l_6); // copy 4 at a time
+
+      if (!aligned) {
+        __ xorr(tmp1, R3_ARG1, R4_ARG2);
+        __ andi_(tmp1, tmp1, 3);
+        __ bne(CCR0, l_6); // If arrays don't have the same alignment mod 4, do 4 element copy.
+
+        // Copy elements if necessary to align to 4 bytes.
+        __ neg(tmp1, R3_ARG1); // Compute distance to alignment boundary.
+        __ andi_(tmp1, tmp1, 3);
+        __ beq(CCR0, l_2);
+
+        __ subf(R5_ARG3, tmp1, R5_ARG3);
+        __ bind(l_9);
+        __ lbz(tmp2, 0, R3_ARG1);
+        __ addic_(tmp1, tmp1, -1);
+        __ stb(tmp2, 0, R4_ARG2);
+        __ addi(R3_ARG1, R3_ARG1, 1);
+        __ addi(R4_ARG2, R4_ARG2, 1);
+        __ bne(CCR0, l_9);
+
+        __ bind(l_2);
+      }
+
+      // copy 8 elements at a time
+      __ xorr(tmp2, R3_ARG1, R4_ARG2); // skip if src & dest have differing alignment mod 8
+      __ andi_(tmp1, tmp2, 7);
+      __ bne(CCR0, l_7); // not same alignment -> to or from is aligned -> copy 8
+
+      // copy a 2-element word if necessary to align to 8 bytes
+      __ andi_(R0, R3_ARG1, 7);
+      __ beq(CCR0, l_7);
+
+      __ lwzx(tmp2, R3_ARG1, tmp3);
+      __ addi(R5_ARG3, R5_ARG3, -4);
+      __ stwx(tmp2, R4_ARG2, tmp3);
+      { // FasterArrayCopy
+        __ addi(R3_ARG1, R3_ARG1, 4);
+        __ addi(R4_ARG2, R4_ARG2, 4);
       }
-
-      __ li(tmp1, 16);
-
-      // Backbranch target aligned to 32-byte. Not 16-byte align as
-      // loop contains < 8 instructions that fit inside a single
-      // i-cache sector.
-      __ align(32);
-
-      __ bind(l_10);
-      // Use loop with VSX load/store instructions to
-      // copy 32 elements a time.
-      __ lxvd2x(tmp_vsr1, R3_ARG1);        // Load src
-      __ stxvd2x(tmp_vsr1, R4_ARG2);       // Store to dst
-      __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1);  // Load src + 16
-      __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16
-      __ addi(R3_ARG1, R3_ARG1, 32);       // Update src+=32
-      __ addi(R4_ARG2, R4_ARG2, 32);       // Update dsc+=32
-      __ bdnz(l_10);                       // Dec CTR and loop if not zero.
-
-      // Restore DSCR pre-fetch value.
-      if (VM_Version::has_mfdscr()) {
-        __ load_const_optimized(tmp2, VM_Version::_dscr_val);
-        __ mtdscr(tmp2);
+      __ bind(l_7);
+
+      { // FasterArrayCopy
+        __ cmpwi(CCR0, R5_ARG3, 31);
+        __ ble(CCR0, l_6); // copy 2 at a time if less than 32 elements remain
+
+        __ srdi(tmp1, R5_ARG3, 5);
+        __ andi_(R5_ARG3, R5_ARG3, 31);
+        __ mtctr(tmp1);
+
+       if (!VM_Version::has_vsx()) {
+
+        __ bind(l_8);
+        // Use unrolled version for mass copying (copy 32 elements a time)
+        // Load feeding store gets zero latency on Power6, however not on Power5.
+        // Therefore, the following sequence is made for the good of both.
+        __ ld(tmp1, 0, R3_ARG1);
+        __ ld(tmp2, 8, R3_ARG1);
+        __ ld(tmp3, 16, R3_ARG1);
+        __ ld(tmp4, 24, R3_ARG1);
+        __ std(tmp1, 0, R4_ARG2);
+        __ std(tmp2, 8, R4_ARG2);
+        __ std(tmp3, 16, R4_ARG2);
+        __ std(tmp4, 24, R4_ARG2);
+        __ addi(R3_ARG1, R3_ARG1, 32);
+        __ addi(R4_ARG2, R4_ARG2, 32);
+        __ bdnz(l_8);
+
+      } else { // Processor supports VSX, so use it to mass copy.
+
+        // Prefetch the data into the L2 cache.
+        __ dcbt(R3_ARG1, 0);
+
+        // If supported set DSCR pre-fetch to deepest.
+        if (VM_Version::has_mfdscr()) {
+          __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7);
+          __ mtdscr(tmp2);
+        }
+
+        __ li(tmp1, 16);
+
+        // Backbranch target aligned to 32-byte. Not 16-byte align as
+        // loop contains < 8 instructions that fit inside a single
+        // i-cache sector.
+        __ align(32);
+
+        __ bind(l_10);
+        // Use loop with VSX load/store instructions to
+        // copy 32 elements a time.
+        __ lxvd2x(tmp_vsr1, R3_ARG1);        // Load src
+        __ stxvd2x(tmp_vsr1, R4_ARG2);       // Store to dst
+        __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1);  // Load src + 16
+        __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16
+        __ addi(R3_ARG1, R3_ARG1, 32);       // Update src+=32
+        __ addi(R4_ARG2, R4_ARG2, 32);       // Update dsc+=32
+        __ bdnz(l_10);                       // Dec CTR and loop if not zero.
+
+        // Restore DSCR pre-fetch value.
+        if (VM_Version::has_mfdscr()) {
+          __ load_const_optimized(tmp2, VM_Version::_dscr_val);
+          __ mtdscr(tmp2);
+        }
+
+      } // VSX
+     } // FasterArrayCopy
+
+      __ bind(l_6);
+
+      // copy 4 elements at a time
+      __ cmpwi(CCR0, R5_ARG3, 4);
+      __ blt(CCR0, l_1);
+      __ srdi(tmp1, R5_ARG3, 2);
+      __ mtctr(tmp1); // is > 0
+      __ andi_(R5_ARG3, R5_ARG3, 3);
+
+      { // FasterArrayCopy
+        __ addi(R3_ARG1, R3_ARG1, -4);
+        __ addi(R4_ARG2, R4_ARG2, -4);
+        __ bind(l_3);
+        __ lwzu(tmp2, 4, R3_ARG1);
+        __ stwu(tmp2, 4, R4_ARG2);
+        __ bdnz(l_3);
+        __ addi(R3_ARG1, R3_ARG1, 4);
+        __ addi(R4_ARG2, R4_ARG2, 4);
       }
 
-    } // VSX
-   } // FasterArrayCopy
-
-    __ bind(l_6);
-
-    // copy 4 elements at a time
-    __ cmpwi(CCR0, R5_ARG3, 4);
-    __ blt(CCR0, l_1);
-    __ srdi(tmp1, R5_ARG3, 2);
-    __ mtctr(tmp1); // is > 0
-    __ andi_(R5_ARG3, R5_ARG3, 3);
-
-    { // FasterArrayCopy
-      __ addi(R3_ARG1, R3_ARG1, -4);
-      __ addi(R4_ARG2, R4_ARG2, -4);
-      __ bind(l_3);
-      __ lwzu(tmp2, 4, R3_ARG1);
-      __ stwu(tmp2, 4, R4_ARG2);
-      __ bdnz(l_3);
-      __ addi(R3_ARG1, R3_ARG1, 4);
-      __ addi(R4_ARG2, R4_ARG2, 4);
-    }
-
-    // do single element copy
-    __ bind(l_1);
-    __ cmpwi(CCR0, R5_ARG3, 0);
-    __ beq(CCR0, l_4);
-
-    { // FasterArrayCopy
-      __ mtctr(R5_ARG3);
-      __ addi(R3_ARG1, R3_ARG1, -1);
-      __ addi(R4_ARG2, R4_ARG2, -1);
-
-      __ bind(l_5);
-      __ lbzu(tmp2, 1, R3_ARG1);
-      __ stbu(tmp2, 1, R4_ARG2);
-      __ bdnz(l_5);
+      // do single element copy
+      __ bind(l_1);
+      __ cmpwi(CCR0, R5_ARG3, 0);
+      __ beq(CCR0, l_4);
+
+      { // FasterArrayCopy
+        __ mtctr(R5_ARG3);
+        __ addi(R3_ARG1, R3_ARG1, -1);
+        __ addi(R4_ARG2, R4_ARG2, -1);
+
+        __ bind(l_5);
+        __ lbzu(tmp2, 1, R3_ARG1);
+        __ stbu(tmp2, 1, R4_ARG2);
+        __ bdnz(l_5);
+      }
     }
 
     __ bind(l_4);
@@ -1167,15 +1185,17 @@
     // Do reverse copy. We assume the case of actual overlap is rare enough
     // that we don't have to optimize it.
     Label l_1, l_2;
-
-    __ b(l_2);
-    __ bind(l_1);
-    __ stbx(tmp1, R4_ARG2, R5_ARG3);
-    __ bind(l_2);
-    __ addic_(R5_ARG3, R5_ARG3, -1);
-    __ lbzx(tmp1, R3_ARG1, R5_ARG3);
-    __ bge(CCR0, l_1);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      __ b(l_2);
+      __ bind(l_1);
+      __ stbx(tmp1, R4_ARG2, R5_ARG3);
+      __ bind(l_2);
+      __ addic_(R5_ARG3, R5_ARG3, -1);
+      __ lbzx(tmp1, R3_ARG1, R5_ARG3);
+      __ bge(CCR0, l_1);
+    }
     __ li(R3_RET, 0); // return 0
     __ blr();
 
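
The UnsafeCopyMemoryMark scopes introduced above, and repeated for every
copy stub in this change, are RAII guards around each copy loop that
Unsafe.copyMemory can reach. While such a scope is open, the mark records
the PC range being emitted, together with the address at which a page
fault inside that range should resume: the shared common_error_exit stub
when the third constructor argument is false, or the first instruction
after the marked range when it is true. A minimal sketch of the pattern;
every type and helper name below is hypothetical, not taken from the
HotSpot sources:

#include <cstddef>

typedef unsigned char* address;

struct Entry { address start_pc, end_pc, error_exit_pc; };

// Hypothetical helpers standing in for the assembler and the global table.
extern address current_pc();           // next PC the assembler emits at
extern Entry*  add_table_entry();      // append a slot to the global table
extern address common_exit_stub_pc();  // the shared error-exit stub

class MarkSketch {
  Entry* _entry;  // stays NULL when add_entry was false
 public:
  MarkSketch(bool add_entry, bool continue_at_scope_end) : _entry(NULL) {
    if (add_entry) {
      _entry = add_table_entry();
      _entry->start_pc = current_pc();  // marked range starts here
      // false => a fault inside the range resumes at the shared exit stub
      _entry->error_exit_pc =
          continue_at_scope_end ? NULL : common_exit_stub_pc();
    }
  }
  ~MarkSketch() {
    if (_entry != NULL) {
      _entry->end_pc = current_pc();    // range ends where the scope closes
      if (_entry->error_exit_pc == NULL) {
        _entry->error_exit_pc = _entry->end_pc;  // "continue after ucm"
      }
    }
  }
};
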
@@ -1252,155 +1272,159 @@
     assert_positive_int(R5_ARG3);
 
     Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9;
-
-    // don't try anything fancy if arrays don't have many elements
-    __ li(tmp3, 0);
-    __ cmpwi(CCR0, R5_ARG3, 9);
-    __ ble(CCR0, l_6); // copy 2 at a time
-
-    if (!aligned) {
-      __ xorr(tmp1, R3_ARG1, R4_ARG2);
-      __ andi_(tmp1, tmp1, 3);
-      __ bne(CCR0, l_6); // if arrays don't have the same alignment mod 4, do 2 element copy
-
-      // At this point it is guaranteed that both, from and to have the same alignment mod 4.
-
-      // Copy 1 element if necessary to align to 4 bytes.
-      __ andi_(tmp1, R3_ARG1, 3);
-      __ beq(CCR0, l_2);
-
-      __ lhz(tmp2, 0, R3_ARG1);
-      __ addi(R3_ARG1, R3_ARG1, 2);
-      __ sth(tmp2, 0, R4_ARG2);
-      __ addi(R4_ARG2, R4_ARG2, 2);
-      __ addi(R5_ARG3, R5_ARG3, -1);
-      __ bind(l_2);
-
-      // At this point the positions of both, from and to, are at least 4 byte aligned.
-
-      // Copy 4 elements at a time.
-      // Align to 8 bytes, but only if both, from and to, have same alignment mod 8.
-      __ xorr(tmp2, R3_ARG1, R4_ARG2);
-      __ andi_(tmp1, tmp2, 7);
-      __ bne(CCR0, l_7); // not same alignment mod 8 -> copy 4, either from or to will be unaligned
-
-      // Copy a 2-element word if necessary to align to 8 bytes.
-      __ andi_(R0, R3_ARG1, 7);
-      __ beq(CCR0, l_7);
-
-      __ lwzx(tmp2, R3_ARG1, tmp3);
-      __ addi(R5_ARG3, R5_ARG3, -2);
-      __ stwx(tmp2, R4_ARG2, tmp3);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      // don't try anything fancy if arrays don't have many elements
+      __ li(tmp3, 0);
+      __ cmpwi(CCR0, R5_ARG3, 9);
+      __ ble(CCR0, l_6); // copy 2 at a time
+
+      if (!aligned) {
+        __ xorr(tmp1, R3_ARG1, R4_ARG2);
+        __ andi_(tmp1, tmp1, 3);
+        __ bne(CCR0, l_6); // if arrays don't have the same alignment mod 4, do 2-element copy
+
+        // At this point it is guaranteed that both from and to have the same alignment mod 4.
+
+        // Copy 1 element if necessary to align to 4 bytes.
+        __ andi_(tmp1, R3_ARG1, 3);
+        __ beq(CCR0, l_2);
+
+        __ lhz(tmp2, 0, R3_ARG1);
+        __ addi(R3_ARG1, R3_ARG1, 2);
+        __ sth(tmp2, 0, R4_ARG2);
+        __ addi(R4_ARG2, R4_ARG2, 2);
+        __ addi(R5_ARG3, R5_ARG3, -1);
+        __ bind(l_2);
+
+        // At this point the positions of both from and to are at least 4-byte aligned.
+
+        // Copy 4 elements at a time.
+        // Align to 8 bytes, but only if both from and to have the same alignment mod 8.
+        __ xorr(tmp2, R3_ARG1, R4_ARG2);
+        __ andi_(tmp1, tmp2, 7);
+        __ bne(CCR0, l_7); // not same alignment mod 8 -> copy 4, either from or to will be unaligned
+
+        // Copy a 2-element word if necessary to align to 8 bytes.
+        __ andi_(R0, R3_ARG1, 7);
+        __ beq(CCR0, l_7);
+
+        __ lwzx(tmp2, R3_ARG1, tmp3);
+        __ addi(R5_ARG3, R5_ARG3, -2);
+        __ stwx(tmp2, R4_ARG2, tmp3);
+        { // FasterArrayCopy
+          __ addi(R3_ARG1, R3_ARG1, 4);
+          __ addi(R4_ARG2, R4_ARG2, 4);
+        }
+      }
+
+      __ bind(l_7);
+
+      // Copy 4 elements at a time; either the loads or the stores can
+      // be unaligned if aligned == false.
+
       { // FasterArrayCopy
+        __ cmpwi(CCR0, R5_ARG3, 15);
+        __ ble(CCR0, l_6); // copy 2 at a time if less than 16 elements remain
+
+        __ srdi(tmp1, R5_ARG3, 4);
+        __ andi_(R5_ARG3, R5_ARG3, 15);
+        __ mtctr(tmp1);
+
+        if (!VM_Version::has_vsx()) {
+
+          __ bind(l_8);
+          // Use unrolled version for mass copying (copy 16 elements at a time).
+          // A load feeding a store gets zero latency on Power6, but not on Power5.
+          // Therefore, the following sequence is made to perform well on both.
+          __ ld(tmp1, 0, R3_ARG1);
+          __ ld(tmp2, 8, R3_ARG1);
+          __ ld(tmp3, 16, R3_ARG1);
+          __ ld(tmp4, 24, R3_ARG1);
+          __ std(tmp1, 0, R4_ARG2);
+          __ std(tmp2, 8, R4_ARG2);
+          __ std(tmp3, 16, R4_ARG2);
+          __ std(tmp4, 24, R4_ARG2);
+          __ addi(R3_ARG1, R3_ARG1, 32);
+          __ addi(R4_ARG2, R4_ARG2, 32);
+          __ bdnz(l_8);
+
+        } else { // Processor supports VSX, so use it to mass copy.
+
+          // Prefetch src data into L2 cache.
+          __ dcbt(R3_ARG1, 0);
+
+          // If supported, set the DSCR pre-fetch depth to deepest.
+          if (VM_Version::has_mfdscr()) {
+            __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7);
+            __ mtdscr(tmp2);
+          }
+          __ li(tmp1, 16);
+
+          // Backbranch target aligned to 32 bytes. It is not aligned to 16
+          // bytes, as the loop contains < 8 instructions that fit inside a
+          // single i-cache sector.
+          __ align(32);
+
+          __ bind(l_9);
+          // Use a loop with VSX load/store instructions to
+          // copy 16 elements at a time.
+          __ lxvd2x(tmp_vsr1, R3_ARG1);        // Load from src.
+          __ stxvd2x(tmp_vsr1, R4_ARG2);       // Store to dst.
+          __ lxvd2x(tmp_vsr2, R3_ARG1, tmp1);  // Load from src + 16.
+          __ stxvd2x(tmp_vsr2, R4_ARG2, tmp1); // Store to dst + 16.
+          __ addi(R3_ARG1, R3_ARG1, 32);       // Update src+=32.
+          __ addi(R4_ARG2, R4_ARG2, 32);       // Update dst+=32.
+          __ bdnz(l_9);                        // Dec CTR and loop if not zero.
+
+          // Restore DSCR pre-fetch value.
+          if (VM_Version::has_mfdscr()) {
+            __ load_const_optimized(tmp2, VM_Version::_dscr_val);
+            __ mtdscr(tmp2);
+          }
+
+        }
+      } // FasterArrayCopy
+      __ bind(l_6);
+
+      // copy 2 elements at a time
+      { // FasterArrayCopy
+        __ cmpwi(CCR0, R5_ARG3, 2);
+        __ blt(CCR0, l_1);
+        __ srdi(tmp1, R5_ARG3, 1);
+        __ andi_(R5_ARG3, R5_ARG3, 1);
+
+        __ addi(R3_ARG1, R3_ARG1, -4);
+        __ addi(R4_ARG2, R4_ARG2, -4);
+        __ mtctr(tmp1);
+
+        __ bind(l_3);
+        __ lwzu(tmp2, 4, R3_ARG1);
+        __ stwu(tmp2, 4, R4_ARG2);
+        __ bdnz(l_3);
+
         __ addi(R3_ARG1, R3_ARG1, 4);
         __ addi(R4_ARG2, R4_ARG2, 4);
       }
+
+      // do single element copy
+      __ bind(l_1);
+      __ cmpwi(CCR0, R5_ARG3, 0);
+      __ beq(CCR0, l_4);
+
+      { // FasterArrayCopy
+        __ mtctr(R5_ARG3);
+        __ addi(R3_ARG1, R3_ARG1, -2);
+        __ addi(R4_ARG2, R4_ARG2, -2);
+
+        __ bind(l_5);
+        __ lhzu(tmp2, 2, R3_ARG1);
+        __ sthu(tmp2, 2, R4_ARG2);
+        __ bdnz(l_5);
+      }
     }
 
-    __ bind(l_7);
-
-    // Copy 4 elements at a time; either the loads or the stores can
-    // be unaligned if aligned == false.
-
-    { // FasterArrayCopy
-      __ cmpwi(CCR0, R5_ARG3, 15);
-      __ ble(CCR0, l_6); // copy 2 at a time if less than 16 elements remain
-
-      __ srdi(tmp1, R5_ARG3, 4);
-      __ andi_(R5_ARG3, R5_ARG3, 15);
-      __ mtctr(tmp1);
-
-      if (!VM_Version::has_vsx()) {
-
-        __ bind(l_8);
-        // Use unrolled version for mass copying (copy 16 elements a time).
-        // Load feeding store gets zero latency on Power6, however not on Power5.
-        // Therefore, the following sequence is made for the good of both.
-        __ ld(tmp1, 0, R3_ARG1);
-        __ ld(tmp2, 8, R3_ARG1);
-        __ ld(tmp3, 16, R3_ARG1);
-        __ ld(tmp4, 24, R3_ARG1);
-        __ std(tmp1, 0, R4_ARG2);
-        __ std(tmp2, 8, R4_ARG2);
-        __ std(tmp3, 16, R4_ARG2);
-        __ std(tmp4, 24, R4_ARG2);
-        __ addi(R3_ARG1, R3_ARG1, 32);
-        __ addi(R4_ARG2, R4_ARG2, 32);
-        __ bdnz(l_8);
-
-      } else { // Processor supports VSX, so use it to mass copy.
-
-        // Prefetch src data into L2 cache.
-        __ dcbt(R3_ARG1, 0);
-
-        // If supported set DSCR pre-fetch to deepest.
-        if (VM_Version::has_mfdscr()) {
-          __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7);
-          __ mtdscr(tmp2);
-        }
-        __ li(tmp1, 16);
-
-        // Backbranch target aligned to 32-byte. It's not aligned 16-byte
-        // as loop contains < 8 instructions that fit inside a single
-        // i-cache sector.
-        __ align(32);
-
-        __ bind(l_9);
-        // Use loop with VSX load/store instructions to
-        // copy 16 elements a time.
-        __ lxvd2x(tmp_vsr1, R3_ARG1);        // Load from src.
-        __ stxvd2x(tmp_vsr1, R4_ARG2);       // Store to dst.
-        __ lxvd2x(tmp_vsr2, R3_ARG1, tmp1);  // Load from src + 16.
-        __ stxvd2x(tmp_vsr2, R4_ARG2, tmp1); // Store to dst + 16.
-        __ addi(R3_ARG1, R3_ARG1, 32);       // Update src+=32.
-        __ addi(R4_ARG2, R4_ARG2, 32);       // Update dsc+=32.
-        __ bdnz(l_9);                        // Dec CTR and loop if not zero.
-
-        // Restore DSCR pre-fetch value.
-        if (VM_Version::has_mfdscr()) {
-          __ load_const_optimized(tmp2, VM_Version::_dscr_val);
-          __ mtdscr(tmp2);
-        }
-
-      }
-    } // FasterArrayCopy
-    __ bind(l_6);
-
-    // copy 2 elements at a time
-    { // FasterArrayCopy
-      __ cmpwi(CCR0, R5_ARG3, 2);
-      __ blt(CCR0, l_1);
-      __ srdi(tmp1, R5_ARG3, 1);
-      __ andi_(R5_ARG3, R5_ARG3, 1);
-
-      __ addi(R3_ARG1, R3_ARG1, -4);
-      __ addi(R4_ARG2, R4_ARG2, -4);
-      __ mtctr(tmp1);
-
-      __ bind(l_3);
-      __ lwzu(tmp2, 4, R3_ARG1);
-      __ stwu(tmp2, 4, R4_ARG2);
-      __ bdnz(l_3);
-
-      __ addi(R3_ARG1, R3_ARG1, 4);
-      __ addi(R4_ARG2, R4_ARG2, 4);
-    }
-
-    // do single element copy
-    __ bind(l_1);
-    __ cmpwi(CCR0, R5_ARG3, 0);
-    __ beq(CCR0, l_4);
-
-    { // FasterArrayCopy
-      __ mtctr(R5_ARG3);
-      __ addi(R3_ARG1, R3_ARG1, -2);
-      __ addi(R4_ARG2, R4_ARG2, -2);
-
-      __ bind(l_5);
-      __ lhzu(tmp2, 2, R3_ARG1);
-      __ sthu(tmp2, 2, R4_ARG2);
-      __ bdnz(l_5);
-    }
     __ bind(l_4);
     __ li(R3_RET, 0); // return 0
     __ blr();
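
The comment on the unrolled ld/std sequence above records the scheduling
rationale: issuing all four loads before the four stores keeps each
store's input ready even on cores where a load feeding a store has
non-zero latency (Power5). A C++ rendering of the same
32-bytes-per-iteration shape, purely as an illustrative sketch:

#include <stdint.h>

// Grouped loads, then grouped stores, 32 bytes per iteration.
// Assumes qwords is a positive multiple of 4 and the buffers are disjoint.
void copy_qwords_unrolled(const uint64_t* from, uint64_t* to, long qwords) {
  for (long i = 0; i < qwords; i += 4) {
    uint64_t t0 = from[i];      // group the loads...
    uint64_t t1 = from[i + 1];
    uint64_t t2 = from[i + 2];
    uint64_t t3 = from[i + 3];
    to[i]     = t0;             // ...then the stores
    to[i + 1] = t1;
    to[i + 2] = t2;
    to[i + 3] = t3;
  }
}
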
@@ -1432,15 +1456,18 @@
     array_overlap_test(nooverlap_target, 1);
 
     Label l_1, l_2;
-    __ sldi(tmp1, R5_ARG3, 1);
-    __ b(l_2);
-    __ bind(l_1);
-    __ sthx(tmp2, R4_ARG2, tmp1);
-    __ bind(l_2);
-    __ addic_(tmp1, tmp1, -2);
-    __ lhzx(tmp2, R3_ARG1, tmp1);
-    __ bge(CCR0, l_1);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      __ sldi(tmp1, R5_ARG3, 1);
+      __ b(l_2);
+      __ bind(l_1);
+      __ sthx(tmp2, R4_ARG2, tmp1);
+      __ bind(l_2);
+      __ addic_(tmp1, tmp1, -2);
+      __ lhzx(tmp2, R3_ARG1, tmp1);
+      __ bge(CCR0, l_1);
+    }
     __ li(R3_RET, 0); // return 0
     __ blr();
 
@@ -1588,7 +1615,11 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
     assert_positive_int(R5_ARG3);
-    generate_disjoint_int_copy_core(aligned);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      generate_disjoint_int_copy_core(aligned);
+    }
     __ li(R3_RET, 0); // return 0
     __ blr();
     return start;
@@ -1736,8 +1767,11 @@
       STUB_ENTRY(jint_disjoint_arraycopy);
 
     array_overlap_test(nooverlap_target, 2);
-
-    generate_conjoint_int_copy_core(aligned);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      generate_conjoint_int_copy_core(aligned);
+    }
 
     __ li(R3_RET, 0); // return 0
     __ blr();
@@ -1859,11 +1893,15 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
     assert_positive_int(R5_ARG3);
-    generate_disjoint_long_copy_core(aligned);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      generate_disjoint_long_copy_core(aligned);
+    }
     __ li(R3_RET, 0); // return 0
     __ blr();
 
-    return start;
+    return start;
   }
 
   // Generate core code for conjoint long copy (and oop copy on
@@ -1986,8 +2024,11 @@
       STUB_ENTRY(jlong_disjoint_arraycopy);
 
     array_overlap_test(nooverlap_target, 3);
-    generate_conjoint_long_copy_core(aligned);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      generate_conjoint_long_copy_core(aligned);
+    }
     __ li(R3_RET, 0); // return 0
     __ blr();
 
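
Each conjoint stub above begins with array_overlap_test(nooverlap_target,
log2_elem_size), which is why the next hunk notes that the disjoint stubs
must be generated first: when the destination cannot be clobbered by a
forward copy, the conjoint stub simply branches to the faster disjoint
variant. The test it emits amounts to the following sketch:

#include <stdint.h>
#include <stddef.h>

// A forward (disjoint-style) copy is safe when 'to' is at or below 'from',
// or at least 'bytes' past it; only the remaining cases need reverse copy.
bool forward_copy_is_safe(const void* from, const void* to, size_t bytes) {
  uintptr_t f = (uintptr_t)from;
  uintptr_t t = (uintptr_t)to;
  return t <= f || t - f >= bytes;
}
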
@@ -3008,6 +3049,9 @@
     // Note: the disjoint stubs must be generated first, some of
     // the conjoint stubs use them.
 
+    address ucm_common_error_exit       =  generate_unsafecopy_common_error_exit();
+    UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit);
+
     // non-aligned disjoint versions
     StubRoutines::_jbyte_disjoint_arraycopy       = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
     StubRoutines::_jshort_disjoint_arraycopy      = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
@@ -3579,6 +3623,10 @@
   }
 };
 
+#define UCM_TABLE_MAX_ENTRIES 8
 void StubGenerator_generate(CodeBuffer* code, bool all) {
+  if (UnsafeCopyMemory::_table == NULL) {
+    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+  }
   StubGenerator g(code, all);
 }
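
With UCM_TABLE_MAX_ENTRIES fixed at 8, StubGenerator_generate creates the
UnsafeCopyMemory table once, before any stubs are emitted, and
generate_arraycopy_stubs registers the common error exit ahead of the copy
stubs so that every mark created later can refer to it. The consumer of
the table is the VM's fault handler: on a page error it can ask whether
the faulting PC lies inside a marked range and, if so, resume at the
recorded exit instead of treating the fault as fatal. A sketch of that
lookup, reusing the hypothetical address and Entry definitions from the
earlier sketch:

// Hypothetical handler-side lookup over the table of marked PC ranges.
address page_error_continue_pc_sketch(address pc,
                                      const Entry* table, int length) {
  for (int i = 0; i < length; i++) {
    if (pc >= table[i].start_pc && pc < table[i].end_pc) {
      return table[i].error_exit_pc;  // resume the interrupted stub here
    }
  }
  return NULL;  // not inside a marked copy: handle the fault normally
}
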
--- a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1076,6 +1076,17 @@
       __ delayed()->add(end_from, left_shift, end_from); // restore address
   }
 
+  address generate_unsafecopy_common_error_exit() {
+    address start_pc = __ pc();
+    if (UseBlockCopy) {
+      __ wrasi(G0, Assembler::ASI_PRIMARY_NOFAULT);
+      __ membar(Assembler::StoreLoad);
+    }
+    __ retl();
+    __ delayed()->mov(G0, O0); // return 0
+    return start_pc;
+  }
+
   //
   //  Generate stub for disjoint byte copy.  If "aligned" is true, the
   //  "from" and "to" addresses are assumed to be heapword aligned.
@@ -1107,61 +1118,66 @@
       BLOCK_COMMENT("Entry:");
     }
 
-    // for short arrays, just do single element copy
-    __ cmp(count, 23); // 16 + 7
-    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
-    __ delayed()->mov(G0, offset);
-
-    if (aligned) {
-      // 'aligned' == true when it is known statically during compilation
-      // of this arraycopy call site that both 'from' and 'to' addresses
-      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
-      //
-      // Aligned arrays have 4 bytes alignment in 32-bits VM
-      // and 8 bytes - in 64-bits VM. So we do it only for 32-bits VM
-      //
-    } else {
-      // copy bytes to align 'to' on 8 byte boundary
-      __ andcc(to, 7, G1); // misaligned bytes
-      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
-      __ delayed()->neg(G1);
-      __ inc(G1, 8);       // bytes need to copy to next 8-bytes alignment
-      __ sub(count, G1, count);
-    __ BIND(L_align);
-      __ ldub(from, 0, O3);
-      __ deccc(G1);
-      __ inc(from);
-      __ stb(O3, to, 0);
-      __ br(Assembler::notZero, false, Assembler::pt, L_align);
-      __ delayed()->inc(to);
-    __ BIND(L_skip_alignment);
-    }
-    if (!aligned) {
-      // Copy with shift 16 bytes per iteration if arrays do not have
-      // the same alignment mod 8, otherwise fall through to the next
-      // code for aligned copy.
-      // The compare above (count >= 23) guarantes 'count' >= 16 bytes.
-      // Also jump over aligned copy after the copy with shift completed.
-
-      copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
-    }
-
-    // Both array are 8 bytes aligned, copy 16 bytes at a time
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+
+      // for short arrays, just do single element copy
+      __ cmp(count, 23); // 16 + 7
+      __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
+      __ delayed()->mov(G0, offset);
+
+      if (aligned) {
+        // 'aligned' == true when it is known statically during compilation
+        // of this arraycopy call site that both 'from' and 'to' addresses
+        // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
+        //
+        // Aligned arrays have 4-byte alignment in the 32-bit VM
+        // and 8-byte alignment in the 64-bit VM. So we do it only for the 32-bit VM.
+        //
+      } else {
+        // copy bytes to align 'to' on an 8-byte boundary
+        __ andcc(to, 7, G1); // misaligned bytes
+        __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
+        __ delayed()->neg(G1);
+        __ inc(G1, 8);       // bytes needed to reach the next 8-byte alignment
+        __ sub(count, G1, count);
+      __ BIND(L_align);
+        __ ldub(from, 0, O3);
+        __ deccc(G1);
+        __ inc(from);
+        __ stb(O3, to, 0);
+        __ br(Assembler::notZero, false, Assembler::pt, L_align);
+        __ delayed()->inc(to);
+      __ BIND(L_skip_alignment);
+      }
+      if (!aligned) {
+        // Copy with shift 16 bytes per iteration if arrays do not have
+        // the same alignment mod 8, otherwise fall through to the next
+        // code for aligned copy.
+        // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
+        // Also jump over aligned copy after the copy with shift completed.
+
+        copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
+      }
+
+      // Both arrays are 8-byte aligned, copy 16 bytes at a time
       __ and3(count, 7, G4); // Save count
       __ srl(count, 3, count);
-     generate_disjoint_long_copy_core(aligned);
+      generate_disjoint_long_copy_core(aligned);
       __ mov(G4, count);     // Restore count
 
-    // copy tailing bytes
-    __ BIND(L_copy_byte);
-      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
-      __ align(OptoLoopAlignment);
-    __ BIND(L_copy_byte_loop);
-      __ ldub(from, offset, O3);
-      __ deccc(count);
-      __ stb(O3, to, offset);
-      __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
-      __ delayed()->inc(offset);
+      // copy trailing bytes
+      __ BIND(L_copy_byte);
+        __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
+        __ align(OptoLoopAlignment);
+      __ BIND(L_copy_byte_loop);
+        __ ldub(from, offset, O3);
+        __ deccc(count);
+        __ stb(O3, to, offset);
+        __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
+        __ delayed()->inc(offset);
+    }
 
     __ BIND(L_exit);
       // O3, O4 are used as temp registers
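
copy_16_bytes_forward_with_shift, invoked on the unaligned path above,
implements the classic copy-with-shift technique: when from and to
disagree in alignment mod 8, the loop reads aligned doublewords on the
source side and combines adjacent pairs with shifts to produce aligned
stores. The combining step, sketched for a little-endian machine (the
big-endian SPARC code uses the mirror-image shifts):

#include <stdint.h>

// Synthesize one aligned doubleword from two adjacent aligned loads.
// Assumes 0 < byte_offset < 8; the offset-0 case takes the aligned path.
uint64_t combine_shifted(uint64_t lo, uint64_t hi, unsigned byte_offset) {
  unsigned s = 8u * byte_offset;
  return (lo >> s) | (hi << (64u - s));
}
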
@@ -1207,70 +1223,75 @@
 
     array_overlap_test(nooverlap_target, 0);
 
-    __ add(to, count, end_to);       // offset after last copied element
-
-    // for short arrays, just do single element copy
-    __ cmp(count, 23); // 16 + 7
-    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
-    __ delayed()->add(from, count, end_from);
-
     {
-      // Align end of arrays since they could be not aligned even
-      // when arrays itself are aligned.
-
-      // copy bytes to align 'end_to' on 8 byte boundary
-      __ andcc(end_to, 7, G1); // misaligned bytes
-      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
-      __ delayed()->nop();
-      __ sub(count, G1, count);
-    __ BIND(L_align);
-      __ dec(end_from);
-      __ dec(end_to);
-      __ ldub(end_from, 0, O3);
-      __ deccc(G1);
-      __ brx(Assembler::notZero, false, Assembler::pt, L_align);
-      __ delayed()->stb(O3, end_to, 0);
-    __ BIND(L_skip_alignment);
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+
+      __ add(to, count, end_to);       // offset after last copied element
+
+      // for short arrays, just do single element copy
+      __ cmp(count, 23); // 16 + 7
+      __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
+      __ delayed()->add(from, count, end_from);
+
+      {
+        // Align the end of the arrays, since it may not be aligned even
+        // when the arrays themselves are aligned.
+
+        // copy bytes to align 'end_to' on an 8-byte boundary
+        __ andcc(end_to, 7, G1); // misaligned bytes
+        __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
+        __ delayed()->nop();
+        __ sub(count, G1, count);
+      __ BIND(L_align);
+        __ dec(end_from);
+        __ dec(end_to);
+        __ ldub(end_from, 0, O3);
+        __ deccc(G1);
+        __ brx(Assembler::notZero, false, Assembler::pt, L_align);
+        __ delayed()->stb(O3, end_to, 0);
+      __ BIND(L_skip_alignment);
+      }
+      if (aligned) {
+        // Both arrays are aligned to 8-bytes in 64-bits VM.
+        // The 'count' is decremented in copy_16_bytes_backward_with_shift()
+        // in unaligned case.
+        __ dec(count, 16);
+      } else {
+        // Copy with shift 16 bytes per iteration if arrays do not have
+        // the same alignment mod 8, otherwise jump to the next
+        // code for aligned copy (and subtracting 16 from 'count' before the jump).
+        // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
+        // Also jump over aligned copy after the copy with shift completed.
+
+        copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
+                                          L_aligned_copy, L_copy_byte);
+      }
+      // copy 16 elements (16 bytes) at a time
+        __ align(OptoLoopAlignment);
+      __ BIND(L_aligned_copy);
+        __ dec(end_from, 16);
+        __ ldx(end_from, 8, O3);
+        __ ldx(end_from, 0, O4);
+        __ dec(end_to, 16);
+        __ deccc(count, 16);
+        __ stx(O3, end_to, 8);
+        __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
+        __ delayed()->stx(O4, end_to, 0);
+        __ inc(count, 16);
+
+      // copy 1 element (1 byte) at a time
+      __ BIND(L_copy_byte);
+        __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
+        __ align(OptoLoopAlignment);
+      __ BIND(L_copy_byte_loop);
+        __ dec(end_from);
+        __ dec(end_to);
+        __ ldub(end_from, 0, O4);
+        __ deccc(count);
+        __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
+        __ delayed()->stb(O4, end_to, 0);
     }
-    if (aligned) {
-      // Both arrays are aligned to 8-bytes in 64-bits VM.
-      // The 'count' is decremented in copy_16_bytes_backward_with_shift()
-      // in unaligned case.
-      __ dec(count, 16);
-    } else {
-      // Copy with shift 16 bytes per iteration if arrays do not have
-      // the same alignment mod 8, otherwise jump to the next
-      // code for aligned copy (and substracting 16 from 'count' before jump).
-      // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
-      // Also jump over aligned copy after the copy with shift completed.
-
-      copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
-                                        L_aligned_copy, L_copy_byte);
-    }
-    // copy 4 elements (16 bytes) at a time
-      __ align(OptoLoopAlignment);
-    __ BIND(L_aligned_copy);
-      __ dec(end_from, 16);
-      __ ldx(end_from, 8, O3);
-      __ ldx(end_from, 0, O4);
-      __ dec(end_to, 16);
-      __ deccc(count, 16);
-      __ stx(O3, end_to, 8);
-      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
-      __ delayed()->stx(O4, end_to, 0);
-      __ inc(count, 16);
-
-    // copy 1 element (2 bytes) at a time
-    __ BIND(L_copy_byte);
-      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
-      __ align(OptoLoopAlignment);
-    __ BIND(L_copy_byte_loop);
-      __ dec(end_from);
-      __ dec(end_to);
-      __ ldub(end_from, 0, O4);
-      __ deccc(count);
-      __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
-      __ delayed()->stb(O4, end_to, 0);
 
     __ BIND(L_exit);
     // O3, O4 are used as temp registers
@@ -1311,68 +1332,72 @@
       BLOCK_COMMENT("Entry:");
     }
 
-    // for short arrays, just do single element copy
-    __ cmp(count, 11); // 8 + 3  (22 bytes)
-    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
-    __ delayed()->mov(G0, offset);
-
-    if (aligned) {
-      // 'aligned' == true when it is known statically during compilation
-      // of this arraycopy call site that both 'from' and 'to' addresses
-      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
-      //
-      // Aligned arrays have 4 bytes alignment in 32-bits VM
-      // and 8 bytes - in 64-bits VM.
-      //
-    } else {
-      // copy 1 element if necessary to align 'to' on an 4 bytes
-      __ andcc(to, 3, G0);
-      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
-      __ delayed()->lduh(from, 0, O3);
-      __ inc(from, 2);
-      __ inc(to, 2);
-      __ dec(count);
-      __ sth(O3, to, -2);
-    __ BIND(L_skip_alignment);
-
-      // copy 2 elements to align 'to' on an 8 byte boundary
-      __ andcc(to, 7, G0);
-      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
-      __ delayed()->lduh(from, 0, O3);
-      __ dec(count, 2);
-      __ lduh(from, 2, O4);
-      __ inc(from, 4);
-      __ inc(to, 4);
-      __ sth(O3, to, -4);
-      __ sth(O4, to, -2);
-    __ BIND(L_skip_alignment2);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      // for short arrays, just do single element copy
+      __ cmp(count, 11); // 8 + 3  (22 bytes)
+      __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
+      __ delayed()->mov(G0, offset);
+
+      if (aligned) {
+        // 'aligned' == true when it is known statically during compilation
+        // of this arraycopy call site that both 'from' and 'to' addresses
+        // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
+        //
+        // Aligned arrays have 4-byte alignment in the 32-bit VM
+        // and 8-byte alignment in the 64-bit VM.
+        //
+      } else {
+        // copy 1 element if necessary to align 'to' on a 4-byte boundary
+        __ andcc(to, 3, G0);
+        __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
+        __ delayed()->lduh(from, 0, O3);
+        __ inc(from, 2);
+        __ inc(to, 2);
+        __ dec(count);
+        __ sth(O3, to, -2);
+      __ BIND(L_skip_alignment);
+
+        // copy 2 elements to align 'to' on an 8-byte boundary
+        __ andcc(to, 7, G0);
+        __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
+        __ delayed()->lduh(from, 0, O3);
+        __ dec(count, 2);
+        __ lduh(from, 2, O4);
+        __ inc(from, 4);
+        __ inc(to, 4);
+        __ sth(O3, to, -4);
+        __ sth(O4, to, -2);
+      __ BIND(L_skip_alignment2);
+      }
+      if (!aligned) {
+        // Copy with shift 16 bytes per iteration if arrays do not have
+        // the same alignment mod 8, otherwise fall through to the next
+        // code for aligned copy.
+        // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
+        // Also jump over aligned copy after the copy with shift completed.
+
+        copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
+      }
+
+      // Both arrays are 8-byte aligned, copy 16 bytes at a time
+        __ and3(count, 3, G4); // Save
+        __ srl(count, 2, count);
+       generate_disjoint_long_copy_core(aligned);
+        __ mov(G4, count); // restore
+
+      // copy 1 element at a time
+      __ BIND(L_copy_2_bytes);
+        __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
+        __ align(OptoLoopAlignment);
+      __ BIND(L_copy_2_bytes_loop);
+        __ lduh(from, offset, O3);
+        __ deccc(count);
+        __ sth(O3, to, offset);
+        __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
+        __ delayed()->inc(offset, 2);
     }
-    if (!aligned) {
-      // Copy with shift 16 bytes per iteration if arrays do not have
-      // the same alignment mod 8, otherwise fall through to the next
-      // code for aligned copy.
-      // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
-      // Also jump over aligned copy after the copy with shift completed.
-
-      copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
-    }
-
-    // Both array are 8 bytes aligned, copy 16 bytes at a time
-      __ and3(count, 3, G4); // Save
-      __ srl(count, 2, count);
-     generate_disjoint_long_copy_core(aligned);
-      __ mov(G4, count); // restore
-
-    // copy 1 element at a time
-    __ BIND(L_copy_2_bytes);
-      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
-      __ align(OptoLoopAlignment);
-    __ BIND(L_copy_2_bytes_loop);
-      __ lduh(from, offset, O3);
-      __ deccc(count);
-      __ sth(O3, to, offset);
-      __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
-      __ delayed()->inc(offset, 2);
 
     __ BIND(L_exit);
       // O3, O4 are used as temp registers
@@ -1639,79 +1664,83 @@
 
     array_overlap_test(nooverlap_target, 1);
 
-    __ sllx(count, LogBytesPerShort, byte_count);
-    __ add(to, byte_count, end_to);  // offset after last copied element
-
-    // for short arrays, just do single element copy
-    __ cmp(count, 11); // 8 + 3  (22 bytes)
-    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
-    __ delayed()->add(from, byte_count, end_from);
-
     {
-      // Align end of arrays since they could be not aligned even
-      // when arrays itself are aligned.
-
-      // copy 1 element if necessary to align 'end_to' on an 4 bytes
-      __ andcc(end_to, 3, G0);
-      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
-      __ delayed()->lduh(end_from, -2, O3);
-      __ dec(end_from, 2);
-      __ dec(end_to, 2);
-      __ dec(count);
-      __ sth(O3, end_to, 0);
-    __ BIND(L_skip_alignment);
-
-      // copy 2 elements to align 'end_to' on an 8 byte boundary
-      __ andcc(end_to, 7, G0);
-      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
-      __ delayed()->lduh(end_from, -2, O3);
-      __ dec(count, 2);
-      __ lduh(end_from, -4, O4);
-      __ dec(end_from, 4);
-      __ dec(end_to, 4);
-      __ sth(O3, end_to, 2);
-      __ sth(O4, end_to, 0);
-    __ BIND(L_skip_alignment2);
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+
+      __ sllx(count, LogBytesPerShort, byte_count);
+      __ add(to, byte_count, end_to);  // offset after last copied element
+
+      // for short arrays, just do single element copy
+      __ cmp(count, 11); // 8 + 3  (22 bytes)
+      __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
+      __ delayed()->add(from, byte_count, end_from);
+
+      {
+        // Align the end of the arrays, since it may not be aligned even
+        // when the arrays themselves are aligned.
+
+        // copy 1 element if necessary to align 'end_to' on a 4-byte boundary
+        __ andcc(end_to, 3, G0);
+        __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
+        __ delayed()->lduh(end_from, -2, O3);
+        __ dec(end_from, 2);
+        __ dec(end_to, 2);
+        __ dec(count);
+        __ sth(O3, end_to, 0);
+      __ BIND(L_skip_alignment);
+
+        // copy 2 elements to align 'end_to' on an 8-byte boundary
+        __ andcc(end_to, 7, G0);
+        __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
+        __ delayed()->lduh(end_from, -2, O3);
+        __ dec(count, 2);
+        __ lduh(end_from, -4, O4);
+        __ dec(end_from, 4);
+        __ dec(end_to, 4);
+        __ sth(O3, end_to, 2);
+        __ sth(O4, end_to, 0);
+      __ BIND(L_skip_alignment2);
+      }
+      if (aligned) {
+        // Both arrays are aligned to 8-bytes in 64-bits VM.
+        // The 'count' is decremented in copy_16_bytes_backward_with_shift()
+        // in unaligned case.
+        __ dec(count, 8);
+      } else {
+        // Copy with shift 16 bytes per iteration if arrays do not have
+        // the same alignment mod 8, otherwise jump to the next
+        // code for aligned copy (and subtracting 8 from 'count' before the jump).
+        // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
+        // Also jump over aligned copy after the copy with shift completed.
+
+        copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
+                                          L_aligned_copy, L_copy_2_bytes);
+      }
+      // copy 8 elements (16 bytes) at a time
+        __ align(OptoLoopAlignment);
+      __ BIND(L_aligned_copy);
+        __ dec(end_from, 16);
+        __ ldx(end_from, 8, O3);
+        __ ldx(end_from, 0, O4);
+        __ dec(end_to, 16);
+        __ deccc(count, 8);
+        __ stx(O3, end_to, 8);
+        __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
+        __ delayed()->stx(O4, end_to, 0);
+        __ inc(count, 8);
+
+      // copy 1 element (2 bytes) at a time
+      __ BIND(L_copy_2_bytes);
+        __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
+      __ BIND(L_copy_2_bytes_loop);
+        __ dec(end_from, 2);
+        __ dec(end_to, 2);
+        __ lduh(end_from, 0, O4);
+        __ deccc(count);
+        __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
+        __ delayed()->sth(O4, end_to, 0);
     }
-    if (aligned) {
-      // Both arrays are aligned to 8-bytes in 64-bits VM.
-      // The 'count' is decremented in copy_16_bytes_backward_with_shift()
-      // in unaligned case.
-      __ dec(count, 8);
-    } else {
-      // Copy with shift 16 bytes per iteration if arrays do not have
-      // the same alignment mod 8, otherwise jump to the next
-      // code for aligned copy (and substracting 8 from 'count' before jump).
-      // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
-      // Also jump over aligned copy after the copy with shift completed.
-
-      copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
-                                        L_aligned_copy, L_copy_2_bytes);
-    }
-    // copy 4 elements (16 bytes) at a time
-      __ align(OptoLoopAlignment);
-    __ BIND(L_aligned_copy);
-      __ dec(end_from, 16);
-      __ ldx(end_from, 8, O3);
-      __ ldx(end_from, 0, O4);
-      __ dec(end_to, 16);
-      __ deccc(count, 8);
-      __ stx(O3, end_to, 8);
-      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
-      __ delayed()->stx(O4, end_to, 0);
-      __ inc(count, 8);
-
-    // copy 1 element (2 bytes) at a time
-    __ BIND(L_copy_2_bytes);
-      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
-    __ BIND(L_copy_2_bytes_loop);
-      __ dec(end_from, 2);
-      __ dec(end_to, 2);
-      __ lduh(end_from, 0, O4);
-      __ deccc(count);
-      __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
-      __ delayed()->sth(O4, end_to, 0);
-
     __ BIND(L_exit);
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
@@ -1870,9 +1899,11 @@
       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
       BLOCK_COMMENT("Entry:");
     }
-
-    generate_disjoint_int_copy_core(aligned);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      generate_disjoint_int_copy_core(aligned);
+    }
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
     __ retl();
@@ -2005,9 +2036,11 @@
     }
 
     array_overlap_test(nooverlap_target, 2);
-
-    generate_conjoint_int_copy_core(aligned);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      generate_conjoint_int_copy_core(aligned);
+    }
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
     __ retl();
@@ -2156,8 +2189,11 @@
       BLOCK_COMMENT("Entry:");
     }
 
-    generate_disjoint_long_copy_core(aligned);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, true, false);
+      generate_disjoint_long_copy_core(aligned);
+    }
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
     __ retl();
@@ -2232,9 +2268,11 @@
     }
 
     array_overlap_test(nooverlap_target, 3);
-
-    generate_conjoint_long_copy_core(aligned);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, true, false);
+      generate_conjoint_long_copy_core(aligned);
+    }
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
     __ retl();
@@ -2929,6 +2967,9 @@
     address entry_jlong_arraycopy;
     address entry_checkcast_arraycopy;
 
+    address ucm_common_error_exit       =  generate_unsafecopy_common_error_exit();
+    UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit);
+
     //*** jbyte
     // Always need aligned and unaligned versions
     StubRoutines::_jbyte_disjoint_arraycopy         = generate_disjoint_byte_copy(false, &entry,
@@ -5821,6 +5862,10 @@
 
 }; // end class declaration
 
+#define UCM_TABLE_MAX_ENTRIES 8
 void StubGenerator_generate(CodeBuffer* code, bool all) {
+  if (UnsafeCopyMemory::_table == NULL) {
+    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+  }
   StubGenerator g(code, all);
 }
--- a/src/hotspot/cpu/x86/assembler_x86.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -789,6 +789,8 @@
     case 0x59: // mulpd
     case 0x6E: // movd
     case 0x7E: // movd
+    case 0x6F: // movdq
+    case 0x7F: // movdq
     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
     case 0xFE: // paddd
       debug_only(has_disp32 = true);
@@ -4274,6 +4276,7 @@
   emit_operand(dst, src);
   emit_int8(mode & 0xFF);
 }
+
 void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
   assert(VM_Version::supports_evex(), "requires EVEX support");
   assert(vector_len == Assembler::AVX_256bit || vector_len == Assembler::AVX_512bit, "");
--- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -889,91 +889,98 @@
 
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, t, from, to, count);
-
-    __ subptr(to, from); // to --> to_from
-    __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
-    __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
-    if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
-      // align source address at 4 bytes address boundary
-      if (t == T_BYTE) {
-        // One byte misalignment happens only for byte arrays
-        __ testl(from, 1);
-        __ jccb(Assembler::zero, L_skip_align1);
-        __ movb(rax, Address(from, 0));
-        __ movb(Address(from, to_from, Address::times_1, 0), rax);
-        __ increment(from);
-        __ decrement(count);
-      __ BIND(L_skip_align1);
+    {
+      bool add_entry = (t != T_OBJECT && (!aligned || t == T_INT));
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, add_entry, true);
+      __ subptr(to, from); // to --> to_from
+      __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
+      __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
+      if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
+        // align source address on a 4-byte address boundary
+        if (t == T_BYTE) {
+          // One-byte misalignment happens only for byte arrays
+          __ testl(from, 1);
+          __ jccb(Assembler::zero, L_skip_align1);
+          __ movb(rax, Address(from, 0));
+          __ movb(Address(from, to_from, Address::times_1, 0), rax);
+          __ increment(from);
+          __ decrement(count);
+        __ BIND(L_skip_align1);
+        }
+        // Two-byte misalignment happens only for byte and short (char) arrays
+        __ testl(from, 2);
+        __ jccb(Assembler::zero, L_skip_align2);
+        __ movw(rax, Address(from, 0));
+        __ movw(Address(from, to_from, Address::times_1, 0), rax);
+        __ addptr(from, 2);
+        __ subl(count, 1<<(shift-1));
+      __ BIND(L_skip_align2);
       }
-      // Two bytes misalignment happens only for byte and short (char) arrays
-      __ testl(from, 2);
-      __ jccb(Assembler::zero, L_skip_align2);
-      __ movw(rax, Address(from, 0));
-      __ movw(Address(from, to_from, Address::times_1, 0), rax);
-      __ addptr(from, 2);
-      __ subl(count, 1<<(shift-1));
-    __ BIND(L_skip_align2);
-    }
-    if (!VM_Version::supports_mmx()) {
-      __ mov(rax, count);      // save 'count'
-      __ shrl(count, shift); // bytes count
-      __ addptr(to_from, from);// restore 'to'
-      __ rep_mov();
-      __ subptr(to_from, from);// restore 'to_from'
-      __ mov(count, rax);      // restore 'count'
-      __ jmpb(L_copy_2_bytes); // all dwords were copied
-    } else {
-      if (!UseUnalignedLoadStores) {
-        // align to 8 bytes, we know we are 4 byte aligned to start
-        __ testptr(from, 4);
-        __ jccb(Assembler::zero, L_copy_64_bytes);
-        __ movl(rax, Address(from, 0));
-        __ movl(Address(from, to_from, Address::times_1, 0), rax);
+      if (!VM_Version::supports_mmx()) {
+        __ mov(rax, count);      // save 'count'
+        __ shrl(count, shift); // bytes count
+        __ addptr(to_from, from);// restore 'to'
+        __ rep_mov();
+        __ subptr(to_from, from);// restore 'to_from'
+        __ mov(count, rax);      // restore 'count'
+        __ jmpb(L_copy_2_bytes); // all dwords were copied
+      } else {
+        if (!UseUnalignedLoadStores) {
+          // align to 8 bytes; we know we are 4-byte aligned to start
+          __ testptr(from, 4);
+          __ jccb(Assembler::zero, L_copy_64_bytes);
+          __ movl(rax, Address(from, 0));
+          __ movl(Address(from, to_from, Address::times_1, 0), rax);
+          __ addptr(from, 4);
+          __ subl(count, 1<<shift);
+        }
+      __ BIND(L_copy_64_bytes);
+        __ mov(rax, count);
+        __ shrl(rax, shift+1);  // 8-byte chunk count
+        //
+        // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
+        //
+        if (UseXMMForArrayCopy) {
+          xmm_copy_forward(from, to_from, rax);
+        } else {
+          mmx_copy_forward(from, to_from, rax);
+        }
+      }
+      // copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(count, 1<<shift);
+      __ jccb(Assembler::zero, L_copy_2_bytes);
+      __ movl(rax, Address(from, 0));
+      __ movl(Address(from, to_from, Address::times_1, 0), rax);
+      if (t == T_BYTE || t == T_SHORT) {
         __ addptr(from, 4);
-        __ subl(count, 1<<shift);
-      }
-    __ BIND(L_copy_64_bytes);
-      __ mov(rax, count);
-      __ shrl(rax, shift+1);  // 8 bytes chunk count
-      //
-      // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
-      //
-      if (UseXMMForArrayCopy) {
-        xmm_copy_forward(from, to_from, rax);
+      __ BIND(L_copy_2_bytes);
+        // copy trailing word
+        __ testl(count, 1<<(shift-1));
+        __ jccb(Assembler::zero, L_copy_byte);
+        __ movw(rax, Address(from, 0));
+        __ movw(Address(from, to_from, Address::times_1, 0), rax);
+        if (t == T_BYTE) {
+          __ addptr(from, 2);
+        __ BIND(L_copy_byte);
+          // copy trailing byte
+          __ testl(count, 1);
+          __ jccb(Assembler::zero, L_exit);
+          __ movb(rax, Address(from, 0));
+          __ movb(Address(from, to_from, Address::times_1, 0), rax);
+        __ BIND(L_exit);
+        } else {
+        __ BIND(L_copy_byte);
+        }
       } else {
-        mmx_copy_forward(from, to_from, rax);
+      __ BIND(L_copy_2_bytes);
       }
     }
-    // copy tailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(count, 1<<shift);
-    __ jccb(Assembler::zero, L_copy_2_bytes);
-    __ movl(rax, Address(from, 0));
-    __ movl(Address(from, to_from, Address::times_1, 0), rax);
-    if (t == T_BYTE || t == T_SHORT) {
-      __ addptr(from, 4);
-    __ BIND(L_copy_2_bytes);
-      // copy tailing word
-      __ testl(count, 1<<(shift-1));
-      __ jccb(Assembler::zero, L_copy_byte);
-      __ movw(rax, Address(from, 0));
-      __ movw(Address(from, to_from, Address::times_1, 0), rax);
-      if (t == T_BYTE) {
-        __ addptr(from, 2);
-      __ BIND(L_copy_byte);
-        // copy tailing byte
-        __ testl(count, 1);
-        __ jccb(Assembler::zero, L_exit);
-        __ movb(rax, Address(from, 0));
-        __ movb(Address(from, to_from, Address::times_1, 0), rax);
-      __ BIND(L_exit);
-      } else {
-      __ BIND(L_copy_byte);
-      }
-    } else {
-    __ BIND(L_copy_2_bytes);
+
+    if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
+      __ emms();
     }
-
     __ movl(count, Address(rsp, 12+12)); // reread 'count'
     bs->arraycopy_epilogue(_masm, decorators, t, from, to, count);
 
@@ -1079,104 +1086,112 @@
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, t, from, to, count);
 
-    // copy from high to low
-    __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
-    __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
-    if (t == T_BYTE || t == T_SHORT) {
-      // Align the end of destination array at 4 bytes address boundary
-      __ lea(end, Address(dst, count, sf, 0));
-      if (t == T_BYTE) {
-        // One byte misalignment happens only for byte arrays
-        __ testl(end, 1);
-        __ jccb(Assembler::zero, L_skip_align1);
-        __ decrement(count);
-        __ movb(rdx, Address(from, count, sf, 0));
-        __ movb(Address(to, count, sf, 0), rdx);
-      __ BIND(L_skip_align1);
-      }
-      // Two bytes misalignment happens only for byte and short (char) arrays
-      __ testl(end, 2);
-      __ jccb(Assembler::zero, L_skip_align2);
-      __ subptr(count, 1<<(shift-1));
-      __ movw(rdx, Address(from, count, sf, 0));
-      __ movw(Address(to, count, sf, 0), rdx);
-    __ BIND(L_skip_align2);
+    {
+      bool add_entry = (t != T_OBJECT && (!aligned || t == T_INT));
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, add_entry, true);
+      // copy from high to low
       __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
-      __ jcc(Assembler::below, L_copy_4_bytes);
-    }
-
-    if (!VM_Version::supports_mmx()) {
-      __ std();
-      __ mov(rax, count); // Save 'count'
-      __ mov(rdx, to);    // Save 'to'
-      __ lea(rsi, Address(from, count, sf, -4));
-      __ lea(rdi, Address(to  , count, sf, -4));
-      __ shrptr(count, shift); // bytes count
-      __ rep_mov();
-      __ cld();
-      __ mov(count, rax); // restore 'count'
-      __ andl(count, (1<<shift)-1);      // mask the number of rest elements
-      __ movptr(from, Address(rsp, 12+4)); // reread 'from'
-      __ mov(to, rdx);   // restore 'to'
-      __ jmpb(L_copy_2_bytes); // all dword were copied
-   } else {
-      // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
-      __ testptr(end, 4);
-      __ jccb(Assembler::zero, L_copy_8_bytes);
-      __ subl(count, 1<<shift);
-      __ movl(rdx, Address(from, count, sf, 0));
-      __ movl(Address(to, count, sf, 0), rdx);
-      __ jmpb(L_copy_8_bytes);
-
-      __ align(OptoLoopAlignment);
-      // Move 8 bytes
-    __ BIND(L_copy_8_bytes_loop);
-      if (UseXMMForArrayCopy) {
-        __ movq(xmm0, Address(from, count, sf, 0));
-        __ movq(Address(to, count, sf, 0), xmm0);
+      __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
+      if (t == T_BYTE || t == T_SHORT) {
+        // Align the end of the destination array on a 4-byte address boundary
+        __ lea(end, Address(dst, count, sf, 0));
+        if (t == T_BYTE) {
+          // One-byte misalignment happens only for byte arrays
+          __ testl(end, 1);
+          __ jccb(Assembler::zero, L_skip_align1);
+          __ decrement(count);
+          __ movb(rdx, Address(from, count, sf, 0));
+          __ movb(Address(to, count, sf, 0), rdx);
+        __ BIND(L_skip_align1);
+        }
+        // Two-byte misalignment happens only for byte and short (char) arrays
+        __ testl(end, 2);
+        __ jccb(Assembler::zero, L_skip_align2);
+        __ subptr(count, 1<<(shift-1));
+        __ movw(rdx, Address(from, count, sf, 0));
+        __ movw(Address(to, count, sf, 0), rdx);
+      __ BIND(L_skip_align2);
+        __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
+        __ jcc(Assembler::below, L_copy_4_bytes);
+      }
+
+      if (!VM_Version::supports_mmx()) {
+        __ std();
+        __ mov(rax, count); // Save 'count'
+        __ mov(rdx, to);    // Save 'to'
+        __ lea(rsi, Address(from, count, sf, -4));
+        __ lea(rdi, Address(to  , count, sf, -4));
+        __ shrptr(count, shift); // bytes count
+        __ rep_mov();
+        __ cld();
+        __ mov(count, rax); // restore 'count'
+        __ andl(count, (1<<shift)-1);      // mask the number of remaining elements
+        __ movptr(from, Address(rsp, 12+4)); // reread 'from'
+        __ mov(to, rdx);   // restore 'to'
+        __ jmpb(L_copy_2_bytes); // all dwords were copied
       } else {
-        __ movq(mmx0, Address(from, count, sf, 0));
-        __ movq(Address(to, count, sf, 0), mmx0);
+        // Align the end of the array to 8 bytes. It is already aligned to 4 bytes.
+        __ testptr(end, 4);
+        __ jccb(Assembler::zero, L_copy_8_bytes);
+        __ subl(count, 1<<shift);
+        __ movl(rdx, Address(from, count, sf, 0));
+        __ movl(Address(to, count, sf, 0), rdx);
+        __ jmpb(L_copy_8_bytes);
+
+        __ align(OptoLoopAlignment);
+        // Move 8 bytes
+      __ BIND(L_copy_8_bytes_loop);
+        if (UseXMMForArrayCopy) {
+          __ movq(xmm0, Address(from, count, sf, 0));
+          __ movq(Address(to, count, sf, 0), xmm0);
+        } else {
+          __ movq(mmx0, Address(from, count, sf, 0));
+          __ movq(Address(to, count, sf, 0), mmx0);
+        }
+      __ BIND(L_copy_8_bytes);
+        __ subl(count, 2<<shift);
+        __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
+        __ addl(count, 2<<shift);
+        if (!UseXMMForArrayCopy) {
+          __ emms();
+        }
       }
-    __ BIND(L_copy_8_bytes);
-      __ subl(count, 2<<shift);
-      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
-      __ addl(count, 2<<shift);
-      if (!UseXMMForArrayCopy) {
-        __ emms();
+    __ BIND(L_copy_4_bytes);
+      // copy prefix dword
+      __ testl(count, 1<<shift);
+      __ jccb(Assembler::zero, L_copy_2_bytes);
+      __ movl(rdx, Address(from, count, sf, -4));
+      __ movl(Address(to, count, sf, -4), rdx);
+
+      if (t == T_BYTE || t == T_SHORT) {
+          __ subl(count, (1<<shift));
+        __ BIND(L_copy_2_bytes);
+          // copy prefix word
+          __ testl(count, 1<<(shift-1));
+          __ jccb(Assembler::zero, L_copy_byte);
+          __ movw(rdx, Address(from, count, sf, -2));
+          __ movw(Address(to, count, sf, -2), rdx);
+          if (t == T_BYTE) {
+            __ subl(count, 1<<(shift-1));
+          __ BIND(L_copy_byte);
+            // copy prefix byte
+            __ testl(count, 1);
+            __ jccb(Assembler::zero, L_exit);
+            __ movb(rdx, Address(from, 0));
+            __ movb(Address(to, 0), rdx);
+          __ BIND(L_exit);
+          } else {
+          __ BIND(L_copy_byte);
+          }
+      } else {
+      __ BIND(L_copy_2_bytes);
       }
     }
-  __ BIND(L_copy_4_bytes);
-    // copy prefix qword
-    __ testl(count, 1<<shift);
-    __ jccb(Assembler::zero, L_copy_2_bytes);
-    __ movl(rdx, Address(from, count, sf, -4));
-    __ movl(Address(to, count, sf, -4), rdx);
-
-    if (t == T_BYTE || t == T_SHORT) {
-        __ subl(count, (1<<shift));
-      __ BIND(L_copy_2_bytes);
-        // copy prefix dword
-        __ testl(count, 1<<(shift-1));
-        __ jccb(Assembler::zero, L_copy_byte);
-        __ movw(rdx, Address(from, count, sf, -2));
-        __ movw(Address(to, count, sf, -2), rdx);
-        if (t == T_BYTE) {
-          __ subl(count, 1<<(shift-1));
-        __ BIND(L_copy_byte);
-          // copy prefix byte
-          __ testl(count, 1);
-          __ jccb(Assembler::zero, L_exit);
-          __ movb(rdx, Address(from, 0));
-          __ movb(Address(to, 0), rdx);
-        __ BIND(L_exit);
-        } else {
-        __ BIND(L_copy_byte);
-        }
-    } else {
-    __ BIND(L_copy_2_bytes);
+
+    if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
+      __ emms();
     }
-
     __ movl2ptr(count, Address(rsp, 12+12)); // reread count
     bs->arraycopy_epilogue(_masm, decorators, t, from, to, count);
 
@@ -1212,23 +1227,30 @@
     *entry = __ pc(); // Entry point from conjoint arraycopy stub.
     BLOCK_COMMENT("Entry:");
 
-    __ subptr(to, from); // to --> to_from
-    if (VM_Version::supports_mmx()) {
-      if (UseXMMForArrayCopy) {
-        xmm_copy_forward(from, to_from, count);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, true, true);
+      __ subptr(to, from); // to --> to_from
+      if (VM_Version::supports_mmx()) {
+        if (UseXMMForArrayCopy) {
+          xmm_copy_forward(from, to_from, count);
+        } else {
+          mmx_copy_forward(from, to_from, count);
+        }
       } else {
-        mmx_copy_forward(from, to_from, count);
+        __ jmpb(L_copy_8_bytes);
+        __ align(OptoLoopAlignment);
+      __ BIND(L_copy_8_bytes_loop);
+        __ fild_d(Address(from, 0));
+        __ fistp_d(Address(from, to_from, Address::times_1));
+        __ addptr(from, 8);
+      __ BIND(L_copy_8_bytes);
+        __ decrement(count);
+        __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
       }
-    } else {
-      __ jmpb(L_copy_8_bytes);
-      __ align(OptoLoopAlignment);
-    __ BIND(L_copy_8_bytes_loop);
-      __ fild_d(Address(from, 0));
-      __ fistp_d(Address(from, to_from, Address::times_1));
-      __ addptr(from, 8);
-    __ BIND(L_copy_8_bytes);
-      __ decrement(count);
-      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
+    }
+    if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
+      __ emms();
     }
     inc_copy_counter_np(T_LONG);
     __ leave(); // required for proper stackwalking of RuntimeStub frame
@@ -1267,26 +1289,31 @@
     __ movptr(from, Address(rsp, 8));  // from
     __ jump_cc(Assembler::aboveEqual, nooverlap);
 
-    __ jmpb(L_copy_8_bytes);
-
-    __ align(OptoLoopAlignment);
-  __ BIND(L_copy_8_bytes_loop);
-    if (VM_Version::supports_mmx()) {
-      if (UseXMMForArrayCopy) {
-        __ movq(xmm0, Address(from, count, Address::times_8));
-        __ movq(Address(to, count, Address::times_8), xmm0);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, true, true);
+
+      __ jmpb(L_copy_8_bytes);
+
+      __ align(OptoLoopAlignment);
+    __ BIND(L_copy_8_bytes_loop);
+      if (VM_Version::supports_mmx()) {
+        if (UseXMMForArrayCopy) {
+          __ movq(xmm0, Address(from, count, Address::times_8));
+          __ movq(Address(to, count, Address::times_8), xmm0);
+        } else {
+          __ movq(mmx0, Address(from, count, Address::times_8));
+          __ movq(Address(to, count, Address::times_8), mmx0);
+        }
       } else {
-        __ movq(mmx0, Address(from, count, Address::times_8));
-        __ movq(Address(to, count, Address::times_8), mmx0);
+        __ fild_d(Address(from, count, Address::times_8));
+        __ fistp_d(Address(to, count, Address::times_8));
       }
-    } else {
-      __ fild_d(Address(from, count, Address::times_8));
-      __ fistp_d(Address(to, count, Address::times_8));
+    __ BIND(L_copy_8_bytes);
+      __ decrement(count);
+      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
+
     }
-  __ BIND(L_copy_8_bytes);
-    __ decrement(count);
-    __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
-
     if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
       __ emms();
     }
@@ -3945,7 +3972,10 @@
   }
 }; // end class declaration
 
-
+#define UCM_TABLE_MAX_ENTRIES 8
 void StubGenerator_generate(CodeBuffer* code, bool all) {
+  if (UnsafeCopyMemory::_table == NULL) {
+    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+  }
   StubGenerator g(code, all);
 }
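
The UnsafeCopyMemoryMark objects threaded through these stubs are RAII guards: construction records the current stub PC, and destruction closes the range and registers it in UnsafeCopyMemory::_table, which the platform fault handlers later in this changeset consult. A minimal sketch of the shape this usage implies; the real declaration lives with the stub-code generator, and everything past the constructor arguments visible above is an assumption:

    // Sketch only: approximates the guard used by the arraycopy stubs.
    class UnsafeCopyMemoryMark : public StackObj {
      StubCodeGenerator* _cgen;       // generator whose masm PC is bracketed
      UnsafeCopyMemory*  _ucm_entry;  // table slot, NULL when add_entry is false
     public:
      // add_entry: instrument only copies that may touch unmapped memory
      //            (hence the "true" / "!aligned" arguments in these stubs)
      // continue_at_scope_end: on a page error, resume where the mark is
      //            destroyed; otherwise resume at error_exit_pc
      UnsafeCopyMemoryMark(StubCodeGenerator* cgen, bool add_entry,
                           bool continue_at_scope_end,
                           address error_exit_pc = NULL);
      ~UnsafeCopyMemoryMark();        // records the end / continuation PC
    };
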
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1433,7 +1433,6 @@
     __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
   }
 
-
   // Arguments:
   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
   //             ignored
@@ -1482,51 +1481,55 @@
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(byte_count, count);
-    __ shrptr(count, 3); // count => qword_count
-
-    // Copy from low to high addresses.  Use 'to' as scratch.
-    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
-    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
-    __ negptr(qword_count); // make the count negative
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
-    __ increment(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(byte_count, 4);
-    __ jccb(Assembler::zero, L_copy_2_bytes);
-    __ movl(rax, Address(end_from, 8));
-    __ movl(Address(end_to, 8), rax);
-
-    __ addptr(end_from, 4);
-    __ addptr(end_to, 4);
-
-    // Check for and copy trailing word
-  __ BIND(L_copy_2_bytes);
-    __ testl(byte_count, 2);
-    __ jccb(Assembler::zero, L_copy_byte);
-    __ movw(rax, Address(end_from, 8));
-    __ movw(Address(end_to, 8), rax);
-
-    __ addptr(end_from, 2);
-    __ addptr(end_to, 2);
-
-    // Check for and copy trailing byte
-  __ BIND(L_copy_byte);
-    __ testl(byte_count, 1);
-    __ jccb(Assembler::zero, L_exit);
-    __ movb(rax, Address(end_from, 8));
-    __ movb(Address(end_to, 8), rax);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(byte_count, count);
+      __ shrptr(count, 3); // count => qword_count
+
+      // Copy from low to high addresses.  Use 'to' as scratch.
+      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
+      __ negptr(qword_count); // make the count negative
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
+      __ increment(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+
+      // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(byte_count, 4);
+      __ jccb(Assembler::zero, L_copy_2_bytes);
+      __ movl(rax, Address(end_from, 8));
+      __ movl(Address(end_to, 8), rax);
+
+      __ addptr(end_from, 4);
+      __ addptr(end_to, 4);
+
+      // Check for and copy trailing word
+    __ BIND(L_copy_2_bytes);
+      __ testl(byte_count, 2);
+      __ jccb(Assembler::zero, L_copy_byte);
+      __ movw(rax, Address(end_from, 8));
+      __ movw(Address(end_to, 8), rax);
+
+      __ addptr(end_from, 2);
+      __ addptr(end_to, 2);
+
+      // Check for and copy trailing byte
+    __ BIND(L_copy_byte);
+      __ testl(byte_count, 1);
+      __ jccb(Assembler::zero, L_exit);
+      __ movb(rax, Address(end_from, 8));
+      __ movb(Address(end_to, 8), rax);
+    }
   __ BIND(L_exit);
+    address ucme_exit_pc = __ pc();
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
@@ -1534,10 +1537,12 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-    __ jmp(L_copy_4_bytes);
-
+    {
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
+      // Copy in multi-bytes chunks
+      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+      __ jmp(L_copy_4_bytes);
+    }
     return start;
   }
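
The forward-copy stubs in this file need two marks apiece. The first, created with continue_at_scope_end == true, brackets the in-line tail copies and resumes at the point where it goes out of scope (L_exit). The bulk loop emitted by copy_bytes_forward sits after the stub's ret, so it cannot resume at its own scope end; it gets a second mark with continue_at_scope_end == false and the ucme_exit_pc captured at L_exit. Condensed from the hunk above:

    // Shape shared by the disjoint byte/short/int forward-copy stubs:
    __ BIND(L_exit);
    address ucme_exit_pc = __ pc();   // resume point for the cold path
    // ... restore registers, bump counter, return ...
    {
      UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
      copy_bytes_forward(end_from, end_to, qword_count, rax,
                         L_copy_bytes, L_copy_8_bytes);
      __ jmp(L_copy_4_bytes);
    }
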
 
@@ -1582,41 +1587,44 @@
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(byte_count, count);
-    __ shrptr(count, 3);   // count => qword_count
-
-    // Copy from high to low addresses.
-
-    // Check for and copy trailing byte
-    __ testl(byte_count, 1);
-    __ jcc(Assembler::zero, L_copy_2_bytes);
-    __ movb(rax, Address(from, byte_count, Address::times_1, -1));
-    __ movb(Address(to, byte_count, Address::times_1, -1), rax);
-    __ decrement(byte_count); // Adjust for possible trailing word
-
-    // Check for and copy trailing word
-  __ BIND(L_copy_2_bytes);
-    __ testl(byte_count, 2);
-    __ jcc(Assembler::zero, L_copy_4_bytes);
-    __ movw(rax, Address(from, byte_count, Address::times_1, -2));
-    __ movw(Address(to, byte_count, Address::times_1, -2), rax);
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(byte_count, 4);
-    __ jcc(Assembler::zero, L_copy_bytes);
-    __ movl(rax, Address(from, qword_count, Address::times_8));
-    __ movl(Address(to, qword_count, Address::times_8), rax);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
-    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
-    __ decrement(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(byte_count, count);
+      __ shrptr(count, 3);   // count => qword_count
+
+      // Copy from high to low addresses.
+
+      // Check for and copy trailing byte
+      __ testl(byte_count, 1);
+      __ jcc(Assembler::zero, L_copy_2_bytes);
+      __ movb(rax, Address(from, byte_count, Address::times_1, -1));
+      __ movb(Address(to, byte_count, Address::times_1, -1), rax);
+      __ decrement(byte_count); // Adjust for possible trailing word
+
+      // Check for and copy trailing word
+    __ BIND(L_copy_2_bytes);
+      __ testl(byte_count, 2);
+      __ jcc(Assembler::zero, L_copy_4_bytes);
+      __ movw(rax, Address(from, byte_count, Address::times_1, -2));
+      __ movw(Address(to, byte_count, Address::times_1, -2), rax);
+
+      // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(byte_count, 4);
+      __ jcc(Assembler::zero, L_copy_bytes);
+      __ movl(rax, Address(from, qword_count, Address::times_8));
+      __ movl(Address(to, qword_count, Address::times_8), rax);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
+      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
+      __ decrement(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
@@ -1624,9 +1632,12 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // Copy in multi-bytes chunks
+      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
@@ -1684,44 +1695,48 @@
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(word_count, count);
-    __ shrptr(count, 2); // count => qword_count
-
-    // Copy from low to high addresses.  Use 'to' as scratch.
-    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
-    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
-    __ negptr(qword_count);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
-    __ increment(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
-    // Original 'dest' is trashed, so we can't use it as a
-    // base register for a possible trailing word copy
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(word_count, 2);
-    __ jccb(Assembler::zero, L_copy_2_bytes);
-    __ movl(rax, Address(end_from, 8));
-    __ movl(Address(end_to, 8), rax);
-
-    __ addptr(end_from, 4);
-    __ addptr(end_to, 4);
-
-    // Check for and copy trailing word
-  __ BIND(L_copy_2_bytes);
-    __ testl(word_count, 1);
-    __ jccb(Assembler::zero, L_exit);
-    __ movw(rax, Address(end_from, 8));
-    __ movw(Address(end_to, 8), rax);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(word_count, count);
+      __ shrptr(count, 2); // count => qword_count
+
+      // Copy from low to high addresses.  Use 'to' as scratch.
+      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
+      __ negptr(qword_count);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
+      __ increment(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+
+      // Original 'dest' is trashed, so we can't use it as a
+      // base register for a possible trailing word copy
+
+      // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(word_count, 2);
+      __ jccb(Assembler::zero, L_copy_2_bytes);
+      __ movl(rax, Address(end_from, 8));
+      __ movl(Address(end_to, 8), rax);
+
+      __ addptr(end_from, 4);
+      __ addptr(end_to, 4);
+
+      // Check for and copy trailing word
+    __ BIND(L_copy_2_bytes);
+      __ testl(word_count, 1);
+      __ jccb(Assembler::zero, L_exit);
+      __ movw(rax, Address(end_from, 8));
+      __ movw(Address(end_to, 8), rax);
+    }
   __ BIND(L_exit);
+    address ucme_exit_pc = __ pc();
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
@@ -1729,9 +1744,12 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-    __ jmp(L_copy_4_bytes);
+    {
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
+      // Copy in multi-bytes chunks
+      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+      __ jmp(L_copy_4_bytes);
+    }
 
     return start;
   }
@@ -1798,33 +1816,36 @@
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(word_count, count);
-    __ shrptr(count, 2); // count => qword_count
-
-    // Copy from high to low addresses.  Use 'to' as scratch.
-
-    // Check for and copy trailing word
-    __ testl(word_count, 1);
-    __ jccb(Assembler::zero, L_copy_4_bytes);
-    __ movw(rax, Address(from, word_count, Address::times_2, -2));
-    __ movw(Address(to, word_count, Address::times_2, -2), rax);
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(word_count, 2);
-    __ jcc(Assembler::zero, L_copy_bytes);
-    __ movl(rax, Address(from, qword_count, Address::times_8));
-    __ movl(Address(to, qword_count, Address::times_8), rax);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
-    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
-    __ decrement(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(word_count, count);
+      __ shrptr(count, 2); // count => qword_count
+
+      // Copy from high to low addresses.  Use 'to' as scratch.
+
+      // Check for and copy trailing word
+      __ testl(word_count, 1);
+      __ jccb(Assembler::zero, L_copy_4_bytes);
+      __ movw(rax, Address(from, word_count, Address::times_2, -2));
+      __ movw(Address(to, word_count, Address::times_2, -2), rax);
+
+      // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(word_count, 2);
+      __ jcc(Assembler::zero, L_copy_bytes);
+      __ movl(rax, Address(from, qword_count, Address::times_8));
+      __ movl(Address(to, qword_count, Address::times_8), rax);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
+      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
+      __ decrement(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
@@ -1832,9 +1853,12 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // Copy in multi-bytes chunks
+      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
@@ -1905,31 +1929,35 @@
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(dword_count, count);
-    __ shrptr(count, 1); // count => qword_count
-
-    // Copy from low to high addresses.  Use 'to' as scratch.
-    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
-    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
-    __ negptr(qword_count);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
-    __ increment(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
-    __ jccb(Assembler::zero, L_exit);
-    __ movl(rax, Address(end_from, 8));
-    __ movl(Address(end_to, 8), rax);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(dword_count, count);
+      __ shrptr(count, 1); // count => qword_count
+
+      // Copy from low to high addresses.  Use 'to' as scratch.
+      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
+      __ negptr(qword_count);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
+      __ increment(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+
+      // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
+      __ jccb(Assembler::zero, L_exit);
+      __ movl(rax, Address(end_from, 8));
+      __ movl(Address(end_to, 8), rax);
+    }
   __ BIND(L_exit);
+    address ucme_exit_pc = __ pc();
     bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
     restore_arg_regs_using_thread();
     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
@@ -1938,9 +1966,12 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-    __ jmp(L_copy_4_bytes);
+    {
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, false, ucme_exit_pc);
+      // Copy in multi-bytes chunks
+      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+      __ jmp(L_copy_4_bytes);
+    }
 
     return start;
   }
@@ -2001,26 +2032,29 @@
     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
 
     assert_clean_int(count, rax); // Make sure 'count' is clean int.
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(dword_count, count);
-    __ shrptr(count, 1); // count => qword_count
-
-    // Copy from high to low addresses.  Use 'to' as scratch.
-
-    // Check for and copy trailing dword
-    __ testl(dword_count, 1);
-    __ jcc(Assembler::zero, L_copy_bytes);
-    __ movl(rax, Address(from, dword_count, Address::times_4, -4));
-    __ movl(Address(to, dword_count, Address::times_4, -4), rax);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
-    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
-    __ decrement(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(dword_count, count);
+      __ shrptr(count, 1); // count => qword_count
+
+      // Copy from high to low addresses.  Use 'to' as scratch.
+
+      // Check for and copy trailing dword
+      __ testl(dword_count, 1);
+      __ jcc(Assembler::zero, L_copy_bytes);
+      __ movl(rax, Address(from, dword_count, Address::times_4, -4));
+      __ movl(Address(to, dword_count, Address::times_4, -4), rax);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
+      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
+      __ decrement(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     if (is_oop) {
       __ jmp(L_exit);
     }
@@ -2031,8 +2065,12 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+      // Copy in multi-bytes chunks
+      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
 
   __ BIND(L_exit);
     bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
@@ -2102,20 +2140,23 @@
     BasicType type = is_oop ? T_OBJECT : T_LONG;
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
-
-    // Copy from low to high addresses.  Use 'to' as scratch.
-    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
-    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
-    __ negptr(qword_count);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
-    __ increment(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+
+      // Copy from low to high addresses.  Use 'to' as scratch.
+      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
+      __ negptr(qword_count);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
+      __ increment(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     if (is_oop) {
       __ jmp(L_exit);
     } else {
@@ -2127,8 +2168,12 @@
       __ ret(0);
     }
 
-    // Copy in multi-bytes chunks
-    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+      // Copy in multi-bytes chunks
+      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
 
     __ BIND(L_exit);
     bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
@@ -2195,16 +2240,19 @@
     BasicType type = is_oop ? T_OBJECT : T_LONG;
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
-
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
-    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
-    __ decrement(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
+      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
+      __ decrement(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     if (is_oop) {
       __ jmp(L_exit);
     } else {
@@ -2215,10 +2263,13 @@
       __ leave(); // required for proper stackwalking of RuntimeStub frame
       __ ret(0);
     }
-
-    // Copy in multi-bytes chunks
-    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+
+      // Copy in multi-bytes chunks
+      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
     __ BIND(L_exit);
     bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
     restore_arg_regs_using_thread();
@@ -6036,6 +6087,10 @@
   }
 }; // end class declaration
 
+#define UCM_TABLE_MAX_ENTRIES 16
 void StubGenerator_generate(CodeBuffer* code, bool all) {
+  if (UnsafeCopyMemory::_table == NULL) {
+    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+  }
   StubGenerator g(code, all);
 }
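
UCM_TABLE_MAX_ENTRIES is 8 in the 32-bit generator above and 16 here, sized to the number of marked copy regions each file emits. The fault handlers further down need only two queries against the table. A plausible sketch, assuming a linear scan over [start, end) entries (the real implementation lives with UnsafeCopyMemory in shared runtime code; the field names here are guesses):

    bool UnsafeCopyMemory::contains_pc(address pc) {
      for (int i = 0; i < _table_length; i++) {
        if (pc >= _table[i]._start_pc && pc < _table[i]._end_pc) {
          return true;   // pc is inside an instrumented copy region
        }
      }
      return false;
    }

    address UnsafeCopyMemory::page_error_continue_pc(address pc) {
      for (int i = 0; i < _table_length; i++) {
        if (pc >= _table[i]._start_pc && pc < _table[i]._end_pc) {
          return _table[i]._error_exit_pc;  // where the handler resumes
        }
      }
      return NULL;
    }
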
--- a/src/hotspot/cpu/zero/stubGenerator_zero.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/cpu/zero/stubGenerator_zero.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2007, 2008, 2010, 2015 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -156,9 +156,11 @@
     StubRoutines::_oop_arraycopy             = ShouldNotCallThisStub();
 
     StubRoutines::_checkcast_arraycopy       = ShouldNotCallThisStub();
-    StubRoutines::_unsafe_arraycopy          = ShouldNotCallThisStub();
     StubRoutines::_generic_arraycopy         = ShouldNotCallThisStub();
 
+    // Shared code tests for "NULL" to discover that the stub has not been generated.
+    StubRoutines::_unsafe_arraycopy          = NULL;
+
     // We don't generate specialized code for HeapWord-aligned source
     // arrays, so just use the code we've already generated
     StubRoutines::_arrayof_jbyte_disjoint_arraycopy =
--- a/src/hotspot/os/aix/os_aix.inline.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os/aix/os_aix.inline.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -37,11 +37,6 @@
 #include <sys/ioctl.h>
 #include <netdb.h>
 
-// File names are case-insensitive on windows only.
-inline int os::file_name_strncmp(const char* s1, const char* s2, size_t num) {
-  return strncmp(s1, s2, num);
-}
-
 inline bool os::uses_stack_guard_pages() {
   return true;
 }
--- a/src/hotspot/os/bsd/os_bsd.inline.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os/bsd/os_bsd.inline.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -35,11 +35,6 @@
 #include <poll.h>
 #include <netdb.h>
 
-// File names are case-insensitive on windows only
-inline int os::file_name_strncmp(const char* s1, const char* s2, size_t num) {
-  return strncmp(s1, s2, num);
-}
-
 inline bool os::uses_stack_guard_pages() {
   return true;
 }
--- a/src/hotspot/os/linux/os_linux.inline.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os/linux/os_linux.inline.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -35,11 +35,6 @@
 #include <poll.h>
 #include <netdb.h>
 
-// File names are case-insensitive on windows only
-inline int os::file_name_strncmp(const char* s1, const char* s2, size_t num) {
-  return strncmp(s1, s2, num);
-}
-
 inline bool os::uses_stack_guard_pages() {
   return true;
 }
--- a/src/hotspot/os/posix/os_posix.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os/posix/os_posix.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1450,6 +1450,30 @@
   return path;
 }
 
+bool os::same_files(const char* file1, const char* file2) {
+  if (strcmp(file1, file2) == 0) {
+    return true;
+  }
+
+  bool is_same = false;
+  struct stat st1;
+  struct stat st2;
+
+  if (os::stat(file1, &st1) < 0) {
+    return false;
+  }
+
+  if (os::stat(file2, &st2) < 0) {
+    return false;
+  }
+
+  if (st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino) {
+    // same files
+    is_same = true;
+  }
+  return is_same;
+}
+
 // Check minimum allowable stack sizes for thread creation and to initialize
 // the java system classes, including StackOverflowError - depends on page
 // size.
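
os::same_files on POSIX treats two paths as the same file exactly when they resolve to the same (st_dev, st_ino) pair, so hard links and differently spelled paths to one inode compare equal; since os::stat follows symlinks, so do symlinks. A hypothetical caller (the helper and paths below are illustrative only, not part of this change):

    // Hypothetical usage: skip re-appending a classpath entry that is
    // already present under another spelling.
    const char* existing  = "/opt/app/lib.jar";
    const char* candidate = "/opt/app/./lib.jar";
    if (!os::same_files(existing, candidate)) {
      append_boot_entry(candidate);  // assumed helper
    }
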
--- a/src/hotspot/os/solaris/os_solaris.inline.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os/solaris/os_solaris.inline.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -37,11 +37,6 @@
 #include <netdb.h>
 #include <setjmp.h>
 
-// File names are case-insensitive on windows only
-inline int os::file_name_strncmp(const char* s1, const char* s2, size_t num) {
-  return strncmp(s1, s2, num);
-}
-
 inline bool os::uses_stack_guard_pages() {
   return true;
 }
--- a/src/hotspot/os/windows/os_windows.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os/windows/os_windows.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -2581,10 +2581,18 @@
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
       }
-      if ((thread->thread_state() == _thread_in_vm &&
+
+      bool is_unsafe_arraycopy = (thread->thread_state() == _thread_in_native || in_java) && UnsafeCopyMemory::contains_pc(pc);
+      if (((thread->thread_state() == _thread_in_vm ||
+           thread->thread_state() == _thread_in_native ||
+           is_unsafe_arraycopy) &&
           thread->doing_unsafe_access()) ||
           (nm != NULL && nm->has_unsafe_access())) {
-        return Handle_Exception(exceptionInfo, SharedRuntime::handle_unsafe_access(thread, (address)Assembler::locate_next_instruction(pc)));
+        address next_pc = Assembler::locate_next_instruction(pc);
+        if (is_unsafe_arraycopy) {
+          next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+        }
+        return Handle_Exception(exceptionInfo, SharedRuntime::handle_unsafe_access(thread, next_pc));
       }
     }
 
@@ -4359,6 +4367,88 @@
   return ret;
 }
 
+static HANDLE create_read_only_file_handle(const char* file) {
+  if (file == NULL) {
+    return INVALID_HANDLE_VALUE;
+  }
+
+  char* nativepath = (char*)os::strdup(file, mtInternal);
+  if (nativepath == NULL) {
+    errno = ENOMEM;
+    return INVALID_HANDLE_VALUE;
+  }
+  os::native_path(nativepath);
+
+  size_t len = strlen(nativepath);
+  HANDLE handle = INVALID_HANDLE_VALUE;
+
+  if (len < MAX_PATH) {
+    handle = ::CreateFile(nativepath, 0, FILE_SHARE_READ,
+                          NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+  } else {
+    errno_t err = ERROR_SUCCESS;
+    wchar_t* wfile = create_unc_path(nativepath, err);
+    if (err != ERROR_SUCCESS) {
+      if (wfile != NULL) {
+        destroy_unc_path(wfile);
+      }
+      os::free(nativepath);
+      return INVALID_HANDLE_VALUE;
+    }
+    handle = ::CreateFileW(wfile, 0, FILE_SHARE_READ,
+                           NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+    destroy_unc_path(wfile);
+  }
+
+  os::free(nativepath);
+  return handle;
+}
+
+bool os::same_files(const char* file1, const char* file2) {
+
+  if (file1 == NULL && file2 == NULL) {
+    return true;
+  }
+
+  if (file1 == NULL || file2 == NULL) {
+    return false;
+  }
+
+  if (strcmp(file1, file2) == 0) {
+    return true;
+  }
+
+  HANDLE handle1 = create_read_only_file_handle(file1);
+  HANDLE handle2 = create_read_only_file_handle(file2);
+  bool result = false;
+
+  // if we could open both paths...
+  if (handle1 != INVALID_HANDLE_VALUE && handle2 != INVALID_HANDLE_VALUE) {
+    BY_HANDLE_FILE_INFORMATION fileInfo1;
+    BY_HANDLE_FILE_INFORMATION fileInfo2;
+    if (::GetFileInformationByHandle(handle1, &fileInfo1) &&
+      ::GetFileInformationByHandle(handle2, &fileInfo2)) {
+      // the paths are the same if they refer to the same file (file index) on the same volume (volume serial number)
+      if (fileInfo1.dwVolumeSerialNumber == fileInfo2.dwVolumeSerialNumber &&
+        fileInfo1.nFileIndexHigh == fileInfo2.nFileIndexHigh &&
+        fileInfo1.nFileIndexLow == fileInfo2.nFileIndexLow) {
+        result = true;
+      }
+    }
+  }
+
+  // free the handles
+  if (handle1 != INVALID_HANDLE_VALUE) {
+    ::CloseHandle(handle1);
+  }
+
+  if (handle2 != INVALID_HANDLE_VALUE) {
+    ::CloseHandle(handle2);
+  }
+
+  return result;
+}
+
 
 #define FT2INT64(ft) \
   ((jlong)((jlong)(ft).dwHighDateTime << 32 | (julong)(ft).dwLowDateTime))
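
The Windows version of os::same_files keys on the (volume serial number, file index) pair reported by GetFileInformationByHandle, the closest analogue of POSIX (st_dev, st_ino); the handles are opened with dwDesiredAccess == 0, which grants attribute access without requiring read permission. The comparison, factored out for clarity (a sketch, not code from the patch):

    static bool same_file_info(const BY_HANDLE_FILE_INFORMATION& a,
                               const BY_HANDLE_FILE_INFORMATION& b) {
      // same volume and same file index => same underlying file
      return a.dwVolumeSerialNumber == b.dwVolumeSerialNumber &&
             a.nFileIndexHigh == b.nFileIndexHigh &&
             a.nFileIndexLow  == b.nFileIndexLow;
    }
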
--- a/src/hotspot/os/windows/os_windows.inline.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os/windows/os_windows.inline.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -32,11 +32,6 @@
 
 inline const int os::default_file_open_flags() { return O_BINARY | O_NOINHERIT;}
 
-// File names are case-insensitive on windows only
-inline int os::file_name_strncmp(const char* s, const char* t, size_t num) {
-  return _strnicmp(s, t, num);
-}
-
 inline void  os::dll_unload(void *lib) {
   ::FreeLibrary((HMODULE)lib);
 }
--- a/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2018 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -441,8 +441,12 @@
         // underlying file has been truncated. Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         CompiledMethod* nm = cb->as_compiled_method_or_null();
-        if (nm != NULL && nm->has_unsafe_access()) {
+        bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc));
+        if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
           address next_pc = pc + 4;
+          if (is_unsafe_arraycopy) {
+            next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+          }
           next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc);
           os::Aix::ucontext_set_pc(uc, next_pc);
           return 1;
@@ -461,9 +465,13 @@
         stub = pc + 4;  // continue with next instruction.
         goto run_stub;
       }
-      else if (thread->thread_state() == _thread_in_vm &&
+      else if ((thread->thread_state() == _thread_in_vm ||
+                thread->thread_state() == _thread_in_native) &&
                sig == SIGBUS && thread->doing_unsafe_access()) {
         address next_pc = pc + 4;
+        if (UnsafeCopyMemory::contains_pc(pc)) {
+          next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+        }
         next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc);
         os::Aix::ucontext_set_pc(uc, next_pc);
         return 1;
--- a/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -589,8 +589,12 @@
         // Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
-        if (nm != NULL && nm->has_unsafe_access()) {
+        bool is_unsafe_arraycopy = thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc);
+        if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
           address next_pc = Assembler::locate_next_instruction(pc);
+          if (is_unsafe_arraycopy) {
+            next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+          }
           stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
         }
       }
@@ -659,10 +663,14 @@
           // Determination of interpreter/vtable stub/compiled code null exception
           stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
       }
-    } else if (thread->thread_state() == _thread_in_vm &&
+    } else if ((thread->thread_state() == _thread_in_vm ||
+                thread->thread_state() == _thread_in_native) &&
                sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
                thread->doing_unsafe_access()) {
         address next_pc = Assembler::locate_next_instruction(pc);
+        if (UnsafeCopyMemory::contains_pc(pc)) {
+          next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+        }
         stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
     }
 
--- a/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -193,7 +193,8 @@
     /*if (thread->thread_state() == _thread_in_Java) {
       ShouldNotCallThis();
     }
-    else*/ if (thread->thread_state() == _thread_in_vm &&
+    else*/ if ((thread->thread_state() == _thread_in_vm ||
+               thread->thread_state() == _thread_in_native) &&
                sig == SIGBUS && thread->doing_unsafe_access()) {
       ShouldNotCallThis();
     }
--- a/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -419,8 +419,12 @@
         // Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
-        if (nm != NULL && nm->has_unsafe_access()) {
+        bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc));
+        if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
           address next_pc = pc + NativeCall::instruction_size;
+          if (is_unsafe_arraycopy) {
+            next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+          }
           stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
         }
       }
@@ -439,10 +443,14 @@
           // Determination of interpreter/vtable stub/compiled code null exception
           stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
       }
-    } else if (thread->thread_state() == _thread_in_vm &&
+    } else if ((thread->thread_state() == _thread_in_vm ||
+                thread->thread_state() == _thread_in_native) &&
                sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
                thread->doing_unsafe_access()) {
       address next_pc = pc + NativeCall::instruction_size;
+      if (UnsafeCopyMemory::contains_pc(pc)) {
+        next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+      }
       stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
     }
 
--- a/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -384,7 +384,7 @@
         // Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
-        if (nm != NULL && nm->has_unsafe_access()) {
+        if ((nm != NULL && nm->has_unsafe_access()) || (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc))) {
           unsafe_access = true;
         }
       } else if (sig == SIGSEGV &&
@@ -398,7 +398,8 @@
         // Zombie
         stub = SharedRuntime::get_handle_wrong_method_stub();
       }
-    } else if (thread->thread_state() == _thread_in_vm &&
+    } else if ((thread->thread_state() == _thread_in_vm ||
+                thread->thread_state() == _thread_in_native) &&
                sig == SIGBUS && thread->doing_unsafe_access()) {
         unsafe_access = true;
     }
@@ -418,6 +419,9 @@
     // any other suitable exception reason,
     // so assume it is an unsafe access.
     address next_pc = pc + Assembler::InstructionSize;
+    if (UnsafeCopyMemory::contains_pc(pc)) {
+      next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+    }
 #ifdef __thumb__
     if (uc->uc_mcontext.arm_cpsr & PSR_T_BIT) {
       next_pc = (address)((intptr_t)next_pc | 0x1);
--- a/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -469,8 +469,12 @@
         // underlying file has been truncated. Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
-        if (nm != NULL && nm->has_unsafe_access()) {
+        bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc));
+        if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
           address next_pc = pc + 4;
+          if (is_unsafe_arraycopy) {
+            next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+          }
           next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc);
           os::Linux::ucontext_set_pc(uc, next_pc);
           return true;
@@ -485,11 +489,15 @@
                         // flushing of icache is not necessary.
         stub = pc + 4;  // continue with next instruction.
       }
-      else if (thread->thread_state() == _thread_in_vm &&
+      else if ((thread->thread_state() == _thread_in_vm ||
+                thread->thread_state() == _thread_in_native) &&
                sig == SIGBUS && thread->doing_unsafe_access()) {
         address next_pc = pc + 4;
+        if (UnsafeCopyMemory::contains_pc(pc)) {
+          next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+        }
         next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc);
-        os::Linux::ucontext_set_pc(uc, pc + 4);
+        os::Linux::ucontext_set_pc(uc, next_pc);
         return true;
       }
     }
--- a/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2016, 2018 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -467,7 +467,8 @@
         // when the vector facility is installed, but operating system support is missing.
         VM_Version::reset_has_VectorFacility();
         stub = pc; // Continue with next instruction.
-      } else if (thread->thread_state() == _thread_in_vm &&
+      } else if ((thread->thread_state() == _thread_in_vm ||
+                  thread->thread_state() == _thread_in_native) &&
                  sig == SIGBUS && thread->doing_unsafe_access()) {
         // We don't really need a stub here! Just set the pending exception and
         // continue at the next instruction after the faulting read. Returning
--- a/src/hotspot/os_cpu/linux_sparc/os_linux_sparc.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os_cpu/linux_sparc/os_linux_sparc.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -385,7 +385,11 @@
   // Do not crash the VM in such a case.
   CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
   CompiledMethod* nm = cb->as_compiled_method_or_null();
-  if (nm != NULL && nm->has_unsafe_access()) {
+  bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc));
+  if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
+    if (is_unsafe_arraycopy) {
+      npc = UnsafeCopyMemory::page_error_continue_pc(pc);
+    }
     *stub = SharedRuntime::handle_unsafe_access(thread, npc);
     return true;
   }
@@ -550,8 +554,12 @@
     }
 
     if (sig == SIGBUS &&
-        thread->thread_state() == _thread_in_vm &&
+        (thread->thread_state() == _thread_in_vm ||
+         thread->thread_state() == _thread_in_native) &&
         thread->doing_unsafe_access()) {
+      if (UnsafeCopyMemory::contains_pc(pc)) {
+        npc = UnsafeCopyMemory::page_error_continue_pc(pc);
+      }
       stub = SharedRuntime::handle_unsafe_access(thread, npc);
     }
 
--- a/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -435,8 +435,12 @@
         // Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
-        if (nm != NULL && nm->has_unsafe_access()) {
+        bool is_unsafe_arraycopy = thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc);
+        if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
           address next_pc = Assembler::locate_next_instruction(pc);
+          if (is_unsafe_arraycopy) {
+            next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+          }
           stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
         }
       }
@@ -483,10 +487,14 @@
           // Determination of interpreter/vtable stub/compiled code null exception
           stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
       }
-    } else if (thread->thread_state() == _thread_in_vm &&
-               sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
-               thread->doing_unsafe_access()) {
+    } else if ((thread->thread_state() == _thread_in_vm ||
+                thread->thread_state() == _thread_in_native) &&
+               (sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
+               thread->doing_unsafe_access())) {
         address next_pc = Assembler::locate_next_instruction(pc);
+        if (UnsafeCopyMemory::contains_pc(pc)) {
+          next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+        }
         stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
     }
 
--- a/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -207,7 +207,8 @@
     /*if (thread->thread_state() == _thread_in_Java) {
       ShouldNotCallThis();
     }
-    else*/ if (thread->thread_state() == _thread_in_vm &&
+    else*/ if ((thread->thread_state() == _thread_in_vm ||
+               thread->thread_state() == _thread_in_native) &&
                sig == SIGBUS && thread->doing_unsafe_access()) {
       ShouldNotCallThis();
     }
--- a/src/hotspot/os_cpu/solaris_sparc/os_solaris_sparc.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os_cpu/solaris_sparc/os_solaris_sparc.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -436,8 +436,12 @@
     }
 
 
-    if (thread->thread_state() == _thread_in_vm) {
+    if (thread->thread_state() == _thread_in_vm ||
+        thread->thread_state() == _thread_in_native) {
       if (sig == SIGBUS && thread->doing_unsafe_access()) {
+        if (UnsafeCopyMemory::contains_pc(pc)) {
+          npc = UnsafeCopyMemory::page_error_continue_pc(pc);
+        }
         stub = SharedRuntime::handle_unsafe_access(thread, npc);
       }
     }
@@ -476,7 +480,11 @@
         // Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         CompiledMethod* nm = cb->as_compiled_method_or_null();
-        if (nm != NULL && nm->has_unsafe_access()) {
+        bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc));
+        if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
+          if (is_unsafe_arraycopy) {
+            npc = UnsafeCopyMemory::page_error_continue_pc(pc);
+          }
           stub = SharedRuntime::handle_unsafe_access(thread, npc);
         }
       }
--- a/src/hotspot/os_cpu/solaris_x86/os_solaris_x86.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/os_cpu/solaris_x86/os_solaris_x86.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -517,9 +517,13 @@
       stub = VM_Version::cpuinfo_cont_addr();
     }
 
-    if (thread->thread_state() == _thread_in_vm) {
+    if (thread->thread_state() == _thread_in_vm ||
+        thread->thread_state() == _thread_in_native) {
       if (sig == SIGBUS && info->si_code == BUS_OBJERR && thread->doing_unsafe_access()) {
         address next_pc = Assembler::locate_next_instruction(pc);
+        if (UnsafeCopyMemory::contains_pc(pc)) {
+          next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+        }
         stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
       }
     }
@@ -536,8 +540,12 @@
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         if (cb != NULL) {
           CompiledMethod* nm = cb->as_compiled_method_or_null();
-          if (nm != NULL && nm->has_unsafe_access()) {
+          bool is_unsafe_arraycopy = thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc);
+          if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
             address next_pc = Assembler::locate_next_instruction(pc);
+            if (is_unsafe_arraycopy) {
+              next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+            }
             stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
           }
         }
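
All of the os_cpu handlers above apply one transformation; only the fall-back "next instruction" computation differs per architecture (pc + 4 on ppc, pc + NativeCall::instruction_size on aarch64, Assembler::locate_next_instruction(pc) on x86, the delay-slot npc on sparc). Condensed from the hunks above:

    // Common shape of the SIGBUS/SIGSEGV rerouting added in this change:
    bool is_unsafe_arraycopy = thread->doing_unsafe_access() &&
                               UnsafeCopyMemory::contains_pc(pc);
    if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
      address next_pc = /* platform-specific next instruction */ pc;
      if (is_unsafe_arraycopy) {
        // resume at the continuation recorded by UnsafeCopyMemoryMark
        next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
      }
      stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
    }
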
--- a/src/hotspot/share/aot/aotCodeHeap.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/aot/aotCodeHeap.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -38,7 +38,6 @@
 #include "memory/universe.hpp"
 #include "oops/compressedOops.hpp"
 #include "oops/method.inline.hpp"
-#include "runtime/deoptimization.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/os.hpp"
 #include "runtime/safepointVerifiers.hpp"
@@ -734,7 +733,8 @@
     }
   }
   if (marked > 0) {
-    Deoptimization::deoptimize_all_marked();
+    VM_Deoptimize op;
+    VMThread::execute(&op);
   }
 }
 
--- a/src/hotspot/share/aot/aotCompiledMethod.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/aot/aotCompiledMethod.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -165,7 +165,7 @@
 
   {
     // Enter critical section.  Does not block for safepoint.
-    MutexLocker pl(CompiledMethod_lock, Mutex::_no_safepoint_check_flag);
+    MutexLocker pl(Patching_lock, Mutex::_no_safepoint_check_flag);
 
     if (*_state_adr == new_state) {
       // another thread already performed this transition so nothing
@@ -188,10 +188,12 @@
 #endif
 
     // Remove AOTCompiledMethod from method.
-    if (method() != NULL) {
-      method()->unlink_code(this);
+    if (method() != NULL && (method()->code() == this ||
+                             method()->from_compiled_entry() == verified_entry_point())) {
+      HandleMark hm;
+      method()->clear_code(false /* already owns Patching_lock */);
     }
-  } // leave critical region under CompiledMethod_lock
+  } // leave critical region under Patching_lock
 
 
   if (TraceCreateZombies) {
@@ -214,7 +216,7 @@
 
   {
     // Enter critical section.  Does not block for safepoint.
-    MutexLocker pl(CompiledMethod_lock, Mutex::_no_safepoint_check_flag);
+    MutexLocker pl(Patching_lock, Mutex::_no_safepoint_check_flag);
 
     if (*_state_adr == in_use) {
       // another thread already performed this transition so nothing
@@ -228,7 +230,7 @@
 
     // Log the transition once
     log_state_change();
-  } // leave critical region under CompiledMethod_lock
+  } // leave critical region under Patching_lock
 
 
   if (TraceCreateZombies) {
--- a/src/hotspot/share/aot/aotCompiledMethod.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/aot/aotCompiledMethod.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -176,7 +176,6 @@
                                                  state() == not_used; }
   virtual bool is_alive() const { return _is_alive(); }
   virtual bool is_in_use() const { return state() == in_use; }
-  virtual bool is_not_installed() const { return state() == not_installed; }
 
   virtual bool is_unloading() { return false; }
 
--- a/src/hotspot/share/ci/ciMethodData.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/ci/ciMethodData.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -81,13 +81,13 @@
 // Check for entries that reference an unloaded method
 class PrepareExtraDataClosure : public CleanExtraDataClosure {
   MethodData*            _mdo;
-  uint64_t               _safepoint_counter;
+  SafepointStateTracker  _safepoint_tracker;
   GrowableArray<Method*> _uncached_methods;
 
 public:
   PrepareExtraDataClosure(MethodData* mdo)
     : _mdo(mdo),
-      _safepoint_counter(SafepointSynchronize::safepoint_counter()),
+      _safepoint_tracker(SafepointSynchronize::safepoint_state_tracker()),
       _uncached_methods()
   { }
 
@@ -103,7 +103,7 @@
   }
 
   bool has_safepointed() {
-    return SafepointSynchronize::safepoint_counter() != _safepoint_counter;
+    return _safepoint_tracker.safepoint_state_changed();
   }
 
   bool finish() {
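
This hunk swaps a raw safepoint counter for a SafepointStateTracker object that answers "has a safepoint happened since I was created?". A minimal sketch of that capture-then-compare idea, assuming a global counter bumped on every safepoint (the names below are illustrative, not HotSpot's implementation):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    // Bumped by the runtime whenever a safepoint begins or ends.
    static std::atomic<uint64_t> g_safepoint_counter{0};

    // Captures the counter once; callers later ask whether any safepoint
    // has occurred since, instead of comparing raw counters by hand.
    class SafepointTracker {
      uint64_t _captured;
     public:
      SafepointTracker() : _captured(g_safepoint_counter.load()) {}
      bool state_changed() const { return g_safepoint_counter.load() != _captured; }
    };

    int main() {
      SafepointTracker tracker;          // like safepoint_state_tracker()
      assert(!tracker.state_changed());  // no safepoint yet
      g_safepoint_counter.fetch_add(1);  // the runtime safepoints...
      assert(tracker.state_changed());   // ...and has_safepointed() now reports true
    }
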
--- a/src/hotspot/share/classfile/classLoader.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/classfile/classLoader.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -292,11 +292,13 @@
   return NULL;
 }
 
-ClassPathZipEntry::ClassPathZipEntry(jzfile* zip, const char* zip_name, bool is_boot_append) : ClassPathEntry() {
+ClassPathZipEntry::ClassPathZipEntry(jzfile* zip, const char* zip_name,
+                                     bool is_boot_append, bool from_class_path_attr) : ClassPathEntry() {
   _zip = zip;
   char *copy = NEW_C_HEAP_ARRAY(char, strlen(zip_name)+1, mtClass);
   strcpy(copy, zip_name);
   _zip_name = copy;
+  _from_class_path_attr = from_class_path_attr;
 }
 
 ClassPathZipEntry::~ClassPathZipEntry() {
@@ -577,7 +579,7 @@
     strncpy(path, &class_path[start], end - start);
     path[end - start] = '\0';
 
-    update_class_path_entry_list(path, false, false);
+    update_class_path_entry_list(path, false, false, false);
 
     while (class_path[end] == os::path_separator()[0]) {
       end++;
@@ -612,7 +614,7 @@
   // File or directory found
   ClassPathEntry* new_entry = NULL;
   new_entry = create_class_path_entry(path, &st, true /* throw_exception */,
-                                      false /*is_boot_append */, CHECK);
+                                      false /*is_boot_append */, false /* from_class_path_attr */, CHECK);
   if (new_entry == NULL) {
     return;
   }
@@ -668,7 +670,7 @@
       struct stat st;
       if (os::stat(path, &st) == 0) {
         // File or directory found
-        ClassPathEntry* new_entry = create_class_path_entry(path, &st, false, false, CHECK);
+        ClassPathEntry* new_entry = create_class_path_entry(path, &st, false, false, false, CHECK);
         // If the path specification is valid, enter it into this module's list
         if (new_entry != NULL) {
           module_cpl->add_to_list(new_entry);
@@ -737,7 +739,7 @@
       struct stat st;
       if (os::stat(path, &st) == 0) {
         // Directory found
-        ClassPathEntry* new_entry = create_class_path_entry(path, &st, false, false, CHECK);
+        ClassPathEntry* new_entry = create_class_path_entry(path, &st, false, false, false, CHECK);
 
         // Check for a jimage
         if (Arguments::has_jimage()) {
@@ -754,7 +756,7 @@
     } else {
       // Every entry on the system boot class path after the initial base piece,
       // which is set by os::set_boot_path(), is considered an appended entry.
-      update_class_path_entry_list(path, false, true);
+      update_class_path_entry_list(path, false, true, false);
     }
 
     while (class_path[end] == os::path_separator()[0]) {
@@ -782,7 +784,7 @@
   struct stat st;
   if (os::stat(path, &st) == 0) {
     // Directory found
-    ClassPathEntry* new_entry = create_class_path_entry(path, &st, false, false, CHECK);
+    ClassPathEntry* new_entry = create_class_path_entry(path, &st, false, false, false, CHECK);
 
     // If the path specification is valid, enter it into this module's list.
     // There is no need to check for duplicate modules in the exploded entry list,
@@ -802,7 +804,9 @@
 
 ClassPathEntry* ClassLoader::create_class_path_entry(const char *path, const struct stat* st,
                                                      bool throw_exception,
-                                                     bool is_boot_append, TRAPS) {
+                                                     bool is_boot_append,
+                                                     bool from_class_path_attr,
+                                                     TRAPS) {
   JavaThread* thread = JavaThread::current();
   ClassPathEntry* new_entry = NULL;
   if ((st->st_mode & S_IFMT) == S_IFREG) {
@@ -832,7 +836,7 @@
         zip = (*ZipOpen)(canonical_path, &error_msg);
       }
       if (zip != NULL && error_msg == NULL) {
-        new_entry = new ClassPathZipEntry(zip, path, is_boot_append);
+        new_entry = new ClassPathZipEntry(zip, path, is_boot_append, from_class_path_attr);
       } else {
         char *msg;
         if (error_msg == NULL) {
@@ -882,7 +886,7 @@
         }
         if (zip != NULL && error_msg == NULL) {
           // create using canonical path
-          return new ClassPathZipEntry(zip, canonical_path, is_boot_append);
+          return new ClassPathZipEntry(zip, canonical_path, is_boot_append, false);
         }
       }
     }
@@ -956,13 +960,14 @@
 bool ClassLoader::update_class_path_entry_list(const char *path,
                                                bool check_for_duplicates,
                                                bool is_boot_append,
+                                               bool from_class_path_attr,
                                                bool throw_exception) {
   struct stat st;
   if (os::stat(path, &st) == 0) {
     // File or directory found
     ClassPathEntry* new_entry = NULL;
     Thread* THREAD = Thread::current();
-    new_entry = create_class_path_entry(path, &st, throw_exception, is_boot_append, CHECK_(false));
+    new_entry = create_class_path_entry(path, &st, throw_exception, is_boot_append, from_class_path_attr, CHECK_(false));
     if (new_entry == NULL) {
       return false;
     }
--- a/src/hotspot/share/classfile/classLoader.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/classfile/classLoader.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -53,6 +53,8 @@
   void set_next(ClassPathEntry* next);
   virtual bool is_modules_image() const = 0;
   virtual bool is_jar_file() const = 0;
+  // Is this entry created from the "Class-path" attribute from a JAR Manifest?
+  virtual bool from_class_path_attr() const = 0;
   virtual const char* name() const = 0;
   virtual JImageFile* jimage() const = 0;
   virtual void close_jimage() = 0;
@@ -73,6 +75,7 @@
  public:
   bool is_modules_image() const { return false; }
   bool is_jar_file() const { return false;  }
+  bool from_class_path_attr() const { return false; }
   const char* name() const { return _dir; }
   JImageFile* jimage() const { return NULL; }
   void close_jimage() {}
@@ -99,13 +102,15 @@
  private:
   jzfile* _zip;              // The zip archive
   const char*   _zip_name;   // Name of zip archive
+  bool _from_class_path_attr; // From the "Class-path" attribute of a jar file
  public:
   bool is_modules_image() const { return false; }
   bool is_jar_file() const { return true;  }
+  bool from_class_path_attr() const { return _from_class_path_attr; }
   const char* name() const { return _zip_name; }
   JImageFile* jimage() const { return NULL; }
   void close_jimage() {}
-  ClassPathZipEntry(jzfile* zip, const char* zip_name, bool is_boot_append);
+  ClassPathZipEntry(jzfile* zip, const char* zip_name, bool is_boot_append, bool from_class_path_attr);
   virtual ~ClassPathZipEntry();
   u1* open_entry(const char* name, jint* filesize, bool nul_terminate, TRAPS);
   ClassFileStream* open_stream(const char* name, TRAPS);
@@ -122,6 +127,7 @@
 public:
   bool is_modules_image() const;
   bool is_jar_file() const { return false; }
+  bool from_class_path_attr() const { return false; }
   bool is_open() const { return _jimage != NULL; }
   const char* name() const { return _name == NULL ? "" : _name; }
   JImageFile* jimage() const { return _jimage; }
@@ -257,7 +263,8 @@
  public:
   static ClassPathEntry* create_class_path_entry(const char *path, const struct stat* st,
                                                  bool throw_exception,
-                                                 bool is_boot_append, TRAPS);
+                                                 bool is_boot_append,
+                                                 bool from_class_path_attr, TRAPS);
 
   // If the package for the fully qualified class name is in the boot
   // loader's package entry table then add_package() sets the classpath_index
@@ -281,6 +288,7 @@
   static bool update_class_path_entry_list(const char *path,
                                            bool check_for_duplicates,
                                            bool is_boot_append,
+                                           bool from_class_path_attr,
                                            bool throw_exception=true);
   CDS_ONLY(static void update_module_path_entry_list(const char *path, TRAPS);)
   static void print_bootclasspath();
--- a/src/hotspot/share/classfile/classLoaderExt.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/classfile/classLoaderExt.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -213,7 +213,7 @@
         int n = os::snprintf(libname, libname_len + 1, "%.*s%s", dir_len, dir_name, file_start);
         assert((size_t)n == libname_len, "Unexpected number of characters in string");
         trace_class_path("library = ", libname);
-        ClassLoader::update_class_path_entry_list(libname, true, false);
+        ClassLoader::update_class_path_entry_list(libname, true, false, true /* from_class_path_attr */);
       }
 
       file_start = file_end;
@@ -339,7 +339,7 @@
   }
   ClassPathEntry* new_entry = NULL;
 
-  new_entry = create_class_path_entry(path, &st, false, false, CHECK_NULL);
+  new_entry = create_class_path_entry(path, &st, false, false, false, CHECK_NULL);
   if (new_entry == NULL) {
     return NULL;
   }
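
The classLoaderExt.cpp hunk tags entries that originate from a JAR manifest's "Class-Path" attribute (from_class_path_attr = true) so they can later be distinguished from explicit -cp entries. Per the JAR spec, that attribute is a space-separated list of paths relative to the referencing JAR's directory; a rough sketch of splitting it into flagged entries (Entry and entries_from_manifest are hypothetical helpers):

    #include <cstdio>
    #include <sstream>
    #include <string>
    #include <vector>

    struct Entry {
      std::string path;
      bool from_class_path_attr;   // mirrors the flag threaded through ClassLoader
    };

    // Split a manifest "Class-Path:" value into per-entry paths, resolved
    // against the directory of the JAR that declares the attribute.
    std::vector<Entry> entries_from_manifest(const std::string& jar_dir,
                                             const std::string& attr_value) {
      std::vector<Entry> out;
      std::istringstream in(attr_value);
      std::string item;
      while (in >> item) {
        out.push_back(Entry{jar_dir + "/" + item, /*from_class_path_attr=*/true});
      }
      return out;
    }

    int main() {
      for (const Entry& e : entries_from_manifest("/app/lib", "util.jar extra.jar")) {
        std::printf("%s (from Class-Path attr: %d)\n", e.path.c_str(), e.from_class_path_attr);
      }
    }
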
--- a/src/hotspot/share/classfile/sharedPathsMiscInfo.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/classfile/sharedPathsMiscInfo.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -153,83 +153,10 @@
   return true;
 }
 
-char* skip_first_path_entry(const char* path) {
-  size_t path_sep_len = strlen(os::path_separator());
-  char* p = strstr((char*)path, os::path_separator());
-  if (p != NULL) {
-    debug_only( {
-      size_t image_name_len = strlen(MODULES_IMAGE_NAME);
-      assert(strncmp(p - image_name_len, MODULES_IMAGE_NAME, image_name_len) == 0,
-             "first entry must be the modules image");
-    } );
-    p += path_sep_len;
-  } else {
-    debug_only( {
-      assert(ClassLoader::string_ends_with(path, MODULES_IMAGE_NAME),
-             "first entry must be the modules image");
-    } );
-  }
-  return p;
-}
-
 bool SharedPathsMiscInfo::check(jint type, const char* path, bool is_static) {
   assert(UseSharedSpaces, "runtime only");
   switch (type) {
   case BOOT_PATH:
-    {
-      //
-      // - Archive contains boot classes only - relaxed boot path check:
-      //   Extra path elements appended to the boot path at runtime are allowed.
-      //
-      // - Archive contains application or platform classes - strict boot path check:
-      //   Validate the entire runtime boot path, which must be compactible
-      //   with the dump time boot path. Appending boot path at runtime is not
-      //   allowed.
-      //
-
-      // The first entry in boot path is the modules_image (guaranteed by
-      // ClassLoader::setup_boot_search_path()). Skip the first entry. The
-      // path of the runtime modules_image may be different from the dump
-      // time path (e.g. the JDK image is copied to a different location
-      // after generating the shared archive), which is acceptable. For most
-      // common cases, the dump time boot path might contain modules_image only.
-      char* runtime_boot_path = Arguments::get_sysclasspath();
-      char* rp = skip_first_path_entry(runtime_boot_path);
-      char* dp = skip_first_path_entry(path);
-
-      bool relaxed_check = is_static ?
-                             !FileMapInfo::current_info()->header()->has_platform_or_app_classes() :
-                             !FileMapInfo::dynamic_info()->header()->has_platform_or_app_classes();
-      if (dp == NULL && rp == NULL) {
-        break;   // ok, both runtime and dump time boot paths have modules_images only
-      } else if (dp == NULL && rp != NULL && relaxed_check) {
-        break;   // ok, relaxed check, runtime has extra boot append path entries
-      } else if (dp != NULL && rp != NULL) {
-        size_t num;
-        size_t dp_len = strlen(dp);
-        size_t rp_len = strlen(rp);
-        if (rp_len >= dp_len) {
-          if (relaxed_check) {
-            // only check the leading entries in the runtime boot path, up to
-            // the length of the dump time boot path
-            num = dp_len;
-          } else {
-            // check the full runtime boot path, must match with dump time
-            num = rp_len;
-          }
-
-          if (os::file_name_strncmp(dp, rp, num) == 0) {
-            // make sure it is the end of an entry in the runtime boot path
-            if (rp[dp_len] == '\0' || rp[dp_len] == os::path_separator()[0]) {
-              break; // ok, runtime and dump time paths match
-            }
-          }
-        }
-      }
-
-      // The paths are different
-      return fail("[BOOT classpath mismatch, actual =", runtime_boot_path);
-    }
     break;
   case NON_EXIST:
     {
@@ -242,22 +169,6 @@
     }
     break;
   case APP_PATH:
-    {
-      size_t len = strlen(path);
-      const char *appcp = Arguments::get_appclasspath();
-      assert(appcp != NULL, "NULL app classpath");
-      size_t appcp_len = strlen(appcp);
-      if (appcp_len < len) {
-        return fail("Run time APP classpath is shorter than the one at dump time: ", appcp);
-      }
-      // Prefix is OK: E.g., dump with -cp foo.jar, but run with -cp foo.jar:bar.jar.
-      if (os::file_name_strncmp(path, appcp, len) != 0) {
-        return fail("[APP classpath mismatch, actual: -Djava.class.path=", appcp);
-      }
-      if (appcp[len] != '\0' && appcp[len] != os::path_separator()[0]) {
-        return fail("Dump time APP classpath is not a proper prefix of run time APP classpath: ", appcp);
-      }
-    }
     break;
   default:
     return fail("Corrupted archive file header");
--- a/src/hotspot/share/classfile/stringTable.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/classfile/stringTable.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -79,8 +79,7 @@
 
 // --------------------------------------------------------------------------
 
-typedef ConcurrentHashTable<WeakHandle<vm_string_table_data>,
-                            StringTableConfig, mtSymbol> StringTableHash;
+typedef ConcurrentHashTable<StringTableConfig, mtSymbol> StringTableHash;
 static StringTableHash* _local_table = NULL;
 
 volatile bool StringTable::_has_work = false;
@@ -101,11 +100,12 @@
     java_lang_String::hash_code(s, len);
 }
 
-class StringTableConfig : public StringTableHash::BaseConfig {
+class StringTableConfig : public StackObj {
  private:
  public:
-  static uintx get_hash(WeakHandle<vm_string_table_data> const& value,
-                        bool* is_dead) {
+  typedef WeakHandle<vm_string_table_data> Value;
+
+  static uintx get_hash(Value const& value, bool* is_dead) {
     EXCEPTION_MARK;
     oop val_oop = value.peek();
     if (val_oop == NULL) {
@@ -124,15 +124,13 @@
     return 0;
   }
   // We use default allocation/deallocation but counted
-  static void* allocate_node(size_t size,
-                             WeakHandle<vm_string_table_data> const& value) {
+  static void* allocate_node(size_t size, Value const& value) {
     StringTable::item_added();
-    return StringTableHash::BaseConfig::allocate_node(size, value);
+    return AllocateHeap(size, mtSymbol);
   }
-  static void free_node(void* memory,
-                        WeakHandle<vm_string_table_data> const& value) {
+  static void free_node(void* memory, Value const& value) {
     value.release();
-    StringTableHash::BaseConfig::free_node(memory, value);
+    FreeHeap(memory);
     StringTable::item_removed();
   }
 };
--- a/src/hotspot/share/classfile/symbolTable.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/classfile/symbolTable.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -77,8 +77,7 @@
 
 // --------------------------------------------------------------------------
 
-typedef ConcurrentHashTable<Symbol*,
-                            SymbolTableConfig, mtSymbol> SymbolTableHash;
+typedef ConcurrentHashTable<SymbolTableConfig, mtSymbol> SymbolTableHash;
 static SymbolTableHash* _local_table = NULL;
 
 volatile bool SymbolTable::_has_work = 0;
@@ -121,10 +120,12 @@
 }
 #endif
 
-class SymbolTableConfig : public SymbolTableHash::BaseConfig {
+class SymbolTableConfig : public AllStatic {
 private:
 public:
-  static uintx get_hash(Symbol* const& value, bool* is_dead) {
+  typedef Symbol* Value;  // value of the Node in the hashtable
+
+  static uintx get_hash(Value const& value, bool* is_dead) {
     *is_dead = (value->refcount() == 0);
     if (*is_dead) {
       return 0;
@@ -133,11 +134,11 @@
     }
   }
   // We use default allocation/deallocation but counted
-  static void* allocate_node(size_t size, Symbol* const& value) {
+  static void* allocate_node(size_t size, Value const& value) {
     SymbolTable::item_added();
-    return SymbolTableHash::BaseConfig::allocate_node(size, value);
+    return AllocateHeap(size, mtSymbol);
   }
-  static void free_node(void* memory, Symbol* const& value) {
+  static void free_node(void* memory, Value const& value) {
     // We get here because #1 some threads lost a race to insert a newly created Symbol
     // or #2 we're cleaning up unused symbol.
     // If #1, then the symbol can be either permanent (refcount==PERM_REFCOUNT),
@@ -150,7 +151,7 @@
       assert(value->refcount() == 0, "expected dead symbol");
     }
     SymbolTable::delete_symbol(value);
-    SymbolTableHash::BaseConfig::free_node(memory, value);
+    FreeHeap(memory);
     SymbolTable::item_removed();
   }
 };
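
The string table and symbol table hunks both move from inheriting ConcurrentHashTable's BaseConfig to supplying a free-standing policy class that names its own Value type and provides hashing plus node allocation. A stripped-down sketch of that policy-template shape (MiniTable is a toy list-based "table", not HotSpot's ConcurrentHashTable):

    #include <cstdint>
    #include <cstdlib>
    #include <cstring>
    #include <new>

    // The container is parameterized only by a Config; the Config names its
    // Value type and supplies get_hash/allocate_node/free_node, mirroring
    // the new SymbolTableConfig/StringTableConfig shape.
    template <typename Config>
    class MiniTable {
      typedef typename Config::Value Value;
      struct Node { Value value; Node* next; };
      Node* _head = nullptr;
     public:
      void insert(const Value& v) {
        void* mem = Config::allocate_node(sizeof(Node), v);
        _head = new (mem) Node{v, _head};
      }
      ~MiniTable() {
        while (_head != nullptr) {
          Node* n = _head;
          _head = n->next;
          Value v = n->value;
          n->~Node();
          Config::free_node(n, v);
        }
      }
    };

    struct CStringConfig {
      typedef const char* Value;
      // Part of the Config interface; this toy table never actually rehashes.
      static uintptr_t get_hash(Value const& v, bool* is_dead) {
        *is_dead = false;
        return std::strlen(v);        // placeholder hash
      }
      static void* allocate_node(size_t size, Value const&) { return std::malloc(size); }
      static void  free_node(void* mem, Value const&)       { std::free(mem); }
    };

    int main() {
      MiniTable<CStringConfig> t;
      t.insert("hello");
    }
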
--- a/src/hotspot/share/classfile/systemDictionary.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/classfile/systemDictionary.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -2142,7 +2142,7 @@
     // See whether biased locking is enabled and if so set it for this
     // klass.
     // Note that this must be done past the last potential blocking
-    // point / safepoint. We enable biased locking lazily using a
+    // point / safepoint. We might enable biased locking lazily using a
     // VM_Operation to iterate the SystemDictionary and installing the
     // biasable mark word into each InstanceKlass's prototype header.
     // To avoid race conditions where we accidentally miss enabling the
--- a/src/hotspot/share/code/codeCache.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/code/codeCache.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1142,25 +1142,28 @@
 
   // At least one nmethod has been marked for deoptimization
 
-  Deoptimization::deoptimize_all_marked();
+  // All this already happens inside a VM_Operation, so we'll do all the work here.
+  // Stuff copied from VM_Deoptimize and modified slightly.
+
+  // We do not want any GCs to happen while we are in the middle of this VM operation
+  ResourceMark rm;
+  DeoptimizationMarker dm;
+
+  // Deoptimize all activations depending on marked nmethods
+  Deoptimization::deoptimize_dependents();
+
+  // Make the dependent methods not entrant
+  make_marked_nmethods_not_entrant();
 }
 #endif // INCLUDE_JVMTI
 
-// Mark methods for deopt (if safe or possible).
+// Deoptimize all methods
 void CodeCache::mark_all_nmethods_for_deoptimization() {
   MutexLocker mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
   CompiledMethodIterator iter(CompiledMethodIterator::only_alive_and_not_unloading);
   while(iter.next()) {
     CompiledMethod* nm = iter.method();
-    if (!nm->method()->is_method_handle_intrinsic() &&
-        !nm->is_not_installed() &&
-        nm->is_in_use() &&
-        !nm->is_native_method()) {
-      // Intrinsics and native methods are never deopted. A method that is
-      // not installed yet or is not in use is not safe to deopt; the
-      // is_in_use() check covers the not_entrant and not zombie cases.
-      // Note: A not_entrant method can become a zombie at anytime if it was
-      // made not_entrant before the previous safepoint/handshake.
+    if (!nm->method()->is_method_handle_intrinsic()) {
       nm->mark_for_deoptimization();
     }
   }
@@ -1188,12 +1191,7 @@
   CompiledMethodIterator iter(CompiledMethodIterator::only_alive_and_not_unloading);
   while(iter.next()) {
     CompiledMethod* nm = iter.method();
-    if (nm->is_marked_for_deoptimization() && nm->is_in_use()) {
-      // only_alive_and_not_unloading() can return not_entrant nmethods.
-      // A not_entrant method can become a zombie at anytime if it was
-      // made not_entrant before the previous safepoint/handshake. The
-      // is_in_use() check covers the not_entrant and not zombie cases
-      // that have become true after the method was marked for deopt.
+    if (nm->is_marked_for_deoptimization() && !nm->is_not_entrant()) {
       nm->make_not_entrant();
     }
   }
@@ -1205,12 +1203,17 @@
 
   if (number_of_nmethods_with_dependencies() == 0) return;
 
+  // CodeCache can only be updated by a thread_in_VM and they will all be
+  // stopped during the safepoint so CodeCache will be safe to update without
+  // holding the CodeCache_lock.
+
   KlassDepChange changes(dependee);
 
   // Compute the dependent nmethods
   if (mark_for_deoptimization(changes) > 0) {
     // At least one nmethod has been marked for deoptimization
-    Deoptimization::deoptimize_all_marked();
+    VM_Deoptimize op;
+    VMThread::execute(&op);
   }
 }
 
@@ -1219,9 +1222,26 @@
   // --- Compile_lock is not held. However we are at a safepoint.
   assert_locked_or_safepoint(Compile_lock);
 
+  // CodeCache can only be updated by a thread_in_VM and they will all be
+  // stopped during the safepoint so CodeCache will be safe to update without
+  // holding the CodeCache_lock.
+
   // Compute the dependent nmethods
   if (mark_for_deoptimization(m_h()) > 0) {
-    Deoptimization::deoptimize_all_marked();
+    // At least one nmethod has been marked for deoptimization
+
+    // All this already happens inside a VM_Operation, so we'll do all the work here.
+    // Stuff copied from VM_Deoptimize and modified slightly.
+
+    // We do not want any GCs to happen while we are in the middle of this VM operation
+    ResourceMark rm;
+    DeoptimizationMarker dm;
+
+    // Deoptimize all activations depending on marked nmethods
+    Deoptimization::deoptimize_dependents();
+
+    // Make the dependent methods not entrant
+    make_marked_nmethods_not_entrant();
   }
 }
 
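Note the two-phase shape restored by these codeCache.cpp hunks: first mark_all_nmethods_for_deoptimization() marks everything except method-handle intrinsics, then make_marked_nmethods_not_entrant() applies the transition to marked methods that still need it. A toy sketch of that mark-then-apply split (Method here is a bare illustrative struct):

    #include <cstdio>
    #include <vector>

    struct Method {
      bool is_intrinsic;
      bool marked_for_deopt = false;
      bool not_entrant = false;
    };

    // Phase 1: mark candidates (everything but intrinsics, as above).
    void mark_all(std::vector<Method>& methods) {
      for (Method& m : methods)
        if (!m.is_intrinsic) m.marked_for_deopt = true;
    }

    // Phase 2: apply the transition to marked methods not already not-entrant.
    void make_marked_not_entrant(std::vector<Method>& methods) {
      for (Method& m : methods)
        if (m.marked_for_deopt && !m.not_entrant) m.not_entrant = true;
    }

    int main() {
      std::vector<Method> methods = {{false}, {true}, {false}};
      mark_all(methods);
      make_marked_not_entrant(methods);
      for (const Method& m : methods)
        std::printf("intrinsic=%d not_entrant=%d\n", m.is_intrinsic, m.not_entrant);
    }
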
--- a/src/hotspot/share/code/compiledMethod.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/code/compiledMethod.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -214,7 +214,6 @@
   };
 
   virtual bool  is_in_use() const = 0;
-  virtual bool  is_not_installed() const = 0;
   virtual int   comp_level() const = 0;
   virtual int   compile_id() const = 0;
 
--- a/src/hotspot/share/code/dependencyContext.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/code/dependencyContext.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -99,15 +99,15 @@
   // Safepoints are forbidden during DC lifetime. GC can invalidate
   // _dependency_context_addr if it relocates the holder
   // (e.g. CallSiteContext Java object).
-  uint64_t _safepoint_counter;
+  SafepointStateTracker _safepoint_tracker;
 
   DependencyContext(nmethodBucket* volatile* bucket_addr, volatile uint64_t* last_cleanup_addr)
     : _dependency_context_addr(bucket_addr),
       _last_cleanup_addr(last_cleanup_addr),
-      _safepoint_counter(SafepointSynchronize::safepoint_counter()) {}
+      _safepoint_tracker(SafepointSynchronize::safepoint_state_tracker()) {}
 
   ~DependencyContext() {
-    assert(SafepointSynchronize::is_same_safepoint(_safepoint_counter), "must be the same safepoint");
+    assert(!_safepoint_tracker.safepoint_state_changed(), "must be the same safepoint");
   }
 #else
   DependencyContext(nmethodBucket* volatile* bucket_addr, volatile uint64_t* last_cleanup_addr)
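
Here the tracker serves as a debug-only RAII guard: capture the safepoint state at construction, assert it is unchanged at destruction. A small sketch of that guard shape (illustrative names; the assert trips if a safepoint intervened during the object's lifetime):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    static std::atomic<uint64_t> g_safepoint_counter{0};  // bumped per safepoint

    // Debug-only guard: any safepoint between construction and destruction
    // trips the assert, catching code that assumed a safepoint-free region.
    class NoSafepointVerifier {
      uint64_t _at_entry;
     public:
      NoSafepointVerifier() : _at_entry(g_safepoint_counter.load()) {}
      ~NoSafepointVerifier() {
        assert(g_safepoint_counter.load() == _at_entry && "must be the same safepoint");
      }
    };

    int main() {
      {
        NoSafepointVerifier nsv;   // like DependencyContext's _safepoint_tracker
        // ... walk the dependency list; no safepoint may occur in here ...
      }                            // destructor re-checks the counter
    }
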
--- a/src/hotspot/share/code/nmethod.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/code/nmethod.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -50,7 +50,6 @@
 #include "oops/oop.inline.hpp"
 #include "prims/jvmtiImpl.hpp"
 #include "runtime/atomic.hpp"
-#include "runtime/deoptimization.hpp"
 #include "runtime/flags/flagSetting.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/handles.inline.hpp"
@@ -1178,7 +1177,11 @@
   // have the Method* live here, in case we unload the nmethod because
   // it is pointing to some oop (other than the Method*) being unloaded.
   if (_method != NULL) {
-    _method->unlink_code(this);
+    // OSR methods point to the Method*, but the Method* does not
+    // point back!
+    if (_method->code() == this) {
+      _method->clear_code(); // Break a cycle
+    }
   }
 
   // Make the class unloaded - i.e., change state and notify sweeper
@@ -1260,9 +1263,16 @@
   }
 }
 
-void nmethod::unlink_from_method() {
-  if (method() != NULL) {
-    method()->unlink_code(this);
+void nmethod::unlink_from_method(bool acquire_lock) {
+  // We need to check if both the _code and _from_compiled_code_entry_point
+  // refer to this nmethod because there is a race in setting these two fields
+  // in Method* as seen in bugid 4947125.
+  // If the vep() points to the zombie nmethod, the memory for the nmethod
+  // could be flushed and the compiler and vtable stubs could still call
+  // through it.
+  if (method() != NULL && (method()->code() == this ||
+                           method()->from_compiled_entry() == verified_entry_point())) {
+    method()->clear_code(acquire_lock);
   }
 }
 
@@ -1289,24 +1299,24 @@
 
   // during patching, depending on the nmethod state we must notify the GC that
   // code has been unloaded, unregistering it. We cannot do this right while
-  // holding the CompiledMethod_lock because we need to use the CodeCache_lock. This
+  // holding the Patching_lock because we need to use the CodeCache_lock. This
   // would be prone to deadlocks.
   // This flag is used to remember whether we need to later lock and unregister.
   bool nmethod_needs_unregister = false;
 
-  // invalidate osr nmethod before acquiring the patching lock since
-  // they both acquire leaf locks and we don't want a deadlock.
-  // This logic is equivalent to the logic below for patching the
-  // verified entry point of regular methods. We check that the
-  // nmethod is in use to ensure that it is invalidated only once.
-  if (is_osr_method() && is_in_use()) {
-    // this effectively makes the osr nmethod not entrant
-    invalidate_osr_method();
-  }
-
   {
+    // invalidate osr nmethod before acquiring the patching lock since
+    // they both acquire leaf locks and we don't want a deadlock.
+    // This logic is equivalent to the logic below for patching the
+    // verified entry point of regular methods. We check that the
+    // nmethod is in use to ensure that it is invalidated only once.
+    if (is_osr_method() && is_in_use()) {
+      // this effectively makes the osr nmethod not entrant
+      invalidate_osr_method();
+    }
+
     // Enter critical section.  Does not block for safepoint.
-    MutexLocker pl(CompiledMethod_lock, Mutex::_no_safepoint_check_flag);
+    MutexLocker pl(Patching_lock, Mutex::_no_safepoint_check_flag);
 
     if (_state == state) {
       // another thread already performed this transition so nothing
@@ -1350,9 +1360,8 @@
     log_state_change();
 
     // Remove nmethod from method.
-    unlink_from_method();
-
-  } // leave critical region under CompiledMethod_lock
+    unlink_from_method(false /* already owns Patching_lock */);
+  } // leave critical region under Patching_lock
 
 #if INCLUDE_JVMCI
   // Invalidate can't occur while holding the Patching lock
--- a/src/hotspot/share/code/nmethod.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/code/nmethod.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -119,7 +119,7 @@
   // used by jvmti to track if an unload event has been posted for this nmethod.
   bool _unload_reported;
 
-  // Protected by CompiledMethod_lock
+  // Protected by Patching_lock
   volatile signed char _state;               // {not_installed, in_use, not_entrant, zombie, unloaded}
 
 #ifdef ASSERT
@@ -387,7 +387,7 @@
 
   int   comp_level() const                        { return _comp_level; }
 
-  void unlink_from_method();
+  void unlink_from_method(bool acquire_lock);
 
   // Support for oops in scopes and relocs:
   // Note: index 0 is reserved for null.
--- a/src/hotspot/share/gc/cms/cmsHeap.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/cms/cmsHeap.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -62,7 +62,7 @@
   }
 
   size_t used_in_bytes() {
-    return _space->used();
+    return _space->used_stable();
   }
 };
 
--- a/src/hotspot/share/gc/cms/compactibleFreeListSpace.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/cms/compactibleFreeListSpace.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -372,6 +372,8 @@
     )
   }
   _dictionary->set_par_lock(&_parDictionaryAllocLock);
+
+  _used_stable = 0;
 }
 
 // Like CompactibleSpace forward() but always calls cross_threshold() to
@@ -577,6 +579,14 @@
   return capacity() - free();
 }
 
+size_t CompactibleFreeListSpace::used_stable() const {
+  return _used_stable;
+}
+
+void CompactibleFreeListSpace::recalculate_used_stable() {
+  _used_stable = used();
+}
+
 size_t CompactibleFreeListSpace::free() const {
   // "MT-safe, but not MT-precise"(TM), if you will: i.e.
   // if you do this while the structures are in flux you
@@ -1374,6 +1384,9 @@
     debug_only(fc->mangleAllocated(size));
   }
 
+  // After allocation, recalculate used space and update used_stable
+  recalculate_used_stable();
+
   return res;
 }
 
--- a/src/hotspot/share/gc/cms/compactibleFreeListSpace.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/cms/compactibleFreeListSpace.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -192,6 +192,9 @@
   // Used to keep track of limit of sweep for the space
   HeapWord* _sweep_limit;
 
+  // Stable value of used().
+  size_t _used_stable;
+
   // Used to make the young collector update the mod union table
   MemRegionClosure* _preconsumptionDirtyCardClosure;
 
@@ -412,6 +415,17 @@
   // which overestimates the region by returning the entire
   // committed region (this is safe, but inefficient).
 
+  // Returns monotonically increasing stable used space bytes for CMS.
+  // This is required for jstat and other memory monitoring tools
+  // that might otherwise see inconsistent used space values during a garbage
+  // collection, promotion or allocation into compactibleFreeListSpace.
+  // The value returned by this function might be smaller than the
+  // actual value.
+  size_t used_stable() const;
+  // Recalculate and cache the current stable used() value. Only to be called
+  // in places where we can be sure that the result is stable.
+  void recalculate_used_stable();
+
   // Returns a subregion of the space containing all the objects in
   // the space.
   MemRegion used_region() const {
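
The new used_stable()/recalculate_used_stable() pair caches a snapshot of used() that monitoring tools can read without observing mid-collection churn. A generic sketch of that cache-at-quiescent-points pattern (SpaceStats is illustrative):

    #include <atomic>
    #include <cstddef>
    #include <cstdio>

    // The live value fluctuates while allocation/GC is in progress; readers
    // that only need a consistent number use the cached snapshot, refreshed
    // only at points where the live value is known to be stable.
    class SpaceStats {
      std::atomic<size_t> _used{0};          // churns during GC and allocation
      std::atomic<size_t> _used_stable{0};   // snapshot for jstat-style readers
     public:
      void allocate(size_t bytes) { _used.fetch_add(bytes); }
      size_t used() const { return _used.load(); }
      // Call only where used() is quiescent, e.g. right after a collection.
      void recalculate_used_stable() { _used_stable.store(_used.load()); }
      size_t used_stable() const { return _used_stable.load(); }
    };

    int main() {
      SpaceStats s;
      s.allocate(4096);
      s.recalculate_used_stable();
      std::printf("stable used: %zu\n", s.used_stable());
    }
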
--- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -692,6 +692,10 @@
   return _cmsSpace->max_alloc_in_words() * HeapWordSize;
 }
 
+size_t ConcurrentMarkSweepGeneration::used_stable() const {
+  return cmsSpace()->used_stable();
+}
+
 size_t ConcurrentMarkSweepGeneration::max_available() const {
   return free() + _virtual_space.uncommitted_size();
 }
@@ -1523,6 +1527,8 @@
   FreelistLocker z(this);
   MetaspaceGC::compute_new_size();
   _cmsGen->compute_new_size_free_list();
+  // recalculate CMS used space after CMS collection
+  _cmsGen->cmsSpace()->recalculate_used_stable();
 }
 
 // A work method used by the foreground collector to do
@@ -2051,6 +2057,7 @@
 
   _capacity_at_prologue = capacity();
   _used_at_prologue = used();
+  _cmsSpace->recalculate_used_stable();
 
   // We enable promotion tracking so that card-scanning can recognize
   // which objects have been promoted during this GC and skip them.
@@ -2123,6 +2130,7 @@
   _eden_chunk_index = 0;
 
   size_t cms_used   = _cmsGen->cmsSpace()->used();
+  _cmsGen->cmsSpace()->recalculate_used_stable();
 
   // update performance counters - this uses a special version of
   // update_counters() that allows the utilization to be passed as a
@@ -2816,6 +2824,8 @@
     rp->enable_discovery();
     _collectorState = Marking;
   }
+
+  _cmsGen->cmsSpace()->recalculate_used_stable();
 }
 
 void CMSCollector::checkpointRootsInitialWork() {
@@ -4177,6 +4187,7 @@
     MutexLocker y(bitMapLock(),
                   Mutex::_no_safepoint_check_flag);
     checkpointRootsFinalWork();
+    _cmsGen->cmsSpace()->recalculate_used_stable();
   }
   verify_work_stacks_empty();
   verify_overflow_empty();
@@ -4250,7 +4261,6 @@
   if (should_unload_classes()) {
     heap->prune_scavengable_nmethods();
   }
-  JvmtiExport::gc_epilogue();
 
   // If we encountered any (marking stack / work queue) overflow
   // events during the current CMS cycle, take appropriate
@@ -5337,9 +5347,14 @@
     // further below.
     {
       CMSTokenSyncWithLocks ts(true, _cmsGen->freelistLock());
+
       // Update heap occupancy information which is used as
       // input to soft ref clearing policy at the next gc.
       Universe::update_heap_info_at_gc();
+
+      // recalculate CMS used space after CMS collection
+      _cmsGen->cmsSpace()->recalculate_used_stable();
+
       _collectorState = Resizing;
     }
   }
@@ -5428,6 +5443,7 @@
     // Gather statistics on the young generation collection.
     collector()->stats().record_gc0_end(used());
   }
+  _cmsSpace->recalculate_used_stable();
 }
 
 void CMSCollector::sweepWork(ConcurrentMarkSweepGeneration* old_gen) {
--- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1112,6 +1112,7 @@
   double occupancy() const { return ((double)used())/((double)capacity()); }
   size_t contiguous_available() const;
   size_t unsafe_max_alloc_nogc() const;
+  size_t used_stable() const;
 
   // over-rides
   MemRegion used_region_at_save_marks() const;
--- a/src/hotspot/share/gc/cms/gSpaceCounters.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/cms/gSpaceCounters.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -59,7 +59,7 @@
   }
 
   inline void update_used() {
-    _used->set_value(_gen->used());
+    _used->set_value(_gen->used_stable());
   }
 
   // special version of update_used() to allow the used value to be
@@ -103,7 +103,7 @@
     GenerationUsedHelper(Generation* g) : _gen(g) { }
 
     inline jlong take_sample() {
-      return _gen->used();
+      return _gen->used_stable();
     }
 };
 
--- a/src/hotspot/share/gc/g1/g1Analytics.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/g1/g1Analytics.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -38,7 +38,7 @@
   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
 };
 
-static double cost_per_card_ms_defaults[] = {
+static double cost_per_log_buffer_entry_ms_defaults[] = {
   0.01, 0.005, 0.005, 0.003, 0.003, 0.002, 0.002, 0.0015
 };
 
@@ -47,7 +47,7 @@
   1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
 };
 
-static double cost_per_entry_ms_defaults[] = {
+static double young_only_cost_per_remset_card_ms_defaults[] = {
   0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005
 };
 
@@ -77,12 +77,12 @@
     _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _prev_collection_pause_end_ms(0.0),
     _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _cost_per_log_buffer_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)),
     _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
     _mixed_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _mixed_cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _young_only_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _mixed_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -101,10 +101,10 @@
   int index = MIN2(ParallelGCThreads - 1, 7u);
 
   _rs_length_diff_seq->add(rs_length_diff_defaults[index]);
-  _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]);
+  _cost_per_log_buffer_entry_ms_seq->add(cost_per_log_buffer_entry_ms_defaults[index]);
   _cost_scan_hcc_seq->add(0.0);
   _young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]);
-  _cost_per_entry_ms_seq->add(cost_per_entry_ms_defaults[index]);
+  _young_only_cost_per_remset_card_ms_seq->add(young_only_cost_per_remset_card_ms_defaults[index]);
   _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]);
   _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]);
   _young_other_cost_per_region_ms_seq->add(young_other_cost_per_region_ms_defaults[index]);
@@ -158,19 +158,19 @@
     (pause_time_ms * _recent_prev_end_times_for_all_gcs_sec->num()) / interval_ms;
 }
 
-void G1Analytics::report_cost_per_card_ms(double cost_per_card_ms) {
-  _cost_per_card_ms_seq->add(cost_per_card_ms);
+void G1Analytics::report_cost_per_log_buffer_entry_ms(double cost_per_log_buffer_entry_ms) {
+  _cost_per_log_buffer_entry_ms_seq->add(cost_per_log_buffer_entry_ms);
 }
 
 void G1Analytics::report_cost_scan_hcc(double cost_scan_hcc) {
   _cost_scan_hcc_seq->add(cost_scan_hcc);
 }
 
-void G1Analytics::report_cost_per_entry_ms(double cost_per_entry_ms, bool for_young_gc) {
+void G1Analytics::report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc) {
   if (for_young_gc) {
-    _cost_per_entry_ms_seq->add(cost_per_entry_ms);
+    _young_only_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms);
   } else {
-    _mixed_cost_per_entry_ms_seq->add(cost_per_entry_ms);
+    _mixed_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms);
   }
 }
 
@@ -222,8 +222,8 @@
   return get_new_prediction(_alloc_rate_ms_seq);
 }
 
-double G1Analytics::predict_cost_per_card_ms() const {
-  return get_new_prediction(_cost_per_card_ms_seq);
+double G1Analytics::predict_cost_per_log_buffer_entry_ms() const {
+  return get_new_prediction(_cost_per_log_buffer_entry_ms_seq);
 }
 
 double G1Analytics::predict_scan_hcc_ms() const {
@@ -231,7 +231,7 @@
 }
 
 double G1Analytics::predict_rs_update_time_ms(size_t pending_cards) const {
-  return pending_cards * predict_cost_per_card_ms() + predict_scan_hcc_ms();
+  return pending_cards * predict_cost_per_log_buffer_entry_ms() + predict_scan_hcc_ms();
 }
 
 double G1Analytics::predict_young_cards_per_entry_ratio() const {
@@ -256,17 +256,17 @@
 
 double G1Analytics::predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const {
   if (for_young_gc) {
-    return card_num * get_new_prediction(_cost_per_entry_ms_seq);
+    return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq);
   } else {
     return predict_mixed_rs_scan_time_ms(card_num);
   }
 }
 
 double G1Analytics::predict_mixed_rs_scan_time_ms(size_t card_num) const {
-  if (_mixed_cost_per_entry_ms_seq->num() < 3) {
-    return card_num * get_new_prediction(_cost_per_entry_ms_seq);
+  if (_mixed_cost_per_remset_card_ms_seq->num() < 3) {
+    return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq);
   } else {
-    return card_num * get_new_prediction(_mixed_cost_per_entry_ms_seq);
+    return card_num * get_new_prediction(_mixed_cost_per_remset_card_ms_seq);
   }
 }
 
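After the renames, the prediction itself is unchanged: predicted update time is pending_cards times the predicted per-entry cost, plus the predicted hot-card-cache scan time, each drawn from a bounded sample window. A tiny sketch of that moving-window predictor (TruncatedAvg is a plain-mean stand-in for HotSpot's TruncatedSeq, which additionally applies decaying weights):

    #include <cstddef>
    #include <cstdio>
    #include <deque>

    // Keeps only the most recent N samples and predicts with their mean.
    class TruncatedAvg {
      std::deque<double> _samples;
      size_t _max;
     public:
      explicit TruncatedAvg(size_t max_len) : _max(max_len) {}
      void add(double v) {
        _samples.push_back(v);
        if (_samples.size() > _max) _samples.pop_front();
      }
      double predict() const {
        if (_samples.empty()) return 0.0;
        double sum = 0.0;
        for (double v : _samples) sum += v;
        return sum / _samples.size();
      }
    };

    int main() {
      TruncatedAvg cost_per_log_buffer_entry_ms(10);
      TruncatedAvg scan_hcc_ms(10);
      cost_per_log_buffer_entry_ms.add(0.005);   // measured after a pause
      scan_hcc_ms.add(0.2);
      size_t pending_cards = 10000;
      // Shape of predict_rs_update_time_ms(pending_cards):
      double ms = pending_cards * cost_per_log_buffer_entry_ms.predict()
                + scan_hcc_ms.predict();
      std::printf("predicted update time: %.2f ms\n", ms);
    }
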
--- a/src/hotspot/share/gc/g1/g1Analytics.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/g1/g1Analytics.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -46,12 +46,12 @@
   double        _prev_collection_pause_end_ms;
 
   TruncatedSeq* _rs_length_diff_seq;
-  TruncatedSeq* _cost_per_card_ms_seq;
+  TruncatedSeq* _cost_per_log_buffer_entry_ms_seq;
   TruncatedSeq* _cost_scan_hcc_seq;
   TruncatedSeq* _young_cards_per_entry_ratio_seq;
   TruncatedSeq* _mixed_cards_per_entry_ratio_seq;
-  TruncatedSeq* _cost_per_entry_ms_seq;
-  TruncatedSeq* _mixed_cost_per_entry_ms_seq;
+  TruncatedSeq* _young_only_cost_per_remset_card_ms_seq;
+  TruncatedSeq* _mixed_cost_per_remset_card_ms_seq;
   TruncatedSeq* _cost_per_byte_ms_seq;
   TruncatedSeq* _constant_other_time_ms_seq;
   TruncatedSeq* _young_other_cost_per_region_ms_seq;
@@ -99,9 +99,9 @@
   void report_concurrent_mark_remark_times_ms(double ms);
   void report_concurrent_mark_cleanup_times_ms(double ms);
   void report_alloc_rate_ms(double alloc_rate);
-  void report_cost_per_card_ms(double cost_per_card_ms);
+  void report_cost_per_log_buffer_entry_ms(double cost_per_log_buffer_entry_ms);
   void report_cost_scan_hcc(double cost_scan_hcc);
-  void report_cost_per_entry_ms(double cost_per_entry_ms, bool for_young_gc);
+  void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc);
   void report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc);
   void report_rs_length_diff(double rs_length_diff);
   void report_cost_per_byte_ms(double cost_per_byte_ms, bool mark_or_rebuild_in_progress);
@@ -116,7 +116,7 @@
   double predict_alloc_rate_ms() const;
   int num_alloc_rate_ms() const;
 
-  double predict_cost_per_card_ms() const;
+  double predict_cost_per_log_buffer_entry_ms() const;
 
   double predict_scan_hcc_ms() const;
 
--- a/src/hotspot/share/gc/g1/g1CardTable.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/g1/g1CardTable.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -30,28 +30,6 @@
 #include "runtime/atomic.hpp"
 #include "runtime/orderAccess.hpp"
 
-bool G1CardTable::mark_card_deferred(size_t card_index) {
-  CardValue val = _byte_map[card_index];
-  // It's already processed
-  if ((val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val()) {
-    return false;
-  }
-
-  // Cached bit can be installed either on a clean card or on a claimed card.
-  CardValue new_val = val;
-  if (val == clean_card_val()) {
-    new_val = deferred_card_val();
-  } else {
-    if (val & claimed_card_val()) {
-      new_val = val | deferred_card_val();
-    }
-  }
-  if (new_val != val) {
-    Atomic::cmpxchg(new_val, &_byte_map[card_index], val);
-  }
-  return true;
-}
-
 void G1CardTable::g1_mark_as_young(const MemRegion& mr) {
   CardValue *const first = byte_for(mr.start());
   CardValue *const last = byte_after(mr.last());
--- a/src/hotspot/share/gc/g1/g1CardTable.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/g1/g1CardTable.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -44,55 +44,65 @@
   virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled);
 };
 
-class G1CardTable: public CardTable {
+class G1CardTable : public CardTable {
   friend class VMStructs;
   friend class G1CardTableChangedListener;
 
   G1CardTableChangedListener _listener;
 
+public:
   enum G1CardValues {
-    g1_young_gen = CT_MR_BS_last_reserved << 1
+    g1_young_gen = CT_MR_BS_last_reserved << 1,
+
+    // During evacuation we use the card table to consolidate the cards we need to
+    // scan for roots onto the card table from the various sources. Further it is
+    // used to record already completely scanned cards to avoid re-scanning them
+    // when incrementally evacuating the old gen regions of a collection set.
+    // This means that already scanned cards should be preserved.
+    //
+    // The merge at the start of each evacuation round simply sets cards to dirty
+    // that are clean; scanned cards are set to 0x1.
+    //
+    // This means that the LSB determines what to do with the card during evacuation
+    // given the following possible values:
+    //
+    // 11111111 - clean, do not scan
+    // 00000001 - already scanned, do not scan
+    // 00000000 - dirty, needs to be scanned.
+    //
+    g1_card_already_scanned = 0x1
   };
 
-public:
+  static const size_t WordAllClean = SIZE_MAX;
+  static const size_t WordAllDirty = 0;
+
+  STATIC_ASSERT(BitsPerByte == 8);
+  static const size_t WordAlreadyScanned = (SIZE_MAX / 255) * g1_card_already_scanned;
+
   G1CardTable(MemRegion whole_heap): CardTable(whole_heap, /* scanned concurrently */ true), _listener() {
     _listener.set_card_table(this);
   }
-  bool is_card_dirty(size_t card_index) {
-    return _byte_map[card_index] == dirty_card_val();
-  }
 
   static CardValue g1_young_card_val() { return g1_young_gen; }
 
-/*
-   Claimed and deferred bits are used together in G1 during the evacuation
-   pause. These bits can have the following state transitions:
-   1. The claimed bit can be put over any other card state. Except that
-      the "dirty -> dirty and claimed" transition is checked for in
-      G1 code and is not used.
-   2. Deferred bit can be set only if the previous state of the card
-      was either clean or claimed. mark_card_deferred() is wait-free.
-      We do not care if the operation is be successful because if
-      it does not it will only result in duplicate entry in the update
-      buffer because of the "cache-miss". So it's not worth spinning.
- */
-
-  bool is_card_claimed(size_t card_index) {
-    CardValue val = _byte_map[card_index];
-    return (val & (clean_card_mask_val() | claimed_card_val())) == claimed_card_val();
-  }
-
-  inline void set_card_claimed(size_t card_index);
-
   void verify_g1_young_region(MemRegion mr) PRODUCT_RETURN;
   void g1_mark_as_young(const MemRegion& mr);
 
-  bool mark_card_deferred(size_t card_index);
+  size_t index_for_cardvalue(CardValue const* p) const {
+    return pointer_delta(p, _byte_map, sizeof(CardValue));
+  }
+
+  // Mark the given card as Dirty if it is Clean.
+  inline void mark_clean_as_dirty(size_t card_index);
 
-  bool is_card_deferred(size_t card_index) {
-    CardValue val = _byte_map[card_index];
-    return (val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val();
-  }
+  // Change Clean cards in a (large) area on the card table to Dirty, preserving
+  // already scanned cards. Assumes that most cards in that area are Clean.
+  inline void mark_region_dirty(size_t start_card_index, size_t num_cards);
+
+  // Mark the given range of cards as Scanned. All of these cards must be Dirty.
+  inline void mark_as_scanned(size_t start_card_index, size_t num_cards);
+
+  inline uint region_idx_for(CardValue* p);
 
   static size_t compute_size(size_t mem_region_size_in_words) {
     size_t number_of_slots = (mem_region_size_in_words / card_size_in_words);
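
Per the new comment block in this header, the low bit of a card value is what evacuation keys on: 0xFF (clean) and 0x01 (already scanned) are skipped, while 0x00 (dirty) is scanned. A one-line check of that encoding, assuming exactly those three values:

    #include <cassert>
    #include <cstdint>

    static const uint8_t kClean = 0xFF, kScanned = 0x01, kDirty = 0x00;

    // LSB set => nothing to do (clean or already scanned); LSB clear => scan.
    inline bool needs_scan(uint8_t card) { return (card & 0x1) == 0; }

    int main() {
      assert(!needs_scan(kClean));
      assert(!needs_scan(kScanned));
      assert(needs_scan(kDirty));
    }
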
--- a/src/hotspot/share/gc/g1/g1CardTable.inline.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/g1/g1CardTable.inline.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -26,15 +26,58 @@
 #define SHARE_GC_G1_G1CARDTABLE_INLINE_HPP
 
 #include "gc/g1/g1CardTable.hpp"
+#include "gc/g1/heapRegion.hpp"
 
-void G1CardTable::set_card_claimed(size_t card_index) {
-  jbyte val = _byte_map[card_index];
-  if (val == clean_card_val()) {
-    val = (jbyte)claimed_card_val();
-  } else {
-    val |= (jbyte)claimed_card_val();
+inline uint G1CardTable::region_idx_for(CardValue* p) {
+  size_t const card_idx = pointer_delta(p, _byte_map, sizeof(CardValue));
+  return (uint)(card_idx >> (HeapRegion::LogOfHRGrainBytes - card_shift));
+}
+
+inline void G1CardTable::mark_clean_as_dirty(size_t card_index) {
+  CardValue value = _byte_map[card_index];
+  if (value == clean_card_val()) {
+    _byte_map[card_index] = dirty_card_val();
   }
-  _byte_map[card_index] = val;
 }
 
-#endif // SHARE_GC_G1_G1CARDTABLE_INLINE_HPP
+inline void G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) {
+  assert(is_aligned(start_card_index, sizeof(size_t)), "Start card index must be aligned.");
+  assert(is_aligned(num_cards, sizeof(size_t)), "Number of cards to change must be evenly divisible.");
+
+  size_t const num_chunks = num_cards / sizeof(size_t);
+
+  size_t* cur_word = (size_t*)&_byte_map[start_card_index];
+  size_t* const end_word_map = cur_word + num_chunks;
+  while (cur_word < end_word_map) {
+    size_t value = *cur_word;
+    if (value == WordAllClean) {
+      *cur_word = WordAllDirty;
+    } else if (value == WordAllDirty) {
+      // do nothing.
+    } else {
+      // There is a mix of cards in there. Tread slowly.
+      CardValue* cur = (CardValue*)cur_word;
+      for (size_t i = 0; i < sizeof(size_t); i++) {
+        CardValue value = *cur;
+        if (value == clean_card_val()) {
+          *cur = dirty_card_val();
+        }
+        cur++;
+      }
+    }
+    cur_word++;
+  }
+}
+
+inline void G1CardTable::mark_as_scanned(size_t start_card_index, size_t num_cards) {
+  CardValue* start = &_byte_map[start_card_index];
+  CardValue* const end = start + num_cards;
+  while (start < end) {
+    CardValue value = *start;
+    assert(value == dirty_card_val(),
+           "Must have been dirty %d start " PTR_FORMAT " " PTR_FORMAT, value, p2i(start), p2i(end));
+    *start++ = g1_card_already_scanned;
+  }
+}
+
+#endif /* SHARE_GC_G1_G1CARDTABLE_INLINE_HPP */
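
mark_region_dirty above walks the card table a word at a time, fast-pathing all-clean and all-dirty words before falling back to individual bytes. A standalone sketch of the same idea on a plain byte array (0xFF clean, 0x00 dirty, 0x01 scanned, matching the encoding documented in g1CardTable.hpp; assumes the range covers a multiple of 8 cards):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static const uint8_t  kClean = 0xFF, kDirty = 0x00, kScanned = 0x01;
    static const uint64_t kWordAllClean = ~UINT64_C(0);
    static const uint64_t kWordAllDirty = 0;

    // Turn Clean cards Dirty over [start, start+num), preserving Scanned
    // cards; num must be a multiple of 8 so whole 64-bit words can be used.
    void mark_region_dirty(uint8_t* cards, size_t start, size_t num) {
      for (size_t off = start; off < start + num; off += sizeof(uint64_t)) {
        uint64_t word;
        std::memcpy(&word, cards + off, sizeof(word));
        if (word == kWordAllClean) {
          uint64_t dirty = kWordAllDirty;        // fast path: 8 cards at once
          std::memcpy(cards + off, &dirty, sizeof(dirty));
        } else if (word != kWordAllDirty) {
          for (size_t i = 0; i < sizeof(uint64_t); i++)   // mixed word: by bytes
            if (cards[off + i] == kClean) cards[off + i] = kDirty;
        }
      }
    }

    int main() {
      uint8_t cards[16];
      std::memset(cards, kClean, sizeof(cards));
      cards[3] = kScanned;                       // already scanned, must survive
      mark_region_dirty(cards, 0, 16);
      std::printf("card 3: 0x%02x, card 4: 0x%02x\n",
                  (unsigned)cards[3], (unsigned)cards[4]);  // 0x01, 0x00
    }
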
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -1677,7 +1677,6 @@
   _card_table = ct;
 
   G1BarrierSet::satb_mark_queue_set().initialize(this,
-                                                 SATB_Q_CBL_mon,
                                                  &bs->satb_mark_queue_buffer_allocator(),
                                                  G1SATBProcessCompletedThreshold,
                                                  G1SATBBufferEnqueueingThresholdPercent);
@@ -1955,7 +1954,7 @@
     n_completed_buffers++;
   }
   assert(dcqs.completed_buffers_num() == 0, "Completed buffers exist!");
-  phase_times()->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, n_completed_buffers, G1GCPhaseTimes::UpdateRSProcessedBuffers);
+  phase_times()->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_i, n_completed_buffers, G1GCPhaseTimes::MergeLBProcessedBuffers);
 }
 
 // Computes the sum of the storage used by the various regions.
@@ -2239,8 +2238,8 @@
   _collection_set.iterate(cl);
 }
 
-void G1CollectedHeap::collection_set_iterate_increment_from(HeapRegionClosure *cl, uint worker_id) {
-  _collection_set.iterate_incremental_part_from(cl, worker_id, workers()->active_workers());
+void G1CollectedHeap::collection_set_iterate_increment_from(HeapRegionClosure *cl, HeapRegionClaimer* hr_claimer, uint worker_id) {
+  _collection_set.iterate_incremental_part_from(cl, hr_claimer, worker_id, workers()->active_workers());
 }
 
 HeapWord* G1CollectedHeap::block_start(const void* addr) const {
@@ -2631,8 +2630,6 @@
   size_t _total_humongous;
   size_t _candidate_humongous;
 
-  G1DirtyCardQueue _dcq;
-
   bool humongous_region_is_candidate(G1CollectedHeap* g1h, HeapRegion* region) const {
     assert(region->is_starts_humongous(), "Must start a humongous object");
 
@@ -2692,8 +2689,7 @@
  public:
   RegisterRegionsWithRegionAttrTableClosure()
   : _total_humongous(0),
-    _candidate_humongous(0),
-    _dcq(&G1BarrierSet::dirty_card_queue_set()) {
+    _candidate_humongous(0) {
   }
 
   virtual bool do_heap_region(HeapRegion* r) {
@@ -2708,49 +2704,9 @@
     uint rindex = r->hrm_index();
     g1h->set_humongous_reclaim_candidate(rindex, is_candidate);
     if (is_candidate) {
+      g1h->register_humongous_region_with_region_attr(rindex);
       _candidate_humongous++;
-      g1h->register_humongous_region_with_region_attr(rindex);
-      // Is_candidate already filters out humongous object with large remembered sets.
-      // If we have a humongous object with a few remembered sets, we simply flush these
-      // remembered set entries into the DCQS. That will result in automatic
-      // re-evaluation of their remembered set entries during the following evacuation
-      // phase.
-      if (!r->rem_set()->is_empty()) {
-        guarantee(r->rem_set()->occupancy_less_or_equal_than(G1RSetSparseRegionEntries),
-                  "Found a not-small remembered set here. This is inconsistent with previous assumptions.");
-        G1CardTable* ct = g1h->card_table();
-        HeapRegionRemSetIterator hrrs(r->rem_set());
-        size_t card_index;
-        while (hrrs.has_next(card_index)) {
-          CardTable::CardValue* card_ptr = ct->byte_for_index(card_index);
-          // The remembered set might contain references to already freed
-          // regions. Filter out such entries to avoid failing card table
-          // verification.
-          if (g1h->is_in(ct->addr_for(card_ptr))) {
-            if (*card_ptr != G1CardTable::dirty_card_val()) {
-              *card_ptr = G1CardTable::dirty_card_val();
-              _dcq.enqueue(card_ptr);
-            }
-          }
-        }
-        assert(hrrs.n_yielded() == r->rem_set()->occupied(),
-               "Remembered set hash maps out of sync, cur: " SIZE_FORMAT " entries, next: " SIZE_FORMAT " entries",
-               hrrs.n_yielded(), r->rem_set()->occupied());
-        // We should only clear the card based remembered set here as we will not
-        // implicitly rebuild anything else during eager reclaim. Note that at the moment
-        // (and probably never) we do not enter this path if there are other kind of
-        // remembered sets for this region.
-        r->rem_set()->clear_locked(true /* only_cardset */);
-        // Clear_locked() above sets the state to Empty. However we want to continue
-        // collecting remembered set entries for humongous regions that were not
-        // reclaimed.
-        r->rem_set()->set_state_complete();
-#ifdef ASSERT
-        G1HeapRegionAttr region_attr = g1h->region_attr(oop(r->bottom()));
-        assert(region_attr.needs_remset_update(), "must be");
-#endif
-      }
-      assert(r->rem_set()->is_empty(), "At this point any humongous candidate remembered set must be empty.");
+      // We will later handle the remembered sets of these regions.
     } else {
       g1h->register_region_with_region_attr(r);
     }
@@ -2761,8 +2717,6 @@
 
   size_t total_humongous() const { return _total_humongous; }
   size_t candidate_humongous() const { return _candidate_humongous; }
-
-  void flush_rem_set_entries() { _dcq.flush(); }
 };
 
 void G1CollectedHeap::register_regions_with_region_attr() {
@@ -2775,9 +2729,6 @@
                                          cl.total_humongous(),
                                          cl.candidate_humongous());
   _has_humongous_reclaim_candidates = cl.candidate_humongous() > 0;
-
-  // Finally flush all remembered set entries to re-check into the global DCQS.
-  cl.flush_rem_set_entries();
 }
 
 #ifndef PRODUCT
@@ -3072,7 +3023,7 @@
                                                   workers()->active_workers(),
                                                   collection_set()->young_region_length(),
                                                   collection_set()->optional_region_length());
-        pre_evacuate_collection_set(evacuation_info);
+        pre_evacuate_collection_set(evacuation_info, &per_thread_states);
 
         // Actually do the work...
         evacuate_initial_collection_set(&per_thread_states);
@@ -3105,9 +3056,7 @@
 
         double sample_end_time_sec = os::elapsedTime();
         double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS;
-        size_t total_cards_scanned = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanRS, G1GCPhaseTimes::ScanRSScannedCards) +
-                                     phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanRS, G1GCPhaseTimes::ScanRSScannedCards);
-        policy()->record_collection_pause_end(pause_time_ms, total_cards_scanned, heap_used_bytes_before_gc);
+        policy()->record_collection_pause_end(pause_time_ms, heap_used_bytes_before_gc);
       }
 
       verify_after_young_collection(verify_type);
@@ -3581,7 +3530,7 @@
   phase_times()->record_merge_pss_time_ms((os::elapsedTime() - merge_pss_time_start) * 1000.0);
 }
 
-void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info) {
+void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* per_thread_states) {
   _expand_heap_after_alloc_failure = true;
   _evacuation_failed = false;
 
@@ -3592,10 +3541,15 @@
   // Initialize the GC alloc regions.
   _allocator->init_gc_alloc_regions(evacuation_info);
 
+  {
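+    // Set up the remembered set scan state for this pause and record the
+    // time taken.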
+    Ticks start = Ticks::now();
+    rem_set()->prepare_for_scan_heap_roots();
+    phase_times()->record_prepare_heap_roots_time_ms((Ticks::now() - start).seconds() * 1000.0);
+  }
+
   register_regions_with_region_attr();
   assert(_verifier->check_region_attr_table(), "Inconsistency in the region attributes table.");
 
-  rem_set()->prepare_for_scan_rem_set();
   _preserved_marks_set.assert_empty();
 
 #if COMPILER2_OR_JVMCI
@@ -3697,8 +3651,8 @@
 
   void scan_roots(G1ParScanThreadState* pss, uint worker_id) {
     _root_processor->evacuate_roots(pss, worker_id);
-    _g1h->rem_set()->update_rem_set(pss, worker_id);
-    _g1h->rem_set()->scan_rem_set(pss, worker_id, G1GCPhaseTimes::ScanRS, G1GCPhaseTimes::ObjCopy, G1GCPhaseTimes::CodeRoots);
+    _g1h->rem_set()->scan_heap_roots(pss, worker_id, G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ObjCopy);
+    _g1h->rem_set()->scan_collection_set_regions(pss, worker_id, G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::CodeRoots, G1GCPhaseTimes::ObjCopy);
   }
 
   void evacuate_live_objects(G1ParScanThreadState* pss, uint worker_id) {
@@ -3725,6 +3679,14 @@
 };
 
 void G1CollectedHeap::evacuate_initial_collection_set(G1ParScanThreadStateSet* per_thread_states) {
+  G1GCPhaseTimes* p = phase_times();
+
+  {
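+    // Merge the remembered sets and other pending card sources into the
+    // card table up front, so the workers below scan a single unified set
+    // of heap roots.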
+    Ticks start = Ticks::now();
+    rem_set()->merge_heap_roots(false /* remset_only */, G1GCPhaseTimes::MergeRS);
+    p->record_merge_heap_roots_time((Ticks::now() - start).seconds() * 1000.0);
+  }
+
   Tickspan task_time;
   const uint num_workers = workers()->active_workers();
 
@@ -3739,7 +3701,6 @@
   }
   Tickspan total_processing = Ticks::now() - start_processing;
 
-  G1GCPhaseTimes* p = phase_times();
   p->record_initial_evac_time(task_time.seconds() * 1000.0);
   p->record_or_add_code_root_fixup_time((total_processing - task_time).seconds() * 1000.0);
 }
@@ -3747,7 +3708,8 @@
 class G1EvacuateOptionalRegionsTask : public G1EvacuateRegionsBaseTask {
 
   void scan_roots(G1ParScanThreadState* pss, uint worker_id) {
-    _g1h->rem_set()->scan_rem_set(pss, worker_id, G1GCPhaseTimes::OptScanRS, G1GCPhaseTimes::OptObjCopy, G1GCPhaseTimes::OptCodeRoots);
+    _g1h->rem_set()->scan_heap_roots(pss, worker_id, G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::OptObjCopy);
+    _g1h->rem_set()->scan_collection_set_regions(pss, worker_id, G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::OptCodeRoots, G1GCPhaseTimes::OptObjCopy);
   }
 
   void evacuate_live_objects(G1ParScanThreadState* pss, uint worker_id) {
@@ -3783,8 +3745,6 @@
 void G1CollectedHeap::evacuate_optional_collection_set(G1ParScanThreadStateSet* per_thread_states) {
   const double gc_start_time_ms = phase_times()->cur_collection_start_sec() * 1000.0;
 
-  Ticks start = Ticks::now();
-
   while (!evacuation_failed() && _collection_set.optional_region_length() > 0) {
 
     double time_used_ms = os::elapsedTime() * 1000.0 - gc_start_time_ms;
@@ -3797,18 +3757,24 @@
       break;
     }
 
-    evacuate_next_optional_regions(per_thread_states);
+    {
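+      // Only the remembered sets of the optional regions are merged here
+      // (remset_only == true); other card sources were already merged
+      // before the initial evacuation.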
+      Ticks start = Ticks::now();
+      rem_set()->merge_heap_roots(true /* remset_only */, G1GCPhaseTimes::OptMergeRS);
+      phase_times()->record_or_add_optional_merge_heap_roots_time((Ticks::now() - start).seconds() * 1000.0);
+    }
+
+    {
+      Ticks start = Ticks::now();
+      evacuate_next_optional_regions(per_thread_states);
+      phase_times()->record_or_add_optional_evac_time((Ticks::now() - start).seconds() * 1000.0);
+    }
   }
 
   _collection_set.abandon_optional_collection_set(per_thread_states);
-
-  phase_times()->record_or_add_optional_evac_time((Ticks::now() - start).seconds() * 1000.0);
 }
 
 void G1CollectedHeap::post_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* per_thread_states) {
-  // Also cleans the card table from temporary duplicate detection information used
-  // during UpdateRS/ScanRS.
-  rem_set()->cleanup_after_scan_rem_set();
+  rem_set()->cleanup_after_scan_heap_roots();
 
   // Process any discovered reference objects - we have
   // to do this _before_ we retire the GC alloc regions
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -78,7 +78,6 @@
 class G1HotCardCache;
 class G1RemSet;
 class G1YoungRemSetSamplingThread;
-class HeapRegionRemSetIterator;
 class G1ConcurrentMark;
 class G1ConcurrentMarkThread;
 class G1ConcurrentRefine;
@@ -757,7 +756,7 @@
   void evacuate_next_optional_regions(G1ParScanThreadStateSet* per_thread_states);
 
 public:
-  void pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info);
+  void pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* pss);
   void post_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* pss);
 
   void expand_heap_after_young_collection();
@@ -1115,7 +1114,8 @@
 
  public:
 
-  inline G1HeapRegionAttr region_attr(const void* obj);
+  inline G1HeapRegionAttr region_attr(const void* obj) const;
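+  // Same lookup, keyed by region index instead of address.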
+  inline G1HeapRegionAttr region_attr(uint idx) const;
 
   // Return "TRUE" iff the given object address is in the reserved
   // region of g1.
@@ -1182,7 +1182,12 @@
   // Starts the iteration so that the start regions of a given worker id over the
   // set active_workers are evenly spread across the set of collection set regions
   // to be iterated.
-  void collection_set_iterate_increment_from(HeapRegionClosure *blk, uint worker_id);
+  // The variant with the HeapRegionClaimer guarantees that the closure will be
+  // applied to a particular region exactly once.
+  void collection_set_iterate_increment_from(HeapRegionClosure *blk, uint worker_id) {
+    collection_set_iterate_increment_from(blk, NULL, worker_id);
+  }
+  void collection_set_iterate_increment_from(HeapRegionClosure *blk, HeapRegionClaimer* hr_claimer, uint worker_id);
 
   // Returns the HeapRegion that contains addr. addr must not be NULL.
   template <class T>
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -163,10 +163,14 @@
   return _region_attr.is_in_cset_or_humongous((HeapWord*)obj);
 }
 
-G1HeapRegionAttr G1CollectedHeap::region_attr(const void* addr) {
+G1HeapRegionAttr G1CollectedHeap::region_attr(const void* addr) const {
   return _region_attr.at((HeapWord*)addr);
 }
 
+G1HeapRegionAttr G1CollectedHeap::region_attr(uint idx) const {
+  return _region_attr.get_by_index(idx);
+}
+
 void G1CollectedHeap::register_humongous_region_with_region_attr(uint index) {
   _region_attr.set_humongous(index, region_at(index)->rem_set()->is_tracked());
 }
@@ -177,7 +181,7 @@
 
 void G1CollectedHeap::register_old_region_with_region_attr(HeapRegion* r) {
   _region_attr.set_in_old(r->hrm_index(), r->rem_set()->is_tracked());
-  _rem_set->prepare_for_scan_rem_set(r->hrm_index());
+  _rem_set->prepare_for_scan_heap_roots(r->hrm_index());
 }
 
 void G1CollectedHeap::register_optional_region_with_region_attr(HeapRegion* r) {
--- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -217,10 +217,13 @@
   }
 }
 
-void G1CollectionSet::iterate_incremental_part_from(HeapRegionClosure* cl, uint worker_id, uint total_workers) const {
+void G1CollectionSet::iterate_incremental_part_from(HeapRegionClosure* cl,
+                                                    HeapRegionClaimer* hr_claimer,
+                                                    uint worker_id,
+                                                    uint total_workers) const {
   assert_at_safepoint();
 
-  size_t len = _collection_set_cur_length - _inc_part_start;
+  size_t len = increment_length();
   if (len == 0) {
     return;
   }
@@ -229,9 +232,12 @@
   size_t cur_pos = start_pos;
 
   do {
-    HeapRegion* r = _g1h->region_at(_collection_set_regions[cur_pos + _inc_part_start]);
-    bool result = cl->do_heap_region(r);
-    guarantee(!result, "Must not cancel iteration");
+    uint region_idx = _collection_set_regions[cur_pos + _inc_part_start];
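+    // When a claimer is supplied, only the worker that successfully claims
+    // the region applies the closure, so each region is processed once.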
+    if (hr_claimer == NULL || hr_claimer->claim_region(region_idx)) {
+      HeapRegion* r = _g1h->region_at(region_idx);
+      bool result = cl->do_heap_region(r);
+      guarantee(!result, "Must not cancel iteration");
+    }
 
     cur_pos++;
     if (cur_pos == len) {
--- a/src/hotspot/share/gc/g1/g1CollectionSet.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/g1/g1CollectionSet.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -36,6 +36,7 @@
 class G1Policy;
 class G1SurvivorRegions;
 class HeapRegion;
+class HeapRegionClaimer;
 class HeapRegionClosure;
 
 // The collection set.
@@ -279,7 +280,12 @@
 
   // Iterate over the current collection set increment applying the given HeapRegionClosure
   // from a starting position determined by the given worker id.
-  void iterate_incremental_part_from(HeapRegionClosure* cl, uint worker_id, uint total_workers) const;
+  void iterate_incremental_part_from(HeapRegionClosure* cl, HeapRegionClaimer* hr_claimer, uint worker_id, uint total_workers) const;
+
+  // Returns the length of the current increment in number of regions.
+  size_t increment_length() const { return _collection_set_cur_length - _inc_part_start; }
+  // Returns the length of the whole current collection set in number of regions.
+  size_t cur_length() const { return _collection_set_cur_length; }
 
   // Iterate over the entire collection set (all increments calculated so far), applying
   // the given HeapRegionClosure on all of them.
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -2419,12 +2419,13 @@
     abort_marking_if_regular_check_fail();
   }
 
+  // Can't assert qset is empty here, even if not aborted.  If concurrent,
+  // some other thread might be adding to the queue.  If not concurrent,
+  // some other thread might have won the race for the last buffer, but
+  // has not yet decremented the count.
+
   _draining_satb_buffers = false;
 
-  assert(has_aborted() ||
-         _cm->concurrent() ||
-         satb_mq_set.completed_buffers_num() == 0, "invariant");
-
   // again, this was a potentially expensive operation, decrease the
   // limits to get the regular clock call early
   decrease_limits();
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -206,7 +206,7 @@
     // available buffers near green_zone value.  When yellow_size is
     // large we don't want to allow a full step to accumulate before
     // doing any processing, as that might lead to significantly more
-    // than green_zone buffers to be processed by update_rs.
+    // than green_zone buffers to be processed during scanning.
     step = MIN2(step, ParallelGCThreads / 2.0);
   }
   size_t activate_offset = static_cast<size_t>(ceil(step * (worker_i + 1)));
@@ -322,18 +322,18 @@
 }
 
 static size_t calc_new_green_zone(size_t green,
-                                  double update_rs_time,
-                                  size_t update_rs_processed_buffers,
+                                  double log_buffer_scan_time,
+                                  size_t processed_log_buffers,
                                   double goal_ms) {
   // Adjust green zone based on whether we're meeting the time goal.
   // Limit to max_green_zone.
   const double inc_k = 1.1, dec_k = 0.9;
-  if (update_rs_time > goal_ms) {
+  if (log_buffer_scan_time > goal_ms) {
     if (green > 0) {
       green = static_cast<size_t>(green * dec_k);
     }
-  } else if (update_rs_time < goal_ms &&
-             update_rs_processed_buffers > green) {
+  } else if (log_buffer_scan_time < goal_ms &&
+             processed_log_buffers > green) {
     green = static_cast<size_t>(MAX2(green * inc_k, green + 1.0));
     green = MIN2(green, max_green_zone);
   }
@@ -350,20 +350,20 @@
   return MIN2(yellow + (yellow - green), max_red_zone);
 }
 
-void G1ConcurrentRefine::update_zones(double update_rs_time,
-                                      size_t update_rs_processed_buffers,
+void G1ConcurrentRefine::update_zones(double log_buffer_scan_time,
+                                      size_t processed_log_buffers,
                                       double goal_ms) {
   log_trace( CTRL_TAGS )("Updating Refinement Zones: "
-                         "update_rs time: %.3fms, "
-                         "update_rs buffers: " SIZE_FORMAT ", "
-                         "update_rs goal time: %.3fms",
-                         update_rs_time,
-                         update_rs_processed_buffers,
+                         "log buffer scan time: %.3fms, "
+                         "processed buffers: " SIZE_FORMAT ", "
+                         "goal time: %.3fms",
+                         log_buffer_scan_time,
+                         processed_log_buffers,
                          goal_ms);
 
   _green_zone = calc_new_green_zone(_green_zone,
-                                    update_rs_time,
-                                    update_rs_processed_buffers,
+                                    log_buffer_scan_time,
+                                    processed_log_buffers,
                                     goal_ms);
   _yellow_zone = calc_new_yellow_zone(_green_zone, _min_yellow_zone_size);
   _red_zone = calc_new_red_zone(_green_zone, _yellow_zone);
@@ -376,13 +376,13 @@
             _green_zone, _yellow_zone, _red_zone);
 }
 
-void G1ConcurrentRefine::adjust(double update_rs_time,
-                                size_t update_rs_processed_buffers,
+void G1ConcurrentRefine::adjust(double log_buffer_scan_time,
+                                size_t processed_log_buffers,
                                 double goal_ms) {
   G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
 
   if (G1UseAdaptiveConcRefinement) {
-    update_zones(update_rs_time, update_rs_processed_buffers, goal_ms);
+    update_zones(log_buffer_scan_time, processed_log_buffers, goal_ms);
 
     // Change the barrier params
     if (max_num_threads() == 0) {
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp	Mon Jul 01 14:57:02 2019 -0700
@@ -97,8 +97,8 @@
                      size_t min_yellow_zone_size);
 
   // Update green/yellow/red zone values based on how well goals are being met.
-  void update_zones(double update_rs_time,
-                    size_t update_rs_processed_buffers,
+  void update_zones(double log_buffer_scan_time,
+                    size_t processed_log_buffers,
                     double goal_ms);
 
   static uint worker_id_offset();
@@ -115,7 +115,7 @@
   void stop();
 
   // Adjust refinement thresholds based on work done during the pause and the goal time.
-  void adjust(double update_rs_time, size_t update_rs_processed_buffers, double goal_ms);
+  void adjust(double log_buffer_scan_time, size_t processed_log_buffers, double goal_ms);
 
   size_t activation_threshold(uint worker_id) const;
   size_t deactivation_threshold(uint worker_id) const;
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	Wed Jun 26 15:34:13 2019 -0700
+++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	Mon Jul 01 14:57:02 2019 -0700
@@ -78,7 +78,14 @@
 }
 
 G1DirtyCardQueueSet::G1DirtyCardQueueSet(bool notify_when_complete) :
-  PtrQueueSet(notify_when_complete),
+  PtrQueueSet(),
+  _cbl_mon(NULL),
+  _completed_buffers_head(NULL),
+  _completed_buffers_tail(NULL),
+  _n_completed_buffers(0),
+  _process_completed_buffers_threshold(ProcessCompletedBuffersThresholdNever),
+  _process_completed_buffers(false),
+  _notify_when_complete(notify_when_complete),
   _max_completed_buffers(MaxCompletedBuffersUnlimited),
   _completed_buffers_padding(0),
   _free_ids(NULL),
@@ -90,6 +97,7 @@
 }
 
 G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
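+  // Free any buffers still on the completed list before deleting the free id set.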
+  abandon_completed_buffers();
   delete _free_ids;
 }
 
@@ -101,7 +109,9 @@
 void G1DirtyCardQueueSet::initialize(Monitor* cbl_mon,
                                      BufferNode::Allocator* allocator,
                                      bool init_free_ids) {
-  PtrQueueSet::initialize(cbl_mon, allocator);
+  PtrQueueSet::initialize(allocator);
+  assert(_cbl_mon == NULL, "Init order issue?");
+  _cbl_mon = cbl_mon;
   if (init_free_ids) {
     _free_ids = new G1FreeIdSet(0, num_par_ids());
   }
@@ -111,6 +121,123 @@
   G1ThreadLocalData::dirty_card_queue(t).handle_zero_index();
 }
 
+void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
+  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+  cbn->set_next(NULL);
+  if (_completed_buffers_tail == NULL) {
+    assert(_completed_buffers_head == NULL, "Well-formedness");
+    _completed_buffers_head = cbn;
+    _completed_buffers_tail = cbn;
+  } else {
+    _completed_buffers_tail->set_next(cbn);
+    _completed_buffers_tail = cbn;
+  }
+  _n_completed_buffers++;
+
+  if (!process_completed_buffers() &&
+      (_n_completed_buffers > process_completed_buffers_threshold())) {
+    set_process_completed_buffers(true);
+    if (_notify_when_complete) {
+      _cbl_mon->notify_all();
+    }
+  }
+  assert_completed_buffers_list_len_correct_locked();
+}
+
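+// Remove and return the head of the completed-buffer list, or NULL if at
+// most stop_at buffers remain, letting callers leave a minimum number of
+// buffers behind for later processing.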
+BufferNode* G1DirtyCardQueueSet::get_completed_buffer(size_t stop_at) {
+  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+
+  if (_n_completed_buffers <= stop_at) {
+    return NULL;
+  }
+
+  assert(_n_completed_buffers > 0, "invariant");
+  assert(_completed_buffers_head != NULL, "invariant");
+  assert(_completed_buffers_tail != NULL, "invariant");
+
+  BufferNode* bn = _completed_buffers_head;
+  _n_completed_buffers--;
+  _completed_buffers_head = bn->next();
+  if (_completed_buffers_head == NULL) {
+    assert(_n_completed_buffers == 0, "invariant");
+    _completed_buffers_tail = NULL;
+    set_process_completed_buffers(false);
+  }
+  assert_completed_buffers_list_len_correct_locked();
+  bn->set_next(NULL);
+  return bn;
+}
+
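+// Discard all completed buffers: detach the list while holding the lock,
+// then deallocate the buffers outside the critical section.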
+void G1DirtyCardQueueSet::abandon_completed_buffers() {
+  BufferNode* buffers_to_delete = NULL;
+  {
+    MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+    buffers_to_delete = _completed_buffers_head;
+    _completed_buffers_head = NULL;
+    _completed_buffers_tail = NULL;
+    _n_completed_buffers = 0;
+    set_process_completed_buffers(false);
+  }
+  while (buffers_to_delete != NULL) {
+    BufferNode* bn = buffers_to_delete;
+    buffers_to_delete = bn->next();
+    bn->set_next(NULL);
+    deallocate_buffer(bn);
+  }
+}
+
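+// If the number of completed buffers exceeds the threshold, flag the list
+// for processing and notify a waiting thread.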
+void G1DirtyCardQueueSet::notify_if_necessary() {
+  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+  if (_n_completed_buffers > process_completed_buffers_threshold()) {
+    set_process_completed_buffers(true);
+    if (_notify_when_complete) {
+      _cbl_mon->notify();
+    }
+  }
+}
+
+#ifdef ASSERT
+void G1DirtyCardQueueSet::assert_completed_buffers_list_len_correct_locked() {
+  assert_lock_strong(_cbl_mon);
+  size_t n = 0;
+  for (BufferNode* bn = _completed_buffers_head; bn != NULL; bn = bn->next()) {
+    ++n;
+  }
+  assert(n == _n_completed_buffers,
+         "Completed buffer length is wrong: counted: " SIZE_FORMAT
+         ", expected: " SIZE_FORMAT, n, _n_completed_buffers);
+}
+#endif // ASSERT
+
+// Merge lists of buffers. Notify the processing threads.
+// The source queue is emptied as a result. The queues
+// must share the monitor.
+void G1DirtyCardQueueSet::merge_bufferlists(G1DirtyCardQueueSet *src) {
+  assert(_cbl_mon == src->_cbl_mon, "Should share the same lock");
+  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+  if (_completed_buffers_tail == NULL) {
+    assert(_completed_buffers_head == NULL, "Well-formedness");
+    _completed_buffers_head = src->_completed_buffers_head;
+    _completed_buffers_tail = src->_completed_buffers_tail;
+  } else {
+    assert(_completed_buffers_head != NULL, "Well-formedness");