OpenJDK / jdk / jdk
changeset 55805:162f4f1c841c
Merge
author | prr |
---|---|
date | Mon, 01 Jul 2019 14:57:02 -0700 |
parents | af678f2593e2 734e58d8477b |
children | 0e01b955bfd4 |
files | src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.test/src/org/graalvm/compiler/core/test/GuardedIntrinsicTest.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/WriteBarrierVerificationTest.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/g1/G1ArrayRangePostWriteBarrier.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/g1/G1ArrayRangePreWriteBarrier.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/g1/G1BarrierSet.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/g1/G1PostWriteBarrier.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/g1/G1PreWriteBarrier.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/g1/G1ReferentFieldReadBarrier.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/shared/ArrayRangeWriteBarrier.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/shared/BarrierSet.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/shared/CardTableBarrierSet.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/shared/ObjectWriteBarrier.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/shared/SerialArrayRangeWriteBarrier.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/gc/shared/SerialWriteBarrier.java 
src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/nodes/WriteBarrier.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/phases/WriteBarrierAdditionPhase.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/phases/WriteBarrierVerificationPhase.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/replacements/WriteBarrierSnippets.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.phases/src/org/graalvm/compiler/phases/tiers/PhaseContext.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/Log.java src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/DirectStoreNode.java test/hotspot/jtreg/compiler/codecache/stress/UnexpectedDeoptimizationAllTest.java test/hotspot/jtreg/compiler/jvmci/compilerToVM/ResolveConstantInPoolTest.java test/jdk/ProblemList.txt test/jdk/sun/security/tools/keytool/PSS.java |
diffstat | 985 files changed, 24612 insertions(+), 16409 deletions(-) [+] |
line wrap: on
line diff
--- a/.hgtags Wed Jun 26 15:34:13 2019 -0700 +++ b/.hgtags Mon Jul 01 14:57:02 2019 -0700 @@ -567,3 +567,5 @@ 2f4e214781a1d597ed36bf5a36f20928c6c82996 jdk-14+1 0692b67f54621991ba7afbf23e55b788f3555e69 jdk-13+26 43627549a488b7d0b4df8fad436e36233df89877 jdk-14+2 +b7f68ddec66f996ae3aad03291d129ca9f02482d jdk-13+27 +e64383344f144217c36196c3c8a2df8f588a2af3 jdk-14+3
--- a/make/common/FindTests.gmk Wed Jun 26 15:34:13 2019 -0700 +++ b/make/common/FindTests.gmk Mon Jul 01 14:57:02 2019 -0700 @@ -62,10 +62,8 @@ # If this file is deemed outdated, it will automatically get regenerated # by this rule before being included below. -# -# When calling TestMake.gmk, override the log level to avoid any kind of debug -# output being captured into the generated makefile. -$(FIND_TESTS_CACHE_FILE): $(JTREG_ROOT_FILES) $(JTREG_GROUP_FILES) +$(FIND_TESTS_CACHE_FILE): $(JTREG_ROOT_FILES) $(JTREG_GROUP_FILES) \ + $(TOPDIR)/test/make/TestMake.gmk $(call MakeTargetDir) ( $(foreach root, $(JTREG_TESTROOTS), \ $(PRINTF) "\n$(root)_JTREG_TEST_GROUPS := " ; \ @@ -73,10 +71,11 @@ $($(root)_JTREG_GROUP_FILES) \ | $(SORT) -u | $(TR) '\n' ' ' ; \ ) \ - $(PRINTF) "\nMAKE_TEST_TARGETS := " ; \ - $(MAKE) -s --no-print-directory $(MAKE_ARGS) LOG_LEVEL=warn \ - SPEC=$(SPEC) -f $(TOPDIR)/test/make/TestMake.gmk print-targets \ ) > $@ + $(PRINTF) "\nMAKE_TEST_TARGETS := " >> $@ + $(MAKE) -s --no-print-directory $(MAKE_ARGS) \ + SPEC=$(SPEC) -f $(TOPDIR)/test/make/TestMake.gmk print-targets \ + TARGETS_FILE=$@ -include $(FIND_TESTS_CACHE_FILE)
--- a/make/common/ProcessMarkdown.gmk Wed Jun 26 15:34:13 2019 -0700 +++ b/make/common/ProcessMarkdown.gmk Mon Jul 01 14:57:02 2019 -0700 @@ -103,7 +103,7 @@ $$(call LogInfo, Post-processing markdown file $2) $$(call MakeDir, $$(SUPPORT_OUTPUTDIR)/markdown $$($1_$2_TARGET_DIR)) $$(call ExecuteWithLog, $$(SUPPORT_OUTPUTDIR)/markdown/$$($1_$2_MARKER)_post, \ - $$($1_POST_PROCESS) < $$($1_$2_PANDOC_OUTPUT) > $$($1_$2_OUTPUT_FILE)) + ( $$($1_POST_PROCESS) < $$($1_$2_PANDOC_OUTPUT) > $$($1_$2_OUTPUT_FILE) ) ) endif $1 += $$($1_$2_OUTPUT_FILE)
--- a/src/hotspot/cpu/aarch64/aarch64.ad Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/aarch64/aarch64.ad Mon Jul 01 14:57:02 2019 -0700 @@ -1761,6 +1761,17 @@ // branch if we need to invalidate the method later __ nop(); + if (C->clinit_barrier_on_entry()) { + assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started"); + + Label L_skip_barrier; + + __ mov_metadata(rscratch2, C->method()->holder()->constant_encoding()); + __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier); + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + __ bind(L_skip_barrier); + } + int bangsize = C->bang_size_in_bytes(); if (C->need_stack_bang(bangsize) && UseStackBanging) __ generate_stack_overflow_check(bangsize);
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -317,7 +317,15 @@ } void LIR_Assembler::clinit_barrier(ciMethod* method) { - ShouldNotReachHere(); // not implemented + assert(VM_Version::supports_fast_class_init_checks(), "sanity"); + assert(!method->holder()->is_not_initialized(), "initialization should have been started"); + + Label L_skip_barrier; + + __ mov_metadata(rscratch2, method->holder()->constant_encoding()); + __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier /*L_fast_path*/); + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + __ bind(L_skip_barrier); } void LIR_Assembler::jobject2reg(jobject o, Register reg) {
--- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -331,11 +331,6 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { - // If we have to make this method not-entrant we'll overwrite its - // first instruction with a jump. For this action to be legal we - // must ensure that this first instruction is a B, BL, NOP, BKPT, - // SVC, HVC, or SMC. Make it a NOP. - nop(); assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); // Make sure there is enough stack space for this method's activation. // Note that we do this before doing an enter(). @@ -355,6 +350,11 @@ void C1_MacroAssembler::verified_entry() { + // If we have to make this method not-entrant we'll overwrite its + // first instruction with a jump. For this action to be legal we + // must ensure that this first instruction is a B, BL, NOP, BKPT, + // SVC, HVC, or SMC. Make it a NOP. + nop(); } void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) {
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -63,27 +63,25 @@ return; } - // rscratch1 can be passed as src or dst, so don't use it. - RegSet savedRegs = RegSet::of(rscratch2, rheapbase); + assert_different_registers(rscratch1, rscratch2, src.base()); + assert_different_registers(rscratch1, rscratch2, dst); + + RegSet savedRegs = RegSet::range(r0,r28) - RegSet::of(dst, rscratch1, rscratch2); Label done; - assert_different_registers(rheapbase, rscratch2, dst); - assert_different_registers(rheapbase, rscratch2, src.base()); - - __ push(savedRegs, sp); // Load bad mask into scratch register. - __ ldr(rheapbase, address_bad_mask_from_thread(rthread)); + __ ldr(rscratch1, address_bad_mask_from_thread(rthread)); __ lea(rscratch2, src); __ ldr(dst, src); // Test reference against bad mask. If mask bad, then we need to fix it up. - __ tst(dst, rheapbase); + __ tst(dst, rscratch1); __ br(Assembler::EQ, done); __ enter(); - __ push(RegSet::range(r0,r28) - RegSet::of(dst), sp); + __ push(savedRegs, sp); if (c_rarg0 != dst) { __ mov(c_rarg0, dst); @@ -91,13 +89,15 @@ __ mov(c_rarg1, rscratch2); int step = 4 * wordSize; - __ mov(rscratch1, -step); + __ mov(rscratch2, -step); __ sub(sp, sp, step); for (int i = 28; i >= 4; i -= 4) { __ st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2), - as_FloatRegister(i+3), __ T1D, Address(__ post(sp, rscratch1))); + as_FloatRegister(i+3), __ T1D, Address(__ post(sp, rscratch2))); } + __ st1(as_FloatRegister(0), as_FloatRegister(1), as_FloatRegister(2), + as_FloatRegister(3), __ T1D, Address(sp)); __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); @@ -111,13 +111,10 @@ __ mov(dst, r0); } - __ pop(RegSet::range(r0,r28) - RegSet::of(dst), sp); + __ pop(savedRegs, sp); __ leave(); __ bind(done); - - // Restore tmps - __ 
pop(savedRegs, sp); } #ifdef ASSERT
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -288,6 +288,18 @@ ldr(klass, Address(klass, Array<Klass*>::base_offset_in_bytes())); } +void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no, + Register method, + Register cache) { + const int method_offset = in_bytes( + ConstantPoolCache::base_offset() + + ((byte_no == TemplateTable::f2_byte) + ? ConstantPoolCacheEntry::f2_offset() + : ConstantPoolCacheEntry::f1_offset())); + + ldr(method, Address(cache, method_offset)); // get f1 Method* +} + // Generate a subtype check: branch to ok_is_subtype if sub_klass is a // subtype of super_klass. //
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -124,6 +124,8 @@ // load cpool->resolved_klass_at(index); void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); + void load_resolved_method_at_index(int byte_no, Register method, Register cache); + void pop_ptr(Register r = r0); void pop_i(Register r = r0); void pop_l(Register r = r0);
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1307,6 +1307,35 @@ bind(L_fallthrough); } +void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_fast_path, Label* L_slow_path) { + assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required"); + assert_different_registers(klass, rthread, scratch); + + Label L_fallthrough, L_tmp; + if (L_fast_path == NULL) { + L_fast_path = &L_fallthrough; + } else if (L_slow_path == NULL) { + L_slow_path = &L_fallthrough; + } + // Fast path check: class is fully initialized + ldrb(scratch, Address(klass, InstanceKlass::init_state_offset())); + subs(zr, scratch, InstanceKlass::fully_initialized); + br(Assembler::EQ, *L_fast_path); + + // Fast path check: current thread is initializer thread + ldr(scratch, Address(klass, InstanceKlass::init_thread_offset())); + cmp(rthread, scratch); + + if (L_slow_path == &L_fallthrough) { + br(Assembler::EQ, *L_fast_path); + bind(*L_slow_path); + } else if (L_fast_path == &L_fallthrough) { + br(Assembler::NE, *L_slow_path); + bind(*L_fast_path); + } else { + Unimplemented(); + } +} void MacroAssembler::verify_oop(Register reg, const char* s) { if (!VerifyOops) return; @@ -3683,6 +3712,12 @@ bs->obj_equals(this, obj1, obj2); } +void MacroAssembler::load_method_holder(Register holder, Register method) { + ldr(holder, Address(method, Method::const_offset())); // ConstMethod* + ldr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* + ldr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* +} + void MacroAssembler::load_klass(Register dst, Register src) { if (UseCompressedClassPointers) { ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes()));
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -788,6 +788,8 @@ // C 'boolean' to Java boolean: x == 0 ? 0 : 1 void c2bool(Register x); + void load_method_holder(Register holder, Register method); + // oop manipulations void load_klass(Register dst, Register src); void store_klass(Register dst, Register src); @@ -926,6 +928,11 @@ Register temp_reg, Label& L_success); + void clinit_barrier(Register klass, + Register thread, + Label* L_fast_path = NULL, + Label* L_slow_path = NULL); + Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -799,6 +799,22 @@ } #endif + // Class initialization barrier for static methods + if (VM_Version::supports_fast_class_init_checks()) { + Label L_skip_barrier; + + { // Bypass the barrier for non-static methods + __ ldrw(rscratch1, Address(rmethod, Method::access_flags_offset())); + __ andsw(zr, rscratch1, JVM_ACC_STATIC); + __ br(Assembler::EQ, L_skip_barrier); // non-static + } + + __ load_method_holder(rscratch2, rmethod); + __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier); + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + __ bind(L_skip_barrier); + } + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); __ flush(); @@ -1580,6 +1596,15 @@ // SVC, HVC, or SMC. Make it a NOP. __ nop(); + if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { + Label L_skip_barrier; + __ mov_metadata(rscratch2, method->method_holder()); // InstanceKlass* + __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier); + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + + __ bind(L_skip_barrier); + } + // Generate stack overflow check if (UseStackBanging) { __ bang_stack_with_offset(JavaThread::stack_shadow_zone_size());
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1383,7 +1383,12 @@ // save regs before copy_memory __ push(RegSet::of(d, count), sp); } - copy_memory(aligned, s, d, count, rscratch1, size); + { + // UnsafeCopyMemory page error: continue after ucm + bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); + UnsafeCopyMemoryMark ucmm(this, add_entry, true); + copy_memory(aligned, s, d, count, rscratch1, size); + } if (is_oop) { __ pop(RegSet::of(d, count), sp); @@ -1455,7 +1460,12 @@ // save regs before copy_memory __ push(RegSet::of(d, count), sp); } - copy_memory(aligned, s, d, count, rscratch1, -size); + { + // UnsafeCopyMemory page error: continue after ucm + bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size); + UnsafeCopyMemoryMark ucmm(this, add_entry, true); + copy_memory(aligned, s, d, count, rscratch1, -size); + } if (is_oop) { __ pop(RegSet::of(d, count), sp); if (VerifyOops) @@ -5816,6 +5826,10 @@ } }; // end class declaration +#define UCM_TABLE_MAX_ENTRIES 8 void StubGenerator_generate(CodeBuffer* code, bool all) { + if (UnsafeCopyMemory::_table == NULL) { + UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } StubGenerator g(code, all); }
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -886,8 +886,8 @@ } // Get mirror and store it in the frame as GC root for this Method* - __ load_mirror(rscratch1, rmethod); - __ stp(rscratch1, zr, Address(sp, 4 * wordSize)); + __ load_mirror(r10, rmethod); + __ stp(r10, zr, Address(sp, 4 * wordSize)); __ ldr(rcpool, Address(rmethod, Method::const_offset())); __ ldr(rcpool, Address(rcpool, ConstMethod::constants_offset()));
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -2323,7 +2323,7 @@ const Register temp = r19; assert_different_registers(Rcache, index, temp); - Label resolved; + Label resolved, clinit_barrier_slow; Bytecodes::Code code = bytecode(); switch (code) { @@ -2338,6 +2338,8 @@ __ br(Assembler::EQ, resolved); // resolve first time through + // Class initialization barrier slow path lands here as well. + __ bind(clinit_barrier_slow); address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); __ mov(temp, (int) code); __ call_VM(noreg, entry, temp); @@ -2347,6 +2349,13 @@ // n.b. unlike x86 Rcache is now rcpool plus the indexed offset // so all clients ofthis method must be modified accordingly __ bind(resolved); + + // Class initialization barrier for static methods + if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { + __ load_resolved_method_at_index(byte_no, temp, Rcache); + __ load_method_holder(temp, temp); + __ clinit_barrier(temp, rscratch1, NULL, &clinit_barrier_slow); + } } // The Rcache and index registers must be set before call @@ -3418,9 +3427,8 @@ __ profile_virtual_call(r3, r13, r19); // Get declaring interface class from method, and itable index - __ ldr(r0, Address(rmethod, Method::const_offset())); - __ ldr(r0, Address(r0, ConstMethod::constants_offset())); - __ ldr(r0, Address(r0, ConstantPool::pool_holder_offset_in_bytes())); + + __ load_method_holder(r0, rmethod); __ ldrw(rmethod, Address(rmethod, Method::itable_index_offset())); __ subw(rmethod, rmethod, Method::itable_index_max); __ negw(rmethod, rmethod);
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -124,6 +124,7 @@ static int dcache_line_size() { return (1 << ((_psr_info.ctr_el0 >> 16) & 0x0f)) * 4; } + static bool supports_fast_class_init_checks() { return true; } }; #endif // CPU_AARCH64_VM_VERSION_AARCH64_HPP
--- a/src/hotspot/cpu/arm/stubGenerator_arm.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/arm/stubGenerator_arm.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -928,7 +928,7 @@ // Scratches 'count', R3. // R4-R10 are preserved (saved/restored). // - int generate_forward_aligned_copy_loop(Register from, Register to, Register count, int bytes_per_count) { + int generate_forward_aligned_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool unsafe_copy = false) { assert (from == R0 && to == R1 && count == R2, "adjust the implementation below"); const int bytes_per_loop = 8*wordSize; // 8 registers are read and written on every loop iteration @@ -954,107 +954,111 @@ Label L_skip_pld; - // predecrease to exit when there is less than count_per_loop - __ sub_32(count, count, count_per_loop); - - if (pld_offset != 0) { - pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset; - - prefetch(from, to, 0); - - if (prefetch_before) { - // If prefetch is done ahead, final PLDs that overflow the - // copied area can be easily avoided. 'count' is predecreased - // by the prefetch distance to optimize the inner loop and the - // outer loop skips the PLD. - __ subs_32(count, count, (bytes_per_loop+pld_offset)/bytes_per_count); - - // skip prefetch for small copies - __ b(L_skip_pld, lt); - } - - int offset = ArmCopyCacheLineSize; - while (offset <= pld_offset) { - prefetch(from, to, offset); - offset += ArmCopyCacheLineSize; - }; - } - { - // 32-bit ARM note: we have tried implementing loop unrolling to skip one - // PLD with 64 bytes cache line but the gain was not significant. 
- - Label L_copy_loop; - __ align(OptoLoopAlignment); - __ BIND(L_copy_loop); - - if (prefetch_before) { - prefetch(from, to, bytes_per_loop + pld_offset); - __ BIND(L_skip_pld); - } - - if (split_read) { - // Split the register set in two sets so that there is less - // latency between LDM and STM (R3-R6 available while R7-R10 - // still loading) and less register locking issue when iterating - // on the first LDM. - __ ldmia(from, RegisterSet(R3, R6), writeback); - __ ldmia(from, RegisterSet(R7, R10), writeback); - } else { - __ ldmia(from, RegisterSet(R3, R10), writeback); + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true); + // predecrease to exit when there is less than count_per_loop + __ sub_32(count, count, count_per_loop); + + if (pld_offset != 0) { + pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset; + + prefetch(from, to, 0); + + if (prefetch_before) { + // If prefetch is done ahead, final PLDs that overflow the + // copied area can be easily avoided. 'count' is predecreased + // by the prefetch distance to optimize the inner loop and the + // outer loop skips the PLD. 
+ __ subs_32(count, count, (bytes_per_loop+pld_offset)/bytes_per_count); + + // skip prefetch for small copies + __ b(L_skip_pld, lt); + } + + int offset = ArmCopyCacheLineSize; + while (offset <= pld_offset) { + prefetch(from, to, offset); + offset += ArmCopyCacheLineSize; + }; } - __ subs_32(count, count, count_per_loop); - - if (prefetch_after) { - prefetch(from, to, pld_offset, bytes_per_loop); - } - - if (split_write) { - __ stmia(to, RegisterSet(R3, R6), writeback); - __ stmia(to, RegisterSet(R7, R10), writeback); - } else { - __ stmia(to, RegisterSet(R3, R10), writeback); - } - - __ b(L_copy_loop, ge); - - if (prefetch_before) { - // the inner loop may end earlier, allowing to skip PLD for the last iterations - __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count); - __ b(L_skip_pld, ge); + { + // 32-bit ARM note: we have tried implementing loop unrolling to skip one + // PLD with 64 bytes cache line but the gain was not significant. + + Label L_copy_loop; + __ align(OptoLoopAlignment); + __ BIND(L_copy_loop); + + if (prefetch_before) { + prefetch(from, to, bytes_per_loop + pld_offset); + __ BIND(L_skip_pld); + } + + if (split_read) { + // Split the register set in two sets so that there is less + // latency between LDM and STM (R3-R6 available while R7-R10 + // still loading) and less register locking issue when iterating + // on the first LDM. 
+ __ ldmia(from, RegisterSet(R3, R6), writeback); + __ ldmia(from, RegisterSet(R7, R10), writeback); + } else { + __ ldmia(from, RegisterSet(R3, R10), writeback); + } + + __ subs_32(count, count, count_per_loop); + + if (prefetch_after) { + prefetch(from, to, pld_offset, bytes_per_loop); + } + + if (split_write) { + __ stmia(to, RegisterSet(R3, R6), writeback); + __ stmia(to, RegisterSet(R7, R10), writeback); + } else { + __ stmia(to, RegisterSet(R3, R10), writeback); + } + + __ b(L_copy_loop, ge); + + if (prefetch_before) { + // the inner loop may end earlier, allowing to skip PLD for the last iterations + __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count); + __ b(L_skip_pld, ge); + } } - } - BLOCK_COMMENT("Remaining bytes:"); - // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes - - // __ add(count, count, ...); // addition useless for the bit tests - assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits"); - - __ tst(count, 16 / bytes_per_count); - __ ldmia(from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes - __ stmia(to, RegisterSet(R3, R6), writeback, ne); - - __ tst(count, 8 / bytes_per_count); - __ ldmia(from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes - __ stmia(to, RegisterSet(R3, R4), writeback, ne); - - if (bytes_per_count <= 4) { - __ tst(count, 4 / bytes_per_count); - __ ldr(R3, Address(from, 4, post_indexed), ne); // copy 4 bytes - __ str(R3, Address(to, 4, post_indexed), ne); - } - - if (bytes_per_count <= 2) { - __ tst(count, 2 / bytes_per_count); - __ ldrh(R3, Address(from, 2, post_indexed), ne); // copy 2 bytes - __ strh(R3, Address(to, 2, post_indexed), ne); - } - - if (bytes_per_count == 1) { - __ tst(count, 1); - __ ldrb(R3, Address(from, 1, post_indexed), ne); - __ strb(R3, Address(to, 1, post_indexed), ne); + BLOCK_COMMENT("Remaining bytes:"); + // still 0..bytes_per_loop-1 aligned bytes to 
copy, count already decreased by (at least) bytes_per_loop bytes + + // __ add(count, count, ...); // addition useless for the bit tests + assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits"); + + __ tst(count, 16 / bytes_per_count); + __ ldmia(from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes + __ stmia(to, RegisterSet(R3, R6), writeback, ne); + + __ tst(count, 8 / bytes_per_count); + __ ldmia(from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes + __ stmia(to, RegisterSet(R3, R4), writeback, ne); + + if (bytes_per_count <= 4) { + __ tst(count, 4 / bytes_per_count); + __ ldr(R3, Address(from, 4, post_indexed), ne); // copy 4 bytes + __ str(R3, Address(to, 4, post_indexed), ne); + } + + if (bytes_per_count <= 2) { + __ tst(count, 2 / bytes_per_count); + __ ldrh(R3, Address(from, 2, post_indexed), ne); // copy 2 bytes + __ strh(R3, Address(to, 2, post_indexed), ne); + } + + if (bytes_per_count == 1) { + __ tst(count, 1); + __ ldrb(R3, Address(from, 1, post_indexed), ne); + __ strb(R3, Address(to, 1, post_indexed), ne); + } } __ pop(RegisterSet(R4,R10)); @@ -1083,7 +1087,7 @@ // Scratches 'count', R3. // ARM R4-R10 are preserved (saved/restored). // - int generate_backward_aligned_copy_loop(Register end_from, Register end_to, Register count, int bytes_per_count) { + int generate_backward_aligned_copy_loop(Register end_from, Register end_to, Register count, int bytes_per_count, bool unsafe_copy = false) { assert (end_from == R0 && end_to == R1 && count == R2, "adjust the implementation below"); const int bytes_per_loop = 8*wordSize; // 8 registers are read and written on every loop iteration @@ -1099,102 +1103,105 @@ __ push(RegisterSet(R4,R10)); - __ sub_32(count, count, count_per_loop); - - const bool prefetch_before = pld_offset < 0; - const bool prefetch_after = pld_offset > 0; - - Label L_skip_pld; - - if (pld_offset != 0) { - pld_offset = (pld_offset < 0) ? 
-pld_offset : pld_offset; - - prefetch(end_from, end_to, -wordSize); - - if (prefetch_before) { - __ subs_32(count, count, (bytes_per_loop + pld_offset) / bytes_per_count); - __ b(L_skip_pld, lt); + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true); + __ sub_32(count, count, count_per_loop); + + const bool prefetch_before = pld_offset < 0; + const bool prefetch_after = pld_offset > 0; + + Label L_skip_pld; + + if (pld_offset != 0) { + pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset; + + prefetch(end_from, end_to, -wordSize); + + if (prefetch_before) { + __ subs_32(count, count, (bytes_per_loop + pld_offset) / bytes_per_count); + __ b(L_skip_pld, lt); + } + + int offset = ArmCopyCacheLineSize; + while (offset <= pld_offset) { + prefetch(end_from, end_to, -(wordSize + offset)); + offset += ArmCopyCacheLineSize; + }; } - int offset = ArmCopyCacheLineSize; - while (offset <= pld_offset) { - prefetch(end_from, end_to, -(wordSize + offset)); - offset += ArmCopyCacheLineSize; - }; - } - - { - // 32-bit ARM note: we have tried implementing loop unrolling to skip one - // PLD with 64 bytes cache line but the gain was not significant. - - Label L_copy_loop; - __ align(OptoLoopAlignment); - __ BIND(L_copy_loop); - - if (prefetch_before) { - prefetch(end_from, end_to, -(wordSize + bytes_per_loop + pld_offset)); - __ BIND(L_skip_pld); + { + // 32-bit ARM note: we have tried implementing loop unrolling to skip one + // PLD with 64 bytes cache line but the gain was not significant. 
+ + Label L_copy_loop; + __ align(OptoLoopAlignment); + __ BIND(L_copy_loop); + + if (prefetch_before) { + prefetch(end_from, end_to, -(wordSize + bytes_per_loop + pld_offset)); + __ BIND(L_skip_pld); + } + + if (split_read) { + __ ldmdb(end_from, RegisterSet(R7, R10), writeback); + __ ldmdb(end_from, RegisterSet(R3, R6), writeback); + } else { + __ ldmdb(end_from, RegisterSet(R3, R10), writeback); + } + + __ subs_32(count, count, count_per_loop); + + if (prefetch_after) { + prefetch(end_from, end_to, -(wordSize + pld_offset), -bytes_per_loop); + } + + if (split_write) { + __ stmdb(end_to, RegisterSet(R7, R10), writeback); + __ stmdb(end_to, RegisterSet(R3, R6), writeback); + } else { + __ stmdb(end_to, RegisterSet(R3, R10), writeback); + } + + __ b(L_copy_loop, ge); + + if (prefetch_before) { + __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count); + __ b(L_skip_pld, ge); + } } - - if (split_read) { - __ ldmdb(end_from, RegisterSet(R7, R10), writeback); - __ ldmdb(end_from, RegisterSet(R3, R6), writeback); - } else { - __ ldmdb(end_from, RegisterSet(R3, R10), writeback); - } - - __ subs_32(count, count, count_per_loop); - - if (prefetch_after) { - prefetch(end_from, end_to, -(wordSize + pld_offset), -bytes_per_loop); + BLOCK_COMMENT("Remaining bytes:"); + // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes + + // __ add(count, count, ...); // addition useless for the bit tests + assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits"); + + __ tst(count, 16 / bytes_per_count); + __ ldmdb(end_from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes + __ stmdb(end_to, RegisterSet(R3, R6), writeback, ne); + + __ tst(count, 8 / bytes_per_count); + __ ldmdb(end_from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes + __ stmdb(end_to, RegisterSet(R3, R4), writeback, ne); + + if (bytes_per_count <= 4) { + __ tst(count, 4 / 
bytes_per_count); + __ ldr(R3, Address(end_from, -4, pre_indexed), ne); // copy 4 bytes + __ str(R3, Address(end_to, -4, pre_indexed), ne); } - if (split_write) { - __ stmdb(end_to, RegisterSet(R7, R10), writeback); - __ stmdb(end_to, RegisterSet(R3, R6), writeback); - } else { - __ stmdb(end_to, RegisterSet(R3, R10), writeback); + if (bytes_per_count <= 2) { + __ tst(count, 2 / bytes_per_count); + __ ldrh(R3, Address(end_from, -2, pre_indexed), ne); // copy 2 bytes + __ strh(R3, Address(end_to, -2, pre_indexed), ne); } - __ b(L_copy_loop, ge); - - if (prefetch_before) { - __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count); - __ b(L_skip_pld, ge); + if (bytes_per_count == 1) { + __ tst(count, 1); + __ ldrb(R3, Address(end_from, -1, pre_indexed), ne); + __ strb(R3, Address(end_to, -1, pre_indexed), ne); } } - BLOCK_COMMENT("Remaining bytes:"); - // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes - - // __ add(count, count, ...); // addition useless for the bit tests - assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits"); - - __ tst(count, 16 / bytes_per_count); - __ ldmdb(end_from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes - __ stmdb(end_to, RegisterSet(R3, R6), writeback, ne); - - __ tst(count, 8 / bytes_per_count); - __ ldmdb(end_from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes - __ stmdb(end_to, RegisterSet(R3, R4), writeback, ne); - - if (bytes_per_count <= 4) { - __ tst(count, 4 / bytes_per_count); - __ ldr(R3, Address(end_from, -4, pre_indexed), ne); // copy 4 bytes - __ str(R3, Address(end_to, -4, pre_indexed), ne); - } - - if (bytes_per_count <= 2) { - __ tst(count, 2 / bytes_per_count); - __ ldrh(R3, Address(end_from, -2, pre_indexed), ne); // copy 2 bytes - __ strh(R3, Address(end_to, -2, pre_indexed), ne); - } - - if (bytes_per_count == 1) { - __ tst(count, 1); - __ ldrb(R3, Address(end_from, -1, 
pre_indexed), ne); - __ strb(R3, Address(end_to, -1, pre_indexed), ne); - } - __ pop(RegisterSet(R4,R10)); return count_per_loop; @@ -1749,17 +1756,21 @@ // // Notes: // shifts 'from' and 'to' - void copy_small_array(Register from, Register to, Register count, Register tmp, Register tmp2, int bytes_per_count, bool forward, Label & entry) { + void copy_small_array(Register from, Register to, Register count, Register tmp, Register tmp2, int bytes_per_count, bool forward, Label & entry, bool unsafe_copy = false) { assert_different_registers(from, to, count, tmp); - __ align(OptoLoopAlignment); - Label L_small_loop; - __ BIND(L_small_loop); - store_one(tmp, to, bytes_per_count, forward, al, tmp2); - __ BIND(entry); // entry point - __ subs(count, count, 1); - load_one(tmp, from, bytes_per_count, forward, ge, tmp2); - __ b(L_small_loop, ge); + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true); + __ align(OptoLoopAlignment); + Label L_small_loop; + __ BIND(L_small_loop); + store_one(tmp, to, bytes_per_count, forward, al, tmp2); + __ BIND(entry); // entry point + __ subs(count, count, 1); + load_one(tmp, from, bytes_per_count, forward, ge, tmp2); + __ b(L_small_loop, ge); + } } // Aligns 'to' by reading one word from 'from' and writting its part to 'to'. @@ -1876,7 +1887,7 @@ // // Scratches 'from', 'count', R3 and R12. // R4-R10 saved for use. - int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool forward) { + int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool forward, bool unsafe_copy = false) { const Register Rval = forward ? R12 : R3; // as generate_{forward,backward}_shifted_copy_loop expect @@ -1886,60 +1897,64 @@ // then the remainder of 'to' divided by wordSize is one of elements of {seq}. 
__ push(RegisterSet(R4,R10)); - load_one(Rval, from, wordSize, forward); - - switch (bytes_per_count) { - case 2: - min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward); - break; - case 1: - { - Label L1, L2, L3; - int min_copy1, min_copy2, min_copy3; - - Label L_loop_finished; - - if (forward) { - __ tbz(to, 0, L2); - __ tbz(to, 1, L1); - - __ BIND(L3); - min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L1); - min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L2); - min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward); - } else { - __ tbz(to, 0, L2); - __ tbnz(to, 1, L3); - - __ BIND(L1); - min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L3); - min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward); - __ b(L_loop_finished); - - __ BIND(L2); - min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward); + + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true); + load_one(Rval, from, wordSize, forward); + + switch (bytes_per_count) { + case 2: + min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward); + break; + case 1: + { + Label L1, L2, L3; + int min_copy1, min_copy2, min_copy3; + + Label L_loop_finished; + + if (forward) { + __ tbz(to, 0, L2); + __ tbz(to, 1, L1); + + __ BIND(L3); + min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward); + __ b(L_loop_finished); + + __ BIND(L1); + min_copy1 = align_dst_and_generate_shifted_copy_loop(from, 
to, count, Rval, 1, bytes_per_count, forward); + __ b(L_loop_finished); + + __ BIND(L2); + min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward); + } else { + __ tbz(to, 0, L2); + __ tbnz(to, 1, L3); + + __ BIND(L1); + min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward); + __ b(L_loop_finished); + + __ BIND(L3); + min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward); + __ b(L_loop_finished); + + __ BIND(L2); + min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward); + } + + min_copy = MAX2(MAX2(min_copy1, min_copy2), min_copy3); + + __ BIND(L_loop_finished); + + break; } - - min_copy = MAX2(MAX2(min_copy1, min_copy2), min_copy3); - - __ BIND(L_loop_finished); - - break; + default: + ShouldNotReachHere(); + break; } - default: - ShouldNotReachHere(); - break; } - __ pop(RegisterSet(R4,R10)); return min_copy; @@ -1963,6 +1978,13 @@ } #endif // !PRODUCT + address generate_unsafecopy_common_error_exit() { + address start_pc = __ pc(); + __ mov(R0, 0); + __ ret(); + return start_pc; + } + // // Generate stub for primitive array copy. If "aligned" is true, the // "from" and "to" addresses are assumed to be heapword aligned. @@ -2033,8 +2055,13 @@ from_is_aligned = true; } - int count_required_to_align = from_is_aligned ? 0 : align_src(from, to, count, tmp1, bytes_per_count, forward); - assert (small_copy_limit >= count_required_to_align, "alignment could exhaust count"); + int count_required_to_align = 0; + { + // UnsafeCopyMemoryMark page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + count_required_to_align = from_is_aligned ? 
0 : align_src(from, to, count, tmp1, bytes_per_count, forward); + assert (small_copy_limit >= count_required_to_align, "alignment could exhaust count"); + } // now 'from' is aligned @@ -2064,9 +2091,9 @@ int min_copy; if (forward) { - min_copy = generate_forward_aligned_copy_loop (from, to, count, bytes_per_count); + min_copy = generate_forward_aligned_copy_loop(from, to, count, bytes_per_count, !aligned /*add UnsafeCopyMemory entry*/); } else { - min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count); + min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count, !aligned /*add UnsafeCopyMemory entry*/); } assert(small_copy_limit >= count_required_to_align + min_copy, "first loop might exhaust count"); @@ -2077,7 +2104,7 @@ __ ret(); { - copy_small_array(from, to, count, tmp1, tmp2, bytes_per_count, forward, L_small_array /* entry */); + copy_small_array(from, to, count, tmp1, tmp2, bytes_per_count, forward, L_small_array /* entry */, !aligned /*add UnsafeCopyMemory entry*/); if (status) { __ mov(R0, 0); // OK @@ -2088,7 +2115,7 @@ if (! 
to_is_aligned) { __ BIND(L_unaligned_dst); - int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward); + int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward, !aligned /*add UnsafeCopyMemory entry*/); assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count"); if (status) { @@ -2873,6 +2900,9 @@ status = true; // generate a status compatible with C1 calls #endif + address ucm_common_error_exit = generate_unsafecopy_common_error_exit(); + UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit); + // these need always status in case they are called from generic_arraycopy StubRoutines::_jbyte_disjoint_arraycopy = generate_primitive_copy(false, "jbyte_disjoint_arraycopy", true, 1, true); StubRoutines::_jshort_disjoint_arraycopy = generate_primitive_copy(false, "jshort_disjoint_arraycopy", true, 2, true); @@ -3055,6 +3085,10 @@ } }; // end class declaration +#define UCM_TABLE_MAX_ENTRIES 32 void StubGenerator_generate(CodeBuffer* code, bool all) { + if (UnsafeCopyMemory::_table == NULL) { + UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } StubGenerator g(code, all); }
--- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -952,6 +952,20 @@ // need to copy backwards } + // This is common errorexit stub for UnsafeCopyMemory. + address generate_unsafecopy_common_error_exit() { + address start_pc = __ pc(); + Register tmp1 = R6_ARG4; + // probably copy stub would have changed value reset it. + if (VM_Version::has_mfdscr()) { + __ load_const_optimized(tmp1, VM_Version::_dscr_val); + __ mtdscr(tmp1); + } + __ li(R3_RET, 0); // return 0 + __ blr(); + return start_pc; + } + // The guideline in the implementations of generate_disjoint_xxx_copy // (xxx=byte,short,int,long,oop) is to copy as many elements as possible with // single instructions, but to avoid alignment interrupts (see subsequent @@ -989,150 +1003,154 @@ VectorSRegister tmp_vsr2 = VSR2; Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10; - - // Don't try anything fancy if arrays don't have many elements. - __ li(tmp3, 0); - __ cmpwi(CCR0, R5_ARG3, 17); - __ ble(CCR0, l_6); // copy 4 at a time - - if (!aligned) { - __ xorr(tmp1, R3_ARG1, R4_ARG2); - __ andi_(tmp1, tmp1, 3); - __ bne(CCR0, l_6); // If arrays don't have the same alignment mod 4, do 4 element copy. - - // Copy elements if necessary to align to 4 bytes. - __ neg(tmp1, R3_ARG1); // Compute distance to alignment boundary. 
- __ andi_(tmp1, tmp1, 3); - __ beq(CCR0, l_2); - - __ subf(R5_ARG3, tmp1, R5_ARG3); - __ bind(l_9); - __ lbz(tmp2, 0, R3_ARG1); - __ addic_(tmp1, tmp1, -1); - __ stb(tmp2, 0, R4_ARG2); - __ addi(R3_ARG1, R3_ARG1, 1); - __ addi(R4_ARG2, R4_ARG2, 1); - __ bne(CCR0, l_9); - - __ bind(l_2); - } - - // copy 8 elements at a time - __ xorr(tmp2, R3_ARG1, R4_ARG2); // skip if src & dest have differing alignment mod 8 - __ andi_(tmp1, tmp2, 7); - __ bne(CCR0, l_7); // not same alignment -> to or from is aligned -> copy 8 - - // copy a 2-element word if necessary to align to 8 bytes - __ andi_(R0, R3_ARG1, 7); - __ beq(CCR0, l_7); - - __ lwzx(tmp2, R3_ARG1, tmp3); - __ addi(R5_ARG3, R5_ARG3, -4); - __ stwx(tmp2, R4_ARG2, tmp3); - { // FasterArrayCopy - __ addi(R3_ARG1, R3_ARG1, 4); - __ addi(R4_ARG2, R4_ARG2, 4); - } - __ bind(l_7); - - { // FasterArrayCopy - __ cmpwi(CCR0, R5_ARG3, 31); - __ ble(CCR0, l_6); // copy 2 at a time if less than 32 elements remain - - __ srdi(tmp1, R5_ARG3, 5); - __ andi_(R5_ARG3, R5_ARG3, 31); - __ mtctr(tmp1); - - if (!VM_Version::has_vsx()) { - - __ bind(l_8); - // Use unrolled version for mass copying (copy 32 elements a time) - // Load feeding store gets zero latency on Power6, however not on Power5. - // Therefore, the following sequence is made for the good of both. - __ ld(tmp1, 0, R3_ARG1); - __ ld(tmp2, 8, R3_ARG1); - __ ld(tmp3, 16, R3_ARG1); - __ ld(tmp4, 24, R3_ARG1); - __ std(tmp1, 0, R4_ARG2); - __ std(tmp2, 8, R4_ARG2); - __ std(tmp3, 16, R4_ARG2); - __ std(tmp4, 24, R4_ARG2); - __ addi(R3_ARG1, R3_ARG1, 32); - __ addi(R4_ARG2, R4_ARG2, 32); - __ bdnz(l_8); - - } else { // Processor supports VSX, so use it to mass copy. - - // Prefetch the data into the L2 cache. - __ dcbt(R3_ARG1, 0); - - // If supported set DSCR pre-fetch to deepest. 
- if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); - __ mtdscr(tmp2); + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + + // Don't try anything fancy if arrays don't have many elements. + __ li(tmp3, 0); + __ cmpwi(CCR0, R5_ARG3, 17); + __ ble(CCR0, l_6); // copy 4 at a time + + if (!aligned) { + __ xorr(tmp1, R3_ARG1, R4_ARG2); + __ andi_(tmp1, tmp1, 3); + __ bne(CCR0, l_6); // If arrays don't have the same alignment mod 4, do 4 element copy. + + // Copy elements if necessary to align to 4 bytes. + __ neg(tmp1, R3_ARG1); // Compute distance to alignment boundary. + __ andi_(tmp1, tmp1, 3); + __ beq(CCR0, l_2); + + __ subf(R5_ARG3, tmp1, R5_ARG3); + __ bind(l_9); + __ lbz(tmp2, 0, R3_ARG1); + __ addic_(tmp1, tmp1, -1); + __ stb(tmp2, 0, R4_ARG2); + __ addi(R3_ARG1, R3_ARG1, 1); + __ addi(R4_ARG2, R4_ARG2, 1); + __ bne(CCR0, l_9); + + __ bind(l_2); + } + + // copy 8 elements at a time + __ xorr(tmp2, R3_ARG1, R4_ARG2); // skip if src & dest have differing alignment mod 8 + __ andi_(tmp1, tmp2, 7); + __ bne(CCR0, l_7); // not same alignment -> to or from is aligned -> copy 8 + + // copy a 2-element word if necessary to align to 8 bytes + __ andi_(R0, R3_ARG1, 7); + __ beq(CCR0, l_7); + + __ lwzx(tmp2, R3_ARG1, tmp3); + __ addi(R5_ARG3, R5_ARG3, -4); + __ stwx(tmp2, R4_ARG2, tmp3); + { // FasterArrayCopy + __ addi(R3_ARG1, R3_ARG1, 4); + __ addi(R4_ARG2, R4_ARG2, 4); } - - __ li(tmp1, 16); - - // Backbranch target aligned to 32-byte. Not 16-byte align as - // loop contains < 8 instructions that fit inside a single - // i-cache sector. - __ align(32); - - __ bind(l_10); - // Use loop with VSX load/store instructions to - // copy 32 elements a time. 
- __ lxvd2x(tmp_vsr1, R3_ARG1); // Load src - __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst - __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1); // Load src + 16 - __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16 - __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32 - __ addi(R4_ARG2, R4_ARG2, 32); // Update dsc+=32 - __ bdnz(l_10); // Dec CTR and loop if not zero. - - // Restore DSCR pre-fetch value. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val); - __ mtdscr(tmp2); + __ bind(l_7); + + { // FasterArrayCopy + __ cmpwi(CCR0, R5_ARG3, 31); + __ ble(CCR0, l_6); // copy 2 at a time if less than 32 elements remain + + __ srdi(tmp1, R5_ARG3, 5); + __ andi_(R5_ARG3, R5_ARG3, 31); + __ mtctr(tmp1); + + if (!VM_Version::has_vsx()) { + + __ bind(l_8); + // Use unrolled version for mass copying (copy 32 elements a time) + // Load feeding store gets zero latency on Power6, however not on Power5. + // Therefore, the following sequence is made for the good of both. + __ ld(tmp1, 0, R3_ARG1); + __ ld(tmp2, 8, R3_ARG1); + __ ld(tmp3, 16, R3_ARG1); + __ ld(tmp4, 24, R3_ARG1); + __ std(tmp1, 0, R4_ARG2); + __ std(tmp2, 8, R4_ARG2); + __ std(tmp3, 16, R4_ARG2); + __ std(tmp4, 24, R4_ARG2); + __ addi(R3_ARG1, R3_ARG1, 32); + __ addi(R4_ARG2, R4_ARG2, 32); + __ bdnz(l_8); + + } else { // Processor supports VSX, so use it to mass copy. + + // Prefetch the data into the L2 cache. + __ dcbt(R3_ARG1, 0); + + // If supported set DSCR pre-fetch to deepest. + if (VM_Version::has_mfdscr()) { + __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); + __ mtdscr(tmp2); + } + + __ li(tmp1, 16); + + // Backbranch target aligned to 32-byte. Not 16-byte align as + // loop contains < 8 instructions that fit inside a single + // i-cache sector. + __ align(32); + + __ bind(l_10); + // Use loop with VSX load/store instructions to + // copy 32 elements a time. 
+ __ lxvd2x(tmp_vsr1, R3_ARG1); // Load src + __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst + __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1); // Load src + 16 + __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16 + __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32 + __ addi(R4_ARG2, R4_ARG2, 32); // Update dsc+=32 + __ bdnz(l_10); // Dec CTR and loop if not zero. + + // Restore DSCR pre-fetch value. + if (VM_Version::has_mfdscr()) { + __ load_const_optimized(tmp2, VM_Version::_dscr_val); + __ mtdscr(tmp2); + } + + } // VSX + } // FasterArrayCopy + + __ bind(l_6); + + // copy 4 elements at a time + __ cmpwi(CCR0, R5_ARG3, 4); + __ blt(CCR0, l_1); + __ srdi(tmp1, R5_ARG3, 2); + __ mtctr(tmp1); // is > 0 + __ andi_(R5_ARG3, R5_ARG3, 3); + + { // FasterArrayCopy + __ addi(R3_ARG1, R3_ARG1, -4); + __ addi(R4_ARG2, R4_ARG2, -4); + __ bind(l_3); + __ lwzu(tmp2, 4, R3_ARG1); + __ stwu(tmp2, 4, R4_ARG2); + __ bdnz(l_3); + __ addi(R3_ARG1, R3_ARG1, 4); + __ addi(R4_ARG2, R4_ARG2, 4); } - } // VSX - } // FasterArrayCopy - - __ bind(l_6); - - // copy 4 elements at a time - __ cmpwi(CCR0, R5_ARG3, 4); - __ blt(CCR0, l_1); - __ srdi(tmp1, R5_ARG3, 2); - __ mtctr(tmp1); // is > 0 - __ andi_(R5_ARG3, R5_ARG3, 3); - - { // FasterArrayCopy - __ addi(R3_ARG1, R3_ARG1, -4); - __ addi(R4_ARG2, R4_ARG2, -4); - __ bind(l_3); - __ lwzu(tmp2, 4, R3_ARG1); - __ stwu(tmp2, 4, R4_ARG2); - __ bdnz(l_3); - __ addi(R3_ARG1, R3_ARG1, 4); - __ addi(R4_ARG2, R4_ARG2, 4); - } - - // do single element copy - __ bind(l_1); - __ cmpwi(CCR0, R5_ARG3, 0); - __ beq(CCR0, l_4); - - { // FasterArrayCopy - __ mtctr(R5_ARG3); - __ addi(R3_ARG1, R3_ARG1, -1); - __ addi(R4_ARG2, R4_ARG2, -1); - - __ bind(l_5); - __ lbzu(tmp2, 1, R3_ARG1); - __ stbu(tmp2, 1, R4_ARG2); - __ bdnz(l_5); + // do single element copy + __ bind(l_1); + __ cmpwi(CCR0, R5_ARG3, 0); + __ beq(CCR0, l_4); + + { // FasterArrayCopy + __ mtctr(R5_ARG3); + __ addi(R3_ARG1, R3_ARG1, -1); + __ addi(R4_ARG2, R4_ARG2, -1); + + __ bind(l_5); + __ 
lbzu(tmp2, 1, R3_ARG1); + __ stbu(tmp2, 1, R4_ARG2); + __ bdnz(l_5); + } } __ bind(l_4); @@ -1167,15 +1185,17 @@ // Do reverse copy. We assume the case of actual overlap is rare enough // that we don't have to optimize it. Label l_1, l_2; - - __ b(l_2); - __ bind(l_1); - __ stbx(tmp1, R4_ARG2, R5_ARG3); - __ bind(l_2); - __ addic_(R5_ARG3, R5_ARG3, -1); - __ lbzx(tmp1, R3_ARG1, R5_ARG3); - __ bge(CCR0, l_1); - + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + __ b(l_2); + __ bind(l_1); + __ stbx(tmp1, R4_ARG2, R5_ARG3); + __ bind(l_2); + __ addic_(R5_ARG3, R5_ARG3, -1); + __ lbzx(tmp1, R3_ARG1, R5_ARG3); + __ bge(CCR0, l_1); + } __ li(R3_RET, 0); // return 0 __ blr(); @@ -1252,155 +1272,159 @@ assert_positive_int(R5_ARG3); Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9; - - // don't try anything fancy if arrays don't have many elements - __ li(tmp3, 0); - __ cmpwi(CCR0, R5_ARG3, 9); - __ ble(CCR0, l_6); // copy 2 at a time - - if (!aligned) { - __ xorr(tmp1, R3_ARG1, R4_ARG2); - __ andi_(tmp1, tmp1, 3); - __ bne(CCR0, l_6); // if arrays don't have the same alignment mod 4, do 2 element copy - - // At this point it is guaranteed that both, from and to have the same alignment mod 4. - - // Copy 1 element if necessary to align to 4 bytes. - __ andi_(tmp1, R3_ARG1, 3); - __ beq(CCR0, l_2); - - __ lhz(tmp2, 0, R3_ARG1); - __ addi(R3_ARG1, R3_ARG1, 2); - __ sth(tmp2, 0, R4_ARG2); - __ addi(R4_ARG2, R4_ARG2, 2); - __ addi(R5_ARG3, R5_ARG3, -1); - __ bind(l_2); - - // At this point the positions of both, from and to, are at least 4 byte aligned. - - // Copy 4 elements at a time. - // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. - __ xorr(tmp2, R3_ARG1, R4_ARG2); - __ andi_(tmp1, tmp2, 7); - __ bne(CCR0, l_7); // not same alignment mod 8 -> copy 4, either from or to will be unaligned - - // Copy a 2-element word if necessary to align to 8 bytes. 
- __ andi_(R0, R3_ARG1, 7); - __ beq(CCR0, l_7); - - __ lwzx(tmp2, R3_ARG1, tmp3); - __ addi(R5_ARG3, R5_ARG3, -2); - __ stwx(tmp2, R4_ARG2, tmp3); + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + // don't try anything fancy if arrays don't have many elements + __ li(tmp3, 0); + __ cmpwi(CCR0, R5_ARG3, 9); + __ ble(CCR0, l_6); // copy 2 at a time + + if (!aligned) { + __ xorr(tmp1, R3_ARG1, R4_ARG2); + __ andi_(tmp1, tmp1, 3); + __ bne(CCR0, l_6); // if arrays don't have the same alignment mod 4, do 2 element copy + + // At this point it is guaranteed that both, from and to have the same alignment mod 4. + + // Copy 1 element if necessary to align to 4 bytes. + __ andi_(tmp1, R3_ARG1, 3); + __ beq(CCR0, l_2); + + __ lhz(tmp2, 0, R3_ARG1); + __ addi(R3_ARG1, R3_ARG1, 2); + __ sth(tmp2, 0, R4_ARG2); + __ addi(R4_ARG2, R4_ARG2, 2); + __ addi(R5_ARG3, R5_ARG3, -1); + __ bind(l_2); + + // At this point the positions of both, from and to, are at least 4 byte aligned. + + // Copy 4 elements at a time. + // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. + __ xorr(tmp2, R3_ARG1, R4_ARG2); + __ andi_(tmp1, tmp2, 7); + __ bne(CCR0, l_7); // not same alignment mod 8 -> copy 4, either from or to will be unaligned + + // Copy a 2-element word if necessary to align to 8 bytes. + __ andi_(R0, R3_ARG1, 7); + __ beq(CCR0, l_7); + + __ lwzx(tmp2, R3_ARG1, tmp3); + __ addi(R5_ARG3, R5_ARG3, -2); + __ stwx(tmp2, R4_ARG2, tmp3); + { // FasterArrayCopy + __ addi(R3_ARG1, R3_ARG1, 4); + __ addi(R4_ARG2, R4_ARG2, 4); + } + } + + __ bind(l_7); + + // Copy 4 elements at a time; either the loads or the stores can + // be unaligned if aligned == false. 
+ { // FasterArrayCopy + __ cmpwi(CCR0, R5_ARG3, 15); + __ ble(CCR0, l_6); // copy 2 at a time if less than 16 elements remain + + __ srdi(tmp1, R5_ARG3, 4); + __ andi_(R5_ARG3, R5_ARG3, 15); + __ mtctr(tmp1); + + if (!VM_Version::has_vsx()) { + + __ bind(l_8); + // Use unrolled version for mass copying (copy 16 elements a time). + // Load feeding store gets zero latency on Power6, however not on Power5. + // Therefore, the following sequence is made for the good of both. + __ ld(tmp1, 0, R3_ARG1); + __ ld(tmp2, 8, R3_ARG1); + __ ld(tmp3, 16, R3_ARG1); + __ ld(tmp4, 24, R3_ARG1); + __ std(tmp1, 0, R4_ARG2); + __ std(tmp2, 8, R4_ARG2); + __ std(tmp3, 16, R4_ARG2); + __ std(tmp4, 24, R4_ARG2); + __ addi(R3_ARG1, R3_ARG1, 32); + __ addi(R4_ARG2, R4_ARG2, 32); + __ bdnz(l_8); + + } else { // Processor supports VSX, so use it to mass copy. + + // Prefetch src data into L2 cache. + __ dcbt(R3_ARG1, 0); + + // If supported set DSCR pre-fetch to deepest. + if (VM_Version::has_mfdscr()) { + __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); + __ mtdscr(tmp2); + } + __ li(tmp1, 16); + + // Backbranch target aligned to 32-byte. It's not aligned 16-byte + // as loop contains < 8 instructions that fit inside a single + // i-cache sector. + __ align(32); + + __ bind(l_9); + // Use loop with VSX load/store instructions to + // copy 16 elements a time. + __ lxvd2x(tmp_vsr1, R3_ARG1); // Load from src. + __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst. + __ lxvd2x(tmp_vsr2, R3_ARG1, tmp1); // Load from src + 16. + __ stxvd2x(tmp_vsr2, R4_ARG2, tmp1); // Store to dst + 16. + __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32. + __ addi(R4_ARG2, R4_ARG2, 32); // Update dsc+=32. + __ bdnz(l_9); // Dec CTR and loop if not zero. + + // Restore DSCR pre-fetch value. 
+ if (VM_Version::has_mfdscr()) { + __ load_const_optimized(tmp2, VM_Version::_dscr_val); + __ mtdscr(tmp2); + } + + } + } // FasterArrayCopy + __ bind(l_6); + + // copy 2 elements at a time + { // FasterArrayCopy + __ cmpwi(CCR0, R5_ARG3, 2); + __ blt(CCR0, l_1); + __ srdi(tmp1, R5_ARG3, 1); + __ andi_(R5_ARG3, R5_ARG3, 1); + + __ addi(R3_ARG1, R3_ARG1, -4); + __ addi(R4_ARG2, R4_ARG2, -4); + __ mtctr(tmp1); + + __ bind(l_3); + __ lwzu(tmp2, 4, R3_ARG1); + __ stwu(tmp2, 4, R4_ARG2); + __ bdnz(l_3); + __ addi(R3_ARG1, R3_ARG1, 4); __ addi(R4_ARG2, R4_ARG2, 4); } + + // do single element copy + __ bind(l_1); + __ cmpwi(CCR0, R5_ARG3, 0); + __ beq(CCR0, l_4); + + { // FasterArrayCopy + __ mtctr(R5_ARG3); + __ addi(R3_ARG1, R3_ARG1, -2); + __ addi(R4_ARG2, R4_ARG2, -2); + + __ bind(l_5); + __ lhzu(tmp2, 2, R3_ARG1); + __ sthu(tmp2, 2, R4_ARG2); + __ bdnz(l_5); + } } - __ bind(l_7); - - // Copy 4 elements at a time; either the loads or the stores can - // be unaligned if aligned == false. - - { // FasterArrayCopy - __ cmpwi(CCR0, R5_ARG3, 15); - __ ble(CCR0, l_6); // copy 2 at a time if less than 16 elements remain - - __ srdi(tmp1, R5_ARG3, 4); - __ andi_(R5_ARG3, R5_ARG3, 15); - __ mtctr(tmp1); - - if (!VM_Version::has_vsx()) { - - __ bind(l_8); - // Use unrolled version for mass copying (copy 16 elements a time). - // Load feeding store gets zero latency on Power6, however not on Power5. - // Therefore, the following sequence is made for the good of both. - __ ld(tmp1, 0, R3_ARG1); - __ ld(tmp2, 8, R3_ARG1); - __ ld(tmp3, 16, R3_ARG1); - __ ld(tmp4, 24, R3_ARG1); - __ std(tmp1, 0, R4_ARG2); - __ std(tmp2, 8, R4_ARG2); - __ std(tmp3, 16, R4_ARG2); - __ std(tmp4, 24, R4_ARG2); - __ addi(R3_ARG1, R3_ARG1, 32); - __ addi(R4_ARG2, R4_ARG2, 32); - __ bdnz(l_8); - - } else { // Processor supports VSX, so use it to mass copy. - - // Prefetch src data into L2 cache. - __ dcbt(R3_ARG1, 0); - - // If supported set DSCR pre-fetch to deepest. 
- if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); - __ mtdscr(tmp2); - } - __ li(tmp1, 16); - - // Backbranch target aligned to 32-byte. It's not aligned 16-byte - // as loop contains < 8 instructions that fit inside a single - // i-cache sector. - __ align(32); - - __ bind(l_9); - // Use loop with VSX load/store instructions to - // copy 16 elements a time. - __ lxvd2x(tmp_vsr1, R3_ARG1); // Load from src. - __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst. - __ lxvd2x(tmp_vsr2, R3_ARG1, tmp1); // Load from src + 16. - __ stxvd2x(tmp_vsr2, R4_ARG2, tmp1); // Store to dst + 16. - __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32. - __ addi(R4_ARG2, R4_ARG2, 32); // Update dsc+=32. - __ bdnz(l_9); // Dec CTR and loop if not zero. - - // Restore DSCR pre-fetch value. - if (VM_Version::has_mfdscr()) { - __ load_const_optimized(tmp2, VM_Version::_dscr_val); - __ mtdscr(tmp2); - } - - } - } // FasterArrayCopy - __ bind(l_6); - - // copy 2 elements at a time - { // FasterArrayCopy - __ cmpwi(CCR0, R5_ARG3, 2); - __ blt(CCR0, l_1); - __ srdi(tmp1, R5_ARG3, 1); - __ andi_(R5_ARG3, R5_ARG3, 1); - - __ addi(R3_ARG1, R3_ARG1, -4); - __ addi(R4_ARG2, R4_ARG2, -4); - __ mtctr(tmp1); - - __ bind(l_3); - __ lwzu(tmp2, 4, R3_ARG1); - __ stwu(tmp2, 4, R4_ARG2); - __ bdnz(l_3); - - __ addi(R3_ARG1, R3_ARG1, 4); - __ addi(R4_ARG2, R4_ARG2, 4); - } - - // do single element copy - __ bind(l_1); - __ cmpwi(CCR0, R5_ARG3, 0); - __ beq(CCR0, l_4); - - { // FasterArrayCopy - __ mtctr(R5_ARG3); - __ addi(R3_ARG1, R3_ARG1, -2); - __ addi(R4_ARG2, R4_ARG2, -2); - - __ bind(l_5); - __ lhzu(tmp2, 2, R3_ARG1); - __ sthu(tmp2, 2, R4_ARG2); - __ bdnz(l_5); - } __ bind(l_4); __ li(R3_RET, 0); // return 0 __ blr(); @@ -1432,15 +1456,18 @@ array_overlap_test(nooverlap_target, 1); Label l_1, l_2; - __ sldi(tmp1, R5_ARG3, 1); - __ b(l_2); - __ bind(l_1); - __ sthx(tmp2, R4_ARG2, tmp1); - __ bind(l_2); - __ addic_(tmp1, tmp1, -2); - __ lhzx(tmp2, R3_ARG1, tmp1); 
- __ bge(CCR0, l_1); - + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + __ sldi(tmp1, R5_ARG3, 1); + __ b(l_2); + __ bind(l_1); + __ sthx(tmp2, R4_ARG2, tmp1); + __ bind(l_2); + __ addic_(tmp1, tmp1, -2); + __ lhzx(tmp2, R3_ARG1, tmp1); + __ bge(CCR0, l_1); + } __ li(R3_RET, 0); // return 0 __ blr(); @@ -1588,7 +1615,11 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ function_entry(); assert_positive_int(R5_ARG3); - generate_disjoint_int_copy_core(aligned); + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + generate_disjoint_int_copy_core(aligned); + } __ li(R3_RET, 0); // return 0 __ blr(); return start; @@ -1736,8 +1767,11 @@ STUB_ENTRY(jint_disjoint_arraycopy); array_overlap_test(nooverlap_target, 2); - - generate_conjoint_int_copy_core(aligned); + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + generate_conjoint_int_copy_core(aligned); + } __ li(R3_RET, 0); // return 0 __ blr(); @@ -1859,11 +1893,15 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ function_entry(); assert_positive_int(R5_ARG3); - generate_disjoint_long_copy_core(aligned); + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + generate_disjoint_long_copy_core(aligned); + } __ li(R3_RET, 0); // return 0 __ blr(); - return start; + return start; } // Generate core code for conjoint long copy (and oop copy on @@ -1986,8 +2024,11 @@ STUB_ENTRY(jlong_disjoint_arraycopy); array_overlap_test(nooverlap_target, 3); - generate_conjoint_long_copy_core(aligned); - + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + 
generate_conjoint_long_copy_core(aligned); + } __ li(R3_RET, 0); // return 0 __ blr(); @@ -3008,6 +3049,9 @@ // Note: the disjoint stubs must be generated first, some of // the conjoint stubs use them. + address ucm_common_error_exit = generate_unsafecopy_common_error_exit(); + UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit); + // non-aligned disjoint versions StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); @@ -3579,6 +3623,10 @@ } }; +#define UCM_TABLE_MAX_ENTRIES 8 void StubGenerator_generate(CodeBuffer* code, bool all) { + if (UnsafeCopyMemory::_table == NULL) { + UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } StubGenerator g(code, all); }
--- a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1076,6 +1076,17 @@ __ delayed()->add(end_from, left_shift, end_from); // restore address } + address generate_unsafecopy_common_error_exit() { + address start_pc = __ pc(); + if (UseBlockCopy) { + __ wrasi(G0, Assembler::ASI_PRIMARY_NOFAULT); + __ membar(Assembler::StoreLoad); + } + __ retl(); + __ delayed()->mov(G0, O0); // return 0 + return start_pc; + } + // // Generate stub for disjoint byte copy. If "aligned" is true, the // "from" and "to" addresses are assumed to be heapword aligned. @@ -1107,61 +1118,66 @@ BLOCK_COMMENT("Entry:"); } - // for short arrays, just do single element copy - __ cmp(count, 23); // 16 + 7 - __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); - __ delayed()->mov(G0, offset); - - if (aligned) { - // 'aligned' == true when it is known statically during compilation - // of this arraycopy call site that both 'from' and 'to' addresses - // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). - // - // Aligned arrays have 4 bytes alignment in 32-bits VM - // and 8 bytes - in 64-bits VM. So we do it only for 32-bits VM - // - } else { - // copy bytes to align 'to' on 8 byte boundary - __ andcc(to, 7, G1); // misaligned bytes - __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); - __ delayed()->neg(G1); - __ inc(G1, 8); // bytes need to copy to next 8-bytes alignment - __ sub(count, G1, count); - __ BIND(L_align); - __ ldub(from, 0, O3); - __ deccc(G1); - __ inc(from); - __ stb(O3, to, 0); - __ br(Assembler::notZero, false, Assembler::pt, L_align); - __ delayed()->inc(to); - __ BIND(L_skip_alignment); - } - if (!aligned) { - // Copy with shift 16 bytes per iteration if arrays do not have - // the same alignment mod 8, otherwise fall through to the next - // code for aligned copy. 
- // The compare above (count >= 23) guarantes 'count' >= 16 bytes. - // Also jump over aligned copy after the copy with shift completed. - - copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte); - } - - // Both array are 8 bytes aligned, copy 16 bytes at a time + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + + // for short arrays, just do single element copy + __ cmp(count, 23); // 16 + 7 + __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); + __ delayed()->mov(G0, offset); + + if (aligned) { + // 'aligned' == true when it is known statically during compilation + // of this arraycopy call site that both 'from' and 'to' addresses + // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). + // + // Aligned arrays have 4 bytes alignment in 32-bits VM + // and 8 bytes - in 64-bits VM. So we do it only for 32-bits VM + // + } else { + // copy bytes to align 'to' on 8 byte boundary + __ andcc(to, 7, G1); // misaligned bytes + __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); + __ delayed()->neg(G1); + __ inc(G1, 8); // bytes need to copy to next 8-bytes alignment + __ sub(count, G1, count); + __ BIND(L_align); + __ ldub(from, 0, O3); + __ deccc(G1); + __ inc(from); + __ stb(O3, to, 0); + __ br(Assembler::notZero, false, Assembler::pt, L_align); + __ delayed()->inc(to); + __ BIND(L_skip_alignment); + } + if (!aligned) { + // Copy with shift 16 bytes per iteration if arrays do not have + // the same alignment mod 8, otherwise fall through to the next + // code for aligned copy. + // The compare above (count >= 23) guarantes 'count' >= 16 bytes. + // Also jump over aligned copy after the copy with shift completed. 
+ + copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte); + } + + // Both array are 8 bytes aligned, copy 16 bytes at a time __ and3(count, 7, G4); // Save count __ srl(count, 3, count); - generate_disjoint_long_copy_core(aligned); + generate_disjoint_long_copy_core(aligned); __ mov(G4, count); // Restore count - // copy tailing bytes - __ BIND(L_copy_byte); - __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); - __ align(OptoLoopAlignment); - __ BIND(L_copy_byte_loop); - __ ldub(from, offset, O3); - __ deccc(count); - __ stb(O3, to, offset); - __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop); - __ delayed()->inc(offset); + // copy tailing bytes + __ BIND(L_copy_byte); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); + __ align(OptoLoopAlignment); + __ BIND(L_copy_byte_loop); + __ ldub(from, offset, O3); + __ deccc(count); + __ stb(O3, to, offset); + __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop); + __ delayed()->inc(offset); + } __ BIND(L_exit); // O3, O4 are used as temp registers @@ -1207,70 +1223,75 @@ array_overlap_test(nooverlap_target, 0); - __ add(to, count, end_to); // offset after last copied element - - // for short arrays, just do single element copy - __ cmp(count, 23); // 16 + 7 - __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); - __ delayed()->add(from, count, end_from); - { - // Align end of arrays since they could be not aligned even - // when arrays itself are aligned. 
- - // copy bytes to align 'end_to' on 8 byte boundary - __ andcc(end_to, 7, G1); // misaligned bytes - __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); - __ delayed()->nop(); - __ sub(count, G1, count); - __ BIND(L_align); - __ dec(end_from); - __ dec(end_to); - __ ldub(end_from, 0, O3); - __ deccc(G1); - __ brx(Assembler::notZero, false, Assembler::pt, L_align); - __ delayed()->stb(O3, end_to, 0); - __ BIND(L_skip_alignment); + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + + __ add(to, count, end_to); // offset after last copied element + + // for short arrays, just do single element copy + __ cmp(count, 23); // 16 + 7 + __ brx(Assembler::less, false, Assembler::pn, L_copy_byte); + __ delayed()->add(from, count, end_from); + + { + // Align end of arrays since they could be not aligned even + // when arrays itself are aligned. + + // copy bytes to align 'end_to' on 8 byte boundary + __ andcc(end_to, 7, G1); // misaligned bytes + __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); + __ delayed()->nop(); + __ sub(count, G1, count); + __ BIND(L_align); + __ dec(end_from); + __ dec(end_to); + __ ldub(end_from, 0, O3); + __ deccc(G1); + __ brx(Assembler::notZero, false, Assembler::pt, L_align); + __ delayed()->stb(O3, end_to, 0); + __ BIND(L_skip_alignment); + } + if (aligned) { + // Both arrays are aligned to 8-bytes in 64-bits VM. + // The 'count' is decremented in copy_16_bytes_backward_with_shift() + // in unaligned case. + __ dec(count, 16); + } else { + // Copy with shift 16 bytes per iteration if arrays do not have + // the same alignment mod 8, otherwise jump to the next + // code for aligned copy (and substracting 16 from 'count' before jump). + // The compare above (count >= 11) guarantes 'count' >= 16 bytes. + // Also jump over aligned copy after the copy with shift completed. 
+ + copy_16_bytes_backward_with_shift(end_from, end_to, count, 16, + L_aligned_copy, L_copy_byte); + } + // copy 4 elements (16 bytes) at a time + __ align(OptoLoopAlignment); + __ BIND(L_aligned_copy); + __ dec(end_from, 16); + __ ldx(end_from, 8, O3); + __ ldx(end_from, 0, O4); + __ dec(end_to, 16); + __ deccc(count, 16); + __ stx(O3, end_to, 8); + __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); + __ delayed()->stx(O4, end_to, 0); + __ inc(count, 16); + + // copy 1 element (2 bytes) at a time + __ BIND(L_copy_byte); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); + __ align(OptoLoopAlignment); + __ BIND(L_copy_byte_loop); + __ dec(end_from); + __ dec(end_to); + __ ldub(end_from, 0, O4); + __ deccc(count); + __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop); + __ delayed()->stb(O4, end_to, 0); } - if (aligned) { - // Both arrays are aligned to 8-bytes in 64-bits VM. - // The 'count' is decremented in copy_16_bytes_backward_with_shift() - // in unaligned case. - __ dec(count, 16); - } else { - // Copy with shift 16 bytes per iteration if arrays do not have - // the same alignment mod 8, otherwise jump to the next - // code for aligned copy (and substracting 16 from 'count' before jump). - // The compare above (count >= 11) guarantes 'count' >= 16 bytes. - // Also jump over aligned copy after the copy with shift completed. 
- - copy_16_bytes_backward_with_shift(end_from, end_to, count, 16, - L_aligned_copy, L_copy_byte); - } - // copy 4 elements (16 bytes) at a time - __ align(OptoLoopAlignment); - __ BIND(L_aligned_copy); - __ dec(end_from, 16); - __ ldx(end_from, 8, O3); - __ ldx(end_from, 0, O4); - __ dec(end_to, 16); - __ deccc(count, 16); - __ stx(O3, end_to, 8); - __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); - __ delayed()->stx(O4, end_to, 0); - __ inc(count, 16); - - // copy 1 element (2 bytes) at a time - __ BIND(L_copy_byte); - __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); - __ align(OptoLoopAlignment); - __ BIND(L_copy_byte_loop); - __ dec(end_from); - __ dec(end_to); - __ ldub(end_from, 0, O4); - __ deccc(count); - __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop); - __ delayed()->stb(O4, end_to, 0); __ BIND(L_exit); // O3, O4 are used as temp registers @@ -1311,68 +1332,72 @@ BLOCK_COMMENT("Entry:"); } - // for short arrays, just do single element copy - __ cmp(count, 11); // 8 + 3 (22 bytes) - __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); - __ delayed()->mov(G0, offset); - - if (aligned) { - // 'aligned' == true when it is known statically during compilation - // of this arraycopy call site that both 'from' and 'to' addresses - // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). - // - // Aligned arrays have 4 bytes alignment in 32-bits VM - // and 8 bytes - in 64-bits VM. 
- // - } else { - // copy 1 element if necessary to align 'to' on an 4 bytes - __ andcc(to, 3, G0); - __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); - __ delayed()->lduh(from, 0, O3); - __ inc(from, 2); - __ inc(to, 2); - __ dec(count); - __ sth(O3, to, -2); - __ BIND(L_skip_alignment); - - // copy 2 elements to align 'to' on an 8 byte boundary - __ andcc(to, 7, G0); - __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); - __ delayed()->lduh(from, 0, O3); - __ dec(count, 2); - __ lduh(from, 2, O4); - __ inc(from, 4); - __ inc(to, 4); - __ sth(O3, to, -4); - __ sth(O4, to, -2); - __ BIND(L_skip_alignment2); + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + // for short arrays, just do single element copy + __ cmp(count, 11); // 8 + 3 (22 bytes) + __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); + __ delayed()->mov(G0, offset); + + if (aligned) { + // 'aligned' == true when it is known statically during compilation + // of this arraycopy call site that both 'from' and 'to' addresses + // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()). + // + // Aligned arrays have 4 bytes alignment in 32-bits VM + // and 8 bytes - in 64-bits VM. 
+ // + } else { + // copy 1 element if necessary to align 'to' on an 4 bytes + __ andcc(to, 3, G0); + __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); + __ delayed()->lduh(from, 0, O3); + __ inc(from, 2); + __ inc(to, 2); + __ dec(count); + __ sth(O3, to, -2); + __ BIND(L_skip_alignment); + + // copy 2 elements to align 'to' on an 8 byte boundary + __ andcc(to, 7, G0); + __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); + __ delayed()->lduh(from, 0, O3); + __ dec(count, 2); + __ lduh(from, 2, O4); + __ inc(from, 4); + __ inc(to, 4); + __ sth(O3, to, -4); + __ sth(O4, to, -2); + __ BIND(L_skip_alignment2); + } + if (!aligned) { + // Copy with shift 16 bytes per iteration if arrays do not have + // the same alignment mod 8, otherwise fall through to the next + // code for aligned copy. + // The compare above (count >= 11) guarantes 'count' >= 16 bytes. + // Also jump over aligned copy after the copy with shift completed. + + copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes); + } + + // Both array are 8 bytes aligned, copy 16 bytes at a time + __ and3(count, 3, G4); // Save + __ srl(count, 2, count); + generate_disjoint_long_copy_core(aligned); + __ mov(G4, count); // restore + + // copy 1 element at a time + __ BIND(L_copy_2_bytes); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); + __ align(OptoLoopAlignment); + __ BIND(L_copy_2_bytes_loop); + __ lduh(from, offset, O3); + __ deccc(count); + __ sth(O3, to, offset); + __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop); + __ delayed()->inc(offset, 2); } - if (!aligned) { - // Copy with shift 16 bytes per iteration if arrays do not have - // the same alignment mod 8, otherwise fall through to the next - // code for aligned copy. - // The compare above (count >= 11) guarantes 'count' >= 16 bytes. - // Also jump over aligned copy after the copy with shift completed. 
- - copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes); - } - - // Both array are 8 bytes aligned, copy 16 bytes at a time - __ and3(count, 3, G4); // Save - __ srl(count, 2, count); - generate_disjoint_long_copy_core(aligned); - __ mov(G4, count); // restore - - // copy 1 element at a time - __ BIND(L_copy_2_bytes); - __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); - __ align(OptoLoopAlignment); - __ BIND(L_copy_2_bytes_loop); - __ lduh(from, offset, O3); - __ deccc(count); - __ sth(O3, to, offset); - __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop); - __ delayed()->inc(offset, 2); __ BIND(L_exit); // O3, O4 are used as temp registers @@ -1639,79 +1664,83 @@ array_overlap_test(nooverlap_target, 1); - __ sllx(count, LogBytesPerShort, byte_count); - __ add(to, byte_count, end_to); // offset after last copied element - - // for short arrays, just do single element copy - __ cmp(count, 11); // 8 + 3 (22 bytes) - __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); - __ delayed()->add(from, byte_count, end_from); - { - // Align end of arrays since they could be not aligned even - // when arrays itself are aligned. 
- - // copy 1 element if necessary to align 'end_to' on an 4 bytes - __ andcc(end_to, 3, G0); - __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); - __ delayed()->lduh(end_from, -2, O3); - __ dec(end_from, 2); - __ dec(end_to, 2); - __ dec(count); - __ sth(O3, end_to, 0); - __ BIND(L_skip_alignment); - - // copy 2 elements to align 'end_to' on an 8 byte boundary - __ andcc(end_to, 7, G0); - __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); - __ delayed()->lduh(end_from, -2, O3); - __ dec(count, 2); - __ lduh(end_from, -4, O4); - __ dec(end_from, 4); - __ dec(end_to, 4); - __ sth(O3, end_to, 2); - __ sth(O4, end_to, 0); - __ BIND(L_skip_alignment2); + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + + __ sllx(count, LogBytesPerShort, byte_count); + __ add(to, byte_count, end_to); // offset after last copied element + + // for short arrays, just do single element copy + __ cmp(count, 11); // 8 + 3 (22 bytes) + __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes); + __ delayed()->add(from, byte_count, end_from); + + { + // Align end of arrays since they could be not aligned even + // when arrays itself are aligned. 
+ + // copy 1 element if necessary to align 'end_to' on an 4 bytes + __ andcc(end_to, 3, G0); + __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment); + __ delayed()->lduh(end_from, -2, O3); + __ dec(end_from, 2); + __ dec(end_to, 2); + __ dec(count); + __ sth(O3, end_to, 0); + __ BIND(L_skip_alignment); + + // copy 2 elements to align 'end_to' on an 8 byte boundary + __ andcc(end_to, 7, G0); + __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2); + __ delayed()->lduh(end_from, -2, O3); + __ dec(count, 2); + __ lduh(end_from, -4, O4); + __ dec(end_from, 4); + __ dec(end_to, 4); + __ sth(O3, end_to, 2); + __ sth(O4, end_to, 0); + __ BIND(L_skip_alignment2); + } + if (aligned) { + // Both arrays are aligned to 8-bytes in 64-bits VM. + // The 'count' is decremented in copy_16_bytes_backward_with_shift() + // in unaligned case. + __ dec(count, 8); + } else { + // Copy with shift 16 bytes per iteration if arrays do not have + // the same alignment mod 8, otherwise jump to the next + // code for aligned copy (and substracting 8 from 'count' before jump). + // The compare above (count >= 11) guarantes 'count' >= 16 bytes. + // Also jump over aligned copy after the copy with shift completed. 
+ + copy_16_bytes_backward_with_shift(end_from, end_to, count, 8, + L_aligned_copy, L_copy_2_bytes); + } + // copy 4 elements (16 bytes) at a time + __ align(OptoLoopAlignment); + __ BIND(L_aligned_copy); + __ dec(end_from, 16); + __ ldx(end_from, 8, O3); + __ ldx(end_from, 0, O4); + __ dec(end_to, 16); + __ deccc(count, 8); + __ stx(O3, end_to, 8); + __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); + __ delayed()->stx(O4, end_to, 0); + __ inc(count, 8); + + // copy 1 element (2 bytes) at a time + __ BIND(L_copy_2_bytes); + __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); + __ BIND(L_copy_2_bytes_loop); + __ dec(end_from, 2); + __ dec(end_to, 2); + __ lduh(end_from, 0, O4); + __ deccc(count); + __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop); + __ delayed()->sth(O4, end_to, 0); } - if (aligned) { - // Both arrays are aligned to 8-bytes in 64-bits VM. - // The 'count' is decremented in copy_16_bytes_backward_with_shift() - // in unaligned case. - __ dec(count, 8); - } else { - // Copy with shift 16 bytes per iteration if arrays do not have - // the same alignment mod 8, otherwise jump to the next - // code for aligned copy (and substracting 8 from 'count' before jump). - // The compare above (count >= 11) guarantes 'count' >= 16 bytes. - // Also jump over aligned copy after the copy with shift completed. 
- - copy_16_bytes_backward_with_shift(end_from, end_to, count, 8, - L_aligned_copy, L_copy_2_bytes); - } - // copy 4 elements (16 bytes) at a time - __ align(OptoLoopAlignment); - __ BIND(L_aligned_copy); - __ dec(end_from, 16); - __ ldx(end_from, 8, O3); - __ ldx(end_from, 0, O4); - __ dec(end_to, 16); - __ deccc(count, 8); - __ stx(O3, end_to, 8); - __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy); - __ delayed()->stx(O4, end_to, 0); - __ inc(count, 8); - - // copy 1 element (2 bytes) at a time - __ BIND(L_copy_2_bytes); - __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit); - __ BIND(L_copy_2_bytes_loop); - __ dec(end_from, 2); - __ dec(end_to, 2); - __ lduh(end_from, 0, O4); - __ deccc(count); - __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop); - __ delayed()->sth(O4, end_to, 0); - __ BIND(L_exit); // O3, O4 are used as temp registers inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4); @@ -1870,9 +1899,11 @@ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) BLOCK_COMMENT("Entry:"); } - - generate_disjoint_int_copy_core(aligned); - + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + generate_disjoint_int_copy_core(aligned); + } // O3, O4 are used as temp registers inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); __ retl(); @@ -2005,9 +2036,11 @@ } array_overlap_test(nooverlap_target, 2); - - generate_conjoint_int_copy_core(aligned); - + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, !aligned, false); + generate_conjoint_int_copy_core(aligned); + } // O3, O4 are used as temp registers inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4); __ retl(); @@ -2156,8 +2189,11 @@ BLOCK_COMMENT("Entry:"); } - generate_disjoint_long_copy_core(aligned); - + { + // UnsafeCopyMemory page error: continue at 
UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, true, false); + generate_disjoint_long_copy_core(aligned); + } // O3, O4 are used as temp registers inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4); __ retl(); @@ -2232,9 +2268,11 @@ } array_overlap_test(nooverlap_target, 3); - - generate_conjoint_long_copy_core(aligned); - + { + // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit + UnsafeCopyMemoryMark ucmm(this, true, false); + generate_conjoint_long_copy_core(aligned); + } // O3, O4 are used as temp registers inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4); __ retl(); @@ -2929,6 +2967,9 @@ address entry_jlong_arraycopy; address entry_checkcast_arraycopy; + address ucm_common_error_exit = generate_unsafecopy_common_error_exit(); + UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit); + //*** jbyte // Always need aligned and unaligned versions StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, @@ -5821,6 +5862,10 @@ }; // end class declaration +#define UCM_TABLE_MAX_ENTRIES 8 void StubGenerator_generate(CodeBuffer* code, bool all) { + if (UnsafeCopyMemory::_table == NULL) { + UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } StubGenerator g(code, all); }
--- a/src/hotspot/cpu/x86/assembler_x86.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/x86/assembler_x86.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -789,6 +789,8 @@ case 0x59: // mulpd case 0x6E: // movd case 0x7E: // movd + case 0x6F: // movdq + case 0x7F: // movdq case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush case 0xFE: // paddd debug_only(has_disp32 = true); @@ -4274,6 +4276,7 @@ emit_operand(dst, src); emit_int8(mode & 0xFF); } + void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) { assert(VM_Version::supports_evex(), "requires EVEX support"); assert(vector_len == Assembler::AVX_256bit || vector_len == Assembler::AVX_512bit, "");
--- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -889,91 +889,98 @@ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->arraycopy_prologue(_masm, decorators, t, from, to, count); - - __ subptr(to, from); // to --> to_from - __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element - __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp - if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) { - // align source address at 4 bytes address boundary - if (t == T_BYTE) { - // One byte misalignment happens only for byte arrays - __ testl(from, 1); - __ jccb(Assembler::zero, L_skip_align1); - __ movb(rax, Address(from, 0)); - __ movb(Address(from, to_from, Address::times_1, 0), rax); - __ increment(from); - __ decrement(count); - __ BIND(L_skip_align1); + { + bool add_entry = (t != T_OBJECT && (!aligned || t == T_INT)); + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, add_entry, true); + __ subptr(to, from); // to --> to_from + __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element + __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp + if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) { + // align source address at 4 bytes address boundary + if (t == T_BYTE) { + // One byte misalignment happens only for byte arrays + __ testl(from, 1); + __ jccb(Assembler::zero, L_skip_align1); + __ movb(rax, Address(from, 0)); + __ movb(Address(from, to_from, Address::times_1, 0), rax); + __ increment(from); + __ decrement(count); + __ BIND(L_skip_align1); + } + // Two bytes misalignment happens only for byte and short (char) arrays + __ testl(from, 2); + __ jccb(Assembler::zero, L_skip_align2); + __ movw(rax, Address(from, 0)); + __ movw(Address(from, to_from, Address::times_1, 0), rax); + __ addptr(from, 2); + __ 
subl(count, 1<<(shift-1)); + __ BIND(L_skip_align2); } - // Two bytes misalignment happens only for byte and short (char) arrays - __ testl(from, 2); - __ jccb(Assembler::zero, L_skip_align2); - __ movw(rax, Address(from, 0)); - __ movw(Address(from, to_from, Address::times_1, 0), rax); - __ addptr(from, 2); - __ subl(count, 1<<(shift-1)); - __ BIND(L_skip_align2); - } - if (!VM_Version::supports_mmx()) { - __ mov(rax, count); // save 'count' - __ shrl(count, shift); // bytes count - __ addptr(to_from, from);// restore 'to' - __ rep_mov(); - __ subptr(to_from, from);// restore 'to_from' - __ mov(count, rax); // restore 'count' - __ jmpb(L_copy_2_bytes); // all dwords were copied - } else { - if (!UseUnalignedLoadStores) { - // align to 8 bytes, we know we are 4 byte aligned to start - __ testptr(from, 4); - __ jccb(Assembler::zero, L_copy_64_bytes); - __ movl(rax, Address(from, 0)); - __ movl(Address(from, to_from, Address::times_1, 0), rax); + if (!VM_Version::supports_mmx()) { + __ mov(rax, count); // save 'count' + __ shrl(count, shift); // bytes count + __ addptr(to_from, from);// restore 'to' + __ rep_mov(); + __ subptr(to_from, from);// restore 'to_from' + __ mov(count, rax); // restore 'count' + __ jmpb(L_copy_2_bytes); // all dwords were copied + } else { + if (!UseUnalignedLoadStores) { + // align to 8 bytes, we know we are 4 byte aligned to start + __ testptr(from, 4); + __ jccb(Assembler::zero, L_copy_64_bytes); + __ movl(rax, Address(from, 0)); + __ movl(Address(from, to_from, Address::times_1, 0), rax); + __ addptr(from, 4); + __ subl(count, 1<<shift); + } + __ BIND(L_copy_64_bytes); + __ mov(rax, count); + __ shrl(rax, shift+1); // 8 bytes chunk count + // + // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop + // + if (UseXMMForArrayCopy) { + xmm_copy_forward(from, to_from, rax); + } else { + mmx_copy_forward(from, to_from, rax); + } + } + // copy tailing dword + __ BIND(L_copy_4_bytes); + __ testl(count, 1<<shift); + __ 
jccb(Assembler::zero, L_copy_2_bytes); + __ movl(rax, Address(from, 0)); + __ movl(Address(from, to_from, Address::times_1, 0), rax); + if (t == T_BYTE || t == T_SHORT) { __ addptr(from, 4); - __ subl(count, 1<<shift); - } - __ BIND(L_copy_64_bytes); - __ mov(rax, count); - __ shrl(rax, shift+1); // 8 bytes chunk count - // - // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop - // - if (UseXMMForArrayCopy) { - xmm_copy_forward(from, to_from, rax); + __ BIND(L_copy_2_bytes); + // copy tailing word + __ testl(count, 1<<(shift-1)); + __ jccb(Assembler::zero, L_copy_byte); + __ movw(rax, Address(from, 0)); + __ movw(Address(from, to_from, Address::times_1, 0), rax); + if (t == T_BYTE) { + __ addptr(from, 2); + __ BIND(L_copy_byte); + // copy tailing byte + __ testl(count, 1); + __ jccb(Assembler::zero, L_exit); + __ movb(rax, Address(from, 0)); + __ movb(Address(from, to_from, Address::times_1, 0), rax); + __ BIND(L_exit); + } else { + __ BIND(L_copy_byte); + } } else { - mmx_copy_forward(from, to_from, rax); + __ BIND(L_copy_2_bytes); } } - // copy tailing dword - __ BIND(L_copy_4_bytes); - __ testl(count, 1<<shift); - __ jccb(Assembler::zero, L_copy_2_bytes); - __ movl(rax, Address(from, 0)); - __ movl(Address(from, to_from, Address::times_1, 0), rax); - if (t == T_BYTE || t == T_SHORT) { - __ addptr(from, 4); - __ BIND(L_copy_2_bytes); - // copy tailing word - __ testl(count, 1<<(shift-1)); - __ jccb(Assembler::zero, L_copy_byte); - __ movw(rax, Address(from, 0)); - __ movw(Address(from, to_from, Address::times_1, 0), rax); - if (t == T_BYTE) { - __ addptr(from, 2); - __ BIND(L_copy_byte); - // copy tailing byte - __ testl(count, 1); - __ jccb(Assembler::zero, L_exit); - __ movb(rax, Address(from, 0)); - __ movb(Address(from, to_from, Address::times_1, 0), rax); - __ BIND(L_exit); - } else { - __ BIND(L_copy_byte); - } - } else { - __ BIND(L_copy_2_bytes); + + if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) { + __ emms(); } - __ 
movl(count, Address(rsp, 12+12)); // reread 'count' bs->arraycopy_epilogue(_masm, decorators, t, from, to, count); @@ -1079,104 +1086,112 @@ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->arraycopy_prologue(_masm, decorators, t, from, to, count); - // copy from high to low - __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element - __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp - if (t == T_BYTE || t == T_SHORT) { - // Align the end of destination array at 4 bytes address boundary - __ lea(end, Address(dst, count, sf, 0)); - if (t == T_BYTE) { - // One byte misalignment happens only for byte arrays - __ testl(end, 1); - __ jccb(Assembler::zero, L_skip_align1); - __ decrement(count); - __ movb(rdx, Address(from, count, sf, 0)); - __ movb(Address(to, count, sf, 0), rdx); - __ BIND(L_skip_align1); - } - // Two bytes misalignment happens only for byte and short (char) arrays - __ testl(end, 2); - __ jccb(Assembler::zero, L_skip_align2); - __ subptr(count, 1<<(shift-1)); - __ movw(rdx, Address(from, count, sf, 0)); - __ movw(Address(to, count, sf, 0), rdx); - __ BIND(L_skip_align2); + { + bool add_entry = (t != T_OBJECT && (!aligned || t == T_INT)); + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, add_entry, true); + // copy from high to low __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element - __ jcc(Assembler::below, L_copy_4_bytes); - } - - if (!VM_Version::supports_mmx()) { - __ std(); - __ mov(rax, count); // Save 'count' - __ mov(rdx, to); // Save 'to' - __ lea(rsi, Address(from, count, sf, -4)); - __ lea(rdi, Address(to , count, sf, -4)); - __ shrptr(count, shift); // bytes count - __ rep_mov(); - __ cld(); - __ mov(count, rax); // restore 'count' - __ andl(count, (1<<shift)-1); // mask the number of rest elements - __ movptr(from, Address(rsp, 12+4)); // reread 'from' - __ mov(to, rdx); // restore 'to' - __ jmpb(L_copy_2_bytes); // all dword 
were copied - } else { - // Align to 8 bytes the end of array. It is aligned to 4 bytes already. - __ testptr(end, 4); - __ jccb(Assembler::zero, L_copy_8_bytes); - __ subl(count, 1<<shift); - __ movl(rdx, Address(from, count, sf, 0)); - __ movl(Address(to, count, sf, 0), rdx); - __ jmpb(L_copy_8_bytes); - - __ align(OptoLoopAlignment); - // Move 8 bytes - __ BIND(L_copy_8_bytes_loop); - if (UseXMMForArrayCopy) { - __ movq(xmm0, Address(from, count, sf, 0)); - __ movq(Address(to, count, sf, 0), xmm0); + __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp + if (t == T_BYTE || t == T_SHORT) { + // Align the end of destination array at 4 bytes address boundary + __ lea(end, Address(dst, count, sf, 0)); + if (t == T_BYTE) { + // One byte misalignment happens only for byte arrays + __ testl(end, 1); + __ jccb(Assembler::zero, L_skip_align1); + __ decrement(count); + __ movb(rdx, Address(from, count, sf, 0)); + __ movb(Address(to, count, sf, 0), rdx); + __ BIND(L_skip_align1); + } + // Two bytes misalignment happens only for byte and short (char) arrays + __ testl(end, 2); + __ jccb(Assembler::zero, L_skip_align2); + __ subptr(count, 1<<(shift-1)); + __ movw(rdx, Address(from, count, sf, 0)); + __ movw(Address(to, count, sf, 0), rdx); + __ BIND(L_skip_align2); + __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element + __ jcc(Assembler::below, L_copy_4_bytes); + } + + if (!VM_Version::supports_mmx()) { + __ std(); + __ mov(rax, count); // Save 'count' + __ mov(rdx, to); // Save 'to' + __ lea(rsi, Address(from, count, sf, -4)); + __ lea(rdi, Address(to , count, sf, -4)); + __ shrptr(count, shift); // bytes count + __ rep_mov(); + __ cld(); + __ mov(count, rax); // restore 'count' + __ andl(count, (1<<shift)-1); // mask the number of rest elements + __ movptr(from, Address(rsp, 12+4)); // reread 'from' + __ mov(to, rdx); // restore 'to' + __ jmpb(L_copy_2_bytes); // all dword were copied } else { - __ movq(mmx0, Address(from, count, sf, 0)); - __ 
movq(Address(to, count, sf, 0), mmx0); + // Align to 8 bytes the end of array. It is aligned to 4 bytes already. + __ testptr(end, 4); + __ jccb(Assembler::zero, L_copy_8_bytes); + __ subl(count, 1<<shift); + __ movl(rdx, Address(from, count, sf, 0)); + __ movl(Address(to, count, sf, 0), rdx); + __ jmpb(L_copy_8_bytes); + + __ align(OptoLoopAlignment); + // Move 8 bytes + __ BIND(L_copy_8_bytes_loop); + if (UseXMMForArrayCopy) { + __ movq(xmm0, Address(from, count, sf, 0)); + __ movq(Address(to, count, sf, 0), xmm0); + } else { + __ movq(mmx0, Address(from, count, sf, 0)); + __ movq(Address(to, count, sf, 0), mmx0); + } + __ BIND(L_copy_8_bytes); + __ subl(count, 2<<shift); + __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); + __ addl(count, 2<<shift); + if (!UseXMMForArrayCopy) { + __ emms(); + } } - __ BIND(L_copy_8_bytes); - __ subl(count, 2<<shift); - __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); - __ addl(count, 2<<shift); - if (!UseXMMForArrayCopy) { - __ emms(); + __ BIND(L_copy_4_bytes); + // copy prefix qword + __ testl(count, 1<<shift); + __ jccb(Assembler::zero, L_copy_2_bytes); + __ movl(rdx, Address(from, count, sf, -4)); + __ movl(Address(to, count, sf, -4), rdx); + + if (t == T_BYTE || t == T_SHORT) { + __ subl(count, (1<<shift)); + __ BIND(L_copy_2_bytes); + // copy prefix dword + __ testl(count, 1<<(shift-1)); + __ jccb(Assembler::zero, L_copy_byte); + __ movw(rdx, Address(from, count, sf, -2)); + __ movw(Address(to, count, sf, -2), rdx); + if (t == T_BYTE) { + __ subl(count, 1<<(shift-1)); + __ BIND(L_copy_byte); + // copy prefix byte + __ testl(count, 1); + __ jccb(Assembler::zero, L_exit); + __ movb(rdx, Address(from, 0)); + __ movb(Address(to, 0), rdx); + __ BIND(L_exit); + } else { + __ BIND(L_copy_byte); + } + } else { + __ BIND(L_copy_2_bytes); } } - __ BIND(L_copy_4_bytes); - // copy prefix qword - __ testl(count, 1<<shift); - __ jccb(Assembler::zero, L_copy_2_bytes); - __ movl(rdx, Address(from, count, sf, -4)); - __ 
movl(Address(to, count, sf, -4), rdx); - - if (t == T_BYTE || t == T_SHORT) { - __ subl(count, (1<<shift)); - __ BIND(L_copy_2_bytes); - // copy prefix dword - __ testl(count, 1<<(shift-1)); - __ jccb(Assembler::zero, L_copy_byte); - __ movw(rdx, Address(from, count, sf, -2)); - __ movw(Address(to, count, sf, -2), rdx); - if (t == T_BYTE) { - __ subl(count, 1<<(shift-1)); - __ BIND(L_copy_byte); - // copy prefix byte - __ testl(count, 1); - __ jccb(Assembler::zero, L_exit); - __ movb(rdx, Address(from, 0)); - __ movb(Address(to, 0), rdx); - __ BIND(L_exit); - } else { - __ BIND(L_copy_byte); - } - } else { - __ BIND(L_copy_2_bytes); + + if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) { + __ emms(); } - __ movl2ptr(count, Address(rsp, 12+12)); // reread count bs->arraycopy_epilogue(_masm, decorators, t, from, to, count); @@ -1212,23 +1227,30 @@ *entry = __ pc(); // Entry point from conjoint arraycopy stub. BLOCK_COMMENT("Entry:"); - __ subptr(to, from); // to --> to_from - if (VM_Version::supports_mmx()) { - if (UseXMMForArrayCopy) { - xmm_copy_forward(from, to_from, count); + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, true, true); + __ subptr(to, from); // to --> to_from + if (VM_Version::supports_mmx()) { + if (UseXMMForArrayCopy) { + xmm_copy_forward(from, to_from, count); + } else { + mmx_copy_forward(from, to_from, count); + } } else { - mmx_copy_forward(from, to_from, count); + __ jmpb(L_copy_8_bytes); + __ align(OptoLoopAlignment); + __ BIND(L_copy_8_bytes_loop); + __ fild_d(Address(from, 0)); + __ fistp_d(Address(from, to_from, Address::times_1)); + __ addptr(from, 8); + __ BIND(L_copy_8_bytes); + __ decrement(count); + __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); } - } else { - __ jmpb(L_copy_8_bytes); - __ align(OptoLoopAlignment); - __ BIND(L_copy_8_bytes_loop); - __ fild_d(Address(from, 0)); - __ fistp_d(Address(from, to_from, Address::times_1)); - __ addptr(from, 8); - __ 
BIND(L_copy_8_bytes); - __ decrement(count); - __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); + } + if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) { + __ emms(); } inc_copy_counter_np(T_LONG); __ leave(); // required for proper stackwalking of RuntimeStub frame @@ -1267,26 +1289,31 @@ __ movptr(from, Address(rsp, 8)); // from __ jump_cc(Assembler::aboveEqual, nooverlap); - __ jmpb(L_copy_8_bytes); - - __ align(OptoLoopAlignment); - __ BIND(L_copy_8_bytes_loop); - if (VM_Version::supports_mmx()) { - if (UseXMMForArrayCopy) { - __ movq(xmm0, Address(from, count, Address::times_8)); - __ movq(Address(to, count, Address::times_8), xmm0); + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, true, true); + + __ jmpb(L_copy_8_bytes); + + __ align(OptoLoopAlignment); + __ BIND(L_copy_8_bytes_loop); + if (VM_Version::supports_mmx()) { + if (UseXMMForArrayCopy) { + __ movq(xmm0, Address(from, count, Address::times_8)); + __ movq(Address(to, count, Address::times_8), xmm0); + } else { + __ movq(mmx0, Address(from, count, Address::times_8)); + __ movq(Address(to, count, Address::times_8), mmx0); + } } else { - __ movq(mmx0, Address(from, count, Address::times_8)); - __ movq(Address(to, count, Address::times_8), mmx0); + __ fild_d(Address(from, count, Address::times_8)); + __ fistp_d(Address(to, count, Address::times_8)); } - } else { - __ fild_d(Address(from, count, Address::times_8)); - __ fistp_d(Address(to, count, Address::times_8)); + __ BIND(L_copy_8_bytes); + __ decrement(count); + __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); + } - __ BIND(L_copy_8_bytes); - __ decrement(count); - __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); - if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) { __ emms(); } @@ -3945,7 +3972,10 @@ } }; // end class declaration - +#define UCM_TABLE_MAX_ENTRIES 8 void StubGenerator_generate(CodeBuffer* code, bool all) { + if (UnsafeCopyMemory::_table == NULL) { + 
UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } StubGenerator g(code, all); }
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1433,7 +1433,6 @@ __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords } - // Arguments: // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary // ignored @@ -1482,51 +1481,55 @@ setup_arg_regs(); // from => rdi, to => rsi, count => rdx // r9 and r10 may be used to save non-volatile registers - // 'from', 'to' and 'count' are now valid - __ movptr(byte_count, count); - __ shrptr(count, 3); // count => qword_count - - // Copy from low to high addresses. Use 'to' as scratch. - __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); - __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); - __ negptr(qword_count); // make the count negative - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); - __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); - __ increment(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - - // Check for and copy trailing dword - __ BIND(L_copy_4_bytes); - __ testl(byte_count, 4); - __ jccb(Assembler::zero, L_copy_2_bytes); - __ movl(rax, Address(end_from, 8)); - __ movl(Address(end_to, 8), rax); - - __ addptr(end_from, 4); - __ addptr(end_to, 4); - - // Check for and copy trailing word - __ BIND(L_copy_2_bytes); - __ testl(byte_count, 2); - __ jccb(Assembler::zero, L_copy_byte); - __ movw(rax, Address(end_from, 8)); - __ movw(Address(end_to, 8), rax); - - __ addptr(end_from, 2); - __ addptr(end_to, 2); - - // Check for and copy trailing byte - __ BIND(L_copy_byte); - __ testl(byte_count, 1); - __ jccb(Assembler::zero, L_exit); - __ movb(rax, Address(end_from, 8)); - __ movb(Address(end_to, 8), rax); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !aligned, 
true); + // 'from', 'to' and 'count' are now valid + __ movptr(byte_count, count); + __ shrptr(count, 3); // count => qword_count + + // Copy from low to high addresses. Use 'to' as scratch. + __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); + __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); + __ negptr(qword_count); // make the count negative + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); + __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); + __ increment(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + + // Check for and copy trailing dword + __ BIND(L_copy_4_bytes); + __ testl(byte_count, 4); + __ jccb(Assembler::zero, L_copy_2_bytes); + __ movl(rax, Address(end_from, 8)); + __ movl(Address(end_to, 8), rax); + + __ addptr(end_from, 4); + __ addptr(end_to, 4); + + // Check for and copy trailing word + __ BIND(L_copy_2_bytes); + __ testl(byte_count, 2); + __ jccb(Assembler::zero, L_copy_byte); + __ movw(rax, Address(end_from, 8)); + __ movw(Address(end_to, 8), rax); + + __ addptr(end_from, 2); + __ addptr(end_to, 2); + + // Check for and copy trailing byte + __ BIND(L_copy_byte); + __ testl(byte_count, 1); + __ jccb(Assembler::zero, L_exit); + __ movb(rax, Address(end_from, 8)); + __ movb(Address(end_to, 8), rax); + } __ BIND(L_exit); + address ucme_exit_pc = __ pc(); restore_arg_regs(); inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 @@ -1534,10 +1537,12 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in multi-bytes chunks - copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); - __ jmp(L_copy_4_bytes); - + { + UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc); + // Copy in multi-bytes chunks + copy_bytes_forward(end_from, end_to, 
qword_count, rax, L_copy_bytes, L_copy_8_bytes); + __ jmp(L_copy_4_bytes); + } return start; } @@ -1582,41 +1587,44 @@ setup_arg_regs(); // from => rdi, to => rsi, count => rdx // r9 and r10 may be used to save non-volatile registers - // 'from', 'to' and 'count' are now valid - __ movptr(byte_count, count); - __ shrptr(count, 3); // count => qword_count - - // Copy from high to low addresses. - - // Check for and copy trailing byte - __ testl(byte_count, 1); - __ jcc(Assembler::zero, L_copy_2_bytes); - __ movb(rax, Address(from, byte_count, Address::times_1, -1)); - __ movb(Address(to, byte_count, Address::times_1, -1), rax); - __ decrement(byte_count); // Adjust for possible trailing word - - // Check for and copy trailing word - __ BIND(L_copy_2_bytes); - __ testl(byte_count, 2); - __ jcc(Assembler::zero, L_copy_4_bytes); - __ movw(rax, Address(from, byte_count, Address::times_1, -2)); - __ movw(Address(to, byte_count, Address::times_1, -2), rax); - - // Check for and copy trailing dword - __ BIND(L_copy_4_bytes); - __ testl(byte_count, 4); - __ jcc(Assembler::zero, L_copy_bytes); - __ movl(rax, Address(from, qword_count, Address::times_8)); - __ movl(Address(to, qword_count, Address::times_8), rax); - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(from, qword_count, Address::times_8, -8)); - __ movq(Address(to, qword_count, Address::times_8, -8), rax); - __ decrement(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !aligned, true); + // 'from', 'to' and 'count' are now valid + __ movptr(byte_count, count); + __ shrptr(count, 3); // count => qword_count + + // Copy from high to low addresses. 
+ + // Check for and copy trailing byte + __ testl(byte_count, 1); + __ jcc(Assembler::zero, L_copy_2_bytes); + __ movb(rax, Address(from, byte_count, Address::times_1, -1)); + __ movb(Address(to, byte_count, Address::times_1, -1), rax); + __ decrement(byte_count); // Adjust for possible trailing word + + // Check for and copy trailing word + __ BIND(L_copy_2_bytes); + __ testl(byte_count, 2); + __ jcc(Assembler::zero, L_copy_4_bytes); + __ movw(rax, Address(from, byte_count, Address::times_1, -2)); + __ movw(Address(to, byte_count, Address::times_1, -2), rax); + + // Check for and copy trailing dword + __ BIND(L_copy_4_bytes); + __ testl(byte_count, 4); + __ jcc(Assembler::zero, L_copy_bytes); + __ movl(rax, Address(from, qword_count, Address::times_8)); + __ movl(Address(to, qword_count, Address::times_8), rax); + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(from, qword_count, Address::times_8, -8)); + __ movq(Address(to, qword_count, Address::times_8, -8), rax); + __ decrement(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + } restore_arg_regs(); inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 @@ -1624,9 +1632,12 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in multi-bytes chunks - copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !aligned, true); + // Copy in multi-bytes chunks + copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + } restore_arg_regs(); inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 @@ -1684,44 +1695,48 @@ setup_arg_regs(); // from => rdi, to => rsi, count => rdx // r9 and r10 may be used to save non-volatile registers - // 
'from', 'to' and 'count' are now valid - __ movptr(word_count, count); - __ shrptr(count, 2); // count => qword_count - - // Copy from low to high addresses. Use 'to' as scratch. - __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); - __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); - __ negptr(qword_count); - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); - __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); - __ increment(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - - // Original 'dest' is trashed, so we can't use it as a - // base register for a possible trailing word copy - - // Check for and copy trailing dword - __ BIND(L_copy_4_bytes); - __ testl(word_count, 2); - __ jccb(Assembler::zero, L_copy_2_bytes); - __ movl(rax, Address(end_from, 8)); - __ movl(Address(end_to, 8), rax); - - __ addptr(end_from, 4); - __ addptr(end_to, 4); - - // Check for and copy trailing word - __ BIND(L_copy_2_bytes); - __ testl(word_count, 1); - __ jccb(Assembler::zero, L_exit); - __ movw(rax, Address(end_from, 8)); - __ movw(Address(end_to, 8), rax); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !aligned, true); + // 'from', 'to' and 'count' are now valid + __ movptr(word_count, count); + __ shrptr(count, 2); // count => qword_count + + // Copy from low to high addresses. Use 'to' as scratch. 
+ __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); + __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); + __ negptr(qword_count); + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); + __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); + __ increment(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + + // Original 'dest' is trashed, so we can't use it as a + // base register for a possible trailing word copy + + // Check for and copy trailing dword + __ BIND(L_copy_4_bytes); + __ testl(word_count, 2); + __ jccb(Assembler::zero, L_copy_2_bytes); + __ movl(rax, Address(end_from, 8)); + __ movl(Address(end_to, 8), rax); + + __ addptr(end_from, 4); + __ addptr(end_to, 4); + + // Check for and copy trailing word + __ BIND(L_copy_2_bytes); + __ testl(word_count, 1); + __ jccb(Assembler::zero, L_exit); + __ movw(rax, Address(end_from, 8)); + __ movw(Address(end_to, 8), rax); + } __ BIND(L_exit); + address ucme_exit_pc = __ pc(); restore_arg_regs(); inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 @@ -1729,9 +1744,12 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in multi-bytes chunks - copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); - __ jmp(L_copy_4_bytes); + { + UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc); + // Copy in multi-bytes chunks + copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + __ jmp(L_copy_4_bytes); + } return start; } @@ -1798,33 +1816,36 @@ setup_arg_regs(); // from => rdi, to => rsi, count => rdx // r9 and r10 may be used to save non-volatile registers - // 'from', 'to' and 'count' are now valid - __ movptr(word_count, count); - __ shrptr(count, 2); // count => qword_count - - // Copy 
from high to low addresses. Use 'to' as scratch. - - // Check for and copy trailing word - __ testl(word_count, 1); - __ jccb(Assembler::zero, L_copy_4_bytes); - __ movw(rax, Address(from, word_count, Address::times_2, -2)); - __ movw(Address(to, word_count, Address::times_2, -2), rax); - - // Check for and copy trailing dword - __ BIND(L_copy_4_bytes); - __ testl(word_count, 2); - __ jcc(Assembler::zero, L_copy_bytes); - __ movl(rax, Address(from, qword_count, Address::times_8)); - __ movl(Address(to, qword_count, Address::times_8), rax); - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(from, qword_count, Address::times_8, -8)); - __ movq(Address(to, qword_count, Address::times_8, -8), rax); - __ decrement(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !aligned, true); + // 'from', 'to' and 'count' are now valid + __ movptr(word_count, count); + __ shrptr(count, 2); // count => qword_count + + // Copy from high to low addresses. Use 'to' as scratch. 
+ + // Check for and copy trailing word + __ testl(word_count, 1); + __ jccb(Assembler::zero, L_copy_4_bytes); + __ movw(rax, Address(from, word_count, Address::times_2, -2)); + __ movw(Address(to, word_count, Address::times_2, -2), rax); + + // Check for and copy trailing dword + __ BIND(L_copy_4_bytes); + __ testl(word_count, 2); + __ jcc(Assembler::zero, L_copy_bytes); + __ movl(rax, Address(from, qword_count, Address::times_8)); + __ movl(Address(to, qword_count, Address::times_8), rax); + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(from, qword_count, Address::times_8, -8)); + __ movq(Address(to, qword_count, Address::times_8, -8), rax); + __ decrement(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + } restore_arg_regs(); inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 @@ -1832,9 +1853,12 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in multi-bytes chunks - copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !aligned, true); + // Copy in multi-bytes chunks + copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + } restore_arg_regs(); inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free __ xorptr(rax, rax); // return 0 @@ -1905,31 +1929,35 @@ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->arraycopy_prologue(_masm, decorators, type, from, to, count); - // 'from', 'to' and 'count' are now valid - __ movptr(dword_count, count); - __ shrptr(count, 1); // count => qword_count - - // Copy from low to high addresses. Use 'to' as scratch. 
- __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); - __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); - __ negptr(qword_count); - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); - __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); - __ increment(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - - // Check for and copy trailing dword - __ BIND(L_copy_4_bytes); - __ testl(dword_count, 1); // Only byte test since the value is 0 or 1 - __ jccb(Assembler::zero, L_exit); - __ movl(rax, Address(end_from, 8)); - __ movl(Address(end_to, 8), rax); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true); + // 'from', 'to' and 'count' are now valid + __ movptr(dword_count, count); + __ shrptr(count, 1); // count => qword_count + + // Copy from low to high addresses. Use 'to' as scratch. 
+ __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); + __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); + __ negptr(qword_count); + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); + __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); + __ increment(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + + // Check for and copy trailing dword + __ BIND(L_copy_4_bytes); + __ testl(dword_count, 1); // Only byte test since the value is 0 or 1 + __ jccb(Assembler::zero, L_exit); + __ movl(rax, Address(end_from, 8)); + __ movl(Address(end_to, 8), rax); + } __ BIND(L_exit); + address ucme_exit_pc = __ pc(); bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count); restore_arg_regs_using_thread(); inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free @@ -1938,9 +1966,12 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in multi-bytes chunks - copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); - __ jmp(L_copy_4_bytes); + { + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, false, ucme_exit_pc); + // Copy in multi-bytes chunks + copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + __ jmp(L_copy_4_bytes); + } return start; } @@ -2001,26 +2032,29 @@ bs->arraycopy_prologue(_masm, decorators, type, from, to, count); assert_clean_int(count, rax); // Make sure 'count' is clean int. - // 'from', 'to' and 'count' are now valid - __ movptr(dword_count, count); - __ shrptr(count, 1); // count => qword_count - - // Copy from high to low addresses. Use 'to' as scratch. 
- - // Check for and copy trailing dword - __ testl(dword_count, 1); - __ jcc(Assembler::zero, L_copy_bytes); - __ movl(rax, Address(from, dword_count, Address::times_4, -4)); - __ movl(Address(to, dword_count, Address::times_4, -4), rax); - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(from, qword_count, Address::times_8, -8)); - __ movq(Address(to, qword_count, Address::times_8, -8), rax); - __ decrement(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true); + // 'from', 'to' and 'count' are now valid + __ movptr(dword_count, count); + __ shrptr(count, 1); // count => qword_count + + // Copy from high to low addresses. Use 'to' as scratch. + + // Check for and copy trailing dword + __ testl(dword_count, 1); + __ jcc(Assembler::zero, L_copy_bytes); + __ movl(rax, Address(from, dword_count, Address::times_4, -4)); + __ movl(Address(to, dword_count, Address::times_4, -4), rax); + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(from, qword_count, Address::times_8, -8)); + __ movq(Address(to, qword_count, Address::times_8, -8), rax); + __ decrement(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + } if (is_oop) { __ jmp(L_exit); } @@ -2031,8 +2065,12 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); - // Copy in multi-bytes chunks - copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true); + // Copy in multi-bytes chunks + copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + } __ BIND(L_exit); bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count); @@ -2102,20 +2140,23 @@ BasicType type = is_oop ? 
T_OBJECT : T_LONG; BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count); - - // Copy from low to high addresses. Use 'to' as scratch. - __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); - __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); - __ negptr(qword_count); - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); - __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); - __ increment(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true); + + // Copy from low to high addresses. Use 'to' as scratch. + __ lea(end_from, Address(from, qword_count, Address::times_8, -8)); + __ lea(end_to, Address(to, qword_count, Address::times_8, -8)); + __ negptr(qword_count); + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(end_from, qword_count, Address::times_8, 8)); + __ movq(Address(end_to, qword_count, Address::times_8, 8), rax); + __ increment(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + } if (is_oop) { __ jmp(L_exit); } else { @@ -2127,8 +2168,12 @@ __ ret(0); } - // Copy in multi-bytes chunks - copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true); + // Copy in multi-bytes chunks + copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + } __ BIND(L_exit); bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count); @@ -2195,16 +2240,19 @@ BasicType type = is_oop ? 
T_OBJECT : T_LONG; BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count); - - __ jmp(L_copy_bytes); - - // Copy trailing qwords - __ BIND(L_copy_8_bytes); - __ movq(rax, Address(from, qword_count, Address::times_8, -8)); - __ movq(Address(to, qword_count, Address::times_8, -8), rax); - __ decrement(qword_count); - __ jcc(Assembler::notZero, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true); + + __ jmp(L_copy_bytes); + + // Copy trailing qwords + __ BIND(L_copy_8_bytes); + __ movq(rax, Address(from, qword_count, Address::times_8, -8)); + __ movq(Address(to, qword_count, Address::times_8, -8), rax); + __ decrement(qword_count); + __ jcc(Assembler::notZero, L_copy_8_bytes); + } if (is_oop) { __ jmp(L_exit); } else { @@ -2215,10 +2263,13 @@ __ leave(); // required for proper stackwalking of RuntimeStub frame __ ret(0); } - - // Copy in multi-bytes chunks - copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); - + { + // UnsafeCopyMemory page error: continue after ucm + UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true); + + // Copy in multi-bytes chunks + copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes); + } __ BIND(L_exit); bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count); restore_arg_regs_using_thread(); @@ -6036,6 +6087,10 @@ } }; // end class declaration +#define UCM_TABLE_MAX_ENTRIES 16 void StubGenerator_generate(CodeBuffer* code, bool all) { + if (UnsafeCopyMemory::_table == NULL) { + UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES); + } StubGenerator g(code, all); }
--- a/src/hotspot/cpu/zero/stubGenerator_zero.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/cpu/zero/stubGenerator_zero.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright 2007, 2008, 2010, 2015 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -156,9 +156,11 @@ StubRoutines::_oop_arraycopy = ShouldNotCallThisStub(); StubRoutines::_checkcast_arraycopy = ShouldNotCallThisStub(); - StubRoutines::_unsafe_arraycopy = ShouldNotCallThisStub(); StubRoutines::_generic_arraycopy = ShouldNotCallThisStub(); + // Shared code tests for "NULL" to discover the stub is not generated. + StubRoutines::_unsafe_arraycopy = NULL; + // We don't generate specialized code for HeapWord-aligned source // arrays, so just use the code we've already generated StubRoutines::_arrayof_jbyte_disjoint_arraycopy =
--- a/src/hotspot/os/aix/os_aix.inline.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os/aix/os_aix.inline.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -37,11 +37,6 @@ #include <sys/ioctl.h> #include <netdb.h> -// File names are case-insensitive on windows only. -inline int os::file_name_strncmp(const char* s1, const char* s2, size_t num) { - return strncmp(s1, s2, num); -} - inline bool os::uses_stack_guard_pages() { return true; }
--- a/src/hotspot/os/bsd/os_bsd.inline.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os/bsd/os_bsd.inline.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -35,11 +35,6 @@ #include <poll.h> #include <netdb.h> -// File names are case-insensitive on windows only -inline int os::file_name_strncmp(const char* s1, const char* s2, size_t num) { - return strncmp(s1, s2, num); -} - inline bool os::uses_stack_guard_pages() { return true; }
--- a/src/hotspot/os/linux/os_linux.inline.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os/linux/os_linux.inline.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -35,11 +35,6 @@ #include <poll.h> #include <netdb.h> -// File names are case-insensitive on windows only -inline int os::file_name_strncmp(const char* s1, const char* s2, size_t num) { - return strncmp(s1, s2, num); -} - inline bool os::uses_stack_guard_pages() { return true; }
--- a/src/hotspot/os/posix/os_posix.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os/posix/os_posix.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1450,6 +1450,30 @@ return path; } +bool os::same_files(const char* file1, const char* file2) { + if (strcmp(file1, file2) == 0) { + return true; + } + + bool is_same = false; + struct stat st1; + struct stat st2; + + if (os::stat(file1, &st1) < 0) { + return false; + } + + if (os::stat(file2, &st2) < 0) { + return false; + } + + if (st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino) { + // same files + is_same = true; + } + return is_same; +} + // Check minimum allowable stack sizes for thread creation and to initialize // the java system classes, including StackOverflowError - depends on page // size.
--- a/src/hotspot/os/solaris/os_solaris.inline.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os/solaris/os_solaris.inline.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -37,11 +37,6 @@ #include <netdb.h> #include <setjmp.h> -// File names are case-insensitive on windows only -inline int os::file_name_strncmp(const char* s1, const char* s2, size_t num) { - return strncmp(s1, s2, num); -} - inline bool os::uses_stack_guard_pages() { return true; }
--- a/src/hotspot/os/windows/os_windows.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os/windows/os_windows.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -2581,10 +2581,18 @@ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; } - if ((thread->thread_state() == _thread_in_vm && + + bool is_unsafe_arraycopy = (thread->thread_state() == _thread_in_native || in_java) && UnsafeCopyMemory::contains_pc(pc); + if (((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native || + is_unsafe_arraycopy) && thread->doing_unsafe_access()) || (nm != NULL && nm->has_unsafe_access())) { - return Handle_Exception(exceptionInfo, SharedRuntime::handle_unsafe_access(thread, (address)Assembler::locate_next_instruction(pc))); + address next_pc = Assembler::locate_next_instruction(pc); + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } + return Handle_Exception(exceptionInfo, SharedRuntime::handle_unsafe_access(thread, next_pc)); } } @@ -4359,6 +4367,88 @@ return ret; } +static HANDLE create_read_only_file_handle(const char* file) { + if (file == NULL) { + return INVALID_HANDLE_VALUE; + } + + char* nativepath = (char*)os::strdup(file, mtInternal); + if (nativepath == NULL) { + errno = ENOMEM; + return INVALID_HANDLE_VALUE; + } + os::native_path(nativepath); + + size_t len = strlen(nativepath); + HANDLE handle = INVALID_HANDLE_VALUE; + + if (len < MAX_PATH) { + handle = ::CreateFile(nativepath, 0, FILE_SHARE_READ, + NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + } else { + errno_t err = ERROR_SUCCESS; + wchar_t* wfile = create_unc_path(nativepath, err); + if (err != ERROR_SUCCESS) { + if (wfile != NULL) { + destroy_unc_path(wfile); + } + os::free(nativepath); + return INVALID_HANDLE_VALUE; + } + handle = ::CreateFileW(wfile, 0, FILE_SHARE_READ, + NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + destroy_unc_path(wfile); + } + + os::free(nativepath); + 
return handle; +} + +bool os::same_files(const char* file1, const char* file2) { + + if (file1 == NULL && file2 == NULL) { + return true; + } + + if (file1 == NULL || file2 == NULL) { + return false; + } + + if (strcmp(file1, file2) == 0) { + return true; + } + + HANDLE handle1 = create_read_only_file_handle(file1); + HANDLE handle2 = create_read_only_file_handle(file2); + bool result = false; + + // if we could open both paths... + if (handle1 != INVALID_HANDLE_VALUE && handle2 != INVALID_HANDLE_VALUE) { + BY_HANDLE_FILE_INFORMATION fileInfo1; + BY_HANDLE_FILE_INFORMATION fileInfo2; + if (::GetFileInformationByHandle(handle1, &fileInfo1) && + ::GetFileInformationByHandle(handle2, &fileInfo2)) { + // the paths are the same if they refer to the same file (fileindex) on the same volume (volume serial number) + if (fileInfo1.dwVolumeSerialNumber == fileInfo2.dwVolumeSerialNumber && + fileInfo1.nFileIndexHigh == fileInfo2.nFileIndexHigh && + fileInfo1.nFileIndexLow == fileInfo2.nFileIndexLow) { + result = true; + } + } + } + + //free the handles + if (handle1 != INVALID_HANDLE_VALUE) { + ::CloseHandle(handle1); + } + + if (handle2 != INVALID_HANDLE_VALUE) { + ::CloseHandle(handle2); + } + + return result; +} + #define FT2INT64(ft) \ ((jlong)((jlong)(ft).dwHighDateTime << 32 | (julong)(ft).dwLowDateTime))
--- a/src/hotspot/os/windows/os_windows.inline.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os/windows/os_windows.inline.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -32,11 +32,6 @@ inline const int os::default_file_open_flags() { return O_BINARY | O_NOINHERIT;} -// File names are case-insensitive on windows only -inline int os::file_name_strncmp(const char* s, const char* t, size_t num) { - return _strnicmp(s, t, num); -} - inline void os::dll_unload(void *lib) { ::FreeLibrary((HMODULE)lib); }
--- a/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2018 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -441,8 +441,12 @@ // underlying file has been truncated. Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = cb->as_compiled_method_or_null(); - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { address next_pc = pc + 4; + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc); os::Aix::ucontext_set_pc(uc, next_pc); return 1; @@ -461,9 +465,13 @@ stub = pc + 4; // continue with next instruction. goto run_stub; } - else if (thread->thread_state() == _thread_in_vm && + else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && thread->doing_unsafe_access()) { address next_pc = pc + 4; + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc); os::Aix::ucontext_set_pc(uc, next_pc); return 1;
--- a/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -589,8 +589,12 @@ // Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { address next_pc = Assembler::locate_next_instruction(pc); + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } } @@ -659,10 +663,14 @@ // Determination of interpreter/vtable stub/compiled code null exception stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); } - } else if (thread->thread_state() == _thread_in_vm && + } else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ thread->doing_unsafe_access()) { address next_pc = Assembler::locate_next_instruction(pc); + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); }
--- a/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -193,7 +193,8 @@ /*if (thread->thread_state() == _thread_in_Java) { ShouldNotCallThis(); } - else*/ if (thread->thread_state() == _thread_in_vm && + else*/ if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && thread->doing_unsafe_access()) { ShouldNotCallThis(); }
--- a/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -419,8 +419,12 @@ // Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { address next_pc = pc + NativeCall::instruction_size; + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } } @@ -439,10 +443,14 @@ // Determination of interpreter/vtable stub/compiled code null exception stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); } - } else if (thread->thread_state() == _thread_in_vm && + } else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ thread->doing_unsafe_access()) { address next_pc = pc + NativeCall::instruction_size; + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); }
--- a/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -384,7 +384,7 @@ // Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; - if (nm != NULL && nm->has_unsafe_access()) { + if ((nm != NULL && nm->has_unsafe_access()) || (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc))) { unsafe_access = true; } } else if (sig == SIGSEGV && @@ -398,7 +398,8 @@ // Zombie stub = SharedRuntime::get_handle_wrong_method_stub(); } - } else if (thread->thread_state() == _thread_in_vm && + } else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && thread->doing_unsafe_access()) { unsafe_access = true; } @@ -418,6 +419,9 @@ // any other suitable exception reason, // so assume it is an unsafe access. address next_pc = pc + Assembler::InstructionSize; + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } #ifdef __thumb__ if (uc->uc_mcontext.arm_cpsr & PSR_T_BIT) { next_pc = (address)((intptr_t)next_pc | 0x1);
--- a/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -469,8 +469,12 @@ // underlying file has been truncated. Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { address next_pc = pc + 4; + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc); os::Linux::ucontext_set_pc(uc, next_pc); return true; @@ -485,11 +489,15 @@ // flushing of icache is not necessary. stub = pc + 4; // continue with next instruction. } - else if (thread->thread_state() == _thread_in_vm && + else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && thread->doing_unsafe_access()) { address next_pc = pc + 4; + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc); - os::Linux::ucontext_set_pc(uc, pc + 4); + os::Linux::ucontext_set_pc(uc, next_pc); return true; } }
--- a/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2018 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -467,7 +467,8 @@ // when the vector facility is installed, but operating system support is missing. VM_Version::reset_has_VectorFacility(); stub = pc; // Continue with next instruction. - } else if (thread->thread_state() == _thread_in_vm && + } else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && thread->doing_unsafe_access()) { // We don't really need a stub here! Just set the pending exeption and // continue at the next instruction after the faulting read. Returning
--- a/src/hotspot/os_cpu/linux_sparc/os_linux_sparc.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os_cpu/linux_sparc/os_linux_sparc.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -385,7 +385,11 @@ // Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = cb->as_compiled_method_or_null(); - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { + if (is_unsafe_arraycopy) { + npc = UnsafeCopyMemory::page_error_continue_pc(pc); + } *stub = SharedRuntime::handle_unsafe_access(thread, npc); return true; } @@ -550,8 +554,12 @@ } if (sig == SIGBUS && - thread->thread_state() == _thread_in_vm && + (thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && thread->doing_unsafe_access()) { + if (UnsafeCopyMemory::contains_pc(pc)) { + npc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, npc); }
--- a/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -435,8 +435,12 @@ // Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { address next_pc = Assembler::locate_next_instruction(pc); + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } } @@ -483,10 +487,14 @@ // Determination of interpreter/vtable stub/compiled code null exception stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); } - } else if (thread->thread_state() == _thread_in_vm && - sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ - thread->doing_unsafe_access()) { + } else if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && + (sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ + thread->doing_unsafe_access())) { address next_pc = Assembler::locate_next_instruction(pc); + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); }
--- a/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -207,7 +207,8 @@ /*if (thread->thread_state() == _thread_in_Java) { ShouldNotCallThis(); } - else*/ if (thread->thread_state() == _thread_in_vm && + else*/ if ((thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) && sig == SIGBUS && thread->doing_unsafe_access()) { ShouldNotCallThis(); }
--- a/src/hotspot/os_cpu/solaris_sparc/os_solaris_sparc.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os_cpu/solaris_sparc/os_solaris_sparc.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -436,8 +436,12 @@ } - if (thread->thread_state() == _thread_in_vm) { + if (thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) { if (sig == SIGBUS && thread->doing_unsafe_access()) { + if (UnsafeCopyMemory::contains_pc(pc)) { + npc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, npc); } } @@ -476,7 +480,11 @@ // Do not crash the VM in such a case. CodeBlob* cb = CodeCache::find_blob_unsafe(pc); CompiledMethod* nm = cb->as_compiled_method_or_null(); - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc)); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { + if (is_unsafe_arraycopy) { + npc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, npc); } }
--- a/src/hotspot/os_cpu/solaris_x86/os_solaris_x86.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/os_cpu/solaris_x86/os_solaris_x86.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -517,9 +517,13 @@ stub = VM_Version::cpuinfo_cont_addr(); } - if (thread->thread_state() == _thread_in_vm) { + if (thread->thread_state() == _thread_in_vm || + thread->thread_state() == _thread_in_native) { if (sig == SIGBUS && info->si_code == BUS_OBJERR && thread->doing_unsafe_access()) { address next_pc = Assembler::locate_next_instruction(pc); + if (UnsafeCopyMemory::contains_pc(pc)) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } } @@ -536,8 +540,12 @@ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); if (cb != NULL) { CompiledMethod* nm = cb->as_compiled_method_or_null(); - if (nm != NULL && nm->has_unsafe_access()) { + bool is_unsafe_arraycopy = thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc); + if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) { address next_pc = Assembler::locate_next_instruction(pc); + if (is_unsafe_arraycopy) { + next_pc = UnsafeCopyMemory::page_error_continue_pc(pc); + } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } }
--- a/src/hotspot/share/aot/aotCodeHeap.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/aot/aotCodeHeap.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -38,7 +38,6 @@ #include "memory/universe.hpp" #include "oops/compressedOops.hpp" #include "oops/method.inline.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/handles.inline.hpp" #include "runtime/os.hpp" #include "runtime/safepointVerifiers.hpp" @@ -734,7 +733,8 @@ } } if (marked > 0) { - Deoptimization::deoptimize_all_marked(); + VM_Deoptimize op; + VMThread::execute(&op); } }
--- a/src/hotspot/share/aot/aotCompiledMethod.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/aot/aotCompiledMethod.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -165,7 +165,7 @@ { // Enter critical section. Does not block for safepoint. - MutexLocker pl(CompiledMethod_lock, Mutex::_no_safepoint_check_flag); + MutexLocker pl(Patching_lock, Mutex::_no_safepoint_check_flag); if (*_state_adr == new_state) { // another thread already performed this transition so nothing @@ -188,10 +188,12 @@ #endif // Remove AOTCompiledMethod from method. - if (method() != NULL) { - method()->unlink_code(this); + if (method() != NULL && (method()->code() == this || + method()->from_compiled_entry() == verified_entry_point())) { + HandleMark hm; + method()->clear_code(false /* already owns Patching_lock */); } - } // leave critical region under CompiledMethod_lock + } // leave critical region under Patching_lock if (TraceCreateZombies) { @@ -214,7 +216,7 @@ { // Enter critical section. Does not block for safepoint. - MutexLocker pl(CompiledMethod_lock, Mutex::_no_safepoint_check_flag); + MutexLocker pl(Patching_lock, Mutex::_no_safepoint_check_flag); if (*_state_adr == in_use) { // another thread already performed this transition so nothing @@ -228,7 +230,7 @@ // Log the transition once log_state_change(); - } // leave critical region under CompiledMethod_lock + } // leave critical region under Patching_lock if (TraceCreateZombies) {
--- a/src/hotspot/share/aot/aotCompiledMethod.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/aot/aotCompiledMethod.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -176,7 +176,6 @@ state() == not_used; } virtual bool is_alive() const { return _is_alive(); } virtual bool is_in_use() const { return state() == in_use; } - virtual bool is_not_installed() const { return state() == not_installed; } virtual bool is_unloading() { return false; }
--- a/src/hotspot/share/ci/ciMethodData.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/ci/ciMethodData.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -81,13 +81,13 @@ // Check for entries that reference an unloaded method class PrepareExtraDataClosure : public CleanExtraDataClosure { MethodData* _mdo; - uint64_t _safepoint_counter; + SafepointStateTracker _safepoint_tracker; GrowableArray<Method*> _uncached_methods; public: PrepareExtraDataClosure(MethodData* mdo) : _mdo(mdo), - _safepoint_counter(SafepointSynchronize::safepoint_counter()), + _safepoint_tracker(SafepointSynchronize::safepoint_state_tracker()), _uncached_methods() { } @@ -103,7 +103,7 @@ } bool has_safepointed() { - return SafepointSynchronize::safepoint_counter() != _safepoint_counter; + return _safepoint_tracker.safepoint_state_changed(); } bool finish() {
--- a/src/hotspot/share/classfile/classLoader.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/classfile/classLoader.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -292,11 +292,13 @@ return NULL; } -ClassPathZipEntry::ClassPathZipEntry(jzfile* zip, const char* zip_name, bool is_boot_append) : ClassPathEntry() { +ClassPathZipEntry::ClassPathZipEntry(jzfile* zip, const char* zip_name, + bool is_boot_append, bool from_class_path_attr) : ClassPathEntry() { _zip = zip; char *copy = NEW_C_HEAP_ARRAY(char, strlen(zip_name)+1, mtClass); strcpy(copy, zip_name); _zip_name = copy; + _from_class_path_attr = from_class_path_attr; } ClassPathZipEntry::~ClassPathZipEntry() { @@ -577,7 +579,7 @@ strncpy(path, &class_path[start], end - start); path[end - start] = '\0'; - update_class_path_entry_list(path, false, false); + update_class_path_entry_list(path, false, false, false); while (class_path[end] == os::path_separator()[0]) { end++; @@ -612,7 +614,7 @@ // File or directory found ClassPathEntry* new_entry = NULL; new_entry = create_class_path_entry(path, &st, true /* throw_exception */, - false /*is_boot_append */, CHECK); + false /*is_boot_append */, false /* from_class_path_attr */, CHECK); if (new_entry == NULL) { return; } @@ -668,7 +670,7 @@ struct stat st; if (os::stat(path, &st) == 0) { // File or directory found - ClassPathEntry* new_entry = create_class_path_entry(path, &st, false, false, CHECK); + ClassPathEntry* new_entry = create_class_path_entry(path, &st, false, false, false, CHECK); // If the path specification is valid, enter it into this module's list if (new_entry != NULL) { module_cpl->add_to_list(new_entry); @@ -737,7 +739,7 @@ struct stat st; if (os::stat(path, &st) == 0) { // Directory found - ClassPathEntry* new_entry = create_class_path_entry(path, &st, false, false, CHECK); + ClassPathEntry* new_entry = create_class_path_entry(path, &st, false, false, false, CHECK); // Check for a jimage if (Arguments::has_jimage()) { @@ -754,7 +756,7 @@ } else { // 
Every entry on the system boot class path after the initial base piece, // which is set by os::set_boot_path(), is considered an appended entry. - update_class_path_entry_list(path, false, true); + update_class_path_entry_list(path, false, true, false); } while (class_path[end] == os::path_separator()[0]) { @@ -782,7 +784,7 @@ struct stat st; if (os::stat(path, &st) == 0) { // Directory found - ClassPathEntry* new_entry = create_class_path_entry(path, &st, false, false, CHECK); + ClassPathEntry* new_entry = create_class_path_entry(path, &st, false, false, false, CHECK); // If the path specification is valid, enter it into this module's list. // There is no need to check for duplicate modules in the exploded entry list, @@ -802,7 +804,9 @@ ClassPathEntry* ClassLoader::create_class_path_entry(const char *path, const struct stat* st, bool throw_exception, - bool is_boot_append, TRAPS) { + bool is_boot_append, + bool from_class_path_attr, + TRAPS) { JavaThread* thread = JavaThread::current(); ClassPathEntry* new_entry = NULL; if ((st->st_mode & S_IFMT) == S_IFREG) { @@ -832,7 +836,7 @@ zip = (*ZipOpen)(canonical_path, &error_msg); } if (zip != NULL && error_msg == NULL) { - new_entry = new ClassPathZipEntry(zip, path, is_boot_append); + new_entry = new ClassPathZipEntry(zip, path, is_boot_append, from_class_path_attr); } else { char *msg; if (error_msg == NULL) { @@ -882,7 +886,7 @@ } if (zip != NULL && error_msg == NULL) { // create using canonical path - return new ClassPathZipEntry(zip, canonical_path, is_boot_append); + return new ClassPathZipEntry(zip, canonical_path, is_boot_append, false); } } } @@ -956,13 +960,14 @@ bool ClassLoader::update_class_path_entry_list(const char *path, bool check_for_duplicates, bool is_boot_append, + bool from_class_path_attr, bool throw_exception) { struct stat st; if (os::stat(path, &st) == 0) { // File or directory found ClassPathEntry* new_entry = NULL; Thread* THREAD = Thread::current(); - new_entry = 
create_class_path_entry(path, &st, throw_exception, is_boot_append, CHECK_(false)); + new_entry = create_class_path_entry(path, &st, throw_exception, is_boot_append, from_class_path_attr, CHECK_(false)); if (new_entry == NULL) { return false; }
--- a/src/hotspot/share/classfile/classLoader.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/classfile/classLoader.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -53,6 +53,8 @@ void set_next(ClassPathEntry* next); virtual bool is_modules_image() const = 0; virtual bool is_jar_file() const = 0; + // Is this entry created from the "Class-path" attribute from a JAR Manifest? + virtual bool from_class_path_attr() const = 0; virtual const char* name() const = 0; virtual JImageFile* jimage() const = 0; virtual void close_jimage() = 0; @@ -73,6 +75,7 @@ public: bool is_modules_image() const { return false; } bool is_jar_file() const { return false; } + bool from_class_path_attr() const { return false; } const char* name() const { return _dir; } JImageFile* jimage() const { return NULL; } void close_jimage() {} @@ -99,13 +102,15 @@ private: jzfile* _zip; // The zip archive const char* _zip_name; // Name of zip archive + bool _from_class_path_attr; // From the "Class-path" attribute of a jar file public: bool is_modules_image() const { return false; } bool is_jar_file() const { return true; } + bool from_class_path_attr() const { return _from_class_path_attr; } const char* name() const { return _zip_name; } JImageFile* jimage() const { return NULL; } void close_jimage() {} - ClassPathZipEntry(jzfile* zip, const char* zip_name, bool is_boot_append); + ClassPathZipEntry(jzfile* zip, const char* zip_name, bool is_boot_append, bool from_class_path_attr); virtual ~ClassPathZipEntry(); u1* open_entry(const char* name, jint* filesize, bool nul_terminate, TRAPS); ClassFileStream* open_stream(const char* name, TRAPS); @@ -122,6 +127,7 @@ public: bool is_modules_image() const; bool is_jar_file() const { return false; } + bool from_class_path_attr() const { return false; } bool is_open() const { return _jimage != NULL; } const char* name() const { return _name == NULL ? 
"" : _name; } JImageFile* jimage() const { return _jimage; } @@ -257,7 +263,8 @@ public: static ClassPathEntry* create_class_path_entry(const char *path, const struct stat* st, bool throw_exception, - bool is_boot_append, TRAPS); + bool is_boot_append, + bool from_class_path_attr, TRAPS); // If the package for the fully qualified class name is in the boot // loader's package entry table then add_package() sets the classpath_index @@ -281,6 +288,7 @@ static bool update_class_path_entry_list(const char *path, bool check_for_duplicates, bool is_boot_append, + bool from_class_path_attr, bool throw_exception=true); CDS_ONLY(static void update_module_path_entry_list(const char *path, TRAPS);) static void print_bootclasspath();
--- a/src/hotspot/share/classfile/classLoaderExt.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/classfile/classLoaderExt.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -213,7 +213,7 @@ int n = os::snprintf(libname, libname_len + 1, "%.*s%s", dir_len, dir_name, file_start); assert((size_t)n == libname_len, "Unexpected number of characters in string"); trace_class_path("library = ", libname); - ClassLoader::update_class_path_entry_list(libname, true, false); + ClassLoader::update_class_path_entry_list(libname, true, false, true /* from_class_path_attr */); } file_start = file_end; @@ -339,7 +339,7 @@ } ClassPathEntry* new_entry = NULL; - new_entry = create_class_path_entry(path, &st, false, false, CHECK_NULL); + new_entry = create_class_path_entry(path, &st, false, false, false, CHECK_NULL); if (new_entry == NULL) { return NULL; }
--- a/src/hotspot/share/classfile/sharedPathsMiscInfo.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/classfile/sharedPathsMiscInfo.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -153,83 +153,10 @@ return true; } -char* skip_first_path_entry(const char* path) { - size_t path_sep_len = strlen(os::path_separator()); - char* p = strstr((char*)path, os::path_separator()); - if (p != NULL) { - debug_only( { - size_t image_name_len = strlen(MODULES_IMAGE_NAME); - assert(strncmp(p - image_name_len, MODULES_IMAGE_NAME, image_name_len) == 0, - "first entry must be the modules image"); - } ); - p += path_sep_len; - } else { - debug_only( { - assert(ClassLoader::string_ends_with(path, MODULES_IMAGE_NAME), - "first entry must be the modules image"); - } ); - } - return p; -} - bool SharedPathsMiscInfo::check(jint type, const char* path, bool is_static) { assert(UseSharedSpaces, "runtime only"); switch (type) { case BOOT_PATH: - { - // - // - Archive contains boot classes only - relaxed boot path check: - // Extra path elements appended to the boot path at runtime are allowed. - // - // - Archive contains application or platform classes - strict boot path check: - // Validate the entire runtime boot path, which must be compactible - // with the dump time boot path. Appending boot path at runtime is not - // allowed. - // - - // The first entry in boot path is the modules_image (guaranteed by - // ClassLoader::setup_boot_search_path()). Skip the first entry. The - // path of the runtime modules_image may be different from the dump - // time path (e.g. the JDK image is copied to a different location - // after generating the shared archive), which is acceptable. For most - // common cases, the dump time boot path might contain modules_image only. - char* runtime_boot_path = Arguments::get_sysclasspath(); - char* rp = skip_first_path_entry(runtime_boot_path); - char* dp = skip_first_path_entry(path); - - bool relaxed_check = is_static ? 
- !FileMapInfo::current_info()->header()->has_platform_or_app_classes() : - !FileMapInfo::dynamic_info()->header()->has_platform_or_app_classes(); - if (dp == NULL && rp == NULL) { - break; // ok, both runtime and dump time boot paths have modules_images only - } else if (dp == NULL && rp != NULL && relaxed_check) { - break; // ok, relaxed check, runtime has extra boot append path entries - } else if (dp != NULL && rp != NULL) { - size_t num; - size_t dp_len = strlen(dp); - size_t rp_len = strlen(rp); - if (rp_len >= dp_len) { - if (relaxed_check) { - // only check the leading entries in the runtime boot path, up to - // the length of the dump time boot path - num = dp_len; - } else { - // check the full runtime boot path, must match with dump time - num = rp_len; - } - - if (os::file_name_strncmp(dp, rp, num) == 0) { - // make sure it is the end of an entry in the runtime boot path - if (rp[dp_len] == '\0' || rp[dp_len] == os::path_separator()[0]) { - break; // ok, runtime and dump time paths match - } - } - } - } - - // The paths are different - return fail("[BOOT classpath mismatch, actual =", runtime_boot_path); - } break; case NON_EXIST: { @@ -242,22 +169,6 @@ } break; case APP_PATH: - { - size_t len = strlen(path); - const char *appcp = Arguments::get_appclasspath(); - assert(appcp != NULL, "NULL app classpath"); - size_t appcp_len = strlen(appcp); - if (appcp_len < len) { - return fail("Run time APP classpath is shorter than the one at dump time: ", appcp); - } - // Prefix is OK: E.g., dump with -cp foo.jar, but run with -cp foo.jar:bar.jar. - if (os::file_name_strncmp(path, appcp, len) != 0) { - return fail("[APP classpath mismatch, actual: -Djava.class.path=", appcp); - } - if (appcp[len] != '\0' && appcp[len] != os::path_separator()[0]) { - return fail("Dump time APP classpath is not a proper prefix of run time APP classpath: ", appcp); - } - } break; default: return fail("Corrupted archive file header");
--- a/src/hotspot/share/classfile/stringTable.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/classfile/stringTable.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -79,8 +79,7 @@ // -------------------------------------------------------------------------- -typedef ConcurrentHashTable<WeakHandle<vm_string_table_data>, - StringTableConfig, mtSymbol> StringTableHash; +typedef ConcurrentHashTable<StringTableConfig, mtSymbol> StringTableHash; static StringTableHash* _local_table = NULL; volatile bool StringTable::_has_work = false; @@ -101,11 +100,12 @@ java_lang_String::hash_code(s, len); } -class StringTableConfig : public StringTableHash::BaseConfig { +class StringTableConfig : public StackObj { private: public: - static uintx get_hash(WeakHandle<vm_string_table_data> const& value, - bool* is_dead) { + typedef WeakHandle<vm_string_table_data> Value; + + static uintx get_hash(Value const& value, bool* is_dead) { EXCEPTION_MARK; oop val_oop = value.peek(); if (val_oop == NULL) { @@ -124,15 +124,13 @@ return 0; } // We use default allocation/deallocation but counted - static void* allocate_node(size_t size, - WeakHandle<vm_string_table_data> const& value) { + static void* allocate_node(size_t size, Value const& value) { StringTable::item_added(); - return StringTableHash::BaseConfig::allocate_node(size, value); + return AllocateHeap(size, mtSymbol); } - static void free_node(void* memory, - WeakHandle<vm_string_table_data> const& value) { + static void free_node(void* memory, Value const& value) { value.release(); - StringTableHash::BaseConfig::free_node(memory, value); + FreeHeap(memory); StringTable::item_removed(); } };
--- a/src/hotspot/share/classfile/symbolTable.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/classfile/symbolTable.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -77,8 +77,7 @@ // -------------------------------------------------------------------------- -typedef ConcurrentHashTable<Symbol*, - SymbolTableConfig, mtSymbol> SymbolTableHash; +typedef ConcurrentHashTable<SymbolTableConfig, mtSymbol> SymbolTableHash; static SymbolTableHash* _local_table = NULL; volatile bool SymbolTable::_has_work = 0; @@ -121,10 +120,12 @@ } #endif -class SymbolTableConfig : public SymbolTableHash::BaseConfig { +class SymbolTableConfig : public AllStatic { private: public: - static uintx get_hash(Symbol* const& value, bool* is_dead) { + typedef Symbol* Value; // value of the Node in the hashtable + + static uintx get_hash(Value const& value, bool* is_dead) { *is_dead = (value->refcount() == 0); if (*is_dead) { return 0; @@ -133,11 +134,11 @@ } } // We use default allocation/deallocation but counted - static void* allocate_node(size_t size, Symbol* const& value) { + static void* allocate_node(size_t size, Value const& value) { SymbolTable::item_added(); - return SymbolTableHash::BaseConfig::allocate_node(size, value); + return AllocateHeap(size, mtSymbol); } - static void free_node(void* memory, Symbol* const& value) { + static void free_node(void* memory, Value const& value) { // We get here because #1 some threads lost a race to insert a newly created Symbol // or #2 we're cleaning up unused symbol. // If #1, then the symbol can be either permanent (refcount==PERM_REFCOUNT), @@ -150,7 +151,7 @@ assert(value->refcount() == 0, "expected dead symbol"); } SymbolTable::delete_symbol(value); - SymbolTableHash::BaseConfig::free_node(memory, value); + FreeHeap(memory); SymbolTable::item_removed(); } };
--- a/src/hotspot/share/classfile/systemDictionary.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/classfile/systemDictionary.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -2142,7 +2142,7 @@ // See whether biased locking is enabled and if so set it for this // klass. // Note that this must be done past the last potential blocking - // point / safepoint. We enable biased locking lazily using a + // point / safepoint. We might enable biased locking lazily using a // VM_Operation to iterate the SystemDictionary and installing the // biasable mark word into each InstanceKlass's prototype header. // To avoid race conditions where we accidentally miss enabling the
--- a/src/hotspot/share/code/codeCache.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/code/codeCache.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1142,25 +1142,28 @@ // At least one nmethod has been marked for deoptimization - Deoptimization::deoptimize_all_marked(); + // All this already happens inside a VM_Operation, so we'll do all the work here. + // Stuff copied from VM_Deoptimize and modified slightly. + + // We do not want any GCs to happen while we are in the middle of this VM operation + ResourceMark rm; + DeoptimizationMarker dm; + + // Deoptimize all activations depending on marked nmethods + Deoptimization::deoptimize_dependents(); + + // Make the dependent methods not entrant + make_marked_nmethods_not_entrant(); } #endif // INCLUDE_JVMTI -// Mark methods for deopt (if safe or possible). +// Deoptimize all methods void CodeCache::mark_all_nmethods_for_deoptimization() { MutexLocker mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); CompiledMethodIterator iter(CompiledMethodIterator::only_alive_and_not_unloading); while(iter.next()) { CompiledMethod* nm = iter.method(); - if (!nm->method()->is_method_handle_intrinsic() && - !nm->is_not_installed() && - nm->is_in_use() && - !nm->is_native_method()) { - // Intrinsics and native methods are never deopted. A method that is - // not installed yet or is not in use is not safe to deopt; the - // is_in_use() check covers the not_entrant and not zombie cases. - // Note: A not_entrant method can become a zombie at anytime if it was - // made not_entrant before the previous safepoint/handshake. + if (!nm->method()->is_method_handle_intrinsic()) { nm->mark_for_deoptimization(); } } @@ -1188,12 +1191,7 @@ CompiledMethodIterator iter(CompiledMethodIterator::only_alive_and_not_unloading); while(iter.next()) { CompiledMethod* nm = iter.method(); - if (nm->is_marked_for_deoptimization() && nm->is_in_use()) { - // only_alive_and_not_unloading() can return not_entrant nmethods. 
- // A not_entrant method can become a zombie at anytime if it was - // made not_entrant before the previous safepoint/handshake. The - // is_in_use() check covers the not_entrant and not zombie cases - // that have become true after the method was marked for deopt. + if (nm->is_marked_for_deoptimization() && !nm->is_not_entrant()) { nm->make_not_entrant(); } } @@ -1205,12 +1203,17 @@ if (number_of_nmethods_with_dependencies() == 0) return; + // CodeCache can only be updated by a thread_in_VM and they will all be + // stopped during the safepoint so CodeCache will be safe to update without + // holding the CodeCache_lock. + KlassDepChange changes(dependee); // Compute the dependent nmethods if (mark_for_deoptimization(changes) > 0) { // At least one nmethod has been marked for deoptimization - Deoptimization::deoptimize_all_marked(); + VM_Deoptimize op; + VMThread::execute(&op); } } @@ -1219,9 +1222,26 @@ // --- Compile_lock is not held. However we are at a safepoint. assert_locked_or_safepoint(Compile_lock); + // CodeCache can only be updated by a thread_in_VM and they will all be + // stopped during the safepoint so CodeCache will be safe to update without + // holding the CodeCache_lock. + // Compute the dependent nmethods if (mark_for_deoptimization(m_h()) > 0) { - Deoptimization::deoptimize_all_marked(); + // At least one nmethod has been marked for deoptimization + + // All this already happens inside a VM_Operation, so we'll do all the work here. + // Stuff copied from VM_Deoptimize and modified slightly. + + // We do not want any GCs to happen while we are in the middle of this VM operation + ResourceMark rm; + DeoptimizationMarker dm; + + // Deoptimize all activations depending on marked nmethods + Deoptimization::deoptimize_dependents(); + + // Make the dependent methods not entrant + make_marked_nmethods_not_entrant(); } }
--- a/src/hotspot/share/code/compiledMethod.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/code/compiledMethod.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -214,7 +214,6 @@ }; virtual bool is_in_use() const = 0; - virtual bool is_not_installed() const = 0; virtual int comp_level() const = 0; virtual int compile_id() const = 0;
--- a/src/hotspot/share/code/dependencyContext.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/code/dependencyContext.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -99,15 +99,15 @@ // Safepoints are forbidden during DC lifetime. GC can invalidate // _dependency_context_addr if it relocates the holder // (e.g. CallSiteContext Java object). - uint64_t _safepoint_counter; + SafepointStateTracker _safepoint_tracker; DependencyContext(nmethodBucket* volatile* bucket_addr, volatile uint64_t* last_cleanup_addr) : _dependency_context_addr(bucket_addr), _last_cleanup_addr(last_cleanup_addr), - _safepoint_counter(SafepointSynchronize::safepoint_counter()) {} + _safepoint_tracker(SafepointSynchronize::safepoint_state_tracker()) {} ~DependencyContext() { - assert(SafepointSynchronize::is_same_safepoint(_safepoint_counter), "must be the same safepoint"); + assert(!_safepoint_tracker.safepoint_state_changed(), "must be the same safepoint"); } #else DependencyContext(nmethodBucket* volatile* bucket_addr, volatile uint64_t* last_cleanup_addr)
--- a/src/hotspot/share/code/nmethod.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/code/nmethod.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -50,7 +50,6 @@ #include "oops/oop.inline.hpp" #include "prims/jvmtiImpl.hpp" #include "runtime/atomic.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/flags/flagSetting.hpp" #include "runtime/frame.inline.hpp" #include "runtime/handles.inline.hpp" @@ -1178,7 +1177,11 @@ // have the Method* live here, in case we unload the nmethod because // it is pointing to some oop (other than the Method*) being unloaded. if (_method != NULL) { - _method->unlink_code(this); + // OSR methods point to the Method*, but the Method* does not + // point back! + if (_method->code() == this) { + _method->clear_code(); // Break a cycle + } } // Make the class unloaded - i.e., change state and notify sweeper @@ -1260,9 +1263,16 @@ } } -void nmethod::unlink_from_method() { - if (method() != NULL) { - method()->unlink_code(this); +void nmethod::unlink_from_method(bool acquire_lock) { + // We need to check if both the _code and _from_compiled_code_entry_point + // refer to this nmethod because there is a race in setting these two fields + // in Method* as seen in bugid 4947125. + // If the vep() points to the zombie nmethod, the memory for the nmethod + // could be flushed and the compiler and vtable stubs could still call + // through it. + if (method() != NULL && (method()->code() == this || + method()->from_compiled_entry() == verified_entry_point())) { + method()->clear_code(acquire_lock); } } @@ -1289,24 +1299,24 @@ // during patching, depending on the nmethod state we must notify the GC that // code has been unloaded, unregistering it. We cannot do this right while - // holding the CompiledMethod_lock because we need to use the CodeCache_lock. This + // holding the Patching_lock because we need to use the CodeCache_lock. This // would be prone to deadlocks. 
// This flag is used to remember whether we need to later lock and unregister. bool nmethod_needs_unregister = false; - // invalidate osr nmethod before acquiring the patching lock since - // they both acquire leaf locks and we don't want a deadlock. - // This logic is equivalent to the logic below for patching the - // verified entry point of regular methods. We check that the - // nmethod is in use to ensure that it is invalidated only once. - if (is_osr_method() && is_in_use()) { - // this effectively makes the osr nmethod not entrant - invalidate_osr_method(); - } - { + // invalidate osr nmethod before acquiring the patching lock since + // they both acquire leaf locks and we don't want a deadlock. + // This logic is equivalent to the logic below for patching the + // verified entry point of regular methods. We check that the + // nmethod is in use to ensure that it is invalidated only once. + if (is_osr_method() && is_in_use()) { + // this effectively makes the osr nmethod not entrant + invalidate_osr_method(); + } + // Enter critical section. Does not block for safepoint. - MutexLocker pl(CompiledMethod_lock, Mutex::_no_safepoint_check_flag); + MutexLocker pl(Patching_lock, Mutex::_no_safepoint_check_flag); if (_state == state) { // another thread already performed this transition so nothing @@ -1350,9 +1360,8 @@ log_state_change(); // Remove nmethod from method. - unlink_from_method(); - - } // leave critical region under CompiledMethod_lock + unlink_from_method(false /* already owns Patching_lock */); + } // leave critical region under Patching_lock #if INCLUDE_JVMCI // Invalidate can't occur while holding the Patching lock
--- a/src/hotspot/share/code/nmethod.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/code/nmethod.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -119,7 +119,7 @@ // used by jvmti to track if an unload event has been posted for this nmethod. bool _unload_reported; - // Protected by CompiledMethod_lock + // Protected by Patching_lock volatile signed char _state; // {not_installed, in_use, not_entrant, zombie, unloaded} #ifdef ASSERT @@ -387,7 +387,7 @@ int comp_level() const { return _comp_level; } - void unlink_from_method(); + void unlink_from_method(bool acquire_lock); // Support for oops in scopes and relocs: // Note: index 0 is reserved for null.
--- a/src/hotspot/share/gc/cms/cmsHeap.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/cms/cmsHeap.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -62,7 +62,7 @@ } size_t used_in_bytes() { - return _space->used(); + return _space->used_stable(); } };
--- a/src/hotspot/share/gc/cms/compactibleFreeListSpace.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/cms/compactibleFreeListSpace.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -372,6 +372,8 @@ ) } _dictionary->set_par_lock(&_parDictionaryAllocLock); + + _used_stable = 0; } // Like CompactibleSpace forward() but always calls cross_threshold() to @@ -577,6 +579,14 @@ return capacity() - free(); } +size_t CompactibleFreeListSpace::used_stable() const { + return _used_stable; +} + +void CompactibleFreeListSpace::recalculate_used_stable() { + _used_stable = used(); +} + size_t CompactibleFreeListSpace::free() const { // "MT-safe, but not MT-precise"(TM), if you will: i.e. // if you do this while the structures are in flux you @@ -1374,6 +1384,9 @@ debug_only(fc->mangleAllocated(size)); } + // After allocation, recalculate used space and update used_stable + recalculate_used_stable(); + return res; }
--- a/src/hotspot/share/gc/cms/compactibleFreeListSpace.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/cms/compactibleFreeListSpace.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -192,6 +192,9 @@ // Used to keep track of limit of sweep for the space HeapWord* _sweep_limit; + // Stable value of used(). + size_t _used_stable; + // Used to make the young collector update the mod union table MemRegionClosure* _preconsumptionDirtyCardClosure; @@ -412,6 +415,17 @@ // which overestimates the region by returning the entire // committed region (this is safe, but inefficient). + // Returns monotonically increasing stable used space bytes for CMS. + // This is required for jstat and other memory monitoring tools + // that might otherwise see inconsistent used space values during a garbage + // collection, promotion or allocation into compactibleFreeListSpace. + // The value returned by this function might be smaller than the + // actual value. + size_t used_stable() const; + // Recalculate and cache the current stable used() value. Only to be called + // in places where we can be sure that the result is stable. + void recalculate_used_stable(); + // Returns a subregion of the space containing all the objects in // the space. MemRegion used_region() const {
--- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -692,6 +692,10 @@ return _cmsSpace->max_alloc_in_words() * HeapWordSize; } +size_t ConcurrentMarkSweepGeneration::used_stable() const { + return cmsSpace()->used_stable(); +} + size_t ConcurrentMarkSweepGeneration::max_available() const { return free() + _virtual_space.uncommitted_size(); } @@ -1523,6 +1527,8 @@ FreelistLocker z(this); MetaspaceGC::compute_new_size(); _cmsGen->compute_new_size_free_list(); + // recalculate CMS used space after CMS collection + _cmsGen->cmsSpace()->recalculate_used_stable(); } // A work method used by the foreground collector to do @@ -2051,6 +2057,7 @@ _capacity_at_prologue = capacity(); _used_at_prologue = used(); + _cmsSpace->recalculate_used_stable(); // We enable promotion tracking so that card-scanning can recognize // which objects have been promoted during this GC and skip them. @@ -2123,6 +2130,7 @@ _eden_chunk_index = 0; size_t cms_used = _cmsGen->cmsSpace()->used(); + _cmsGen->cmsSpace()->recalculate_used_stable(); // update performance counters - this uses a special version of // update_counters() that allows the utilization to be passed as a @@ -2816,6 +2824,8 @@ rp->enable_discovery(); _collectorState = Marking; } + + _cmsGen->cmsSpace()->recalculate_used_stable(); } void CMSCollector::checkpointRootsInitialWork() { @@ -4177,6 +4187,7 @@ MutexLocker y(bitMapLock(), Mutex::_no_safepoint_check_flag); checkpointRootsFinalWork(); + _cmsGen->cmsSpace()->recalculate_used_stable(); } verify_work_stacks_empty(); verify_overflow_empty(); @@ -4250,7 +4261,6 @@ if (should_unload_classes()) { heap->prune_scavengable_nmethods(); } - JvmtiExport::gc_epilogue(); // If we encountered any (marking stack / work queue) overflow // events during the current CMS cycle, take appropriate @@ -5337,9 +5347,14 @@ // further below. 
{ CMSTokenSyncWithLocks ts(true, _cmsGen->freelistLock()); + // Update heap occupancy information which is used as // input to soft ref clearing policy at the next gc. Universe::update_heap_info_at_gc(); + + // recalculate CMS used space after CMS collection + _cmsGen->cmsSpace()->recalculate_used_stable(); + _collectorState = Resizing; } } @@ -5428,6 +5443,7 @@ // Gather statistics on the young generation collection. collector()->stats().record_gc0_end(used()); } + _cmsSpace->recalculate_used_stable(); } void CMSCollector::sweepWork(ConcurrentMarkSweepGeneration* old_gen) {
--- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -1112,6 +1112,7 @@ double occupancy() const { return ((double)used())/((double)capacity()); } size_t contiguous_available() const; size_t unsafe_max_alloc_nogc() const; + size_t used_stable() const; // over-rides MemRegion used_region_at_save_marks() const;
--- a/src/hotspot/share/gc/cms/gSpaceCounters.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/cms/gSpaceCounters.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -59,7 +59,7 @@ } inline void update_used() { - _used->set_value(_gen->used()); + _used->set_value(_gen->used_stable()); } // special version of update_used() to allow the used value to be @@ -103,7 +103,7 @@ GenerationUsedHelper(Generation* g) : _gen(g) { } inline jlong take_sample() { - return _gen->used(); + return _gen->used_stable(); } };
--- a/src/hotspot/share/gc/g1/g1Analytics.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/g1/g1Analytics.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -38,7 +38,7 @@ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; -static double cost_per_card_ms_defaults[] = { +static double cost_per_log_buffer_entry_ms_defaults[] = { 0.01, 0.005, 0.005, 0.003, 0.003, 0.002, 0.002, 0.0015 }; @@ -47,7 +47,7 @@ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; -static double cost_per_entry_ms_defaults[] = { +static double young_only_cost_per_remset_card_ms_defaults[] = { 0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005 }; @@ -77,12 +77,12 @@ _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _prev_collection_pause_end_ms(0.0), _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)), - _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_log_buffer_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)), _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), _mixed_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), - _cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)), - _mixed_cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _young_only_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _mixed_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)), @@ -101,10 +101,10 @@ int index = MIN2(ParallelGCThreads - 1, 7u); _rs_length_diff_seq->add(rs_length_diff_defaults[index]); - _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]); + _cost_per_log_buffer_entry_ms_seq->add(cost_per_log_buffer_entry_ms_defaults[index]); _cost_scan_hcc_seq->add(0.0); 
_young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]); - _cost_per_entry_ms_seq->add(cost_per_entry_ms_defaults[index]); + _young_only_cost_per_remset_card_ms_seq->add(young_only_cost_per_remset_card_ms_defaults[index]); _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]); _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]); _young_other_cost_per_region_ms_seq->add(young_other_cost_per_region_ms_defaults[index]); @@ -158,19 +158,19 @@ (pause_time_ms * _recent_prev_end_times_for_all_gcs_sec->num()) / interval_ms; } -void G1Analytics::report_cost_per_card_ms(double cost_per_card_ms) { - _cost_per_card_ms_seq->add(cost_per_card_ms); +void G1Analytics::report_cost_per_log_buffer_entry_ms(double cost_per_log_buffer_entry_ms) { + _cost_per_log_buffer_entry_ms_seq->add(cost_per_log_buffer_entry_ms); } void G1Analytics::report_cost_scan_hcc(double cost_scan_hcc) { _cost_scan_hcc_seq->add(cost_scan_hcc); } -void G1Analytics::report_cost_per_entry_ms(double cost_per_entry_ms, bool for_young_gc) { +void G1Analytics::report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc) { if (for_young_gc) { - _cost_per_entry_ms_seq->add(cost_per_entry_ms); + _young_only_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms); } else { - _mixed_cost_per_entry_ms_seq->add(cost_per_entry_ms); + _mixed_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms); } } @@ -222,8 +222,8 @@ return get_new_prediction(_alloc_rate_ms_seq); } -double G1Analytics::predict_cost_per_card_ms() const { - return get_new_prediction(_cost_per_card_ms_seq); +double G1Analytics::predict_cost_per_log_buffer_entry_ms() const { + return get_new_prediction(_cost_per_log_buffer_entry_ms_seq); } double G1Analytics::predict_scan_hcc_ms() const { @@ -231,7 +231,7 @@ } double G1Analytics::predict_rs_update_time_ms(size_t pending_cards) const { - return pending_cards * predict_cost_per_card_ms() + predict_scan_hcc_ms(); + return 
pending_cards * predict_cost_per_log_buffer_entry_ms() + predict_scan_hcc_ms(); } double G1Analytics::predict_young_cards_per_entry_ratio() const { @@ -256,17 +256,17 @@ double G1Analytics::predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const { if (for_young_gc) { - return card_num * get_new_prediction(_cost_per_entry_ms_seq); + return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq); } else { return predict_mixed_rs_scan_time_ms(card_num); } } double G1Analytics::predict_mixed_rs_scan_time_ms(size_t card_num) const { - if (_mixed_cost_per_entry_ms_seq->num() < 3) { - return card_num * get_new_prediction(_cost_per_entry_ms_seq); + if (_mixed_cost_per_remset_card_ms_seq->num() < 3) { + return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq); } else { - return card_num * get_new_prediction(_mixed_cost_per_entry_ms_seq); + return card_num * get_new_prediction(_mixed_cost_per_remset_card_ms_seq); } }
--- a/src/hotspot/share/gc/g1/g1Analytics.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/g1/g1Analytics.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -46,12 +46,12 @@ double _prev_collection_pause_end_ms; TruncatedSeq* _rs_length_diff_seq; - TruncatedSeq* _cost_per_card_ms_seq; + TruncatedSeq* _cost_per_log_buffer_entry_ms_seq; TruncatedSeq* _cost_scan_hcc_seq; TruncatedSeq* _young_cards_per_entry_ratio_seq; TruncatedSeq* _mixed_cards_per_entry_ratio_seq; - TruncatedSeq* _cost_per_entry_ms_seq; - TruncatedSeq* _mixed_cost_per_entry_ms_seq; + TruncatedSeq* _young_only_cost_per_remset_card_ms_seq; + TruncatedSeq* _mixed_cost_per_remset_card_ms_seq; TruncatedSeq* _cost_per_byte_ms_seq; TruncatedSeq* _constant_other_time_ms_seq; TruncatedSeq* _young_other_cost_per_region_ms_seq; @@ -99,9 +99,9 @@ void report_concurrent_mark_remark_times_ms(double ms); void report_concurrent_mark_cleanup_times_ms(double ms); void report_alloc_rate_ms(double alloc_rate); - void report_cost_per_card_ms(double cost_per_card_ms); + void report_cost_per_log_buffer_entry_ms(double cost_per_log_buffer_entry_ms); void report_cost_scan_hcc(double cost_scan_hcc); - void report_cost_per_entry_ms(double cost_per_entry_ms, bool for_young_gc); + void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc); void report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc); void report_rs_length_diff(double rs_length_diff); void report_cost_per_byte_ms(double cost_per_byte_ms, bool mark_or_rebuild_in_progress); @@ -116,7 +116,7 @@ double predict_alloc_rate_ms() const; int num_alloc_rate_ms() const; - double predict_cost_per_card_ms() const; + double predict_cost_per_log_buffer_entry_ms() const; double predict_scan_hcc_ms() const;
--- a/src/hotspot/share/gc/g1/g1CardTable.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/g1/g1CardTable.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -30,28 +30,6 @@ #include "runtime/atomic.hpp" #include "runtime/orderAccess.hpp" -bool G1CardTable::mark_card_deferred(size_t card_index) { - CardValue val = _byte_map[card_index]; - // It's already processed - if ((val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val()) { - return false; - } - - // Cached bit can be installed either on a clean card or on a claimed card. - CardValue new_val = val; - if (val == clean_card_val()) { - new_val = deferred_card_val(); - } else { - if (val & claimed_card_val()) { - new_val = val | deferred_card_val(); - } - } - if (new_val != val) { - Atomic::cmpxchg(new_val, &_byte_map[card_index], val); - } - return true; -} - void G1CardTable::g1_mark_as_young(const MemRegion& mr) { CardValue *const first = byte_for(mr.start()); CardValue *const last = byte_after(mr.last());
--- a/src/hotspot/share/gc/g1/g1CardTable.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/g1/g1CardTable.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -44,55 +44,65 @@ virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled); }; -class G1CardTable: public CardTable { +class G1CardTable : public CardTable { friend class VMStructs; friend class G1CardTableChangedListener; G1CardTableChangedListener _listener; +public: enum G1CardValues { - g1_young_gen = CT_MR_BS_last_reserved << 1 + g1_young_gen = CT_MR_BS_last_reserved << 1, + + // During evacuation we use the card table to consolidate the cards we need to + // scan for roots onto the card table from the various sources. Further it is + // used to record already completely scanned cards to avoid re-scanning them + // when incrementally evacuating the old gen regions of a collection set. + // This means that already scanned cards should be preserved. + // + // The merge at the start of each evacuation round simply sets cards to dirty + // that are clean; scanned cards are set to 0x1. + // + // This means that the LSB determines what to do with the card during evacuation + // given the following possible values: + // + // 11111111 - clean, do not scan + // 00000001 - already scanned, do not scan + // 00000000 - dirty, needs to be scanned. + // + g1_card_already_scanned = 0x1 }; -public: + static const size_t WordAllClean = SIZE_MAX; + static const size_t WordAllDirty = 0; + + STATIC_ASSERT(BitsPerByte == 8); + static const size_t WordAlreadyScanned = (SIZE_MAX / 255) * g1_card_already_scanned; + G1CardTable(MemRegion whole_heap): CardTable(whole_heap, /* scanned concurrently */ true), _listener() { _listener.set_card_table(this); } - bool is_card_dirty(size_t card_index) { - return _byte_map[card_index] == dirty_card_val(); - } static CardValue g1_young_card_val() { return g1_young_gen; } -/* - Claimed and deferred bits are used together in G1 during the evacuation - pause. 
These bits can have the following state transitions: - 1. The claimed bit can be put over any other card state. Except that - the "dirty -> dirty and claimed" transition is checked for in - G1 code and is not used. - 2. Deferred bit can be set only if the previous state of the card - was either clean or claimed. mark_card_deferred() is wait-free. - We do not care if the operation is be successful because if - it does not it will only result in duplicate entry in the update - buffer because of the "cache-miss". So it's not worth spinning. - */ - - bool is_card_claimed(size_t card_index) { - CardValue val = _byte_map[card_index]; - return (val & (clean_card_mask_val() | claimed_card_val())) == claimed_card_val(); - } - - inline void set_card_claimed(size_t card_index); - void verify_g1_young_region(MemRegion mr) PRODUCT_RETURN; void g1_mark_as_young(const MemRegion& mr); - bool mark_card_deferred(size_t card_index); + size_t index_for_cardvalue(CardValue const* p) const { + return pointer_delta(p, _byte_map, sizeof(CardValue)); + } + + // Mark the given card as Dirty if it is Clean. + inline void mark_clean_as_dirty(size_t card_index); - bool is_card_deferred(size_t card_index) { - CardValue val = _byte_map[card_index]; - return (val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val(); - } + // Change Clean cards in a (large) area on the card table as Dirty, preserving + // already scanned cards. Assumes that most cards in that area are Clean. + inline void mark_region_dirty(size_t start_card_index, size_t num_cards); + + // Mark the given range of cards as Scanned. All of these cards must be Dirty. + inline void mark_as_scanned(size_t start_card_index, size_t num_cards); + + inline uint region_idx_for(CardValue* p); static size_t compute_size(size_t mem_region_size_in_words) { size_t number_of_slots = (mem_region_size_in_words / card_size_in_words);
--- a/src/hotspot/share/gc/g1/g1CardTable.inline.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/g1/g1CardTable.inline.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -26,15 +26,58 @@ #define SHARE_GC_G1_G1CARDTABLE_INLINE_HPP #include "gc/g1/g1CardTable.hpp" +#include "gc/g1/heapRegion.hpp" -void G1CardTable::set_card_claimed(size_t card_index) { - jbyte val = _byte_map[card_index]; - if (val == clean_card_val()) { - val = (jbyte)claimed_card_val(); - } else { - val |= (jbyte)claimed_card_val(); +inline uint G1CardTable::region_idx_for(CardValue* p) { + size_t const card_idx = pointer_delta(p, _byte_map, sizeof(CardValue)); + return (uint)(card_idx >> (HeapRegion::LogOfHRGrainBytes - card_shift)); +} + +inline void G1CardTable::mark_clean_as_dirty(size_t card_index) { + CardValue value = _byte_map[card_index]; + if (value == clean_card_val()) { + _byte_map[card_index] = dirty_card_val(); } - _byte_map[card_index] = val; } -#endif // SHARE_GC_G1_G1CARDTABLE_INLINE_HPP +inline void G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) { + assert(is_aligned(start_card_index, sizeof(size_t)), "Start card index must be aligned."); + assert(is_aligned(num_cards, sizeof(size_t)), "Number of cards to change must be evenly divisible."); + + size_t const num_chunks = num_cards / sizeof(size_t); + + size_t* cur_word = (size_t*)&_byte_map[start_card_index]; + size_t* const end_word_map = cur_word + num_chunks; + while (cur_word < end_word_map) { + size_t value = *cur_word; + if (value == WordAllClean) { + *cur_word = WordAllDirty; + } else if (value == WordAllDirty) { + // do nothing. + } else { + // There is a mix of cards in there. Tread slowly. 
+ CardValue* cur = (CardValue*)cur_word; + for (size_t i = 0; i < sizeof(size_t); i++) { + CardValue value = *cur; + if (value == clean_card_val()) { + *cur = dirty_card_val(); + } + cur++; + } + } + cur_word++; + } +} + +inline void G1CardTable::mark_as_scanned(size_t start_card_index, size_t num_cards) { + CardValue* start = &_byte_map[start_card_index]; + CardValue* const end = start + num_cards; + while (start < end) { + CardValue value = *start; + assert(value == dirty_card_val(), + "Must have been dirty %d start " PTR_FORMAT " " PTR_FORMAT, value, p2i(start), p2i(end)); + *start++ = g1_card_already_scanned; + } +} + +#endif /* SHARE_GC_G1_G1CARDTABLE_INLINE_HPP */
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -1677,7 +1677,6 @@ _card_table = ct; G1BarrierSet::satb_mark_queue_set().initialize(this, - SATB_Q_CBL_mon, &bs->satb_mark_queue_buffer_allocator(), G1SATBProcessCompletedThreshold, G1SATBBufferEnqueueingThresholdPercent); @@ -1955,7 +1954,7 @@ n_completed_buffers++; } assert(dcqs.completed_buffers_num() == 0, "Completed buffers exist!"); - phase_times()->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, n_completed_buffers, G1GCPhaseTimes::UpdateRSProcessedBuffers); + phase_times()->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_i, n_completed_buffers, G1GCPhaseTimes::MergeLBProcessedBuffers); } // Computes the sum of the storage used by the various regions. @@ -2239,8 +2238,8 @@ _collection_set.iterate(cl); } -void G1CollectedHeap::collection_set_iterate_increment_from(HeapRegionClosure *cl, uint worker_id) { - _collection_set.iterate_incremental_part_from(cl, worker_id, workers()->active_workers()); +void G1CollectedHeap::collection_set_iterate_increment_from(HeapRegionClosure *cl, HeapRegionClaimer* hr_claimer, uint worker_id) { + _collection_set.iterate_incremental_part_from(cl, hr_claimer, worker_id, workers()->active_workers()); } HeapWord* G1CollectedHeap::block_start(const void* addr) const { @@ -2631,8 +2630,6 @@ size_t _total_humongous; size_t _candidate_humongous; - G1DirtyCardQueue _dcq; - bool humongous_region_is_candidate(G1CollectedHeap* g1h, HeapRegion* region) const { assert(region->is_starts_humongous(), "Must start a humongous object"); @@ -2692,8 +2689,7 @@ public: RegisterRegionsWithRegionAttrTableClosure() : _total_humongous(0), - _candidate_humongous(0), - _dcq(&G1BarrierSet::dirty_card_queue_set()) { + _candidate_humongous(0) { } virtual bool do_heap_region(HeapRegion* r) { @@ -2708,49 +2704,9 @@ uint rindex = r->hrm_index(); 
g1h->set_humongous_reclaim_candidate(rindex, is_candidate); if (is_candidate) { + g1h->register_humongous_region_with_region_attr(rindex); _candidate_humongous++; - g1h->register_humongous_region_with_region_attr(rindex); - // Is_candidate already filters out humongous object with large remembered sets. - // If we have a humongous object with a few remembered sets, we simply flush these - // remembered set entries into the DCQS. That will result in automatic - // re-evaluation of their remembered set entries during the following evacuation - // phase. - if (!r->rem_set()->is_empty()) { - guarantee(r->rem_set()->occupancy_less_or_equal_than(G1RSetSparseRegionEntries), - "Found a not-small remembered set here. This is inconsistent with previous assumptions."); - G1CardTable* ct = g1h->card_table(); - HeapRegionRemSetIterator hrrs(r->rem_set()); - size_t card_index; - while (hrrs.has_next(card_index)) { - CardTable::CardValue* card_ptr = ct->byte_for_index(card_index); - // The remembered set might contain references to already freed - // regions. Filter out such entries to avoid failing card table - // verification. - if (g1h->is_in(ct->addr_for(card_ptr))) { - if (*card_ptr != G1CardTable::dirty_card_val()) { - *card_ptr = G1CardTable::dirty_card_val(); - _dcq.enqueue(card_ptr); - } - } - } - assert(hrrs.n_yielded() == r->rem_set()->occupied(), - "Remembered set hash maps out of sync, cur: " SIZE_FORMAT " entries, next: " SIZE_FORMAT " entries", - hrrs.n_yielded(), r->rem_set()->occupied()); - // We should only clear the card based remembered set here as we will not - // implicitly rebuild anything else during eager reclaim. Note that at the moment - // (and probably never) we do not enter this path if there are other kind of - // remembered sets for this region. - r->rem_set()->clear_locked(true /* only_cardset */); - // Clear_locked() above sets the state to Empty. 
However we want to continue - // collecting remembered set entries for humongous regions that were not - // reclaimed. - r->rem_set()->set_state_complete(); -#ifdef ASSERT - G1HeapRegionAttr region_attr = g1h->region_attr(oop(r->bottom())); - assert(region_attr.needs_remset_update(), "must be"); -#endif - } - assert(r->rem_set()->is_empty(), "At this point any humongous candidate remembered set must be empty."); + // We will later handle the remembered sets of these regions. } else { g1h->register_region_with_region_attr(r); } @@ -2761,8 +2717,6 @@ size_t total_humongous() const { return _total_humongous; } size_t candidate_humongous() const { return _candidate_humongous; } - - void flush_rem_set_entries() { _dcq.flush(); } }; void G1CollectedHeap::register_regions_with_region_attr() { @@ -2775,9 +2729,6 @@ cl.total_humongous(), cl.candidate_humongous()); _has_humongous_reclaim_candidates = cl.candidate_humongous() > 0; - - // Finally flush all remembered set entries to re-check into the global DCQS. - cl.flush_rem_set_entries(); } #ifndef PRODUCT @@ -3072,7 +3023,7 @@ workers()->active_workers(), collection_set()->young_region_length(), collection_set()->optional_region_length()); - pre_evacuate_collection_set(evacuation_info); + pre_evacuate_collection_set(evacuation_info, &per_thread_states); // Actually do the work... 
evacuate_initial_collection_set(&per_thread_states); @@ -3105,9 +3056,7 @@ double sample_end_time_sec = os::elapsedTime(); double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS; - size_t total_cards_scanned = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanRS, G1GCPhaseTimes::ScanRSScannedCards) + - phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanRS, G1GCPhaseTimes::ScanRSScannedCards); - policy()->record_collection_pause_end(pause_time_ms, total_cards_scanned, heap_used_bytes_before_gc); + policy()->record_collection_pause_end(pause_time_ms, heap_used_bytes_before_gc); } verify_after_young_collection(verify_type); @@ -3581,7 +3530,7 @@ phase_times()->record_merge_pss_time_ms((os::elapsedTime() - merge_pss_time_start) * 1000.0); } -void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info) { +void G1CollectedHeap::pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* per_thread_states) { _expand_heap_after_alloc_failure = true; _evacuation_failed = false; @@ -3592,10 +3541,15 @@ // Initialize the GC alloc regions. 
_allocator->init_gc_alloc_regions(evacuation_info); + { + Ticks start = Ticks::now(); + rem_set()->prepare_for_scan_heap_roots(); + phase_times()->record_prepare_heap_roots_time_ms((Ticks::now() - start).seconds() * 1000.0); + } + register_regions_with_region_attr(); assert(_verifier->check_region_attr_table(), "Inconsistency in the region attributes table."); - rem_set()->prepare_for_scan_rem_set(); _preserved_marks_set.assert_empty(); #if COMPILER2_OR_JVMCI @@ -3697,8 +3651,8 @@ void scan_roots(G1ParScanThreadState* pss, uint worker_id) { _root_processor->evacuate_roots(pss, worker_id); - _g1h->rem_set()->update_rem_set(pss, worker_id); - _g1h->rem_set()->scan_rem_set(pss, worker_id, G1GCPhaseTimes::ScanRS, G1GCPhaseTimes::ObjCopy, G1GCPhaseTimes::CodeRoots); + _g1h->rem_set()->scan_heap_roots(pss, worker_id, G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ObjCopy); + _g1h->rem_set()->scan_collection_set_regions(pss, worker_id, G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::CodeRoots, G1GCPhaseTimes::ObjCopy); } void evacuate_live_objects(G1ParScanThreadState* pss, uint worker_id) { @@ -3725,6 +3679,14 @@ }; void G1CollectedHeap::evacuate_initial_collection_set(G1ParScanThreadStateSet* per_thread_states) { + G1GCPhaseTimes* p = phase_times(); + + { + Ticks start = Ticks::now(); + rem_set()->merge_heap_roots(false /* remset_only */, G1GCPhaseTimes::MergeRS); + p->record_merge_heap_roots_time((Ticks::now() - start).seconds() * 1000.0); + } + Tickspan task_time; const uint num_workers = workers()->active_workers(); @@ -3739,7 +3701,6 @@ } Tickspan total_processing = Ticks::now() - start_processing; - G1GCPhaseTimes* p = phase_times(); p->record_initial_evac_time(task_time.seconds() * 1000.0); p->record_or_add_code_root_fixup_time((total_processing - task_time).seconds() * 1000.0); } @@ -3747,7 +3708,8 @@ class G1EvacuateOptionalRegionsTask : public G1EvacuateRegionsBaseTask { void scan_roots(G1ParScanThreadState* pss, uint worker_id) { - _g1h->rem_set()->scan_rem_set(pss, 
worker_id, G1GCPhaseTimes::OptScanRS, G1GCPhaseTimes::OptObjCopy, G1GCPhaseTimes::OptCodeRoots); + _g1h->rem_set()->scan_heap_roots(pss, worker_id, G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::OptObjCopy); + _g1h->rem_set()->scan_collection_set_regions(pss, worker_id, G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::OptCodeRoots, G1GCPhaseTimes::OptObjCopy); } void evacuate_live_objects(G1ParScanThreadState* pss, uint worker_id) { @@ -3783,8 +3745,6 @@ void G1CollectedHeap::evacuate_optional_collection_set(G1ParScanThreadStateSet* per_thread_states) { const double gc_start_time_ms = phase_times()->cur_collection_start_sec() * 1000.0; - Ticks start = Ticks::now(); - while (!evacuation_failed() && _collection_set.optional_region_length() > 0) { double time_used_ms = os::elapsedTime() * 1000.0 - gc_start_time_ms; @@ -3797,18 +3757,24 @@ break; } - evacuate_next_optional_regions(per_thread_states); + { + Ticks start = Ticks::now(); + rem_set()->merge_heap_roots(true /* remset_only */, G1GCPhaseTimes::OptMergeRS); + phase_times()->record_or_add_optional_merge_heap_roots_time((Ticks::now() - start).seconds() * 1000.0); + } + + { + Ticks start = Ticks::now(); + evacuate_next_optional_regions(per_thread_states); + phase_times()->record_or_add_optional_evac_time((Ticks::now() - start).seconds() * 1000.0); + } } _collection_set.abandon_optional_collection_set(per_thread_states); - - phase_times()->record_or_add_optional_evac_time((Ticks::now() - start).seconds() * 1000.0); } void G1CollectedHeap::post_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* per_thread_states) { - // Also cleans the card table from temporary duplicate detection information used - // during UpdateRS/ScanRS. - rem_set()->cleanup_after_scan_rem_set(); + rem_set()->cleanup_after_scan_heap_roots(); // Process any discovered reference objects - we have // to do this _before_ we retire the GC alloc regions
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -78,7 +78,6 @@ class G1HotCardCache; class G1RemSet; class G1YoungRemSetSamplingThread; -class HeapRegionRemSetIterator; class G1ConcurrentMark; class G1ConcurrentMarkThread; class G1ConcurrentRefine; @@ -757,7 +756,7 @@ void evacuate_next_optional_regions(G1ParScanThreadStateSet* per_thread_states); public: - void pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info); + void pre_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* pss); void post_evacuate_collection_set(G1EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* pss); void expand_heap_after_young_collection(); @@ -1115,7 +1114,8 @@ public: - inline G1HeapRegionAttr region_attr(const void* obj); + inline G1HeapRegionAttr region_attr(const void* obj) const; + inline G1HeapRegionAttr region_attr(uint idx) const; // Return "TRUE" iff the given object address is in the reserved // region of g1. @@ -1182,7 +1182,12 @@ // Starts the iteration so that the start regions of a given worker id over the // set active_workers are evenly spread across the set of collection set regions // to be iterated. - void collection_set_iterate_increment_from(HeapRegionClosure *blk, uint worker_id); + // The variant with the HeapRegionClaimer guarantees that the closure will be + // applied to a particular region exactly once. + void collection_set_iterate_increment_from(HeapRegionClosure *blk, uint worker_id) { + collection_set_iterate_increment_from(blk, NULL, worker_id); + } + void collection_set_iterate_increment_from(HeapRegionClosure *blk, HeapRegionClaimer* hr_claimer, uint worker_id); // Returns the HeapRegion that contains addr. addr must not be NULL. template <class T>
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -163,10 +163,14 @@ return _region_attr.is_in_cset_or_humongous((HeapWord*)obj); } -G1HeapRegionAttr G1CollectedHeap::region_attr(const void* addr) { +G1HeapRegionAttr G1CollectedHeap::region_attr(const void* addr) const { return _region_attr.at((HeapWord*)addr); } +G1HeapRegionAttr G1CollectedHeap::region_attr(uint idx) const { + return _region_attr.get_by_index(idx); +} + void G1CollectedHeap::register_humongous_region_with_region_attr(uint index) { _region_attr.set_humongous(index, region_at(index)->rem_set()->is_tracked()); } @@ -177,7 +181,7 @@ void G1CollectedHeap::register_old_region_with_region_attr(HeapRegion* r) { _region_attr.set_in_old(r->hrm_index(), r->rem_set()->is_tracked()); - _rem_set->prepare_for_scan_rem_set(r->hrm_index()); + _rem_set->prepare_for_scan_heap_roots(r->hrm_index()); } void G1CollectedHeap::register_optional_region_with_region_attr(HeapRegion* r) {
--- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -217,10 +217,13 @@ } } -void G1CollectionSet::iterate_incremental_part_from(HeapRegionClosure* cl, uint worker_id, uint total_workers) const { +void G1CollectionSet::iterate_incremental_part_from(HeapRegionClosure* cl, + HeapRegionClaimer* hr_claimer, + uint worker_id, + uint total_workers) const { assert_at_safepoint(); - size_t len = _collection_set_cur_length - _inc_part_start; + size_t len = increment_length(); if (len == 0) { return; } @@ -229,9 +232,12 @@ size_t cur_pos = start_pos; do { - HeapRegion* r = _g1h->region_at(_collection_set_regions[cur_pos + _inc_part_start]); - bool result = cl->do_heap_region(r); - guarantee(!result, "Must not cancel iteration"); + uint region_idx = _collection_set_regions[cur_pos + _inc_part_start]; + if (hr_claimer == NULL || hr_claimer->claim_region(region_idx)) { + HeapRegion* r = _g1h->region_at(region_idx); + bool result = cl->do_heap_region(r); + guarantee(!result, "Must not cancel iteration"); + } cur_pos++; if (cur_pos == len) {
--- a/src/hotspot/share/gc/g1/g1CollectionSet.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/g1/g1CollectionSet.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -36,6 +36,7 @@ class G1Policy; class G1SurvivorRegions; class HeapRegion; +class HeapRegionClaimer; class HeapRegionClosure; // The collection set. @@ -279,7 +280,12 @@ // Iterate over the current collection set increment applying the given HeapRegionClosure // from a starting position determined by the given worker id. - void iterate_incremental_part_from(HeapRegionClosure* cl, uint worker_id, uint total_workers) const; + void iterate_incremental_part_from(HeapRegionClosure* cl, HeapRegionClaimer* hr_claimer, uint worker_id, uint total_workers) const; + + // Returns the length of the current increment in number of regions. + size_t increment_length() const { return _collection_set_cur_length - _inc_part_start; } + // Returns the length of the whole current collection set in number of regions + size_t cur_length() const { return _collection_set_cur_length; } // Iterate over the entire collection set (all increments calculated so far), applying // the given HeapRegionClosure on all of them.
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -2419,12 +2419,13 @@ abort_marking_if_regular_check_fail(); } + // Can't assert qset is empty here, even if not aborted. If concurrent, + // some other thread might be adding to the queue. If not concurrent, + // some other thread might have won the race for the last buffer, but + // has not yet decremented the count. + _draining_satb_buffers = false; - assert(has_aborted() || - _cm->concurrent() || - satb_mq_set.completed_buffers_num() == 0, "invariant"); - // again, this was a potentially expensive operation, decrease the // limits to get the regular clock call early decrease_limits();
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -206,7 +206,7 @@ // available buffers near green_zone value. When yellow_size is // large we don't want to allow a full step to accumulate before // doing any processing, as that might lead to significantly more - // than green_zone buffers to be processed by update_rs. + // than green_zone buffers to be processed during scanning. step = MIN2(step, ParallelGCThreads / 2.0); } size_t activate_offset = static_cast<size_t>(ceil(step * (worker_i + 1))); @@ -322,18 +322,18 @@ } static size_t calc_new_green_zone(size_t green, - double update_rs_time, - size_t update_rs_processed_buffers, + double log_buffer_scan_time, + size_t processed_log_buffers, double goal_ms) { // Adjust green zone based on whether we're meeting the time goal. // Limit to max_green_zone. const double inc_k = 1.1, dec_k = 0.9; - if (update_rs_time > goal_ms) { + if (log_buffer_scan_time > goal_ms) { if (green > 0) { green = static_cast<size_t>(green * dec_k); } - } else if (update_rs_time < goal_ms && - update_rs_processed_buffers > green) { + } else if (log_buffer_scan_time < goal_ms && + processed_log_buffers > green) { green = static_cast<size_t>(MAX2(green * inc_k, green + 1.0)); green = MIN2(green, max_green_zone); } @@ -350,20 +350,20 @@ return MIN2(yellow + (yellow - green), max_red_zone); } -void G1ConcurrentRefine::update_zones(double update_rs_time, - size_t update_rs_processed_buffers, +void G1ConcurrentRefine::update_zones(double log_buffer_scan_time, + size_t processed_log_buffers, double goal_ms) { log_trace( CTRL_TAGS )("Updating Refinement Zones: " - "update_rs time: %.3fms, " - "update_rs buffers: " SIZE_FORMAT ", " - "update_rs goal time: %.3fms", - update_rs_time, - update_rs_processed_buffers, + "log buffer scan time: %.3fms, " + "processed buffers: " SIZE_FORMAT ", " + "goal time: %.3fms", + 
log_buffer_scan_time, + processed_log_buffers, goal_ms); _green_zone = calc_new_green_zone(_green_zone, - update_rs_time, - update_rs_processed_buffers, + log_buffer_scan_time, + processed_log_buffers, goal_ms); _yellow_zone = calc_new_yellow_zone(_green_zone, _min_yellow_zone_size); _red_zone = calc_new_red_zone(_green_zone, _yellow_zone); @@ -376,13 +376,13 @@ _green_zone, _yellow_zone, _red_zone); } -void G1ConcurrentRefine::adjust(double update_rs_time, - size_t update_rs_processed_buffers, +void G1ConcurrentRefine::adjust(double log_buffer_scan_time, + size_t processed_log_buffers, double goal_ms) { G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); if (G1UseAdaptiveConcRefinement) { - update_zones(update_rs_time, update_rs_processed_buffers, goal_ms); + update_zones(log_buffer_scan_time, processed_log_buffers, goal_ms); // Change the barrier params if (max_num_threads() == 0) {
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp Mon Jul 01 14:57:02 2019 -0700 @@ -97,8 +97,8 @@ size_t min_yellow_zone_size); // Update green/yellow/red zone values based on how well goals are being met. - void update_zones(double update_rs_time, - size_t update_rs_processed_buffers, + void update_zones(double log_buffer_scan_time, + size_t processed_log_buffers, double goal_ms); static uint worker_id_offset(); @@ -115,7 +115,7 @@ void stop(); // Adjust refinement thresholds based on work done during the pause and the goal time. - void adjust(double update_rs_time, size_t update_rs_processed_buffers, double goal_ms); + void adjust(double log_buffer_scan_time, size_t processed_log_buffers, double goal_ms); size_t activation_threshold(uint worker_id) const; size_t deactivation_threshold(uint worker_id) const;
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp Wed Jun 26 15:34:13 2019 -0700 +++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp Mon Jul 01 14:57:02 2019 -0700 @@ -78,7 +78,14 @@ } G1DirtyCardQueueSet::G1DirtyCardQueueSet(bool notify_when_complete) : - PtrQueueSet(notify_when_complete), + PtrQueueSet(), + _cbl_mon(NULL), + _completed_buffers_head(NULL), + _completed_buffers_tail(NULL), + _n_completed_buffers(0), + _process_completed_buffers_threshold(ProcessCompletedBuffersThresholdNever), + _process_completed_buffers(false), + _notify_when_complete(notify_when_complete), _max_completed_buffers(MaxCompletedBuffersUnlimited), _completed_buffers_padding(0), _free_ids(NULL), @@ -90,6 +97,7 @@ } G1DirtyCardQueueSet::~G1DirtyCardQueueSet() { + abandon_completed_buffers(); delete _free_ids; } @@ -101,7 +109,9 @@ void G1DirtyCardQueueSet::initialize(Monitor* cbl_mon, BufferNode::Allocator* allocator, bool init_free_ids) { - PtrQueueSet::initialize(cbl_mon, allocator); + PtrQueueSet::initialize(allocator); + assert(_cbl_mon == NULL, "Init order issue?"); + _cbl_mon = cbl_mon; if (init_free_ids) { _free_ids = new G1FreeIdSet(0, num_par_ids()); } @@ -111,6 +121,123 @@ G1ThreadLocalData::dirty_card_queue(t).handle_zero_index(); } +void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) { + MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); + cbn->set_next(NULL); + if (_completed_buffers_tail == NULL) { + assert(_completed_buffers_head == NULL, "Well-formedness"); + _completed_buffers_head = cbn; + _completed_buffers_tail = cbn; + } else { + _completed_buffers_tail->set_next(cbn); + _completed_buffers_tail = cbn; + } + _n_completed_buffers++; + + if (!process_completed_buffers() && + (_n_completed_buffers > process_completed_buffers_threshold())) { + set_process_completed_buffers(true); + if (_notify_when_complete) { + _cbl_mon->notify_all(); + } + } + assert_completed_buffers_list_len_correct_locked(); +} + +BufferNode* 
G1DirtyCardQueueSet::get_completed_buffer(size_t stop_at) { + MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); + + if (_n_completed_buffers <= stop_at) { + return NULL; + } + + assert(_n_completed_buffers > 0, "invariant"); + assert(_completed_buffers_head != NULL, "invariant"); + assert(_completed_buffers_tail != NULL, "invariant"); + + BufferNode* bn = _completed_buffers_head; + _n_completed_buffers--; + _completed_buffers_head = bn->next(); + if (_completed_buffers_head == NULL) { + assert(_n_completed_buffers == 0, "invariant"); + _completed_buffers_tail = NULL; + set_process_completed_buffers(false); + } + assert_completed_buffers_list_len_correct_locked(); + bn->set_next(NULL); + return bn; +} + +void G1DirtyCardQueueSet::abandon_completed_buffers() { + BufferNode* buffers_to_delete = NULL; + { + MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); + buffers_to_delete = _completed_buffers_head; + _completed_buffers_head = NULL; + _completed_buffers_tail = NULL; + _n_completed_buffers = 0; + set_process_completed_buffers(false); + } + while (buffers_to_delete != NULL) { + BufferNode* bn = buffers_to_delete; + buffers_to_delete = bn->next(); + bn->set_next(NULL); + deallocate_buffer(bn); + } +} + +void G1DirtyCardQueueSet::notify_if_necessary() { + MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); + if (_n_completed_buffers > process_completed_buffers_threshold()) { + set_process_completed_buffers(true); + if (_notify_when_complete) + _cbl_mon->notify(); + } +} + +#ifdef ASSERT +void G1DirtyCardQueueSet::assert_completed_buffers_list_len_correct_locked() { + assert_lock_strong(_cbl_mon); + size_t n = 0; + for (BufferNode* bn = _completed_buffers_head; bn != NULL; bn = bn->next()) { + ++n; + } + assert(n == _n_completed_buffers, + "Completed buffer length is wrong: counted: " SIZE_FORMAT + ", expected: " SIZE_FORMAT, n, _n_completed_buffers); +} +#endif // ASSERT + +// Merge lists of buffers. Notify the processing threads. 
+// The source queue is emptied as a result. The queues +// must share the monitor. +void G1DirtyCardQueueSet::merge_bufferlists(G1DirtyCardQueueSet *src) { + assert(_cbl_mon == src->_cbl_mon, "Should share the same lock"); + MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag); + if (_completed_buffers_tail == NULL) { + assert(_completed_buffers_head == NULL, "Well-formedness"); + _completed_buffers_head = src->_completed_buffers_head; + _completed_buffers_tail = src->_completed_buffers_tail; + } else { + assert(_completed_buffers_head != NULL, "Well form