OpenJDK / jdk / jdk
changeset 1411:e97ccaa63428
Merge
author | trims |
---|---|
date | Wed, 01 Oct 2008 16:57:40 -0700 |
parents | 10d63e93ac15 a30f4c5a5023 |
children | 2bb3fe3e00ea 22bac34084fb |
files | |
diffstat | 260 files changed, 38783 insertions(+), 3672 deletions(-) [+] |
line wrap: on
line diff
--- a/hotspot/make/linux/makefiles/top.make Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/make/linux/makefiles/top.make Wed Oct 01 16:57:40 2008 -0700 @@ -64,6 +64,7 @@ $(VM)/gc_implementation/includeDB_gc_parallelScavenge \ $(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \ $(VM)/gc_implementation/includeDB_gc_parNew \ + $(VM)/gc_implementation/includeDB_gc_g1 \ $(VM)/gc_implementation/includeDB_gc_serial \ $(VM)/gc_implementation/includeDB_gc_shared
--- a/hotspot/make/solaris/makefiles/top.make Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/make/solaris/makefiles/top.make Wed Oct 01 16:57:40 2008 -0700 @@ -54,6 +54,7 @@ $(VM)/gc_implementation/includeDB_gc_parallelScavenge \ $(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \ $(VM)/gc_implementation/includeDB_gc_parNew \ + $(VM)/gc_implementation/includeDB_gc_g1 \ $(VM)/gc_implementation/includeDB_gc_serial \ $(VM)/gc_implementation/includeDB_gc_shared
--- a/hotspot/make/windows/makefiles/generated.make Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/make/windows/makefiles/generated.make Wed Oct 01 16:57:40 2008 -0700 @@ -50,7 +50,8 @@ $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge \ $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_shared \ $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parNew \ - $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep + $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep \ + $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_g1 IncludeDBs_core=$(IncludeDBs_base) $(IncludeDBs_gc) \ $(WorkSpace)/src/share/vm/includeDB_features
--- a/hotspot/make/windows/makefiles/makedeps.make Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/make/windows/makefiles/makedeps.make Wed Oct 01 16:57:40 2008 -0700 @@ -64,6 +64,7 @@ -relativeInclude src\share\vm\gc_implementation\shared \ -relativeInclude src\share\vm\gc_implementation\parNew \ -relativeInclude src\share\vm\gc_implementation\concurrentMarkSweep \ + -relativeInclude src\share\vm\gc_implementation\g1 \ -relativeInclude src\share\vm\gc_interface \ -relativeInclude src\share\vm\asm \ -relativeInclude src\share\vm\memory \ @@ -115,6 +116,7 @@ -additionalFile includeDB_gc_parallel \ -additionalFile includeDB_gc_parallelScavenge \ -additionalFile includeDB_gc_concurrentMarkSweep \ + -additionalFile includeDB_gc_g1 \ -additionalFile includeDB_gc_parNew \ -additionalFile includeDB_gc_shared \ -additionalFile includeDB_gc_serial \
--- a/hotspot/make/windows/makefiles/vm.make Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/make/windows/makefiles/vm.make Wed Oct 01 16:57:40 2008 -0700 @@ -117,6 +117,7 @@ /I "$(WorkSpace)\src\share\vm\gc_implementation\shared"\ /I "$(WorkSpace)\src\share\vm\gc_implementation\parNew"\ /I "$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep"\ + /I "$(WorkSpace)\src\share\vm\gc_implementation\g1"\ /I "$(WorkSpace)\src\share\vm\gc_interface"\ /I "$(WorkSpace)\src\share\vm\asm" \ /I "$(WorkSpace)\src\share\vm\memory" \ @@ -146,6 +147,7 @@ VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/shared VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/parNew VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/concurrentMarkSweep +VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/g1 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_interface VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/asm VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/memory @@ -222,6 +224,9 @@ {$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep}.cpp.obj:: $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $< +{$(WorkSpace)\src\share\vm\gc_implementation\g1}.cpp.obj:: + $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $< + {$(WorkSpace)\src\share\vm\gc_interface}.cpp.obj:: $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -130,6 +130,20 @@ return 0x00; // illegal instruction 0x00000000 } +Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) { + switch (in) { + case rc_z: return equal; + case rc_lez: return lessEqual; + case rc_lz: return less; + case rc_nz: return notEqual; + case rc_gz: return greater; + case rc_gez: return greaterEqual; + default: + ShouldNotReachHere(); + } + return equal; +} + // Generate a bunch 'o stuff (including v9's #ifndef PRODUCT void Assembler::test_v9() { @@ -1213,31 +1227,19 @@ } -void MacroAssembler::store_check(Register tmp, Register obj) { - // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.) - - /* $$$ This stuff needs to go into one of the BarrierSet generator - functions. (The particular barrier sets will have to be friends of - MacroAssembler, I guess.) */ - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); - CardTableModRefBS* ct = (CardTableModRefBS*)bs; - assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); +void MacroAssembler::card_table_write(jbyte* byte_map_base, + Register tmp, Register obj) { #ifdef _LP64 srlx(obj, CardTableModRefBS::card_shift, obj); #else srl(obj, CardTableModRefBS::card_shift, obj); #endif assert( tmp != obj, "need separate temp reg"); - Address rs(tmp, (address)ct->byte_map_base); + Address rs(tmp, (address)byte_map_base); load_address(rs); stb(G0, rs.base(), obj); } -void MacroAssembler::store_check(Register tmp, Register obj, Register offset) { - store_check(tmp, obj); -} - // %%% Note: The following six instructions have been moved, // unchanged, from assembler_sparc.inline.hpp. // They will be refactored at a later date. 
@@ -1663,11 +1665,21 @@ if (reg == G0) return; // always NULL, which is always an oop - char buffer[16]; + char buffer[64]; +#ifdef COMPILER1 + if (CommentedAssembly) { + snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset()); + block_comment(buffer); + } +#endif + + int len = strlen(file) + strlen(msg) + 1 + 4; sprintf(buffer, "%d", line); - int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer); + len += strlen(buffer); + sprintf(buffer, " at offset %d ", offset()); + len += strlen(buffer); char * real_msg = new char[len]; - sprintf(real_msg, "%s (%s:%d)", msg, file, line); + sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line); // Call indirectly to solve generation ordering problem Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address()); @@ -2059,6 +2071,27 @@ #endif } +void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, + Register s1, address d, + relocInfo::relocType rt ) { + if (VM_Version::v9_instructions_work()) { + bpr(rc, a, p, s1, d, rt); + } else { + tst(s1); + br(reg_cond_to_cc_cond(rc), a, p, d, rt); + } +} + +void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, + Register s1, Label& L ) { + if (VM_Version::v9_instructions_work()) { + bpr(rc, a, p, s1, L); + } else { + tst(s1); + br(reg_cond_to_cc_cond(rc), a, p, L); + } +} + // instruction sequences factored across compiler & interpreter @@ -3241,68 +3274,74 @@ assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); - // get eden boundaries - // note: we need both top & top_addr! 
- const Register top_addr = t1; - const Register end = t2; - - CollectedHeap* ch = Universe::heap(); - set((intx)ch->top_addr(), top_addr); - intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); - ld_ptr(top_addr, delta, end); - ld_ptr(top_addr, 0, obj); - - // try to allocate - Label retry; - bind(retry); -#ifdef ASSERT - // make sure eden top is properly aligned - { - Label L; - btst(MinObjAlignmentInBytesMask, obj); - br(Assembler::zero, false, Assembler::pt, L); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + // No allocation in the shared eden. + br(Assembler::always, false, Assembler::pt, slow_case); delayed()->nop(); - stop("eden top is not properly aligned"); - bind(L); - } + } else { + // get eden boundaries + // note: we need both top & top_addr! + const Register top_addr = t1; + const Register end = t2; + + CollectedHeap* ch = Universe::heap(); + set((intx)ch->top_addr(), top_addr); + intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); + ld_ptr(top_addr, delta, end); + ld_ptr(top_addr, 0, obj); + + // try to allocate + Label retry; + bind(retry); +#ifdef ASSERT + // make sure eden top is properly aligned + { + Label L; + btst(MinObjAlignmentInBytesMask, obj); + br(Assembler::zero, false, Assembler::pt, L); + delayed()->nop(); + stop("eden top is not properly aligned"); + bind(L); + } #endif // ASSERT - const Register free = end; - sub(end, obj, free); // compute amount of free space - if (var_size_in_bytes->is_valid()) { - // size is unknown at compile time - cmp(free, var_size_in_bytes); - br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case - delayed()->add(obj, var_size_in_bytes, end); - } else { - // size is known at compile time - cmp(free, con_size_in_bytes); - br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case - delayed()->add(obj, con_size_in_bytes, end); - } - // Compare obj with the 
value at top_addr; if still equal, swap the value of - // end with the value at top_addr. If not equal, read the value at top_addr - // into end. - casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); - // if someone beat us on the allocation, try again, otherwise continue - cmp(obj, end); - brx(Assembler::notEqual, false, Assembler::pn, retry); - delayed()->mov(end, obj); // nop if successfull since obj == end + const Register free = end; + sub(end, obj, free); // compute amount of free space + if (var_size_in_bytes->is_valid()) { + // size is unknown at compile time + cmp(free, var_size_in_bytes); + br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case + delayed()->add(obj, var_size_in_bytes, end); + } else { + // size is known at compile time + cmp(free, con_size_in_bytes); + br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case + delayed()->add(obj, con_size_in_bytes, end); + } + // Compare obj with the value at top_addr; if still equal, swap the value of + // end with the value at top_addr. If not equal, read the value at top_addr + // into end. 
+ casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); + // if someone beat us on the allocation, try again, otherwise continue + cmp(obj, end); + brx(Assembler::notEqual, false, Assembler::pn, retry); + delayed()->mov(end, obj); // nop if successfull since obj == end #ifdef ASSERT - // make sure eden top is properly aligned - { - Label L; - const Register top_addr = t1; - - set((intx)ch->top_addr(), top_addr); - ld_ptr(top_addr, 0, top_addr); - btst(MinObjAlignmentInBytesMask, top_addr); - br(Assembler::zero, false, Assembler::pt, L); - delayed()->nop(); - stop("eden top is not properly aligned"); - bind(L); + // make sure eden top is properly aligned + { + Label L; + const Register top_addr = t1; + + set((intx)ch->top_addr(), top_addr); + ld_ptr(top_addr, 0, top_addr); + btst(MinObjAlignmentInBytesMask, top_addr); + br(Assembler::zero, false, Assembler::pt, L); + delayed()->nop(); + stop("eden top is not properly aligned"); + bind(L); + } +#endif // ASSERT } -#endif // ASSERT } @@ -3554,6 +3593,468 @@ } } +/////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +static uint num_stores = 0; +static uint num_null_pre_stores = 0; + +static void count_null_pre_vals(void* pre_val) { + num_stores++; + if (pre_val == NULL) num_null_pre_stores++; + if ((num_stores % 1000000) == 0) { + tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.", + num_stores, num_null_pre_stores, + 100.0*(float)num_null_pre_stores/(float)num_stores); + } +} + +static address satb_log_enqueue_with_frame = 0; +static u_char* satb_log_enqueue_with_frame_end = 0; + +static address satb_log_enqueue_frameless = 0; +static u_char* satb_log_enqueue_frameless_end = 0; + +static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions? + +// The calls to this don't work. We'd need to do a fair amount of work to +// make it work. 
+static void check_index(int ind) { + assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0), + "Invariants.") +} + +static void generate_satb_log_enqueue(bool with_frame) { + BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize); + CodeBuffer buf(bb->instructions_begin(), bb->instructions_size()); + MacroAssembler masm(&buf); + address start = masm.pc(); + Register pre_val; + + Label refill, restart; + if (with_frame) { + masm.save_frame(0); + pre_val = I0; // Was O0 before the save. + } else { + pre_val = O0; + } + int satb_q_index_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index()); + int satb_q_buf_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf()); + assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && + in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), + "check sizes in assembly below"); + + masm.bind(restart); + masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0); + + masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); + // If the branch is taken, no harm in executing this in the delay slot. + masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); + masm.sub(L0, oopSize, L0); + + masm.st_ptr(pre_val, L1, L0); // [_buf + index] := I0 + if (!with_frame) { + // Use return-from-leaf + masm.retl(); + masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset); + } else { + // Not delayed. + masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset); + } + if (with_frame) { + masm.ret(); + masm.delayed()->restore(); + } + masm.bind(refill); + + address handle_zero = + CAST_FROM_FN_PTR(address, + &SATBMarkQueueSet::handle_zero_index_for_thread); + // This should be rare enough that we can afford to save all the + // scratch registers that the calling context might be using. 
+ masm.mov(G1_scratch, L0); + masm.mov(G3_scratch, L1); + masm.mov(G4, L2); + // We need the value of O0 above (for the write into the buffer), so we + // save and restore it. + masm.mov(O0, L3); + // Since the call will overwrite O7, we save and restore that, as well. + masm.mov(O7, L4); + masm.call_VM_leaf(L5, handle_zero, G2_thread); + masm.mov(L0, G1_scratch); + masm.mov(L1, G3_scratch); + masm.mov(L2, G4); + masm.mov(L3, O0); + masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); + masm.delayed()->mov(L4, O7); + + if (with_frame) { + satb_log_enqueue_with_frame = start; + satb_log_enqueue_with_frame_end = masm.pc(); + } else { + satb_log_enqueue_frameless = start; + satb_log_enqueue_frameless_end = masm.pc(); + } +} + +static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) { + if (with_frame) { + if (satb_log_enqueue_with_frame == 0) { + generate_satb_log_enqueue(with_frame); + assert(satb_log_enqueue_with_frame != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated with-frame satb enqueue:"); + Disassembler::decode((u_char*)satb_log_enqueue_with_frame, + satb_log_enqueue_with_frame_end, + tty); + } + } + } else { + if (satb_log_enqueue_frameless == 0) { + generate_satb_log_enqueue(with_frame); + assert(satb_log_enqueue_frameless != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated frameless satb enqueue:"); + Disassembler::decode((u_char*)satb_log_enqueue_frameless, + satb_log_enqueue_frameless_end, + tty); + } + } + } +} + +void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) { + assert(offset == 0 || index == noreg, "choose one"); + + if (G1DisablePreBarrier) return; + // satb_log_barrier(tmp, obj, offset, preserve_o_regs); + Label filtered; + // satb_log_barrier_work0(tmp, filtered); + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + ld(G2, + in_bytes(JavaThread::satb_mark_queue_offset() + + 
PtrQueue::byte_offset_of_active()), + tmp); + } else { + guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, + "Assumption"); + ldsb(G2, + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()), + tmp); + } + // Check on whether to annul. + br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); + delayed() -> nop(); + + // satb_log_barrier_work1(tmp, offset); + if (index == noreg) { + if (Assembler::is_simm13(offset)) { + ld_ptr(obj, offset, tmp); + } else { + set(offset, tmp); + ld_ptr(obj, tmp, tmp); + } + } else { + ld_ptr(obj, index, tmp); + } + + // satb_log_barrier_work2(obj, tmp, offset); + + // satb_log_barrier_work3(tmp, filtered, preserve_o_regs); + + const Register pre_val = tmp; + + if (G1SATBBarrierPrintNullPreVals) { + save_frame(0); + mov(pre_val, O0); + // Save G-regs that target may use. + mov(G1, L1); + mov(G2, L2); + mov(G3, L3); + mov(G4, L4); + mov(G5, L5); + call(CAST_FROM_FN_PTR(address, &count_null_pre_vals)); + delayed()->nop(); + // Restore G-regs that target may have used. + mov(L1, G1); + mov(L2, G2); + mov(L3, G3); + mov(L4, G4); + mov(L5, G5); + restore(G0, G0, G0); + } + + // Check on whether to annul. + br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered); + delayed() -> nop(); + + // OK, it's not filtered, so we'll need to call enqueue. In the normal + // case, pre_val will be a scratch G-reg, but there's some cases in which + // it's an O-reg. In the first case, do a normal call. In the latter, + // do a save here and call the frameless version. + + guarantee(pre_val->is_global() || pre_val->is_out(), + "Or we need to think harder."); + if (pre_val->is_global() && !preserve_o_regs) { + generate_satb_log_enqueue_if_necessary(true); // with frame. + call(satb_log_enqueue_with_frame); + delayed()->mov(pre_val, O0); + } else { + generate_satb_log_enqueue_if_necessary(false); // with frameless. 
+ save_frame(0); + call(satb_log_enqueue_frameless); + delayed()->mov(pre_val->after_save(), O0); + restore(); + } + + bind(filtered); +} + +static jint num_ct_writes = 0; +static jint num_ct_writes_filtered_in_hr = 0; +static jint num_ct_writes_filtered_null = 0; +static jint num_ct_writes_filtered_pop = 0; +static G1CollectedHeap* g1 = NULL; + +static Thread* count_ct_writes(void* filter_val, void* new_val) { + Atomic::inc(&num_ct_writes); + if (filter_val == NULL) { + Atomic::inc(&num_ct_writes_filtered_in_hr); + } else if (new_val == NULL) { + Atomic::inc(&num_ct_writes_filtered_null); + } else { + if (g1 == NULL) { + g1 = G1CollectedHeap::heap(); + } + if ((HeapWord*)new_val < g1->popular_object_boundary()) { + Atomic::inc(&num_ct_writes_filtered_pop); + } + } + if ((num_ct_writes % 1000000) == 0) { + jint num_ct_writes_filtered = + num_ct_writes_filtered_in_hr + + num_ct_writes_filtered_null + + num_ct_writes_filtered_pop; + + tty->print_cr("%d potential CT writes: %5.2f%% filtered\n" + " (%5.2f%% intra-HR, %5.2f%% null, %5.2f%% popular).", + num_ct_writes, + 100.0*(float)num_ct_writes_filtered/(float)num_ct_writes, + 100.0*(float)num_ct_writes_filtered_in_hr/ + (float)num_ct_writes, + 100.0*(float)num_ct_writes_filtered_null/ + (float)num_ct_writes, + 100.0*(float)num_ct_writes_filtered_pop/ + (float)num_ct_writes); + } + return Thread::current(); +} + +static address dirty_card_log_enqueue = 0; +static u_char* dirty_card_log_enqueue_end = 0; + +// This gets to assume that o0 contains the object address. 
+static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) { + BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2); + CodeBuffer buf(bb->instructions_begin(), bb->instructions_size()); + MacroAssembler masm(&buf); + address start = masm.pc(); + + Label not_already_dirty, restart, refill; + +#ifdef _LP64 + masm.srlx(O0, CardTableModRefBS::card_shift, O0); +#else + masm.srl(O0, CardTableModRefBS::card_shift, O0); +#endif + Address rs(O1, (address)byte_map_base); + masm.load_address(rs); // O1 := <card table base> + masm.ldub(O0, O1, O2); // O2 := [O0 + O1] + + masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, + O2, not_already_dirty); + // Get O1 + O2 into a reg by itself -- useful in the take-the-branch + // case, harmless if not. + masm.delayed()->add(O0, O1, O3); + + // We didn't take the branch, so we're already dirty: return. + // Use return-from-leaf + masm.retl(); + masm.delayed()->nop(); + + // Not dirty. + masm.bind(not_already_dirty); + // First, dirty it. + masm.stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). + int dirty_card_q_index_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index()); + int dirty_card_q_buf_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf()); + masm.bind(restart); + masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); + + masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, + L0, refill); + // If the branch is taken, no harm in executing this in the delay slot. 
+ masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); + masm.sub(L0, oopSize, L0); + + masm.st_ptr(O3, L1, L0); // [_buf + index] := I0 + // Use return-from-leaf + masm.retl(); + masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset); + + masm.bind(refill); + address handle_zero = + CAST_FROM_FN_PTR(address, + &DirtyCardQueueSet::handle_zero_index_for_thread); + // This should be rare enough that we can afford to save all the + // scratch registers that the calling context might be using. + masm.mov(G1_scratch, L3); + masm.mov(G3_scratch, L5); + // We need the value of O3 above (for the write into the buffer), so we + // save and restore it. + masm.mov(O3, L6); + // Since the call will overwrite O7, we save and restore that, as well. + masm.mov(O7, L4); + + masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread); + masm.mov(L3, G1_scratch); + masm.mov(L5, G3_scratch); + masm.mov(L6, O3); + masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); + masm.delayed()->mov(L4, O7); + + dirty_card_log_enqueue = start; + dirty_card_log_enqueue_end = masm.pc(); + // XXX Should have a guarantee here about not going off the end! + // Does it already do so? Do an experiment... 
+} + +static inline void +generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) { + if (dirty_card_log_enqueue == 0) { + generate_dirty_card_log_enqueue(byte_map_base); + assert(dirty_card_log_enqueue != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated dirty_card enqueue:"); + Disassembler::decode((u_char*)dirty_card_log_enqueue, + dirty_card_log_enqueue_end, + tty); + } + } +} + + +void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) { + + Label filtered; + MacroAssembler* post_filter_masm = this; + + if (new_val == G0) return; + if (G1DisablePostBarrier) return; + + G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::G1SATBCT || + bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); + if (G1RSBarrierRegionFilter) { + xor3(store_addr, new_val, tmp); +#ifdef _LP64 + srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp); +#else + srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp); +#endif + if (G1PrintCTFilterStats) { + guarantee(tmp->is_global(), "Or stats won't work..."); + // This is a sleazy hack: I'm temporarily hijacking G2, which I + // promise to restore. + mov(new_val, G2); + save_frame(0); + mov(tmp, O0); + mov(G2, O1); + // Save G-regs that target may use. + mov(G1, L1); + mov(G2, L2); + mov(G3, L3); + mov(G4, L4); + mov(G5, L5); + call(CAST_FROM_FN_PTR(address, &count_ct_writes)); + delayed()->nop(); + mov(O0, G2); + // Restore G-regs that target may have used. + mov(L1, G1); + mov(L3, G3); + mov(L4, G4); + mov(L5, G5); + restore(G0, G0, G0); + } + // XXX Should I predict this taken or not? Does it mattern? + br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); + delayed()->nop(); + } + + // Now we decide how to generate the card table write. If we're + // enqueueing, we call out to a generated function. Otherwise, we do it + // inline here. 
+ + if (G1RSBarrierUseQueue) { + // If the "store_addr" register is an "in" or "local" register, move it to + // a scratch reg so we can pass it as an argument. + bool use_scr = !(store_addr->is_global() || store_addr->is_out()); + // Pick a scratch register different from "tmp". + Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch); + // Make sure we use up the delay slot! + if (use_scr) { + post_filter_masm->mov(store_addr, scr); + } else { + post_filter_masm->nop(); + } + generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base); + save_frame(0); + call(dirty_card_log_enqueue); + if (use_scr) { + delayed()->mov(scr, O0); + } else { + delayed()->mov(store_addr->after_save(), O0); + } + restore(); + + } else { + +#ifdef _LP64 + post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr); +#else + post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr); +#endif + assert( tmp != store_addr, "need separate temp reg"); + Address rs(tmp, (address)bs->byte_map_base); + load_address(rs); + stb(G0, rs.base(), store_addr); + } + + bind(filtered); + +} + +#endif // SERIALGC +/////////////////////////////////////////////////////////////////////////////////// + +void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) { + // If we're writing constant NULL, we can skip the write barrier. + if (new_val == G0) return; + CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef || + bs->kind() == BarrierSet::CardTableExtension, "wrong barrier"); + card_table_write(bs->byte_map_base, tmp, store_addr); +} + void MacroAssembler::load_klass(Register src_oop, Register klass) { // The number of bytes in this code is used by // MachCallDynamicJavaNode::ret_addr_offset()
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -1439,7 +1439,11 @@ // pp 214 void save( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | rs2(s2) ); } - void save( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } + void save( Register s1, int simm13a, Register d ) { + // make sure frame is at least large enough for the register save area + assert(-simm13a >= 16 * wordSize, "frame too small"); + emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); + } void restore( Register s1 = G0, Register s2 = G0, Register d = G0 ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | rs2(s2) ); } void restore( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } @@ -1594,6 +1598,11 @@ inline void wrasi( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); } inline void wrfprs( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); } + // For a given register condition, return the appropriate condition code + // Condition (the one you would use to get the same effect after "tst" on + // the target register.) + Assembler::Condition reg_cond_to_cc_cond(RCondition in); + // Creation Assembler(CodeBuffer* code) : AbstractAssembler(code) { @@ -1630,6 +1639,8 @@ // restore global registers in case C code disturbed them static void restore_registers(MacroAssembler* a, Register r); + + }; @@ -1722,6 +1733,12 @@ void br_null ( Register s1, bool a, Predict p, Label& L ); void br_notnull( Register s1, bool a, Predict p, Label& L ); + // These versions will do the most efficient thing on v8 and v9. 
Perhaps + // this is what the routine above was meant to do, but it didn't (and + // didn't cover both target address kinds.) + void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none ); + void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, Label& L); + inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); inline void bp( Condition c, bool a, CC cc, Predict p, Label& L ); @@ -2056,9 +2073,23 @@ #endif // ASSERT public: - // Stores - void store_check(Register tmp, Register obj); // store check for obj - register is destroyed afterwards - void store_check(Register tmp, Register obj, Register offset); // store check for obj - register is destroyed afterwards + + // Write to card table for - register is destroyed afterwards. + void card_table_write(jbyte* byte_map_base, Register tmp, Register obj); + + void card_write_barrier_post(Register store_addr, Register new_val, Register tmp); + +#ifndef SERIALGC + // Array store and offset + void g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs); + + void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp); + + // May do filtering, depending on the boolean arguments. + void g1_card_table_write(jbyte* byte_map_base, + Register tmp, Register obj, Register new_val, + bool region_filter, bool null_filter); +#endif // SERIALGC // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack void push_fTOS();
--- a/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -404,4 +404,55 @@ } +/////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + + assert(pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = pre_val()->as_register(); + + ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false); + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, + pre_val_reg, _continuation); + __ delayed()->nop(); + + __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id)); + __ delayed()->mov(pre_val_reg, G4); + __ br(Assembler::always, false, Assembler::pt, _continuation); + __ delayed()->nop(); + +} + +jbyte* G1PostBarrierStub::_byte_map_base = NULL; + +jbyte* G1PostBarrierStub::byte_map_base_slow() { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->is_a(BarrierSet::G1SATBCTLogging), + "Must be if we're using this."); + return ((G1SATBCardTableModRefBS*)bs)->byte_map_base; +} + +void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + + assert(addr()->is_register(), "Precondition."); + assert(new_val()->is_register(), "Precondition."); + Register addr_reg = addr()->as_pointer_register(); + Register new_val_reg = new_val()->as_register(); + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, + new_val_reg, _continuation); + __ delayed()->nop(); + + __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_post_barrier_slow_id)); + __ delayed()->mov(addr_reg, G4); + __ br(Assembler::always, false, Assembler::pt, _continuation); + __ delayed()->nop(); +} + +#endif // SERIALGC +/////////////////////////////////////////////////////////////////////////////////// + #undef __
--- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -2093,7 +2093,11 @@ // the known type isn't loaded since the code sanity checks // in debug mode and the type isn't required when we know the exact type // also check that the type is an array type. - if (op->expected_type() == NULL) { + // We also, for now, always call the stub if the barrier set requires a + // write_ref_pre barrier (which the stub does, but none of the optimized + // cases currently does). + if (op->expected_type() == NULL || + Universe::heap()->barrier_set()->has_write_ref_pre_barrier()) { __ mov(src, O0); __ mov(src_pos, O1); __ mov(dst, O2);
--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -365,6 +365,10 @@ __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info); } + if (obj_store) { + // Needs GC write barriers. + pre_barrier(LIR_OprFact::address(array_addr), false, NULL); + } __ move(value.result(), array_addr, null_check_info); if (obj_store) { // Is this precise? @@ -663,6 +667,10 @@ __ add(obj.result(), offset.result(), addr); + if (type == objectType) { // Write-barrier needed for Object fields. + pre_barrier(obj.result(), false, NULL); + } + if (type == objectType) __ cas_obj(addr, cmp.result(), val.result(), t1, t2); else if (type == intType) @@ -677,7 +685,11 @@ LIR_Opr result = rlock_result(x); __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), result); if (type == objectType) { // Write-barrier needed for Object fields. +#ifdef PRECISE_CARDMARK + post_barrier(addr, val.result()); +#else post_barrier(obj.result(), val.result()); +#endif // PRECISE_CARDMARK } } @@ -1154,6 +1166,10 @@ addr = new LIR_Address(base_op, index_op, type); } + if (is_obj) { + pre_barrier(LIR_OprFact::address(addr), false, NULL); + // _bs->c1_write_barrier_pre(this, LIR_OprFact::address(addr)); + } __ move(data, addr); if (is_obj) { // This address is precise
--- a/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -832,6 +832,163 @@ } break; +#ifndef SERIALGC + case g1_pre_barrier_slow_id: + { // G4: previous value of memory + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ save_frame(0); + __ set((int)id, O1); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0); + __ should_not_reach_here(); + break; + } + + __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments); + + Register pre_val = G4; + Register tmp = G1_scratch; + Register tmp2 = G3_scratch; + + Label refill, restart; + bool with_frame = false; // I don't know if we can do with-frame. + int satb_q_index_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index()); + int satb_q_buf_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf()); + __ bind(restart); + __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp); + + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, + Assembler::pn, tmp, refill); + + // If the branch is taken, no harm in executing this in the delay slot. 
+ __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2); + __ sub(tmp, oopSize, tmp); + + __ st_ptr(pre_val, tmp2, tmp); // [_buf + index] := <address_of_card> + // Use return-from-leaf + __ retl(); + __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset); + + __ bind(refill); + __ save_frame(0); + + __ mov(pre_val, L0); + __ mov(tmp, L1); + __ mov(tmp2, L2); + + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, + SATBMarkQueueSet::handle_zero_index_for_thread), + G2_thread); + + __ mov(L0, pre_val); + __ mov(L1, tmp); + __ mov(L2, tmp2); + + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ delayed()->restore(); + } + break; + + case g1_post_barrier_slow_id: + { + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ save_frame(0); + __ set((int)id, O1); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0); + __ should_not_reach_here(); + break; + } + + __ set_info("g1_post_barrier_slow_id", dont_gc_arguments); + + Register addr = G4; + Register cardtable = G5; + Register tmp = G1_scratch; + Register tmp2 = G3_scratch; + jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base; + + Label not_already_dirty, restart, refill; + +#ifdef _LP64 + __ srlx(addr, CardTableModRefBS::card_shift, addr); +#else + __ srl(addr, CardTableModRefBS::card_shift, addr); +#endif + + Address rs(cardtable, (address)byte_map_base); + __ load_address(rs); // cardtable := <card table base> + __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable] + + __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, + tmp, not_already_dirty); + // Get cardtable + tmp into a reg by itself -- useful in the take-the-branch + // case, harmless if not. + __ delayed()->add(addr, cardtable, tmp2); + + // We didn't take the branch, so we're already dirty: return. + // Use return-from-leaf + __ retl(); + __ delayed()->nop(); + + // Not dirty. 
+ __ bind(not_already_dirty); + // First, dirty it. + __ stb(G0, tmp2, 0); // [cardPtr] := 0 (i.e., dirty). + + Register tmp3 = cardtable; + Register tmp4 = tmp; + + // these registers are now dead + addr = cardtable = tmp = noreg; + + int dirty_card_q_index_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index()); + int dirty_card_q_buf_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf()); + __ bind(restart); + __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3); + + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, + tmp3, refill); + // If the branch is taken, no harm in executing this in the delay slot. + __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4); + __ sub(tmp3, oopSize, tmp3); + + __ st_ptr(tmp2, tmp4, tmp3); // [_buf + index] := <address_of_card> + // Use return-from-leaf + __ retl(); + __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset); + + __ bind(refill); + __ save_frame(0); + + __ mov(tmp2, L0); + __ mov(tmp3, L1); + __ mov(tmp4, L2); + + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, + DirtyCardQueueSet::handle_zero_index_for_thread), + G2_thread); + + __ mov(L0, tmp2); + __ mov(L1, tmp3); + __ mov(L2, tmp4); + + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ delayed()->restore(); + } + break; +#endif // !SERIALGC + default: { __ set_info("unimplemented entry", dont_gc_arguments); __ save_frame(0);
--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -1110,30 +1110,31 @@ // The input registers are overwritten. // void gen_write_ref_array_pre_barrier(Register addr, Register count) { -#if 0 // G1 only BarrierSet* bs = Universe::heap()->barrier_set(); if (bs->has_write_ref_pre_barrier()) { assert(bs->has_write_ref_array_pre_opt(), "Else unsupported barrier set."); - assert(addr->is_global() && count->is_global(), - "If not, then we have to fix this code to handle more " - "general cases."); - // Get some new fresh output registers. __ save_frame(0); // Save the necessary global regs... will be used after. - __ mov(addr, L0); - __ mov(count, L1); - - __ mov(addr, O0); + if (addr->is_global()) { + __ mov(addr, L0); + } + if (count->is_global()) { + __ mov(count, L1); + } + __ mov(addr->after_save(), O0); // Get the count into O1 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)); - __ delayed()->mov(count, O1); - __ mov(L0, addr); - __ mov(L1, count); + __ delayed()->mov(count->after_save(), O1); + if (addr->is_global()) { + __ mov(L0, addr); + } + if (count->is_global()) { + __ mov(L1, count); + } __ restore(); } -#endif // 0 } // // Generate post-write barrier for array. @@ -1150,22 +1151,17 @@ BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { -#if 0 // G1 - only case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: { - assert(addr->is_global() && count->is_global(), - "If not, then we have to fix this code to handle more " - "general cases."); // Get some new fresh output registers. 
__ save_frame(0); - __ mov(addr, O0); + __ mov(addr->after_save(), O0); __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)); - __ delayed()->mov(count, O1); + __ delayed()->mov(count->after_save(), O1); __ restore(); } break; -#endif // 0 G1 - only case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: { @@ -2412,8 +2408,7 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); - gen_write_ref_array_pre_barrier(G1, G5); - + gen_write_ref_array_pre_barrier(O1, O2); #ifdef ASSERT // We sometimes save a frame (see partial_subtype_check below).
--- a/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -28,6 +28,79 @@ #ifndef CC_INTERP #define __ _masm-> +// Misc helpers + +// Do an oop store like *(base + index + offset) = val +// index can be noreg, +static void do_oop_store(InterpreterMacroAssembler* _masm, + Register base, + Register index, + int offset, + Register val, + Register tmp, + BarrierSet::Name barrier, + bool precise) { + assert(tmp != val && tmp != base && tmp != index, "register collision"); + assert(index == noreg || offset == 0, "only one offset"); + switch (barrier) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + __ g1_write_barrier_pre( base, index, offset, tmp, /*preserve_o_regs*/true); + if (index == noreg ) { + assert(Assembler::is_simm13(offset), "fix this code"); + __ store_heap_oop(val, base, offset); + } else { + __ store_heap_oop(val, base, index); + } + + // No need for post barrier if storing NULL + if (val != G0) { + if (precise) { + if (index == noreg) { + __ add(base, offset, base); + } else { + __ add(base, index, base); + } + } + __ g1_write_barrier_post(base, val, tmp); + } + } + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (index == noreg ) { + assert(Assembler::is_simm13(offset), "fix this code"); + __ store_heap_oop(val, base, offset); + } else { + __ store_heap_oop(val, base, index); + } + // No need for post barrier if storing NULL + if (val != G0) { + if (precise) { + if (index == noreg) { + __ add(base, offset, base); + } else { + __ add(base, index, base); + } + } + __ card_write_barrier_post(base, val, tmp); + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + ShouldNotReachHere(); + break; + default : + ShouldNotReachHere(); + + } +} + 
//---------------------------------------------------------------------------------------------------- // Platform-dependent initialization @@ -758,6 +831,8 @@ // O4: array element klass // O5: value klass + // Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + // Generate a fast subtype check. Branch to store_ok if no // failure. Throw if failure. __ gen_subtype_check( O5, O4, G3_scratch, G4_scratch, G1_scratch, store_ok ); @@ -767,18 +842,14 @@ // Store is OK. __ bind(store_ok); - __ store_heap_oop(Otos_i, O1, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - // Quote from rememberedSet.hpp: For objArrays, the precise card - // corresponding to the pointer store is dirtied so we don't need to - // scavenge the entire array. - Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - __ add(element, O1); // address the element precisely - __ store_check(G3_scratch, O1); + do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Otos_i, G3_scratch, _bs->kind(), true); + __ ba(false,done); __ delayed()->inc(Lesp, 3* Interpreter::stackElementSize()); // adj sp (pops array, index and value) __ bind(is_null); - __ store_heap_oop(Otos_i, element); + do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), G0, G4_scratch, _bs->kind(), true); + __ profile_null_seen(G3_scratch); __ inc(Lesp, 3* Interpreter::stackElementSize()); // adj sp (pops array, index and value) __ bind(done); @@ -2449,8 +2520,9 @@ // atos __ pop_ptr(); __ verify_oop(Otos_i); - __ store_heap_oop(Otos_i, Rclass, Roffset); - __ store_check(G1_scratch, Rclass, Roffset); + + do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false); + __ ba(false, checkVolatile); __ delayed()->tst(Lscratch); @@ -2491,8 +2563,9 @@ __ pop_ptr(); pop_and_check_object(Rclass); __ verify_oop(Otos_i); - __ store_heap_oop(Otos_i, Rclass, Roffset); - __ store_check(G1_scratch, Rclass, Roffset); + + do_oop_store(_masm, Rclass, Roffset, 
0, Otos_i, G1_scratch, _bs->kind(), false); + patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch); __ ba(false, checkVolatile); __ delayed()->tst(Lscratch); @@ -2646,8 +2719,7 @@ __ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset); break; case Bytecodes::_fast_aputfield: - __ store_heap_oop(Otos_i, Rclass, Roffset); - __ store_check(G1_scratch, Rclass, Roffset); + do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false); break; default: ShouldNotReachHere();
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -5935,26 +5935,30 @@ Label& slow_case) { assert(obj == rax, "obj must be in rax, for cmpxchg"); assert_different_registers(obj, var_size_in_bytes, t1); - Register end = t1; - Label retry; - bind(retry); - ExternalAddress heap_top((address) Universe::heap()->top_addr()); - movptr(obj, heap_top); - if (var_size_in_bytes == noreg) { - lea(end, Address(obj, con_size_in_bytes)); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + jmp(slow_case); } else { - lea(end, Address(obj, var_size_in_bytes, Address::times_1)); - } - // if end < obj then we wrapped around => object too long => slow case - cmpptr(end, obj); - jcc(Assembler::below, slow_case); - cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); - jcc(Assembler::above, slow_case); - // Compare obj with the top addr, and if still equal, store the new top addr in - // end at the address of the top addr pointer. Sets ZF if was equal, and clears - // it otherwise. Use lock prefix for atomicity on MPs. - locked_cmpxchgptr(end, heap_top); - jcc(Assembler::notEqual, retry); + Register end = t1; + Label retry; + bind(retry); + ExternalAddress heap_top((address) Universe::heap()->top_addr()); + movptr(obj, heap_top); + if (var_size_in_bytes == noreg) { + lea(end, Address(obj, con_size_in_bytes)); + } else { + lea(end, Address(obj, var_size_in_bytes, Address::times_1)); + } + // if end < obj then we wrapped around => object too long => slow case + cmpptr(end, obj); + jcc(Assembler::below, slow_case); + cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); + jcc(Assembler::above, slow_case); + // Compare obj with the top addr, and if still equal, store the new top addr in + // end at the address of the top addr pointer. Sets ZF if was equal, and clears + // it otherwise. 
Use lock prefix for atomicity on MPs. + locked_cmpxchgptr(end, heap_top); + jcc(Assembler::notEqual, retry); + } } void MacroAssembler::enter() { @@ -6491,6 +6495,179 @@ } } +////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void MacroAssembler::g1_write_barrier_pre(Register obj, +#ifndef _LP64 + Register thread, +#endif + Register tmp, + Register tmp2, + bool tosca_live) { + LP64_ONLY(Register thread = r15_thread;) + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + + Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + + Label done; + Label runtime; + + // if (!marking_in_progress) goto done; + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + cmpl(in_progress, 0); + } else { + assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); + cmpb(in_progress, 0); + } + jcc(Assembler::equal, done); + + // if (x.f == NULL) goto done; + cmpptr(Address(obj, 0), NULL_WORD); + jcc(Assembler::equal, done); + + // Can we store original value in the thread's buffer? 
+ + LP64_ONLY(movslq(tmp, index);) + movptr(tmp2, Address(obj, 0)); +#ifdef _LP64 + cmpq(tmp, 0); +#else + cmpl(index, 0); +#endif + jcc(Assembler::equal, runtime); +#ifdef _LP64 + subq(tmp, wordSize); + movl(index, tmp); + addq(tmp, buffer); +#else + subl(index, wordSize); + movl(tmp, buffer); + addl(tmp, index); +#endif + movptr(Address(tmp, 0), tmp2); + jmp(done); + bind(runtime); + // save the live input values + if(tosca_live) push(rax); + push(obj); +#ifdef _LP64 + movq(c_rarg0, Address(obj, 0)); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread); +#else + push(thread); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread); + pop(thread); +#endif + pop(obj); + if(tosca_live) pop(rax); + bind(done); + +} + +void MacroAssembler::g1_write_barrier_post(Register store_addr, + Register new_val, +#ifndef _LP64 + Register thread, +#endif + Register tmp, + Register tmp2) { + + LP64_ONLY(Register thread = r15_thread;) + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + Label done; + Label runtime; + + // Does store cross heap regions? + + movptr(tmp, store_addr); + xorptr(tmp, new_val); + shrptr(tmp, HeapRegion::LogOfHRGrainBytes); + jcc(Assembler::equal, done); + + // crosses regions, storing NULL? + + cmpptr(new_val, (int32_t) NULL_WORD); + jcc(Assembler::equal, done); + + // storing region crossing non-NULL, is card already dirty? 
+ + ExternalAddress cardtable((address) ct->byte_map_base); + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); +#ifdef _LP64 + const Register card_addr = tmp; + + movq(card_addr, store_addr); + shrq(card_addr, CardTableModRefBS::card_shift); + + lea(tmp2, cardtable); + + // get the address of the card + addq(card_addr, tmp2); +#else + const Register card_index = tmp; + + movl(card_index, store_addr); + shrl(card_index, CardTableModRefBS::card_shift); + + Address index(noreg, card_index, Address::times_1); + const Register card_addr = tmp; + lea(card_addr, as_Address(ArrayAddress(cardtable, index))); +#endif + cmpb(Address(card_addr, 0), 0); + jcc(Assembler::equal, done); + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. + + movb(Address(card_addr, 0), 0); + + cmpl(queue_index, 0); + jcc(Assembler::equal, runtime); + subl(queue_index, wordSize); + movptr(tmp2, buffer); +#ifdef _LP64 + movslq(rscratch1, queue_index); + addq(tmp2, rscratch1); + movq(Address(tmp2, 0), card_addr); +#else + addl(tmp2, queue_index); + movl(Address(tmp2, 0), card_index); +#endif + jmp(done); + + bind(runtime); + // save the live input values + push(store_addr); + push(new_val); +#ifdef _LP64 + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); +#else + push(thread); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); + pop(thread); +#endif + pop(new_val); + pop(store_addr); + + bind(done); + +} + +#endif // SERIALGC +////////////////////////////////////////////////////////////////////////////////// + + void MacroAssembler::store_check(Register obj) { // Does a store check for the oop in register obj. The content of // register obj is destroyed afterwards.
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -227,9 +227,11 @@ #endif // ASSERT // accessors - bool uses(Register reg) const { - return _base == reg || _index == reg; - } + bool uses(Register reg) const { return _base == reg || _index == reg; } + Register base() const { return _base; } + Register index() const { return _index; } + ScaleFactor scale() const { return _scale; } + int disp() const { return _disp; } // Convert the raw encoding form into the form expected by the constructor for // Address. An index of 4 (rsp) corresponds to having no index, so convert @@ -1310,7 +1312,8 @@ // on arguments should also go in here. class MacroAssembler: public Assembler { - friend class LIR_Assembler; + friend class LIR_Assembler; + friend class Runtime1; // as_Address() protected: Address as_Address(AddressLiteral adr); @@ -1453,6 +1456,7 @@ // The pointer will be loaded into the thread register. void get_thread(Register thread); + // Support for VM calls // // It is imperative that all calls into the VM are handled via the call_VM macros. @@ -1527,6 +1531,22 @@ void store_check(Register obj); // store check for obj - register is destroyed afterwards void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) + void g1_write_barrier_pre(Register obj, +#ifndef _LP64 + Register thread, +#endif + Register tmp, + Register tmp2, + bool tosca_live); + void g1_write_barrier_post(Register store_addr, + Register new_val, +#ifndef _LP64 + Register thread, +#endif + Register tmp, + Register tmp2); + + // split store_check(Register obj) to enhance instruction interleaving void store_check_part_1(Register obj); void store_check_part_2(Register obj);
--- a/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -456,5 +456,50 @@ __ jmp(_continuation); } +///////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { + + // At this point we know that marking is in progress + + __ bind(_entry); + assert(pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = pre_val()->as_register(); + + ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false); + + __ cmpptr(pre_val_reg, (int32_t) NULL_WORD); + __ jcc(Assembler::equal, _continuation); + ce->store_parameter(pre_val()->as_register(), 0); + __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id))); + __ jmp(_continuation); + +} + +jbyte* G1PostBarrierStub::_byte_map_base = NULL; + +jbyte* G1PostBarrierStub::byte_map_base_slow() { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->is_a(BarrierSet::G1SATBCTLogging), + "Must be if we're using this."); + return ((G1SATBCardTableModRefBS*)bs)->byte_map_base; +} + +void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + assert(addr()->is_register(), "Precondition."); + assert(new_val()->is_register(), "Precondition."); + Register new_val_reg = new_val()->as_register(); + __ cmpptr(new_val_reg, (int32_t) NULL_WORD); + __ jcc(Assembler::equal, _continuation); + ce->store_parameter(addr()->as_register(), 0); + __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id))); + __ jmp(_continuation); +} + +#endif // SERIALGC +///////////////////////////////////////////////////////////////////////////// #undef __
--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -302,6 +302,8 @@ } if (obj_store) { + // Needs GC write barriers. + pre_barrier(LIR_OprFact::address(array_addr), false, NULL); __ move(value.result(), array_addr, null_check_info); // Seems to be a precise post_barrier(LIR_OprFact::address(array_addr), value.result()); @@ -756,7 +758,10 @@ __ move(obj.result(), addr); __ add(addr, offset.result(), addr); - + if (type == objectType) { // Write-barrier needed for Object fields. + // Do the pre-write barrier, if any. + pre_barrier(addr, false, NULL); + } LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience if (type == objectType) @@ -1286,6 +1291,8 @@ LIR_Address* addr = new LIR_Address(src, offset, type); bool is_obj = (type == T_ARRAY || type == T_OBJECT); if (is_obj) { + // Do the pre-write barrier, if any. + pre_barrier(LIR_OprFact::address(addr), false, NULL); __ move(data, addr); assert(src->is_register(), "must be register"); // Seems to be a precise address
--- a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -1583,6 +1583,166 @@ } break; +#ifndef SERIALGC + case g1_pre_barrier_slow_id: + { + StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments); + // arg0 : previous value of memory + + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ movptr(rax, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), rax); + __ should_not_reach_here(); + break; + } + + __ push(rax); + __ push(rdx); + + const Register pre_val = rax; + const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); + const Register tmp = rdx; + + NOT_LP64(__ get_thread(thread);) + + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + + Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + + Label done; + Label runtime; + + // Can we store original value in the thread's buffer? 
+ + LP64_ONLY(__ movslq(tmp, queue_index);) +#ifdef _LP64 + __ cmpq(tmp, 0); +#else + __ cmpl(queue_index, 0); +#endif + __ jcc(Assembler::equal, runtime); +#ifdef _LP64 + __ subq(tmp, wordSize); + __ movl(queue_index, tmp); + __ addq(tmp, buffer); +#else + __ subl(queue_index, wordSize); + __ movl(tmp, buffer); + __ addl(tmp, queue_index); +#endif + + // prev_val (rax) + f.load_argument(0, pre_val); + __ movptr(Address(tmp, 0), pre_val); + __ jmp(done); + + __ bind(runtime); + // load the pre-value + __ push(rcx); + f.load_argument(0, rcx); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), rcx, thread); + __ pop(rcx); + + __ bind(done); + __ pop(rdx); + __ pop(rax); + } + break; + + case g1_post_barrier_slow_id: + { + StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments); + + + // arg0: store_address + Address store_addr(rbp, 2*BytesPerWord); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + Label done; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses regsion. 
+ // Must check to see if card is already dirty + + const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + __ push(rax); + __ push(rdx); + + NOT_LP64(__ get_thread(thread);) + ExternalAddress cardtable((address)ct->byte_map_base); + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + const Register card_addr = rdx; +#ifdef _LP64 + const Register tmp = rscratch1; + f.load_argument(0, card_addr); + __ shrq(card_addr, CardTableModRefBS::card_shift); + __ lea(tmp, cardtable); + // get the address of the card + __ addq(card_addr, tmp); +#else + const Register card_index = rdx; + f.load_argument(0, card_index); + __ shrl(card_index, CardTableModRefBS::card_shift); + + Address index(noreg, card_index, Address::times_1); + __ leal(card_addr, __ as_Address(ArrayAddress(cardtable, index))); +#endif + + __ cmpb(Address(card_addr, 0), 0); + __ jcc(Assembler::equal, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. 
+ + __ movb(Address(card_addr, 0), 0); + + __ cmpl(queue_index, 0); + __ jcc(Assembler::equal, runtime); + __ subl(queue_index, wordSize); + + const Register buffer_addr = rbx; + __ push(rbx); + + __ movptr(buffer_addr, buffer); + +#ifdef _LP64 + __ movslq(rscratch1, queue_index); + __ addptr(buffer_addr, rscratch1); +#else + __ addptr(buffer_addr, queue_index); +#endif + __ movptr(Address(buffer_addr, 0), card_addr); + + __ pop(rbx); + __ jmp(done); + + __ bind(runtime); + NOT_LP64(__ push(rcx);) + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); + NOT_LP64(__ pop(rcx);) + + __ bind(done); + __ pop(rdx); + __ pop(rax); + + } + break; +#endif // !SERIALGC + default: { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); __ movptr(rax, (int)id);
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_64.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -44,8 +44,13 @@ // Note: No need to save/restore bcp & locals (r13 & r14) pointer // since these are callee saved registers and no blocking/ // GC can happen in leaf calls. + // Further Note: DO NOT save/restore bcp/locals. If a caller has + // already saved them so that it can use esi/edi as temporaries + // then a save/restore here will DESTROY the copy the caller + // saved! There used to be a save_bcp() that only happened in + // the ASSERT path (no restore_bcp). Which caused bizarre failures + // when jvm built with ASSERTs. #ifdef ASSERT - save_bcp(); { Label L; cmpptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD); @@ -58,24 +63,9 @@ // super call MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); // interpreter specific -#ifdef ASSERT - { - Label L; - cmpptr(r13, Address(rbp, frame::interpreter_frame_bcx_offset * wordSize)); - jcc(Assembler::equal, L); - stop("InterpreterMacroAssembler::call_VM_leaf_base:" - " r13 not callee saved?"); - bind(L); - } - { - Label L; - cmpptr(r14, Address(rbp, frame::interpreter_frame_locals_offset * wordSize)); - jcc(Assembler::equal, L); - stop("InterpreterMacroAssembler::call_VM_leaf_base:" - " r14 not callee saved?"); - bind(L); - } -#endif + // Used to ASSERT that r13/r14 were equal to frame's bcp/locals + // but since they may not have been saved (and we don't want to + // save thme here (see note above) the assert is invalid. } void InterpreterMacroAssembler::call_VM_base(Register oop_result,
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -712,7 +712,6 @@ // end - element count void gen_write_ref_array_pre_barrier(Register start, Register count) { assert_different_registers(start, count); -#if 0 // G1 only BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { case BarrierSet::G1SATBCT: @@ -721,8 +720,8 @@ __ pusha(); // push registers __ push(count); __ push(start); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)); - __ addl(esp, wordSize * 2); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre))); + __ addptr(rsp, 2*wordSize); __ popa(); } break; @@ -734,7 +733,6 @@ ShouldNotReachHere(); } -#endif // 0 - G1 only } @@ -750,20 +748,18 @@ BarrierSet* bs = Universe::heap()->barrier_set(); assert_different_registers(start, count); switch (bs->kind()) { -#if 0 // G1 only case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: { __ pusha(); // push registers __ push(count); __ push(start); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)); - __ addl(esp, wordSize * 2); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post))); + __ addptr(rsp, 2*wordSize); __ popa(); } break; -#endif // 0 G1 only case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: @@ -1378,9 +1374,9 @@ Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes()); // Copy from low to high addresses, indexed from the end of each array. + gen_write_ref_array_pre_barrier(to, count); __ lea(end_from, end_from_addr); __ lea(end_to, end_to_addr); - gen_write_ref_array_pre_barrier(to, count); assert(length == count, ""); // else fix next line: __ negptr(count); // negate and test the length __ jccb(Assembler::notZero, L_load_element);
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -1153,18 +1153,26 @@ // Destroy no registers! // void gen_write_ref_array_pre_barrier(Register addr, Register count) { -#if 0 // G1 - only - assert_different_registers(addr, c_rarg1); - assert_different_registers(count, c_rarg0); BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: { __ pusha(); // push registers - __ movptr(c_rarg0, addr); - __ movptr(c_rarg1, count); - __ call(RuntimeAddress(BarrierSet::static_write_ref_array_pre)); + if (count == c_rarg0) { + if (addr == c_rarg1) { + // exactly backwards!! + __ xchgptr(c_rarg1, c_rarg0); + } else { + __ movptr(c_rarg1, count); + __ movptr(c_rarg0, addr); + } + + } else { + __ movptr(c_rarg0, addr); + __ movptr(c_rarg1, count); + } + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre))); __ popa(); } break; @@ -1172,11 +1180,10 @@ case BarrierSet::CardTableExtension: case BarrierSet::ModRef: break; - default : + default: ShouldNotReachHere(); } -#endif // 0 G1 - only } // @@ -1193,7 +1200,6 @@ assert_different_registers(start, end, scratch); BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { -#if 0 // G1 - only case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: @@ -1206,11 +1212,10 @@ __ shrptr(scratch, LogBytesPerWord); __ mov(c_rarg0, start); __ mov(c_rarg1, scratch); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post))); __ popa(); } break; -#endif // 0 G1 - only case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: { @@ -1239,8 +1244,12 @@ __ decrement(count); __ jcc(Assembler::greaterEqual, L_loop); } - } - } + break; + default: + 
ShouldNotReachHere(); + + } + } // Copy big chunks forward // @@ -2282,7 +2291,7 @@ // and report their number to the caller. assert_different_registers(rax, r14_length, count, to, end_to, rcx); __ lea(end_to, to_element_addr); - gen_write_ref_array_post_barrier(to, end_to, rcx); + gen_write_ref_array_post_barrier(to, end_to, rscratch1); __ movptr(rax, r14_length); // original oops __ addptr(rax, count); // K = (original - remaining) oops __ notptr(rax); // report (-1^K) to caller @@ -2291,7 +2300,7 @@ // Come here on success only. __ BIND(L_do_card_marks); __ addptr(end_to, -wordSize); // make an inclusive end pointer - gen_write_ref_array_post_barrier(to, end_to, rcx); + gen_write_ref_array_post_barrier(to, end_to, rscratch1); __ xorptr(rax, rax); // return 0 on success // Common exit point (success or failure).
--- a/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -107,6 +107,78 @@ //---------------------------------------------------------------------------------------------------- // Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. +// If val == noreg this means store a NULL + +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address obj, + Register val, + BarrierSet::Name barrier, + bool precise) { + assert(val == noreg || val == rax, "parameter is just for looks"); + switch (barrier) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + // flatten object address if needed + // We do it regardless of precise because we need the registers + if (obj.index() == noreg && obj.disp() == 0) { + if (obj.base() != rdx) { + __ movl(rdx, obj.base()); + } + } else { + __ leal(rdx, obj); + } + __ get_thread(rcx); + __ save_bcp(); + __ g1_write_barrier_pre(rdx, rcx, rsi, rbx, val != noreg); + + // Do the actual store + // noreg means NULL + if (val == noreg) { + __ movl(Address(rdx, 0), NULL_WORD); + // No post barrier for NULL + } else { + __ movl(Address(rdx, 0), val); + __ g1_write_barrier_post(rdx, rax, rcx, rbx, rsi); + } + __ restore_bcp(); + + } + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (val == noreg) { + __ movl(obj, NULL_WORD); + } else { + __ movl(obj, val); + // flatten object address if needed + if (!precise || (obj.index() == noreg && obj.disp() == 0)) { + __ store_check(obj.base()); + } else { + __ leal(rdx, obj); + __ store_check(rdx); + } + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + if (val == noreg) { + __ movl(obj, NULL_WORD); + } else { + __ movl(obj, val); + } + break; + default : + ShouldNotReachHere(); + + } +} + Address TemplateTable::at_bcp(int offset) { 
assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); return Address(rsi, offset); @@ -876,6 +948,8 @@ __ movptr(rax, at_tos()); // Value __ movl(rcx, at_tos_p1()); // Index __ movptr(rdx, at_tos_p2()); // Array + + Address element_address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); index_check_without_pop(rdx, rcx); // kills rbx, // do array store check - check for NULL value first __ testptr(rax, rax); @@ -887,7 +961,7 @@ __ movptr(rax, Address(rdx, oopDesc::klass_offset_in_bytes())); __ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes())); // Compress array+index*wordSize+12 into a single register. Frees ECX. - __ lea(rdx, Address(rdx, rcx, Address::times_ptr, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + __ lea(rdx, element_address); // Generate subtype check. Blows ECX. Resets EDI to locals. // Superklass in EAX. Subklass in EBX. @@ -899,15 +973,20 @@ // Come here on success __ bind(ok_is_subtype); - __ movptr(rax, at_rsp()); // Value - __ movptr(Address(rdx, 0), rax); - __ store_check(rdx); - __ jmpb(done); + + // Get the value to store + __ movptr(rax, at_rsp()); + // and store it with appropriate barrier + do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true); + + __ jmp(done); // Have a NULL in EAX, EDX=array, ECX=index. Store NULL at ary[idx] __ bind(is_null); __ profile_null_seen(rbx); - __ movptr(Address(rdx, rcx, Address::times_ptr, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), rax); + + // Store NULL, (noreg means NULL to do_oop_store) + do_oop_store(_masm, element_address, noreg, _bs->kind(), true); // Pop stack arguments __ bind(done); @@ -1515,7 +1594,7 @@ // compute return address as bci in rax, __ lea(rax, at_bcp((is_wide ? 
5 : 3) - in_bytes(constMethodOopDesc::codes_offset()))); __ subptr(rax, Address(rcx, methodOopDesc::const_offset())); - // Adjust the bcp in ESI by the displacement in EDX + // Adjust the bcp in RSI by the displacement in EDX __ addptr(rsi, rdx); // Push return address __ push_i(rax); @@ -1526,7 +1605,7 @@ // Normal (non-jsr) branch handling - // Adjust the bcp in ESI by the displacement in EDX + // Adjust the bcp in RSI by the displacement in EDX __ addptr(rsi, rdx); assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); @@ -2439,11 +2518,12 @@ __ pop(atos); if (!is_static) pop_and_check_object(obj); - __ movptr(lo, rax ); - __ store_check(obj, lo); // Need to mark card + do_oop_store(_masm, lo, rax, _bs->kind(), false); + if (!is_static) { patch_bytecode(Bytecodes::_fast_aputfield, rcx, rbx); } + __ jmp(Done); __ bind(notObj); @@ -2664,7 +2744,10 @@ break; case Bytecodes::_fast_fputfield: __ fstp_s(lo); break; case Bytecodes::_fast_dputfield: __ fstp_d(lo); break; - case Bytecodes::_fast_aputfield: __ movptr(lo, rax); __ store_check(rcx, lo); break; + case Bytecodes::_fast_aputfield: { + do_oop_store(_masm, lo, rax, _bs->kind(), false); + break; + } default: ShouldNotReachHere(); } @@ -2672,7 +2755,8 @@ Label done; volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad | Assembler::StoreStore)); - __ jmpb(done); + // Barriers are so large that short branch doesn't reach! + __ jmp(done); // Same code as above, but don't need rdx to test for volatile. 
__ bind(notVolatile); @@ -2694,7 +2778,10 @@ break; case Bytecodes::_fast_fputfield: __ fstp_s(lo); break; case Bytecodes::_fast_dputfield: __ fstp_d(lo); break; - case Bytecodes::_fast_aputfield: __ movptr(lo, rax); __ store_check(rcx, lo); break; + case Bytecodes::_fast_aputfield: { + do_oop_store(_masm, lo, rax, _bs->kind(), false); + break; + } default: ShouldNotReachHere(); } @@ -3054,8 +3141,6 @@ Label initialize_object; // including clearing the fields Label allocate_shared; - ExternalAddress heap_top((address)Universe::heap()->top_addr()); - __ get_cpool_and_tags(rcx, rax); // get instanceKlass __ movptr(rcx, Address(rcx, rdx, Address::times_ptr, sizeof(constantPoolOopDesc))); @@ -3112,6 +3197,8 @@ if (allow_shared_alloc) { __ bind(allocate_shared); + ExternalAddress heap_top((address)Universe::heap()->top_addr()); + Label retry; __ bind(retry); __ movptr(rax, heap_top);
--- a/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -115,6 +115,69 @@ // Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. +// If val == noreg this means store a NULL + +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address obj, + Register val, + BarrierSet::Name barrier, + bool precise) { + assert(val == noreg || val == rax, "parameter is just for looks"); + switch (barrier) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + // flatten object address if needed + if (obj.index() == noreg && obj.disp() == 0) { + if (obj.base() != rdx) { + __ movq(rdx, obj.base()); + } + } else { + __ leaq(rdx, obj); + } + __ g1_write_barrier_pre(rdx, r8, rbx, val != noreg); + if (val == noreg) { + __ store_heap_oop(Address(rdx, 0), NULL_WORD); + } else { + __ store_heap_oop(Address(rdx, 0), val); + __ g1_write_barrier_post(rdx, val, r8, rbx); + } + + } + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (val == noreg) { + __ store_heap_oop(obj, NULL_WORD); + } else { + __ store_heap_oop(obj, val); + // flatten object address if needed + if (!precise || (obj.index() == noreg && obj.disp() == 0)) { + __ store_check(obj.base()); + } else { + __ leaq(rdx, obj); + __ store_check(rdx); + } + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + if (val == noreg) { + __ store_heap_oop(obj, NULL_WORD); + } else { + __ store_heap_oop(obj, val); + } + break; + default : + ShouldNotReachHere(); + + } +} Address TemplateTable::at_bcp(int offset) { assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); @@ -560,8 +623,8 @@ // rdx: array index_check(rdx, rax); // kills rbx __ load_heap_oop(rax, Address(rdx, rax, - UseCompressedOops ? 
Address::times_4 : Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + UseCompressedOops ? Address::times_4 : Address::times_8, + arrayOopDesc::base_offset_in_bytes(T_OBJECT))); } void TemplateTable::baload() { @@ -866,6 +929,11 @@ __ movptr(rax, at_tos()); // value __ movl(rcx, at_tos_p1()); // index __ movptr(rdx, at_tos_p2()); // array + + Address element_address(rdx, rcx, + UseCompressedOops? Address::times_4 : Address::times_8, + arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + index_check(rdx, rcx); // kills rbx // do array store check - check for NULL value first __ testptr(rax, rax); @@ -879,9 +947,7 @@ sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes())); // Compress array + index*oopSize + 12 into a single register. Frees rcx. - __ lea(rdx, Address(rdx, rcx, - UseCompressedOops ? Address::times_4 : Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + __ lea(rdx, element_address); // Generate subtype check. Blows rcx, rdi // Superklass in rax. Subklass in rbx. @@ -893,18 +959,19 @@ // Come here on success __ bind(ok_is_subtype); - __ movptr(rax, at_tos()); // Value - __ store_heap_oop(Address(rdx, 0), rax); - __ store_check(rdx); + + // Get the value we will store + __ movptr(rax, at_tos()); + // Now store using the appropriate barrier + do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true); __ jmp(done); // Have a NULL in rax, rdx=array, ecx=index. Store NULL at ary[idx] __ bind(is_null); __ profile_null_seen(rbx); - __ store_heap_oop(Address(rdx, rcx, - UseCompressedOops ? 
Address::times_4 : Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_OBJECT)), - rax); + + // Store a NULL + do_oop_store(_masm, element_address, noreg, _bs->kind(), true); // Pop stack arguments __ bind(done); @@ -2396,8 +2463,10 @@ // atos __ pop(atos); if (!is_static) pop_and_check_object(obj); - __ store_heap_oop(field, rax); - __ store_check(obj, field); // Need to mark card + + // Store into the field + do_oop_store(_masm, field, rax, _bs->kind(), false); + if (!is_static) { patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx); } @@ -2584,8 +2653,7 @@ // access field switch (bytecode()) { case Bytecodes::_fast_aputfield: - __ store_heap_oop(field, rax); - __ store_check(rcx, field); + do_oop_store(_masm, field, rax, _bs->kind(), false); break; case Bytecodes::_fast_lputfield: __ movq(field, rax); @@ -3044,8 +3112,6 @@ Label initialize_header; Label initialize_object; // including clearing the fields Label allocate_shared; - ExternalAddress top((address)Universe::heap()->top_addr()); - ExternalAddress end((address)Universe::heap()->end_addr()); __ get_cpool_and_tags(rsi, rax); // get instanceKlass @@ -3106,6 +3172,9 @@ if (allow_shared_alloc) { __ bind(allocate_shared); + ExternalAddress top((address)Universe::heap()->top_addr()); + ExternalAddress end((address)Universe::heap()->end_addr()); + const Register RtopAddr = rscratch1; const Register RendAddr = rscratch2;
--- a/hotspot/src/os/linux/vm/os_linux.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/os/linux/vm/os_linux.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -1261,6 +1261,17 @@ return (1000 * 1000); } +// For now, we say that linux does not support vtime. I have no idea +// whether it can actually be made to (DLD, 9/13/05). + +bool os::supports_vtime() { return false; } +bool os::enable_vtime() { return false; } +bool os::vtime_enabled() { return false; } +double os::elapsedVTime() { + // better than nothing, but not much + return elapsedTime(); +} + jlong os::javaTimeMillis() { timeval time; int status = gettimeofday(&time, NULL);
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/os/solaris/vm/os_solaris.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -1691,6 +1691,40 @@ } } +bool os::supports_vtime() { return true; } + +bool os::enable_vtime() { + int fd = open("/proc/self/ctl", O_WRONLY); + if (fd == -1) + return false; + + long cmd[] = { PCSET, PR_MSACCT }; + int res = write(fd, cmd, sizeof(long) * 2); + close(fd); + if (res != sizeof(long) * 2) + return false; + + return true; +} + +bool os::vtime_enabled() { + int fd = open("/proc/self/status", O_RDONLY); + if (fd == -1) + return false; + + pstatus_t status; + int res = read(fd, (void*) &status, sizeof(pstatus_t)); + close(fd); + if (res != sizeof(pstatus_t)) + return false; + + return status.pr_flags & PR_MSACCT; +} + +double os::elapsedVTime() { + return (double)gethrvtime() / (double)hrtime_hz; +} + // Used internally for comparisons only // getTimeMillis guaranteed to not move backwards on Solaris jlong getTimeMillis() { @@ -2688,7 +2722,7 @@ return bottom; } -// Detect the topology change. Typically happens during CPU pluggin-unplugging. +// Detect the topology change. Typically happens during CPU plugging-unplugging. bool os::numa_topology_changed() { int is_stale = Solaris::lgrp_cookie_stale(Solaris::lgrp_cookie()); if (is_stale != -1 && is_stale) {
--- a/hotspot/src/os/windows/vm/os_windows.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/os/windows/vm/os_windows.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -737,6 +737,17 @@ return result; } +// For now, we say that Windows does not support vtime. I have no idea +// whether it can actually be made to (DLD, 9/13/05). + +bool os::supports_vtime() { return false; } +bool os::enable_vtime() { return false; } +bool os::vtime_enabled() { return false; } +double os::elapsedVTime() { + // better than nothing, but not much + return elapsedTime(); +} + jlong os::javaTimeMillis() { if (UseFakeTimers) { return fake_time++;
--- a/hotspot/src/share/vm/adlc/formssel.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/adlc/formssel.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -3768,6 +3768,10 @@ int MatchRule::is_ideal_copy() const { if( _rChild ) { const char *opType = _rChild->_opType; +#if 1 + if( strcmp(opType,"CastIP")==0 ) + return 1; +#else if( strcmp(opType,"CastII")==0 ) return 1; // Do not treat *CastPP this way, because it @@ -3787,6 +3791,7 @@ // return 1; //if( strcmp(opType,"CastP2X")==0 ) // return 1; +#endif } if( is_chain_rule(_AD.globalNames()) && _lChild && strncmp(_lChild->_opType,"stackSlot",9)==0 )
--- a/hotspot/src/share/vm/asm/assembler.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/asm/assembler.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -249,8 +249,6 @@ bool MacroAssembler::needs_explicit_null_check(intptr_t offset) { // Exception handler checks the nmethod's implicit null checks table // only when this method returns false. -#ifndef SPARC - // Sparc does not have based addressing if (UseCompressedOops) { // The first page after heap_base is unmapped and // the 'offset' is equal to [heap_base + offset] for @@ -261,7 +259,6 @@ offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1)); } } -#endif // SPARC return offset < 0 || os::vm_page_size() <= offset; }
--- a/hotspot/src/share/vm/c1/c1_CodeStubs.hpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/c1/c1_CodeStubs.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -482,3 +482,81 @@ virtual void print_name(outputStream* out) const { out->print("ArrayCopyStub"); } #endif // PRODUCT }; + +////////////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +// Code stubs for Garbage-First barriers. +class G1PreBarrierStub: public CodeStub { + private: + LIR_Opr _addr; + LIR_Opr _pre_val; + LIR_PatchCode _patch_code; + CodeEmitInfo* _info; + + public: + // pre_val (a temporary register) must be a register; + // addr (the address of the field to be read) must be a LIR_Address + G1PreBarrierStub(LIR_Opr addr, LIR_Opr pre_val, LIR_PatchCode patch_code, CodeEmitInfo* info) : + _addr(addr), _pre_val(pre_val), _patch_code(patch_code), _info(info) + { + assert(_pre_val->is_register(), "should be temporary register"); + assert(_addr->is_address(), "should be the address of the field"); + } + + LIR_Opr addr() const { return _addr; } + LIR_Opr pre_val() const { return _pre_val; } + LIR_PatchCode patch_code() const { return _patch_code; } + CodeEmitInfo* info() const { return _info; } + + virtual void emit_code(LIR_Assembler* e); + virtual void visit(LIR_OpVisitState* visitor) { + // don't pass in the code emit info since it's processed in the fast + // path + if (_info != NULL) + visitor->do_slow_case(_info); + else + visitor->do_slow_case(); + visitor->do_input(_addr); + visitor->do_temp(_pre_val); + } +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("G1PreBarrierStub"); } +#endif // PRODUCT +}; + +class G1PostBarrierStub: public CodeStub { + private: + LIR_Opr _addr; + LIR_Opr _new_val; + + static jbyte* _byte_map_base; + static jbyte* byte_map_base_slow(); + static jbyte* byte_map_base() { + if (_byte_map_base == NULL) { + _byte_map_base = byte_map_base_slow(); + } + return _byte_map_base; + } + + public: + 
// addr (the address of the object head) and new_val must be registers. + G1PostBarrierStub(LIR_Opr addr, LIR_Opr new_val): _addr(addr), _new_val(new_val) { } + + LIR_Opr addr() const { return _addr; } + LIR_Opr new_val() const { return _new_val; } + + virtual void emit_code(LIR_Assembler* e); + virtual void visit(LIR_OpVisitState* visitor) { + // don't pass in the code emit info since it's processed in the fast path + visitor->do_slow_case(); + visitor->do_input(_addr); + visitor->do_input(_new_val); + } +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("G1PostBarrierStub"); } +#endif // PRODUCT +}; + +#endif // SERIALGC +//////////////////////////////////////////////////////////////////////////////////////////
--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -74,6 +74,7 @@ LIR_Assembler::LIR_Assembler(Compilation* c): _compilation(c) , _masm(c->masm()) + , _bs(Universe::heap()->barrier_set()) , _frame_map(c->frame_map()) , _current_block(NULL) , _pending_non_safepoint(NULL)
--- a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -24,11 +24,13 @@ class Compilation; class ScopeValue; +class BarrierSet; class LIR_Assembler: public CompilationResourceObj { private: C1_MacroAssembler* _masm; CodeStubList* _slow_case_stubs; + BarrierSet* _bs; Compilation* _compilation; FrameMap* _frame_map;
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -285,16 +285,7 @@ void LIRGenerator::init() { - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); - CardTableModRefBS* ct = (CardTableModRefBS*)bs; - assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); - -#ifdef _LP64 - _card_table_base = new LIR_Const((jlong)ct->byte_map_base); -#else - _card_table_base = new LIR_Const((jint)ct->byte_map_base); -#endif + _bs = Universe::heap()->barrier_set(); } @@ -1239,8 +1230,37 @@ // Various barriers +void LIRGenerator::pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info) { + // Do the pre-write barrier, if any. + switch (_bs->kind()) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + G1SATBCardTableModRef_pre_barrier(addr_opr, patch, info); + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + // No pre barriers + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + // No pre barriers + break; + default : + ShouldNotReachHere(); + + } +} + void LIRGenerator::post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) { - switch (Universe::heap()->barrier_set()->kind()) { + switch (_bs->kind()) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + G1SATBCardTableModRef_post_barrier(addr, new_val); + break; +#endif // SERIALGC case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: CardTableModRef_post_barrier(addr, new_val); @@ -1254,11 +1274,120 @@ } } +//////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info) { + if (G1DisablePreBarrier) return; + + // First we test whether marking is 
in progress. + BasicType flag_type; + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + flag_type = T_INT; + } else { + guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, + "Assumption"); + flag_type = T_BYTE; + } + LIR_Opr thrd = getThreadPointer(); + LIR_Address* mark_active_flag_addr = + new LIR_Address(thrd, + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()), + flag_type); + // Read the marking-in-progress flag. + LIR_Opr flag_val = new_register(T_INT); + __ load(mark_active_flag_addr, flag_val); + + LabelObj* start_store = new LabelObj(); + + LIR_PatchCode pre_val_patch_code = + patch ? lir_patch_normal : lir_patch_none; + + LIR_Opr pre_val = new_register(T_OBJECT); + + __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + if (!addr_opr->is_address()) { + assert(addr_opr->is_register(), "must be"); + addr_opr = LIR_OprFact::address(new LIR_Address(addr_opr, 0, T_OBJECT)); + } + CodeStub* slow = new G1PreBarrierStub(addr_opr, pre_val, pre_val_patch_code, + info); + __ branch(lir_cond_notEqual, T_INT, slow); + __ branch_destination(slow->continuation()); +} + +void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) { + if (G1DisablePostBarrier) return; + + // If the "new_val" is a constant NULL, no barrier is necessary. 
+ if (new_val->is_constant() && + new_val->as_constant_ptr()->as_jobject() == NULL) return; + + if (!new_val->is_register()) { + LIR_Opr new_val_reg = new_pointer_register(); + if (new_val->is_constant()) { + __ move(new_val, new_val_reg); + } else { + __ leal(new_val, new_val_reg); + } + new_val = new_val_reg; + } + assert(new_val->is_register(), "must be a register at this point"); + + if (addr->is_address()) { + LIR_Address* address = addr->as_address_ptr(); + LIR_Opr ptr = new_pointer_register(); + if (!address->index()->is_valid() && address->disp() == 0) { + __ move(address->base(), ptr); + } else { + assert(address->disp() != max_jint, "lea doesn't support patched addresses!"); + __ leal(addr, ptr); + } + addr = ptr; + } + assert(addr->is_register(), "must be a register at this point"); + + LIR_Opr xor_res = new_pointer_register(); + LIR_Opr xor_shift_res = new_pointer_register(); + + if (TwoOperandLIRForm ) { + __ move(addr, xor_res); + __ logical_xor(xor_res, new_val, xor_res); + __ move(xor_res, xor_shift_res); + __ unsigned_shift_right(xor_shift_res, + LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes), + xor_shift_res, + LIR_OprDesc::illegalOpr()); + } else { + __ logical_xor(addr, new_val, xor_res); + __ unsigned_shift_right(xor_res, + LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes), + xor_shift_res, + LIR_OprDesc::illegalOpr()); + } + + if (!new_val->is_register()) { + LIR_Opr new_val_reg = new_pointer_register(); + __ leal(new_val, new_val_reg); + new_val = new_val_reg; + } + assert(new_val->is_register(), "must be a register at this point"); + + __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD)); + + CodeStub* slow = new G1PostBarrierStub(addr, new_val); + __ branch(lir_cond_notEqual, T_INT, slow); + __ branch_destination(slow->continuation()); +} + +#endif // SERIALGC +//////////////////////////////////////////////////////////////////////// + void LIRGenerator::CardTableModRef_post_barrier(LIR_OprDesc* addr, 
LIR_OprDesc* new_val) { - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(sizeof(*((CardTableModRefBS*)bs)->byte_map_base) == sizeof(jbyte), "adjust this code"); - LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)bs)->byte_map_base); + assert(sizeof(*((CardTableModRefBS*)_bs)->byte_map_base) == sizeof(jbyte), "adjust this code"); + LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)_bs)->byte_map_base); if (addr->is_address()) { LIR_Address* address = addr->as_address_ptr(); LIR_Opr ptr = new_register(T_OBJECT); @@ -1388,6 +1517,13 @@ __ membar_release(); } + if (is_oop) { + // Do the pre-write barrier, if any. + pre_barrier(LIR_OprFact::address(address), + needs_patching, + (info ? new CodeEmitInfo(info) : NULL)); + } + if (is_volatile) { assert(!needs_patching && x->is_loaded(), "how do we know it's volatile if it's not loaded"); @@ -1398,7 +1534,12 @@ } if (is_oop) { +#ifdef PRECISE_CARDMARK + // Precise cardmarks don't work + post_barrier(LIR_OprFact::address(address), value.result()); +#else post_barrier(object.result(), value.result()); +#endif // PRECISE_CARDMARK } if (is_volatile && os::is_MP()) {
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -145,6 +145,7 @@ // only the classes below belong in the same file class LIRGenerator: public InstructionVisitor, public BlockClosure { + private: Compilation* _compilation; ciMethod* _method; // method that we are compiling @@ -154,6 +155,7 @@ Values _instruction_for_operand; BitMap2D _vreg_flags; // flags which can be set on a per-vreg basis LIR_List* _lir; + BarrierSet* _bs; LIRGenerator* gen() { return this; @@ -174,8 +176,6 @@ LIR_OprList _reg_for_constants; Values _unpinned_constants; - LIR_Const* _card_table_base; - friend class PhiResolver; // unified bailout support @@ -196,8 +196,6 @@ LIR_Opr load_constant(Constant* x); LIR_Opr load_constant(LIR_Const* constant); - LIR_Const* card_table_base() const { return _card_table_base; } - void set_result(Value x, LIR_Opr opr) { assert(opr->is_valid(), "must set to valid value"); assert(x->operand()->is_illegal(), "operand should never change"); @@ -253,12 +251,17 @@ // generic interface + void pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info); void post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val); // specific implementations + // pre barriers + + void G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info); // post barriers + void G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val); void CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -168,6 +168,8 @@ switch (id) { // These stubs don't need to have an oopmap case dtrace_object_alloc_id: + case g1_pre_barrier_slow_id: + case g1_post_barrier_slow_id: case slow_subtype_check_id: case fpu2long_stub_id: case unwind_exception_id:
--- a/hotspot/src/share/vm/c1/c1_Runtime1.hpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/c1/c1_Runtime1.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -56,6 +56,8 @@ stub(access_field_patching) \ stub(load_klass_patching) \ stub(jvmti_exception_throw) \ + stub(g1_pre_barrier_slow) \ + stub(g1_post_barrier_slow) \ stub(fpu2long_stub) \ stub(counter_overflow) \ last_entry(number_of_ids)
--- a/hotspot/src/share/vm/c1/c1_globals.hpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/c1/c1_globals.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -213,9 +213,6 @@ develop(bool, UseFastLocking, true, \ "Use fast inlined locking code") \ \ - product(bool, FastTLABRefill, true, \ - "Use fast TLAB refill code") \ - \ develop(bool, UseSlowPath, false, \ "For debugging: test slow cases by always using them") \ \
--- a/hotspot/src/share/vm/ci/ciMethodBlocks.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/ci/ciMethodBlocks.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -49,7 +49,7 @@ // first half. Returns the range beginning at bci. ciBlock *ciMethodBlocks::split_block_at(int bci) { ciBlock *former_block = block_containing(bci); - ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, this, former_block->start_bci()); + ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, former_block->start_bci()); _blocks->append(new_block); assert(former_block != NULL, "must not be NULL"); new_block->set_limit_bci(bci); @@ -83,7 +83,7 @@ if (cb == NULL ) { // This is our first time visiting this bytecode. Create // a fresh block and assign it this starting point. - ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, this, bci); + ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, bci); _blocks->append(nb); _bci_to_block[bci] = nb; return nb; @@ -98,6 +98,11 @@ } } +ciBlock *ciMethodBlocks::make_dummy_block() { + ciBlock *dum = new(_arena) ciBlock(_method, -1, 0); + return dum; +} + void ciMethodBlocks::do_analysis() { ciBytecodeStream s(_method); ciBlock *cur_block = block_containing(0); @@ -253,7 +258,7 @@ Copy::zero_to_words((HeapWord*) _bci_to_block, b2bsize / sizeof(HeapWord)); // create initial block covering the entire method - ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, this, 0); + ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, 0); _blocks->append(b); _bci_to_block[0] = b; @@ -334,7 +339,7 @@ #endif -ciBlock::ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci) : +ciBlock::ciBlock(ciMethod *method, int index, int start_bci) : #ifndef PRODUCT _method(method), #endif
--- a/hotspot/src/share/vm/ci/ciMethodBlocks.hpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/ci/ciMethodBlocks.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -48,6 +48,8 @@ int num_blocks() { return _num_blocks;} void clear_processed(); + ciBlock *make_dummy_block(); // a block not associated with a bci + #ifndef PRODUCT void dump(); #endif @@ -81,7 +83,7 @@ fall_through_bci = -1 }; - ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci); + ciBlock(ciMethod *method, int index, int start_bci); int start_bci() const { return _start_bci; } int limit_bci() const { return _limit_bci; } int control_bci() const { return _control_bci; } @@ -94,7 +96,6 @@ int ex_limit_bci() const { return _ex_limit_bci; } bool contains(int bci) const { return start_bci() <= bci && bci < limit_bci(); } - // flag handling bool processed() const { return (_flags & Processed) != 0; } bool is_handler() const { return (_flags & Handler) != 0; }
--- a/hotspot/src/share/vm/ci/ciTypeFlow.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/ci/ciTypeFlow.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -338,8 +338,10 @@ } _trap_bci = -1; _trap_index = 0; + _def_locals.clear(); } + // ------------------------------------------------------------------ // ciTypeFlow::get_start_state // @@ -735,7 +737,7 @@ void ciTypeFlow::StateVector::do_new(ciBytecodeStream* str) { bool will_link; ciKlass* klass = str->get_klass(will_link); - if (!will_link) { + if (!will_link || str->is_unresolved_klass()) { trap(str, klass, str->get_klass_index()); } else { push_object(klass); @@ -1268,7 +1270,9 @@ } case Bytecodes::_iinc: { - check_int(local(str->get_index())); + int lnum = str->get_index(); + check_int(local(lnum)); + store_to_local(lnum); break; } case Bytecodes::_iload: load_local_int(str->get_index()); break; @@ -1506,6 +1510,46 @@ } #endif + +// ------------------------------------------------------------------ +// ciTypeFlow::SuccIter::next +// +void ciTypeFlow::SuccIter::next() { + int succ_ct = _pred->successors()->length(); + int next = _index + 1; + if (next < succ_ct) { + _index = next; + _succ = _pred->successors()->at(next); + return; + } + for (int i = next - succ_ct; i < _pred->exceptions()->length(); i++) { + // Do not compile any code for unloaded exception types. + // Following compiler passes are responsible for doing this also. 
+ ciInstanceKlass* exception_klass = _pred->exc_klasses()->at(i); + if (exception_klass->is_loaded()) { + _index = next; + _succ = _pred->exceptions()->at(i); + return; + } + next++; + } + _index = -1; + _succ = NULL; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::SuccIter::set_succ +// +void ciTypeFlow::SuccIter::set_succ(Block* succ) { + int succ_ct = _pred->successors()->length(); + if (_index < succ_ct) { + _pred->successors()->at_put(_index, succ); + } else { + int idx = _index - succ_ct; + _pred->exceptions()->at_put(idx, succ); + } +} + // ciTypeFlow::Block // // A basic block. @@ -1526,10 +1570,11 @@ _jsrs = new_jsrs; _next = NULL; _on_work_list = false; - _pre_order = -1; assert(!has_pre_order(), ""); - _private_copy = false; + _backedge_copy = false; + _exception_entry = false; _trap_bci = -1; _trap_index = 0; + df_init(); if (CITraceTypeFlow) { tty->print_cr(">> Created new block"); @@ -1541,55 +1586,13 @@ } // ------------------------------------------------------------------ -// ciTypeFlow::Block::clone_loop_head -// -ciTypeFlow::Block* -ciTypeFlow::Block::clone_loop_head(ciTypeFlow* analyzer, - int branch_bci, - ciTypeFlow::Block* target, - ciTypeFlow::JsrSet* jsrs) { - // Loop optimizations are not performed on Tier1 compiles. Do nothing. - if (analyzer->env()->comp_level() < CompLevel_full_optimization) { - return target; - } - - // The current block ends with a branch. - // - // If the target block appears to be the test-clause of a for loop, and - // it is not too large, and it has not yet been cloned, clone it. - // The pre-existing copy becomes the private clone used only by - // the initial iteration of the loop. (We know we are simulating - // the initial iteration right now, since we have never calculated - // successors before for this block.) 
- - if (branch_bci <= start() - && (target->limit() - target->start()) <= CICloneLoopTestLimit - && target->private_copy_count() == 0) { - // Setting the private_copy bit ensures that the target block cannot be - // reached by any other paths, such as fall-in from the loop body. - // The private copy will be accessible only on successor lists - // created up to this point. - target->set_private_copy(true); - if (CITraceTypeFlow) { - tty->print(">> Cloning a test-clause block "); - print_value_on(tty); - tty->cr(); - } - // If the target is the current block, then later on a new copy of the - // target block will be created when its bytecodes are reached by - // an alternate path. (This is the case for loops with the loop - // head at the bci-wise bottom of the loop, as with pre-1.4.2 javac.) - // - // Otherwise, duplicate the target block now and use it immediately. - // (The case for loops with the loop head at the bci-wise top of the - // loop, as with 1.4.2 javac.) - // - // In either case, the new copy of the block will remain public. - if (target != this) { - target = analyzer->block_at(branch_bci, jsrs); - } - } - return target; +// ciTypeFlow::Block::df_init +void ciTypeFlow::Block::df_init() { + _pre_order = -1; assert(!has_pre_order(), ""); + _post_order = -1; assert(!has_post_order(), ""); + _loop = NULL; + _irreducible_entry = false; + _rpo_next = NULL; } // ------------------------------------------------------------------ @@ -1644,7 +1647,6 @@ case Bytecodes::_ifnull: case Bytecodes::_ifnonnull: // Our successors are the branch target and the next bci. 
branch_bci = str->get_dest(); - clone_loop_head(analyzer, branch_bci, this, jsrs); _successors = new (arena) GrowableArray<Block*>(arena, 2, 0, NULL); assert(_successors->length() == IF_NOT_TAKEN, ""); @@ -1658,14 +1660,7 @@ _successors = new (arena) GrowableArray<Block*>(arena, 1, 0, NULL); assert(_successors->length() == GOTO_TARGET, ""); - target = analyzer->block_at(branch_bci, jsrs); - // If the target block has not been visited yet, and looks like - // a two-way branch, attempt to clone it if it is a loop head. - if (target->_successors != NULL - && target->_successors->length() == (IF_TAKEN + 1)) { - target = clone_loop_head(analyzer, branch_bci, target, jsrs); - } - _successors->append(target); + _successors->append(analyzer->block_at(branch_bci, jsrs)); break; case Bytecodes::_jsr: @@ -1801,65 +1796,60 @@ } // ------------------------------------------------------------------ -// ciTypeFlow::Block::is_simpler_than -// -// A relation used to order our work list. We work on a block earlier -// if it has a smaller jsr stack or it occurs earlier in the program -// text. -// -// Note: maybe we should redo this functionality to make blocks -// which correspond to exceptions lower priority. 
-bool ciTypeFlow::Block::is_simpler_than(ciTypeFlow::Block* other) { - if (other == NULL) { - return true; - } else { - int size1 = _jsrs->size(); - int size2 = other->_jsrs->size(); - if (size1 < size2) { - return true; - } else if (size2 < size1) { - return false; - } else { -#if 0 - if (size1 > 0) { - int r1 = _jsrs->record_at(0)->return_address(); - int r2 = _jsrs->record_at(0)->return_address(); - if (r1 < r2) { - return true; - } else if (r2 < r1) { - return false; - } else { - int e1 = _jsrs->record_at(0)->return_address(); - int e2 = _jsrs->record_at(0)->return_address(); - if (e1 < e2) { - return true; - } else if (e2 < e1) { - return false; - } - } - } -#endif - return (start() <= other->start()); - } - } +// ciTypeFlow::Block::set_backedge_copy +// Use this only to make a pre-existing public block into a backedge copy. +void ciTypeFlow::Block::set_backedge_copy(bool z) { + assert(z || (z == is_backedge_copy()), "cannot make a backedge copy public"); + _backedge_copy = z; } // ------------------------------------------------------------------ -// ciTypeFlow::Block::set_private_copy -// Use this only to make a pre-existing public block into a private copy. -void ciTypeFlow::Block::set_private_copy(bool z) { - assert(z || (z == is_private_copy()), "cannot make a private copy public"); - _private_copy = z; +// ciTypeFlow::Block::is_clonable_exit +// +// At most 2 normal successors, one of which continues looping, +// and all exceptional successors must exit. 
+bool ciTypeFlow::Block::is_clonable_exit(ciTypeFlow::Loop* lp) { + int normal_cnt = 0; + int in_loop_cnt = 0; + for (SuccIter iter(this); !iter.done(); iter.next()) { + Block* succ = iter.succ(); + if (iter.is_normal_ctrl()) { + if (++normal_cnt > 2) return false; + if (lp->contains(succ->loop())) { + if (++in_loop_cnt > 1) return false; + } + } else { + if (lp->contains(succ->loop())) return false; + } + } + return in_loop_cnt == 1; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::Block::looping_succ +// +ciTypeFlow::Block* ciTypeFlow::Block::looping_succ(ciTypeFlow::Loop* lp) { + assert(successors()->length() <= 2, "at most 2 normal successors"); + for (SuccIter iter(this); !iter.done(); iter.next()) { + Block* succ = iter.succ(); + if (lp->contains(succ->loop())) { + return succ; + } + } + return NULL; } #ifndef PRODUCT // ------------------------------------------------------------------ // ciTypeFlow::Block::print_value_on void ciTypeFlow::Block::print_value_on(outputStream* st) const { - if (has_pre_order()) st->print("#%-2d ", pre_order()); + if (has_pre_order()) st->print("#%-2d ", pre_order()); + if (has_rpo()) st->print("rpo#%-2d ", rpo()); st->print("[%d - %d)", start(), limit()); + if (is_loop_head()) st->print(" lphd"); + if (is_irreducible_entry()) st->print(" irred"); if (_jsrs->size() > 0) { st->print("/"); _jsrs->print_on(st); } - if (is_private_copy()) st->print("/private_copy"); + if (is_backedge_copy()) st->print("/backedge_copy"); } // ------------------------------------------------------------------ @@ -1871,6 +1861,16 @@ st->print_cr(" ==================================================== "); st->print (" "); print_value_on(st); + st->print(" Stored locals: "); def_locals()->print_on(st, outer()->method()->max_locals()); tty->cr(); + if (loop() && loop()->parent() != NULL) { + st->print(" loops:"); + Loop* lp = loop(); + do { + st->print(" %d<-%d", lp->head()->pre_order(),lp->tail()->pre_order()); + 
if (lp->is_irreducible()) st->print("(ir)"); + lp = lp->parent(); + } while (lp->parent() != NULL); + } st->cr(); _state->print_on(st); if (_successors == NULL) { @@ -1907,6 +1907,21 @@ } #endif +#ifndef PRODUCT +// ------------------------------------------------------------------ +// ciTypeFlow::LocalSet::print_on +void ciTypeFlow::LocalSet::print_on(outputStream* st, int limit) const { + st->print("{"); + for (int i = 0; i < max; i++) { + if (test(i)) st->print(" %d", i); + } + if (limit > max) { + st->print(" %d..%d ", max, limit); + } + st->print(" }"); +} +#endif + // ciTypeFlow // // This is a pass over the bytecodes which computes the following: @@ -1922,12 +1937,11 @@ _max_locals = method->max_locals(); _max_stack = method->max_stack(); _code_size = method->code_size(); + _has_irreducible_entry = false; _osr_bci = osr_bci; _failure_reason = NULL; assert(start_bci() >= 0 && start_bci() < code_size() , "correct osr_bci argument"); - _work_list = NULL; - _next_pre_order = 0; _ciblock_count = _methodBlocks->num_blocks(); _idx_to_blocklist = NEW_ARENA_ARRAY(arena(), GrowableArray<Block*>*, _ciblock_count); @@ -1949,12 +1963,6 @@ _work_list = next_block->next(); next_block->set_next(NULL); next_block->set_on_work_list(false); - if (!next_block->has_pre_order()) { - // Assign "pre_order" as each new block is taken from the work list. - // This number may be used by following phases to order block visits. - assert(!have_block_count(), "must not have mapped blocks yet") - next_block->set_pre_order(_next_pre_order++); - } return next_block; } @@ -1962,30 +1970,37 @@ // ciTypeFlow::add_to_work_list // // Add a basic block to our work list. +// List is sorted by decreasing postorder sort (same as increasing RPO) void ciTypeFlow::add_to_work_list(ciTypeFlow::Block* block) { assert(!block->is_on_work_list(), "must not already be on work list"); if (CITraceTypeFlow) { - tty->print(">> Adding block%s ", block->has_pre_order() ? 
" (again)" : ""); + tty->print(">> Adding block "); block->print_value_on(tty); tty->print_cr(" to the work list : "); } block->set_on_work_list(true); - if (block->is_simpler_than(_work_list)) { + + // decreasing post order sort + + Block* prev = NULL; + Block* current = _work_list; + int po = block->post_order(); + while (current != NULL) { + if (!current->has_post_order() || po > current->post_order()) + break; + prev = current; + current = current->next(); + } + if (prev == NULL) { block->set_next(_work_list); _work_list = block; } else { - Block *temp = _work_list; - while (!block->is_simpler_than(temp->next())) { - if (CITraceTypeFlow) { - tty->print("."); - } - temp = temp->next(); - } - block->set_next(temp->next()); - temp->set_next(block); + block->set_next(current); + prev->set_next(block); } + if (CITraceTypeFlow) { tty->cr(); } @@ -2008,7 +2023,7 @@ assert(ciblk->start_bci() == bci, "bad ciBlock boundaries"); Block* block = get_block_for(ciblk->index(), jsrs, option); - assert(block == NULL? (option == no_create): block->is_private_copy() == (option == create_private_copy), "create option consistent with result"); + assert(block == NULL? (option == no_create): block->is_backedge_copy() == (option == create_backedge_copy), "create option consistent with result"); if (CITraceTypeFlow) { if (block != NULL) { @@ -2072,8 +2087,9 @@ } if (block->meet_exception(exception_klass, state)) { - // Block was modified. Add it to the work list. - if (!block->is_on_work_list()) { + // Block was modified and has PO. Add it to the work list. + if (block->has_post_order() && + !block->is_on_work_list()) { add_to_work_list(block); } } @@ -2091,8 +2107,9 @@ for (int i = 0; i < len; i++) { Block* block = successors->at(i); if (block->meet(state)) { - // Block was modified. Add it to the work list. - if (!block->is_on_work_list()) { + // Block was modified and has PO. Add it to the work list. 
+ if (block->has_post_order() && + !block->is_on_work_list()) { add_to_work_list(block); } } @@ -2133,6 +2150,111 @@ return true; } +// ------------------------------------------------------------------ +// ciTypeFlow::clone_loop_heads +// +// Clone the loop heads +bool ciTypeFlow::clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) { + bool rslt = false; + for (PreorderLoops iter(loop_tree_root()); !iter.done(); iter.next()) { + lp = iter.current(); + Block* head = lp->head(); + if (lp == loop_tree_root() || + lp->is_irreducible() || + !head->is_clonable_exit(lp)) + continue; + + // check not already cloned + if (head->backedge_copy_count() != 0) + continue; + + // check _no_ shared head below us + Loop* ch; + for (ch = lp->child(); ch != NULL && ch->head() != head; ch = ch->sibling()); + if (ch != NULL) + continue; + + // Clone head + Block* new_head = head->looping_succ(lp); + Block* clone = clone_loop_head(lp, temp_vector, temp_set); + // Update lp's info + clone->set_loop(lp); + lp->set_head(new_head); + lp->set_tail(clone); + // And move original head into outer loop + head->set_loop(lp->parent()); + + rslt = true; + } + return rslt; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::clone_loop_head +// +// Clone lp's head and replace tail's successors with clone. 
+// +// | +// v +// head <-> body +// | +// v +// exit +// +// new_head +// +// | +// v +// head ----------\ +// | | +// | v +// | clone <-> body +// | | +// | /--/ +// | | +// v v +// exit +// +ciTypeFlow::Block* ciTypeFlow::clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) { + Block* head = lp->head(); + Block* tail = lp->tail(); + if (CITraceTypeFlow) { + tty->print(">> Requesting clone of loop head "); head->print_value_on(tty); + tty->print(" for predecessor "); tail->print_value_on(tty); + tty->cr(); + } + Block* clone = block_at(head->start(), head->jsrs(), create_backedge_copy); + assert(clone->backedge_copy_count() == 1, "one backedge copy for all back edges"); + + assert(!clone->has_pre_order(), "just created"); + clone->set_next_pre_order(); + + // Insert clone after (orig) tail in reverse post order + clone->set_rpo_next(tail->rpo_next()); + tail->set_rpo_next(clone); + + // tail->head becomes tail->clone + for (SuccIter iter(tail); !iter.done(); iter.next()) { + if (iter.succ() == head) { + iter.set_succ(clone); + break; + } + } + flow_block(tail, temp_vector, temp_set); + if (head == tail) { + // For self-loops, clone->head becomes clone->clone + flow_block(clone, temp_vector, temp_set); + for (SuccIter iter(clone); !iter.done(); iter.next()) { + if (iter.succ() == head) { + iter.set_succ(clone); + break; + } + } + } + flow_block(clone, temp_vector, temp_set); + + return clone; +} // ------------------------------------------------------------------ // ciTypeFlow::flow_block @@ -2159,11 +2281,14 @@ // Grab the state from the current block. 
block->copy_state_into(state); + state->def_locals()->clear(); GrowableArray<Block*>* exceptions = block->exceptions(); GrowableArray<ciInstanceKlass*>* exc_klasses = block->exc_klasses(); bool has_exceptions = exceptions->length() > 0; + bool exceptions_used = false; + ciBytecodeStream str(method()); str.reset_to_bci(start); Bytecodes::Code code; @@ -2172,6 +2297,7 @@ // Check for exceptional control flow from this point. if (has_exceptions && can_trap(str)) { flow_exceptions(exceptions, exc_klasses, state); + exceptions_used = true; } // Apply the effects of the current bytecode to our state. bool res = state->apply_one_bytecode(&str); @@ -2189,9 +2315,14 @@ block->print_on(tty); } + // Save set of locals defined in this block + block->def_locals()->add(state->def_locals()); + // Record (no) successors. block->successors(&str, state, jsrs); + assert(!has_exceptions || exceptions_used, "Not removing exceptions"); + // Discontinue interpretation of this Block. return; } @@ -2202,6 +2333,7 @@ // Check for exceptional control flow from this point. if (has_exceptions && can_trap(str)) { flow_exceptions(exceptions, exc_klasses, state); + exceptions_used = true; } // Fix the JsrSet to reflect effect of the bytecode. @@ -2218,11 +2350,306 @@ successors = block->successors(&str, NULL, NULL); } + // Save set of locals defined in this block + block->def_locals()->add(state->def_locals()); + + // Remove untaken exception paths + if (!exceptions_used) + exceptions->clear(); + // Pass our state to successors. flow_successors(successors, state); } // ------------------------------------------------------------------ +// ciTypeFlow::PostOrderLoops::next +// +// Advance to next loop tree using a postorder, left-to-right traversal. 
+void ciTypeFlow::PostorderLoops::next() { + assert(!done(), "must not be done."); + if (_current->sibling() != NULL) { + _current = _current->sibling(); + while (_current->child() != NULL) { + _current = _current->child(); + } + } else { + _current = _current->parent(); + } +} + +// ------------------------------------------------------------------ +// ciTypeFlow::PreOrderLoops::next +// +// Advance to next loop tree using a preorder, left-to-right traversal. +void ciTypeFlow::PreorderLoops::next() { + assert(!done(), "must not be done."); + if (_current->child() != NULL) { + _current = _current->child(); + } else if (_current->sibling() != NULL) { + _current = _current->sibling(); + } else { + while (_current != _root && _current->sibling() == NULL) { + _current = _current->parent(); + } + if (_current == _root) { + _current = NULL; + assert(done(), "must be done."); + } else { + assert(_current->sibling() != NULL, "must be more to do"); + _current = _current->sibling(); + } + } +} + +// ------------------------------------------------------------------ +// ciTypeFlow::Loop::sorted_merge +// +// Merge the branch lp into this branch, sorting on the loop head +// pre_orders. Returns the leaf of the merged branch. +// Child and sibling pointers will be setup later. +// Sort is (looking from leaf towards the root) +// descending on primary key: loop head's pre_order, and +// ascending on secondary key: loop tail's pre_order. 
+ciTypeFlow::Loop* ciTypeFlow::Loop::sorted_merge(Loop* lp) { + Loop* leaf = this; + Loop* prev = NULL; + Loop* current = leaf; + while (lp != NULL) { + int lp_pre_order = lp->head()->pre_order(); + // Find insertion point for "lp" + while (current != NULL) { + if (current == lp) + return leaf; // Already in list + if (current->head()->pre_order() < lp_pre_order) + break; + if (current->head()->pre_order() == lp_pre_order && + current->tail()->pre_order() > lp->tail()->pre_order()) { + break; + } + prev = current; + current = current->parent(); + } + Loop* next_lp = lp->parent(); // Save future list of items to insert + // Insert lp before current + lp->set_parent(current); + if (prev != NULL) { + prev->set_parent(lp); + } else { + leaf = lp; + } + prev = lp; // Inserted item is new prev[ious] + lp = next_lp; // Next item to insert + } + return leaf; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::build_loop_tree +// +// Incrementally build loop tree. +void ciTypeFlow::build_loop_tree(Block* blk) { + assert(!blk->is_post_visited(), "precondition"); + Loop* innermost = NULL; // merge of loop tree branches over all successors + + for (SuccIter iter(blk); !iter.done(); iter.next()) { + Loop* lp = NULL; + Block* succ = iter.succ(); + if (!succ->is_post_visited()) { + // Found backedge since predecessor post visited, but successor is not + assert(succ->pre_order() <= blk->pre_order(), "should be backedge"); + + // Create a LoopNode to mark this loop. + lp = new (arena()) Loop(succ, blk); + if (succ->loop() == NULL) + succ->set_loop(lp); + // succ->loop will be updated to innermost loop on a later call, when blk==succ + + } else { // Nested loop + lp = succ->loop(); + + // If succ is loop head, find outer loop. + while (lp != NULL && lp->head() == succ) { + lp = lp->parent(); + } + if (lp == NULL) { + // Infinite loop, it's parent is the root + lp = loop_tree_root(); + } + } + + // Check for irreducible loop. 
+ // Successor has already been visited. If the successor's loop head + // has already been post-visited, then this is another entry into the loop. + while (lp->head()->is_post_visited() && lp != loop_tree_root()) { + _has_irreducible_entry = true; + lp->set_irreducible(succ); + if (!succ->is_on_work_list()) { + // Assume irreducible entries need more data flow + add_to_work_list(succ); + } + lp = lp->parent(); + assert(lp != NULL, "nested loop must have parent by now"); + } + + // Merge loop tree branch for all successors. + innermost = innermost == NULL ? lp : innermost->sorted_merge(lp); + + } // end loop + + if (innermost == NULL) { + assert(blk->successors()->length() == 0, "CFG exit"); + blk->set_loop(loop_tree_root()); + } else if (innermost->head() == blk) { + // If loop header, complete the tree pointers + if (blk->loop() != innermost) { +#if ASSERT + assert(blk->loop()->head() == innermost->head(), "same head"); + Loop* dl; + for (dl = innermost; dl != NULL && dl != blk->loop(); dl = dl->parent()); + assert(dl == blk->loop(), "blk->loop() already in innermost list"); +#endif + blk->set_loop(innermost); + } + innermost->def_locals()->add(blk->def_locals()); + Loop* l = innermost; + Loop* p = l->parent(); + while (p && l->head() == blk) { + l->set_sibling(p->child()); // Put self on parents 'next child' + p->set_child(l); // Make self the first child of parent + p->def_locals()->add(l->def_locals()); + l = p; // Walk up the parent chain + p = l->parent(); + } + } else { + blk->set_loop(innermost); + innermost->def_locals()->add(blk->def_locals()); + } +} + +// ------------------------------------------------------------------ +// ciTypeFlow::Loop::contains +// +// Returns true if lp is nested loop. 
+bool ciTypeFlow::Loop::contains(ciTypeFlow::Loop* lp) const { + assert(lp != NULL, ""); + if (this == lp || head() == lp->head()) return true; + int depth1 = depth(); + int depth2 = lp->depth(); + if (depth1 > depth2) + return false; + while (depth1 < depth2) { + depth2--; + lp = lp->parent(); + } + return this == lp; +} + +// ------------------------------------------------------------------ +// ciTypeFlow::Loop::depth +// +// Loop depth +int ciTypeFlow::Loop::depth() const { + int dp = 0; + for (Loop* lp = this->parent(); lp != NULL; lp = lp->parent()) + dp++; + return dp; +} + +#ifndef PRODUCT +// ------------------------------------------------------------------ +// ciTypeFlow::Loop::print +void ciTypeFlow::Loop::print(outputStream* st, int indent) const { + for (int i = 0; i < indent; i++) st->print(" "); + st->print("%d<-%d %s", + is_root() ? 0 : this->head()->pre_order(), + is_root() ? 0 : this->tail()->pre_order(), + is_irreducible()?" irr":""); + st->print(" defs: "); + def_locals()->print_on(st, _head->outer()->method()->max_locals()); + st->cr(); + for (Loop* ch = child(); ch != NULL; ch = ch->sibling()) + ch->print(st, indent+2); +} +#endif + +// ------------------------------------------------------------------ +// ciTypeFlow::df_flow_types +// +// Perform the depth first type flow analysis. Helper for flow_types. 
+void ciTypeFlow::df_flow_types(Block* start, + bool do_flow, + StateVector* temp_vector, + JsrSet* temp_set) { + int dft_len = 100; + GrowableArray<Block*> stk(arena(), dft_len, 0, NULL); + + ciBlock* dummy = _methodBlocks->make_dummy_block(); + JsrSet* root_set = new JsrSet(NULL, 0); + Block* root_head = new (arena()) Block(this, dummy, root_set); + Block* root_tail = new (arena()) Block(this, dummy, root_set); + root_head->set_pre_order(0); + root_head->set_post_order(0); + root_tail->set_pre_order(max_jint); + root_tail->set_post_order(max_jint); + set_loop_tree_root(new (arena()) Loop(root_head, root_tail)); + + stk.push(start); + + _next_pre_order = 0; // initialize pre_order counter + _rpo_list = NULL; + int next_po = 0; // initialize post_order counter + + // Compute RPO and the control flow graph + int size; + while ((size = stk.length()) > 0) { + Block* blk = stk.top(); // Leave node on stack + if (!blk->is_visited()) { + // forward arc in graph + assert (!blk->has_pre_order(), ""); + blk->set_next_pre_order(); + + if (_next_pre_order >= MaxNodeLimit / 2) { + // Too many basic blocks. Bail out. + // This can happen when try/finally constructs are nested to depth N, + // and there is O(2**N) cloning of jsr bodies. See bug 4697245! + // "MaxNodeLimit / 2" is used because probably the parser will + // generate at least twice that many nodes and bail out. + record_failure("too many basic blocks"); + return; + } + if (do_flow) { + flow_block(blk, temp_vector, temp_set); + if (failing()) return; // Watch for bailouts. 
+ } + } else if (!blk->is_post_visited()) { + // cross or back arc + for (SuccIter iter(blk); !iter.done(); iter.next()) { + Block* succ = iter.succ(); + if (!succ->is_visited()) { + stk.push(succ); + } + } + if (stk.length() == size) { + // There were no additional children, post visit node now + stk.pop(); // Remove node from stack + + build_loop_tree(blk); + blk->set_post_order(next_po++); // Assign post order + prepend_to_rpo_list(blk); + assert(blk->is_post_visited(), ""); + + if (blk->is_loop_head() && !blk->is_on_work_list()) { + // Assume loop heads need more data flow + add_to_work_list(blk); + } + } + } else { + stk.pop(); // Remove post-visited node from stack + } + } +} + +// ------------------------------------------------------------------ // ciTypeFlow::flow_types // // Perform the type flow analysis, creating and cloning Blocks as @@ -2233,91 +2660,93 @@ JsrSet* temp_set = new JsrSet(NULL, 16); // Create the method entry block. - Block* block = block_at(start_bci(), temp_set); - block->set_pre_order(_next_pre_order++); - assert(block->is_start(), "start block must have order #0"); + Block* start = block_at(start_bci(), temp_set); // Load the initial state into it. const StateVector* start_state = get_start_state(); if (failing()) return; - block->meet(start_state); - add_to_work_list(block); + start->meet(start_state); + + // Depth first visit + df_flow_types(start, true /*do flow*/, temp_vector, temp_set); - // Trickle away. - while (!work_list_empty()) { - Block* block = work_list_next(); - flow_block(block, temp_vector, temp_set); + if (failing()) return; + assert(_rpo_list == start, "must be start"); + // Any loops found? + if (loop_tree_root()->child() != NULL && + env()->comp_level() >= CompLevel_full_optimization) { + // Loop optimizations are not performed on Tier1 compiles. + + bool changed = clone_loop_heads(loop_tree_root(), temp_vector, temp_set); - // NodeCountCutoff is the number of nodes at which the parser - // will bail out. 
Probably if we already have lots of BBs, - // the parser will generate at least twice that many nodes and bail out. - // Therefore, this is a conservatively large limit at which to - // bail out in the pre-parse typeflow pass. - int block_limit = MaxNodeLimit / 2; + // If some loop heads were cloned, recompute postorder and loop tree + if (changed) { + loop_tree_root()->set_child(NULL); + for (Block* blk = _rpo_list; blk != NULL;) { + Block* next = blk->rpo_next(); + blk->df_init(); + blk = next; + } + df_flow_types(start, false /*no flow*/, temp_vector, temp_set); + } + } - if (_next_pre_order >= block_limit) { - // Too many basic blocks. Bail out. - // - // This can happen when try/finally constructs are nested to depth N, - // and there is O(2**N) cloning of jsr bodies. See bug 4697245! - record_failure("too many basic blocks"); - return; - } + if (CITraceTypeFlow) { + tty->print_cr("\nLoop tree"); + loop_tree_root()->print(); + } + + // Continue flow analysis until fixed point reached + + debug_only(int max_block = _next_pre_order;) - // Watch for bailouts. - if (failing()) return; + while (!work_list_empty()) { + Block* blk = work_list_next(); + assert (blk->has_post_order(), "post order assigned above"); + + flow_block(blk, temp_vector, temp_set); + + assert (max_block == _next_pre_order, "no new blocks"); + assert (!failing(), "no more bailouts"); } } // ------------------------------------------------------------------ // ciTypeFlow::map_blocks // -// Create the block map, which indexes blocks in pre_order. +// Create the block map, which indexes blocks in reverse post-order. 
void ciTypeFlow::map_blocks() { assert(_block_map == NULL, "single initialization"); - int pre_order_limit = _next_pre_order; - _block_map = NEW_ARENA_ARRAY(arena(), Block*, pre_order_limit); - assert(pre_order_limit == block_count(), ""); - int po; - for (po = 0; po < pre_order_limit; po++) { - debug_only(_block_map[po] = NULL); + int block_ct = _next_pre_order; + _block_map = NEW_ARENA_ARRAY(arena(), Block*, block_ct); + assert(block_ct == block_count(), ""); + + Block* blk = _rpo_list; + for (int m = 0; m < block_ct; m++) { + int rpo = blk->rpo(); + assert(rpo == m, "should be sequential"); + _block_map[rpo] = blk; + blk = blk->rpo_next(); } - ciMethodBlocks *mblks = _methodBlocks; - ciBlock* current = NULL; - int limit_bci = code_size(); - for (int bci = 0; bci < limit_bci; bci++) { - ciBlock* ciblk = mblks->block_containing(bci); - if (ciblk != NULL && ciblk != current) { - current = ciblk; - int curidx = ciblk->index(); - int block_count = (_idx_to_blocklist[curidx] == NULL) ? 0 : _idx_to_blocklist[curidx]->length(); - for (int i = 0; i < block_count; i++) { - Block* block = _idx_to_blocklist[curidx]->at(i); - if (!block->has_pre_order()) continue; - int po = block->pre_order(); - assert(_block_map[po] == NULL, "unique ref to block"); - assert(0 <= po && po < pre_order_limit, ""); - _block_map[po] = block; - } - } - } - for (po = 0; po < pre_order_limit; po++) { - assert(_block_map[po] != NULL, "must not drop any blocks"); - Block* block = _block_map[po]; + assert(blk == NULL, "should be done"); + + for (int j = 0; j < block_ct; j++) { + assert(_block_map[j] != NULL, "must not drop any blocks"); + Block* block = _block_map[j]; // Remove dead blocks from successor lists: for (int e = 0; e <= 1; e++) { GrowableArray<Block*>* l = e? 
block->exceptions(): block->successors(); - for (int i = 0; i < l->length(); i++) { - Block* s = l->at(i); - if (!s->has_pre_order()) { + for (int k = 0; k < l->length(); k++) { + Block* s = l->at(k); + if (!s->has_post_order()) { if (CITraceTypeFlow) { tty->print("Removing dead %s successor of #%d: ", (e? "exceptional": "normal"), block->pre_order()); s->print_value_on(tty); tty->cr(); } l->remove(s); - --i; + --k; } } } @@ -2329,7 +2758,7 @@ // // Find a block with this ciBlock which has a compatible JsrSet. // If no such block exists, create it, unless the option is no_create. -// If the option is create_private_copy, always create a fresh private copy. +// If the option is create_backedge_copy, always create a fresh backedge copy. ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs, CreateOption option) { Arena* a = arena(); GrowableArray<Block*>* blocks = _idx_to_blocklist[ciBlockIndex]; @@ -2342,11 +2771,11 @@ _idx_to_blocklist[ciBlockIndex] = blocks; } - if (option != create_private_copy) { + if (option != create_backedge_copy) { int len = blocks->length(); for (int i = 0; i < len; i++) { Block* block = blocks->at(i); - if (!block->is_private_copy() && block->is_compatible_with(jsrs)) { + if (!block->is_backedge_copy() && block->is_compatible_with(jsrs)) { return block; } } @@ -2357,15 +2786,15 @@ // We did not find a compatible block. Create one. 
Block* new_block = new (a) Block(this, _methodBlocks->block(ciBlockIndex), jsrs); - if (option == create_private_copy) new_block->set_private_copy(true); + if (option == create_backedge_copy) new_block->set_backedge_copy(true); blocks->append(new_block); return new_block; } // ------------------------------------------------------------------ -// ciTypeFlow::private_copy_count +// ciTypeFlow::backedge_copy_count // -int ciTypeFlow::private_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const { +int ciTypeFlow::backedge_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const { GrowableArray<Block*>* blocks = _idx_to_blocklist[ciBlockIndex]; if (blocks == NULL) { @@ -2376,7 +2805,7 @@ int len = blocks->length(); for (int i = 0; i < len; i++) { Block* block = blocks->at(i); - if (block->is_private_copy() && block->is_compatible_with(jsrs)) { + if (block->is_backedge_copy() && block->is_compatible_with(jsrs)) { count++; } } @@ -2405,10 +2834,12 @@ if (failing()) { return; } + + map_blocks(); + if (CIPrintTypeFlow || CITraceTypeFlow) { - print_on(tty); + rpo_print_on(tty); } - map_blocks(); } // ------------------------------------------------------------------ @@ -2466,4 +2897,19 @@ st->print_cr("********************************************************"); st->cr(); } + +void ciTypeFlow::rpo_print_on(outputStream* st) const { + st->print_cr("********************************************************"); + st->print ("TypeFlow for "); + method()->name()->print_symbol_on(st); + int limit_bci = code_size(); + st->print_cr(" %d bytes", limit_bci); + for (Block* blk = _rpo_list; blk != NULL; blk = blk->rpo_next()) { + blk->print_on(st); + st->print_cr("--------------------------------------------------------"); + st->cr(); + } + st->print_cr("********************************************************"); + st->cr(); +} #endif
--- a/hotspot/src/share/vm/ci/ciTypeFlow.hpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/ci/ciTypeFlow.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -34,11 +34,13 @@ int _max_locals; int _max_stack; int _code_size; + bool _has_irreducible_entry; const char* _failure_reason; public: class StateVector; + class Loop; class Block; // Build a type flow analyzer @@ -55,6 +57,7 @@ int max_stack() const { return _max_stack; } int max_cells() const { return _max_locals + _max_stack; } int code_size() const { return _code_size; } + bool has_irreducible_entry() const { return _has_irreducible_entry; } // Represents information about an "active" jsr call. This // class represents a call to the routine at some entry address @@ -125,6 +128,19 @@ void print_on(outputStream* st) const PRODUCT_RETURN; }; + class LocalSet VALUE_OBJ_CLASS_SPEC { + private: + enum Constants { max = 63 }; + uint64_t _bits; + public: + LocalSet() : _bits(0) {} + void add(uint32_t i) { if (i < (uint32_t)max) _bits |= (1LL << i); } + void add(LocalSet* ls) { _bits |= ls->_bits; } + bool test(uint32_t i) const { return i < (uint32_t)max ? 
(_bits>>i)&1U : true; } + void clear() { _bits = 0; } + void print_on(outputStream* st, int limit) const PRODUCT_RETURN; + }; + // Used as a combined index for locals and temps enum Cell { Cell_0, Cell_max = INT_MAX @@ -142,6 +158,8 @@ int _trap_bci; int _trap_index; + LocalSet _def_locals; // For entire block + static ciType* type_meet_internal(ciType* t1, ciType* t2, ciTypeFlow* analyzer); public: @@ -181,6 +199,9 @@ int monitor_count() const { return _monitor_count; } void set_monitor_count(int mc) { _monitor_count = mc; } + LocalSet* def_locals() { return &_def_locals; } + const LocalSet* def_locals() const { return &_def_locals; } + static Cell start_cell() { return (Cell)0; } static Cell next_cell(Cell c) { return (Cell)(((int)c) + 1); } Cell limit_cell() const { @@ -250,6 +271,10 @@ return type->basic_type() == T_DOUBLE; } + void store_to_local(int lnum) { + _def_locals.add((uint) lnum); + } + void push_translate(ciType* type); void push_int() { @@ -358,6 +383,7 @@ "must be reference type or return address"); overwrite_local_double_long(index); set_type_at(local(index), type); + store_to_local(index); } void load_local_double(int index) { @@ -376,6 +402,8 @@ overwrite_local_double_long(index); set_type_at(local(index), type); set_type_at(local(index+1), type2); + store_to_local(index); + store_to_local(index+1); } void load_local_float(int index) { @@ -388,6 +416,7 @@ assert(is_float(type), "must be float type"); overwrite_local_double_long(index); set_type_at(local(index), type); + store_to_local(index); } void load_local_int(int index) { @@ -400,6 +429,7 @@ assert(is_int(type), "must be int type"); overwrite_local_double_long(index); set_type_at(local(index), type); + store_to_local(index); } void load_local_long(int index) { @@ -418,6 +448,8 @@ overwrite_local_double_long(index); set_type_at(local(index), type); set_type_at(local(index+1), type2); + store_to_local(index); + store_to_local(index+1); } // Stop interpretation of this path with a trap. 
@@ -450,13 +482,31 @@ }; // Parameter for "find_block" calls: - // Describes the difference between a public and private copy. + // Describes the difference between a public and backedge copy. enum CreateOption { create_public_copy, - create_private_copy, + create_backedge_copy, no_create }; + // Successor iterator + class SuccIter : public StackObj { + private: + Block* _pred; + int _index; + Block* _succ; + public: + SuccIter() : _pred(NULL), _index(-1), _succ(NULL) {} + SuccIter(Block* pred) : _pred(pred), _index(-1), _succ(NULL) { next(); } + int index() { return _index; } + Block* pred() { return _pred; } // Return predecessor + bool done() { return _index < 0; } // Finished? + Block* succ() { return _succ; } // Return current successor + void next(); // Advance + void set_succ(Block* succ); // Update current successor + bool is_normal_ctrl() { return index() < _pred->successors()->length(); } + }; + // A basic block class Block : public ResourceObj { private: @@ -470,15 +520,24 @@ int _trap_bci; int _trap_index; - // A reasonable approximation to pre-order, provided.to the client. + // pre_order, assigned at first visit. Used as block ID and "visited" tag int _pre_order; - // Has this block been cloned for some special purpose? - bool _private_copy; + // A post-order, used to compute the reverse post order (RPO) provided to the client + int _post_order; // used to compute rpo + + // Has this block been cloned for a loop backedge? 
+ bool _backedge_copy; // A pointer used for our internal work list - Block* _next; - bool _on_work_list; + Block* _next; + bool _on_work_list; // on the work list + Block* _rpo_next; // Reverse post order list + + // Loop info + Loop* _loop; // nearest loop + bool _irreducible_entry; // entry to irreducible loop + bool _exception_entry; // entry to exception handler ciBlock* ciblock() const { return _ciblock; } StateVector* state() const { return _state; } @@ -504,10 +563,11 @@ int start() const { return _ciblock->start_bci(); } int limit() const { return _ciblock->limit_bci(); } int control() const { return _ciblock->control_bci(); } + JsrSet* jsrs() const { return _jsrs; } - bool is_private_copy() const { return _private_copy; } - void set_private_copy(bool z); - int private_copy_count() const { return outer()->private_copy_count(ciblock()->index(), _jsrs); } + bool is_backedge_copy() const { return _backedge_copy; } + void set_backedge_copy(bool z); + int backedge_copy_count() const { return outer()->backedge_copy_count(ciblock()->index(), _jsrs); } // access to entry state int stack_size() const { return _state->stack_size(); } @@ -515,6 +575,20 @@ ciType* local_type_at(int i) const { return _state->local_type_at(i); } ciType* stack_type_at(int i) const { return _state->stack_type_at(i); } + // Data flow on locals + bool is_invariant_local(uint v) const { + assert(is_loop_head(), "only loop heads"); + // Find outermost loop with same loop head + Loop* lp = loop(); + while (lp->parent() != NULL) { + if (lp->parent()->head() != lp->head()) break; + lp = lp->parent(); + } + return !lp->def_locals()->test(v); + } + LocalSet* def_locals() { return _state->def_locals(); } + const LocalSet* def_locals() const { return _state->def_locals(); } + // Get the successors for this Block. 
GrowableArray<Block*>* successors(ciBytecodeStream* str, StateVector* state, @@ -524,13 +598,6 @@ return _successors; } - // Helper function for "successors" when making private copies of - // loop heads for C2. - Block * clone_loop_head(ciTypeFlow* analyzer, - int branch_bci, - Block* target, - JsrSet* jsrs); - // Get the exceptional successors for this Block. GrowableArray<Block*>* exceptions() { if (_exceptions == NULL) { @@ -584,17 +651,126 @@ bool is_on_work_list() const { return _on_work_list; } bool has_pre_order() const { return _pre_order >= 0; } - void set_pre_order(int po) { assert(!has_pre_order() && po >= 0, ""); _pre_order = po; } + void set_pre_order(int po) { assert(!has_pre_order(), ""); _pre_order = po; } int pre_order() const { assert(has_pre_order(), ""); return _pre_order; } + void set_next_pre_order() { set_pre_order(outer()->inc_next_pre_order()); } bool is_start() const { return _pre_order == outer()->start_block_num(); } - // A ranking used in determining order within the work list. 
- bool is_simpler_than(Block* other); + // Reverse post order + void df_init(); + bool has_post_order() const { return _post_order >= 0; } + void set_post_order(int po) { assert(!has_post_order() && po >= 0, ""); _post_order = po; } + void reset_post_order(int o){ _post_order = o; } + int post_order() const { assert(has_post_order(), ""); return _post_order; } + + bool has_rpo() const { return has_post_order() && outer()->have_block_count(); } + int rpo() const { assert(has_rpo(), ""); return outer()->block_count() - post_order() - 1; } + void set_rpo_next(Block* b) { _rpo_next = b; } + Block* rpo_next() { return _rpo_next; } + + // Loops + Loop* loop() const { return _loop; } + void set_loop(Loop* lp) { _loop = lp; } + bool is_loop_head() const { return _loop && _loop->head() == this; } + void set_irreducible_entry(bool c) { _irreducible_entry = c; } + bool is_irreducible_entry() const { return _irreducible_entry; } + bool is_visited() const { return has_pre_order(); } + bool is_post_visited() const { return has_post_order(); } + bool is_clonable_exit(Loop* lp); + Block* looping_succ(Loop* lp); // Successor inside of loop + bool is_single_entry_loop_head() const { + if (!is_loop_head()) return false; + for (Loop* lp = loop(); lp != NULL && lp->head() == this; lp = lp->parent()) + if (lp->is_irreducible()) return false; + return true; + } void print_value_on(outputStream* st) const PRODUCT_RETURN; void print_on(outputStream* st) const PRODUCT_RETURN; }; + // Loop + class Loop : public ResourceObj { + private: + Loop* _parent; + Loop* _sibling; // List of siblings, null terminated + Loop* _child; // Head of child list threaded thru sibling pointer + Block* _head; // Head of loop + Block* _tail; // Tail of loop + bool _irreducible; + LocalSet _def_locals; + + public: + Loop(Block* head, Block* tail) : + _head(head), _tail(tail), + _parent(NULL), _sibling(NULL), _child(NULL), + _irreducible(false), _def_locals() {} + + Loop* parent() const { return _parent; } + Loop* 
sibling() const { return _sibling; } + Loop* child() const { return _child; } + Block* head() const { return _head; } + Block* tail() const { return _tail; } + void set_parent(Loop* p) { _parent = p; } + void set_sibling(Loop* s) { _sibling = s; } + void set_child(Loop* c) { _child = c; } + void set_head(Block* hd) { _head = hd; } + void set_tail(Block* tl) { _tail = tl; } + + int depth() const; // nesting depth + + // Returns true if lp is a nested loop or us. + bool contains(Loop* lp) const; + bool contains(Block* blk) const { return contains(blk->loop()); } + + // Data flow on locals + LocalSet* def_locals() { return &_def_locals; } + const LocalSet* def_locals() const { return &_def_locals; } + + // Merge the branch lp into this branch, sorting on the loop head + // pre_orders. Returns the new branch. + Loop* sorted_merge(Loop* lp); + + // Mark non-single entry to loop + void set_irreducible(Block* entry) { + _irreducible = true; + entry->set_irreducible_entry(true); + } + bool is_irreducible() const { return _irreducible; } + + bool is_root() const { return _tail->pre_order() == max_jint; } + + void print(outputStream* st = tty, int indent = 0) const PRODUCT_RETURN; + }; + + // Postorder iteration over the loop tree. + class PostorderLoops : public StackObj { + private: + Loop* _root; + Loop* _current; + public: + PostorderLoops(Loop* root) : _root(root), _current(root) { + while (_current->child() != NULL) { + _current = _current->child(); + } + } + bool done() { return _current == NULL; } // Finished iterating? + void next(); // Advance to next loop + Loop* current() { return _current; } // Return current loop. + }; + + // Preorder iteration over the loop tree. + class PreorderLoops : public StackObj { + private: + Loop* _root; + Loop* _current; + public: + PreorderLoops(Loop* root) : _root(root), _current(root) {} + bool done() { return _current == NULL; } // Finished iterating? 
+ void next(); // Advance to next loop + Loop* current() { return _current; } // Return current loop. + }; + // Standard indexes of successors, for various bytecodes. enum { FALL_THROUGH = 0, // normal control @@ -619,6 +795,12 @@ // Tells if a given instruction is able to generate an exception edge. bool can_trap(ciBytecodeStream& str); + // Clone the loop heads. Returns true if any cloning occurred. + bool clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set); + + // Clone lp's head and replace tail's successors with clone. + Block* clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set); + public: // Return the block beginning at bci which has a JsrSet compatible // with jsrs. @@ -627,8 +809,8 @@ // block factory Block* get_block_for(int ciBlockIndex, JsrSet* jsrs, CreateOption option = create_public_copy); - // How many of the blocks have the private_copy bit set? - int private_copy_count(int ciBlockIndex, JsrSet* jsrs) const; + // How many of the blocks have the backedge_copy bit set? + int backedge_copy_count(int ciBlockIndex, JsrSet* jsrs) const; // Return an existing block containing bci which has a JsrSet compatible // with jsrs, or NULL if there is none. @@ -651,11 +833,18 @@ return _block_map[po]; } Block* start_block() const { return pre_order_at(start_block_num()); } int start_block_num() const { return 0; } + Block* rpo_at(int rpo) const { assert(0 <= rpo && rpo < block_count(), "out of bounds"); + return _block_map[rpo]; } + int next_pre_order() { return _next_pre_order; } + int inc_next_pre_order() { return _next_pre_order++; } private: // A work list used during flow analysis. Block* _work_list; + // List of blocks in reverse post order + Block* _rpo_list; + // Next Block::_pre_order. After mapping, doubles as block_count. int _next_pre_order; @@ -668,6 +857,15 @@ // Add a basic block to our work list. void add_to_work_list(Block* block); + // Prepend a basic block to rpo list. 
+ void prepend_to_rpo_list(Block* blk) { + blk->set_rpo_next(_rpo_list); + _rpo_list = blk; + } + + // Root of the loop tree + Loop* _loop_tree_root; + // State used for make_jsr_record int _jsr_count; GrowableArray<JsrRecord*>* _jsr_records; @@ -677,6 +875,9 @@ // does not already exist. JsrRecord* make_jsr_record(int entry_address, int return_address); + void set_loop_tree_root(Loop* ltr) { _loop_tree_root = ltr; } + Loop* loop_tree_root() { return _loop_tree_root; } + private: // Get the initial state for start_bci: const StateVector* get_start_state(); @@ -703,6 +904,15 @@ // necessary. void flow_types(); + // Perform the depth first type flow analysis. Helper for flow_types. + void df_flow_types(Block* start, + bool do_flow, + StateVector* temp_vector, + JsrSet* temp_set); + + // Incrementally build loop tree. + void build_loop_tree(Block* blk); + // Create the block map, which indexes blocks in pre_order. void map_blocks(); @@ -711,4 +921,6 @@ void do_flow(); void print_on(outputStream* st) const PRODUCT_RETURN; + + void rpo_print_on(outputStream* st) const PRODUCT_RETURN; };
--- a/hotspot/src/share/vm/code/nmethod.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/code/nmethod.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -1350,11 +1350,7 @@ return false; } } - if (!UseParallelOldGC || !VerifyParallelOldWithMarkSweep) { - // Cannot do this test if verification of the UseParallelOldGC - // code using the PSMarkSweep code is being done. - assert(unloading_occurred, "Inconsistency in unloading"); - } + assert(unloading_occurred, "Inconsistency in unloading"); make_unloaded(is_alive, obj); return true; }
--- a/hotspot/src/share/vm/compiler/methodLiveness.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/compiler/methodLiveness.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -76,8 +76,9 @@ BitCounter() : _count(0) {} // Callback when bit in map is set - virtual void do_bit(size_t offset) { + virtual bool do_bit(size_t offset) { _count++; + return true; } int count() { @@ -467,7 +468,7 @@ bci = 0; } - MethodLivenessResult answer(NULL,0); + MethodLivenessResult answer((uintptr_t*)NULL,0); if (_block_count > 0) { if (TimeLivenessAnalysis) _time_total.start();
--- a/hotspot/src/share/vm/compiler/methodLiveness.hpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/compiler/methodLiveness.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -29,7 +29,7 @@ bool _is_valid; public: - MethodLivenessResult(uintptr_t* map, idx_t size_in_bits) + MethodLivenessResult(BitMap::bm_word_t* map, idx_t size_in_bits) : BitMap(map, size_in_bits) , _is_valid(false) {}
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -790,7 +790,7 @@ } -HeapWord* CompactibleFreeListSpace::block_start(const void* p) const { +HeapWord* CompactibleFreeListSpace::block_start_const(const void* p) const { NOT_PRODUCT(verify_objects_initialized()); return _bt.block_start(p); } @@ -2286,9 +2286,9 @@ } void CompactibleFreeListSpace::verifyIndexedFreeList(size_t size) const { - guarantee(size % 2 == 0, "Odd slots should be empty"); - for (FreeChunk* fc = _indexedFreeList[size].head(); fc != NULL; - fc = fc->next()) { + FreeChunk* fc = _indexedFreeList[size].head(); + guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty"); + for (; fc != NULL; fc = fc->next()) { guarantee(fc->size() == size, "Size inconsistency"); guarantee(fc->isFree(), "!free?"); guarantee(fc->next() == NULL || fc->next()->prev() == fc, "Broken list"); @@ -2790,10 +2790,11 @@ assert(n_threads > 0, "Unexpected n_threads argument"); const size_t task_size = rescan_task_size(); size_t n_tasks = (used_region().word_size() + task_size - 1)/task_size; - assert((used_region().start() + (n_tasks - 1)*task_size < - used_region().end()) && - (used_region().start() + n_tasks*task_size >= - used_region().end()), "n_task calculation incorrect"); + assert((n_tasks == 0) == used_region().is_empty(), "n_tasks incorrect"); + assert(n_tasks == 0 || + ((used_region().start() + (n_tasks - 1)*task_size < used_region().end()) && + (used_region().start() + n_tasks*task_size >= used_region().end())), + "n_tasks calculation incorrect"); SequentialSubTasksDone* pst = conc_par_seq_tasks(); assert(!pst->valid(), "Clobbering existing data?"); pst->set_par_threads(n_threads); @@ -2833,7 +2834,7 @@ assert(n_tasks == 0 || ((span.start() + (n_tasks - 1)*task_size < span.end()) && (span.start() + 
n_tasks*task_size >= span.end())), - "n_task calculation incorrect"); + "n_tasks calculation incorrect"); SequentialSubTasksDone* pst = conc_par_seq_tasks(); assert(!pst->valid(), "Clobbering existing data?"); pst->set_par_threads(n_threads);
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -502,7 +502,7 @@ void blk_iterate(BlkClosure* cl); void blk_iterate_careful(BlkClosureCareful* cl); - HeapWord* block_start(const void* p) const; + HeapWord* block_start_const(const void* p) const; HeapWord* block_start_careful(const void* p) const; size_t block_size(const HeapWord* p) const; size_t block_size_no_stall(HeapWord* p, const CMSCollector* c) const;
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -2761,13 +2761,14 @@ public: VerifyMarkedClosure(CMSBitMap* bm): _marks(bm), _failed(false) {} - void do_bit(size_t offset) { + bool do_bit(size_t offset) { HeapWord* addr = _marks->offsetToHeapWord(offset); if (!_marks->isMarked(addr)) { oop(addr)->print(); gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr); _failed = true; } + return true; } bool failed() { return _failed; } @@ -3650,6 +3651,7 @@ CompactibleFreeListSpace* _cms_space; CompactibleFreeListSpace* _perm_space; HeapWord* _global_finger; + HeapWord* _restart_addr; // Exposed here for yielding support Mutex* const _bit_map_lock; @@ -3680,7 +3682,7 @@ _term.set_task(this); assert(_cms_space->bottom() < _perm_space->bottom(), "Finger incorrectly initialized below"); - _global_finger = _cms_space->bottom(); + _restart_addr = _global_finger = _cms_space->bottom(); } @@ -3698,6 +3700,10 @@ bool result() { return _result; } void reset(HeapWord* ra) { + assert(_global_finger >= _cms_space->end(), "Postcondition of ::work(i)"); + assert(_global_finger >= _perm_space->end(), "Postcondition of ::work(i)"); + assert(ra < _perm_space->end(), "ra too large"); + _restart_addr = _global_finger = ra; _term.reset_for_reuse(); } @@ -3842,16 +3848,24 @@ int n_tasks = pst->n_tasks(); // We allow that there may be no tasks to do here because // we are restarting after a stack overflow. - assert(pst->valid() || n_tasks == 0, "Uninitializd use?"); + assert(pst->valid() || n_tasks == 0, "Uninitialized use?"); int nth_task = 0; - HeapWord* start = sp->bottom(); + HeapWord* aligned_start = sp->bottom(); + if (sp->used_region().contains(_restart_addr)) { + // Align down to a card boundary for the start of 0th task + // for this space. 
+ aligned_start = + (HeapWord*)align_size_down((uintptr_t)_restart_addr, + CardTableModRefBS::card_size); + } + size_t chunk_size = sp->marking_task_size(); while (!pst->is_task_claimed(/* reference */ nth_task)) { // Having claimed the nth task in this space, // compute the chunk that it corresponds to: - MemRegion span = MemRegion(start + nth_task*chunk_size, - start + (nth_task+1)*chunk_size); + MemRegion span = MemRegion(aligned_start + nth_task*chunk_size, + aligned_start + (nth_task+1)*chunk_size); // Try and bump the global finger via a CAS; // note that we need to do the global finger bump // _before_ taking the intersection below, because @@ -3866,26 +3880,40 @@ // beyond the "top" address of the space. span = span.intersection(sp->used_region()); if (!span.is_empty()) { // Non-null task - // We want to skip the first object because - // the protocol is to scan any object in its entirety - // that _starts_ in this span; a fortiori, any - // object starting in an earlier span is scanned - // as part of an earlier claimed task. - // Below we use the "careful" version of block_start - // so we do not try to navigate uninitialized objects. - HeapWord* prev_obj = sp->block_start_careful(span.start()); - // Below we use a variant of block_size that uses the - // Printezis bits to avoid waiting for allocated - // objects to become initialized/parsable. - while (prev_obj < span.start()) { - size_t sz = sp->block_size_no_stall(prev_obj, _collector); - if (sz > 0) { - prev_obj += sz; + HeapWord* prev_obj; + assert(!span.contains(_restart_addr) || nth_task == 0, + "Inconsistency"); + if (nth_task == 0) { + // For the 0th task, we'll not need to compute a block_start. + if (span.contains(_restart_addr)) { + // In the case of a restart because of stack overflow, + // we might additionally skip a chunk prefix. 
+ prev_obj = _restart_addr; } else { - // In this case we may end up doing a bit of redundant - // scanning, but that appears unavoidable, short of - // locking the free list locks; see bug 6324141. - break; + prev_obj = span.start(); + } + } else { + // We want to skip the first object because + // the protocol is to scan any object in its entirety + // that _starts_ in this span; a fortiori, any + // object starting in an earlier span is scanned + // as part of an earlier claimed task. + // Below we use the "careful" version of block_start + // so we do not try to navigate uninitialized objects. + prev_obj = sp->block_start_careful(span.start()); + // Below we use a variant of block_size that uses the + // Printezis bits to avoid waiting for allocated + // objects to become initialized/parsable. + while (prev_obj < span.start()) { + size_t sz = sp->block_size_no_stall(prev_obj, _collector); + if (sz > 0) { + prev_obj += sz; + } else { + // In this case we may end up doing a bit of redundant + // scanning, but that appears unavoidable, short of + // locking the free list locks; see bug 6324141. + break; + } } } if (prev_obj < span.end()) { @@ -3938,12 +3966,14 @@ void handle_stack_overflow(HeapWord* lost); }; -// Grey object rescan during work stealing phase -- -// the salient assumption here is that stolen oops must -// always be initialized, so we do not need to check for -// uninitialized objects before scanning here. +// Grey object scanning during work stealing phase -- +// the salient assumption here is that any references +// that are in these stolen objects being scanned must +// already have been initialized (else they would not have +// been published), so we do not need to check for +// uninitialized objects before pushing here. 
void Par_ConcMarkingClosure::do_oop(oop obj) { - assert(obj->is_oop_or_null(), "expected an oop or NULL"); + assert(obj->is_oop_or_null(true), "expected an oop or NULL"); HeapWord* addr = (HeapWord*)obj; // Check if oop points into the CMS generation // and is not marked @@ -4001,7 +4031,7 @@ // in CMSCollector's _restart_address. void Par_ConcMarkingClosure::handle_stack_overflow(HeapWord* lost) { // We need to do this under a mutex to prevent other - // workers from interfering with the expansion below. + // workers from interfering with the work done below. MutexLockerEx ml(_overflow_stack->par_lock(), Mutex::_no_safepoint_check_flag); // Remember the least grey address discarded @@ -4640,8 +4670,11 @@ startTimer(); sample_eden(); // Get and clear dirty region from card table - dirtyRegion = _ct->ct_bs()->dirty_card_range_after_preclean( - MemRegion(nextAddr, endAddr)); + dirtyRegion = _ct->ct_bs()->dirty_card_range_after_reset( + MemRegion(nextAddr, endAddr), + true, + CardTableModRefBS::precleaned_card_val()); + assert(dirtyRegion.start() >= nextAddr, "returned region inconsistent?"); } @@ -5409,8 +5442,8 @@ &mrias_cl); { TraceTime t("grey object rescan", PrintGCDetails, false, gclog_or_tty); - // Iterate over the dirty cards, marking them precleaned, and - // setting the corresponding bits in the mod union table. + // Iterate over the dirty cards, setting the corresponding bits in the + // mod union table. { ModUnionClosure modUnionClosure(&_modUnionTable); _ct->ct_bs()->dirty_card_iterate( @@ -6182,7 +6215,7 @@ // bit vector itself. That is done by a separate call CMSBitMap::allocate() // further below. CMSBitMap::CMSBitMap(int shifter, int mutex_rank, const char* mutex_name): - _bm(NULL,0), + _bm(), _shifter(shifter), _lock(mutex_rank >= 0 ? 
new Mutex(mutex_rank, mutex_name, true) : NULL) { @@ -6207,7 +6240,7 @@ } assert(_virtual_space.committed_size() == brs.size(), "didn't reserve backing store for all of CMS bit map?"); - _bm.set_map((uintptr_t*)_virtual_space.low()); + _bm.set_map((BitMap::bm_word_t*)_virtual_space.low()); assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= _bmWordSize, "inconsistency in bit map sizing"); _bm.set_size(_bmWordSize >> _shifter); @@ -6554,7 +6587,7 @@ if (obj != NULL) { // Ignore mark word because this could be an already marked oop // that may be chained at the end of the overflow list. - assert(obj->is_oop(), "expected an oop"); + assert(obj->is_oop(true), "expected an oop"); HeapWord* addr = (HeapWord*)obj; if (_span.contains(addr) && !_bit_map->isMarked(addr)) { @@ -6845,10 +6878,10 @@ // Should revisit to see if this should be restructured for // greater efficiency. -void MarkFromRootsClosure::do_bit(size_t offset) { +bool MarkFromRootsClosure::do_bit(size_t offset) { if (_skipBits > 0) { _skipBits--; - return; + return true; } // convert offset into a HeapWord* HeapWord* addr = _bitMap->startWord() + offset; @@ -6886,10 +6919,11 @@ } // ...else the setting of klass will dirty the card anyway. } DEBUG_ONLY(}) - return; + return true; } } scanOopsInOop(addr); + return true; } // We take a break if we've been at this for a while, @@ -7023,10 +7057,10 @@ // Should revisit to see if this should be restructured for // greater efficiency. -void Par_MarkFromRootsClosure::do_bit(size_t offset) { +bool Par_MarkFromRootsClosure::do_bit(size_t offset) { if (_skip_bits > 0) { _skip_bits--; - return; + return true; } // convert offset into a HeapWord* HeapWord* addr = _bit_map->startWord() + offset; @@ -7041,10 +7075,11 @@ if (p->klass_or_null() == NULL || !p->is_parsable()) { // in the case of Clean-on-Enter optimization, redirty card // and avoid clearing card by increasing the threshold. 
- return; + return true; } } scan_oops_in_oop(addr); + return true; } void Par_MarkFromRootsClosure::scan_oops_in_oop(HeapWord* ptr) { @@ -7167,7 +7202,7 @@ // Should revisit to see if this should be restructured for // greater efficiency. -void MarkFromRootsVerifyClosure::do_bit(size_t offset) { +bool MarkFromRootsVerifyClosure::do_bit(size_t offset) { // convert offset into a HeapWord* HeapWord* addr = _verification_bm->startWord() + offset; assert(_verification_bm->endWord() && addr < _verification_bm->endWord(), @@ -7195,6 +7230,7 @@ new_oop->oop_iterate(&_pam_verify_closure); } assert(_mark_stack->isEmpty(), "tautology, emphasizing post-condition"); + return true; } PushAndMarkVerifyClosure::PushAndMarkVerifyClosure( @@ -7289,6 +7325,8 @@ _should_remember_klasses(collector->should_unload_classes()) { } +// Assumes thread-safe access by callers, who are +// responsible for mutual exclusion. void CMSCollector::lower_restart_addr(HeapWord* low) { assert(_span.contains(low), "Out of bounds addr"); if (_restart_addr == NULL) { @@ -7314,7 +7352,7 @@ // in CMSCollector's _restart_address. void Par_PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) { // We need to do this under a mutex to prevent other - // workers from interfering with the expansion below. + // workers from interfering with the work done below. MutexLockerEx ml(_overflow_stack->par_lock(), Mutex::_no_safepoint_check_flag); // Remember the least grey address discarded @@ -7438,8 +7476,12 @@ // Grey object rescan during pre-cleaning and second checkpoint phases -- // the non-parallel version (the parallel version appears further below.) void PushAndMarkClosure::do_oop(oop obj) { - // If _concurrent_precleaning, ignore mark word verification - assert(obj->is_oop_or_null(_concurrent_precleaning), + // Ignore mark word verification. If during concurrent precleaning, + // the object monitor may be locked. 
If during the checkpoint + // phases, the object may already have been reached by a different + // path and may be at the end of the global overflow list (so + // the mark word may be NULL). + assert(obj->is_oop_or_null(true /* ignore mark word */), "expected an oop or NULL"); HeapWord* addr = (HeapWord*)obj; // Check if oop points into the CMS generation
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Wed Oct 01 16:57:19 2008 -0700 +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -1327,7 +1327,7 @@ CMSMarkStack* markStack, CMSMarkStack* revisitStack, bool should_yield, bool verifying = false); - void do_bit(size_t offset); + bool do_bit(size_t offset); void reset(HeapWord* addr); inline void do_yield_check(); @@ -1363,7 +1363,7 @@ CMSMarkStack* overflow_stack, CMSMarkStack* revisit_stack, bool should_yield); - void do_bit(size_t offset); + bool do_bit(size_t offset); inline void do_yield_check(); private: @@ -1411,7 +1411,7 @@ CMSBitMap* verification_bm, CMSBitMap* cms_bm, CMSMarkStack* mark_stack); - void do_bit(size_t offset); + bool do_bit(size_t offset); void reset(HeapWord* addr); }; @@ -1420,8 +1420,9 @@ // "empty" (i.e. the bit vector doesn't have any 1-bits). class FalseBitMapClosure: public BitMapClosure { public: - void do_bit(size_t offset) { + bool do_bit(size_t offset) { guarantee(false, "Should not have a 1 bit"); + return true; } };
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -0,0 +1,195 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// A BufferingOops closure tries to separate out the cost of finding roots +// from the cost of applying closures to them. It maintains an array of +// ref-containing locations. Until the array is full, applying the closure +// to an oop* merely records that location in the array. Since this +// closure app cost is small, an elapsed timer can approximately attribute +// all of this cost to the cost of finding the roots. When the array fills +// up, the wrapped closure is applied to all elements, keeping track of +// this elapsed time of this process, and leaving the array empty. +// The caller must be sure to call "done" to process any unprocessed +// buffered entries. 
+ +class Generation; +class HeapRegion; + +class BufferingOopClosure: public OopClosure { +protected: + enum PrivateConstants { + BufferLength = 1024 + }; + + oop *_buffer[BufferLength]; + oop **_buffer_top; + oop **_buffer_curr; + + OopClosure *_oc; + double _closure_app_seconds; + + void process_buffer () { + + double start = os::elapsedTime(); + for (oop **curr = _buffer; curr < _buffer_curr; ++curr) { + _oc->do_oop(*curr); + } + _buffer_curr = _buffer; + _closure_app_seconds += (os::elapsedTime() - start); + } + +public: + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop *p) { + if (_buffer_curr == _buffer_top) { + process_buffer(); + } + + *_buffer_curr = p; + ++_buffer_curr; + } + void done () { + if (_buffer_curr > _buffer) { + process_buffer(); + } + } + double closure_app_seconds () { + return _closure_app_seconds; + } + BufferingOopClosure (OopClosure *oc) : + _oc(oc), + _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength), + _closure_app_seconds(0.0) { } +}; + +class BufferingOopsInGenClosure: public OopsInGenClosure { + BufferingOopClosure _boc; + OopsInGenClosure* _oc; +public: + BufferingOopsInGenClosure(OopsInGenClosure *oc) : + _boc(oc), _oc(oc) {} + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + virtual void do_oop(oop* p) { + assert(generation()->is_in_reserved(p), "Must be in!"); + _boc.do_oop(p); + } + + void done() { + _boc.done(); + } + + double closure_app_seconds () { + return _boc.closure_app_seconds(); + } + + void set_generation(Generation* gen) { + OopsInGenClosure::set_generation(gen); + _oc->set_generation(gen); + } + + void reset_generation() { + // Make sure we finish the current work with the current generation. 
+ _boc.done(); + OopsInGenClosure::reset_generation(); + _oc->reset_generation(); + } + +}; + + +class BufferingOopsInHeapRegionClosure: public OopsInHeapRegionClosure { +private: + enum PrivateConstants { + BufferLength = 1024 + }; + + oop *_buffer[BufferLength]; + oop **_buffer_top; + oop **_buffer_curr; + + HeapRegion *_hr_buffer[BufferLength]; + HeapRegion **_hr_curr; + + OopsInHeapRegionClosure *_oc; + double _closure_app_seconds; + + void process_buffer () { + + assert((_hr_curr - _hr_buffer) == (_buffer_curr - _buffer), + "the two lengths should be the same"); + + double start = os::elapsedTime(); + HeapRegion **hr_curr = _hr_buffer; + HeapRegion *hr_prev = NULL; + for (oop **curr = _buffer; curr < _buffer_curr; ++curr) { + HeapRegion *region = *hr_curr; + if (region != hr_prev) { + _oc->set_region(region); + hr_prev = region; + } + _oc->do_oop(*curr); + ++hr_curr; + } + _buffer_curr = _buffer; + _hr_curr = _hr_buffer; + _closure_app_seconds += (os::elapsedTime() - start); + } + +public: + virtual void do_oop(narrowOop *p) { + guarantee(false, "NYI"); + } + + virtual void do_oop(oop *p) { + if (_buffer_curr == _buffer_top) { + assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr"); + process_buffer(); + } + + *_buffer_curr = p; + ++_buffer_curr; + *_hr_curr = _from; + ++_hr_curr; + } + void done () { + if (_buffer_curr > _buffer) { + assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr"); + process_buffer(); + } + } + double closure_app_seconds () { + return _closure_app_seconds; + } + BufferingOopsInHeapRegionClosure (OopsInHeapRegionClosure *oc) : + _oc(oc), + _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength), + _hr_curr(_hr_buffer), + _closure_app_seconds(0.0) { } +};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -0,0 +1,409 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_collectionSetChooser.cpp.incl" + +CSetChooserCache::CSetChooserCache() { + for (int i = 0; i < CacheLength; ++i) + _cache[i] = NULL; + clear(); +} + +void CSetChooserCache::clear() { + _occupancy = 0; + _first = 0; + for (int i = 0; i < CacheLength; ++i) { + HeapRegion *hr = _cache[i]; + if (hr != NULL) + hr->set_sort_index(-1); + _cache[i] = NULL; + } +} + +#ifndef PRODUCT +bool CSetChooserCache::verify() { + int index = _first; + HeapRegion *prev = NULL; + for (int i = 0; i < _occupancy; ++i) { + guarantee(_cache[index] != NULL, "cache entry should not be empty"); + HeapRegion *hr = _cache[index]; + guarantee(!hr->is_young(), "should not be young!"); + if (prev != NULL) { + guarantee(prev->gc_efficiency() >= hr->gc_efficiency(), + "cache should be correctly ordered"); + } + guarantee(hr->sort_index() == get_sort_index(index), + "sort index should be correct"); + index = trim_index(index + 1); + prev = hr; + } + + for (int i = 0; i < (CacheLength - _occupancy); ++i) { + guarantee(_cache[index] == NULL, "cache entry should be empty"); + index = trim_index(index + 1); + } + + guarantee(index == _first, "we should have reached where we started from"); + return true; +} +#endif // PRODUCT + +void CSetChooserCache::insert(HeapRegion *hr) { + assert(!is_full(), "cache should not be empty"); + hr->calc_gc_efficiency(); + + int empty_index; + if (_occupancy == 0) { + empty_index = _first; + } else { + empty_index = trim_index(_first + _occupancy); + assert(_cache[empty_index] == NULL, "last slot should be empty"); + int last_index = trim_index(empty_index - 1); + HeapRegion *last = _cache[last_index]; + assert(last != NULL,"as the cache is not empty, last should not be empty"); + while (empty_index != _first && + last->gc_efficiency() < hr->gc_efficiency()) { + _cache[empty_index] = last; + last->set_sort_index(get_sort_index(empty_index)); + empty_index = last_index; + last_index = trim_index(last_index - 
1); + last = _cache[last_index]; + } + } + _cache[empty_index] = hr; + hr->set_sort_index(get_sort_index(empty_index)); + + ++_occupancy; + assert(verify(), "cache should be consistent"); +} + +HeapRegion *CSetChooserCache::remove_first() { + if (_occupancy > 0) { + assert(_cache[_first] != NULL, "cache should have at least one region"); + HeapRegion *ret = _cache[_first]; + _cache[_first] = NULL; + ret->set_sort_index(-1); + --_occupancy; + _first = trim_index(_first + 1); + assert(verify(), "cache should be consistent"); + return ret; + } else { + return NULL; + } +} + +// this is a bit expensive... but we expect that it should not be called +// to often. +void CSetChooserCache::remove(HeapRegion *hr) { + assert(_occupancy > 0, "cache should not be empty"); + assert(hr->sort_index() < -1, "should already be in the cache"); + int index = get_index(hr->sort_index()); + assert(_cache[index] == hr, "index should be correct"); + int next_index = trim_index(index + 1); + int last_index = trim_index(_first + _occupancy - 1); + while (index != last_index) { + assert(_cache[next_index] != NULL, "should not be null"); + _cache[index] = _cache[next_index]; + _cache[index]->set_sort_index(get_sort_index(index)); + + index = next_index; + next_index = trim_index(next_index+1); + } + assert(index == last_index, "should have reached the last one"); + _cache[index] = NULL; + hr->set_sort_index(-1); + --_occupancy; + assert(verify(), "cache should be consistent"); +} + +static inline int orderRegions(HeapRegion* hr1, HeapRegion* hr2) { + if (hr1 == NULL) { + if (hr2 == NULL) return 0; + else return 1; + } else if (hr2 == NULL) { + return -1; + } + if (hr2->gc_efficiency() < hr1->gc_efficiency()) return -1; + else if (hr1->gc_efficiency() < hr2->gc_efficiency()) return 1; + else return 0; +} + +static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) { + return orderRegions(*hr1p, *hr2p); +} + +CollectionSetChooser::CollectionSetChooser() : + // The line below is the worst 
bit of C++ hackery I've ever written + // (Detlefs, 11/23). You should think of it as equivalent to + // "_regions(100, true)": initialize the growable array and inform it + // that it should allocate its elem array(s) on the C heap. The first + // argument, however, is actually a comma expression (new-expr, 100). + // The purpose of the new_expr is to inform the growable array that it + // is *already* allocated on the C heap: it uses the placement syntax to + // keep it from actually doing any allocation. + _markedRegions((ResourceObj::operator new (sizeof(GrowableArray<HeapRegion*>), + (void*)&_markedRegions, + ResourceObj::C_HEAP), + 100), + true), + _curMarkedIndex(0), + _numMarkedRegions(0), + _unmarked_age_1_returned_as_new(false), + _first_par_unreserved_idx(0) +{} + + + +#ifndef PRODUCT +bool CollectionSetChooser::verify() { + int index = 0; + guarantee(_curMarkedIndex <= _numMarkedRegions, + "_curMarkedIndex should be within bounds"); + while (index < _curMarkedIndex) { + guarantee(_markedRegions.at(index++) == NULL, + "all entries before _curMarkedIndex should be NULL"); + } + HeapRegion *prev = NULL; + while (index < _numMarkedRegions) { + HeapRegion *curr = _markedRegions.at(index++); + if (curr != NULL) { + int si = curr->sort_index(); + guarantee(!curr->is_young(), "should not be young!"); + guarantee(si > -1 && si == (index-1), "sort index invariant"); + if (prev != NULL) { + guarantee(orderRegions(prev, curr) != 1, "regions should be sorted"); + } + prev = curr; + } + } + return _cache.verify(); +} +#endif + +bool +CollectionSetChooser::addRegionToCache() { + assert(!_cache.is_full(), "cache should not be full"); + + HeapRegion *hr = NULL; + while (hr == NULL && _curMarkedIndex < _numMarkedRegions) { + hr = _markedRegions.at(_curMarkedIndex++); + } + if (hr == NULL) + return false; + assert(!hr->is_young(), "should not be young!"); + assert(hr->sort_index() == _curMarkedIndex-1, "sort_index invariant"); + _markedRegions.at_put(hr->sort_index(), 
NULL); + _cache.insert(hr); + assert(!_cache.is_empty(), "cache should not be empty"); + assert(verify(), "cache should be consistent"); + return false; +} + +void +CollectionSetChooser::fillCache() { + while (!_cache.is_full() && addRegionToCache()) { + } +} + +void +CollectionSetChooser::sortMarkedHeapRegions() { + guarantee(_cache.is_empty(), "cache should be empty"); + // First trim any unused portion of the top in the parallel case. + if (_first_par_unreserved_idx > 0) { + if (G1PrintParCleanupStats) { + gclog_or_tty->print(" Truncating _markedRegions from %d to %d.\n", + _markedRegions.length(), _first_par_unreserved_idx); + } + assert(_first_par_unreserved_idx <= _markedRegions.length(), + "Or we didn't reserved enough length"); + _markedRegions.trunc_to(_first_par_unreserved_idx); + } + _markedRegions.sort(orderRegions); + assert(_numMarkedRegions <= _markedRegions.length(), "Requirement"); + assert(_numMarkedRegions == 0 + || _markedRegions.at(_numMarkedRegions-1) != NULL, + "Testing _numMarkedRegions"); + assert(_numMarkedRegions == _markedRegions.length() + || _markedRegions.at(_numMarkedRegions) == NULL, + "Testing _numMarkedRegions"); + if (G1PrintParCleanupStats) { + gclog_or_tty->print_cr(" Sorted %d marked regions.", _numMarkedRegions); + } + for (int i = 0; i < _numMarkedRegions; i++) { + assert(_markedRegions.at(i) != NULL, "Should be true by sorting!"); + _markedRegions.at(i)->set_sort_index(i); + if (G1PrintRegionLivenessInfo > 0) { + if (i == 0) gclog_or_tty->print_cr("Sorted marked regions:"); + if (i < G1PrintRegionLivenessInfo || + (_numMarkedRegions-i) < G1PrintRegionLivenessInfo) { + HeapRegion* hr = _markedRegions.at(i); + size_t u = hr->used(); + gclog_or_tty->print_cr(" Region %d: %d used, %d max live, %5.2f%%.", + i, u, hr->max_live_bytes(), + 100.0*(float)hr->max_live_bytes()/(float)u); + } + } + } + if (G1PolicyVerbose > 1) + printSortedHeapRegions(); + assert(verify(), "should now be sorted"); +} + +void +printHeapRegion(HeapRegion 
*hr) { + if (hr->isHumongous()) + gclog_or_tty->print("H: "); + if (hr->in_collection_set()) + gclog_or_tty->print("CS: "); + if (hr->popular()) + gclog_or_tty->print("pop: "); + gclog_or_tty->print_cr("Region " PTR_FORMAT " (%s%s) " + "[" PTR_FORMAT ", " PTR_FORMAT"] " + "Used: " SIZE_FORMAT "K, garbage: " SIZE_FORMAT "K.", + hr, hr->is_young() ? "Y " : " ", + hr->is_marked()? "M1" : "M0", + hr->bottom(), hr->end(), + hr->used()/K, hr->garbage_bytes()/K); +} + +void +CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) { + assert(!hr->isHumongous(), + "Humongous regions shouldn't be added to the collection set"); + assert(!hr->is_young(), "should not be young!"); + _markedRegions.append(hr); + _numMarkedRegions++; + hr->calc_gc_efficiency(); +} + +void +CollectionSetChooser:: +prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) { + _first_par_unreserved_idx = 0; + size_t max_waste = ParallelGCThreads * chunkSize; + // it should be aligned with respect to chunkSize + size_t aligned_n_regions = + (n_regions + (chunkSize - 1)) / chunkSize * chunkSize; + assert( aligned_n_regions % chunkSize == 0, "should be aligned" ); + _markedRegions.at_put_grow((int)(aligned_n_regions + max_waste - 1), NULL); +} + +jint +CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) { + jint res = Atomic::add(n_regions, &_first_par_unreserved_idx); + assert(_markedRegions.length() > res + n_regions - 1, + "Should already have been expanded"); + return res - n_regions; +} + +void +CollectionSetChooser::setMarkedHeapRegion(jint index, HeapRegion* hr) { + assert(_markedRegions.at(index) == NULL, "precondition"); + assert(!hr->is_young(), "should not be young!"); + _markedRegions.at_put(index, hr); + hr->calc_gc_efficiency(); +} + +void +CollectionSetChooser::incNumMarkedHeapRegions(jint inc_by) { + (void)Atomic::add(inc_by, &_numMarkedRegions); +} + +void +CollectionSetChooser::clearMarkedHeapRegions(){ + for (int i = 0; i < _markedRegions.length(); i++) { + 
HeapRegion* r = _markedRegions.at(i); + if (r != NULL) r->set_sort_index(-1); + } + _markedRegions.clear(); + _curMarkedIndex = 0; + _numMarkedRegions = 0; + _cache.clear(); +}; + +void +CollectionSetChooser::updateAfterFullCollection() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + clearMarkedHeapRegions(); +} + +void +CollectionSetChooser::printSortedHeapRegions() { + gclog_or_tty->print_cr("Printing %d Heap Regions sorted by amount of known garbage", + _numMarkedRegions); + for (int i = 0; i < _markedRegions.length(); i++) { + printHeapRegion(_markedRegions.at(i)); + } + gclog_or_tty->print_cr("Done sorted heap region print"); +} + +void CollectionSetChooser::removeRegion(HeapRegion *hr) { + int si = hr->sort_index(); + assert(si == -1 || hr->is_marked(), "Sort index not valid."); + if (si > -1) { + assert(_markedRegions.at(si) == hr, "Sort index not valid." ); + _markedRegions.at_put(si, NULL); + } else if (si < -1) { + assert(_cache.region_in_cache(hr), "should be in the cache"); + _cache.remove(hr); + assert(hr->sort_index() == -1, "sort index invariant"); + } + hr->set_sort_index(-1); +} + +// if time_remaining < 0.0, then this method should try to return +// a region, whether it fits within the remaining time or not +HeapRegion* +CollectionSetChooser::getNextMarkedRegion(double time_remaining, + double avg_prediction) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + G1CollectorPolicy* g1p = g1h->g1_policy(); + fillCache(); + if (_cache.is_empty()) { + assert(_curMarkedIndex == _numMarkedRegions, + "if cache is empty, list should also be empty"); + return NULL; + } + + HeapRegion *hr = _cache.get_first(); + assert(hr != NULL, "if cache not empty, first entry should be non-null"); + double predicted_time = g1h->predict_region_elapsed_time_ms(hr, false); + + if (g1p->adaptive_young_list_length()) { + if (time_remaining - predicted_time < 0.0) { + g1h->check_if_region_is_too_expensive(predicted_time); + return NULL; + } + } else { + if 
(predicted_time > 2.0 * avg_prediction) { + return NULL; + } + } + + HeapRegion *hr2 = _cache.remove_first(); + assert(hr == hr2, "cache contents should not have changed"); + + return hr; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -0,0 +1,138 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// We need to sort heap regions by collection desirability. 
+ +class CSetChooserCache { +private: + enum { + CacheLength = 16 + } PrivateConstants; + + HeapRegion* _cache[CacheLength]; + int _occupancy; // number of region in cache + int _first; // "first" region in the cache + + // adding CacheLength to deal with negative values + inline int trim_index(int index) { + return (index + CacheLength) % CacheLength; + } + + inline int get_sort_index(int index) { + return -index-2; + } + inline int get_index(int sort_index) { + return -sort_index-2; + } + +public: + CSetChooserCache(void); + + inline int occupancy(void) { return _occupancy; } + inline bool is_full() { return _occupancy == CacheLength; } + inline bool is_empty() { return _occupancy == 0; } + + void clear(void); + void insert(HeapRegion *hr); + HeapRegion *remove_first(void); + void remove (HeapRegion *hr); + inline HeapRegion *get_first(void) { + return _cache[_first]; + } + +#ifndef PRODUCT + bool verify (void); + bool region_in_cache(HeapRegion *hr) { + int sort_index = hr->sort_index(); + if (sort_index < -1) { + int index = get_index(sort_index); + guarantee(index < CacheLength, "should be within bounds"); + return _cache[index] == hr; + } else + return 0; + } +#endif // PRODUCT +}; + +class CollectionSetChooser: public CHeapObj { + + GrowableArray<HeapRegion*> _markedRegions; + int _curMarkedIndex; + int _numMarkedRegions; + CSetChooserCache _cache; + + // True iff last collection pause ran of out new "age 0" regions, and + // returned an "age 1" region. + bool _unmarked_age_1_returned_as_new; + + jint _first_par_unreserved_idx; + +public: + + HeapRegion* getNextMarkedRegion(double time_so_far, double avg_prediction); + + CollectionSetChooser(); + + void printSortedHeapRegions(); + + void sortMarkedHeapRegions(); + void fillCache(); + bool addRegionToCache(void); + void addMarkedHeapRegion(HeapRegion *hr); + + // Must be called before calls to getParMarkedHeapRegionChunk. + // "n_regions" is the number of regions, "chunkSize" the chunk size. 
+ void prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize); + // Returns the first index in a contiguous chunk of "n_regions" indexes + // that the calling thread has reserved. These must be set by the + // calling thread using "setMarkedHeapRegion" (to NULL if necessary). + jint getParMarkedHeapRegionChunk(jint n_regions); + // Set the marked array entry at index to hr. Careful to claim the index + // first if in parallel. + void setMarkedHeapRegion(jint index, HeapRegion* hr); + // Atomically increment the number of claimed regions by "inc_by". + void incNumMarkedHeapRegions(jint inc_by); + + void clearMarkedHeapRegions(); + + void updateAfterFullCollection(); + + // Ensure that "hr" is not a member of the marked region array or the cache + void removeRegion(HeapRegion* hr); + + bool unmarked_age_1_returned_as_new() { return _unmarked_age_1_returned_as_new; } + + // Returns true if the used portion of "_markedRegions" is properly + // sorted, otherwise asserts false. +#ifndef PRODUCT + bool verify(void); + bool regionProperlyOrdered(HeapRegion* r) { + int si = r->sort_index(); + return (si == -1) || + (si > -1 && _markedRegions.at(si) == r) || + (si < -1 && _cache.region_in_cache(r)); + } +#endif + +};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -0,0 +1,355 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentG1Refine.cpp.incl" + +bool ConcurrentG1Refine::_enabled = false; + +ConcurrentG1Refine::ConcurrentG1Refine() : + _pya(PYA_continue), _last_pya(PYA_continue), + _last_cards_during(), _first_traversal(false), + _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL), + _hot_cache(NULL), + _def_use_cache(false), _use_cache(false), + _n_periods(0), _total_cards(0), _total_travs(0) +{ + if (G1ConcRefine) { + _cg1rThread = new ConcurrentG1RefineThread(this); + assert(cg1rThread() != NULL, "Conc refine should have been created"); + assert(cg1rThread()->cg1r() == this, + "Conc refine thread should refer to this"); + } else { + _cg1rThread = NULL; + } +} + +void ConcurrentG1Refine::init() { + if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + _n_card_counts = + (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift); + _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts); + for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0; + ModRefBarrierSet* bs = g1h->mr_bs(); + guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); + CardTableModRefBS* ctbs = (CardTableModRefBS*)bs; + _ct_bot = ctbs->byte_for_const(g1h->reserved_region().start()); + if (G1ConcRSCountTraversals) { + _cur_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256); + _cum_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256); + for (int i = 0; i < 256; i++) { + _cur_card_count_histo[i] = 0; + _cum_card_count_histo[i] = 0; + } + } + } + if (G1ConcRSLogCacheSize > 0) { + _def_use_cache = true; + _use_cache = true; + _hot_cache_size = (1 << G1ConcRSLogCacheSize); + _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size); + _n_hot = 0; + _hot_cache_idx = 0; + } +} + +ConcurrentG1Refine::~ConcurrentG1Refine() { + if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { + assert(_card_counts != NULL, "Logic"); + 
FREE_C_HEAP_ARRAY(unsigned char, _card_counts);
+ assert(_cur_card_count_histo != NULL, "Logic");
+ FREE_C_HEAP_ARRAY(unsigned, _cur_card_count_histo);
+ assert(_cum_card_count_histo != NULL, "Logic");
+ FREE_C_HEAP_ARRAY(unsigned, _cum_card_count_histo);
+ }
+ if (G1ConcRSLogCacheSize > 0) {
+ assert(_hot_cache != NULL, "Logic");
+ FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
+ }
+}
+
+// Do one concurrent refinement pass over the card table. Returns true
+// if the heuristic below decides another pass should follow
+// immediately, false if the caller should sleep first.
+bool ConcurrentG1Refine::refine() {
+ G1CollectedHeap* g1h = G1CollectedHeap::heap();
+ unsigned cards_before = g1h->g1_rem_set()->conc_refine_cards();
+ clear_hot_cache();  // Any previous values in this are now invalid.
+ g1h->g1_rem_set()->concurrentRefinementPass(this);
+ _traversals++;
+ unsigned cards_after = g1h->g1_rem_set()->conc_refine_cards();
+ unsigned cards_during = cards_after-cards_before;
+ // If this is the first traversal in the current enabling
+ // and we did some cards, or if the number of cards found is decreasing
+ // sufficiently quickly, then keep going. Otherwise, sleep a while.
+ bool res =
+ (_first_traversal && cards_during > 0)
+ ||
+ (!_first_traversal && cards_during * 3 < _last_cards_during * 2);
+ _last_cards_during = cards_during;
+ _first_traversal = false;
+ return res;
+}
+
+// Enable concurrent refinement, waking any thread blocked in
+// wait_for_ConcurrentG1Refine_enabled(). Resets the per-episode
+// heuristic state (_first_traversal / _last_cards_during).
+void ConcurrentG1Refine::enable() {
+ MutexLocker x(G1ConcRefine_mon);
+ if (!_enabled) {
+ _enabled = true;
+ _first_traversal = true; _last_cards_during = 0;
+ G1ConcRefine_mon->notify_all();
+ }
+}
+
+// Disable concurrent refinement. Returns the number of traversals
+// performed since the matching enable(), or 0 if already disabled.
+unsigned ConcurrentG1Refine::disable() {
+ MutexLocker x(G1ConcRefine_mon);
+ if (_enabled) {
+ _enabled = false;
+ return _traversals;
+ } else {
+ return 0;
+ }
+}
+
+// Block until refinement is enabled, then zero the traversal count
+// for the new refinement episode.
+void ConcurrentG1Refine::wait_for_ConcurrentG1Refine_enabled() {
+ G1ConcRefine_mon->lock();
+ while (!_enabled) {
+ G1ConcRefine_mon->wait(Mutex::_no_safepoint_check_flag);
+ }
+ G1ConcRefine_mon->unlock();
+ _traversals = 0;
+};
+
+void ConcurrentG1Refine::set_pya_restart() {
+ // If we're using the log-based RS barrier, the above will cause
+ // in-progress traversals of completed log buffers to quit early; we 
will + // also abandon all other buffers. + if (G1RSBarrierUseQueue) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + dcqs.abandon_logs(); + if (_cg1rThread->do_traversal()) { + _pya = PYA_restart; + } else { + _cg1rThread->set_do_traversal(true); + // Reset the post-yield actions. + _pya = PYA_continue; + _last_pya = PYA_continue; + } + } else { + _pya = PYA_restart; + } +} + +void ConcurrentG1Refine::set_pya_cancel() { + _pya = PYA_cancel; +} + +PostYieldAction ConcurrentG1Refine::get_pya() { + if (_pya != PYA_continue) { + jint val = _pya; + while (true) { + jint val_read = Atomic::cmpxchg(PYA_continue, &_pya, val); + if (val_read == val) { + PostYieldAction res = (PostYieldAction)val; + assert(res != PYA_continue, "Only the refine thread should reset."); + _last_pya = res; + return res; + } else { + val = val_read; + } + } + } + // QQQ WELL WHAT DO WE RETURN HERE??? + // make up something! + return PYA_continue; +} + +PostYieldAction ConcurrentG1Refine::get_last_pya() { + PostYieldAction res = _last_pya; + _last_pya = PYA_continue; + return res; +} + +bool ConcurrentG1Refine::do_traversal() { + return _cg1rThread->do_traversal(); +} + +int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) { + size_t card_num = (card_ptr - _ct_bot); + guarantee(0 <= card_num && card_num < _n_card_counts, "Bounds"); + unsigned char cnt = _card_counts[card_num]; + if (cnt < 255) _card_counts[card_num]++; + return cnt; + _total_travs++; +} + +jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) { + int count = add_card_count(card_ptr); + // Count previously unvisited cards. + if (count == 0) _total_cards++; + // We'll assume a traversal unless we store it in the cache. + if (count < G1ConcRSHotCardLimit) { + _total_travs++; + return card_ptr; + } + // Otherwise, it's hot. 
+ jbyte* res = NULL;
+ MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
+ if (_n_hot == _hot_cache_size) {
+ // Cache is full: evict the entry at the insertion point (the oldest)
+ // and hand it back to the caller for immediate refinement.
+ _total_travs++;
+ res = _hot_cache[_hot_cache_idx];
+ _n_hot--;
+ }
+ // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
+ _hot_cache[_hot_cache_idx] = card_ptr;
+ _hot_cache_idx++;
+ if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
+ _n_hot++;
+ return res;
+}
+
+
+// Refine every card still sitting in the hot cache (newest-to-oldest
+// from the slot before the insertion point, wrapping around the
+// circular buffer), then empty the cache. Only legal once caching has
+// been turned off.
+void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
+ assert(!use_cache(), "cache should be disabled");
+ int start_ind = _hot_cache_idx-1;
+ for (int i = 0; i < _n_hot; i++) {
+ int ind = start_ind - i;
+ // Wrap negative indices back into the circular buffer.
+ if (ind < 0) ind = ind + _hot_cache_size;
+ jbyte* entry = _hot_cache[ind];
+ if (entry != NULL) {
+ g1rs->concurrentRefineOneCard(entry, worker_i);
+ }
+ }
+ _n_hot = 0;
+ _hot_cache_idx = 0;
+}
+
+// Zero the per-card counts for the next period; when traversal
+// counting is on, first fold the counts into the current histogram
+// (indexed by saturating count value 0..255) and print it.
+void ConcurrentG1Refine::clear_and_record_card_counts() {
+ if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return;
+ _n_periods++;
+ if (G1ConcRSCountTraversals) {
+ for (size_t i = 0; i < _n_card_counts; i++) {
+ unsigned char bucket = _card_counts[i];
+ _cur_card_count_histo[bucket]++;
+ _card_counts[i] = 0;
+ }
+ gclog_or_tty->print_cr("Card counts:");
+ for (int i = 0; i < 256; i++) {
+ if (_cur_card_count_histo[i] > 0) {
+ gclog_or_tty->print_cr("  %3d: %9d", i, _cur_card_count_histo[i]);
+ _cum_card_count_histo[i] += _cur_card_count_histo[i];
+ _cur_card_count_histo[i] = 0;
+ }
+ }
+ } else {
+ assert(G1ConcRSLogCacheSize > 0, "Logic");
+ // Bulk-zero the byte array a word at a time.
+ // NOTE(review): this clears _n_card_counts/HeapWordSize whole words;
+ // if _n_card_counts is not word-aligned the trailing bytes are left
+ // uncleared -- confirm against how _n_card_counts is computed in init().
+ Copy::fill_to_words((HeapWord*)(&_card_counts[0]),
+ _n_card_counts / HeapWordSize);
+ }
+}
+
+// Print one row of the card-count histogram covering buckets
+// [from, to), accumulating cumulative percentages into the two
+// reference parameters.
+void
+ConcurrentG1Refine::
+print_card_count_histo_range(unsigned* histo, int from, int to,
+ float& cum_card_pct,
+ float& cum_travs_pct) {
+ unsigned cards = 0;
+ unsigned travs = 0;
+ guarantee(to <= 256, "Precondition");
+ for (int i = from; i < to-1; i++) {
+ cards += histo[i];
+ travs += histo[i] * i;
+ }
+ if (to == 256) {
+ // NOTE(review): histo_card_sum appears to be unused below --
+ // candidate for removal.
+ unsigned histo_card_sum = 0;
+ unsigned histo_trav_sum = 
0;
+ for (int i = 1; i < 255; i++) {
+ histo_trav_sum += histo[i] * i;
+ }
+ cards += histo[255];
+ // correct traversals for the last one.
+ // Bucket 255 saturates, so its true traversal count cannot be read
+ // from the histogram; recover it as the grand total minus the sum
+ // contributed by all the other buckets.
+ unsigned travs_255 = (unsigned) (_total_travs - histo_trav_sum);
+ travs += travs_255;
+
+ } else {
+ cards += histo[to-1];
+ travs += histo[to-1] * (to-1);
+ }
+ // All figures below are per-period averages over _n_periods.
+ float fperiods = (float)_n_periods;
+ float f_tot_cards = (float)_total_cards/fperiods;
+ float f_tot_travs = (float)_total_travs/fperiods;
+ if (cards > 0) {
+ float fcards = (float)cards/fperiods;
+ float ftravs = (float)travs/fperiods;
+ if (to == 256) {
+ gclog_or_tty->print("    %4d-     %10.2f%10.2f", from, fcards, ftravs);
+ } else {
+ gclog_or_tty->print("    %4d-%4d %10.2f%10.2f", from, to-1, fcards, ftravs);
+ }
+ float pct_cards = fcards*100.0/f_tot_cards;
+ cum_card_pct += pct_cards;
+ float pct_travs = ftravs*100.0/f_tot_travs;
+ cum_travs_pct += pct_travs;
+ gclog_or_tty->print_cr("%10.2f%10.2f%10.2f%10.2f",
+ pct_cards, cum_card_pct,
+ pct_travs, cum_travs_pct);
+ }
+}
+
+// Print the cumulative card-count statistics gathered over the whole
+// run; no-op unless G1ConcRSCountTraversals is enabled.
+void ConcurrentG1Refine::print_final_card_counts() {
+ if (!G1ConcRSCountTraversals) return;
+
+ gclog_or_tty->print_cr("Did %d total traversals of %d distinct cards.",
+ _total_travs, _total_cards);
+ float fperiods = (float)_n_periods;
+ gclog_or_tty->print_cr("  This is an average of %8.2f traversals, %8.2f cards, "
+ "per collection.", (float)_total_travs/fperiods,
+ (float)_total_cards/fperiods);
+ gclog_or_tty->print_cr("  This is an average of %8.2f traversals/distinct "
+ "dirty card.\n",
+ _total_cards > 0 ? 
+ (float)_total_travs/(float)_total_cards : 0.0); + + + gclog_or_tty->print_cr("Histogram:\n\n%10s %10s%10s%10s%10s%10s%10s", + "range", "# cards", "# travs", "% cards", "(cum)", + "% travs", "(cum)"); + gclog_or_tty->print_cr("------------------------------------------------------------" + "-------------"); + float cum_cards_pct = 0.0; + float cum_travs_pct = 0.0; + for (int i = 1; i < 10; i++) { + print_card_count_histo_range(_cum_card_count_histo, i, i+1, + cum_cards_pct, cum_travs_pct); + } + for (int i = 10; i < 100; i += 10) { + print_card_count_histo_range(_cum_card_count_histo, i, i+10, + cum_cards_pct, cum_travs_pct); + } + print_card_count_histo_range(_cum_card_count_histo, 100, 150, + cum_cards_pct, cum_travs_pct); + print_card_count_histo_range(_cum_card_count_histo, 150, 200, + cum_cards_pct, cum_travs_pct); + print_card_count_histo_range(_cum_card_count_histo, 150, 255, + cum_cards_pct, cum_travs_pct); + print_card_count_histo_range(_cum_card_count_histo, 255, 256, + cum_cards_pct, cum_travs_pct); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -0,0 +1,132 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Forward decl +class ConcurrentG1RefineThread; +class G1RemSet; + +// What to do after a yield: +enum PostYieldAction { + PYA_continue, // Continue the traversal + PYA_restart, // Restart + PYA_cancel // It's been completed by somebody else: cancel. +}; + +class ConcurrentG1Refine { + ConcurrentG1RefineThread* _cg1rThread; + + volatile jint _pya; + PostYieldAction _last_pya; + + static bool _enabled; // Protected by G1ConcRefine_mon. + unsigned _traversals; + + // Number of cards processed during last refinement traversal. + unsigned _first_traversal; + unsigned _last_cards_during; + + // The cache for card refinement. 
+ bool _use_cache; + bool _def_use_cache; + size_t _n_periods; + size_t _total_cards; + size_t _total_travs; + + unsigned char* _card_counts; + unsigned _n_card_counts; + const jbyte* _ct_bot; + unsigned* _cur_card_count_histo; + unsigned* _cum_card_count_histo; + jbyte** _hot_cache; + int _hot_cache_size; + int _n_hot; + int _hot_cache_idx; + + // Returns the count of this card after incrementing it. + int add_card_count(jbyte* card_ptr); + + void print_card_count_histo_range(unsigned* histo, int from, int to, + float& cum_card_pct, + float& cum_travs_pct); + public: + ConcurrentG1Refine(); + ~ConcurrentG1Refine(); + + void init(); // Accomplish some initialization that has to wait. + + // Enabled Conc refinement, waking up thread if necessary. + void enable(); + + // Returns the number of traversals performed since this refiner was enabled. + unsigned disable(); + + // Requires G1ConcRefine_mon to be held. + bool enabled() { return _enabled; } + + // Returns only when G1 concurrent refinement has been enabled. + void wait_for_ConcurrentG1Refine_enabled(); + + // Do one concurrent refinement pass over the card table. Returns "true" + // if heuristics determine that another pass should be done immediately. + bool refine(); + + // Indicate that an in-progress refinement pass should start over. + void set_pya_restart(); + // Indicate that an in-progress refinement pass should quit. + void set_pya_cancel(); + + // Get the appropriate post-yield action. Also sets last_pya. + PostYieldAction get_pya(); + + // The last PYA read by "get_pya". + PostYieldAction get_last_pya(); + + bool do_traversal(); + + ConcurrentG1RefineThread* cg1rThread() { return _cg1rThread; } + + // If this is the first entry for the slot, writes into the cache and + // returns NULL. If it causes an eviction, returns the evicted pointer. + // Otherwise, its a cache hit, and returns NULL. + jbyte* cache_insert(jbyte* card_ptr); + + // Process the cached entries. 
+ void clean_up_cache(int worker_i, G1RemSet* g1rs); + + // Discard entries in the hot cache. + void clear_hot_cache() { + _hot_cache_idx = 0; _n_hot = 0; + } + + bool hot_cache_is_empty() { return _n_hot == 0; } + + bool use_cache() { return _use_cache; } + void set_use_cache(bool b) { + if (b) _use_cache = _def_use_cache; + else _use_cache = false; + } + + void clear_and_record_card_counts(); + void print_final_card_counts(); +};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -0,0 +1,246 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentG1RefineThread.cpp.incl" + +// ======= Concurrent Mark Thread ======== + +// The CM thread is created when the G1 garbage collector is used + +ConcurrentG1RefineThread:: +ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) : + ConcurrentGCThread(), + _cg1r(cg1r), + _started(false), + _in_progress(false), + _do_traversal(false), + _vtime_accum(0.0), + _co_tracker(G1CRGroup), + _interval_ms(5.0) +{ + create_and_start(); +} + +const long timeout = 200; // ms. 
+
+// Refinement driven by whole-card-table traversals: run passes while
+// refinement is enabled, sleeping (with a timeout) between passes when
+// refine()'s heuristic says the yield is not paying off.
+void ConcurrentG1RefineThread::traversalBasedRefinement() {
+ _cg1r->wait_for_ConcurrentG1Refine_enabled();
+ MutexLocker x(G1ConcRefine_mon);
+ while (_cg1r->enabled()) {
+ // Drop the monitor while doing the pass itself.
+ MutexUnlocker ux(G1ConcRefine_mon);
+ ResourceMark rm;
+ HandleMark hm;
+
+ if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine starting pass");
+ _sts.join();
+ bool no_sleep = _cg1r->refine();
+ _sts.leave();
+ if (!no_sleep) {
+ MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
+ // We do this only for the timeout; we don't expect this to be signalled.
+ CGC_lock->wait(Mutex::_no_safepoint_check_flag, timeout);
+ }
+ }
+}
+
+// Refinement driven by the dirty-card queue set: wait for completed
+// log buffers (or a requested full traversal, or termination), then
+// process them.
+void ConcurrentG1RefineThread::queueBasedRefinement() {
+ DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+ // Wait for completed log buffers to exist.
+ {
+ MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+ while (!_do_traversal && !dcqs.process_completed_buffers() &&
+ !_should_terminate) {
+ DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
+ }
+ }
+
+ if (_should_terminate) {
+ return;
+ }
+
+ // Now we take them off (this doesn't hold locks while it applies
+ // closures.) (If we did a full collection, then we'll do a full
+ // traversal.
+ _sts.join();
+ if (_do_traversal) {
+ (void)_cg1r->refine();
+ switch (_cg1r->get_last_pya()) {
+ case PYA_cancel: case PYA_continue:
+ // Continue was caught and handled inside "refine". If it's still
+ // "continue" when we get here, we're done.
+ _do_traversal = false;
+ break;
+ case PYA_restart:
+ assert(_do_traversal, "Because of Full GC.");
+ break;
+ }
+ } else {
+ int n_logs = 0;
+ int lower_limit = 0;
+ double start_vtime_sec; // only used when G1SmoothConcRefine is on
+ int prev_buffer_num; // only used when G1SmoothConcRefine is on
+
+ if (G1SmoothConcRefine) {
+ lower_limit = 0;
+ start_vtime_sec = os::elapsedVTime();
+ prev_buffer_num = (int) dcqs.completed_buffers_num();
+ } else {
+ lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now. 
+ } + while (dcqs.apply_closure_to_completed_buffer(0, lower_limit)) { + double end_vtime_sec; + double elapsed_vtime_sec; + int elapsed_vtime_ms; + int curr_buffer_num; + + if (G1SmoothConcRefine) { + end_vtime_sec = os::elapsedVTime(); + elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; + elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0); + curr_buffer_num = (int) dcqs.completed_buffers_num(); + + if (curr_buffer_num > prev_buffer_num || + curr_buffer_num > DCQBarrierProcessCompletedThreshold) { + decreaseInterval(elapsed_vtime_ms); + } else if (curr_buffer_num < prev_buffer_num) { + increaseInterval(elapsed_vtime_ms); + } + } + + sample_young_list_rs_lengths(); + _co_tracker.update(false); + + if (G1SmoothConcRefine) { + start_vtime_sec = os::elapsedVTime(); + prev_buffer_num = curr_buffer_num; + + _sts.leave(); + os::sleep(Thread::current(), (jlong) _interval_ms, false); + _sts.join(); + } + + n_logs++; + } + // Make sure we harvest the PYA, if any. + (void)_cg1r->get_pya(); + } + _sts.leave(); +} + +void ConcurrentG1RefineThread::sample_young_list_rs_lengths() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + G1CollectorPolicy* g1p = g1h->g1_policy(); + if (g1p->adaptive_young_list_length()) { + int regions_visited = 0; + + g1h->young_list_rs_length_sampling_init(); + while (g1h->young_list_rs_length_sampling_more()) { + g1h->young_list_rs_length_sampling_next(); + ++regions_visited; + + // we try to yield every time we visit 10 regions + if (regions_visited == 10) { + if (_sts.should_yield()) { + _sts.yield("G1 refine"); + // we just abandon the iteration + break; + } + regions_visited = 0; + } + } + + g1p->check_prediction_validity(); + } +} + +void ConcurrentG1RefineThread::run() { + initialize_in_thread(); + _vtime_start = os::elapsedVTime(); + wait_for_universe_init(); + + _co_tracker.enable(); + _co_tracker.start(); + + while (!_should_terminate) { + // wait until started is set. 
+ if (G1RSBarrierUseQueue) {
+ queueBasedRefinement();
+ } else {
+ traversalBasedRefinement();
+ }
+ _sts.join();
+ _co_tracker.update();
+ _sts.leave();
+ // Track accumulated virtual (CPU) time for this thread, when the
+ // platform supports it.
+ if (os::supports_vtime()) {
+ _vtime_accum = (os::elapsedVTime() - _vtime_start);
+ } else {
+ _vtime_accum = 0.0;
+ }
+ }
+ _sts.join();
+ _co_tracker.update(true);
+ _sts.leave();
+ assert(_should_terminate, "just checking");
+
+ terminate();
+}
+
+
+// Yield to a pending safepoint via the suspendible thread set.
+void ConcurrentG1RefineThread::yield() {
+ if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield");
+ _sts.yield("G1 refine");
+ if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield-end");
+}
+
+// Request termination, wake the thread out of its buffer wait, and
+// block until it has actually terminated.
+// NOTE(review): the header declares this as "static void stop()", yet
+// it reads/writes what look like instance members (_should_terminate,
+// _has_terminated) -- confirm these are accessible in a static context.
+void ConcurrentG1RefineThread::stop() {
+ // it is ok to take late safepoints here, if needed
+ {
+ MutexLockerEx mu(Terminator_lock);
+ _should_terminate = true;
+ }
+
+ {
+ // Wake the thread if it is blocked waiting for completed buffers.
+ MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+ DirtyCardQ_CBL_mon->notify_all();
+ }
+
+ {
+ MutexLockerEx mu(Terminator_lock);
+ while (!_has_terminated) {
+ Terminator_lock->wait();
+ }
+ }
+ if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-stop");
+}
+
+void ConcurrentG1RefineThread::print() {
+ gclog_or_tty->print("\"Concurrent G1 Refinement Thread\" ");
+ Thread::print();
+ gclog_or_tty->cr();
+}
+
+void ConcurrentG1RefineThread::set_do_traversal(bool b) {
+ _do_traversal = b;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp Wed Oct 01 16:57:40 2008 -0700 @@ -0,0 +1,104 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Forward Decl. +class ConcurrentG1Refine; + +// The G1 Concurrent Refinement Thread (could be several in the future). + +class ConcurrentG1RefineThread: public ConcurrentGCThread { + friend class VMStructs; + friend class G1CollectedHeap; + + double _vtime_start; // Initial virtual time. + double _vtime_accum; // Initial virtual time. 
+ + public: + virtual void run(); + + private: + ConcurrentG1Refine* _cg1r; + bool _started; + bool _in_progress; + volatile bool _restart; + + COTracker _co_tracker; + double _interval_ms; + + bool _do_traversal; + + void decreaseInterval(int processing_time_ms) { + double min_interval_ms = (double) processing_time_ms; + _interval_ms = 0.8 * _interval_ms; + if (_interval_ms < min_interval_ms) + _interval_ms = min_interval_ms; + } + void increaseInterval(int processing_time_ms) { + double max_interval_ms = 9.0 * (double) processing_time_ms; + _interval_ms = 1.1 * _interval_ms; + if (max_interval_ms > 0 && _interval_ms > max_interval_ms) + _interval_ms = max_interval_ms; + } + + void sleepBeforeNextCycle(); + + void traversalBasedRefinement(); + + void queueBasedRefinement(); + + // For use by G1CollectedHeap, which is a friend. + static SuspendibleThreadSet* sts() { return &_sts; } + + public: + // Constructor + ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r); + + // Printing + void print(); + + // Total virtual time so far. + double vtime_accum() { return _vtime_accum; } + + ConcurrentG1Refine* cg1r() { return _cg1r; } + + + void set_started() { _started = true; } + void clear_started() { _started = false; } + bool started() { return _started; } + + void set_in_progress() { _in_progress = true; } + void clear_in_progress() { _in_progress = false; } + bool in_progress() { return _in_progress; } + + void set_do_traversal(bool b); + bool do_traversal() { return _do_traversal; } + + void sample_young_list_rs_lengths(); + + // Yield for GC + void yield(); + + // shutdown + static void stop(); +};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Wed Oct 01 16:57:40 2008 -0700 @@ -0,0 +1,3979 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentMark.cpp.incl" + +// +// CMS Bit Map Wrapper + +CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter): + _bm((uintptr_t*)NULL,0), + _shifter(shifter) { + _bmStartWord = (HeapWord*)(rs.base()); + _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes + ReservedSpace brs(ReservedSpace::allocation_align_size_up( + (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1)); + + guarantee(brs.is_reserved(), "couldn't allocate CMS bit map"); + // For now we'll just commit all of the bit map up fromt. + // Later on we'll try to be more parsimonious with swap. 
+ guarantee(_virtual_space.initialize(brs, brs.size()), + "couldn't reseve backing store for CMS bit map"); + assert(_virtual_space.committed_size() == brs.size(), + "didn't reserve backing store for all of CMS bit map?"); + _bm.set_map((uintptr_t*)_virtual_space.low()); + assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= + _bmWordSize, "inconsistency in bit map sizing"); + _bm.set_size(_bmWordSize >> _shifter); +} + +HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr, + HeapWord* limit) const { + // First we must round addr *up* to a possible object boundary. + addr = (HeapWord*)align_size_up((intptr_t)addr, + HeapWordSize << _shifter); + size_t addrOffset = heapWordToOffset(addr); + if (limit == NULL) limit = _bmStartWord + _bmWordSize; + size_t limitOffset = heapWordToOffset(limit); + size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); + HeapWord* nextAddr = offsetToHeapWord(nextOffset); + assert(nextAddr >= addr, "get_next_one postcondition"); + assert(nextAddr == limit || isMarked(nextAddr), + "get_next_one postcondition"); + return nextAddr; +} + +HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr, + HeapWord* limit) const { + size_t addrOffset = heapWordToOffset(addr); + if (limit == NULL) limit = _bmStartWord + _bmWordSize; + size_t limitOffset = heapWordToOffset(limit); + size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset); + HeapWord* nextAddr = offsetToHeapWord(nextOffset); + assert(nextAddr >= addr, "get_next_one postcondition"); + assert(nextAddr == limit || !isMarked(nextAddr), + "get_next_one postcondition"); + return nextAddr; +} + +int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { + assert((diff & ((1 << _shifter) - 1)) == 0, "argument check"); + return (int) (diff >> _shifter); +} + +bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) { + HeapWord* left = MAX2(_bmStartWord, mr.start()); + HeapWord* right = MIN2(_bmStartWord + _bmWordSize, 
mr.end()); + if (right > left) { + // Right-open interval [leftOffset, rightOffset). + return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right)); + } else { + return true; + } +} + +void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap, + size_t from_start_index, + HeapWord* to_start_word, + size_t word_num) { + _bm.mostly_disjoint_range_union(from_bitmap, + from_start_index, + heapWordToOffset(to_start_word), + word_num); +} + +#ifndef PRODUCT +bool CMBitMapRO::covers(ReservedSpace rs) const { + // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); + assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize, + "size inconsistency"); + return _bmStartWord == (HeapWord*)(rs.base()) && + _bmWordSize == rs.size()>>LogHeapWordSize; +} +#endif + +void CMBitMap::clearAll() { + _bm.clear(); + return; +} + +void CMBitMap::markRange(MemRegion mr) { + mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); + assert(!mr.is_empty(), "unexpected empty region"); + assert((offsetToHeapWord(heapWordToOffset(mr.end())) == + ((HeapWord *) mr.end())), + "markRange memory region end is not card aligned"); + // convert address range into offset range + _bm.at_put_range(heapWordToOffset(mr.start()), + heapWordToOffset(mr.end()), true); +} + +void CMBitMap::clearRange(MemRegion mr) { + mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); + assert(!mr.is_empty(), "unexpected empty region"); + // convert address range into offset range + _bm.at_put_range(heapWordToOffset(mr.start()), + heapWordToOffset(mr.end()), false); +} + +MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr, + HeapWord* end_addr) { + HeapWord* start = getNextMarkedWordAddress(addr); + start = MIN2(start, end_addr); + HeapWord* end = getNextUnmarkedWordAddress(start); + end = MIN2(end, end_addr); + assert(start <= end, "Consistency check"); + MemRegion mr(start, end); + if (!mr.is_empty()) { + clearRange(mr); + } + return mr; +} + +CMMarkStack::CMMarkStack(ConcurrentMark* 
cm) : + _base(NULL), _cm(cm) +#ifdef ASSERT + , _drain_in_progress(false) + , _drain_in_progress_yields(false) +#endif +{} + +void CMMarkStack::allocate(size_t size) { + _base = NEW_C_HEAP_ARRAY(oop, size); + if (_base == NULL) + vm_exit_during_initialization("Failed to allocate " + "CM region mark stack"); + _index = 0; + // QQQQ cast ... + _capacity = (jint) size; + _oops_do_bound = -1; + NOT_PRODUCT(_max_depth = 0); +} + +CMMarkStack::~CMMarkStack() { + if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); +} + +void CMMarkStack::par_push(oop ptr) { + while (true) { + if (isFull()) { + _overflow = true; + return; + } + // Otherwise... + jint index = _index; + jint next_index = index+1; + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + _base[index] = ptr; + // Note that we don't maintain this atomically. We could, but it + // doesn't seem necessary. + NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); + return; + } + // Otherwise, we need to try again. + } +} + +void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) { + while (true) { + if (isFull()) { + _overflow = true; + return; + } + // Otherwise... + jint index = _index; + jint next_index = index + n; + if (next_index > _capacity) { + _overflow = true; + return; + } + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + for (int i = 0; i < n; i++) { + int ind = index + i; + assert(ind < _capacity, "By overflow test above."); + _base[ind] = ptr_arr[i]; + } + NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); + return; + } + // Otherwise, we need to try again. + } +} + + +void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + jint start = _index; + jint next_index = start + n; + if (next_index > _capacity) { + _overflow = true; + return; + } + // Otherwise. 
+ _index = next_index; + for (int i = 0; i < n; i++) { + int ind = start + i; + guarantee(ind < _capacity, "By overflow test above."); + _base[ind] = ptr_arr[i]; + } +} + + +bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + jint index = _index; + if (index == 0) { + *n = 0; + return false; + } else { + int k = MIN2(max, index); + jint new_ind = index - k; + for (int j = 0; j < k; j++) { + ptr_arr[j] = _base[new_ind + j]; + } + _index = new_ind; + *n = k; + return true; + } +} + + +CMRegionStack::CMRegionStack() : _base(NULL) {} + +void CMRegionStack::allocate(size_t size) { + _base = NEW_C_HEAP_ARRAY(MemRegion, size); + if (_base == NULL) + vm_exit_during_initialization("Failed to allocate " + "CM region mark stack"); + _index = 0; + // QQQQ cast ... + _capacity = (jint) size; +} + +CMRegionStack::~CMRegionStack() { + if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); +} + +void CMRegionStack::push(MemRegion mr) { + assert(mr.word_size() > 0, "Precondition"); + while (true) { + if (isFull()) { + _overflow = true; + return; + } + // Otherwise... + jint index = _index; + jint next_index = index+1; + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + _base[index] = mr; + return; + } + // Otherwise, we need to try again. + } +} + +MemRegion CMRegionStack::pop() { + while (true) { + // Otherwise... + jint index = _index; + + if (index == 0) { + return MemRegion(); + } + jint next_index = index-1; + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + MemRegion mr = _base[next_index]; + if (mr.start() != NULL) { + tmp_guarantee_CM( mr.end() != NULL, "invariant" ); + tmp_guarantee_CM( mr.word_size() > 0, "invariant" ); + return mr; + } else { + // that entry was invalidated... let's skip it + tmp_guarantee_CM( mr.end() == NULL, "invariant" ); + } + } + // Otherwise, we need to try again. 
+ } +} + +bool CMRegionStack::invalidate_entries_into_cset() { + bool result = false; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + for (int i = 0; i < _oops_do_bound; ++i) { + MemRegion mr = _base[i]; + if (mr.start() != NULL) { + tmp_guarantee_CM( mr.end() != NULL, "invariant"); + tmp_guarantee_CM( mr.word_size() > 0, "invariant" ); + HeapRegion* hr = g1h->heap_region_containing(mr.start()); + tmp_guarantee_CM( hr != NULL, "invariant" ); + if (hr->in_collection_set()) { + // The region points into the collection set + _base[i] = MemRegion(); + result = true; + } + } else { + // that entry was invalidated... let's skip it + tmp_guarantee_CM( mr.end() == NULL, "invariant" ); + } + } + return result; +} + +template<class OopClosureClass> +bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) { + assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after + || SafepointSynchronize::is_at_safepoint(), + "Drain recursion must be yield-safe."); + bool res = true; + debug_only(_drain_in_progress = true); + debug_only(_drain_in_progress_yields = yield_after); + while (!isEmpty()) { + oop newOop = pop(); + assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop"); + assert(newOop->is_oop(), "Expected an oop"); + assert(bm == NULL || bm->isMarked((HeapWord*)newOop), + "only grey objects on this stack"); + // iterate over the oops in this oop, marking and pushing + // the ones in CMS generation. 
+ newOop->oop_iterate(cl); + if (yield_after && _cm->do_yield_check()) { + res = false; break; + } + } + debug_only(_drain_in_progress = false); + return res; +} + +void CMMarkStack::oops_do(OopClosure* f) { + if (_index == 0) return; + assert(_oops_do_bound != -1 && _oops_do_bound <= _index, + "Bound must be set."); + for (int i = 0; i < _oops_do_bound; i++) { + f->do_oop(&_base[i]); + } + _oops_do_bound = -1; +} + +bool ConcurrentMark::not_yet_marked(oop obj) const { + return (_g1h->is_obj_ill(obj) + || (_g1h->is_in_permanent(obj) + && !nextMarkBitMap()->isMarked((HeapWord*)obj))); +} + +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + +ConcurrentMark::ConcurrentMark(ReservedSpace rs, + int max_regions) : + _markBitMap1(rs, MinObjAlignment - 1), + _markBitMap2(rs, MinObjAlignment - 1), + + _parallel_marking_threads(0), + _sleep_factor(0.0), + _marking_task_overhead(1.0), + _cleanup_sleep_factor(0.0), + _cleanup_task_overhead(1.0), + _region_bm(max_regions, false /* in_resource_area*/), + _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >> + CardTableModRefBS::card_shift, + false /* in_resource_area*/), + _prevMarkBitMap(&_markBitMap1), + _nextMarkBitMap(&_markBitMap2), + _at_least_one_mark_complete(false), + + _markStack(this), + _regionStack(), + // _finger set in set_non_marking_state + + _max_task_num(MAX2(ParallelGCThreads, (size_t)1)), + // _active_tasks set in set_non_marking_state + // _tasks set inside the constructor + _task_queues(new CMTaskQueueSet((int) _max_task_num)), + _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)), + + _has_overflown(false), + _concurrent(false), + + // _verbose_level set below + + _init_times(), + _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), + _cleanup_times(), + _total_counting_time(0.0), + _total_rs_scrub_time(0.0), + + _parallel_workers(NULL), + 
_cleanup_co_tracker(G1CLGroup) +{ + CMVerboseLevel verbose_level = + (CMVerboseLevel) G1MarkingVerboseLevel; + if (verbose_level < no_verbose) + verbose_level = no_verbose; + if (verbose_level > high_verbose) + verbose_level = high_verbose; + _verbose_level = verbose_level; + + if (verbose_low()) + gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", " + "heap end = "PTR_FORMAT, _heap_start, _heap_end); + + _markStack.allocate(G1CMStackSize); + _regionStack.allocate(G1CMRegionStackSize); + + // Create & start a ConcurrentMark thread. + if (G1ConcMark) { + _cmThread = new ConcurrentMarkThread(this); + assert(cmThread() != NULL, "CM Thread should have been created"); + assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); + } else { + _cmThread = NULL; + } + _g1h = G1CollectedHeap::heap(); + assert(CGC_lock != NULL, "Where's the CGC_lock?"); + assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency"); + assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency"); + + SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); + satb_qs.set_buffer_size(G1SATBLogBufferSize); + + int size = (int) MAX2(ParallelGCThreads, (size_t)1); + _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size); + for (int i = 0 ; i < size; i++) { + _par_cleanup_thread_state[i] = new ParCleanupThreadState; + } + + _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num); + _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num); + + // so that the assertion in MarkingTaskQueue::task_queue doesn't fail + _active_tasks = _max_task_num; + for (int i = 0; i < (int) _max_task_num; ++i) { + CMTaskQueue* task_queue = new CMTaskQueue(); + task_queue->initialize(); + _task_queues->register_queue(i, task_queue); + + _tasks[i] = new CMTask(i, this, task_queue, _task_queues); + _accum_task_vtime[i] = 0.0; + } + + if (ParallelMarkingThreads > ParallelGCThreads) { + vm_exit_during_initialization("Can't have more ParallelMarkingThreads " + "than 
ParallelGCThreads."); + } + if (ParallelGCThreads == 0) { + // if we are not running with any parallel GC threads we will not + // spawn any marking threads either + _parallel_marking_threads = 0; + _sleep_factor = 0.0; + _marking_task_overhead = 1.0; + } else { + if (ParallelMarkingThreads > 0) { + // notice that ParallelMarkingThreads overwrites G1MarkingOverheadPerc + // if both are set + + _parallel_marking_threads = ParallelMarkingThreads; + _sleep_factor = 0.0; + _marking_task_overhead = 1.0; + } else if (G1MarkingOverheadPerc > 0) { + // we will calculate the number of parallel marking threads + // based on a target overhead with respect to the soft real-time + // goal + + double marking_overhead = (double) G1MarkingOverheadPerc / 100.0; + double overall_cm_overhead = + (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS; + double cpu_ratio = 1.0 / (double) os::processor_count(); + double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); + double marking_task_overhead = + overall_cm_overhead / marking_thread_num * + (double) os::processor_count(); + double sleep_factor = + (1.0 - marking_task_overhead) / marking_task_overhead; + + _parallel_marking_threads = (size_t) marking_thread_num; + _sleep_factor = sleep_factor; + _marking_task_overhead = marking_task_overhead; + } else { + _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1); + _sleep_factor = 0.0; + _marking_task_overhead = 1.0; + } + + if (parallel_marking_threads() > 1) + _cleanup_task_overhead = 1.0; + else + _cleanup_task_overhead = marking_task_overhead(); + _cleanup_sleep_factor = + (1.0 - cleanup_task_overhead()) / cleanup_task_overhead(); + +#if 0 + gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads()); + gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead()); + gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor()); + gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", 
cleanup_task_overhead()); + gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor()); +#endif + + guarantee( parallel_marking_threads() > 0, "peace of mind" ); + _parallel_workers = new WorkGang("Parallel Marking Threads", + (int) parallel_marking_threads(), false, true); + if (_parallel_workers == NULL) + vm_exit_during_initialization("Failed necessary allocation."); + } + + // so that the call below can read a sensible value + _heap_start = (HeapWord*) rs.base(); + set_non_marking_state(); +} + +void ConcurrentMark::update_g1_committed(bool force) { + // If concurrent marking is not in progress, then we do not need to + // update _heap_end. This has a subtle and important + // side-effect. Imagine that two evacuation pauses happen between + // marking completion and remark. The first one can grow the + // heap (hence now the finger is below the heap end). Then, the + // second one could unnecessarily push regions on the region + // stack. This causes the invariant that the region stack is empty + // at the beginning of remark to be false. By ensuring that we do + // not observe heap expansions after marking is complete, then we do + // not have this problem. + if (!concurrent_marking_in_progress() && !force) + return; + + MemRegion committed = _g1h->g1_committed(); + tmp_guarantee_CM( committed.start() == _heap_start, + "start shouldn't change" ); + HeapWord* new_end = committed.end(); + if (new_end > _heap_end) { + // The heap has been expanded. + + _heap_end = new_end; + } + // Notice that the heap can also shrink. However, this only happens + // during a Full GC (at least currently) and the entire marking + // phase will bail out and the task will not be restarted. So, let's + // do nothing. +} + +void ConcurrentMark::reset() { + // Starting values for these two. This should be called in a STW + // phase. CM will be notified of any future g1_committed expansions + // will be at the end of evacuation pauses, when tasks are + // inactive. 
+ MemRegion committed = _g1h->g1_committed(); + _heap_start = committed.start(); + _heap_end = committed.end(); + + guarantee( _heap_start != NULL && + _heap_end != NULL && + _heap_start < _heap_end, "heap bounds should look ok" ); + + // reset all the marking data structures and any necessary flags + clear_marking_state(); + + if (verbose_low()) + gclog_or_tty->print_cr("[global] resetting"); + + // We do reset all of them, since different phases will use + // different number of active threads. So, it's easiest to have all + // of them ready. + for (int i = 0; i < (int) _max_task_num; ++i) + _tasks[i]->reset(_nextMarkBitMap); + + // we need this to make sure that the flag is on during the evac + // pause with initial mark piggy-backed + set_concurrent_marking_in_progress(); +} + +void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) { + guarantee( active_tasks <= _max_task_num, "we should not have more" ); + + _active_tasks = active_tasks; + // Need to update the three data structures below according to the + // number of active threads for this phase. + _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); + _first_overflow_barrier_sync.set_n_workers((int) active_tasks); + _second_overflow_barrier_sync.set_n_workers((int) active_tasks); + + _concurrent = concurrent; + // We propagate this to all tasks, not just the active ones. + for (int i = 0; i < (int) _max_task_num; ++i) + _tasks[i]->set_concurrent(concurrent); + + if (concurrent) { + set_concurrent_marking_in_progress(); + } else { + // We currently assume that the concurrent flag has been set to + // false before we start remark. At this point we should also be + // in a STW phase. 
+ guarantee( !concurrent_marking_in_progress(), "invariant" ); + guarantee( _finger == _heap_end, "only way to get here" ); + update_g1_committed(true); + } +} + +void ConcurrentMark::set_non_marking_state() { + // We set the global marking state to some default values when we're + // not doing marking. + clear_marking_state(); + _active_tasks = 0; + clear_concurrent_marking_in_progress(); +} + +ConcurrentMark::~ConcurrentMark() { + int size = (int) MAX2(ParallelGCThreads, (size_t)1); + for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i]; + FREE_C_HEAP_ARRAY(ParCleanupThreadState*, + _par_cleanup_thread_state); + + for (int i = 0; i < (int) _max_task_num; ++i) { + delete _task_queues->queue(i); + delete _tasks[i]; + } + delete _task_queues; + FREE_C_HEAP_ARRAY(CMTask*, _max_task_num); +} + +// This closure is used to mark refs into the g1 generation +// from external roots in the CMS bit map. +// Called at the first checkpoint. +// + +#define PRINT_REACHABLE_AT_INITIAL_MARK 0 +#if PRINT_REACHABLE_AT_INITIAL_MARK +static FILE* reachable_file = NULL; + +class PrintReachableClosure: public OopsInGenClosure { + CMBitMap* _bm; + int _level; +public: + PrintReachableClosure(CMBitMap* bm) : + _bm(bm), _level(0) { + guarantee(reachable_file != NULL, "pre-condition"); + } + void do_oop(oop* p) { + oop obj = *p; + HeapWord* obj_addr = (HeapWord*)obj; + if (obj == NULL) return; + fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n", + _level, p, (void*) obj, _bm->isMarked(obj_addr)); + if (!_bm->isMarked(obj_addr)) { + _bm->mark(obj_addr); + _level++; + obj->oop_iterate(this); + _level--; + } + } +}; +#endif // PRINT_REACHABLE_AT_INITIAL_MARK + +#define SEND_HEAP_DUMP_TO_FILE 0 +#if SEND_HEAP_DUMP_TO_FILE +static FILE* heap_dump_file = NULL; +#endif // SEND_HEAP_DUMP_TO_FILE + +void ConcurrentMark::clearNextBitmap() { + guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition."); + + // clear the mark bitmap (no grey objects to start 
with). + // We need to do this in chunks and offer to yield in between + // each chunk. + HeapWord* start = _nextMarkBitMap->startWord(); + HeapWord* end = _nextMarkBitMap->endWord(); + HeapWord* cur = start; + size_t chunkSize = M; + while (cur < end) { + HeapWord* next = cur + chunkSize; + if (next > end) + next = end; + MemRegion mr(cur,next); + _nextMarkBitMap->clearRange(mr); + cur = next; + do_yield_check(); + } +} + +class NoteStartOfMarkHRClosure: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + r->note_start_of_marking(true); + } + return false; + } +}; + +void ConcurrentMark::checkpointRootsInitialPre() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + G1CollectorPolicy* g1p = g1h->g1_policy(); + + _has_aborted = false; + + // Find all the reachable objects... +#if PRINT_REACHABLE_AT_INITIAL_MARK + guarantee(reachable_file == NULL, "Protocol"); + char fn_buf[100]; + sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id()); + reachable_file = fopen(fn_buf, "w"); + // clear the mark bitmap (no grey objects to start with) + _nextMarkBitMap->clearAll(); + PrintReachableClosure prcl(_nextMarkBitMap); + g1h->process_strong_roots( + false, // fake perm gen collection + SharedHeap::SO_AllClasses, + &prcl, // Regular roots + &prcl // Perm Gen Roots + ); + // The root iteration above "consumed" dirty cards in the perm gen. + // Therefore, as a shortcut, we dirty all such cards. + g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false); + fclose(reachable_file); + reachable_file = NULL; + // clear the mark bitmap again. + _nextMarkBitMap->clearAll(); + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); + COMPILER2_PRESENT(DerivedPointerTable::clear()); +#endif // PRINT_REACHABLE_AT_INITIAL_MARK + + // Initialise marking structures. This has to be done in a STW phase. 
+ reset(); +} + +class CMMarkRootsClosure: public OopsInGenClosure { +private: + ConcurrentMark* _cm; + G1CollectedHeap* _g1h; + bool _do_barrier; + +public: + CMMarkRootsClosure(ConcurrentMark* cm, + G1CollectedHeap* g1h, + bool do_barrier) : _cm(cm), _g1h(g1h), + _do_barrier(do_barrier) { } + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + virtual void do_oop(oop* p) { + oop thisOop = *p; + if (thisOop != NULL) { + assert(thisOop->is_oop() || thisOop->mark() == NULL, + "expected an oop, possibly with mark word displaced"); + HeapWord* addr = (HeapWord*)thisOop; + if (_g1h->is_in_g1_reserved(addr)) { + _cm->grayRoot(thisOop); + } + } + if (_do_barrier) { + assert(!_g1h->is_in_g1_reserved(p), + "Should be called on external roots"); + do_barrier(p); + } + } +}; + +void ConcurrentMark::checkpointRootsInitialPost() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // For each region note start of marking. + NoteStartOfMarkHRClosure startcl; + g1h->heap_region_iterate(&startcl); + + // Start weak-reference discovery. + ReferenceProcessor* rp = g1h->ref_processor(); + rp->verify_no_references_recorded(); + rp->enable_discovery(); // enable ("weak") refs discovery + + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold); + satb_mq_set.set_active_all_threads(true); + + // update_g1_committed() will be called at the end of an evac pause + // when marking is on. So, it's also called at the end of the + // initial-mark pause to update the heap end, if the heap expands + // during it. No need to call it here. 
+ + guarantee( !_cleanup_co_tracker.enabled(), "invariant" ); + + size_t max_marking_threads = + MAX2((size_t) 1, parallel_marking_threads()); + for (int i = 0; i < (int)_max_task_num; ++i) { + _tasks[i]->enable_co_tracker(); + if (i < (int) max_marking_threads) + _tasks[i]->reset_co_tracker(marking_task_overhead()); + else + _tasks[i]->reset_co_tracker(0.0); + } +} + +// Checkpoint the roots into this generation from outside +// this generation. [Note this initial checkpoint need only +// be approximate -- we'll do a catch up phase subsequently.] +void ConcurrentMark::checkpointRootsInitial() { + assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped"); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + + // If there has not been a GC[n-1] since last GC[n] cycle completed, + // precede our marking with a collection of all + // younger generations to keep floating garbage to a minimum. + // YSR: we won't do this for now -- it's an optimization to be + // done post-beta. + + // YSR: ignoring weak refs for now; will do at bug fixing stage + // EVM: assert(discoveredRefsAreClear()); + + + G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); + g1p->record_concurrent_mark_init_start(); + checkpointRootsInitialPre(); + + // YSR: when concurrent precleaning is in place, we'll + // need to clear the cached card table here + + ResourceMark rm; + HandleMark hm; + + g1h->ensure_parsability(false); + g1h->perm_gen()->save_marks(); + + CMMarkRootsClosure notOlder(this, g1h, false); + CMMarkRootsClosure older(this, g1h, true); + + g1h->set_marking_started(); + g1h->rem_set()->prepare_for_younger_refs_iterate(false); + + g1h->process_strong_roots(false, // fake perm gen collection + SharedHeap::SO_AllClasses, + ¬Older, // Regular roots + &older // Perm Gen Roots + ); + checkpointRootsInitialPost(); + + // Statistics. 
+ double end = os::elapsedTime(); + _init_times.add((end - start) * 1000.0); + GCOverheadReporter::recordSTWEnd(end); + + g1p->record_concurrent_mark_init_end(); +} + +/* + Notice that in the next two methods, we actually leave the STS + during the barrier sync and join it immediately afterwards. If we + do not do this, this then the following deadlock can occur: one + thread could be in the barrier sync code, waiting for the other + thread to also sync up, whereas another one could be trying to + yield, while also waiting for the other threads to sync up too. + + Because the thread that does the sync barrier has left the STS, it + is possible to be suspended for a Full GC or an evacuation pause + could occur. This is actually safe, since the entering the sync + barrier is one of the last things do_marking_step() does, and it + doesn't manipulate any data structures afterwards. +*/ + +void ConcurrentMark::enter_first_sync_barrier(int task_num) { + if (verbose_low()) + gclog_or_tty->print_cr("[%d] entering first barrier", task_num); + + ConcurrentGCThread::stsLeave(); + _first_overflow_barrier_sync.enter(); + ConcurrentGCThread::stsJoin(); + // at this point everyone should have synced up and not be doing any + // more work + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] leaving first barrier", task_num); + + // let task 0 do this + if (task_num == 0) { + // task 0 is responsible for clearing the global data structures + clear_marking_state(); + + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]"); + } + } + + // after this, each task should reset its own data structures then + // then go into the second barrier +} + +void ConcurrentMark::enter_second_sync_barrier(int task_num) { + if (verbose_low()) + gclog_or_tty->print_cr("[%d] entering second barrier", task_num); + + ConcurrentGCThread::stsLeave(); + 
_second_overflow_barrier_sync.enter(); + ConcurrentGCThread::stsJoin(); + // at this point everything should be re-initialised and ready to go + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] leaving second barrier", task_num); +} + +void ConcurrentMark::grayRoot(oop p) { + HeapWord* addr = (HeapWord*) p; + // We can't really check against _heap_start and _heap_end, since it + // is possible during an evacuation pause with piggy-backed + // initial-mark that the committed space is expanded during the + // pause without CM observing this change. So the assertions below + // is a bit conservative; but better than nothing. + tmp_guarantee_CM( _g1h->g1_committed().contains(addr), + "address should be within the heap bounds" ); + + if (!_nextMarkBitMap->isMarked(addr)) + _nextMarkBitMap->parMark(addr); +} + +void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) { + // The objects on the region have already been marked "in bulk" by + // the caller. We only need to decide whether to push the region on + // the region stack or not. + + if (!concurrent_marking_in_progress() || !_should_gray_objects) + // We're done with marking and waiting for remark. We do not need to + // push anything else on the region stack. + return; + + HeapWord* finger = _finger; + + if (verbose_low()) + gclog_or_tty->print_cr("[global] attempting to push " + "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at " + PTR_FORMAT, mr.start(), mr.end(), finger); + + if (mr.start() < finger) { + // The finger is always heap region aligned and it is not possible + // for mr to span heap regions. 
+ tmp_guarantee_CM( mr.end() <= finger, "invariant" ); + + tmp_guarantee_CM( mr.start() <= mr.end() && + _heap_start <= mr.start() && + mr.end() <= _heap_end, + "region boundaries should fall within the committed space" ); + if (verbose_low()) + gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") " + "below the finger, pushing it", + mr.start(), mr.end()); + + if (!region_stack_push(mr)) { + if (verbose_low()) + gclog_or_tty->print_cr("[global] region stack has overflown."); + } + } +} + +void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) { + // The object is not marked by the caller. We need to at least mark + // it and maybe push in on the stack. + + HeapWord* addr = (HeapWord*)p; + if (!_nextMarkBitMap->isMarked(addr)) { + // We definitely need to mark it, irrespective whether we bail out + // because we're done with marking. + if (_nextMarkBitMap->parMark(addr)) { + if (!concurrent_marking_in_progress() || !_should_gray_objects) + // If we're done with concurrent marking and we're waiting for + // remark, then we're not pushing anything on the stack. + return; + + // No OrderAccess:store_load() is needed. 
It is implicit in the + // CAS done in parMark(addr) above + HeapWord* finger = _finger; + + if (addr < finger) { + if (!mark_stack_push(oop(addr))) { + if (verbose_low()) + gclog_or_tty->print_cr("[global] global stack overflow " + "during parMark"); + } + } + } + } +} + +class CMConcurrentMarkingTask: public AbstractGangTask { +private: + ConcurrentMark* _cm; + ConcurrentMarkThread* _cmt; + +public: + void work(int worker_i) { + guarantee( Thread::current()->is_ConcurrentGC_thread(), + "this should only be done by a conc GC thread" ); + + double start_vtime = os::elapsedVTime(); + + ConcurrentGCThread::stsJoin(); + + guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" ); + CMTask* the_task = _cm->task(worker_i); + the_task->start_co_tracker(); + the_task->record_start_time(); + if (!_cm->has_aborted()) { + do { + double start_vtime_sec = os::elapsedVTime(); + double start_time_sec = os::elapsedTime(); + the_task->do_marking_step(10.0); + double end_time_sec = os::elapsedTime(); + double end_vtime_sec = os::elapsedVTime(); + double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; + double elapsed_time_sec = end_time_sec - start_time_sec; + _cm->clear_has_overflown(); + + bool ret = _cm->do_yield_check(worker_i); + + jlong sleep_time_ms; + if (!_cm->has_aborted() && the_task->has_aborted()) { + sleep_time_ms = + (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); + ConcurrentGCThread::stsLeave(); + os::sleep(Thread::current(), sleep_time_ms, false); + ConcurrentGCThread::stsJoin(); + } + double end_time2_sec = os::elapsedTime(); + double elapsed_time2_sec = end_time2_sec - start_time_sec; + + the_task->update_co_tracker(); + +#if 0 + gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, " + "overhead %1.4lf", + elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, + the_task->conc_overhead(os::elapsedTime()) * 8.0); + gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms", + elapsed_time_sec * 1000.0, elapsed_time2_sec * 
1000.0); +#endif + } while (!_cm->has_aborted() && the_task->has_aborted()); + } + the_task->record_end_time(); + guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" ); + + ConcurrentGCThread::stsLeave(); + + double end_vtime = os::elapsedVTime(); + the_task->update_co_tracker(true); + _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime); + } + + CMConcurrentMarkingTask(ConcurrentMark* cm, + ConcurrentMarkThread* cmt) : + AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } + + ~CMConcurrentMarkingTask() { } +}; + +void ConcurrentMark::markFromRoots() { + // we might be tempted to assert that: + // assert(asynch == !SafepointSynchronize::is_at_safepoint(), + // "inconsistent argument?"); + // However that wouldn't be right, because it's possible that + // a safepoint is indeed in progress as a younger generation + // stop-the-world GC happens even as we mark in this generation. + + _restart_for_overflow = false; + + set_phase(MAX2((size_t) 1, parallel_marking_threads()), true); + + CMConcurrentMarkingTask markingTask(this, cmThread()); + if (parallel_marking_threads() > 0) + _parallel_workers->run_task(&markingTask); + else + markingTask.work(0); + print_stats(); +} + +void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { + // world is stopped at this checkpoint + assert(SafepointSynchronize::is_at_safepoint(), + "world should be stopped"); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // If a full collection has happened, we shouldn't do this. + if (has_aborted()) { + g1h->set_marking_complete(); // So bitmap clearing isn't confused + return; + } + + G1CollectorPolicy* g1p = g1h->g1_policy(); + g1p->record_concurrent_mark_remark_start(); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + + checkpointRootsFinalWork(); + + double mark_work_end = os::elapsedTime(); + + weakRefsWork(clear_all_soft_refs); + + if (has_overflown()) { + // Oops. We overflowed. 
Restart concurrent marking. + _restart_for_overflow = true; + // Clear the flag. We do not need it any more. + clear_has_overflown(); + if (G1TraceMarkStackOverflow) + gclog_or_tty->print_cr("\nRemark led to restart for overflow."); + } else { + // We're done with marking. + JavaThread::satb_mark_queue_set().set_active_all_threads(false); + } + +#if VERIFY_OBJS_PROCESSED + _scan_obj_cl.objs_processed = 0; + ThreadLocalObjQueue::objs_enqueued = 0; +#endif + + // Statistics + double now = os::elapsedTime(); + _remark_mark_times.add((mark_work_end - start) * 1000.0); + _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); + _remark_times.add((now - start) * 1000.0); + + GCOverheadReporter::recordSTWEnd(now); + for (int i = 0; i < (int)_max_task_num; ++i) + _tasks[i]->disable_co_tracker(); + _cleanup_co_tracker.enable(); + _cleanup_co_tracker.reset(cleanup_task_overhead()); + g1p->record_concurrent_mark_remark_end(); +} + + +#define CARD_BM_TEST_MODE 0 + +class CalcLiveObjectsClosure: public HeapRegionClosure { + + CMBitMapRO* _bm; + ConcurrentMark* _cm; + COTracker* _co_tracker; + bool _changed; + bool _yield; + size_t _words_done; + size_t _tot_live; + size_t _tot_used; + size_t _regions_done; + double _start_vtime_sec; + + BitMap* _region_bm; + BitMap* _card_bm; + intptr_t _bottom_card_num; + bool _final; + + void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) { + for (intptr_t i = start_card_num; i <= last_card_num; i++) { +#if CARD_BM_TEST_MODE + guarantee(_card_bm->at(i - _bottom_card_num), + "Should already be set."); +#else + _card_bm->par_at_put(i - _bottom_card_num, 1); +#endif + } + } + +public: + CalcLiveObjectsClosure(bool final, + CMBitMapRO *bm, ConcurrentMark *cm, + BitMap* region_bm, BitMap* card_bm, + COTracker* co_tracker) : + _bm(bm), _cm(cm), _changed(false), _yield(true), + _words_done(0), _tot_live(0), _tot_used(0), + _region_bm(region_bm), _card_bm(card_bm), + _final(final), _co_tracker(co_tracker), + 
_regions_done(0), _start_vtime_sec(0.0) + { + _bottom_card_num = + intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> + CardTableModRefBS::card_shift); + } + + bool doHeapRegion(HeapRegion* hr) { + if (_co_tracker != NULL) + _co_tracker->update(); + + if (!_final && _regions_done == 0) + _start_vtime_sec = os::elapsedVTime(); + + if (hr->continuesHumongous()) return false; + + HeapWord* nextTop = hr->next_top_at_mark_start(); + HeapWord* start = hr->top_at_conc_mark_count(); + assert(hr->bottom() <= start && start <= hr->end() && + hr->bottom() <= nextTop && nextTop <= hr->end() && + start <= nextTop, + "Preconditions."); + // Otherwise, record the number of word's we'll examine. + size_t words_done = (nextTop - start); + // Find the first marked object at or after "start". + start = _bm->getNextMarkedWordAddress(start, nextTop); + size_t marked_bytes = 0; + + // Below, the term "card num" means the result of shifting an address + // by the card shift -- address 0 corresponds to card number 0. One + // must subtract the card num of the bottom of the heap to obtain a + // card table index. + // The first card num of the sequence of live cards currently being + // constructed. -1 ==> no sequence. + intptr_t start_card_num = -1; + // The last card num of the sequence of live cards currently being + // constructed. -1 ==> no sequence. + intptr_t last_card_num = -1; + + while (start < nextTop) { + if (_yield && _cm->do_yield_check()) { + // We yielded. It might be for a full collection, in which case + // all bets are off; terminate the traversal. + if (_cm->has_aborted()) { + _changed = false; + return true; + } else { + // Otherwise, it might be a collection pause, and the region + // we're looking at might be in the collection set. We'll + // abandon this region. + return false; + } + } + oop obj = oop(start); + int obj_sz = obj->size(); + // The card num of the start of the current object. 
+ intptr_t obj_card_num = + intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); + + HeapWord* obj_last = start + obj_sz - 1; + intptr_t obj_last_card_num = + intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift); + + if (obj_card_num != last_card_num) { + if (start_card_num == -1) { + assert(last_card_num == -1, "Both or neither."); + start_card_num = obj_card_num; + } else { + assert(last_card_num != -1, "Both or neither."); + assert(obj_card_num >= last_card_num, "Inv"); + if ((obj_card_num - last_card_num) > 1) { + // Mark the last run, and start a new one. + mark_card_num_range(start_card_num, last_card_num); + start_card_num = obj_card_num; + } + } +#if CARD_BM_TEST_MODE + /* + gclog_or_tty->print_cr("Setting bits from %d/%d.", + obj_card_num - _bottom_card_num, + obj_last_card_num - _bottom_card_num); + */ + for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) { + _card_bm->par_at_put(j - _bottom_card_num, 1); + } +#endif + } + // In any case, we set the last card num. + last_card_num = obj_last_card_num; + + marked_bytes += obj_sz * HeapWordSize; + // Find the next marked object after this one. + start = _bm->getNextMarkedWordAddress(start + 1, nextTop); + _changed = true; + } + // Handle the last range, if any. + if (start_card_num != -1) + mark_card_num_range(start_card_num, last_card_num); + if (_final) { + // Mark the allocated-since-marking portion... + HeapWord* tp = hr->top(); + if (nextTop < tp) { + start_card_num = + intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift); + last_card_num = + intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift); + mark_card_num_range(start_card_num, last_card_num); + // This definitely means the region has live objects. + _region_bm->par_at_put(hr->hrs_index(), 1); + } + } + + hr->add_to_marked_bytes(marked_bytes); + // Update the live region bitmap. 
+ if (marked_bytes > 0) { + _region_bm->par_at_put(hr->hrs_index(), 1); + } + hr->set_top_at_conc_mark_count(nextTop); + _tot_live += hr->next_live_bytes(); + _tot_used += hr->used(); + _words_done = words_done; + + if (!_final) { + ++_regions_done; + if (_regions_done % 10 == 0) { + double end_vtime_sec = os::elapsedVTime(); + double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec; + if (elapsed_vtime_sec > (10.0 / 1000.0)) { + jlong sleep_time_ms = + (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0); +#if 0 + gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, " + "overhead %1.4lf", + elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, + _co_tracker->concOverhead(os::elapsedTime())); +#endif + os::sleep(Thread::current(), sleep_time_ms, false); + _start_vtime_sec = end_vtime_sec; + } + } + } + + return false; + } + + bool changed() { return _changed; } + void reset() { _changed = false; _words_done = 0; } + void no_yield() { _yield = false; } + size_t words_done() { return _words_done; } + size_t tot_live() { return _tot_live; } + size_t tot_used() { return _tot_used; } +}; + + +void ConcurrentMark::calcDesiredRegions() { + guarantee( _cleanup_co_tracker.enabled(), "invariant" ); + _cleanup_co_tracker.start(); + + _region_bm.clear(); + _card_bm.clear(); + CalcLiveObjectsClosure calccl(false /*final*/, + nextMarkBitMap(), this, + &_region_bm, &_card_bm, + &_cleanup_co_tracker); + G1CollectedHeap *g1h = G1CollectedHeap::heap(); + g1h->heap_region_iterate(&calccl); + + do { + calccl.reset(); + g1h->heap_region_iterate(&calccl); + } while (calccl.changed()); + + _cleanup_co_tracker.update(true); +} + +class G1ParFinalCountTask: public AbstractGangTask { +protected: + G1CollectedHeap* _g1h; + CMBitMap* _bm; + size_t _n_workers; + size_t *_live_bytes; + size_t *_used_bytes; + BitMap* _region_bm; + BitMap* _card_bm; +public: + G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm, + BitMap* region_bm, BitMap* card_bm) : + 
AbstractGangTask("G1 final counting"), _g1h(g1h), + _bm(bm), _region_bm(region_bm), _card_bm(card_bm) + { + if (ParallelGCThreads > 0) + _n_workers = _g1h->workers()->total_workers(); + else + _n_workers = 1; + _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); + _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); + } + + ~G1ParFinalCountTask() { + FREE_C_HEAP_ARRAY(size_t, _live_bytes); + FREE_C_HEAP_ARRAY(size_t, _used_bytes); + } + + void work(int i) { + CalcLiveObjectsClosure calccl(true /*final*/, + _bm, _g1h->concurrent_mark(), + _region_bm, _card_bm, + NULL /* CO tracker */); + calccl.no_yield(); + if (ParallelGCThreads > 0) { + _g1h->heap_region_par_iterate_chunked(&calccl, i, + HeapRegion::FinalCountClaimValue); + } else { + _g1h->heap_region_iterate(&calccl); + } + assert(calccl.complete(), "Shouldn't have yielded!"); + + guarantee( (size_t)i < _n_workers, "invariant" ); + _live_bytes[i] = calccl.tot_live(); + _used_bytes[i] = calccl.tot_used(); + } + size_t live_bytes() { + size_t live_bytes = 0; + for (size_t i = 0; i < _n_workers; ++i) + live_bytes += _live_bytes[i]; + return live_bytes; + } + size_t used_bytes() { + size_t used_bytes = 0; + for (size_t i = 0; i < _n_workers; ++i) + used_bytes += _used_bytes[i]; + return used_bytes; + } +}; + +class G1ParNoteEndTask; + +class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { + G1CollectedHeap* _g1; + int _worker_num; + size_t _max_live_bytes; + size_t _regions_claimed; + size_t _freed_bytes; + size_t _cleared_h_regions; + size_t _freed_regions; + UncleanRegionList* _unclean_region_list; + double _claimed_region_time; + double _max_region_time; + +public: + G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, + UncleanRegionList* list, + int worker_num); + size_t freed_bytes() { return _freed_bytes; } + size_t cleared_h_regions() { return _cleared_h_regions; } + size_t freed_regions() { return _freed_regions; } + UncleanRegionList* unclean_region_list() { + return _unclean_region_list; + } + + bool 
doHeapRegion(HeapRegion *r); + + size_t max_live_bytes() { return _max_live_bytes; } + size_t regions_claimed() { return _regions_claimed; } + double claimed_region_time_sec() { return _claimed_region_time; } + double max_region_time_sec() { return _max_region_time; } +}; + +class G1ParNoteEndTask: public AbstractGangTask { + friend class G1NoteEndOfConcMarkClosure; +protected: + G1CollectedHeap* _g1h; + size_t _max_live_bytes; + size_t _freed_bytes; + ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state; +public: + G1ParNoteEndTask(G1CollectedHeap* g1h, + ConcurrentMark::ParCleanupThreadState** + par_cleanup_thread_state) : + AbstractGangTask("G1 note end"), _g1h(g1h), + _max_live_bytes(0), _freed_bytes(0), + _par_cleanup_thread_state(par_cleanup_thread_state) + {} + + void work(int i) { + double start = os::elapsedTime(); + G1NoteEndOfConcMarkClosure g1_note_end(_g1h, + &_par_cleanup_thread_state[i]->list, + i); + if (ParallelGCThreads > 0) { + _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, + HeapRegion::NoteEndClaimValue); + } else { + _g1h->heap_region_iterate(&g1_note_end); + } + assert(g1_note_end.complete(), "Shouldn't have yielded!"); + + // Now finish up freeing the current thread's regions. 
+ _g1h->finish_free_region_work(g1_note_end.freed_bytes(), + g1_note_end.cleared_h_regions(), + 0, NULL); + { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + _max_live_bytes += g1_note_end.max_live_bytes(); + _freed_bytes += g1_note_end.freed_bytes(); + } + double end = os::elapsedTime(); + if (G1PrintParCleanupStats) { + gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] " + "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n", + i, start, end, (end-start)*1000.0, + g1_note_end.regions_claimed(), + g1_note_end.claimed_region_time_sec()*1000.0, + g1_note_end.max_region_time_sec()*1000.0); + } + } + size_t max_live_bytes() { return _max_live_bytes; } + size_t freed_bytes() { return _freed_bytes; } +}; + +class G1ParScrubRemSetTask: public AbstractGangTask { +protected: + G1RemSet* _g1rs; + BitMap* _region_bm; + BitMap* _card_bm; +public: + G1ParScrubRemSetTask(G1CollectedHeap* g1h, + BitMap* region_bm, BitMap* card_bm) : + AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), + _region_bm(region_bm), _card_bm(card_bm) + {} + + void work(int i) { + if (ParallelGCThreads > 0) { + _g1rs->scrub_par(_region_bm, _card_bm, i, + HeapRegion::ScrubRemSetClaimValue); + } else { + _g1rs->scrub(_region_bm, _card_bm); + } + } + +}; + +G1NoteEndOfConcMarkClosure:: +G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, + UncleanRegionList* list, + int worker_num) + : _g1(g1), _worker_num(worker_num), + _max_live_bytes(0), _regions_claimed(0), + _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0), + _claimed_region_time(0.0), _max_region_time(0.0), + _unclean_region_list(list) +{} + +bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) { + // We use a claim value of zero here because all regions + // were claimed with value 1 in the FinalCount task. 
+ r->reset_gc_time_stamp(); + if (!r->continuesHumongous()) { + double start = os::elapsedTime(); + _regions_claimed++; + r->note_end_of_marking(); + _max_live_bytes += r->max_live_bytes(); + _g1->free_region_if_totally_empty_work(r, + _freed_bytes, + _cleared_h_regions, + _freed_regions, + _unclean_region_list, + true /*par*/); + double region_time = (os::elapsedTime() - start); + _claimed_region_time += region_time; + if (region_time > _max_region_time) _max_region_time = region_time; + } + return false; +} + +void ConcurrentMark::cleanup() { + // world is stopped at this checkpoint + assert(SafepointSynchronize::is_at_safepoint(), + "world should be stopped"); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // If a full collection has happened, we shouldn't do this. + if (has_aborted()) { + g1h->set_marking_complete(); // So bitmap clearing isn't confused + return; + } + + _cleanup_co_tracker.disable(); + + G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); + g1p->record_concurrent_mark_cleanup_start(); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + + // Do counting once more with the world stopped for good measure. 
+ G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), + &_region_bm, &_card_bm); + if (ParallelGCThreads > 0) { + assert(g1h->check_heap_region_claim_values( + HeapRegion::InitialClaimValue), + "sanity check"); + + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&g1_par_count_task); + g1h->set_par_threads(0); + + assert(g1h->check_heap_region_claim_values( + HeapRegion::FinalCountClaimValue), + "sanity check"); + } else { + g1_par_count_task.work(0); + } + + size_t known_garbage_bytes = + g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); +#if 0 + gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf", + (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024), + (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024), + (double) known_garbage_bytes / (double) (1024 * 1024)); +#endif // 0 + g1p->set_known_garbage_bytes(known_garbage_bytes); + + size_t start_used_bytes = g1h->used(); + _at_least_one_mark_complete = true; + g1h->set_marking_complete(); + + double count_end = os::elapsedTime(); + double this_final_counting_time = (count_end - start); + if (G1PrintParCleanupStats) { + gclog_or_tty->print_cr("Cleanup:"); + gclog_or_tty->print_cr(" Finalize counting: %8.3f ms", + this_final_counting_time*1000.0); + } + _total_counting_time += this_final_counting_time; + + // Install newly created mark bitMap as "prev". + swapMarkBitMaps(); + + g1h->reset_gc_time_stamp(); + + // Note end of marking in all heap regions. 
+ double note_end_start = os::elapsedTime(); + G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state); + if (ParallelGCThreads > 0) { + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&g1_par_note_end_task); + g1h->set_par_threads(0); + + assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), + "sanity check"); + } else { + g1_par_note_end_task.work(0); + } + g1h->set_unclean_regions_coming(true); + double note_end_end = os::elapsedTime(); + // Tell the mutators that there might be unclean regions coming... + if (G1PrintParCleanupStats) { + gclog_or_tty->print_cr(" note end of marking: %8.3f ms.", + (note_end_end - note_end_start)*1000.0); + } + + + // call below, since it affects the metric by which we sort the heap + // regions. + if (G1ScrubRemSets) { + double rs_scrub_start = os::elapsedTime(); + G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); + if (ParallelGCThreads > 0) { + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&g1_par_scrub_rs_task); + g1h->set_par_threads(0); + + assert(g1h->check_heap_region_claim_values( + HeapRegion::ScrubRemSetClaimValue), + "sanity check"); + } else { + g1_par_scrub_rs_task.work(0); + } + + double rs_scrub_end = os::elapsedTime(); + double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); + _total_rs_scrub_time += this_rs_scrub_time; + } + + // this will also free any regions totally full of garbage objects, + // and sort the regions. + g1h->g1_policy()->record_concurrent_mark_cleanup_end( + g1_par_note_end_task.freed_bytes(), + g1_par_note_end_task.max_live_bytes()); + + // Statistics. 
+ double end = os::elapsedTime(); + _cleanup_times.add((end - start) * 1000.0); + GCOverheadReporter::recordSTWEnd(end); + + // G1CollectedHeap::heap()->print(); + // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d", + // G1CollectedHeap::heap()->get_gc_time_stamp()); + + if (PrintGC || PrintGCDetails) { + g1h->print_size_transition(gclog_or_tty, + start_used_bytes, + g1h->used(), + g1h->capacity()); + } + + size_t cleaned_up_bytes = start_used_bytes - g1h->used(); + g1p->decrease_known_garbage_bytes(cleaned_up_bytes); + + // We need to make this be a "collection" so any collection pause that + // races with it goes around and waits for completeCleanup to finish. + g1h->increment_total_collections(); + +#ifndef PRODUCT + if (G1VerifyConcMark) { + G1CollectedHeap::heap()->prepare_for_verify(); + G1CollectedHeap::heap()->verify(true,false); + } +#endif +} + +void ConcurrentMark::completeCleanup() { + // A full collection intervened. + if (has_aborted()) return; + + int first = 0; + int last = (int)MAX2(ParallelGCThreads, (size_t)1); + for (int t = 0; t < last; t++) { + UncleanRegionList* list = &_par_cleanup_thread_state[t]->list; + assert(list->well_formed(), "Inv"); + HeapRegion* hd = list->hd(); + while (hd != NULL) { + // Now finish up the other stuff. + hd->rem_set()->clear(); + HeapRegion* next_hd = hd->next_from_unclean_list(); + (void)list->pop(); + guarantee(list->hd() == next_hd, "how not?"); + _g1h->put_region_on_unclean_list(hd); + if (!hd->isHumongous()) { + // Add this to the _free_regions count by 1. 
+ _g1h->finish_free_region_work(0, 0, 1, NULL); + } + hd = list->hd(); + guarantee(hd == next_hd, "how not?"); + } + } +} + + +class G1CMIsAliveClosure: public BoolObjectClosure { + G1CollectedHeap* _g1; + public: + G1CMIsAliveClosure(G1CollectedHeap* g1) : + _g1(g1) + {} + + void do_object(oop obj) { + assert(false, "not to be invoked"); + } + bool do_object_b(oop obj) { + HeapWord* addr = (HeapWord*)obj; + return addr != NULL && + (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); + } +}; + +class G1CMKeepAliveClosure: public OopClosure { + G1CollectedHeap* _g1; + ConcurrentMark* _cm; + CMBitMap* _bitMap; + public: + G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm, + CMBitMap* bitMap) : + _g1(g1), _cm(cm), + _bitMap(bitMap) {} + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop thisOop = *p; + HeapWord* addr = (HeapWord*)thisOop; + if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) { + _bitMap->mark(addr); + _cm->mark_stack_push(thisOop); + } + } +}; + +class G1CMDrainMarkingStackClosure: public VoidClosure { + CMMarkStack* _markStack; + CMBitMap* _bitMap; + G1CMKeepAliveClosure* _oopClosure; + public: + G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack, + G1CMKeepAliveClosure* oopClosure) : + _bitMap(bitMap), + _markStack(markStack), + _oopClosure(oopClosure) + {} + + void do_void() { + _markStack->drain((OopClosure*)_oopClosure, _bitMap, false); + } +}; + +void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { + ResourceMark rm; + HandleMark hm; + ReferencePolicy* soft_ref_policy; + + // Process weak references. 
+ if (clear_all_soft_refs) { + soft_ref_policy = new AlwaysClearPolicy(); + } else { +#ifdef COMPILER2 + soft_ref_policy = new LRUMaxHeapPolicy(); +#else + soft_ref_policy = new LRUCurrentHeapPolicy(); +#endif + } + assert(_markStack.isEmpty(), "mark stack should be empty"); + + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + G1CMIsAliveClosure g1IsAliveClosure(g1); + + G1CMKeepAliveClosure g1KeepAliveClosure(g1, this, nextMarkBitMap()); + G1CMDrainMarkingStackClosure + g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack, + &g1KeepAliveClosure); + + // XXXYYY Also: copy the parallel ref processing code from CMS. + ReferenceProcessor* rp = g1->ref_processor(); + rp->process_discovered_references(soft_ref_policy, + &g1IsAliveClosure, + &g1KeepAliveClosure, + &g1DrainMarkingStackClosure, + NULL); + assert(_markStack.overflow() || _markStack.isEmpty(), + "mark stack should be empty (unless it overflowed)"); + if (_markStack.overflow()) { + set_has_overflown(); + } + + rp->enqueue_discovered_references(); + rp->verify_no_references_recorded(); + assert(!rp->discovery_enabled(), "should have been disabled"); + + // Now clean up stale oops in SymbolTable and StringTable + SymbolTable::unlink(&g1IsAliveClosure); + StringTable::unlink(&g1IsAliveClosure); +} + +void ConcurrentMark::swapMarkBitMaps() { + CMBitMapRO* temp = _prevMarkBitMap; + _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; + _nextMarkBitMap = (CMBitMap*) temp; +} + +class CMRemarkTask: public AbstractGangTask { +private: + ConcurrentMark *_cm; + +public: + void work(int worker_i) { + // Since all available tasks are actually started, we should + // only proceed if we're supposed to be actived. + if ((size_t)worker_i < _cm->active_tasks()) { + CMTask* task = _cm->task(worker_i); + task->record_start_time(); + do { + task->do_marking_step(1000000000.0 /* something very large */); + } while (task->has_aborted() && !_cm->has_overflown()); + // If we overflow, then we do not want to restart. 
We instead + // want to abort remark and do concurrent marking again. + task->record_end_time(); + } + } + + CMRemarkTask(ConcurrentMark* cm) : + AbstractGangTask("Par Remark"), _cm(cm) { } +}; + +void ConcurrentMark::checkpointRootsFinalWork() { + ResourceMark rm; + HandleMark hm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + g1h->ensure_parsability(false); + + if (ParallelGCThreads > 0) { + g1h->change_strong_roots_parity(); + // this is remark, so we'll use up all available threads + int active_workers = ParallelGCThreads; + set_phase(active_workers, false); + + CMRemarkTask remarkTask(this); + // We will start all available threads, even if we decide that the + // active_workers will be fewer. The extra ones will just bail out + // immediately. + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&remarkTask); + g1h->set_par_threads(0); + + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); + } else { + g1h->change_strong_roots_parity(); + // this is remark, so we'll use up all available threads + int active_workers = 1; + set_phase(active_workers, false); + + CMRemarkTask remarkTask(this); + // We will start all available threads, even if we decide that the + // active_workers will be fewer. The extra ones will just bail out + // immediately. 
+ remarkTask.work(0); + + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); + } + + print_stats(); + + if (!restart_for_overflow()) + set_non_marking_state(); + +#if VERIFY_OBJS_PROCESSED + if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { + gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", + _scan_obj_cl.objs_processed, + ThreadLocalObjQueue::objs_enqueued); + guarantee(_scan_obj_cl.objs_processed == + ThreadLocalObjQueue::objs_enqueued, + "Different number of objs processed and enqueued."); + } +#endif +} + +class ReachablePrinterOopClosure: public OopClosure { +private: + G1CollectedHeap* _g1h; + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) : + _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { } + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop obj = *p; + const char* str = NULL; + const char* str2 = ""; + + if (!_g1h->is_in_g1_reserved(obj)) + str = "outside G1 reserved"; + else { + HeapRegion* hr = _g1h->heap_region_containing(obj); + guarantee( hr != NULL, "invariant" ); + if (hr->obj_allocated_since_prev_marking(obj)) { + str = "over TAMS"; + if (_bitmap->isMarked((HeapWord*) obj)) + str2 = " AND MARKED"; + } else if (_bitmap->isMarked((HeapWord*) obj)) + str = "marked"; + else + str = "#### NOT MARKED ####"; + } + + _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s", + p, (void*) obj, str, str2); + } +}; + +class ReachablePrinterClosure: public BitMapClosure { +private: + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) : + _bitmap(bitmap), _out(out) { } + + bool do_bit(size_t offset) { + HeapWord* addr = _bitmap->offsetToHeapWord(offset); + ReachablePrinterOopClosure oopCl(_bitmap, _out); + + _out->print_cr(" obj "PTR_FORMAT", offset 
%10d (marked)", addr, offset); + oop(addr)->oop_iterate(&oopCl); + _out->print_cr(""); + + return true; + } +}; + +class ObjInRegionReachablePrinterClosure : public ObjectClosure { +private: + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + void do_object(oop o) { + ReachablePrinterOopClosure oopCl(_bitmap, _out); + + _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o); + o->oop_iterate(&oopCl); + _out->print_cr(""); + } + + ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) : + _bitmap(bitmap), _out(out) { } +}; + +class RegionReachablePrinterClosure : public HeapRegionClosure { +private: + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + bool doHeapRegion(HeapRegion* hr) { + HeapWord* b = hr->bottom(); + HeapWord* e = hr->end(); + HeapWord* t = hr->top(); + HeapWord* p = hr->prev_top_at_mark_start(); + _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " + "PTAMS: "PTR_FORMAT, b, e, t, p); + _out->print_cr(""); + + ObjInRegionReachablePrinterClosure ocl(_bitmap, _out); + hr->object_iterate_mem_careful(MemRegion(p, t), &ocl); + + return false; + } + + RegionReachablePrinterClosure(CMBitMapRO* bitmap, + outputStream* out) : + _bitmap(bitmap), _out(out) { } +}; + +void ConcurrentMark::print_prev_bitmap_reachable() { + outputStream* out = gclog_or_tty; + +#if SEND_HEAP_DUMP_TO_FILE + guarantee(heap_dump_file == NULL, "Protocol"); + char fn_buf[100]; + sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id()); + heap_dump_file = fopen(fn_buf, "w"); + fileStream fstream(heap_dump_file); + out = &fstream; +#endif // SEND_HEAP_DUMP_TO_FILE + + RegionReachablePrinterClosure rcl(_prevMarkBitMap, out); + out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP"); + _g1h->heap_region_iterate(&rcl); + out->print_cr(""); + + ReachablePrinterClosure cl(_prevMarkBitMap, out); + out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP"); + _prevMarkBitMap->iterate(&cl); + out->print_cr(""); + +#if SEND_HEAP_DUMP_TO_FILE 
+ fclose(heap_dump_file); + heap_dump_file = NULL; +#endif // SEND_HEAP_DUMP_TO_FILE +} + +// This note is for drainAllSATBBuffers and the code in between. +// In the future we could reuse a task to do this work during an +// evacuation pause (since now tasks are not active and can be claimed +// during an evacuation pause). This was a late change to the code and +// is currently not being taken advantage of. + +class CMGlobalObjectClosure : public ObjectClosure { +private: + ConcurrentMark* _cm; + +public: + void do_object(oop obj) { + _cm->deal_with_reference(obj); + } + + CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { } +}; + +void ConcurrentMark::deal_with_reference(oop obj) { + if (verbose_high()) + gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT, + (void*) obj); + + + HeapWord* objAddr = (HeapWord*) obj; + if (_g1h->is_in_g1_reserved(objAddr)) { + tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" ); + HeapRegion* hr = _g1h->heap_region_containing(obj); + if (_g1h->is_obj_ill(obj, hr)) { + if (verbose_high()) + gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered " + "marked", (void*) obj); + + // we need to mark it first + if (_nextMarkBitMap->parMark(objAddr)) { + // No OrderAccess:store_load() is needed. 
It is implicit in the + // CAS done in parMark(objAddr) above + HeapWord* finger = _finger; + if (objAddr < finger) { + if (verbose_high()) + gclog_or_tty->print_cr("[global] below the global finger " + "("PTR_FORMAT"), pushing it", finger); + if (!mark_stack_push(obj)) { + if (verbose_low()) + gclog_or_tty->print_cr("[global] global stack overflow during " + "deal_with_reference"); + } + } + } + } + } +} + +void ConcurrentMark::drainAllSATBBuffers() { + CMGlobalObjectClosure oc(this); + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + satb_mq_set.set_closure(&oc); + + while (satb_mq_set.apply_closure_to_completed_buffer()) { + if (verbose_medium()) + gclog_or_tty->print_cr("[global] processed an SATB buffer"); + } + + // no need to check whether we should do this, as this is only + // called during an evacuation pause + satb_mq_set.iterate_closure_all_threads(); + + satb_mq_set.set_closure(NULL); + guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); +} + +void ConcurrentMark::markPrev(oop p) { + // Note we are overriding the read-only view of the prev map here, via + // the cast. + ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p); +} + +void ConcurrentMark::clear(oop p) { + assert(p != NULL && p->is_oop(), "expected an oop"); + HeapWord* addr = (HeapWord*)p; + assert(addr >= _nextMarkBitMap->startWord() || + addr < _nextMarkBitMap->endWord(), "in a region"); + + _nextMarkBitMap->clear(addr); +} + +void ConcurrentMark::clearRangeBothMaps(MemRegion mr) { + // Note we are overriding the read-only view of the prev map here, via + // the cast. + ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); + _nextMarkBitMap->clearRange(mr); +} + +HeapRegion* +ConcurrentMark::claim_region(int task_num) { + // "checkpoint" the finger + HeapWord* finger = _finger; + + // _heap_end will not change underneath our feet; it only changes at + // yield points. 
+ while (finger < _heap_end) { + tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" ); + + // is the gap between reading the finger and doing the CAS too long? + + HeapRegion* curr_region = _g1h->heap_region_containing(finger); + HeapWord* bottom = curr_region->bottom(); + HeapWord* end = curr_region->end(); + HeapWord* limit = curr_region->next_top_at_mark_start(); + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" " + "["PTR_FORMAT", "PTR_FORMAT"), " + "limit = "PTR_FORMAT, + task_num, curr_region, bottom, end, limit); + + HeapWord* res = + (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); + if (res == finger) { + // we succeeded + + // notice that _finger == end cannot be guaranteed here since, + // someone else might have moved the finger even further + guarantee( _finger >= end, "the finger should have moved forward" ); + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] we were successful with region = " + PTR_FORMAT, task_num, curr_region); + + if (limit > bottom) { + if (verbose_low()) + gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, " + "returning it ", task_num, curr_region); + return curr_region; + } else { + tmp_guarantee_CM( limit == bottom, + "the region limit should be at bottom" ); + if (verbose_low()) + gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, " + "returning NULL", task_num, curr_region); + // we return NULL and the caller should try calling + // claim_region() again. 
+ return NULL; + } + } else { + guarantee( _finger > finger, "the finger should have moved forward" ); + if (verbose_low()) + gclog_or_tty->print_cr("[%d] somebody else moved the finger, " + "global finger = "PTR_FORMAT", " + "our finger = "PTR_FORMAT, + task_num, _finger, finger); + + // read it again + finger = _finger; + } + } + + return NULL; +} + +void ConcurrentMark::oops_do(OopClosure* cl) { + if (_markStack.size() > 0 && verbose_low()) + gclog_or_tty->print_cr("[global] scanning the global marking stack, " + "size = %d", _markStack.size()); + // we first iterate over the contents of the mark stack... + _markStack.oops_do(cl); + + for (int i = 0; i < (int)_max_task_num; ++i) { + OopTaskQueue* queue = _task_queues->queue((int)i); + + if (queue->size() > 0 && verbose_low()) + gclog_or_tty->print_cr("[global] scanning task queue of task %d, " + "size = %d", i, queue->size()); + + // ...then over the contents of the all the task queues. + queue->oops_do(cl); + } + + // finally, invalidate any entries that in the region stack that + // point into the collection set + if (_regionStack.invalidate_entries_into_cset()) { + // otherwise, any gray objects copied during the evacuation pause + // might not be visited. 
+ guarantee( _should_gray_objects, "invariant" ); + } +} + +void ConcurrentMark::clear_marking_state() { + _markStack.setEmpty(); + _markStack.clear_overflow(); + _regionStack.setEmpty(); + _regionStack.clear_overflow(); + clear_has_overflown(); + _finger = _heap_start; + + for (int i = 0; i < (int)_max_task_num; ++i) { + OopTaskQueue* queue = _task_queues->queue(i); + queue->set_empty(); + } +} + +void ConcurrentMark::print_stats() { + if (verbose_stats()) { + gclog_or_tty->print_cr("---------------------------------------------------------------------"); + for (size_t i = 0; i < _active_tasks; ++i) { + _tasks[i]->print_stats(); + gclog_or_tty->print_cr("---------------------------------------------------------------------"); + } + } +} + +class CSMarkOopClosure: public OopClosure { + friend class CSMarkBitMapClosure; + + G1CollectedHeap* _g1h; + CMBitMap* _bm; + ConcurrentMark* _cm; + oop* _ms; + jint* _array_ind_stack; + int _ms_size; + int _ms_ind; + int _array_increment; + + bool push(oop obj, int arr_ind = 0) { + if (_ms_ind == _ms_size) { + gclog_or_tty->print_cr("Mark stack is full."); + return false; + } + _ms[_ms_ind] = obj; + if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind; + _ms_ind++; + return true; + } + + oop pop() { + if (_ms_ind == 0) return NULL; + else { + _ms_ind--; + return _ms[_ms_ind]; + } + } + + bool drain() { + while (_ms_ind > 0) { + oop obj = pop(); + assert(obj != NULL, "Since index was non-zero."); + if (obj->is_objArray()) { + jint arr_ind = _array_ind_stack[_ms_ind]; + objArrayOop aobj = objArrayOop(obj); + jint len = aobj->length(); + jint next_arr_ind = arr_ind + _array_increment; + if (next_arr_ind < len) { + push(obj, next_arr_ind); + } + // Now process this portion of this one. 
+ int lim = MIN2(next_arr_ind, len); + assert(!UseCompressedOops, "This needs to be fixed"); + for (int j = arr_ind; j < lim; j++) { + do_oop(aobj->obj_at_addr<oop>(j)); + } + + } else { + obj->oop_iterate(this); + } + if (abort()) return false; + } + return true; + } + +public: + CSMarkOopClosure(ConcurrentMark* cm, int ms_size) : + _g1h(G1CollectedHeap::heap()), + _cm(cm), + _bm(cm->nextMarkBitMap()), + _ms_size(ms_size), _ms_ind(0), + _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), + _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), + _array_increment(MAX2(ms_size/8, 16)) + {} + + ~CSMarkOopClosure() { + FREE_C_HEAP_ARRAY(oop, _ms); + FREE_C_HEAP_ARRAY(jint, _array_ind_stack); + } + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop obj = *p; + if (obj == NULL) return; + if (obj->is_forwarded()) { + // If the object has already been forwarded, we have to make sure + // that it's marked. So follow the forwarding pointer. Note that + // this does the right thing for self-forwarding pointers in the + // evacuation failure case. 
+ obj = obj->forwardee(); + } + HeapRegion* hr = _g1h->heap_region_containing(obj); + if (hr != NULL) { + if (hr->in_collection_set()) { + if (_g1h->is_obj_ill(obj)) { + _bm->mark((HeapWord*)obj); + if (!push(obj)) { + gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed."); + set_abort(); + } + } + } else { + // Outside the collection set; we need to gray it + _cm->deal_with_reference(obj); + } + } + } +}; + +class CSMarkBitMapClosure: public BitMapClosure { + G1CollectedHeap* _g1h; + CMBitMap* _bitMap; + ConcurrentMark* _cm; + CSMarkOopClosure _oop_cl; +public: + CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) : + _g1h(G1CollectedHeap::heap()), + _bitMap(cm->nextMarkBitMap()), + _oop_cl(cm, ms_size) + {} + + ~CSMarkBitMapClosure() {} + + bool do_bit(size_t offset) { + // convert offset into a HeapWord* + HeapWord* addr = _bitMap->offsetToHeapWord(offset); + assert(_bitMap->endWord() && addr < _bitMap->endWord(), + "address out of range"); + assert(_bitMap->isMarked(addr), "tautology"); + oop obj = oop(addr); + if (!obj->is_forwarded()) { + if (!_oop_cl.push(obj)) return false; + if (!_oop_cl.drain()) return false; + } + // Otherwise... 
+ return true; + } +}; + + +class CompleteMarkingInCSHRClosure: public HeapRegionClosure { + CMBitMap* _bm; + CSMarkBitMapClosure _bit_cl; + enum SomePrivateConstants { + MSSize = 1000 + }; + bool _completed; +public: + CompleteMarkingInCSHRClosure(ConcurrentMark* cm) : + _bm(cm->nextMarkBitMap()), + _bit_cl(cm, MSSize), + _completed(true) + {} + + ~CompleteMarkingInCSHRClosure() {} + + bool doHeapRegion(HeapRegion* r) { + if (!r->evacuation_failed()) { + MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start()); + if (!mr.is_empty()) { + if (!_bm->iterate(&_bit_cl, mr)) { + _completed = false; + return true; + } + } + } + return false; + } + + bool completed() { return _completed; } +}; + +class ClearMarksInHRClosure: public HeapRegionClosure { + CMBitMap* _bm; +public: + ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { } + + bool doHeapRegion(HeapRegion* r) { + if (!r->used_region().is_empty() && !r->evacuation_failed()) { + MemRegion usedMR = r->used_region(); + _bm->clearRange(r->used_region()); + } + return false; + } +}; + +void ConcurrentMark::complete_marking_in_collection_set() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + if (!g1h->mark_in_progress()) { + g1h->g1_policy()->record_mark_closure_time(0.0); + return; + } + + int i = 1; + double start = os::elapsedTime(); + while (true) { + i++; + CompleteMarkingInCSHRClosure cmplt(this); + g1h->collection_set_iterate(&cmplt); + if (cmplt.completed()) break; + } + double end_time = os::elapsedTime(); + double elapsed_time_ms = (end_time - start) * 1000.0; + g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms); + if (PrintGCDetails) { + gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms); + } + + ClearMarksInHRClosure clr(nextMarkBitMap()); + g1h->collection_set_iterate(&clr); +} + +// The next two methods deal with the following optimisation. Some +// objects are gray by being marked and located above the finger. 
If
+// they are copied, during an evacuation pause, below the finger then
+// they need to be pushed on the stack. The observation is that, if
+// there are no regions in the collection set located above the
+// finger, then the above cannot happen, hence we do not need to
+// explicitly gray any objects when copying them to below the
+// finger. The global stack will be scanned to ensure that, if it
+// points to objects being copied, it will update their
+// location. There is a tricky situation with the gray objects in
+// region stack that are being copied, however. See the comment in
+// newCSet().
+
+void ConcurrentMark::newCSet() {
+ if (!concurrent_marking_in_progress())
+ // nothing to do if marking is not in progress
+ return;
+
+ // find what the lowest finger is among the global and local fingers
+ _min_finger = _finger;
+ for (int i = 0; i < (int)_max_task_num; ++i) {
+ CMTask* task = _tasks[i];
+ HeapWord* task_finger = task->finger();
+ if (task_finger != NULL && task_finger < _min_finger)
+ _min_finger = task_finger;
+ }
+
+ _should_gray_objects = false;
+
+ // This fixes a very subtle and frustrating bug. It might be the case
+ // that, during an evacuation pause, heap regions that contain
+ // objects that are gray (by being in regions contained in the
+ // region stack) are included in the collection set. Since such gray
+ // objects will be moved, and because it's not easy to redirect
+ // region stack entries to point to a new location (because objects
+ // in one region might be scattered to multiple regions after they
+ // are copied), one option is to ensure that all marked objects
+ // copied during a pause are pushed on the stack. Notice, however,
+ // that this problem can only happen when the region stack is not
+ // empty during an evacuation pause. 
So, we make the fix a bit less
+ // conservative and ensure that regions are pushed on the stack,
+ // irrespective of whether all collection set regions are below the
+ // finger, if the region stack is not empty. This is expected to be
+ // a rare case, so I don't think it's necessary to be smarter about it.
+ if (!region_stack_empty())
+ _should_gray_objects = true;
+}
+
+void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
+ if (!concurrent_marking_in_progress())
+ return;
+
+ HeapWord* region_end = hr->end();
+ if (region_end > _min_finger)
+ _should_gray_objects = true;
+}
+
+void ConcurrentMark::disable_co_trackers() {
+ if (has_aborted()) {
+ if (_cleanup_co_tracker.enabled())
+ _cleanup_co_tracker.disable();
+ for (int i = 0; i < (int)_max_task_num; ++i) {
+ CMTask* task = _tasks[i];
+ if (task->co_tracker_enabled())
+ task->disable_co_tracker();
+ }
+ } else {
+ guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
+ for (int i = 0; i < (int)_max_task_num; ++i) {
+ CMTask* task = _tasks[i];
+ guarantee( !task->co_tracker_enabled(), "invariant" );
+ }
+ }
+}
+
+// abandon current marking iteration due to a Full GC
+void ConcurrentMark::abort() {
+ // If we're not marking, nothing to do.
+ if (!G1ConcMark) return;
+
+ // Clear all marks to force marking thread to do nothing
+ _nextMarkBitMap->clearAll();
+ // Empty mark stack
+ clear_marking_state();
+ for (int i = 0; i < (int)_max_task_num; ++i)
+ _tasks[i]->clear_region_fields();
+ _has_aborted = true;
+
+ SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
+ satb_mq_set.abandon_partial_marking();
+ satb_mq_set.set_active_all_threads(false);
+}
+
+static void print_ms_time_info(const char* prefix, const char* name,
+ NumberSeq& ns) {
+ gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
+ prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
+ if (ns.num() > 0) {
+ gclog_or_tty->print_cr("%s [std. 
dev = %8.2f ms, max = %8.2f ms]", + prefix, ns.sd(), ns.maximum()); + } +} + +void ConcurrentMark::print_summary_info() { + gclog_or_tty->print_cr(" Concurrent marking:"); + print_ms_time_info(" ", "init marks", _init_times); + print_ms_time_info(" ", "remarks", _remark_times); + { + print_ms_time_info(" ", "final marks", _remark_mark_times); + print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); + + } + print_ms_time_info(" ", "cleanups", _cleanup_times); + gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", + _total_counting_time, + (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / + (double)_cleanup_times.num() + : 0.0)); + if (G1ScrubRemSets) { + gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", + _total_rs_scrub_time, + (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / + (double)_cleanup_times.num() + : 0.0)); + } + gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", + (_init_times.sum() + _remark_times.sum() + + _cleanup_times.sum())/1000.0); + gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " + "(%8.2f s marking, %8.2f s counting).", + cmThread()->vtime_accum(), + cmThread()->vtime_mark_accum(), + cmThread()->vtime_count_accum()); +} + +// Closures +// XXX: there seems to be a lot of code duplication here; +// should refactor and consolidate the shared code. + +// This closure is used to mark refs into the CMS generation in +// the CMS bit map. Called at the first checkpoint. + +// We take a break if someone is trying to stop the world. 
+bool ConcurrentMark::do_yield_check(int worker_i) { + if (should_yield()) { + if (worker_i == 0) + _g1h->g1_policy()->record_concurrent_pause(); + cmThread()->yield(); + if (worker_i == 0) + _g1h->g1_policy()->record_concurrent_pause_end(); + return true; + } else { + return false; + } +} + +bool ConcurrentMark::should_yield() { + return cmThread()->should_yield(); +} + +bool ConcurrentMark::containing_card_is_marked(void* p) { + size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); + return _card_bm.at(offset >> CardTableModRefBS::card_shift); +} + +bool ConcurrentMark::containing_cards_are_marked(void* start, + void* last) { + return + containing_card_is_marked(start) && + containing_card_is_marked(last); +} + +#ifndef PRODUCT +// for debugging purposes +void ConcurrentMark::print_finger() { + gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, + _heap_start, _heap_end, _finger); + for (int i = 0; i < (int) _max_task_num; ++i) { + gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); + } + gclog_or_tty->print_cr(""); +} +#endif + +// Closure for iteration over bitmaps +class CMBitMapClosure : public BitMapClosure { +private: + // the bitmap that is being iterated over + CMBitMap* _nextMarkBitMap; + ConcurrentMark* _cm; + CMTask* _task; + // true if we're scanning a heap region claimed by the task (so that + // we move the finger along), false if we're not, i.e. currently when + // scanning a heap region popped from the region stack (so that we + // do not move the task finger along; it'd be a mistake if we did so). + bool _scanning_heap_region; + +public: + CMBitMapClosure(CMTask *task, + ConcurrentMark* cm, + CMBitMap* nextMarkBitMap) + : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } + + void set_scanning_heap_region(bool scanning_heap_region) { + _scanning_heap_region = scanning_heap_region; + } + + bool do_bit(size_t offset) {