OpenJDK / jdk / jdk
changeset 57787:e72e86d5a090
Merge
author | prr |
---|---|
date | Tue, 14 Jan 2020 15:23:01 -0800 |
parents | b681239b7258 d8a27d799478 |
children | f446d8919043 |
files | src/java.base/share/classes/sun/net/idn/Punycode.java src/java.base/share/classes/sun/net/idn/StringPrep.java src/java.base/share/classes/sun/net/idn/StringPrepDataReader.java src/java.base/share/classes/sun/net/idn/UCharacterDirection.java src/java.base/share/classes/sun/net/idn/UCharacterEnums.java src/java.base/share/classes/sun/text/bidi/BidiBase.java src/java.base/share/classes/sun/text/bidi/BidiLine.java src/java.base/share/classes/sun/text/bidi/BidiRun.java src/java.base/share/classes/sun/text/bidi/BidiWriter.java src/java.base/share/classes/sun/text/normalizer/BMPSet.java src/java.base/share/classes/sun/text/normalizer/CharTrie.java src/java.base/share/classes/sun/text/normalizer/CharacterIteratorWrapper.java src/java.base/share/classes/sun/text/normalizer/CodePointMap.java src/java.base/share/classes/sun/text/normalizer/CodePointTrie.java src/java.base/share/classes/sun/text/normalizer/FilteredNormalizer2.java src/java.base/share/classes/sun/text/normalizer/ICUBinary.java src/java.base/share/classes/sun/text/normalizer/Norm2AllModes.java src/java.base/share/classes/sun/text/normalizer/Normalizer2.java src/java.base/share/classes/sun/text/normalizer/NormalizerBase.java src/java.base/share/classes/sun/text/normalizer/NormalizerImpl.java src/java.base/share/classes/sun/text/normalizer/OutputInt.java src/java.base/share/classes/sun/text/normalizer/Replaceable.java src/java.base/share/classes/sun/text/normalizer/ReplaceableString.java src/java.base/share/classes/sun/text/normalizer/ReplaceableUCharacterIterator.java src/java.base/share/classes/sun/text/normalizer/Trie.java src/java.base/share/classes/sun/text/normalizer/Trie2.java src/java.base/share/classes/sun/text/normalizer/Trie2_16.java src/java.base/share/classes/sun/text/normalizer/UBiDiProps.java src/java.base/share/classes/sun/text/normalizer/UCharacter.java src/java.base/share/classes/sun/text/normalizer/UCharacterIterator.java 
src/java.base/share/classes/sun/text/normalizer/UCharacterProperty.java src/java.base/share/classes/sun/text/normalizer/UTF16.java src/java.base/share/classes/sun/text/normalizer/UnicodeSet.java src/java.base/share/classes/sun/text/normalizer/UnicodeSetStringSpan.java src/java.base/share/classes/sun/text/normalizer/Utility.java src/java.base/share/classes/sun/text/normalizer/VersionInfo.java src/java.base/share/classes/sun/text/resources/nfc.nrm src/java.base/share/classes/sun/text/resources/nfkc.nrm src/java.base/share/classes/sun/text/resources/ubidi.icu src/java.base/share/classes/sun/text/resources/uprops.icu test/jdk/jdk/jfr/api/consumer/recordingstream/TestSetFlushInterval.java test/jdk/jdk/jfr/api/recording/time/TestSetFlushInterval.java |
diffstat | 332 files changed, 24698 insertions(+), 24428 deletions(-) [+] |
line wrap: on
line diff
--- a/.hgtags Tue Jan 14 15:19:49 2020 -0800 +++ b/.hgtags Tue Jan 14 15:23:01 2020 -0800 @@ -608,4 +608,5 @@ d05fcdf25717d85e80a3a39a6b719458b22be5fe jdk-15+3 d54ce919da90dab361995bb4d87be9851f00537a jdk-14+30 bb0a7975b31ded63d594ee8dbfc4d4ead587f79b jdk-15+4 +decd3d2953b640f1043ee76953ff89238bff92e8 jdk-14+31 b97c1773ccafae4a8c16cc6aedb10b2a4f9a07ed jdk-15+5
--- a/make/autoconf/jdk-options.m4 Tue Jan 14 15:19:49 2020 -0800 +++ b/make/autoconf/jdk-options.m4 Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -302,15 +302,21 @@ if test "x$withval" = xexternal || test "x$withval" = xzipped; then AC_MSG_ERROR([AIX only supports the parameters 'none' and 'internal' for --with-native-debug-symbols]) fi + else + if test "x$OPENJDK_TARGET_OS" = xwindows; then + if test "x$withval" = xinternal; then + AC_MSG_ERROR([Windows does not support the parameter 'internal' for --with-native-debug-symbols]) + fi + fi fi ], [ - if test "x$OPENJDK_TARGET_OS" = xaix; then - # AIX doesn't support 'external' so use 'internal' as default - with_native_debug_symbols="internal" + if test "x$STATIC_BUILD" = xtrue; then + with_native_debug_symbols="none" else - if test "x$STATIC_BUILD" = xtrue; then - with_native_debug_symbols="none" + if test "x$OPENJDK_TARGET_OS" = xaix; then + # AIX doesn't support 'external' so use 'internal' as default + with_native_debug_symbols="internal" else with_native_debug_symbols="external" fi @@ -319,20 +325,7 @@ NATIVE_DEBUG_SYMBOLS=$with_native_debug_symbols AC_MSG_RESULT([$NATIVE_DEBUG_SYMBOLS]) - if test "x$NATIVE_DEBUG_SYMBOLS" = xzipped; then - - if test "x$OPENJDK_TARGET_OS" = xsolaris || test "x$OPENJDK_TARGET_OS" = xlinux; then - if test "x$OBJCOPY" = x; then - # enabling of enable-debug-symbols and can't find objcopy - # this is an error - AC_MSG_ERROR([Unable to find objcopy, cannot enable native debug symbols]) - fi - fi - - COMPILE_WITH_DEBUG_SYMBOLS=true - COPY_DEBUG_SYMBOLS=true - ZIP_EXTERNAL_DEBUG_SYMBOLS=true - elif test "x$NATIVE_DEBUG_SYMBOLS" = xnone; then + if test "x$NATIVE_DEBUG_SYMBOLS" = xnone; then 
COMPILE_WITH_DEBUG_SYMBOLS=false COPY_DEBUG_SYMBOLS=false ZIP_EXTERNAL_DEBUG_SYMBOLS=false @@ -353,6 +346,19 @@ COMPILE_WITH_DEBUG_SYMBOLS=true COPY_DEBUG_SYMBOLS=true ZIP_EXTERNAL_DEBUG_SYMBOLS=false + elif test "x$NATIVE_DEBUG_SYMBOLS" = xzipped; then + + if test "x$OPENJDK_TARGET_OS" = xsolaris || test "x$OPENJDK_TARGET_OS" = xlinux; then + if test "x$OBJCOPY" = x; then + # enabling of enable-debug-symbols and can't find objcopy + # this is an error + AC_MSG_ERROR([Unable to find objcopy, cannot enable native debug symbols]) + fi + fi + + COMPILE_WITH_DEBUG_SYMBOLS=true + COPY_DEBUG_SYMBOLS=true + ZIP_EXTERNAL_DEBUG_SYMBOLS=true else AC_MSG_ERROR([Allowed native debug symbols are: none, internal, external, zipped]) fi
--- a/make/autoconf/toolchain_windows.m4 Tue Jan 14 15:19:49 2020 -0800 +++ b/make/autoconf/toolchain_windows.m4 Tue Jan 14 15:23:01 2020 -0800 @@ -209,8 +209,8 @@ eval SDK_INSTALL_DIR="\${VS_SDK_INSTALLDIR_${VS_VERSION}}" eval VS_ENV_ARGS="\${VS_ENV_ARGS_${VS_VERSION}}" eval VS_TOOLSET_SUPPORTED="\${VS_TOOLSET_SUPPORTED_${VS_VERSION}}" - - VS_ENV_CMD="" + + VS_ENV_CMD="" # When using --with-tools-dir, assume it points to the correct and default # version of Visual Studio or that --with-toolchain-version was also set.
--- a/make/conf/jib-profiles.js Tue Jan 14 15:19:49 2020 -0800 +++ b/make/conf/jib-profiles.js Tue Jan 14 15:23:01 2020 -0800 @@ -1066,7 +1066,7 @@ // build_number: "b07", // file: "bundles/jcov-3_0.zip", organization: common.organization, - revision: "3.0-58-support+1.0", + revision: "3.0-59-support+1.0", ext: "zip", environment_name: "JCOV_HOME", },
--- a/make/hotspot/test/GtestImage.gmk Tue Jan 14 15:19:49 2020 -0800 +++ b/make/hotspot/test/GtestImage.gmk Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ # -# Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -45,12 +45,14 @@ FLATTEN := true, \ )) \ $(eval TARGETS += $$(COPY_GTEST_MSVCR_$v)) \ - $(eval $(call SetupCopyFiles, COPY_GTEST_PDB_$v, \ - SRC := $(HOTSPOT_OUTPUTDIR)/variant-$v/libjvm/gtest, \ - DEST := $(TEST_IMAGE_DIR)/hotspot/gtest/$v, \ - FILES := jvm.pdb gtestLauncher.pdb, \ - )) \ - $(eval TARGETS += $$(COPY_GTEST_PDB_$v)) \ + $(if $(call equals, $(COPY_DEBUG_SYMBOLS), true), \ + $(eval $(call SetupCopyFiles, COPY_GTEST_PDB_$v, \ + SRC := $(HOTSPOT_OUTPUTDIR)/variant-$v/libjvm/gtest, \ + DEST := $(TEST_IMAGE_DIR)/hotspot/gtest/$v, \ + FILES := jvm.pdb gtestLauncher.pdb, \ + )) \ + $(eval TARGETS += $$(COPY_GTEST_PDB_$v)) \ + ) \ ) endif
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -35,8 +35,6 @@ #include "ci/ciArrayKlass.hpp" #include "ci/ciInstance.hpp" #include "code/compiledIC.hpp" -#include "gc/shared/barrierSet.hpp" -#include "gc/shared/cardTableBarrierSet.hpp" #include "gc/shared/collectedHeap.hpp" #include "nativeInst_aarch64.hpp" #include "oops/objArrayKlass.hpp"
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -31,8 +31,6 @@ #include "c1/c1_ValueStack.hpp" #include "ci/ciArrayKlass.hpp" #include "ci/ciInstance.hpp" -#include "gc/shared/barrierSet.hpp" -#include "gc/shared/cardTableBarrierSet.hpp" #include "gc/shared/collectedHeap.hpp" #include "memory/universe.hpp" #include "nativeInst_arm.hpp"
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -33,8 +33,6 @@ #include "ci/ciArrayKlass.hpp" #include "ci/ciInstance.hpp" #include "gc/shared/collectedHeap.hpp" -#include "gc/shared/barrierSet.hpp" -#include "gc/shared/cardTableBarrierSet.hpp" #include "memory/universe.hpp" #include "nativeInst_ppc.hpp" #include "oops/compressedOops.hpp"
--- a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -294,7 +294,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) { // Copy original code here. assert(NativeGeneralJump::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF, - "not enough room for call"); + "not enough room for call, need %d", _bytes_to_copy); NearLabel call_patch; @@ -331,7 +331,7 @@ } #endif } else { - // Make a copy the code which is going to be patched. + // Make a copy of the code which is going to be patched. for (int i = 0; i < _bytes_to_copy; i++) { address ptr = (address)(_pc_start + i); int a_byte = (*ptr) & 0xFF;
--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -33,8 +33,6 @@ #include "ci/ciArrayKlass.hpp" #include "ci/ciInstance.hpp" #include "gc/shared/collectedHeap.hpp" -#include "gc/shared/barrierSet.hpp" -#include "gc/shared/cardTableBarrierSet.hpp" #include "memory/universe.hpp" #include "nativeInst_s390.hpp" #include "oops/objArrayKlass.hpp" @@ -897,7 +895,7 @@ bool needs_patching = (patch_code != lir_patch_none); if (addr->base()->type() == T_OBJECT) { - __ verify_oop(src); + __ verify_oop(src, FILE_AND_LINE); } PatchingStub* patch = NULL; @@ -972,7 +970,7 @@ } else { __ z_lg(dest->as_register(), disp_value, disp_reg, src); } - __ verify_oop(dest->as_register()); + __ verify_oop(dest->as_register(), FILE_AND_LINE); break; } case T_FLOAT: @@ -1006,7 +1004,7 @@ if (dest->is_single_cpu()) { if (is_reference_type(type)) { __ mem2reg_opt(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()), true); - __ verify_oop(dest->as_register()); + __ verify_oop(dest->as_register(), FILE_AND_LINE); } else if (type == T_METADATA || type == T_ADDRESS) { __ mem2reg_opt(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()), true); } else { @@ -1033,7 +1031,7 @@ if (src->is_single_cpu()) { const Address dst = frame_map()->address_for_slot(dest->single_stack_ix()); if (is_reference_type(type)) { - __ verify_oop(src->as_register()); + __ verify_oop(src->as_register(), FILE_AND_LINE); __ reg2mem_opt(src->as_register(), dst, true); } else if (type == T_METADATA || type == T_ADDRESS) { __ reg2mem_opt(src->as_register(), dst, true); @@ -1079,7 +1077,7 @@ ShouldNotReachHere(); } if (is_reference_type(to_reg->type())) { - __ verify_oop(to_reg->as_register()); + __ verify_oop(to_reg->as_register(), FILE_AND_LINE); } } @@ -1095,7 +1093,7 @@ bool needs_patching = (patch_code != lir_patch_none); if (addr->base()->is_oop_register()) { - __ 
verify_oop(dest); + __ verify_oop(dest, FILE_AND_LINE); } PatchingStub* patch = NULL; @@ -1130,7 +1128,7 @@ assert(disp_reg != Z_R0 || Immediate::is_simm20(disp_value), "should have set this up"); if (is_reference_type(type)) { - __ verify_oop(from->as_register()); + __ verify_oop(from->as_register(), FILE_AND_LINE); } bool short_disp = Immediate::is_uimm12(disp_value); @@ -2412,7 +2410,7 @@ op->klass()->as_register(), *op->stub()->entry()); __ bind(*op->stub()->continuation()); - __ verify_oop(op->obj()->as_register()); + __ verify_oop(op->obj()->as_register(), FILE_AND_LINE); } void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { @@ -2548,7 +2546,7 @@ } assert(obj != k_RInfo, "must be different"); - __ verify_oop(obj); + __ verify_oop(obj, FILE_AND_LINE); // Get object class. // Not a safepoint as obj null check happens earlier. @@ -3009,7 +3007,7 @@ assert(do_null || do_update, "why are we here?"); assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); - __ verify_oop(obj); + __ verify_oop(obj, FILE_AND_LINE); if (do_null || tmp1 != obj DEBUG_ONLY(|| true)) { __ z_ltgr(tmp1, obj);
--- a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -40,7 +40,7 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) { Label ic_miss, ic_hit; - verify_oop(receiver); + verify_oop(receiver, FILE_AND_LINE); int klass_offset = oopDesc::klass_offset_in_bytes(); if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) { @@ -83,7 +83,7 @@ assert_different_registers(hdr, obj, disp_hdr); NearLabel done; - verify_oop(obj); + verify_oop(obj, FILE_AND_LINE); // Load object header. z_lg(hdr, Address(obj, hdr_offset)); @@ -158,7 +158,7 @@ // Load object. z_lg(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); } - verify_oop(obj); + verify_oop(obj, FILE_AND_LINE); // Test if object header is pointing to the displaced header, and if so, restore // the displaced header in the object. If the object header is not pointing to // the displaced header, get the object header instead. 
@@ -278,7 +278,7 @@ // call(RuntimeAddress(Runtime1::entry_for (Runtime1::dtrace_object_alloc_id))); // } - verify_oop(obj); + verify_oop(obj, FILE_AND_LINE); } void C1_MacroAssembler::allocate_array( @@ -336,16 +336,15 @@ // call(RuntimeAddress(Runtime1::entry_for (Runtime1::dtrace_object_alloc_id))); // } - verify_oop(obj); + verify_oop(obj, FILE_AND_LINE); } #ifndef PRODUCT void C1_MacroAssembler::verify_stack_oop(int stack_offset) { - Unimplemented(); - // if (!VerifyOops) return; - // verify_oop_addr(Address(SP, stack_offset + STACK_BIAS)); + if (!VerifyOops) return; + verify_oop_addr(Address(Z_SP, stack_offset), FILE_AND_LINE); } void C1_MacroAssembler::verify_not_null_oop(Register r) { @@ -354,7 +353,7 @@ compareU64_and_branch(r, (intptr_t)0, bcondNotEqual, not_null); stop("non-null oop required"); bind(not_null); - verify_oop(r); + verify_oop(r, FILE_AND_LINE); } void C1_MacroAssembler::invalidate_registers(Register preserve1,
--- a/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/s390/c1_Runtime1_s390.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -339,7 +339,7 @@ oop_maps->add_gc_map(call_offset, map); restore_live_registers_except_r2(sasm); - __ verify_oop(obj); + __ verify_oop(obj, FILE_AND_LINE); __ z_br(Z_R14); } break; @@ -405,7 +405,7 @@ oop_maps->add_gc_map(call_offset, map); restore_live_registers_except_r2(sasm); - __ verify_oop(obj); + __ verify_oop(obj, FILE_AND_LINE); __ z_br(Z_R14); } break; @@ -423,7 +423,7 @@ restore_live_registers_except_r2(sasm); // Z_R2,: new multi array - __ verify_oop(Z_R2); + __ verify_oop(Z_R2, FILE_AND_LINE); __ z_br(Z_R14); } break;
--- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -400,11 +400,11 @@ __ z_tmll(tmp1, JNIHandles::weak_tag_mask); // Test for jweak tag. __ z_braz(Lnot_weak); - __ verify_oop(value); + __ verify_oop(value, FILE_AND_LINE); DecoratorSet decorators = IN_NATIVE | ON_PHANTOM_OOP_REF; g1_write_barrier_pre(masm, decorators, (const Address*)NULL, value, noreg, tmp1, tmp2, true); __ bind(Lnot_weak); - __ verify_oop(value); + __ verify_oop(value, FILE_AND_LINE); __ bind(Ldone); }
--- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -108,7 +108,7 @@ __ z_nill(value, ~JNIHandles::weak_tag_mask); __ z_lg(value, 0, value); // Resolve (untagged) jobject. - __ verify_oop(value); + __ verify_oop(value, FILE_AND_LINE); __ bind(Ldone); }
--- a/src/hotspot/cpu/s390/interp_masm_s390.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1664,7 +1664,7 @@ compareU64_and_branch(obj, (intptr_t)0, Assembler::bcondEqual, null_seen); } - verify_oop(obj); + MacroAssembler::verify_oop(obj, FILE_AND_LINE); load_klass(klass, obj); // Klass seen before, nothing to do (regardless of unknown bit). @@ -2073,7 +2073,7 @@ } void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { - if (state == atos) { MacroAssembler::verify_oop(reg); } + if (state == atos) { MacroAssembler::verify_oop(reg, FILE_AND_LINE); } } // Inline assembly for:
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -3587,7 +3587,7 @@ z_lg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(void*)); - verify_oop(oop_result); + verify_oop(oop_result, FILE_AND_LINE); } void MacroAssembler::get_vm_result_2(Register result) { @@ -6813,26 +6813,94 @@ } } +// Save and restore functions: Exclude Z_R0. +void MacroAssembler::save_volatile_regs(Register dst, int offset, bool include_fp, bool include_flags) { + z_stmg(Z_R1, Z_R5, offset, dst); offset += 5 * BytesPerWord; + if (include_fp) { + z_std(Z_F0, Address(dst, offset)); offset += BytesPerWord; + z_std(Z_F1, Address(dst, offset)); offset += BytesPerWord; + z_std(Z_F2, Address(dst, offset)); offset += BytesPerWord; + z_std(Z_F3, Address(dst, offset)); offset += BytesPerWord; + z_std(Z_F4, Address(dst, offset)); offset += BytesPerWord; + z_std(Z_F5, Address(dst, offset)); offset += BytesPerWord; + z_std(Z_F6, Address(dst, offset)); offset += BytesPerWord; + z_std(Z_F7, Address(dst, offset)); offset += BytesPerWord; + } + if (include_flags) { + Label done; + z_mvi(Address(dst, offset), 2); // encoding: equal + z_bre(done); + z_mvi(Address(dst, offset), 4); // encoding: higher + z_brh(done); + z_mvi(Address(dst, offset), 1); // encoding: lower + bind(done); + } +} +void MacroAssembler::restore_volatile_regs(Register src, int offset, bool include_fp, bool include_flags) { + z_lmg(Z_R1, Z_R5, offset, src); offset += 5 * BytesPerWord; + if (include_fp) { + z_ld(Z_F0, Address(src, offset)); offset += BytesPerWord; + z_ld(Z_F1, Address(src, offset)); offset += BytesPerWord; + z_ld(Z_F2, Address(src, offset)); offset += BytesPerWord; + z_ld(Z_F3, Address(src, offset)); offset += BytesPerWord; + z_ld(Z_F4, Address(src, offset)); offset += BytesPerWord; + z_ld(Z_F5, Address(src, offset)); offset += 
BytesPerWord; + z_ld(Z_F6, Address(src, offset)); offset += BytesPerWord; + z_ld(Z_F7, Address(src, offset)); offset += BytesPerWord; + } + if (include_flags) { + z_cli(Address(src, offset), 2); // see encoding above + } +} + // Plausibility check for oops. void MacroAssembler::verify_oop(Register oop, const char* msg) { if (!VerifyOops) return; BLOCK_COMMENT("verify_oop {"); - Register tmp = Z_R0; - unsigned int nbytes_save = 5*BytesPerWord; - address entry = StubRoutines::verify_oop_subroutine_entry_address(); + unsigned int nbytes_save = (5 + 8 + 1) * BytesPerWord; + address entry_addr = StubRoutines::verify_oop_subroutine_entry_address(); save_return_pc(); - push_frame_abi160(nbytes_save); - z_stmg(Z_R1, Z_R5, frame::z_abi_160_size, Z_SP); - - z_lgr(Z_ARG2, oop); - load_const(Z_ARG1, (address) msg); - load_const(Z_R1, entry); + + // Push frame, but preserve flags + z_lgr(Z_R0, Z_SP); + z_lay(Z_SP, -((int64_t)nbytes_save + frame::z_abi_160_size), Z_SP); + z_stg(Z_R0, _z_abi(callers_sp), Z_SP); + + save_volatile_regs(Z_SP, frame::z_abi_160_size, true, true); + + lgr_if_needed(Z_ARG2, oop); + load_const_optimized(Z_ARG1, (address)msg); + load_const_optimized(Z_R1, entry_addr); z_lg(Z_R1, 0, Z_R1); call_c(Z_R1); - z_lmg(Z_R1, Z_R5, frame::z_abi_160_size, Z_SP); + restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, true); + pop_frame(); + restore_return_pc(); + + BLOCK_COMMENT("} verify_oop "); +} + +void MacroAssembler::verify_oop_addr(Address addr, const char* msg) { + if (!VerifyOops) return; + + BLOCK_COMMENT("verify_oop {"); + unsigned int nbytes_save = (5 + 8) * BytesPerWord; + address entry_addr = StubRoutines::verify_oop_subroutine_entry_address(); + + save_return_pc(); + unsigned int frame_size = push_frame_abi160(nbytes_save); // kills Z_R0 + save_volatile_regs(Z_SP, frame::z_abi_160_size, true, false); + + z_lg(Z_ARG2, addr.plus_disp(frame_size)); + load_const_optimized(Z_ARG1, (address)msg); + load_const_optimized(Z_R1, entry_addr); + z_lg(Z_R1, 0, 
Z_R1); + call_c(Z_R1); + + restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, false); pop_frame(); restore_return_pc();
--- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -973,8 +973,15 @@ // Verify Z_thread contents. void verify_thread(); + // Save and restore functions: Exclude Z_R0. + void save_volatile_regs( Register dst, int offset, bool include_fp, bool include_flags); + void restore_volatile_regs(Register src, int offset, bool include_fp, bool include_flags); + // Only if +VerifyOops. + // Kills Z_R0. void verify_oop(Register reg, const char* s = "broken oop"); + // Kills Z_R0, condition code. + void verify_oop_addr(Address addr, const char* msg = "contains broken oop"); // TODO: verify_method and klass metadata (compare against vptr?). void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
--- a/src/hotspot/cpu/s390/methodHandles_s390.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/s390/methodHandles_s390.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -85,7 +85,7 @@ BLOCK_COMMENT("verify_klass {"); - __ verify_oop(obj_reg); + __ verify_oop(obj_reg, FILE_AND_LINE); __ compareU64_and_branch(obj_reg, (intptr_t)0L, Assembler::bcondEqual, L_bad); __ load_klass(temp_reg, obj_reg); // klass_addr is a klass in allstatic SystemDictionaryHandles. Can't get GCed. @@ -194,22 +194,22 @@ BLOCK_COMMENT("jump_to_lambda_form {"); // Load the invoker, as MH -> MH.form -> LF.vmentry - __ verify_oop(recv); + __ verify_oop(recv, FILE_AND_LINE); __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), noreg, noreg, IS_NOT_NULL); - __ verify_oop(method_temp); + __ verify_oop(method_temp, FILE_AND_LINE); __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), noreg, noreg, IS_NOT_NULL); - __ verify_oop(method_temp); + __ verify_oop(method_temp, FILE_AND_LINE); __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), noreg, noreg, IS_NOT_NULL); - __ verify_oop(method_temp); + __ verify_oop(method_temp, FILE_AND_LINE); __ z_lg(method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes()))); @@ -385,7 +385,7 @@ Register temp1_recv_klass = temp1; if (iid != vmIntrinsics::_linkToStatic) { - __ verify_oop(receiver_reg); + __ verify_oop(receiver_reg, FILE_AND_LINE); if (iid == vmIntrinsics::_linkToSpecial) { // Don't actually load the klass; just null-check the receiver. __ null_check(receiver_reg);
--- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -892,9 +892,9 @@ if (r->is_stack()) { __ z_lg(Z_R0_scratch, Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); - __ verify_oop(Z_R0_scratch); + __ verify_oop(Z_R0_scratch, FILE_AND_LINE); } else { - __ verify_oop(r->as_Register()); + __ verify_oop(r->as_Register(), FILE_AND_LINE); } } } @@ -2686,7 +2686,7 @@ __ z_ltgr(Z_ARG1, Z_ARG1); __ z_bre(ic_miss); } - __ verify_oop(Z_ARG1); + __ verify_oop(Z_ARG1, FILE_AND_LINE); // Check ic: object class <-> cached class // Compress cached class for comparison. That's more efficient. @@ -2955,7 +2955,7 @@ #ifdef ASSERT // verify that there is really an exception oop in JavaThread __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset())); - __ verify_oop(Z_ARG1); + __ MacroAssembler::verify_oop(Z_ARG1, FILE_AND_LINE); // verify that there is no pending exception __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
--- a/src/hotspot/cpu/s390/stubGenerator_s390.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/s390/stubGenerator_s390.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -667,6 +667,17 @@ return start; } +#if !defined(PRODUCT) + // Wrapper which calls oopDesc::is_oop_or_null() + // Only called by MacroAssembler::verify_oop + static void verify_oop_helper(const char* message, oopDesc* o) { + if (!oopDesc::is_oop_or_null(o)) { + fatal("%s. oop: " PTR_FORMAT, message, p2i(o)); + } + ++ StubRoutines::_verify_oop_count; + } +#endif + // Return address of code to be called from code generated by // MacroAssembler::verify_oop. // @@ -679,6 +690,11 @@ // StubCodeMark mark(this, "StubRoutines", "verify_oop_stub"); address start = 0; + +#if !defined(PRODUCT) + start = CAST_FROM_FN_PTR(address, verify_oop_helper); +#endif + return start; }
--- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -31,8 +31,6 @@ #include "c1/c1_ValueStack.hpp" #include "ci/ciArrayKlass.hpp" #include "ci/ciInstance.hpp" -#include "gc/shared/barrierSet.hpp" -#include "gc/shared/cardTableBarrierSet.hpp" #include "gc/shared/collectedHeap.hpp" #include "memory/universe.hpp" #include "nativeInst_sparc.hpp"
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -32,8 +32,6 @@ #include "c1/c1_ValueStack.hpp" #include "ci/ciArrayKlass.hpp" #include "ci/ciInstance.hpp" -#include "gc/shared/barrierSet.hpp" -#include "gc/shared/cardTableBarrierSet.hpp" #include "gc/shared/collectedHeap.hpp" #include "nativeInst_x86.hpp" #include "oops/objArrayKlass.hpp"
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -30,6 +30,8 @@ #include "code/nativeInst.hpp" #include "code/vtableStubs.hpp" #include "gc/shared/gcLocker.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" #include "interpreter/interpreter.hpp" #include "logging/log.hpp" #include "memory/resourceArea.hpp"
--- a/src/hotspot/share/c1/c1_LIRGenerator.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2225,7 +2225,7 @@ int lng = x->length(); for (int i = 0; i < lng; i++) { - SwitchRange* one_range = x->at(i); + C1SwitchRange* one_range = x->at(i); int low_key = one_range->low_key(); int high_key = one_range->high_key(); BlockBegin* dest = one_range->sux(); @@ -2257,7 +2257,7 @@ BlockBegin* sux = x->sux_at(0); int key = x->lo_key(); BlockBegin* default_sux = x->default_sux(); - SwitchRange* range = new SwitchRange(key, sux); + C1SwitchRange* range = new C1SwitchRange(key, sux); for (int i = 0; i < len; i++, key++) { BlockBegin* new_sux = x->sux_at(i); if (sux == new_sux) { @@ -2268,7 +2268,7 @@ if (sux != default_sux) { res->append(range); } - range = new SwitchRange(key, new_sux); + range = new C1SwitchRange(key, new_sux); } sux = new_sux; } @@ -2286,7 +2286,7 @@ BlockBegin* default_sux = x->default_sux(); int key = x->key_at(0); BlockBegin* sux = x->sux_at(0); - SwitchRange* range = new SwitchRange(key, sux); + C1SwitchRange* range = new C1SwitchRange(key, sux); for (int i = 1; i < len; i++) { int new_key = x->key_at(i); BlockBegin* new_sux = x->sux_at(i); @@ -2298,7 +2298,7 @@ if (range->sux() != default_sux) { res->append(range); } - range = new SwitchRange(new_key, new_sux); + range = new C1SwitchRange(new_key, new_sux); } key = new_key; sux = new_sux;
--- a/src/hotspot/share/c1/c1_LIRGenerator.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/c1/c1_LIRGenerator.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -42,18 +42,17 @@ class LIRGenerator; class LIREmitter; class Invoke; -class SwitchRange; class LIRItem; typedef GrowableArray<LIRItem*> LIRItemList; -class SwitchRange: public CompilationResourceObj { +class C1SwitchRange: public CompilationResourceObj { private: int _low_key; int _high_key; BlockBegin* _sux; public: - SwitchRange(int start_key, BlockBegin* sux): _low_key(start_key), _high_key(start_key), _sux(sux) {} + C1SwitchRange(int start_key, BlockBegin* sux): _low_key(start_key), _high_key(start_key), _sux(sux) {} void set_high_key(int key) { _high_key = key; } int high_key() const { return _high_key; } @@ -61,8 +60,8 @@ BlockBegin* sux() const { return _sux; } }; -typedef GrowableArray<SwitchRange*> SwitchRangeArray; -typedef GrowableArray<SwitchRange*> SwitchRangeList; +typedef GrowableArray<C1SwitchRange*> SwitchRangeArray; +typedef GrowableArray<C1SwitchRange*> SwitchRangeList; class ResolveNode;
--- a/src/hotspot/share/code/debugInfo.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/code/debugInfo.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -51,7 +51,15 @@ } oop DebugInfoReadStream::read_oop() { - oop o = code()->oop_at(read_int()); + nmethod* nm = const_cast<CompiledMethod*>(code())->as_nmethod_or_null(); + oop o; + if (nm != NULL) { + // Despite these oops being found inside nmethods that are on-stack, + // they are not kept alive by all GCs (e.g. G1 and Shenandoah). + o = nm->oop_at_phantom(read_int()); + } else { + o = code()->oop_at(read_int()); + } assert(oopDesc::is_oop_or_null(o), "oop only"); return o; }
--- a/src/hotspot/share/gc/g1/g1Allocator.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/g1/g1Allocator.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -412,8 +412,7 @@ } bool G1ArchiveAllocator::_archive_check_enabled = false; -G1ArchiveRegionMap G1ArchiveAllocator::_closed_archive_region_map; -G1ArchiveRegionMap G1ArchiveAllocator::_open_archive_region_map; +G1ArchiveRegionMap G1ArchiveAllocator::_archive_region_map; G1ArchiveAllocator* G1ArchiveAllocator::create_allocator(G1CollectedHeap* g1h, bool open) { // Create the archive allocator, and also enable archive object checking
--- a/src/hotspot/share/gc/g1/g1Allocator.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/g1/g1Allocator.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -203,12 +203,17 @@ void undo_allocation(G1HeapRegionAttr dest, HeapWord* obj, size_t word_sz, uint node_index); }; -// G1ArchiveRegionMap is a boolean array used to mark G1 regions as +// G1ArchiveRegionMap is an array used to mark G1 regions as // archive regions. This allows a quick check for whether an object // should not be marked because it is in an archive region. -class G1ArchiveRegionMap : public G1BiasedMappedArray<bool> { +class G1ArchiveRegionMap : public G1BiasedMappedArray<uint8_t> { +public: + static const uint8_t NoArchive = 0; + static const uint8_t OpenArchive = 1; + static const uint8_t ClosedArchive = 2; + protected: - bool default_value() const { return false; } + uint8_t default_value() const { return NoArchive; } }; // G1ArchiveAllocator is used to allocate memory in archive @@ -290,7 +295,7 @@ // Mark regions containing the specified address range as archive/non-archive. static inline void set_range_archive(MemRegion range, bool open); - static inline void clear_range_archive(MemRegion range, bool open); + static inline void clear_range_archive(MemRegion range); // Check if the object is in closed archive static inline bool is_closed_archive_object(oop object); @@ -301,8 +306,7 @@ private: static bool _archive_check_enabled; - static G1ArchiveRegionMap _closed_archive_region_map; - static G1ArchiveRegionMap _open_archive_region_map; + static G1ArchiveRegionMap _archive_region_map; // Check if an object is in a closed archive region using the _closed_archive_region_map. 
static inline bool in_closed_archive_range(oop object);
--- a/src/hotspot/share/gc/g1/g1Allocator.inline.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/g1/g1Allocator.inline.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -131,12 +131,9 @@ _archive_check_enabled = true; size_t length = G1CollectedHeap::heap()->max_reserved_capacity(); - _closed_archive_region_map.initialize(G1CollectedHeap::heap()->base(), - G1CollectedHeap::heap()->base() + length, - HeapRegion::GrainBytes); - _open_archive_region_map.initialize(G1CollectedHeap::heap()->base(), - G1CollectedHeap::heap()->base() + length, - HeapRegion::GrainBytes); + _archive_region_map.initialize(G1CollectedHeap::heap()->base(), + G1CollectedHeap::heap()->base() + length, + HeapRegion::GrainBytes); } // Set the regions containing the specified address range as archive. @@ -146,36 +143,26 @@ open ? "open" : "closed", p2i(range.start()), p2i(range.last())); - if (open) { - _open_archive_region_map.set_by_address(range, true); - } else { - _closed_archive_region_map.set_by_address(range, true); - } + uint8_t const value = open ? G1ArchiveRegionMap::OpenArchive : G1ArchiveRegionMap::ClosedArchive; + _archive_region_map.set_by_address(range, value); } // Clear the archive regions map containing the specified address range. -inline void G1ArchiveAllocator::clear_range_archive(MemRegion range, bool open) { +inline void G1ArchiveAllocator::clear_range_archive(MemRegion range) { assert(_archive_check_enabled, "archive range check not enabled"); - log_info(gc, cds)("Clear %s archive regions in map: [" PTR_FORMAT ", " PTR_FORMAT "]", - open ? 
"open" : "closed", + log_info(gc, cds)("Clear archive regions in map: [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(range.start()), p2i(range.last())); - if (open) { - _open_archive_region_map.set_by_address(range, false); - } else { - _closed_archive_region_map.set_by_address(range, false); - } + _archive_region_map.set_by_address(range, G1ArchiveRegionMap::NoArchive); } // Check if an object is in a closed archive region using the _archive_region_map. inline bool G1ArchiveAllocator::in_closed_archive_range(oop object) { - // This is the out-of-line part of is_closed_archive_object test, done separately - // to avoid additional performance impact when the check is not enabled. - return _closed_archive_region_map.get_by_address((HeapWord*)object); + return _archive_region_map.get_by_address((HeapWord*)object) == G1ArchiveRegionMap::ClosedArchive; } inline bool G1ArchiveAllocator::in_open_archive_range(oop object) { - return _open_archive_region_map.get_by_address((HeapWord*)object); + return _archive_region_map.get_by_address((HeapWord*)object) == G1ArchiveRegionMap::OpenArchive; } // Check if archive object checking is enabled, to avoid calling in_open/closed_archive_range @@ -193,8 +180,8 @@ } inline bool G1ArchiveAllocator::is_archived_object(oop object) { - return (archive_check_enabled() && (in_closed_archive_range(object) || - in_open_archive_range(object))); + return archive_check_enabled() && + (_archive_region_map.get_by_address((HeapWord*)object) != G1ArchiveRegionMap::NoArchive); } #endif // SHARE_GC_G1_G1ALLOCATOR_INLINE_HPP
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,4 +1,4 @@ - /* +/* * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -766,7 +766,7 @@ return result; } -void G1CollectedHeap::dealloc_archive_regions(MemRegion* ranges, size_t count, bool is_open) { +void G1CollectedHeap::dealloc_archive_regions(MemRegion* ranges, size_t count) { assert(!is_init_completed(), "Expect to be called at JVM init time"); assert(ranges != NULL, "MemRegion array NULL"); assert(count != 0, "No MemRegions provided"); @@ -828,7 +828,7 @@ } // Notify mark-sweep that this is no longer an archive range. - G1ArchiveAllocator::clear_range_archive(ranges[i], is_open); + G1ArchiveAllocator::clear_range_archive(ranges[i]); } if (uncommitted_regions != 0) { @@ -2151,6 +2151,13 @@ return op.gc_succeeded(); } + // If VMOp skipped initiating concurrent marking cycle because + // we're terminating, then we're done. + if (op.terminating()) { + LOG_COLLECT_CONCURRENTLY(cause, "skipped: terminating"); + return false; + } + // Lock to get consistent set of values. uint old_marking_started_after; uint old_marking_completed_after;
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -723,7 +723,7 @@ // which had been allocated by alloc_archive_regions. This should be called // rather than fill_archive_regions at JVM init time if the archive file // mapping failed, with the same non-overlapping and sorted MemRegion array. - void dealloc_archive_regions(MemRegion* range, size_t count, bool is_open); + void dealloc_archive_regions(MemRegion* range, size_t count); oop materialize_archived_object(oop obj);
--- a/src/hotspot/share/gc/g1/g1VMOperations.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/g1/g1VMOperations.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -47,6 +47,7 @@ _target_pause_time_ms(target_pause_time_ms), _transient_failure(false), _cycle_already_in_progress(false), + _terminating(false), _gc_succeeded(false) {} @@ -66,7 +67,17 @@ G1CollectedHeap* g1h = G1CollectedHeap::heap(); GCCauseSetter x(g1h, _gc_cause); - if (!g1h->policy()->force_initial_mark_if_outside_cycle(_gc_cause)) { + + // Record for handling by caller. + _terminating = g1h->_cm_thread->should_terminate(); + + if (_terminating && GCCause::is_user_requested_gc(_gc_cause)) { + // When terminating, the request to initiate a concurrent cycle will be + // ignored by do_collection_pause_at_safepoint; instead it will just do + // a young-only or mixed GC (depending on phase). For a user request + // there's no point in even doing that much, so done. For some non-user + // requests the alternative GC might still be needed. + } else if (!g1h->policy()->force_initial_mark_if_outside_cycle(_gc_cause)) { // Failure to force the next GC pause to be an initial mark indicates // there is already a concurrent marking cycle in progress. Set flag // to notify the caller and return immediately.
--- a/src/hotspot/share/gc/g1/g1VMOperations.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/g1/g1VMOperations.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -52,6 +52,7 @@ double _target_pause_time_ms; bool _transient_failure; bool _cycle_already_in_progress; + bool _terminating; bool _gc_succeeded; public: @@ -63,6 +64,7 @@ virtual void doit(); bool transient_failure() const { return _transient_failure; } bool cycle_already_in_progress() const { return _cycle_already_in_progress; } + bool terminating() const { return _terminating; } bool gc_succeeded() const { return _gc_succeeded; } };
--- a/src/hotspot/share/gc/shared/gcTrace.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/shared/gcTrace.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -100,7 +100,7 @@ KlassInfoTable cit(false); if (!cit.allocation_failed()) { - HeapInspection hi(false, false, false, NULL); + HeapInspection hi; hi.populate_table(&cit, is_alive_cl); ObjectCountEventSenderClosure event_sender(cit.size_of_instances_in_words(), Ticks::now()); cit.iterate(&event_sender);
--- a/src/hotspot/share/gc/shared/gcVMOperations.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/shared/gcVMOperations.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -151,8 +151,7 @@ log_warning(gc)("GC locker is held; pre-dump GC was skipped"); } } - HeapInspection inspect(_csv_format, _print_help, _print_class_stats, - _columns); + HeapInspection inspect; inspect.heap_inspection(_out); }
--- a/src/hotspot/share/gc/shared/gcVMOperations.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/shared/gcVMOperations.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -125,32 +125,17 @@ private: outputStream* _out; bool _full_gc; - bool _csv_format; // "comma separated values" format for spreadsheet. - bool _print_help; - bool _print_class_stats; - const char* _columns; public: VM_GC_HeapInspection(outputStream* out, bool request_full_gc) : VM_GC_Operation(0 /* total collections, dummy, ignored */, GCCause::_heap_inspection /* GC Cause */, 0 /* total full collections, dummy, ignored */, - request_full_gc) { - _out = out; - _full_gc = request_full_gc; - _csv_format = false; - _print_help = false; - _print_class_stats = false; - _columns = NULL; - } + request_full_gc), _out(out), _full_gc(request_full_gc) {} ~VM_GC_HeapInspection() {} virtual VMOp_Type type() const { return VMOp_GC_HeapInspection; } virtual bool skip_operation() const; virtual void doit(); - void set_csv_format(bool value) {_csv_format = value;} - void set_print_help(bool value) {_print_help = value;} - void set_print_class_stats(bool value) {_print_class_stats = value;} - void set_columns(const char* value) {_columns = value;} protected: bool collect(); };
--- a/src/hotspot/share/gc/shenandoah/shenandoahCodeRoots.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/shenandoah/shenandoahCodeRoots.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -223,7 +223,9 @@ ShenandoahReentrantLocker locker(nm_data->lock()); // Heal oops and disarm - ShenandoahNMethod::heal_nmethod(nm); + if (_heap->is_evacuation_in_progress()) { + ShenandoahNMethod::heal_nmethod(nm); + } ShenandoahNMethod::disarm_nmethod(nm); // Clear compiled ICs and exception caches
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1715,7 +1715,6 @@ if (obj == old) { _dead_counter ++; } - assert(*p == NULL, "Must be"); } else if (_evac_in_progress && _heap->in_collection_set(obj)) { oop resolved = ShenandoahBarrierSet::resolve_forwarded_not_null(obj); if (resolved == obj) { @@ -2805,7 +2804,7 @@ ShenandoahGCPhase total_phase(ShenandoahPhaseTimings::total_pause); ShenandoahGCPhase phase(ShenandoahPhaseTimings::init_traversal_gc); - static const char* msg = "Pause Init Traversal"; + static const char* msg = init_traversal_event_message(); GCTraceTime(Info, gc) time(msg, gc_timer()); EventMark em("%s", msg); @@ -2820,7 +2819,7 @@ ShenandoahGCPhase total_phase(ShenandoahPhaseTimings::total_pause); ShenandoahGCPhase phase(ShenandoahPhaseTimings::final_traversal_gc); - static const char* msg = "Pause Final Traversal"; + static const char* msg = final_traversal_event_message(); GCTraceTime(Info, gc) time(msg, gc_timer()); EventMark em("%s", msg); @@ -2972,7 +2971,7 @@ } void ShenandoahHeap::entry_traversal() { - static const char* msg = "Concurrent traversal"; + static const char* msg = conc_traversal_event_message(); GCTraceTime(Info, gc) time(msg, NULL, GCCause::_no_gc, true); EventMark em("%s", msg); @@ -3141,6 +3140,51 @@ } } +const char* ShenandoahHeap::init_traversal_event_message() const { + bool proc_refs = process_references(); + bool unload_cls = unload_classes(); + + if (proc_refs && unload_cls) { + return "Pause Init Traversal (process weakrefs) (unload classes)"; + } else if (proc_refs) { + return "Pause Init Traversal (process weakrefs)"; + } else if (unload_cls) { + return "Pause Init Traversal (unload classes)"; + } else { + return "Pause Init Traversal"; + } +} + +const char* ShenandoahHeap::final_traversal_event_message() const { + bool proc_refs = process_references(); + bool unload_cls = 
unload_classes(); + + if (proc_refs && unload_cls) { + return "Pause Final Traversal (process weakrefs) (unload classes)"; + } else if (proc_refs) { + return "Pause Final Traversal (process weakrefs)"; + } else if (unload_cls) { + return "Pause Final Traversal (unload classes)"; + } else { + return "Pause Final Traversal"; + } +} + +const char* ShenandoahHeap::conc_traversal_event_message() const { + bool proc_refs = process_references(); + bool unload_cls = unload_classes(); + + if (proc_refs && unload_cls) { + return "Concurrent Traversal (process weakrefs) (unload classes)"; + } else if (proc_refs) { + return "Concurrent Traversal (process weakrefs)"; + } else if (unload_cls) { + return "Concurrent Traversal (unload classes)"; + } else { + return "Concurrent Traversal"; + } +} + const char* ShenandoahHeap::degen_event_message(ShenandoahDegenPoint point) const { switch (point) { case _degenerated_unset:
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -438,6 +438,9 @@ const char* init_mark_event_message() const; const char* final_mark_event_message() const; const char* conc_mark_event_message() const; + const char* init_traversal_event_message() const; + const char* final_traversal_event_message() const; + const char* conc_traversal_event_message() const; const char* degen_event_message(ShenandoahDegenPoint point) const; // ---------- GC subsystems
--- a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2015, 2020, Red Hat, Inc. All rights reserved. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as @@ -226,6 +226,7 @@ ShenandoahThreadRoots _thread_roots; ShenandoahCodeCacheRoots<ITR> _code_roots; ShenandoahVMRoots<false /*concurrent*/ > _vm_roots; + ShenandoahStringDedupRoots _dedup_roots; ShenandoahClassLoaderDataRoots<false /*concurrent*/, false /*single threaded*/> _cld_roots; public:
--- a/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.inline.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/shenandoah/shenandoahRootProcessor.inline.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2019, 2020, Red Hat, Inc. All rights reserved. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as @@ -250,6 +250,9 @@ if (code != NULL && !ShenandoahConcurrentScanCodeRoots) { _code_roots.code_blobs_do(code, worker_id); } + + AlwaysTrueClosure always_true; + _dedup_roots.oops_do(&always_true, oops, worker_id); } template <typename ITR>
--- a/src/hotspot/share/gc/shenandoah/shenandoahTraversalGC.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/gc/shenandoah/shenandoahTraversalGC.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as @@ -193,9 +193,6 @@ } else { _rp->roots_do(worker_id, &roots_cl, &cld_cl, &code_cl); } - - AlwaysTrueClosure is_alive; - _dedup_roots.oops_do(&is_alive, &roots_cl, worker_id); } } };
--- a/src/hotspot/share/jfr/metadata/metadata.xml Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/jfr/metadata/metadata.xml Tue Jan 14 15:23:01 2020 -0800 @@ -1,7 +1,7 @@ <?xml version="1.0" encoding="utf-8"?> <!-- - Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. This code is free software; you can redistribute it and/or modify it @@ -1023,36 +1023,6 @@ <Field type="ulong" contentType="bytes" name="size" label="Size Written" /> </Event> - <Event name="FlushStorage" category="Flight Recorder" label="Flush Storage" thread="false" experimental="true"> - <Field type="ulong" name="flushId" label="Flush Identifier" relation="FlushId" /> - <Field type="ulong" name="elements" label="Elements Written" /> - <Field type="ulong" contentType="bytes" name="size" label="Size Written" /> - </Event> - - <Event name="FlushStacktrace" category="Flight Recorder" label="Flush Stacktrace" thread="false" experimental="true"> - <Field type="ulong" name="flushId" label="Flush Identifier" relation="FlushId" /> - <Field type="ulong" name="elements" label="Elements Written" /> - <Field type="ulong" contentType="bytes" name="size" label="Size Written" /> - </Event> - - <Event name="FlushStringPool" category="Flight Recorder" label="Flush String Pool" thread="false" experimental="true"> - <Field type="ulong" name="flushId" label="Flush Identifier" relation="FlushId" /> - <Field type="ulong" name="elements" label="Elements Written" /> - <Field type="ulong" contentType="bytes" name="size" label="Size Written" /> - </Event> - - <Event name="FlushMetadata" category="Flight Recorder" label="Flush Metadata" thread="false" experimental="true"> - <Field type="ulong" name="flushId" label="Flush Identifier" relation="FlushId" /> - <Field type="ulong" name="elements" label="Elements Written" /> - <Field type="ulong" 
contentType="bytes" name="size" label="Size Written" /> - </Event> - - <Event name="FlushTypeSet" category="Flight Recorder" label="Flush Type Set" thread="false" experimental="true"> - <Field type="ulong" name="flushId" label="Flush Identifier" relation="FlushId" /> - <Field type="ulong" name="elements" label="Elements Written" /> - <Field type="ulong" contentType="bytes" name="size" label="Size Written" /> - </Event> - <Type name="DeoptimizationReason" label="Deoptimization Reason"> <Field type="string" name="reason" label="Reason" /> </Type>
--- a/src/hotspot/share/jfr/recorder/service/jfrRecorderService.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/jfr/recorder/service/jfrRecorderService.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -58,13 +58,12 @@ // incremented on each flushpoint static u8 flushpoint_id = 0; -template <typename E, typename Instance, size_t(Instance::*func)()> +template <typename Instance, size_t(Instance::*func)()> class Content { private: Instance& _instance; u4 _elements; public: - typedef E EventType; Content(Instance& instance) : _instance(instance), _elements(0) {} bool process() { _elements = (u4)(_instance.*func)(); @@ -82,7 +81,6 @@ Content& _content; const int64_t _start_offset; public: - typedef typename Content::EventType EventType; WriteContent(JfrChunkWriter& cw, Content& content) : _start_time(JfrTicks::now()), @@ -128,14 +126,6 @@ return (u4)(end_offset() - start_offset()); } - static bool is_event_enabled() { - return EventType::is_enabled(); - } - - static u8 event_id() { - return EventType::eventId; - } - void write_elements(int64_t offset) { _cw.write_padded_at_offset<u4>(elements(), offset); } @@ -199,22 +189,15 @@ } template <typename Functor> -static void write_flush_event(Functor& f) { - if (Functor::is_event_enabled()) { - typename Functor::EventType e(UNTIMED); - e.set_starttime(f.start_time()); - e.set_endtime(f.end_time()); - e.set_flushId(flushpoint_id); - e.set_elements(f.elements()); - e.set_size(f.size()); - e.commit(); - } -} - -template <typename Functor> static u4 invoke_with_flush_event(Functor& f) { const u4 elements = invoke(f); - write_flush_event(f); + EventFlush e(UNTIMED); + e.set_starttime(f.start_time()); + 
e.set_endtime(f.end_time()); + e.set_flushId(flushpoint_id); + e.set_elements(f.elements()); + e.set_size(f.size()); + e.commit(); return elements; } @@ -226,7 +209,6 @@ bool _clear; public: - typedef EventFlushStacktrace EventType; StackTraceRepository(JfrStackTraceRepository& repo, JfrChunkWriter& cw, bool clear) : _repo(repo), _cw(cw), _elements(0), _clear(clear) {} bool process() { @@ -242,7 +224,7 @@ static u4 flush_stacktrace(JfrStackTraceRepository& stack_trace_repo, JfrChunkWriter& chunkwriter) { StackTraceRepository str(stack_trace_repo, chunkwriter, false); WriteStackTrace wst(chunkwriter, str, TYPE_STACKTRACE); - return invoke_with_flush_event(wst); + return invoke(wst); } static u4 write_stacktrace(JfrStackTraceRepository& stack_trace_repo, JfrChunkWriter& chunkwriter, bool clear) { @@ -251,14 +233,14 @@ return invoke(wst); } -typedef Content<EventFlushStorage, JfrStorage, &JfrStorage::write> Storage; +typedef Content<JfrStorage, &JfrStorage::write> Storage; typedef WriteContent<Storage> WriteStorage; static size_t flush_storage(JfrStorage& storage, JfrChunkWriter& chunkwriter) { assert(chunkwriter.is_valid(), "invariant"); Storage fsf(storage); WriteStorage fs(chunkwriter, fsf); - return invoke_with_flush_event(fs); + return invoke(fs); } static size_t write_storage(JfrStorage& storage, JfrChunkWriter& chunkwriter) { @@ -268,15 +250,15 @@ return invoke(fs); } -typedef Content<EventFlushStringPool, JfrStringPool, &JfrStringPool::write> StringPool; -typedef Content<EventFlushStringPool, JfrStringPool, &JfrStringPool::write_at_safepoint> StringPoolSafepoint; +typedef Content<JfrStringPool, &JfrStringPool::write> StringPool; +typedef Content<JfrStringPool, &JfrStringPool::write_at_safepoint> StringPoolSafepoint; typedef WriteCheckpointEvent<StringPool> WriteStringPool; typedef WriteCheckpointEvent<StringPoolSafepoint> WriteStringPoolSafepoint; static u4 flush_stringpool(JfrStringPool& string_pool, JfrChunkWriter& chunkwriter) { StringPool sp(string_pool); 
WriteStringPool wsp(chunkwriter, sp, TYPE_STRING); - return invoke_with_flush_event(wsp); + return invoke(wsp); } static u4 write_stringpool(JfrStringPool& string_pool, JfrChunkWriter& chunkwriter) { @@ -291,20 +273,19 @@ return invoke(wsps); } -typedef Content<EventFlushTypeSet, JfrCheckpointManager, &JfrCheckpointManager::flush_type_set> FlushTypeSetFunctor; +typedef Content<JfrCheckpointManager, &JfrCheckpointManager::flush_type_set> FlushTypeSetFunctor; typedef WriteContent<FlushTypeSetFunctor> FlushTypeSet; static u4 flush_typeset(JfrCheckpointManager& checkpoint_manager, JfrChunkWriter& chunkwriter) { FlushTypeSetFunctor flush_type_set(checkpoint_manager); FlushTypeSet fts(chunkwriter, flush_type_set); - return invoke_with_flush_event(fts); + return invoke(fts); } class MetadataEvent : public StackObj { private: JfrChunkWriter& _cw; public: - typedef EventFlushMetadata EventType; MetadataEvent(JfrChunkWriter& cw) : _cw(cw) {} bool process() { JfrMetadataEvent::write(_cw); @@ -319,7 +300,7 @@ assert(chunkwriter.is_valid(), "invariant"); MetadataEvent me(chunkwriter); WriteMetadata wm(chunkwriter, me); - return invoke_with_flush_event(wm); + return invoke(wm); } static u4 write_metadata(JfrChunkWriter& chunkwriter) { @@ -651,7 +632,7 @@ return total_elements; } -typedef Content<EventFlush, JfrRecorderService, &JfrRecorderService::flush> FlushFunctor; +typedef Content<JfrRecorderService, &JfrRecorderService::flush> FlushFunctor; typedef WriteContent<FlushFunctor> Flush; void JfrRecorderService::invoke_flush() { @@ -671,7 +652,9 @@ void JfrRecorderService::flushpoint() { MutexLocker lock(JfrStream_lock, Mutex::_no_safepoint_check_flag); - invoke_flush(); + if (_chunkwriter.is_valid()) { + invoke_flush(); + } } void JfrRecorderService::process_full_buffers() {
--- a/src/hotspot/share/memory/filemap.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/memory/filemap.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1790,7 +1790,7 @@ si->allow_exec()); if (base == NULL || base != addr) { // dealloc the regions from java heap - dealloc_archive_heap_regions(regions, region_num, is_open_archive); + dealloc_archive_heap_regions(regions, region_num); log_info(cds)("UseSharedSpaces: Unable to map at required address in java heap. " INTPTR_FORMAT ", size = " SIZE_FORMAT " bytes", p2i(addr), regions[i].byte_size()); @@ -1799,7 +1799,7 @@ if (VerifySharedSpaces && !region_crc_check(addr, regions[i].byte_size(), si->crc())) { // dealloc the regions from java heap - dealloc_archive_heap_regions(regions, region_num, is_open_archive); + dealloc_archive_heap_regions(regions, region_num); log_info(cds)("UseSharedSpaces: mapped heap regions are corrupt"); return false; } @@ -1855,10 +1855,10 @@ } // dealloc the archive regions from java heap -void FileMapInfo::dealloc_archive_heap_regions(MemRegion* regions, int num, bool is_open) { +void FileMapInfo::dealloc_archive_heap_regions(MemRegion* regions, int num) { if (num > 0) { assert(regions != NULL, "Null archive ranges array with non-zero count"); - G1CollectedHeap::heap()->dealloc_archive_regions(regions, num, is_open); + G1CollectedHeap::heap()->dealloc_archive_regions(regions, num); } } #endif // INCLUDE_CDS_JAVA_HEAP @@ -2075,11 +2075,9 @@ // Dealloc the archive heap regions only without unmapping. The regions are part // of the java heap. Unmapping of the heap regions are managed by GC. 
map_info->dealloc_archive_heap_regions(open_archive_heap_ranges, - num_open_archive_heap_ranges, - true); + num_open_archive_heap_ranges); map_info->dealloc_archive_heap_regions(closed_archive_heap_ranges, - num_closed_archive_heap_ranges, - false); + num_closed_archive_heap_ranges); } else if (DumpSharedSpaces) { fail_stop("%s", msg); }
--- a/src/hotspot/share/memory/filemap.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/memory/filemap.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -538,7 +538,7 @@ bool map_heap_data(MemRegion **heap_mem, int first, int max, int* num, bool is_open = false) NOT_CDS_JAVA_HEAP_RETURN_(false); bool region_crc_check(char* buf, size_t size, int expected_crc) NOT_CDS_RETURN_(false); - void dealloc_archive_heap_regions(MemRegion* regions, int num, bool is_open) NOT_CDS_JAVA_HEAP_RETURN; + void dealloc_archive_heap_regions(MemRegion* regions, int num) NOT_CDS_JAVA_HEAP_RETURN; void map_heap_regions_impl() NOT_CDS_JAVA_HEAP_RETURN; char* map_relocation_bitmap(size_t& bitmap_size); MapArchiveResult map_region(int i, intx addr_delta, char* mapped_base_address, ReservedSpace rs);
--- a/src/hotspot/share/memory/heapInspection.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/memory/heapInspection.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -40,14 +40,6 @@ // HeapInspection -int KlassSizeStats::count(oop x) { - return (HeapWordSize * (((x) != NULL) ? (x)->size() : 0)); -} - -int KlassSizeStats::count_array(objArrayOop x) { - return (HeapWordSize * (((x) != NULL) ? (x)->size() : 0)); -} - inline KlassInfoEntry::~KlassInfoEntry() { if (_subclasses != NULL) { delete _subclasses; @@ -277,67 +269,6 @@ total, totalw * HeapWordSize); } -#define MAKE_COL_NAME(field, name, help) #name, -#define MAKE_COL_HELP(field, name, help) help, - -static const char *name_table[] = { - HEAP_INSPECTION_COLUMNS_DO(MAKE_COL_NAME) -}; - -static const char *help_table[] = { - HEAP_INSPECTION_COLUMNS_DO(MAKE_COL_HELP) -}; - -bool KlassInfoHisto::is_selected(const char *col_name) { - if (_selected_columns == NULL) { - return true; - } - if (strcmp(_selected_columns, col_name) == 0) { - return true; - } - - const char *start = strstr(_selected_columns, col_name); - if (start == NULL) { - return false; - } - - // The following must be true, because _selected_columns != col_name - if (start > _selected_columns && start[-1] != ',') { - return false; - } - char x = start[strlen(col_name)]; - if (x != ',' && x != '\0') { - return false; - } - - return true; -} - -void KlassInfoHisto::print_title(outputStream* st, bool csv_format, - bool selected[], int width_table[], - const char *name_table[]) { - if (csv_format) { - st->print("Index,Super"); - for (int c=0; c<KlassSizeStats::_num_columns; c++) { - if (selected[c]) {st->print(",%s", name_table[c]);} - } - 
st->print(",ClassName"); - } else { - st->print("Index Super"); - for (int c = 0; c < KlassSizeStats::_num_columns; c++) { - if (selected[c]) { - st->print("%*s", width_table[c], name_table[c]); - } - } - st->print(" ClassName"); - } - - if (is_selected("ClassLoader")) { - st->print(",ClassLoader"); - } - st->cr(); -} - class HierarchyClosure : public KlassInfoClosure { private: GrowableArray<KlassInfoEntry*> *_elements; @@ -528,155 +459,10 @@ } } -void KlassInfoHisto::print_class_stats(outputStream* st, - bool csv_format, const char *columns) { - ResourceMark rm; - KlassSizeStats sz, sz_sum; - int i; - julong *col_table = (julong*)(&sz); - julong *colsum_table = (julong*)(&sz_sum); - int width_table[KlassSizeStats::_num_columns]; - bool selected[KlassSizeStats::_num_columns]; - - _selected_columns = columns; - - memset(&sz_sum, 0, sizeof(sz_sum)); - for (int c=0; c<KlassSizeStats::_num_columns; c++) { - selected[c] = is_selected(name_table[c]); - } - - for(i=0; i < elements()->length(); i++) { - elements()->at(i)->set_index(i+1); - } - - // First iteration is for accumulating stats totals in colsum_table[]. - // Second iteration is for printing stats for each class. - for (int pass=1; pass<=2; pass++) { - if (pass == 2) { - print_title(st, csv_format, selected, width_table, name_table); - } - for(i=0; i < elements()->length(); i++) { - KlassInfoEntry* e = (KlassInfoEntry*)elements()->at(i); - const Klass* k = e->klass(); - - // Get the stats for this class. - memset(&sz, 0, sizeof(sz)); - sz._inst_count = e->count(); - sz._inst_bytes = HeapWordSize * e->words(); - k->collect_statistics(&sz); - sz._total_bytes = sz._ro_bytes + sz._rw_bytes; - - if (pass == 1) { - // Add the stats for this class to the overall totals. - for (int c=0; c<KlassSizeStats::_num_columns; c++) { - colsum_table[c] += col_table[c]; - } - } else { - int super_index = -1; - // Print the stats for this class. 
- if (k->is_instance_klass()) { - Klass* super = k->super(); - if (super) { - KlassInfoEntry* super_e = _cit->lookup(super); - if (super_e) { - super_index = super_e->index(); - } - } - } - - if (csv_format) { - st->print("%ld,%d", e->index(), super_index); - for (int c=0; c<KlassSizeStats::_num_columns; c++) { - if (selected[c]) {st->print("," JULONG_FORMAT, col_table[c]);} - } - st->print(",%s",e->name()); - } else { - st->print("%5ld %5d", e->index(), super_index); - for (int c=0; c<KlassSizeStats::_num_columns; c++) { - if (selected[c]) {print_julong(st, width_table[c], col_table[c]);} - } - st->print(" %s", e->name()); - } - if (is_selected("ClassLoader")) { - ClassLoaderData* loader_data = k->class_loader_data(); - st->print(","); - loader_data->print_value_on(st); - } - st->cr(); - } - } - - if (pass == 1) { - // Calculate the minimum width needed for the column by accounting for the - // column header width and the width of the largest value in the column. - for (int c=0; c<KlassSizeStats::_num_columns; c++) { - width_table[c] = col_width(colsum_table[c], name_table[c]); - } - } - } - - sz_sum._inst_size = 0; - - // Print the column totals. 
- if (csv_format) { - st->print(","); - for (int c=0; c<KlassSizeStats::_num_columns; c++) { - if (selected[c]) {st->print("," JULONG_FORMAT, colsum_table[c]);} - } - } else { - st->print(" "); - for (int c=0; c<KlassSizeStats::_num_columns; c++) { - if (selected[c]) {print_julong(st, width_table[c], colsum_table[c]);} - } - st->print(" Total"); - if (sz_sum._total_bytes > 0) { - st->cr(); - st->print(" "); - for (int c=0; c<KlassSizeStats::_num_columns; c++) { - if (selected[c]) { - switch (c) { - case KlassSizeStats::_index_inst_size: - case KlassSizeStats::_index_inst_count: - case KlassSizeStats::_index_method_count: - st->print("%*s", width_table[c], "-"); - break; - default: - { - double perc = (double)(100) * (double)(colsum_table[c]) / (double)sz_sum._total_bytes; - st->print("%*.1f%%", width_table[c]-1, perc); - } - } - } - } - } - } - st->cr(); - - if (!csv_format) { - print_title(st, csv_format, selected, width_table, name_table); - } -} - -julong KlassInfoHisto::annotations_bytes(Array<AnnotationArray*>* p) const { - julong bytes = 0; - if (p != NULL) { - for (int i = 0; i < p->length(); i++) { - bytes += count_bytes_array(p->at(i)); - } - bytes += count_bytes_array(p); - } - return bytes; -} - -void KlassInfoHisto::print_histo_on(outputStream* st, bool print_stats, - bool csv_format, const char *columns) { - if (print_stats) { - print_class_stats(st, csv_format, columns); - } else { - st->print_cr(" num #instances #bytes class name (module)"); - st->print_cr("-------------------------------------------------------"); - print_elements(st); - } +void KlassInfoHisto::print_histo_on(outputStream* st) { + st->print_cr(" num #instances #bytes class name (module)"); + st->print_cr("-------------------------------------------------------"); + print_elements(st); } class HistoClosure : public KlassInfoClosure { @@ -726,25 +512,7 @@ void HeapInspection::heap_inspection(outputStream* st) { ResourceMark rm; - if (_print_help) { - for (int c=0; 
c<KlassSizeStats::_num_columns; c++) { - st->print("%s:\n\t", name_table[c]); - const int max_col = 60; - int col = 0; - for (const char *p = help_table[c]; *p; p++,col++) { - if (col >= max_col && *p == ' ') { - st->print("\n\t"); - col = 0; - } else { - st->print("%c", *p); - } - } - st->print_cr(".\n"); - } - return; - } - - KlassInfoTable cit(_print_class_stats); + KlassInfoTable cit(true); if (!cit.allocation_failed()) { // populate table with object allocation info size_t missed_count = populate_table(&cit); @@ -761,7 +529,7 @@ cit.iterate(&hc); histo.sort(); - histo.print_histo_on(st, _print_class_stats, _csv_format, _columns); + histo.print_histo_on(st); } else { st->print_cr("ERROR: Ran out of C-heap; histogram not generated"); }
--- a/src/hotspot/share/memory/heapInspection.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/memory/heapInspection.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -47,141 +47,6 @@ // to KlassInfoEntry's and is used to sort // the entries. -#define HEAP_INSPECTION_COLUMNS_DO(f) \ - f(inst_size, InstSize, \ - "Size of each object instance of the Java class") \ - f(inst_count, InstCount, \ - "Number of object instances of the Java class") \ - f(inst_bytes, InstBytes, \ - "This is usually (InstSize * InstNum). The only exception is " \ - "java.lang.Class, whose InstBytes also includes the slots " \ - "used to store static fields. InstBytes is not counted in " \ - "ROAll, RWAll or Total") \ - f(mirror_bytes, Mirror, \ - "Size of the Klass::java_mirror() object") \ - f(klass_bytes, KlassBytes, \ - "Size of the InstanceKlass or ArrayKlass for this class. 
" \ - "Note that this includes VTab, ITab, OopMap") \ - f(secondary_supers_bytes, K_secondary_supers, \ - "Number of bytes used by the Klass::secondary_supers() array") \ - f(vtab_bytes, VTab, \ - "Size of the embedded vtable in InstanceKlass") \ - f(itab_bytes, ITab, \ - "Size of the embedded itable in InstanceKlass") \ - f(nonstatic_oopmap_bytes, OopMap, \ - "Size of the embedded nonstatic_oop_map in InstanceKlass") \ - f(methods_array_bytes, IK_methods, \ - "Number of bytes used by the InstanceKlass::methods() array") \ - f(method_ordering_bytes, IK_method_ordering, \ - "Number of bytes used by the InstanceKlass::method_ordering() array") \ - f(default_methods_array_bytes, IK_default_methods, \ - "Number of bytes used by the InstanceKlass::default_methods() array") \ - f(default_vtable_indices_bytes, IK_default_vtable_indices, \ - "Number of bytes used by the InstanceKlass::default_vtable_indices() array") \ - f(local_interfaces_bytes, IK_local_interfaces, \ - "Number of bytes used by the InstanceKlass::local_interfaces() array") \ - f(transitive_interfaces_bytes, IK_transitive_interfaces, \ - "Number of bytes used by the InstanceKlass::transitive_interfaces() array") \ - f(fields_bytes, IK_fields, \ - "Number of bytes used by the InstanceKlass::fields() array") \ - f(inner_classes_bytes, IK_inner_classes, \ - "Number of bytes used by the InstanceKlass::inner_classes() array") \ - f(nest_members_bytes, IK_nest_members, \ - "Number of bytes used by the InstanceKlass::nest_members() array") \ - f(record_components_bytes, IK_record_components, \ - "Number of bytes used by the InstanceKlass::record_components() array") \ - f(signers_bytes, IK_signers, \ - "Number of bytes used by the InstanceKlass::singers() array") \ - f(class_annotations_bytes, class_annotations, \ - "Size of class annotations") \ - f(class_type_annotations_bytes, class_type_annotations, \ - "Size of class type annotations") \ - f(fields_annotations_bytes, fields_annotations, \ - "Size of field 
annotations") \ - f(fields_type_annotations_bytes, fields_type_annotations, \ - "Size of field type annotations") \ - f(methods_annotations_bytes, methods_annotations, \ - "Size of method annotations") \ - f(methods_parameter_annotations_bytes, methods_parameter_annotations, \ - "Size of method parameter annotations") \ - f(methods_type_annotations_bytes, methods_type_annotations, \ - "Size of methods type annotations") \ - f(methods_default_annotations_bytes, methods_default_annotations, \ - "Size of methods default annotations") \ - f(annotations_bytes, annotations, \ - "Size of all annotations") \ - f(cp_bytes, Cp, \ - "Size of InstanceKlass::constants()") \ - f(cp_tags_bytes, CpTags, \ - "Size of InstanceKlass::constants()->tags()") \ - f(cp_cache_bytes, CpCache, \ - "Size of InstanceKlass::constants()->cache()") \ - f(cp_operands_bytes, CpOperands, \ - "Size of InstanceKlass::constants()->operands()") \ - f(cp_refmap_bytes, CpRefMap, \ - "Size of InstanceKlass::constants()->reference_map()") \ - f(cp_all_bytes, CpAll, \ - "Sum of Cp + CpTags + CpCache + CpOperands + CpRefMap") \ - f(method_count, MethodCount, \ - "Number of methods in this class") \ - f(method_bytes, MethodBytes, \ - "Size of the Method object") \ - f(const_method_bytes, ConstMethod, \ - "Size of the ConstMethod object") \ - f(method_data_bytes, MethodData, \ - "Size of the MethodData object") \ - f(stackmap_bytes, StackMap, \ - "Size of the stackmap_data") \ - f(bytecode_bytes, Bytecodes, \ - "Of the MethodBytes column, how much are the space taken up by bytecodes") \ - f(method_all_bytes, MethodAll, \ - "Sum of MethodBytes + Constmethod + Stackmap + Methoddata") \ - f(ro_bytes, ROAll, \ - "Size of all class meta data that could (potentially) be placed " \ - "in read-only memory. (This could change with CDS design)") \ - f(rw_bytes, RWAll, \ - "Size of all class meta data that must be placed in read/write " \ - "memory. 
(This could change with CDS design) ") \ - f(total_bytes, Total, \ - "ROAll + RWAll. Note that this does NOT include InstBytes.") - -// Size statistics for a Klass - filled in by Klass::collect_statistics() -class KlassSizeStats { -public: -#define COUNT_KLASS_SIZE_STATS_FIELD(field, name, help) _index_ ## field, -#define DECLARE_KLASS_SIZE_STATS_FIELD(field, name, help) julong _ ## field; - - enum { - HEAP_INSPECTION_COLUMNS_DO(COUNT_KLASS_SIZE_STATS_FIELD) - _num_columns - }; - - HEAP_INSPECTION_COLUMNS_DO(DECLARE_KLASS_SIZE_STATS_FIELD) - - static int count(oop x); - - static int count_array(objArrayOop x); - - template <class T> static int count(T* x) { - return (HeapWordSize * ((x) ? (x)->size() : 0)); - } - - template <class T> static int count_array(T* x) { - if (x == NULL) { - return 0; - } - if (x->length() == 0) { - // This is a shared array, e.g., Universe::the_empty_int_array(). Don't - // count it to avoid double-counting. - return 0; - } - return HeapWordSize * x->size(); - } -}; - - - - class KlassInfoEntry: public CHeapObj<mtInternal> { private: KlassInfoEntry* _next; @@ -281,13 +146,7 @@ GrowableArray<KlassInfoEntry*>* elements() const { return _elements; } static int sort_helper(KlassInfoEntry** e1, KlassInfoEntry** e2); void print_elements(outputStream* st) const; - void print_class_stats(outputStream* st, bool csv_format, const char *columns); - julong annotations_bytes(Array<AnnotationArray*>* p) const; - const char *_selected_columns; bool is_selected(const char *col_name); - void print_title(outputStream* st, bool csv_format, - bool selected_columns_table[], int width_table[], - const char *name_table[]); template <class T> static int count_bytes(T* x) { return (HeapWordSize * ((x) ? 
(x)->size() : 0)); @@ -339,7 +198,7 @@ KlassInfoHisto(KlassInfoTable* cit); ~KlassInfoHisto(); void add(KlassInfoEntry* cie); - void print_histo_on(outputStream* st, bool print_class_stats, bool csv_format, const char *columns); + void print_histo_on(outputStream* st); void sort(); }; @@ -351,15 +210,7 @@ class KlassInfoClosure; class HeapInspection : public StackObj { - bool _csv_format; // "comma separated values" format for spreadsheet. - bool _print_help; - bool _print_class_stats; - const char* _columns; public: - HeapInspection(bool csv_format, bool print_help, - bool print_class_stats, const char *columns) : - _csv_format(csv_format), _print_help(print_help), - _print_class_stats(print_class_stats), _columns(columns) {} void heap_inspection(outputStream* st) NOT_SERVICES_RETURN; size_t populate_table(KlassInfoTable* cit, BoolObjectClosure* filter = NULL) NOT_SERVICES_RETURN_(0); static void find_instances_at_safepoint(Klass* k, GrowableArray<oop>* result) NOT_SERVICES_RETURN;
--- a/src/hotspot/share/memory/heapShared.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/memory/heapShared.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -78,9 +78,7 @@ // Entry fields for subgraphs archived in the open archive heap region. static ArchivableStaticFieldInfo open_archive_subgraph_entry_fields[] = { {"jdk/internal/module/ArchivedModuleGraph", "archivedModuleGraph"}, - {"java/util/ImmutableCollections$ListN", "EMPTY_LIST"}, - {"java/util/ImmutableCollections$MapN", "EMPTY_MAP"}, - {"java/util/ImmutableCollections$SetN", "EMPTY_SET"}, + {"java/util/ImmutableCollections", "archivedObjects"}, {"java/lang/module/Configuration", "EMPTY_CONFIGURATION"}, {"jdk/internal/math/FDBigInteger", "archivedCaches"}, };
--- a/src/hotspot/share/oops/annotations.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/annotations.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,6 @@ #include "precompiled.hpp" #include "classfile/classLoaderData.hpp" #include "logging/log.hpp" -#include "memory/heapInspection.hpp" #include "memory/metadataFactory.hpp" #include "memory/metaspaceClosure.hpp" #include "memory/oopFactory.hpp" @@ -89,37 +88,6 @@ st->print("Annotations(" INTPTR_FORMAT ")", p2i(this)); } -#if INCLUDE_SERVICES -// Size Statistics - -julong Annotations::count_bytes(Array<AnnotationArray*>* p) { - julong bytes = 0; - if (p != NULL) { - for (int i = 0; i < p->length(); i++) { - bytes += KlassSizeStats::count_array(p->at(i)); - } - bytes += KlassSizeStats::count_array(p); - } - return bytes; -} - -void Annotations::collect_statistics(KlassSizeStats *sz) const { - sz->_annotations_bytes = sz->count(this); - sz->_class_annotations_bytes = sz->count(class_annotations()); - sz->_class_type_annotations_bytes = sz->count(class_type_annotations()); - sz->_fields_annotations_bytes = count_bytes(fields_annotations()); - sz->_fields_type_annotations_bytes = count_bytes(fields_type_annotations()); - - sz->_annotations_bytes += - sz->_class_annotations_bytes + - sz->_class_type_annotations_bytes + - sz->_fields_annotations_bytes + - sz->_fields_type_annotations_bytes; - - sz->_ro_bytes += sz->_annotations_bytes; -} -#endif // INCLUDE_SERVICES - #define BULLET " - " #ifndef PRODUCT
--- a/src/hotspot/share/oops/annotations.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/annotations.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,7 +33,6 @@ class ClassLoaderData; class outputStream; -class KlassSizeStats; typedef Array<u1> AnnotationArray; @@ -70,10 +69,6 @@ // Annotations should be stored in the read-only region of CDS archive. static bool is_read_only_by_default() { return true; } -#if INCLUDE_SERVICES - void collect_statistics(KlassSizeStats *sz) const; -#endif - // Constructor to initialize to null Annotations() : _class_annotations(NULL), _fields_annotations(NULL),
--- a/src/hotspot/share/oops/arrayKlass.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/arrayKlass.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -104,14 +104,6 @@ virtual void metaspace_pointers_do(MetaspaceClosure* iter); -#if INCLUDE_SERVICES - virtual void collect_statistics(KlassSizeStats *sz) const { - Klass::collect_statistics(sz); - // Do nothing for now, but remember to modify if you add new - // stuff to ArrayKlass. - } -#endif - // Iterators void array_klasses_do(void f(Klass* k)); void array_klasses_do(void f(Klass* k, TRAPS), TRAPS);
--- a/src/hotspot/share/oops/constMethod.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/constMethod.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,7 +24,6 @@ #include "precompiled.hpp" #include "interpreter/interpreter.hpp" -#include "memory/heapInspection.hpp" #include "memory/metadataFactory.hpp" #include "memory/metaspaceClosure.hpp" #include "memory/resourceArea.hpp" @@ -454,36 +453,6 @@ } } -#if INCLUDE_SERVICES -// Size Statistics -void ConstMethod::collect_statistics(KlassSizeStats *sz) const { - int n1, n2, n3; - sz->_const_method_bytes += (n1 = sz->count(this)); - sz->_bytecode_bytes += (n2 = code_size()); - sz->_stackmap_bytes += (n3 = sz->count_array(stackmap_data())); - - // Count method annotations - int a1 = 0, a2 = 0, a3 = 0, a4 = 0; - if (has_method_annotations()) { - sz->_methods_annotations_bytes += (a1 = sz->count_array(method_annotations())); - } - if (has_parameter_annotations()) { - sz->_methods_parameter_annotations_bytes += (a2 = sz->count_array(parameter_annotations())); - } - if (has_type_annotations()) { - sz->_methods_type_annotations_bytes += (a3 = sz->count_array(type_annotations())); - } - if (has_default_annotations()) { - sz->_methods_default_annotations_bytes += (a4 = sz->count_array(default_annotations())); - } - - int size_annotations = a1 + a2 + a3 + a4; - - sz->_method_all_bytes += n1 + n3 + size_annotations; // note: n2 is part of n3 - sz->_ro_bytes += n1 + n3 + size_annotations; -} -#endif // INCLUDE_SERVICES - // Verification void ConstMethod::verify_on(outputStream* st) {
--- a/src/hotspot/share/oops/constMethod.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/constMethod.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -122,7 +122,6 @@ u2 flags; }; -class KlassSizeStats; class AdapterHandlerEntry; // Class to collect the sizes of ConstMethod inline tables @@ -379,10 +378,6 @@ // ConstMethods should be stored in the read-only region of CDS archive. static bool is_read_only_by_default() { return true; } -#if INCLUDE_SERVICES - void collect_statistics(KlassSizeStats *sz) const; -#endif - // code size int code_size() const { return _code_size; } void set_code_size(int size) {
--- a/src/hotspot/share/oops/constantPool.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/constantPool.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,7 +33,6 @@ #include "interpreter/bootstrapInfo.hpp" #include "interpreter/linkResolver.hpp" #include "memory/allocation.inline.hpp" -#include "memory/heapInspection.hpp" #include "memory/heapShared.hpp" #include "memory/metadataFactory.hpp" #include "memory/metaspaceClosure.hpp" @@ -2379,21 +2378,6 @@ } } -#if INCLUDE_SERVICES -// Size Statistics -void ConstantPool::collect_statistics(KlassSizeStats *sz) const { - sz->_cp_all_bytes += (sz->_cp_bytes = sz->count(this)); - sz->_cp_all_bytes += (sz->_cp_tags_bytes = sz->count_array(tags())); - sz->_cp_all_bytes += (sz->_cp_cache_bytes = sz->count(cache())); - sz->_cp_all_bytes += (sz->_cp_operands_bytes = sz->count_array(operands())); - sz->_cp_all_bytes += (sz->_cp_refmap_bytes = sz->count_array(reference_map())); - - sz->_ro_bytes += sz->_cp_operands_bytes + sz->_cp_tags_bytes + - sz->_cp_refmap_bytes; - sz->_rw_bytes += sz->_cp_bytes + sz->_cp_cache_bytes; -} -#endif // INCLUDE_SERVICES - // Verification void ConstantPool::verify_on(outputStream* st) {
--- a/src/hotspot/share/oops/constantPool.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/constantPool.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -93,8 +93,6 @@ } }; -class KlassSizeStats; - class ConstantPool : public Metadata { friend class VMStructs; friend class JVMCIVMStructs; @@ -773,9 +771,6 @@ } static int size(int length) { return align_metadata_size(header_size() + length); } int size() const { return size(length()); } -#if INCLUDE_SERVICES - void collect_statistics(KlassSizeStats *sz) const; -#endif // ConstantPools should be stored in the read-only region of CDS archive. static bool is_read_only_by_default() { return true; }
--- a/src/hotspot/share/oops/instanceKlass.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/instanceKlass.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -46,7 +46,6 @@ #include "logging/logMessage.hpp" #include "logging/logStream.hpp" #include "memory/allocation.inline.hpp" -#include "memory/heapInspection.hpp" #include "memory/iterator.inline.hpp" #include "memory/metadataFactory.hpp" #include "memory/metaspaceClosure.hpp" @@ -3541,61 +3540,6 @@ } } -#if INCLUDE_SERVICES -// Size Statistics -void InstanceKlass::collect_statistics(KlassSizeStats *sz) const { - Klass::collect_statistics(sz); - - sz->_inst_size = wordSize * size_helper(); - sz->_vtab_bytes = wordSize * vtable_length(); - sz->_itab_bytes = wordSize * itable_length(); - sz->_nonstatic_oopmap_bytes = wordSize * nonstatic_oop_map_size(); - - int n = 0; - n += (sz->_methods_array_bytes = sz->count_array(methods())); - n += (sz->_method_ordering_bytes = sz->count_array(method_ordering())); - n += (sz->_local_interfaces_bytes = sz->count_array(local_interfaces())); - n += (sz->_transitive_interfaces_bytes = sz->count_array(transitive_interfaces())); - n += (sz->_fields_bytes = sz->count_array(fields())); - n += (sz->_inner_classes_bytes = sz->count_array(inner_classes())); - n += (sz->_nest_members_bytes = sz->count_array(nest_members())); - n += (sz->_record_components_bytes = sz->count_array(record_components())); - sz->_ro_bytes += n; - - const ConstantPool* cp = constants(); - if (cp) { - cp->collect_statistics(sz); - } - - const Annotations* anno = annotations(); - if (anno) { - anno->collect_statistics(sz); - } - - const Array<Method*>* methods_array = methods(); - if (methods()) { - 
for (int i = 0; i < methods_array->length(); i++) { - Method* method = methods_array->at(i); - if (method) { - sz->_method_count ++; - method->collect_statistics(sz); - } - } - } - - const Array<RecordComponent*>* components = record_components(); - if (components != NULL) { - for (int i = 0; i < components->length(); i++) { - RecordComponent* component = components->at(i); - if (component != NULL) { - component->collect_statistics(sz); - } - } - } - -} -#endif // INCLUDE_SERVICES - // Verification class VerifyFieldClosure: public BasicOopIterateClosure {
--- a/src/hotspot/share/oops/instanceKlass.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/instanceKlass.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1066,9 +1066,6 @@ is_unsafe_anonymous(), has_stored_fingerprint()); } -#if INCLUDE_SERVICES - virtual void collect_statistics(KlassSizeStats *sz) const; -#endif intptr_t* start_of_itable() const { return (intptr_t*)start_of_vtable() + vtable_length(); } intptr_t* end_of_itable() const { return start_of_itable() + itable_length(); }
--- a/src/hotspot/share/oops/klass.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/klass.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,7 +32,6 @@ #include "classfile/vmSymbols.hpp" #include "gc/shared/collectedHeap.inline.hpp" #include "logging/log.hpp" -#include "memory/heapInspection.hpp" #include "memory/heapShared.hpp" #include "memory/metadataFactory.hpp" #include "memory/metaspaceClosure.hpp" @@ -756,18 +755,6 @@ obj->print_address_on(st); } -#if INCLUDE_SERVICES -// Size Statistics -void Klass::collect_statistics(KlassSizeStats *sz) const { - sz->_klass_bytes = sz->count(this); - sz->_mirror_bytes = sz->count(java_mirror_no_keepalive()); - sz->_secondary_supers_bytes = sz->count_array(secondary_supers()); - - sz->_ro_bytes += sz->_secondary_supers_bytes; - sz->_rw_bytes += sz->_klass_bytes + sz->_mirror_bytes; -} -#endif // INCLUDE_SERVICES - // Verification void Klass::verify_on(outputStream* st) {
--- a/src/hotspot/share/oops/klass.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/klass.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -68,7 +68,6 @@ template <class T> class Array; template <class T> class GrowableArray; class fieldDescriptor; -class KlassSizeStats; class klassVtable; class ModuleEntry; class PackageEntry; @@ -541,9 +540,6 @@ // Size of klass in word size. virtual int size() const = 0; -#if INCLUDE_SERVICES - virtual void collect_statistics(KlassSizeStats *sz) const; -#endif // Returns the Java name for a class (Resource allocated) // For arrays, this returns the name of the element with a leading '['.
--- a/src/hotspot/share/oops/method.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/method.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,7 +39,6 @@ #include "logging/log.hpp" #include "logging/logTag.hpp" #include "memory/allocation.inline.hpp" -#include "memory/heapInspection.hpp" #include "memory/metadataFactory.hpp" #include "memory/metaspaceClosure.hpp" #include "memory/metaspaceShared.hpp" @@ -2427,23 +2426,6 @@ if (WizardMode && code() != NULL) st->print(" ((nmethod*)%p)", code()); } -#if INCLUDE_SERVICES -// Size Statistics -void Method::collect_statistics(KlassSizeStats *sz) const { - int mysize = sz->count(this); - sz->_method_bytes += mysize; - sz->_method_all_bytes += mysize; - sz->_rw_bytes += mysize; - - if (constMethod()) { - constMethod()->collect_statistics(sz); - } - if (method_data()) { - method_data()->collect_statistics(sz); - } -} -#endif // INCLUDE_SERVICES - // LogTouchedMethods and PrintTouchedMethods // TouchedMethodRecord -- we can't use a HashtableEntry<Method*> because
--- a/src/hotspot/share/oops/method.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/method.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -64,7 +64,6 @@ class MethodCounters; class ConstMethod; class InlineTableSizes; -class KlassSizeStats; class CompiledMethod; class InterpreterOopMap; @@ -713,9 +712,6 @@ } static int size(bool is_native); int size() const { return method_size(); } -#if INCLUDE_SERVICES - void collect_statistics(KlassSizeStats *sz) const; -#endif void log_touched(TRAPS); static void print_touched_methods(outputStream* out);
--- a/src/hotspot/share/oops/methodData.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/methodData.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,7 +29,6 @@ #include "interpreter/bytecode.hpp" #include "interpreter/bytecodeStream.hpp" #include "interpreter/linkResolver.hpp" -#include "memory/heapInspection.hpp" #include "memory/metaspaceClosure.hpp" #include "memory/resourceArea.hpp" #include "oops/methodData.inline.hpp" @@ -1576,16 +1575,6 @@ } } -#if INCLUDE_SERVICES -// Size Statistics -void MethodData::collect_statistics(KlassSizeStats *sz) const { - int n = sz->count(this); - sz->_method_data_bytes += n; - sz->_method_all_bytes += n; - sz->_rw_bytes += n; -} -#endif // INCLUDE_SERVICES - // Verification void MethodData::verify_on(outputStream* st) {
--- a/src/hotspot/share/oops/methodData.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/methodData.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,7 +33,6 @@ #include "utilities/align.hpp" class BytecodeStream; -class KlassSizeStats; // The MethodData object collects counts and other profile information // during zeroth-tier (interpretive) and first-tier execution. @@ -2181,9 +2180,6 @@ // My size int size_in_bytes() const { return _size; } int size() const { return align_metadata_size(align_up(_size, BytesPerWord)/BytesPerWord); } -#if INCLUDE_SERVICES - void collect_statistics(KlassSizeStats *sz) const; -#endif int creation_mileage() const { return _creation_mileage; } void set_creation_mileage(int x) { _creation_mileage = x; }
--- a/src/hotspot/share/oops/recordComponent.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/recordComponent.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,7 +24,6 @@ #include "precompiled.hpp" #include "logging/log.hpp" -#include "memory/heapInspection.hpp" #include "memory/metadataFactory.hpp" #include "memory/metaspace.hpp" #include "memory/metaspaceClosure.hpp" @@ -63,19 +62,6 @@ st->print("RecordComponent(" INTPTR_FORMAT ")", p2i(this)); } -#if INCLUDE_SERVICES -void RecordComponent::collect_statistics(KlassSizeStats *sz) const { - if (_annotations != NULL) { - sz->_annotations_bytes += sz->count(_annotations); - sz->_ro_bytes += sz->count(_annotations); - } - if (_type_annotations != NULL) { - sz->_annotations_bytes += sz->count(_type_annotations); - sz->_ro_bytes += sz->count(_type_annotations); - } -} -#endif - #ifndef PRODUCT void RecordComponent::print_on(outputStream* st) const { st->print("name_index: %d", _name_index);
--- a/src/hotspot/share/oops/recordComponent.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/oops/recordComponent.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,8 +29,6 @@ #include "oops/metadata.hpp" #include "utilities/globalDefinitions.hpp" -class KlassSizeStats; - // This class stores information extracted from the Record class attribute. class RecordComponent: public MetaspaceObj { private: @@ -91,10 +89,6 @@ static bool is_read_only_by_default() { return true; } DEBUG_ONLY(bool on_stack() { return false; }) // for template -#if INCLUDE_SERVICES - void collect_statistics(KlassSizeStats *sz) const; -#endif - bool is_klass() const { return false; } #ifndef PRODUCT
--- a/src/hotspot/share/opto/matcher.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/opto/matcher.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -2525,14 +2525,17 @@ //---------------------------------------------------------------------- // Convert (leg)Vec to (leg)Vec[SDXYZ]. -MachOper* Matcher::specialize_vector_operand_helper(MachNode* m, MachOper* original_opnd) { - const Type* t = m->bottom_type(); +MachOper* Matcher::specialize_vector_operand_helper(MachNode* m, uint opnd_idx, const Type* t) { + MachOper* original_opnd = m->_opnds[opnd_idx]; uint ideal_reg = t->ideal_reg(); - // Handle special cases + // Handle special cases. if (t->isa_vect()) { - // RShiftCntV/RShiftCntV report wide vector type, but VecS as ideal register (see vectornode.hpp). - if (m->ideal_Opcode() == Op_RShiftCntV || m->ideal_Opcode() == Op_LShiftCntV) { - ideal_reg = TypeVect::VECTS->ideal_reg(); // ideal_reg == Op_VecS + // LShiftCntV/RShiftCntV report wide vector type, but Matcher::vector_shift_count_ideal_reg() as ideal register (see vectornode.hpp). + // Look for shift count use sites as well (at vector shift nodes). + int opc = m->ideal_Opcode(); + if ((VectorNode::is_shift_count(opc) && opnd_idx == 0) || // DEF operand of LShiftCntV/RShiftCntV + (VectorNode::is_vector_shift(opc) && opnd_idx == 2)) { // shift operand of a vector shift node + ideal_reg = Matcher::vector_shift_count_ideal_reg(t->is_vect()->length_in_bytes()); } } else { // Chain instructions which convert scalar to vector (e.g., vshiftcntimm on x86) don't have vector type. @@ -2556,22 +2559,23 @@ } // Compute concrete vector operand for a generic DEF/USE vector operand (of mach node m at index idx). -MachOper* Matcher::specialize_vector_operand(MachNode* m, uint idx) { - assert(Matcher::is_generic_vector(m->_opnds[idx]), "repeated updates"); - if (idx == 0) { // DEF - // Use mach node itself to compute vector operand type. 
- return specialize_vector_operand_helper(m, m->_opnds[0]); +MachOper* Matcher::specialize_vector_operand(MachNode* m, uint opnd_idx) { + assert(Matcher::is_generic_vector(m->_opnds[opnd_idx]), "repeated updates"); + Node* def = NULL; + if (opnd_idx == 0) { // DEF + def = m; // use mach node itself to compute vector operand type } else { - // Use def node to compute operand type. - int base_idx = m->operand_index(idx); - MachNode* in = m->in(base_idx)->as_Mach(); - if (in->is_MachTemp() && Matcher::is_generic_vector(in->_opnds[0])) { - specialize_temp_node(in->as_MachTemp(), m, base_idx); // MachTemp node use site - } else if (is_generic_reg2reg_move(in)) { - in = in->in(1)->as_Mach(); // skip over generic reg-to-reg moves + int base_idx = m->operand_index(opnd_idx); + def = m->in(base_idx); + if (def->is_Mach()) { + if (def->is_MachTemp() && Matcher::is_generic_vector(def->as_Mach()->_opnds[0])) { + specialize_temp_node(def->as_MachTemp(), m, base_idx); // MachTemp node use site + } else if (is_generic_reg2reg_move(def->as_Mach())) { + def = def->in(1); // skip over generic reg-to-reg moves + } } - return specialize_vector_operand_helper(in, m->_opnds[idx]); } + return specialize_vector_operand_helper(m, opnd_idx, def->bottom_type()); } void Matcher::specialize_mach_node(MachNode* m) {
--- a/src/hotspot/share/opto/matcher.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/opto/matcher.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -516,8 +516,8 @@ void specialize_generic_vector_operands(); void specialize_mach_node(MachNode* m); void specialize_temp_node(MachTempNode* tmp, MachNode* use, uint idx); - MachOper* specialize_vector_operand(MachNode* m, uint idx); - MachOper* specialize_vector_operand_helper(MachNode* m, MachOper* generic_opnd); + MachOper* specialize_vector_operand(MachNode* m, uint opnd_idx); + MachOper* specialize_vector_operand_helper(MachNode* m, uint opnd_idx, const Type* t); static MachOper* specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp);
--- a/src/hotspot/share/opto/memnode.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/opto/memnode.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -4170,7 +4170,7 @@ Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr, intptr_t header_size, Node* size_in_bytes, - PhaseGVN* phase) { + PhaseIterGVN* phase) { assert(!is_complete(), "not already complete"); assert(stores_are_sane(phase), ""); assert(allocation() != NULL, "must be present"); @@ -4262,7 +4262,7 @@ } // Collect the store and move on: - st->set_req(MemNode::Memory, inits); + phase->replace_input_of(st, MemNode::Memory, inits); inits = st; // put it on the linearized chain set_req(i, zmem); // unhook from previous position
--- a/src/hotspot/share/opto/memnode.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/opto/memnode.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1402,7 +1402,7 @@ // Called when the associated AllocateNode is expanded into CFG. Node* complete_stores(Node* rawctl, Node* rawmem, Node* rawptr, intptr_t header_size, Node* size_in_bytes, - PhaseGVN* phase); + PhaseIterGVN* phase); private: void remove_extra_zeroes();
--- a/src/hotspot/share/opto/parse2.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/opto/parse2.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1048,11 +1048,11 @@ // if there is a higher range, test for it and process it: if (mid < hi && !eq_test_only) { // two comparisons of same values--should enable 1 test for 2 branches - // Use BoolTest::le instead of BoolTest::gt + // Use BoolTest::lt instead of BoolTest::gt float cnt = sum_of_cnts(lo, mid-1); - IfNode *iff_le = jump_if_fork_int(key_val, test_val, BoolTest::le, if_prob(cnt, total_cnt), if_cnt(cnt)); - Node *iftrue = _gvn.transform( new IfTrueNode(iff_le) ); - Node *iffalse = _gvn.transform( new IfFalseNode(iff_le) ); + IfNode *iff_lt = jump_if_fork_int(key_val, test_val, BoolTest::lt, if_prob(cnt, total_cnt), if_cnt(cnt)); + Node *iftrue = _gvn.transform( new IfTrueNode(iff_lt) ); + Node *iffalse = _gvn.transform( new IfFalseNode(iff_lt) ); { PreserveJVMState pjvms(this); set_control(iffalse); jump_switch_ranges(key_val, mid+1, hi, switch_depth+1);
--- a/src/hotspot/share/opto/vectornode.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/opto/vectornode.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -485,6 +485,38 @@ } } +bool VectorNode::is_vector_shift(int opc) { + assert(opc > _last_machine_leaf && opc < _last_opcode, "invalid opcode"); + switch (opc) { + case Op_LShiftVB: + case Op_LShiftVS: + case Op_LShiftVI: + case Op_LShiftVL: + case Op_RShiftVB: + case Op_RShiftVS: + case Op_RShiftVI: + case Op_RShiftVL: + case Op_URShiftVB: + case Op_URShiftVS: + case Op_URShiftVI: + case Op_URShiftVL: + return true; + default: + return false; + } +} + +bool VectorNode::is_shift_count(int opc) { + assert(opc > _last_machine_leaf && opc < _last_opcode, "invalid opcode"); + switch (opc) { + case Op_RShiftCntV: + case Op_LShiftCntV: + return true; + default: + return false; + } +} + // Return initial Pack node. Additional operands added with add_opd() calls. PackNode* PackNode::make(Node* s, uint vlen, BasicType bt) { const TypeVect* vt = TypeVect::make(bt, vlen);
--- a/src/hotspot/share/opto/vectornode.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/opto/vectornode.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -74,6 +74,9 @@ static bool is_invariant_vector(Node* n); // [Start, end) half-open range defining which operands are vectors static void vector_operands(Node* n, uint* start, uint* end); + + static bool is_vector_shift(int opc); + static bool is_shift_count(int opc); }; //===========================Vector=ALU=Operations=============================
--- a/src/hotspot/share/prims/whitebox.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/prims/whitebox.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1986,6 +1986,14 @@ #endif // INCLUDE_CDS WB_END +WB_ENTRY(jboolean, WB_isC2OrJVMCIIncludedInVmBuild(JNIEnv* env)) +#if COMPILER2_OR_JVMCI + return true; +#else + return false; +#endif +WB_END + WB_ENTRY(jboolean, WB_IsJavaHeapArchiveSupported(JNIEnv* env)) return HeapShared::is_heap_object_archiving_allowed(); WB_END @@ -2192,6 +2200,12 @@ return result; WB_END +WB_ENTRY(jint, WB_GetKlassMetadataSize(JNIEnv* env, jobject wb, jclass mirror)) + Klass* k = java_lang_Class::as_Klass(JNIHandles::resolve(mirror)); + // Return size in bytes. + return k->size() * wordSize; +WB_END + #define CC (char*) static JNINativeMethod methods[] = { @@ -2411,6 +2425,7 @@ {CC"areOpenArchiveHeapObjectsMapped", CC"()Z", (void*)&WB_AreOpenArchiveHeapObjectsMapped}, {CC"isCDSIncludedInVmBuild", CC"()Z", (void*)&WB_IsCDSIncludedInVmBuild }, {CC"isJFRIncludedInVmBuild", CC"()Z", (void*)&WB_IsJFRIncludedInVmBuild }, + {CC"isC2OrJVMCIIncludedInVmBuild", CC"()Z", (void*)&WB_isC2OrJVMCIIncludedInVmBuild }, {CC"isJavaHeapArchiveSupported", CC"()Z", (void*)&WB_IsJavaHeapArchiveSupported }, {CC"cdsMemoryMappingFailed", CC"()Z", (void*)&WB_CDSMemoryMappingFailed }, @@ -2433,6 +2448,7 @@ {CC"resolvedMethodItemsCount", CC"()J", (void*)&WB_ResolvedMethodItemsCount }, {CC"protectionDomainRemovedCount", CC"()I", (void*)&WB_ProtectionDomainRemovedCount }, {CC"aotLibrariesCount", CC"()I", (void*)&WB_AotLibrariesCount }, + {CC"getKlassMetadataSize", CC"(Ljava/lang/Class;)I",(void*)&WB_GetKlassMetadataSize}, };
--- a/src/hotspot/share/runtime/safepoint.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/runtime/safepoint.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -108,7 +108,6 @@ // JavaThreads that need to block for the safepoint will stop on the // _wait_barrier, where they can quickly be started again. static WaitBarrier* _wait_barrier; - static long _end_of_last_safepoint; // Time of last safepoint in milliseconds static julong _coalesced_vmop_count; // coalesced vmop count // For debug long safepoint
--- a/src/hotspot/share/services/diagnosticCommand.cpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/services/diagnosticCommand.cpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -90,7 +90,6 @@ #if INCLUDE_SERVICES DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<HeapDumpDCmd>(DCmd_Source_Internal | DCmd_Source_AttachAPI, true, false)); DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<ClassHistogramDCmd>(full_export, true, false)); - DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<ClassStatsDCmd>(full_export, true, false)); DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<SystemDictionaryDCmd>(full_export, true, false)); DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<ClassHierarchyDCmd>(full_export, true, false)); DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl<SymboltableDCmd>(full_export, true, false)); @@ -554,57 +553,6 @@ } } -#define DEFAULT_COLUMNS "InstBytes,KlassBytes,CpAll,annotations,MethodCount,Bytecodes,MethodAll,ROAll,RWAll,Total" -ClassStatsDCmd::ClassStatsDCmd(outputStream* output, bool heap) : - DCmdWithParser(output, heap), - _all("-all", "Show all columns", - "BOOLEAN", false, "false"), - _csv("-csv", "Print in CSV (comma-separated values) format for spreadsheets", - "BOOLEAN", false, "false"), - _help("-help", "Show meaning of all the columns", - "BOOLEAN", false, "false"), - _columns("columns", "Comma-separated list of all the columns to show. 
" - "If not specified, the following columns are shown: " DEFAULT_COLUMNS, - "STRING", false) { - _dcmdparser.add_dcmd_option(&_all); - _dcmdparser.add_dcmd_option(&_csv); - _dcmdparser.add_dcmd_option(&_help); - _dcmdparser.add_dcmd_argument(&_columns); -} - -void ClassStatsDCmd::execute(DCmdSource source, TRAPS) { - VM_GC_HeapInspection heapop(output(), - true /* request_full_gc */); - heapop.set_csv_format(_csv.value()); - heapop.set_print_help(_help.value()); - heapop.set_print_class_stats(true); - if (_all.value()) { - if (_columns.has_value()) { - output()->print_cr("Cannot specify -all and individual columns at the same time"); - return; - } else { - heapop.set_columns(NULL); - } - } else { - if (_columns.has_value()) { - heapop.set_columns(_columns.value()); - } else { - heapop.set_columns(DEFAULT_COLUMNS); - } - } - VMThread::execute(&heapop); -} - -int ClassStatsDCmd::num_arguments() { - ResourceMark rm; - ClassStatsDCmd* dcmd = new ClassStatsDCmd(NULL, false); - if (dcmd != NULL) { - DCmdMark mark(dcmd); - return dcmd->_dcmdparser.num_arguments(); - } else { - return 0; - } -} #endif // INCLUDE_SERVICES ThreadDumpDCmd::ThreadDumpDCmd(outputStream* output, bool heap) :
--- a/src/hotspot/share/services/diagnosticCommand.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/services/diagnosticCommand.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -376,28 +376,6 @@ virtual void execute(DCmdSource source, TRAPS); }; -class ClassStatsDCmd : public DCmdWithParser { -protected: - DCmdArgument<bool> _all; - DCmdArgument<bool> _csv; - DCmdArgument<bool> _help; - DCmdArgument<char*> _columns; -public: - ClassStatsDCmd(outputStream* output, bool heap); - static const char* name() { - return "GC.class_stats"; - } - static const char* description() { - return "(Deprecated) Provide statistics about Java class meta data."; - } - static const char* impact() { - return "High: Depends on Java heap size and content."; - } - static int num_arguments(); - virtual void execute(DCmdSource source, TRAPS); -}; - - class ClassHierarchyDCmd : public DCmdWithParser { protected: DCmdArgument<bool> _print_interfaces; // true if inherited interfaces should be printed.
--- a/src/hotspot/share/utilities/count_leading_zeros.hpp Tue Jan 14 15:19:49 2020 -0800 +++ b/src/hotspot/share/utilities/count_leading_zeros.hpp Tue Jan 14 15:23:01 2020 -0800 @@ -108,8 +108,8 @@ template <typename T> struct CountLeadingZerosImpl<T, 8> { static unsigned doit(T v) { +#ifdef _LP64 unsigned long index; -#ifdef _LP64 _BitScanReverse64(&index, v); return 63u - index; #else
--- a/src/java.base/share/classes/java/lang/Class.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/lang/Class.java Tue Jan 14 15:23:01 2020 -0800 @@ -89,16 +89,15 @@ import sun.reflect.misc.ReflectUtil; /** - * Instances of the class {@code Class} represent classes and interfaces - * in a running Java application. An enum type is a kind of class and an - * annotation type is a kind of interface. Every array also - * belongs to a class that is reflected as a {@code Class} object - * that is shared by all arrays with the same element type and number - * of dimensions. The primitive Java types ({@code boolean}, - * {@code byte}, {@code char}, {@code short}, - * {@code int}, {@code long}, {@code float}, and - * {@code double}), and the keyword {@code void} are also - * represented as {@code Class} objects. + * Instances of the class {@code Class} represent classes and + * interfaces in a running Java application. An enum type and a record + * type are kinds of class; an annotation type is a kind of + * interface. Every array also belongs to a class that is reflected as + * a {@code Class} object that is shared by all arrays with the same + * element type and number of dimensions. The primitive Java types + * ({@code boolean}, {@code byte}, {@code char}, {@code short}, {@code + * int}, {@code long}, {@code float}, and {@code double}), and the + * keyword {@code void} are also represented as {@code Class} objects. * * <p> {@code Class} has no public constructor. Instead a {@code Class} * object is constructed automatically by the Java Virtual Machine
--- a/src/java.base/share/classes/java/lang/annotation/ElementType.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/lang/annotation/ElementType.java Tue Jan 14 15:23:01 2020 -0800 @@ -71,7 +71,8 @@ * @jls 4.1 The Kinds of Types and Values */ public enum ElementType { - /** Class, interface (including annotation type), or enum declaration */ + /** Class, interface (including annotation type), enum, or record + * declaration */ TYPE, /** Field declaration (includes enum constants) */
--- a/src/java.base/share/classes/java/lang/invoke/MethodHandles.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/lang/invoke/MethodHandles.java Tue Jan 14 15:23:01 2020 -0800 @@ -1451,7 +1451,7 @@ * <li>If the new lookup class is in the same module as the old lookup class, * the new previous lookup class is the old previous lookup class. * <li>If the new lookup class is in a different module from the old lookup class, - * the new previous lookup class is the the old lookup class. + * the new previous lookup class is the old lookup class. *</ul> * <p> * The resulting lookup's capabilities for loading classes
--- a/src/java.base/share/classes/java/net/IDN.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/net/IDN.java Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,9 +29,9 @@ import java.security.AccessController; import java.security.PrivilegedAction; -import sun.net.idn.StringPrep; -import sun.net.idn.Punycode; -import sun.text.normalizer.UCharacterIterator; +import jdk.internal.icu.impl.Punycode; +import jdk.internal.icu.text.StringPrep; +import jdk.internal.icu.text.UCharacterIterator; /** * Provides methods to convert internationalized domain names (IDNs) between @@ -226,7 +226,7 @@ InputStream stream = null; try { - final String IDN_PROFILE = "uidna.spp"; + final String IDN_PROFILE = "/sun/net/idn/uidna.spp"; if (System.getSecurityManager() != null) { stream = AccessController.doPrivileged(new PrivilegedAction<>() { public InputStream run() {
--- a/src/java.base/share/classes/java/text/Bidi.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/text/Bidi.java Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,7 +35,7 @@ package java.text; -import sun.text.bidi.BidiBase; +import jdk.internal.icu.text.BidiBase; /** * This class implements the Unicode Bidirectional Algorithm.
--- a/src/java.base/share/classes/java/text/CollationElementIterator.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/text/CollationElementIterator.java Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1996, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,7 +41,7 @@ import java.lang.Character; import java.util.Vector; import sun.text.CollatorUtilities; -import sun.text.normalizer.NormalizerBase; +import jdk.internal.icu.text.NormalizerBase; /** * The {@code CollationElementIterator} class is used as an iterator
--- a/src/java.base/share/classes/java/text/Normalizer.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/text/Normalizer.java Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -37,7 +37,7 @@ package java.text; -import sun.text.normalizer.NormalizerBase; +import jdk.internal.icu.text.NormalizerBase; /** * This class provides the method {@code normalize} which transforms Unicode
--- a/src/java.base/share/classes/java/text/RBTableBuilder.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/text/RBTableBuilder.java Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -42,8 +42,7 @@ import sun.text.UCompactIntArray; import sun.text.IntHashtable; import sun.text.ComposedCharIter; -import sun.text.CollatorUtilities; -import sun.text.normalizer.NormalizerImpl; +import jdk.internal.icu.impl.NormalizerImpl; /** * This class contains all the code to parse a RuleBasedCollator pattern
--- a/src/java.base/share/classes/java/util/ArrayList.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/util/ArrayList.java Tue Jan 14 15:23:01 2020 -0800 @@ -1133,7 +1133,7 @@ this.parent = parent; this.offset = parent.offset + fromIndex; this.size = toIndex - fromIndex; - this.modCount = root.modCount; + this.modCount = parent.modCount; } public E set(int index, E element) { @@ -1286,7 +1286,7 @@ return new ListIterator<E>() { int cursor = index; int lastRet = -1; - int expectedModCount = root.modCount; + int expectedModCount = SubList.this.modCount; public boolean hasNext() { return cursor != SubList.this.size; @@ -1330,7 +1330,7 @@ final Object[] es = root.elementData; if (offset + i >= es.length) throw new ConcurrentModificationException(); - for (; i < size && modCount == expectedModCount; i++) + for (; i < size && root.modCount == expectedModCount; i++) action.accept(elementAt(es, offset + i)); // update once at end to reduce heap write traffic cursor = i; @@ -1356,7 +1356,7 @@ SubList.this.remove(lastRet); cursor = lastRet; lastRet = -1; - expectedModCount = root.modCount; + expectedModCount = SubList.this.modCount; } catch (IndexOutOfBoundsException ex) { throw new ConcurrentModificationException(); } @@ -1382,7 +1382,7 @@ SubList.this.add(i, e); cursor = i + 1; lastRet = -1; - expectedModCount = root.modCount; + expectedModCount = SubList.this.modCount; } catch (IndexOutOfBoundsException ex) { throw new ConcurrentModificationException(); }
--- a/src/java.base/share/classes/java/util/ImmutableCollections.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/util/ImmutableCollections.java Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -56,9 +56,47 @@ * will vary between JVM runs. */ static final int SALT; + + /** + * For set and map iteration, we will iterate in "reverse" stochastically, + * decided at bootstrap time. + */ + private static final boolean REVERSE; static { - long nt = System.nanoTime(); - SALT = (int)((nt >>> 32) ^ nt); + long color = 0x243F_6A88_85A3_08D3L; // pi slice + long seed = System.nanoTime(); + SALT = (int)((color * seed) >> 16); // avoid LSB and MSB + REVERSE = SALT >= 0; + } + + /** + * Constants following this might be initialized from the CDS archive via + * this array. + */ + private static Object[] archivedObjects; + + private static final Object EMPTY; + + static final ListN<?> EMPTY_LIST; + + static final SetN<?> EMPTY_SET; + + static final MapN<?,?> EMPTY_MAP; + + static { + VM.initializeFromArchive(ImmutableCollections.class); + if (archivedObjects == null) { + EMPTY = new Object(); + EMPTY_LIST = new ListN<>(); + EMPTY_SET = new SetN<>(); + EMPTY_MAP = new MapN<>(); + archivedObjects = new Object[] { EMPTY, EMPTY_LIST, EMPTY_SET, EMPTY_MAP }; + } else { + EMPTY = archivedObjects[0]; + EMPTY_LIST = (ListN)archivedObjects[1]; + EMPTY_SET = (SetN)archivedObjects[2]; + EMPTY_MAP = (MapN)archivedObjects[3]; + } } /** No instances. 
*/ @@ -386,11 +424,13 @@ private final E e0; @Stable - private final E e1; + private final Object e1; List12(E e0) { this.e0 = Objects.requireNonNull(e0); - this.e1 = null; + // Use EMPTY as a sentinel for an unused element: not using null + // enable constant folding optimizations over single-element lists + this.e1 = EMPTY; } List12(E e0, E e1) { @@ -400,7 +440,7 @@ @Override public int size() { - return e1 != null ? 2 : 1; + return e1 != EMPTY ? 2 : 1; } @Override @@ -409,11 +449,12 @@ } @Override + @SuppressWarnings("unchecked") public E get(int index) { if (index == 0) { return e0; - } else if (index == 1 && e1 != null) { - return e1; + } else if (index == 1 && e1 != EMPTY) { + return (E)e1; } throw outOfBounds(index); } @@ -425,7 +466,7 @@ @java.io.Serial private Object writeReplace() { - if (e1 == null) { + if (e1 == EMPTY) { return new CollSer(CollSer.IMM_LIST, e0); } else { return new CollSer(CollSer.IMM_LIST, e0, e1); @@ -434,7 +475,7 @@ @Override public Object[] toArray() { - if (e1 == null) { + if (e1 == EMPTY) { return new Object[] { e0 }; } else { return new Object[] { e0, e1 }; @@ -444,7 +485,7 @@ @Override @SuppressWarnings("unchecked") public <T> T[] toArray(T[] a) { - int size = e1 == null ? 1 : 2; + int size = size(); T[] array = a.length >= size ? a : (T[])Array.newInstance(a.getClass().getComponentType(), size); array[0] = (T)e0; @@ -461,16 +502,6 @@ static final class ListN<E> extends AbstractImmutableList<E> implements Serializable { - // EMPTY_LIST may be initialized from the CDS archive. 
- static @Stable List<?> EMPTY_LIST; - - static { - VM.initializeFromArchive(ListN.class); - if (EMPTY_LIST == null) { - EMPTY_LIST = new ListN<>(); - } - } - @Stable private final E[] elements; @@ -564,13 +595,16 @@ implements Serializable { @Stable - final E e0; + private final E e0; + @Stable - final E e1; + private final Object e1; Set12(E e0) { this.e0 = Objects.requireNonNull(e0); - this.e1 = null; + // Use EMPTY as a sentinel for an unused element: not using null + // enable constant folding optimizations over single-element sets + this.e1 = EMPTY; } Set12(E e0, E e1) { @@ -584,7 +618,7 @@ @Override public int size() { - return (e1 == null) ? 1 : 2; + return (e1 == EMPTY) ? 1 : 2; } @Override @@ -594,12 +628,12 @@ @Override public boolean contains(Object o) { - return o.equals(e0) || o.equals(e1); // implicit nullcheck of o + return o.equals(e0) || e1.equals(o); // implicit nullcheck of o } @Override public int hashCode() { - return e0.hashCode() + (e1 == null ? 0 : e1.hashCode()); + return e0.hashCode() + (e1 == EMPTY ? 0 : e1.hashCode()); } @Override @@ -613,13 +647,14 @@ } @Override + @SuppressWarnings("unchecked") public E next() { if (idx == 1) { idx = 0; - return SALT >= 0 || e1 == null ? e0 : e1; + return (REVERSE || e1 == EMPTY) ? e0 : (E)e1; } else if (idx == 2) { idx = 1; - return SALT >= 0 ? e1 : e0; + return REVERSE ? (E)e1 : e0; } else { throw new NoSuchElementException(); } @@ -634,7 +669,7 @@ @java.io.Serial private Object writeReplace() { - if (e1 == null) { + if (e1 == EMPTY) { return new CollSer(CollSer.IMM_SET, e0); } else { return new CollSer(CollSer.IMM_SET, e0, e1); @@ -643,9 +678,9 @@ @Override public Object[] toArray() { - if (e1 == null) { + if (e1 == EMPTY) { return new Object[] { e0 }; - } else if (SALT >= 0) { + } else if (REVERSE) { return new Object[] { e1, e0 }; } else { return new Object[] { e0, e1 }; @@ -655,12 +690,12 @@ @Override @SuppressWarnings("unchecked") public <T> T[] toArray(T[] a) { - int size = e1 == null ? 
1 : 2; + int size = size(); T[] array = a.length >= size ? a : (T[])Array.newInstance(a.getClass().getComponentType(), size); if (size == 1) { array[0] = (T)e0; - } else if (SALT >= 0) { + } else if (REVERSE) { array[0] = (T)e1; array[1] = (T)e0; } else { @@ -684,18 +719,9 @@ static final class SetN<E> extends AbstractImmutableSet<E> implements Serializable { - // EMPTY_SET may be initialized from the CDS archive. - static @Stable Set<?> EMPTY_SET; - - static { - VM.initializeFromArchive(SetN.class); - if (EMPTY_SET == null) { - EMPTY_SET = new SetN<>(); - } - } - @Stable final E[] elements; + @Stable final int size; @@ -752,7 +778,7 @@ private int nextIndex() { int idx = this.idx; - if (SALT >= 0) { + if (REVERSE) { if (++idx >= elements.length) { idx = 0; } @@ -941,16 +967,6 @@ */ static final class MapN<K,V> extends AbstractImmutableMap<K,V> { - // EMPTY_MAP may be initialized from the CDS archive. - static @Stable Map<?,?> EMPTY_MAP; - - static { - VM.initializeFromArchive(MapN.class); - if (EMPTY_MAP == null) { - EMPTY_MAP = new MapN<>(); - } - } - @Stable final Object[] table; // pairs of key, value @@ -1058,7 +1074,7 @@ private int nextIndex() { int idx = this.idx; - if (SALT >= 0) { + if (REVERSE) { if ((idx += 2) >= table.length) { idx = 0; } @@ -1284,7 +1300,7 @@ return Set.of(array); case IMM_MAP: if (array.length == 0) { - return ImmutableCollections.MapN.EMPTY_MAP; + return ImmutableCollections.EMPTY_MAP; } else if (array.length == 2) { return new ImmutableCollections.Map1<>(array[0], array[1]); } else {
--- a/src/java.base/share/classes/java/util/List.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/util/List.java Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -789,7 +789,7 @@ */ @SuppressWarnings("unchecked") static <E> List<E> of() { - return (List<E>) ImmutableCollections.ListN.EMPTY_LIST; + return (List<E>) ImmutableCollections.EMPTY_LIST; } /** @@ -1033,7 +1033,7 @@ switch (elements.length) { // implicit null check of elements case 0: @SuppressWarnings("unchecked") - var list = (List<E>) ImmutableCollections.ListN.EMPTY_LIST; + var list = (List<E>) ImmutableCollections.EMPTY_LIST; return list; case 1: return new ImmutableCollections.List12<>(elements[0]);
--- a/src/java.base/share/classes/java/util/Map.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/util/Map.java Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1288,7 +1288,7 @@ */ @SuppressWarnings("unchecked") static <K, V> Map<K, V> of() { - return (Map<K,V>) ImmutableCollections.MapN.EMPTY_MAP; + return (Map<K,V>) ImmutableCollections.EMPTY_MAP; } /** @@ -1606,7 +1606,7 @@ static <K, V> Map<K, V> ofEntries(Entry<? extends K, ? extends V>... entries) { if (entries.length == 0) { // implicit null check of entries array @SuppressWarnings("unchecked") - var map = (Map<K,V>) ImmutableCollections.MapN.EMPTY_MAP; + var map = (Map<K,V>) ImmutableCollections.EMPTY_MAP; return map; } else if (entries.length == 1) { // implicit null check of the array slot
--- a/src/java.base/share/classes/java/util/Set.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/util/Set.java Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -450,7 +450,7 @@ */ @SuppressWarnings("unchecked") static <E> Set<E> of() { - return (Set<E>) ImmutableCollections.SetN.EMPTY_SET; + return (Set<E>) ImmutableCollections.EMPTY_SET; } /** @@ -694,7 +694,7 @@ switch (elements.length) { // implicit null check of elements case 0: @SuppressWarnings("unchecked") - var set = (Set<E>) ImmutableCollections.SetN.EMPTY_SET; + var set = (Set<E>) ImmutableCollections.EMPTY_SET; return set; case 1: return new ImmutableCollections.Set12<>(elements[0]);
--- a/src/java.base/share/classes/java/util/jar/JarFile.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/util/jar/JarFile.java Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -157,8 +157,10 @@ private boolean jvInitialized; private boolean verify; private final Runtime.Version version; // current version - private final int versionFeature; // version.feature() + private final int versionFeature; // version.feature() private boolean isMultiRelease; // is jar multi-release? + static final ThreadLocal<Boolean> isInitializing = + ThreadLocal.withInitial(() -> Boolean.FALSE); // indicates if Class-Path attribute present private boolean hasClassPathAttribute; @@ -1031,8 +1033,13 @@ throw new RuntimeException(e); } if (jv != null && !jvInitialized) { - initializeVerifier(); - jvInitialized = true; + isInitializing.set(Boolean.TRUE); + try { + initializeVerifier(); + jvInitialized = true; + } finally { + isInitializing.set(Boolean.FALSE); + } } }
--- a/src/java.base/share/classes/java/util/jar/JavaUtilJarAccessImpl.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/java/util/jar/JavaUtilJarAccessImpl.java Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -68,4 +68,8 @@ public void ensureInitialization(JarFile jar) { jar.ensureInitialization(); } + + public Boolean isInitializing() { + return JarFile.isInitializing.get(); + } }
--- a/src/java.base/share/classes/jdk/internal/access/JavaUtilJarAccess.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/jdk/internal/access/JavaUtilJarAccess.java Tue Jan 14 15:23:01 2020 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -45,4 +45,5 @@ public List<Object> getManifestDigests(JarFile jar); public Attributes getTrustedAttributes(Manifest man, String name); public void ensureInitialization(JarFile jar); + public Boolean isInitializing(); }
--- a/src/java.base/share/classes/jdk/internal/event/EventHelper.java Tue Jan 14 15:19:49 2020 -0800 +++ b/src/java.base/share/classes/jdk/internal/event/EventHelper.java Tue Jan 14 15:23:01 2020 -0800 @@ -25,6 +25,11 @@ package jdk.internal.event; +import jdk.internal.access.JavaUtilJarAccess; +import jdk.internal.access.SharedSecrets; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; import java.time.Duration; import java.time.Instant; import java.util.Date; @@ -37,14 +42,24 @@ public final class EventHelper { + private static final JavaUtilJarAccess JUJA = SharedSecrets.javaUtilJarAccess(); + private static volatile boolean loggingSecurity; + private static volatile System.Logger securityLogger; + private static final VarHandle LOGGER_HANDLE; + static { + try { + LOGGER_HANDLE = + MethodHandles.lookup().findStaticVarHandle( + EventHelper.class, "securityLogger", System.Logger.class); + } catch (ReflectiveOperationException e) { + throw new Error(e); + } + } private static final System.Logger.Level LOG_LEVEL = System.Logger.Level.DEBUG; // helper class used for logging security related events for now private static final String SECURITY_LOGGER_NAME = "jdk.event.security"; - private static final System.Logger SECURITY_LOGGER = - System.getLogger(SECURITY_LOGGER_NAME); - private static final boolean LOGGING_SECURITY = - SECURITY_LOGGER.isLoggable(LOG_LEVEL); + public static void logTLSHandshakeEvent(Instant start, String peerHost, @@ -52,8 +67,9 @@ String cipherSuite, String protocolVersion, long peerCertId) { + assert securityLogger != null; String prepend = getDurationString(start); - SECURITY_LOGGER.log(LOG_LEVEL, prepend + + securityLogger.log(LOG_LEVEL, prepend + " TLSHandshake: {0}:{1,number,#}, {2}, {3}, {4,number,#}", peerHost, peerPort, protocolVersion, cipherSuite, peerCertId); } @@ -61,18 +77,18 @@ public static void logSecurityPropertyEvent(String key, String value) { - if (isLoggingSecurity()) { - 
SECURITY_LOGGER.log(LOG_LEVEL, - "SecurityPropertyModification: key:{0}, value:{1}", key, value); - } + assert securityLogger != null; + securityLogger.log(LOG_LEVEL, + "SecurityPropertyModification: key:{0}, value:{1}", key, value); } public static void logX509ValidationEvent(int anchorCertId, int[] certIds) { + assert securityLogger != null; String codes = IntStream.of(certIds) .mapToObj(Integer::toString) .collect(Collectors.joining(", ")); - SECURITY_LOGGER.log(LOG_LEVEL, + securityLogger.log(LOG_LEVEL, "ValidationChain: {0,number,#}, {1}", anchorCertId, codes); } @@ -85,7 +101,8 @@ long certId, long beginDate, long endDate) { - SECURITY_LOGGER.log(LOG_LEVEL, "X509Certificate: Alg:{0}, Serial:{1}" + + assert securityLogger != null; + securityLogger.log(LOG_LEVEL, "X509Certificate: Alg:{0}, Serial:{1}" + ", Subject:{2}, Issuer:{3}, Key type:{4}, Length:{5,number,#}" + ", Cert Id:{6,number,#}, Valid from:{7}, Valid until:{8}", algId, serialNum, subject, issuer, keyType, length, @@ -124,6 +141,14 @@ * @return boolean indicating whether an event should be logged */ public static boolean isLoggingSecurity() { - return LOGGING_SECURITY; + // Avoid a bootstrap issue where the commitEvent attempts to + // trigger early loading of System Logger but where + // the verification process still has JarFiles locked + if (securityLogger == null && !JUJA.isInitializing()) { + LOGGER_HANDLE.compareAndSet( null, System.getLogger(SECURITY_LOGGER_NAME)); + loggingSecurity = securityLogger.isLoggable(LOG_LEVEL); + } + return loggingSecurity; } + }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/java.base/share/classes/jdk/internal/icu/impl/BMPSet.java Tue Jan 14 15:23:01 2020 -0800 @@ -0,0 +1,527 @@ +/* + * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + ****************************************************************************** + * + * Copyright (C) 2009-2014, International Business Machines + * Corporation and others. All Rights Reserved. + * + ****************************************************************************** + */ + +package jdk.internal.icu.impl; + +import jdk.internal.icu.text.UnicodeSet.SpanCondition; +import jdk.internal.icu.util.OutputInt; + +/** + * Helper class for frozen UnicodeSets, implements contains() and span() optimized for BMP code points. + * + * Latin-1: Look up bytes. 
+ * 2-byte characters: Bits organized vertically. + * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF, with mixed for illegal ranges. + * Supplementary characters: Call contains() on the parent set. + */ +public final class BMPSet { + + /** + * One boolean ('true' or 'false') per Latin-1 character. + */ + private boolean[] latin1Contains; + + /** + * One bit per code point from U+0000..U+07FF. The bits are organized vertically; consecutive code points + * correspond to the same bit positions in consecutive table words. With code point parts lead=c{10..6} + * trail=c{5..0} it is set.contains(c)==(table7FF[trail] bit lead) + * + * Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD) for faster validity checking at + * runtime. + */ + private int[] table7FF; + + /** + * One bit per 64 BMP code points. The bits are organized vertically; consecutive 64-code point blocks + * correspond to the same bit position in consecutive table words. With code point parts lead=c{15..12} + * t1=c{11..6} test bits (lead+16) and lead in bmpBlockBits[t1]. If the upper bit is 0, then the lower bit + * indicates if contains(c) for all code points in the 64-block. If the upper bit is 1, then the block is mixed + * and set.contains(c) must be called. + * + * Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to the result of contains(FFFD) for faster + * validity checking at runtime. + */ + private int[] bmpBlockBits; + + /** + * Inversion list indexes for restricted binary searches in findCodePoint(), from findCodePoint(U+0800, U+1000, + * U+2000, .., U+F000, U+10000). U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are + * always looked up in the bit tables. The last pair of indexes is for finding supplementary code points. + */ + private int[] list4kStarts; + + /** + * The inversion list of the parent set, for the slower contains() implementation for mixed BMP blocks and for + * supplementary code points. 
The list is terminated with list[listLength-1]=0x110000. + */ + private final int[] list; + private final int listLength; // length used; list may be longer to minimize reallocs + + public BMPSet(final int[] parentList, int parentListLength) { + list = parentList; + listLength = parentListLength; + latin1Contains = new boolean[0x100]; + table7FF = new int[64]; + bmpBlockBits = new int[64]; + list4kStarts = new int[18]; + + /* + * Set the list indexes for binary searches for U+0800, U+1000, U+2000, .., U+F000, U+10000. U+0800 is the + * first 3-byte-UTF-8 code point. Lower code points are looked up in the bit tables. The last pair of + * indexes is for finding supplementary code points. + */ + list4kStarts[0] = findCodePoint(0x800, 0, listLength - 1); + int i; + for (i = 1; i <= 0x10; ++i) { + list4kStarts[i] = findCodePoint(i << 12, list4kStarts[i - 1], listLength - 1); + } + list4kStarts[0x11] = listLength - 1; + + initBits(); + } + + public boolean contains(int c) { + if (c <= 0xff) { + return (latin1Contains[c]); + } else if (c <= 0x7ff) { + return ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0); + } else if (c < 0xd800 || (c >= 0xe000 && c <= 0xffff)) { + int lead = c >> 12; + int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; + if (twoBits <= 1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + return (0 != twoBits); + } else { + // Look up the code point in its 4k block of code points. + return containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1]); + } + } else if (c <= 0x10ffff) { + // surrogate or supplementary code point + return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]); + } else { + // Out-of-range code points get false, consistent with long-standing + // behavior of UnicodeSet.contains(c). + return false; + } + } + + /** + * Span the initial substring for which each character c has spanCondition==contains(c). It must be + * spanCondition==0 or 1. 
+ * + * @param start The start index + * @param outCount If not null: Receives the number of code points in the span. + * @return the limit (exclusive end) of the span + * + * NOTE: to reduce the overhead of function call to contains(c), it is manually inlined here. Check for + * sufficient length for trail unit for each surrogate pair. Handle single surrogates as surrogate code points + * as usual in ICU. + */ + public final int span(CharSequence s, int start, SpanCondition spanCondition, + OutputInt outCount) { + char c, c2; + int i = start; + int limit = s.length(); + int numSupplementary = 0; + if (SpanCondition.NOT_CONTAINED != spanCondition) { + // span + while (i < limit) { + c = s.charAt(i); + if (c <= 0xff) { + if (!latin1Contains[c]) { + break; + } + } else if (c <= 0x7ff) { + if ((table7FF[c & 0x3f] & (1 << (c >> 6))) == 0) { + break; + } + } else if (c < 0xd800 || + c >= 0xdc00 || (i + 1) == limit || (c2 = s.charAt(i + 1)) < 0xdc00 || c2 >= 0xe000) { + int lead = c >> 12; + int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; + if (twoBits <= 1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + if (twoBits == 0) { + break; + } + } else { + // Look up the code point in its 4k block of code points. 
+ if (!containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) { + break; + } + } + } else { + // surrogate pair + int supplementary = UCharacterProperty.getRawSupplementary(c, c2); + if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) { + break; + } + ++numSupplementary; + ++i; + } + ++i; + } + } else { + // span not + while (i < limit) { + c = s.charAt(i); + if (c <= 0xff) { + if (latin1Contains[c]) { + break; + } + } else if (c <= 0x7ff) { + if ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0) { + break; + } + } else if (c < 0xd800 || + c >= 0xdc00 || (i + 1) == limit || (c2 = s.charAt(i + 1)) < 0xdc00 || c2 >= 0xe000) { + int lead = c >> 12; + int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; + if (twoBits <= 1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + if (twoBits != 0) { + break; + } + } else { + // Look up the code point in its 4k block of code points. + if (containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) { + break; + } + } + } else { + // surrogate pair + int supplementary = UCharacterProperty.getRawSupplementary(c, c2); + if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) { + break; + } + ++numSupplementary; + ++i; + } + ++i; + } + } + if (outCount != null) { + int spanLength = i - start; + outCount.value = spanLength - numSupplementary; // number of code points + } + return i; + } + + /** + * Symmetrical with span(). + * Span the trailing substring for which each character c has spanCondition==contains(c). It must be s.length >= + * limit and spanCondition==0 or 1. + * + * @return The string index which starts the span (i.e. inclusive). 
+ */ + public final int spanBack(CharSequence s, int limit, SpanCondition spanCondition) { + char c, c2; + + if (SpanCondition.NOT_CONTAINED != spanCondition) { + // span + for (;;) { + c = s.charAt(--limit); + if (c <= 0xff) { + if (!latin1Contains[c]) { + break; + } + } else if (c <= 0x7ff) { + if ((table7FF[c & 0x3f] & (1 << (c >> 6))) == 0) { + break; + } + } else if (c < 0xd800 || + c < 0xdc00 || 0 == limit || (c2 = s.charAt(limit - 1)) < 0xd800 || c2 >= 0xdc00) { + int lead = c >> 12; + int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; + if (twoBits <= 1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + if (twoBits == 0) { + break; + } + } else { + // Look up the code point in its 4k block of code points. + if (!containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) { + break; + } + } + } else { + // surrogate pair + int supplementary = UCharacterProperty.getRawSupplementary(c2, c); + if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) { + break; + } + --limit; + } + if (0 == limit) { + return 0; + } + } + } else { + // span not + for (;;) { + c = s.charAt(--limit); + if (c <= 0xff) { + if (latin1Contains[c]) { + break; + } + } else if (c <= 0x7ff) { + if ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0) { + break; + } + } else if (c < 0xd800 || + c < 0xdc00 || 0 == limit || (c2 = s.charAt(limit - 1)) < 0xd800 || c2 >= 0xdc00) { + int lead = c >> 12; + int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; + if (twoBits <= 1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + if (twoBits != 0) { + break; + } + } else { + // Look up the code point in its 4k block of code points. 
+ if (containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) { + break; + } + } + } else { + // surrogate pair + int supplementary = UCharacterProperty.getRawSupplementary(c2, c); + if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) { + break; + } + --limit; + } + if (0 == limit) { + return 0; + } + } + } + return limit + 1; + } + + /** + * Set bits in a bit rectangle in "vertical" bit organization. start<limit<=0x800 + */ + private static void set32x64Bits(int[] table, int start, int limit) { + assert (64 == table.length); + int lead = start >> 6; // Named for UTF-8 2-byte lead byte with upper 5 bits. + int trail = start & 0x3f; // Named for UTF-8 2-byte trail byte with lower 6 bits. + + // Set one bit indicating an all-one block. + int bits = 1 << lead; + if ((start + 1) == limit) { // Single-character shortcut. + table[trail] |= bits; + return; + } + + int limitLead = limit >> 6; + int limitTrail = limit & 0x3f; + + if (lead == limitLead) { + // Partial vertical bit column. + while (trail < limitTrail) { + table[trail++] |= bits; + } + } else { + // Partial vertical bit column, + // followed by a bit rectangle, + // followed by another partial vertical bit column. + if (trail > 0) { + do { + table[trail++] |= bits; + } while (trail < 64); + ++lead; + } + if (lead < limitLead) { + bits = ~((1 << lead) - 1); + if (limitLead < 0x20) { + bits &= (1 << limitLead) - 1; + } + for (trail = 0; trail < 64; ++trail) { + table[trail] |= bits; + } + } + // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0. + // In that case, bits=1<<limitLead == 1<<0 == 1 + // (because Java << uses only the lower 5 bits of the shift operand) + // but the bits value is not used because trail<limitTrail is already false. + bits = 1 << limitLead; + for (trail = 0; trail < limitTrail; ++trail) { + table[trail] |= bits; + } + } + } + + private void initBits() { + int start, limit; + int listIndex = 0; + + // Set latin1Contains[]. 
+ do { + start = list[listIndex++]; + if (listIndex < listLength) { + limit = list[listIndex++]; + } else { + limit = 0x110000; + } + if (start >= 0x100) { + break; + } + do { + latin1Contains[start++] = true; + } while (start < limit && start < 0x100); + } while (limit <= 0x100); + + // Set table7FF[]. + while (start < 0x800) { + set32x64Bits(table7FF, start, limit <= 0x800 ? limit : 0x800); + if (limit > 0x800) { + start = 0x800; + break; + } + + start = list[listIndex++]; + if (listIndex < listLength) { + limit = list[listIndex++]; + } else { + limit = 0x110000; + } + } + + // Set bmpBlockBits[]. + int minStart = 0x800; + while (start < 0x10000) { + if (limit > 0x10000) { + limit = 0x10000; + } + + if (start < minStart) { + start = minStart; + } + if (start < limit) { // Else: Another range entirely in a known mixed-value block. + if (0 != (start & 0x3f)) { + // Mixed-value block of 64 code points. + start >>= 6; + bmpBlockBits[start & 0x3f] |= 0x10001 << (start >> 6); + start = (start + 1) << 6; // Round up to the next block boundary. + minStart = start; // Ignore further ranges in this block. + } + if (start < limit) { + if (start < (limit & ~0x3f)) { + // Multiple all-ones blocks of 64 code points each. + set32x64Bits(bmpBlockBits, start >> 6, limit >> 6); + } + + if (0 != (limit & 0x3f)) { + // Mixed-value block of 64 code points. + limit >>= 6; + bmpBlockBits[limit & 0x3f] |= 0x10001 << (limit >> 6); + limit = (limit + 1) << 6; // Round up to the next block boundary. + minStart = limit; // Ignore further ranges in this block. + } + } + } + + if (limit == 0x10000) { + break; + } + + start = list[listIndex++]; + if (listIndex < listLength) { + limit = list[listIndex++]; + } else { + limit = 0x110000; + } + } + } + + /** + * Same as UnicodeSet.findCodePoint(int c) except that the binary search is restricted for finding code + * points in a certain range. 
+ * + * For restricting the search for finding in the range start..end, pass in lo=findCodePoint(start) and + * hi=findCodePoint(end) with 0<=lo<=hi<len. findCodePoint(c) defaults to lo=0 and hi=len-1. + * + * @param c + * a character in a subrange of MIN_VALUE..MAX_VALUE + * @param lo + * The lowest index to be returned. + * @param hi + * The highest index to be returned. + * @return the smallest integer i in the range lo..hi, inclusive, such that c < list[i] + */ + private int findCodePoint(int c, int lo, int hi) { + /* Examples: + findCodePoint(c) + set list[] c=0 1 3 4 7 8 + === ============== =========== + [] [110000] 0 0 0 0 0 0 + [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2 + [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2 + [:Any:] [0, 110000] 1 1 1 1 1 1 + */ + + // Return the smallest i such that c < list[i]. Assume + // list[len - 1] == HIGH and that c is legal (0..HIGH-1). + if (c < list[lo]) + return lo; + // High runner test. c is often after the last range, so an + // initial check for this condition pays off. + if (lo >= hi || c >= list[hi - 1]) + return hi; + // invariant: c >= list[lo] + // invariant: c < list[hi] + for (;;) { + int i = (lo + hi) >>> 1; + if (i == lo) { + break; // Found! + } else if (c < list[i]) { + hi = i; + } else { + lo = i; + } + } + return hi; + } + + private final boolean containsSlow(int c, int lo, int hi) { + return (0 != (findCodePoint(c, lo, hi) & 1)); + } +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/java.base/share/classes/jdk/internal/icu/impl/CharTrie.java Tue Jan 14 15:23:01 2020 -0800 @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + ****************************************************************************** + * Copyright (C) 1996-2014, International Business Machines Corporation and + * others. All Rights Reserved. + ****************************************************************************** + */ + +package jdk.internal.icu.impl; + +import jdk.internal.icu.text.UTF16; + +import java.io.DataInputStream; +import java.io.InputStream; +import java.io.IOException; + +/** + * Trie implementation which stores data in char, 16 bits. 
+ * @author synwee + * @see com.ibm.icu.impl.Trie + * @since release 2.1, Jan 01 2002 + */ + + // note that i need to handle the block calculations later, since chartrie + // in icu4c uses the same index array. +public class CharTrie extends Trie +{ + // public constructors --------------------------------------------- + + /** + * <p>Creates a new Trie with the settings for the trie data.</p> + * <p>Unserialize the 32-bit-aligned input stream and use the data for the + * trie.</p> + * @param inputStream file input stream to a ICU data file, containing + * the trie + * @param dataManipulate object which provides methods to parse the char + * data + * @throws IOException thrown when data reading fails + * @draft 2.1 + */ + public CharTrie(InputStream inputStream, + DataManipulate dataManipulate) throws IOException + { + super(inputStream, dataManipulate); + + if (!isCharTrie()) { + throw new IllegalArgumentException( + "Data given does not belong to a char trie."); + } + } + + // public methods -------------------------------------------------- + + /** + * Gets the value associated with the codepoint. + * If no value is associated with the codepoint, a default value will be + * returned. + * @param ch codepoint + * @return offset to data + */ + public final char getCodePointValue(int ch) + { + int offset; + + // fastpath for U+0000..U+D7FF + if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { + // copy of getRawOffset() + offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) + + (ch & INDEX_STAGE_3_MASK_); + return m_data_[offset]; + } + + // handle U+D800..U+10FFFF + offset = getCodePointOffset(ch); + + // return -1 if there is an error, in this case we return the default + // value: m_initialValue_ + return (offset >= 0) ? m_data_[offset] : m_initialValue_; + } + + /** + * Gets the value to the data which this lead surrogate character points + * to. 
+ * Returned data may contain folding offset information for the next + * trailing surrogate character. + * This method does not guarantee correct results for trail surrogates. + * @param ch lead surrogate character + * @return data value + */ + public final char getLeadValue(char ch) + { + return m_data_[getLeadOffset(ch)]; + } + + // protected methods ----------------------------------------------- + + /** + * <p>Parses the input stream and stores its trie content into a index and + * data array</p> + * @param inputStream data input stream containing trie data + * @exception IOException thrown when data reading fails + */ + protected final void unserialize(InputStream inputStream) + throws IOException + { + DataInputStream input = new DataInputStream(inputStream); + int indexDataLength = m_dataOffset_ + m_dataLength_; + m_index_ = new char[indexDataLength]; + for (int i = 0; i < indexDataLength; i ++) { + m_index_[i] = input.readChar(); + } + m_data_ = m_index_; + m_initialValue_ = m_data_[m_dataOffset_]; + } + + /** + * Gets the offset to the data which the surrogate pair points to. + * @param lead lead surrogate + * @param trail trailing surrogate + * @return offset to data + * @draft 2.1 + */ + protected final int getSurrogateOffset(char lead, char trail) + { + if (m_dataManipulate_ == null) { + throw new NullPointerException( + "The field DataManipulate in this Trie is null"); + } + + // get fold position for the next trail surrogate + int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead)); + + // get the real data from the folded lead/trail units + if (offset > 0) { + return getRawOffset(offset, (char)(trail & SURROGATE_MASK_)); + } + + // return -1 if there is an error, in this case we return the default + // value: m_initialValue_ + return -1; + } + + // private data members -------------------------------------------- + + /** + * Default value + */ + private char m_initialValue_; + /** + * Array of char data + */ + private char m_data_[]; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/java.base/share/classes/jdk/internal/icu/impl/CharacterIteratorWrapper.java Tue Jan 14 15:23:01 2020 -0800 @@ -0,0 +1,147 @@
/*
 * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 *******************************************************************************
 * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
 * Agreement between IBM and Sun. This technology is protected by multiple     *
 * US and International patents. This notice and attribution to IBM may not    *
 * to removed.                                                                 *
 *******************************************************************************
 */

package jdk.internal.icu.impl;

import java.text.CharacterIterator;

import jdk.internal.icu.text.UCharacterIterator;

/**
 * Adapter that exposes a {@link CharacterIterator} through the
 * UCharacterIterator protocol. Every operation is forwarded to the wrapped
 * iterator; the only translation performed is replacing
 * {@code CharacterIterator.DONE} with this protocol's {@code DONE} value.
 * @author ram
 */

public class CharacterIteratorWrapper extends UCharacterIterator {

    /** The wrapped iterator; replaced by a copy in {@link #clone()}. */
    private CharacterIterator delegate;

    /**
     * Wraps the given iterator.
     * @param iter the iterator to forward to
     * @throws IllegalArgumentException if {@code iter} is null
     */
    public CharacterIteratorWrapper(CharacterIterator iter){
        if (iter == null) {
            throw new IllegalArgumentException();
        }
        delegate = iter;
    }

    /**
     * Returns the code unit at the current position without moving.
     * @see UCharacterIterator#current()
     */
    public int current() {
        final int unit = delegate.current();
        return (unit == CharacterIterator.DONE) ? DONE : unit;
    }

    /**
     * Returns the distance between the wrapped iterator's end and begin indexes.
     * @see UCharacterIterator#getLength()
     */
    public int getLength() {
        final int end = delegate.getEndIndex();
        final int begin = delegate.getBeginIndex();
        return end - begin;
    }

    /**
     * Returns the wrapped iterator's current index.
     * @see UCharacterIterator#getIndex()
     */
    public int getIndex() {
        return delegate.getIndex();
    }

    /**
     * Returns the code unit at the current position and then advances the
     * wrapped iterator by one.
     * @see UCharacterIterator#next()
     */
    public int next() {
        final int unit = delegate.current();
        delegate.next();
        return (unit == CharacterIterator.DONE) ? DONE : unit;
    }

    /**
     * Moves the wrapped iterator back by one and returns what it reports there.
     * @see UCharacterIterator#previous()
     */
    public int previous() {
        final int unit = delegate.previous();
        return (unit == CharacterIterator.DONE) ? DONE : unit;
    }

    /**
     * Repositions the wrapped iterator.
     * @see UCharacterIterator#setIndex(int)
     */
    public void setIndex(int index) {
        delegate.setIndex(index);
    }

    /**
     * Copies the wrapped iterator's text into {@code fillIn} starting at
     * {@code offset}, then restores the iterator to the index it had on entry.
     * @param fillIn destination array
     * @param offset first index in {@code fillIn} to write to
     * @return the number of code units between the begin and end indexes
     * @throws IndexOutOfBoundsException if {@code offset} is negative or the
     *         text does not fit into {@code fillIn} from {@code offset} on
     * @see UCharacterIterator#getText(char[])
     */
    public int getText(char[] fillIn, int offset){
        final int textLength = delegate.getEndIndex() - delegate.getBeginIndex();
        final int savedIndex = delegate.getIndex();
        if (!(offset >= 0 && offset + textLength <= fillIn.length)) {
            throw new IndexOutOfBoundsException(Integer.toString(textLength));
        }

        int writePos = offset;
        char ch = delegate.first();
        while (ch != CharacterIterator.DONE) {
            fillIn[writePos] = ch;
            writePos++;
            ch = delegate.next();
        }
        // Walking the text moved the iterator; put it back where the caller left it.
        delegate.setIndex(savedIndex);

        return textLength;
    }

    /**
     * Creates a clone of this iterator. Clones the underlying character iterator.
     * @return the copy, or {@code null} if the superclass clone is not supported
     * @see UCharacterIterator#clone()
     */
    public Object clone(){
        final CharacterIteratorWrapper copy;
        try {
            copy = (CharacterIteratorWrapper) super.clone();
        } catch (CloneNotSupportedException e) {
            return null; // only invoked if bad underlying character iterator
        }
        // Give the copy its own iterator so the two wrappers move independently.
        copy.delegate = (CharacterIterator) this.delegate.clone();
        return copy;
    }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/java.base/share/classes/jdk/internal/icu/impl/ICUBinary.java Tue Jan 14 15:23:01 2020 -0800 @@ -0,0 +1,324 @@
/*
 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 *******************************************************************************
 * Copyright (C) 1996-2014, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */

package jdk.internal.icu.impl;

import java.io.DataInputStream;
import java.io.InputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.MissingResourceException;
import java.security.AccessController;
import java.security.PrivilegedAction;

import jdk.internal.icu.util.VersionInfo;

/**
 * Static helpers for loading ICU binary data items, validating their headers
 * and pulling primitive arrays and strings out of a {@link ByteBuffer}.
 */
public final class ICUBinary {

    /** Accepts only data whose first version byte is 1. */
    private static final class IsAcceptable implements Authenticate {
        @Override
        public boolean isDataVersionAcceptable(byte version[]) {
            return version[0] == 1;
        }
    }

    // public inner interface ------------------------------------------------

    /**
     * Special interface for data authentication
     */
    public static interface Authenticate
    {
        /**
         * Method used in ICUBinary.readHeader() to provide data format
         * authentication.
         * @param version version of the current data
         * @return true if dataformat is an acceptable version, false otherwise
         */
        public boolean isDataVersionAcceptable(byte version[]);
    }

    // public methods --------------------------------------------------------

    /**
     * Loads an ICU binary data file and returns it as a ByteBuffer.
     * The returned buffer wraps a freshly read byte array; its position is 0
     * and its limit is the number of bytes read. Callers are expected to treat
     * the contents as read-only, but position etc. can be modified.
     *
     * @param itemPath ICU data item path, resolved with
     *                 {@code ICUBinary.class.getResourceAsStream(itemPath)}.
     * @return The data as a ByteBuffer.
     * @throws MissingResourceException if no resource exists at {@code itemPath}
     * @throws UncheckedIOException if reading the resource fails
     */
    public static ByteBuffer getRequiredData(String itemPath) {
        final Class<ICUBinary> root = ICUBinary.class;

        try (InputStream is = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
                public InputStream run() {
                    return root.getResourceAsStream(itemPath);
                }
            })) {

            // getResourceAsStream() reports a missing resource by returning null.
            // Fail with a descriptive exception instead of a bare NullPointerException.
            if (is == null) {
                throw new MissingResourceException(
                        "could not locate data " + itemPath, root.getName(), itemPath);
            }

            // is.available() may return 0, or 1, or the total number of bytes in the stream,
            // or some other number.
            // Do not try to use is.available() == 0 to find the end of the stream!
            byte[] bytes;
            int avail = is.available();
            if (avail > 32) {
                // There are more bytes available than just the ICU data header length.
                // With luck, it is the total number of bytes.
                bytes = new byte[avail];
            } else {
                bytes = new byte[128];  // empty .res files are even smaller
            }
            // Call is.read(...) until one returns a negative value.
            int length = 0;
            for(;;) {
                if (length < bytes.length) {
                    int numRead = is.read(bytes, length, bytes.length - length);
                    if (numRead < 0) {
                        break;  // end of stream
                    }
                    length += numRead;
                } else {
                    // See if we are at the end of the stream before we grow the array.
                    int nextByte = is.read();
                    if (nextByte < 0) {
                        break;
                    }
                    int capacity = 2 * bytes.length;
                    if (capacity < 128) {
                        capacity = 128;
                    } else if (capacity < 0x4000) {
                        capacity *= 2;  // Grow faster until we reach 16kB.
                    }
                    bytes = Arrays.copyOf(bytes, capacity);
                    bytes[length++] = (byte) nextByte;
                }
            }
            return ByteBuffer.wrap(bytes, 0, length);
        }
        catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Same as readHeader(), but returns a VersionInfo rather than a compact int.
     *
     * @param bytes        the data, positioned at 0
     * @param dataFormat   expected four-byte data format, packed big-endian into an int
     * @param authenticate optional check of the format version; may be null
     * @return the data version as a VersionInfo
     * @throws IOException if the header is not valid for the expected dataFormat
     */
    public static VersionInfo readHeaderAndDataVersion(ByteBuffer bytes,
                                                       int dataFormat,
                                                       Authenticate authenticate)
                                                                throws IOException {
        return getVersionInfoFromCompactInt(readHeader(bytes, dataFormat, authenticate));
    }

    private static final byte BIG_ENDIAN_ = 1;

    /**
     * Reads and validates an ICU data header from a stream, leaving the stream
     * positioned just past the header.
     *
     * @param inputStream          stream positioned at the start of the header
     * @param dataFormatIDExpected the four data format bytes the header must carry
     * @param authenticate         optional check applied to the four bytes that
     *                             follow the data format ID; may be null
     * @return the four bytes read after those checked by {@code authenticate}
     *         (named unicodeVersion by this method)
     * @throws IOException if the magic number or header fields do not match, if
     *         the declared header size is smaller than the fixed fields, or if
     *         the stream ends before the declared header size is reached
     */
    public static final byte[] readHeader(InputStream inputStream,
                                        byte dataFormatIDExpected[],
                                        Authenticate authenticate)
                                                          throws IOException
    {
        DataInputStream input = new DataInputStream(inputStream);
        char headersize = input.readChar();
        int readcount = 2;
        //reading the header format
        byte magic1 = input.readByte();
        readcount ++;
        byte magic2 = input.readByte();
        readcount ++;
        if (magic1 != MAGIC1 || magic2 != MAGIC2) {
            throw new IOException(MAGIC_NUMBER_AUTHENTICATION_FAILED_);
        }

        input.readChar(); // reading size
        readcount += 2;
        input.readChar(); // reading reserved word
        readcount += 2;
        byte bigendian    = input.readByte();
        readcount ++;
        byte charset      = input.readByte();
        readcount ++;
        byte charsize     = input.readByte();
        readcount ++;
        input.readByte(); // reading reserved byte
        readcount ++;

        byte dataFormatID[] = new byte[4];
        input.readFully(dataFormatID);
        readcount += 4;
        byte dataVersion[] = new byte[4];
        input.readFully(dataVersion);
        readcount += 4;
        byte unicodeVersion[] = new byte[4];
        input.readFully(unicodeVersion);
        readcount += 4;
        if (headersize < readcount) {
            throw new IOException("Internal Error: Header size error");
        }
        // skipBytes() is allowed to skip fewer bytes than requested, which would
        // leave the stream in the middle of the header. readFully() either consumes
        // the whole remainder or throws EOFException. headersize is a char, so the
        // scratch array is at most 64kB.
        input.readFully(new byte[headersize - readcount]);

        if (bigendian != BIG_ENDIAN_ || charset != CHAR_SET_
            || charsize != CHAR_SIZE_
            || !Arrays.equals(dataFormatIDExpected, dataFormatID)
        || (authenticate != null
            && !authenticate.isDataVersionAcceptable(dataVersion))) {
            throw new IOException(HEADER_AUTHENTICATION_FAILED_);
        }
        return unicodeVersion;
    }

    /**
     * Reads an ICU data header, checks the data format, and returns the data version.
     *
     * <p>Assumes that the ByteBuffer position is 0 on input.
     * The buffer byte order is set according to the data.
     * The buffer position is advanced past the header (including UDataInfo and comment).
     *
     * <p>See C++ ucmndata.h and unicode/udata.h.
     *
     * @param bytes        the data, positioned at 0
     * @param dataFormat   expected four-byte data format, packed big-endian into an int
     * @param authenticate optional check of the format version bytes; may be null
     * @return dataVersion
     * @throws IOException if this is not a valid ICU data item of the expected dataFormat
     */
    public static int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate)
            throws IOException {
        assert bytes.position() == 0;
        byte magic1 = bytes.get(2);
        byte magic2 = bytes.get(3);
        if (magic1 != MAGIC1 || magic2 != MAGIC2) {
            throw new IOException(MAGIC_NUMBER_AUTHENTICATION_FAILED_);
        }

        byte isBigEndian = bytes.get(8);
        byte charsetFamily = bytes.get(9);
        byte sizeofUChar = bytes.get(10);
        if (isBigEndian < 0 || 1 < isBigEndian ||
                charsetFamily != CHAR_SET_ || sizeofUChar != CHAR_SIZE_) {
            throw new IOException(HEADER_AUTHENTICATION_FAILED_);
        }
        // The multi-byte reads below depend on the order selected here.
        bytes.order(isBigEndian != 0 ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN);

        int headerSize = bytes.getChar(0);
        int sizeofUDataInfo = bytes.getChar(4);
        if (sizeofUDataInfo < 20 || headerSize < (sizeofUDataInfo + 4)) {
            throw new IOException("Internal Error: Header size error");
        }
        // TODO: Change Authenticate to take int major, int minor, int milli, int micro
        // to avoid array allocation.
        byte[] formatVersion = new byte[] {
            bytes.get(16), bytes.get(17), bytes.get(18), bytes.get(19)
        };
        if (bytes.get(12) != (byte)(dataFormat >> 24) ||
                bytes.get(13) != (byte)(dataFormat >> 16) ||
                bytes.get(14) != (byte)(dataFormat >> 8) ||
                bytes.get(15) != (byte)dataFormat ||
                (authenticate != null && !authenticate.isDataVersionAcceptable(formatVersion))) {
            throw new IOException(HEADER_AUTHENTICATION_FAILED_ +
                    String.format("; data format %02x%02x%02x%02x, format version %d.%d.%d.%d",
                            bytes.get(12), bytes.get(13), bytes.get(14), bytes.get(15),
                            formatVersion[0] & 0xff, formatVersion[1] & 0xff,
                            formatVersion[2] & 0xff, formatVersion[3] & 0xff));
        }

        bytes.position(headerSize);
        return  // dataVersion
                ((int)bytes.get(20) << 24) |
                ((bytes.get(21) & 0xff) << 16) |
                ((bytes.get(22) & 0xff) << 8) |
                (bytes.get(23) & 0xff);
    }

    /**
     * Advances the buffer position by {@code skipLength} bytes.
     * Does nothing when {@code skipLength} is zero or negative.
     */
    public static void skipBytes(ByteBuffer bytes, int skipLength) {
        if (skipLength > 0) {
            bytes.position(bytes.position() + skipLength);
        }
    }

    /**
     * Reads {@code length} bytes from the current position into a new array,
     * then skips {@code additionalSkipLength} further bytes if that is positive.
     */
    public static byte[] getBytes(ByteBuffer bytes, int length, int additionalSkipLength) {
        byte[] dest = new byte[length];
        bytes.get(dest);
        if (additionalSkipLength > 0) {
            skipBytes(bytes, additionalSkipLength);
        }
        return dest;
    }

    /**
     * Returns the next {@code length} chars at the current position as a String
     * and advances the position by {@code length * 2 + additionalSkipLength} bytes.
     */
    public static String getString(ByteBuffer bytes, int length, int additionalSkipLength) {
        CharSequence cs = bytes.asCharBuffer();
        String s = cs.subSequence(0, length).toString();
        skipBytes(bytes, length * 2 + additionalSkipLength);
        return s;
    }

    /**
     * Returns the next {@code length} chars at the current position as an array
     * and advances the position by {@code length * 2 + additionalSkipLength} bytes.
     */
    public static char[] getChars(ByteBuffer bytes, int length, int additionalSkipLength) {
        char[] dest = new char[length];
        // The char view does not move the byte buffer's position; skipBytes does.
        bytes.asCharBuffer().get(dest);
        skipBytes(bytes, length * 2 + additionalSkipLength);
        return dest;
    }

    /**
     * Returns the next {@code length} ints at the current position as an array
     * and advances the position by {@code length * 4 + additionalSkipLength} bytes.
     */
    public static int[] getInts(ByteBuffer bytes, int length, int additionalSkipLength) {
        int[] dest = new int[length];
        // The int view does not move the byte buffer's position; skipBytes does.
        bytes.asIntBuffer().get(dest);
        skipBytes(bytes, length * 4 + additionalSkipLength);
        return dest;
    }

    /**
     * Returns a VersionInfo for the bytes in the compact version integer.
     * The four version fields are taken from the most significant byte down
     * to the least significant byte.
     */
    public static VersionInfo getVersionInfoFromCompactInt(int version) {
        return VersionInfo.getInstance(
                version >>> 24, (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
    }

    // private variables -------------------------------------------------

    /**
     * Magic numbers to authenticate the data file
     */
    private static final byte MAGIC1 = (byte)0xda;
    private static final byte MAGIC2 = (byte)0x27;

    /**
     * File format authentication values
     */
    private static final byte CHAR_SET_ = 0;
    private static final byte CHAR_SIZE_ = 2;

    /**
     * Error messages
     */
    private static final String MAGIC_NUMBER_AUTHENTICATION_FAILED_ =
                       "ICUBinary data file error: Magic number authentication failed";
    private static final String HEADER_AUTHENTICATION_FAILED_ =
        "ICUBinary data file error: Header authentication failed";
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/java.base/share/classes/jdk/internal/icu/impl/Norm2AllModes.java Tue Jan 14 15:23:01 2020 -0800 @@ -0,0 +1,292 @@
/*
 * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 *******************************************************************************
 *   Copyright (C) 2009-2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *******************************************************************************
 */

package jdk.internal.icu.impl;

import java.io.IOException;

import jdk.internal.icu.text.Normalizer2;
import jdk.internal.icu.util.VersionInfo;

/**
 * Bundles one NormalizerImpl with the composing and decomposing Normalizer2
 * views built on top of it, and caches one bundle each for the "nfc" and
 * "nfkc" data files.
 */
public final class Norm2AllModes {
    // Public API dispatch via Normalizer2 subclasses -------------------------- ***

    /**
     * Normalizer2 implementation for the old UNORM_NONE: text is copied
     * through unchanged and everything counts as normalized.
     */
    public static final class NoopNormalizer2 extends Normalizer2 {
        /** Replaces the contents of dest with src; src and dest must be distinct objects. */
        @Override
        public StringBuilder normalize(CharSequence src, StringBuilder dest) {
            if (dest == src) {
                throw new IllegalArgumentException();
            }
            dest.setLength(0);
            return dest.append(src);
        }

        /** Appends src to dest; src and dest must be distinct objects. */
        @Override
        public Appendable normalize(CharSequence src, Appendable dest) {
            if (dest == src) {
                throw new IllegalArgumentException();
            }
            try {
                return dest.append(src);
            } catch (IOException e) {
                throw new InternalError(e.toString(), e);
            }
        }

        /** Plain append; first and second must be distinct objects. */
        @Override
        public StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second) {
            if (first == second) {
                throw new IllegalArgumentException();
            }
            return first.append(second);
        }

        /** Plain append; first and second must be distinct objects. */
        @Override
        public StringBuilder append(StringBuilder first, CharSequence second) {
            if (first == second) {
                throw new IllegalArgumentException();
            }
            return first.append(second);
        }

        /** This normalizer never reports a decomposition. */
        @Override
        public String getDecomposition(int c) {
            return null;
        }

        // No need to override the default getRawDecomposition().

        @Override
        public boolean isNormalized(CharSequence s) {
            return true;
        }

        @Override
        public int spanQuickCheckYes(CharSequence s) {
            return s.length();
        }

        @Override
        public boolean hasBoundaryBefore(int c) {
            return true;
        }
    }

    /**
     * Intermediate class:
     * Has NormalizerImpl and does boilerplate argument checking and setup.
     * Subclasses supply the actual normalization through the two abstract hooks.
     */
    public abstract static class Normalizer2WithImpl extends Normalizer2 {
        public final NormalizerImpl impl;

        public Normalizer2WithImpl(NormalizerImpl ni) {
            impl = ni;
        }

        // normalize

        /** Clears dest and writes the normalized form of src into it. */
        @Override
        public StringBuilder normalize(CharSequence src, StringBuilder dest) {
            if (dest == src) {
                throw new IllegalArgumentException();
            }
            dest.setLength(0);
            NormalizerImpl.ReorderingBuffer out =
                new NormalizerImpl.ReorderingBuffer(impl, dest, src.length());
            normalize(src, out);
            return dest;
        }

        /** Writes the normalized form of src to dest, flushing the buffer at the end. */
        @Override
        public Appendable normalize(CharSequence src, Appendable dest) {
            if (dest == src) {
                throw new IllegalArgumentException();
            }
            NormalizerImpl.ReorderingBuffer out =
                new NormalizerImpl.ReorderingBuffer(impl, dest, src.length());
            normalize(src, out);
            out.flush();
            return dest;
        }

        /** Mode-specific normalization of src into buffer. */
        protected abstract void normalize(CharSequence src, NormalizerImpl.ReorderingBuffer buffer);

        // normalize and append

        @Override
        public StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second) {
            return normalizeSecondAndAppend(first, second, true);
        }

        @Override
        public StringBuilder append(StringBuilder first, CharSequence second) {
            return normalizeSecondAndAppend(first, second, false);
        }

        /**
         * Shared implementation of the two append flavours.
         * @param doNormalize passed through to the subclass hook
         * @return first, after second has been appended to it
         * @throws IllegalArgumentException if first and second are the same object
         */
        public StringBuilder normalizeSecondAndAppend(
                StringBuilder first, CharSequence second, boolean doNormalize) {
            if (first == second) {
                throw new IllegalArgumentException();
            }
            NormalizerImpl.ReorderingBuffer out = new NormalizerImpl.ReorderingBuffer(
                impl, first, first.length() + second.length());
            normalizeAndAppend(second, doNormalize, out);
            return first;
        }

        /** Mode-specific append of src to buffer. */
        protected abstract void normalizeAndAppend(
                CharSequence src, boolean doNormalize, NormalizerImpl.ReorderingBuffer buffer);

        @Override
        public String getDecomposition(int c) {
            return impl.getDecomposition(c);
        }

        @Override
        public int getCombiningClass(int c) {
            int norm16 = impl.getNorm16(c);
            return impl.getCC(norm16);
        }

        // quick checks

        /** True when the quick-check span covers the whole of s. */
        @Override
        public boolean isNormalized(CharSequence s) {
            final int length = s.length();
            return length == spanQuickCheckYes(s);
        }
    }

    /** Normalizer2 view that delegates to the decompose operations of NormalizerImpl. */
    public static final class DecomposeNormalizer2 extends Normalizer2WithImpl {
        public DecomposeNormalizer2(NormalizerImpl ni) {
            super(ni);
        }

        @Override
        protected void normalize(CharSequence src, NormalizerImpl.ReorderingBuffer buffer) {
            impl.decompose(src, 0, src.length(), buffer);
        }

        @Override
        protected void normalizeAndAppend(
                CharSequence src, boolean doNormalize, NormalizerImpl.ReorderingBuffer buffer) {
            impl.decomposeAndAppend(src, doNormalize, buffer);
        }

        @Override
        public int spanQuickCheckYes(CharSequence s) {
            // No output buffer is supplied for the quick check.
            return impl.decompose(s, 0, s.length(), null);
        }

        @Override
        public boolean hasBoundaryBefore(int c) {
            return impl.hasDecompBoundaryBefore(c);
        }
    }

    /** Normalizer2 view that delegates to the compose operations of NormalizerImpl. */
    public static final class ComposeNormalizer2 extends Normalizer2WithImpl {
        private final boolean onlyContiguous;

        public ComposeNormalizer2(NormalizerImpl ni, boolean fcc) {
            super(ni);
            onlyContiguous = fcc;
        }

        @Override
        protected void normalize(CharSequence src, NormalizerImpl.ReorderingBuffer buffer) {
            impl.compose(src, 0, src.length(), onlyContiguous, true, buffer);
        }

        @Override
        protected void normalizeAndAppend(
                CharSequence src, boolean doNormalize, NormalizerImpl.ReorderingBuffer buffer) {
            impl.composeAndAppend(src, doNormalize, onlyContiguous, buffer);
        }

        @Override
        public boolean isNormalized(CharSequence s) {
            // 5: small destCapacity for substring normalization
            NormalizerImpl.ReorderingBuffer scratch =
                new NormalizerImpl.ReorderingBuffer(impl, new StringBuilder(), 5);
            return impl.compose(s, 0, s.length(), onlyContiguous, false, scratch);
        }

        @Override
        public int spanQuickCheckYes(CharSequence s) {
            int result = impl.composeQuickCheck(s, 0, s.length(), onlyContiguous, true);
            return result >>> 1;
        }

        @Override
        public boolean hasBoundaryBefore(int c) {
            return impl.hasCompBoundaryBefore(c);
        }
    }

    // instance cache ---------------------------------------------------------- *** 

    public final NormalizerImpl impl;
    public final ComposeNormalizer2 comp;
    public final DecomposeNormalizer2 decomp;

    private Norm2AllModes(NormalizerImpl ni) {
        impl = ni;
        comp = new ComposeNormalizer2(ni, false);
        decomp = new DecomposeNormalizer2(ni);
    }

    /** Returns the holder's instance, or rethrows the failure recorded while loading it. */
    private static Norm2AllModes getInstanceFromSingleton(Norm2AllModesSingleton singleton) {
        final RuntimeException failure = singleton.exception;
        if (failure != null) {
            throw failure;
        }
        return singleton.allModes;
    }

    public static Norm2AllModes getNFCInstance() {
        return getInstanceFromSingleton(NFCSingleton.INSTANCE);
    }

    public static Norm2AllModes getNFKCInstance() {
        return getInstanceFromSingleton(NFKCSingleton.INSTANCE);
    }

    public static final NoopNormalizer2 NOOP_NORMALIZER2 = new NoopNormalizer2();

    /**
     * Result of one load attempt: either the loaded instance or the
     * RuntimeException that the attempt raised.
     */
    private static final class Norm2AllModesSingleton {
        private Norm2AllModes allModes;
        private RuntimeException exception;

        private Norm2AllModesSingleton(String name) {
            try {
                @SuppressWarnings("deprecation")
                String dataFileName = "/jdk/internal/icu/impl/data/icudt"
                    + VersionInfo.ICU_DATA_VERSION_PATH + "/" + name + ".nrm";
                NormalizerImpl loaded = new NormalizerImpl().load(dataFileName);
                allModes = new Norm2AllModes(loaded);
            } catch (RuntimeException e) {
                // Remember the failure so each later lookup can rethrow it.
                exception = e;
            }
        }
    }

    private static final class NFCSingleton {
        private static final Norm2AllModesSingleton INSTANCE = new Norm2AllModesSingleton("nfc");
    }

    private static final class NFKCSingleton {
        private static final Norm2AllModesSingleton INSTANCE = new Norm2AllModesSingleton("nfkc");
    }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/java.base/share/classes/jdk/internal/icu/impl/NormalizerImpl.java Tue Jan 14 15:23:01 2020 -0800 @@ -0,0 +1,2193 @@ +/* + * Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + ******************************************************************************* + * Copyright (C) 2009-2014, International Business Machines + * Corporation and others. All Rights Reserved. 
+ ******************************************************************************* + */ +package jdk.internal.icu.impl; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import jdk.internal.icu.lang.UCharacter; +import jdk.internal.icu.text.Normalizer2; +import jdk.internal.icu.text.UTF16; +import jdk.internal.icu.util.CodePointTrie; +import jdk.internal.icu.util.VersionInfo; + +// Original filename in ICU4J: Normalizer2Impl.java +public final class NormalizerImpl { + public static final class Hangul { + /* Korean Hangul and Jamo constants */ + public static final int JAMO_L_BASE=0x1100; /* "lead" jamo */ + public static final int JAMO_V_BASE=0x1161; /* "vowel" jamo */ + public static final int JAMO_T_BASE=0x11a7; /* "trail" jamo */ + + public static final int HANGUL_BASE=0xac00; + public static final int HANGUL_END=0xd7a3; + + public static final int JAMO_L_COUNT=19; + public static final int JAMO_V_COUNT=21; + public static final int JAMO_T_COUNT=28; + + public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT; + public static final int HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT; + + public static boolean isHangul(int c) { + return HANGUL_BASE<=c && c<HANGUL_LIMIT; + } + public static boolean isHangulLV(int c) { + c-=HANGUL_BASE; + return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0; + } + + /** + * Decomposes c, which must be a Hangul syllable, into buffer + * and returns the length of the decomposition (2 or 3). + */ + public static int decompose(int c, Appendable buffer) { + try { + c-=HANGUL_BASE; + int c2=c%JAMO_T_COUNT; + c/=JAMO_T_COUNT; + buffer.append((char)(JAMO_L_BASE+c/JAMO_V_COUNT)); + buffer.append((char)(JAMO_V_BASE+c%JAMO_V_COUNT)); + if(c2==0) { + return 2; + } else { + buffer.append((char)(JAMO_T_BASE+c2)); + return 3; + } + } catch(IOException e) { + throw new InternalError(e); + } + } + } + + /** + * Writable buffer that takes care of canonical ordering. 
+ * Its Appendable methods behave like the C++ implementation's + * appendZeroCC() methods. + * <p> + * If dest is a StringBuilder, then the buffer writes directly to it. + * Otherwise, the buffer maintains a StringBuilder for intermediate text segments + * until no further changes are necessary and whole segments are appended. + * append() methods that take combining-class values always write to the StringBuilder. + * Other append() methods flush and append to the Appendable. + */ + public static final class ReorderingBuffer implements Appendable { + public ReorderingBuffer(NormalizerImpl ni, Appendable dest, int destCapacity) { + impl=ni; + app=dest; + if (app instanceof StringBuilder) { + appIsStringBuilder=true; + str=(StringBuilder)dest; + // In Java, the constructor subsumes public void init(int destCapacity) + str.ensureCapacity(destCapacity); + reorderStart=0; + if(str.length()==0) { + lastCC=0; + } else { + setIterator(); + lastCC=previousCC(); + // Set reorderStart after the last code point with cc<=1 if there is one. 
+ if(lastCC>1) { + while(previousCC()>1) {} + } + reorderStart=codePointLimit; + } + } else { + appIsStringBuilder=false; + str=new StringBuilder(); + reorderStart=0; + lastCC=0; + } + } + + public boolean isEmpty() { return str.length()==0; } + public int length() { return str.length(); } + public int getLastCC() { return lastCC; } + + public StringBuilder getStringBuilder() { return str; } + + public boolean equals(CharSequence s, int start, int limit) { + return UTF16Plus.equal(str, 0, str.length(), s, start, limit); + } + + public void append(int c, int cc) { + if(lastCC<=cc || cc==0) { + str.appendCodePoint(c); + lastCC=cc; + if(cc<=1) { + reorderStart=str.length(); + } + } else { + insert(c, cc); + } + } + public void append(CharSequence s, int start, int limit, boolean isNFD, + int leadCC, int trailCC) { + if(start==limit) { + return; + } + if(lastCC<=leadCC || leadCC==0) { + if(trailCC<=1) { + reorderStart=str.length()+(limit-start); + } else if(leadCC<=1) { + reorderStart=str.length()+1; // Ok if not a code point boundary. + } + str.append(s, start, limit); + lastCC=trailCC; + } else { + int c=Character.codePointAt(s, start); + start+=Character.charCount(c); + insert(c, leadCC); // insert first code point + while(start<limit) { + c=Character.codePointAt(s, start); + start+=Character.charCount(c); + if(start<limit) { + if (isNFD) { + leadCC = getCCFromYesOrMaybe(impl.getNorm16(c)); + } else { + leadCC = impl.getCC(impl.getNorm16(c)); + } + } else { + leadCC=trailCC; + } + append(c, leadCC); + } + } + } + // The following append() methods work like C++ appendZeroCC(). + // They assume that the cc or trailCC of their input is 0. + // Most of them implement Appendable interface methods. 
+ @Override + public ReorderingBuffer append(char c) { + str.append(c); + lastCC=0; + reorderStart=str.length(); + return this; + } + public void appendZeroCC(int c) { + str.appendCodePoint(c); + lastCC=0; + reorderStart=str.length(); + } + @Override + public ReorderingBuffer append(CharSequence s) { + if(s.length()!=0) { + str.append(s); + lastCC=0; + reorderStart=str.length(); + } + return this; + } + @Override + public ReorderingBuffer append(CharSequence s, int start, int limit) { + if(start!=limit) { + str.append(s, start, limit); + lastCC=0; + reorderStart=str.length(); + } + return this; + } + /** + * Flushes from the intermediate StringBuilder to the Appendable, + * if they are different objects. + * Used after recomposition. + * Must be called at the end when writing to a non-StringBuilder Appendable. + */ + public void flush() { + if(appIsStringBuilder) { + reorderStart=str.length(); + } else { + try { + app.append(str); + str.setLength(0); + reorderStart=0; + } catch(IOException e) { + throw new InternalError(e); // Avoid declaring "throws IOException". + } + } + lastCC=0; + } + /** + * Flushes from the intermediate StringBuilder to the Appendable, + * if they are different objects. + * Then appends the new text to the Appendable or StringBuilder. + * Normally used after quick check loops find a non-empty sequence. + */ + public ReorderingBuffer flushAndAppendZeroCC(CharSequence s, int start, int limit) { + if(appIsStringBuilder) { + str.append(s, start, limit); + reorderStart=str.length(); + } else { + try { + app.append(str).append(s, start, limit); + str.setLength(0); + reorderStart=0; + } catch(IOException e) { + throw new InternalError(e); // Avoid declaring "throws IOException". 
+ } + } + lastCC=0; + return this; + } + public void remove() { + str.setLength(0); + lastCC=0; + reorderStart=0; + } + public void removeSuffix(int suffixLength) { + int oldLength=str.length(); + str.delete(oldLength-suffixLength, oldLength); + lastCC=0; + reorderStart=str.length(); + } + + // Inserts c somewhere before the last character. + // Requires 0<cc<lastCC which implies reorderStart<limit. + private void insert(int c, int cc) { + for(setIterator(), skipPrevious(); previousCC()>cc;) {} + // insert c at codePointLimit, after the character with prevCC<=cc + if(c<=0xffff) { + str.insert(codePointLimit, (char)c); + if(cc<=1) { + reorderStart=codePointLimit+1; + } + } else { + str.insert(codePointLimit, Character.toChars(c)); + if(cc<=1) { + reorderStart=codePointLimit+2; + } + } + } + + private final NormalizerImpl impl; + private final Appendable app; + private final StringBuilder str; + private final boolean appIsStringBuilder; + private int reorderStart; + private int lastCC; + + // private backward iterator + private void setIterator() { codePointStart=str.length(); } + private void skipPrevious() { // Requires 0<codePointStart. + codePointLimit=codePointStart; + codePointStart=str.offsetByCodePoints(codePointStart, -1); + } + private int previousCC() { // Returns 0 if there is no previous character. + codePointLimit=codePointStart; + if(reorderStart>=codePointStart) { + return 0; + } + int c=str.codePointBefore(codePointStart); + codePointStart-=Character.charCount(c); + return impl.getCCFromYesOrMaybeCP(c); + } + private int codePointStart, codePointLimit; + } + + // TODO: Propose as public API on the UTF16 class. + // TODO: Propose widening UTF16 methods that take char to take int. + // TODO: Propose widening UTF16 methods that take String to take CharSequence. + public static final class UTF16Plus { + /** + * Is this code point a lead surrogate (U+d800..U+dbff)? 
+ * @param c code unit or code point + * @return true or false + */ + public static boolean isLeadSurrogate(int c) { return (c & 0xfffffc00) == 0xd800; } + /** + * Assuming c is a surrogate code point (UTF16.isSurrogate(c)), + * is it a lead surrogate? + * @param c code unit or code point + * @return true or false + */ + public static boolean isSurrogateLead(int c) { return (c&0x400)==0; } + + /** + * Compares two CharSequence subsequences for binary equality. + * @param s1 first sequence + * @param start1 start offset in first sequence + * @param limit1 limit offset in first sequence + * @param s2 second sequence + * @param start2 start offset in second sequence + * @param limit2 limit offset in second sequence + * @return true if s1.subSequence(start1, limit1) contains the same text + * as s2.subSequence(start2, limit2) + */ + public static boolean equal(CharSequence s1, int start1, int limit1, + CharSequence s2, int start2, int limit2) { + if((limit1-start1)!=(limit2-start2)) { + return false; + } + if(s1==s2 && start1==start2) { + return true; + } + while(start1<limit1) { + if(s1.charAt(start1++)!=s2.charAt(start2++)) { + return false; + } + } + return true; + } + } + + public NormalizerImpl() {} + + private static final class IsAcceptable implements ICUBinary.Authenticate { + public boolean isDataVersionAcceptable(byte version[]) { + return version[0]==4; + } + } + private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable(); + private static final int DATA_FORMAT = 0x4e726d32; // "Nrm2" + + public NormalizerImpl load(ByteBuffer bytes) { + try { + dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE); + int indexesLength=bytes.getInt()/4; // inIndexes[IX_NORM_TRIE_OFFSET]/4 + if(indexesLength<=IX_MIN_LCCC_CP) { + throw new InternalError("Normalizer2 data: not enough indexes"); + } + int[] inIndexes=new int[indexesLength]; + inIndexes[0]=indexesLength*4; + for(int i=1; i<indexesLength; ++i) { + inIndexes[i]=bytes.getInt(); + 
} + + minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP]; + minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP]; + minLcccCP=inIndexes[IX_MIN_LCCC_CP]; + + minYesNo=inIndexes[IX_MIN_YES_NO]; + minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY]; + minNoNo=inIndexes[IX_MIN_NO_NO]; + minNoNoCompBoundaryBefore=inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE]; + minNoNoCompNoMaybeCC=inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC]; + minNoNoEmpty=inIndexes[IX_MIN_NO_NO_EMPTY]; + limitNoNo=inIndexes[IX_LIMIT_NO_NO]; + minMaybeYes=inIndexes[IX_MIN_MAYBE_YES]; + assert((minMaybeYes&7)==0); // 8-aligned for noNoDelta bit fields + centerNoNoDelta=(minMaybeYes>>DELTA_SHIFT)-MAX_DELTA-1; + + // Read the normTrie. + int offset=inIndexes[IX_NORM_TRIE_OFFSET]; + int nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET]; + int triePosition = bytes.position(); + normTrie = CodePointTrie.Fast16.fromBinary(bytes); + int trieLength = bytes.position() - triePosition; + if(trieLength>(nextOffset-offset)) { + throw new InternalError("Normalizer2 data: not enough bytes for normTrie"); + } + ICUBinary.skipBytes(bytes, (nextOffset-offset)-trieLength); // skip padding after trie bytes + + // Read the composition and mapping data. + offset=nextOffset; + nextOffset=inIndexes[IX_SMALL_FCD_OFFSET]; + int numChars=(nextOffset-offset)/2; + if(numChars!=0) { + maybeYesCompositions=ICUBinary.getString(bytes, numChars, 0); + extraData=maybeYesCompositions.substring((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT); + } + + // smallFCD: new in formatVersion 2 + offset=nextOffset; + smallFCD=new byte[0x100]; + bytes.get(smallFCD); + + return this; + } catch(IOException e) { + throw new InternalError(e); + } + } + public NormalizerImpl load(String name) { + return load(ICUBinary.getRequiredData(name)); + } + + // The trie stores values for lead surrogate code *units*. + // Surrogate code *points* are inert. + public int getNorm16(int c) { + return UTF16Plus.isLeadSurrogate(c) ? 
INERT : normTrie.get(c); + } + public int getRawNorm16(int c) { return normTrie.get(c); } + public boolean isAlgorithmicNoNo(int norm16) { return limitNoNo<=norm16 && norm16<minMaybeYes; } + public boolean isCompNo(int norm16) { return minNoNo<=norm16 && norm16<minMaybeYes; } + public boolean isDecompYes(int norm16) { return norm16<minYesNo || minMaybeYes<=norm16; } + + public int getCC(int norm16) { + if(norm16>=MIN_NORMAL_MAYBE_YES) { + return getCCFromNormalYesOrMaybe(norm16); + } + if(norm16<minNoNo || limitNoNo<=norm16) { + return 0; + } + return getCCFromNoNo(norm16); + } + public static int getCCFromNormalYesOrMaybe(int norm16) { + return (norm16 >> OFFSET_SHIFT) & 0xff; + } + public static int getCCFromYesOrMaybe(int norm16) { + return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0; + } + public int getCCFromYesOrMaybeCP(int c) { + if (c < minCompNoMaybeCP) { return 0; } + return getCCFromYesOrMaybe(getNorm16(c)); + } + + /** + * Returns the FCD data for code point c. + * @param c A Unicode code point. + * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0. + */ + public int getFCD16(int c) { + if(c<minDecompNoCP) { + return 0; + } else if(c<=0xffff) { + if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; } + } + return getFCD16FromNormData(c); + } + /** Returns true if the single-or-lead code unit c might have non-zero FCD data. */ + public boolean singleLeadMightHaveNonZeroFCD16(int lead) { + // 0<=lead<=0xffff + byte bits=smallFCD[lead>>8]; + if(bits==0) { return false; } + return ((bits>>((lead>>5)&7))&1)!=0; + } + + /** Gets the FCD value from the regular normalization data. 
*/ + public int getFCD16FromNormData(int c) { + int norm16=getNorm16(c); + if (norm16 >= limitNoNo) { + if(norm16>=MIN_NORMAL_MAYBE_YES) { + // combining mark + norm16=getCCFromNormalYesOrMaybe(norm16); + return norm16|(norm16<<8); + } else if(norm16>=minMaybeYes) { + return 0; + } else { // isDecompNoAlgorithmic(norm16) + int deltaTrailCC = norm16 & DELTA_TCCC_MASK; + if (deltaTrailCC <= DELTA_TCCC_1) { + return deltaTrailCC >> OFFSET_SHIFT; + } + // Maps to an isCompYesAndZeroCC. + c=mapAlgorithmic(c, norm16); + norm16=getRawNorm16(c); + } + } + if(norm16<=minYesNo || isHangulLVT(norm16)) { + // no decomposition or Hangul syllable, all zeros + return 0; + } + // c decomposes, get everything from the variable-length extra data + int mapping=norm16>>OFFSET_SHIFT; + int firstUnit=extraData.charAt(mapping); + int fcd16=firstUnit>>8; // tccc + if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) { + fcd16|=extraData.charAt(mapping-1)&0xff00; // lccc + } + return fcd16; + } + + /** + * Gets the decomposition for one code point. + * @param c code point + * @return c's decomposition, if it has one; returns null if it does not have a decomposition + */ + public String getDecomposition(int c) { + int norm16; + if(c<minDecompNoCP || isMaybeOrNonZeroCC(norm16=getNorm16(c))) { + // c does not decompose + return null; + } + int decomp = -1; + if(isDecompNoAlgorithmic(norm16)) { + // Maps to an isCompYesAndZeroCC. + decomp=c=mapAlgorithmic(c, norm16); + // The mapping might decompose further. 
+ norm16 = getRawNorm16(c); + } + if (norm16 < minYesNo) { + if(decomp<0) { + return null; + } else { + return UTF16.valueOf(decomp); + } + } else if(isHangulLV(norm16) || isHangulLVT(norm16)) { + // Hangul syllable: decompose algorithmically + StringBuilder buffer=new StringBuilder(); + Hangul.decompose(c, buffer); + return buffer.toString(); + } + // c decomposes, get everything from the variable-length extra data + int mapping=norm16>>OFFSET_SHIFT; + int length=extraData.charAt(mapping++)&MAPPING_LENGTH_MASK; + return extraData.substring(mapping, mapping+length); + } + + // Fixed norm16 values. + public static final int MIN_YES_YES_WITH_CC=0xfe02; + public static final int JAMO_VT=0xfe00; + public static final int MIN_NORMAL_MAYBE_YES=0xfc00; + public static final int JAMO_L=2; // offset=1 hasCompBoundaryAfter=FALSE + public static final int INERT=1; // offset=0 hasCompBoundaryAfter=TRUE + + // norm16 bit 0 is comp-boundary-after. + public static final int HAS_COMP_BOUNDARY_AFTER=1; + public static final int OFFSET_SHIFT=1; + + // For algorithmic one-way mappings, norm16 bits 2..1 indicate the + // tccc (0, 1, >1) for quick FCC boundary-after tests. + public static final int DELTA_TCCC_0=0; + public static final int DELTA_TCCC_1=2; + public static final int DELTA_TCCC_GT_1=4; + public static final int DELTA_TCCC_MASK=6; + public static final int DELTA_SHIFT=3; + + public static final int MAX_DELTA=0x40; + + // Byte offsets from the start of the data, after the generic header. + public static final int IX_NORM_TRIE_OFFSET=0; + public static final int IX_EXTRA_DATA_OFFSET=1; + public static final int IX_SMALL_FCD_OFFSET=2; + public static final int IX_RESERVED3_OFFSET=3; + public static final int IX_TOTAL_SIZE=7; + public static final int MIN_CCC_LCCC_CP=0x300; + // Code point thresholds for quick check codes. 
+ /** Index of minDecompNoCP in the indexes array read by load(). */ public static final int IX_MIN_DECOMP_NO_CP=8; + /** Index of minCompNoMaybeCP in the indexes array read by load(). */ public static final int IX_MIN_COMP_NO_MAYBE_CP=9; + + // Norm16 value thresholds for quick check combinations and types of extra data. + + /** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */ + public static final int IX_MIN_YES_NO=10; + /** Mappings are comp-normalized. */ + public static final int IX_MIN_NO_NO=11; + /** Index of limitNoNo (norm16 values from limitNoNo up to minMaybeYes are algorithmic no-no, see isAlgorithmicNoNo()). */ + public static final int IX_LIMIT_NO_NO=12; + /** Index of minMaybeYes. */ + public static final int IX_MIN_MAYBE_YES=13; + + /** Mappings only in [minYesNoMappingsOnly..minNoNo[. */ + public static final int IX_MIN_YES_NO_MAPPINGS_ONLY=14; + /** Mappings are not comp-normalized but have a comp boundary before. */ + public static final int IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE=15; + /** Mappings do not have a comp boundary before. */ + public static final int IX_MIN_NO_NO_COMP_NO_MAYBE_CC=16; + /** Mappings to the empty string. */ + public static final int IX_MIN_NO_NO_EMPTY=17; + + /** Index of minLcccCP; load() rejects data whose index count is not greater than this. */ + public static final int IX_MIN_LCCC_CP=18; + public static final int IX_COUNT=20; + + /** Bit in a mapping's first unit: the unit before it (mapping-1) holds ccc in bits 7..0 and lccc in bits 15..8. */ + public static final int MAPPING_HAS_CCC_LCCC_WORD=0x80; + public static final int MAPPING_HAS_RAW_MAPPING=0x40; + // unused bit 0x20; + /** Mask for the mapping length in a mapping's first unit. */ + public static final int MAPPING_LENGTH_MASK=0x1f; + + /** Set in the first unit of the last entry of a compositions list. */ + public static final int COMP_1_LAST_TUPLE=0x8000; + /** Set in the first unit of an entry that occupies three units instead of two. */ + public static final int COMP_1_TRIPLE=1; + /** Trail characters below this value (0..33FF) are looked up with a single-unit key in combine(). */ + public static final int COMP_1_TRAIL_LIMIT=0x3400; + public static final int COMP_1_TRAIL_MASK=0x7ffe; + public static final int COMP_1_TRAIL_SHIFT=9; // 10-1 for the "triple" bit + public static final int COMP_2_TRAIL_SHIFT=6; + public static final int COMP_2_TRAIL_MASK=0xffc0; + + // higher-level functionality ------------------------------------------ *** + + /** + * Decomposes s[src, limit[ and writes the result to dest. + * limit is the exclusive end index into s (an int, so the C++ convention of a NULL limit for NUL-terminated text does not apply). + * destLengthEstimate is the initial dest buffer capacity; any negative value such as -1 selects limit-src. 
+ */ + public void decompose(CharSequence s, int src, int limit, StringBuilder dest, + int destLengthEstimate) { + /* negative estimate: size the buffer for the input length */ if(destLengthEstimate<0) { + destLengthEstimate=limit-src; + } + /* previous content of dest is discarded */ dest.setLength(0); + ReorderingBuffer buffer=new ReorderingBuffer(this, dest, destLengthEstimate); + decompose(s, src, limit, buffer); + } + + // Dual functionality: + // buffer!=null: normalize (decompose into buffer) + // buffer==null: isNormalized/quickCheck/spanQuickCheckYes (nothing is written) + // Returns limit when the whole range was processed; in the buffer==null mode + // it returns prevBoundary as soon as a "no" character or an out-of-order cc is found. + public int decompose(CharSequence s, int src, int limit, + ReorderingBuffer buffer) { + int minNoCP=minDecompNoCP; + + int prevSrc; + int c=0; + int norm16=0; + + // only for quick check + int prevBoundary=src; + int prevCC=0; + + for(;;) { + // count code units below the minimum or with irrelevant data for the quick check + for(prevSrc=src; src!=limit;) { + if( (c=s.charAt(src))<minNoCP || + isMostDecompYesAndZeroCC(norm16=normTrie.bmpGet(c)) + ) { + ++src; + } else if(!UTF16Plus.isLeadSurrogate(c)) { + break; + } else { + char c2; + if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) { + c = Character.toCodePoint((char)c, c2); + norm16 = normTrie.suppGet(c); + if (isMostDecompYesAndZeroCC(norm16)) { + src += 2; + } else { + break; + } + } else { + ++src; // unpaired lead surrogate: inert + } + } + } + // copy these code units all at once + if(src!=prevSrc) { + if(buffer!=null) { + buffer.flushAndAppendZeroCC(s, prevSrc, src); + } else { + prevCC=0; + prevBoundary=src; + } + } + if(src==limit) { + break; + } + + // Check one above-minimum, relevant code point. 
+ src+=Character.charCount(c); + if(buffer!=null) { + decompose(c, norm16, buffer); + } else { + if(isDecompYes(norm16)) { + int cc=getCCFromYesOrMaybe(norm16); + if(prevCC<=cc || cc==0) { + prevCC=cc; + if(cc<=1) { + prevBoundary=src; + } + continue; + } + } + return prevBoundary; // "no" or cc out of order + } + } + return src; + } + public void decomposeAndAppend(CharSequence s, boolean doDecompose, ReorderingBuffer buffer) { + int limit=s.length(); + if(limit==0) { + return; + } + if(doDecompose) { + decompose(s, 0, limit, buffer); + return; + } + // Just merge the strings at the boundary. + int c=Character.codePointAt(s, 0); + int src=0; + int firstCC, prevCC, cc; + firstCC=prevCC=cc=getCC(getNorm16(c)); + while(cc!=0) { + prevCC=cc; + src+=Character.charCount(c); + if(src>=limit) { + break; + } + c=Character.codePointAt(s, src); + cc=getCC(getNorm16(c)); + }; + buffer.append(s, 0, src, false, firstCC, prevCC); + buffer.append(s, src, limit); + } + + // Very similar to composeQuickCheck(): Make the same changes in both places if relevant. + // doCompose: normalize + // !doCompose: isNormalized (buffer must be empty and initialized) + public boolean compose(CharSequence s, int src, int limit, + boolean onlyContiguous, + boolean doCompose, + ReorderingBuffer buffer) { + int prevBoundary=src; + int minNoMaybeCP=minCompNoMaybeCP; + + for (;;) { + // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point, + // or with (compYes && ccc==0) properties. 
+ int prevSrc; + int c = 0; + int norm16 = 0; + for (;;) { + if (src == limit) { + if (prevBoundary != limit && doCompose) { + buffer.append(s, prevBoundary, limit); + } + return true; + } + if( (c=s.charAt(src))<minNoMaybeCP || + isCompYesAndZeroCC(norm16=normTrie.bmpGet(c)) + ) { + ++src; + } else { + prevSrc = src++; + if (!UTF16Plus.isLeadSurrogate(c)) { + break; + } else { + char c2; + if (src != limit && Character.isLowSurrogate(c2 = s.charAt(src))) { + ++src; + c = Character.toCodePoint((char)c, c2); + norm16 = normTrie.suppGet(c); + if (!isCompYesAndZeroCC(norm16)) { + break; + } + } + } + } + } + // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo. + // The current character is either a "noNo" (has a mapping) + // or a "maybeYes" (combines backward) + // or a "yesYes" with ccc!=0. + // It is not a Hangul syllable or Jamo L because those have "yes" properties. + + // Medium-fast path: Handle cases that do not require full decomposition and recomposition. + if (!isMaybeOrNonZeroCC(norm16)) { // minNoNo <= norm16 < minMaybeYes + if (!doCompose) { + return false; + } + // Fast path for mapping a character that is immediately surrounded by boundaries. + // In this case, we need not decompose around the current character. + if (isDecompNoAlgorithmic(norm16)) { + // Maps to a single isCompYesAndZeroCC character + // which also implies hasCompBoundaryBefore. + if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) || + hasCompBoundaryBefore(s, src, limit)) { + if (prevBoundary != prevSrc) { + buffer.append(s, prevBoundary, prevSrc); + } + buffer.append(mapAlgorithmic(c, norm16), 0); + prevBoundary = src; + continue; + } + } else if (norm16 < minNoNoCompBoundaryBefore) { + // The mapping is comp-normalized which also implies hasCompBoundaryBefore. 
+ if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) || + hasCompBoundaryBefore(s, src, limit)) { + if (prevBoundary != prevSrc) { + buffer.append(s, prevBoundary, prevSrc); + } + int mapping = norm16 >> OFFSET_SHIFT; + int length = extraData.charAt(mapping++) & MAPPING_LENGTH_MASK; + buffer.append(extraData, mapping, mapping + length); + prevBoundary = src; + continue; + } + } else if (norm16 >= minNoNoEmpty) { + // The current character maps to nothing. + // Simply omit it from the output if there is a boundary before _or_ after it. + // The character itself implies no boundaries. + if (hasCompBoundaryBefore(s, src, limit) || + hasCompBoundaryAfter(s, prevBoundary, prevSrc, onlyContiguous)) { + if (prevBoundary != prevSrc) { + buffer.append(s, prevBoundary, prevSrc); + } + prevBoundary = src; + continue; + } + } + // Other "noNo" type, or need to examine more text around this character: + // Fall through to the slow path. + } else if (isJamoVT(norm16) && prevBoundary != prevSrc) { + char prev=s.charAt(prevSrc-1); + if(c<Hangul.JAMO_T_BASE) { + // The current character is a Jamo Vowel, + // compose with previous Jamo L and following Jamo T. + char l = (char)(prev-Hangul.JAMO_L_BASE); + if(l<Hangul.JAMO_L_COUNT) { + if (!doCompose) { + return false; + } + int t; + if (src != limit && + 0 < (t = (s.charAt(src) - Hangul.JAMO_T_BASE)) && + t < Hangul.JAMO_T_COUNT) { + // The next character is a Jamo T. + ++src; + } else if (hasCompBoundaryBefore(s, src, limit)) { + // No Jamo T follows, not even via decomposition. + t = 0; + } else { + t = -1; + } + if (t >= 0) { + int syllable = Hangul.HANGUL_BASE + + (l*Hangul.JAMO_V_COUNT + (c-Hangul.JAMO_V_BASE)) * + Hangul.JAMO_T_COUNT + t; + --prevSrc; // Replace the Jamo L as well. + if (prevBoundary != prevSrc) { + buffer.append(s, prevBoundary, prevSrc); + } + buffer.append((char)syllable); + prevBoundary = src; + continue; + } + // If we see L+V+x where x!=T then we drop to the slow path, + // decompose and recompose. 
+ // This is to deal with NFKC finding normal L and V but a + // compatibility variant of a T. + // We need to either fully compose that combination here + // (which would complicate the code and may not work with strange custom data) + // or use the slow path. + } + } else if (Hangul.isHangulLV(prev)) { + // The current character is a Jamo Trailing consonant, + // compose with previous Hangul LV that does not contain a Jamo T. + if (!doCompose) { + return false; + } + int syllable = prev + c - Hangul.JAMO_T_BASE; + --prevSrc; // Replace the Hangul LV as well. + if (prevBoundary != prevSrc) { + buffer.append(s, prevBoundary, prevSrc); + } + buffer.append((char)syllable); + prevBoundary = src; + continue; + } + // No matching context, or may need to decompose surrounding text first: + // Fall through to the slow path. + } else if (norm16 > JAMO_VT) { // norm16 >= MIN_YES_YES_WITH_CC + // One or more combining marks that do not combine-back: + // Check for canonical order, copy unchanged if ok and + // if followed by a character with a boundary-before. + int cc = getCCFromNormalYesOrMaybe(norm16); // cc!=0 + if (onlyContiguous /* FCC */ && getPreviousTrailCC(s, prevBoundary, prevSrc) > cc) { + // Fails FCD test, need to decompose and contiguously recompose. + if (!doCompose) { + return false; + } + } else { + // If !onlyContiguous (not FCC), then we ignore the tccc of + // the previous character which passed the quick check "yes && ccc==0" test. + int n16; + for (;;) { + if (src == limit) { + if (doCompose) { + buffer.append(s, prevBoundary, limit); + } + return true; + } + int prevCC = cc; + c = Character.codePointAt(s, src); + n16 = normTrie.get(c); + if (n16 >= MIN_YES_YES_WITH_CC) { + cc = getCCFromNormalYesOrMaybe(n16); + if (prevCC > cc) { + if (!doCompose) { + return false; + } + break; + } + } else { + break; + } + src += Character.charCount(c); + } + // src is after the last in-order combining mark; c and n16 describe the code point at src. 
+ // If there is a boundary here, then we continue with no change. + if (norm16HasCompBoundaryBefore(n16)) { + if (isCompYesAndZeroCC(n16)) { + src += Character.charCount(c); + } + continue; + } + // Use the slow path. There is no boundary in [prevSrc, src[. + } + } + + // Slow path: Find the nearest boundaries around the current character, + // decompose and recompose. + if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) { + c = Character.codePointBefore(s, prevSrc); + norm16 = normTrie.get(c); + if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { + prevSrc -= Character.charCount(c); + } + } + if (doCompose && prevBoundary != prevSrc) { + buffer.append(s, prevBoundary, prevSrc); + } + int recomposeStartIndex=buffer.length(); + // We know there is not a boundary here. + decomposeShort(s, prevSrc, src, false /* !stopAtCompBoundary */, onlyContiguous, + buffer); + // Decompose until the next boundary. + src = decomposeShort(s, src, limit, true /* stopAtCompBoundary */, onlyContiguous, + buffer); + recompose(buffer, recomposeStartIndex, onlyContiguous); + if(!doCompose) { + if(!buffer.equals(s, prevSrc, src)) { + return false; + } + buffer.remove(); + } + prevBoundary=src; + } + } + + /** + * Very similar to compose(): Make the same changes in both places if relevant. + * doSpan: spanQuickCheckYes (ignore bit 0 of the return value) + * !doSpan: quickCheck + * @return bits 31..1: spanQuickCheckYes (==s.length() if "yes") and + * bit 0: set if "maybe"; otherwise, if the span length<s.length() + * then the quick check result is "no" + */ + public int composeQuickCheck(CharSequence s, int src, int limit, + boolean onlyContiguous, boolean doSpan) { + int qcResult=0; + int prevBoundary=src; + int minNoMaybeCP=minCompNoMaybeCP; + + for(;;) { + // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point, + // or with (compYes && ccc==0) properties. 
+ int prevSrc; + int c = 0; + int norm16 = 0; + for (;;) { + if(src==limit) { + return (src<<1)|qcResult; // "yes" or "maybe" + } + if( (c=s.charAt(src))<minNoMaybeCP || + isCompYesAndZeroCC(norm16=normTrie.bmpGet(c)) + ) { + ++src; + } else { + prevSrc = src++; + if (!UTF16Plus.isLeadSurrogate(c)) { + break; + } else { + char c2; + if (src != limit && Character.isLowSurrogate(c2 = s.charAt(src))) { + ++src; + c = Character.toCodePoint((char)c, c2); + norm16 = normTrie.suppGet(c); + if (!isCompYesAndZeroCC(norm16)) { + break; + } + } + } + } + } + // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo. + // The current character is either a "noNo" (has a mapping) + // or a "maybeYes" (combines backward) + // or a "yesYes" with ccc!=0. + // It is not a Hangul syllable or Jamo L because those have "yes" properties. + + int prevNorm16 = INERT; + if (prevBoundary != prevSrc) { + prevBoundary = prevSrc; + if (!norm16HasCompBoundaryBefore(norm16)) { + c = Character.codePointBefore(s, prevSrc); + int n16 = getNorm16(c); + if (!norm16HasCompBoundaryAfter(n16, onlyContiguous)) { + prevBoundary -= Character.charCount(c); + prevNorm16 = n16; + } + } + } + + if(isMaybeOrNonZeroCC(norm16)) { + int cc=getCCFromYesOrMaybe(norm16); + if (onlyContiguous /* FCC */ && cc != 0 && + getTrailCCFromCompYesAndZeroCC(prevNorm16) > cc) { + // The [prevBoundary..prevSrc[ character + // passed the quick check "yes && ccc==0" test + // but is out of canonical order with the current combining mark. + } else { + // If !onlyContiguous (not FCC), then we ignore the tccc of + // the previous character which passed the quick check "yes && ccc==0" test. 
+ for (;;) { + if (norm16 < MIN_YES_YES_WITH_CC) { + if (!doSpan) { + qcResult = 1; + } else { + return prevBoundary << 1; // spanYes does not care to know it's "maybe" + } + } + if (src == limit) { + return (src<<1) | qcResult; // "yes" or "maybe" + } + int prevCC = cc; + c = Character.codePointAt(s, src); + norm16 = getNorm16(c); + if (isMaybeOrNonZeroCC(norm16)) { + cc = getCCFromYesOrMaybe(norm16); + if (!(prevCC <= cc || cc == 0)) { + break; + } + } else { + break; + } + src += Character.charCount(c); + } + // src is after the last in-order combining mark. + if (isCompYesAndZeroCC(norm16)) { + prevBoundary = src; + src += Character.charCount(c); + continue; + } + } + } + return prevBoundary<<1; // "no" + } + } + /** + * Appends s to buffer, composing across the seam when needed. + * If buffer is not empty and findNextCompBoundary() returns a non-zero index for s, + * the text from findPreviousCompBoundary() to the end of buffer is removed from buffer, + * joined with the text of s before that index, and the joined piece is composed into buffer. + * The rest of s is then composed (doCompose) or appended unchanged (!doCompose). + */ + public void composeAndAppend(CharSequence s, + boolean doCompose, + boolean onlyContiguous, + ReorderingBuffer buffer) { + int src=0, limit=s.length(); + if(!buffer.isEmpty()) { + int firstStarterInSrc=findNextCompBoundary(s, 0, limit, onlyContiguous); + if(0!=firstStarterInSrc) { + int lastStarterInDest=findPreviousCompBoundary(buffer.getStringBuilder(), + buffer.length(), onlyContiguous); + StringBuilder middle=new StringBuilder((buffer.length()-lastStarterInDest)+ + firstStarterInSrc+16); + middle.append(buffer.getStringBuilder(), lastStarterInDest, buffer.length()); + buffer.removeSuffix(buffer.length()-lastStarterInDest); + middle.append(s, 0, firstStarterInSrc); + compose(middle, 0, middle.length(), onlyContiguous, true, buffer); + src=firstStarterInSrc; + } + } + if(doCompose) { + compose(s, src, limit, onlyContiguous, true, buffer); + } else { + buffer.append(s, src, limit); + } + } + // Dual functionality: + // buffer!=null: normalize + // buffer==null: isNormalized/quickCheck/spanQuickCheckYes + public int makeFCD(CharSequence s, int src, int limit, ReorderingBuffer buffer) { + // Note: In this function we use buffer.appendZeroCC() because we track + // the lead and trail combining classes here, rather than leaving it to + // the 
ReorderingBuffer. + // The exception is the call to decomposeShort() which uses the buffer + // in the normal way. + + // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1. + // Similar to the prevBoundary in the compose() implementation. + int prevBoundary=src; + int prevSrc; + int c=0; + int prevFCD16=0; + int fcd16=0; + + for(;;) { + // count code units with lccc==0 + for(prevSrc=src; src!=limit;) { + if((c=s.charAt(src))<minLcccCP) { + prevFCD16=~c; + ++src; + } else if(!singleLeadMightHaveNonZeroFCD16(c)) { + prevFCD16=0; + ++src; + } else { + if (UTF16Plus.isLeadSurrogate(c)) { + char c2; + if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) { + c = Character.toCodePoint((char)c, c2); + } + } + if((fcd16=getFCD16FromNormData(c))<=0xff) { + prevFCD16=fcd16; + src+=Character.charCount(c); + } else { + break; + } + } + } + // copy these code units all at once + if(src!=prevSrc) { + if(src==limit) { + if(buffer!=null) { + buffer.flushAndAppendZeroCC(s, prevSrc, src); + } + break; + } + prevBoundary=src; + // We know that the previous character's lccc==0. + if(prevFCD16<0) { + // Fetching the fcd16 value was deferred for this below-minLcccCP code point. + int prev=~prevFCD16; + if(prev<minDecompNoCP) { + prevFCD16=0; + } else { + prevFCD16=getFCD16FromNormData(prev); + if(prevFCD16>1) { + --prevBoundary; + } + } + } else { + int p=src-1; + if( Character.isLowSurrogate(s.charAt(p)) && prevSrc<p && + Character.isHighSurrogate(s.charAt(p-1)) + ) { + --p; + // Need to fetch the previous character's FCD value because + // prevFCD16 was just for the trail surrogate code point. + prevFCD16=getFCD16FromNormData(Character.toCodePoint(s.charAt(p), s.charAt(p+1))); + // Still known to have lccc==0 because its lead surrogate unit had lccc==0. 
+ } + if(prevFCD16>1) { + prevBoundary=p; + } + } + if(buffer!=null) { + // The last lccc==0 character is excluded from the + // flush-and-append call in case it needs to be modified. + buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary); + buffer.append(s, prevBoundary, src); + } + // The start of the current character (c). + prevSrc=src; + } else if(src==limit) { + break; + } + + src+=Character.charCount(c); + // The current character (c) at [prevSrc..src[ has a non-zero lead combining class. + // Check for proper order, and decompose locally if necessary. + if((prevFCD16&0xff)<=(fcd16>>8)) { + // proper order: prev tccc <= current lccc + if((fcd16&0xff)<=1) { + prevBoundary=src; + } + if(buffer!=null) { + buffer.appendZeroCC(c); + } + prevFCD16=fcd16; + continue; + } else if(buffer==null) { + return prevBoundary; // quick check "no" + } else { + /* + * Back out the part of the source that we copied or appended + * already but is now going to be decomposed. + * prevSrc is set to after what was copied/appended. + */ + buffer.removeSuffix(prevSrc-prevBoundary); + /* + * Find the part of the source that needs to be decomposed, + * up to the next safe boundary. + */ + src=findNextFCDBoundary(s, src, limit); + /* + * The source text does not fulfill the conditions for FCD. + * Decompose and reorder a limited piece of the text. 
+ */ + decomposeShort(s, prevBoundary, src, false, false, buffer); + prevBoundary=src; + prevFCD16=0; + } + } + return src; + } + + public boolean hasDecompBoundaryBefore(int c) { + return c < minLcccCP || (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) || + norm16HasDecompBoundaryBefore(getNorm16(c)); + } + public boolean norm16HasDecompBoundaryBefore(int norm16) { + if (norm16 < minNoNoCompNoMaybeCC) { + return true; + } + if (norm16 >= limitNoNo) { + return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT; + } + // c decomposes, get everything from the variable-length extra data + int mapping=norm16>>OFFSET_SHIFT; + int firstUnit=extraData.charAt(mapping); + // true if leadCC==0 (hasFCDBoundaryBefore()) + return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(mapping-1)&0xff00)==0; + } + public boolean hasDecompBoundaryAfter(int c) { + if (c < minDecompNoCP) { + return true; + } + if (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) { + return true; + } + return norm16HasDecompBoundaryAfter(getNorm16(c)); + } + public boolean norm16HasDecompBoundaryAfter(int norm16) { + if(norm16 <= minYesNo || isHangulLVT(norm16)) { + return true; + } + if (norm16 >= limitNoNo) { + if (isMaybeOrNonZeroCC(norm16)) { + return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT; + } + // Maps to an isCompYesAndZeroCC. 
+ return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1; + } + // c decomposes, get everything from the variable-length extra data + int mapping=norm16>>OFFSET_SHIFT; + int firstUnit=extraData.charAt(mapping); + // decomp after-boundary: same as hasFCDBoundaryAfter(), + // fcd16<=1 || trailCC==0 + if(firstUnit>0x1ff) { + return false; // trailCC>1 + } + if(firstUnit<=0xff) { + return true; // trailCC==0 + } + // if(trailCC==1) test leadCC==0, same as checking for before-boundary + // true if leadCC==0 (hasFCDBoundaryBefore()) + return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(mapping-1)&0xff00)==0; + } + public boolean isDecompInert(int c) { return isDecompYesAndZeroCC(getNorm16(c)); } + + public boolean hasCompBoundaryBefore(int c) { + return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(getNorm16(c)); + } + public boolean hasCompBoundaryAfter(int c, boolean onlyContiguous) { + return norm16HasCompBoundaryAfter(getNorm16(c), onlyContiguous); + } + + private boolean isMaybe(int norm16) { return minMaybeYes<=norm16 && norm16<=JAMO_VT; } + private boolean isMaybeOrNonZeroCC(int norm16) { return norm16>=minMaybeYes; } + private static boolean isInert(int norm16) { return norm16==INERT; } + private static boolean isJamoVT(int norm16) { return norm16==JAMO_VT; } + private int hangulLVT() { return minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER; } + private boolean isHangulLV(int norm16) { return norm16==minYesNo; } + private boolean isHangulLVT(int norm16) { + return norm16==hangulLVT(); + } + private boolean isCompYesAndZeroCC(int norm16) { return norm16<minNoNo; } + // UBool isCompYes(uint16_t norm16) const { + // return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo; + // } + // UBool isCompYesOrMaybe(uint16_t norm16) const { + // return norm16<minNoNo || minMaybeYes<=norm16; + // } + // private boolean hasZeroCCFromDecompYes(int norm16) { + // return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT; + // } + private boolean isDecompYesAndZeroCC(int 
norm16) { + return norm16<minYesNo || + norm16==JAMO_VT || + (minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES); + } + /** + * A little faster and simpler than isDecompYesAndZeroCC() but does not include + * the MaybeYes which combine-forward and have ccc=0. + * (Standard Unicode 10 normalization does not have such characters.) + */ + private boolean isMostDecompYesAndZeroCC(int norm16) { + return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT; + } + private boolean isDecompNoAlgorithmic(int norm16) { return norm16>=limitNoNo; } + + // For use with isCompYes(). + // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC. + // static uint8_t getCCFromYes(uint16_t norm16) { + // return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0; + // } + private int getCCFromNoNo(int norm16) { + int mapping=norm16>>OFFSET_SHIFT; + if((extraData.charAt(mapping)&MAPPING_HAS_CCC_LCCC_WORD)!=0) { + return extraData.charAt(mapping-1)&0xff; + } else { + return 0; + } + } + int getTrailCCFromCompYesAndZeroCC(int norm16) { + if(norm16<=minYesNo) { + return 0; // yesYes and Hangul LV have ccc=tccc=0 + } else { + // For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here. + return extraData.charAt(norm16>>OFFSET_SHIFT)>>8; // tccc from yesNo + } + } + + // Requires algorithmic-NoNo. + private int mapAlgorithmic(int c, int norm16) { + return c+(norm16>>DELTA_SHIFT)-centerNoNoDelta; + } + + // Requires minYesNo<norm16<limitNoNo. 
+ // private int getMapping(int norm16) { return extraData+(norm16>>OFFSET_SHIFT); } + + /** + * Maps a norm16 value to the start of its compositions list. + * @param norm16 norm16 value; values below JAMO_L or at/above MIN_NORMAL_MAYBE_YES have no list + * @return index into maybeYesCompositions, or -1 + */ + private int getCompositionsListForDecompYes(int norm16) { + if(norm16<JAMO_L || MIN_NORMAL_MAYBE_YES<=norm16) { + return -1; + } else { + if((norm16-=minMaybeYes)<0) { + // norm16<minMaybeYes: index into extraData which is a substring at + // maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes] + // same as (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16 + norm16+=MIN_NORMAL_MAYBE_YES; // for yesYes; if Jamo L: harmless empty list + } + return norm16>>OFFSET_SHIFT; + } + } + /** + * Locates the compositions list that follows the mapping of a composite. + * @param norm16 norm16 value of a character that has both a mapping and a compositions list + * @return index into maybeYesCompositions + */ + private int getCompositionsListForComposite(int norm16) { + // A composite has both mapping & compositions list. + int list=((MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16)>>OFFSET_SHIFT; + int firstUnit=maybeYesCompositions.charAt(list); + return list+ // mapping in maybeYesCompositions + 1+ // +1 to skip the first unit with the mapping length + (firstUnit&MAPPING_LENGTH_MASK); // + mapping length + } + + // Decompose a short piece of text which is likely to contain characters that + // fail the quick check loop and/or where the quick check loop's overhead + // is unlikely to be amortized. + // Called by the compose() and makeFCD() implementations. + // NOTE(review): the declaration that follows is private, so the earlier remark that this is public for collation implementation code does not hold here. 
+ private int decomposeShort( + CharSequence s, int src, int limit, + boolean stopAtCompBoundary, boolean onlyContiguous, + ReorderingBuffer buffer) { + while(src<limit) { + int c=Character.codePointAt(s, src); + if (stopAtCompBoundary && c < minCompNoMaybeCP) { + return src; + } + int norm16 = getNorm16(c); + if (stopAtCompBoundary && norm16HasCompBoundaryBefore(norm16)) { + return src; + } + src+=Character.charCount(c); + decompose(c, norm16, buffer); + if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { + return src; + } + } + return src; + } + private void decompose(int c, int norm16, ReorderingBuffer buffer) { + // get the decomposition and the lead and trail cc's + if (norm16 >= limitNoNo) { + if (isMaybeOrNonZeroCC(norm16)) { + buffer.append(c, getCCFromYesOrMaybe(norm16)); + return; + } + // Maps to an isCompYesAndZeroCC. + c=mapAlgorithmic(c, norm16); + norm16=getRawNorm16(c); + } + if (norm16 < minYesNo) { + // c does not decompose + buffer.append(c, 0); + } else if(isHangulLV(norm16) || isHangulLVT(norm16)) { + // Hangul syllable: decompose algorithmically + Hangul.decompose(c, buffer); + } else { + // c decomposes, get everything from the variable-length extra data + int mapping=norm16>>OFFSET_SHIFT; + int firstUnit=extraData.charAt(mapping); + int length=firstUnit&MAPPING_LENGTH_MASK; + int leadCC, trailCC; + trailCC=firstUnit>>8; + if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) { + leadCC=extraData.charAt(mapping-1)>>8; + } else { + leadCC=0; + } + ++mapping; // skip over the firstUnit + buffer.append(extraData, mapping, mapping+length, true, leadCC, trailCC); + } + } + + /** + * Finds the recomposition result for + * a forward-combining "lead" character, + * specified with a pointer to its compositions list, + * and a backward-combining "trail" character. 
+ * + * <p>If the lead and trail characters combine, then this function returns + * the following "compositeAndFwd" value: + * <pre> + * Bits 21..1 composite character + * Bit 0 set if the composite is a forward-combining starter + * </pre> + * otherwise it returns -1. + * + * <p>The compositions list has (trail, compositeAndFwd) pair entries, + * encoded as either pairs or triples of 16-bit units. + * The last entry has the high bit of its first unit set. + * + * <p>The list is sorted by ascending trail characters (there are no duplicates). + * A linear search is used. + * + * <p>See normalizer2impl.h for a more detailed description + * of the compositions list format. + */ + private static int combine(String compositions, int list, int trail) { + int key1, firstUnit; + if(trail<COMP_1_TRAIL_LIMIT) { + // trail character is 0..33FF + // result entry may have 2 or 3 units + key1=(trail<<1); + while(key1>(firstUnit=compositions.charAt(list))) { + list+=2+(firstUnit&COMP_1_TRIPLE); + } + if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { + if((firstUnit&COMP_1_TRIPLE)!=0) { + return (compositions.charAt(list+1)<<16)|compositions.charAt(list+2); + } else { + return compositions.charAt(list+1); + } + } + } else { + // trail character is 3400..10FFFF + // result entry has 3 units + key1=COMP_1_TRAIL_LIMIT+(((trail>>COMP_1_TRAIL_SHIFT))&~COMP_1_TRIPLE); + int key2=(trail<<COMP_2_TRAIL_SHIFT)&0xffff; + int secondUnit; + for(;;) { + if(key1>(firstUnit=compositions.charAt(list))) { + list+=2+(firstUnit&COMP_1_TRIPLE); + } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { + if(key2>(secondUnit=compositions.charAt(list+1))) { + if((firstUnit&COMP_1_LAST_TUPLE)!=0) { + break; + } else { + list+=3; + } + } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) { + return ((secondUnit&~COMP_2_TRAIL_MASK)<<16)|compositions.charAt(list+2); + } else { + break; + } + } else { + break; + } + } + } + return -1; + } + + /* + * Recomposes the buffer text starting at recomposeStartIndex + * (which is in NFD 
- decomposed and canonically ordered), + * and truncates the buffer contents. + * + * Note that recomposition never lengthens the text: + * Any character consists of either one or two code units; + * a composition may contain at most one more code unit than the original starter, + * while the combining mark that is removed has at least one code unit. + */ + private void recompose(ReorderingBuffer buffer, int recomposeStartIndex, + boolean onlyContiguous) { + StringBuilder sb=buffer.getStringBuilder(); + int p=recomposeStartIndex; + if(p==sb.length()) { + return; + } + + int starter, pRemove; + int compositionsList; + int c, compositeAndFwd; + int norm16; + int cc, prevCC; + boolean starterIsSupplementary; + + // Some of the following variables are not used until we have a forward-combining starter + // and are only initialized now to avoid compiler warnings. + compositionsList=-1; // used as indicator for whether we have a forward-combining starter + starter=-1; + starterIsSupplementary=false; + prevCC=0; + + for(;;) { + c=sb.codePointAt(p); + p+=Character.charCount(c); + norm16=getNorm16(c); + cc=getCCFromYesOrMaybe(norm16); + if( // this character combines backward and + isMaybe(norm16) && + // we have seen a starter that combines forward and + compositionsList>=0 && + // the backward-combining character is not blocked + (prevCC<cc || prevCC==0) + ) { + if(isJamoVT(norm16)) { + // c is a Jamo V/T, see if we can compose it with the previous character. + if(c<Hangul.JAMO_T_BASE) { + // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T. + char prev=(char)(sb.charAt(starter)-Hangul.JAMO_L_BASE); + if(prev<Hangul.JAMO_L_COUNT) { + pRemove=p-1; + char syllable=(char) + (Hangul.HANGUL_BASE+ + (prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))* + Hangul.JAMO_T_COUNT); + char t; + if(p!=sb.length() && (t=(char)(sb.charAt(p)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) { + ++p; + syllable+=t; // The next character was a Jamo T. 
+ } + sb.setCharAt(starter, syllable); + // remove the Jamo V/T + sb.delete(pRemove, p); + p=pRemove; + } + } + /* + * No "else" for Jamo T: + * Since the input is in NFD, there are no Hangul LV syllables that + * a Jamo T could combine with. + * All Jamo Ts are combined above when handling Jamo Vs. + */ + if(p==sb.length()) { + break; + } + compositionsList=-1; + continue; + } else if((compositeAndFwd=combine(maybeYesCompositions, compositionsList, c))>=0) { + // The starter and the combining mark (c) do combine. + int composite=compositeAndFwd>>1; + + // Remove the combining mark. + pRemove=p-Character.charCount(c); // pRemove & p: start & limit of the combining mark + sb.delete(pRemove, p); + p=pRemove; + // Replace the starter with the composite. + if(starterIsSupplementary) { + if(composite>0xffff) { + // both are supplementary + sb.setCharAt(starter, UTF16.getLeadSurrogate(composite)); + sb.setCharAt(starter+1, UTF16.getTrailSurrogate(composite)); + } else { + sb.setCharAt(starter, (char)c); + sb.deleteCharAt(starter+1); + // The composite is shorter than the starter, + // move the intermediate characters forward one. + starterIsSupplementary=false; + --p; + } + } else if(composite>0xffff) { + // The composite is longer than the starter, + // move the intermediate characters back one. + starterIsSupplementary=true; + sb.setCharAt(starter, UTF16.getLeadSurrogate(composite)); + sb.insert(starter+1, UTF16.getTrailSurrogate(composite)); + ++p; + } else { + // both are on the BMP + sb.setCharAt(starter, (char)composite); + } + + // Keep prevCC because we removed the combining mark. + + if(p==sb.length()) { + break; + } + // Is the composite a starter that combines forward? + if((compositeAndFwd&1)!=0) { + compositionsList= + getCompositionsListForComposite(getRawNorm16(composite)); + } else { + compositionsList=-1; + } + + // We combined; continue with looking for compositions. 
+ continue; + } + } + + // no combination this time + prevCC=cc; + if(p==sb.length()) { + break; + } + + // If c did not combine, then check if it is a starter. + if(cc==0) { + // Found a new starter. + if((compositionsList=getCompositionsListForDecompYes(norm16))>=0) { + // It may combine with something, prepare for it. + if(c<=0xffff) { + starterIsSupplementary=false; + starter=p-1; + } else { + starterIsSupplementary=true; + starter=p-2; + } + } + } else if(onlyContiguous) { + // FCC: no discontiguous compositions; any intervening character blocks. + compositionsList=-1; + } + } + buffer.flush(); + } + + /** + * Does c have a composition boundary before it? + * True if its decomposition begins with a character that has + * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()). + * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes + * (isCompYesAndZeroCC()) so we need not decompose. + */ + private boolean hasCompBoundaryBefore(int c, int norm16) { + return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(norm16); + } + private boolean norm16HasCompBoundaryBefore(int norm16) { + return norm16 < minNoNoCompNoMaybeCC || isAlgorithmicNoNo(norm16); + } + private boolean hasCompBoundaryBefore(CharSequence s, int src, int limit) { + return src == limit || hasCompBoundaryBefore(Character.codePointAt(s, src)); + } + private boolean norm16HasCompBoundaryAfter(int norm16, boolean onlyContiguous) { + return (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 && + (!onlyContiguous || isTrailCC01ForCompBoundaryAfter(norm16)); + } + private boolean hasCompBoundaryAfter(CharSequence s, int start, int p, boolean onlyContiguous) { + return start == p || hasCompBoundaryAfter(Character.codePointBefore(s, p), onlyContiguous); + } + /** For FCC: Given norm16 HAS_COMP_BOUNDARY_AFTER, does it have tccc<=1? */ + private boolean isTrailCC01ForCompBoundaryAfter(int norm16) { + return isInert(norm16) || (isDecompNoAlgorithmic(norm16) ? 
+ (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1 : extraData.charAt(norm16 >> OFFSET_SHIFT) <= 0x1ff); + } + + private int findPreviousCompBoundary(CharSequence s, int p, boolean onlyContiguous) { + while(p>0) { + int c=Character.codePointBefore(s, p); + int norm16 = getNorm16(c); + if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { + break; + } + p-=Character.charCount(c); + if(hasCompBoundaryBefore(c, norm16)) { + break; + } + } + return p; + } + private int findNextCompBoundary(CharSequence s, int p, int limit, boolean onlyContiguous) { + while(p<limit) { + int c=Character.codePointAt(s, p); + int norm16=normTrie.get(c); + if(hasCompBoundaryBefore(c, norm16)) { + break; + } + p+=Character.charCount(c); + if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { + break; + } + } + return p; + } + + + private int findNextFCDBoundary(CharSequence s, int p, int limit) { + while(p<limit) { + int c=Character.codePointAt(s, p); + int norm16; + if (c < minLcccCP || norm16HasDecompBoundaryBefore(norm16 = getNorm16(c))) { + break; + } + p+=Character.charCount(c); + if (norm16HasDecompBoundaryAfter(norm16)) { + break; + } + } + return p; + } + + /** + * Get the canonical decomposition + * sherman for ComposedCharIter + */ + public static int getDecompose(int chars[], String decomps[]) { + Normalizer2 impl = Normalizer2.getNFDInstance(); + + int length=0; + int norm16 = 0; + int ch = -1; + int i = 0; + + while (++ch < 0x2fa1e) { //no cannoical above 0x3ffff + //TBD !!!! 
the hack code heres save us about 50ms for startup + //need a better solution/lookup + if (ch == 0x30ff) + ch = 0xf900; + else if (ch == 0x115bc) + ch = 0x1d15e; + else if (ch == 0x1d1c1) + ch = 0x2f800; + + String s = impl.getDecomposition(ch); + + if(s != null && i < chars.length) { + chars[i] = ch; + decomps[i++] = s; + } + } + return i; + } + + //------------------------------------------------------ + // special method for Collation (RBTableBuilder.build()) + //------------------------------------------------------ + private static boolean needSingleQuotation(char c) { + return (c >= 0x0009 && c <= 0x000D) || + (c >= 0x0020 && c <= 0x002F) || + (c >= 0x003A && c <= 0x0040) || + (c >= 0x005B && c <= 0x0060) || + (c >= 0x007B && c <= 0x007E); + } + + public static String canonicalDecomposeWithSingleQuotation(String string) { + Normalizer2 impl = Normalizer2.getNFDInstance(); + char[] src = string.toCharArray(); + int srcIndex = 0; + int srcLimit = src.length; + char[] dest = new char[src.length * 3]; //MAX_BUF_SIZE_DECOMPOSE = 3 + int destIndex = 0; + int destLimit = dest.length; + + int prevSrc; + String norm; + int reorderStartIndex, length; + char c1, c2; + int cp; + int minNoMaybe = 0x00c0; + int cc, prevCC, trailCC; + char[] p; + int pStart; + + // initialize + reorderStartIndex = 0; + prevCC = 0; + norm = null; + cp = 0; + pStart = 0; + + cc = trailCC = -1; // initialize to bogus value + c1 = 0; + for (;;) { + prevSrc=srcIndex; + //quick check (1)less than minNoMaybe (2)no decomp (3)hangual + while (srcIndex != srcLimit && + ((c1 = src[srcIndex]) < minNoMaybe || + (norm = impl.getDecomposition(cp = string.codePointAt(srcIndex))) == null || + (c1 >= '\uac00' && c1 <= '\ud7a3'))) { // Hangul Syllables + prevCC = 0; + srcIndex += (cp < 0x10000) ? 
1 : 2; + } + + // copy these code units all at once + if (srcIndex != prevSrc) { + length = srcIndex - prevSrc; + if ((destIndex + length) <= destLimit) { + System.arraycopy(src,prevSrc,dest,destIndex,length); + } + + destIndex += length; + reorderStartIndex = destIndex; + } + + // end of source reached? + if (srcIndex == srcLimit) { + break; + } + + // cp already contains *src and norm32 is set for it, increment src + srcIndex += (cp < 0x10000) ? 1 : 2; + + if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) { + c2 = 0; + length = 1; + + if (Character.isHighSurrogate(c1) + || Character.isLowSurrogate(c1)) { + norm = null; + } + } else { + length = 2; + c2 = src[srcIndex-1]; + } + + // get the decomposition and the lead and trail cc's + if (norm == null) { + // cp does not decompose + cc = trailCC = UCharacter.getCombiningClass(cp); + p = null; + pStart = -1; + } else { + + pStart = 0; + p = norm.toCharArray(); + length = p.length; + int cpNum = norm.codePointCount(0, length); + cc= UCharacter.getCombiningClass(norm.codePointAt(0)); + trailCC= UCharacter.getCombiningClass(norm.codePointAt(cpNum-1)); + if (length == 1) { + // fastpath a single code unit from decomposition + c1 = p[pStart]; + c2 = 0; + p = null; + pStart = -1; + } + } + + if((destIndex + length * 3) >= destLimit) { // 2 SingleQuotations + // buffer overflow + char[] tmpBuf = new char[destLimit * 2]; + System.arraycopy(dest, 0, tmpBuf, 0, destIndex); + dest = tmpBuf; + destLimit = dest.length; + } + + // append the decomposition to the destination buffer, assume length>0 + { + int reorderSplit = destIndex; + if (p == null) { + // fastpath: single code point + if (needSingleQuotation(c1)) { + //if we need single quotation, no need to consider "prevCC" + //and it must NOT be a supplementary pair + dest[destIndex++] = '\''; + dest[destIndex++] = c1; + dest[destIndex++] = '\''; + trailCC = 0; + } else if(cc != 0 && cc < prevCC) { + // (c1, c2) is out of order with respect to the preceding + // text + 
destIndex += length; + trailCC = insertOrdered(dest, reorderStartIndex, + reorderSplit, destIndex, c1, c2, cc); + } else { + // just append (c1, c2) + dest[destIndex++] = c1; + if(c2 != 0) { + dest[destIndex++] = c2; + } + } + } else { + // general: multiple code points (ordered by themselves) + // from decomposition + if (needSingleQuotation(p[pStart])) { + dest[destIndex++] = '\''; + dest[destIndex++] = p[pStart++]; + dest[destIndex++] = '\''; + length--; + do { + dest[destIndex++] = p[pStart++]; + } while(--length > 0); + } else if (cc != 0 && cc < prevCC) { + destIndex += length; + trailCC = mergeOrdered(dest, reorderStartIndex, + reorderSplit, p, pStart, + pStart+length); + } else { + // just append the decomposition + do { + dest[destIndex++] = p[pStart++]; + } while (--length > 0); + } + } + } + prevCC = trailCC; + if(prevCC == 0) { + reorderStartIndex = destIndex; + } + } + + return new String(dest, 0, destIndex); + } + + /** + * simpler, single-character version of mergeOrdered() - + * bubble-insert one single code point into the preceding string + * which is already canonically ordered + * (c, c2) may or may not yet have been inserted at src[current]..src[p] + * + * it must be p=current+lengthof(c, c2) i.e. p=current+(c2==0 ? 
1 : 2) + * + * before: src[start]..src[current] is already ordered, and + * src[current]..src[p] may or may not hold (c, c2) but + * must be exactly the same length as (c, c2) + * after: src[start]..src[p] is ordered + * + * @return the trailing combining class + */ + private static int/*unsigned byte*/ insertOrdered(char[] source, + int start, + int current, int p, + char c1, char c2, + int/*unsigned byte*/ cc) { + int back, preBack; + int r; + int prevCC, trailCC=cc; + + if (start<current && cc!=0) { + // search for the insertion point where cc>=prevCC + preBack=back=current; + + PrevArgs prevArgs = new PrevArgs(); + prevArgs.current = current; + prevArgs.start = start; + prevArgs.src = source; + prevArgs.c1 = c1; + prevArgs.c2 = c2; + + // get the prevCC + prevCC=getPrevCC(prevArgs); + preBack = prevArgs.current; + + if(cc<prevCC) { + // this will be the last code point, so keep its cc + trailCC=prevCC; + back=preBack; + while(start<preBack) { + prevCC=getPrevCC(prevArgs); + preBack=prevArgs.current; + if(cc>=prevCC) { + break; + } + back=preBack; + } + + // this is where we are right now with all these indicies: + // [start]..[pPreBack] 0..? 
code points that we can ignore + // [pPreBack]..[pBack] 0..1 code points with prevCC<=cc + // [pBack]..[current] 0..n code points with >cc, move up to insert (c, c2) + // [current]..[p] 1 code point (c, c2) with cc + + // move the code units in between up + r=p; + do { + source[--r]=source[--current]; + } while (back!=current); + } + } + + // insert (c1, c2) + source[current] = c1; + if (c2!=0) { + source[(current+1)] = c2; + } + + // we know the cc of the last code point + return trailCC; + } + /** + * merge two UTF-16 string parts together + * to canonically order (order by combining classes) their concatenation + * + * the two strings may already be adjacent, so that the merging is done + * in-place if the two strings are not adjacent, then the buffer holding the + * first one must be large enough + * the second string may or may not be ordered in itself + * + * before: [start]..[current] is already ordered, and + * [next]..[limit] may be ordered in itself, but + * is not in relation to [start..current[ + * after: [start..current+(limit-next)[ is ordered + * + * the algorithm is a simple bubble-sort that takes the characters from + * src[next++] and inserts them in correct combining class order into the + * preceding part of the string + * + * since this function is called much less often than the single-code point + * insertOrdered(), it just uses that for easier maintenance + * + * @return the trailing combining class + */ + private static int /*unsigned byte*/ mergeOrdered(char[] source, + int start, + int current, + char[] data, + int next, + int limit) { + int r; + int /*unsigned byte*/ cc, trailCC=0; + boolean adjacent; + + adjacent= current==next; + NextCCArgs ncArgs = new NextCCArgs(); + ncArgs.source = data; + ncArgs.next = next; + ncArgs.limit = limit; + + if(start!=current) { + + while(ncArgs.next<ncArgs.limit) { + cc=getNextCC(ncArgs); + if(cc==0) { + // does not bubble back + trailCC=0; + if(adjacent) { + current=ncArgs.next; + } else { + 
data[current++]=ncArgs.c1; + if(ncArgs.c2!=0) { + data[current++]=ncArgs.c2; + } + } + break; + } else { + r=current+(ncArgs.c2==0 ? 1 : 2); + trailCC=insertOrdered(source,start, current, r, + ncArgs.c1, ncArgs.c2, cc); + current=r; + } + } + } + + if(ncArgs.next==ncArgs.limit) { + // we know the cc of the last code point + return trailCC; + } else { + if(!adjacent) { + // copy the second string part + do { + source[current++]=data[ncArgs.next++]; + } while(ncArgs.next!=ncArgs.limit); + ncArgs.limit=current; + } + PrevArgs prevArgs = new PrevArgs(); + prevArgs.src = data; + prevArgs.start = start; + prevArgs.current = ncArgs.limit; + return getPrevCC(prevArgs); + } + + } + private static final class PrevArgs{ + char[] src; + int start; + int current; + char c1; + char c2; + } + + private static final class NextCCArgs{ + char[] source; + int next; + int limit; + char c1; + char c2; + } + private static int /*unsigned byte*/ getNextCC(NextCCArgs args) { + args.c1=args.source[args.next++]; + args.c2=0; + + if (UTF16.isTrailSurrogate(args.c1)) { + /* unpaired second surrogate */ + return 0; + } else if (!UTF16.isLeadSurrogate(args.c1)) { + return UCharacter.getCombiningClass(args.c1); + } else if (args.next!=args.limit && + UTF16.isTrailSurrogate(args.c2=args.source[args.next])){ + ++args.next; + return UCharacter.getCombiningClass(Character.toCodePoint(args.c1, args.c2)); + } else { + /* unpaired first surrogate */ + args.c2=0; + return 0; + } + } + private static int /*unsigned*/ getPrevCC(PrevArgs args) { + args.c1=args.src[--args.current]; + args.c2=0; + + if (args.c1 < MIN_CCC_LCCC_CP) { + return 0; + } else if (UTF16.isLeadSurrogate(args.c1)) { + /* unpaired first surrogate */ + return 0; + } else if (!UTF16.isTrailSurrogate(args.c1)) { + return UCharacter.getCombiningClass(args.c1); + } else if (args.current!=args.start && + UTF16.isLeadSurrogate(args.c2=args.src[args.current-1])) { + --args.current; + return 
UCharacter.getCombiningClass(Character.toCodePoint(args.c2, args.c1)); + } else { + /* unpaired second surrogate */ + args.c2=0; + return 0; + } + } + + private int getPreviousTrailCC(CharSequence s, int start, int p) { + if (start == p) { + return 0; + } + return getFCD16(Character.codePointBefore(s, p)); + } + + private VersionInfo dataVersion; + + // BMP code point thresholds for quick check loops looking at single UTF-16 code units. + private int minDecompNoCP; + private int minCompNoMaybeCP; + private int minLcccCP; + + // Norm16 value thresholds for quick check combinations and types of extra data. + private int minYesNo; + private int minYesNoMappingsOnly; + private int minNoNo; + private int minNoNoCompBoundaryBefore; + private int minNoNoCompNoMaybeCC; + private int minNoNoEmpty; + private int limitNoNo; + private int centerNoNoDelta; + private int minMaybeYes; + + private CodePointTrie.Fast16 normTrie; + private String maybeYesCompositions; + private String extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters + private byte[] smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0 +}
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/java.base/share/classes/jdk/internal/icu/impl/Punycode.java Tue Jan 14 15:23:01 2020 -0800 @@ -0,0 +1,512 @@
/*
 * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
/*
 *******************************************************************************
 * Copyright (C) 2003-2004, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
//
// CHANGELOG
//      2005-05-19 Edward Wang
//          - copy this file from icu4jsrc_3_2/src/com/ibm/icu/text/Punycode.java
//          - move from package com.ibm.icu.text to package sun.net.idn
//          - use ParseException instead of StringPrepParseException
//      2007-08-14 Martin Buchholz
//          - remove redundant casts
//
package jdk.internal.icu.impl;

import java.text.ParseException;
import jdk.internal.icu.lang.UCharacter;
import jdk.internal.icu.text.UTF16;

/**
 * Ported code from ICU punycode.c
 * @author ram
 */

/* NOTE: the class is declared public (an earlier comment here called it package-private). */
public final class Punycode {

    /* Punycode parameters for Bootstring */
    private static final int BASE           = 36;
    private static final int TMIN           = 1;
    private static final int TMAX           = 26;
    private static final int SKEW           = 38;
    private static final int DAMP           = 700;
    private static final int INITIAL_BIAS   = 72;
    private static final int INITIAL_N      = 0x80;

    /* "Basic" Unicode/ASCII code points */
    private static final int HYPHEN         = 0x2d;
    private static final int DELIMITER      = HYPHEN;

    private static final int ZERO           = 0x30;
    private static final int NINE           = 0x39;

    private static final int SMALL_A        = 0x61;
    private static final int SMALL_Z        = 0x7a;

    private static final int CAPITAL_A      = 0x41;
    private static final int CAPITAL_Z      = 0x5a;

    // Limit on the number of input code points accepted by encode();
    // also the minimum working-buffer size used by decode().
    // TODO: eliminate the 256 limitation
    private static final int MAX_CP_COUNT   = 256;

    private static final int UINT_MAGIC     = 0x80000000;
    private static final long ULONG_MAGIC   = 0x8000000000000000L;

    /**
     * Bias adaptation function of the Bootstring algorithm.
     *
     * @param delta     the delta that was just encoded/decoded
     * @param length    the number of code points handled so far (must be &gt; 0)
     * @param firstTime true for the first delta, which is scaled by DAMP
     *                  instead of 2
     * @return the new bias
     */
    private static int adaptBias(int delta, int length, boolean firstTime){
        if(firstTime){
            delta /=DAMP;
        }else{
            delta /=  2;
        }
        delta += delta/length;

        int count=0;
        for(; delta>((BASE-TMIN)*TMAX)/2; count+=BASE) {
            delta/=(BASE-TMIN);
        }

        return count+(((BASE-TMIN+1)*delta)/(delta+SKEW));
    }

    /**
     * basicToDigit[] contains the numeric value of a basic code
     * point (for use in representing integers) in the range 0 to
     * BASE-1, or -1 if b does not represent a value.
     * The table has 256 entries; entries 0x80..0xFF are all -1.
     */
    static final int[]    basicToDigit= new int[]{
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };

    /**
     * Maps an ASCII letter to the requested case; any other char is
     * returned unchanged.
     */
    private static char asciiCaseMap(char b, boolean uppercase) {
        if(uppercase) {
            if(SMALL_A<=b && b<=SMALL_Z) {
                b-=(SMALL_A-CAPITAL_A);
            }
        } else {
            if(CAPITAL_A<=b && b<=CAPITAL_Z) {
                b+=(SMALL_A-CAPITAL_A);
            }
        }
        return b;
    }

    /**
     * digitToBasic() returns the basic code point whose value
     * (when used for representing integers) is d, which must be in the
     * range 0 to BASE-1. The lowercase form is used unless the uppercase flag is
     * true, in which case the uppercase form is used.
     */
    private static char digitToBasic(int digit, boolean uppercase) {
        /*  0..25 map to ASCII a..z or A..Z */
        /* 26..35 map to ASCII 0..9         */
        if(digit<26) {
            if(uppercase) {
                return (char)(CAPITAL_A+digit);
            } else {
                return (char)(SMALL_A+digit);
            }
        } else {
            return (char)((ZERO-26)+digit);
        }
    }

    /**
     * Converts Unicode to Punycode.
     * The input string must not contain single, unpaired surrogates.
     * The output consists of ASCII characters only.
     *
     * <p>The output buffer grows as needed; earlier versions wrote into a
     * fixed 256-char array and silently dropped any output beyond it.
     *
     * @param src       the Unicode text to encode
     * @param caseFlags optional per-code-unit flags, indexed like src: basic
     *                  letters are case-mapped according to the flag, and the
     *                  flag of an extended code point selects the case of the
     *                  last digit of its delta; may be null
     * @return the Punycode form of src
     * @throws ParseException            if src contains an unpaired surrogate
     * @throws IndexOutOfBoundsException if src has more than 256 code points
     */
    public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws ParseException{

        int[] cpBuffer = new int[MAX_CP_COUNT];
        int n, delta, handledCPCount, basicLength, bias, j, m, q, k, t, srcCPCount;
        char c, c2;
        int srcLength = src.length();
        StringBuffer result = new StringBuffer();
        /*
         * Handle the basic code points and
         * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
         */
        srcCPCount=0;

        for(j=0; j<srcLength; ++j) {
            if(srcCPCount==MAX_CP_COUNT) {
                /* too many input code points */
                throw new IndexOutOfBoundsException();
            }
            c=src.charAt(j);
            if(isBasic(c)) {
                cpBuffer[srcCPCount++]=0;
                result.append(caseFlags!=null ? asciiCaseMap(c, caseFlags[j]) : c);
            } else {
                n=((caseFlags!=null && caseFlags[j])? 1 : 0)<<31;
                if(!UTF16.isSurrogate(c)) {
                    n|=c;
                } else if(UTF16.isLeadSurrogate(c) && (j+1)<srcLength && UTF16.isTrailSurrogate(c2=src.charAt(j+1))) {
                    ++j;

                    n|=UCharacter.getCodePoint(c, c2);
                } else {
                    /* error: unmatched surrogate */
                    throw new ParseException("Illegal char found", -1);
                }
                cpBuffer[srcCPCount++]=n;
            }
        }

        /* Finish the basic string - if it is not empty - with a delimiter. */
        basicLength=result.length();
        if(basicLength>0) {
            // Cast needed: DELIMITER is an int, and append(int) would append "45".
            result.append((char)DELIMITER);
        }

        /*
         * handledCPCount is the number of code points that have been handled
         * basicLength is the number of basic code points
         * result.length() is the number of chars that have been output
         */

        /* Initialize the state: */
        n=INITIAL_N;
        delta=0;
        bias=INITIAL_BIAS;

        /* Main encoding loop: */
        for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) {
            /*
             * All non-basic code points < n have been handled already.
             * Find the next larger one:
             */
            for(m=0x7fffffff, j=0; j<srcCPCount; ++j) {
                q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
                if(n<=q && q<m) {
                    m=q;
                }
            }

            /*
             * Increase delta enough to advance the decoder's
             * <n,i> state to <m,0>, but guard against overflow:
             */
            if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
                throw new RuntimeException("Internal program error");
            }
            delta+=(m-n)*(handledCPCount+1);
            n=m;

            /* Encode a sequence of same code points n */
            for(j=0; j<srcCPCount; ++j) {
                q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
                if(q<n) {
                    ++delta;
                } else if(q==n) {
                    /* Represent delta as a generalized variable-length integer: */
                    for(q=delta, k=BASE; /* no condition */; k+=BASE) {

                        /* RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt

                        t=k-bias;
                        if(t<TMIN) {
                            t=TMIN;
                        } else if(t>TMAX) {
                            t=TMAX;
                        }
                        */

                        t=k-bias;
                        if(t<TMIN) {
                            t=TMIN;
                        } else if(k>=(bias+TMAX)) {
                            t=TMAX;
                        }

                        if(q<t) {
                            break;
                        }

                        result.append(digitToBasic(t+(q-t)%(BASE-t), false));
                        q=(q-t)/(BASE-t);
                    }

                    /* The last digit carries the case flag of the code point. */
                    result.append(digitToBasic(q, (cpBuffer[j]<0)));
                    bias=adaptBias(delta, handledCPCount+1,(handledCPCount==basicLength));
                    delta=0;
                    ++handledCPCount;
                }
            }

            ++delta;
            ++n;
        }

        return result;
    }

    private static boolean isBasic(int ch){
        return (ch < INITIAL_N);
    }

    private static boolean isBasicUpperCase(int ch){
        return( CAPITAL_A <= ch && ch <= CAPITAL_Z);
    }

    private static boolean isSurrogate(int ch){
        return (((ch)&0xfffff800)==0xd800);
    }

    /**
     * Converts Punycode to Unicode.
     *
     * <p>The working buffer is sized from the input length (two UTF-16 code
     * units per input char, never less than 256), so no decoded code point
     * is dropped for lack of space.
     *
     * @param src       the Punycode text to decode
     * @param caseFlags optional output array; when non-null, entry k is set
     *                  to whether output code unit k came from an uppercase
     *                  basic code point or final delta digit. It must be
     *                  large enough for the decoded text; may be null
     * @return the decoded Unicode text
     * @throws ParseException if src is not well-formed Punycode: a non-basic
     *         char before the last delimiter, a char after it that is not a
     *         Punycode digit, a truncated delta, an arithmetic overflow, or
     *         a decoded value that is not a valid non-surrogate code point
     */
    public static StringBuffer decode(StringBuffer src, boolean[] caseFlags)
                               throws ParseException{
        int srcLength = src.length();
        StringBuffer result = new StringBuffer();
        int n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
                destCPCount, firstSupplementaryIndex, cpLength;
        char b;
        // Every decoded code point consumes at least one input char and
        // occupies at most two UTF-16 code units.
        int destCapacity = Math.max(MAX_CP_COUNT, 2*srcLength+1);
        char[] dest = new char[destCapacity];

        /*
         * Handle the basic code points:
         * Let basicLength be the number of input code points
         * before the last delimiter, or 0 if there is none,
         * then copy the first basicLength code points to the output.
         *
         * The two following loops iterate backward.
         */
        for(j=srcLength; j>0;) {
            if(src.charAt(--j)==DELIMITER) {
                break;
            }
        }
        destLength=basicLength=destCPCount=j;

        while(j>0) {
            b=src.charAt(--j);
            if(!isBasic(b)) {
                throw new ParseException("Illegal char found", -1);
            }

            if(j<destCapacity) {
                dest[j]= b;

                if(caseFlags!=null) {
                    caseFlags[j]=isBasicUpperCase(b);
                }
            }
        }

        /* Initialize the state: */
        n=INITIAL_N;
        i=0;
        bias=INITIAL_BIAS;
        firstSupplementaryIndex=1000000000;

        /*
         * Main decoding loop:
         * Start just after the last delimiter if any
         * basic code points were copied; start at the beginning otherwise.
         */
        for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
            /*
             * in is the index of the next character to be consumed, and
             * destCPCount is the number of code points in the output array.
             *
             * Decode a generalized variable-length integer into delta,
             * which gets added to i.  The overflow checking is easier
             * if we increase i as we go, then subtract off its starting
             * value at the end to obtain delta.
             */
            for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
                if(in>=srcLength) {
                    throw new ParseException("Illegal char found", -1);
                }

                // Look the char up without narrowing it: a (byte) cast made
                // 0x80..0xFF a negative index and aliased chars >= 0x100
                // onto unrelated table entries.
                char digitChar=src.charAt(in++);
                digit=digitChar<basicToDigit.length ? basicToDigit[digitChar] : -1;
                if(digit<0) {
                    throw new ParseException("Invalid char found", -1);
                }
                if(digit>(0x7fffffff-i)/w) {
                    /* integer overflow */
                    throw new ParseException("Illegal char found", -1);
                }

                i+=digit*w;
                t=k-bias;
                if(t<TMIN) {
                    t=TMIN;
                } else if(k>=(bias+TMAX)) {
                    t=TMAX;
                }
                if(digit<t) {
                    break;
                }

                if(w>0x7fffffff/(BASE-t)) {
                    /* integer overflow */
                    throw new ParseException("Illegal char found", -1);
                }
                w*=BASE-t;
            }

            /*
             * Modification from sample code:
             * Increments destCPCount here,
             * where needed instead of in for() loop tail.
             */
            ++destCPCount;
            bias=adaptBias(i-oldi, destCPCount, (oldi==0));

            /*
             * i was supposed to wrap around from (incremented) destCPCount to 0,
             * incrementing n each time, so we'll fix that now:
             */
            if(i/destCPCount>(0x7fffffff-n)) {
                /* integer overflow */
                throw new ParseException("Illegal char found", -1);
            }

            n+=i/destCPCount;
            i%=destCPCount;
            /* not needed for Punycode: */
            /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */

            if(n>0x10ffff || isSurrogate(n)) {
                /* Unicode code point overflow */
                throw new ParseException("Illegal char found", -1);
            }

            /* Insert n at position i of the output: */
            cpLength=UTF16.getCharCount(n);
            if((destLength+cpLength)<destCapacity) {
                int codeUnitIndex;

                /*
                 * Handle indexes when supplementary code points are present.
                 *
                 * In almost all cases, there will be only BMP code points before i
                 * and even in the entire string.
                 * This is handled with the same efficiency as with UTF-32.
                 *
                 * Only the rare cases with supplementary code points are handled
                 * more slowly - but not too bad since this is an insertion anyway.
                 */
                if(i<=firstSupplementaryIndex) {
                    codeUnitIndex=i;
                    if(cpLength>1) {
                        firstSupplementaryIndex=codeUnitIndex;
                    } else {
                        ++firstSupplementaryIndex;
                    }
                } else {
                    codeUnitIndex=firstSupplementaryIndex;
                    codeUnitIndex=UTF16.moveCodePointOffset(dest, 0, destLength, codeUnitIndex, i-codeUnitIndex);
                }

                /* use the UChar index codeUnitIndex instead of the code point index i */
                if(codeUnitIndex<destLength) {
                    System.arraycopy(dest, codeUnitIndex,
                                     dest, codeUnitIndex+cpLength,
                                    (destLength-codeUnitIndex));
                    if(caseFlags!=null) {
                        System.arraycopy(caseFlags, codeUnitIndex,
                                         caseFlags, codeUnitIndex+cpLength,
                                         destLength-codeUnitIndex);
                    }
                }
                if(cpLength==1) {
                    /* BMP, insert one code unit */
                    dest[codeUnitIndex]=(char)n;
                } else {
                    /* supplementary character, insert two code units */
                    dest[codeUnitIndex]=UTF16.getLeadSurrogate(n);
                    dest[codeUnitIndex+1]=UTF16.getTrailSurrogate(n);
                }
                if(caseFlags!=null) {
                    /* Case of last character determines uppercase flag: */
                    caseFlags[codeUnitIndex]=isBasicUpperCase(src.charAt(in-1));
                    if(cpLength==2) {
                        caseFlags[codeUnitIndex+1]=false;
                    }
                }
            }
            destLength+=cpLength;
            ++i;
        }
        result.append(dest, 0, destLength);
        return result;
    }
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/ReplaceableUCharacterIterator.java Tue Jan 14 15:23:01 2020 -0800
@@ -0,0 +1,190 @@
/*
 * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 *******************************************************************************
 * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
 * Agreement between IBM and Sun. This technology is protected by multiple     *
 * US and International patents. This notice and attribution to IBM may not    *
 * to removed.                                                                 *
 *******************************************************************************
 */

package jdk.internal.icu.impl;

import jdk.internal.icu.text.Replaceable;
import jdk.internal.icu.text.ReplaceableString;
import jdk.internal.icu.text.UCharacterIterator;

/**
 * A {@link UCharacterIterator} over the UTF-16 code units of a
 * {@link Replaceable} that is built from a {@code String} or
 * {@code StringBuffer}.
 *
 * <p>Open notes carried over from the original author:
 * <ul>
 * <li>DLF docs must define behavior when Replaceable is mutated underneath
 *     the iterator.</li>
 * <li>This and ICUCharacterIterator share some code, maybe they should share
 *     an implementation, or the common state and implementation should be
 *     moved up into UCharacterIterator.</li>
 * <li>What are first, last, and getBeginIndex doing here?!?!?!</li>
 * </ul>
 */
public class ReplaceableUCharacterIterator extends UCharacterIterator {

    // public constructor ------------------------------------------------------

    /**
     * Creates an iterator positioned at index 0 of the given text.
     * @param str text which the iterator will be based on
     * @exception IllegalArgumentException if {@code str} is null
     */
    public ReplaceableUCharacterIterator(String str) {
        if (str == null) {
            throw new IllegalArgumentException();
        }
        this.replaceable = new ReplaceableString(str);
        this.currentIndex = 0;
    }

    /**
     * Creates an iterator positioned at index 0 of the given buffer.
     * @param buf buffer of text on which the iterator will be based
     * @exception IllegalArgumentException if {@code buf} is null
     */
    public ReplaceableUCharacterIterator(StringBuffer buf) {
        if (buf == null) {
            throw new IllegalArgumentException();
        }
        this.replaceable = new ReplaceableString(buf);
        this.currentIndex = 0;
    }

    // public methods ----------------------------------------------------------

    /**
     * Creates a copy of this iterator, does not clone the underlying
     * <code>Replaceable</code> object; the copy refers to the same one.
     * @return copy of this iterator, or null if cloning is not supported
     */
    @Override
    public Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            return null; // never invoked
        }
    }

    /**
     * Returns the UTF16 character at the current index without moving.
     * @return current UTF16 character, or DONE if the current index is at
     *         the end of the text
     */
    public int current() {
        if (currentIndex < replaceable.length()) {
            return replaceable.charAt(currentIndex);
        }
        return DONE;
    }

    /**
     * Returns the length of the text
     * @return length of the text
     */
    public int getLength() {
        return replaceable.length();
    }

    /**
     * Gets the current index in the text.
     * @return current index in the text.
     */
    public int getIndex() {
        return currentIndex;
    }

    /**
     * Returns the UTF16 character at the current index and then advances
     * the index by 1.
     * If the current index is already at the end of the text, the index is
     * left unchanged and DONE is returned.
     * @return next UTF16 character in text, or DONE if the index is at the
     *         end of the text range.
     */
    public int next() {
        if (currentIndex < replaceable.length()) {
            return replaceable.charAt(currentIndex++);
        }
        return DONE;
    }

    /**
     * Decrements the index by 1 and returns the UTF16 character at the new
     * index.
     * If the current index is already 0, the index is left unchanged and
     * DONE is returned.
     * @return previous UTF16 character in text, or DONE if the index is at
     *         the start of the text range.
     */
    public int previous() {
        if (currentIndex > 0) {
            return replaceable.charAt(--currentIndex);
        }
        return DONE;
    }

    /**
     * Sets the current index to the specified index in the text.
     * This assumes the text is stored as 16-bit code units.
     * An index equal to the text length is accepted.
     * @param currentIndex the index within the text.
     * @exception IllegalArgumentException is thrown if an invalid index is
     *            supplied, i.e. the index is negative or greater than the
     *            text length.
     */
    public void setIndex(int currentIndex) {
        if (currentIndex < 0 || currentIndex > replaceable.length()) {
            throw new IllegalArgumentException();
        }
        this.currentIndex = currentIndex;
    }

    /**
     * Copies the whole text into {@code fillIn}, starting at {@code offset}.
     * @param fillIn destination array
     * @param offset index in {@code fillIn} at which the first char is stored
     * @return the number of chars copied, i.e. the length of the text
     * @exception IndexOutOfBoundsException if {@code offset} is negative or
     *            the text does not fit into {@code fillIn} at {@code offset};
     *            the message is the text length
     */
    public int getText(char[] fillIn, int offset) {
        int length = replaceable.length();
        // Compare by subtraction: "offset + length" can overflow int and
        // wrongly pass the check for a very large offset.
        if (offset < 0 || offset > fillIn.length - length) {
            throw new IndexOutOfBoundsException(Integer.toString(length));
        }
        replaceable.getChars(0, length, fillIn, offset);
        return length;
    }

    // private data members ----------------------------------------------------

    /**
     * Replaceable object holding the text; assigned once per instance and
     * shared with clones.
     */
    private final Replaceable replaceable;
    /**
     * Current index into the text, in UTF16 code units
     */
    private int currentIndex;

}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/StringPrepDataReader.java Tue Jan 14 15:23:01 2020 -0800
@@ -0,0 +1,128 @@
/*
 * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
/*
/*
 ******************************************************************************
 * Copyright (C) 2003, International Business Machines Corporation and        *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 *
 * Created on May 2, 2003
 *
 * To change the template for this generated file go to
 * Window>Preferences>Java>Code Generation>Code and Comments
 */
// CHANGELOG
//      2005-05-19 Edward Wang
//          - copy this file from icu4jsrc_3_2/src/com/ibm/icu/impl/StringPrepDataReader.java
//          - move from package com.ibm.icu.impl to package sun.net.idn
//
package jdk.internal.icu.impl;

import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;

import jdk.internal.icu.impl.ICUBinary;


/**
 * Reads a StringPrep ("SPRP") binary data stream: the ICU header is
 * validated in the constructor, after which the index array, the trie
 * bytes and the mapping table can be read in the order the caller needs.
 *
 * @author ram
 */
public final class StringPrepDataReader implements ICUBinary.Authenticate {

    /**
     * <p>Reads and authenticates the data header, then wraps the stream
     * for the subsequent read calls.</p>
     * @param inputStream input stream positioned at the start of the ICU
     *                    data header
     * @exception IOException throw if data file fails authentication
     * @draft 2.1
     */
    public StringPrepDataReader(InputStream inputStream)
                                        throws IOException {

        unicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID, this);

        dataInputStream = new DataInputStream(inputStream);
    }

    /**
     * Fills both arrays from the stream: first the trie bytes, then the
     * mapping table, one big-endian char per element.
     * @param idnaBytes    receives exactly {@code idnaBytes.length} bytes
     * @param mappingTable receives exactly {@code mappingTable.length} chars
     * @exception IOException if reading fails; in particular an
     *            {@code EOFException} if the stream ends before both arrays
     *            are filled
     */
    public void read(byte[] idnaBytes,
                     char[] mappingTable)
                     throws IOException {

        // Read the bytes that make up the idnaTrie.
        // readFully is required here: a plain read() may return after a
        // partial read and leave the rest of the array zeroed, which would
        // also misalign the mapping table that follows.
        dataInputStream.readFully(idnaBytes);

        // Read the extra data
        for (int i = 0; i < mappingTable.length; i++) {
            mappingTable[i] = dataInputStream.readChar();
        }
    }

    /**
     * Returns the data format version this reader understands.
     * @return the four-byte format version array
     */
    public byte[] getDataFormatVersion() {
        return DATA_FORMAT_VERSION;
    }

    /**
     * Decides whether a data file's format version can be read.
     * Bytes 0, 2 and 3 must match the supported version; byte 1 is not
     * compared.
     * @param version four-byte format version taken from the data header
     * @return true if the version is acceptable
     */
    public boolean isDataVersionAcceptable(byte[] version) {
        return version[0] == DATA_FORMAT_VERSION[0]
               && version[2] == DATA_FORMAT_VERSION[2]
               && version[3] == DATA_FORMAT_VERSION[3];
    }

    /**
     * Reads {@code length} big-endian ints from the stream.
     * @param length number of index values to read
     * @return a new array holding the values in stream order
     * @exception IOException if reading fails or the stream ends early
     */
    public int[] readIndexes(int length) throws IOException {
        int[] indexes = new int[length];
        // Read the indexes
        for (int i = 0; i < length; i++) {
            indexes[i] = dataInputStream.readInt();
        }
        return indexes;
    }

    /**
     * Returns the value produced by {@code ICUBinary.readHeader} when the
     * header was read in the constructor.
     * @return the Unicode version bytes from the data header
     */
    public byte[] getUnicodeVersion() {
        return unicodeVersion;
    }

    // private data members -------------------------------------------------


    /**
     * ICU data file input stream
     */
    private final DataInputStream dataInputStream;
    /**
     * Version bytes returned by the header reader
     */
    private final byte[] unicodeVersion;
    /**
     * File format version that this class understands.
     * No guarantees are made if a older version is used
     * see store.c of gennorm for more information and values
     */
    ///* dataFormat="SPRP" 0x53, 0x50, 0x52, 0x50 */
    private static final byte[] DATA_FORMAT_ID = {(byte)0x53, (byte)0x50,
                                                  (byte)0x52, (byte)0x50};
    private static final byte[] DATA_FORMAT_VERSION = {(byte)0x3, (byte)0x2,
                                                       (byte)0x5, (byte)0x2};

}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/java.base/share/classes/jdk/internal/icu/impl/Trie.java Tue Jan 14 15:23:01 2020 -0800 @@ -0,0 +1,367 @@ +/* + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + ****************************************************************************** + * Copyright (C) 1996-2014, International Business Machines Corporation and + * others. All Rights Reserved. 
+ ****************************************************************************** + */ + +package jdk.internal.icu.impl; + +import jdk.internal.icu.lang.UCharacter; +import jdk.internal.icu.text.UTF16; + +import java.io.DataInputStream; +import java.io.InputStream; +import java.io.IOException; + +/** + * <p>A trie is a kind of compressed, serializable table of values + * associated with Unicode code points (0..0x10ffff).</p> + * <p>This class defines the basic structure of a trie and provides methods + * to <b>retrieve the offsets to the actual data</b>.</p> + * <p>Data will be the form of an array of basic types, char or int.</p> + * <p>The actual data format will have to be specified by the user in the + * inner static interface com.ibm.icu.impl.Trie.DataManipulate.</p> + * <p>This trie implementation is optimized for getting offset while walking + * forward through a UTF-16 string. + * Therefore, the simplest and fastest access macros are the + * fromLead() and fromOffsetTrail() methods. + * The fromBMP() method are a little more complicated; they get offsets even + * for lead surrogate codepoints, while the fromLead() method get special + * "folded" offsets for lead surrogate code units if there is relevant data + * associated with them. + * From such a folded offsets, an offset needs to be extracted to supply + * to the fromOffsetTrail() methods. 
+ * To handle such supplementary codepoints, some offset information are kept + * in the data.</p> + * <p>Methods in com.ibm.icu.impl.Trie.DataManipulate are called to retrieve + * that offset from the folded value for the lead surrogate unit.</p> + * <p>For examples of use, see com.ibm.icu.impl.CharTrie or + * com.ibm.icu.impl.IntTrie.</p> + * @author synwee + * @see com.ibm.icu.impl.CharTrie + * @see com.ibm.icu.impl.IntTrie + * @since release 2.1, Jan 01 2002 + */ +public abstract class Trie +{ + // public class declaration ---------------------------------------- + + /** + * Character data in com.ibm.impl.Trie have different user-specified format + * for different purposes. + * This interface specifies methods to be implemented in order for + * com.ibm.impl.Trie, to surrogate offset information encapsulated within + * the data. + */ + public static interface DataManipulate + { + /** + * Called by com.ibm.icu.impl.Trie to extract from a lead surrogate's + * data + * the index array offset of the indexes for that lead surrogate. + * @param value data value for a surrogate from the trie, including the + * folding offset + * @return data offset or 0 if there is no data for the lead surrogate + */ + public int getFoldingOffset(int value); + } + + // default implementation + private static class DefaultGetFoldingOffset implements DataManipulate { + public int getFoldingOffset(int value) { + return value; + } + } + + // protected constructor ------------------------------------------- + + /** + * Trie constructor for CharTrie use. + * @param inputStream ICU data file input stream which contains the + * trie + * @param dataManipulate object containing the information to parse the + * trie data + * @throws IOException thrown when input stream does not have the + * right header. 
+ */ + protected Trie(InputStream inputStream, + DataManipulate dataManipulate) throws IOException + { + DataInputStream input = new DataInputStream(inputStream); + // Magic number to authenticate the data. + int signature = input.readInt(); + m_options_ = input.readInt(); + + if (!checkHeader(signature)) { + throw new IllegalArgumentException("ICU data file error: Trie header authentication failed, please check if you have the most updated ICU data file"); + } + + if(dataManipulate != null) { + m_dataManipulate_ = dataManipulate; + } else { + m_dataManipulate_ = new DefaultGetFoldingOffset(); + } + m_isLatin1Linear_ = (m_options_ & + HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0; + m_dataOffset_ = input.readInt(); + m_dataLength_ = input.readInt(); + unserialize(inputStream); + } + + // protected data members ------------------------------------------ + + /** + * Lead surrogate code points' index displacement in the index array. + * <pre>{@code + * 0x10000-0xd800=0x2800 + * 0x2800 >> INDEX_STAGE_1_SHIFT_ + * }</pre> + */ + protected static final int LEAD_INDEX_OFFSET_ = 0x2800 >> 5; + /** + * Shift size for shifting right the input index. 1..9 + */ + protected static final int INDEX_STAGE_1_SHIFT_ = 5; + /** + * Shift size for shifting left the index array values. + * Increases possible data size with 16-bit index values at the cost + * of compactability. + * This requires blocks of stage 2 data to be aligned by + * DATA_GRANULARITY. + * 0..INDEX_STAGE_1_SHIFT + */ + protected static final int INDEX_STAGE_2_SHIFT_ = 2; + /** + * Number of data values in a stage 2 (data array) block. + */ + protected static final int DATA_BLOCK_LENGTH=1<<INDEX_STAGE_1_SHIFT_; + /** + * Mask for getting the lower bits from the input index. + * DATA_BLOCK_LENGTH - 1. 
+ */ + protected static final int INDEX_STAGE_3_MASK_ = DATA_BLOCK_LENGTH - 1; + /** + * Surrogate mask to use when shifting offset to retrieve supplementary + * values + */ + protected static final int SURROGATE_MASK_ = 0x3FF; + /** + * Index or UTF16 characters + */ + protected char m_index_[]; + /** + * Internal TrieValue which handles the parsing of the data value. + * This class is to be implemented by the user + */ + protected DataManipulate m_dataManipulate_; + /** + * Start index of the data portion of the trie. CharTrie combines + * index and data into a char array, so this is used to indicate the + * initial offset to the data portion. + * Note this index always points to the initial value. + */ + protected int m_dataOffset_; + /** + * Length of the data array + */ + protected int m_dataLength_; + + // protected methods ----------------------------------------------- + + /** + * Gets the offset to the data which the surrogate pair points to. + * @param lead lead surrogate + * @param trail trailing surrogate + * @return offset to data + */ + protected abstract int getSurrogateOffset(char lead, char trail); + + /** + * Gets the offset to the data which the index ch after variable offset + * points to. + * Note for locating a non-supplementary character data offset, calling + * <p> + * getRawOffset(0, ch); + * </p> + * will do. Otherwise if it is a supplementary character formed by + * surrogates lead and trail. Then we would have to call getRawOffset() + * with getFoldingIndexOffset(). See getSurrogateOffset(). + * @param offset index offset which ch is to start from + * @param ch index to be used after offset + * @return offset to the data + */ + protected final int getRawOffset(int offset, char ch) + { + return (m_index_[offset + (ch >> INDEX_STAGE_1_SHIFT_)] + << INDEX_STAGE_2_SHIFT_) + + (ch & INDEX_STAGE_3_MASK_); + } + + /** + * Gets the offset to data which the BMP character points to + * Treats a lead surrogate as a normal code point. 
+ * @param ch BMP character + * @return offset to data + */ + protected final int getBMPOffset(char ch) + { + return (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE + && ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) + ? getRawOffset(LEAD_INDEX_OFFSET_, ch) + : getRawOffset(0, ch); + // using a getRawOffset(ch) makes no diff + } + + /** + * Gets the offset to the data which this lead surrogate character points + * to. + * Data at the returned offset may contain folding offset information for + * the next trailing surrogate character. + * @param ch lead surrogate character + * @return offset to data + */ + protected final int getLeadOffset(char ch) + { + return getRawOffset(0, ch); + } + + /** + * Internal trie getter from a code point. + * Could be faster(?) but longer with + * {@code if((c32)<=0xd7ff) { (result)=_TRIE_GET_RAW(trie, data, 0, c32); }} + * Gets the offset to data which the codepoint points to + * @param ch codepoint + * @return offset to data + */ + protected final int getCodePointOffset(int ch) + { + // if ((ch >> 16) == 0) slower + if (ch < 0) { + return -1; + } else if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { + // fastpath for the part of the BMP below surrogates (D800) where getRawOffset() works + return getRawOffset(0, (char)ch); + } else if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) { + // BMP codepoint + return getBMPOffset((char)ch); + } else if (ch <= UCharacter.MAX_VALUE) { + // look at the construction of supplementary characters + // trail forms the ends of it. + return getSurrogateOffset(UTF16.getLeadSurrogate(ch), + (char)(ch & SURROGATE_MASK_)); + } else { + // return -1 if there is an error, in this case we return + return -1; + } + } + + /** + * <p>Parses the inputstream and creates the trie index with it.</p> + * <p>This is overwritten by the child classes. + * @param inputStream input stream containing the trie information + * @exception IOException thrown when data reading fails. 
+ */ + protected void unserialize(InputStream inputStream) throws IOException + { + //indexLength is a multiple of 1024 >> INDEX_STAGE_2_SHIFT_ + m_index_ = new char[m_dataOffset_]; + DataInputStream input = new DataInputStream(inputStream); + for (int i = 0; i < m_dataOffset_; i ++) { + m_index_[i] = input.readChar(); + } + } + + /** + * Determines if this is a 16 bit trie + * @return true if this is a 16 bit trie + */ + protected final boolean isCharTrie() + { + return (m_options_ & HEADER_OPTIONS_DATA_IS_32_BIT_) == 0; + } + + // private data members -------------------------------------------- + + /** + * Latin 1 option mask + */ + protected static final int HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_ = 0x200; + /** + * Constant number to authenticate the byte block + */ + protected static final int HEADER_SIGNATURE_ = 0x54726965; + /** + * Header option formatting + */ + private static final int HEADER_OPTIONS_SHIFT_MASK_ = 0xF; + protected static final int HEADER_OPTIONS_INDEX_SHIFT_ = 4; + protected static final int HEADER_OPTIONS_DATA_IS_32_BIT_ = 0x100; + + /** + * Flag indicator for Latin quick access data block + */ + private boolean m_isLatin1Linear_; + + /** + * <p>Trie options field.</p> + * <p>options bit field:<br> + * 9 1 = Latin-1 data is stored linearly at data + DATA_BLOCK_LENGTH<br> + * 8 0 = 16-bit data, 1=32-bit data<br> + * 7..4 INDEX_STAGE_1_SHIFT // 0..INDEX_STAGE_2_SHIFT<br> + * 3..0 INDEX_STAGE_2_SHIFT // 1..9<br> + */ + private int m_options_; + + // private methods --------------------------------------------------- + + /** + * Authenticates raw data header. + * Checking the header information, signature and options. + * @param signature This contains the options and type of a Trie + * @return true if the header is authenticated valid + */ + private final boolean checkHeader(int signature)