changeset 57620:f1007d3e1907

8174270: Consolidate ICU sources in one location Reviewed-by: srl, joehw
author naoto
date Mon, 13 Jan 2020 08:05:59 -0800
parents 9c8384d60dd0
children 5f4d6c32a45c
files src/java.base/share/classes/java/net/IDN.java src/java.base/share/classes/java/text/Bidi.java src/java.base/share/classes/java/text/CollationElementIterator.java src/java.base/share/classes/java/text/Normalizer.java src/java.base/share/classes/java/text/RBTableBuilder.java src/java.base/share/classes/jdk/internal/icu/impl/BMPSet.java src/java.base/share/classes/jdk/internal/icu/impl/CharTrie.java src/java.base/share/classes/jdk/internal/icu/impl/CharacterIteratorWrapper.java src/java.base/share/classes/jdk/internal/icu/impl/ICUBinary.java src/java.base/share/classes/jdk/internal/icu/impl/Norm2AllModes.java src/java.base/share/classes/jdk/internal/icu/impl/NormalizerImpl.java src/java.base/share/classes/jdk/internal/icu/impl/Punycode.java src/java.base/share/classes/jdk/internal/icu/impl/ReplaceableUCharacterIterator.java src/java.base/share/classes/jdk/internal/icu/impl/StringPrepDataReader.java src/java.base/share/classes/jdk/internal/icu/impl/Trie.java src/java.base/share/classes/jdk/internal/icu/impl/Trie2.java src/java.base/share/classes/jdk/internal/icu/impl/Trie2_16.java src/java.base/share/classes/jdk/internal/icu/impl/UBiDiProps.java src/java.base/share/classes/jdk/internal/icu/impl/UCharacterProperty.java src/java.base/share/classes/jdk/internal/icu/impl/UnicodeSetStringSpan.java src/java.base/share/classes/jdk/internal/icu/impl/Utility.java src/java.base/share/classes/jdk/internal/icu/impl/data/icudt64b/nfc.nrm src/java.base/share/classes/jdk/internal/icu/impl/data/icudt64b/nfkc.nrm src/java.base/share/classes/jdk/internal/icu/impl/data/icudt64b/ubidi.icu src/java.base/share/classes/jdk/internal/icu/impl/data/icudt64b/uprops.icu src/java.base/share/classes/jdk/internal/icu/lang/UCharacter.java src/java.base/share/classes/jdk/internal/icu/lang/UCharacterDirection.java src/java.base/share/classes/jdk/internal/icu/lang/UCharacterEnums.java src/java.base/share/classes/jdk/internal/icu/text/BidiBase.java 
src/java.base/share/classes/jdk/internal/icu/text/BidiLine.java src/java.base/share/classes/jdk/internal/icu/text/BidiRun.java src/java.base/share/classes/jdk/internal/icu/text/BidiWriter.java src/java.base/share/classes/jdk/internal/icu/text/FilteredNormalizer2.java src/java.base/share/classes/jdk/internal/icu/text/Normalizer2.java src/java.base/share/classes/jdk/internal/icu/text/NormalizerBase.java src/java.base/share/classes/jdk/internal/icu/text/Replaceable.java src/java.base/share/classes/jdk/internal/icu/text/ReplaceableString.java src/java.base/share/classes/jdk/internal/icu/text/StringPrep.java src/java.base/share/classes/jdk/internal/icu/text/UCharacterIterator.java src/java.base/share/classes/jdk/internal/icu/text/UTF16.java src/java.base/share/classes/jdk/internal/icu/text/UnicodeSet.java src/java.base/share/classes/jdk/internal/icu/util/CodePointMap.java src/java.base/share/classes/jdk/internal/icu/util/CodePointTrie.java src/java.base/share/classes/jdk/internal/icu/util/OutputInt.java src/java.base/share/classes/jdk/internal/icu/util/VersionInfo.java src/java.base/share/classes/sun/net/idn/Punycode.java src/java.base/share/classes/sun/net/idn/StringPrep.java src/java.base/share/classes/sun/net/idn/StringPrepDataReader.java src/java.base/share/classes/sun/net/idn/UCharacterDirection.java src/java.base/share/classes/sun/net/idn/UCharacterEnums.java src/java.base/share/classes/sun/text/CollatorUtilities.java src/java.base/share/classes/sun/text/ComposedCharIter.java src/java.base/share/classes/sun/text/Normalizer.java src/java.base/share/classes/sun/text/bidi/BidiBase.java src/java.base/share/classes/sun/text/bidi/BidiLine.java src/java.base/share/classes/sun/text/bidi/BidiRun.java src/java.base/share/classes/sun/text/bidi/BidiWriter.java src/java.base/share/classes/sun/text/normalizer/BMPSet.java src/java.base/share/classes/sun/text/normalizer/CharTrie.java src/java.base/share/classes/sun/text/normalizer/CharacterIteratorWrapper.java 
src/java.base/share/classes/sun/text/normalizer/CodePointMap.java src/java.base/share/classes/sun/text/normalizer/CodePointTrie.java src/java.base/share/classes/sun/text/normalizer/FilteredNormalizer2.java src/java.base/share/classes/sun/text/normalizer/ICUBinary.java src/java.base/share/classes/sun/text/normalizer/Norm2AllModes.java src/java.base/share/classes/sun/text/normalizer/Normalizer2.java src/java.base/share/classes/sun/text/normalizer/NormalizerBase.java src/java.base/share/classes/sun/text/normalizer/NormalizerImpl.java src/java.base/share/classes/sun/text/normalizer/OutputInt.java src/java.base/share/classes/sun/text/normalizer/Replaceable.java src/java.base/share/classes/sun/text/normalizer/ReplaceableString.java src/java.base/share/classes/sun/text/normalizer/ReplaceableUCharacterIterator.java src/java.base/share/classes/sun/text/normalizer/Trie.java src/java.base/share/classes/sun/text/normalizer/Trie2.java src/java.base/share/classes/sun/text/normalizer/Trie2_16.java src/java.base/share/classes/sun/text/normalizer/UBiDiProps.java src/java.base/share/classes/sun/text/normalizer/UCharacter.java src/java.base/share/classes/sun/text/normalizer/UCharacterIterator.java src/java.base/share/classes/sun/text/normalizer/UCharacterProperty.java src/java.base/share/classes/sun/text/normalizer/UTF16.java src/java.base/share/classes/sun/text/normalizer/UnicodeSet.java src/java.base/share/classes/sun/text/normalizer/UnicodeSetStringSpan.java src/java.base/share/classes/sun/text/normalizer/Utility.java src/java.base/share/classes/sun/text/normalizer/VersionInfo.java src/java.base/share/classes/sun/text/resources/nfc.nrm src/java.base/share/classes/sun/text/resources/nfkc.nrm src/java.base/share/classes/sun/text/resources/ubidi.icu src/java.base/share/classes/sun/text/resources/uprops.icu test/jdk/java/text/Bidi/Bug6850113.java test/jdk/java/text/Bidi/Bug7051769.java test/jdk/java/text/Normalizer/ConformanceTest.java test/jdk/java/text/Normalizer/ICUBasicTest.java 
test/jdk/java/text/Normalizer/NormalizerAPITest.java test/jdk/java/text/Normalizer/ThreadSafeTest.java test/jdk/sun/net/idn/NFS4StringPrep.java test/jdk/sun/net/idn/PunycodeTest.java test/jdk/sun/net/idn/TestStringPrep.java
diffstat 97 files changed, 21966 insertions(+), 21893 deletions(-) [+]
line wrap: on
line diff
--- a/src/java.base/share/classes/java/net/IDN.java	Mon Jan 13 16:56:21 2020 +0100
+++ b/src/java.base/share/classes/java/net/IDN.java	Mon Jan 13 08:05:59 2020 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,9 +29,9 @@
 import java.security.AccessController;
 import java.security.PrivilegedAction;
 
-import sun.net.idn.StringPrep;
-import sun.net.idn.Punycode;
-import sun.text.normalizer.UCharacterIterator;
+import jdk.internal.icu.impl.Punycode;
+import jdk.internal.icu.text.StringPrep;
+import jdk.internal.icu.text.UCharacterIterator;
 
 /**
  * Provides methods to convert internationalized domain names (IDNs) between
@@ -226,7 +226,7 @@
         InputStream stream = null;
 
         try {
-            final String IDN_PROFILE = "uidna.spp";
+            final String IDN_PROFILE = "/sun/net/idn/uidna.spp";
             if (System.getSecurityManager() != null) {
                 stream = AccessController.doPrivileged(new PrivilegedAction<>() {
                     public InputStream run() {
--- a/src/java.base/share/classes/java/text/Bidi.java	Mon Jan 13 16:56:21 2020 +0100
+++ b/src/java.base/share/classes/java/text/Bidi.java	Mon Jan 13 08:05:59 2020 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,7 +35,7 @@
 
 package java.text;
 
-import sun.text.bidi.BidiBase;
+import jdk.internal.icu.text.BidiBase;
 
 /**
  * This class implements the Unicode Bidirectional Algorithm.
--- a/src/java.base/share/classes/java/text/CollationElementIterator.java	Mon Jan 13 16:56:21 2020 +0100
+++ b/src/java.base/share/classes/java/text/CollationElementIterator.java	Mon Jan 13 08:05:59 2020 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1996, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1996, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -41,7 +41,7 @@
 import java.lang.Character;
 import java.util.Vector;
 import sun.text.CollatorUtilities;
-import sun.text.normalizer.NormalizerBase;
+import jdk.internal.icu.text.NormalizerBase;
 
 /**
  * The {@code CollationElementIterator} class is used as an iterator
--- a/src/java.base/share/classes/java/text/Normalizer.java	Mon Jan 13 16:56:21 2020 +0100
+++ b/src/java.base/share/classes/java/text/Normalizer.java	Mon Jan 13 08:05:59 2020 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,7 +37,7 @@
 
 package java.text;
 
-import sun.text.normalizer.NormalizerBase;
+import jdk.internal.icu.text.NormalizerBase;
 
 /**
  * This class provides the method {@code normalize} which transforms Unicode
--- a/src/java.base/share/classes/java/text/RBTableBuilder.java	Mon Jan 13 16:56:21 2020 +0100
+++ b/src/java.base/share/classes/java/text/RBTableBuilder.java	Mon Jan 13 08:05:59 2020 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,8 +42,7 @@
 import sun.text.UCompactIntArray;
 import sun.text.IntHashtable;
 import sun.text.ComposedCharIter;
-import sun.text.CollatorUtilities;
-import sun.text.normalizer.NormalizerImpl;
+import jdk.internal.icu.impl.NormalizerImpl;
 
 /**
  * This class contains all the code to parse a RuleBasedCollator pattern
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/BMPSet.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ ******************************************************************************
+ *
+ *   Copyright (C) 2009-2014, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *
+ ******************************************************************************
+ */
+
+package jdk.internal.icu.impl;
+
+import jdk.internal.icu.text.UnicodeSet.SpanCondition;
+import jdk.internal.icu.util.OutputInt;
+
+/**
+ * Helper class for frozen UnicodeSets, implements contains() and span() optimized for BMP code points.
+ *
+ * Latin-1: Look up bytes.
+ * 2-byte characters: Bits organized vertically.
+ * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF, with mixed for illegal ranges.
+ * Supplementary characters: Call contains() on the parent set.
+ */
+public final class BMPSet {
+
+    /**
+     * One boolean per Latin-1 character, indexed by code point U+0000..U+00FF; true if the set contains it.
+     */
+    private boolean[] latin1Contains;
+
+    /**
+     * One bit per code point from U+0000..U+07FF. The bits are organized vertically; consecutive code points
+     * correspond to the same bit positions in consecutive table words. With code point parts lead=c{10..6}
+     * trail=c{5..0} it is set.contains(c)==(table7FF[trail] bit lead)
+     *
+     * Bits for 0..7F (non-shortest forms) are described as holding the result of contains(FFFD) for faster validity
+     * checking at runtime. NOTE(review): initBits() in this class does not visibly set them -- confirm.
+     */
+    private int[] table7FF;
+
+    /**
+     * One bit per 64 BMP code points. The bits are organized vertically; consecutive 64-code point blocks
+     * correspond to the same bit position in consecutive table words. With code point parts lead=c{15..12}
+     * t1=c{11..6} test bits (lead+16) and lead in bmpBlockBits[t1]. If the upper bit is 0, then the lower bit
+     * indicates if contains(c) for all code points in the 64-block. If the upper bit is 1, then the block is mixed
+     * and set.contains(c) must be called.
+     *
+     * Bits for 0..7FF (non-shortest forms) and D800..DFFF are described as holding the result of contains(FFFD)
+     * for faster validity checking at runtime. NOTE(review): initBits() does not visibly special-case them -- confirm.
+     */
+    private int[] bmpBlockBits;
+
+    /**
+     * Inversion list indexes for restricted binary searches in findCodePoint(), from findCodePoint(U+0800, U+1000,
+     * U+2000, .., U+F000, U+10000). U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
+     * always looked up in the bit tables. The last pair of indexes is for finding supplementary code points.
+     */
+    private int[] list4kStarts;
+
+    /**
+     * The inversion list of the parent set, for the slower contains() implementation for mixed BMP blocks and for
+     * supplementary code points. The list is terminated with list[listLength-1]=0x110000.
+     */
+    private final int[] list;
+    private final int listLength; // length used; list may be longer to minimize reallocs
+
+    public BMPSet(final int[] parentList, int parentListLength) {
+        list = parentList; // kept by reference, not copied
+        listLength = parentListLength;
+        latin1Contains = new boolean[0x100]; // one flag per code point U+0000..U+00FF
+        table7FF = new int[64]; // 64 words x 32 bits: one bit per code point below U+0800
+        bmpBlockBits = new int[64];
+        list4kStarts = new int[18]; // slots 0..0x11, all assigned below
+
+        /*
+         * Set the list indexes for binary searches for U+0800, U+1000, U+2000, .., U+F000, U+10000. U+0800 is the
+         * first 3-byte-UTF-8 code point. Lower code points are looked up in the bit tables. The last pair of
+         * indexes is for finding supplementary code points.
+         */
+        list4kStarts[0] = findCodePoint(0x800, 0, listLength - 1);
+        int i;
+        for (i = 1; i <= 0x10; ++i) {
+            list4kStarts[i] = findCodePoint(i << 12, list4kStarts[i - 1], listLength - 1); // lo = previous slot
+        }
+        list4kStarts[0x11] = listLength - 1; // index of the last list entry in use
+        // Populate latin1Contains, table7FF and bmpBlockBits from the inversion list.
+        initBits();
+    }
+
+    public boolean contains(int c) {
+        if (c <= 0xff) { // NOTE(review): also taken for negative c, which would fail the array index below
+            return (latin1Contains[c]);
+        } else if (c <= 0x7ff) {
+            return ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0); // word = low 6 bits, bit = c >> 6
+        } else if (c < 0xd800 || (c >= 0xe000 && c <= 0xffff)) {
+            int lead = c >> 12;
+            int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001; // bit 0: block value, bit 16: mixed
+            if (twoBits <= 1) {
+                // The mixed flag is clear: all 64 code points with the same bits 15..6
+                // are either in the set or not, and bit 0 says which.
+                return (0 != twoBits);
+            } else {
+                // Mixed block: look up the code point in its 4k block of code points.
+                return containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1]);
+            }
+        } else if (c <= 0x10ffff) {
+            // surrogate or supplementary code point: search the list from the U+D000 slot to its end
+            return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
+        } else {
+            // Out-of-range code points get false, consistent with long-standing
+            // behavior of UnicodeSet.contains(c).
+            return false;
+        }
+    }
+
+    /**
+     * Span the initial substring of s, beginning at start, for which every code point c satisfies
+     * contains(c) (any spanCondition other than NOT_CONTAINED) or !contains(c) (NOT_CONTAINED).
+     * The lookup of contains(c) is manually inlined to avoid call overhead. A lead surrogate followed by
+     * a trail surrogate is tested as one supplementary code point; an unpaired surrogate is tested by itself.
+     *
+     * @param s The text to scan
+     * @param start The start index
+     * @param spanCondition NOT_CONTAINED to span code points outside the set, anything else to span those inside
+     * @param outCount If not null: Receives the number of code points in the span.
+     * @return the limit (exclusive end) of the span
+     */
+    public final int span(CharSequence s, int start, SpanCondition spanCondition,
+            OutputInt outCount) {
+        char c, c2;
+        int i = start;
+        int limit = s.length();
+        int numSupplementary = 0; // surrogate pairs consumed; each spans two chars but is one code point
+        if (SpanCondition.NOT_CONTAINED != spanCondition) {
+            // span: advance while the current code point is in the set
+            while (i < limit) {
+                c = s.charAt(i);
+                if (c <= 0xff) {
+                    if (!latin1Contains[c]) {
+                        break;
+                    }
+                } else if (c <= 0x7ff) {
+                    if ((table7FF[c & 0x3f] & (1 << (c >> 6))) == 0) {
+                        break;
+                    }
+                } else if (c < 0xd800 ||
+                           c >= 0xdc00 || (i + 1) == limit || (c2 = s.charAt(i + 1)) < 0xdc00 || c2 >= 0xe000) {
+                    int lead = c >> 12;
+                    int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
+                    if (twoBits <= 1) {
+                        // All 64 code points with the same bits 15..6
+                        // are either in the set or not.
+                        if (twoBits == 0) {
+                            break;
+                        }
+                    } else {
+                        // Look up the code point in its 4k block of code points.
+                        if (!containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) {
+                            break;
+                        }
+                    }
+                } else {
+                    // surrogate pair: c is a lead surrogate and c2 is the trail surrogate right after it
+                    int supplementary = UCharacterProperty.getRawSupplementary(c, c2);
+                    if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
+                        break;
+                    }
+                    ++numSupplementary;
+                    ++i; // step over the trail surrogate as well
+                }
+                ++i;
+            }
+        } else {
+            // span not: advance while the current code point is NOT in the set
+            while (i < limit) {
+                c = s.charAt(i);
+                if (c <= 0xff) {
+                    if (latin1Contains[c]) {
+                        break;
+                    }
+                } else if (c <= 0x7ff) {
+                    if ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0) {
+                        break;
+                    }
+                } else if (c < 0xd800 ||
+                           c >= 0xdc00 || (i + 1) == limit || (c2 = s.charAt(i + 1)) < 0xdc00 || c2 >= 0xe000) {
+                    int lead = c >> 12;
+                    int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
+                    if (twoBits <= 1) {
+                        // All 64 code points with the same bits 15..6
+                        // are either in the set or not.
+                        if (twoBits != 0) {
+                            break;
+                        }
+                    } else {
+                        // Look up the code point in its 4k block of code points.
+                        if (containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) {
+                            break;
+                        }
+                    }
+                } else {
+                    // surrogate pair: c is a lead surrogate and c2 is the trail surrogate right after it
+                    int supplementary = UCharacterProperty.getRawSupplementary(c, c2);
+                    if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
+                        break;
+                    }
+                    ++numSupplementary;
+                    ++i; // step over the trail surrogate as well
+                }
+                ++i;
+            }
+        }
+        if (outCount != null) {
+            int spanLength = i - start;
+            outCount.value = spanLength - numSupplementary;  // number of code points
+        }
+        return i;
+    }
+
+    /**
+     * Symmetrical with span(): scans backward from limit (exclusive) while each code point c satisfies
+     * contains(c), or !contains(c) when spanCondition is NOT_CONTAINED. Requires 0 < limit <= s.length();
+     * the first statement reads s.charAt(limit - 1) without a bounds check.
+     *
+     * @return The string index which starts the span (i.e. inclusive).
+     */
+    public final int spanBack(CharSequence s, int limit, SpanCondition spanCondition) {
+        char c, c2;
+
+        if (SpanCondition.NOT_CONTAINED != spanCondition) {
+            // span: move backward while the current code point is in the set
+            for (;;) {
+                c = s.charAt(--limit);
+                if (c <= 0xff) {
+                    if (!latin1Contains[c]) {
+                        break;
+                    }
+                } else if (c <= 0x7ff) {
+                    if ((table7FF[c & 0x3f] & (1 << (c >> 6))) == 0) {
+                        break;
+                    }
+                } else if (c < 0xd800 || // (this first test is subsumed by c < 0xdc00 on the next line)
+                           c < 0xdc00 || 0 == limit || (c2 = s.charAt(limit - 1)) < 0xd800 || c2 >= 0xdc00) {
+                    int lead = c >> 12;
+                    int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
+                    if (twoBits <= 1) {
+                        // All 64 code points with the same bits 15..6
+                        // are either in the set or not.
+                        if (twoBits == 0) {
+                            break;
+                        }
+                    } else {
+                        // Look up the code point in its 4k block of code points.
+                        if (!containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) {
+                            break;
+                        }
+                    }
+                } else {
+                    // surrogate pair: c is a trail surrogate and c2 is the lead surrogate right before it
+                    int supplementary = UCharacterProperty.getRawSupplementary(c2, c);
+                    if (!containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
+                        break;
+                    }
+                    --limit; // step back over the lead surrogate as well
+                }
+                if (0 == limit) {
+                    return 0;
+                }
+            }
+        } else {
+            // span not: move backward while the current code point is NOT in the set
+            for (;;) {
+                c = s.charAt(--limit);
+                if (c <= 0xff) {
+                    if (latin1Contains[c]) {
+                        break;
+                    }
+                } else if (c <= 0x7ff) {
+                    if ((table7FF[c & 0x3f] & (1 << (c >> 6))) != 0) {
+                        break;
+                    }
+                } else if (c < 0xd800 || // (this first test is subsumed by c < 0xdc00 on the next line)
+                           c < 0xdc00 || 0 == limit || (c2 = s.charAt(limit - 1)) < 0xd800 || c2 >= 0xdc00) {
+                    int lead = c >> 12;
+                    int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
+                    if (twoBits <= 1) {
+                        // All 64 code points with the same bits 15..6
+                        // are either in the set or not.
+                        if (twoBits != 0) {
+                            break;
+                        }
+                    } else {
+                        // Look up the code point in its 4k block of code points.
+                        if (containsSlow(c, list4kStarts[lead], list4kStarts[lead + 1])) {
+                            break;
+                        }
+                    }
+                } else {
+                    // surrogate pair: c is a trail surrogate and c2 is the lead surrogate right before it
+                    int supplementary = UCharacterProperty.getRawSupplementary(c2, c);
+                    if (containsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11])) {
+                        break;
+                    }
+                    --limit; // step back over the lead surrogate as well
+                }
+                if (0 == limit) {
+                    return 0;
+                }
+            }
+        }
+        return limit + 1; // the loop broke on the char at index limit, so the span starts just after it
+    }
+
+    /**
+     * Set the bits for values start..limit-1 in "vertical" bit organization: value v is bit (v >> 6) of table[v & 0x3f]. start<limit<=0x800
+     */
+    private static void set32x64Bits(int[] table, int start, int limit) {
+        assert (64 == table.length);
+        int lead = start >> 6;  // Named for UTF-8 2-byte lead byte with upper 5 bits.
+        int trail = start & 0x3f;  // Named for UTF-8 2-byte trail byte with lower 6 bits.
+
+        // Set one bit indicating an all-one block.
+        int bits = 1 << lead;
+        if ((start + 1) == limit) { // Single-character shortcut.
+            table[trail] |= bits;
+            return;
+        }
+
+        int limitLead = limit >> 6;
+        int limitTrail = limit & 0x3f;
+
+        if (lead == limitLead) {
+            // Partial vertical bit column.
+            while (trail < limitTrail) {
+                table[trail++] |= bits;
+            }
+        } else {
+            // Partial vertical bit column,
+            // followed by a bit rectangle,
+            // followed by another partial vertical bit column.
+            if (trail > 0) {
+                do {
+                    table[trail++] |= bits;
+                } while (trail < 64);
+                ++lead; // the first column is finished; the rectangle starts at the next bit
+            }
+            if (lead < limitLead) {
+                bits = ~((1 << lead) - 1); // every bit position >= lead
+                if (limitLead < 0x20) {
+                    bits &= (1 << limitLead) - 1; // ...restricted to bit positions < limitLead
+                }
+                for (trail = 0; trail < 64; ++trail) {
+                    table[trail] |= bits;
+                }
+            }
+            // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
+            // In that case, bits=1<<limitLead == 1<<0 == 1
+            // (because Java << uses only the lower 5 bits of the shift operand)
+            // but the bits value is not used because trail<limitTrail is already false.
+            bits = 1 << limitLead;
+            for (trail = 0; trail < limitTrail; ++trail) {
+                table[trail] |= bits;
+            }
+        }
+    }
+
+    private void initBits() { // fills latin1Contains, table7FF and bmpBlockBits from the inversion list
+        int start, limit; // current range [start, limit) read from consecutive list entries
+        int listIndex = 0;
+
+        // Set latin1Contains[].
+        do {
+            start = list[listIndex++];
+            if (listIndex < listLength) {
+                limit = list[listIndex++];
+            } else {
+                limit = 0x110000; // no limit entry left: the range runs to the end of the code space
+            }
+            if (start >= 0x100) {
+                break;
+            }
+            do {
+                latin1Contains[start++] = true;
+            } while (start < limit && start < 0x100);
+        } while (limit <= 0x100); // stop once a range reaches past U+00FF
+
+        // Set table7FF[].
+        while (start < 0x800) {
+            set32x64Bits(table7FF, start, limit <= 0x800 ? limit : 0x800); // clamp the range to the table's domain
+            if (limit > 0x800) {
+                start = 0x800; // the rest of this range is handled by the bmpBlockBits loop below
+                break;
+            }
+
+            start = list[listIndex++];
+            if (listIndex < listLength) {
+                limit = list[listIndex++];
+            } else {
+                limit = 0x110000;
+            }
+        }
+
+        // Set bmpBlockBits[].
+        int minStart = 0x800; // ranges are clipped to begin no lower than this
+        while (start < 0x10000) {
+            if (limit > 0x10000) {
+                limit = 0x10000;
+            }
+
+            if (start < minStart) {
+                start = minStart;
+            }
+            if (start < limit) { // Else: Another range entirely in a known mixed-value block.
+                if (0 != (start & 0x3f)) {
+                    // Mixed-value block of 64 code points.
+                    start >>= 6; // start is now a 64-code-point block index
+                    bmpBlockBits[start & 0x3f] |= 0x10001 << (start >> 6); // sets both the value bit and the mixed bit
+                    start = (start + 1) << 6; // Round up to the next block boundary.
+                    minStart = start; // Ignore further ranges in this block.
+                }
+                if (start < limit) {
+                    if (start < (limit & ~0x3f)) {
+                        // Multiple all-ones blocks of 64 code points each.
+                        set32x64Bits(bmpBlockBits, start >> 6, limit >> 6);
+                    }
+
+                    if (0 != (limit & 0x3f)) {
+                        // Mixed-value block of 64 code points.
+                        limit >>= 6;
+                        bmpBlockBits[limit & 0x3f] |= 0x10001 << (limit >> 6);
+                      limit = (limit + 1) << 6; // Round up to the next block boundary.
+                        minStart = limit; // Ignore further ranges in this block.
+                    }
+                }
+            }
+
+            if (limit == 0x10000) {
+                break;
+          }
+
+            start = list[listIndex++];
+            if (listIndex < listLength) {
+                limit = list[listIndex++];
+            } else {
+                limit = 0x110000;
+            }
+        }
+    }
+
+    /**
+     * Same as UnicodeSet.findCodePoint(int c) except that the binary search is restricted for finding code
+     * points in a certain range.
+     *
+     * For restricting the search for finding in the range start..end, pass in lo=findCodePoint(start) and
+     * hi=findCodePoint(end) with 0<=lo<=hi<len. findCodePoint(c) defaults to lo=0 and hi=len-1.
+     *
+     * @param c
+     *            a character in a subrange of MIN_VALUE..MAX_VALUE
+     * @param lo
+     *            The lowest index to be returned.
+     * @param hi
+     *            The highest index to be returned.
+     * @return the smallest integer i in the range lo..hi, inclusive, such that c < list[i]
+     */
+    private int findCodePoint(int c, int lo, int hi) {
+        /* Examples:
+                                           findCodePoint(c)
+           set              list[]         c=0 1 3 4 7 8
+           ===              ==============   ===========
+           []               [110000]         0 0 0 0 0 0
+           [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2
+           [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2
+           [:Any:]          [0, 110000]      1 1 1 1 1 1
+         */
+
+        // Return the smallest i such that c < list[i]. Assume
+        // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
+        if (c < list[lo])
+            return lo;
+        // High runner test. c is often after the last range, so an
+        // initial check for this condition pays off.
+        if (lo >= hi || c >= list[hi - 1]) // lo >= hi is tested first, so list[hi - 1] is read only when lo < hi
+            return hi;
+        // invariant: c >= list[lo]
+        // invariant: c < list[hi]
+        for (;;) {
+            int i = (lo + hi) >>> 1; // midpoint of the current window
+            if (i == lo) {
+                break; // Found! lo and hi are adjacent, so hi is the answer.
+            } else if (c < list[i]) {
+                hi = i;
+            } else {
+                lo = i;
+            }
+        }
+        return hi;
+    }
+
+    private final boolean containsSlow(int c, int lo, int hi) { // membership test via a search limited to lo..hi
+        return (0 != (findCodePoint(c, lo, hi) & 1)); // an odd result index means c lies inside a listed range
+    }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/CharTrie.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ ******************************************************************************
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ******************************************************************************
+ */
+
+package jdk.internal.icu.impl;
+
+import jdk.internal.icu.text.UTF16;
+
+import java.io.DataInputStream;
+import java.io.InputStream;
+import java.io.IOException;
+
+/**
+ * Trie implementation which stores data in char, 16 bits.
+ * @author synwee
+ * @see jdk.internal.icu.impl.Trie
+ * @since release 2.1, Jan 01 2002
+ */
+
+ // note that i need to handle the block calculations later, since chartrie
+ // in icu4c uses the same index array.
+public class CharTrie extends Trie
+{
+    // public constructors ---------------------------------------------
+
+    /**
+    * <p>Constructs a char trie from serialized trie data.</p>
+    * <p>The 32-bit-aligned input stream is unserialized and its content
+    * becomes the data of this trie.</p>
+    * @param inputStream file input stream to an ICU data file, containing
+    *                    the trie
+    * @param dataManipulate object which provides methods to parse the char
+    *                        data
+    * @throws IOException thrown when data reading fails
+    * @draft 2.1
+    */
+    public CharTrie(InputStream inputStream, DataManipulate dataManipulate)
+            throws IOException
+    {
+        super(inputStream, dataManipulate);
+
+        final boolean holdsCharData = isCharTrie();
+        if (!holdsCharData) {
+            throw new IllegalArgumentException("Data given does not belong to a char trie.");
+        }
+    }
+
+    // public methods --------------------------------------------------
+
+    /**
+     * Looks up the value stored for a code point.
+     * When the lookup yields no offset for the code point, the default
+     * value is returned instead.
+     * @param ch codepoint
+     * @return the char value for the code point, or the default value
+     */
+    public final char getCodePointValue(int ch)
+    {
+        // Fast path for U+0000..U+D7FF; the arithmetic is an inlined copy
+        // of getRawOffset().
+        if (ch >= 0 && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
+            final int block = m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_;
+            return m_data_[block + (ch & INDEX_STAGE_3_MASK_)];
+        }
+
+        // General path for U+D800..U+10FFFF; negative input also ends up
+        // here.
+        final int offset = getCodePointOffset(ch);
+        if (offset < 0) {
+            // A negative offset signals a failed lookup; fall back to the
+            // default value m_initialValue_.
+            return m_initialValue_;
+        }
+        return m_data_[offset];
+    }
+
+    /**
+    * Returns the data value that a lead surrogate character maps to.
+    * The value may carry folding offset information for the trail
+    * surrogate that follows.
+    * Results are not guaranteed to be correct when a trail surrogate is
+    * passed in.
+    * @param ch lead surrogate character
+    * @return data value
+    */
+    public final char getLeadValue(char ch)
+    {
+        return m_data_[getLeadOffset(ch)];
+    }
+
+    // protected methods -----------------------------------------------
+
+    /**
+    * <p>Reads the trie content from the input stream into a single array
+    * that serves as both the index and the data array.</p>
+    * @param inputStream data input stream containing trie data
+    * @exception IOException thrown when data reading fails
+    */
+    protected final void unserialize(InputStream inputStream) throws IOException
+    {
+        final DataInputStream in = new DataInputStream(inputStream);
+        final int total = m_dataOffset_ + m_dataLength_;
+        m_index_ = new char[total];
+        for (int pos = 0; pos < total; ++pos) {
+            m_index_[pos] = in.readChar();
+        }
+        // Index and data share one array; the default value is the unit at m_dataOffset_.
+        m_data_ = m_index_;
+        m_initialValue_ = m_data_[m_dataOffset_];
+    }
+
+    /**
+    * Computes the offset of the data that a surrogate pair maps to.
+    * @param lead lead surrogate
+    * @param trail trailing surrogate
+    * @return offset to data, or -1 when the folding offset is not positive
+    * @draft 2.1
+    */
+    protected final int getSurrogateOffset(char lead, char trail)
+    {
+        if (m_dataManipulate_ == null) {
+            throw new NullPointerException(
+                             "The field DataManipulate in this Trie is null");
+        }
+
+        // The lead surrogate's value yields the fold position used for the
+        // trail surrogate.
+        final int foldingOffset =
+            m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
+        if (foldingOffset <= 0) {
+            // No usable folding offset: return the error marker -1, which
+            // getCodePointValue() treats like any other negative offset.
+            return -1;
+        }
+        // Resolve the real data offset from the folded lead/trail units.
+        return getRawOffset(foldingOffset, (char)(trail & SURROGATE_MASK_));
+    }
+
+    // private data members --------------------------------------------
+
+    /**
+     * Default value, returned when a lookup yields a negative offset
+     */
+    private char m_initialValue_;
+    /**
+     * Array of char data (the same array object as the index)
+     */
+    private char[] m_data_;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/CharacterIteratorWrapper.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package jdk.internal.icu.impl;
+
+import java.text.CharacterIterator;
+
+import jdk.internal.icu.text.UCharacterIterator;
+
+/**
+ * This class is a wrapper around CharacterIterator and implements the
+ * UCharacterIterator protocol
+ * @author ram
+ */
+
+public class CharacterIteratorWrapper extends UCharacterIterator {
+
+    private CharacterIterator iterator;
+
+    public CharacterIteratorWrapper(CharacterIterator iter){
+        if(iter==null){
+            throw new IllegalArgumentException();
+        }
+        iterator     = iter;
+    }
+
+    /**
+     * @see UCharacterIterator#current()
+     */
+    public int current() {
+        // CharacterIterator.DONE is a char; translate it to this class's
+        // DONE sentinel and pass every other code unit through unchanged.
+        // Note: a U+FFFF in the text is reported as DONE as well.
+        final int c = iterator.current();
+        return (c == CharacterIterator.DONE) ? DONE : c;
+    }
+
+    /**
+     * @see UCharacterIterator#getLength()
+     */
+    public int getLength() {
+        return (iterator.getEndIndex() - iterator.getBeginIndex());
+    }
+
+    /**
+     * @see UCharacterIterator#getIndex()
+     */
+    public int getIndex() {
+        return iterator.getIndex();
+    }
+
+    /**
+     * @see UCharacterIterator#next()
+     */
+    public int next() {
+        // Report the unit at the current position, then advance past it.
+        // As in current(), CharacterIterator.DONE is translated to this
+        // class's DONE sentinel.
+        final int c = iterator.current();
+        iterator.next();
+        return (c == CharacterIterator.DONE) ? DONE : c;
+    }
+
+    /**
+     * @see UCharacterIterator#previous()
+     */
+    public int previous() {
+        // Step back first, then report the unit now under the index;
+        // java.text.CharacterIterator.previous() yields DONE when the
+        // index is already at the start of the text.
+        final int c = iterator.previous();
+        return (c == CharacterIterator.DONE) ? DONE : c;
+    }
+
+    /**
+     * @see UCharacterIterator#setIndex(int)
+     */
+    public void setIndex(int index) {
+        iterator.setIndex(index);
+    }
+
+    /**
+     * @see UCharacterIterator#getText(char[])
+     */
+    public int getText(char[] fillIn, int offset){
+        final int length = iterator.getEndIndex() - iterator.getBeginIndex();
+        final int savedIndex = iterator.getIndex();
+        if(offset < 0 || offset > fillIn.length - length){ // overflow-safe bounds check
+            throw new IndexOutOfBoundsException(Integer.toString(length));
+        }
+        // Copy by count: a U+FFFF in the text equals DONE and must not end the copy.
+        char ch = iterator.first();
+        for (int i = 0; i < length; i++, ch = iterator.next()) {
+            fillIn[offset + i] = ch;
+        }
+        iterator.setIndex(savedIndex); // restore the caller's position
+        return length;
+    }
+
+    /**
+     * Creates a clone of this iterator.  Clones the underlying character iterator.
+     * @see UCharacterIterator#clone()
+     */
+    public Object clone(){
+        try {
+            CharacterIteratorWrapper result = (CharacterIteratorWrapper) super.clone();
+            result.iterator = (CharacterIterator)this.iterator.clone();
+            return result;
+        } catch (CloneNotSupportedException e) {
+            return null; // only invoked if bad underlying character iterator
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/ICUBinary.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,324 @@
+/*
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+
+package jdk.internal.icu.impl;
+
+import java.io.DataInputStream;
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+
+import jdk.internal.icu.util.VersionInfo;
+
+public final class ICUBinary {
+
+    private static final class IsAcceptable implements Authenticate {
+        @Override
+        public boolean isDataVersionAcceptable(byte version[]) {
+            return version[0] == 1;
+        }
+    }
+
+    // public inner interface ------------------------------------------------
+
+    /**
+     * Special interface for data authentication
+     */
+    public static interface Authenticate
+    {
+        /**
+         * Method used in ICUBinary.readHeader() to provide data format
+         * authentication.
+         * @param version version of the current data
+         * @return true if dataformat is an acceptable version, false otherwise
+         */
+        public boolean isDataVersionAcceptable(byte version[]);
+    }
+
+    // public methods --------------------------------------------------------
+
+    /**
+     * Loads an ICU binary data file and returns it as a ByteBuffer.
+     * The buffer wraps a freshly read byte array, so its position etc. can be modified.
+     * @param itemPath Resource path of the ICU data item, resolved against this class.
+     * @return The data as a ByteBuffer whose limit is the number of bytes read.
+     * @throws UncheckedIOException if the resource is missing or cannot be read.
+     */
+    public static ByteBuffer getRequiredData(String itemPath) {
+        final Class<ICUBinary> root = ICUBinary.class;
+
+        try (InputStream is = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
+                public InputStream run() {
+                    return root.getResourceAsStream(itemPath);
+                }
+            })) {
+            // getResourceAsStream() returns null for a missing resource; report
+            // that by name instead of failing with a bare NullPointerException.
+            if (is == null) {
+                throw new IOException("ICU data file not found: " + itemPath);
+            }
+
+            // is.available() may return 0, or 1, or the total number of bytes in the stream,
+            // or some other number.
+            // Do not try to use is.available() == 0 to find the end of the stream!
+            // More than an ICU header's worth (32 bytes) available: with luck that is
+            // the total size. Otherwise start small; empty .res files are even smaller.
+            final int avail = is.available();
+            byte[] bytes = new byte[avail > 32 ? avail : 128];
+            // Call is.read(...) until one returns a negative value.
+            int length = 0;
+            for(;;) {
+                if (length < bytes.length) {
+                    int numRead = is.read(bytes, length, bytes.length - length);
+                    if (numRead < 0) {
+                        break;  // end of stream
+                    }
+                    length += numRead;
+                } else {
+                    // See if we are at the end of the stream before we grow the array.
+                    int nextByte = is.read();
+                    if (nextByte < 0) {
+                        break;
+                    }
+                    int capacity = 2 * bytes.length;
+                    if (capacity < 128) {
+                        capacity = 128;
+                    } else if (capacity < 0x4000) {
+                        capacity *= 2;  // Grow faster until we reach 16kB.
+                    }
+                    bytes = Arrays.copyOf(bytes, capacity);
+                    bytes[length++] = (byte) nextByte;
+                }
+            }
+            return ByteBuffer.wrap(bytes, 0, length);
+        }
+        catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    /**
+     * Same as readHeader(), but returns a VersionInfo rather than a compact int.
+     */
+    public static VersionInfo readHeaderAndDataVersion(ByteBuffer bytes,
+                                                             int dataFormat,
+                                                             Authenticate authenticate)
+                                                                throws IOException {
+        return getVersionInfoFromCompactInt(readHeader(bytes, dataFormat, authenticate));
+    }
+
+    private static final byte BIG_ENDIAN_ = 1;
+    public static final byte[] readHeader(InputStream inputStream,
+                                        byte dataFormatIDExpected[],
+                                        Authenticate authenticate)
+                                                          throws IOException
+    {
+        DataInputStream input = new DataInputStream(inputStream);
+        char headersize = input.readChar();
+        int readcount = 2;
+        //reading the header format
+        byte magic1 = input.readByte();
+        readcount ++;
+        byte magic2 = input.readByte();
+        readcount ++;
+        if (magic1 != MAGIC1 || magic2 != MAGIC2) {
+            throw new IOException(MAGIC_NUMBER_AUTHENTICATION_FAILED_);
+        }
+
+        input.readChar(); // reading size
+        readcount += 2;
+        input.readChar(); // reading reserved word
+        readcount += 2;
+        byte bigendian    = input.readByte();
+        readcount ++;
+        byte charset      = input.readByte();
+        readcount ++;
+        byte charsize     = input.readByte();
+        readcount ++;
+        input.readByte(); // reading reserved byte
+        readcount ++;
+
+        byte dataFormatID[] = new byte[4];
+        input.readFully(dataFormatID);
+        readcount += 4;
+        byte dataVersion[] = new byte[4];
+        input.readFully(dataVersion);
+        readcount += 4;
+        byte unicodeVersion[] = new byte[4];
+        input.readFully(unicodeVersion);
+        readcount += 4;
+        if (headersize < readcount) {
+            throw new IOException("Internal Error: Header size error");
+        }
+        input.skipBytes(headersize - readcount);
+
+        if (bigendian != BIG_ENDIAN_ || charset != CHAR_SET_
+            || charsize != CHAR_SIZE_
+            || !Arrays.equals(dataFormatIDExpected, dataFormatID)
+            || (authenticate != null
+                && !authenticate.isDataVersionAcceptable(dataVersion))) {
+            throw new IOException(HEADER_AUTHENTICATION_FAILED_);
+        }
+        return unicodeVersion;
+    }
+
+    /**
+     * Reads an ICU data header, checks the data format, and returns the data version.
+     *
+     * <p>Assumes that the ByteBuffer position is 0 on input.
+     * The buffer byte order is set according to the data.
+     * The buffer position is advanced past the header (including UDataInfo and comment).
+     *
+     * <p>See C++ ucmndata.h and unicode/udata.h.
+     *
+     * @return dataVersion
+     * @throws IOException if this is not a valid ICU data item of the expected dataFormat
+     */
+    public static int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate)
+            throws IOException {
+        assert bytes.position() == 0;
+        byte magic1 = bytes.get(2);
+        byte magic2 = bytes.get(3);
+        if (magic1 != MAGIC1 || magic2 != MAGIC2) {
+            throw new IOException(MAGIC_NUMBER_AUTHENTICATION_FAILED_);
+        }
+
+        byte isBigEndian = bytes.get(8);
+        byte charsetFamily = bytes.get(9);
+        byte sizeofUChar = bytes.get(10);
+        if (isBigEndian < 0 || 1 < isBigEndian ||
+                charsetFamily != CHAR_SET_ || sizeofUChar != CHAR_SIZE_) {
+            throw new IOException(HEADER_AUTHENTICATION_FAILED_);
+        }
+        bytes.order(isBigEndian != 0 ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN);
+
+        int headerSize = bytes.getChar(0);
+        int sizeofUDataInfo = bytes.getChar(4);
+        if (sizeofUDataInfo < 20 || headerSize < (sizeofUDataInfo + 4)) {
+            throw new IOException("Internal Error: Header size error");
+        }
+        // TODO: Change Authenticate to take int major, int minor, int milli, int micro
+        // to avoid array allocation.
+        byte[] formatVersion = new byte[] {
+            bytes.get(16), bytes.get(17), bytes.get(18), bytes.get(19)
+        };
+        if (bytes.get(12) != (byte)(dataFormat >> 24) ||
+                bytes.get(13) != (byte)(dataFormat >> 16) ||
+                bytes.get(14) != (byte)(dataFormat >> 8) ||
+                bytes.get(15) != (byte)dataFormat ||
+                (authenticate != null && !authenticate.isDataVersionAcceptable(formatVersion))) {
+            throw new IOException(HEADER_AUTHENTICATION_FAILED_ +
+                    String.format("; data format %02x%02x%02x%02x, format version %d.%d.%d.%d",
+                            bytes.get(12), bytes.get(13), bytes.get(14), bytes.get(15),
+                            formatVersion[0] & 0xff, formatVersion[1] & 0xff,
+                            formatVersion[2] & 0xff, formatVersion[3] & 0xff));
+        }
+
+        bytes.position(headerSize);
+        return  // dataVersion
+                ((int)bytes.get(20) << 24) |
+                ((bytes.get(21) & 0xff) << 16) |
+                ((bytes.get(22) & 0xff) << 8) |
+                (bytes.get(23) & 0xff);
+    }
+
+    public static void skipBytes(ByteBuffer bytes, int skipLength) {
+        if (skipLength > 0) {
+            bytes.position(bytes.position() + skipLength);
+        }
+    }
+
+    public static byte[] getBytes(ByteBuffer bytes, int length, int additionalSkipLength) {
+        byte[] dest = new byte[length];
+        bytes.get(dest);
+        if (additionalSkipLength > 0) {
+            skipBytes(bytes, additionalSkipLength);
+        }
+        return dest;
+    }
+
+    public static String getString(ByteBuffer bytes, int length, int additionalSkipLength) {
+        CharSequence cs = bytes.asCharBuffer();
+        String s = cs.subSequence(0, length).toString();
+        skipBytes(bytes, length * 2 + additionalSkipLength);
+        return s;
+    }
+
+    public static char[] getChars(ByteBuffer bytes, int length, int additionalSkipLength) {
+        char[] dest = new char[length];
+        bytes.asCharBuffer().get(dest);
+        skipBytes(bytes, length * 2 + additionalSkipLength);
+        return dest;
+    }
+
+    public static int[] getInts(ByteBuffer bytes, int length, int additionalSkipLength) {
+        int[] dest = new int[length];
+        bytes.asIntBuffer().get(dest);
+        skipBytes(bytes, length * 4 + additionalSkipLength);
+        return dest;
+    }
+
+    /**
+     * Returns a VersionInfo for the bytes in the compact version integer.
+     */
+    public static VersionInfo getVersionInfoFromCompactInt(int version) {
+        return VersionInfo.getInstance(
+                version >>> 24, (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
+    }
+
+    // private variables -------------------------------------------------
+
+    /**
+    * Magic numbers to authenticate the data file
+    */
+    private static final byte MAGIC1 = (byte)0xda;
+    private static final byte MAGIC2 = (byte)0x27;
+
+    /**
+    * File format authentication values
+    */
+    private static final byte CHAR_SET_ = 0;
+    private static final byte CHAR_SIZE_ = 2;
+
+    /**
+    * Error messages
+    */
+    private static final String MAGIC_NUMBER_AUTHENTICATION_FAILED_ =
+                       "ICUBinary data file error: Magic number authentication failed";
+    private static final String HEADER_AUTHENTICATION_FAILED_ =
+        "ICUBinary data file error: Header authentication failed";
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/Norm2AllModes.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ *   Copyright (C) 2009-2014, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *******************************************************************************
+ */
+
+package jdk.internal.icu.impl;
+
+import java.io.IOException;
+
+import jdk.internal.icu.text.Normalizer2;
+import jdk.internal.icu.util.VersionInfo;
+
+public final class Norm2AllModes {
+    // Public API dispatch via Normalizer2 subclasses -------------------------- ***
+
+    // Normalizer2 implementation for the old UNORM_NONE.
+    public static final class NoopNormalizer2 extends Normalizer2 {
+        @Override
+        public StringBuilder normalize(CharSequence src, StringBuilder dest) {
+            // Source and destination must be distinct objects.
+            if (dest == src) {
+                throw new IllegalArgumentException();
+            }
+            dest.setLength(0);
+            return dest.append(src);
+        }
+
+        @Override
+        public Appendable normalize(CharSequence src, Appendable dest) {
+            if (dest == src) {
+                throw new IllegalArgumentException();
+            }
+            try {
+                return dest.append(src);
+            } catch (IOException e) {
+                // Surface an I/O failure of the Appendable as an InternalError.
+                throw new InternalError(e.toString(), e);
+            }
+        }
+
+        @Override
+        public StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second) {
+            // Nothing to normalize here: this is a plain append.
+            if (first == second) {
+                throw new IllegalArgumentException();
+            }
+            return first.append(second);
+        }
+
+        @Override
+        public StringBuilder append(StringBuilder first, CharSequence second) {
+            // Same contract as normalizeSecondAndAppend() for this no-op mode.
+            if (first == second) {
+                throw new IllegalArgumentException();
+            }
+            return first.append(second);
+        }
+
+        @Override
+        public String getDecomposition(int c) {
+            return null;
+        }
+
+        // No need to override the default getRawDecomposition().
+        @Override
+        public boolean isNormalized(CharSequence s) { return true; }
+
+        @Override
+        public int spanQuickCheckYes(CharSequence s) { return s.length(); }
+
+        @Override
+        public boolean hasBoundaryBefore(int c) { return true; }
+    }
+
+    // Intermediate class:
+    // Has NormalizerImpl and does boilerplate argument checking and setup.
+    public abstract static class Normalizer2WithImpl extends Normalizer2 {
+        public Normalizer2WithImpl(NormalizerImpl ni) {
+            this.impl = ni;
+        }
+
+        // normalize
+        @Override
+        public StringBuilder normalize(CharSequence src, StringBuilder dest) {
+            if (dest == src) {
+                throw new IllegalArgumentException();
+            }
+            dest.setLength(0);
+            normalize(src, new NormalizerImpl.ReorderingBuffer(impl, dest, src.length()));
+            return dest;
+        }
+
+        @Override
+        public Appendable normalize(CharSequence src, Appendable dest) {
+            if (dest == src) {
+                throw new IllegalArgumentException();
+            }
+            final NormalizerImpl.ReorderingBuffer buffer =
+                new NormalizerImpl.ReorderingBuffer(impl, dest, src.length());
+            normalize(src, buffer);
+            buffer.flush();
+            return dest;
+        }
+
+        protected abstract void normalize(CharSequence src, NormalizerImpl.ReorderingBuffer buffer);
+
+        // normalize and append
+        @Override
+        public StringBuilder normalizeSecondAndAppend(StringBuilder first, CharSequence second) {
+            return normalizeSecondAndAppend(first, second, true);
+        }
+
+        @Override
+        public StringBuilder append(StringBuilder first, CharSequence second) {
+            return normalizeSecondAndAppend(first, second, false);
+        }
+
+        public StringBuilder normalizeSecondAndAppend(
+                StringBuilder first, CharSequence second, boolean doNormalize) {
+            if (first == second) {
+                throw new IllegalArgumentException();
+            }
+            final int capacity = first.length() + second.length();
+            normalizeAndAppend(second, doNormalize,
+                new NormalizerImpl.ReorderingBuffer(impl, first, capacity));
+            return first;
+        }
+
+        protected abstract void normalizeAndAppend(
+                CharSequence src, boolean doNormalize, NormalizerImpl.ReorderingBuffer buffer);
+
+        @Override
+        public String getDecomposition(int c) {
+            return impl.getDecomposition(c);
+        }
+
+        @Override
+        public int getCombiningClass(int c) {
+            return impl.getCC(impl.getNorm16(c));
+        }
+
+        // quick checks
+        @Override
+        public boolean isNormalized(CharSequence s) {
+            return s.length() == spanQuickCheckYes(s);
+        }
+
+        public final NormalizerImpl impl;
+    }
+
+    public static final class DecomposeNormalizer2 extends Normalizer2WithImpl {
+        public DecomposeNormalizer2(NormalizerImpl ni) {
+            super(ni);
+        }
+
+        @Override
+        protected void normalize(CharSequence src, NormalizerImpl.ReorderingBuffer buffer) {
+            impl.decompose(src, 0, src.length(), buffer);
+        }
+
+        @Override
+        protected void normalizeAndAppend(
+                CharSequence src, boolean doNormalize, NormalizerImpl.ReorderingBuffer buffer) {
+            impl.decomposeAndAppend(src, doNormalize, buffer);
+        }
+
+        @Override
+        public int spanQuickCheckYes(CharSequence s) {
+            return impl.decompose(s, 0, s.length(), null);
+        }
+
+        @Override
+        public boolean hasBoundaryBefore(int c) { return impl.hasDecompBoundaryBefore(c); }
+    }
+
+    public static final class ComposeNormalizer2 extends Normalizer2WithImpl {
+        public ComposeNormalizer2(NormalizerImpl ni, boolean fcc) {
+            super(ni);
+            this.onlyContiguous = fcc;
+        }
+
+        @Override
+        protected void normalize(CharSequence src, NormalizerImpl.ReorderingBuffer buffer) {
+            impl.compose(src, 0, src.length(), onlyContiguous, true, buffer);
+        }
+
+        @Override
+        protected void normalizeAndAppend(
+                CharSequence src, boolean doNormalize, NormalizerImpl.ReorderingBuffer buffer) {
+            impl.composeAndAppend(src, doNormalize, onlyContiguous, buffer);
+        }
+
+        @Override
+        public boolean isNormalized(CharSequence s) {
+            // 5: small destCapacity for substring normalization
+            final NormalizerImpl.ReorderingBuffer scratch =
+                new NormalizerImpl.ReorderingBuffer(impl, new StringBuilder(), 5);
+            return impl.compose(s, 0, s.length(), onlyContiguous, false, scratch);
+        }
+
+        @Override
+        public int spanQuickCheckYes(CharSequence s) {
+            return impl.composeQuickCheck(s, 0, s.length(), onlyContiguous, true) >>> 1;
+        }
+
+        @Override
+        public boolean hasBoundaryBefore(int c) { return impl.hasCompBoundaryBefore(c); }
+
+        private final boolean onlyContiguous;
+    }
+
+    // instance cache ---------------------------------------------------------- ***
+
+    private Norm2AllModes(NormalizerImpl ni) {
+        this.impl = ni;
+        this.comp = new ComposeNormalizer2(ni, false);
+        this.decomp = new DecomposeNormalizer2(ni);
+    }
+
+    public final NormalizerImpl impl;
+    public final ComposeNormalizer2 comp;
+    public final DecomposeNormalizer2 decomp;
+
+    private static Norm2AllModes getInstanceFromSingleton(Norm2AllModesSingleton singleton) {
+        if (singleton.exception != null) {
+            throw singleton.exception;
+        }
+        return singleton.allModes;
+    }
+
+    public static Norm2AllModes getNFCInstance() {
+        return getInstanceFromSingleton(NFCSingleton.INSTANCE);
+    }
+
+    public static Norm2AllModes getNFKCInstance() {
+        return getInstanceFromSingleton(NFKCSingleton.INSTANCE);
+    }
+
+    public static final NoopNormalizer2 NOOP_NORMALIZER2 = new NoopNormalizer2();
+
+    private static final class Norm2AllModesSingleton {
+        private Norm2AllModesSingleton(String name) {
+            try {
+                @SuppressWarnings("deprecation")
+                String dataFileName = "/jdk/internal/icu/impl/data/icudt" +
+                    VersionInfo.ICU_DATA_VERSION_PATH + "/" + name + ".nrm";
+                allModes = new Norm2AllModes(new NormalizerImpl().load(dataFileName));
+            } catch (RuntimeException e) {
+                // Keep the failure; getInstanceFromSingleton() rethrows it on access.
+                exception = e;
+            }
+        }
+
+        private Norm2AllModes allModes;
+        private RuntimeException exception;
+    }
+
+    private static final class NFCSingleton {
+        private static final Norm2AllModesSingleton INSTANCE = new Norm2AllModesSingleton("nfc");
+    }
+
+    private static final class NFKCSingleton {
+        private static final Norm2AllModesSingleton INSTANCE = new Norm2AllModesSingleton("nfkc");
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/NormalizerImpl.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,2193 @@
+/*
+ * Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ *   Copyright (C) 2009-2014, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *******************************************************************************
+ */
+package jdk.internal.icu.impl;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import jdk.internal.icu.lang.UCharacter;
+import jdk.internal.icu.text.Normalizer2;
+import jdk.internal.icu.text.UTF16;
+import jdk.internal.icu.util.CodePointTrie;
+import jdk.internal.icu.util.VersionInfo;
+
+// Original filename in ICU4J: Normalizer2Impl.java
+public final class NormalizerImpl {
+    public static final class Hangul {
+        /* Code point bases and counts used for algorithmic Hangul handling */
+        public static final int JAMO_L_BASE=0x1100;     /* base of "lead" jamo */
+        public static final int JAMO_V_BASE=0x1161;     /* base of "vowel" jamo */
+        public static final int JAMO_T_BASE=0x11a7;     /* base of "trail" jamo; index 0 = no trail */
+
+        public static final int HANGUL_BASE=0xac00;
+        public static final int HANGUL_END=0xd7a3;
+
+        public static final int JAMO_L_COUNT=19;
+        public static final int JAMO_V_COUNT=21;
+        public static final int JAMO_T_COUNT=28;
+
+        public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT;
+        public static final int HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT;
+
+        public static boolean isHangul(int c) {
+            return c>=HANGUL_BASE && c<HANGUL_LIMIT;
+        }
+        public static boolean isHangulLV(int c) {
+            int syllableIndex=c-HANGUL_BASE;  // a trail index of 0 means lead+vowel only
+            return syllableIndex>=0 && syllableIndex<HANGUL_COUNT && syllableIndex%JAMO_T_COUNT==0;
+        }
+
+        /**
+         * Appends the jamo decomposition of the Hangul syllable c to buffer.
+         * c is not validated; the result is 2 (lead+vowel) or 3 (lead+vowel+trail).
+         */
+        public static int decompose(int c, Appendable buffer) {
+            int syllableIndex=c-HANGUL_BASE;
+            int trailIndex=syllableIndex%JAMO_T_COUNT;
+            int leadVowelIndex=syllableIndex/JAMO_T_COUNT;
+            // The index arithmetic above cannot throw; only append() may raise IOException.
+            try {
+                buffer.append((char)(JAMO_L_BASE+leadVowelIndex/JAMO_V_COUNT));
+                buffer.append((char)(JAMO_V_BASE+leadVowelIndex%JAMO_V_COUNT));
+                if(trailIndex!=0) {
+                    buffer.append((char)(JAMO_T_BASE+trailIndex));
+                    return 3;
+                }
+                return 2;  // no trail jamo
+            } catch(IOException e) {
+                throw new InternalError(e);
+            }
+        }
+    }
+
+    /**
+     * Writable buffer that takes care of canonical ordering.
+     * Its Appendable methods behave like the C++ implementation's
+     * appendZeroCC() methods.
+     * <p>
+     * If dest is a StringBuilder, then the buffer writes directly to it.
+     * Otherwise, the buffer maintains a StringBuilder for intermediate text segments
+     * until no further changes are necessary and whole segments are appended.
+     * append() methods that take combining-class values always write to the StringBuilder.
+     * Other append() methods flush and append to the Appendable.
+     */
+    public static final class ReorderingBuffer implements Appendable {
+        public ReorderingBuffer(NormalizerImpl ni, Appendable dest, int destCapacity) {
+            impl=ni;
+            app=dest;
+            if (app instanceof StringBuilder) {  // write directly into the caller's StringBuilder
+                appIsStringBuilder=true;
+                str=(StringBuilder)dest;
+                // In Java, the constructor subsumes public void init(int destCapacity)
+                str.ensureCapacity(destCapacity);
+                reorderStart=0;
+                if(str.length()==0) {
+                    lastCC=0;
+                } else {
+                    setIterator();
+                    lastCC=previousCC();  // cc of the last code point already in dest
+                    // Set reorderStart after the last code point with cc<=1 if there is one.
+                    if(lastCC>1) {
+                        while(previousCC()>1) {}
+                    }
+                    reorderStart=codePointLimit;
+                }
+            } else {
+                appIsStringBuilder=false;
+                str=new StringBuilder();  // intermediate segment buffer; written to app by the flush methods
+                reorderStart=0;
+                lastCC=0;
+            }
+        }
+
+        public boolean isEmpty() { return str.length()==0; }
+        public int length() { return str.length(); }
+        public int getLastCC() { return lastCC; }
+
+        public StringBuilder getStringBuilder() { return str; }
+
+        public boolean equals(CharSequence s, int start, int limit) {  // compares all of str with s[start, limit[
+            return UTF16Plus.equal(str, 0, str.length(), s, start, limit);
+        }
+
+        public void append(int c, int cc) {  // appends code point c whose combining class is cc
+            if(lastCC<=cc || cc==0) {  // already in order: append at the end
+                str.appendCodePoint(c);
+                lastCC=cc;
+                if(cc<=1) {
+                    reorderStart=str.length();
+                }
+            } else {
+                insert(c, cc);  // out of order: place c before trailing code points with higher cc
+            }
+        }
+        public void append(CharSequence s, int start, int limit, boolean isNFD,
+                           int leadCC, int trailCC) {
+            if(start==limit) {
+                return;
+            }
+            if(lastCC<=leadCC || leadCC==0) {  // the whole span can be appended without reordering
+                if(trailCC<=1) {
+                    reorderStart=str.length()+(limit-start);
+                } else if(leadCC<=1) {
+                    reorderStart=str.length()+1;  // Ok if not a code point boundary.
+                }
+                str.append(s, start, limit);
+                lastCC=trailCC;
+            } else {
+                int c=Character.codePointAt(s, start);
+                start+=Character.charCount(c);
+                insert(c, leadCC);  // insert first code point
+                while(start<limit) {  // remaining code points go through append(c, cc) one at a time
+                    c=Character.codePointAt(s, start);
+                    start+=Character.charCount(c);
+                    if(start<limit) {
+                        if (isNFD) {
+                            leadCC = getCCFromYesOrMaybe(impl.getNorm16(c));
+                        } else {
+                            leadCC = impl.getCC(impl.getNorm16(c));
+                        }
+                    } else {
+                        leadCC=trailCC;  // last code point: use the caller-supplied trailCC
+                    }
+                    append(c, leadCC);
+                }
+            }
+        }
+        // The following append() methods work like C++ appendZeroCC().
+        // They assume that the cc or trailCC of their input is 0.
+        // Most of them implement Appendable interface methods.
+        @Override
+        public ReorderingBuffer append(char c) {
+            str.append(c);
+            lastCC=0;
+            reorderStart=str.length();
+            return this;
+        }
+        public void appendZeroCC(int c) {
+            str.appendCodePoint(c);
+            lastCC=0;
+            reorderStart=str.length();
+        }
+        @Override
+        public ReorderingBuffer append(CharSequence s) {
+            if(s.length()!=0) {
+                str.append(s);
+                lastCC=0;
+                reorderStart=str.length();
+            }
+            return this;
+        }
+        @Override
+        public ReorderingBuffer append(CharSequence s, int start, int limit) {
+            if(start!=limit) {
+                str.append(s, start, limit);
+                lastCC=0;
+                reorderStart=str.length();
+            }
+            return this;
+        }
+        /**
+         * Flushes from the intermediate StringBuilder to the Appendable,
+         * if they are different objects.
+         * Used after recomposition.
+         * Must be called at the end when writing to a non-StringBuilder Appendable.
+         */
+        public void flush() {
+            if(appIsStringBuilder) {
+                reorderStart=str.length();  // nothing to copy: str is the destination itself
+            } else {
+                try {
+                    app.append(str);
+                    str.setLength(0);
+                    reorderStart=0;
+                } catch(IOException e) {
+                    throw new InternalError(e);  // Avoid declaring "throws IOException".
+                }
+            }
+            lastCC=0;
+        }
+        /**
+         * Flushes from the intermediate StringBuilder to the Appendable,
+         * if they are different objects.
+         * Then appends the new text to the Appendable or StringBuilder.
+         * Normally used after quick check loops find a non-empty sequence.
+         */
+        public ReorderingBuffer flushAndAppendZeroCC(CharSequence s, int start, int limit) {
+            if(appIsStringBuilder) {
+                str.append(s, start, limit);
+                reorderStart=str.length();
+            } else {
+                try {
+                    app.append(str).append(s, start, limit);  // pending segment first, then the new text
+                    str.setLength(0);
+                    reorderStart=0;
+                } catch(IOException e) {
+                    throw new InternalError(e);  // Avoid declaring "throws IOException".
+                }
+            }
+            lastCC=0;
+            return this;
+        }
+        public void remove() {  // clears str; when dest is a StringBuilder, str is dest itself
+            str.setLength(0);
+            lastCC=0;
+            reorderStart=0;
+        }
+        public void removeSuffix(int suffixLength) {  // drops the last suffixLength chars of str
+            int oldLength=str.length();
+            str.delete(oldLength-suffixLength, oldLength);
+            lastCC=0;
+            reorderStart=str.length();
+        }
+
+        // Inserts c somewhere before the last character.
+        // Requires 0<cc<lastCC which implies reorderStart<limit.
+        private void insert(int c, int cc) {
+            for(setIterator(), skipPrevious(); previousCC()>cc;) {}  // walk back past code points with higher cc
+            // insert c at codePointLimit, after the character with prevCC<=cc
+            if(c<=0xffff) {
+                str.insert(codePointLimit, (char)c);
+                if(cc<=1) {
+                    reorderStart=codePointLimit+1;
+                }
+            } else {
+                str.insert(codePointLimit, Character.toChars(c));  // supplementary code point: two chars
+                if(cc<=1) {
+                    reorderStart=codePointLimit+2;
+                }
+            }
+        }
+
+        private final NormalizerImpl impl;
+        private final Appendable app;
+        private final StringBuilder str;
+        private final boolean appIsStringBuilder;
+        private int reorderStart;  // index in str; previousCC() does not look before it
+        private int lastCC;        // combining class recorded for the most recently appended text
+
+        // private backward iterator
+        private void setIterator() { codePointStart=str.length(); }
+        private void skipPrevious() {  // Requires 0<codePointStart.
+            codePointLimit=codePointStart;
+            codePointStart=str.offsetByCodePoints(codePointStart, -1);
+        }
+        private int previousCC() {  // Returns 0 if there is no previous character.
+            codePointLimit=codePointStart;
+            if(reorderStart>=codePointStart) {
+                return 0;
+            }
+            int c=str.codePointBefore(codePointStart);
+            codePointStart-=Character.charCount(c);
+            return impl.getCCFromYesOrMaybeCP(c);
+        }
+        private int codePointStart, codePointLimit;
+    }
+
+    // TODO: Propose as public API on the UTF16 class.
+    // TODO: Propose widening UTF16 methods that take char to take int.
+    // TODO: Propose widening UTF16 methods that take String to take CharSequence.
+    public static final class UTF16Plus {
+        /**
+         * Tests whether c lies in the lead surrogate range U+d800..U+dbff.
+         * @param c code unit or code point
+         * @return true if c is a lead surrogate
+         */
+        public static boolean isLeadSurrogate(int c) { return (c >>> 10) == (0xd800 >>> 10); }
+        /**
+         * Tests whether a value already known to be a surrogate
+         * (UTF16.isSurrogate(c)) is a lead surrogate; only bit 0x400 is examined.
+         * @param c code unit or code point
+         * @return true if bit 0x400 of c is clear
+         */
+        public static boolean isSurrogateLead(int c) { return 0==(c&0x400); }
+
+        /**
+         * Checks two CharSequence ranges for char-by-char equality.
+         * @param s1 first sequence
+         * @param start1 start offset in first sequence
+         * @param limit1 limit offset in first sequence
+         * @param s2 second sequence
+         * @param start2 start offset in second sequence
+         * @param limit2 limit offset in second sequence
+         * @return true if s1.subSequence(start1, limit1) contains the same text
+         *              as s2.subSequence(start2, limit2)
+         */
+        public static boolean equal(CharSequence s1, int start1, int limit1,
+                                    CharSequence s2, int start2, int limit2) {
+            if((limit2-start2)!=(limit1-start1)) {
+                return false;  // different lengths can never match
+            }
+            if(start1==start2 && s1==s2) {
+                return true;  // same object, same range
+            }
+            for(int i=start1, j=start2; i<limit1; ++i, ++j) {
+                if(s1.charAt(i)!=s2.charAt(j)) {
+                    return false;
+                }
+            }
+            return true;
+        }
+    }
+
+    public NormalizerImpl() {}  // creates an empty instance; load() reads the data and returns this
+
+    private static final class IsAcceptable implements ICUBinary.Authenticate {
+        public boolean isDataVersionAcceptable(byte[] version) {
+            return 4==version[0];  // only the first version byte is checked
+        }
+    }
+    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
+    private static final int DATA_FORMAT = 0x4e726d32;  // "Nrm2"
+
+    public NormalizerImpl load(ByteBuffer bytes) {  // reads "Nrm2" data from bytes into this object's fields
+        try {
+            dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
+            int indexesLength=bytes.getInt()/4;  // inIndexes[IX_NORM_TRIE_OFFSET]/4
+            if(indexesLength<=IX_MIN_LCCC_CP) {  // every index up to IX_MIN_LCCC_CP is read below
+                throw new InternalError("Normalizer2 data: not enough indexes");
+            }
+            int[] inIndexes=new int[indexesLength];
+            inIndexes[0]=indexesLength*4;  // first int was already consumed above
+            for(int i=1; i<indexesLength; ++i) {
+                inIndexes[i]=bytes.getInt();
+            }
+
+            minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
+            minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
+            minLcccCP=inIndexes[IX_MIN_LCCC_CP];
+
+            minYesNo=inIndexes[IX_MIN_YES_NO];
+            minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
+            minNoNo=inIndexes[IX_MIN_NO_NO];
+            minNoNoCompBoundaryBefore=inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE];
+            minNoNoCompNoMaybeCC=inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
+            minNoNoEmpty=inIndexes[IX_MIN_NO_NO_EMPTY];
+            limitNoNo=inIndexes[IX_LIMIT_NO_NO];
+            minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
+            assert((minMaybeYes&7)==0);  // 8-aligned for noNoDelta bit fields
+            centerNoNoDelta=(minMaybeYes>>DELTA_SHIFT)-MAX_DELTA-1;
+
+            // Read the normTrie.
+            int offset=inIndexes[IX_NORM_TRIE_OFFSET];
+            int nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
+            int triePosition = bytes.position();
+            normTrie = CodePointTrie.Fast16.fromBinary(bytes);
+            int trieLength = bytes.position() - triePosition;  // bytes actually consumed by the trie
+            if(trieLength>(nextOffset-offset)) {
+                throw new InternalError("Normalizer2 data: not enough bytes for normTrie");
+            }
+            ICUBinary.skipBytes(bytes, (nextOffset-offset)-trieLength);  // skip padding after trie bytes
+
+            // Read the composition and mapping data.
+            offset=nextOffset;
+            nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
+            int numChars=(nextOffset-offset)/2;  // section length in 16-bit units
+            if(numChars!=0) {  // when 0, maybeYesCompositions and extraData are not assigned here
+                maybeYesCompositions=ICUBinary.getString(bytes, numChars, 0);
+                extraData=maybeYesCompositions.substring((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT);
+            }
+
+            // smallFCD: new in formatVersion 2
+            offset=nextOffset;  // NOTE(review): this value is not read again in this method
+            smallFCD=new byte[0x100];
+            bytes.get(smallFCD);
+
+            return this;
+        } catch(IOException e) {
+            throw new InternalError(e);  // InternalError is an Error, not a RuntimeException
+        }
+    }
+    public NormalizerImpl load(String name) {
+        return load(ICUBinary.getRequiredData(name));  // fetch the named data, then parse it with load(ByteBuffer)
+    }
+
+    // The trie stores values for lead surrogate code *units*.
+    // Surrogate code *points* are inert.
+    public int getNorm16(int c) {
+        if(UTF16Plus.isLeadSurrogate(c)) { return INERT; }  // never look up a lead surrogate code point
+        return normTrie.get(c);
+    }
+    public int getRawNorm16(int c) { return normTrie.get(c); }  // unlike getNorm16(), no lead-surrogate special case
+    public boolean isAlgorithmicNoNo(int norm16) { return norm16>=limitNoNo && minMaybeYes>norm16; }
+    public boolean isCompNo(int norm16) { return norm16>=minNoNo && minMaybeYes>norm16; }
+    public boolean isDecompYes(int norm16) { return !(minYesNo<=norm16 && norm16<minMaybeYes); }
+
+    public int getCC(int norm16) {
+        // At or above MIN_NORMAL_MAYBE_YES the combining class is encoded in norm16 itself.
+        if(MIN_NORMAL_MAYBE_YES<=norm16) {
+            return getCCFromNormalYesOrMaybe(norm16);
+        }
+        // Only values in [minNoNo, limitNoNo[ are passed to getCCFromNoNo(); all others yield 0.
+        boolean inNoNoRange=minNoNo<=norm16 && norm16<limitNoNo;
+        return inNoNoRange ? getCCFromNoNo(norm16) : 0;
+    }
+    public static int getCCFromNormalYesOrMaybe(int norm16) {
+        return 0xff & (norm16 >> OFFSET_SHIFT);  // drop the low OFFSET_SHIFT bit(s), keep the next 8
+    }
+    public static int getCCFromYesOrMaybe(int norm16) {
+        return norm16<MIN_NORMAL_MAYBE_YES ? 0 : getCCFromNormalYesOrMaybe(norm16);
+    }
+    public int getCCFromYesOrMaybeCP(int c) {
+        // Code points below minCompNoMaybeCP yield 0 without a trie lookup.
+        return c<minCompNoMaybeCP ? 0 : getCCFromYesOrMaybe(getNorm16(c));
+    }
+
+    /**
+     * Returns the FCD data for code point c.
+     * @param c A Unicode code point.
+     * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
+     */
+    public int getFCD16(int c) {
+        // Quick exits: c is below minDecompNoCP, or c is a BMP code point
+        // whose smallFCD bit rules out non-zero FCD data.
+        if(c<minDecompNoCP || (c<=0xffff && !singleLeadMightHaveNonZeroFCD16(c))) {
+            return 0;
+        }
+        return getFCD16FromNormData(c);
+    }
+    /** Returns true if the single-or-lead code unit c might have non-zero FCD data. */
+    public boolean singleLeadMightHaveNonZeroFCD16(int lead) {
+        // Requires 0<=lead<=0xffff: smallFCD has one byte per 256 code units
+        // and, within that byte, one bit per 32 code units.
+        int bitIndex=(lead>>5)&7;
+        byte bits=smallFCD[lead>>8];
+        return bits!=0 && ((bits>>bitIndex)&1)!=0;
+    }
+
+    /** Gets the FCD value from the regular normalization data. */
+    public int getFCD16FromNormData(int c) {  // result layout: lccc in bits 15..8, tccc in bits 7..0
+        int norm16=getNorm16(c);
+        if (norm16 >= limitNoNo) {
+            if(norm16>=MIN_NORMAL_MAYBE_YES) {
+                // combining mark
+                norm16=getCCFromNormalYesOrMaybe(norm16);
+                return norm16|(norm16<<8);  // same ccc in both halves
+            } else if(norm16>=minMaybeYes) {
+                return 0;
+            } else {  // isDecompNoAlgorithmic(norm16)
+                int deltaTrailCC = norm16 & DELTA_TCCC_MASK;
+                if (deltaTrailCC <= DELTA_TCCC_1) {
+                    return deltaTrailCC >> OFFSET_SHIFT;  // 0 or 1 in the tccc half, lccc half 0
+                }
+                // Maps to an isCompYesAndZeroCC.
+                c=mapAlgorithmic(c, norm16);
+                norm16=getRawNorm16(c);  // continue below with the mapped code point's data
+            }
+        }
+        if(norm16<=minYesNo || isHangulLVT(norm16)) {
+            // no decomposition or Hangul syllable, all zeros
+            return 0;
+        }
+        // c decomposes, get everything from the variable-length extra data
+        int mapping=norm16>>OFFSET_SHIFT;  // index into extraData
+        int firstUnit=extraData.charAt(mapping);
+        int fcd16=firstUnit>>8;  // tccc
+        if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
+            fcd16|=extraData.charAt(mapping-1)&0xff00;  // lccc
+        }
+        return fcd16;
+    }
+
+    /**
+     * Gets the decomposition for one code point.
+     * @param c code point
+     * @return c's decomposition, if it has one; returns null if it does not have a decomposition
+     */
+    public String getDecomposition(int c) {
+        int norm16;
+        if(c<minDecompNoCP || isMaybeOrNonZeroCC(norm16=getNorm16(c))) {
+            // c does not decompose
+            return null;
+        }
+        int decomp = -1;  // stays -1 unless an algorithmic mapping is applied below
+        if(isDecompNoAlgorithmic(norm16)) {
+            // Maps to an isCompYesAndZeroCC.
+            decomp=c=mapAlgorithmic(c, norm16);
+            // The mapping might decompose further.
+            norm16 = getRawNorm16(c);
+        }
+        if (norm16 < minYesNo) {
+            if(decomp<0) {
+                return null;  // no algorithmic mapping and no further decomposition
+            } else {
+                return UTF16.valueOf(decomp);  // the algorithmic mapping is the whole result
+            }
+        } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
+            // Hangul syllable: decompose algorithmically
+            StringBuilder buffer=new StringBuilder();
+            Hangul.decompose(c, buffer);
+            return buffer.toString();
+        }
+        // c decomposes, get everything from the variable-length extra data
+        int mapping=norm16>>OFFSET_SHIFT;  // index of the mapping's first unit in extraData
+        int length=extraData.charAt(mapping++)&MAPPING_LENGTH_MASK;  // first unit's low bits hold the length
+        return extraData.substring(mapping, mapping+length);
+    }
+
+    // Fixed norm16 values.
+    public static final int MIN_YES_YES_WITH_CC=0xfe02;
+    public static final int JAMO_VT=0xfe00;
+    public static final int MIN_NORMAL_MAYBE_YES=0xfc00;
+    public static final int JAMO_L=2;  // offset=1 hasCompBoundaryAfter=FALSE
+    public static final int INERT=1;  // offset=0 hasCompBoundaryAfter=TRUE
+
+    // norm16 bit 0 is comp-boundary-after.
+    public static final int HAS_COMP_BOUNDARY_AFTER=1;
+    public static final int OFFSET_SHIFT=1;
+
+    // For algorithmic one-way mappings, norm16 bits 2..1 indicate the
+    // tccc (0, 1, >1) for quick FCC boundary-after tests.
+    public static final int DELTA_TCCC_0=0;
+    public static final int DELTA_TCCC_1=2;
+    public static final int DELTA_TCCC_GT_1=4;
+    public static final int DELTA_TCCC_MASK=6;
+    public static final int DELTA_SHIFT=3;
+
+    public static final int MAX_DELTA=0x40;
+
+    // Byte offsets from the start of the data, after the generic header.
+    public static final int IX_NORM_TRIE_OFFSET=0;
+    public static final int IX_EXTRA_DATA_OFFSET=1;
+    public static final int IX_SMALL_FCD_OFFSET=2;
+    public static final int IX_RESERVED3_OFFSET=3;
+    public static final int IX_TOTAL_SIZE=7;
+    public static final int MIN_CCC_LCCC_CP=0x300;
+    // Code point thresholds for quick check codes.
+    public static final int IX_MIN_DECOMP_NO_CP=8;
+    public static final int IX_MIN_COMP_NO_MAYBE_CP=9;
+
+    // Norm16 value thresholds for quick check combinations and types of extra data.
+
+    /** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */
+    public static final int IX_MIN_YES_NO=10;
+    /** Mappings are comp-normalized. */
+    public static final int IX_MIN_NO_NO=11;
+    public static final int IX_LIMIT_NO_NO=12;
+    public static final int IX_MIN_MAYBE_YES=13;
+
+    /** Mappings only in [minYesNoMappingsOnly..minNoNo[. */
+    public static final int IX_MIN_YES_NO_MAPPINGS_ONLY=14;
+    /** Mappings are not comp-normalized but have a comp boundary before. */
+    public static final int IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE=15;
+    /** Mappings do not have a comp boundary before. */
+    public static final int IX_MIN_NO_NO_COMP_NO_MAYBE_CC=16;
+    /** Mappings to the empty string. */
+    public static final int IX_MIN_NO_NO_EMPTY=17;
+
+    public static final int IX_MIN_LCCC_CP=18;
+    public static final int IX_COUNT=20;
+
+    public static final int MAPPING_HAS_CCC_LCCC_WORD=0x80;
+    public static final int MAPPING_HAS_RAW_MAPPING=0x40;
+    // unused bit 0x20;
+    public static final int MAPPING_LENGTH_MASK=0x1f;
+
+    public static final int COMP_1_LAST_TUPLE=0x8000;
+    public static final int COMP_1_TRIPLE=1;
+    public static final int COMP_1_TRAIL_LIMIT=0x3400;
+    public static final int COMP_1_TRAIL_MASK=0x7ffe;
+    public static final int COMP_1_TRAIL_SHIFT=9;  // 10-1 for the "triple" bit
+    public static final int COMP_2_TRAIL_SHIFT=6;
+    public static final int COMP_2_TRAIL_MASK=0xffc0;
+
+    // higher-level functionality ------------------------------------------ ***
+
+    /**
+     * Decomposes s[src, limit[ and writes the result to dest, replacing its previous contents.
+     * limit is the exclusive end index of the input range in s.
+     * destLengthEstimate is the initial dest buffer capacity; if negative, limit-src is used.
+     */
+    public void decompose(CharSequence s, int src, int limit, StringBuilder dest,
+                   int destLengthEstimate) {
+        // A negative estimate means "unknown": size the buffer for the input span.
+        int capacity=destLengthEstimate>=0 ? destLengthEstimate : limit-src;
+        // dest is overwritten, not appended to.
+        dest.setLength(0);
+        ReorderingBuffer buffer=new ReorderingBuffer(this, dest, capacity);
+        decompose(s, src, limit, buffer);
+    }
+
+    // Dual functionality:
+    // buffer!=null: normalize into buffer; returns limit
+    // buffer==null: isNormalized/quickCheck/spanQuickCheckYes; returns limit, or the last boundary before a failure
+    public int decompose(CharSequence s, int src, int limit,
+                         ReorderingBuffer buffer) {
+        int minNoCP=minDecompNoCP;
+
+        int prevSrc;
+        int c=0;
+        int norm16=0;
+
+        // only for quick check
+        int prevBoundary=src;
+        int prevCC=0;
+
+        for(;;) {
+            // count code units below the minimum or with irrelevant data for the quick check
+            for(prevSrc=src; src!=limit;) {
+                if( (c=s.charAt(src))<minNoCP ||
+                    isMostDecompYesAndZeroCC(norm16=normTrie.bmpGet(c))
+                ) {
+                    ++src;
+                } else if(!UTF16Plus.isLeadSurrogate(c)) {
+                    break;  // BMP code point that needs a closer look
+                } else {
+                    char c2;
+                    if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) {
+                        c = Character.toCodePoint((char)c, c2);
+                        norm16 = normTrie.suppGet(c);
+                        if (isMostDecompYesAndZeroCC(norm16)) {
+                            src += 2;
+                        } else {
+                            break;  // supplementary code point that needs a closer look; src still at its lead
+                        }
+                    } else {
+                        ++src;  // unpaired lead surrogate: inert
+                    }
+                }
+            }
+            // copy these code units all at once
+            if(src!=prevSrc) {
+                if(buffer!=null) {
+                    buffer.flushAndAppendZeroCC(s, prevSrc, src);
+                } else {
+                    prevCC=0;
+                    prevBoundary=src;
+                }
+            }
+            if(src==limit) {
+                break;
+            }
+
+            // Check one above-minimum, relevant code point.
+            src+=Character.charCount(c);  // step over c (one or two chars)
+            if(buffer!=null) {
+                decompose(c, norm16, buffer);
+            } else {
+                if(isDecompYes(norm16)) {
+                    int cc=getCCFromYesOrMaybe(norm16);
+                    if(prevCC<=cc || cc==0) {  // combining classes still in order
+                        prevCC=cc;
+                        if(cc<=1) {
+                            prevBoundary=src;
+                        }
+                        continue;
+                    }
+                }
+                return prevBoundary;  // "no" or cc out of order
+            }
+        }
+        return src;  // reached limit
+    }
+    public void decomposeAndAppend(CharSequence s, boolean doDecompose, ReorderingBuffer buffer) {
+        final int limit=s.length();
+        if(limit==0) {
+            return;
+        }
+        if(doDecompose) {
+            decompose(s, 0, limit, buffer);
+            return;
+        }
+        // Just merge the strings at the boundary:
+        // find the leading run of code points with a non-zero combining class.
+        int c=Character.codePointAt(s, 0);
+        final int firstCC=getCC(getNorm16(c));
+        int prevCC=firstCC;
+        int src=0;
+        for(int cc=firstCC; cc!=0; cc=getCC(getNorm16(c))) {
+            prevCC=cc;
+            src+=Character.charCount(c);
+            if(src>=limit) {
+                break;
+            }
+            c=Character.codePointAt(s, src);
+        }
+        buffer.append(s, 0, src, false, firstCC, prevCC);
+        buffer.append(s, src, limit);
+    }
+
+    // Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
+    // doCompose: normalize
+    // !doCompose: isNormalized (buffer must be empty and initialized)
+    public boolean compose(CharSequence s, int src, int limit,
+                           boolean onlyContiguous,
+                           boolean doCompose,
+                           ReorderingBuffer buffer) {
+        int prevBoundary=src;
+        int minNoMaybeCP=minCompNoMaybeCP;
+
+        for (;;) {
+            // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
+            // or with (compYes && ccc==0) properties.
+            int prevSrc;
+            int c = 0;
+            int norm16 = 0;
+            for (;;) {
+                if (src == limit) {
+                    if (prevBoundary != limit && doCompose) {
+                        buffer.append(s, prevBoundary, limit);
+                    }
+                    return true;
+                }
+                if( (c=s.charAt(src))<minNoMaybeCP ||
+                    isCompYesAndZeroCC(norm16=normTrie.bmpGet(c))
+                ) {
+                    ++src;
+                } else {
+                    prevSrc = src++;
+                    if (!UTF16Plus.isLeadSurrogate(c)) {
+                        break;
+                    } else {
+                        char c2;
+                        if (src != limit && Character.isLowSurrogate(c2 = s.charAt(src))) {
+                            ++src;
+                            c = Character.toCodePoint((char)c, c2);
+                            norm16 = normTrie.suppGet(c);
+                            if (!isCompYesAndZeroCC(norm16)) {
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+            // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
+            // The current character is either a "noNo" (has a mapping)
+            // or a "maybeYes" (combines backward)
+            // or a "yesYes" with ccc!=0.
+            // It is not a Hangul syllable or Jamo L because those have "yes" properties.
+
+            // Medium-fast path: Handle cases that do not require full decomposition and recomposition.
+            if (!isMaybeOrNonZeroCC(norm16)) {  // minNoNo <= norm16 < minMaybeYes
+                if (!doCompose) {
+                    return false;
+                }
+                // Fast path for mapping a character that is immediately surrounded by boundaries.
+                // In this case, we need not decompose around the current character.
+                if (isDecompNoAlgorithmic(norm16)) {
+                    // Maps to a single isCompYesAndZeroCC character
+                    // which also implies hasCompBoundaryBefore.
+                    if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
+                            hasCompBoundaryBefore(s, src, limit)) {
+                        if (prevBoundary != prevSrc) {
+                            buffer.append(s, prevBoundary, prevSrc);
+                        }
+                        buffer.append(mapAlgorithmic(c, norm16), 0);
+                        prevBoundary = src;
+                        continue;
+                    }
+                } else if (norm16 < minNoNoCompBoundaryBefore) {
+                    // The mapping is comp-normalized which also implies hasCompBoundaryBefore.
+                    if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
+                            hasCompBoundaryBefore(s, src, limit)) {
+                        if (prevBoundary != prevSrc) {
+                            buffer.append(s, prevBoundary, prevSrc);
+                        }
+                        int mapping = norm16 >> OFFSET_SHIFT;
+                        int length = extraData.charAt(mapping++) & MAPPING_LENGTH_MASK;
+                        buffer.append(extraData, mapping, mapping + length);
+                        prevBoundary = src;
+                        continue;
+                    }
+                } else if (norm16 >= minNoNoEmpty) {
+                    // The current character maps to nothing.
+                    // Simply omit it from the output if there is a boundary before _or_ after it.
+                    // The character itself implies no boundaries.
+                    if (hasCompBoundaryBefore(s, src, limit) ||
+                            hasCompBoundaryAfter(s, prevBoundary, prevSrc, onlyContiguous)) {
+                        if (prevBoundary != prevSrc) {
+                            buffer.append(s, prevBoundary, prevSrc);
+                        }
+                        prevBoundary = src;
+                        continue;
+                    }
+                }
+                // Other "noNo" type, or need to examine more text around this character:
+                // Fall through to the slow path.
+            } else if (isJamoVT(norm16) && prevBoundary != prevSrc) {
+                char prev=s.charAt(prevSrc-1);
+                if(c<Hangul.JAMO_T_BASE) {
+                    // The current character is a Jamo Vowel,
+                    // compose with previous Jamo L and following Jamo T.
+                    char l = (char)(prev-Hangul.JAMO_L_BASE);
+                    if(l<Hangul.JAMO_L_COUNT) {
+                        if (!doCompose) {
+                            return false;
+                        }
+                        int t;
+                        if (src != limit &&
+                                0 < (t = (s.charAt(src) - Hangul.JAMO_T_BASE)) &&
+                                t < Hangul.JAMO_T_COUNT) {
+                            // The next character is a Jamo T.
+                            ++src;
+                        } else if (hasCompBoundaryBefore(s, src, limit)) {
+                            // No Jamo T follows, not even via decomposition.
+                            t = 0;
+                        } else {
+                            t = -1;
+                        }
+                        if (t >= 0) {
+                            int syllable = Hangul.HANGUL_BASE +
+                                (l*Hangul.JAMO_V_COUNT + (c-Hangul.JAMO_V_BASE)) *
+                                Hangul.JAMO_T_COUNT + t;
+                            --prevSrc;  // Replace the Jamo L as well.
+                            if (prevBoundary != prevSrc) {
+                                buffer.append(s, prevBoundary, prevSrc);
+                            }
+                            buffer.append((char)syllable);
+                            prevBoundary = src;
+                            continue;
+                        }
+                        // If we see L+V+x where x!=T then we drop to the slow path,
+                        // decompose and recompose.
+                        // This is to deal with NFKC finding normal L and V but a
+                        // compatibility variant of a T.
+                        // We need to either fully compose that combination here
+                        // (which would complicate the code and may not work with strange custom data)
+                        // or use the slow path.
+                    }
+                } else if (Hangul.isHangulLV(prev)) {
+                    // The current character is a Jamo Trailing consonant,
+                    // compose with previous Hangul LV that does not contain a Jamo T.
+                    if (!doCompose) {
+                        return false;
+                    }
+                    int syllable = prev + c - Hangul.JAMO_T_BASE;
+                    --prevSrc;  // Replace the Hangul LV as well.
+                    if (prevBoundary != prevSrc) {
+                        buffer.append(s, prevBoundary, prevSrc);
+                    }
+                    buffer.append((char)syllable);
+                    prevBoundary = src;
+                    continue;
+                }
+                // No matching context, or may need to decompose surrounding text first:
+                // Fall through to the slow path.
+            } else if (norm16 > JAMO_VT) {  // norm16 >= MIN_YES_YES_WITH_CC
+                // One or more combining marks that do not combine-back:
+                // Check for canonical order, copy unchanged if ok and
+                // if followed by a character with a boundary-before.
+                int cc = getCCFromNormalYesOrMaybe(norm16);  // cc!=0
+                if (onlyContiguous /* FCC */ && getPreviousTrailCC(s, prevBoundary, prevSrc) > cc) {
+                    // Fails FCD test, need to decompose and contiguously recompose.
+                    if (!doCompose) {
+                        return false;
+                    }
+                } else {
+                    // If !onlyContiguous (not FCC), then we ignore the tccc of
+                    // the previous character which passed the quick check "yes && ccc==0" test.
+                    int n16;
+                    for (;;) {
+                        if (src == limit) {
+                            if (doCompose) {
+                                buffer.append(s, prevBoundary, limit);
+                            }
+                            return true;
+                        }
+                        int prevCC = cc;
+                        c = Character.codePointAt(s, src);
+                        n16 = normTrie.get(c);
+                        if (n16 >= MIN_YES_YES_WITH_CC) {
+                            cc = getCCFromNormalYesOrMaybe(n16);
+                            if (prevCC > cc) {
+                                if (!doCompose) {
+                                    return false;
+                                }
+                                break;
+                            }
+                        } else {
+                            break;
+                        }
+                        src += Character.charCount(c);
+                    }
+                    // p is after the last in-order combining mark.
+                    // If there is a boundary here, then we continue with no change.
+                    if (norm16HasCompBoundaryBefore(n16)) {
+                        if (isCompYesAndZeroCC(n16)) {
+                            src += Character.charCount(c);
+                        }
+                        continue;
+                    }
+                    // Use the slow path. There is no boundary in [prevSrc, src[.
+                }
+            }
+
+            // Slow path: Find the nearest boundaries around the current character,
+            // decompose and recompose.
+            if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
+                c = Character.codePointBefore(s, prevSrc);
+                norm16 = normTrie.get(c);
+                if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
+                    prevSrc -= Character.charCount(c);
+                }
+            }
+            if (doCompose && prevBoundary != prevSrc) {
+                buffer.append(s, prevBoundary, prevSrc);
+            }
+            int recomposeStartIndex=buffer.length();
+            // We know there is not a boundary here.
+            decomposeShort(s, prevSrc, src, false /* !stopAtCompBoundary */, onlyContiguous,
+                           buffer);
+            // Decompose until the next boundary.
+            src = decomposeShort(s, src, limit, true /* stopAtCompBoundary */, onlyContiguous,
+                                 buffer);
+            recompose(buffer, recomposeStartIndex, onlyContiguous);
+            if(!doCompose) {
+                if(!buffer.equals(s, prevSrc, src)) {
+                    return false;
+                }
+                buffer.remove();
+            }
+            prevBoundary=src;
+        }
+    }
+
+    /**
+     * Checks, without modifying any text, how far s[src..limit[ passes the
+     * composition quick check.
+     * Very similar to compose(): Make the same changes in both places if relevant.
+     * doSpan: spanQuickCheckYes (ignore bit 0 of the return value)
+     * !doSpan: quickCheck
+     *
+     * @param s the text to examine
+     * @param src index of the first code unit to examine
+     * @param limit index after the last code unit to examine
+     * @param onlyContiguous true for the FCC variant: a combining mark is also compared
+     *        against the trail combining class of the character before it
+     * @param doSpan true to stop at the first "maybe" character and return the boundary
+     *        before it; false to record the "maybe" in bit 0 and keep scanning
+     * @return bits 31..1: spanQuickCheckYes (==s.length() if "yes") and
+     *         bit 0: set if "maybe"; otherwise, if the span length&lt;s.length()
+     *         then the quick check result is "no"
+     */
+    public int composeQuickCheck(CharSequence s, int src, int limit,
+                                 boolean onlyContiguous, boolean doSpan) {
+        // Becomes 1 once a "maybe" character has been seen (only when !doSpan).
+        int qcResult=0;
+        // Index that is returned (shifted) when the check stops with "no".
+        int prevBoundary=src;
+        int minNoMaybeCP=minCompNoMaybeCP;
+
+        for(;;) {
+            // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
+            // or with (compYes && ccc==0) properties.
+            int prevSrc;
+            int c = 0;
+            int norm16 = 0;
+            for (;;) {
+                if(src==limit) {
+                    return (src<<1)|qcResult;  // "yes" or "maybe"
+                }
+                if( (c=s.charAt(src))<minNoMaybeCP ||
+                    isCompYesAndZeroCC(norm16=normTrie.bmpGet(c))
+                ) {
+                    ++src;
+                } else {
+                    // prevSrc marks the start of the character that stopped the fast path.
+                    prevSrc = src++;
+                    if (!UTF16Plus.isLeadSurrogate(c)) {
+                        break;
+                    } else {
+                        char c2;
+                        // Combine a surrogate pair into one supplementary code point and
+                        // look up its data; an unpaired lead surrogate is simply skipped.
+                        if (src != limit && Character.isLowSurrogate(c2 = s.charAt(src))) {
+                            ++src;
+                            c = Character.toCodePoint((char)c, c2);
+                            norm16 = normTrie.suppGet(c);
+                            if (!isCompYesAndZeroCC(norm16)) {
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+            // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
+            // The current character is either a "noNo" (has a mapping)
+            // or a "maybeYes" (combines backward)
+            // or a "yesYes" with ccc!=0.
+            // It is not a Hangul syllable or Jamo L because those have "yes" properties.
+
+            // Move prevBoundary up to the current character, or to the character before it
+            // when neither side of that position is a composition boundary.
+            // prevNorm16 stays INERT unless the previous character is pulled in.
+            int prevNorm16 = INERT;
+            if (prevBoundary != prevSrc) {
+                prevBoundary = prevSrc;
+                if (!norm16HasCompBoundaryBefore(norm16)) {
+                    c = Character.codePointBefore(s, prevSrc);
+                    int n16 = getNorm16(c);
+                    if (!norm16HasCompBoundaryAfter(n16, onlyContiguous)) {
+                        prevBoundary -= Character.charCount(c);
+                        prevNorm16 = n16;
+                    }
+                }
+            }
+
+            if(isMaybeOrNonZeroCC(norm16)) {
+                int cc=getCCFromYesOrMaybe(norm16);
+                if (onlyContiguous /* FCC */ && cc != 0 &&
+                        getTrailCCFromCompYesAndZeroCC(prevNorm16) > cc) {
+                    // The [prevBoundary..prevSrc[ character
+                    // passed the quick check "yes && ccc==0" test
+                    // but is out of canonical order with the current combining mark.
+                    // Falls through to the "no" return at the bottom of the loop.
+                } else {
+                    // If !onlyContiguous (not FCC), then we ignore the tccc of
+                    // the previous character which passed the quick check "yes && ccc==0" test.
+                    for (;;) {
+                        // Below MIN_YES_YES_WITH_CC the current character is a "maybe".
+                        if (norm16 < MIN_YES_YES_WITH_CC) {
+                            if (!doSpan) {
+                                qcResult = 1;
+                            } else {
+                                return prevBoundary << 1;  // spanYes does not care to know it's "maybe"
+                            }
+                        }
+                        if (src == limit) {
+                            return (src<<1) | qcResult;  // "yes" or "maybe"
+                        }
+                        int prevCC = cc;
+                        c = Character.codePointAt(s, src);
+                        norm16 = getNorm16(c);
+                        if (isMaybeOrNonZeroCC(norm16)) {
+                            cc = getCCFromYesOrMaybe(norm16);
+                            // Stop when the combining classes are out of order.
+                            if (!(prevCC <= cc || cc == 0)) {
+                                break;
+                            }
+                        } else {
+                            break;
+                        }
+                        src += Character.charCount(c);
+                    }
+                    // src is after the last in-order combining mark.
+                    if (isCompYesAndZeroCC(norm16)) {
+                        // The character at src is "yes" with ccc==0: step over it
+                        // and resume the fast path.
+                        prevBoundary = src;
+                        src += Character.charCount(c);
+                        continue;
+                    }
+                }
+            }
+            return prevBoundary<<1;  // "no"
+        }
+    }
+    /**
+     * Appends s to the buffer, composing across the seam between the text
+     * already in the buffer and the beginning of s.
+     *
+     * <p>If the buffer is not empty and s does not start at a composition boundary,
+     * the buffer's tail (from its last composition boundary) is removed, joined with
+     * the head of s (up to its first composition boundary), and that joined piece is
+     * composed back into the buffer. This seam is composed regardless of doCompose.
+     *
+     * @param s the text to append
+     * @param doCompose true to compose the rest of s; false to append it unchanged
+     * @param onlyContiguous passed through to the boundary searches and to compose()
+     * @param buffer the destination buffer
+     */
+    public void composeAndAppend(CharSequence s,
+                                 boolean doCompose,
+                                 boolean onlyContiguous,
+                                 ReorderingBuffer buffer) {
+        final int limit = s.length();
+        int start = 0;
+        if (!buffer.isEmpty()) {
+            final int srcBoundary = findNextCompBoundary(s, 0, limit, onlyContiguous);
+            if (srcBoundary != 0) {
+                final int destBoundary = findPreviousCompBoundary(
+                        buffer.getStringBuilder(), buffer.length(), onlyContiguous);
+                final int destTailLength = buffer.length() - destBoundary;
+                // Seam = tail of the destination + head of the source.
+                StringBuilder middle = new StringBuilder(destTailLength + srcBoundary + 16);
+                middle.append(buffer.getStringBuilder(), destBoundary, buffer.length());
+                buffer.removeSuffix(destTailLength);
+                middle.append(s, 0, srcBoundary);
+                compose(middle, 0, middle.length(), onlyContiguous, true, buffer);
+                start = srcBoundary;
+            }
+        }
+        if (!doCompose) {
+            buffer.append(s, start, limit);
+            return;
+        }
+        compose(s, start, limit, onlyContiguous, true, buffer);
+    }
+    /**
+     * Brings s[src..limit[ into FCD form, or checks whether it already is.
+     *
+     * <p>Dual functionality:
+     * buffer!=null: normalize (the result is written to the buffer)
+     * buffer==null: isNormalized/quickCheck/spanQuickCheckYes (nothing is written)
+     *
+     * @param s the source text
+     * @param src index of the first code unit to process
+     * @param limit index after the last code unit to process
+     * @param buffer destination buffer, or null to only check
+     * @return limit when the end of the range was reached; with buffer==null,
+     *         the last FCD-safe boundary if a character is found out of order
+     */
+    public int makeFCD(CharSequence s, int src, int limit, ReorderingBuffer buffer) {
+        // Note: In this function we use buffer.appendZeroCC() because we track
+        // the lead and trail combining classes here, rather than leaving it to
+        // the ReorderingBuffer.
+        // The exception is the call to decomposeShort() which uses the buffer
+        // in the normal way.
+
+        // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
+        // Similar to the prevBoundary in the compose() implementation.
+        int prevBoundary=src;
+        int prevSrc;
+        int c=0;
+        // fcd16 of the previous character; a negative value is the bitwise-inverted
+        // code point of a character whose fcd16 lookup was deferred.
+        int prevFCD16=0;
+        // fcd16 of the current character: lccc in bits 15..8, tccc in bits 7..0.
+        int fcd16=0;
+
+        for(;;) {
+            // count code units with lccc==0
+            for(prevSrc=src; src!=limit;) {
+                if((c=s.charAt(src))<minLcccCP) {
+                    // Defer the fcd16 lookup; remember the code point as a negative value.
+                    prevFCD16=~c;
+                    ++src;
+                } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
+                    prevFCD16=0;
+                    ++src;
+                } else {
+                    if (UTF16Plus.isLeadSurrogate(c)) {
+                        char c2;
+                        if ((src + 1) != limit && Character.isLowSurrogate(c2 = s.charAt(src + 1))) {
+                            c = Character.toCodePoint((char)c, c2);
+                        }
+                    }
+                    // fcd16<=0xff means lccc==0: keep scanning.
+                    if((fcd16=getFCD16FromNormData(c))<=0xff) {
+                        prevFCD16=fcd16;
+                        src+=Character.charCount(c);
+                    } else {
+                        break;
+                    }
+                }
+            }
+            // copy these code units all at once
+            if(src!=prevSrc) {
+                if(src==limit) {
+                    if(buffer!=null) {
+                        buffer.flushAndAppendZeroCC(s, prevSrc, src);
+                    }
+                    break;
+                }
+                prevBoundary=src;
+                // We know that the previous character's lccc==0.
+                if(prevFCD16<0) {
+                    // Fetching the fcd16 value was deferred for this below-minLcccCP code point.
+                    int prev=~prevFCD16;
+                    if(prev<minDecompNoCP) {
+                        prevFCD16=0;
+                    } else {
+                        prevFCD16=getFCD16FromNormData(prev);
+                        if(prevFCD16>1) {
+                            // tccc>1: the boundary is before the previous (single-unit) character.
+                            --prevBoundary;
+                        }
+                    }
+                } else {
+                    int p=src-1;
+                    if( Character.isLowSurrogate(s.charAt(p)) && prevSrc<p &&
+                        Character.isHighSurrogate(s.charAt(p-1))
+                    ) {
+                        --p;
+                        // Need to fetch the previous character's FCD value because
+                        // prevFCD16 was just for the trail surrogate code point.
+                        prevFCD16=getFCD16FromNormData(Character.toCodePoint(s.charAt(p), s.charAt(p+1)));
+                        // Still known to have lccc==0 because its lead surrogate unit had lccc==0.
+                    }
+                    if(prevFCD16>1) {
+                        prevBoundary=p;
+                    }
+                }
+                if(buffer!=null) {
+                    // The last lccc==0 character is excluded from the
+                    // flush-and-append call in case it needs to be modified.
+                    buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary);
+                    buffer.append(s, prevBoundary, src);
+                }
+                // The start of the current character (c).
+                prevSrc=src;
+            } else if(src==limit) {
+                break;
+            }
+
+            src+=Character.charCount(c);
+            // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
+            // Check for proper order, and decompose locally if necessary.
+            if((prevFCD16&0xff)<=(fcd16>>8)) {
+                // proper order: prev tccc <= current lccc
+                if((fcd16&0xff)<=1) {
+                    prevBoundary=src;
+                }
+                if(buffer!=null) {
+                    buffer.appendZeroCC(c);
+                }
+                prevFCD16=fcd16;
+                continue;
+            } else if(buffer==null) {
+                return prevBoundary;  // quick check "no"
+            } else {
+                /*
+                 * Back out the part of the source that we copied or appended
+                 * already but is now going to be decomposed.
+                 * prevSrc is set to after what was copied/appended.
+                 */
+                buffer.removeSuffix(prevSrc-prevBoundary);
+                /*
+                 * Find the part of the source that needs to be decomposed,
+                 * up to the next safe boundary.
+                 */
+                src=findNextFCDBoundary(s, src, limit);
+                /*
+                 * The source text does not fulfill the conditions for FCD.
+                 * Decompose and reorder a limited piece of the text.
+                 */
+                decomposeShort(s, prevBoundary, src, false, false, buffer);
+                prevBoundary=src;
+                prevFCD16=0;
+            }
+        }
+        return src;
+    }
+
+    /**
+     * Tests whether there is a decomposition boundary before code point c.
+     * Cheap range checks are tried first; the norm16 data is consulted only as a last resort.
+     */
+    public boolean hasDecompBoundaryBefore(int c) {
+        if (c < minLcccCP) {
+            return true;
+        }
+        if (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) {
+            return true;
+        }
+        return norm16HasDecompBoundaryBefore(getNorm16(c));
+    }
+    /**
+     * Tests whether a character with the given norm16 data value has a
+     * decomposition boundary before it.
+     */
+    public boolean norm16HasDecompBoundaryBefore(int norm16) {
+        if (norm16 < minNoNoCompNoMaybeCC) {
+            return true;
+        }
+        if (norm16 >= limitNoNo) {
+            return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
+        }
+        // The character decomposes: consult the variable-length extra data.
+        final int offset = norm16 >> OFFSET_SHIFT;
+        if ((extraData.charAt(offset) & MAPPING_HAS_CCC_LCCC_WORD) == 0) {
+            // No ccc/lccc word stored, so leadCC==0.
+            return true;
+        }
+        // Boundary only if leadCC==0 (hasFCDBoundaryBefore()).
+        final int cccLcccWord = extraData.charAt(offset - 1);
+        return (cccLcccWord & 0xff00) == 0;
+    }
+    /**
+     * Tests whether there is a decomposition boundary after code point c.
+     * The norm16 data is looked up only when the two cheap range checks fail.
+     */
+    public boolean hasDecompBoundaryAfter(int c) {
+        return c < minDecompNoCP
+                || (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c))
+                || norm16HasDecompBoundaryAfter(getNorm16(c));
+    }
+    /**
+     * Tests whether a character with the given norm16 data value has a
+     * decomposition boundary after it.
+     */
+    public boolean norm16HasDecompBoundaryAfter(int norm16) {
+        if (norm16 <= minYesNo) {
+            return true;
+        }
+        if (isHangulLVT(norm16)) {
+            return true;
+        }
+        if (norm16 >= limitNoNo) {
+            return isMaybeOrNonZeroCC(norm16)
+                    ? (norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT)
+                    // Otherwise it maps to an isCompYesAndZeroCC.
+                    : (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1;
+        }
+        // The character decomposes: consult the variable-length extra data.
+        // Decomp after-boundary is the same as hasFCDBoundaryAfter(): fcd16<=1 || trailCC==0.
+        final int offset = norm16 >> OFFSET_SHIFT;
+        final int header = extraData.charAt(offset);
+        if (header <= 0xff) {
+            return true;  // trailCC==0
+        }
+        if (header > 0x1ff) {
+            return false;  // trailCC>1
+        }
+        // trailCC==1: boundary only if leadCC==0, same as checking for a before-boundary.
+        if ((header & MAPPING_HAS_CCC_LCCC_WORD) == 0) {
+            return true;
+        }
+        return (extraData.charAt(offset - 1) & 0xff00) == 0;
+    }
+    public boolean isDecompInert(int c) { return isDecompYesAndZeroCC(getNorm16(c)); }
+
+    /**
+     * Tests whether there is a composition boundary before code point c.
+     * Code points below minCompNoMaybeCP qualify without a data lookup.
+     */
+    public boolean hasCompBoundaryBefore(int c) {
+        if (c < minCompNoMaybeCP) {
+            return true;
+        }
+        return norm16HasCompBoundaryBefore(getNorm16(c));
+    }
+    /**
+     * Tests whether there is a composition boundary after code point c,
+     * based on its norm16 data value.
+     */
+    public boolean hasCompBoundaryAfter(int c, boolean onlyContiguous) {
+        final int norm16 = getNorm16(c);
+        return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
+    }
+
+    /** True if norm16 lies in the inclusive range minMaybeYes..JAMO_VT. */
+    private boolean isMaybe(int norm16) {
+        return norm16 >= minMaybeYes && norm16 <= JAMO_VT;
+    }
+    /** True if norm16 is at or above minMaybeYes. */
+    private boolean isMaybeOrNonZeroCC(int norm16) {
+        return minMaybeYes <= norm16;
+    }
+    /** True if norm16 is exactly the INERT value. */
+    private static boolean isInert(int norm16) {
+        return INERT == norm16;
+    }
+    /** True if norm16 is exactly the JAMO_VT value. */
+    private static boolean isJamoVT(int norm16) {
+        return JAMO_VT == norm16;
+    }
+    /** Returns the norm16 value that isHangulLVT() compares against. */
+    private int hangulLVT() {
+        return HAS_COMP_BOUNDARY_AFTER | minYesNoMappingsOnly;
+    }
+    /** True if norm16 equals minYesNo, the value used for Hangul LV syllables. */
+    private boolean isHangulLV(int norm16) {
+        return minYesNo == norm16;
+    }
+    private boolean isHangulLVT(int norm16) {
+        return norm16==hangulLVT();
+    }
+    /** True if norm16 is below minNoNo. */
+    private boolean isCompYesAndZeroCC(int norm16) {
+        return minNoNo > norm16;
+    }
+    // UBool isCompYes(uint16_t norm16) const {
+    //     return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
+    // }
+    // UBool isCompYesOrMaybe(uint16_t norm16) const {
+    //     return norm16<minNoNo || minMaybeYes<=norm16;
+    // }
+    // private boolean hasZeroCCFromDecompYes(int norm16) {
+    //     return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
+    // }
+    /**
+     * True if norm16 is below minYesNo, equals JAMO_VT,
+     * or lies in the inclusive range minMaybeYes..MIN_NORMAL_MAYBE_YES.
+     */
+    private boolean isDecompYesAndZeroCC(int norm16) {
+        if (norm16 < minYesNo) {
+            return true;
+        }
+        if (norm16 == JAMO_VT) {
+            return true;
+        }
+        return norm16 >= minMaybeYes && norm16 <= MIN_NORMAL_MAYBE_YES;
+    }
+    /**
+     * A cheaper, simpler variant of isDecompYesAndZeroCC() that leaves out
+     * the MaybeYes values which combine-forward and have ccc=0.
+     * (Standard Unicode 10 normalization does not have such characters.)
+     */
+    private boolean isMostDecompYesAndZeroCC(int norm16) {
+        if (norm16 < minYesNo) {
+            return true;
+        }
+        return norm16 == MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
+    }
+    /** True if norm16 is at or above limitNoNo. */
+    private boolean isDecompNoAlgorithmic(int norm16) {
+        return limitNoNo <= norm16;
+    }
+
+    // For use with isCompYes().
+    // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
+    // static uint8_t getCCFromYes(uint16_t norm16) {
+    //     return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0;
+    // }
+    private int getCCFromNoNo(int norm16) {
+        int mapping=norm16>>OFFSET_SHIFT;
+        if((extraData.charAt(mapping)&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
+            return extraData.charAt(mapping-1)&0xff;
+        } else {
+            return 0;
+        }
+    }
+    /**
+     * Returns the trail combining class for a norm16 value that passed isCompYesAndZeroCC().
+     */
+    int getTrailCCFromCompYesAndZeroCC(int norm16) {
+        if (norm16 > minYesNo) {
+            // tccc from yesNo: the high byte of the mapping's first unit.
+            // For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here.
+            return extraData.charAt(norm16 >> OFFSET_SHIFT) >> 8;
+        }
+        // yesYes and Hangul LV have ccc=tccc=0
+        return 0;
+    }
+
+    // Requires algorithmic-NoNo.
+    private int mapAlgorithmic(int c, int norm16) {
+        return c+(norm16>>DELTA_SHIFT)-centerNoNoDelta;
+    }
+
+    // Requires minYesNo<norm16<limitNoNo.
+    // private int getMapping(int norm16) { return extraData+(norm16>>OFFSET_SHIFT); }
+
+    /**
+     * Locates the compositions list for a norm16 value.
+     *
+     * @return index into maybeYesCompositions, or -1
+     */
+    private int getCompositionsListForDecompYes(int norm16) {
+        if (norm16 < JAMO_L || norm16 >= MIN_NORMAL_MAYBE_YES) {
+            return -1;
+        }
+        int index = norm16 - minMaybeYes;
+        if (index < 0) {
+            // norm16<minMaybeYes: index into extraData which is a substring at
+            //     maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes]
+            // same as (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16
+            // (for yesYes; if Jamo L: harmless empty list)
+            index += MIN_NORMAL_MAYBE_YES;
+        }
+        return index >> OFFSET_SHIFT;
+    }
+    /**
+     * @return index into maybeYesCompositions
+     */
+    private int getCompositionsListForComposite(int norm16) {
+        // A composite has both mapping & compositions list.
+        int list=((MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16)>>OFFSET_SHIFT;
+        int firstUnit=maybeYesCompositions.charAt(list);
+        return list+  // mapping in maybeYesCompositions
+            1+  // +1 to skip the first unit with the mapping length
+            (firstUnit&MAPPING_LENGTH_MASK);  // + mapping length
+    }
+
+    // Decompose a short piece of text which is likely to contain characters that
+    // fail the quick check loop and/or where the quick check loop's overhead
+    // is unlikely to be amortized.
+    // Called by the compose() and makeFCD() implementations.
+    // Public in Java for collation implementation code.
+    private int decomposeShort(
+            CharSequence s, int src, int limit,
+            boolean stopAtCompBoundary, boolean onlyContiguous,
+            ReorderingBuffer buffer) {
+        while(src<limit) {
+            int c=Character.codePointAt(s, src);
+            if (stopAtCompBoundary && c < minCompNoMaybeCP) {
+                return src;
+            }
+            int norm16 = getNorm16(c);
+            if (stopAtCompBoundary && norm16HasCompBoundaryBefore(norm16)) {
+                return src;
+            }
+            src+=Character.charCount(c);
+            decompose(c, norm16, buffer);
+            if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
+                return src;
+            }
+        }
+        return src;
+    }
+    private void decompose(int c, int norm16, ReorderingBuffer buffer) {
+        // get the decomposition and the lead and trail cc's
+        if (norm16 >= limitNoNo) {
+            if (isMaybeOrNonZeroCC(norm16)) {
+                buffer.append(c, getCCFromYesOrMaybe(norm16));
+                return;
+            }
+            // Maps to an isCompYesAndZeroCC.
+            c=mapAlgorithmic(c, norm16);
+            norm16=getRawNorm16(c);
+        }
+        if (norm16 < minYesNo) {
+            // c does not decompose
+            buffer.append(c, 0);
+        } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
+            // Hangul syllable: decompose algorithmically
+            Hangul.decompose(c, buffer);
+        } else {
+            // c decomposes, get everything from the variable-length extra data
+            int mapping=norm16>>OFFSET_SHIFT;
+            int firstUnit=extraData.charAt(mapping);
+            int length=firstUnit&MAPPING_LENGTH_MASK;
+            int leadCC, trailCC;
+            trailCC=firstUnit>>8;
+            if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
+                leadCC=extraData.charAt(mapping-1)>>8;
+            } else {
+                leadCC=0;
+            }
+            ++mapping;  // skip over the firstUnit
+            buffer.append(extraData, mapping, mapping+length, true, leadCC, trailCC);
+        }
+    }
+
+    /**
+     * Finds the recomposition result for
+     * a forward-combining "lead" character,
+     * specified with a pointer to its compositions list,
+     * and a backward-combining "trail" character.
+     *
+     * <p>If the lead and trail characters combine, then this function returns
+     * the following "compositeAndFwd" value:
+     * <pre>
+     * Bits 21..1  composite character
+     * Bit      0  set if the composite is a forward-combining starter
+     * </pre>
+     * otherwise it returns -1.
+     *
+     * <p>The compositions list has (trail, compositeAndFwd) pair entries,
+     * encoded as either pairs or triples of 16-bit units.
+     * The last entry has the high bit of its first unit set.
+     *
+     * <p>The list is sorted by ascending trail characters (there are no duplicates).
+     * A linear search is used.
+     *
+     * <p>See normalizer2impl.h for a more detailed description
+     * of the compositions list format.
+     *
+     * @param compositions the string that contains the compositions list
+     * @param list index in compositions of the first unit of the lead character's list
+     * @param trail the trail character (code point) to look up
+     * @return the compositeAndFwd value, or -1 if lead and trail do not combine
+     */
+    private static int combine(String compositions, int list, int trail) {
+        int key1, firstUnit;
+        if(trail<COMP_1_TRAIL_LIMIT) {
+            // trail character is 0..33FF
+            // result entry may have 2 or 3 units
+            key1=(trail<<1);
+            // Skip entries whose first unit is smaller than the key; each step
+            // advances by the entry's size (2 units plus the COMP_1_TRIPLE bit).
+            while(key1>(firstUnit=compositions.charAt(list))) {
+                list+=2+(firstUnit&COMP_1_TRIPLE);
+            }
+            if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
+                if((firstUnit&COMP_1_TRIPLE)!=0) {
+                    // Triple: the result is spread over the second and third units.
+                    return (compositions.charAt(list+1)<<16)|compositions.charAt(list+2);
+                } else {
+                    // Pair: the result is the second unit.
+                    return compositions.charAt(list+1);
+                }
+            }
+        } else {
+            // trail character is 3400..10FFFF
+            // result entry has 3 units
+            // The trail character is split into two keys: key1 is matched against
+            // the first unit and key2 against the second unit of an entry.
+            key1=COMP_1_TRAIL_LIMIT+(((trail>>COMP_1_TRAIL_SHIFT))&~COMP_1_TRIPLE);
+            int key2=(trail<<COMP_2_TRAIL_SHIFT)&0xffff;
+            int secondUnit;
+            for(;;) {
+                if(key1>(firstUnit=compositions.charAt(list))) {
+                    // Not there yet: skip this entry.
+                    list+=2+(firstUnit&COMP_1_TRIPLE);
+                } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
+                    if(key2>(secondUnit=compositions.charAt(list+1))) {
+                        if((firstUnit&COMP_1_LAST_TUPLE)!=0) {
+                            // That was the last entry of the list: no match.
+                            break;
+                        } else {
+                            list+=3;
+                        }
+                    } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
+                        // Match: the remaining bits of the second unit are the high
+                        // part of the result, the third unit is the low part.
+                        return ((secondUnit&~COMP_2_TRAIL_MASK)<<16)|compositions.charAt(list+2);
+                    } else {
+                        break;
+                    }
+                } else {
+                    // The list is sorted and we are past where the key would be: no match.
+                    break;
+                }
+            }
+        }
+        return -1;
+    }
+
+    /*
+     * Recomposes the buffer text starting at recomposeStartIndex
+     * (which is in NFD - decomposed and canonically ordered),
+     * and truncates the buffer contents.
+     *
+     * The buffer's StringBuilder is edited in place (combining marks are deleted,
+     * starters are overwritten with their composites) and buffer.flush() is
+     * called at the end.
+     *
+     * Note that recomposition never lengthens the text:
+     * Any character consists of either one or two code units;
+     * a composition may contain at most one more code unit than the original starter,
+     * while the combining mark that is removed has at least one code unit.
+     *
+     * buffer:              the reordering buffer whose text is recomposed
+     * recomposeStartIndex: index in the buffer's StringBuilder where recomposition starts;
+     *                      nothing is done if it equals the current length
+     * onlyContiguous:      if true, a non-starter that did not combine blocks
+     *                      later compositions with the current starter (FCC)
+     */
+    private void recompose(ReorderingBuffer buffer, int recomposeStartIndex,
+                           boolean onlyContiguous) {
+        StringBuilder sb=buffer.getStringBuilder();
+        int p=recomposeStartIndex;
+        if(p==sb.length()) {
+            return;
+        }
+
+        int starter, pRemove;
+        int compositionsList;
+        int c, compositeAndFwd;
+        int norm16;
+        int cc, prevCC;
+        boolean starterIsSupplementary;
+
+        // Some of the following variables are not used until we have a forward-combining starter
+        // and are only initialized now to avoid compiler warnings.
+        compositionsList=-1;  // used as indicator for whether we have a forward-combining starter
+        starter=-1;
+        starterIsSupplementary=false;
+        prevCC=0;
+
+        for(;;) {
+            // Read the next code point; p then points just past it.
+            c=sb.codePointAt(p);
+            p+=Character.charCount(c);
+            norm16=getNorm16(c);
+            cc=getCCFromYesOrMaybe(norm16);
+            if( // this character combines backward and
+                isMaybe(norm16) &&
+                // we have seen a starter that combines forward and
+                compositionsList>=0 &&
+                // the backward-combining character is not blocked
+                (prevCC<cc || prevCC==0)
+            ) {
+                if(isJamoVT(norm16)) {
+                    // c is a Jamo V/T, see if we can compose it with the previous character.
+                    if(c<Hangul.JAMO_T_BASE) {
+                        // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
+                        char prev=(char)(sb.charAt(starter)-Hangul.JAMO_L_BASE);
+                        if(prev<Hangul.JAMO_L_COUNT) {
+                            pRemove=p-1;
+                            char syllable=(char)
+                                (Hangul.HANGUL_BASE+
+                                 (prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))*
+                                 Hangul.JAMO_T_COUNT);
+                            char t;
+                            if(p!=sb.length() && (t=(char)(sb.charAt(p)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) {
+                                ++p;
+                                syllable+=t;  // The next character was a Jamo T.
+                            }
+                            sb.setCharAt(starter, syllable);
+                            // remove the Jamo V/T
+                            sb.delete(pRemove, p);
+                            p=pRemove;
+                        }
+                    }
+                    /*
+                     * No "else" for Jamo T:
+                     * Since the input is in NFD, there are no Hangul LV syllables that
+                     * a Jamo T could combine with.
+                     * All Jamo Ts are combined above when handling Jamo Vs.
+                     */
+                    if(p==sb.length()) {
+                        break;
+                    }
+                    compositionsList=-1;
+                    continue;
+                } else if((compositeAndFwd=combine(maybeYesCompositions, compositionsList, c))>=0) {
+                    // The starter and the combining mark (c) do combine.
+                    // Bit 0 of compositeAndFwd is the "combines forward" flag, see combine().
+                    int composite=compositeAndFwd>>1;
+
+                    // Remove the combining mark.
+                    pRemove=p-Character.charCount(c);  // pRemove & p: start & limit of the combining mark
+                    sb.delete(pRemove, p);
+                    p=pRemove;
+                    // Replace the starter with the composite.
+                    if(starterIsSupplementary) {
+                        if(composite>0xffff) {
+                            // both are supplementary
+                            sb.setCharAt(starter, UTF16.getLeadSurrogate(composite));
+                            sb.setCharAt(starter+1, UTF16.getTrailSurrogate(composite));
+                        } else {
+                            // Store the composite here, not c: c is the combining mark
+                            // that has just been removed from the text.
+                            sb.setCharAt(starter, (char)composite);
+                            sb.deleteCharAt(starter+1);
+                            // The composite is shorter than the starter,
+                            // move the intermediate characters forward one.
+                            starterIsSupplementary=false;
+                            --p;
+                        }
+                    } else if(composite>0xffff) {
+                        // The composite is longer than the starter,
+                        // move the intermediate characters back one.
+                        starterIsSupplementary=true;
+                        sb.setCharAt(starter, UTF16.getLeadSurrogate(composite));
+                        sb.insert(starter+1, UTF16.getTrailSurrogate(composite));
+                        ++p;
+                    } else {
+                        // both are on the BMP
+                        sb.setCharAt(starter, (char)composite);
+                    }
+
+                    // Keep prevCC because we removed the combining mark.
+
+                    if(p==sb.length()) {
+                        break;
+                    }
+                    // Is the composite a starter that combines forward?
+                    if((compositeAndFwd&1)!=0) {
+                        compositionsList=
+                            getCompositionsListForComposite(getRawNorm16(composite));
+                    } else {
+                        compositionsList=-1;
+                    }
+
+                    // We combined; continue with looking for compositions.
+                    continue;
+                }
+            }
+
+            // no combination this time
+            prevCC=cc;
+            if(p==sb.length()) {
+                break;
+            }
+
+            // If c did not combine, then check if it is a starter.
+            if(cc==0) {
+                // Found a new starter.
+                if((compositionsList=getCompositionsListForDecompYes(norm16))>=0) {
+                    // It may combine with something, prepare for it.
+                    // Remember where the starter begins: p is already past it.
+                    if(c<=0xffff) {
+                        starterIsSupplementary=false;
+                        starter=p-1;
+                    } else {
+                        starterIsSupplementary=true;
+                        starter=p-2;
+                    }
+                }
+            } else if(onlyContiguous) {
+                // FCC: no discontiguous compositions; any intervening character blocks.
+                compositionsList=-1;
+            }
+        }
+        buffer.flush();
+    }
+
+    /**
+     * Does c have a composition boundary before it?
+     * True if its decomposition begins with a character that has
+     * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()).
+     * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes
+     * (isCompYesAndZeroCC()) so we need not decompose.
+     *
+     * @param c the code point
+     * @param norm16 the norm16 value that belongs to {@code c}
+     * @return true if {@code c} is below {@code minCompNoMaybeCP} or
+     *         {@code norm16} passes {@link #norm16HasCompBoundaryBefore(int)}
+     */
+    private boolean hasCompBoundaryBefore(int c, int norm16) {
+        if (c < minCompNoMaybeCP) {
+            return true;
+        }
+        return norm16HasCompBoundaryBefore(norm16);
+    }
+    /**
+     * Tests a norm16 value for a composition boundary before its character:
+     * true for values below minNoNoCompNoMaybeCC and for algorithmic noNo values.
+     */
+    private boolean norm16HasCompBoundaryBefore(int norm16) {
+        if (norm16 < minNoNoCompNoMaybeCC) {
+            return true;
+        }
+        return isAlgorithmicNoNo(norm16);
+    }
+    private boolean hasCompBoundaryBefore(CharSequence s, int src, int limit) {
+        return src == limit || hasCompBoundaryBefore(Character.codePointAt(s, src));
+    }
+    /**
+     * Tests a norm16 value for a composition boundary after its character.
+     * The HAS_COMP_BOUNDARY_AFTER bit must be set; when onlyContiguous is true,
+     * isTrailCC01ForCompBoundaryAfter() must hold as well.
+     */
+    private boolean norm16HasCompBoundaryAfter(int norm16, boolean onlyContiguous) {
+        if ((norm16 & HAS_COMP_BOUNDARY_AFTER) == 0) {
+            return false;
+        }
+        if (!onlyContiguous) {
+            return true;
+        }
+        return isTrailCC01ForCompBoundaryAfter(norm16);
+    }
+    private boolean hasCompBoundaryAfter(CharSequence s, int start, int p, boolean onlyContiguous) {
+        return start == p || hasCompBoundaryAfter(Character.codePointBefore(s, p), onlyContiguous);
+    }
+    /**
+     * For FCC: Given norm16 HAS_COMP_BOUNDARY_AFTER, does it have tccc<=1?
+     * Inert values qualify directly; algorithmic decompositions are checked via
+     * the delta tccc bits, everything else via the first unit of its extraData entry.
+     */
+    private boolean isTrailCC01ForCompBoundaryAfter(int norm16) {
+        if (isInert(norm16)) {
+            return true;
+        }
+        if (isDecompNoAlgorithmic(norm16)) {
+            return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1;
+        }
+        return extraData.charAt(norm16 >> OFFSET_SHIFT) <= 0x1ff;
+    }
+
+    private int findPreviousCompBoundary(CharSequence s, int p, boolean onlyContiguous) {
+        while(p>0) {
+            int c=Character.codePointBefore(s, p);
+            int norm16 = getNorm16(c);
+            if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
+                break;
+            }
+            p-=Character.charCount(c);
+            if(hasCompBoundaryBefore(c, norm16)) {
+                break;
+            }
+        }
+        return p;
+    }
+    private int findNextCompBoundary(CharSequence s, int p, int limit, boolean onlyContiguous) {
+        while(p<limit) {
+            int c=Character.codePointAt(s, p);
+            int norm16=normTrie.get(c);
+            if(hasCompBoundaryBefore(c, norm16)) {
+                break;
+            }
+            p+=Character.charCount(c);
+            if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
+                break;
+            }
+        }
+        return p;
+    }
+
+
+    private int findNextFCDBoundary(CharSequence s, int p, int limit) {
+        while(p<limit) {
+            int c=Character.codePointAt(s, p);
+            int norm16;
+            if (c < minLcccCP || norm16HasDecompBoundaryBefore(norm16 = getNorm16(c))) {
+                break;
+            }
+            p+=Character.charCount(c);
+            if (norm16HasDecompBoundaryAfter(norm16)) {
+                break;
+            }
+        }
+        return p;
+    }
+
+    /**
+     * Collects the canonical decompositions of code points, in ascending
+     * code point order, for ComposedCharIter.
+     *
+     * <p>Code points below 0x2fa1e are scanned (with three hard-coded range
+     * skips, see below). For each one that has an NFD decomposition, the code
+     * point is stored in {@code chars} and its decomposition at the same index
+     * in {@code decomps}, until {@code chars} is full.
+     *
+     * @param chars receives the code points that have a decomposition
+     * @param decomps receives the matching decompositions; must be at least
+     *                as long as the number of entries stored
+     * @return the number of entries stored
+     */
+    public static int getDecompose(int[] chars, String[] decomps) {
+        Normalizer2 impl = Normalizer2.getNFDInstance();
+
+        int ch = -1;
+        int i = 0;
+
+        while (++ch < 0x2fa1e) {   // upper bound of the scan (original note: no canonical above 0x3ffff)
+            // TBD: the range skips below are a hack that saves about 50ms
+            // at startup; a better solution/lookup is needed.
+            if (ch == 0x30ff)
+                ch = 0xf900;
+            else if (ch == 0x115bc)
+                ch = 0x1d15e;
+            else if (ch == 0x1d1c1)
+                ch = 0x2f800;
+
+            String s = impl.getDecomposition(ch);
+
+            if(s != null && i < chars.length) {
+                chars[i] = ch;
+                decomps[i++] = s;
+            }
+        }
+        return i;
+    }
+
+    //------------------------------------------------------
+    // special method for Collation (RBTableBuilder.build())
+    //------------------------------------------------------
+    /**
+     * Tells whether c falls into one of the ASCII ranges that are wrapped in
+     * single quotes: U+0009..U+000D, U+0020..U+002F, U+003A..U+0040,
+     * U+005B..U+0060 or U+007B..U+007E.
+     */
+    private static boolean needSingleQuotation(char c) {
+        // The ranges are disjoint and ascending, so walk them from the bottom.
+        if (c < 0x0009) {
+            return false;
+        }
+        if (c <= 0x000D) {
+            return true;
+        }
+        if (c < 0x0020) {
+            return false;
+        }
+        if (c <= 0x002F) {
+            return true;
+        }
+        if (c < 0x003A) {
+            return false;
+        }
+        if (c <= 0x0040) {
+            return true;
+        }
+        if (c < 0x005B) {
+            return false;
+        }
+        if (c <= 0x0060) {
+            return true;
+        }
+        return c >= 0x007B && c <= 0x007E;
+    }
+
+    /**
+     * Canonically decomposes a string and wraps certain ASCII characters
+     * in single quotes.
+     *
+     * <p>Code units below U+00C0, code points without an NFD decomposition and
+     * Hangul syllables (U+AC00..U+D7A3) are copied through unchanged. Every other
+     * code point is replaced by its NFD decomposition. If the (first) character of
+     * a decomposition satisfies {@link #needSingleQuotation(char)}, it is emitted
+     * as {@code 'c'}. Decompositions whose leading combining class is lower than
+     * the preceding trailing combining class are merged into the preceding text
+     * in canonical order.
+     *
+     * @param string the text to decompose
+     * @return the decomposed text
+     */
+    public static String canonicalDecomposeWithSingleQuotation(String string) {
+        Normalizer2 impl = Normalizer2.getNFDInstance();
+        char[] src = string.toCharArray();
+        int    srcIndex = 0;
+        int    srcLimit = src.length;
+        char[] dest = new char[src.length * 3];  //MAX_BUF_SIZE_DECOMPOSE = 3
+        int    destIndex = 0;
+        int    destLimit = dest.length;
+
+        int prevSrc;
+        String norm;
+        int reorderStartIndex, length;
+        char c1, c2;
+        int cp;
+        int minNoMaybe = 0x00c0;
+        int cc, prevCC, trailCC;
+        char[] p;
+        int pStart;
+
+        // initialize
+        reorderStartIndex = 0;
+        prevCC = 0;
+        norm = null;
+        cp = 0;
+        pStart = 0;
+
+        cc = trailCC = -1; // initialize to bogus value
+        c1 = 0;
+        for (;;) {
+            prevSrc=srcIndex;
+            // quick check: skip over (1) units less than minNoMaybe,
+            // (2) code points without decomposition, (3) Hangul syllables
+            while (srcIndex != srcLimit) {
+                c1 = src[srcIndex];
+                if (c1 < minNoMaybe) {
+                    // A single BMP unit. cp must be refreshed here: reusing the
+                    // value of an earlier supplementary code point would advance
+                    // by two units and could step past srcLimit.
+                    cp = c1;
+                } else {
+                    cp = string.codePointAt(srcIndex);
+                    norm = impl.getDecomposition(cp);
+                    if (norm != null && !(c1 >= '\uac00' && c1 <= '\ud7a3')) {
+                        break;  // cp needs to be decomposed
+                    }
+                }
+                prevCC = 0;
+                srcIndex += (cp < 0x10000) ? 1 : 2;
+            }
+
+            // copy these code units all at once
+            if (srcIndex != prevSrc) {
+                length = srcIndex - prevSrc;
+                if ((destIndex + length) > destLimit) {
+                    // Grow instead of silently dropping the text: destIndex is
+                    // advanced below, so skipping the copy would corrupt the result.
+                    char[] tmpBuf = new char[Math.max(destLimit * 2, destIndex + length)];
+                    System.arraycopy(dest, 0, tmpBuf, 0, destIndex);
+                    dest = tmpBuf;
+                    destLimit = dest.length;
+                }
+                System.arraycopy(src,prevSrc,dest,destIndex,length);
+
+                destIndex += length;
+                reorderStartIndex = destIndex;
+            }
+
+            // end of source reached?
+            if (srcIndex == srcLimit) {
+                break;
+            }
+
+            // cp already contains *src and norm is set for it, increment src
+            srcIndex += (cp < 0x10000) ? 1 : 2;
+
+            if (cp < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
+                c2 = 0;
+                length = 1;
+
+                if (Character.isHighSurrogate(c1)
+                    || Character.isLowSurrogate(c1)) {
+                    norm = null;
+                }
+            } else {
+                length = 2;
+                c2 = src[srcIndex-1];
+            }
+
+            // get the decomposition and the lead and trail cc's
+            if (norm == null) {
+                // cp does not decompose
+                cc = trailCC = UCharacter.getCombiningClass(cp);
+                p = null;
+                pStart = -1;
+            } else {
+
+                pStart = 0;
+                p = norm.toCharArray();
+                length = p.length;
+                int cpNum = norm.codePointCount(0, length);
+                cc= UCharacter.getCombiningClass(norm.codePointAt(0));
+                trailCC= UCharacter.getCombiningClass(norm.codePointAt(cpNum-1));
+                if (length == 1) {
+                    // fastpath a single code unit from decomposition
+                    c1 = p[pStart];
+                    c2 = 0;
+                    p = null;
+                    pStart = -1;
+                }
+            }
+
+            if((destIndex + length * 3) >= destLimit) {  // 2 SingleQuotations
+                // buffer overflow: at least double, and always leave room for
+                // this decomposition even when the buffer is still tiny
+                char[] tmpBuf = new char[Math.max(destLimit * 2, destIndex + length * 3 + 1)];
+                System.arraycopy(dest, 0, tmpBuf, 0, destIndex);
+                dest = tmpBuf;
+                destLimit = dest.length;
+            }
+
+            // append the decomposition to the destination buffer, assume length>0
+            {
+                int reorderSplit = destIndex;
+                if (p == null) {
+                    // fastpath: single code point
+                    if (needSingleQuotation(c1)) {
+                        //if we need single quotation, no need to consider "prevCC"
+                        //and it must NOT be a supplementary pair
+                        dest[destIndex++] = '\'';
+                        dest[destIndex++] = c1;
+                        dest[destIndex++] = '\'';
+                        trailCC = 0;
+                    } else if(cc != 0 && cc < prevCC) {
+                        // (c1, c2) is out of order with respect to the preceding
+                        //  text
+                        destIndex += length;
+                        trailCC = insertOrdered(dest, reorderStartIndex,
+                                                reorderSplit, destIndex, c1, c2, cc);
+                    } else {
+                        // just append (c1, c2)
+                        dest[destIndex++] = c1;
+                        if(c2 != 0) {
+                            dest[destIndex++] = c2;
+                        }
+                    }
+                } else {
+                    // general: multiple code points (ordered by themselves)
+                    // from decomposition
+                    if (needSingleQuotation(p[pStart])) {
+                        // quote the first unit, then append the rest unquoted
+                        dest[destIndex++] = '\'';
+                        dest[destIndex++] = p[pStart++];
+                        dest[destIndex++] = '\'';
+                        length--;
+                        do {
+                            dest[destIndex++] = p[pStart++];
+                        } while(--length > 0);
+                    } else if (cc != 0 && cc < prevCC) {
+                        destIndex += length;
+                        trailCC = mergeOrdered(dest, reorderStartIndex,
+                                               reorderSplit, p, pStart,
+                                               pStart+length);
+                    } else {
+                        // just append the decomposition
+                        do {
+                            dest[destIndex++] = p[pStart++];
+                        } while (--length > 0);
+                    }
+                }
+            }
+            prevCC = trailCC;
+            if(prevCC == 0) {
+                // a starter ends the run that may still need reordering
+                reorderStartIndex = destIndex;
+            }
+        }
+
+        return new String(dest, 0, destIndex);
+    }
+
+    /**
+     * Single-code-point counterpart of mergeOrdered():
+     * bubble-inserts one code point (c1, c2) into the preceding text,
+     * which is already in canonical order.
+     * (c1, c2) may or may not yet have been stored at source[current]..source[p].
+     *
+     * Required: p=current+lengthof(c1, c2), i.e. p=current+(c2==0 ? 1 : 2)
+     *
+     * before: source[start]..source[current] is already ordered, and
+     *         source[current]..source[p]     may or may not hold (c1, c2) but
+     *                          must be exactly the same length as (c1, c2)
+     * after: source[start]..source[p] is ordered
+     *
+     * @param source the buffer that is modified in place
+     * @param start start of the ordered text
+     * @param current limit of the ordered text, start of the slot for (c1, c2)
+     * @param p limit of the slot for (c1, c2)
+     * @param c1 first (or only) code unit of the code point to insert
+     * @param c2 second code unit, or 0 if there is none
+     * @param cc combining class of (c1, c2)
+     * @return the trailing combining class
+     */
+    private static int/*unsigned byte*/ insertOrdered(char[] source,
+                                                      int start,
+                                                      int current, int p,
+                                                      char c1, char c2,
+                                                      int/*unsigned byte*/ cc) {
+        int trailCC=cc;
+
+        if (start<current && cc!=0) {
+            // search backward for the insertion point where cc>=prevCC
+            PrevArgs cursor = new PrevArgs();
+            cursor.src      = source;
+            cursor.start    = start;
+            cursor.current  = current;
+            cursor.c1       = c1;
+            cursor.c2       = c2;
+
+            // combining class of the code point right before the slot
+            int prevCC=getPrevCC(cursor);
+
+            if(cc<prevCC) {
+                // that code point will end up last, so keep its cc
+                trailCC=prevCC;
+                int insertAt=cursor.current;
+                while(start<cursor.current) {
+                    prevCC=getPrevCC(cursor);
+                    if(cc>=prevCC) {
+                        break;
+                    }
+                    insertAt=cursor.current;
+                }
+
+                // where the indices stand now:
+                // [start]..[cursor.current]    0..? code points that can be ignored
+                // [cursor.current]..[insertAt] 0..1 code points with prevCC<=cc
+                // [insertAt]..[current]        1..n code points with >cc, moved up to insert (c1, c2)
+                // [current]..[p]               1 code point (c1, c2) with cc
+
+                // shift the code units in between up, copying from the top down
+                int to=p;
+                while (current!=insertAt) {
+                    source[--to]=source[--current];
+                }
+            }
+        }
+
+        // store (c1, c2) at the insertion point
+        source[current] = c1;
+        if (c2!=0) {
+            source[(current+1)] = c2;
+        }
+
+        // the cc of the last code point is known
+        return trailCC;
+    }
+    /**
+     * merge two UTF-16 string parts together
+     * to canonically order (order by combining classes) their concatenation
+     *
+     * the two strings may already be adjacent, so that the merging is done
+     * in-place if the two strings are not adjacent, then the buffer holding the
+     * first one must be large enough
+     * the second string may or may not be ordered in itself
+     *
+     * before: source[start]..source[current] is already ordered, and
+     *         data[next]..data[limit]        may be ordered in itself, but
+     *                          is not in relation to [start..current[
+     * after: source[start..current+(limit-next)[ is ordered
+     *
+     * the algorithm is a simple bubble-sort that takes the characters from
+     * data[next++] and inserts them in correct combining class order into the
+     * preceding part of the string
+     *
+     * since this function is called much less often than the single-code point
+     * insertOrdered(), it just uses that for easier maintenance
+     *
+     * @param source buffer holding the first, already ordered part; receives the result
+     * @param start start of the ordered part in {@code source}
+     * @param current limit of the ordered part in {@code source}
+     * @param data array holding the second part
+     * @param next start of the second part in {@code data}
+     * @param limit limit of the second part in {@code data}
+     * @return the trailing combining class
+     */
+    private static int /*unsigned byte*/ mergeOrdered(char[] source,
+                                                      int start,
+                                                      int current,
+                                                      char[] data,
+                                                        int next,
+                                                        int limit) {
+            int r;
+            int /*unsigned byte*/ cc, trailCC=0;
+            boolean adjacent;
+
+            adjacent= current==next;
+            NextCCArgs ncArgs = new NextCCArgs();
+            ncArgs.source = data;
+            ncArgs.next   = next;
+            ncArgs.limit  = limit;
+
+            if(start!=current) {
+
+                while(ncArgs.next<ncArgs.limit) {
+                    cc=getNextCC(ncArgs);
+                    if(cc==0) {
+                        // does not bubble back
+                        trailCC=0;
+                        if(adjacent) {
+                            current=ncArgs.next;
+                        } else {
+                            // current indexes the first part, so the code point
+                            // goes into source (like the bulk copy below), not
+                            // back into the array it was read from
+                            source[current++]=ncArgs.c1;
+                            if(ncArgs.c2!=0) {
+                                source[current++]=ncArgs.c2;
+                            }
+                        }
+                        break;
+                    } else {
+                        r=current+(ncArgs.c2==0 ? 1 : 2);
+                        trailCC=insertOrdered(source,start, current, r,
+                                              ncArgs.c1, ncArgs.c2, cc);
+                        current=r;
+                    }
+                }
+            }
+
+            if(ncArgs.next==ncArgs.limit) {
+                // we know the cc of the last code point
+                return trailCC;
+            } else {
+                if(!adjacent) {
+                    // copy the second string part
+                    do {
+                        source[current++]=data[ncArgs.next++];
+                    } while(ncArgs.next!=ncArgs.limit);
+                    ncArgs.limit=current;
+                }
+                // After the copy above, limit is an index into source, so the
+                // last code point has to be read from source in that case.
+                PrevArgs prevArgs = new PrevArgs();
+                prevArgs.src   = adjacent ? data : source;
+                prevArgs.start = start;
+                prevArgs.current =  ncArgs.limit;
+                return getPrevCC(prevArgs);
+            }
+
+    }
+    /**
+     * In/out state for getPrevCC(): the text and the backward-moving position
+     * go in, the code unit(s) that were read come out.
+     */
+    private static final class PrevArgs{
+        // text to read from
+        char[] src;
+        // start: lower bound of the text; current: limit of the next read,
+        // moved backward by getPrevCC()
+        int start, current;
+        // code units set by getPrevCC(); c2 is 0 unless a surrogate pair was read
+        char c1, c2;
+    }
+
+    /**
+     * In/out state for getNextCC(): the text and the forward-moving position
+     * go in, the code unit(s) that were read come out.
+     */
+    private static final class NextCCArgs{
+        // text to read from
+        char[] source;
+        // next: index of the next read, moved forward by getNextCC();
+        // limit: upper bound of the text
+        int next, limit;
+        // code units set by getNextCC(); c2 is 0 unless a surrogate pair was read
+        char c1, c2;
+    }
+    private static int /*unsigned byte*/ getNextCC(NextCCArgs args) {
+        args.c1=args.source[args.next++];
+        args.c2=0;
+
+        if (UTF16.isTrailSurrogate(args.c1)) {
+            /* unpaired second surrogate */
+            return 0;
+        } else if (!UTF16.isLeadSurrogate(args.c1)) {
+            return UCharacter.getCombiningClass(args.c1);
+        } else if (args.next!=args.limit &&
+                        UTF16.isTrailSurrogate(args.c2=args.source[args.next])){
+            ++args.next;
+            return UCharacter.getCombiningClass(Character.toCodePoint(args.c1, args.c2));
+        } else {
+            /* unpaired first surrogate */
+            args.c2=0;
+            return 0;
+        }
+    }
+    private static int /*unsigned*/ getPrevCC(PrevArgs args) {
+        args.c1=args.src[--args.current];
+        args.c2=0;
+
+        if (args.c1 < MIN_CCC_LCCC_CP) {
+            return 0;
+        } else if (UTF16.isLeadSurrogate(args.c1)) {
+            /* unpaired first surrogate */
+            return 0;
+        } else if (!UTF16.isTrailSurrogate(args.c1)) {
+            return UCharacter.getCombiningClass(args.c1);
+        } else if (args.current!=args.start &&
+                    UTF16.isLeadSurrogate(args.c2=args.src[args.current-1])) {
+            --args.current;
+            return UCharacter.getCombiningClass(Character.toCodePoint(args.c2, args.c1));
+        } else {
+            /* unpaired second surrogate */
+            args.c2=0;
+            return 0;
+        }
+    }
+
+    /**
+     * Returns the getFCD16() value of the code point that ends at index p of s,
+     * or 0 if the range start..p is empty.
+     * NOTE(review): the value is returned unmasked although the name suggests
+     * only the trail cc - confirm that callers expect the full value.
+     */
+    private int getPreviousTrailCC(CharSequence s, int start, int p) {
+        return (start == p) ? 0 : getFCD16(Character.codePointBefore(s, p));
+    }
+
+    // Version of the loaded data (presumably set when the data is read - confirm in the loader).
+    private VersionInfo dataVersion;
+
+    // BMP code point thresholds for quick check loops looking at single UTF-16 code units.
+    private int minDecompNoCP;
+    // Code points below this value always have a composition boundary before them,
+    // see hasCompBoundaryBefore(int, int).
+    private int minCompNoMaybeCP;
+    // findNextFCDBoundary() stops at code points below this value.
+    private int minLcccCP;
+
+    // Norm16 value thresholds for quick check combinations and types of extra data.
+    private int minYesNo;
+    private int minYesNoMappingsOnly;
+    private int minNoNo;
+    private int minNoNoCompBoundaryBefore;
+    // norm16 values below this have a composition boundary before them,
+    // see norm16HasCompBoundaryBefore().
+    private int minNoNoCompNoMaybeCC;
+    private int minNoNoEmpty;
+    private int limitNoNo;
+    private int centerNoNoDelta;
+    private int minMaybeYes;
+
+    // Maps a code point to its norm16 value (normTrie.get(c)).
+    private CodePointTrie.Fast16 normTrie;
+    // Compositions lists searched by combine() during recompose().
+    private String maybeYesCompositions;
+    private String extraData;  // mappings and/or compositions for yesYes, yesNo & noNo characters
+    private byte[] smallFCD;  // [0x100] one bit per 32 BMP code points, set if any FCD!=0
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/Punycode.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,512 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * Copyright (C) 2003-2004, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+//
+// CHANGELOG
+//      2005-05-19 Edward Wang
+//          - copy this file from icu4jsrc_3_2/src/com/ibm/icu/text/Punycode.java
+//          - move from package com.ibm.icu.text to package sun.net.idn
+//          - use ParseException instead of StringPrepParseException
+//      2007-08-14 Martin Buchholz
+//          - remove redundant casts
+//
+package jdk.internal.icu.impl;
+
+import java.text.ParseException;
+import jdk.internal.icu.lang.UCharacter;
+import jdk.internal.icu.text.UTF16;
+
+/**
+ * Ported code from ICU punycode.c
+ * @author ram
+ */
+
+/* Package Private class */
+public final class Punycode {
+
+    /* Punycode parameters for Bootstring */
+    private static final int BASE           = 36;
+    private static final int TMIN           = 1;
+    private static final int TMAX           = 26;
+    private static final int SKEW           = 38;
+    private static final int DAMP           = 700;
+    private static final int INITIAL_BIAS   = 72;
+    private static final int INITIAL_N      = 0x80;
+
+    /* "Basic" Unicode/ASCII code points */
+    private static final int HYPHEN         = 0x2d;
+    private static final int DELIMITER      = HYPHEN;
+
+    private static final int ZERO           = 0x30;
+    private static final int NINE           = 0x39;
+
+    private static final int SMALL_A        = 0x61;
+    private static final int SMALL_Z        = 0x7a;
+
+    private static final int CAPITAL_A      = 0x41;
+    private static final int CAPITAL_Z      = 0x5a;
+
+    //  TODO: eliminate the 256 limitation
+    private static final int MAX_CP_COUNT   = 256;
+
+    private static final int UINT_MAGIC     = 0x80000000;
+    private static final long ULONG_MAGIC   = 0x8000000000000000L;
+
+    /**
+     * Computes the next bias value from the delta that was just encoded or
+     * decoded.
+     *
+     * @param delta     the delta that was just processed
+     * @param length    number of code points handled so far, including the
+     *                  current one; used as a divisor, so it must not be 0
+     * @param firstTime {@code true} for the first delta, which is scaled by
+     *                  {@code DAMP} instead of 2
+     * @return the adapted bias
+     */
+    private static int adaptBias(int delta, int length, boolean firstTime){
+        int scaled = firstTime ? delta/DAMP : delta/2;
+        scaled += scaled/length;
+
+        final int threshold = ((BASE-TMIN)*TMAX)/2;
+        int count = 0;
+        while(scaled > threshold) {
+            scaled /= (BASE-TMIN);
+            count += BASE;
+        }
+
+        return count+(((BASE-TMIN+1)*scaled)/(scaled+SKEW));
+    }
+
+    /**
+     * basicToDigit[] contains the numeric value of a basic code
+     * point (for use in representing integers) in the range 0 to
+     * BASE-1, or -1 if the code point does not represent a value.
+     */
+    static final int[]    basicToDigit= new int[]{
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
+
+        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
+
+        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
+
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+    };
+
+    /**
+     * Forces an ASCII letter to the requested case; any other character is
+     * returned unchanged.
+     *
+     * @param b         the character to map
+     * @param uppercase {@code true} to map a..z to A..Z, {@code false} to map
+     *                  A..Z to a..z
+     * @return the case-mapped character
+     */
+    private static char asciiCaseMap(char b, boolean uppercase) {
+        final int caseOffset = SMALL_A-CAPITAL_A;
+        if(uppercase && SMALL_A<=b && b<=SMALL_Z) {
+            return (char)(b-caseOffset);
+        }
+        if(!uppercase && CAPITAL_A<=b && b<=CAPITAL_Z) {
+            return (char)(b+caseOffset);
+        }
+        return b;
+    }
+
+    /**
+     * Returns the basic code point that represents the digit value
+     * {@code digit}, which must be in the range 0 to BASE-1.
+     * Values 0..25 map to ASCII letters and 26..35 map to ASCII 0..9.
+     *
+     * @param digit     the digit value
+     * @param uppercase {@code true} to produce A..Z for letter digits,
+     *                  {@code false} to produce a..z; ignored for 26..35
+     * @return the ASCII character for the digit
+     */
+    private static char digitToBasic(int digit, boolean uppercase) {
+        if(digit>=26) {
+            /* 26..35 map to ASCII 0..9 */
+            return (char)((ZERO-26)+digit);
+        }
+        /* 0..25 map to ASCII a..z or A..Z */
+        final int firstLetter = uppercase ? CAPITAL_A : SMALL_A;
+        return (char)(firstLetter+digit);
+    }
+    /**
+     * Converts Unicode to Punycode.
+     * The input string must not contain single, unpaired surrogates.
+     * The output consists of the basic (below 0x80) code points of the input,
+     * a delimiter if there were any, and the digits that encode the rest.
+     *
+     * <p>The output is written directly into a growable buffer, so it is never
+     * truncated. The number of input code points is still limited to
+     * {@code MAX_CP_COUNT}.
+     *
+     * @param src       the UTF-16 text to encode
+     * @param caseFlags optional flags indexed by code unit of {@code src}, or
+     *                  {@code null}. For a basic code point the flag selects
+     *                  upper case ({@code true}) or lower case ({@code false});
+     *                  for any other code point a set flag makes the last digit
+     *                  of its encoding upper case.
+     * @return a new buffer holding the encoded text
+     * @throws ParseException if {@code src} contains an unpaired surrogate
+     * @throws IndexOutOfBoundsException if {@code src} holds more than
+     *         {@code MAX_CP_COUNT} code points
+     * @throws RuntimeException if the delta computation would overflow
+     */
+    public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws ParseException{
+
+        int[] cpBuffer = new int[MAX_CP_COUNT];
+        int n, delta, handledCPCount, basicLength, bias, j, m, q, k, t, srcCPCount;
+        char c, c2;
+        int srcLength = src.length();
+        StringBuffer result = new StringBuffer();
+        /*
+         * Handle the basic code points and
+         * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
+         */
+        srcCPCount=0;
+
+        for(j=0; j<srcLength; ++j) {
+            if(srcCPCount==MAX_CP_COUNT) {
+                /* too many input code points */
+                throw new IndexOutOfBoundsException();
+            }
+            c=src.charAt(j);
+            if(isBasic(c)) {
+                /* basic code points are recorded as 0 so the main loop skips them */
+                cpBuffer[srcCPCount++]=0;
+                result.append(caseFlags!=null ? asciiCaseMap(c, caseFlags[j]) : c);
+            } else {
+                n=((caseFlags!=null && caseFlags[j])? 1 : 0)<<31;
+                if(!UTF16.isSurrogate(c)) {
+                    n|=c;
+                } else if(UTF16.isLeadSurrogate(c) && (j+1)<srcLength && UTF16.isTrailSurrogate(c2=src.charAt(j+1))) {
+                    ++j;
+
+                    n|=UCharacter.getCodePoint(c, c2);
+                } else {
+                    /* error: unmatched surrogate */
+                    throw new ParseException("Illegal char found", -1);
+                }
+                cpBuffer[srcCPCount++]=n;
+            }
+        }
+
+        /* Finish the basic string - if it is not empty - with a delimiter. */
+        basicLength=result.length();
+        if(basicLength>0) {
+            result.append((char)DELIMITER);
+        }
+
+        /*
+         * handledCPCount is the number of code points that have been handled
+         * basicLength is the number of basic code points
+         */
+
+        /* Initialize the state: */
+        n=INITIAL_N;
+        delta=0;
+        bias=INITIAL_BIAS;
+
+        /* Main encoding loop: */
+        for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) {
+            /*
+             * All non-basic code points < n have been handled already.
+             * Find the next larger one:
+             */
+            for(m=0x7fffffff, j=0; j<srcCPCount; ++j) {
+                q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
+                if(n<=q && q<m) {
+                    m=q;
+                }
+            }
+
+            /*
+             * Increase delta enough to advance the decoder's
+             * <n,i> state to <m,0>, but guard against overflow:
+             */
+            if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
+                throw new RuntimeException("Internal program error");
+            }
+            delta+=(m-n)*(handledCPCount+1);
+            n=m;
+
+            /* Encode a sequence of same code points n */
+            for(j=0; j<srcCPCount; ++j) {
+                q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
+                if(q<n) {
+                    ++delta;
+                } else if(q==n) {
+                    /* Represent delta as a generalized variable-length integer: */
+                    for(q=delta, k=BASE; /* no condition */; k+=BASE) {
+
+                        /* threshold t is k-bias clamped to TMIN..TMAX */
+                        t=k-bias;
+                        if(t<TMIN) {
+                            t=TMIN;
+                        } else if(k>=(bias+TMAX)) {
+                            t=TMAX;
+                        }
+
+                        if(q<t) {
+                            break;
+                        }
+
+                        result.append(digitToBasic(t+(q-t)%(BASE-t), false));
+                        q=(q-t)/(BASE-t);
+                    }
+
+                    /* last digit carries the case flag stored in the sign bit */
+                    result.append(digitToBasic(q, (cpBuffer[j]<0)));
+                    bias=adaptBias(delta, handledCPCount+1,(handledCPCount==basicLength));
+                    delta=0;
+                    ++handledCPCount;
+                }
+            }
+
+            ++delta;
+            ++n;
+        }
+
+        return result;
+    }
+
+    /** Returns whether {@code ch} is below {@code INITIAL_N}, i.e. a basic code point. */
+    private static boolean isBasic(int ch){
+        return ch < INITIAL_N;
+    }
+
+    /** Returns whether {@code ch} is one of the ASCII capital letters A..Z. */
+    private static boolean isBasicUpperCase(int ch){
+        return ch >= CAPITAL_A && ch <= CAPITAL_Z;
+    }
+
+    /** Returns whether {@code ch} lies in the surrogate range 0xd800..0xdfff. */
+    private static boolean isSurrogate(int ch){
+        final int surrogateMask = 0xfffff800;
+        return (ch & surrogateMask) == 0xd800;
+    }
+    /**
+     * Converts Punycode to Unicode.
+     * The Unicode string will be at most as long as the Punycode string.
+     *
+     * <p>The output is assembled in a buffer of {@code MAX_CP_COUNT} UTF-16
+     * code units. A result that does not fit is not returned truncated: the
+     * final copy out of the buffer fails with an
+     * {@code IndexOutOfBoundsException}.
+     *
+     * @param src       the Punycode text (without any ACE prefix handling)
+     * @param caseFlags optional array that receives one flag per output code
+     *                  unit, or {@code null}. A basic code point's flag tells
+     *                  whether it is an upper-case ASCII letter; an inserted code
+     *                  point's flag tells whether the last digit of its encoding
+     *                  was upper case. NOTE(review): the array is not bounds
+     *                  checked here - presumably the caller sizes it for the
+     *                  output; confirm.
+     * @return a new buffer holding the decoded text
+     * @throws ParseException if the input contains a non-basic character before
+     *         the last delimiter, an invalid or missing digit, a value that
+     *         overflows, or decodes to a surrogate or a value above 0x10ffff
+     */
+    public static StringBuffer decode(StringBuffer src, boolean[] caseFlags)
+                               throws ParseException{
+        int srcLength = src.length();
+        StringBuffer result = new StringBuffer();
+        int n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
+                destCPCount, firstSupplementaryIndex, cpLength;
+        char b;
+        int destCapacity = MAX_CP_COUNT;
+        char[] dest = new char[destCapacity];
+
+        /*
+         * Handle the basic code points:
+         * Let basicLength be the number of input code points
+         * before the last delimiter, or 0 if there is none,
+         * then copy the first basicLength code points to the output.
+         *
+         * The two following loops iterate backward.
+         */
+        for(j=srcLength; j>0;) {
+            if(src.charAt(--j)==DELIMITER) {
+                break;
+            }
+        }
+        destLength=basicLength=destCPCount=j;
+
+        while(j>0) {
+            b=src.charAt(--j);
+            if(!isBasic(b)) {
+                throw new ParseException("Illegal char found", -1);
+            }
+
+            if(j<destCapacity) {
+                dest[j]= b;
+
+                if(caseFlags!=null) {
+                    caseFlags[j]=isBasicUpperCase(b);
+                }
+            }
+        }
+
+        /* Initialize the state: */
+        n=INITIAL_N;
+        i=0;
+        bias=INITIAL_BIAS;
+        firstSupplementaryIndex=1000000000;
+
+        /*
+         * Main decoding loop:
+         * Start just after the last delimiter if any
+         * basic code points were copied; start at the beginning otherwise.
+         */
+        for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
+            /*
+             * in is the index of the next character to be consumed, and
+             * destCPCount is the number of code points in the output array.
+             *
+             * Decode a generalized variable-length integer into delta,
+             * which gets added to i.  The overflow checking is easier
+             * if we increase i as we go, then subtract off its starting
+             * value at the end to obtain delta.
+             */
+            for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
+                if(in>=srcLength) {
+                    throw new ParseException("Illegal char found", -1);
+                }
+
+                /*
+                 * Look the digit up by the full char value. Anything outside
+                 * the table is not a digit; narrowing to byte would index the
+                 * table negatively or alias a non-ASCII char onto an ASCII one.
+                 */
+                b=src.charAt(in++);
+                digit=(b<basicToDigit.length) ? basicToDigit[b] : -1;
+                if(digit<0) {
+                    throw new ParseException("Invalid char found", -1);
+                }
+                if(digit>(0x7fffffff-i)/w) {
+                    /* integer overflow */
+                    throw new ParseException("Illegal char found", -1);
+                }
+
+                i+=digit*w;
+                t=k-bias;
+                if(t<TMIN) {
+                    t=TMIN;
+                } else if(k>=(bias+TMAX)) {
+                    t=TMAX;
+                }
+                if(digit<t) {
+                    break;
+                }
+
+                if(w>0x7fffffff/(BASE-t)) {
+                    /* integer overflow */
+                    throw new ParseException("Illegal char found", -1);
+                }
+                w*=BASE-t;
+            }
+
+            /*
+             * Modification from sample code:
+             * Increments destCPCount here,
+             * where needed instead of in for() loop tail.
+             */
+            ++destCPCount;
+            bias=adaptBias(i-oldi, destCPCount, (oldi==0));
+
+            /*
+             * i was supposed to wrap around from (incremented) destCPCount to 0,
+             * incrementing n each time, so we'll fix that now:
+             */
+            if(i/destCPCount>(0x7fffffff-n)) {
+                /* integer overflow */
+                throw new ParseException("Illegal char found", -1);
+            }
+
+            n+=i/destCPCount;
+            i%=destCPCount;
+            /* not needed for Punycode: */
+            /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */
+
+            if(n>0x10ffff || isSurrogate(n)) {
+                /* Unicode code point overflow */
+                throw new ParseException("Illegal char found", -1);
+            }
+
+            /*
+             * Insert n at position i of the output. The insertion fits as long
+             * as the new length does not exceed the capacity (<=, so that a
+             * result of exactly destCapacity code units is stored completely).
+             */
+            cpLength=UTF16.getCharCount(n);
+            if((destLength+cpLength)<=destCapacity) {
+                int codeUnitIndex;
+
+                /*
+                 * Handle indexes when supplementary code points are present.
+                 *
+                 * In almost all cases, there will be only BMP code points before i
+                 * and even in the entire string.
+                 * This is handled with the same efficiency as with UTF-32.
+                 *
+                 * Only the rare cases with supplementary code points are handled
+                 * more slowly - but not too bad since this is an insertion anyway.
+                 */
+                if(i<=firstSupplementaryIndex) {
+                    codeUnitIndex=i;
+                    if(cpLength>1) {
+                        firstSupplementaryIndex=codeUnitIndex;
+                    } else {
+                        ++firstSupplementaryIndex;
+                    }
+                } else {
+                    codeUnitIndex=firstSupplementaryIndex;
+                    codeUnitIndex=UTF16.moveCodePointOffset(dest, 0, destLength, codeUnitIndex, i-codeUnitIndex);
+                }
+
+                /* use the UChar index codeUnitIndex instead of the code point index i */
+                if(codeUnitIndex<destLength) {
+                    System.arraycopy(dest, codeUnitIndex,
+                                     dest, codeUnitIndex+cpLength,
+                                    (destLength-codeUnitIndex));
+                    if(caseFlags!=null) {
+                        System.arraycopy(caseFlags, codeUnitIndex,
+                                         caseFlags, codeUnitIndex+cpLength,
+                                         destLength-codeUnitIndex);
+                    }
+                }
+                if(cpLength==1) {
+                    /* BMP, insert one code unit */
+                    dest[codeUnitIndex]=(char)n;
+                } else {
+                    /* supplementary character, insert two code units */
+                    dest[codeUnitIndex]=UTF16.getLeadSurrogate(n);
+                    dest[codeUnitIndex+1]=UTF16.getTrailSurrogate(n);
+                }
+                if(caseFlags!=null) {
+                    /* Case of last character determines uppercase flag: */
+                    caseFlags[codeUnitIndex]=isBasicUpperCase(src.charAt(in-1));
+                    if(cpLength==2) {
+                        caseFlags[codeUnitIndex+1]=false;
+                    }
+                }
+            }
+            destLength+=cpLength;
+            ++i;
+        }
+        /* throws IndexOutOfBoundsException if destLength exceeds the buffer */
+        result.append(dest, 0, destLength);
+        return result;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/ReplaceableUCharacterIterator.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+ */
+
+package jdk.internal.icu.impl;
+
+import jdk.internal.icu.text.Replaceable;
+import jdk.internal.icu.text.ReplaceableString;
+import jdk.internal.icu.text.UCharacterIterator;
+
+/**
+ * DLF docs must define behavior when Replaceable is mutated underneath
+ * the iterator.
+ *
+ * This and ICUCharacterIterator share some code, maybe they should share
+ * an implementation, or the common state and implementation should be
+ * moved up into UCharacterIterator.
+ *
+ * What are first, last, and getBeginIndex doing here?!?!?!
+ */
+public class ReplaceableUCharacterIterator extends UCharacterIterator {
+
+    // public constructor ------------------------------------------------------
+
+    /**
+     * Creates an iterator over the given string, positioned at index 0.
+     * @param str text which the iterator will be based on
+     * @exception IllegalArgumentException if {@code str} is null
+     */
+    public ReplaceableUCharacterIterator(String str){
+        if(str==null){
+            throw new IllegalArgumentException();
+        }
+        replaceable  = new ReplaceableString(str);
+        currentIndex = 0;
+    }
+
+    /**
+     * Creates an iterator over the given buffer, positioned at index 0.
+     * @param buf buffer of text on which the iterator will be based
+     * @exception IllegalArgumentException if {@code buf} is null
+     */
+    public ReplaceableUCharacterIterator(StringBuffer buf){
+        if(buf==null){
+            throw new IllegalArgumentException();
+        }
+        replaceable  = new ReplaceableString(buf);
+        currentIndex = 0;
+    }
+
+    // public methods ----------------------------------------------------------
+
+    /**
+     * Creates a copy of this iterator via {@code super.clone()}; the underlying
+     * <code>Replaceable</code> object is shared, not cloned.
+     * @return copy of this iterator, or null if cloning is not supported
+     */
+    public Object clone(){
+        Object copy;
+        try {
+            copy = super.clone();
+        } catch (CloneNotSupportedException e) {
+            copy = null; // never invoked
+        }
+        return copy;
+    }
+
+    /**
+     * Returns the UTF16 code unit at the current index without moving.
+     * @return current UTF16 code unit, or DONE if the current index is at or
+     *         past the end of the text
+     */
+    public int current(){
+        if (currentIndex >= replaceable.length()) {
+            return DONE;
+        }
+        return replaceable.charAt(currentIndex);
+    }
+
+    /**
+     * Returns the length of the underlying text in UTF16 code units.
+     * @return length of the text
+     */
+    public int getLength(){
+        return this.replaceable.length();
+    }
+
+    /**
+     * Gets the iterator's current index in the text.
+     * @return the current index
+     */
+    public int getIndex(){
+        return this.currentIndex;
+    }
+
+    /**
+     * Returns the UTF16 code unit at the current index and then advances the
+     * index by 1.
+     * If the current index is already at or past the end of the text, the index
+     * is left unchanged and DONE is returned.
+     * @return the code unit at the current index, or DONE at the end of the text
+     */
+    public int next(){
+        if (currentIndex >= replaceable.length()) {
+            return DONE;
+        }
+        return replaceable.charAt(currentIndex++);
+    }
+
+
+    /**
+     * Moves the index back by 1 and returns the UTF16 code unit at the new
+     * index.
+     * If the current index is already 0, the index is left unchanged and DONE
+     * is returned.
+     * @return the previous code unit in the text, or DONE at the start of the
+     *         text
+     */
+    public int previous(){
+        if (currentIndex <= 0) {
+            return DONE;
+        }
+        return replaceable.charAt(--currentIndex);
+    }
+
+    /**
+     * Moves the iterator to the given index in the text.
+     * The index counts 16-bit code units and may equal the text length
+     * (the position just past the last code unit).
+     * @param currentIndex the new index within the text.
+     * @exception IllegalArgumentException is thrown if the index is negative
+     *            or greater than the text length.
+     */
+    public void setIndex(int currentIndex) {
+        final boolean inRange = currentIndex >= 0 && currentIndex <= replaceable.length();
+        if (!inRange) {
+            throw new IllegalArgumentException();
+        }
+        this.currentIndex = currentIndex;
+    }
+
+    /**
+     * Copies the entire text into {@code fillIn}, starting at {@code offset}.
+     * @param fillIn destination array
+     * @param offset index in {@code fillIn} at which the copy starts
+     * @return the length of the text, i.e. the number of code units copied
+     * @exception IndexOutOfBoundsException if {@code offset} is negative or the
+     *            text does not fit into {@code fillIn} from {@code offset};
+     *            the message is the text length
+     */
+    public int getText(char[] fillIn, int offset){
+        final int textLength = replaceable.length();
+        final boolean fits = offset >= 0 && offset + textLength <= fillIn.length;
+        if(!fits){
+            throw new IndexOutOfBoundsException(Integer.toString(textLength));
+        }
+        replaceable.getChars(0,textLength,fillIn,offset);
+        return textLength;
+    }
+
+    // private data members ----------------------------------------------------
+
+    /**
+     * Replaceable object
+     */
+    private Replaceable replaceable;
+    /**
+     * Current currentIndex
+     */
+    private int currentIndex;
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/StringPrepDataReader.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+/*
+ ******************************************************************************
+ * Copyright (C) 2003, International Business Machines Corporation and   *
+ * others. All Rights Reserved.                                               *
+ ******************************************************************************
+ *
+ * Created on May 2, 2003
+ *
+ * To change the template for this generated file go to
+ * Window>Preferences>Java>Code Generation>Code and Comments
+ */
+// CHANGELOG
+//      2005-05-19 Edward Wang
+//          - copy this file from icu4jsrc_3_2/src/com/ibm/icu/impl/StringPrepDataReader.java
+//          - move from package com.ibm.icu.impl to package sun.net.idn
+//
+package jdk.internal.icu.impl;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import jdk.internal.icu.impl.ICUBinary;
+
+
+/**
+ * @author ram
+ *
+ * To change the template for this generated type comment go to
+ * Window>Preferences>Java>Code Generation>Code and Comments
+ */
+public final class StringPrepDataReader implements ICUBinary.Authenticate {
+
+   /**
+    * <p>Creates a reader on the given stream. The header is consumed first
+    * through {@code ICUBinary.readHeader} (with this object as the
+    * authenticator) and its result is kept as the Unicode version; the
+    * remaining data is then read through a {@code DataInputStream}.</p>
+    * @param inputStream input stream positioned at the start of the data file
+    * @exception IOException if reading the header fails
+    */
+    public StringPrepDataReader(InputStream inputStream)
+                                        throws IOException{
+        // The header must be consumed before the payload is wrapped for reading.
+        this.unicodeVersion = ICUBinary.readHeader(inputStream, DATA_FORMAT_ID, this);
+        this.dataInputStream = new DataInputStream(inputStream);
+    }
+
+    /**
+     * Reads the trie bytes and the mapping table from the data stream, in that
+     * order, filling both arrays completely.
+     * @param idnaBytes    receives the bytes that make up the trie
+     * @param mappingTable receives the extra (mapping) data, one char per entry
+     * @exception IOException if an I/O error occurs; an
+     *            {@code EOFException} if the stream ends before both arrays
+     *            are filled
+     */
+    public void read(byte[] idnaBytes,
+                        char[] mappingTable)
+                        throws IOException{
+
+        //Read the bytes that make up the idnaTrie.
+        //readFully is required: a plain read() may return after filling only
+        //part of the array, leaving the stream misaligned for the data below.
+        dataInputStream.readFully(idnaBytes);
+
+        //Read the extra data
+        for(int i=0;i<mappingTable.length;i++){
+            mappingTable[i]=dataInputStream.readChar();
+        }
+    }
+
+    /**
+     * Returns the data format version this reader understands.
+     * The shared internal array is returned, not a copy.
+     * @return the four format version bytes
+     */
+    public byte[] getDataFormatVersion(){
+        final byte[] formatVersion = DATA_FORMAT_VERSION;
+        return formatVersion;
+    }
+
+    /**
+     * Checks a data file's format version against the version this reader
+     * understands. Elements 0, 2 and 3 must match; element 1 is not compared.
+     * @param version the format version bytes read from the data file
+     * @return true if the version is acceptable
+     */
+    public boolean isDataVersionAcceptable(byte[] version){
+        if (version[0] != DATA_FORMAT_VERSION[0]) {
+            return false;
+        }
+        if (version[2] != DATA_FORMAT_VERSION[2]) {
+            return false;
+        }
+        return version[3] == DATA_FORMAT_VERSION[3];
+    }
+    /**
+     * Reads {@code length} int values from the data stream.
+     * @param length number of indexes to read
+     * @return a new array holding the values in stream order
+     * @exception IOException if reading from the stream fails
+     */
+    public int[] readIndexes(int length)throws IOException{
+        final int[] indexes = new int[length];
+        int filled = 0;
+        while (filled < length) {
+            indexes[filled++] = dataInputStream.readInt();
+        }
+        return indexes;
+    }
+
+    /**
+     * Returns the value obtained from the data file header when this reader
+     * was constructed. The internal array is returned, not a copy.
+     * @return the Unicode version bytes
+     */
+    public byte[] getUnicodeVersion(){
+        return this.unicodeVersion;
+    }
+    // private data members -------------------------------------------------
+
+
+    /**
+    * ICU data file input stream
+    */
+    private DataInputStream dataInputStream;
+    private byte[] unicodeVersion;
+    /**
+    * File format version that this class understands.
+    * No guarantees are made if a older version is used
+    * see store.c of gennorm for more information and values
+    */
+    ///* dataFormat="SPRP" 0x53, 0x50, 0x52, 0x50  */
+    private static final byte DATA_FORMAT_ID[] = {(byte)0x53, (byte)0x50,
+                                                    (byte)0x52, (byte)0x50};
+    private static final byte DATA_FORMAT_VERSION[] = {(byte)0x3, (byte)0x2,
+                                                        (byte)0x5, (byte)0x2};
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/Trie.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,367 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ ******************************************************************************
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ******************************************************************************
+ */
+
+package jdk.internal.icu.impl;
+
+import jdk.internal.icu.lang.UCharacter;
+import jdk.internal.icu.text.UTF16;
+
+import java.io.DataInputStream;
+import java.io.InputStream;
+import java.io.IOException;
+
+/**
+ * <p>A trie is a kind of compressed, serializable table of values
+ * associated with Unicode code points (0..0x10ffff).</p>
+ * <p>This class defines the basic structure of a trie and provides methods
+ * to <b>retrieve the offsets to the actual data</b>.</p>
+ * <p>Data will be the form of an array of basic types, char or int.</p>
+ * <p>The actual data format will have to be specified by the user in the
+ * inner static interface com.ibm.icu.impl.Trie.DataManipulate.</p>
+ * <p>This trie implementation is optimized for getting offset while walking
+ * forward through a UTF-16 string.
+ * Therefore, the simplest and fastest access macros are the
+ * fromLead() and fromOffsetTrail() methods.
+ * The fromBMP() methods are a little more complicated; they get offsets even
+ * for lead surrogate codepoints, while the fromLead() methods get special
+ * "folded" offsets for lead surrogate code units if there is relevant data
+ * associated with them.
+ * From such a folded offset, an offset needs to be extracted to supply
+ * to the fromOffsetTrail() methods.
+ * To handle such supplementary codepoints, some offset information is kept
+ * in the data.</p>
+ * <p>Methods in com.ibm.icu.impl.Trie.DataManipulate are called to retrieve
+ * that offset from the folded value for the lead surrogate unit.</p>
+ * <p>For examples of use, see com.ibm.icu.impl.CharTrie or
+ * com.ibm.icu.impl.IntTrie.</p>
+ * @author synwee
+ * @see com.ibm.icu.impl.CharTrie
+ * @see com.ibm.icu.impl.IntTrie
+ * @since release 2.1, Jan 01 2002
+ */
+public abstract class Trie
+{
+    // public class declaration ----------------------------------------
+
+    /**
+    * Character data in a Trie has a different user-specified format
+    * for different purposes.
+    * This interface specifies the method to be implemented in order for
+    * a Trie to extract the surrogate offset information encapsulated within
+    * the data.
+    */
+    public static interface DataManipulate
+    {
+        /**
+        * Called by Trie to extract from a lead surrogate's
+        * data
+        * the index array offset of the indexes for that lead surrogate.
+        * @param value data value for a surrogate from the trie, including the
+        *        folding offset
+        * @return data offset or 0 if there is no data for the lead surrogate
+        */
+        public int getFoldingOffset(int value);
+    }
+
+    // default implementation: treats the data value itself as the folding offset
+    private static class DefaultGetFoldingOffset implements DataManipulate {
+        public int getFoldingOffset(int value) {
+            return value;
+        }
+    }
+
+    // protected constructor -------------------------------------------
+
+    /**
+    * Trie constructor for CharTrie use.
+    * @param inputStream ICU data file input stream which contains the
+    *                        trie
+    * @param dataManipulate object containing the information to parse the
+    *                       trie data; if null, each value is its own folding offset
+    * @throws IOException thrown when reading from the input stream fails.
+    * @throws IllegalArgumentException thrown when the header is not valid.
+    */
+    protected Trie(InputStream inputStream,
+                   DataManipulate  dataManipulate) throws IOException
+    {
+        DataInputStream input = new DataInputStream(inputStream);
+        // Magic number to authenticate the data.
+        int signature = input.readInt();
+        m_options_    = input.readInt(); // read before checkHeader(), which validates it
+
+        if (!checkHeader(signature)) {
+            throw new IllegalArgumentException("ICU data file error: Trie header authentication failed, please check if you have the most updated ICU data file");
+        }
+
+        if(dataManipulate != null) {
+            m_dataManipulate_ = dataManipulate;
+        } else {
+            m_dataManipulate_ = new DefaultGetFoldingOffset();
+        }
+        m_isLatin1Linear_ = (m_options_ &
+                             HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0;
+        m_dataOffset_     = input.readInt();
+        m_dataLength_     = input.readInt();
+        unserialize(inputStream); // overridable; runs before any subclass constructor body
+    }
+
+    // protected data members ------------------------------------------
+
+    /**
+     * Lead surrogate code points' index displacement in the index array.
+     * <pre>{@code
+     * 0x10000-0xd800=0x2800
+     * 0x2800 >> INDEX_STAGE_1_SHIFT_
+     * }</pre>
+     */
+    protected static final int LEAD_INDEX_OFFSET_ = 0x2800 >> 5;
+    /**
+     * Shift size for shifting right the input index. 1..9
+     */
+    protected static final int INDEX_STAGE_1_SHIFT_ = 5;
+    /**
+     * Shift size for shifting left the index array values.
+     * Increases possible data size with 16-bit index values at the cost
+     * of compactability.
+     * This requires blocks of stage 2 data to be aligned by
+     * DATA_GRANULARITY.
+     * 0..INDEX_STAGE_1_SHIFT
+     */
+    protected static final int INDEX_STAGE_2_SHIFT_ = 2;
+    /**
+     * Number of data values in a stage 2 (data array) block.
+     */
+    protected static final int DATA_BLOCK_LENGTH=1<<INDEX_STAGE_1_SHIFT_;
+    /**
+     * Mask for getting the lower bits from the input index.
+     * DATA_BLOCK_LENGTH - 1.
+     */
+    protected static final int INDEX_STAGE_3_MASK_ = DATA_BLOCK_LENGTH - 1;
+    /**
+     * Surrogate mask to use when shifting offset to retrieve supplementary
+     * values
+     */
+    protected static final int SURROGATE_MASK_ = 0x3FF;
+    /**
+     * Index or UTF16 characters
+     */
+    protected char m_index_[];
+    /**
+     * Internal TrieValue which handles the parsing of the data value.
+     * This class is to be implemented by the user
+     */
+    protected DataManipulate m_dataManipulate_;
+    /**
+     * Start index of the data portion of the trie. CharTrie combines
+     * index and data into a char array, so this is used to indicate the
+     * initial offset to the data portion.
+     * Note this index always points to the initial value.
+     */
+    protected int m_dataOffset_;
+    /**
+     * Length of the data array
+     */
+    protected int m_dataLength_;
+
+    // protected methods -----------------------------------------------
+
+    /**
+    * Gets the offset to the data which the surrogate pair points to.
+    * @param lead lead surrogate
+    * @param trail trailing surrogate
+    * @return offset to data
+    */
+    protected abstract int getSurrogateOffset(char lead, char trail);
+
+    /**
+    * Gets the offset to the data which the index ch after variable offset
+    * points to.
+    * Note for locating a non-supplementary character data offset, calling
+    * <p>
+    * getRawOffset(0, ch);
+    * </p>
+    * will do. Otherwise if it is a supplementary character formed by
+    * surrogates lead and trail. Then we would have to call getRawOffset()
+    * with getFoldingIndexOffset(). See getSurrogateOffset().
+    * @param offset index offset which ch is to start from
+    * @param ch index to be used after offset
+    * @return offset to the data
+    */
+    protected final int getRawOffset(int offset, char ch)
+    {
+        return (m_index_[offset + (ch >> INDEX_STAGE_1_SHIFT_)]
+                << INDEX_STAGE_2_SHIFT_)
+                + (ch & INDEX_STAGE_3_MASK_);
+    }
+
+    /**
+    * Gets the offset to data which the BMP character points to
+    * Treats a lead surrogate as a normal code point.
+    * @param ch BMP character
+    * @return offset to data
+    */
+    protected final int getBMPOffset(char ch)
+    {
+        return (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE
+                && ch <= UTF16.LEAD_SURROGATE_MAX_VALUE)
+                ? getRawOffset(LEAD_INDEX_OFFSET_, ch)
+                : getRawOffset(0, ch);
+                // using a getRawOffset(ch) makes no diff
+    }
+
+    /**
+    * Gets the offset to the data which this lead surrogate character points
+    * to.
+    * Data at the returned offset may contain folding offset information for
+    * the next trailing surrogate character.
+    * @param ch lead surrogate character
+    * @return offset to data
+    */
+    protected final int getLeadOffset(char ch)
+    {
+       return getRawOffset(0, ch);
+    }
+
+    /**
+     * Internal trie getter from a code point.
+     * Could be faster(?) but longer with
+     * {@code if((c32)<=0xd7ff) { (result)=_TRIE_GET_RAW(trie, data, 0, c32); }}
+     * Gets the offset to data which the codepoint points to
+     * @param ch codepoint
+     * @return offset to data, or -1 if ch is negative or above UCharacter.MAX_VALUE
+     */
+    protected final int getCodePointOffset(int ch)
+    {
+        // if ((ch >> 16) == 0) slower
+        if (ch < 0) {
+            return -1;
+        } else if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
+            // fastpath for the part of the BMP below surrogates (D800) where getRawOffset() works
+            return getRawOffset(0, (char)ch);
+        } else if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) {
+            // BMP codepoint
+            return getBMPOffset((char)ch);
+        } else if (ch <= UCharacter.MAX_VALUE) {
+            // look at the construction of supplementary characters
+            // trail forms the ends of it.
+            return getSurrogateOffset(UTF16.getLeadSurrogate(ch),
+                                      (char)(ch & SURROGATE_MASK_));
+        } else {
+            // ch is above UCharacter.MAX_VALUE: signal the error by returning -1
+            return -1;
+        }
+    }
+
+    /**
+    * <p>Parses the inputstream and creates the trie index with it.</p>
+    * <p>This is overridden by the child classes.</p>
+    * @param inputStream input stream containing the trie information
+    * @exception IOException thrown when data reading fails.
+    */
+    protected void unserialize(InputStream inputStream) throws IOException
+    {
+        //indexLength is a multiple of 1024 >> INDEX_STAGE_2_SHIFT_
+        m_index_              = new char[m_dataOffset_];
+        DataInputStream input = new DataInputStream(inputStream);
+        for (int i = 0; i < m_dataOffset_; i ++) {
+             m_index_[i] = input.readChar();
+        }
+    }
+
+    /**
+    * Determines if this is a 16 bit trie
+    * @return true if this is a 16 bit trie
+    */
+    protected final boolean isCharTrie()
+    {
+        return (m_options_ & HEADER_OPTIONS_DATA_IS_32_BIT_) == 0;
+    }
+
+    // header constants and private data members -----------------------
+
+    /**
+     * Latin 1 option mask
+     */
+    protected static final int HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_ = 0x200;
+    /**
+    * Constant number to authenticate the byte block
+    */
+    protected static final int HEADER_SIGNATURE_ = 0x54726965;
+    /**
+    * Header option formatting
+    */
+    private static final int HEADER_OPTIONS_SHIFT_MASK_ = 0xF;
+    protected static final int HEADER_OPTIONS_INDEX_SHIFT_ = 4;
+    protected static final int HEADER_OPTIONS_DATA_IS_32_BIT_ = 0x100;
+
+    /**
+     * Flag indicator for Latin quick access data block
+     */
+    private boolean m_isLatin1Linear_;
+
+    /**
+     * <p>Trie options field.</p>
+     * <p>options bit field:<br>
+     * 9  1 = Latin-1 data is stored linearly at data + DATA_BLOCK_LENGTH<br>
+     * 8  0 = 16-bit data, 1=32-bit data<br>
+     * 7..4  INDEX_STAGE_2_SHIFT   // 0..INDEX_STAGE_1_SHIFT<br>
+     * 3..0  INDEX_STAGE_1_SHIFT   // 1..9</p>
+     */
+    private int m_options_;
+
+    // private methods ---------------------------------------------------
+
+    /**
+     * Authenticates raw data header.
+     * Checks the signature and the shift values in m_options_ (set by the caller).
+     * @param signature magic number read from the start of the header
+     * @return true if the header is authenticated valid
+     */
+    private final boolean checkHeader(int signature)
+    {
+        // check the signature
+        // Trie in big-endian US-ASCII (0x54726965).
+        // Magic number to authenticate the data.
+        if (signature != HEADER_SIGNATURE_) {
+            return false;
+        }
+
+        if ((m_options_ & HEADER_OPTIONS_SHIFT_MASK_) !=
+                                                    INDEX_STAGE_1_SHIFT_ ||
+            ((m_options_ >> HEADER_OPTIONS_INDEX_SHIFT_) &
+                                                HEADER_OPTIONS_SHIFT_MASK_)
+                                                 != INDEX_STAGE_2_SHIFT_) {
+            return false;
+        }
+        return true;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/Trie2.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,655 @@
+/*
+ * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * Copyright (C) 2009-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+
+package jdk.internal.icu.impl;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+
+/**
+ * This is the interface and common implementation of a Unicode Trie2.
+ * It is a kind of compressed table that maps from Unicode code points (0..0x10ffff)
+ * to 16- or 32-bit integer values.  It works best when there are ranges of
+ * characters with the same value, which is generally the case with Unicode
+ * character properties.
+ *
+ * This is the second common version of a Unicode trie (hence the name Trie2).
+ *
+ */
+abstract class Trie2 implements Iterable<Trie2.Range> {
+
+    /**
+     * Create a Trie2 from its serialized form.  Inverse of utrie2_serialize().
+     *
+     * Reads from the current position and leaves the buffer after the end of the trie.
+     *
+     * The serialized format is identical between ICU4C and ICU4J, so this function
+     * will work with serialized Trie2s from either.
+     *
+     * The actual type of the returned Trie2 will be either Trie2_16 or Trie2_32, depending
+     * on the width of the data.
+     *
+     * To obtain the width of the Trie2, check the actual class type of the returned Trie2.
+     * Or use the createFromSerialized() function of Trie2_16 or Trie2_32, which will
+     * return only Tries of their specific type/size.
+     *
+     * The serialized Trie2 on the stream may be in either little or big endian byte order.
+     * This allows using serialized Tries from ICU4C without needing to consider the
+     * byte order of the system that created them.
+     *
+     * @param bytes a byte buffer to the serialized form of a UTrie2.
+     * @return An unserialized Trie2, ready for use.
+     * @throws IllegalArgumentException if the stream does not contain a serialized Trie2.
+     * @throws IOException if a read error occurs in the buffer.
+     *
+     */
+    public static Trie2 createFromSerialized(ByteBuffer bytes) throws IOException {
+        // Serialized layout, as declared in ICU4C utrie2_impl.h:
+        //
+        //     UTrie2Header header;
+        //     uint16_t index[header.index2Length];
+        //     uint16_t data[header.shiftedDataLength<<2];  -- or uint32_t data[...]
+        //
+        //     typedef struct UTrie2Header {
+        //         /** "Tri2" in big-endian US-ASCII (0x54726932) */
+        //         uint32_t signature;
+        //
+        //         /**
+        //          * options bit field:
+        //          * 15.. 4   reserved (0)
+        //          *  3.. 0   UTrie2ValueBits valueBits
+        //          */
+        //         uint16_t options;
+        //
+        //         /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */
+        //         uint16_t indexLength;
+        //
+        //         /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT */
+        //         uint16_t shiftedDataLength;
+        //
+        //         /** Null index and data blocks, not shifted. */
+        //         uint16_t index2NullOffset, dataNullOffset;
+        //
+        //         /**
+        //          * First code point of the single-value range ending with U+10ffff,
+        //          * rounded up and then shifted right by UTRIE2_SHIFT_1.
+        //          */
+        //         uint16_t shiftedHighStart;
+        //     } UTrie2Header;
+        //
+        // Only 16-bit tries (Trie2_16) are built here: a header whose valueBits
+        // field is non-zero is rejected below.
+
+        // Remember the caller's byte order; the finally block puts it back.
+        final ByteOrder callerOrder = bytes.order();
+        try {
+            final int signatureAsWritten = 0x54726932;   // "Tri2"
+            final int signatureSwapped   = 0x32697254;   // the same four bytes reversed
+            final UTrie2Header hdr = new UTrie2Header();
+
+            // The signature doubles as a byte-order mark: seeing it byte-swapped
+            // means the rest of the trie must be read in the opposite order.
+            hdr.signature = bytes.getInt();
+            if (hdr.signature == signatureSwapped) {
+                // Switch the buffer to the order the trie was written in.
+                final ByteOrder flipped = (callerOrder == ByteOrder.BIG_ENDIAN)
+                        ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN;
+                bytes.order(flipped);
+                hdr.signature = signatureAsWritten;
+            } else if (hdr.signature != signatureAsWritten) {
+                throw new IllegalArgumentException("Buffer does not contain a serialized UTrie2");
+            }
+
+            // Remaining header fields: six unsigned 16-bit values, in stream order.
+            hdr.options           = bytes.getChar();
+            hdr.indexLength       = bytes.getChar();
+            hdr.shiftedDataLength = bytes.getChar();
+            hdr.index2NullOffset  = bytes.getChar();
+            hdr.dataNullOffset    = bytes.getChar();
+            hdr.shiftedHighStart  = bytes.getChar();
+
+            // Any non-zero valueBits is refused: the code below always builds a Trie2_16.
+            if ((hdr.options & UTRIE2_OPTIONS_VALUE_BITS_MASK) != 0) {
+                throw new IllegalArgumentException("UTrie2 serialized format error.");
+            }
+
+            // Header accepted: create the runtime object and attach the header.
+            final Trie2 trie = new Trie2_16();
+            trie.header = hdr;
+
+            // Unpack the lengths and offsets, undoing the shifts used in the header.
+            trie.indexLength      = hdr.indexLength;
+            trie.dataLength       = hdr.shiftedDataLength << UTRIE2_INDEX_SHIFT;
+            trie.index2NullOffset = hdr.index2NullOffset;
+            trie.dataNullOffset   = hdr.dataNullOffset;
+            trie.highStart        = hdr.shiftedHighStart << UTRIE2_SHIFT_1;
+            trie.highValueIndex   = trie.dataLength - UTRIE2_DATA_GRANULARITY + trie.indexLength;
+
+            // One char[] holds the index followed immediately by the 16-bit data,
+            // so data16 (the start of the data) equals the index length.
+            trie.data16 = trie.indexLength;
+            trie.index  = new char[trie.indexLength + trie.dataLength];
+
+            // The index comes first in the buffer ...
+            for (int slot = 0; slot < trie.indexLength; slot++) {
+                trie.index[slot] = bytes.getChar();
+            }
+
+            // ... and the data follows, filling the rest of the array.
+            for (int slot = trie.data16; slot < trie.index.length; slot++) {
+                trie.index[slot] = bytes.getChar();
+            }
+
+            // No separate 32-bit array. Cache the value found at dataNullOffset and the
+            // one in the bad-UTF-8 slot of the data as initialValue and errorValue.
+            trie.data32       = null;
+            trie.initialValue = trie.index[trie.dataNullOffset];
+            trie.errorValue   = trie.index[trie.data16 + UTRIE2_BAD_UTF8_DATA_OFFSET];
+
+            return trie;
+        } finally {
+            // Runs on success and on failure alike, so the caller's buffer never
+            // keeps the temporarily flipped byte order.
+            bytes.order(callerOrder);
+        }
+    }
+
+    /**
+     * Get the value for a code point as stored in the Trie2.
+     *
+     * @param codePoint the code point
+     * @return the value
+     */
+    public abstract int get(int codePoint);
+
+    /**
+     * Get the trie value for a UTF-16 code unit.
+     *
+     * A Trie2 stores two distinct values for input in the lead surrogate
+     * range, one for lead surrogates, which is the value that will be
+     * returned by this function, and a second value that is returned
+     * by Trie2.get().
+     *
+     * For code units outside of the lead surrogate range, this function
+     * returns the same result as Trie2.get().
+     *
+     * This function, together with the alternate value for lead surrogates,
+     * makes possible very efficient processing of UTF-16 strings without
+     * first converting surrogate pairs to their corresponding 32 bit code point
+     * values.
+     *
+     * At build-time, enumerate the contents of the Trie2 to see if there
+     * is non-trivial (non-initialValue) data for any of the supplementary
+     * code points associated with a lead surrogate.
+     * If so, then set a special (application-specific) value for the
+     * lead surrogate code _unit_, with Trie2Writable.setForLeadSurrogateCodeUnit().
+     *
+     * At runtime, use Trie2.getFromU16SingleLead(). If there is non-trivial
+     * data and the code unit is a lead surrogate, then check if a trail surrogate
+     * follows. If so, assemble the supplementary code point and look up its value
+     * with Trie2.get(); otherwise reset the lead
+     * surrogate's value or do a code point lookup for it.
+     *
+     * If there is only trivial data for lead and trail surrogates, then processing
+     * can often skip them. For example, in normalization or case mapping
+     * all characters that do not have any mappings are simply copied as is.
+     *
+     * @param c the code point or lead surrogate value.
+     * @return the value
+     */
+    public abstract int getFromU16SingleLead(char c);
+
+    /**
+     * When iterating over the contents of a Trie2, Elements of this type are produced.
+     * The iterator will return one item for each contiguous range of codepoints  having the same value.
+     *
+     * When iterating, the same Range object will be reused and returned for each range.
+     * If you need to retain complete iteration results, copy each returned Range,
+     * or save the range in some other way, before advancing to the next iteration step.
+     */
+    public static class Range {
+        public int     startCodePoint;
+        public int     endCodePoint;     // Inclusive.
+        public int     value;
+        public boolean leadSurrogate;
+
+        // Two ranges are equal only when they are of exactly the same class
+        // and all four fields match.
+        public boolean equals(Object other) {
+            if (other == null || getClass() != other.getClass()) {
+                return false;
+            }
+            final Range that = (Range) other;
+            if (startCodePoint != that.startCodePoint || endCodePoint != that.endCodePoint) {
+                return false;
+            }
+            return value == that.value && leadSurrogate == that.leadSurrogate;
+        }
+
+        // Folds the four fields, in declaration order, through the hash helpers.
+        public int hashCode() {
+            int seed = hashUChar32(initHash(), startCodePoint);
+            seed = hashUChar32(seed, endCodePoint);
+            return hashByte(hashInt(seed, value), leadSurrogate ? 1 : 0);
+        }
+    }
+
+    /**
+     *  Create an iterator over the value ranges in this Trie2.
+     *  Values from the Trie2 are not remapped or filtered, but are returned as they
+     *  are stored in the Trie2.
+     *
+     * @return an Iterator
+     */
+    public Iterator<Range> iterator() {
+        return iterator(defaultValueMapper); // identity mapper: values are reported as stored
+    }
+
+    // Identity mapping, used by the no-argument iterator() so that values
+    // are reported exactly as they are stored.
+    private static ValueMapper defaultValueMapper = new ValueMapper() {
+        public int map(int storedValue) { return storedValue; }
+    };
+
+    /**
+     * Create an iterator over the value ranges from this Trie2.
+     * Values from the Trie2 are passed through a caller-supplied remapping function,
+     * and it is the remapped values that determine the ranges that
+     * will be produced by the iterator.
+     *
+     *
+     * @param mapper provides a function to remap values obtained from the Trie2.
+     * @return an Iterator
+     */
+    public Iterator<Range> iterator(ValueMapper mapper) {
+        return new Trie2Iterator(mapper); // a new iterator per call; mapper is passed through as-is
+    }
+
+    /**
+     * When iterating over the contents of a Trie2, an instance of ValueMapper may
+     * be used to remap the values from the Trie2.  The remapped values will be used
+     * both in determining the ranges of codepoints and as the value to be returned
+     * for each range.
+     *
+     * Example of use, with an anonymous implementation of ValueMapper:
+     *
+     * <pre>{@code
+     * ValueMapper m = new ValueMapper() {
+     *    public int map(int in) {return in & 0x1f;}
+     * };
+     * for (Iterator<Range> iter = trie.iterator(m); iter.hasNext(); ) {
+     *     Range r = iter.next();
+     *     ...  // Do something with the range r.
+     * }
+     * }</pre>
+     */
+    public interface ValueMapper {
+        public int  map(int originalVal); // returns the value to use in place of originalVal
+    }
+
+    //--------------------------------------------------------------------------------
+    //
+    // Below this point are internal implementation items.  No further public API.
+    //
+    //--------------------------------------------------------------------------------
+
+     /**
+     * Trie2 data structure in serialized form:
+     *
+     * UTrie2Header header;
+     * uint16_t index[header.index2Length];
+     * uint16_t data[header.shiftedDataLength<<2];  -- or uint32_t data[...]
+     *
+     * For Java, this is read from the stream into an instance of UTrie2Header.
+     * (The C version just places a struct over the raw serialized data.)
+     *
+     * @internal
+     */
+    static class UTrie2Header {
+        /** "Tri2" in big-endian US-ASCII (0x54726932); stored in this form even if read byte-swapped */
+        int signature;
+
+        /**
+         * options bit field (uint16_t):
+         * 15.. 4   reserved (0)
+         *  3.. 0   UTrie2ValueBits valueBits
+         */
+        int  options;
+
+        /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH  (uint16_t) */
+        int  indexLength;
+
+        /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT  (uint16_t) */
+        int  shiftedDataLength;
+
+        /** Null index and data blocks, not shifted.  (uint16_t) */
+        int  index2NullOffset, dataNullOffset;
+
+        /**
+         * First code point of the single-value range ending with U+10ffff,
+         * rounded up and then shifted right by UTRIE2_SHIFT_1.  (uint16_t)
+         */
+        int shiftedHighStart;
+    }
+
+    //
+    //  Data members of UTrie2.
+    //
+    UTrie2Header  header;
+    char          index[];           // Index array.  Includes data for 16 bit Tries.
+    int           data16;            // Offset to data portion of the index array, if 16 bit data.
+                                     //    zero if 32 bit data.
+    int           data32[];          // NULL if 16b data is used via index
+
+    int           indexLength;
+    int           dataLength;
+    int           index2NullOffset;  // 0xffff if there is no dedicated index-2 null block
+    int           initialValue;
+
+    /** Value returned for out-of-range code points and illegal UTF-8. */
+    int           errorValue;
+
+    /* Start of the last range which ends at U+10ffff, and its value. */
+    int           highStart;
+    int           highValueIndex;
+
+    int           dataNullOffset;
+
+    /**
+     * Trie2 constants, defining shift widths, index array lengths, etc.
+     *
+     * These are needed for the runtime macros but users can treat these as
+     * implementation details and skip to the actual public API further below.
+     */
+
+    static final int UTRIE2_OPTIONS_VALUE_BITS_MASK=0x000f;  // selects the low 4 bits
+
+
+    /** Shift size for getting the index-1 table offset. */
+    static final int UTRIE2_SHIFT_1=6+5;  // 11
+
+    /** Shift size for getting the index-2 table offset. */
+    static final int UTRIE2_SHIFT_2=5;
+
+    /**
+     * Difference between the two shift sizes,
+     * for getting an index-1 offset from an index-2 offset. 6=11-5
+     */
+    static final int UTRIE2_SHIFT_1_2=UTRIE2_SHIFT_1-UTRIE2_SHIFT_2;
+
+    /**
+     * Number of index-1 entries for the BMP. 32=0x20
+     * This part of the index-1 table is omitted from the serialized form.
+     */
+    static final int UTRIE2_OMITTED_BMP_INDEX_1_LENGTH=0x10000>>UTRIE2_SHIFT_1;
+
+    /** Number of entries in an index-2 block. 64=0x40 */
+    static final int UTRIE2_INDEX_2_BLOCK_LENGTH=1<<UTRIE2_SHIFT_1_2;
+
+    /** Mask for getting the lower bits for the in-index-2-block offset. */
+    static final int UTRIE2_INDEX_2_MASK=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
+
+    /** Number of entries in a data block. 32=0x20 */
+    static final int UTRIE2_DATA_BLOCK_LENGTH=1<<UTRIE2_SHIFT_2;
+
+    /** Mask for getting the lower bits for the in-data-block offset. */
+    static final int UTRIE2_DATA_MASK=UTRIE2_DATA_BLOCK_LENGTH-1;
+
+    /**
+     * Shift size for shifting left the index array values.
+     * Increases possible data size with 16-bit index values at the cost
+     * of compactability.
+     * This requires data blocks to be aligned by UTRIE2_DATA_GRANULARITY.
+     */
+    static final int UTRIE2_INDEX_SHIFT=2;
+
+    /** The alignment size of a data block. Also the granularity for compaction. */
+    static final int UTRIE2_DATA_GRANULARITY=1<<UTRIE2_INDEX_SHIFT;  // 4
+
+    /**
+     * The part of the index-2 table for U+D800..U+DBFF stores values for
+     * lead surrogate code _units_ not code _points_.
+     * Values for lead surrogate code _points_ are indexed with this portion of the table.
+     * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.)
+     */
+    static final int UTRIE2_LSCP_INDEX_2_OFFSET=0x10000>>UTRIE2_SHIFT_2;  // 2048=0x800
+    static final int UTRIE2_LSCP_INDEX_2_LENGTH=0x400>>UTRIE2_SHIFT_2;    // 32=0x20
+
+    /** Count the lengths of both BMP pieces. 2080=0x820 */
+    static final int UTRIE2_INDEX_2_BMP_LENGTH=UTRIE2_LSCP_INDEX_2_OFFSET+UTRIE2_LSCP_INDEX_2_LENGTH;
+
+    /**
+     * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820.
+     * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2.
+     */
+    static final int UTRIE2_UTF8_2B_INDEX_2_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH;
+    static final int UTRIE2_UTF8_2B_INDEX_2_LENGTH=0x800>>6;  /* U+0800 is the first code point after 2-byte UTF-8 */
+
+    /**
+     * The index-1 table, only used for supplementary code points, at offset 2112=0x840.
+     * Variable length, for code points up to highStart, where the last single-value range starts.
+     * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1.
+     * (For 0x100000 supplementary code points U+10000..U+10ffff.)
+     *
+     * The part of the index-2 table for supplementary code points starts
+     * after this index-1 table.
+     *
+     * Both the index-1 table and the following part of the index-2 table
+     * are omitted completely if there is only BMP data.
+     */
+    static final int UTRIE2_INDEX_1_OFFSET=UTRIE2_UTF8_2B_INDEX_2_OFFSET+UTRIE2_UTF8_2B_INDEX_2_LENGTH;
+
+    /**
+     * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80.
+     * Used with linear access for single bytes 0..0xbf for simple error handling.
+     * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH.
+     */
+    static final int UTRIE2_BAD_UTF8_DATA_OFFSET=0x80;
+
+    /**
+     * Implementation class for an iterator over a Trie2.
+     *
+     *   Iteration over a Trie2 first returns all of the ranges that are indexed by code points,
+     *   then returns the special alternate values for the lead surrogates.
+     *   Adjacent ranges are merged for as long as their mapped (ValueMapper) values are equal.
+     * @internal
+     */
+    class Trie2Iterator implements Iterator<Range> {
+
+        // The normal constructor that configures the iterator to cover the complete
+        //   contents of the Trie2
+        Trie2Iterator(ValueMapper vm) {
+            mapper    = vm;
+            nextStart = 0;
+            limitCP   = 0x110000;
+            doLeadSurrogates = true;
+        }
+
+        /**
+         *  Returns the next range of code points (or lead surrogates) sharing one mapped value.
+         *  The same Range object is reused and overwritten on every call.
+         */
+        public Range next() {
+            if (!hasNext()) {
+                throw new NoSuchElementException();
+            }
+            if (nextStart >= limitCP) {
+                // Switch over from iterating normal code point values to
+                //   doing the alternate lead-surrogate values.
+                doingCodePoints = false;
+                nextStart = 0xd800;
+            }
+            int   endOfRange = 0;
+            int   val = 0;
+            int   mappedVal = 0;
+
+            if (doingCodePoints) {
+                // Iteration over code point values.
+                val = get(nextStart);
+                mappedVal = mapper.map(val);
+                endOfRange = rangeEnd(nextStart, limitCP, val);
+                // Loop once for each range in the Trie2 with the same raw (unmapped) value.
+                // Loop continues so long as the mapped values are the same.
+                for (;;) {
+                    if (endOfRange >= limitCP-1) {
+                        break;
+                    }
+                    val = get(endOfRange+1);
+                    if (mapper.map(val) != mappedVal) {
+                        break;
+                    }
+                    endOfRange = rangeEnd(endOfRange+1, limitCP, val);
+                }
+            } else {
+                // Iteration over the alternate lead surrogate values.
+                val = getFromU16SingleLead((char)nextStart);
+                mappedVal = mapper.map(val);
+                endOfRange = rangeEndLS((char)nextStart);
+                // Loop once for each range in the Trie2 with the same raw (unmapped) value.
+                // Loop continues so long as the mapped values are the same.
+                for (;;) {
+                    if (endOfRange >= 0xdbff) {
+                        break;
+                    }
+                    val = getFromU16SingleLead((char)(endOfRange+1));
+                    if (mapper.map(val) != mappedVal) {
+                        break;
+                    }
+                    endOfRange = rangeEndLS((char)(endOfRange+1));
+                }
+            }
+            returnValue.startCodePoint = nextStart;
+            returnValue.endCodePoint   = endOfRange;
+            returnValue.value          = mappedVal;
+            returnValue.leadSurrogate  = !doingCodePoints;
+            nextStart                  = endOfRange+1;
+            return returnValue;
+        }
+
+        /**
+         * @return true while code point ranges, or lead surrogate ranges (below 0xdc00), remain.
+         */
+        public boolean hasNext() {
+            return doingCodePoints && (doLeadSurrogates || nextStart < limitCP) || nextStart < 0xdc00;
+        }
+        // Returns the last lead surrogate (at most 0xdbff) of the run sharing startingLS's value.
+        private int rangeEndLS(char startingLS) {
+            if (startingLS >= 0xdbff) {
+                return 0xdbff;
+            }
+
+            int c;
+            int val = getFromU16SingleLead(startingLS);
+            for (c = startingLS+1; c <= 0x0dbff; c++) {
+                if (getFromU16SingleLead((char)c) != val) {
+                    break;
+                }
+            }
+            return c-1;
+        }
+
+        //
+        //   Iteration State Variables
+        //
+        private ValueMapper    mapper;
+        private Range          returnValue = new Range();
+        // The starting code point for the next range to be returned.
+        private int            nextStart;
+        // The upper limit for the last normal range to be returned.  Normally 0x110000, but
+        //   may be lower when iterating over the code points for a single lead surrogate.
+        private int            limitCP;
+
+        // True while iterating over the Trie2 values for code points.
+        // False while iterating over the alternate values for lead surrogates.
+        private boolean        doingCodePoints = true;
+
+        // True if the iterator should iterate the special values for lead surrogates in
+        //   addition to the normal values for code points.
+        private boolean        doLeadSurrogates = true;
+    }
+
+    /**
+     * Find the last code point of the contiguous run that begins at start and has Trie2 value val.
+     * @param start   The first code point of the run.
+     * @param limitp  Exclusive upper limit for the returned range.
+     * @param val     The Trie2 value at start, i.e. get(start).
+     * @return        The last contiguous code point with that value, at most limitp-1.
+     */
+    int rangeEnd(int start, int limitp, int val) {
+        int c;
+        int limit = Math.min(highStart, limitp);
+        // Code points from highStart up share a single value, so only [start+1, limit) is scanned.
+        for (c = start+1; c < limit; c++) {
+            if (get(c) != val) {
+                break;
+            }
+        }
+        if (c >= highStart && get(highStart) == val) {  // extend only if that shared value is val
+            c = limitp;
+        }
+        return c - 1;
+    }
+
+
+    //
+    //  Hashing implementation functions.  FNV hash.  Respected public domain algorithm.
+    //
+    private static int initHash() {
+        return (int) 2166136261L;  // 32-bit FNV offset basis, 0x811c9dc5 as a signed int
+    }
+
+    private static int hashByte(int h, int b) {
+        // One FNV-1 step: multiply by the 32-bit FNV prime (16777619), then xor in the byte.
+        final int scaled = h * 0x01000193;
+        return scaled ^ b;
+    }
+
+    private static int hashUChar32(int h, int c) {
+        // Low byte, then second byte, then everything above bit 15 (left unmasked).
+        int acc = hashByte(h, c & 0xff);
+        acc = hashByte(acc, (c >> 8) & 0xff);
+        return hashByte(acc, c >> 16);
+    }
+
+    private static int hashInt(int h, int i) {
+        // Feed the four bytes of i into the hash, least significant byte first.
+        for (int shift = 0; shift < 32; shift += 8) {
+            h = hashByte(h, (i >> shift) & 0xff);
+        }
+        return h;
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/Trie2_16.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ *******************************************************************************
+ * Copyright (C) 2009-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+
+package jdk.internal.icu.impl;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+
+/**
+ * @author aheninger
+ *
+ * A read-only Trie2, holding 16 bit data values.
+ *
+ * A Trie2 is a highly optimized data structure for mapping from Unicode
+ * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value.
+ *
+ * See class Trie2 for descriptions of the API for accessing the contents of a trie.
+ *
+ * The fundamental data access methods are declared final in this class, with
+ * the intent that applications might gain a little extra performance, when compared
+ * with calling the same methods via the abstract Trie2 base class.
+ */
+public final class Trie2_16 extends Trie2 {
+
+    /**
+     *  Internal constructor, not for general use.
+     */
+    Trie2_16() {
+    }
+
+
+    /**
+     * Create a Trie2 from its serialized form.  Inverse of utrie2_serialize().
+     * The serialized format is identical between ICU4C and ICU4J, so this function
+     * will work with serialized Trie2s from either.
+     *
+     * The serialized Trie2 in the bytes may be in either little or big endian byte order.
+     * This allows using serialized Tries from ICU4C without needing to consider the
+     * byte order of the system that created them.
+     *
+     * @param bytes a byte buffer to the serialized form of a UTrie2.
+     * @return An unserialized Trie2_16, ready for use.
+     * @throws IllegalArgumentException if the buffer does not contain a serialized Trie2.
+     * @throws IOException if a read error occurs in the buffer.
+     * @throws ClassCastException if the bytes contain a serialized Trie2_32
+     */
+    public static Trie2_16  createFromSerialized(ByteBuffer bytes) throws IOException {
+        return (Trie2_16) Trie2.createFromSerialized(bytes);
+    }
+
+    /**
+     * Get the value for a code point as stored in the Trie2.
+     *
+     * @param codePoint the code point
+     * @return the value, or errorValue if codePoint is outside 0..0x10ffff
+     */
+    @Override
+    public final int get(int codePoint) {
+        int value;
+        int ix;
+
+        if (codePoint >= 0) {
+            if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) {
+                // Ordinary BMP code point, excluding leading surrogates.
+                // BMP uses a single level lookup.  BMP index starts at offset 0 in the Trie2 index.
+                // 16 bit data is stored in the index array itself.
+                ix = index[codePoint >> UTRIE2_SHIFT_2];
+                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
+                value = index[ix];
+                return value;
+            }
+            if (codePoint <= 0xffff) {
+                // Lead Surrogate Code Point.  A Separate index section is stored for
+                // lead surrogate code units and code points.
+                //   The main index has the code unit data.
+                //   For this function, we need the code point data.
+                // Note: this expression could be refactored for slightly improved efficiency, but
+                //       surrogate code points will be so rare in practice that it's not worth it.
+                ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)];
+                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
+                value = index[ix];
+                return value;
+            }
+            if (codePoint < highStart) {
+                // Supplemental code point, use two-level lookup.
+                ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1);
+                ix = index[ix];
+                ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK;
+                ix = index[ix];
+                ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK);
+                value = index[ix];
+                return value;
+            }
+            if (codePoint <= 0x10ffff) {  // highStart..0x10ffff: one value shared by the whole range
+                value = index[highValueIndex];
+                return value;
+            }
+        }
+
+        // Fall through.  The code point is outside of the legal range of 0..0x10ffff.
+        return errorValue;
+    }
+
+
+    /**
+     * Get a Trie2 value for a UTF-16 code unit.
+     *
+     * This function returns the same value as get() if the input
+     * character is outside of the lead surrogate range
+     *
+     * There are two values stored in a Trie2 for inputs in the lead
+     * surrogate range.  This function returns the alternate value,
+     * while Trie2.get() returns the main value.
+     *
+     * @param codeUnit a 16 bit code unit or lead surrogate value.
+     * @return the value
+     */
+    @Override
+    public int getFromU16SingleLead(char codeUnit) {
+        int value;
+        int ix;
+
+        // Because the input is a 16 bit char, we can skip the tests for it being in
+        // the BMP range.  It is.
+        ix = index[codeUnit >> UTRIE2_SHIFT_2];
+        ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK);
+        value = index[ix];
+        return value;
+    }
+
+    /**
+     * @return the number of bytes of the serialized trie: 16 plus 2 per index and data entry
+     */
+    public int getSerializedLength() {
+        return 16+(header.indexLength+dataLength)*2;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/UBiDiProps.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ *
+ *   Copyright (C) 2004-2014, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *
+ *******************************************************************************
+ *   file name:  UBiDiProps.java
+ *   encoding:   US-ASCII
+ *   tab size:   8 (not used)
+ *   indentation:4
+ *
+ *   created on: 2005jan16
+ *   created by: Markus W. Scherer
+ *
+ *   Low-level Unicode bidi/shaping properties access.
+ *   Java port of ubidi_props.h/.c.
+ */
+
+package jdk.internal.icu.impl;
+
+import jdk.internal.icu.lang.UCharacter;
+import jdk.internal.icu.util.VersionInfo;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.MissingResourceException;
+
+public final class UBiDiProps {
+    // constructors etc. --------------------------------------------------- ***
+
+    // port of ubidi_openProps()
+    private UBiDiProps() throws IOException{
+        ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME);
+        readData(bytes);
+    }
+    // Parses the data in order: header, indexes[], trie (+padding), mirrors[], jgArray[], jgArray2[].
+    private void readData(ByteBuffer bytes) throws IOException {
+        // read the header
+        ICUBinary.readHeader(bytes, FMT, new IsAcceptable());
+
+        // read indexes[]
+        int i, count;
+        count=bytes.getInt();
+        if(count<IX_TOP) {
+            throw new IOException("indexes[0] too small in "+DATA_FILE_NAME);
+        }
+        indexes=new int[count];
+
+        indexes[0]=count;  // the count just read is itself indexes[0]
+        for(i=1; i<count; ++i) {
+            indexes[i]=bytes.getInt();
+        }
+
+        // read the trie
+        trie=Trie2_16.createFromSerialized(bytes);
+        int expectedTrieLength=indexes[IX_TRIE_SIZE];
+        int trieLength=trie.getSerializedLength();
+        if(trieLength>expectedTrieLength) {
+            throw new IOException(DATA_FILE_NAME+": not enough bytes for the trie");
+        }
+        // skip padding after trie bytes
+        ICUBinary.skipBytes(bytes, expectedTrieLength-trieLength);
+
+        // read mirrors[]
+        count=indexes[IX_MIRROR_LENGTH];
+        if(count>0) {  // mirrors stays null when the table is empty
+            mirrors=new int[count];
+            for(i=0; i<count; ++i) {
+                mirrors[i]=bytes.getInt();
+            }
+        }
+
+        // read jgArray[]
+        count=indexes[IX_JG_LIMIT]-indexes[IX_JG_START];
+        jgArray=new byte[count];
+        for(i=0; i<count; ++i) {
+            jgArray[i]=bytes.get();
+        }
+
+        // read jgArray2[]
+        count=indexes[IX_JG_LIMIT2]-indexes[IX_JG_START2];
+        jgArray2=new byte[count];
+        for(i=0; i<count; ++i) {
+            jgArray2[i]=bytes.get();
+        }
+    }
+
+    // implement ICUBinary.Authenticate
+    private static final class IsAcceptable implements ICUBinary.Authenticate {
+        public boolean isDataVersionAcceptable(byte version[]) {
+            return version[0]==2;  // only the first version byte is checked
+        }
+    }
+
+    // property access functions ------------------------------------------- ***
+    // Returns the bidi class of c: the CLASS_MASK (low 5) bits of its props word.
+    public final int getClass(int c) {
+        return getClassFromProps(trie.get(c));
+    }
+    // Mirror of c: c+delta when props carries a real delta, otherwise a lookup in mirrors[].
+    private final int getMirror(int c, int props) {
+        int delta=getMirrorDeltaFromProps(props);
+        if(delta!=ESC_MIRROR_DELTA) {
+            return c+delta;
+        } else {
+            /* look for mirror code point in the mirrors[] table */
+            int m;
+            int i, length;
+            int c2;
+
+            length=indexes[IX_MIRROR_LENGTH];
+
+            /* linear search */
+            for(i=0; i<length; ++i) {
+                m=mirrors[i];
+                c2=getMirrorCodePoint(m);
+                if(c==c2) {
+                    /* found c, return its mirror code point using the index in m */
+                    return getMirrorCodePoint(mirrors[getMirrorIndex(m)]);
+                } else if(c<c2) {
+                    break;
+                }
+            }
+
+            /* c not found, return it itself */
+            return c;
+        }
+    }
+    // Returns the mirror code point of c, or c itself when the table has no entry for it.
+    public final int getMirror(int c) {
+        int props=trie.get(c);
+        return getMirror(c, props);
+    }
+    // Returns the joining type of c, stored in bits 7..5 of its props word.
+    public final int getJoiningType(int c) {
+        return (trie.get(c)&JT_MASK)>>JT_SHIFT;
+    }
+    // Returns the joining group from whichever jgArray range contains c, else NO_JOINING_GROUP.
+    public final int getJoiningGroup(int c) {
+        int start, limit;
+
+        start=indexes[IX_JG_START];
+        limit=indexes[IX_JG_LIMIT];
+        if(start<=c && c<limit) {
+            return (int)jgArray[c-start]&0xff;
+        }
+        start=indexes[IX_JG_START2];
+        limit=indexes[IX_JG_LIMIT2];
+        if(start<=c && c<limit) {
+            return (int)jgArray2[c-start]&0xff;
+        }
+        return UCharacter.JoiningGroup.NO_JOINING_GROUP;
+    }
+    // Returns the Bidi_Paired_Bracket_Type of c, stored in bits 9..8 of its props word.
+    public final int getPairedBracketType(int c) {
+        return (trie.get(c)&BPT_MASK)>>BPT_SHIFT;
+    }
+    // Returns the paired bracket of c (found via the mirror data), or c if it has no bracket type.
+    public final int getPairedBracket(int c) {
+        int props=trie.get(c);
+        if((props&BPT_MASK)==0) {
+            return c;
+        } else {
+            return getMirror(c, props);
+        }
+    }
+
+    // data members -------------------------------------------------------- ***
+    private int indexes[];
+    private int mirrors[];
+    private byte jgArray[];
+    private byte jgArray2[];
+
+    private Trie2_16 trie;
+
+    // data format constants ----------------------------------------------- ***
+    @SuppressWarnings("deprecation")
+    private static final String DATA_FILE_NAME =
+            "/jdk/internal/icu/impl/data/icudt" +
+            VersionInfo.ICU_DATA_VERSION_PATH +
+            "/ubidi.icu";
+
+    /* format "BiDi" */
+    private static final int FMT=0x42694469;
+
+    /* indexes into indexes[] */
+    private static final int IX_TRIE_SIZE=2;
+    private static final int IX_MIRROR_LENGTH=3;
+
+    private static final int IX_JG_START=4;
+    private static final int IX_JG_LIMIT=5;
+    private static final int IX_JG_START2=6;  /* new in format version 2.2, ICU 54 */
+    private static final int IX_JG_LIMIT2=7;
+
+    private static final int IX_TOP=16;
+
+    // definitions for 16-bit bidi/shaping properties word ----------------- ***
+
+                          /* CLASS_SHIFT=0, */     /* bidi class: 5 bits (4..0) */
+    private static final int JT_SHIFT=5;           /* joining type: 3 bits (7..5) */
+
+    private static final int BPT_SHIFT=8;          /* Bidi_Paired_Bracket_Type(bpt): 2 bits (9..8) */
+
+    private static final int MIRROR_DELTA_SHIFT=13;        /* bidi mirroring delta: 3 bits (15..13) */
+
+    private static final int CLASS_MASK=    0x0000001f;
+    private static final int JT_MASK=       0x000000e0;
+    private static final int BPT_MASK=      0x00000300;
+
+    private static final int getClassFromProps(int props) {
+        return props&CLASS_MASK;
+    }
+    private static final boolean getFlagFromProps(int props, int shift) {
+        return ((props>>shift)&1)!=0;
+    }
+    private static final int getMirrorDeltaFromProps(int props) {
+        return (short)props>>MIRROR_DELTA_SHIFT;  // cast binds first, so the 3-bit delta is sign-extended
+    }
+
+    private static final int ESC_MIRROR_DELTA=-4;
+
+    // definitions for 32-bit mirror table entry --------------------------- ***
+
+    /* the source Unicode code point takes 21 bits (20..0) */
+    private static final int MIRROR_INDEX_SHIFT=21;
+
+    private static final int getMirrorCodePoint(int m) {
+        return m&0x1fffff;
+    }
+    private static final int getMirrorIndex(int m) {
+        return m>>>MIRROR_INDEX_SHIFT;
+    }
+
+
+    /*
+     * public singleton instance
+     */
+    public static final UBiDiProps INSTANCE;
+
+    // This static initializer must stay after all other static member initializers.
+    static {
+        try {
+            INSTANCE = new UBiDiProps();
+        } catch (IOException e) {
+            // MissingResourceException has no cause-taking constructor; chain the IOException here.
+            throw (MissingResourceException) new MissingResourceException(e.getMessage(),DATA_FILE_NAME,"").initCause(e);
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/UCharacterProperty.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,614 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+
+package jdk.internal.icu.impl;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+import java.util.MissingResourceException;
+
+import jdk.internal.icu.lang.UCharacter.HangulSyllableType;
+import jdk.internal.icu.lang.UCharacter.NumericType;
+import jdk.internal.icu.text.UTF16;
+import jdk.internal.icu.text.UnicodeSet;
+import jdk.internal.icu.util.VersionInfo;
+
+/**
+* <p>Internal class used for Unicode character property database.</p>
+* <p>This class stores binary data read from uprops.icu.
+* It does not have the capability to parse the data into more high-level
+* information. It only returns bytes of information when required.</p>
+* <p>Due to the form most commonly used for retrieval, array of char is used
+* to store the binary data.</p>
+* <p>UCharacterPropertyDB also contains information on accessing indexes to
+* significant points in the binary data.</p>
+* <p>Responsibility for molding the binary data into a more meaningful form lies with
+* <a href=UCharacter.html>UCharacter</a>.</p>
+* @author Syn Wee Quek
+* @since release 2.1, february 1st 2002
+*/
+
+public final class UCharacterProperty
+{
+    // public data members -----------------------------------------------
+
+    /*
+     * public singleton instance
+     */
+    public static final UCharacterProperty INSTANCE;
+
+    /**
+    * Trie data
+    */
+    public Trie2_16 m_trie_;
+
+    /**
+    * Unicode version
+    */
+    public VersionInfo m_unicodeVersion_;
+
+    /**
+    * Character type mask
+    */
+    public static final int TYPE_MASK = 0x1F;
+
+    // uprops.h enum UPropertySource --------------------------------------- ***
+
+    /** From uchar.c/uprops.icu main trie */
+    public static final int SRC_CHAR=1;
+    /** From uchar.c/uprops.icu properties vectors trie */
+    public static final int SRC_PROPSVEC=2;
+    /** From ubidi_props.c/ubidi.icu */
+    public static final int SRC_BIDI=5;
+    /** From normalizer2impl.cpp/nfc.nrm */
+    public static final int SRC_NFC=8;
+    /** From normalizer2impl.cpp/nfkc.nrm */
+    public static final int SRC_NFKC=9;
+
+    // public methods ----------------------------------------------------
+
+    /**
+    * Gets the main property value for code point ch, read from the main trie (m_trie_).
+    * @param ch code point whose property value is to be retrieved
+    * @return property value of code point
+    */
+    public final int getProperty(int ch)
+    {
+        return m_trie_.get(ch);
+    }
+
+    /**
+     * Gets the unicode additional properties.
+     * Java version of C u_getUnicodeProperties().
+     * @param codepoint codepoint whose additional properties is to be
+     *                  retrieved
+     * @param column The column index.
+     * @return unicode properties
+     */
+    public int getAdditional(int codepoint, int column) {
+        assert column >= 0;
+        if (column < m_additionalColumnsCount_) {
+            return m_additionalVectors_[m_additionalTrie_.get(codepoint) + column];
+        }
+        return 0;  // column index is beyond the columns that are available
+    }
+
+    /**
+     * <p>Get the "age" of the code point.</p>
+     * <p>The "age" is the Unicode version when the code point was first
+     * designated (as a non-character or for Private Use) or assigned a
+     * character.</p>
+     * <p>This can be useful to avoid emitting code points to receiving
+     * processes that do not accept newer characters.</p>
+     * <p>The data is from the UCD file DerivedAge.txt.</p>
+     * <p>This API does not check the validity of the codepoint.</p>
+     * @param codepoint The code point.
+     * @return the Unicode version number
+     */
+    public VersionInfo getAge(int codepoint)
+    {
+        final int packedAge = getAdditional(codepoint, 0) >> AGE_SHIFT_;
+        final int major = (packedAge >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_;
+        final int minor = packedAge & LAST_NIBBLE_MASK_;
+        return VersionInfo.getInstance(major, minor, 0, 0);
+    }
+
+    // int-value and enumerated properties --------------------------------- ***
+
+    public int getType(int c) {
+        return m_trie_.get(c) & TYPE_MASK;  // keep only the TYPE_MASK (0x1F) bits of the main trie value
+    }
+
+    /*
+     * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
+     * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
+     */
+    private static final int /* UHangulSyllableType */ gcbToHst[]={
+        HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_OTHER */
+        HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_CONTROL */
+        HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_CR */
+        HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_EXTEND */
+        HangulSyllableType.LEADING_JAMO,     /* U_GCB_L */
+        HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_LF */
+        HangulSyllableType.LV_SYLLABLE,      /* U_GCB_LV */
+        HangulSyllableType.LVT_SYLLABLE,     /* U_GCB_LVT */
+        HangulSyllableType.TRAILING_JAMO,    /* U_GCB_T */
+        HangulSyllableType.VOWEL_JAMO        /* U_GCB_V */
+        /*
+         * Omit GCB values beyond what we need for hst.
+         * The code below checks for the array length.
+         */
+    };
+
+    private class IntProperty {
+        int column;  // properties-vector column; holds the "source" id when mask is 0
+        int mask;    // bits of the column word that belong to this property
+        int shift;   // right shift that moves the masked bits down to bit 0
+
+        IntProperty(int column, int mask, int shift) {
+            this.column = column;
+            this.mask = mask;
+            this.shift = shift;
+        }
+
+        IntProperty(int source) {
+            // Source-only property: no mask and no shift.
+            this(source, 0, 0);
+        }
+
+        int getValue(int c) {
+            final int field = getAdditional(c, column) & mask;  // systematic, directly stored properties
+            return field >>> shift;
+        }
+    }
+
+    /** An IntProperty whose source is always SRC_BIDI. */
+    private class BiDiIntProperty extends IntProperty {
+        BiDiIntProperty() {
+            super( SRC_BIDI );
+        }
+    }
+
+    /** An IntProperty that only forwards its source to the base class. */
+    private class CombiningClassIntProperty extends IntProperty {
+        CombiningClassIntProperty(int source) {
+            super( source );
+        }
+    }
+
+    /**
+     * IntProperty for the UCHAR_NF*_QUICK_CHECK properties. In addition to the
+     * source it stores the {@code which} and {@code max} constructor arguments.
+     */
+    private class NormQuickCheckIntProperty extends IntProperty {
+        int which;
+        int max;
+
+        NormQuickCheckIntProperty(int source, int which, int max) {
+            super(source);
+            this.max = max;
+            this.which = which;
+        }
+    }
+
+    // BIDI_PAIRED_BRACKET_TYPE: the value comes from UBiDiProps, not from the
+    // property vectors, so getValue() is overridden.
+    private IntProperty intProp = new BiDiIntProperty() {
+        int getValue(int c) {
+            return UBiDiProps.INSTANCE.getPairedBracketType( c );
+        }
+    };
+
+    public int getIntPropertyValue(int c, int which) {
+        if (which == BIDI_PAIRED_BRACKET_TYPE) {
+            return intProp.getValue(c);
+        }
+        return 0; // undefined
+    }
+
+    /**
+     * Combines a surrogate pair into the supplementary code point it encodes.
+     * Internal use only: the arguments are not checked for being valid
+     * surrogates.
+     * @param lead lead surrogate character
+     * @param trail trailing surrogate character
+     * @return code point of the supplementary character
+     */
+    public static int getRawSupplementary(char lead, char trail)
+    {
+        final int shiftedLead = lead << LEAD_SURROGATE_SHIFT_;
+        return shiftedLead + trail + SURROGATE_OFFSET_;
+    }
+
+    /**
+     * Converts a character type into its single-bit mask.
+     * @param type character type, used as the bit position
+     * @return {@code 1 << type}
+     */
+    public static final int getMask(int type)
+    {
+        final int lowestBit = 1;
+        return lowestBit << type;
+    }
+
+    /**
+     * Returns the digit value of a Latin letter used as a digit.
+     * Recognized are U+0041..U+005A ('A'-'Z'), U+0061..U+007A ('a'-'z'),
+     * U+FF21..U+FF3A and U+FF41..U+FF5A; each range maps onto 10..35.
+     * This method assumes that the other digit characters are checked by
+     * the calling method.
+     * @param ch character to test
+     * @return 10..35 for a character in one of the four ranges, otherwise -1
+     */
+    public static int getEuropeanDigit(int ch) {
+        if (ch >= 0x41 && ch <= 0x5a) {
+            return ch - 0x41 + 10;
+        }
+        if (ch >= 0x61 && ch <= 0x7a) {
+            return ch - 0x61 + 10;
+        }
+        if (ch >= 0xff21 && ch <= 0xff3a) {
+            return ch - 0xff21 + 10;
+        }
+        if (ch >= 0xff41 && ch <= 0xff5a) {
+            return ch - 0xff41 + 10;
+        }
+        return -1;
+    }
+
+    /**
+     * Returns the decimal digit value stored for a code point.
+     *
+     * @param c code point to look up
+     * @return the stored numeric type-value minus NTV_DECIMAL_START_ when that
+     *         difference is at most 9, otherwise -1
+     */
+    public int digit(int c) {
+        final int ntv = getNumericTypeValue(getProperty(c));
+        final int digitValue = ntv - NTV_DECIMAL_START_;
+        return (digitValue <= 9) ? digitValue : -1;
+    }
+
+    // protected variables (declared without an access modifier, except m_scriptExtensions_) ---
+
+    /**
+     * Extra property trie.
+     * Only read by the constructor when m_additionalColumnsCount_ > 0;
+     * otherwise it is left unassigned (null).
+     */
+    Trie2_16 m_additionalTrie_;
+    /**
+     * Extra property vectors, 1st column for age and second for binary
+     * properties.
+     * Only read by the constructor when m_additionalColumnsCount_ > 0.
+     */
+    int m_additionalVectors_[];
+    /**
+     * Number of additional columns (the sixth index read from the data file).
+     */
+    int m_additionalColumnsCount_;
+    /**
+     * Maximum values for block, bits used as in vector word
+     * 0 (the eleventh index read from the data file).
+     */
+    int m_maxBlockScriptValue_;
+    /**
+     * Maximum values for script, bits used as in vector word
+     * 0
+     * NOTE(review): this text duplicates the description of
+     * m_maxBlockScriptValue_ and does not match the field name; this is the
+     * twelfth index read from the data file - confirm which vector word it
+     * actually describes.
+     */
+     int m_maxJTGValue_;
+    /**
+     * Script_Extensions data.
+     * Left null by the constructor when the data file holds no
+     * Script_Extensions chars.
+     */
+    public char[] m_scriptExtensions_;
+
+    // private variables -------------------------------------------------
+
+    /**
+    * Default name of the datafile: the resource path of uprops.icu, built
+    * from VersionInfo.ICU_DATA_VERSION_PATH. The constructor passes it to
+    * ICUBinary.getRequiredData().
+    */
+    @SuppressWarnings("deprecation")
+    private static final String DATA_FILE_NAME_ =
+            "/jdk/internal/icu/impl/data/icudt" +
+            VersionInfo.ICU_DATA_VERSION_PATH +
+            "/uprops.icu";
+
+    /**
+    * Shift value for lead surrogate to form a supplementary character.
+    * Used by getRawSupplementary().
+    */
+    private static final int LEAD_SURROGATE_SHIFT_ = 10;
+    /**
+    * Offset to add to combined surrogate pair to avoid masking.
+    * getRawSupplementary() adds it to (lead << LEAD_SURROGATE_SHIFT_) + trail.
+    */
+    private static final int SURROGATE_OFFSET_ =
+                           UTF16.SUPPLEMENTARY_MIN_VALUE -
+                           (UTF16.SURROGATE_MIN_VALUE <<
+                           LEAD_SURROGATE_SHIFT_) -
+                           UTF16.TRAIL_SURROGATE_MIN_VALUE;
+
+
+
+
+    // property data constants -------------------------------------------------
+
+    /**
+     * Numeric types and values in the main properties words:
+     * bit position at which the numeric type-value field starts.
+     */
+    private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6;
+
+    /** Extracts the numeric type-value field from a main properties word. */
+    private static final int getNumericTypeValue(int props) {
+        final int ntv = props >> NUMERIC_TYPE_VALUE_SHIFT_;
+        return ntv;
+    }
+
+    /* constants for the storage form of numeric types and values;
+     * ntvGetType() classifies a stored value by comparing it against these bounds */
+    /** No numeric value. */
+    private static final int NTV_NONE_ = 0;
+    /** Decimal digits: nv=0..9. digit() subtracts this base from the stored value. */
+    private static final int NTV_DECIMAL_START_ = 1;
+    /** Other digits: nv=0..9 */
+    private static final int NTV_DIGIT_START_ = 11;
+    /** Small integers: nv=0..154 */
+    private static final int NTV_NUMERIC_START_ = 21;
+
+    /** Classifies a stored numeric type-value by the NTV_* range it falls into. */
+    private static final int ntvGetType(int ntv) {
+        if (ntv == NTV_NONE_) {
+            return NumericType.NONE;
+        }
+        if (ntv < NTV_DIGIT_START_) {
+            return NumericType.DECIMAL;
+        }
+        if (ntv < NTV_NUMERIC_START_) {
+            return NumericType.DIGIT;
+        }
+        return NumericType.NUMERIC;
+    }
+
+    /*
+     * Properties in vector word 0
+     * Bits
+     * 31..24   DerivedAge version major/minor one nibble each
+     * 23..22   3..1: Bits 7..0 = Script_Extensions index
+     *             3: Script value from Script_Extensions
+     *             2: Script=Inherited
+     *             1: Script=Common
+     *             0: Script=bits 7..0
+     * 21..20   reserved
+     * 19..17   East Asian Width
+     * 16.. 8   UBlockCode
+     *  7.. 0   UScriptCode
+     */
+    /**
+     * Script_Extensions: mask includes Script
+     * (bits 23..22 together with bits 7..0 of vector word 0).
+     */
+    public static final int SCRIPT_X_MASK = 0x00c000ff;
+    //private static final int SCRIPT_X_SHIFT = 22;
+    /**
+     * Integer properties mask for East Asian cell width
+     * (bits 19..17 of vector word 0).
+     * Equivalent to icu4c UPROPS_EA_MASK
+     */
+    private static final int EAST_ASIAN_MASK_ = 0x000e0000;
+    /**
+     * Integer properties shift value for East Asian cell width.
+     * Equivalent to icu4c UPROPS_EA_SHIFT
+     */
+    private static final int EAST_ASIAN_SHIFT_ = 17;
+    /**
+     * Integer properties mask for blocks (bits 16..8 of vector word 0).
+     * Equivalent to icu4c UPROPS_BLOCK_MASK
+     */
+    private static final int BLOCK_MASK_ = 0x0001ff00;
+    /**
+     * Integer properties shift value for blocks.
+     * Equivalent to icu4c UPROPS_BLOCK_SHIFT
+     */
+    private static final int BLOCK_SHIFT_ = 8;
+    /**
+     * Integer properties mask for scripts (bits 7..0 of vector word 0,
+     * so no shift is needed).
+     * Equivalent to icu4c UPROPS_SHIFT_MASK
+     * NOTE(review): presumably UPROPS_SCRIPT_MASK is meant - TODO confirm
+     * the icu4c name.
+     */
+    public static final int SCRIPT_MASK_ = 0x000000ff;
+
+    /**
+     * Additional properties used in internal trie data
+     */
+    /*
+     * Properties in vector word 1
+     * Each bit encodes one binary property.
+     * The following constants represent the bit number, use 1<<UPROPS_XYZ.
+     * UPROPS_BINARY_1_TOP<=32!
+     *
+     * Keep this list of property enums in sync with
+     * propListNames[] in icu/source/tools/genprops/props2.c!
+     *
+     * ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
+     *
+     * The values below are bit numbers (0..30), not masks. PATTERN_SYNTAX and
+     * PATTERN_WHITE_SPACE belong to the same list although their names lack
+     * the _PROPERTY_ suffix used by the other entries.
+     */
+    private static final int WHITE_SPACE_PROPERTY_ = 0;
+    private static final int DASH_PROPERTY_ = 1;
+    private static final int HYPHEN_PROPERTY_ = 2;
+    private static final int QUOTATION_MARK_PROPERTY_ = 3;
+    private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 4;
+    private static final int MATH_PROPERTY_ = 5;
+    private static final int HEX_DIGIT_PROPERTY_ = 6;
+    private static final int ASCII_HEX_DIGIT_PROPERTY_ = 7;
+    private static final int ALPHABETIC_PROPERTY_ = 8;
+    private static final int IDEOGRAPHIC_PROPERTY_ = 9;
+    private static final int DIACRITIC_PROPERTY_ = 10;
+    private static final int EXTENDER_PROPERTY_ = 11;
+    private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 12;
+    private static final int GRAPHEME_EXTEND_PROPERTY_ = 13;
+    private static final int GRAPHEME_LINK_PROPERTY_ = 14;
+    private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 15;
+    private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 16;
+    private static final int RADICAL_PROPERTY_ = 17;
+    private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 18;
+    private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 19;
+    private static final int DEPRECATED_PROPERTY_ = 20;
+    private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 21;
+    private static final int XID_START_PROPERTY_ = 22;
+    private static final int XID_CONTINUE_PROPERTY_ = 23;
+    private static final int ID_START_PROPERTY_    = 24;
+    private static final int ID_CONTINUE_PROPERTY_ = 25;
+    private static final int GRAPHEME_BASE_PROPERTY_ = 26;
+    private static final int S_TERM_PROPERTY_ = 27;
+    private static final int VARIATION_SELECTOR_PROPERTY_ = 28;
+    private static final int PATTERN_SYNTAX = 29;                   /* new in ICU 3.4 and Unicode 4.1 */
+    private static final int PATTERN_WHITE_SPACE = 30;
+
+    /*
+     * Properties in vector word 2
+     * Bits
+     * 31..26   reserved
+     * 25..20   Line Break
+     * 19..15   Sentence Break
+     * 14..10   Word Break
+     *  9.. 5   Grapheme Cluster Break
+     *  4.. 0   Decomposition Type
+     *
+     * Each MASK/SHIFT pair below selects one of the bit fields listed above.
+     */
+    private static final int LB_MASK          = 0x03f00000;
+    private static final int LB_SHIFT         = 20;
+
+    private static final int SB_MASK          = 0x000f8000;
+    private static final int SB_SHIFT         = 15;
+
+    private static final int WB_MASK          = 0x00007c00;
+    private static final int WB_SHIFT         = 10;
+
+    private static final int GCB_MASK         = 0x000003e0;
+    private static final int GCB_SHIFT        = 5;
+
+    /**
+     * Integer properties mask for decomposition type
+     * (bits 4..0 of vector word 2, so no shift is needed).
+     * Equivalent to icu4c UPROPS_DT_MASK.
+     */
+    private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;
+
+    /**
+     * First nibble shift; getAge() uses it to reach the major version nibble.
+     */
+    private static final int FIRST_NIBBLE_SHIFT_ = 0x4;
+    /**
+     * Second nibble mask; getAge() applies it to both the major and the
+     * minor version nibble.
+     */
+    private static final int LAST_NIBBLE_MASK_ = 0xF;
+    /**
+     * Age value shift (the age occupies bits 31..24 of vector word 0).
+     */
+    private static final int AGE_SHIFT_ = 24;
+
+    // private constructors --------------------------------------------------
+
+    /**
+     * Constructor.
+     * Loads DATA_FILE_NAME_ and reads it strictly in file order: the header
+     * and version, the 16 indexes, the main properties trie, then - only when
+     * m_additionalColumnsCount_ > 0 - the additional-properties trie and
+     * vectors, and finally the Script_Extensions chars.
+     * The offsets taken from the indexes are multiplied by 4 wherever they
+     * are turned into byte counts.
+     * @exception IOException thrown when data reading fails or data corrupted
+     */
+    private UCharacterProperty() throws IOException
+    {
+        // jar access
+        ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_);
+        m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable());
+        // Read or skip the 16 indexes.
+        int propertyOffset = bytes.getInt();
+        /* exceptionOffset = */ bytes.getInt();
+        /* caseOffset = */ bytes.getInt();
+        int additionalOffset = bytes.getInt();
+        int additionalVectorsOffset = bytes.getInt();
+        m_additionalColumnsCount_ = bytes.getInt();
+        int scriptExtensionsOffset = bytes.getInt();
+        int reservedOffset7 = bytes.getInt();
+        /* reservedOffset8 = */ bytes.getInt();
+        /* dataTopOffset = */ bytes.getInt();
+        m_maxBlockScriptValue_ = bytes.getInt();
+        m_maxJTGValue_ = bytes.getInt();
+        ICUBinary.skipBytes(bytes, (16 - 12) << 2);  // 12 indexes consumed above; skip the other 4 ints
+
+        // read the main properties trie
+        m_trie_ = Trie2_16.createFromSerialized(bytes);
+        int expectedTrieLength = (propertyOffset - 16) * 4;  // space between the 16 indexes and propertyOffset
+        int trieLength = m_trie_.getSerializedLength();
+        if(trieLength > expectedTrieLength) {
+            throw new IOException("uprops.icu: not enough bytes for main trie");
+        }
+        // skip padding after trie bytes
+        ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);
+
+        // skip unused intervening data structures
+        ICUBinary.skipBytes(bytes, (additionalOffset - propertyOffset) * 4);
+
+        if(m_additionalColumnsCount_ > 0) {
+            // reads the additional property block
+            m_additionalTrie_ = Trie2_16.createFromSerialized(bytes);
+            expectedTrieLength = (additionalVectorsOffset-additionalOffset)*4;
+            trieLength = m_additionalTrie_.getSerializedLength();
+            if(trieLength > expectedTrieLength) {
+                throw new IOException("uprops.icu: not enough bytes for additional-properties trie");
+            }
+            // skip padding after trie bytes
+            ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);
+
+            // additional properties
+            int size = scriptExtensionsOffset - additionalVectorsOffset;  // number of ints, not bytes
+            m_additionalVectors_ = new int[size];
+            for (int i = 0; i < size; i ++) {
+                m_additionalVectors_[i] = bytes.getInt();
+            }
+        }
+
+        // Script_Extensions
+        int numChars = (reservedOffset7 - scriptExtensionsOffset) * 2;  // two chars per offset unit
+        if(numChars > 0) {
+            m_scriptExtensions_ = new char[numChars];
+            for(int i = 0; i < numChars; ++i) {
+                m_scriptExtensions_[i] = bytes.getChar();
+            }
+        }
+    }
+
+    /**
+     * Version check handed to ICUBinary.readHeaderAndDataVersion() by the
+     * constructor: the data is accepted only when the first byte of the
+     * version array is 7.
+     */
+    private static final class IsAcceptable implements ICUBinary.Authenticate {
+        @Override
+        public boolean isDataVersionAcceptable(byte[] version) {
+            return version[0] == 7;
+        }
+    }
+
+    private static final int DATA_FORMAT = 0x5550726F;  // "UPro"; passed to ICUBinary.readHeaderAndDataVersion() by the constructor
+
+    /**
+     * Adds to {@code set} the start code point of each same-value range of the
+     * properties vectors trie, stopping at the first range that is flagged as
+     * a lead surrogate range.
+     * @param set receives the range start code points
+     */
+    public void upropsvec_addPropertyStarts(UnicodeSet set) {
+        if (m_additionalColumnsCount_ <= 0) {
+            /* if m_additionalColumnsCount_==0 then the properties vectors trie may not be there at all */
+            return;
+        }
+        Iterator<Trie2.Range> ranges = m_additionalTrie_.iterator();
+        while (ranges.hasNext()) {
+            Trie2.Range current = ranges.next();
+            if (current.leadSurrogate) {
+                break;
+            }
+            set.add(current.startCodePoint);
+        }
+    }
+
+    // This static initializer block must be placed after
+    // other static member initialization
+    static {
+        try {
+            INSTANCE = new UCharacterProperty();
+        }
+        catch (IOException e) {
+            // MissingResourceException has no public constructor that takes a
+            // cause, so attach the IOException with initCause() to keep its
+            // stack trace available to whoever diagnoses the failure.
+            MissingResourceException mre =
+                    new MissingResourceException(e.getMessage(), DATA_FILE_NAME_, "");
+            mre.initCause(e);
+            throw mre;
+        }
+    }
+
+
+    // Moved from UProperty.java
+    /**
+     * Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
+     * Used in UAX #9: Unicode Bidirectional Algorithm
+     * (http://www.unicode.org/reports/tr9/)
+     * Returns UCharacter.BidiPairedBracketType values.
+     * In this class it is the only selector that getIntPropertyValue()
+     * handles; every other selector yields 0 there.
+     * @stable ICU 52
+     */
+    public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015;
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/UnicodeSetStringSpan.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,1168 @@
+/*
+ * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ ******************************************************************************
+ *
+ *   Copyright (C) 2009-2014, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *
+ ******************************************************************************
+ */
+
+package jdk.internal.icu.impl;
+
+import java.util.ArrayList;
+
+import jdk.internal.icu.text.UTF16;
+import jdk.internal.icu.text.UnicodeSet;
+import jdk.internal.icu.text.UnicodeSet.SpanCondition;
+import jdk.internal.icu.util.OutputInt;
+
+/*
+ * Implement span() etc. for a set with strings.
+ * Avoid recursion because of its exponential complexity.
+ * Instead, try multiple paths at once and track them with an IndexList.
+ */
+public class UnicodeSetStringSpan {
+
+    /*
+     * Which span() variant will be used? The object is either built for one variant and used once,
+     * or built for all and may be used many times.
+     *
+     * The values are bit flags that are OR-ed into the constructor's "which" argument;
+     * the constructor tests them individually with (which & FLAG).
+     */
+    public static final int WITH_COUNT    = 0x40;  // spanAndCount() may be called
+    public static final int FWD           = 0x20;  // constructor prepares the forward data (span() lengths, string start code points)
+    public static final int BACK          = 0x10;  // constructor prepares the backward data (spanBack() lengths, string end code points)
+    // public static final int UTF16      = 8;
+    public static final int CONTAINED     = 2;
+    public static final int NOT_CONTAINED = 1;
+
+    public static final int ALL = 0x7f;  // the constructor sets up all variants only when which == ALL
+
+    public static final int FWD_UTF16_CONTAINED      = FWD  | /* UTF16 | */    CONTAINED;
+    public static final int FWD_UTF16_NOT_CONTAINED  = FWD  | /* UTF16 | */NOT_CONTAINED;
+    public static final int BACK_UTF16_CONTAINED     = BACK | /* UTF16 | */    CONTAINED;
+    public static final int BACK_UTF16_NOT_CONTAINED = BACK | /* UTF16 | */NOT_CONTAINED;
+
+    /**
+     * Special spanLength short values. (since Java has no unsigned byte type)
+     * All code points in the string are contained in the parent set.
+     */
+    static final short ALL_CP_CONTAINED = 0xff;
+
+    /** The spanLength is >=0xfe. */
+    static final short LONG_SPAN = ALL_CP_CONTAINED - 1;
+
+    /** Set for span(). Same as parent but without strings. */
+    private UnicodeSet spanSet;
+
+    /**
+     * Set for span(not contained).
+     * Same as spanSet, plus characters that start or end strings.
+     * The constructor first points it at the spanSet object itself;
+     * addToSpanNotSet() replaces it with a thawed clone once a code point
+     * that is not in spanSet has to be added.
+     */
+    private UnicodeSet spanNotSet;
+
+    /** The strings of the parent set. */
+    private ArrayList<String> strings;
+
+    /**
+     * The lengths of span(), spanBack() etc. for each string.
+     * When built for ALL, the array holds strings.size() forward entries
+     * followed by strings.size() backward entries; otherwise a single run.
+     * Left unallocated when the constructor returns early.
+     */
+    private short[] spanLengths;
+
+    /**
+     * Maximum lengths of relevant strings.
+     * NOTE(review): the constructor takes the maximum length() over all
+     * strings, relevant or not - confirm which is intended.
+     */
+    private int maxLength16;
+
+    /** Are there strings that are not fully contained in the code point set? */
+    private boolean someRelevant;
+
+    /** Set up for all variants of span()? */
+    private boolean all;
+
+    /** Span helper */
+    private OffsetList offsets;
+
+    /**
+     * Constructs for all variants of span(), or only for any one variant.
+     * Initializes as little as possible, for single use.
+     *
+     * The first pass over the strings only decides whether any string is
+     * relevant (not fully covered by the code point set) and records the
+     * longest string length. If no string is relevant and WITH_COUNT is not
+     * requested, construction stops there and spanLengths stays unallocated.
+     * The second pass fills spanLengths and, for NOT_CONTAINED, spanNotSet.
+     *
+     * @param set the parent set; its code points are copied into spanSet
+     * @param setStrings the strings of the parent set; the list is kept by
+     *        reference, not copied
+     * @param which combination of the WITH_COUNT/FWD/BACK/CONTAINED/
+     *        NOT_CONTAINED flags, or ALL
+     */
+    public UnicodeSetStringSpan(final UnicodeSet set, final ArrayList<String> setStrings, int which) {
+        spanSet = new UnicodeSet(0, 0x10ffff);
+        // TODO: With Java 6, just take the parent set's strings as is,
+        // as a NavigableSet<String>, rather than as an ArrayList copy of the set of strings.
+        // Then iterate via the first() and higher() methods.
+        // (We do not want to create multiple Iterator objects in each span().)
+        // See ICU ticket #7454.
+        strings = setStrings;
+        all = (which == ALL);
+        spanSet.retainAll(set);
+        if (0 != (which & NOT_CONTAINED)) {
+            // Default to the same sets.
+            // addToSpanNotSet() will create a separate set if necessary.
+            spanNotSet = spanSet;
+        }
+        offsets = new OffsetList();
+
+        // Determine if the strings even need to be taken into account at all for span() etc.
+        // If any string is relevant, then all strings need to be used for
+        // span(longest match) but only the relevant ones for span(while contained).
+        // TODO: Possible optimization: Distinguish CONTAINED vs. LONGEST_MATCH
+        // and do not store UTF-8 strings if !thisRelevant and CONTAINED.
+        // (Only store irrelevant UTF-8 strings for LONGEST_MATCH where they are relevant after all.)
+        // Also count the lengths of the UTF-8 versions of the strings for memory allocation.
+        // NOTE(review): the UTF-8 remarks above have no counterpart in the code below,
+        // which only works with String.length().
+        int stringsLength = strings.size();
+
+        int i, spanLength;
+        someRelevant = false;
+        for (i = 0; i < stringsLength; ++i) {
+            String string = strings.get(i);
+            int length16 = string.length();
+            spanLength = spanSet.span(string, SpanCondition.CONTAINED);
+            if (spanLength < length16) { // Relevant string.
+                someRelevant = true;
+            }
+            if (/* (0 != (which & UTF16)) && */ length16 > maxLength16) {
+                maxLength16 = length16;
+            }
+        }
+        if (!someRelevant && (which & WITH_COUNT) == 0) {
+            return;
+        }
+
+        // Freeze after checking for the need to use strings at all because freezing
+        // a set takes some time and memory which are wasted if there are no relevant strings.
+        if (all) {
+            spanSet.freeze();
+        }
+
+        int spanBackLengthsOffset;
+
+        // Allocate a block of meta data.
+        int allocSize;
+        if (all) {
+            // 2 sets of span lengths
+            allocSize = stringsLength * (2);
+        } else {
+            allocSize = stringsLength; // One set of span lengths.
+        }
+        spanLengths = new short[allocSize];
+
+        if (all) {
+            // Store span lengths for all span() variants.
+            spanBackLengthsOffset = stringsLength;
+        } else {
+            // Store span lengths for only one span() variant.
+            spanBackLengthsOffset = 0;
+        }
+
+        // Set the meta data (spanLengths) and spanNotSet.
+
+        for (i = 0; i < stringsLength; ++i) {
+            String string = strings.get(i);
+            int length16 = string.length();
+            spanLength = spanSet.span(string, SpanCondition.CONTAINED);
+            if (spanLength < length16) { // Relevant string.
+                if (true /* 0 != (which & UTF16) */) {
+                    if (0 != (which & CONTAINED)) {
+                        if (0 != (which & FWD)) {
+                            spanLengths[i] = makeSpanLengthByte(spanLength);
+                        }
+                        if (0 != (which & BACK)) {
+                            spanLength = length16
+                                    - spanSet.spanBack(string, length16, SpanCondition.CONTAINED);
+                            spanLengths[spanBackLengthsOffset + i] = makeSpanLengthByte(spanLength);
+                        }
+                    } else /* not CONTAINED, not all, but NOT_CONTAINED */{
+                        spanLengths[i] = spanLengths[spanBackLengthsOffset + i] = 0; // Only store a relevant/irrelevant
+                                                                                     // flag.
+                    }
+                }
+                if (0 != (which & NOT_CONTAINED)) {
+                    // Add string start and end code points to the spanNotSet so that
+                    // a span(while not contained) stops before any string.
+                    int c;
+                    if (0 != (which & FWD)) {
+                        c = string.codePointAt(0);
+                        addToSpanNotSet(c);
+                    }
+                    if (0 != (which & BACK)) {
+                        c = string.codePointBefore(length16);
+                        addToSpanNotSet(c);
+                    }
+                }
+            } else { // Irrelevant string.
+                if (all) {
+                    spanLengths[i] = spanLengths[spanBackLengthsOffset + i] = ALL_CP_CONTAINED;
+                } else {
+                    // Only one set of span lengths exists here (spanBackLengthsOffset is 0).
+                    spanLengths[i] = ALL_CP_CONTAINED;
+                }
+            }
+        }
+
+        // Finish.
+        if (all) {
+            spanNotSet.freeze();
+        }
+    }
+
+    /**
+     * Tells whether span() etc. have to look at the set strings at all.
+     *
+     * @return true if at least one string is not fully covered by the code
+     *         point set (call span() here), false if not (use a BMPSet for
+     *         best performance).
+     */
+    public boolean needsStringSpanUTF16() {
+        final boolean stringsMatter = someRelevant;
+        return stringsMatter;
+    }
+
+    /**
+     * For fast UnicodeSet::contains(c): answers from spanSet, the code-point-only
+     * copy of the parent set.
+     */
+    public boolean contains(int c) {
+        final UnicodeSet codePoints = spanSet;
+        return codePoints.contains(c);
+    }
+
+    /**
+     * Adds a starting or ending string character to the spanNotSet
+     * so that a character span ends before any string.
+     */
+    private void addToSpanNotSet(int c) {
+        if (spanNotSet == null || spanNotSet == spanSet) {
+            if (spanSet.contains(c)) {
+                return; // Nothing to do.
+            }
+            spanNotSet = spanSet.cloneAsThawed();
+        }
+        spanNotSet.add(c);
+    }
+
+    /*
+     * Note: In span() when spanLength==0
+     * (after a string match, or at the beginning after an empty code point span)
+     * and in spanNot() and spanNotUTF8(),
+     * string matching could use a binary search because all string matches are done
+     * from the same start index.
+     *
+     * For UTF-8, this would require a comparison function that returns UTF-16 order.
+     *
+     * This optimization should not be necessary for normal UnicodeSets because most sets have no strings, and most sets
+     * with strings have very few very short strings. For cases with many strings, it might be better to use a different
+     * API and implementation with a DFA (state machine).
+     */
+
+    /*
+     * Algorithm for span(SpanCondition.CONTAINED)
+     *
+     * Theoretical algorithm:
+     * - Iterate through the string, and at each code point boundary:
+     *   + If the code point there is in the set, then remember to continue after it.
+     *   + If a set string matches at the current position, then remember to continue after it.
+     *   + Either recursively span for each code point or string match, or recursively span
+     *     for all but the shortest one and iteratively continue the span with the shortest local match.
+     *   + Remember the longest recursive span (the farthest end point).
+     *   + If there is no match at the current position,
+     *     neither for the code point there nor for any set string,
+     *     then stop and return the longest recursive span length.
+     *
+     * Optimized implementation:
+     *
+     * (We assume that most sets will have very few very short strings.
+     * A span using a string-less set is extremely fast.)
+     *
+     * Create and cache a spanSet which contains all of the single code points of the original set
+     * but none of its strings.
+     *
+     * - Start with spanLength=spanSet.span(SpanCondition.CONTAINED).
+     * - Loop:
+     *   + Try to match each set string at the end of the spanLength.
+     *     ~ Set strings that start with set-contained code points
+     *       must be matched with a partial overlap
+     *       because the recursive algorithm would have tried to match them at every position.
+     *     ~ Set strings that entirely consist of set-contained code points
+     *       are irrelevant for span(SpanCondition.CONTAINED)
+     *       because the recursive algorithm would continue after them anyway and
+     *       find the longest recursive match from their end.
+     *     ~ Rather than recursing, note each end point of a set string match.
+     *   + If no set string matched after spanSet.span(),
+     *     then return with where the spanSet.span() ended.
+     *   + If at least one set string matched after spanSet.span(),
+     *     then pop the shortest string match end point and continue the loop,
+     *     trying to match all set strings from there.
+     *   + If at least one more set string matched after a previous string match, then test if the
+     *     code point after the previous string match is also contained in the set.
+     *     Continue the loop with the shortest end point of
+     *     either this code point or a matching set string.
+     *   + If no more set string matched after a previous string match,
+     *     then try another spanLength=spanSet.span(SpanCondition.CONTAINED).
+     *     Stop if spanLength==0, otherwise continue the loop.
+     *
+     * By noting each end point of a set string match, the function visits each string position at most once and
+     * finishes in linear time.
+     *
+     * The recursive algorithm may visit the same string position many times
+     * if multiple paths lead to it and finishes in exponential time.
+     */
+
+    /*
+     * Algorithm for span(SIMPLE)
+     *
+     * Theoretical algorithm:
+     * - Iterate through the string, and at each code point boundary:
+     *   + If the code point there is in the set, then remember to continue after it.
+     *   + If a set string matches at the current position, then remember to continue after it.
+     *   + Continue from the farthest match position and ignore all others.
+     *   + If there is no match at the current position, then stop and return the current position.
+     *
+     * Optimized implementation:
+     *
+     * (Same assumption and spanSet as above.)
+     *
+     * - Start with spanLength=spanSet.span(SpanCondition.CONTAINED).
+     * - Loop:
+     *   + Try to match each set string at the end of the spanLength.
+     *     ~ Set strings that start with set-contained code points
+     *       must be matched with a partial overlap
+     *       because the standard algorithm would have tried to match them earlier.
+     *     ~ Set strings that entirely consist of set-contained code points
+     *       must be matched with a full overlap because the longest-match algorithm
+     *       would hide set string matches that end earlier.
+     *       Such set strings need not be matched earlier inside the code point span
+     *       because the standard algorithm would then have
+     *       continued after the set string match anyway.
+     *     ~ Remember the longest set string match (farthest end point)
+     *       from the earliest starting point.
+     *   + If no set string matched after spanSet.span(),
+     *     then return with where the spanSet.span() ended.
+     *   + If at least one set string matched,
+     *     then continue the loop after the longest match from the earliest position.
+     *   + If no more set string matched after a previous string match,
+     *     then try another spanLength=spanSet.span(SpanCondition.CONTAINED).
+     *     Stop if spanLength==0, otherwise continue the loop.
+     */
+    /**
+     * Spans a string.
+     * NOT_CONTAINED is delegated to spanNot(). Otherwise a plain code point
+     * span is tried first, and the set strings are only consulted when that
+     * span stops before the end of the input.
+     *
+     * @param s The string to be spanned
+     * @param start The start index that the span begins
+     * @param spanCondition The span condition
+     * @return the limit (exclusive end) of the span
+     */
+    public int span(CharSequence s, int start, SpanCondition spanCondition) {
+        if (spanCondition == SpanCondition.NOT_CONTAINED) {
+            return spanNot(s, start, null);
+        }
+        final int codePointLimit = spanSet.span(s, start, SpanCondition.CONTAINED);
+        return (codePointLimit == s.length())
+                ? codePointLimit
+                : spanWithStrings(s, start, codePointLimit, spanCondition);
+    }
+
+    /**
+     * Continues a span() beyond spanLimit by also matching the set strings.
+     * Synchronized because it uses the offsets field; span() handles the simple cases without it.
+     *
+     * @param spanLimit = spanSet.span(s, start, CONTAINED)
+     */
+    private synchronized int spanWithStrings(CharSequence s, int start, int spanLimit,
+            SpanCondition spanCondition) {
+        // Consider strings; they may overlap with the span.
+        int initSize = 0;
+        if (spanCondition == SpanCondition.CONTAINED) {
+            // Use offset list to try all possibilities.
+            initSize = maxLength16;
+        }
+        offsets.setMaxLength(initSize);
+        int length = s.length();
+        int pos = spanLimit, rest = length - spanLimit;
+        int spanLength = spanLimit - start;
+        int i, stringsLength = strings.size();
+        for (;;) {
+            if (spanCondition == SpanCondition.CONTAINED) {
+                for (i = 0; i < stringsLength; ++i) {
+                    int overlap = spanLengths[i];
+                    if (overlap == ALL_CP_CONTAINED) {
+                        continue; // Irrelevant string.
+                    }
+                    String string = strings.get(i);
+
+                    int length16 = string.length();
+
+                    // Try to match this string starting at each index from pos-overlap up to pos.
+                    if (overlap >= LONG_SPAN) {
+                        overlap = length16;
+                        // While contained: No point matching fully inside the code point span.
+                        overlap = string.offsetByCodePoints(overlap, -1); // Length of the string minus the last code
+                                                                          // point.
+                    }
+                    if (overlap > spanLength) {
+                        overlap = spanLength;
+                    }
+                    int inc = length16 - overlap; // Keep overlap+inc==length16.
+                    for (;;) {
+                        if (inc > rest) {
+                            break;
+                        }
+                        // Try to match if the increment is not listed already.
+                        if (!offsets.containsOffset(inc) && matches16CPB(s, pos - overlap, length, string, length16)) {
+                            if (inc == rest) {
+                                return length; // Reached the end of the string.
+                            }
+                            offsets.addOffset(inc);
+                        }
+                        if (overlap == 0) {
+                            break;
+                        }
+                        --overlap;
+                        ++inc;
+                    }
+                }
+            } else /* SIMPLE */{
+                int maxInc = 0, maxOverlap = 0;
+                for (i = 0; i < stringsLength; ++i) {
+                    int overlap = spanLengths[i];
+                    // For longest match, we do need to try to match even an all-contained string
+                    // to find the match from the earliest start.
+
+                    String string = strings.get(i);
+
+                    int length16 = string.length();
+
+                    // Try to match this string starting at each index from pos-overlap up to pos.
+                    if (overlap >= LONG_SPAN) {
+                        overlap = length16;
+                        // Longest match: Need to match fully inside the code point span
+                        // to find the match from the earliest start.
+                    }
+                    if (overlap > spanLength) {
+                        overlap = spanLength;
+                    }
+                    int inc = length16 - overlap; // Keep overlap+inc==length16.
+                    for (;;) {
+                        if (inc > rest || overlap < maxOverlap) {
+                            break;
+                        }
+                        // Try to match if the string is longer or starts earlier.
+                        if ((overlap > maxOverlap || /* redundant overlap==maxOverlap && */inc > maxInc)
+                                && matches16CPB(s, pos - overlap, length, string, length16)) {
+                            maxInc = inc; // Longest match from earliest start.
+                            maxOverlap = overlap;
+                            break;
+                        }
+                        --overlap;
+                        ++inc;
+                    }
+                }
+
+                if (maxInc != 0 || maxOverlap != 0) {
+                    // Longest-match algorithm, and there was a string match.
+                    // Simply continue after it.
+                    pos += maxInc;
+                    rest -= maxInc;
+                    if (rest == 0) {
+                        return length; // Reached the end of the string.
+                    }
+                    spanLength = 0; // Match strings from after a string match.
+                    continue;
+                }
+            }
+            // Finished trying to match all strings at pos.
+
+            if (spanLength != 0 || pos == 0) {
+                // The position is after an unlimited code point span (spanLength!=0),
+                // not after a string match.
+                // The only position where spanLength==0 after a span is pos==0.
+                // Otherwise, an unlimited code point span is only tried again when no
+                // strings match, and if such a non-initial span fails we stop.
+                if (offsets.isEmpty()) {
+                    return pos; // No strings matched after a span.
+                }
+                // Match strings from after the next string match.
+            } else {
+                // The position is after a string match (or a single code point).
+                if (offsets.isEmpty()) {
+                    // No more strings matched after a previous string match.
+                    // Try another code point span from after the last string match.
+                    spanLimit = spanSet.span(s, pos, SpanCondition.CONTAINED);
+                    spanLength = spanLimit - pos;
+                    if (spanLength == rest || // Reached the end of the string, or
+                            spanLength == 0 // neither strings nor span progressed.
+                    ) {
+                        return spanLimit;
+                    }
+                    pos += spanLength;
+                    rest -= spanLength;
+                    continue; // spanLength>0: Match strings from after a span.
+                } else {
+                    // Try to match only one code point from after a string match if some
+                    // string matched beyond it, so that we try all possible positions
+                    // and don't overshoot.
+                    spanLength = spanOne(spanSet, s, pos, rest);
+                    if (spanLength > 0) {
+                        if (spanLength == rest) {
+                            return length; // Reached the end of the string.
+                        }
+                        // Match strings after this code point.
+                        // There cannot be any increments below it because UnicodeSet strings
+                        // contain multiple code points.
+                        pos += spanLength;
+                        rest -= spanLength;
+                        offsets.shift(spanLength);
+                        spanLength = 0;
+                        continue; // Match strings from after a single code point.
+                    }
+                    // Match strings from after the next string match.
+                }
+            }
+            int minOffset = offsets.popMinimum(null); // Nearest pending string-match end; no count needed.
+            pos += minOffset;
+            rest -= minOffset;
+            spanLength = 0; // Match strings from after a string match.
+        }
+    }
+
+    /**
+     * Spans a string and counts the smallest number of set elements on any path across the span.
+     *
+     * <p>For proper counting, we cannot ignore strings that are fully contained in code point spans.
+     *
+     * <p>If the set does not have any fully-contained strings, then we could optimize this
+     * like span(), but such sets are likely rare, and this is at least still linear.
+     *
+     * @param s The string to be spanned
+     * @param start The index in s at which the span begins
+     * @param spanCondition The span condition
+     * @param outCount Receives the count (dereferenced without a null check for CONTAINED and SIMPLE)
+     * @return the limit (exclusive end) of the span
+     */
+    public int spanAndCount(CharSequence s, int start, SpanCondition spanCondition,
+            OutputInt outCount) {
+        if (spanCondition == SpanCondition.NOT_CONTAINED) {
+            return spanNot(s, start, outCount);
+        }
+        // Consider strings; they may overlap with the span,
+        // and they may result in a smaller count than with just code points.
+        if (spanCondition == SpanCondition.CONTAINED) {
+            return spanContainedAndCount(s, start, outCount);
+        }
+        // SIMPLE (not synchronized, does not use offsets)
+        int stringsLength = strings.size();
+        int length = s.length();
+        int pos = start;
+        int rest = length - start;
+        int count = 0;
+        while (rest != 0) {
+            // Try to match the next code point.
+            int cpLength = spanOne(spanSet, s, pos, rest);
+            int maxInc = (cpLength > 0) ? cpLength : 0; // spanOne() is negative when not contained
+            // Try to match all of the strings.
+            for (int i = 0; i < stringsLength; ++i) {
+                String string = strings.get(i);
+                int length16 = string.length();
+                if (maxInc < length16 && length16 <= rest &&
+                        matches16CPB(s, pos, length, string, length16)) {
+                    maxInc = length16;
+                }
+            }
+            // We are done if there is no match beyond pos.
+            if (maxInc == 0) {
+                outCount.value = count;
+                return pos;
+            }
+            // Continue from the longest match; it counts as one set element.
+            ++count;
+            pos += maxInc;
+            rest -= maxInc;
+        }
+        outCount.value = count;
+        return pos;
+    }
+
+    private synchronized int spanContainedAndCount(CharSequence s, int start, OutputInt outCount) {
+        // CONTAINED case of spanAndCount(). Use offset list to try all possibilities.
+        offsets.setMaxLength(maxLength16);
+        int stringsLength = strings.size();
+        int length = s.length();
+        int pos = start;
+        int rest = length - start;
+        int count = 0;
+        while (rest != 0) {
+            // Try to match the next code point.
+            int cpLength = spanOne(spanSet, s, pos, rest);
+            if (cpLength > 0) {
+                offsets.addOffsetAndCount(cpLength, count + 1);
+            }
+            // Try to match all of the strings.
+            for (int i = 0; i < stringsLength; ++i) {
+                String string = strings.get(i);
+                int length16 = string.length();
+                // Note: If the strings were sorted by length, then we could also
+                // avoid trying to match if there is already a match of the same length.
+                if (length16 <= rest && !offsets.hasCountAtOffset(length16, count + 1) &&
+                        matches16CPB(s, pos, length, string, length16)) {
+                    offsets.addOffsetAndCount(length16, count + 1);
+                }
+            }
+            // We are done if there is no match beyond pos.
+            if (offsets.isEmpty()) {
+                outCount.value = count;
+                return pos;
+            }
+            // Continue from the nearest match; popMinimum() stores that offset's count in outCount.
+            int minOffset = offsets.popMinimum(outCount);
+            count = outCount.value;
+            pos += minOffset;
+            rest -= minOffset;
+        }
+        outCount.value = count;
+        return pos;
+    }
+
+    /**
+     * Span a string backwards, starting at index {@code length} and moving toward index 0.
+     * @param s The string to be spanned
+     * @param length The limit (exclusive end) index in s from which the backward span starts
+     * @param spanCondition The span condition
+     * @return The string index which starts the span (i.e. inclusive).
+     */
+    public synchronized int spanBack(CharSequence s, int length, SpanCondition spanCondition) {
+        if (spanCondition == SpanCondition.NOT_CONTAINED) {
+            return spanNotBack(s, length);
+        }
+        int pos = spanSet.spanBack(s, length, SpanCondition.CONTAINED); // initial code point span
+        if (pos == 0) {
+            return 0;
+        }
+        int spanLength = length - pos;
+
+        // Consider strings; they may overlap with the span.
+        int initSize = 0;
+        if (spanCondition == SpanCondition.CONTAINED) {
+            // Use offset list to try all possibilities.
+            initSize = maxLength16;
+        }
+        offsets.setMaxLength(initSize);
+        int i, stringsLength = strings.size();
+        int spanBackLengthsOffset = 0;
+        if (all) {
+            spanBackLengthsOffset = stringsLength;
+        }
+        for (;;) {
+            if (spanCondition == SpanCondition.CONTAINED) {
+                for (i = 0; i < stringsLength; ++i) {
+                    int overlap = spanLengths[spanBackLengthsOffset + i];
+                    if (overlap == ALL_CP_CONTAINED) {
+                        continue; // Irrelevant string.
+                    }
+                    String string = strings.get(i);
+
+                    int length16 = string.length();
+
+                    // Try to match this string starting at pos-(length16-overlap) down to pos-length16.
+                    if (overlap >= LONG_SPAN) {
+                        overlap = length16;
+                        // While contained: No point matching fully inside the code point span.
+                        int len1 = 0;
+                        len1 = string.offsetByCodePoints(0, 1);
+                        overlap -= len1; // Length of the string minus the first code point.
+                    }
+                    if (overlap > spanLength) {
+                        overlap = spanLength;
+                    }
+                    int dec = length16 - overlap; // Keep dec+overlap==length16.
+                    for (;;) {
+                        if (dec > pos) {
+                            break;
+                        }
+                        // Try to match if the decrement is not listed already.
+                        if (!offsets.containsOffset(dec) && matches16CPB(s, pos - dec, length, string, length16)) {
+                            if (dec == pos) {
+                                return 0; // Reached the start of the string.
+                            }
+                            offsets.addOffset(dec);
+                        }
+                        if (overlap == 0) {
+                            break;
+                        }
+                        --overlap;
+                        ++dec;
+                    }
+                }
+            } else /* SIMPLE */{
+                int maxDec = 0, maxOverlap = 0;
+                for (i = 0; i < stringsLength; ++i) {
+                    int overlap = spanLengths[spanBackLengthsOffset + i];
+                    // For longest match, we do need to try to match even an all-contained string
+                    // to find the match from the latest end.
+
+                    String string = strings.get(i);
+
+                    int length16 = string.length();
+
+                    // Try to match this string starting at pos-(length16-overlap) down to pos-length16.
+                    if (overlap >= LONG_SPAN) {
+                        overlap = length16;
+                        // Longest match: Need to match fully inside the code point span
+                        // to find the match from the latest end.
+                    }
+                    if (overlap > spanLength) {
+                      overlap = spanLength;
+                    }
+                    int dec = length16 - overlap; // Keep dec+overlap==length16.
+                    for (;;) {
+                        if (dec > pos || overlap < maxOverlap) {
+                            break;
+                        }
+                        // Try to match if the string is longer or ends later.
+                        if ((overlap > maxOverlap || /* redundant overlap==maxOverlap && */dec > maxDec)
+                                && matches16CPB(s, pos - dec, length, string, length16)) {
+                            maxDec = dec; // Longest match from latest end.
+                            maxOverlap = overlap;
+                            break;
+                        }
+                        --overlap;
+                        ++dec;
+                    }
+                }
+
+                if (maxDec != 0 || maxOverlap != 0) {
+                    // Longest-match algorithm, and there was a string match.
+                    // Simply continue before it.
+                    pos -= maxDec;
+                    if (pos == 0) {
+                        return 0; // Reached the start of the string.
+                    }
+                    spanLength = 0; // Match strings from before a string match.
+                    continue;
+                }
+            }
+            // Finished trying to match all strings at pos.
+
+            if (spanLength != 0 || pos == length) {
+                // The position is before an unlimited code point span (spanLength!=0),
+                // not before a string match.
+                // The only position where spanLength==0 before a span is pos==length.
+                // Otherwise, an unlimited code point span is only tried again when no
+                // strings match, and if such a non-initial span fails we stop.
+                if (offsets.isEmpty()) {
+                    return pos; // No strings matched before a span.
+                }
+                // Match strings from before the next string match.
+            } else {
+                // The position is before a string match (or a single code point).
+                if (offsets.isEmpty()) {
+                    // No more strings matched before a previous string match.
+                    // Try another code point span from before the last string match.
+                    int oldPos = pos;
+                    pos = spanSet.spanBack(s, oldPos, SpanCondition.CONTAINED);
+                    spanLength = oldPos - pos;
+                    if (pos == 0 || // Reached the start of the string, or
+                            spanLength == 0 // neither strings nor span progressed.
+                    ) {
+                        return pos;
+                    }
+                    continue; // spanLength>0: Match strings from before a span.
+                } else {
+                    // Try to match only one code point from before a string match if some
+                    // string matched beyond it, so that we try all possible positions
+                    // and don't overshoot.
+                    spanLength = spanOneBack(spanSet, s, pos);
+                    if (spanLength > 0) {
+                        if (spanLength == pos) {
+                            return 0; // Reached the start of the string.
+                        }
+                        // Match strings before this code point.
+                        // There cannot be any decrements below it because UnicodeSet strings
+                        // contain multiple code points.
+                        pos -= spanLength;
+                        offsets.shift(spanLength);
+                        spanLength = 0;
+                        continue; // Match strings from before a single code point.
+                    }
+                    // Match strings from before the next string match.
+                }
+            }
+            pos -= offsets.popMinimum(null); // Move back to the nearest pending string-match start.
+            spanLength = 0; // Match strings from before a string match.
+        }
+    }
+
+    /**
+     * Algorithm for spanNot()==span(SpanCondition.NOT_CONTAINED)
+     *
+     * Theoretical algorithm:
+     * - Iterate through the string, and at each code point boundary:
+     *   + If the code point there is in the set, then return with the current position.
+     *   + If a set string matches at the current position, then return with the current position.
+     *
+     * Optimized implementation:
+     *
+     * (Same assumption as for span() above.)
+     *
+     * Create and cache a spanNotSet which contains
+     * all of the single code points of the original set but none of its strings.
+     * For each set string add its initial code point to the spanNotSet.
+     * (Also add its final code point for spanNotBack().)
+     *
+     * - Loop:
+     *   + Do spanLength=spanNotSet.span(SpanCondition.NOT_CONTAINED).
+     *   + If the current code point is in the original set, then return the current position.
+     *   + If any set string matches at the current position, then return the current position.
+     *   + If there is no match at the current position, neither for the code point
+     *     there nor for any set string, then skip this code point and continue the loop.
+     *     This happens for set-string-initial code points that were added to spanNotSet
+     *     when there is not actually a match for such a set string.
+     *
+     * @param s The string to be spanned
+     * @param start The index in s at which the span begins
+     * @param outCount If not null: Receives the number of code points across the span.
+     * @return the limit (exclusive end) of the span
+     */
+    private int spanNot(CharSequence s, int start, OutputInt outCount) {
+        int length = s.length();
+        int pos = start, rest = length - start;
+        int stringsLength = strings.size();
+        int count = 0;
+        do {
+            // Span until we find a code point from the set,
+            // or a code point that starts or ends some string.
+            int spanLimit;
+            if (outCount == null) {
+                spanLimit = spanNotSet.span(s, pos, SpanCondition.NOT_CONTAINED);
+            } else {
+                spanLimit = spanNotSet.spanAndCount(s, pos, SpanCondition.NOT_CONTAINED, outCount);
+                outCount.value = count = count + outCount.value; // accumulate the running total
+            }
+            if (spanLimit == length) {
+                return length; // Reached the end of the string.
+            }
+            pos = spanLimit;
+            rest = length - spanLimit;
+
+            // Check whether the current code point is in the original set,
+            // without the string starts and ends.
+            int cpLength = spanOne(spanSet, s, pos, rest);
+            if (cpLength > 0) {
+                return pos; // There is a set element at pos.
+            }
+
+            // Try to match the strings at pos.
+            for (int i = 0; i < stringsLength; ++i) {
+                if (spanLengths[i] == ALL_CP_CONTAINED) {
+                    continue; // Irrelevant string.
+                }
+                String string = strings.get(i);
+
+                int length16 = string.length();
+                if (length16 <= rest && matches16CPB(s, pos, length, string, length16)) {
+                    return pos; // There is a set element at pos.
+                }
+            }
+
+            // The span(while not contained) ended on a string start/end which is
+            // not in the original set. Skip this code point and continue.
+            // cpLength<0, so subtracting it advances pos and adding it shrinks rest.
+            pos -= cpLength;
+            rest += cpLength;
+            ++count;
+        } while (rest != 0);
+        if (outCount != null) {
+            outCount.value = count;
+        }
+        return length; // Reached the end of the string.
+    }
+
+    private int spanNotBack(CharSequence s, int length) { // NOT_CONTAINED case of spanBack()
+        int pos = length;
+        int i, stringsLength = strings.size();
+        do {
+            // Span until we find a code point from the set,
+            // or a code point that starts or ends some string.
+            pos = spanNotSet.spanBack(s, pos, SpanCondition.NOT_CONTAINED);
+            if (pos == 0) {
+                return 0; // Reached the start of the string.
+            }
+
+            // Check whether the current code point is in the original set,
+            // without the string starts and ends.
+            int cpLength = spanOneBack(spanSet, s, pos);
+            if (cpLength > 0) {
+                return pos; // There is a set element at pos.
+            }
+
+            // Try to match the strings at pos.
+            for (i = 0; i < stringsLength; ++i) {
+                // Index spanLengths directly (without the offset that spanBack() adds) because
+                // it is easier and we only need to know whether the string is irrelevant
+                // which is the same in either array.
+                if (spanLengths[i] == ALL_CP_CONTAINED) {
+                    continue; // Irrelevant string.
+                }
+                String string = strings.get(i);
+
+                int length16 = string.length();
+                if (length16 <= pos && matches16CPB(s, pos - length16, length, string, length16)) {
+                    return pos; // There is a set element at pos.
+                }
+            }
+
+            // The span(while not contained) ended on a string start/end which is
+            // not in the original set. Skip this code point and continue.
+            // cpLength<0, so adding it moves pos backwards.
+            pos += cpLength;
+        } while (pos != 0);
+        return 0; // Reached the start of the string.
+    }
+
+    static short makeSpanLengthByte(int spanLength) { // Caps spanLength at LONG_SPAN.
+        // 0xfe==UnicodeSetStringSpan::LONG_SPAN
+        return spanLength < LONG_SPAN ? (short) spanLength : LONG_SPAN;
+    }
+
+    // Compare t[0..length) with s[start..start+length) without any argument checks. Requires length>0.
+    private static boolean matches16(CharSequence s, int start, final String t, int length) {
+        int end = start + length;
+        while (length-- > 0) { // Compares backwards, from the last char to the first.
+            if (s.charAt(--end) != t.charAt(length)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Compare 16-bit Unicode strings (which may be malformed UTF-16) at code point boundaries.
+     * That is, each edge of a match must not be in the middle of a surrogate pair.
+     * @param s       The string to match in.
+     * @param start   The start index of s.
+     * @param limit   The limit of the subsequence of s being spanned.
+     * @param t       The substring to be matched in s.
+     * @param tlength The length of t.
+     * @return true if t matches s at start and neither edge of the match splits a surrogate pair.
+     */
+    static boolean matches16CPB(CharSequence s, int start, int limit, final String t, int tlength) {
+        return matches16(s, start, t, tlength)
+                && !(0 < start && Character.isHighSurrogate(s.charAt(start - 1)) &&
+                        Character.isLowSurrogate(s.charAt(start)))
+                && !((start + tlength) < limit && Character.isHighSurrogate(s.charAt(start + tlength - 1)) &&
+                        Character.isLowSurrogate(s.charAt(start + tlength)));
+    }
+
+    /**
+     * Does the set contain the code point at s[start]? (length is the number of chars available.)
+     * If so, return its length in chars (1 or 2); otherwise return its negative length.
+     */
+    static int spanOne(final UnicodeSet set, CharSequence s, int start, int length) {
+        char c = s.charAt(start);
+        if (c >= 0xd800 && c <= 0xdbff && length >= 2) { // lead surrogate with a following char
+            char c2 = s.charAt(start + 1);
+            if (UTF16.isTrailSurrogate(c2)) {
+                int supplementary = UCharacterProperty.getRawSupplementary(c, c2);
+                return set.contains(supplementary) ? 2 : -2;
+            }
+        }
+        return set.contains(c) ? 1 : -1; // single char, including an unpaired surrogate
+    }
+
+    static int spanOneBack(final UnicodeSet set, CharSequence s, int length) { // spanOne() for the code point ending at s[length-1]
+        char c = s.charAt(length - 1);
+        if (c >= 0xdc00 && c <= 0xdfff && length >= 2) { // trail surrogate with a preceding char
+            char c2 = s.charAt(length - 2);
+            if (UTF16.isLeadSurrogate(c2)) {
+                int supplementary = UCharacterProperty.getRawSupplementary(c2, c);
+                return set.contains(supplementary) ? 2 : -2;
+            }
+        }
+        return set.contains(c) ? 1 : -1; // single char, including an unpaired surrogate
+    }
+
+    /**
+     * Helper class for UnicodeSetStringSpan.
+     *
+     * <p>List of offsets from the current position from where to try matching
+     * a code point or a string.
+     * Stores offsets rather than indexes to simplify the code and use the same list
+     * for both increments (in span()) and decrements (in spanBack()).
+     *
+     * <p>Assumption: The maximum offset is limited, and the offsets that are stored at any one time
+     * are relatively dense, that is,
+     * there are normally no gaps of hundreds or thousands of offset values.
+     *
+     * <p>This class optionally also tracks the minimum non-negative count for each position,
+     * intended to count the smallest number of elements of any path leading to that position.
+     *
+     * <p>The implementation uses a circular buffer of count integers,
+     * each indicating whether the corresponding offset is in the list,
+     * and its path element count.
+     * This avoids inserting into a sorted list of offsets (or absolute indexes)
+     * and physically moving part of the list.
+     *
+     * <p>Note: In principle, the caller should setMaxLength() to
+     * the maximum of the max string length and U16_LENGTH/U8_LENGTH
+     * to account for "long" single code points.
+     *
+     * <p>Note: An earlier version did not track counts and stored only byte flags.
+     * With boolean flags, if maxLength were guaranteed to be no more than 32 or 64,
+     * the list could be stored as bit flags in a single integer.
+     * Rather than handling a circular buffer with a start list index,
+     * the integer would simply be shifted when lower offsets are removed.
+     * UnicodeSet does not have a limit on the lengths of strings.
+     */
+    private static final class OffsetList {
+        private int[] list; // circular buffer: 0 = offset not stored, non-zero = its count (1 from addOffset())
+        private int length; // number of offsets currently stored
+        private int start; // buffer index that corresponds to offset 0
+
+        public OffsetList() {
+            list = new int[16];  // default size
+        }
+
+        public void setMaxLength(int maxLength) {
+            if (maxLength > list.length) { // grow only; a larger existing buffer is kept
+                list = new int[maxLength];
+            }
+            clear();
+        }
+
+        public void clear() {
+            for (int i = list.length; i-- > 0;) {
+                list[i] = 0;
+            }
+            start = length = 0;
+        }
+
+        public boolean isEmpty() {
+            return (length == 0);
+        }
+
+        /**
+         * Reduces all stored offsets by delta, used when the current position moves by delta.
+         * There must not be any offsets lower than delta.
+         * If there is an offset equal to delta, it is removed.
+         *
+         * @param delta [1..maxLength]
+         */
+        public void shift(int delta) {
+            int i = start + delta;
+            if (i >= list.length) {
+                i -= list.length; // wrap around the circular buffer
+            }
+            if (list[i] != 0) {
+                list[i] = 0;
+                --length;
+            }
+            start = i;
+        }
+
+        /**
+         * Adds an offset. The list must not contain it yet.
+         * @param offset [1..maxLength]
+         */
+        public void addOffset(int offset) {
+            int i = start + offset;
+            if (i >= list.length) {
+                i -= list.length;
+            }
+            assert list[i] == 0;
+            list[i] = 1;
+            ++length;
+        }
+
+        /**
+         * Adds an offset and updates its count, keeping the smaller count if already present.
+         * The list may already contain the offset.
+         * @param offset [1..maxLength]
+         */
+        public void addOffsetAndCount(int offset, int count) {
+            assert count > 0;
+            int i = start + offset;
+            if (i >= list.length) {
+                i -= list.length;
+            }
+            if (list[i] == 0) {
+                list[i] = count;
+                ++length;
+            } else if (count < list[i]) {
+                list[i] = count;
+            }
+        }
+
+        /**
+         * @param offset [1..maxLength]
+         */
+        public boolean containsOffset(int offset) {
+            int i = start + offset;
+            if (i >= list.length) {
+                i -= list.length;
+            }
+            return list[i] != 0;
+        }
+
+        /**
+         * @param offset [1..maxLength]
+         */
+        public boolean hasCountAtOffset(int offset, int count) { // true if stored with a count <= count
+            int i = start + offset;
+            if (i >= list.length) {
+                i -= list.length;
+            }
+            int oldCount = list[i];
+            return oldCount != 0 && oldCount <= count;
+        }
+
+        /**
+         * Finds the lowest stored offset from a non-empty list, removes it,
+         * and reduces all other offsets by this minimum.
+         * If outCount is not null, it receives the count stored for the removed offset.
+         * @return min=[1..maxLength]
+         */
+        public int popMinimum(OutputInt outCount) {
+            // Look for the next offset in list[start+1..list.length-1].
+            int i = start, result;
+            while (++i < list.length) {
+                int count = list[i];
+                if (count != 0) {
+                    list[i] = 0;
+                    --length;
+                    result = i - start;
+                    start = i;
+                    if (outCount != null) { outCount.value = count; }
+                    return result;
+                }
+            }
+            // i==list.length
+
+            // Wrap around and look for the next offset in list[0..start].
+            // Since the list is not empty, there will be one.
+            result = list.length - start;
+            i = 0;
+            int count;
+            while ((count = list[i]) == 0) {
+                ++i;
+            }
+            list[i] = 0;
+            --length;
+            start = i;
+            if (outCount != null) { outCount.value = count; }
+            return result + i;
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/impl/Utility.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2011, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+
+package jdk.internal.icu.impl;
+
+import jdk.internal.icu.lang.UCharacter;
+import jdk.internal.icu.text.UTF16;
+
+import java.io.IOException;
+import java.util.Locale;
+
+public final class Utility {
+
+    /**
+     * Returns {@code s} with each backslash doubled and each code point outside
+     * U+0020..U+007F written as a 4-digit (BMP) or 8-digit hex Unicode escape.
+     */
+    public static final String escape(String s) {
+        StringBuilder out = new StringBuilder();
+        int pos = 0;
+        while (pos < s.length()) {
+            final int cp = Character.codePointAt(s, pos);
+            pos += UTF16.getCharCount(cp);
+            if (cp < ' ' || cp > 0x007F) {
+                // Only supplementary code points take the 8-digit form.
+                final int digits = (cp > 0xFFFF) ? 8 : 4;
+                out.append(digits == 8 ? "\\U" : "\\u");
+                out.append(hex(cp, digits));
+            } else if (cp == '\\') {
+                out.append("\\\\"); // two backslash characters
+            } else {
+                out.append((char) cp);
+            }
+        }
+        return out.toString();
+    }
+
+    /* Flat (escape char, result char) pairs. This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
+    private static final char[] UNESCAPE_MAP = {
+        /*"   0x22, 0x22 */
+        /*'   0x27, 0x27 */
+        /*?   0x3F, 0x3F */
+        /*\   0x5C, 0x5C */
+        /*a*/ 0x61, 0x07,
+        /*b*/ 0x62, 0x08,
+        /*e*/ 0x65, 0x1b,
+        /*f*/ 0x66, 0x0c,
+        /*n*/ 0x6E, 0x0a,
+        /*r*/ 0x72, 0x0d,
+        /*t*/ 0x74, 0x09,
+        /*v*/ 0x76, 0x0b
+    };
+
+    /**
+     * Convert an escape to a 32-bit code point value.  We attempt
+     * to parallel the icu4c unescapeAt() function.
+     * @param s the string that contains the escape sequence
+     * @param offset16 an array containing offset to the character <em>after</em> the
+     * backslash.  On success offset16[0] is updated to point after the escape sequence.
+     * @return character value from 0 to 10FFFF, or -1 on error.
+     */
+    public static int unescapeAt(String s, int[] offset16) {
+        int c;
+        int result = 0;
+        int n = 0;
+        int minDig = 0;
+        int maxDig = 0;
+        int bitsPerDigit = 4;
+        int dig;
+        int i;
+        boolean braces = false;
+
+        /* Check that offset is in range */
+        int offset = offset16[0];
+        int length = s.length();
+        if (offset < 0 || offset >= length) {
+            return -1;
+        }
+
+        /* Fetch first UChar after '\\' */
+        c = Character.codePointAt(s, offset);
+        offset += UTF16.getCharCount(c);
+
+        /* Convert hexadecimal and octal escapes */
+        switch (c) {
+        case 'u':
+            minDig = maxDig = 4;
+            break;
+        case 'U':
+            minDig = maxDig = 8;
+            break;
+        case 'x':
+            minDig = 1;
+            if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
+                ++offset;
+                braces = true;
+                maxDig = 8;
+            } else {
+                maxDig = 2;
+            }
+            break;
+        default:
+            dig = UCharacter.digit(c, 8);
+            if (dig >= 0) {
+                minDig = 1;
+                maxDig = 3;
+                n = 1; /* Already have first octal digit */
+                bitsPerDigit = 3;
+                result = dig;
+            }
+            break;
+        }
+        if (minDig != 0) {
+            while (offset < length && n < maxDig) {
+                c = UTF16.charAt(s, offset);
+                dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
+                if (dig < 0) {
+                    break;
+                }
+                result = (result << bitsPerDigit) | dig;
+                offset += UTF16.getCharCount(c);
+                ++n;
+            }
+            if (n < minDig) {
+                return -1;
+            }
+            if (braces) {
+                if (c != 0x7D /*}*/) { // c is the last char examined by the loop above
+                    return -1;
+                }
+                ++offset;
+            }
+            if (result < 0 || result >= 0x110000) {
+                return -1;
+            }
+            // If an escape sequence specifies a lead surrogate, see if there
+            // is a trail surrogate after it, either as an escape or as a
+            // literal.  If so, join them up into a supplementary.  The BMP
+            // range checks stop the (char) casts from truncating e.g. 0x1D800.
+            if (offset < length && result <= 0xFFFF &&
+                    UTF16.isLeadSurrogate((char) result)) {
+                int ahead = offset+1;
+                c = s.charAt(offset); // [sic] get 16-bit code unit
+                if (c == '\\' && ahead < length) {
+                    int o[] = new int[] { ahead };
+                    c = unescapeAt(s, o); // may be -1 or a supplementary code point
+                    ahead = o[0];
+                }
+                if (0 <= c && c <= 0xFFFF && UTF16.isTrailSurrogate((char) c)) {
+                    offset = ahead;
+                    result = UCharacterProperty.getRawSupplementary(
+                            (char) result, (char) c);
+                }
+            }
+            offset16[0] = offset;
+            return result;
+        }
+
+        /* Convert C-style escapes in table */
+        for (i=0; i<UNESCAPE_MAP.length; i+=2) {
+            if (c == UNESCAPE_MAP[i]) {
+                offset16[0] = offset;
+                return UNESCAPE_MAP[i+1];
+            } else if (c < UNESCAPE_MAP[i]) {
+                break; // table is sorted by escape code, so no later entry can match
+            }
+        }
+
+        /* Map \cX to control-X: X & 0x1F */
+        if (c == 'c' && offset < length) {
+            c = UTF16.charAt(s, offset);
+            offset16[0] = offset + UTF16.getCharCount(c);
+            return 0x1F & c;
+        }
+
+        /* If no special forms are recognized, then consider
+         * the backslash to generically escape the next character. */
+        offset16[0] = offset;
+        return c;
+    }
+
+    /**
+     * Formats {@code i} as upper-case hexadecimal without a 0x prefix,
+     * left-padded with zeros to at least {@code places} digits.
+     */
+    public static String hex(long i, int places) {
+        if (i == Long.MIN_VALUE) {
+            return "-8000000000000000"; // its magnitude cannot be negated in a long
+        }
+        final boolean minus = i < 0;
+        final long magnitude = minus ? -i : i;
+        String digits = Long.toString(magnitude, 16).toUpperCase(Locale.ENGLISH);
+        final int have = digits.length();
+        if (have < places) {
+            // The sign is not counted toward the requested width.
+            digits = "0000000000000000".substring(have, places) + digits;
+        }
+        return minus ? '-' + digits : digits;
+    }
+
+    static final char DIGITS[] = { // digit characters indexed by value: 0-9, then A-Z for 10-35
+        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
+        'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
+        'U', 'V', 'W', 'X', 'Y', 'Z'
+    };
+
+    /**
+     * Return true if the character is NOT printable ASCII, i.e. lies outside
+     * U+0020..U+007E.  Tab, carriage return and linefeed are thus unprintable.
+     */
+    public static boolean isUnprintable(int c) {
+        // 0x20 = 32 (space) and 0x7E = 126 (tilde)
+        return c < 0x20 || c > 0x7E;
+    }
+
+    /**
+     * Escape unprintable characters using a backslash followed by uxxxx
+     * for U+0000 to U+FFFF and a backslash followed by Uxxxxxxxx for
+     * U+10000 and above.
+     * @param result the Appendable that receives the escape, if any
+     * @param c the code point to test
+     * @return false, appending nothing, if c is printable ASCII;
+     *         true after appending the escaped notation otherwise
+     * @throws IllegalArgumentException wrapping any IOException
+     *         raised by the Appendable
+     */
+    public static <T extends Appendable> boolean escapeUnprintable(T result, int c) {
+        // Printable ASCII passes through untouched.
+        if (!isUnprintable(c)) {
+            return false;
+        }
+        try {
+            result.append('\\');
+            // Values that do not fit in 16 bits get 'U' plus eight hex digits.
+            final boolean wide = (c & ~0xFFFF) != 0;
+            result.append(wide ? 'U' : 'u');
+            // Emit the hex digits from the most significant nibble down.
+            for (int shift = wide ? 28 : 12; shift >= 0; shift -= 4) {
+                result.append(DIGITS[0xF & (c >> shift)]);
+            }
+            return true;
+        } catch (IOException e) {
+            // Appendable.append declares IOException; rethrow it unchecked.
+            throw new IllegalArgumentException(e);
+        }
+    }
+}
Binary file src/java.base/share/classes/jdk/internal/icu/impl/data/icudt64b/nfc.nrm has changed
Binary file src/java.base/share/classes/jdk/internal/icu/impl/data/icudt64b/nfkc.nrm has changed
Binary file src/java.base/share/classes/jdk/internal/icu/impl/data/icudt64b/ubidi.icu has changed
Binary file src/java.base/share/classes/jdk/internal/icu/impl/data/icudt64b/uprops.icu has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/lang/UCharacter.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,545 @@
+/*
+ * Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+*******************************************************************************
+* Copyright (C) 1996-2014, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*/
+
+package jdk.internal.icu.lang;
+
+import jdk.internal.icu.impl.UBiDiProps;
+import jdk.internal.icu.impl.UCharacterProperty;
+import jdk.internal.icu.text.Normalizer2;
+import jdk.internal.icu.text.UTF16;
+import jdk.internal.icu.util.VersionInfo;
+
+/**
+ * <p>The UCharacter class provides extensions to the
+ * <a href="http://java.sun.com/j2se/1.5/docs/api/java/lang/Character.html">
+ * java.lang.Character</a> class. These extensions provide support for
+ * more Unicode properties and together with the <a href=../text/UTF16.html>UTF16</a>
+ * class, provide support for supplementary characters (those with code
+ * points above U+FFFF).
+ * Each ICU release supports the latest version of Unicode available at that time.
+ *
+ * <p>Code points are represented in these API using ints. While it would be
+ * more convenient in Java to have a separate primitive datatype for them,
+ * ints suffice in the meantime.
+ *
+ * <p>To use this class please add the jar file name icu4j.jar to the
+ * class path, since it contains data files which supply the information used
+ * by this file.<br>
+ * E.g. In Windows <br>
+ * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/ucharacter.jar</code>.<br>
+ * Otherwise, another method would be to copy the files uprops.dat and
+ * unames.icu from the icu4j source subdirectory
+ * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory
+ * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>.
+ *
+ * <p>Aside from the additions for UTF-16 support, and the updated Unicode
+ * properties, the main differences between UCharacter and Character are:
+ * <ul>
+ * <li> UCharacter is not designed to be a char wrapper and does not have
+ *      APIs that involve management of that single char.<br>
+ *      These include:
+ *      <ul>
+ *        <li> char charValue(),
+ *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
+ *      </ul>
+ * <li> UCharacter does not include Character APIs that are deprecated, nor
+ *      does it include the Java-specific character information, such as
+ *      boolean isJavaIdentifierPart(char ch).
+ * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
+ *      values '10' - '35'. UCharacter also does this in digit and
+ *      getNumericValue, to adhere to the java semantics of these
+ *      methods.  New methods unicodeDigit, and
+ *      getUnicodeNumericValue do not treat the above code points
+ *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
+ * </ul>
+ * <p>
+ * Further detail on differences can be determined using the program
+ *        <a href=
+ * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
+ *        com.ibm.icu.dev.test.lang.UCharacterCompare</a>
+ * </p>
+ * <p>
+ * In addition to Java compatibility functions, which calculate derived properties,
+ * this API provides low-level access to the Unicode Character Database.
+ * </p>
+ * <p>
+ * Unicode assigns each code point (not just assigned character) values for
+ * many properties.
+ * Most of them are simple boolean flags, or constants from a small enumerated list.
+ * For some properties, values are strings or other relatively more complex types.
+ * </p>
+ * <p>
+ * For more information see
+ * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
+ * (http://www.unicode.org/ucd/)
+ * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
+ * User Guide chapter on Properties</a>
+ * (http://www.icu-project.org/userguide/properties.html).
+ * </p>
+ * <p>
+ * There are also functions that provide easy migration from C/POSIX functions
+ * like isblank(). Their use is generally discouraged because the C/POSIX
+ * standards do not define their semantics beyond the ASCII range, which means
+ * that different implementations exhibit very different behavior.
+ * Instead, Unicode properties should be used directly.
+ * </p>
+ * <p>
+ * There are also only a few, broad C/POSIX character classes, and they tend
+ * to be used for conflicting purposes. For example, the "isalpha()" class
+ * is sometimes used to determine word boundaries, while a more sophisticated
+ * approach would at least distinguish initial letters from continuation
+ * characters (the latter including combining marks).
+ * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
+ * Another example: There is no "istitle()" class for titlecase characters.
+ * </p>
+ * <p>
+ * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
+ * ICU implements them according to the Standard Recommendations in
+ * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
+ * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
+ * </p>
+ * <p>
+ * API access for C/POSIX character classes is as follows:
+ * <pre>{@code
+ * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
+ * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
+ * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
+ * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
+ *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
+ *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
+ * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
+ * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
+ * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
+ * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
+ * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
+ * - cntrl:     getType(c)==CONTROL
+ * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
+ * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)
+ * }</pre>
+ * </p>
+ * <p>
+ * The C/POSIX character classes are also available in UnicodeSet patterns,
+ * using patterns like [:graph:] or \p{graph}.
+ * </p>
+ *
+ * There are several ICU (and Java) whitespace functions.
+ * Comparison:<ul>
+ * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
+ *       most of general categories "Z" (separators) + most whitespace ISO controls
+ *       (including no-break spaces, but excluding IS1..IS4 and ZWSP)
+ * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
+ * <li> isSpaceChar: just Z (including no-break spaces)</ul>
+ * </p>
+ * <p>
+ * This class is not subclassable.
+ * </p>
+ * @author Syn Wee Quek
+ * @stable ICU 2.1
+ * @see com.ibm.icu.lang.UCharacterEnums
+ */
+
+public final class UCharacter
+{
+
+    /**
+     * Joining Group constants; only {@code NO_JOINING_GROUP} is declared here.
+     * @see UProperty#JOINING_GROUP
+     * @stable ICU 2.4
+     */
+    public static interface JoiningGroup
+    {
+        /** Joining group constant with value 0.
+         * @stable ICU 2.4
+         */
+        public static final int NO_JOINING_GROUP = 0;
+    }
+
+    /**
+     * Numeric Type constants, numbered consecutively from 0.
+     * @see UProperty#NUMERIC_TYPE
+     * @stable ICU 2.4
+     */
+    public static interface NumericType
+    {
+        /** Numeric type constant with value 0.
+         * @stable ICU 2.4
+         */
+        public static final int NONE = 0;
+        /** Numeric type constant with value 1.
+         * @stable ICU 2.4
+         */
+        public static final int DECIMAL = 1;
+        /** Numeric type constant with value 2.
+         * @stable ICU 2.4
+         */
+        public static final int DIGIT = 2;
+        /** Numeric type constant with value 3.
+         * @stable ICU 2.4
+         */
+        public static final int NUMERIC = 3;
+        /** Equals the number of numeric type values declared above.
+         * @stable ICU 2.4
+         */
+        public static final int COUNT = 4;
+    }
+
+    /**
+     * Hangul Syllable Type constants, numbered consecutively from 0.
+     * The bracketed tag after each constant is the type's short label.
+     * @see UProperty#HANGUL_SYLLABLE_TYPE
+     * @stable ICU 2.6
+     */
+    public static interface HangulSyllableType
+    {
+        /** Hangul syllable type labelled [NA].
+         * @stable ICU 2.6
+         */
+        public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /*See note !!*/
+        /** Hangul syllable type labelled [L].
+         * @stable ICU 2.6
+         */
+        public static final int LEADING_JAMO        = 1;   /*[L]*/
+        /** Hangul syllable type labelled [V].
+         * @stable ICU 2.6
+         */
+        public static final int VOWEL_JAMO          = 2;   /*[V]*/
+        /** Hangul syllable type labelled [T].
+         * @stable ICU 2.6
+         */
+        public static final int TRAILING_JAMO       = 3;   /*[T]*/
+        /** Hangul syllable type labelled [LV].
+         * @stable ICU 2.6
+         */
+        public static final int LV_SYLLABLE         = 4;   /*[LV]*/
+        /** Hangul syllable type labelled [LVT].
+         * @stable ICU 2.6
+         */
+        public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
+        /** Equals the number of syllable type values declared above.
+         * @stable ICU 2.6
+         */
+        public static final int COUNT               = 6;
+    }
+
+    // public data members -----------------------------------------------
+
+    /**
+     * The lowest Unicode code point value (same as UTF16.CODEPOINT_MIN_VALUE).
+     * @stable ICU 2.1
+     */
+    public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE;
+
+    /**
+     * The highest Unicode code point value (scalar value) according to the
+     * Unicode Standard (same as UTF16.CODEPOINT_MAX_VALUE).
+     * This is a 21-bit value (21 bits, rounded up).<br>
+     * Up-to-date Unicode implementation of java.lang.Character.MAX_VALUE
+     * @stable ICU 2.1
+     */
+    public static final int MAX_VALUE = UTF16.CODEPOINT_MAX_VALUE;
+
+    // public methods ----------------------------------------------------
+
+    /**
+     * Returns the value of a code point as a digit in the given radix.
+     * <br>This method observes the semantics of
+     * <code>java.lang.Character.digit()</code>: besides decimal digits,
+     * the European (Latin) letters count as digits, so the result can
+     * be positive for code points for which isDigit returns false.
+     * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
+     * did not treat the European letters as having a digit value, and
+     * also treated numeric letters and other numbers as digits.
+     * This has been changed to conform to the java semantics.
+     * <br>A code point is a valid digit if and only if:
+     * <ul>
+     *   <li>the radix lies in the range 2..36,
+     *   <li>ch is a decimal digit or one of the european letters, and
+     *   <li>the value of ch is less than the specified radix.
+     * </ul>
+     * @param ch the code point to query
+     * @param radix the radix, from 2 to 36 inclusive
+     * @return the numeric value represented by the code point in the
+     * specified radix, or -1 if the radix is out of range, the code
+     * point has no digit value, or its value is too large for the radix
+     * @stable ICU 2.1
+     */
+    public static int digit(int ch, int radix)
+    {
+        if (radix < 2 || radix > 36) {
+            return -1;  // invalid radix
+        }
+        int value = digit(ch);
+        if (value < 0) {
+            // ch is not a decimal digit, try latin letters
+            value = UCharacterProperty.getEuropeanDigit(ch);
+        }
+        // A value of -1 (no digit) also falls through unchanged here.
+        return (value >= radix) ? -1 : value;
+    }
+
+    /**
+     * Returns the numeric value of a decimal digit code point.
+     * <br>This is the decimal counterpart of <code>digit(int, int)</code>;
+     * it delegates to <code>UCharacterProperty.INSTANCE.digit(ch)</code>.
+     * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
+     * treated numeric letters and other numbers as digits.  This has
+     * been changed to conform to the java semantics.
+     * @param ch the code point to query
+     * @return the numeric value represented by the code point,
+     * or -1 if the code point is not a decimal digit or if its
+     * value is too large for a decimal radix
+     * @stable ICU 2.1
+     */
+    public static int digit(int ch)
+    {
+        return UCharacterProperty.INSTANCE.digit(ch);
+    }
+
+    /**
+     * Returns a value indicating a code point's Unicode category.
+     * Up-to-date Unicode implementation of java.lang.Character.getType()
+     * except for the above mentioned code points that had their category
+     * changed.<br>
+     * Return results are constants from the interface
+     * <a href="UCharacterCategory.html">UCharacterCategory</a><br>
+     * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
+     * those returned by java.lang.Character.getType.  UCharacterCategory values
+     * match the ones used in ICU4C, while java.lang.Character type
+     * values, though similar, skip the value 17.
+     * @param ch code point whose type is to be determined
+     * @return category which is a value of UCharacterCategory
+     * @stable ICU 2.1
+     */
+    public static int getType(int ch)
+    {
+        return UCharacterProperty.INSTANCE.getType(ch);
+    }
+
+    /**
+     * Returns the bidirectional (Bidi class) property of a code point.
+     * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
+     * property.<br>
+     * Result returned belongs to the interface
+     * <a href="UCharacterDirection.html">UCharacterDirection</a>
+     * @param ch the code point whose direction is to be determined
+     * @return direction constant from UCharacterDirection.
+     * @stable ICU 2.1
+     */
+    public static int getDirection(int ch)
+    {
+        return UBiDiProps.INSTANCE.getClass(ch);
+    }
+
+    /**
+     * Maps the specified code point to a "mirror-image" code point.
+     * For code points with the "mirrored" property, implementations sometimes
+     * need a "poor man's" mapping to another code point such that the default
+     * glyph may serve as the mirror-image of the default glyph of the
+     * specified code point.<br>
+     * This is useful for text conversion to and from codepages with visual
+     * order, and for displays without glyph selection capabilities.
+     * The lookup is delegated to <code>UBiDiProps.INSTANCE.getMirror(ch)</code>.
+     * @param ch code point whose mirror is to be retrieved
+     * @return another code point that may serve as a mirror-image substitute,
+     *         or ch itself if there is no such mapping or ch does not have the
+     *         "mirrored" property
+     * @stable ICU 2.1 */
+    public static int getMirror(int ch)
+    {
+        return UBiDiProps.INSTANCE.getMirror(ch);
+    }
+
+    /**
+     * Maps the specified character to its paired bracket character.
+     * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
+     * Otherwise c itself is returned.
+     * See UAX #9, http://www.unicode.org/reports/tr9/
+     *
+     * @param c the code point to be mapped
+     * @return the paired bracket code point,
+     *         or c itself if there is no such mapping
+     *         (Bidi_Paired_Bracket_Type=None)
+     *
+     * @see UProperty#BIDI_PAIRED_BRACKET
+     * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
+     * @see #getMirror(int)
+     * @stable ICU 52
+     */
+    public static int getBidiPairedBracket(int c) {
+        return UBiDiProps.INSTANCE.getPairedBracket(c);
+    }
+
+    /**
+     * Returns the combining class of the argument codepoint, as reported by the NFD Normalizer2.
+     * @param ch code point whose combining class is to be retrieved
+     * @return the combining class of the codepoint
+     * @stable ICU 2.1
+     */
+    public static int getCombiningClass(int ch)
+    {
+        return Normalizer2.getNFDInstance().getCombiningClass(ch);
+    }
+
+    /**
+     * Returns the version of Unicode data used, as held by UCharacterProperty.INSTANCE.
+     * @return the unicode version number used
+     * @stable ICU 2.1
+     */
+    public static VersionInfo getUnicodeVersion()
+    {
+        return UCharacterProperty.INSTANCE.m_unicodeVersion_;
+    }
+
+    /**
+     * Combines a UTF-16 surrogate pair into the code point it encodes.
+     * @param lead the lead (first) surrogate
+     * @param trail the trail (second) surrogate
+     * @return the code point formed by the two surrogates
+     * @exception IllegalArgumentException thrown when lead is not a lead
+     *            surrogate or trail is not a trail surrogate
+     * @stable ICU 2.1
+     */
+    public static int getCodePoint(char lead, char trail)
+    {
+        if (!UTF16.isLeadSurrogate(lead) || !UTF16.isTrailSurrogate(trail)) {
+            throw new IllegalArgumentException("Illegal surrogate characters");
+        }
+        return UCharacterProperty.getRawSupplementary(lead, trail);
+    }
+
+    /**
+     * Returns the "age" of the code point.
+     * <p>The "age" is the Unicode version when the code point was first
+     * designated (as a non-character or for Private Use) or assigned a
+     * character. This can be useful to avoid emitting code points to
+     * receiving processes that do not accept newer characters.</p>
+     * <p>The data is from the UCD file DerivedAge.txt.</p>
+     * @param ch The code point.
+     * @return the Unicode version number
+     * @throws IllegalArgumentException if ch is outside MIN_VALUE..MAX_VALUE
+     * @stable ICU 2.6
+     */
+    public static VersionInfo getAge(int ch)
+    {
+        if (MIN_VALUE <= ch && ch <= MAX_VALUE) {
+            return UCharacterProperty.INSTANCE.getAge(ch);
+        }
+        throw new IllegalArgumentException("Codepoint out of bounds");
+    }
+
+    /**
+     * Returns the property value for an Unicode property type of a code point.
+     * Also returns binary and mask property values.
+     * <p>Unicode, especially in version 3.2, defines many more properties than
+     * the original set in UnicodeData.txt.</p>
+     * <p>The properties APIs are intended to reflect Unicode properties as
+     * defined in the Unicode Character Database (UCD) and Unicode Technical
+     * Reports (UTR). For details about the properties see
+     * http://www.unicode.org/.</p>
+     * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
+     * </p>
+     * <pre>
+     * Sample usage:
+     * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
+     * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
+     * boolean b = (ideo == 1) ? true : false;
+     * </pre>
+     * @param ch code point to test.
+     * @param type UProperty selector constant, identifies which binary
+     *        property to check. Must be
+     *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
+     *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
+     *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
+     * @return numeric value that is directly the property value or,
+     *         for enumerated properties, corresponds to the numeric value of
+     *         the enumerated constant of the respective property value
+     *         enumeration type (cast to enum type if necessary).
+     *         Returns 0 or 1 (for false / true) for binary Unicode properties.
+     *         Returns a bit-mask for mask properties.
+     *         Returns 0 if 'type' is out of bounds or if the Unicode version
+     *         does not have data for the property at all, or not for this code
+     *         point.
+     * @see UProperty
+     * @see #hasBinaryProperty
+     * @see #getIntPropertyMinValue
+     * @see #getIntPropertyMaxValue
+     * @see #getUnicodeVersion
+     * @stable ICU 2.4
+     */
+     // retained for BidiBase.java
+    public static int getIntPropertyValue(int ch, int type) {
+        return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
+    }
+
+    // private constructor -----------------------------------------------
+
+    /**
+     * Private constructor to prevent instantiation; the class only exposes static members.
+     */
+    private UCharacter() { }
+
+      /*
+       * Character category and direction constants copied from UCharacterEnums.java
+       */
+
+        /**
+         * Character type Mn (category value 6)
+         * @stable ICU 2.1
+         */
+        public static final byte NON_SPACING_MARK        = 6;
+        /**
+         * Character type Me (category value 7)
+         * @stable ICU 2.1
+         */
+        public static final byte ENCLOSING_MARK          = 7;
+        /**
+         * Character type Mc (category value 8)
+         * @stable ICU 2.1
+         */
+        public static final byte COMBINING_SPACING_MARK  = 8;
+        /**
+         * Character type count (30)
+         * @stable ICU 2.1
+         */
+        public static final byte CHAR_CATEGORY_COUNT     = 30;
+
+        /**
+         * Directional type R (direction value 1)
+         * @stable ICU 2.1
+         */
+        public static final int RIGHT_TO_LEFT              = 1;
+        /**
+         * Directional type AL (direction value 13)
+         * @stable ICU 2.1
+         */
+        public static final int RIGHT_TO_LEFT_ARABIC       = 13;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/lang/UCharacterDirection.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+/**
+*******************************************************************************
+* Copyright (C) 1996-2004, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*/
+// CHANGELOG
+//      2005-05-19 Edward Wang
+//          - copy this file from icu4jsrc_3_2/src/com/ibm/icu/lang/UCharacterDirection.java
+//          - move from package com.ibm.icu.lang to package sun.net.idn
+//
+
+package jdk.internal.icu.lang;
+
+/**
+ * Enumerated Unicode character linguistic direction constants,
+ * inherited from {@link UCharacterEnums.ECharacterDirection}, together with
+ * a helper that turns such a constant into a display name.
+ * Used as return results from {@link UCharacter}.
+ * <p>
+ * This class is final and its only constructor is private, so it can be
+ * neither subclassed nor instantiated from outside.
+ * </p>
+ * @author Syn Wee Quek
+ * @stable ICU 2.1
+ */
+@SuppressWarnings("deprecation") // the implemented interface is @Deprecated
+public final class UCharacterDirection implements UCharacterEnums.ECharacterDirection {
+
+    // private constructor =========================================
+    ///CLOVER:OFF
+    /**
+     * Private no-op constructor; it exists only to block instantiation.
+     */
+    private UCharacterDirection() {
+        // intentionally empty
+    }
+    ///CLOVER:ON
+
+    // public methods ==============================================
+    /**
+     * Returns the display name of a direction constant.
+     * <p>
+     * The result is a fixed English label and is not localized. It does not
+     * always echo the identifier of the constant:
+     * </p>
+     * <ul>
+     *   <li>{@code BLOCK_SEPARATOR} is reported as "Paragraph Separator"</li>
+     *   <li>{@code WHITE_SPACE_NEUTRAL} is reported as "Whitespace"</li>
+     *   <li>{@code OTHER_NEUTRAL} is reported as "Other Neutrals"</li>
+     *   <li>{@code DIR_NON_SPACING_MARK} is reported as "Non-Spacing Mark"</li>
+     * </ul>
+     * <p>
+     * A value that matches none of the constants handled here yields
+     * "Unassigned"; no exception is thrown.
+     * </p>
+     * @param dir direction type to retrieve name
+     * @return directional name, never {@code null}
+     * @stable ICU 2.1
+     */
+    public static String toString(int dir) {
+        // The case labels are the int constants inherited from
+        // UCharacterEnums.ECharacterDirection; each maps to one fixed label.
+        switch (dir) {
+        case LEFT_TO_RIGHT:              return "Left-to-Right";
+        case RIGHT_TO_LEFT:              return "Right-to-Left";
+        case EUROPEAN_NUMBER:            return "European Number";
+        case EUROPEAN_NUMBER_SEPARATOR:  return "European Number Separator";
+        case EUROPEAN_NUMBER_TERMINATOR: return "European Number Terminator";
+        case ARABIC_NUMBER:              return "Arabic Number";
+        case COMMON_NUMBER_SEPARATOR:    return "Common Number Separator";
+        case BLOCK_SEPARATOR:            return "Paragraph Separator";
+        case SEGMENT_SEPARATOR:          return "Segment Separator";
+        case WHITE_SPACE_NEUTRAL:        return "Whitespace";
+        case OTHER_NEUTRAL:              return "Other Neutrals";
+        case LEFT_TO_RIGHT_EMBEDDING:    return "Left-to-Right Embedding";
+        case LEFT_TO_RIGHT_OVERRIDE:     return "Left-to-Right Override";
+        case RIGHT_TO_LEFT_ARABIC:       return "Right-to-Left Arabic";
+        case RIGHT_TO_LEFT_EMBEDDING:    return "Right-to-Left Embedding";
+        case RIGHT_TO_LEFT_OVERRIDE:     return "Right-to-Left Override";
+        case POP_DIRECTIONAL_FORMAT:     return "Pop Directional Format";
+        case DIR_NON_SPACING_MARK:       return "Non-Spacing Mark";
+        case BOUNDARY_NEUTRAL:           return "Boundary Neutral";
+        // Every other value gets the generic label.
+        default:                         return "Unassigned";
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/java.base/share/classes/jdk/internal/icu/lang/UCharacterEnums.java	Mon Jan 13 08:05:59 2020 -0800
@@ -0,0 +1,588 @@
+/*
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/*
+/**
+ *******************************************************************************
+ * Copyright (C) 2004, International Business Machines Corporation and         *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+// CHANGELOG
+//      2005-05-19 Edward Wang
+//          - copy this file from icu4jsrc_3_2/src/com/ibm/icu/lang/UCharacterEnums.java
+//          - move from package com.ibm.icu.lang to package sun.net.idn
+//
+//      2011-09-06 Kurchi Subhra Hazra
+//          - Added @Deprecated tag to the following:
+//            - class UCharacterEnums
+//            - interfaces ECharacterCategory, ECharacterDirection
+//            - fields INITIAL_QUOTE_PUNCTUATION, FINAL_QUOTE_PUNCTUATION,
+//              DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
+//              DIRECTIONALITY_EUROPEAN_NUMBER, DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
+//              DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, DIRECTIONALITY_ARABIC_NUMBER,
+//              DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, DIRECTIONALITY_PARAGRAPH_SEPARATOR,
+//              DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
+//              DIRECTIONALITY_OTHER_NEUTRALS, DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
+//              DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
+//              DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
+//              DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, DIRECTIONALITY_NON_SPACING_MARK,
+//              DIRECTIONALITY_BOUNDARY_NEUTRAL, DIRECTIONALITY_UNDEFINED
+//
+
+package jdk.internal.icu.lang;
+
+/**
+ * A container for the different 'enumerated types' used by UCharacter.
+ * @draft ICU 3.0
+ * @deprecated This is a draft API and might change in a future release of ICU.
+ */
+
+@Deprecated
+class UCharacterEnums {
+
+    /** Namespace-only class: the private constructor keeps it from being instantiated. */
+    private UCharacterEnums() {}
+
+    /**
+     * 'Enum' for the CharacterCategory constants.  These constants are
+     * compatible in name, <b>but not in value</b>, with those defined in
+     * <code>java.lang.Character</code>.
+     * @see UCharacterCategory
+     * @draft ICU 3.0
+     * @deprecated This is a draft API and might change in a future release of ICU.
+     */
+    @Deprecated
+    public static interface ECharacterCategory {
+        /**
+         * Unassigned character type.
+         * @see #GENERAL_OTHER_TYPES
+         * @stable ICU 2.1
+         */
+        int UNASSIGNED                = 0;
+
+        /**
+         * Character type Cn; shares its value with {@link #UNASSIGNED}.
+         * Not Assigned (no characters in [UnicodeData.txt] have this property)
+         * @stable ICU 2.6
+         */
+        int GENERAL_OTHER_TYPES       = UNASSIGNED;
+
+        /**
+         * Character type Lu (uppercase letter).
+         * @stable ICU 2.1
+         */
+        int UPPERCASE_LETTER          = 1;
+
+        /**
+         * Character type Ll (lowercase letter).
+         * @stable ICU 2.1
+         */
+        int LOWERCASE_LETTER          = 2;
+
+        /**
+         * Character type Lt (titlecase letter).
+         * @stable ICU 2.1
+         */
+        int TITLECASE_LETTER          = 3;
+
+        /**
+         * Character type Lm (modifier letter).
+         * @stable ICU 2.1
+         */
+        int MODIFIER_LETTER           = 4;
+
+        /**
+         * Character type Lo (other letter).
+         * @stable ICU 2.1
+         */
+        int OTHER_LETTER              = 5;
+
+        /**
+         * Character type Mn (non-spacing mark).
+         * @stable ICU 2.1
+         */
+        int NON_SPACING_MARK          = 6;
+
+        /**
+         * Character type Me (enclosing mark).
+         * @stable ICU 2.1
+         */
+        int ENCLOSING_MARK            = 7;
+
+        /**
+         * Character type Mc (combining spacing mark).
+         * @stable ICU 2.1
+         */
+        int COMBINING_SPACING_MARK    = 8;
+
+        /**
+         * Character type Nd (decimal digit number).
+         * @stable ICU 2.1
+         */
+        int DECIMAL_DIGIT_NUMBER      = 9;
+
+        /**
+         * Character type Nl (letter number).
+         * @stable ICU 2.1
+         */
+        int LETTER_NUMBER             = 10;
+
+        /**
+         * Character type No (other number).
+         * @stable ICU 2.1
+         */
+        int OTHER_NUMBER              = 11;
+
+        /**
+         * Character type Zs (space separator).
+         * @stable ICU 2.1
+         */
+        int SPACE_SEPARATOR           = 12;
+
+        /**
+         * Character type Zl (line separator).
+         * @stable ICU 2.1
+         */
+        int LINE_SEPARATOR            = 13;
+
+        /**
+         * Character type Zp (paragraph separator).
+         * @stable ICU 2.1
+         */
+        int PARAGRAPH_SEPARATOR       = 14;
+
+        /**
+         * Character type Cc (control).
+         * @stable ICU 2.1
+         */
+        int CONTROL                   = 15;
+
+        /**
+         * Character type Cf (format).
+         * @stable ICU 2.1
+         */
+        int FORMAT                    = 16;
+
+        /**
+         * Character type Co (private use).
+         * @stable ICU 2.1
+         */
+        int PRIVATE_USE               = 17;
+
+        /**
+         * Character type Cs (surrogate).
+         * @stable ICU 2.1
+         */
+        int SURROGATE                 = 18;
+
+        /**
+         * Character type Pd (dash punctuation).
+         * @stable ICU 2.1
+         */
+        int DASH_PUNCTUATION          = 19;
+
+        /**
+         * Character type Ps (start punctuation).
+         * @stable ICU 2.1
+         */
+        int START_PUNCTUATION         = 20;
+
+        /**
+         * Character type Pe (end punctuation).
+         * @stable ICU 2.1
+         */
+        int END_PUNCTUATION           = 21;
+
+        /**
+         * Character type Pc (connector punctuation).
+         * @stable ICU 2.1
+         */
+        int CONNECTOR_PUNCTUATION     = 22;
+
+        /**
+         * Character type Po (other punctuation).
+         * @stable ICU 2.1
+         */
+        int OTHER_PUNCTUATION         = 23;
+
+        /**
+         * Character type Sm (math symbol).
+         * @stable ICU 2.1
+         */
+        int MATH_SYMBOL               = 24;
+
+        /**
+         * Character type Sc (currency symbol).
+         * @stable ICU 2.1
+         */
+        int CURRENCY_SYMBOL           = 25;
+
+        /**
+         * Character type Sk (modifier symbol).
+         * @stable ICU 2.1
+         */
+        int MODIFIER_SYMBOL           = 26;
+
+        /**
+         * Character type So (other symbol).
+         * @stable ICU 2.1
+         */
+        int OTHER_SYMBOL              = 27;
+
+        /**
+         * Character type Pi (initial punctuation).
+         * @see #INITIAL_QUOTE_PUNCTUATION
+         * @stable ICU 2.1
+         */
+        int INITIAL_PUNCTUATION       = 28;
+
+        /**
+         * Character type Pi; alias of {@link #INITIAL_PUNCTUATION}.
+         * This name is compatible with java.lang.Character's name for this type.
+         * @see #INITIAL_PUNCTUATION
+         * @draft ICU 2.8
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        int INITIAL_QUOTE_PUNCTUATION = INITIAL_PUNCTUATION;
+
+        /**
+         * Character type Pf (final punctuation).
+         * @see #FINAL_QUOTE_PUNCTUATION
+         * @stable ICU 2.1
+         */
+        int FINAL_PUNCTUATION         = 29;
+
+        /**
+         * Character type Pf; alias of {@link #FINAL_PUNCTUATION}.
+         * This name is compatible with java.lang.Character's name for this type.
+         * @see #FINAL_PUNCTUATION
+         * @draft ICU 2.8
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        int FINAL_QUOTE_PUNCTUATION   = FINAL_PUNCTUATION;
+
+        /**
+         * Character type count: the types above are numbered 0 through 29.
+         * @stable ICU 2.1
+         */
+        int CHAR_CATEGORY_COUNT       = 30;
+    }
+
+    /**
+     * 'Enum' for the CharacterDirection constants.  There are two sets
+     * of names, those used in ICU, and those used in the JDK.  The
+     * JDK constants are compatible in name <b>but not in value</b>
+     * with those defined in <code>java.lang.Character</code>.
+     * @see UCharacterDirection
+     * @draft ICU 3.0
+     * @deprecated This is a draft API and might change in a future release of ICU.
+     */
+
+    @Deprecated
+    public static interface ECharacterDirection {
+        /**
+         * Directional type L
+         * @stable ICU 2.1
+         */
+        public static final int LEFT_TO_RIGHT              = 0;
+
+        /**
+         * JDK-compatible synonym for LEFT_TO_RIGHT.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = (byte)LEFT_TO_RIGHT;
+
+        /**
+         * Directional type R
+         * @stable ICU 2.1
+         */
+        public static final int RIGHT_TO_LEFT              = 1;
+
+        /**
+         * JDK-compatible synonym for RIGHT_TO_LEFT.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = (byte)RIGHT_TO_LEFT;
+
+        /**
+         * Directional type EN
+         * @stable ICU 2.1
+         */
+        public static final int EUROPEAN_NUMBER            = 2;
+
+        /**
+         * JDK-compatible synonym for EUROPEAN_NUMBER.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = (byte)EUROPEAN_NUMBER;
+
+        /**
+         * Directional type ES
+         * @stable ICU 2.1
+         */
+        public static final int EUROPEAN_NUMBER_SEPARATOR  = 3;
+
+        /**
+         * JDK-compatible synonym for EUROPEAN_NUMBER_SEPARATOR.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = (byte)EUROPEAN_NUMBER_SEPARATOR;
+
+        /**
+         * Directional type ET
+         * @stable ICU 2.1
+         */
+        public static final int EUROPEAN_NUMBER_TERMINATOR = 4;
+
+        /**
+         * JDK-compatible synonym for EUROPEAN_NUMBER_TERMINATOR.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = (byte)EUROPEAN_NUMBER_TERMINATOR;
+
+        /**
+         * Directional type AN
+         * @stable ICU 2.1
+         */
+        public static final int ARABIC_NUMBER              = 5;
+
+        /**
+         * JDK-compatible synonym for ARABIC_NUMBER.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_ARABIC_NUMBER = (byte)ARABIC_NUMBER;
+
+        /**
+         * Directional type CS
+         * @stable ICU 2.1
+         */
+        public static final int COMMON_NUMBER_SEPARATOR    = 6;
+
+        /**
+         * JDK-compatible synonym for COMMON_NUMBER_SEPARATOR.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = (byte)COMMON_NUMBER_SEPARATOR;
+
+        /**
+         * Directional type B
+         * @stable ICU 2.1
+         */
+        public static final int BLOCK_SEPARATOR            = 7;
+
+        /**
+         * JDK-compatible synonym for BLOCK_SEPARATOR.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = (byte)BLOCK_SEPARATOR;
+
+        /**
+         * Directional type S
+         * @stable ICU 2.1
+         */
+        public static final int SEGMENT_SEPARATOR          = 8;
+
+        /**
+         * JDK-compatible synonym for SEGMENT_SEPARATOR.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = (byte)SEGMENT_SEPARATOR;
+
+        /**
+         * Directional type WS
+         * @stable ICU 2.1
+         */
+        public static final int WHITE_SPACE_NEUTRAL        = 9;
+
+        /**
+         * JDK-compatible synonym for WHITE_SPACE_NEUTRAL.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_WHITESPACE = (byte)WHITE_SPACE_NEUTRAL;
+
+        /**
+         * Directional type ON
+         * @stable ICU 2.1
+         */
+        public static final int OTHER_NEUTRAL              = 10;
+
+        /**
+         * JDK-compatible synonym for OTHER_NEUTRAL.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_OTHER_NEUTRALS = (byte)OTHER_NEUTRAL;
+
+        /**
+         * Directional type LRE
+         * @stable ICU 2.1
+         */
+        public static final int LEFT_TO_RIGHT_EMBEDDING    = 11;
+
+        /**
+         * JDK-compatible synonym for LEFT_TO_RIGHT_EMBEDDING.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = (byte)LEFT_TO_RIGHT_EMBEDDING;
+
+        /**
+         * Directional type LRO
+         * @stable ICU 2.1
+         */
+        public static final int LEFT_TO_RIGHT_OVERRIDE     = 12;
+
+        /**
+         * JDK-compatible synonym for LEFT_TO_RIGHT_OVERRIDE.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = (byte)LEFT_TO_RIGHT_OVERRIDE;
+
+        /**
+         * Directional type AL
+         * @stable ICU 2.1
+         */
+        public static final int RIGHT_TO_LEFT_ARABIC       = 13;
+
+        /**
+         * JDK-compatible synonym for RIGHT_TO_LEFT_ARABIC.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = (byte)RIGHT_TO_LEFT_ARABIC;
+
+        /**
+         * Directional type RLE
+         * @stable ICU 2.1
+         */
+        public static final int RIGHT_TO_LEFT_EMBEDDING    = 14;
+
+        /**
+         * JDK-compatible synonym for RIGHT_TO_LEFT_EMBEDDING.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = (byte)RIGHT_TO_LEFT_EMBEDDING;
+
+        /**
+         * Directional type RLO
+         * @stable ICU 2.1
+         */
+        public static final int RIGHT_TO_LEFT_OVERRIDE     = 15;
+
+        /**
+         * JDK-compatible synonym for RIGHT_TO_LEFT_OVERRIDE.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = (byte)RIGHT_TO_LEFT_OVERRIDE;
+
+        /**
+         * Directional type PDF
+         * @stable ICU 2.1
+         */
+        public static final int POP_DIRECTIONAL_FORMAT     = 16;
+
+        /**
+         * JDK-compatible synonym for POP_DIRECTIONAL_FORMAT.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = (byte)POP_DIRECTIONAL_FORMAT;
+
+        /**
+         * Directional type NSM
+         * @stable ICU 2.1
+         */
+        public static final int DIR_NON_SPACING_MARK       = 17;
+
+        /**
+         * JDK-compatible synonym for DIR_NON_SPACING_MARK.
+         * @draft ICU 3.0
+         * @deprecated This is a draft API and might change in a future release of ICU.
+         */
+        @Deprecated
+        public static final byte DIRECTIONALITY_NON_SPACING_MARK = (byte)DIR_NON_SPACING_MARK;
+
+        /**
+         * Directional type BN
+         * @stable ICU 2.1
+         */
+        public static final int BOUNDARY_NEUTRAL           = 18;
+
+        /**
+         * JDK-compatible synonym for BOUNDARY_NEUTRAL.
+         * @draft ICU 3.0
+     * @deprecated This is a draft API and might change in a future release of