OpenJDK / jdk8 / jdk8 / jdk
changeset 1565:1ff977b938e5
6840246: Lightweight implementation of String.split for simple use case
Summary: Added a fastpath for simple use case
Reviewed-by: alanb, martin
author | sherman |
---|---|
date | Thu, 13 Aug 2009 10:50:23 -0700 |
parents | 82b66d0368ff |
children | 6797a2407a50 35f32639ee20 |
files | src/share/classes/java/lang/String.java test/java/lang/String/Split.java |
diffstat | 2 files changed, 95 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/share/classes/java/lang/String.java	Tue Aug 11 20:06:52 2009 -0600
+++ b/src/share/classes/java/lang/String.java	Thu Aug 13 10:50:23 2009 -0700
@@ -2301,6 +2301,54 @@
      * @spec JSR-51
      */
     public String[] split(String regex, int limit) {
+        /* fastpath if the regex is a
+           (1)one-char String and this character is not one of the
+              RegEx's meta characters ".$|()[{^?*+\\", or
+           (2)two-char String and the first char is the backslash and
+              the second is not the ascii digit or ascii letter.
+        */
+        char ch = 0;
+        if (((regex.count == 1 &&
+             ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
+             (regex.length() == 2 &&
+              regex.charAt(0) == '\\' &&
+              (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
+              ((ch-'a')|('z'-ch)) < 0 &&
+              ((ch-'A')|('Z'-ch)) < 0)) &&
+            (ch < Character.MIN_HIGH_SURROGATE ||
+             ch > Character.MAX_LOW_SURROGATE))
+        {
+            int off = 0;
+            int next = 0;
+            boolean limited = limit > 0;
+            ArrayList<String> list = new ArrayList<String>();
+            while ((next = indexOf(ch, off)) != -1) {
+                if (!limited || list.size() < limit - 1) {
+                    list.add(substring(off, next));
+                    off = next + 1;
+                } else { // last one
+                    //assert (list.size() == limit - 1);
+                    list.add(substring(off, count));
+                    off = count;
+                    break;
+                }
+            }
+            // If no match was found, return this
+            if (off == 0)
+                return new String[] { this };
+
+            // Add remaining segment
+            if (!limited || list.size() < limit)
+                list.add(substring(off, count));
+
+            // Construct result
+            int resultSize = list.size();
+            if (limit == 0)
+                while (resultSize > 0 && list.get(resultSize-1).length() == 0)
+                    resultSize--;
+            String[] result = new String[resultSize];
+            return list.subList(0, resultSize).toArray(result);
+        }
         return Pattern.compile(regex).split(this, limit);
     }
 
```
```diff
--- a/test/java/lang/String/Split.java	Tue Aug 11 20:06:52 2009 -0600
+++ b/test/java/lang/String/Split.java	Thu Aug 13 10:50:23 2009 -0700
@@ -23,14 +23,18 @@
 
 /**
  * @test
+ * @bug 6840246
  * @summary test String.split()
  */
+import java.util.Arrays;
+import java.util.Random;
 import java.util.regex.*;
 
 public class Split {
 
     public static void main(String[] args) throws Exception {
         String source = "0123456789";
+
         for (int limit=-2; limit<3; limit++) {
             for (int x=0; x<10; x++) {
                 String[] result = source.split(Integer.toString(x), limit);
@@ -80,5 +84,48 @@
             throw new RuntimeException("String.split failure 8");
         if (!result[0].equals(source))
             throw new RuntimeException("String.split failure 9");
+
+        // check fastpath of String.split()
+        source = "0123456789abcdefgABCDEFG";
+        Random r = new Random();
+
+        for (boolean doEscape: new boolean[] {false, true}) {
+            for (int cp = 0; cp < 0x11000; cp++) {
+                Pattern p = null;
+                String regex = new String(Character.toChars(cp));
+                if (doEscape)
+                    regex = "\\" + regex;
+                try {
+                    p = Pattern.compile(regex);
+                } catch (PatternSyntaxException pse) {
+                    // illegal syntax
+                    try {
+                        "abc".split(regex);
+                    } catch (PatternSyntaxException pse0) {
+                        continue;
+                    }
+                    throw new RuntimeException("String.split failure 11");
+                }
+                int off = r.nextInt(source.length());
+                String[] srcStrs = new String[] {
+                    "",
+                    source,
+                    regex + source,
+                    source + regex,
+                    source.substring(0, 3)
+                        + regex + source.substring(3, 9)
+                        + regex + source.substring(9, 15)
+                        + regex + source.substring(15),
+                    source.substring(0, off) + regex + source.substring(off)
+                };
+                for (String src: srcStrs) {
+                    for (int limit=-2; limit<3; limit++) {
+                        if (!Arrays.equals(src.split(regex, limit),
+                                           p.split(src, limit)))
+                            throw new RuntimeException("String.split failure 12");
+                    }
+                }
+            }
+        }
     }
 }
```