changeset 57765:6b794294d0e8

8236034: Use optimized Ques node for curly {0,1} quantifier
Reviewed-by: rriggs
author igerasim
date Tue, 21 Jan 2020 17:52:55 -0800
parents 93e22f0a6401
children 49a11875e369
files src/java.base/share/classes/java/util/regex/Pattern.java test/jdk/java/util/regex/RegExTest.java test/jdk/java/util/regex/TestCases.txt
diffstat 3 files changed, 102 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/src/java.base/share/classes/java/util/regex/Pattern.java	Tue Jan 21 17:37:18 2020 -0800
+++ b/src/java.base/share/classes/java/util/regex/Pattern.java	Tue Jan 21 17:52:55 2020 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3243,21 +3243,28 @@
         GREEDY, LAZY, POSSESSIVE, INDEPENDENT
     }
 
-    private Node curly(Node prev, int cmin) {
+    private Qtype qtype() {
         int ch = next();
         if (ch == '?') {
             next();
-            return new Curly(prev, cmin, MAX_REPS, Qtype.LAZY);
+            return Qtype.LAZY;
         } else if (ch == '+') {
             next();
-            return new Curly(prev, cmin, MAX_REPS, Qtype.POSSESSIVE);
-        }
-        if (prev instanceof BmpCharProperty) {
-            return new BmpCharPropertyGreedy((BmpCharProperty)prev, cmin);
-        } else if (prev instanceof CharProperty) {
-            return new CharPropertyGreedy((CharProperty)prev, cmin);
-        }
-        return new Curly(prev, cmin, MAX_REPS, Qtype.GREEDY);
+            return Qtype.POSSESSIVE;
+        }
+        return Qtype.GREEDY;
+    }
+
+    private Node curly(Node prev, int cmin) {
+        Qtype qtype = qtype();
+        if (qtype == Qtype.GREEDY) {
+            if (prev instanceof BmpCharProperty) {
+                return new BmpCharPropertyGreedy((BmpCharProperty)prev, cmin);
+            } else if (prev instanceof CharProperty) {
+                return new CharPropertyGreedy((CharProperty)prev, cmin);
+            }
+        }
+        return new Curly(prev, cmin, MAX_REPS, qtype);
     }
 
     /**
@@ -3269,15 +3276,7 @@
         int ch = peek();
         switch (ch) {
         case '?':
-            ch = next();
-            if (ch == '?') {
-                next();
-                return new Ques(prev, Qtype.LAZY);
-            } else if (ch == '+') {
-                next();
-                return new Ques(prev, Qtype.POSSESSIVE);
-            }
-            return new Ques(prev, Qtype.GREEDY);
+            return new Ques(prev, qtype());
         case '*':
             return curly(prev, 0);
         case '+':
@@ -3314,16 +3313,10 @@
                     throw error("Unclosed counted closure");
                 if (cmax < cmin)
                     throw error("Illegal repetition range");
-                ch = peek();
-                if (ch == '?') {
-                    next();
-                    return new Curly(prev, cmin, cmax, Qtype.LAZY);
-                } else if (ch == '+') {
-                    next();
-                    return new Curly(prev, cmin, cmax, Qtype.POSSESSIVE);
-                } else {
-                    return new Curly(prev, cmin, cmax, Qtype.GREEDY);
-                }
+                unread();
+                return (cmin == 0 && cmax == 1)
+                        ? new Ques(prev, qtype())
+                        : new Curly(prev, cmin, cmax, qtype());
             } else {
                 throw error("Illegal repetition");
             }
--- a/test/jdk/java/util/regex/RegExTest.java	Tue Jan 21 17:37:18 2020 -0800
+++ b/test/jdk/java/util/regex/RegExTest.java	Tue Jan 21 17:52:55 2020 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,7 +35,7 @@
  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
  * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
  * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
- * 8194667 8197462 8184692 8221431 8224789 8228352 8230829
+ * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034
  *
  * @library /test/lib
  * @library /lib/testlibrary/java/lang
--- a/test/jdk/java/util/regex/TestCases.txt	Tue Jan 21 17:37:18 2020 -0800
+++ b/test/jdk/java/util/regex/TestCases.txt	Tue Jan 21 17:52:55 2020 -0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 1999, 2009, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -34,26 +34,50 @@
 a
 true a 1
 
+^(a){0,1}a
+a
+true a 1
+
 ^(aa(bb)?)+$
 aabbaa
 true aabbaa 2 aa bb
 
+^(aa(bb){0,1})+$
+aabbaa
+true aabbaa 2 aa bb
+
 ((a|b)?b)+
 b
 true b 2 b
 
+((a|b){0,1}b)+
+b
+true b 2 b
+
 (aaa)?aaa
 aaa
 true aaa 1
 
+(aaa){0,1}aaa
+aaa
+true aaa 1
+
 ^(a(b)?)+$
 aba
 true aba 2 a b
 
+^(a(b){0,1})+$
+aba
+true aba 2 a b
+
 ^(a(b(c)?)?)?abc
 abc
 true abc 3
 
+^(a(b(c){0,1}){0,1}){0,1}abc
+abc
+true abc 3
+
 ^(a(b(c))).*
 abc
 true abc 3 abc bc c
@@ -750,52 +774,100 @@
 aaaab
 true ab 0
 
+a{0,1}b
+aaaab
+true ab 0
+
 a?b
 b
 true b 0
 
+a{0,1}b
+b
+true b 0
+
 a?b
 aaaccc
 false 0
 
+a{0,1}b
+aaaccc
+false 0
+
 .?b
 aaaab
 true ab 0
 
+.{0,1}b
+aaaab
+true ab 0
+
 // Reluctant ? metacharacter
 a??b
 aaaab
 true ab 0
 
+a{0,1}?b
+aaaab
+true ab 0
+
 a??b
 b
 true b 0
 
+a{0,1}?b
+b
+true b 0
+
 a??b
 aaaccc
 false 0
 
+a{0,1}?b
+aaaccc
+false 0
+
 .??b
 aaaab
 true ab 0
 
+.{0,1}?b
+aaaab
+true ab 0
+
 // Possessive ? metacharacter
 a?+b
 aaaab
 true ab 0
 
+a{0,1}+b
+aaaab
+true ab 0
+
 a?+b
 b
 true b 0
 
+a{0,1}+b
+b
+true b 0
+
 a?+b
 aaaccc
 false 0
 
+a{0,1}+b
+aaaccc
+false 0
+
 .?+b
 aaaab
 true ab 0
 
+.{0,1}+b
+aaaab
+true ab 0
+
 // Greedy + metacharacter
 a+b
 aaaab
@@ -1155,3 +1227,7 @@
 (|f)?+
 foo
 true  1 
+
+(|f){0,1}+
+foo
+true  1