OpenJDK / jdk / jdk
changeset 57765:6b794294d0e8
8236034: Use optimized Ques node for curly {0,1} quantifier
Reviewed-by: rriggs
author | igerasim |
---|---|
date | Tue, 21 Jan 2020 17:52:55 -0800 |
parents | 93e22f0a6401 |
children | 49a11875e369 |
files | src/java.base/share/classes/java/util/regex/Pattern.java test/jdk/java/util/regex/RegExTest.java test/jdk/java/util/regex/TestCases.txt |
diffstat | 3 files changed, 102 insertions(+), 33 deletions(-) [+] |
line wrap: on
line diff
--- a/src/java.base/share/classes/java/util/regex/Pattern.java Tue Jan 21 17:37:18 2020 -0800
+++ b/src/java.base/share/classes/java/util/regex/Pattern.java Tue Jan 21 17:52:55 2020 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3243,21 +3243,28 @@
         GREEDY, LAZY, POSSESSIVE, INDEPENDENT
     }
 
-    private Node curly(Node prev, int cmin) {
+    private Qtype qtype() {
         int ch = next();
         if (ch == '?') {
             next();
-            return new Curly(prev, cmin, MAX_REPS, Qtype.LAZY);
+            return Qtype.LAZY;
         } else if (ch == '+') {
             next();
-            return new Curly(prev, cmin, MAX_REPS, Qtype.POSSESSIVE);
-        }
-        if (prev instanceof BmpCharProperty) {
-            return new BmpCharPropertyGreedy((BmpCharProperty)prev, cmin);
-        } else if (prev instanceof CharProperty) {
-            return new CharPropertyGreedy((CharProperty)prev, cmin);
-        }
-        return new Curly(prev, cmin, MAX_REPS, Qtype.GREEDY);
+            return Qtype.POSSESSIVE;
+        }
+        return Qtype.GREEDY;
+    }
+
+    private Node curly(Node prev, int cmin) {
+        Qtype qtype = qtype();
+        if (qtype == Qtype.GREEDY) {
+            if (prev instanceof BmpCharProperty) {
+                return new BmpCharPropertyGreedy((BmpCharProperty)prev, cmin);
+            } else if (prev instanceof CharProperty) {
+                return new CharPropertyGreedy((CharProperty)prev, cmin);
+            }
+        }
+        return new Curly(prev, cmin, MAX_REPS, qtype);
     }
 
     /**
@@ -3269,15 +3276,7 @@
         int ch = peek();
         switch (ch) {
         case '?':
-            ch = next();
-            if (ch == '?') {
-                next();
-                return new Ques(prev, Qtype.LAZY);
-            } else if (ch == '+') {
-                next();
-                return new Ques(prev, Qtype.POSSESSIVE);
-            }
-            return new Ques(prev, Qtype.GREEDY);
+            return new Ques(prev, qtype());
         case '*':
             return curly(prev, 0);
         case '+':
@@ -3314,16 +3313,10 @@
                     throw error("Unclosed counted closure");
                 if (cmax < cmin)
                     throw error("Illegal repetition range");
-                ch = peek();
-                if (ch == '?') {
-                    next();
-                    return new Curly(prev, cmin, cmax, Qtype.LAZY);
-                } else if (ch == '+') {
-                    next();
-                    return new Curly(prev, cmin, cmax, Qtype.POSSESSIVE);
-                } else {
-                    return new Curly(prev, cmin, cmax, Qtype.GREEDY);
-                }
+                unread();
+                return (cmin == 0 && cmax == 1)
+                        ? new Ques(prev, qtype())
+                        : new Curly(prev, cmin, cmax, qtype());
             } else {
                 throw error("Illegal repetition");
             }
--- a/test/jdk/java/util/regex/RegExTest.java Tue Jan 21 17:37:18 2020 -0800
+++ b/test/jdk/java/util/regex/RegExTest.java Tue Jan 21 17:52:55 2020 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,7 +35,7 @@
  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
  * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
  * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
- * 8194667 8197462 8184692 8221431 8224789 8228352 8230829
+ * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034
  *
  * @library /test/lib
  * @library /lib/testlibrary/java/lang
--- a/test/jdk/java/util/regex/TestCases.txt Tue Jan 21 17:37:18 2020 -0800
+++ b/test/jdk/java/util/regex/TestCases.txt Tue Jan 21 17:52:55 2020 -0800
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 1999, 2009, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -34,26 +34,50 @@
 a
 true a 1
 
+^(a){0,1}a
+a
+true a 1
+
 ^(aa(bb)?)+$
 aabbaa
 true aabbaa 2 aa bb
 
+^(aa(bb){0,1})+$
+aabbaa
+true aabbaa 2 aa bb
+
 ((a|b)?b)+
 b
 true b 2 b
 
+((a|b){0,1}b)+
+b
+true b 2 b
+
 (aaa)?aaa
 aaa
 true aaa 1
 
+(aaa){0,1}aaa
+aaa
+true aaa 1
+
 ^(a(b)?)+$
 aba
 true aba 2 a b
 
+^(a(b){0,1})+$
+aba
+true aba 2 a b
+
 ^(a(b(c)?)?)?abc
 abc
 true abc 3
 
+^(a(b(c){0,1}){0,1}){0,1}abc
+abc
+true abc 3
+
 ^(a(b(c))).*
 abc
 true abc 3 abc bc c
@@ -750,52 +774,100 @@
 aaaab
 true ab 0
 
+a{0,1}b
+aaaab
+true ab 0
+
 a?b
 b
 true b 0
 
+a{0,1}b
+b
+true b 0
+
 a?b
 aaaccc
 false 0
 
+a{0,1}b
+aaaccc
+false 0
+
 .?b
 aaaab
 true ab 0
 
+.{0,1}b
+aaaab
+true ab 0
+
 // Reluctant ? metacharacter
 a??b
 aaaab
 true ab 0
 
+a{0,1}?b
+aaaab
+true ab 0
+
 a??b
 b
 true b 0
 
+a{0,1}?b
+b
+true b 0
+
 a??b
 aaaccc
 false 0
 
+a{0,1}?b
+aaaccc
+false 0
+
 .??b
 aaaab
 true ab 0
 
+.{0,1}?b
+aaaab
+true ab 0
+
 // Possessive ? metacharacter
 a?+b
 aaaab
 true ab 0
 
+a{0,1}+b
+aaaab
+true ab 0
+
 a?+b
 b
 true b 0
 
+a{0,1}+b
+b
+true b 0
+
 a?+b
 aaaccc
 false 0
 
+a{0,1}+b
+aaaccc
+false 0
+
 .?+b
 aaaab
 true ab 0
 
+.{0,1}+b
+aaaab
+true ab 0
+
 // Greedy + metacharacter
 a+b
 aaaab
@@ -1155,3 +1227,7 @@
 (|f)?+
 foo
 true 1
+
+(|f){0,1}+
+foo
+true 1