16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 /**
25 * @test
26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
27 * @author Mike McCloskey
28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
36 * @library /lib/testlibrary
37 * @build jdk.testlibrary.*
38 * @run main RegExTest
39 * @key randomness
40 */
41
42 import java.util.function.Function;
43 import java.util.regex.*;
44 import java.util.Random;
45 import java.util.Scanner;
46 import java.io.*;
47 import java.nio.file.*;
48 import java.util.*;
49 import java.nio.CharBuffer;
50 import java.util.function.Predicate;
51 import jdk.testlibrary.RandomFactory;
52
53 /**
54 * This is a test class created to check the operation of
55 * the Pattern and Matcher classes.
145 wordSearchTest();
146 hitEndTest();
147 toMatchResultTest();
148 toMatchResultTest2();
149 surrogatesInClassTest();
150 removeQEQuotingTest();
151 namedGroupCaptureTest();
152 nonBmpClassComplementTest();
153 unicodePropertiesTest();
154 unicodeHexNotationTest();
155 unicodeClassesTest();
156 unicodeCharacterNameTest();
157 horizontalAndVerticalWSTest();
158 linebreakTest();
159 branchTest();
160 groupCurlyNotFoundSuppTest();
161 groupCurlyBackoffTest();
162 patternAsPredicate();
163 invalidFlags();
164 grapheme();
165
166 if (failure) {
167 throw new
168 RuntimeException("RegExTest failed, 1st failure: " +
169 firstFailure);
170 } else {
171 System.err.println("OKAY: All tests passed.");
172 }
173 }
174
175 // Utility functions
176
177 private static String getRandomAlphaString(int length) {
178 StringBuffer buf = new StringBuffer(length);
179 for (int i=0; i<length; i++) {
180 char randChar = (char)(97 + generator.nextInt(26));
181 buf.append(randChar);
182 }
183 return buf.toString();
184 }
2642 failCount++;
2643
2644 // Marks that cannot legally change order and be equivalent
2645 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2646 check(p, "testa\u0308\u0300", true);
2647 check(p, "testa\u0300\u0308", false);
2648
2649 // Marks that can legally change order and be equivalent
2650 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2651 check(p, "testa\u0308\u0323", true);
2652 check(p, "testa\u0323\u0308", true);
2653
2654 // Test all equivalences of the sequence a\u0308\u0323\u0300
2655 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2656 check(p, "testa\u0308\u0323\u0300", true);
2657 check(p, "testa\u0323\u0308\u0300", true);
2658 check(p, "testa\u0308\u0300\u0323", true);
2659 check(p, "test\u00e4\u0323\u0300", true);
2660 check(p, "test\u00e4\u0300\u0323", true);
2661
2662 /*
2663 * The following canonical equivalence tests don't work. Bug id: 4916384.
2664 *
2665 // Decomposed hangul (jamos)
2666 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2667 m = p.matcher("\u1100\u1161");
2668 if (!m.matches())
2669 failCount++;
2670
2671 m.reset("\uac00");
2672 if (!m.matches())
2673 failCount++;
2674
2675 // Composed hangul
2676 p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2677 m = p.matcher("\u1100\u1161");
2678 if (!m.matches())
2679 failCount++;
2680
2681 m.reset("\uac00");
2682 if (!m.matches())
2683 failCount++;
2684
2685 // Decomposed supplementary outside char classes
2686 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2687 m = p.matcher("test\ud834\uddc0");
2688 if (!m.matches())
2689 failCount++;
2690
2691 m.reset("test\ud834\uddbc\ud834\udd6f");
2692 if (!m.matches())
2693 failCount++;
2694
2695 // Composed supplementary outside char classes
2696 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2697 m.reset("test\ud834\uddbc\ud834\udd6f");
2698 if (!m.matches())
2699 failCount++;
2700
2701 m = p.matcher("test\ud834\uddc0");
2702 if (!m.matches())
2703 failCount++;
2704
2705 */
2706
2707 report("Canonical Equivalence");
2708 }
2709
2710 /**
2711 * A basic sanity test of Matcher.replaceAll().
2712 */
2713 private static void globalSubstitute() throws Exception {
2714 // Global substitution with a literal
2715 Pattern p = Pattern.compile("(ab)(c*)");
2716 Matcher m = p.matcher("abccczzzabcczzzabccc");
2717 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2718 failCount++;
2719
2720 m.reset("zzzabccczzzabcczzzabccczzz");
2721 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2722 failCount++;
2723
2724 // Global substitution with groups
2725 m.reset("zzzabccczzzabcczzzabccczzz");
2726 String result = m.replaceAll("$1");
3829 if (m.start() != m.start(0))
3830 failCount++;
3831 //assert(m.end() = m.end(0);
3832 if (m.start() != m.start(0))
3833 failCount++;
3834 //assert(m.group() = m.group(0);
3835 if (!m.group().equals(m.group(0)))
3836 failCount++;
3837 try {
3838 m.group(50);
3839 failCount++;
3840 } catch (IndexOutOfBoundsException ise) {}
3841
3842 return failCount;
3843 }
3844
3845 private static Pattern compileTestPattern(String patternString) {
3846 if (!patternString.startsWith("'")) {
3847 return Pattern.compile(patternString);
3848 }
3849
3850 int break1 = patternString.lastIndexOf("'");
3851 String flagString = patternString.substring(
3852 break1+1, patternString.length());
3853 patternString = patternString.substring(1, break1);
3854
3855 if (flagString.equals("i"))
3856 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3857
3858 if (flagString.equals("m"))
3859 return Pattern.compile(patternString, Pattern.MULTILINE);
3860
3861 return Pattern.compile(patternString);
3862 }
3863
3864 /**
3865 * Reads a line from the input file. Keeps reading lines until a non
3866 * empty non comment line is read. If the line contains a \n then
3867 * these two characters are replaced by a newline char. If a \\uxxxx
3868 * sequence is read then the sequence is replaced by the unicode char.
3869 */
4075 "${dog}",
4076 "zzzDogzzzDogzzz");
4077
4078 // backref in Matcher & String
4079 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4080 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4081 failCount++;
4082
4083 // negative
4084 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4085 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4086 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4087 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4088 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4089 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4090 "gnameX");
4091 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4092 report("NamedGroupCapture");
4093 }
4094
4095 // This is for bug 6969132
4096 private static void nonBmpClassComplementTest() throws Exception {
4097 Pattern p = Pattern.compile("\\P{Lu}");
4098 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4099 if (m.find() && m.start() == 1)
4100 failCount++;
4101
4102 // from a unicode category
4103 p = Pattern.compile("\\P{Lu}");
4104 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4105 if (m.find())
4106 failCount++;
4107 if (!m.hitEnd())
4108 failCount++;
4109
4110 // block
4111 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4112 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4113 if (m.find() && m.start() == 1)
4114 failCount++;
4115
4116 report("NonBmpClassComplement");
4117 }
4118
4119 private static void unicodePropertiesTest() throws Exception {
4120 // different forms
4121 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4122 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4123 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4124 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4125 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4126 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4127 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4128 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4129 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4130 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4131 failCount++;
4132
4133 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
4134 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4135 Matcher lastSM = common;
4645 }
4646 // (2) test \\b{g} + \\X via Scanner
4647 boolean hasNext = s.hasNext(p);
4648 // if (!s.hasNext() || !s.next().equals(next)) {
4649 if (!s.hasNext(p) || !s.next(p).equals(g)) {
4650 System.out.println("Failed b{g} [" + ln + "] : " + g);
4651 failCount++;
4652 }
4653 }
4654 });
4655 // some sanity checks
4656 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
4657 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
4658 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
4659 failCount++;
4660 // make sure "\b{n}" still works
4661 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
4662 failCount++;
4663 report("Unicode extended grapheme cluster");
4664 }
4665 }
|
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 /**
25 * @test
26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
27 * @author Mike McCloskey
28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384
37 * 6328855 6192895 6345469 6988218 6693451 7006761 8140212
38 *
39 * @library /lib/testlibrary
40 * @build jdk.testlibrary.*
41 * @run main RegExTest
42 * @key randomness
43 */
44
45 import java.util.function.Function;
46 import java.util.regex.*;
47 import java.util.Random;
48 import java.util.Scanner;
49 import java.io.*;
50 import java.nio.file.*;
51 import java.util.*;
52 import java.nio.CharBuffer;
53 import java.util.function.Predicate;
54 import jdk.testlibrary.RandomFactory;
55
56 /**
57 * This is a test class created to check the operation of
58 * the Pattern and Matcher classes.
148 wordSearchTest();
149 hitEndTest();
150 toMatchResultTest();
151 toMatchResultTest2();
152 surrogatesInClassTest();
153 removeQEQuotingTest();
154 namedGroupCaptureTest();
155 nonBmpClassComplementTest();
156 unicodePropertiesTest();
157 unicodeHexNotationTest();
158 unicodeClassesTest();
159 unicodeCharacterNameTest();
160 horizontalAndVerticalWSTest();
161 linebreakTest();
162 branchTest();
163 groupCurlyNotFoundSuppTest();
164 groupCurlyBackoffTest();
165 patternAsPredicate();
166 invalidFlags();
167 grapheme();
168 expoBacktracking();
169
170 if (failure) {
171 throw new
172 RuntimeException("RegExTest failed, 1st failure: " +
173 firstFailure);
174 } else {
175 System.err.println("OKAY: All tests passed.");
176 }
177 }
178
179 // Utility functions
180
181 private static String getRandomAlphaString(int length) {
182 StringBuffer buf = new StringBuffer(length);
183 for (int i=0; i<length; i++) {
184 char randChar = (char)(97 + generator.nextInt(26));
185 buf.append(randChar);
186 }
187 return buf.toString();
188 }
2646 failCount++;
2647
2648 // Marks that cannot legally change order and be equivalent
2649 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2650 check(p, "testa\u0308\u0300", true);
2651 check(p, "testa\u0300\u0308", false);
2652
2653 // Marks that can legally change order and be equivalent
2654 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2655 check(p, "testa\u0308\u0323", true);
2656 check(p, "testa\u0323\u0308", true);
2657
2658 // Test all equivalences of the sequence a\u0308\u0323\u0300
2659 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2660 check(p, "testa\u0308\u0323\u0300", true);
2661 check(p, "testa\u0323\u0308\u0300", true);
2662 check(p, "testa\u0308\u0300\u0323", true);
2663 check(p, "test\u00e4\u0323\u0300", true);
2664 check(p, "test\u00e4\u0300\u0323", true);
2665
2666 Object[][] data = new Object[][] {
2667
2668 // JDK-4867170
2669 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true },
2670 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true },
2671 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true },
2672 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true },
2673 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true },
2674 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true },
2675 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true },
2676
2677 { "\\p{IsGreek}", "ab\u1f80cd", "f", true },
2678 { "\\p{IsGreek}", "ab\u1f81cd", "f", true },
2679 { "\\p{IsGreek}", "ab\u1f82cd", "f", true },
2680 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true },
2681 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true },
2682
2683 // backtracking, force to match "\u1f80", instead of \u1f82"
2684 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true },
2685
2686 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true },
2687 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true },
2688
2689 { "[^\u1f80-\u1f82]","\u1f81", "m", false },
2690 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false },
2691 { "[^\u1f01\u0345]", "\u1f81", "f", false },
2692
2693 { "[^\u1f81]+", "\u1f80\u1f82", "f", true },
2694 { "[\u1f80]", "ab\u1f80cd", "f", true },
2695 { "\u1f80", "ab\u1f80cd", "f", true },
2696 { "\u1f00\u0345\u0300", "\u1f82", "m", true },
2697 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true },
2698 { "\u1f82", "\u1f00\u0345\u0300", "m", true },
2699 { "\u1f82", "\u1f80\u0300", "m", true },
2700
2701 // JDK-7080302 # compile failed
2702 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true},
2703
2704 // JDK-6728861, same cause as above one
2705 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true},
2706
2707 // JDK-6995635
2708 { "(\u00e9)", "e\u0301", "m", true },
2709
2710 // JDK-6736245
2711 // interesting special case: nfc(u2add+u0338) -> (u2add+u0338), NOT u2adc
2712 { "\u2ADC", "\u2ADC", "m", true}, // NFC
2713 { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD
2714
2715 // 4916384.
2716 // Decomposed hangul (jamos) works inside clazz
2717 { "[\u1100\u1161]", "\u1100\u1161", "m", true},
2718 { "[\u1100\u1161]", "\uac00", "m", true},
2719
2720 { "[\uac00]", "\u1100\u1161", "m", true},
2721 { "[\uac00]", "\uac00", "m", true},
2722
2723 // Decomposed hangul (jamos)
2724 { "\u1100\u1161", "\u1100\u1161", "m", true},
2725 { "\u1100\u1161", "\uac00", "m", true},
2726
2727 // Composed hangul
2728 { "\uac00", "\u1100\u1161", "m", true },
2729 { "\uac00", "\uac00", "m", true },
2730
2731 /* Need a NFDSlice to nfd the source to solve this issue
2732 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2733 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165>
2734 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2735
2736 // Decomposed supplementary outside char classes
2737 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2738 // Composed supplementary outside char classes
2739 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
2740 */
2741 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true },
2742 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
2743
2744 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true },
2745 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2746 };
2747
2748 int failCount = 0;
2749 for (Object[] d : data) {
2750 String pn = (String)d[0];
2751 String tt = (String)d[1];
2752 boolean isFind = "f".equals(((String)d[2]));
2753 boolean expected = (boolean)d[3];
2754 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find()
2755 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches();
2756 if (ret != expected) {
2757 failCount++;
2758 continue;
2759 }
2760 }
2761 report("Canonical Equivalence");
2762 }
2763
2764 /**
2765 * A basic sanity test of Matcher.replaceAll().
2766 */
2767 private static void globalSubstitute() throws Exception {
2768 // Global substitution with a literal
2769 Pattern p = Pattern.compile("(ab)(c*)");
2770 Matcher m = p.matcher("abccczzzabcczzzabccc");
2771 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2772 failCount++;
2773
2774 m.reset("zzzabccczzzabcczzzabccczzz");
2775 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2776 failCount++;
2777
2778 // Global substitution with groups
2779 m.reset("zzzabccczzzabcczzzabccczzz");
2780 String result = m.replaceAll("$1");
3883 if (m.start() != m.start(0))
3884 failCount++;
3885 //assert(m.end() = m.end(0);
3886 if (m.start() != m.start(0))
3887 failCount++;
3888 //assert(m.group() = m.group(0);
3889 if (!m.group().equals(m.group(0)))
3890 failCount++;
3891 try {
3892 m.group(50);
3893 failCount++;
3894 } catch (IndexOutOfBoundsException ise) {}
3895
3896 return failCount;
3897 }
3898
3899 private static Pattern compileTestPattern(String patternString) {
3900 if (!patternString.startsWith("'")) {
3901 return Pattern.compile(patternString);
3902 }
3903 int break1 = patternString.lastIndexOf("'");
3904 String flagString = patternString.substring(
3905 break1+1, patternString.length());
3906 patternString = patternString.substring(1, break1);
3907
3908 if (flagString.equals("i"))
3909 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3910
3911 if (flagString.equals("m"))
3912 return Pattern.compile(patternString, Pattern.MULTILINE);
3913
3914 return Pattern.compile(patternString);
3915 }
3916
3917 /**
3918 * Reads a line from the input file. Keeps reading lines until a non
3919 * empty non comment line is read. If the line contains a \n then
3920 * these two characters are replaced by a newline char. If a \\uxxxx
3921 * sequence is read then the sequence is replaced by the unicode char.
3922 */
4128 "${dog}",
4129 "zzzDogzzzDogzzz");
4130
4131 // backref in Matcher & String
4132 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4133 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4134 failCount++;
4135
4136 // negative
4137 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4138 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4139 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4140 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4141 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4142 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4143 "gnameX");
4144 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4145 report("NamedGroupCapture");
4146 }
4147
4148 // This is for bug 6919132
4149 private static void nonBmpClassComplementTest() throws Exception {
4150 Pattern p = Pattern.compile("\\P{Lu}");
4151 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4152
4153 if (m.find() && m.start() == 1)
4154 failCount++;
4155
4156 // from a unicode category
4157 p = Pattern.compile("\\P{Lu}");
4158 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4159 if (m.find())
4160 failCount++;
4161 if (!m.hitEnd())
4162 failCount++;
4163
4164 // block
4165 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4166 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4167 if (m.find() && m.start() == 1)
4168 failCount++;
4169
4170 p = Pattern.compile("\\P{sc=GRANTHA}");
4171 m = p.matcher(new String(new int[] {0x11350}, 0, 1));
4172 if (m.find() && m.start() == 1)
4173 failCount++;
4174
4175 report("NonBmpClassComplement");
4176 }
4177
4178 private static void unicodePropertiesTest() throws Exception {
4179 // different forms
4180 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4181 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4182 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4183 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4184 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4185 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4186 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4187 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4188 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4189 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4190 failCount++;
4191
4192 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
4193 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4194 Matcher lastSM = common;
4704 }
4705 // (2) test \\b{g} + \\X via Scanner
4706 boolean hasNext = s.hasNext(p);
4707 // if (!s.hasNext() || !s.next().equals(next)) {
4708 if (!s.hasNext(p) || !s.next(p).equals(g)) {
4709 System.out.println("Failed b{g} [" + ln + "] : " + g);
4710 failCount++;
4711 }
4712 }
4713 });
4714 // some sanity checks
4715 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
4716 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
4717 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
4718 failCount++;
4719 // make sure "\b{n}" still works
4720 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
4721 failCount++;
4722 report("Unicode extended grapheme cluster");
4723 }
4724
4725 // hangup/timeout if go into exponential backtracking
4726 private static void expoBacktracking() throws Exception {
4727
4728 Object[][] patternMatchers = {
4729 // 6328855
4730 { "(.*\n*)*",
4731 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)",
4732 false },
4733 // 6192895
4734 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4735 "Hello World this is a test this is a test this is a test A",
4736 true },
4737 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4738 "Hello World this is a test this is a test this is a test \u4e00 ",
4739 false },
4740 { " *([a-z0-9]+ *)+",
4741 "hello world this is a test this is a test this is a test A",
4742 false },
4743 // 4771934 [FIXED] #5013651?
4744 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4745 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com",
4746 true },
4747 // 4866249 [FIXED]
4748 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>",
4749 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">",
4750 true },
4751 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4752 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com",
4753 false },
4754 // 6345469
4755 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+",
4756 " < br/> < / p> <p> <html> <adfasfdasdf> </p>",
4757 true }, // --> matched
4758 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+",
4759 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>",
4760 false },
4761 // 5026912
4762 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$",
4763 "156580451111112225588087755221111111566969655555555",
4764 false},
4765 // 6988218
4766 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')",
4767 "'%)) order by ANGEBOT.ID",
4768 false}, // find
4769 // 6693451
4770 { "^(\\s*foo\\s*)*$",
4771 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo",
4772 true },
4773 { "^(\\s*foo\\s*)*$",
4774 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo",
4775 false
4776 },
4777 // 7006761
4778 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true},
4779 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false},
4780 // 8140212
4781 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)",
4782 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()",
4783 false
4784 },
4785 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true},
4786 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false},
4787
4788 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true },
4789 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4790
4791 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true},
4792 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4793
4794 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false},
4795
4796 /* not fixed
4797 //8132141 ---> second level exponential backtracking
4798 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*",
4799 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" },
4800 */
4801 };
4802
4803 for (Object[] pm : patternMatchers) {
4804 String p = (String)pm[0];
4805 String s = (String)pm[1];
4806 boolean r = (Boolean)pm[2];
4807 if (r != Pattern.compile(p).matcher(s).matches()) {
4808 failCount++;
4809 }
4810 }
4811 }
4812 }
|