16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 /**
25 * @test
26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
27 * @author Mike McCloskey
28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
36 * @library /lib/testlibrary
37 * @build jdk.testlibrary.*
38 * @run main RegExTest
39 * @key randomness
40 */
41
42 import java.util.function.Function;
43 import java.util.regex.*;
44 import java.util.Random;
45 import java.util.Scanner;
46 import java.io.*;
47 import java.nio.file.*;
48 import java.util.*;
49 import java.nio.CharBuffer;
50 import java.util.function.Predicate;
51 import jdk.testlibrary.RandomFactory;
52
53 /**
54 * This is a test class created to check the operation of
55 * the Pattern and Matcher classes.
2642 failCount++;
2643
2644 // Marks that cannot legally change order and be equivalent
2645 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2646 check(p, "testa\u0308\u0300", true);
2647 check(p, "testa\u0300\u0308", false);
2648
2649 // Marks that can legally change order and be equivalent
2650 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2651 check(p, "testa\u0308\u0323", true);
2652 check(p, "testa\u0323\u0308", true);
2653
2654 // Test all equivalences of the sequence a\u0308\u0323\u0300
2655 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2656 check(p, "testa\u0308\u0323\u0300", true);
2657 check(p, "testa\u0323\u0308\u0300", true);
2658 check(p, "testa\u0308\u0300\u0323", true);
2659 check(p, "test\u00e4\u0323\u0300", true);
2660 check(p, "test\u00e4\u0300\u0323", true);
2661
2662 /*
2663 * The following canonical equivalence tests don't work. Bug id: 4916384.
2664 *
2665 // Decomposed hangul (jamos)
2666 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2667 m = p.matcher("\u1100\u1161");
2668 if (!m.matches())
2669 failCount++;
2670
2671 m.reset("\uac00");
2672 if (!m.matches())
2673 failCount++;
2674
2675 // Composed hangul
2676 p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2677 m = p.matcher("\u1100\u1161");
2678 if (!m.matches())
2679 failCount++;
2680
2681 m.reset("\uac00");
2682 if (!m.matches())
2683 failCount++;
2684
2685 // Decomposed supplementary outside char classes
2686 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2687 m = p.matcher("test\ud834\uddc0");
2688 if (!m.matches())
2689 failCount++;
2690
2691 m.reset("test\ud834\uddbc\ud834\udd6f");
2692 if (!m.matches())
2693 failCount++;
2694
2695 // Composed supplementary outside char classes
2696 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2697 m.reset("test\ud834\uddbc\ud834\udd6f");
2698 if (!m.matches())
2699 failCount++;
2700
2701 m = p.matcher("test\ud834\uddc0");
2702 if (!m.matches())
2703 failCount++;
2704
2705 */
2706
2707 report("Canonical Equivalence");
2708 }
2709
2710 /**
2711 * A basic sanity test of Matcher.replaceAll().
2712 */
2713 private static void globalSubstitute() throws Exception {
2714 // Global substitution with a literal
2715 Pattern p = Pattern.compile("(ab)(c*)");
2716 Matcher m = p.matcher("abccczzzabcczzzabccc");
2717 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2718 failCount++;
2719
2720 m.reset("zzzabccczzzabcczzzabccczzz");
2721 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2722 failCount++;
2723
2724 // Global substitution with groups
2725 m.reset("zzzabccczzzabcczzzabccczzz");
2726 String result = m.replaceAll("$1");
3829 if (m.start() != m.start(0))
3830 failCount++;
3831 //assert(m.end() = m.end(0);
3832 if (m.start() != m.start(0))
3833 failCount++;
3834 //assert(m.group() = m.group(0);
3835 if (!m.group().equals(m.group(0)))
3836 failCount++;
3837 try {
3838 m.group(50);
3839 failCount++;
3840 } catch (IndexOutOfBoundsException ise) {}
3841
3842 return failCount;
3843 }
3844
3845 private static Pattern compileTestPattern(String patternString) {
3846 if (!patternString.startsWith("'")) {
3847 return Pattern.compile(patternString);
3848 }
3849
3850 int break1 = patternString.lastIndexOf("'");
3851 String flagString = patternString.substring(
3852 break1+1, patternString.length());
3853 patternString = patternString.substring(1, break1);
3854
3855 if (flagString.equals("i"))
3856 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3857
3858 if (flagString.equals("m"))
3859 return Pattern.compile(patternString, Pattern.MULTILINE);
3860
3861 return Pattern.compile(patternString);
3862 }
3863
3864 /**
3865 * Reads a line from the input file. Keeps reading lines until a non
3866 * empty non comment line is read. If the line contains a \n then
3867 * these two characters are replaced by a newline char. If a \\uxxxx
3868 * sequence is read then the sequence is replaced by the unicode char.
3869 */
4075 "${dog}",
4076 "zzzDogzzzDogzzz");
4077
4078 // backref in Matcher & String
4079 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4080 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4081 failCount++;
4082
4083 // negative
4084 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4085 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4086 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4087 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4088 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4089 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4090 "gnameX");
4091 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4092 report("NamedGroupCapture");
4093 }
4094
4095 // This is for bug 6969132
4096 private static void nonBmpClassComplementTest() throws Exception {
4097 Pattern p = Pattern.compile("\\P{Lu}");
4098 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4099 if (m.find() && m.start() == 1)
4100 failCount++;
4101
4102 // from a unicode category
4103 p = Pattern.compile("\\P{Lu}");
4104 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4105 if (m.find())
4106 failCount++;
4107 if (!m.hitEnd())
4108 failCount++;
4109
4110 // block
4111 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4112 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4113 if (m.find() && m.start() == 1)
4114 failCount++;
4115
4116 report("NonBmpClassComplement");
4117 }
4118
4119 private static void unicodePropertiesTest() throws Exception {
4120 // different forms
4121 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4122 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4123 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4124 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4125 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4126 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4127 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4128 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4129 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4130 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4131 failCount++;
4132
4133 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
4134 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4135 Matcher lastSM = common;
|
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 /**
25 * @test
26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
27 * @author Mike McCloskey
28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384
37 * @library /lib/testlibrary
38 * @build jdk.testlibrary.*
39 * @run main RegExTest
40 * @key randomness
41 */
42
43 import java.util.function.Function;
44 import java.util.regex.*;
45 import java.util.Random;
46 import java.util.Scanner;
47 import java.io.*;
48 import java.nio.file.*;
49 import java.util.*;
50 import java.nio.CharBuffer;
51 import java.util.function.Predicate;
52 import jdk.testlibrary.RandomFactory;
53
54 /**
55 * This is a test class created to check the operation of
56 * the Pattern and Matcher classes.
2643 failCount++;
2644
2645 // Marks that cannot legally change order and be equivalent
2646 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2647 check(p, "testa\u0308\u0300", true);
2648 check(p, "testa\u0300\u0308", false);
2649
2650 // Marks that can legally change order and be equivalent
2651 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2652 check(p, "testa\u0308\u0323", true);
2653 check(p, "testa\u0323\u0308", true);
2654
2655 // Test all equivalences of the sequence a\u0308\u0323\u0300
2656 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2657 check(p, "testa\u0308\u0323\u0300", true);
2658 check(p, "testa\u0323\u0308\u0300", true);
2659 check(p, "testa\u0308\u0300\u0323", true);
2660 check(p, "test\u00e4\u0323\u0300", true);
2661 check(p, "test\u00e4\u0300\u0323", true);
2662
2663 Object[][] data = new Object[][] {
2664
2665 // JDK-4867170
2666 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true },
2667 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true },
2668 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true },
2669 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true },
2670 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true },
2671 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true },
2672 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true },
2673
2674 { "\\p{IsGreek}", "ab\u1f80cd", "f", true },
2675 { "\\p{IsGreek}", "ab\u1f81cd", "f", true },
2676 { "\\p{IsGreek}", "ab\u1f82cd", "f", true },
2677 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true },
2678 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true },
2679
2680 // backtracking, force to match "\u1f80", instead of \u1f82"
2681 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true },
2682
2683 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true },
2684 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true },
2685
2686 { "[^\u1f80-\u1f82]","\u1f81", "m", false },
2687 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false },
2688 { "[^\u1f01\u0345]", "\u1f81", "f", false },
2689
2690 { "[^\u1f81]+", "\u1f80\u1f82", "f", true },
2691 { "[\u1f80]", "ab\u1f80cd", "f", true },
2692 { "\u1f80", "ab\u1f80cd", "f", true },
2693 { "\u1f00\u0345\u0300", "\u1f82", "m", true },
2694 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true },
2695 { "\u1f82", "\u1f00\u0345\u0300", "m", true },
2696 { "\u1f82", "\u1f80\u0300", "m", true },
2697
2698 // JDK-7080302 # compile failed
2699 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true},
2700
2701 // JDK-6728861, same cause as above one
2702 // Pattern pt = Pattern.compile("één", Pattern.CANON_EQ);
2703 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true},
2704
2705 // JDK-6995635
2706 // Pattern patternThatIsGonnaBug=Pattern.compile("(ë)",Pattern.CANON_EQ);
2707 { "(\u00e9)", "e\u0301", "m", true },
2708
2709 // JDK-6736245
2710 // interesting special case: nfc(u2add+u0338) -> u2add+u0338, NOT u2adc
2711 { "\u2ADC", "\u2ADC", "m", true}, // NFC
2712 { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD
2713
2714 // 4916384.
2715 // Decomposed hangul (jamos) works inside clazz
2716 { "[\u1100\u1161]", "\u1100\u1161", "m", true},
2717 { "[\u1100\u1161]", "\uac00", "m", true},
2718
2719 { "[\uac00]", "\u1100\u1161", "m", true},
2720 { "[\uac00]", "\uac00", "m", true},
2721
2722 // Decomposed hangul (jamos)
2723 { "\u1100\u1161", "\u1100\u1161", "m", true},
2724 { "\u1100\u1161", "\uac00", "m", true},
2725
2726 // Composed hangul
2727 { "\uac00", "\u1100\u1161", "m", true },
2728 { "\uac00", "\uac00", "m", true },
2729
2730 /* Need a NFDSlice to nfd the source to solve this issue
2731 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2732 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165>
2733 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2734
2735 // Decomposed supplementary outside char classes
2736 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2737 // Composed supplementary outside char classes
2738 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
2739 */
2740 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true },
2741 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
2742
2743 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true },
2744 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2745 };
2746
2747 int failCount = 0;
2748 for (Object[] d : data) {
2749 String pn = (String)d[0];
2750 String tt = (String)d[1];
2751 boolean isFind = "f".equals(((String)d[2]));
2752 boolean expected = (boolean)d[3];
2753 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find()
2754 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches();
2755 if (ret != expected) {
2756 failCount++;
2757 continue;
2758 }
2759 }
2760 report("Canonical Equivalence");
2761 }
2762
2763 /**
2764 * A basic sanity test of Matcher.replaceAll().
2765 */
2766 private static void globalSubstitute() throws Exception {
2767 // Global substitution with a literal
2768 Pattern p = Pattern.compile("(ab)(c*)");
2769 Matcher m = p.matcher("abccczzzabcczzzabccc");
2770 if (!m.replaceAll("test").equals("testzzztestzzztest"))
2771 failCount++;
2772
2773 m.reset("zzzabccczzzabcczzzabccczzz");
2774 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2775 failCount++;
2776
2777 // Global substitution with groups
2778 m.reset("zzzabccczzzabcczzzabccczzz");
2779 String result = m.replaceAll("$1");
3882 if (m.start() != m.start(0))
3883 failCount++;
3884 //assert(m.end() = m.end(0);
3885 if (m.start() != m.start(0))
3886 failCount++;
3887 //assert(m.group() = m.group(0);
3888 if (!m.group().equals(m.group(0)))
3889 failCount++;
3890 try {
3891 m.group(50);
3892 failCount++;
3893 } catch (IndexOutOfBoundsException ise) {}
3894
3895 return failCount;
3896 }
3897
3898 private static Pattern compileTestPattern(String patternString) {
3899 if (!patternString.startsWith("'")) {
3900 return Pattern.compile(patternString);
3901 }
3902 int break1 = patternString.lastIndexOf("'");
3903 String flagString = patternString.substring(
3904 break1+1, patternString.length());
3905 patternString = patternString.substring(1, break1);
3906
3907 if (flagString.equals("i"))
3908 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3909
3910 if (flagString.equals("m"))
3911 return Pattern.compile(patternString, Pattern.MULTILINE);
3912
3913 return Pattern.compile(patternString);
3914 }
3915
3916 /**
3917 * Reads a line from the input file. Keeps reading lines until a non
3918 * empty non comment line is read. If the line contains a \n then
3919 * these two characters are replaced by a newline char. If a \\uxxxx
3920 * sequence is read then the sequence is replaced by the unicode char.
3921 */
4127 "${dog}",
4128 "zzzDogzzzDogzzz");
4129
4130 // backref in Matcher & String
4131 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4132 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4133 failCount++;
4134
4135 // negative
4136 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4137 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4138 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4139 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4140 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4141 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4142 "gnameX");
4143 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4144 report("NamedGroupCapture");
4145 }
4146
4147 // This is for bug 6919132
4148 private static void nonBmpClassComplementTest() throws Exception {
4149 Pattern p = Pattern.compile("\\P{Lu}");
4150 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4151
4152 if (m.find() && m.start() == 1)
4153 failCount++;
4154
4155 // from a unicode category
4156 p = Pattern.compile("\\P{Lu}");
4157 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4158 if (m.find())
4159 failCount++;
4160 if (!m.hitEnd())
4161 failCount++;
4162
4163 // block
4164 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4165 m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4166 if (m.find() && m.start() == 1)
4167 failCount++;
4168
4169 p = Pattern.compile("\\P{sc=GRANTHA}");
4170 m = p.matcher(new String(new int[] {0x11350}, 0, 1));
4171 if (m.find() && m.start() == 1)
4172 failCount++;
4173
4174 report("NonBmpClassComplement");
4175 }
4176
4177 private static void unicodePropertiesTest() throws Exception {
4178 // different forms
4179 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4180 !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4181 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4182 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4183 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4184 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4185 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4186 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4187 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4188 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4189 failCount++;
4190
4191 Matcher common = Pattern.compile("\\p{script=Common}").matcher("");
4192 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4193 Matcher lastSM = common;
|