15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 /**
27 * @test
28 * @summary tests RegExp framework
29 * @author Mike McCloskey
30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
35 * 6350801 6676425 6878475 6919132 6931676 6948903 7014645
36 */
37
38 import java.util.regex.*;
39 import java.util.Random;
40 import java.io.*;
41 import java.util.*;
42 import java.nio.CharBuffer;
43
44 /**
45 * This is a test class created to check the operation of
46 * the Pattern and Matcher classes.
47 */
48 public class RegExTest {
49
50 private static Random generator = new Random();
51 private static boolean failure = false;
52 private static int failCount = 0;
53
54 /**
55 * Main to interpret arguments and run several tests.
120 nonCaptureRepetitionTest();
121 notCapturedGroupCurlyMatchTest();
122 escapedSegmentTest();
123 literalPatternTest();
124 literalReplacementTest();
125 regionTest();
126 toStringTest();
127 negatedCharClassTest();
128 findFromTest();
129 boundsTest();
130 unicodeWordBoundsTest();
131 caretAtEndTest();
132 wordSearchTest();
133 hitEndTest();
134 toMatchResultTest();
135 surrogatesInClassTest();
136 namedGroupCaptureTest();
137 nonBmpClassComplementTest();
138 unicodePropertiesTest();
139 unicodeHexNotationTest();
140 if (failure)
141 throw new RuntimeException("Failure in the RE handling.");
142 else
143 System.err.println("OKAY: All tests passed.");
144 }
145
146 // Utility functions
147
148 private static String getRandomAlphaString(int length) {
149 StringBuffer buf = new StringBuffer(length);
150 for (int i=0; i<length; i++) {
151 char randChar = (char)(97 + generator.nextInt(26));
152 buf.append(randChar);
153 }
154 return buf.toString();
155 }
156
157 private static void check(Matcher m, String expected) {
158 m.find();
159 if (!m.group().equals(expected))
3640 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true);
3641
3642 for (int cp = 0; cp <= 0x10FFFF; cp++) {
3643 String s = "A" + new String(Character.toChars(cp)) + "B";
3644 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3645 : String.format("\\u%04x\\u%04x",
3646 (int) Character.toChars(cp)[0],
3647 (int) Character.toChars(cp)[1]);
3648 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3649 if (!Pattern.matches("A" + hexUTF16 + "B", s))
3650 failCount++;
3651 if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3652 failCount++;
3653 if (!Pattern.matches("A" + hexCodePoint + "B", s))
3654 failCount++;
3655 if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3656 failCount++;
3657 }
3658 report("unicodeHexNotation");
3659 }
3660 }
|
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 /**
27 * @test
28 * @summary tests RegExp framework
29 * @author Mike McCloskey
30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
35 * 6350801 6676425 6878475 6919132 6931676 6948903 7014645 7039066
36 */
37
38 import java.util.regex.*;
39 import java.util.Random;
40 import java.io.*;
41 import java.util.*;
42 import java.nio.CharBuffer;
43
44 /**
45 * This is a test class created to check the operation of
46 * the Pattern and Matcher classes.
47 */
48 public class RegExTest {
49
50 private static Random generator = new Random();
51 private static boolean failure = false;
52 private static int failCount = 0;
53
54 /**
55 * Main to interpret arguments and run several tests.
120 nonCaptureRepetitionTest();
121 notCapturedGroupCurlyMatchTest();
122 escapedSegmentTest();
123 literalPatternTest();
124 literalReplacementTest();
125 regionTest();
126 toStringTest();
127 negatedCharClassTest();
128 findFromTest();
129 boundsTest();
130 unicodeWordBoundsTest();
131 caretAtEndTest();
132 wordSearchTest();
133 hitEndTest();
134 toMatchResultTest();
135 surrogatesInClassTest();
136 namedGroupCaptureTest();
137 nonBmpClassComplementTest();
138 unicodePropertiesTest();
139 unicodeHexNotationTest();
140 unicodeClassesTest();
141 if (failure)
142 throw new RuntimeException("Failure in the RE handling.");
143 else
144 System.err.println("OKAY: All tests passed.");
145 }
146
147 // Utility functions
148
149 private static String getRandomAlphaString(int length) {
150 StringBuffer buf = new StringBuffer(length);
151 for (int i=0; i<length; i++) {
152 char randChar = (char)(97 + generator.nextInt(26));
153 buf.append(randChar);
154 }
155 return buf.toString();
156 }
157
158 private static void check(Matcher m, String expected) {
159 m.find();
160 if (!m.group().equals(expected))
3641 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true);
3642
3643 for (int cp = 0; cp <= 0x10FFFF; cp++) {
3644 String s = "A" + new String(Character.toChars(cp)) + "B";
3645 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3646 : String.format("\\u%04x\\u%04x",
3647 (int) Character.toChars(cp)[0],
3648 (int) Character.toChars(cp)[1]);
3649 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3650 if (!Pattern.matches("A" + hexUTF16 + "B", s))
3651 failCount++;
3652 if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3653 failCount++;
3654 if (!Pattern.matches("A" + hexCodePoint + "B", s))
3655 failCount++;
3656 if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3657 failCount++;
3658 }
3659 report("unicodeHexNotation");
3660 }
3661
3662 private static void unicodeClassesTest() throws Exception {
3663
3664 Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
3665 Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
3666 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
3667 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
3668 Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
3669 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
3670 Matcher punct = Pattern.compile("\\p{Punct}").matcher("");
3671 Matcher graph = Pattern.compile("\\p{Graph}").matcher("");
3672 Matcher print = Pattern.compile("\\p{Print}").matcher("");
3673 Matcher blank = Pattern.compile("\\p{Blank}").matcher("");
3674 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
3675 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
3676 Matcher space = Pattern.compile("\\p{Space}").matcher("");
3677 Matcher bound = Pattern.compile("\\b").matcher("");
3678 Matcher word = Pattern.compile("\\w++").matcher("");
3679 // UNICODE_CHARACTER_CLASS
3680 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3681 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3682 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3683 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3684 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3685 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3686 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3687 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3688 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3689 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3690 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3691 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3692 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3693 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3694 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3695 // embedded flag (?U)
3696 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3697 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3698 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3699
3700 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher("");
3701 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3702 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3703 // properties
3704 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher("");
3705 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher("");
3706 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher("");
3707 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
3708 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher("");
3709 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher("");
3710 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher("");
3711 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
3712 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
3713 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
3714
3715 // javaMethod
3716 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
3717 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
3718 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher("");
3719 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher("");
3720
3721 for (int cp = 1; cp < 0x30000; cp++) {
3722 String str = new String(Character.toChars(cp));
3723 int type = Character.getType(cp);
3724 if (// lower
3725 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() ||
3726 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
3727 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
3728 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
3729 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
3730 // upper
3731 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() ||
3732 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
3733 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
3734 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
3735 // alpha
3736 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() ||
3737 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
3738 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
3739 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
3740 // digit
3741 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() ||
3742 Character.isDigit(cp) != digitU.reset(str).matches() ||
3743 // alnum
3744 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() ||
3745 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
3746 // punct
3747 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() ||
3748 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
3749 // graph
3750 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() ||
3751 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
3752 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
3753 // blank
3754 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
3755 != blank.reset(str).matches() ||
3756 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
3757 // print
3758 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() ||
3759 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
3760 // cntrl
3761 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() ||
3762 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
3763 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
3764 // hexdigit
3765 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() ||
3766 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
3767 // space
3768 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() ||
3769 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
3770 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
3771 // word
3772 POSIX_ASCII.isWord(cp) != word.reset(str).matches() ||
3773 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
3774 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
3775 // bwordb
3776 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
3777 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
3778 // properties
3779 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
3780 Character.isLetter(cp) != letterP.reset(str).matches()||
3781 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
3782 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
3783 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
3784 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches())
3785 failCount++;
3786 }
3787
3788 // bounds/word align
3789 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
3790 if (!bwbU.reset("\u0180sherman\u0400").matches())
3791 failCount++;
3792 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
3793 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
3794 failCount++;
3795 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
3796 if (!bwbU.reset("\u0724\u0739\u0724").matches())
3797 failCount++;
3798 if (!bwbEU.reset("\u0724\u0739\u0724").matches())
3799 failCount++;
3800 report("unicodePredefinedClasses");
3801 }
3802 }
|