test/java/util/regex/RegExTest.java
Print this page
*** 30,40 ****
* @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
* 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
* 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
! * 6350801 6676425 6878475 6919132 6931676 6948903 7014645
*/
import java.util.regex.*;
import java.util.Random;
import java.io.*;
--- 30,40 ----
* @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
* 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
* 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
! * 6350801 6676425 6878475 6919132 6931676 6948903 7014645 7039066
*/
import java.util.regex.*;
import java.util.Random;
import java.io.*;
*** 135,144 ****
--- 135,145 ----
surrogatesInClassTest();
namedGroupCaptureTest();
nonBmpClassComplementTest();
unicodePropertiesTest();
unicodeHexNotationTest();
+ unicodeClassesTest();
if (failure)
throw new RuntimeException("Failure in the RE handling.");
else
System.err.println("OKAY: All tests passed.");
}
*** 3655,3660 ****
--- 3656,3802 ----
if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
failCount++;
}
report("unicodeHexNotation");
}
+
+ /**
+  * Cross-checks the predefined/POSIX character classes, the binary
+  * properties (\p{IsXxx}) and the java-method classes (\p{javaXxx})
+  * against reference predicates for every code point in [1, 0x30000).
+  *
+  * Each class is checked in up to three configurations:
+  *   - compiled with no flags (compared with the POSIX_ASCII helpers),
+  *   - compiled with Pattern.UNICODE_CHARACTER_CLASS (compared with
+  *     java.lang.Character or the POSIX_Unicode helpers),
+  *   - compiled with only the embedded flag expression (?U).
+  *
+  * Any disagreement for a code point increments failCount once; the
+  * result is published through report("unicodePredefinedClasses").
+  */
+ private static void unicodeClassesTest() throws Exception {
+
+     // No flags.
+     // NOTE(review): ASCII is compiled here but never consulted below, so
+     // only its compilation is exercised.
+     Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
+     Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
+     Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
+     Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
+     Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
+     Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
+     Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
+     Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
+     Matcher print  = Pattern.compile("\\p{Print}").matcher("");
+     Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
+     Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
+     Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
+     Matcher space  = Pattern.compile("\\p{Space}").matcher("");
+     Matcher bound  = Pattern.compile("\\b").matcher("");
+     Matcher word   = Pattern.compile("\\w++").matcher("");
+     // UNICODE_CHARACTER_CLASS
+     // NOTE(review): ASCIIU and boundU are compiled but never consulted
+     // below, so only their compilation is exercised.
+     Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     // embedded flag (?U)
+     // Deliberately compiled WITHOUT Pattern.UNICODE_CHARACTER_CLASS: the
+     // inline (?U) must be the only thing that switches on the Unicode
+     // definitions, otherwise a broken embedded flag would go unnoticed.
+     Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}").matcher("");
+     Matcher graphEU = Pattern.compile("(?U)\\p{Graph}").matcher("");
+     Matcher wordEU  = Pattern.compile("(?U)\\w").matcher("");
+
+     // \b\w\b combinations; bwbEU relies on the embedded flag alone for
+     // the same reason as above.
+     Matcher bwb   = Pattern.compile("\\b\\w\\b").matcher("");
+     Matcher bwbU  = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
+     Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b").matcher("");
+     // properties
+     Matcher lowerP   = Pattern.compile("\\p{IsLowerCase}").matcher("");
+     Matcher upperP   = Pattern.compile("\\p{IsUpperCase}").matcher("");
+     Matcher titleP   = Pattern.compile("\\p{IsTitleCase}").matcher("");
+     Matcher letterP  = Pattern.compile("\\p{IsLetter}").matcher("");
+     Matcher alphaP   = Pattern.compile("\\p{IsAlphabetic}").matcher("");
+     Matcher ideogP   = Pattern.compile("\\p{IsIdeographic}").matcher("");
+     Matcher cntrlP   = Pattern.compile("\\p{IsControl}").matcher("");
+     Matcher spaceP   = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
+     Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
+     Matcher nonCCPP  = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
+
+     // javaMethod
+     Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
+     Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
+     Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher("");
+     Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher("");
+
+     // The scan starts at 1, so U+0000 is not checked. Each code point is
+     // turned into a one-code-point string and every matcher is reset on
+     // it; the first disagreement short-circuits the condition, so a code
+     // point contributes at most one failure.
+     for (int cp = 1; cp < 0x30000; cp++) {
+         String str = new String(Character.toChars(cp));
+         int type = Character.getType(cp);
+         if (// lower
+             POSIX_ASCII.isLower(cp)     != lower.reset(str).matches()   ||
+             Character.isLowerCase(cp)   != lowerU.reset(str).matches()  ||
+             Character.isLowerCase(cp)   != lowerP.reset(str).matches()  ||
+             Character.isLowerCase(cp)   != lowerEU.reset(str).matches() ||
+             Character.isLowerCase(cp)   != lowerJ.reset(str).matches()  ||
+             // upper
+             POSIX_ASCII.isUpper(cp)     != upper.reset(str).matches()   ||
+             POSIX_Unicode.isUpper(cp)   != upperU.reset(str).matches()  ||
+             Character.isUpperCase(cp)   != upperP.reset(str).matches()  ||
+             Character.isUpperCase(cp)   != upperJ.reset(str).matches()  ||
+             // alpha
+             POSIX_ASCII.isAlpha(cp)     != alpha.reset(str).matches()   ||
+             POSIX_Unicode.isAlpha(cp)   != alphaU.reset(str).matches()  ||
+             Character.isAlphabetic(cp)  != alphaP.reset(str).matches()  ||
+             Character.isAlphabetic(cp)  != alphaJ.reset(str).matches()  ||
+             // digit
+             POSIX_ASCII.isDigit(cp)     != digit.reset(str).matches()   ||
+             Character.isDigit(cp)       != digitU.reset(str).matches()  ||
+             // alnum
+             POSIX_ASCII.isAlnum(cp)     != alnum.reset(str).matches()   ||
+             POSIX_Unicode.isAlnum(cp)   != alnumU.reset(str).matches()  ||
+             // punct
+             POSIX_ASCII.isPunct(cp)     != punct.reset(str).matches()   ||
+             POSIX_Unicode.isPunct(cp)   != punctU.reset(str).matches()  ||
+             // graph
+             POSIX_ASCII.isGraph(cp)     != graph.reset(str).matches()   ||
+             POSIX_Unicode.isGraph(cp)   != graphU.reset(str).matches()  ||
+             POSIX_Unicode.isGraph(cp)   != graphEU.reset(str).matches() ||
+             // blank
+             POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
+                                         != blank.reset(str).matches()   ||
+             POSIX_Unicode.isBlank(cp)   != blankU.reset(str).matches()  ||
+             // print
+             POSIX_ASCII.isPrint(cp)     != print.reset(str).matches()   ||
+             POSIX_Unicode.isPrint(cp)   != printU.reset(str).matches()  ||
+             // cntrl
+             POSIX_ASCII.isCntrl(cp)     != cntrl.reset(str).matches()   ||
+             POSIX_Unicode.isCntrl(cp)   != cntrlU.reset(str).matches()  ||
+             (Character.CONTROL == type) != cntrlP.reset(str).matches()  ||
+             // hexdigit
+             POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
+             POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
+             // space
+             POSIX_ASCII.isSpace(cp)     != space.reset(str).matches()   ||
+             POSIX_Unicode.isSpace(cp)   != spaceU.reset(str).matches()  ||
+             POSIX_Unicode.isSpace(cp)   != spaceP.reset(str).matches()  ||
+             // word
+             POSIX_ASCII.isWord(cp)      != word.reset(str).matches()    ||
+             POSIX_Unicode.isWord(cp)    != wordU.reset(str).matches()   ||
+             POSIX_Unicode.isWord(cp)    != wordEU.reset(str).matches()  ||
+             // bwordb
+             POSIX_ASCII.isWord(cp)      != bwb.reset(str).matches()     ||
+             POSIX_Unicode.isWord(cp)    != bwbU.reset(str).matches()    ||
+             // properties
+             Character.isTitleCase(cp)   != titleP.reset(str).matches()  ||
+             Character.isLetter(cp)      != letterP.reset(str).matches() ||
+             Character.isIdeographic(cp) != ideogP.reset(str).matches()  ||
+             Character.isIdeographic(cp) != ideogJ.reset(str).matches()  ||
+             // IsAssigned must hold exactly when the type is NOT UNASSIGNED,
+             // hence equality (not inequality) signals the failure here.
+             (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
+             POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches())
+             failCount++;
+     }
+
+     // bounds/word align
+     // Inputs mix ASCII letters with non-ASCII characters. twoFindIndexes is
+     // defined elsewhere; presumably it verifies the positions of the first
+     // two find() hits of the given matcher -- confirm against the helper.
+     twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
+     if (!bwbU.reset("\u0180sherman\u0400").matches())
+         failCount++;
+     twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
+     if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
+         failCount++;
+     twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
+     if (!bwbU.reset("\u0724\u0739\u0724").matches())
+         failCount++;
+     if (!bwbEU.reset("\u0724\u0739\u0724").matches())
+         failCount++;
+     report("unicodePredefinedClasses");
+ }
}