test/java/util/regex/RegExTest.java
Print this page
@@ -30,21 +30,23 @@
* 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
* 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
* 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
- * 8027645 8035076 8039124 8035975 8074678 6854417 8143854
+ * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
* @library /lib/testlibrary
* @build jdk.testlibrary.*
* @run main RegExTest
* @key randomness
*/
import java.util.function.Function;
import java.util.regex.*;
import java.util.Random;
+import java.util.Scanner;
import java.io.*;
+import java.nio.file.*;
import java.util.*;
import java.nio.CharBuffer;
import java.util.function.Predicate;
import jdk.testlibrary.RandomFactory;
@@ -149,17 +151,19 @@
namedGroupCaptureTest();
nonBmpClassComplementTest();
unicodePropertiesTest();
unicodeHexNotationTest();
unicodeClassesTest();
+ unicodeCharacterNameTest();
horizontalAndVerticalWSTest();
linebreakTest();
branchTest();
groupCurlyNotFoundSuppTest();
groupCurlyBackoffTest();
patternAsPredicate();
invalidFlags();
+ grapheme();
if (failure) {
throw new
RuntimeException("RegExTest failed, 1st failure: " +
firstFailure);
@@ -4370,10 +4374,69 @@
if (!bwbEU.reset("\u0724\u0739\u0724").matches())
failCount++;
report("unicodePredefinedClasses");
}
+ /**
+  * Exercises the named-character construct {@code \N{name}} using the names
+  * returned by {@link Character#getName(int)}. Four shapes are covered:
+  * a single atom, a one-element character class, the two end points of a
+  * character-class range, and a concatenation ("slice") of several atoms.
+  *
+  * Every mismatch increments failCount; the outcome is reported under
+  * "unicodeCharacterName".
+  */
+ private static void unicodeCharacterNameTest() throws Exception {
+
+     // Upper bound is inclusive so that the last code point is considered too.
+     for (int cp = 0; cp <= Character.MAX_CODE_POINT; cp++) {
+         if (!Character.isValidCodePoint(cp) ||
+             Character.getType(cp) == Character.UNASSIGNED)
+             continue;
+         // Look the name up once; it is used for both pattern shapes below.
+         String name = Character.getName(cp);
+         String str = new String(Character.toChars(cp));
+         // single
+         String p = "\\N{" + name + "}";
+         if (!Pattern.compile(p).matcher(str).matches()) {
+             failCount++;
+         }
+         // class[c]
+         p = "[\\N{" + name + "}]";
+         if (!Pattern.compile(p).matcher(str).matches()) {
+             failCount++;
+         }
+     }
+
+     // range
+     for (int i = 0; i < 10; i++) {
+         int start = generator.nextInt(20);
+         int end = start + generator.nextInt(200);
+         // The pattern depends only on start/end, so compile it once per range
+         // instead of once per probed code point.
+         Pattern range = Pattern.compile(
+             "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]");
+         // Both end points of a character-class range are inclusive, so the
+         // upper end point is probed as well.
+         for (int cp = start; cp <= end; cp++) {
+             String str = new String(Character.toChars(cp));
+             if (!range.matcher(str).matches()) {
+                 failCount++;
+             }
+         }
+         // A code point beyond the upper end point must not match.
+         String outside = new String(Character.toChars(end + 10));
+         if (range.matcher(outside).matches()) {
+             failCount++;
+         }
+     }
+
+     // slice
+     for (int i = 0; i < 10; i++) {
+         int n = generator.nextInt(256);
+         int[] buf = new int[n];
+         // Local, single-threaded buffer: StringBuilder is sufficient.
+         StringBuilder sb = new StringBuilder(1024);
+         for (int j = 0; j < n; j++) {
+             int cp = generator.nextInt(1000);
+             if (!Character.isValidCodePoint(cp) ||
+                 Character.getType(cp) == Character.UNASSIGNED)
+                 cp = 0x4e00;  // just use 4e00
+             sb.append("\\N{").append(Character.getName(cp)).append("}");
+             buf[j] = cp;
+         }
+         String p = sb.toString();
+         String str = new String(buf, 0, buf.length);
+         if (!Pattern.compile(p).matcher(str).matches()) {
+             failCount++;
+         }
+     }
+     report("unicodeCharacterName");
+ }
+
private static void horizontalAndVerticalWSTest() throws Exception {
String hws = new String (new char[] {
0x09, 0x20, 0xa0, 0x1680, 0x180e,
0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
@@ -4543,6 +4606,60 @@
}
}
}
report("Invalid compile flags");
}
+
+ /**
+  * Checks extended grapheme cluster support: {@code \X} and the
+  * {@code \b{g}} boundary matcher.
+  *
+  * Each non-empty, non-comment line of GraphemeBreakTest.txt (resolved
+  * against the "test.src" system property, defaulting to ".") is turned
+  * into a source string plus the list of clusters it is expected to break
+  * into; the clusters are then compared with what {@code \X} finds directly
+  * and with what a Scanner delimited by {@code \b{g}} returns. A few fixed
+  * sanity checks follow.
+  *
+  * Every mismatch increments failCount; the outcome is reported under
+  * "Unicode extended grapheme cluster".
+  */
+ private static void grapheme() throws Exception {
+     // Compiled once and shared by every line of the data file.
+     Pattern p = Pattern.compile("\\X");
+     // Files.lines keeps the file open until the stream is closed, so the
+     // stream is managed by try-with-resources.
+     try (java.util.stream.Stream<String> lines =
+              Files.lines(Paths.get(System.getProperty("test.src", "."),
+                                    "GraphemeBreakTest.txt"))) {
+         lines.filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
+              .forEach( ln -> {
+                  // Strip whitespace, annotations and the trailing comment so only
+                  // hex code points and the two separator characters remain.
+                  // NOTE(review): the third alternative contains "]]+", which
+                  // demands a doubled ']' -- presumably "[a-zA-Z]+" was meant;
+                  // left untouched here, confirm against the data file.
+                  ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
+                  String[] strs = ln.split("\u00f7|\u00d7");
+                  StringBuilder src = new StringBuilder();
+                  ArrayList<String> graphemes = new ArrayList<>();
+                  StringBuilder buf = new StringBuilder();
+                  // Index in ln of the separator that follows the current token.
+                  int offBk = 0;
+                  for (String str : strs) {
+                      if (str.length() == 0)  // first empty str
+                          continue;
+                      int cp = Integer.parseInt(str, 16);
+                      src.appendCodePoint(cp);
+                      buf.appendCodePoint(cp);
+                      // Skip this token plus the one separator that precedes it.
+                      offBk += (str.length() + 1);
+                      if (ln.charAt(offBk) == '\u00f7') {    // DIV
+                          // A DIV after the code point closes the current cluster.
+                          graphemes.add(buf.toString());
+                          buf = new StringBuilder();
+                      }
+                  }
+                  Matcher m = p.matcher(src.toString());
+                  try (Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}")) {
+                      for (String g : graphemes) {
+                          // (1) test \\X directly
+                          if (!m.find() || !m.group().equals(g)) {
+                              System.out.println("Failed \\X [" + ln + "] : " + g);
+                              failCount++;
+                          }
+                          // (2) test \\b{g} + \\X via Scanner
+                          if (!s.hasNext(p) || !s.next(p).equals(g)) {
+                              System.out.println("Failed b{g} [" + ln + "] : " + g);
+                              failCount++;
+                          }
+                      }
+                  }
+              });
+     }
+     // some sanity checks
+     if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
+         !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
+         !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
+         failCount++;
+     // make sure "\b{n}" still works
+     if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
+         failCount++;
+     report("Unicode extended grapheme cluster");
+ }
}