test/java/util/regex/RegExTest.java

Print this page

        

@@ -31,10 +31,11 @@
  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
+ * 8151481 4867170 7080302 6728861 6995635 6736245 4916384
  * @library /lib/testlibrary
  * @build jdk.testlibrary.*
  * @run main RegExTest
  * @key randomness
  */

@@ -2657,55 +2658,107 @@
         check(p, "testa\u0323\u0308\u0300", true);
         check(p, "testa\u0308\u0300\u0323", true);
         check(p, "test\u00e4\u0323\u0300", true);
         check(p, "test\u00e4\u0300\u0323", true);
 
-        /*
-         * The following canonical equivalence tests don't work. Bug id: 4916384.
-         *
-        // Decomposed hangul (jamos)
-        p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
-        m = p.matcher("\u1100\u1161");
-        if (!m.matches())
-            failCount++;
+        Object[][] data = new Object[][] {
 
-        m.reset("\uac00");
-        if (!m.matches())
-            failCount++;
+        // JDK-4867170
+        { "[\u1f80-\u1f82]", "ab\u1f80cd",             "f", true },
+        { "[\u1f80-\u1f82]", "ab\u1f81cd",             "f", true },
+        { "[\u1f80-\u1f82]", "ab\u1f82cd",             "f", true },
+        { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true },
+        { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true },
+        { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd",       "f", true },
+        { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd",       "f", true },
+
+        { "\\p{IsGreek}",    "ab\u1f80cd",             "f", true },
+        { "\\p{IsGreek}",    "ab\u1f81cd",             "f", true },
+        { "\\p{IsGreek}",    "ab\u1f82cd",             "f", true },
+        { "\\p{IsGreek}",    "ab\u03b1\u0314\u0345cd", "f", true },
+        { "\\p{IsGreek}",    "ab\u1f01\u0345cd",       "f", true },
+
+        // backtracking: force a match against "\u1f80" instead of "\u1f82"
+        { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true },
+
+        { "[\\p{IsGreek}]",  "\u03b1\u0314\u0345",     "m", true },
+        { "\\p{IsGreek}",    "\u03b1\u0314\u0345",     "m", true },
+ 
+        { "[^\u1f80-\u1f82]","\u1f81",                 "m", false },
+        { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345",     "m", false },
+        { "[^\u1f01\u0345]", "\u1f81",                 "f", false },
+ 
+        { "[^\u1f81]+",      "\u1f80\u1f82",           "f", true },
+        { "[\u1f80]",        "ab\u1f80cd",             "f", true },
+        { "\u1f80",          "ab\u1f80cd",             "f", true },
+        { "\u1f00\u0345\u0300",  "\u1f82", "m", true },
+        { "\u1f80",          "-\u1f00\u0345\u0300-",   "f", true },
+        { "\u1f82",          "\u1f00\u0345\u0300",     "m", true },
+        { "\u1f82",          "\u1f80\u0300",           "m", true },
+ 
+        // JDK-7080302       # compile failed
+        { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true},
+
+        // JDK-6728861, same cause as above one
+        // Pattern pt = Pattern.compile("één", Pattern.CANON_EQ);
+        { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true},
+
+        // JDK-6995635
+        // Pattern patternThatIsGonnaBug=Pattern.compile("(ë)",Pattern.CANON_EQ);
+        { "(\u00e9)", "e\u0301", "m", true },
+
+        // JDK-6736245
+        // interesting special case: nfc(u2add+u0338) -> (u2add+u0338), NOT u2adc
+        { "\u2ADC", "\u2ADC", "m", true},          // NFC
+        { "\u2ADC", "\u2ADD\u0338", "m", true},    // NFD 
+
+        // JDK-4916384
+        // Decomposed hangul (jamos) work inside a character class
+        { "[\u1100\u1161]", "\u1100\u1161", "m", true},
+        { "[\u1100\u1161]", "\uac00", "m", true},
 
-        // Composed hangul
-        p = Pattern.compile("\uac00", Pattern.CANON_EQ);
-        m = p.matcher("\u1100\u1161");
-        if (!m.matches())
-            failCount++;
+        { "[\uac00]", "\u1100\u1161", "m", true},
+        { "[\uac00]", "\uac00", "m", true},
 
-        m.reset("\uac00");
-        if (!m.matches())
-            failCount++;
+        // Decomposed hangul (jamos)
+        { "\u1100\u1161", "\u1100\u1161", "m", true},
+        { "\u1100\u1161", "\uac00", "m", true},
 
-        // Decomposed supplementary outside char classes
-        p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
-        m = p.matcher("test\ud834\uddc0");
-        if (!m.matches())
-            failCount++;
+        // Composed hangul
+        { "\uac00",  "\u1100\u1161", "m", true },
+        { "\uac00",  "\uac00", "m", true },
 
-        m.reset("test\ud834\uddbc\ud834\udd6f");
-        if (!m.matches())
-            failCount++;
+        /* Need an NFDSlice to NFD-normalize the source to solve this issue
+           u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f>  -> nfc: <u+1d1ba><u+1d165><u+1d16f>
+           u+1d1bc -> nfd: <u+1d1ba><u+1d165>           -> nfc: <u+1d1ba><u+1d165>
+           <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
 
+        // Decomposed supplementary outside char classes
+        // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
         // Composed supplementary outside char classes
-        p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
-        m.reset("test\ud834\uddbc\ud834\udd6f");
-        if (!m.matches())
-            failCount++;
-
-        m = p.matcher("test\ud834\uddc0");
-        if (!m.matches())
-            failCount++;
-
+        // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
         */
+        { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true },
+        { "test\ud834\uddc0",             "test\ud834\uddbc\ud834\udd6f", "m", true },
 
+        { "test\ud834\uddc0",             "test\ud834\uddc0",             "m", true },
+        { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0",             "m", true },
+        };
+
+        int failCount = 0;
+        for (Object[] d : data) {
+            String pn = (String)d[0];
+            String tt = (String)d[1];
+            boolean isFind = "f".equals(((String)d[2]));
+            boolean expected = (boolean)d[3];
+            boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find()
+                                 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches();
+            if (ret != expected) {
+                failCount++;
+                continue;
+            }
+        }
         report("Canonical Equivalence");
     }
 
     /**
      * A basic sanity test of Matcher.replaceAll().

@@ -3844,11 +3897,10 @@
 
     private static Pattern compileTestPattern(String patternString) {
         if (!patternString.startsWith("'")) {
             return Pattern.compile(patternString);
         }
-
         int break1 = patternString.lastIndexOf("'");
         String flagString = patternString.substring(
                                           break1+1, patternString.length());
         patternString = patternString.substring(1, break1);
 

@@ -4090,14 +4142,15 @@
                          "gnameX");
         checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
         report("NamedGroupCapture");
     }
 
-    // This is for bug 6969132
+    // This is for bug 6919132
     private static void nonBmpClassComplementTest() throws Exception {
         Pattern p = Pattern.compile("\\P{Lu}");
         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
+
         if (m.find() && m.start() == 1)
             failCount++;
 
         // from a unicode category
         p = Pattern.compile("\\P{Lu}");

@@ -4111,10 +4164,15 @@
         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
         if (m.find() && m.start() == 1)
             failCount++;
 
+        p = Pattern.compile("\\P{sc=GRANTHA}");
+        m = p.matcher(new String(new int[] {0x11350}, 0, 1));
+        if (m.find() && m.start() == 1)
+            failCount++;
+
         report("NonBmpClassComplement");
     }
 
     private static void unicodePropertiesTest() throws Exception {
         // different forms