--- old/test/java/util/regex/RegExTest.java 2016-03-17 22:10:17.021245167 -0700 +++ new/test/java/util/regex/RegExTest.java 2016-03-17 22:10:16.764246251 -0700 @@ -33,6 +33,7 @@ * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 + * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 * @library /lib/testlibrary * @build jdk.testlibrary.* * @run main RegExTest @@ -2659,51 +2660,103 @@ check(p, "test\u00e4\u0323\u0300", true); check(p, "test\u00e4\u0300\u0323", true); - /* - * The following canonical equivalence tests don't work. Bug id: 4916384. - * - // Decomposed hangul (jamos) - p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ); - m = p.matcher("\u1100\u1161"); - if (!m.matches()) - failCount++; + Object[][] data = new Object[][] { - m.reset("\uac00"); - if (!m.matches()) - failCount++; + // JDK-4867170 + { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, + { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, + { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, + { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, + { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, + { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, + { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, + + { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, + { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, + { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, + { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, + { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, + + // backtracking, force to match "\u1f80", instead of \u1f82" + { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, + + { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, + { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, + + { "[^\u1f80-\u1f82]","\u1f81", "m", false }, + { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 
+ { "[^\u1f01\u0345]", "\u1f81", "f", false }, + + { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, + { "[\u1f80]", "ab\u1f80cd", "f", true }, + { "\u1f80", "ab\u1f80cd", "f", true }, + { "\u1f00\u0345\u0300", "\u1f82", "m", true }, + { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, + { "\u1f82", "\u1f00\u0345\u0300", "m", true }, + { "\u1f82", "\u1f80\u0300", "m", true }, + + // JDK-7080302 # compile failed + { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, + + // JDK-6728861, same cause as the one above + // Pattern pt = Pattern.compile("één", Pattern.CANON_EQ); + { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, + + // JDK-6995635 + // Pattern patternThatIsGonnaBug=Pattern.compile("(ë)",Pattern.CANON_EQ); + { "(\u00e9)", "e\u0301", "m", true }, + + // JDK-6736245 + // interesting special case: nfc(u2add+u0338) -> (u2add+u0338), NOT u2adc + { "\u2ADC", "\u2ADC", "m", true}, // NFC + { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD + + // 4916384. + // Decomposed hangul (jamos) works inside clazz + { "[\u1100\u1161]", "\u1100\u1161", "m", true}, + { "[\u1100\u1161]", "\uac00", "m", true}, - // Composed hangul - p = Pattern.compile("\uac00", Pattern.CANON_EQ); - m = p.matcher("\u1100\u1161"); - if (!m.matches()) - failCount++; + { "[\uac00]", "\u1100\u1161", "m", true}, + { "[\uac00]", "\uac00", "m", true}, - m.reset("\uac00"); - if (!m.matches()) - failCount++; + // Decomposed hangul (jamos) + { "\u1100\u1161", "\u1100\u1161", "m", true}, + { "\u1100\u1161", "\uac00", "m", true}, - // Decomposed supplementary outside char classes - p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ); - m = p.matcher("test\ud834\uddc0"); - if (!m.matches()) - failCount++; + // Composed hangul + { "\uac00", "\u1100\u1161", "m", true }, + { "\uac00", "\uac00", "m", true }, - m.reset("test\ud834\uddbc\ud834\udd6f"); - if (!m.matches()) - failCount++; + /* Need a NFDSlice to nfd the source to solve this issue + u+1d1c0 -> nfd: -> nfc: + u+1d1bc -> nfd: -> nfc: + -> 
nfd: -> nfc: + // Decomposed supplementary outside char classes + // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, // Composed supplementary outside char classes - p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ); - m.reset("test\ud834\uddbc\ud834\udd6f"); - if (!m.matches()) - failCount++; - - m = p.matcher("test\ud834\uddc0"); - if (!m.matches()) - failCount++; + // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, + */ + { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, + { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, - */ + { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, + { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, + }; + // Record mismatches in the class-level failCount, as the other tests do before report(); a local counter would shadow it. + for (Object[] d : data) { + String pn = (String)d[0]; + String tt = (String)d[1]; + boolean isFind = "f".equals(((String)d[2])); + boolean expected = (boolean)d[3]; + boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() + : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); + if (ret != expected) { + failCount++; + continue; + } + } report("Canonical Equivalence"); } @@ -3846,7 +3899,6 @@ if (!patternString.startsWith("'")) { return Pattern.compile(patternString); } - int break1 = patternString.lastIndexOf("'"); String flagString = patternString.substring( break1+1, patternString.length()); @@ -4092,10 +4144,11 @@ report("NamedGroupCapture"); } - // This is for bug 6969132 + // This is for bug 6919132 private static void nonBmpClassComplementTest() throws Exception { Pattern p = Pattern.compile("\\P{Lu}"); Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); + if (m.find() && m.start() == 1) failCount++; @@ -4113,6 +4166,11 @@ if (m.find() && m.start() == 1) failCount++; + p = Pattern.compile("\\P{sc=GRANTHA}"); + m = p.matcher(new String(new int[] {0x11350}, 0, 1)); + if (m.find() && m.start() == 1) + failCount++; + report("NonBmpClassComplement"); }