test/java/util/regex/RegExTest.java

Print this page

        

*** 31,40 **** --- 31,41 ---- * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 + * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 * @library /lib/testlibrary * @build jdk.testlibrary.* * @run main RegExTest * @key randomness */
*** 2657,2711 **** check(p, "testa\u0323\u0308\u0300", true); check(p, "testa\u0308\u0300\u0323", true); check(p, "test\u00e4\u0323\u0300", true); check(p, "test\u00e4\u0300\u0323", true); ! /* ! * The following canonical equivalence tests don't work. Bug id: 4916384. ! * ! // Decomposed hangul (jamos) ! p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ); ! m = p.matcher("\u1100\u1161"); ! if (!m.matches()) ! failCount++; ! m.reset("\uac00"); ! if (!m.matches()) ! failCount++; ! // Composed hangul ! p = Pattern.compile("\uac00", Pattern.CANON_EQ); ! m = p.matcher("\u1100\u1161"); ! if (!m.matches()) ! failCount++; ! m.reset("\uac00"); ! if (!m.matches()) ! failCount++; ! // Decomposed supplementary outside char classes ! p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ); ! m = p.matcher("test\ud834\uddc0"); ! if (!m.matches()) ! failCount++; ! m.reset("test\ud834\uddbc\ud834\udd6f"); ! if (!m.matches()) ! failCount++; // Composed supplementary outside char classes ! p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ); ! m.reset("test\ud834\uddbc\ud834\udd6f"); ! if (!m.matches()) ! failCount++; ! ! m = p.matcher("test\ud834\uddc0"); ! if (!m.matches()) ! failCount++; ! */ report("Canonical Equivalence"); } /** * A basic sanity test of Matcher.replaceAll(). --- 2658,2764 ---- check(p, "testa\u0323\u0308\u0300", true); check(p, "testa\u0308\u0300\u0323", true); check(p, "test\u00e4\u0323\u0300", true); check(p, "test\u00e4\u0300\u0323", true); ! Object[][] data = new Object[][] { ! // JDK-4867170 ! { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, ! { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, ! { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, ! { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, ! { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, ! { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, ! { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, ! ! { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, ! { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, ! { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, ! { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, ! { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, ! ! // backtracking, force to match "\u1f80", instead of \u1f82" ! { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, ! ! { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, ! { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, ! ! { "[^\u1f80-\u1f82]","\u1f81", "m", false }, ! { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, ! { "[^\u1f01\u0345]", "\u1f81", "f", false }, ! ! { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, ! { "[\u1f80]", "ab\u1f80cd", "f", true }, ! { "\u1f80", "ab\u1f80cd", "f", true }, ! { "\u1f00\u0345\u0300", "\u1f82", "m", true }, ! { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, ! { "\u1f82", "\u1f00\u0345\u0300", "m", true }, ! { "\u1f82", "\u1f80\u0300", "m", true }, ! ! // JDK-7080302 # compile failed ! { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, ! ! // JDK-6728861, same cause as above one ! // Pattern pt = Pattern.compile("één", Pattern.CANON_EQ); ! { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, ! ! // JDK-6995635 ! // Pattern patternThatIsGonnaBug=Pattern.compile("(ë)",Pattern.CANON_EQ); ! { "(\u00e9)", "e\u0301", "m", true }, ! ! // JDK-6736245 ! // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc ! { "\u2ADC", "\u2ADC", "m", true}, // NFC ! { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD ! ! // 4916384. ! // Decomposed hangul (jamos) works inside clazz ! { "[\u1100\u1161]", "\u1100\u1161", "m", true}, ! { "[\u1100\u1161]", "\uac00", "m", true}, ! { "[\uac00]", "\u1100\u1161", "m", true}, ! { "[\uac00]", "\uac00", "m", true}, ! // Decomposed hangul (jamos) ! { "\u1100\u1161", "\u1100\u1161", "m", true}, ! { "\u1100\u1161", "\uac00", "m", true}, ! // Composed hangul ! { "\uac00", "\u1100\u1161", "m", true }, ! { "\uac00", "\uac00", "m", true }, ! /* Need a NFDSlice to nfd the source to solve this issue ! u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> ! u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> ! <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> + // Decomposed supplementary outside char classes + // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, // Composed supplementary outside char classes ! // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, */ + { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, + { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, + { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, + { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, + }; + + int failCount = 0; + for (Object[] d : data) { + String pn = (String)d[0]; + String tt = (String)d[1]; + boolean isFind = "f".equals(((String)d[2])); + boolean expected = (boolean)d[3]; + boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() + : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); + if (ret != expected) { + failCount++; + continue; + } + } report("Canonical Equivalence"); } /** * A basic sanity test of Matcher.replaceAll().
*** 3844,3854 **** private static Pattern compileTestPattern(String patternString) { if (!patternString.startsWith("'")) { return Pattern.compile(patternString); } - int break1 = patternString.lastIndexOf("'"); String flagString = patternString.substring( break1+1, patternString.length()); patternString = patternString.substring(1, break1); --- 3897,3906 ----
*** 4090,4103 **** "gnameX"); checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); report("NamedGroupCapture"); } ! // This is for bug 6969132 private static void nonBmpClassComplementTest() throws Exception { Pattern p = Pattern.compile("\\P{Lu}"); Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); if (m.find() && m.start() == 1) failCount++; // from a unicode category p = Pattern.compile("\\P{Lu}"); --- 4142,4156 ---- "gnameX"); checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); report("NamedGroupCapture"); } ! // This is for bug 6919132 private static void nonBmpClassComplementTest() throws Exception { Pattern p = Pattern.compile("\\P{Lu}"); Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); + if (m.find() && m.start() == 1) failCount++; // from a unicode category p = Pattern.compile("\\P{Lu}");
*** 4111,4120 **** --- 4164,4178 ---- p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); if (m.find() && m.start() == 1) failCount++; + p = Pattern.compile("\\P{sc=GRANTHA}"); + m = p.matcher(new String(new int[] {0x11350}, 0, 1)); + if (m.find() && m.start() == 1) + failCount++; + report("NonBmpClassComplement"); } private static void unicodePropertiesTest() throws Exception { // different forms