--- old/src/share/classes/java/util/regex/Pattern.java 2013-04-30 09:42:14.000000000 -0700 +++ new/src/share/classes/java/util/regex/Pattern.java 2013-04-30 09:42:13.000000000 -0700 @@ -612,6 +612,7 @@ *
  • White_Space *
  • Digit *
  • Hex_Digit + *
  • Join_Control *
  • Noncharacter_Code_Point *
  • Assigned * @@ -662,7 +663,7 @@ * \S * A non-whitespace character: [^\s] * \w - * A word character: [\p{Alpha}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{Digit}\p{gc=Pc}] + * A word character: [\p{Alpha}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{Digit}\p{gc=Pc}\p{IsJoin_Control}] * \W * A non-word character: [^\w] * --- old/src/share/classes/java/util/regex/UnicodeProp.java 2013-04-30 09:42:14.000000000 -0700 +++ new/src/share/classes/java/util/regex/UnicodeProp.java 2013-04-30 09:42:14.000000000 -0700 @@ -181,6 +181,7 @@ // \p{gc=Mark} // \p{digit} // \p{gc=Connector_Punctuation} + // \p{Join_Control} 200C..200D public boolean is(int ch) { return ALPHABETIC.is(ch) || @@ -189,7 +190,15 @@ (1 << Character.COMBINING_SPACING_MARK) | (1 << Character.DECIMAL_DIGIT_NUMBER) | (1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1) - != 0; + != 0 || + JOIN_CONTROL.is(ch); + } + }, + + JOIN_CONTROL { + // 200C..200D PropList.txt:Join_Control + public boolean is(int ch) { + return (ch == 0x200C || ch == 0x200D); } }; @@ -212,6 +221,7 @@ aliases.put("WHITESPACE", "WHITE_SPACE"); aliases.put("HEXDIGIT","HEX_DIGIT"); aliases.put("NONCHARACTERCODEPOINT", "NONCHARACTER_CODE_POINT"); + aliases.put("JOINCONTROL", "JOIN_CONTROL"); } public static UnicodeProp forName(String propName) { --- old/test/java/util/regex/RegExTest.java 2013-04-30 09:42:15.000000000 -0700 +++ new/test/java/util/regex/RegExTest.java 2013-04-30 09:42:15.000000000 -0700 @@ -33,7 +33,7 @@ * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 - * 7067045 7014640 7189363 8007395 + * 7067045 7014640 7189363 8007395 8013252 8013254 */ import java.util.regex.*; @@ -3390,7 +3390,9 @@ private static void check(Pattern p, String s, String g, String expected) { Matcher m = p.matcher(s); m.find(); - if (!m.group(g).equals(expected)) + if 
(!m.group(g).equals(expected) || + s.charAt(m.start(g)) != expected.charAt(0) || + s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) failCount++; } @@ -3420,19 +3422,42 @@ failCount++; } - private static void checkExpectedFail(Matcher m, String g) { + private static void checkExpectedIAE(Matcher m, String g) { m.find(); try { m.group(g); - } catch (IllegalArgumentException iae) { + } catch (IllegalArgumentException x) { //iae.printStackTrace(); - return; - } catch (NullPointerException npe) { - return; + try { + m.start(g); + } catch (IllegalArgumentException xx) { + try { + m.end(g); + } catch (IllegalArgumentException xxx) { + return; + } + } } failCount++; } + private static void checkExpectedNPE(Matcher m) { + m.find(); + try { + m.group(null); + } catch (NullPointerException x) { + try { + m.start(null); + } catch (NullPointerException xx) { + try { + m.end(null); + } catch (NullPointerException xxx) { + return; + } + } + } + failCount++; + } private static void namedGroupCaptureTest() throws Exception { check(Pattern.compile("x+(?y+)z+"), @@ -3559,10 +3584,9 @@ checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); checkExpectedFail("(?abc)(def)\\k"); checkExpectedFail("(?abc)(?def)\\k"); - checkExpectedFail(Pattern.compile("(?abc)(def)").matcher("abcdef"), - "gnameX"); - checkExpectedFail(Pattern.compile("(?abc)(def)").matcher("abcdef"), - null); + checkExpectedIAE(Pattern.compile("(?abc)(def)").matcher("abcdef"), + "gnameX"); + checkExpectedNPE(Pattern.compile("(?abc)(def)").matcher("abcdef")); report("NamedGroupCapture"); } @@ -3759,6 +3783,7 @@ Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); + Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); // javaMethod Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); @@ -3829,7 +3854,8 @@ 
Character.isIdeographic(cp) != ideogP.reset(str).matches() || Character.isIdeographic(cp) != ideogJ.reset(str).matches() || (Character.UNASSIGNED == type) == definedP.reset(str).matches() || - POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches()) + POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || + POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches()) failCount++; } --- old/test/java/util/regex/POSIX_Unicode.java 2013-04-30 09:42:16.000000000 -0700 +++ new/test/java/util/regex/POSIX_Unicode.java 2013-04-30 09:42:16.000000000 -0700 @@ -125,6 +125,10 @@ return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef); } + public static boolean isJoinControl(int ch) { + return (ch == 0x200C || ch == 0x200D); + } + // \p{alpha} // \p{gc=Mark} // \p{digit} @@ -136,6 +140,7 @@ (1 << Character.COMBINING_SPACING_MARK) | (1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1) != 0 || - isDigit(ch); + isDigit(ch) || + isJoinControl(ch); } }