--- old/src/share/classes/java/util/regex/Pattern.java 2013-04-30 09:42:14.000000000 -0700
+++ new/src/share/classes/java/util/regex/Pattern.java 2013-04-30 09:42:13.000000000 -0700
@@ -612,6 +612,7 @@
*
White_Space
* Digit
* Hex_Digit
+ * Join_Control
* Noncharacter_Code_Point
* Assigned
*
@@ -662,7 +663,7 @@
* \S |
* A non-whitespace character: [^\s] |
* \w |
- * A word character: [\p{Alpha}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{Digit}\p{gc=Pc}] |
+ * A word character: [\p{Alpha}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{Digit}\p{gc=Pc}\p{IsJoin_Control}] |
* \W |
* A non-word character: [^\w] |
*
--- old/src/share/classes/java/util/regex/UnicodeProp.java 2013-04-30 09:42:14.000000000 -0700
+++ new/src/share/classes/java/util/regex/UnicodeProp.java 2013-04-30 09:42:14.000000000 -0700
@@ -181,6 +181,7 @@
// \p{gc=Mark}
// \p{digit}
// \p{gc=Connector_Punctuation}
+ // \p{Join_Control} 200C..200D
public boolean is(int ch) {
return ALPHABETIC.is(ch) ||
@@ -189,7 +190,15 @@
(1 << Character.COMBINING_SPACING_MARK) |
(1 << Character.DECIMAL_DIGIT_NUMBER) |
(1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1)
- != 0;
+ != 0 ||
+ JOIN_CONTROL.is(ch);
+ }
+ },
+
+ JOIN_CONTROL {
+ // Join_Control property as listed in PropList.txt: code points 200C..200D only
+ public boolean is(int ch) {
+ return (ch == 0x200C || ch == 0x200D); // true for exactly these two code points
}
};
@@ -212,6 +221,7 @@
aliases.put("WHITESPACE", "WHITE_SPACE");
aliases.put("HEXDIGIT","HEX_DIGIT");
aliases.put("NONCHARACTERCODEPOINT", "NONCHARACTER_CODE_POINT");
+ aliases.put("JOINCONTROL", "JOIN_CONTROL");
}
public static UnicodeProp forName(String propName) {
--- old/test/java/util/regex/RegExTest.java 2013-04-30 09:42:15.000000000 -0700
+++ new/test/java/util/regex/RegExTest.java 2013-04-30 09:42:15.000000000 -0700
@@ -33,7 +33,7 @@
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
* 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
- * 7067045 7014640 7189363 8007395
+ * 7067045 7014640 7189363 8007395 8013252 8013254
*/
import java.util.regex.*;
@@ -3390,7 +3390,9 @@
private static void check(Pattern p, String s, String g, String expected) {
Matcher m = p.matcher(s);
m.find();
- if (!m.group(g).equals(expected))
+ if (!m.group(g).equals(expected) || // text captured by group g must equal expected
+ s.charAt(m.start(g)) != expected.charAt(0) || // start(g) must index expected's first char in s; NOTE(review): charAt(0) throws if expected is empty - presumably never passed
+ s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) // end(g) - 1 must index expected's last char in s
failCount++;
}
@@ -3420,19 +3422,42 @@
failCount++;
}
- private static void checkExpectedFail(Matcher m, String g) {
+ private static void checkExpectedIAE(Matcher m, String g) { // passes only if group(g), start(g) and end(g) each throw IllegalArgumentException
m.find();
try {
m.group(g);
- } catch (IllegalArgumentException iae) {
+ } catch (IllegalArgumentException x) {
//iae.printStackTrace();
- return;
- } catch (NullPointerException npe) {
- return;
+ try {
+ m.start(g);
+ } catch (IllegalArgumentException xx) {
+ try {
+ m.end(g); // previously a repeated m.start(g), which left end(String) untested
+ } catch (IllegalArgumentException xxx) {
+ return; // all three lookups rejected g: the only non-failing path
+ }
+ }
}
failCount++;
}
+ private static void checkExpectedNPE(Matcher m) { // passes only if group, start and end each throw NullPointerException for a null argument
+ m.find();
+ try {
+ m.group(null);
+ } catch (NullPointerException x) {
+ try {
+ m.start(null);
+ } catch (NullPointerException xx) {
+ try {
+ m.end(null);
+ } catch (NullPointerException xxx) {
+ return; // all three calls threw: the only non-failing path
+ }
+ }
+ }
+ failCount++; // reached whenever any of the three calls did not throw NullPointerException
+ }
private static void namedGroupCaptureTest() throws Exception {
check(Pattern.compile("x+(?y+)z+"),
@@ -3559,10 +3584,9 @@
checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
checkExpectedFail("(?abc)(def)\\k");
checkExpectedFail("(?abc)(?def)\\k");
- checkExpectedFail(Pattern.compile("(?abc)(def)").matcher("abcdef"),
- "gnameX");
- checkExpectedFail(Pattern.compile("(?abc)(def)").matcher("abcdef"),
- null);
+ checkExpectedIAE(Pattern.compile("(?abc)(def)").matcher("abcdef"),
+ "gnameX");
+ checkExpectedNPE(Pattern.compile("(?abc)(def)").matcher("abcdef"));
report("NamedGroupCapture");
}
@@ -3759,6 +3783,7 @@
Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
+ Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
// javaMethod
Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
@@ -3829,7 +3854,8 @@
Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
(Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
- POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches())
+ POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
+ POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches())
failCount++;
}
--- old/test/java/util/regex/POSIX_Unicode.java 2013-04-30 09:42:16.000000000 -0700
+++ new/test/java/util/regex/POSIX_Unicode.java 2013-04-30 09:42:16.000000000 -0700
@@ -125,6 +125,10 @@
return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
}
+ public static boolean isJoinControl(int ch) { // reference predicate the test compares against \p{IsJoinControl}
+ return (ch == 0x200C || ch == 0x200D);
+ }
+
// \p{alpha}
// \p{gc=Mark}
// \p{digit}
@@ -136,6 +140,7 @@
(1 << Character.COMBINING_SPACING_MARK) |
(1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1)
!= 0 ||
- isDigit(ch);
+ isDigit(ch) ||
+ isJoinControl(ch);
}
}