--- old/src/java.base/share/classes/java/util/regex/Pattern.java 2019-05-22 15:33:10.000000000 -0700
+++ new/src/java.base/share/classes/java/util/regex/Pattern.java 2019-05-22 15:33:10.000000000 -0700
@@ -540,7 +540,7 @@
 * This class is in conformance with Level 1 of Unicode Technical
 * Standard #18: Unicode Regular Expression, plus RL2.1
- * Canonical Equivalents.
+ * Canonical Equivalents and RL2.2 Extended Grapheme Clusters.
 *
* Unicode escape sequences such as \u2014 in Java source code
* are processed as described in section 3.3 of
@@ -1501,15 +1501,8 @@
off++;
continue;
}
- int j = off + Character.charCount(ch0);
+ int j = Grapheme.nextBoundary(src, off, limit);
int ch1;
- while (j < limit) {
- ch1 = src.codePointAt(j);
- if (Grapheme.isBoundary(ch0, ch1))
- break;
- ch0 = ch1;
- j += Character.charCount(ch1);
- }
String seq = src.substring(off, j);
String nfd = Normalizer.normalize(seq, Normalizer.Form.NFD);
off = j;
@@ -3975,14 +3968,7 @@
if (i < matcher.to) {
int ch0 = Character.codePointAt(seq, i);
int n = Character.charCount(ch0);
- int j = i + n;
- while (j < matcher.to) {
- int ch1 = Character.codePointAt(seq, j);
- if (Grapheme.isBoundary(ch0, ch1))
- break;
- ch0 = ch1;
- j += Character.charCount(ch1);
- }
+ int j = Grapheme.nextBoundary(seq, i, matcher.to);
if (i + n == j) { // single, assume nfc cp
if (predicate.is(ch0))
return next.match(matcher, j, seq);
@@ -4021,15 +4007,7 @@
static class XGrapheme extends Node {
boolean match(Matcher matcher, int i, CharSequence seq) {
if (i < matcher.to) {
- int ch0 = Character.codePointAt(seq, i);
- i += Character.charCount(ch0);
- while (i < matcher.to) {
- int ch1 = Character.codePointAt(seq, i);
- if (Grapheme.isBoundary(ch0, ch1))
- break;
- ch0 = ch1;
- i += Character.charCount(ch1);
- }
+ i = Grapheme.nextBoundary(seq, i, matcher.to);
return next.match(matcher, i, seq);
}
matcher.hitEnd = true;
@@ -4059,8 +4037,9 @@
}
if (i < endIndex) {
if (Character.isSurrogatePair(seq.charAt(i-1), seq.charAt(i)) ||
- !Grapheme.isBoundary(Character.codePointBefore(seq, i),
- Character.codePointAt(seq, i))) {
+ Grapheme.nextBoundary(seq,
+ i - Character.charCount(Character.codePointBefore(seq, i)),
+ i + Character.charCount(Character.codePointAt(seq, i))) > i) {
return false;
}
} else {