--- old/src/java.base/share/classes/java/util/regex/Pattern.java 2019-05-22 15:33:10.000000000 -0700 +++ new/src/java.base/share/classes/java/util/regex/Pattern.java 2019-05-22 15:33:10.000000000 -0700 @@ -540,7 +540,7 @@ *

This class is in conformance with Level 1 of Unicode Technical * Standard #18: Unicode Regular Expression, plus RL2.1 - * Canonical Equivalents. + * Canonical Equivalents and RL2.2 Extended Grapheme Clusters. *

* Unicode escape sequences such as \u2014 in Java source code * are processed as described in section 3.3 of @@ -1501,15 +1501,8 @@ off++; continue; } - int j = off + Character.charCount(ch0); + int j = Grapheme.nextBoundary(src, off, limit); int ch1; - while (j < limit) { - ch1 = src.codePointAt(j); - if (Grapheme.isBoundary(ch0, ch1)) - break; - ch0 = ch1; - j += Character.charCount(ch1); - } String seq = src.substring(off, j); String nfd = Normalizer.normalize(seq, Normalizer.Form.NFD); off = j; @@ -3975,14 +3968,7 @@ if (i < matcher.to) { int ch0 = Character.codePointAt(seq, i); int n = Character.charCount(ch0); - int j = i + n; - while (j < matcher.to) { - int ch1 = Character.codePointAt(seq, j); - if (Grapheme.isBoundary(ch0, ch1)) - break; - ch0 = ch1; - j += Character.charCount(ch1); - } + int j = Grapheme.nextBoundary(seq, i, matcher.to); if (i + n == j) { // single, assume nfc cp if (predicate.is(ch0)) return next.match(matcher, j, seq); @@ -4021,15 +4007,7 @@ static class XGrapheme extends Node { boolean match(Matcher matcher, int i, CharSequence seq) { if (i < matcher.to) { - int ch0 = Character.codePointAt(seq, i); - i += Character.charCount(ch0); - while (i < matcher.to) { - int ch1 = Character.codePointAt(seq, i); - if (Grapheme.isBoundary(ch0, ch1)) - break; - ch0 = ch1; - i += Character.charCount(ch1); - } + i = Grapheme.nextBoundary(seq, i, matcher.to); return next.match(matcher, i, seq); } matcher.hitEnd = true; @@ -4059,8 +4037,9 @@ } if (i < endIndex) { if (Character.isSurrogatePair(seq.charAt(i-1), seq.charAt(i)) || - !Grapheme.isBoundary(Character.codePointBefore(seq, i), - Character.codePointAt(seq, i))) { + Grapheme.nextBoundary(seq, + i - Character.charCount(Character.codePointBefore(seq, i)), + i + Character.charCount(Character.codePointAt(seq, i))) > i) { return false; } } else {