< prev index next >

src/java.base/share/classes/java/util/regex/Pattern.java

Print this page
rev 58483 : [mq]: 8237599-Greedy-matching-against-supplementary-chars-does-not-respect-the-region

*** 4338,4355 **** CharPropertyGreedy(CharProperty cp, int cmin) { this.predicate = cp.predicate; this.cmin = cmin; } boolean match(Matcher matcher, int i, CharSequence seq) { int n = 0; int to = matcher.to; // greedy, all the way down while (i < to) { int ch = Character.codePointAt(seq, i); if (!predicate.is(ch)) break; ! i += Character.charCount(ch); n++; } if (i >= to) { matcher.hitEnd = true; } --- 4338,4363 ---- CharPropertyGreedy(CharProperty cp, int cmin) { this.predicate = cp.predicate; this.cmin = cmin; } boolean match(Matcher matcher, int i, CharSequence seq) { + int starti = i; int n = 0; int to = matcher.to; // greedy, all the way down while (i < to) { int ch = Character.codePointAt(seq, i); + int len = Character.charCount(ch); + if (i + len > to) { + // the region cut off the high half of a surrogate pair + matcher.hitEnd = true; + ch = seq.charAt(i); + len = 1; + } if (!predicate.is(ch)) break; ! i += len; n++; } if (i >= to) { matcher.hitEnd = true; }
*** 4358,4368 **** return true; if (n == cmin) return false; // backing off if match fails int ch = Character.codePointBefore(seq, i); ! i -= Character.charCount(ch); n--; } return false; } --- 4366,4377 ---- return true; if (n == cmin) return false; // backing off if match fails int ch = Character.codePointBefore(seq, i); ! // check if the region cut off the low half of a surrogate pair ! i = Math.max(starti, i - Character.charCount(ch)); n--; } return false; }
< prev index next >