1 /* 2 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util.regex; 27 28 import java.util.ConcurrentModificationException; 29 import java.util.Iterator; 30 import java.util.NoSuchElementException; 31 import java.util.Objects; 32 import java.util.Spliterator; 33 import java.util.Spliterators; 34 import java.util.function.Consumer; 35 import java.util.function.Function; 36 import java.util.stream.Stream; 37 import java.util.stream.StreamSupport; 38 39 /** 40 * An engine that performs match operations on a {@linkplain java.lang.CharSequence 41 * character sequence} by interpreting a {@link Pattern}. 42 * 43 * <p> A matcher is created from a pattern by invoking the pattern's {@link 44 * Pattern#matcher matcher} method. 
Once created, a matcher can be used to 45 * perform three different kinds of match operations: 46 * 47 * <ul> 48 * 49 * <li><p> The {@link #matches matches} method attempts to match the entire 50 * input sequence against the pattern. </p></li> 51 * 52 * <li><p> The {@link #lookingAt lookingAt} method attempts to match the 53 * input sequence, starting at the beginning, against the pattern. </p></li> 54 * 55 * <li><p> The {@link #find find} method scans the input sequence looking for 56 * the next subsequence that matches the pattern. </p></li> 57 * 58 * </ul> 59 * 60 * <p> Each of these methods returns a boolean indicating success or failure. 61 * More information about a successful match can be obtained by querying the 62 * state of the matcher. 63 * 64 * <p> A matcher finds matches in a subset of its input called the 65 * <i>region</i>. By default, the region contains all of the matcher's input. 66 * The region can be modified via the {@link #region region} method and queried 67 * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd} 68 * methods. The way that the region boundaries interact with some pattern 69 * constructs can be changed. See {@link #useAnchoringBounds 70 * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds} 71 * for more details. 72 * 73 * <p> This class also defines methods for replacing matched subsequences with 74 * new strings whose contents can, if desired, be computed from the match 75 * result. The {@link #appendReplacement appendReplacement} and {@link 76 * #appendTail appendTail} methods can be used in tandem in order to collect 77 * the result into an existing string buffer or string builder. Alternatively, 78 * the more convenient {@link #replaceAll replaceAll} method can be used to 79 * create a string in which every matching subsequence in the input sequence 80 * is replaced. 
81 * 82 * <p> The explicit state of a matcher includes the start and end indices of 83 * the most recent successful match. It also includes the start and end 84 * indices of the input subsequence captured by each <a 85 * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total 86 * count of such subsequences. As a convenience, methods are also provided for 87 * returning these captured subsequences in string form. 88 * 89 * <p> The explicit state of a matcher is initially undefined; attempting to 90 * query any part of it before a successful match will cause an {@link 91 * IllegalStateException} to be thrown. The explicit state of a matcher is 92 * recomputed by every match operation. 93 * 94 * <p> The implicit state of a matcher includes the input character sequence as 95 * well as the <i>append position</i>, which is initially zero and is updated 96 * by the {@link #appendReplacement appendReplacement} method. 97 * 98 * <p> A matcher may be reset explicitly by invoking its {@link #reset()} 99 * method or, if a new input sequence is desired, its {@link 100 * #reset(java.lang.CharSequence) reset(CharSequence)} method. Resetting a 101 * matcher discards its explicit state information and sets the append position 102 * to zero. 103 * 104 * <p> Instances of this class are not safe for use by multiple concurrent 105 * threads. </p> 106 * 107 * 108 * @author Mike McCloskey 109 * @author Mark Reinhold 110 * @author JSR-51 Expert Group 111 * @since 1.4 112 * @spec JSR-51 113 */ 114 115 public final class Matcher implements MatchResult { 116 117 /** 118 * The Pattern object that created this Matcher. 119 */ 120 Pattern parentPattern; 121 122 /** 123 * The storage used by groups. They may contain invalid values if 124 * a group was skipped during the matching. 125 */ 126 int[] groups; 127 128 /** 129 * The range within the sequence that is to be matched. Anchors 130 * will match at these "hard" boundaries. Changing the region 131 * changes these values. 
132 */ 133 int from, to; 134 135 /** 136 * Lookbehind uses this value to ensure that the subexpression 137 * match ends at the point where the lookbehind was encountered. 138 */ 139 int lookbehindTo; 140 141 /** 142 * The original string being matched. 143 */ 144 CharSequence text; 145 146 /** 147 * Matcher state used by the last node. NOANCHOR is used when a 148 * match does not have to consume all of the input. ENDANCHOR is 149 * the mode used for matching all the input. 150 */ 151 static final int ENDANCHOR = 1; 152 static final int NOANCHOR = 0; 153 int acceptMode = NOANCHOR; 154 155 /** 156 * The range of string that last matched the pattern. If the last 157 * match failed then first is -1; last initially holds 0 then it 158 * holds the index of the end of the last match (which is where the 159 * next search starts). 160 */ 161 int first = -1, last = 0; 162 163 /** 164 * The end index of what matched in the last match operation. 165 */ 166 int oldLast = -1; 167 168 /** 169 * The index of the last position appended in a substitution. 170 */ 171 int lastAppendPosition = 0; 172 173 /** 174 * Storage used by nodes to tell what repetition they are on in 175 * a pattern, and where groups begin. The nodes themselves are stateless, 176 * so they rely on this field to hold state during a match. 177 */ 178 int[] locals; 179 180 /** 181 * Boolean indicating whether or not more input could change 182 * the results of the last match. 183 * 184 * If hitEnd is true, and a match was found, then more input 185 * might cause a different match to be found. 186 * If hitEnd is true and a match was not found, then more 187 * input could cause a match to be found. 188 * If hitEnd is false and a match was found, then more input 189 * will not change the match. 190 * If hitEnd is false and a match was not found, then more 191 * input will not cause a match to be found. 
192 */ 193 boolean hitEnd; 194 195 /** 196 * Boolean indicating whether or not more input could change 197 * a positive match into a negative one. 198 * 199 * If requireEnd is true, and a match was found, then more 200 * input could cause the match to be lost. 201 * If requireEnd is false and a match was found, then more 202 * input might change the match but the match won't be lost. 203 * If a match was not found, then requireEnd has no meaning. 204 */ 205 boolean requireEnd; 206 207 /** 208 * If transparentBounds is true then the boundaries of this 209 * matcher's region are transparent to lookahead, lookbehind, 210 * and boundary matching constructs that try to see beyond them. 211 */ 212 boolean transparentBounds = false; 213 214 /** 215 * If anchoringBounds is true then the boundaries of this 216 * matcher's region match anchors such as ^ and $. 217 */ 218 boolean anchoringBounds = true; 219 220 /** 221 * Number of times this matcher's state has been modified 222 */ 223 int modCount; 224 225 /** 226 * No default constructor. 227 */ 228 Matcher() { 229 } 230 231 /** 232 * All matchers have the state used by Pattern during a match. 233 */ 234 Matcher(Pattern parent, CharSequence text) { 235 this.parentPattern = parent; 236 this.text = text; 237 238 // Allocate state storage 239 int parentGroupCount = Math.max(parent.capturingGroupCount, 10); 240 groups = new int[parentGroupCount * 2]; 241 locals = new int[parent.localCount]; 242 243 // Put fields into initial states 244 reset(); 245 } 246 247 /** 248 * Returns the pattern that is interpreted by this matcher. 249 * 250 * @return The pattern for which this matcher was created 251 */ 252 public Pattern pattern() { 253 return parentPattern; 254 } 255 256 /** 257 * Returns the match state of this matcher as a {@link MatchResult}. 258 * The result is unaffected by subsequent operations performed upon this 259 * matcher. 
260 * 261 * @return a <code>MatchResult</code> with the state of this matcher 262 * @since 1.5 263 */ 264 public MatchResult toMatchResult() { 265 return toMatchResult(text.toString()); 266 } 267 268 private MatchResult toMatchResult(String text) { 269 return new ImmutableMatchResult(this.first, 270 this.last, 271 groupCount(), 272 this.groups.clone(), 273 text); 274 } 275 276 private static class ImmutableMatchResult implements MatchResult { 277 private final int first; 278 private final int last; 279 private final int[] groups; 280 private final int groupCount; 281 private final String text; 282 283 ImmutableMatchResult(int first, int last, int groupCount, 284 int groups[], String text) 285 { 286 this.first = first; 287 this.last = last; 288 this.groupCount = groupCount; 289 this.groups = groups; 290 this.text = text; 291 } 292 293 @Override 294 public int start() { 295 checkMatch(); 296 return first; 297 } 298 299 @Override 300 public int start(int group) { 301 checkMatch(); 302 if (group < 0 || group > groupCount) 303 throw new IndexOutOfBoundsException("No group " + group); 304 return groups[group * 2]; 305 } 306 307 @Override 308 public int end() { 309 checkMatch(); 310 return last; 311 } 312 313 @Override 314 public int end(int group) { 315 checkMatch(); 316 if (group < 0 || group > groupCount) 317 throw new IndexOutOfBoundsException("No group " + group); 318 return groups[group * 2 + 1]; 319 } 320 321 @Override 322 public int groupCount() { 323 return groupCount; 324 } 325 326 @Override 327 public String group() { 328 checkMatch(); 329 return group(0); 330 } 331 332 @Override 333 public String group(int group) { 334 checkMatch(); 335 if (group < 0 || group > groupCount) 336 throw new IndexOutOfBoundsException("No group " + group); 337 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 338 return null; 339 return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 340 } 341 342 private void checkMatch() { 343 if (first < 0) 344 
throw new IllegalStateException("No match found"); 345 346 } 347 } 348 349 /** 350 * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to 351 * find matches with. 352 * 353 * <p> This method causes this matcher to lose information 354 * about the groups of the last match that occurred. The 355 * matcher's position in the input is maintained and its 356 * last append position is unaffected.</p> 357 * 358 * @param newPattern 359 * The new pattern used by this matcher 360 * @return This matcher 361 * @throws IllegalArgumentException 362 * If newPattern is <tt>null</tt> 363 * @since 1.5 364 */ 365 public Matcher usePattern(Pattern newPattern) { 366 if (newPattern == null) 367 throw new IllegalArgumentException("Pattern cannot be null"); 368 parentPattern = newPattern; 369 370 // Reallocate state storage 371 int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10); 372 groups = new int[parentGroupCount * 2]; 373 locals = new int[newPattern.localCount]; 374 for (int i = 0; i < groups.length; i++) 375 groups[i] = -1; 376 for (int i = 0; i < locals.length; i++) 377 locals[i] = -1; 378 modCount++; 379 return this; 380 } 381 382 /** 383 * Resets this matcher. 384 * 385 * <p> Resetting a matcher discards all of its explicit state information 386 * and sets its append position to zero. The matcher's region is set to the 387 * default region, which is its entire character sequence. The anchoring 388 * and transparency of this matcher's region boundaries are unaffected. 389 * 390 * @return This matcher 391 */ 392 public Matcher reset() { 393 first = -1; 394 last = 0; 395 oldLast = -1; 396 for(int i=0; i<groups.length; i++) 397 groups[i] = -1; 398 for(int i=0; i<locals.length; i++) 399 locals[i] = -1; 400 lastAppendPosition = 0; 401 from = 0; 402 to = getTextLength(); 403 modCount++; 404 return this; 405 } 406 407 /** 408 * Resets this matcher with a new input sequence. 
409 * 410 * <p> Resetting a matcher discards all of its explicit state information 411 * and sets its append position to zero. The matcher's region is set to 412 * the default region, which is its entire character sequence. The 413 * anchoring and transparency of this matcher's region boundaries are 414 * unaffected. 415 * 416 * @param input 417 * The new input character sequence 418 * 419 * @return This matcher 420 */ 421 public Matcher reset(CharSequence input) { 422 text = input; 423 return reset(); 424 } 425 426 /** 427 * Returns the start index of the previous match. 428 * 429 * @return The index of the first character matched 430 * 431 * @throws IllegalStateException 432 * If no match has yet been attempted, 433 * or if the previous match operation failed 434 */ 435 public int start() { 436 if (first < 0) 437 throw new IllegalStateException("No match available"); 438 return first; 439 } 440 441 /** 442 * Returns the start index of the subsequence captured by the given group 443 * during the previous match operation. 444 * 445 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 446 * to right, starting at one. Group zero denotes the entire pattern, so 447 * the expression <i>m.</i><tt>start(0)</tt> is equivalent to 448 * <i>m.</i><tt>start()</tt>. 
</p> 449 * 450 * @param group 451 * The index of a capturing group in this matcher's pattern 452 * 453 * @return The index of the first character captured by the group, 454 * or <tt>-1</tt> if the match was successful but the group 455 * itself did not match anything 456 * 457 * @throws IllegalStateException 458 * If no match has yet been attempted, 459 * or if the previous match operation failed 460 * 461 * @throws IndexOutOfBoundsException 462 * If there is no capturing group in the pattern 463 * with the given index 464 */ 465 public int start(int group) { 466 if (first < 0) 467 throw new IllegalStateException("No match available"); 468 if (group < 0 || group > groupCount()) 469 throw new IndexOutOfBoundsException("No group " + group); 470 return groups[group * 2]; 471 } 472 473 /** 474 * Returns the start index of the subsequence captured by the given 475 * <a href="Pattern.html#groupname">named-capturing group</a> during the 476 * previous match operation. 477 * 478 * @param name 479 * The name of a named-capturing group in this matcher's pattern 480 * 481 * @return The index of the first character captured by the group, 482 * or {@code -1} if the match was successful but the group 483 * itself did not match anything 484 * 485 * @throws IllegalStateException 486 * If no match has yet been attempted, 487 * or if the previous match operation failed 488 * 489 * @throws IllegalArgumentException 490 * If there is no capturing group in the pattern 491 * with the given name 492 * @since 1.8 493 */ 494 public int start(String name) { 495 return groups[getMatchedGroupIndex(name) * 2]; 496 } 497 498 /** 499 * Returns the offset after the last character matched. 
500 * 501 * @return The offset after the last character matched 502 * 503 * @throws IllegalStateException 504 * If no match has yet been attempted, 505 * or if the previous match operation failed 506 */ 507 public int end() { 508 if (first < 0) 509 throw new IllegalStateException("No match available"); 510 return last; 511 } 512 513 /** 514 * Returns the offset after the last character of the subsequence 515 * captured by the given group during the previous match operation. 516 * 517 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 518 * to right, starting at one. Group zero denotes the entire pattern, so 519 * the expression <i>m.</i><tt>end(0)</tt> is equivalent to 520 * <i>m.</i><tt>end()</tt>. </p> 521 * 522 * @param group 523 * The index of a capturing group in this matcher's pattern 524 * 525 * @return The offset after the last character captured by the group, 526 * or <tt>-1</tt> if the match was successful 527 * but the group itself did not match anything 528 * 529 * @throws IllegalStateException 530 * If no match has yet been attempted, 531 * or if the previous match operation failed 532 * 533 * @throws IndexOutOfBoundsException 534 * If there is no capturing group in the pattern 535 * with the given index 536 */ 537 public int end(int group) { 538 if (first < 0) 539 throw new IllegalStateException("No match available"); 540 if (group < 0 || group > groupCount()) 541 throw new IndexOutOfBoundsException("No group " + group); 542 return groups[group * 2 + 1]; 543 } 544 545 /** 546 * Returns the offset after the last character of the subsequence 547 * captured by the given <a href="Pattern.html#groupname">named-capturing 548 * group</a> during the previous match operation. 
549 * 550 * @param name 551 * The name of a named-capturing group in this matcher's pattern 552 * 553 * @return The offset after the last character captured by the group, 554 * or {@code -1} if the match was successful 555 * but the group itself did not match anything 556 * 557 * @throws IllegalStateException 558 * If no match has yet been attempted, 559 * or if the previous match operation failed 560 * 561 * @throws IllegalArgumentException 562 * If there is no capturing group in the pattern 563 * with the given name 564 * @since 1.8 565 */ 566 public int end(String name) { 567 return groups[getMatchedGroupIndex(name) * 2 + 1]; 568 } 569 570 /** 571 * Returns the input subsequence matched by the previous match. 572 * 573 * <p> For a matcher <i>m</i> with input sequence <i>s</i>, 574 * the expressions <i>m.</i><tt>group()</tt> and 575 * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt> <i>m.</i><tt>end())</tt> 576 * are equivalent. </p> 577 * 578 * <p> Note that some patterns, for example <tt>a*</tt>, match the empty 579 * string. This method will return the empty string when the pattern 580 * successfully matches the empty string in the input. </p> 581 * 582 * @return The (possibly empty) subsequence matched by the previous match, 583 * in string form 584 * 585 * @throws IllegalStateException 586 * If no match has yet been attempted, 587 * or if the previous match operation failed 588 */ 589 public String group() { 590 return group(0); 591 } 592 593 /** 594 * Returns the input subsequence captured by the given group during the 595 * previous match operation. 596 * 597 * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index 598 * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and 599 * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt> <i>m.</i><tt>end(</tt><i>g</i><tt>))</tt> 600 * are equivalent. 
</p> 601 * 602 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 603 * to right, starting at one. Group zero denotes the entire pattern, so 604 * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>. 605 * </p> 606 * 607 * <p> If the match was successful but the group specified failed to match 608 * any part of the input sequence, then <tt>null</tt> is returned. Note 609 * that some groups, for example <tt>(a*)</tt>, match the empty string. 610 * This method will return the empty string when such a group successfully 611 * matches the empty string in the input. </p> 612 * 613 * @param group 614 * The index of a capturing group in this matcher's pattern 615 * 616 * @return The (possibly empty) subsequence captured by the group 617 * during the previous match, or <tt>null</tt> if the group 618 * failed to match part of the input 619 * 620 * @throws IllegalStateException 621 * If no match has yet been attempted, 622 * or if the previous match operation failed 623 * 624 * @throws IndexOutOfBoundsException 625 * If there is no capturing group in the pattern 626 * with the given index 627 */ 628 public String group(int group) { 629 if (first < 0) 630 throw new IllegalStateException("No match found"); 631 if (group < 0 || group > groupCount()) 632 throw new IndexOutOfBoundsException("No group " + group); 633 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 634 return null; 635 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 636 } 637 638 /** 639 * Returns the input subsequence captured by the given 640 * <a href="Pattern.html#groupname">named-capturing group</a> during the previous 641 * match operation. 642 * 643 * <p> If the match was successful but the group specified failed to match 644 * any part of the input sequence, then <tt>null</tt> is returned. Note 645 * that some groups, for example <tt>(a*)</tt>, match the empty string. 
646 * This method will return the empty string when such a group successfully 647 * matches the empty string in the input. </p> 648 * 649 * @param name 650 * The name of a named-capturing group in this matcher's pattern 651 * 652 * @return The (possibly empty) subsequence captured by the named group 653 * during the previous match, or <tt>null</tt> if the group 654 * failed to match part of the input 655 * 656 * @throws IllegalStateException 657 * If no match has yet been attempted, 658 * or if the previous match operation failed 659 * 660 * @throws IllegalArgumentException 661 * If there is no capturing group in the pattern 662 * with the given name 663 * @since 1.7 664 */ 665 public String group(String name) { 666 int group = getMatchedGroupIndex(name); 667 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 668 return null; 669 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 670 } 671 672 /** 673 * Returns the number of capturing groups in this matcher's pattern. 674 * 675 * <p> Group zero denotes the entire pattern by convention. It is not 676 * included in this count. 677 * 678 * <p> Any non-negative integer smaller than or equal to the value 679 * returned by this method is guaranteed to be a valid group index for 680 * this matcher. </p> 681 * 682 * @return The number of capturing groups in this matcher's pattern 683 */ 684 public int groupCount() { 685 return parentPattern.capturingGroupCount - 1; 686 } 687 688 /** 689 * Attempts to match the entire region against the pattern. 690 * 691 * <p> If the match succeeds then more information can be obtained via the 692 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. 
</p> 693 * 694 * @return <tt>true</tt> if, and only if, the entire region sequence 695 * matches this matcher's pattern 696 */ 697 public boolean matches() { 698 return match(from, ENDANCHOR); 699 } 700 701 /** 702 * Attempts to find the next subsequence of the input sequence that matches 703 * the pattern. 704 * 705 * <p> This method starts at the beginning of this matcher's region, or, if 706 * a previous invocation of the method was successful and the matcher has 707 * not since been reset, at the first character not matched by the previous 708 * match. 709 * 710 * <p> If the match succeeds then more information can be obtained via the 711 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p> 712 * 713 * @return <tt>true</tt> if, and only if, a subsequence of the input 714 * sequence matches this matcher's pattern 715 */ 716 public boolean find() { 717 int nextSearchIndex = last; 718 if (nextSearchIndex == first) 719 nextSearchIndex++; 720 721 // If next search starts before region, start it at region 722 if (nextSearchIndex < from) 723 nextSearchIndex = from; 724 725 // If next search starts beyond region then it fails 726 if (nextSearchIndex > to) { 727 for (int i = 0; i < groups.length; i++) 728 groups[i] = -1; 729 return false; 730 } 731 return search(nextSearchIndex); 732 } 733 734 /** 735 * Resets this matcher and then attempts to find the next subsequence of 736 * the input sequence that matches the pattern, starting at the specified 737 * index. 738 * 739 * <p> If the match succeeds then more information can be obtained via the 740 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent 741 * invocations of the {@link #find()} method will start at the first 742 * character not matched by this match. </p> 743 * 744 * @param start the index to start searching for a match 745 * @throws IndexOutOfBoundsException 746 * If start is less than zero or if start is greater than the 747 * length of the input sequence. 
748 * 749 * @return <tt>true</tt> if, and only if, a subsequence of the input 750 * sequence starting at the given index matches this matcher's 751 * pattern 752 */ 753 public boolean find(int start) { 754 int limit = getTextLength(); 755 if ((start < 0) || (start > limit)) 756 throw new IndexOutOfBoundsException("Illegal start index"); 757 reset(); 758 return search(start); 759 } 760 761 /** 762 * Attempts to match the input sequence, starting at the beginning of the 763 * region, against the pattern. 764 * 765 * <p> Like the {@link #matches matches} method, this method always starts 766 * at the beginning of the region; unlike that method, it does not 767 * require that the entire region be matched. 768 * 769 * <p> If the match succeeds then more information can be obtained via the 770 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p> 771 * 772 * @return <tt>true</tt> if, and only if, a prefix of the input 773 * sequence matches this matcher's pattern 774 */ 775 public boolean lookingAt() { 776 return match(from, NOANCHOR); 777 } 778 779 /** 780 * Returns a literal replacement <code>String</code> for the specified 781 * <code>String</code>. 782 * 783 * This method produces a <code>String</code> that will work 784 * as a literal replacement <code>s</code> in the 785 * <code>appendReplacement</code> method of the {@link Matcher} class. 786 * The <code>String</code> produced will match the sequence of characters 787 * in <code>s</code> treated as a literal sequence. Slashes ('\') and 788 * dollar signs ('$') will be given no special meaning. 
789 * 790 * @param s The string to be literalized 791 * @return A literal string replacement 792 * @since 1.5 793 */ 794 public static String quoteReplacement(String s) { 795 if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1)) 796 return s; 797 StringBuilder sb = new StringBuilder(); 798 for (int i=0; i<s.length(); i++) { 799 char c = s.charAt(i); 800 if (c == '\\' || c == '$') { 801 sb.append('\\'); 802 } 803 sb.append(c); 804 } 805 return sb.toString(); 806 } 807 808 /** 809 * Implements a non-terminal append-and-replace step. 810 * 811 * <p> This method performs the following actions: </p> 812 * 813 * <ol> 814 * 815 * <li><p> It reads characters from the input sequence, starting at the 816 * append position, and appends them to the given string buffer. It 817 * stops after reading the last character preceding the previous match, 818 * that is, the character at index {@link 819 * #start()} <tt>-</tt> <tt>1</tt>. </p></li> 820 * 821 * <li><p> It appends the given replacement string to the string buffer. 822 * </p></li> 823 * 824 * <li><p> It sets the append position of this matcher to the index of 825 * the last character matched, plus one, that is, to {@link #end()}. 826 * </p></li> 827 * 828 * </ol> 829 * 830 * <p> The replacement string may contain references to subsequences 831 * captured during the previous match: Each occurrence of 832 * <tt>${</tt><i>name</i><tt>}</tt> or <tt>$</tt><i>g</i> 833 * will be replaced by the result of evaluating the corresponding 834 * {@link #group(String) group(name)} or {@link #group(int) group(g)} 835 * respectively. For <tt>$</tt><i>g</i>, 836 * the first number after the <tt>$</tt> is always treated as part of 837 * the group reference. Subsequent numbers are incorporated into g if 838 * they would form a legal group reference. Only the numerals '0' 839 * through '9' are considered as potential components of the group 840 * reference. 
If the second group matched the string <tt>"foo"</tt>, for 841 * example, then passing the replacement string <tt>"$2bar"</tt> would 842 * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar 843 * sign (<tt>$</tt>) may be included as a literal in the replacement 844 * string by preceding it with a backslash (<tt>\$</tt>). 845 * 846 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 847 * the replacement string may cause the results to be different than if it 848 * were being treated as a literal replacement string. Dollar signs may be 849 * treated as references to captured subsequences as described above, and 850 * backslashes are used to escape literal characters in the replacement 851 * string. 852 * 853 * <p> This method is intended to be used in a loop together with the 854 * {@link #appendTail appendTail} and {@link #find find} methods. The 855 * following code, for example, writes <tt>one dog two dogs in the 856 * yard</tt> to the standard-output stream: </p> 857 * 858 * <blockquote><pre> 859 * Pattern p = Pattern.compile("cat"); 860 * Matcher m = p.matcher("one cat two cats in the yard"); 861 * StringBuffer sb = new StringBuffer(); 862 * while (m.find()) { 863 * m.appendReplacement(sb, "dog"); 864 * } 865 * m.appendTail(sb); 866 * System.out.println(sb.toString());</pre></blockquote> 867 * 868 * @param sb 869 * The target string buffer 870 * 871 * @param replacement 872 * The replacement string 873 * 874 * @return This matcher 875 * 876 * @throws IllegalStateException 877 * If no match has yet been attempted, 878 * or if the previous match operation failed 879 * 880 * @throws IllegalArgumentException 881 * If the replacement string refers to a named-capturing 882 * group that does not exist in the pattern 883 * 884 * @throws IndexOutOfBoundsException 885 * If the replacement string refers to a capturing group 886 * that does not exist in the pattern 887 */ 888 public Matcher appendReplacement(StringBuffer sb, String 
replacement) { 889 // If no match, return error 890 if (first < 0) 891 throw new IllegalStateException("No match available"); 892 StringBuilder result = new StringBuilder(); 893 appendExpandedReplacement(replacement, result); 894 // Append the intervening text 895 sb.append(text, lastAppendPosition, first); 896 // Append the match substitution 897 sb.append(result); 898 lastAppendPosition = last; 899 modCount++; 900 return this; 901 } 902 903 /** 904 * Implements a non-terminal append-and-replace step. 905 * 906 * <p> This method performs the following actions: </p> 907 * 908 * <ol> 909 * 910 * <li><p> It reads characters from the input sequence, starting at the 911 * append position, and appends them to the given string builder. It 912 * stops after reading the last character preceding the previous match, 913 * that is, the character at index {@link 914 * #start()} <tt>-</tt> <tt>1</tt>. </p></li> 915 * 916 * <li><p> It appends the given replacement string to the string builder. 917 * </p></li> 918 * 919 * <li><p> It sets the append position of this matcher to the index of 920 * the last character matched, plus one, that is, to {@link #end()}. 921 * </p></li> 922 * 923 * </ol> 924 * 925 * <p> The replacement string may contain references to subsequences 926 * captured during the previous match: Each occurrence of 927 * <tt>$</tt><i>g</i> will be replaced by the result of 928 * evaluating {@link #group(int) group}<tt>(</tt><i>g</i><tt>)</tt>. 929 * The first number after the <tt>$</tt> is always treated as part of 930 * the group reference. Subsequent numbers are incorporated into g if 931 * they would form a legal group reference. Only the numerals '0' 932 * through '9' are considered as potential components of the group 933 * reference. If the second group matched the string <tt>"foo"</tt>, for 934 * example, then passing the replacement string <tt>"$2bar"</tt> would 935 * cause <tt>"foobar"</tt> to be appended to the string builder. 
A dollar 936 * sign (<tt>$</tt>) may be included as a literal in the replacement 937 * string by preceding it with a backslash (<tt>\$</tt>). 938 * 939 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 940 * the replacement string may cause the results to be different than if it 941 * were being treated as a literal replacement string. Dollar signs may be 942 * treated as references to captured subsequences as described above, and 943 * backslashes are used to escape literal characters in the replacement 944 * string. 945 * 946 * <p> This method is intended to be used in a loop together with the 947 * {@link #appendTail appendTail} and {@link #find find} methods. The 948 * following code, for example, writes <tt>one dog two dogs in the 949 * yard</tt> to the standard-output stream: </p> 950 * 951 * <blockquote><pre> 952 * Pattern p = Pattern.compile("cat"); 953 * Matcher m = p.matcher("one cat two cats in the yard"); 954 * StringBuilder sb = new StringBuilder(); 955 * while (m.find()) { 956 * m.appendReplacement(sb, "dog"); 957 * } 958 * m.appendTail(sb); 959 * System.out.println(sb.toString());</pre></blockquote> 960 * 961 * @param sb 962 * The target string builder 963 * @param replacement 964 * The replacement string 965 * @return This matcher 966 * 967 * @throws IllegalStateException 968 * If no match has yet been attempted, 969 * or if the previous match operation failed 970 * @throws IllegalArgumentException 971 * If the replacement string refers to a named-capturing 972 * group that does not exist in the pattern 973 * @throws IndexOutOfBoundsException 974 * If the replacement string refers to a capturing group 975 * that does not exist in the pattern 976 * @since 1.9 977 */ 978 public Matcher appendReplacement(StringBuilder sb, String replacement) { 979 // If no match, return error 980 if (first < 0) 981 throw new IllegalStateException("No match available"); 982 StringBuilder result = new StringBuilder(); 983 
appendExpandedReplacement(replacement, result); 984 // Append the intervening text 985 sb.append(text, lastAppendPosition, first); 986 // Append the match substitution 987 sb.append(result); 988 lastAppendPosition = last; 989 modCount++; 990 return this; 991 } 992 993 /** 994 * Processes replacement string to replace group references with 995 * groups. 996 */ 997 private StringBuilder appendExpandedReplacement( 998 String replacement, StringBuilder result) { 999 int cursor = 0; 1000 while (cursor < replacement.length()) { 1001 char nextChar = replacement.charAt(cursor); 1002 if (nextChar == '\\') { 1003 cursor++; 1004 if (cursor == replacement.length()) 1005 throw new IllegalArgumentException( 1006 "character to be escaped is missing"); 1007 nextChar = replacement.charAt(cursor); 1008 result.append(nextChar); 1009 cursor++; 1010 } else if (nextChar == '$') { 1011 // Skip past $ 1012 cursor++; 1013 // Throw IAE if this "$" is the last character in replacement 1014 if (cursor == replacement.length()) 1015 throw new IllegalArgumentException( 1016 "Illegal group reference: group index is missing"); 1017 nextChar = replacement.charAt(cursor); 1018 int refNum = -1; 1019 if (nextChar == '{') { 1020 cursor++; 1021 StringBuilder gsb = new StringBuilder(); 1022 while (cursor < replacement.length()) { 1023 nextChar = replacement.charAt(cursor); 1024 if (ASCII.isLower(nextChar) || 1025 ASCII.isUpper(nextChar) || 1026 ASCII.isDigit(nextChar)) { 1027 gsb.append(nextChar); 1028 cursor++; 1029 } else { 1030 break; 1031 } 1032 } 1033 if (gsb.length() == 0) 1034 throw new IllegalArgumentException( 1035 "named capturing group has 0 length name"); 1036 if (nextChar != '}') 1037 throw new IllegalArgumentException( 1038 "named capturing group is missing trailing '}'"); 1039 String gname = gsb.toString(); 1040 if (ASCII.isDigit(gname.charAt(0))) 1041 throw new IllegalArgumentException( 1042 "capturing group name {" + gname + 1043 "} starts with digit character"); 1044 if 
(!parentPattern.namedGroups().containsKey(gname)) 1045 throw new IllegalArgumentException( 1046 "No group with name {" + gname + "}"); 1047 refNum = parentPattern.namedGroups().get(gname); 1048 cursor++; 1049 } else { 1050 // The first number is always a group 1051 refNum = nextChar - '0'; 1052 if ((refNum < 0) || (refNum > 9)) 1053 throw new IllegalArgumentException( 1054 "Illegal group reference"); 1055 cursor++; 1056 // Capture the largest legal group string 1057 boolean done = false; 1058 while (!done) { 1059 if (cursor >= replacement.length()) { 1060 break; 1061 } 1062 int nextDigit = replacement.charAt(cursor) - '0'; 1063 if ((nextDigit < 0) || (nextDigit > 9)) { // not a number 1064 break; 1065 } 1066 int newRefNum = (refNum * 10) + nextDigit; 1067 if (groupCount() < newRefNum) { 1068 done = true; 1069 } else { 1070 refNum = newRefNum; 1071 cursor++; 1072 } 1073 } 1074 } 1075 // Append group 1076 if (start(refNum) != -1 && end(refNum) != -1) 1077 result.append(text, start(refNum), end(refNum)); 1078 } else { 1079 result.append(nextChar); 1080 cursor++; 1081 } 1082 } 1083 return result; 1084 } 1085 1086 /** 1087 * Implements a terminal append-and-replace step. 1088 * 1089 * <p> This method reads characters from the input sequence, starting at 1090 * the append position, and appends them to the given string buffer. It is 1091 * intended to be invoked after one or more invocations of the {@link 1092 * #appendReplacement appendReplacement} method in order to copy the 1093 * remainder of the input sequence. </p> 1094 * 1095 * @param sb 1096 * The target string buffer 1097 * 1098 * @return The target string buffer 1099 */ 1100 public StringBuffer appendTail(StringBuffer sb) { 1101 sb.append(text, lastAppendPosition, getTextLength()); 1102 return sb; 1103 } 1104 1105 /** 1106 * Implements a terminal append-and-replace step. 
1107 * 1108 * <p> This method reads characters from the input sequence, starting at 1109 * the append position, and appends them to the given string builder. It is 1110 * intended to be invoked after one or more invocations of the {@link 1111 * #appendReplacement appendReplacement} method in order to copy the 1112 * remainder of the input sequence. </p> 1113 * 1114 * @param sb 1115 * The target string builder 1116 * 1117 * @return The target string builder 1118 * 1119 * @since 1.9 1120 */ 1121 public StringBuilder appendTail(StringBuilder sb) { 1122 sb.append(text, lastAppendPosition, getTextLength()); 1123 return sb; 1124 } 1125 1126 /** 1127 * Replaces every subsequence of the input sequence that matches the 1128 * pattern with the given replacement string. 1129 * 1130 * <p> This method first resets this matcher. It then scans the input 1131 * sequence looking for matches of the pattern. Characters that are not 1132 * part of any match are appended directly to the result string; each match 1133 * is replaced in the result by the replacement string. The replacement 1134 * string may contain references to captured subsequences as in the {@link 1135 * #appendReplacement appendReplacement} method. 1136 * 1137 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 1138 * the replacement string may cause the results to be different than if it 1139 * were being treated as a literal replacement string. Dollar signs may be 1140 * treated as references to captured subsequences as described above, and 1141 * backslashes are used to escape literal characters in the replacement 1142 * string. 1143 * 1144 * <p> Given the regular expression <tt>a*b</tt>, the input 1145 * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string 1146 * <tt>"-"</tt>, an invocation of this method on a matcher for that 1147 * expression would yield the string <tt>"-foo-foo-foo-"</tt>. 1148 * 1149 * <p> Invoking this method changes this matcher's state. 
If the matcher 1150 * is to be used in further matching operations then it should first be 1151 * reset. </p> 1152 * 1153 * @param replacement 1154 * The replacement string 1155 * 1156 * @return The string constructed by replacing each matching subsequence 1157 * by the replacement string, substituting captured subsequences 1158 * as needed 1159 */ 1160 public String replaceAll(String replacement) { 1161 reset(); 1162 boolean result = find(); 1163 if (result) { 1164 StringBuilder sb = new StringBuilder(); 1165 do { 1166 appendReplacement(sb, replacement); 1167 result = find(); 1168 } while (result); 1169 appendTail(sb); 1170 return sb.toString(); 1171 } 1172 return text.toString(); 1173 } 1174 1175 /** 1176 * Replaces every subsequence of the input sequence that matches the 1177 * pattern with the result of applying the given replacer function to the 1178 * match result of this matcher corresponding to that subsequence. 1179 * Exceptions thrown by the function are relayed to the caller. 1180 * 1181 * <p> This method first resets this matcher. It then scans the input 1182 * sequence looking for matches of the pattern. Characters that are not 1183 * part of any match are appended directly to the result string; each match 1184 * is replaced in the result by the applying the replacer function that 1185 * returns a replacement string. Each replacement string may contain 1186 * references to captured subsequences as in the {@link #appendReplacement 1187 * appendReplacement} method. 1188 * 1189 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 1190 * a replacement string may cause the results to be different than if it 1191 * were being treated as a literal replacement string. Dollar signs may be 1192 * treated as references to captured subsequences as described above, and 1193 * backslashes are used to escape literal characters in the replacement 1194 * string. 
     *
     * <p> Given the regular expression <tt>dog</tt>, the input
     * <tt>"zzzdogzzzdogzzz"</tt>, and the function
     * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
     * a matcher for that expression would yield the string
     * <tt>"zzzDOGzzzDOGzzz"</tt>.
     *
     * <p> Invoking this method changes this matcher's state. If the matcher
     * is to be used in further matching operations then it should first be
     * reset. </p>
     *
     * <p> The replacer function should not modify this matcher's state during
     * replacement. This method will, on a best-effort basis, throw a
     * {@link java.util.ConcurrentModificationException} if such modification is
     * detected.
     *
     * <p> The state of each match result passed to the replacer function is
     * guaranteed to be constant only for the duration of the replacer function
     * call and only if the replacer function does not modify this matcher's
     * state.
     *
     * @implNote
     * This implementation applies the replacer function to this matcher, which
     * is an instance of {@code MatchResult}.
     *
     * @param  replacer
     *         The function to be applied to the match result of this matcher
     *         that returns a replacement string.
     * @return  The string constructed by replacing each matching subsequence
     *          with the result of applying the replacer function to that
     *          matched subsequence, substituting captured subsequences as
     *          needed.
1227 * @throws NullPointerException if the replacer function is null 1228 * @throws ConcurrentModificationException if it is detected, on a 1229 * best-effort basis, that the replacer function modified this 1230 * matcher's state 1231 * @since 1.9 1232 */ 1233 public String replaceAll(Function<MatchResult, String> replacer) { 1234 Objects.requireNonNull(replacer); 1235 reset(); 1236 boolean result = find(); 1237 if (result) { 1238 StringBuilder sb = new StringBuilder(); 1239 do { 1240 int ec = modCount; 1241 String replacement = replacer.apply(this); 1242 if (ec != modCount) 1243 throw new ConcurrentModificationException(); 1244 appendReplacement(sb, replacement); 1245 result = find(); 1246 } while (result); 1247 appendTail(sb); 1248 return sb.toString(); 1249 } 1250 return text.toString(); 1251 } 1252 1253 /** 1254 * Returns a stream of match results for each subsequence of the input 1255 * sequence that matches the pattern. The match results occur in the 1256 * same order as the matching subsequences in the input sequence. 1257 * 1258 * <p> Each match result is produced as if by {@link #toMatchResult()}. 1259 * 1260 * <p> This method does not reset this matcher. Matching starts on 1261 * initiation of the terminal stream operation either at the beginning of 1262 * this matcher's region, or, if the matcher has not since been reset, at 1263 * the first character not matched by a previous match. 1264 * 1265 * <p> If the matcher is to be used for further matching operations after 1266 * the terminal stream operation completes then it should be first reset. 1267 * 1268 * <p> This matcher's state should not be modified during execution of the 1269 * returned stream's pipeline. The returned stream's source 1270 * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort 1271 * basis, throw a {@link java.util.ConcurrentModificationException} if such 1272 * modification is detected. 1273 * 1274 * @return a sequential stream of match results. 
1275 * @since 1.9 1276 */ 1277 public Stream<MatchResult> results() { 1278 class MatchResultIterator implements Iterator<MatchResult> { 1279 // -ve for call to find, 0 for not found, 1 for found 1280 int state = -1; 1281 // State for concurrent modification checking 1282 // -1 for uninitialized 1283 int expectedCount = -1; 1284 // The input sequence as a string, set once only after first find 1285 // Avoids repeated conversion from CharSequence for each match 1286 String textAsString; 1287 1288 @Override 1289 public MatchResult next() { 1290 if (expectedCount >= 0 && expectedCount != modCount) 1291 throw new ConcurrentModificationException(); 1292 1293 if (!hasNext()) 1294 throw new NoSuchElementException(); 1295 1296 state = -1; 1297 return toMatchResult(textAsString); 1298 } 1299 1300 @Override 1301 public boolean hasNext() { 1302 if (state >= 0) 1303 return state == 1; 1304 1305 // Defer throwing ConcurrentModificationException to when next 1306 // or forEachRemaining is called. The is consistent with other 1307 // fail-fast implementations. 1308 if (expectedCount >= 0 && expectedCount != modCount) 1309 return true; 1310 1311 boolean found = find(); 1312 // Capture the input sequence as a string on first find 1313 if (found && state < 0) 1314 textAsString = text.toString(); 1315 state = found ? 1 : 0; 1316 expectedCount = modCount; 1317 return found; 1318 } 1319 1320 @Override 1321 public void forEachRemaining(Consumer<? 
super MatchResult> action) { 1322 if (expectedCount >= 0 && expectedCount != modCount) 1323 throw new ConcurrentModificationException(); 1324 1325 int s = state; 1326 if (s == 0) 1327 return; 1328 1329 // Set state to report no more elements on further operations 1330 state = 0; 1331 expectedCount = -1; 1332 1333 // Perform a first find if required 1334 if (s < 0 && !find()) 1335 return; 1336 1337 // Capture the input sequence as a string on first find 1338 textAsString = text.toString(); 1339 1340 do { 1341 int ec = modCount; 1342 action.accept(toMatchResult(textAsString)); 1343 if (ec != modCount) 1344 throw new ConcurrentModificationException(); 1345 } while (find()); 1346 } 1347 } 1348 return StreamSupport.stream(Spliterators.spliteratorUnknownSize( 1349 new MatchResultIterator(), Spliterator.ORDERED | Spliterator.NONNULL), false); 1350 } 1351 1352 /** 1353 * Replaces the first subsequence of the input sequence that matches the 1354 * pattern with the given replacement string. 1355 * 1356 * <p> This method first resets this matcher. It then scans the input 1357 * sequence looking for a match of the pattern. Characters that are not 1358 * part of the match are appended directly to the result string; the match 1359 * is replaced in the result by the replacement string. The replacement 1360 * string may contain references to captured subsequences as in the {@link 1361 * #appendReplacement appendReplacement} method. 1362 * 1363 * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 1364 * the replacement string may cause the results to be different than if it 1365 * were being treated as a literal replacement string. Dollar signs may be 1366 * treated as references to captured subsequences as described above, and 1367 * backslashes are used to escape literal characters in the replacement 1368 * string. 
1369 * 1370 * <p> Given the regular expression <tt>dog</tt>, the input 1371 * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string 1372 * <tt>"cat"</tt>, an invocation of this method on a matcher for that 1373 * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>. </p> 1374 * 1375 * <p> Invoking this method changes this matcher's state. If the matcher 1376 * is to be used in further matching operations then it should first be 1377 * reset. </p> 1378 * 1379 * @param replacement 1380 * The replacement string 1381 * @return The string constructed by replacing the first matching 1382 * subsequence by the replacement string, substituting captured 1383 * subsequences as needed 1384 */ 1385 public String replaceFirst(String replacement) { 1386 if (replacement == null) 1387 throw new NullPointerException("replacement"); 1388 reset(); 1389 if (!find()) 1390 return text.toString(); 1391 StringBuilder sb = new StringBuilder(); 1392 appendReplacement(sb, replacement); 1393 appendTail(sb); 1394 return sb.toString(); 1395 } 1396 1397 /** 1398 * Replaces the first subsequence of the input sequence that matches the 1399 * pattern with the result of applying the given replacer function to the 1400 * match result of this matcher corresponding to that subsequence. 1401 * Exceptions thrown by the replace function are relayed to the caller. 1402 * 1403 * <p> This method first resets this matcher. It then scans the input 1404 * sequence looking for a match of the pattern. Characters that are not 1405 * part of the match are appended directly to the result string; the match 1406 * is replaced in the result by the applying the replacer function that 1407 * returns a replacement string. The replacement string may contain 1408 * references to captured subsequences as in the {@link #appendReplacement 1409 * appendReplacement} method. 
     *
     * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
     * the replacement string may cause the results to be different than if it
     * were being treated as a literal replacement string. Dollar signs may be
     * treated as references to captured subsequences as described above, and
     * backslashes are used to escape literal characters in the replacement
     * string.
     *
     * <p> Given the regular expression <tt>dog</tt>, the input
     * <tt>"zzzdogzzzdogzzz"</tt>, and the function
     * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
     * a matcher for that expression would yield the string
     * <tt>"zzzDOGzzzdogzzz"</tt>.
     *
     * <p> Invoking this method changes this matcher's state. If the matcher
     * is to be used in further matching operations then it should first be
     * reset.
     *
     * <p> The replacer function should not modify this matcher's state during
     * replacement. This method will, on a best-effort basis, throw a
     * {@link java.util.ConcurrentModificationException} if such modification is
     * detected.
     *
     * <p> The state of the match result passed to the replacer function is
     * guaranteed to be constant only for the duration of the replacer function
     * call and only if the replacer function does not modify this matcher's
     * state.
     *
     * @implNote
     * This implementation applies the replacer function to this matcher, which
     * is an instance of {@code MatchResult}.
     *
     * @param  replacer
     *         The function to be applied to the match result of this matcher
     *         that returns a replacement string.
     * @return  The string constructed by replacing the first matching
     *          subsequence with the result of applying the replacer function to
     *          the matched subsequence, substituting captured subsequences as
     *          needed.
1449 * @throws NullPointerException if the replacer function is null 1450 * @throws ConcurrentModificationException if it is detected, on a 1451 * best-effort basis, that the replacer function modified this 1452 * matcher's state 1453 * @since 1.9 1454 */ 1455 public String replaceFirst(Function<MatchResult, String> replacer) { 1456 Objects.requireNonNull(replacer); 1457 reset(); 1458 if (!find()) 1459 return text.toString(); 1460 StringBuilder sb = new StringBuilder(); 1461 int ec = modCount; 1462 String replacement = replacer.apply(this); 1463 if (ec != modCount) 1464 throw new ConcurrentModificationException(); 1465 appendReplacement(sb, replacement); 1466 appendTail(sb); 1467 return sb.toString(); 1468 } 1469 1470 /** 1471 * Sets the limits of this matcher's region. The region is the part of the 1472 * input sequence that will be searched to find a match. Invoking this 1473 * method resets the matcher, and then sets the region to start at the 1474 * index specified by the <code>start</code> parameter and end at the 1475 * index specified by the <code>end</code> parameter. 1476 * 1477 * <p>Depending on the transparency and anchoring being used (see 1478 * {@link #useTransparentBounds useTransparentBounds} and 1479 * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such 1480 * as anchors may behave differently at or around the boundaries of the 1481 * region. 1482 * 1483 * @param start 1484 * The index to start searching at (inclusive) 1485 * @param end 1486 * The index to end searching at (exclusive) 1487 * @throws IndexOutOfBoundsException 1488 * If start or end is less than zero, if 1489 * start is greater than the length of the input sequence, if 1490 * end is greater than the length of the input sequence, or if 1491 * start is greater than end. 
1492 * @return this matcher 1493 * @since 1.5 1494 */ 1495 public Matcher region(int start, int end) { 1496 if ((start < 0) || (start > getTextLength())) 1497 throw new IndexOutOfBoundsException("start"); 1498 if ((end < 0) || (end > getTextLength())) 1499 throw new IndexOutOfBoundsException("end"); 1500 if (start > end) 1501 throw new IndexOutOfBoundsException("start > end"); 1502 reset(); 1503 from = start; 1504 to = end; 1505 return this; 1506 } 1507 1508 /** 1509 * Reports the start index of this matcher's region. The 1510 * searches this matcher conducts are limited to finding matches 1511 * within {@link #regionStart regionStart} (inclusive) and 1512 * {@link #regionEnd regionEnd} (exclusive). 1513 * 1514 * @return The starting point of this matcher's region 1515 * @since 1.5 1516 */ 1517 public int regionStart() { 1518 return from; 1519 } 1520 1521 /** 1522 * Reports the end index (exclusive) of this matcher's region. 1523 * The searches this matcher conducts are limited to finding matches 1524 * within {@link #regionStart regionStart} (inclusive) and 1525 * {@link #regionEnd regionEnd} (exclusive). 1526 * 1527 * @return the ending point of this matcher's region 1528 * @since 1.5 1529 */ 1530 public int regionEnd() { 1531 return to; 1532 } 1533 1534 /** 1535 * Queries the transparency of region bounds for this matcher. 1536 * 1537 * <p> This method returns <tt>true</tt> if this matcher uses 1538 * <i>transparent</i> bounds, <tt>false</tt> if it uses <i>opaque</i> 1539 * bounds. 1540 * 1541 * <p> See {@link #useTransparentBounds useTransparentBounds} for a 1542 * description of transparent and opaque bounds. 1543 * 1544 * <p> By default, a matcher uses opaque region boundaries. 1545 * 1546 * @return <tt>true</tt> iff this matcher is using transparent bounds, 1547 * <tt>false</tt> otherwise. 
1548 * @see java.util.regex.Matcher#useTransparentBounds(boolean) 1549 * @since 1.5 1550 */ 1551 public boolean hasTransparentBounds() { 1552 return transparentBounds; 1553 } 1554 1555 /** 1556 * Sets the transparency of region bounds for this matcher. 1557 * 1558 * <p> Invoking this method with an argument of <tt>true</tt> will set this 1559 * matcher to use <i>transparent</i> bounds. If the boolean 1560 * argument is <tt>false</tt>, then <i>opaque</i> bounds will be used. 1561 * 1562 * <p> Using transparent bounds, the boundaries of this 1563 * matcher's region are transparent to lookahead, lookbehind, 1564 * and boundary matching constructs. Those constructs can see beyond the 1565 * boundaries of the region to see if a match is appropriate. 1566 * 1567 * <p> Using opaque bounds, the boundaries of this matcher's 1568 * region are opaque to lookahead, lookbehind, and boundary matching 1569 * constructs that may try to see beyond them. Those constructs cannot 1570 * look past the boundaries so they will fail to match anything outside 1571 * of the region. 1572 * 1573 * <p> By default, a matcher uses opaque bounds. 1574 * 1575 * @param b a boolean indicating whether to use opaque or transparent 1576 * regions 1577 * @return this matcher 1578 * @see java.util.regex.Matcher#hasTransparentBounds 1579 * @since 1.5 1580 */ 1581 public Matcher useTransparentBounds(boolean b) { 1582 transparentBounds = b; 1583 return this; 1584 } 1585 1586 /** 1587 * Queries the anchoring of region bounds for this matcher. 1588 * 1589 * <p> This method returns <tt>true</tt> if this matcher uses 1590 * <i>anchoring</i> bounds, <tt>false</tt> otherwise. 1591 * 1592 * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a 1593 * description of anchoring bounds. 1594 * 1595 * <p> By default, a matcher uses anchoring region boundaries. 1596 * 1597 * @return <tt>true</tt> iff this matcher is using anchoring bounds, 1598 * <tt>false</tt> otherwise. 
1599 * @see java.util.regex.Matcher#useAnchoringBounds(boolean) 1600 * @since 1.5 1601 */ 1602 public boolean hasAnchoringBounds() { 1603 return anchoringBounds; 1604 } 1605 1606 /** 1607 * Sets the anchoring of region bounds for this matcher. 1608 * 1609 * <p> Invoking this method with an argument of <tt>true</tt> will set this 1610 * matcher to use <i>anchoring</i> bounds. If the boolean 1611 * argument is <tt>false</tt>, then <i>non-anchoring</i> bounds will be 1612 * used. 1613 * 1614 * <p> Using anchoring bounds, the boundaries of this 1615 * matcher's region match anchors such as ^ and $. 1616 * 1617 * <p> Without anchoring bounds, the boundaries of this 1618 * matcher's region will not match anchors such as ^ and $. 1619 * 1620 * <p> By default, a matcher uses anchoring region boundaries. 1621 * 1622 * @param b a boolean indicating whether or not to use anchoring bounds. 1623 * @return this matcher 1624 * @see java.util.regex.Matcher#hasAnchoringBounds 1625 * @since 1.5 1626 */ 1627 public Matcher useAnchoringBounds(boolean b) { 1628 anchoringBounds = b; 1629 return this; 1630 } 1631 1632 /** 1633 * <p>Returns the string representation of this matcher. The 1634 * string representation of a <code>Matcher</code> contains information 1635 * that may be useful for debugging. The exact format is unspecified. 1636 * 1637 * @return The string representation of this matcher 1638 * @since 1.5 1639 */ 1640 public String toString() { 1641 StringBuilder sb = new StringBuilder(); 1642 sb.append("java.util.regex.Matcher") 1643 .append("[pattern=").append(pattern()) 1644 .append(" region=") 1645 .append(regionStart()).append(',').append(regionEnd()) 1646 .append(" lastmatch="); 1647 if ((first >= 0) && (group() != null)) { 1648 sb.append(group()); 1649 } 1650 sb.append(']'); 1651 return sb.toString(); 1652 } 1653 1654 /** 1655 * <p>Returns true if the end of input was hit by the search engine in 1656 * the last match operation performed by this matcher. 
1657 * 1658 * <p>When this method returns true, then it is possible that more input 1659 * would have changed the result of the last search. 1660 * 1661 * @return true iff the end of input was hit in the last match; false 1662 * otherwise 1663 * @since 1.5 1664 */ 1665 public boolean hitEnd() { 1666 return hitEnd; 1667 } 1668 1669 /** 1670 * <p>Returns true if more input could change a positive match into a 1671 * negative one. 1672 * 1673 * <p>If this method returns true, and a match was found, then more 1674 * input could cause the match to be lost. If this method returns false 1675 * and a match was found, then more input might change the match but the 1676 * match won't be lost. If a match was not found, then requireEnd has no 1677 * meaning. 1678 * 1679 * @return true iff more input could change a positive match into a 1680 * negative one. 1681 * @since 1.5 1682 */ 1683 public boolean requireEnd() { 1684 return requireEnd; 1685 } 1686 1687 /** 1688 * Initiates a search to find a Pattern within the given bounds. 1689 * The groups are filled with default values and the match of the root 1690 * of the state machine is called. The state machine will hold the state 1691 * of the match as it proceeds in this matcher. 1692 * 1693 * Matcher.from is not set here, because it is the "hard" boundary 1694 * of the start of the search which anchors will set to. The from param 1695 * is the "soft" boundary of the start of the search, meaning that the 1696 * regex tries to match at that index but ^ won't match there. Subsequent 1697 * calls to the search methods start at a new "soft" boundary which is 1698 * the end of the previous match. 1699 */ 1700 boolean search(int from) { 1701 this.hitEnd = false; 1702 this.requireEnd = false; 1703 from = from < 0 ? 0 : from; 1704 this.first = from; 1705 this.oldLast = oldLast < 0 ? 
from : oldLast; 1706 for (int i = 0; i < groups.length; i++) 1707 groups[i] = -1; 1708 acceptMode = NOANCHOR; 1709 boolean result = parentPattern.root.match(this, from, text); 1710 if (!result) 1711 this.first = -1; 1712 this.oldLast = this.last; 1713 this.modCount++; 1714 return result; 1715 } 1716 1717 /** 1718 * Initiates a search for an anchored match to a Pattern within the given 1719 * bounds. The groups are filled with default values and the match of the 1720 * root of the state machine is called. The state machine will hold the 1721 * state of the match as it proceeds in this matcher. 1722 */ 1723 boolean match(int from, int anchor) { 1724 this.hitEnd = false; 1725 this.requireEnd = false; 1726 from = from < 0 ? 0 : from; 1727 this.first = from; 1728 this.oldLast = oldLast < 0 ? from : oldLast; 1729 for (int i = 0; i < groups.length; i++) 1730 groups[i] = -1; 1731 acceptMode = anchor; 1732 boolean result = parentPattern.matchRoot.match(this, from, text); 1733 if (!result) 1734 this.first = -1; 1735 this.oldLast = this.last; 1736 this.modCount++; 1737 return result; 1738 } 1739 1740 /** 1741 * Returns the end index of the text. 1742 * 1743 * @return the index after the last character in the text 1744 */ 1745 int getTextLength() { 1746 return text.length(); 1747 } 1748 1749 /** 1750 * Generates a String from this Matcher's input in the specified range. 1751 * 1752 * @param beginIndex the beginning index, inclusive 1753 * @param endIndex the ending index, exclusive 1754 * @return A String generated from this Matcher's input 1755 */ 1756 CharSequence getSubSequence(int beginIndex, int endIndex) { 1757 return text.subSequence(beginIndex, endIndex); 1758 } 1759 1760 /** 1761 * Returns this Matcher's input character at index i. 1762 * 1763 * @return A char from the specified index 1764 */ 1765 char charAt(int i) { 1766 return text.charAt(i); 1767 } 1768 1769 /** 1770 * Returns the group index of the matched capturing group. 
1771 * 1772 * @return the index of the named-capturing group 1773 */ 1774 int getMatchedGroupIndex(String name) { 1775 Objects.requireNonNull(name, "Group name"); 1776 if (first < 0) 1777 throw new IllegalStateException("No match found"); 1778 if (!parentPattern.namedGroups().containsKey(name)) 1779 throw new IllegalArgumentException("No group with name <" + name + ">"); 1780 return parentPattern.namedGroups().get(name); 1781 } 1782 }