1 /* 2 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util.regex; 27 28 import java.util.ConcurrentModificationException; 29 import java.util.Iterator; 30 import java.util.NoSuchElementException; 31 import java.util.Objects; 32 import java.util.Spliterator; 33 import java.util.Spliterators; 34 import java.util.function.Consumer; 35 import java.util.function.Function; 36 import java.util.stream.Stream; 37 import java.util.stream.StreamSupport; 38 39 /** 40 * An engine that performs match operations on a {@linkplain java.lang.CharSequence 41 * character sequence} by interpreting a {@link Pattern}. 42 * 43 * <p> A matcher is created from a pattern by invoking the pattern's {@link 44 * Pattern#matcher matcher} method. Once created, a matcher can be used to 45 * perform three different kinds of match operations: 46 * 47 * <ul> 48 * 49 * <li><p> The {@link #matches matches} method attempts to match the entire 50 * input sequence against the pattern. </p></li> 51 * 52 * <li><p> The {@link #lookingAt lookingAt} method attempts to match the 53 * input sequence, starting at the beginning, against the pattern. </p></li> 54 * 55 * <li><p> The {@link #find find} method scans the input sequence looking for 56 * the next subsequence that matches the pattern. </p></li> 57 * 58 * </ul> 59 * 60 * <p> Each of these methods returns a boolean indicating success or failure. 61 * More information about a successful match can be obtained by querying the 62 * state of the matcher. 63 * 64 * <p> A matcher finds matches in a subset of its input called the 65 * <i>region</i>. By default, the region contains all of the matcher's input. 66 * The region can be modified via the {@link #region region} method and queried 67 * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd} 68 * methods. The way that the region boundaries interact with some pattern 69 * constructs can be changed. See {@link #useAnchoringBounds 70 * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds} 71 * for more details. 72 * 73 * <p> This class also defines methods for replacing matched subsequences with 74 * new strings whose contents can, if desired, be computed from the match 75 * result. The {@link #appendReplacement appendReplacement} and {@link 76 * #appendTail appendTail} methods can be used in tandem in order to collect 77 * the result into an existing string buffer or string builder. Alternatively, 78 * the more convenient {@link #replaceAll replaceAll} method can be used to 79 * create a string in which every matching subsequence in the input sequence 80 * is replaced. 81 * 82 * <p> The explicit state of a matcher includes the start and end indices of 83 * the most recent successful match. It also includes the start and end 84 * indices of the input subsequence captured by each <a 85 * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total 86 * count of such subsequences. As a convenience, methods are also provided for 87 * returning these captured subsequences in string form. 88 * 89 * <p> The explicit state of a matcher is initially undefined; attempting to 90 * query any part of it before a successful match will cause an {@link 91 * IllegalStateException} to be thrown. The explicit state of a matcher is 92 * recomputed by every match operation. 93 * 94 * <p> The implicit state of a matcher includes the input character sequence as 95 * well as the <i>append position</i>, which is initially zero and is updated 96 * by the {@link #appendReplacement appendReplacement} method. 97 * 98 * <p> A matcher may be reset explicitly by invoking its {@link #reset()} 99 * method or, if a new input sequence is desired, its {@link 100 * #reset(java.lang.CharSequence) reset(CharSequence)} method. Resetting a 101 * matcher discards its explicit state information and sets the append position 102 * to zero. 103 * 104 * <p> Instances of this class are not safe for use by multiple concurrent 105 * threads. </p> 106 * 107 * 108 * @author Mike McCloskey 109 * @author Mark Reinhold 110 * @author JSR-51 Expert Group 111 * @since 1.4 112 * @spec JSR-51 113 */ 114 115 public final class Matcher implements MatchResult { 116 117 /** 118 * The Pattern object that created this Matcher. 119 */ 120 Pattern parentPattern; 121 122 /** 123 * The storage used by groups. They may contain invalid values if 124 * a group was skipped during the matching. 125 */ 126 int[] groups; 127 128 /** 129 * The range within the sequence that is to be matched. Anchors 130 * will match at these "hard" boundaries. Changing the region 131 * changes these values. 132 */ 133 int from, to; 134 135 /** 136 * Lookbehind uses this value to ensure that the subexpression 137 * match ends at the point where the lookbehind was encountered. 138 */ 139 int lookbehindTo; 140 141 /** 142 * The original string being matched. 143 */ 144 CharSequence text; 145 146 /** 147 * Matcher state used by the last node. NOANCHOR is used when a 148 * match does not have to consume all of the input. ENDANCHOR is 149 * the mode used for matching all the input. 150 */ 151 static final int ENDANCHOR = 1; 152 static final int NOANCHOR = 0; 153 int acceptMode = NOANCHOR; 154 155 /** 156 * The range of string that last matched the pattern. If the last 157 * match failed then first is -1; last initially holds 0 then it 158 * holds the index of the end of the last match (which is where the 159 * next search starts). 160 */ 161 int first = -1, last = 0; 162 163 /** 164 * The end index of what matched in the last match operation. 165 */ 166 int oldLast = -1; 167 168 /** 169 * The index of the last position appended in a substitution. 170 */ 171 int lastAppendPosition = 0; 172 173 /** 174 * Storage used by nodes to tell what repetition they are on in 175 * a pattern, and where groups begin. The nodes themselves are stateless, 176 * so they rely on this field to hold state during a match. 177 */ 178 int[] locals; 179 180 /** 181 * Boolean indicating whether or not more input could change 182 * the results of the last match. 183 * 184 * If hitEnd is true, and a match was found, then more input 185 * might cause a different match to be found. 186 * If hitEnd is true and a match was not found, then more 187 * input could cause a match to be found. 188 * If hitEnd is false and a match was found, then more input 189 * will not change the match. 190 * If hitEnd is false and a match was not found, then more 191 * input will not cause a match to be found. 192 */ 193 boolean hitEnd; 194 195 /** 196 * Boolean indicating whether or not more input could change 197 * a positive match into a negative one. 198 * 199 * If requireEnd is true, and a match was found, then more 200 * input could cause the match to be lost. 201 * If requireEnd is false and a match was found, then more 202 * input might change the match but the match won't be lost. 203 * If a match was not found, then requireEnd has no meaning. 204 */ 205 boolean requireEnd; 206 207 /** 208 * If transparentBounds is true then the boundaries of this 209 * matcher's region are transparent to lookahead, lookbehind, 210 * and boundary matching constructs that try to see beyond them. 211 */ 212 boolean transparentBounds = false; 213 214 /** 215 * If anchoringBounds is true then the boundaries of this 216 * matcher's region match anchors such as ^ and $. 217 */ 218 boolean anchoringBounds = true; 219 220 /** 221 * Number of times this matcher's state has been modified 222 */ 223 int modCount; 224 225 /** 226 * No default constructor. 227 */ 228 Matcher() { 229 } 230 231 /** 232 * All matchers have the state used by Pattern during a match. 233 */ 234 Matcher(Pattern parent, CharSequence text) { 235 this.parentPattern = parent; 236 this.text = text; 237 238 // Allocate state storage 239 int parentGroupCount = Math.max(parent.capturingGroupCount, 10); 240 groups = new int[parentGroupCount * 2]; 241 locals = new int[parent.localCount]; 242 243 // Put fields into initial states 244 reset(); 245 } 246 247 /** 248 * Returns the pattern that is interpreted by this matcher. 249 * 250 * @return The pattern for which this matcher was created 251 */ 252 public Pattern pattern() { 253 return parentPattern; 254 } 255 256 /** 257 * Returns the match state of this matcher as a {@link MatchResult}. 258 * The result is unaffected by subsequent operations performed upon this 259 * matcher. 260 * 261 * @return a {@code MatchResult} with the state of this matcher 262 * @since 1.5 263 */ 264 public MatchResult toMatchResult() { 265 return toMatchResult(text.toString()); 266 } 267 268 private MatchResult toMatchResult(String text) { 269 return new ImmutableMatchResult(this.first, 270 this.last, 271 groupCount(), 272 this.groups.clone(), 273 text); 274 } 275 276 private static class ImmutableMatchResult implements MatchResult { 277 private final int first; 278 private final int last; 279 private final int[] groups; 280 private final int groupCount; 281 private final String text; 282 283 ImmutableMatchResult(int first, int last, int groupCount, 284 int groups[], String text) 285 { 286 this.first = first; 287 this.last = last; 288 this.groupCount = groupCount; 289 this.groups = groups; 290 this.text = text; 291 } 292 293 @Override 294 public int start() { 295 checkMatch(); 296 return first; 297 } 298 299 @Override 300 public int start(int group) { 301 checkMatch(); 302 if (group < 0 || group > groupCount) 303 throw new IndexOutOfBoundsException("No group " + group); 304 return groups[group * 2]; 305 } 306 307 @Override 308 public int end() { 309 checkMatch(); 310 return last; 311 } 312 313 @Override 314 public int end(int group) { 315 checkMatch(); 316 if (group < 0 || group > groupCount) 317 throw new IndexOutOfBoundsException("No group " + group); 318 return groups[group * 2 + 1]; 319 } 320 321 @Override 322 public int groupCount() { 323 return groupCount; 324 } 325 326 @Override 327 public String group() { 328 checkMatch(); 329 return group(0); 330 } 331 332 @Override 333 public String group(int group) { 334 checkMatch(); 335 if (group < 0 || group > groupCount) 336 throw new IndexOutOfBoundsException("No group " + group); 337 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 338 return null; 339 return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 340 } 341 342 private void checkMatch() { 343 if (first < 0) 344 throw new IllegalStateException("No match found"); 345 346 } 347 } 348 349 /** 350 * Changes the {@code Pattern} that this {@code Matcher} uses to 351 * find matches with. 352 * 353 * <p> This method causes this matcher to lose information 354 * about the groups of the last match that occurred. The 355 * matcher's position in the input is maintained and its 356 * last append position is unaffected.</p> 357 * 358 * @param newPattern 359 * The new pattern used by this matcher 360 * @return This matcher 361 * @throws IllegalArgumentException 362 * If newPattern is {@code null} 363 * @since 1.5 364 */ 365 public Matcher usePattern(Pattern newPattern) { 366 if (newPattern == null) 367 throw new IllegalArgumentException("Pattern cannot be null"); 368 parentPattern = newPattern; 369 370 // Reallocate state storage 371 int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10); 372 groups = new int[parentGroupCount * 2]; 373 locals = new int[newPattern.localCount]; 374 for (int i = 0; i < groups.length; i++) 375 groups[i] = -1; 376 for (int i = 0; i < locals.length; i++) 377 locals[i] = -1; 378 modCount++; 379 return this; 380 } 381 382 /** 383 * Resets this matcher. 384 * 385 * <p> Resetting a matcher discards all of its explicit state information 386 * and sets its append position to zero. The matcher's region is set to the 387 * default region, which is its entire character sequence. The anchoring 388 * and transparency of this matcher's region boundaries are unaffected. 389 * 390 * @return This matcher 391 */ 392 public Matcher reset() { 393 first = -1; 394 last = 0; 395 oldLast = -1; 396 for(int i=0; i<groups.length; i++) 397 groups[i] = -1; 398 for(int i=0; i<locals.length; i++) 399 locals[i] = -1; 400 lastAppendPosition = 0; 401 from = 0; 402 to = getTextLength(); 403 modCount++; 404 return this; 405 } 406 407 /** 408 * Resets this matcher with a new input sequence. 409 * 410 * <p> Resetting a matcher discards all of its explicit state information 411 * and sets its append position to zero. The matcher's region is set to 412 * the default region, which is its entire character sequence. The 413 * anchoring and transparency of this matcher's region boundaries are 414 * unaffected. 415 * 416 * @param input 417 * The new input character sequence 418 * 419 * @return This matcher 420 */ 421 public Matcher reset(CharSequence input) { 422 text = input; 423 return reset(); 424 } 425 426 /** 427 * Returns the start index of the previous match. 428 * 429 * @return The index of the first character matched 430 * 431 * @throws IllegalStateException 432 * If no match has yet been attempted, 433 * or if the previous match operation failed 434 */ 435 public int start() { 436 if (first < 0) 437 throw new IllegalStateException("No match available"); 438 return first; 439 } 440 441 /** 442 * Returns the start index of the subsequence captured by the given group 443 * during the previous match operation. 444 * 445 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 446 * to right, starting at one. Group zero denotes the entire pattern, so 447 * the expression <i>m.</i>{@code start(0)} is equivalent to 448 * <i>m.</i>{@code start()}. </p> 449 * 450 * @param group 451 * The index of a capturing group in this matcher's pattern 452 * 453 * @return The index of the first character captured by the group, 454 * or {@code -1} if the match was successful but the group 455 * itself did not match anything 456 * 457 * @throws IllegalStateException 458 * If no match has yet been attempted, 459 * or if the previous match operation failed 460 * 461 * @throws IndexOutOfBoundsException 462 * If there is no capturing group in the pattern 463 * with the given index 464 */ 465 public int start(int group) { 466 if (first < 0) 467 throw new IllegalStateException("No match available"); 468 if (group < 0 || group > groupCount()) 469 throw new IndexOutOfBoundsException("No group " + group); 470 return groups[group * 2]; 471 } 472 473 /** 474 * Returns the start index of the subsequence captured by the given 475 * <a href="Pattern.html#groupname">named-capturing group</a> during the 476 * previous match operation. 477 * 478 * @param name 479 * The name of a named-capturing group in this matcher's pattern 480 * 481 * @return The index of the first character captured by the group, 482 * or {@code -1} if the match was successful but the group 483 * itself did not match anything 484 * 485 * @throws IllegalStateException 486 * If no match has yet been attempted, 487 * or if the previous match operation failed 488 * 489 * @throws IllegalArgumentException 490 * If there is no capturing group in the pattern 491 * with the given name 492 * @since 1.8 493 */ 494 public int start(String name) { 495 return groups[getMatchedGroupIndex(name) * 2]; 496 } 497 498 /** 499 * Returns the offset after the last character matched. 500 * 501 * @return The offset after the last character matched 502 * 503 * @throws IllegalStateException 504 * If no match has yet been attempted, 505 * or if the previous match operation failed 506 */ 507 public int end() { 508 if (first < 0) 509 throw new IllegalStateException("No match available"); 510 return last; 511 } 512 513 /** 514 * Returns the offset after the last character of the subsequence 515 * captured by the given group during the previous match operation. 516 * 517 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 518 * to right, starting at one. Group zero denotes the entire pattern, so 519 * the expression <i>m.</i>{@code end(0)} is equivalent to 520 * <i>m.</i>{@code end()}. </p> 521 * 522 * @param group 523 * The index of a capturing group in this matcher's pattern 524 * 525 * @return The offset after the last character captured by the group, 526 * or {@code -1} if the match was successful 527 * but the group itself did not match anything 528 * 529 * @throws IllegalStateException 530 * If no match has yet been attempted, 531 * or if the previous match operation failed 532 * 533 * @throws IndexOutOfBoundsException 534 * If there is no capturing group in the pattern 535 * with the given index 536 */ 537 public int end(int group) { 538 if (first < 0) 539 throw new IllegalStateException("No match available"); 540 if (group < 0 || group > groupCount()) 541 throw new IndexOutOfBoundsException("No group " + group); 542 return groups[group * 2 + 1]; 543 } 544 545 /** 546 * Returns the offset after the last character of the subsequence 547 * captured by the given <a href="Pattern.html#groupname">named-capturing 548 * group</a> during the previous match operation. 549 * 550 * @param name 551 * The name of a named-capturing group in this matcher's pattern 552 * 553 * @return The offset after the last character captured by the group, 554 * or {@code -1} if the match was successful 555 * but the group itself did not match anything 556 * 557 * @throws IllegalStateException 558 * If no match has yet been attempted, 559 * or if the previous match operation failed 560 * 561 * @throws IllegalArgumentException 562 * If there is no capturing group in the pattern 563 * with the given name 564 * @since 1.8 565 */ 566 public int end(String name) { 567 return groups[getMatchedGroupIndex(name) * 2 + 1]; 568 } 569 570 /** 571 * Returns the input subsequence matched by the previous match. 572 * 573 * <p> For a matcher <i>m</i> with input sequence <i>s</i>, 574 * the expressions <i>m.</i>{@code group()} and 575 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),} <i>m.</i>{@code end())} 576 * are equivalent. </p> 577 * 578 * <p> Note that some patterns, for example {@code a*}, match the empty 579 * string. This method will return the empty string when the pattern 580 * successfully matches the empty string in the input. </p> 581 * 582 * @return The (possibly empty) subsequence matched by the previous match, 583 * in string form 584 * 585 * @throws IllegalStateException 586 * If no match has yet been attempted, 587 * or if the previous match operation failed 588 */ 589 public String group() { 590 return group(0); 591 } 592 593 /** 594 * Returns the input subsequence captured by the given group during the 595 * previous match operation. 596 * 597 * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index 598 * <i>g</i>, the expressions <i>m.</i>{@code group(}<i>g</i>{@code )} and 599 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(}<i>g</i>{@code 600 * ),} <i>m.</i>{@code end(}<i>g</i>{@code ))} 601 * are equivalent. </p> 602 * 603 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 604 * to right, starting at one. Group zero denotes the entire pattern, so 605 * the expression {@code m.group(0)} is equivalent to {@code m.group()}. 606 * </p> 607 * 608 * <p> If the match was successful but the group specified failed to match 609 * any part of the input sequence, then {@code null} is returned. Note 610 * that some groups, for example {@code (a*)}, match the empty string. 611 * This method will return the empty string when such a group successfully 612 * matches the empty string in the input. </p> 613 * 614 * @param group 615 * The index of a capturing group in this matcher's pattern 616 * 617 * @return The (possibly empty) subsequence captured by the group 618 * during the previous match, or {@code null} if the group 619 * failed to match part of the input 620 * 621 * @throws IllegalStateException 622 * If no match has yet been attempted, 623 * or if the previous match operation failed 624 * 625 * @throws IndexOutOfBoundsException 626 * If there is no capturing group in the pattern 627 * with the given index 628 */ 629 public String group(int group) { 630 if (first < 0) 631 throw new IllegalStateException("No match found"); 632 if (group < 0 || group > groupCount()) 633 throw new IndexOutOfBoundsException("No group " + group); 634 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 635 return null; 636 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 637 } 638 639 /** 640 * Returns the input subsequence captured by the given 641 * <a href="Pattern.html#groupname">named-capturing group</a> during the previous 642 * match operation. 643 * 644 * <p> If the match was successful but the group specified failed to match 645 * any part of the input sequence, then {@code null} is returned. Note 646 * that some groups, for example {@code (a*)}, match the empty string. 647 * This method will return the empty string when such a group successfully 648 * matches the empty string in the input. </p> 649 * 650 * @param name 651 * The name of a named-capturing group in this matcher's pattern 652 * 653 * @return The (possibly empty) subsequence captured by the named group 654 * during the previous match, or {@code null} if the group 655 * failed to match part of the input 656 * 657 * @throws IllegalStateException 658 * If no match has yet been attempted, 659 * or if the previous match operation failed 660 * 661 * @throws IllegalArgumentException 662 * If there is no capturing group in the pattern 663 * with the given name 664 * @since 1.7 665 */ 666 public String group(String name) { 667 int group = getMatchedGroupIndex(name); 668 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 669 return null; 670 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 671 } 672 673 /** 674 * Returns the number of capturing groups in this matcher's pattern. 675 * 676 * <p> Group zero denotes the entire pattern by convention. It is not 677 * included in this count. 678 * 679 * <p> Any non-negative integer smaller than or equal to the value 680 * returned by this method is guaranteed to be a valid group index for 681 * this matcher. </p> 682 * 683 * @return The number of capturing groups in this matcher's pattern 684 */ 685 public int groupCount() { 686 return parentPattern.capturingGroupCount - 1; 687 } 688 689 /** 690 * Attempts to match the entire region against the pattern. 691 * 692 * <p> If the match succeeds then more information can be obtained via the 693 * {@code start}, {@code end}, and {@code group} methods. </p> 694 * 695 * @return {@code true} if, and only if, the entire region sequence 696 * matches this matcher's pattern 697 */ 698 public boolean matches() { 699 return match(from, ENDANCHOR); 700 } 701 702 /** 703 * Attempts to find the next subsequence of the input sequence that matches 704 * the pattern. 705 * 706 * <p> This method starts at the beginning of this matcher's region, or, if 707 * a previous invocation of the method was successful and the matcher has 708 * not since been reset, at the first character not matched by the previous 709 * match. 710 * 711 * <p> If the match succeeds then more information can be obtained via the 712 * {@code start}, {@code end}, and {@code group} methods. </p> 713 * 714 * @return {@code true} if, and only if, a subsequence of the input 715 * sequence matches this matcher's pattern 716 */ 717 public boolean find() { 718 int nextSearchIndex = last; 719 if (nextSearchIndex == first) 720 nextSearchIndex++; 721 722 // If next search starts before region, start it at region 723 if (nextSearchIndex < from) 724 nextSearchIndex = from; 725 726 // If next search starts beyond region then it fails 727 if (nextSearchIndex > to) { 728 for (int i = 0; i < groups.length; i++) 729 groups[i] = -1; 730 return false; 731 } 732 return search(nextSearchIndex); 733 } 734 735 /** 736 * Resets this matcher and then attempts to find the next subsequence of 737 * the input sequence that matches the pattern, starting at the specified 738 * index. 739 * 740 * <p> If the match succeeds then more information can be obtained via the 741 * {@code start}, {@code end}, and {@code group} methods, and subsequent 742 * invocations of the {@link #find()} method will start at the first 743 * character not matched by this match. </p> 744 * 745 * @param start the index to start searching for a match 746 * @throws IndexOutOfBoundsException 747 * If start is less than zero or if start is greater than the 748 * length of the input sequence. 749 * 750 * @return {@code true} if, and only if, a subsequence of the input 751 * sequence starting at the given index matches this matcher's 752 * pattern 753 */ 754 public boolean find(int start) { 755 int limit = getTextLength(); 756 if ((start < 0) || (start > limit)) 757 throw new IndexOutOfBoundsException("Illegal start index"); 758 reset(); 759 return search(start); 760 } 761 762 /** 763 * Attempts to match the input sequence, starting at the beginning of the 764 * region, against the pattern. 765 * 766 * <p> Like the {@link #matches matches} method, this method always starts 767 * at the beginning of the region; unlike that method, it does not 768 * require that the entire region be matched. 769 * 770 * <p> If the match succeeds then more information can be obtained via the 771 * {@code start}, {@code end}, and {@code group} methods. </p> 772 * 773 * @return {@code true} if, and only if, a prefix of the input 774 * sequence matches this matcher's pattern 775 */ 776 public boolean lookingAt() { 777 return match(from, NOANCHOR); 778 } 779 780 /** 781 * Returns a literal replacement {@code String} for the specified 782 * {@code String}. 783 * 784 * This method produces a {@code String} that will work 785 * as a literal replacement {@code s} in the 786 * {@code appendReplacement} method of the {@link Matcher} class. 787 * The {@code String} produced will match the sequence of characters 788 * in {@code s} treated as a literal sequence. Slashes ('\') and 789 * dollar signs ('$') will be given no special meaning. 790 * 791 * @param s The string to be literalized 792 * @return A literal string replacement 793 * @since 1.5 794 */ 795 public static String quoteReplacement(String s) { 796 if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1)) 797 return s; 798 StringBuilder sb = new StringBuilder(); 799 for (int i=0; i<s.length(); i++) { 800 char c = s.charAt(i); 801 if (c == '\\' || c == '$') { 802 sb.append('\\'); 803 } 804 sb.append(c); 805 } 806 return sb.toString(); 807 } 808 809 /** 810 * Implements a non-terminal append-and-replace step. 811 * 812 * <p> This method performs the following actions: </p> 813 * 814 * <ol> 815 * 816 * <li><p> It reads characters from the input sequence, starting at the 817 * append position, and appends them to the given string buffer. It 818 * stops after reading the last character preceding the previous match, 819 * that is, the character at index {@link 820 * #start()} {@code -} {@code 1}. </p></li> 821 * 822 * <li><p> It appends the given replacement string to the string buffer. 823 * </p></li> 824 * 825 * <li><p> It sets the append position of this matcher to the index of 826 * the last character matched, plus one, that is, to {@link #end()}. 827 * </p></li> 828 * 829 * </ol> 830 * 831 * <p> The replacement string may contain references to subsequences 832 * captured during the previous match: Each occurrence of 833 * <code>${</code><i>name</i><code>}</code> or {@code $}<i>g</i> 834 * will be replaced by the result of evaluating the corresponding 835 * {@link #group(String) group(name)} or {@link #group(int) group(g)} 836 * respectively. For {@code $}<i>g</i>, 837 * the first number after the {@code $} is always treated as part of 838 * the group reference. Subsequent numbers are incorporated into g if 839 * they would form a legal group reference. Only the numerals '0' 840 * through '9' are considered as potential components of the group 841 * reference. If the second group matched the string {@code "foo"}, for 842 * example, then passing the replacement string {@code "$2bar"} would 843 * cause {@code "foobar"} to be appended to the string buffer. A dollar 844 * sign ({@code $}) may be included as a literal in the replacement 845 * string by preceding it with a backslash ({@code \$}). 846 * 847 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 848 * the replacement string may cause the results to be different than if it 849 * were being treated as a literal replacement string. Dollar signs may be 850 * treated as references to captured subsequences as described above, and 851 * backslashes are used to escape literal characters in the replacement 852 * string. 853 * 854 * <p> This method is intended to be used in a loop together with the 855 * {@link #appendTail appendTail} and {@link #find find} methods. The 856 * following code, for example, writes {@code one dog two dogs in the 857 * yard} to the standard-output stream: </p> 858 * 859 * <blockquote><pre> 860 * Pattern p = Pattern.compile("cat"); 861 * Matcher m = p.matcher("one cat two cats in the yard"); 862 * StringBuffer sb = new StringBuffer(); 863 * while (m.find()) { 864 * m.appendReplacement(sb, "dog"); 865 * } 866 * m.appendTail(sb); 867 * System.out.println(sb.toString());</pre></blockquote> 868 * 869 * @param sb 870 * The target string buffer 871 * 872 * @param replacement 873 * The replacement string 874 * 875 * @return This matcher 876 * 877 * @throws IllegalStateException 878 * If no match has yet been attempted, 879 * or if the previous match operation failed 880 * 881 * @throws IllegalArgumentException 882 * If the replacement string refers to a named-capturing 883 * group that does not exist in the pattern 884 * 885 * @throws IndexOutOfBoundsException 886 * If the replacement string refers to a capturing group 887 * that does not exist in the pattern 888 */ 889 public Matcher appendReplacement(StringBuffer sb, String replacement) { 890 // If no match, return error 891 if (first < 0) 892 throw new IllegalStateException("No match available"); 893 StringBuilder result = new StringBuilder(); 894 appendExpandedReplacement(replacement, result); 895 // Append the intervening text 896 sb.append(text, lastAppendPosition, first); 897 // Append the match substitution 898 sb.append(result); 899 lastAppendPosition = last; 900 modCount++; 901 return this; 902 } 903 904 /** 905 * Implements a non-terminal append-and-replace step. 906 * 907 * <p> This method performs the following actions: </p> 908 * 909 * <ol> 910 * 911 * <li><p> It reads characters from the input sequence, starting at the 912 * append position, and appends them to the given string builder. It 913 * stops after reading the last character preceding the previous match, 914 * that is, the character at index {@link 915 * #start()} {@code -} {@code 1}. </p></li> 916 * 917 * <li><p> It appends the given replacement string to the string builder. 918 * </p></li> 919 * 920 * <li><p> It sets the append position of this matcher to the index of 921 * the last character matched, plus one, that is, to {@link #end()}. 922 * </p></li> 923 * 924 * </ol> 925 * 926 * <p> The replacement string may contain references to subsequences 927 * captured during the previous match: Each occurrence of 928 * {@code $}<i>g</i> will be replaced by the result of 929 * evaluating {@link #group(int) group}{@code (}<i>g</i>{@code )}. 930 * The first number after the {@code $} is always treated as part of 931 * the group reference. Subsequent numbers are incorporated into g if 932 * they would form a legal group reference. Only the numerals '0' 933 * through '9' are considered as potential components of the group 934 * reference. If the second group matched the string {@code "foo"}, for 935 * example, then passing the replacement string {@code "$2bar"} would 936 * cause {@code "foobar"} to be appended to the string builder. A dollar 937 * sign ({@code $}) may be included as a literal in the replacement 938 * string by preceding it with a backslash ({@code \$}). 939 * 940 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 941 * the replacement string may cause the results to be different than if it 942 * were being treated as a literal replacement string. Dollar signs may be 943 * treated as references to captured subsequences as described above, and 944 * backslashes are used to escape literal characters in the replacement 945 * string. 946 * 947 * <p> This method is intended to be used in a loop together with the 948 * {@link #appendTail appendTail} and {@link #find find} methods. The 949 * following code, for example, writes {@code one dog two dogs in the 950 * yard} to the standard-output stream: </p> 951 * 952 * <blockquote><pre> 953 * Pattern p = Pattern.compile("cat"); 954 * Matcher m = p.matcher("one cat two cats in the yard"); 955 * StringBuilder sb = new StringBuilder(); 956 * while (m.find()) { 957 * m.appendReplacement(sb, "dog"); 958 * } 959 * m.appendTail(sb); 960 * System.out.println(sb.toString());</pre></blockquote> 961 * 962 * @param sb 963 * The target string builder 964 * @param replacement 965 * The replacement string 966 * @return This matcher 967 * 968 * @throws IllegalStateException 969 * If no match has yet been attempted, 970 * or if the previous match operation failed 971 * @throws IllegalArgumentException 972 * If the replacement string refers to a named-capturing 973 * group that does not exist in the pattern 974 * @throws IndexOutOfBoundsException 975 * If the replacement string refers to a capturing group 976 * that does not exist in the pattern 977 * @since 1.9 978 */ 979 public Matcher appendReplacement(StringBuilder sb, String replacement) { 980 // If no match, return error 981 if (first < 0) 982 throw new IllegalStateException("No match available"); 983 StringBuilder result = new StringBuilder(); 984 appendExpandedReplacement(replacement, result); 985 // Append the intervening text 986 sb.append(text, lastAppendPosition, first); 987 // Append the match substitution 988 sb.append(result); 989 lastAppendPosition = last; 990 modCount++; 991 return this; 992 } 993 994 /** 995 * Processes replacement string to replace group references with 996 * groups. 997 */ 998 private StringBuilder appendExpandedReplacement( 999 String replacement, StringBuilder result) { 1000 int cursor = 0; 1001 while (cursor < replacement.length()) { 1002 char nextChar = replacement.charAt(cursor); 1003 if (nextChar == '\\') { 1004 cursor++; 1005 if (cursor == replacement.length()) 1006 throw new IllegalArgumentException( 1007 "character to be escaped is missing"); 1008 nextChar = replacement.charAt(cursor); 1009 result.append(nextChar); 1010 cursor++; 1011 } else if (nextChar == '$') { 1012 // Skip past $ 1013 cursor++; 1014 // Throw IAE if this "$" is the last character in replacement 1015 if (cursor == replacement.length()) 1016 throw new IllegalArgumentException( 1017 "Illegal group reference: group index is missing"); 1018 nextChar = replacement.charAt(cursor); 1019 int refNum = -1; 1020 if (nextChar == '{') { 1021 cursor++; 1022 StringBuilder gsb = new StringBuilder(); 1023 while (cursor < replacement.length()) { 1024 nextChar = replacement.charAt(cursor); 1025 if (ASCII.isLower(nextChar) || 1026 ASCII.isUpper(nextChar) || 1027 ASCII.isDigit(nextChar)) { 1028 gsb.append(nextChar); 1029 cursor++; 1030 } else { 1031 break; 1032 } 1033 } 1034 if (gsb.length() == 0) 1035 throw new IllegalArgumentException( 1036 "named capturing group has 0 length name"); 1037 if (nextChar != '}') 1038 throw new IllegalArgumentException( 1039 "named capturing group is missing trailing '}'"); 1040 String gname = gsb.toString(); 1041 if (ASCII.isDigit(gname.charAt(0))) 1042 throw new IllegalArgumentException( 1043 "capturing group name {" + gname + 1044 "} starts with digit character"); 1045 if (!parentPattern.namedGroups().containsKey(gname)) 1046 throw new IllegalArgumentException( 1047 "No group with name {" + gname + "}"); 1048 refNum = parentPattern.namedGroups().get(gname); 1049 cursor++; 1050 } else { 1051 // The first number is always a group 1052 refNum = nextChar - '0'; 1053 if ((refNum < 0) || (refNum > 9)) 1054 throw new IllegalArgumentException( 1055 "Illegal group reference"); 1056 cursor++; 1057 // Capture the largest legal group string 1058 boolean done = false; 1059 while (!done) { 1060 if (cursor >= replacement.length()) { 1061 break; 1062 } 1063 int nextDigit = replacement.charAt(cursor) - '0'; 1064 if ((nextDigit < 0) || (nextDigit > 9)) { // not a number 1065 break; 1066 } 1067 int newRefNum = (refNum * 10) + nextDigit; 1068 if (groupCount() < newRefNum) { 1069 done = true; 1070 } else { 1071 refNum = newRefNum; 1072 cursor++; 1073 } 1074 } 1075 } 1076 // Append group 1077 if (start(refNum) != -1 && end(refNum) != -1) 1078 result.append(text, start(refNum), end(refNum)); 1079 } else { 1080 result.append(nextChar); 1081 cursor++; 1082 } 1083 } 1084 return result; 1085 } 1086 1087 /** 1088 * Implements a terminal append-and-replace step. 1089 * 1090 * <p> This method reads characters from the input sequence, starting at 1091 * the append position, and appends them to the given string buffer. It is 1092 * intended to be invoked after one or more invocations of the {@link 1093 * #appendReplacement appendReplacement} method in order to copy the 1094 * remainder of the input sequence. </p> 1095 * 1096 * @param sb 1097 * The target string buffer 1098 * 1099 * @return The target string buffer 1100 */ 1101 public StringBuffer appendTail(StringBuffer sb) { 1102 sb.append(text, lastAppendPosition, getTextLength()); 1103 return sb; 1104 } 1105 1106 /** 1107 * Implements a terminal append-and-replace step. 1108 * 1109 * <p> This method reads characters from the input sequence, starting at 1110 * the append position, and appends them to the given string builder. It is 1111 * intended to be invoked after one or more invocations of the {@link 1112 * #appendReplacement appendReplacement} method in order to copy the 1113 * remainder of the input sequence. </p> 1114 * 1115 * @param sb 1116 * The target string builder 1117 * 1118 * @return The target string builder 1119 * 1120 * @since 1.9 1121 */ 1122 public StringBuilder appendTail(StringBuilder sb) { 1123 sb.append(text, lastAppendPosition, getTextLength()); 1124 return sb; 1125 } 1126 1127 /** 1128 * Replaces every subsequence of the input sequence that matches the 1129 * pattern with the given replacement string. 1130 * 1131 * <p> This method first resets this matcher. It then scans the input 1132 * sequence looking for matches of the pattern. Characters that are not 1133 * part of any match are appended directly to the result string; each match 1134 * is replaced in the result by the replacement string. The replacement 1135 * string may contain references to captured subsequences as in the {@link 1136 * #appendReplacement appendReplacement} method. 1137 * 1138 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1139 * the replacement string may cause the results to be different than if it 1140 * were being treated as a literal replacement string. Dollar signs may be 1141 * treated as references to captured subsequences as described above, and 1142 * backslashes are used to escape literal characters in the replacement 1143 * string. 1144 * 1145 * <p> Given the regular expression {@code a*b}, the input 1146 * {@code "aabfooaabfooabfoob"}, and the replacement string 1147 * {@code "-"}, an invocation of this method on a matcher for that 1148 * expression would yield the string {@code "-foo-foo-foo-"}. 1149 * 1150 * <p> Invoking this method changes this matcher's state. If the matcher 1151 * is to be used in further matching operations then it should first be 1152 * reset. </p> 1153 * 1154 * @param replacement 1155 * The replacement string 1156 * 1157 * @return The string constructed by replacing each matching subsequence 1158 * by the replacement string, substituting captured subsequences 1159 * as needed 1160 */ 1161 public String replaceAll(String replacement) { 1162 reset(); 1163 boolean result = find(); 1164 if (result) { 1165 StringBuilder sb = new StringBuilder(); 1166 do { 1167 appendReplacement(sb, replacement); 1168 result = find(); 1169 } while (result); 1170 appendTail(sb); 1171 return sb.toString(); 1172 } 1173 return text.toString(); 1174 } 1175 1176 /** 1177 * Replaces every subsequence of the input sequence that matches the 1178 * pattern with the result of applying the given replacer function to the 1179 * match result of this matcher corresponding to that subsequence. 1180 * Exceptions thrown by the function are relayed to the caller. 1181 * 1182 * <p> This method first resets this matcher. It then scans the input 1183 * sequence looking for matches of the pattern. Characters that are not 1184 * part of any match are appended directly to the result string; each match 1185 * is replaced in the result by the applying the replacer function that 1186 * returns a replacement string. Each replacement string may contain 1187 * references to captured subsequences as in the {@link #appendReplacement 1188 * appendReplacement} method. 1189 * 1190 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1191 * a replacement string may cause the results to be different than if it 1192 * were being treated as a literal replacement string. Dollar signs may be 1193 * treated as references to captured subsequences as described above, and 1194 * backslashes are used to escape literal characters in the replacement 1195 * string. 1196 * 1197 * <p> Given the regular expression {@code dog}, the input 1198 * {@code "zzzdogzzzdogzzz"}, and the function 1199 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on 1200 * a matcher for that expression would yield the string 1201 * {@code "zzzDOGzzzDOGzzz"}. 1202 * 1203 * <p> Invoking this method changes this matcher's state. If the matcher 1204 * is to be used in further matching operations then it should first be 1205 * reset. </p> 1206 * 1207 * <p> The replacer function should not modify this matcher's state during 1208 * replacement. This method will, on a best-effort basis, throw a 1209 * {@link java.util.ConcurrentModificationException} if such modification is 1210 * detected. 1211 * 1212 * <p> The state of each match result passed to the replacer function is 1213 * guaranteed to be constant only for the duration of the replacer function 1214 * call and only if the replacer function does not modify this matcher's 1215 * state. 1216 * 1217 * @implNote 1218 * This implementation applies the replacer function to this matcher, which 1219 * is an instance of {@code MatchResult}. 1220 * 1221 * @param replacer 1222 * The function to be applied to the match result of this matcher 1223 * that returns a replacement string. 1224 * @return The string constructed by replacing each matching subsequence 1225 * with the result of applying the replacer function to that 1226 * matched subsequence, substituting captured subsequences as 1227 * needed. 1228 * @throws NullPointerException if the replacer function is null 1229 * @throws ConcurrentModificationException if it is detected, on a 1230 * best-effort basis, that the replacer function modified this 1231 * matcher's state 1232 * @since 1.9 1233 */ 1234 public String replaceAll(Function<MatchResult, String> replacer) { 1235 Objects.requireNonNull(replacer); 1236 reset(); 1237 boolean result = find(); 1238 if (result) { 1239 StringBuilder sb = new StringBuilder(); 1240 do { 1241 int ec = modCount; 1242 String replacement = replacer.apply(this); 1243 if (ec != modCount) 1244 throw new ConcurrentModificationException(); 1245 appendReplacement(sb, replacement); 1246 result = find(); 1247 } while (result); 1248 appendTail(sb); 1249 return sb.toString(); 1250 } 1251 return text.toString(); 1252 } 1253 1254 /** 1255 * Returns a stream of match results for each subsequence of the input 1256 * sequence that matches the pattern. The match results occur in the 1257 * same order as the matching subsequences in the input sequence. 1258 * 1259 * <p> Each match result is produced as if by {@link #toMatchResult()}. 1260 * 1261 * <p> This method does not reset this matcher. Matching starts on 1262 * initiation of the terminal stream operation either at the beginning of 1263 * this matcher's region, or, if the matcher has not since been reset, at 1264 * the first character not matched by a previous match. 1265 * 1266 * <p> If the matcher is to be used for further matching operations after 1267 * the terminal stream operation completes then it should be first reset. 1268 * 1269 * <p> This matcher's state should not be modified during execution of the 1270 * returned stream's pipeline. The returned stream's source 1271 * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort 1272 * basis, throw a {@link java.util.ConcurrentModificationException} if such 1273 * modification is detected. 1274 * 1275 * @return a sequential stream of match results. 1276 * @since 1.9 1277 */ 1278 public Stream<MatchResult> results() { 1279 class MatchResultIterator implements Iterator<MatchResult> { 1280 // -ve for call to find, 0 for not found, 1 for found 1281 int state = -1; 1282 // State for concurrent modification checking 1283 // -1 for uninitialized 1284 int expectedCount = -1; 1285 // The input sequence as a string, set once only after first find 1286 // Avoids repeated conversion from CharSequence for each match 1287 String textAsString; 1288 1289 @Override 1290 public MatchResult next() { 1291 if (expectedCount >= 0 && expectedCount != modCount) 1292 throw new ConcurrentModificationException(); 1293 1294 if (!hasNext()) 1295 throw new NoSuchElementException(); 1296 1297 state = -1; 1298 return toMatchResult(textAsString); 1299 } 1300 1301 @Override 1302 public boolean hasNext() { 1303 if (state >= 0) 1304 return state == 1; 1305 1306 // Defer throwing ConcurrentModificationException to when next 1307 // or forEachRemaining is called. The is consistent with other 1308 // fail-fast implementations. 1309 if (expectedCount >= 0 && expectedCount != modCount) 1310 return true; 1311 1312 boolean found = find(); 1313 // Capture the input sequence as a string on first find 1314 if (found && state < 0) 1315 textAsString = text.toString(); 1316 state = found ? 1 : 0; 1317 expectedCount = modCount; 1318 return found; 1319 } 1320 1321 @Override 1322 public void forEachRemaining(Consumer<? super MatchResult> action) { 1323 if (expectedCount >= 0 && expectedCount != modCount) 1324 throw new ConcurrentModificationException(); 1325 1326 int s = state; 1327 if (s == 0) 1328 return; 1329 1330 // Set state to report no more elements on further operations 1331 state = 0; 1332 expectedCount = -1; 1333 1334 // Perform a first find if required 1335 if (s < 0 && !find()) 1336 return; 1337 1338 // Capture the input sequence as a string on first find 1339 textAsString = text.toString(); 1340 1341 do { 1342 int ec = modCount; 1343 action.accept(toMatchResult(textAsString)); 1344 if (ec != modCount) 1345 throw new ConcurrentModificationException(); 1346 } while (find()); 1347 } 1348 } 1349 return StreamSupport.stream(Spliterators.spliteratorUnknownSize( 1350 new MatchResultIterator(), Spliterator.ORDERED | Spliterator.NONNULL), false); 1351 } 1352 1353 /** 1354 * Replaces the first subsequence of the input sequence that matches the 1355 * pattern with the given replacement string. 1356 * 1357 * <p> This method first resets this matcher. It then scans the input 1358 * sequence looking for a match of the pattern. Characters that are not 1359 * part of the match are appended directly to the result string; the match 1360 * is replaced in the result by the replacement string. The replacement 1361 * string may contain references to captured subsequences as in the {@link 1362 * #appendReplacement appendReplacement} method. 1363 * 1364 * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1365 * the replacement string may cause the results to be different than if it 1366 * were being treated as a literal replacement string. Dollar signs may be 1367 * treated as references to captured subsequences as described above, and 1368 * backslashes are used to escape literal characters in the replacement 1369 * string. 1370 * 1371 * <p> Given the regular expression {@code dog}, the input 1372 * {@code "zzzdogzzzdogzzz"}, and the replacement string 1373 * {@code "cat"}, an invocation of this method on a matcher for that 1374 * expression would yield the string {@code "zzzcatzzzdogzzz"}. </p> 1375 * 1376 * <p> Invoking this method changes this matcher's state. If the matcher 1377 * is to be used in further matching operations then it should first be 1378 * reset. </p> 1379 * 1380 * @param replacement 1381 * The replacement string 1382 * @return The string constructed by replacing the first matching 1383 * subsequence by the replacement string, substituting captured 1384 * subsequences as needed 1385 */ 1386 public String replaceFirst(String replacement) { 1387 if (replacement == null) 1388 throw new NullPointerException("replacement"); 1389 reset(); 1390 if (!find()) 1391 return text.toString(); 1392 StringBuilder sb = new StringBuilder(); 1393 appendReplacement(sb, replacement); 1394 appendTail(sb); 1395 return sb.toString(); 1396 } 1397 1398 /** 1399 * Replaces the first subsequence of the input sequence that matches the 1400 * pattern with the result of applying the given replacer function to the 1401 * match result of this matcher corresponding to that subsequence. 1402 * Exceptions thrown by the replace function are relayed to the caller. 1403 * 1404 * <p> This method first resets this matcher. It then scans the input 1405 * sequence looking for a match of the pattern. Characters that are not 1406 * part of the match are appended directly to the result string; the match 1407 * is replaced in the result by the applying the replacer function that 1408 * returns a replacement string. The replacement string may contain 1409 * references to captured subsequences as in the {@link #appendReplacement 1410 * appendReplacement} method. 1411 * 1412 * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1413 * the replacement string may cause the results to be different than if it 1414 * were being treated as a literal replacement string. Dollar signs may be 1415 * treated as references to captured subsequences as described above, and 1416 * backslashes are used to escape literal characters in the replacement 1417 * string. 1418 * 1419 * <p> Given the regular expression {@code dog}, the input 1420 * {@code "zzzdogzzzdogzzz"}, and the function 1421 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on 1422 * a matcher for that expression would yield the string 1423 * {@code "zzzDOGzzzdogzzz"}. 1424 * 1425 * <p> Invoking this method changes this matcher's state. If the matcher 1426 * is to be used in further matching operations then it should first be 1427 * reset. 1428 * 1429 * <p> The replacer function should not modify this matcher's state during 1430 * replacement. This method will, on a best-effort basis, throw a 1431 * {@link java.util.ConcurrentModificationException} if such modification is 1432 * detected. 1433 * 1434 * <p> The state of the match result passed to the replacer function is 1435 * guaranteed to be constant only for the duration of the replacer function 1436 * call and only if the replacer function does not modify this matcher's 1437 * state. 1438 * 1439 * @implNote 1440 * This implementation applies the replacer function to this matcher, which 1441 * is an instance of {@code MatchResult}. 1442 * 1443 * @param replacer 1444 * The function to be applied to the match result of this matcher 1445 * that returns a replacement string. 1446 * @return The string constructed by replacing the first matching 1447 * subsequence with the result of applying the replacer function to 1448 * the matched subsequence, substituting captured subsequences as 1449 * needed. 1450 * @throws NullPointerException if the replacer function is null 1451 * @throws ConcurrentModificationException if it is detected, on a 1452 * best-effort basis, that the replacer function modified this 1453 * matcher's state 1454 * @since 1.9 1455 */ 1456 public String replaceFirst(Function<MatchResult, String> replacer) { 1457 Objects.requireNonNull(replacer); 1458 reset(); 1459 if (!find()) 1460 return text.toString(); 1461 StringBuilder sb = new StringBuilder(); 1462 int ec = modCount; 1463 String replacement = replacer.apply(this); 1464 if (ec != modCount) 1465 throw new ConcurrentModificationException(); 1466 appendReplacement(sb, replacement); 1467 appendTail(sb); 1468 return sb.toString(); 1469 } 1470 1471 /** 1472 * Sets the limits of this matcher's region. The region is the part of the 1473 * input sequence that will be searched to find a match. Invoking this 1474 * method resets the matcher, and then sets the region to start at the 1475 * index specified by the {@code start} parameter and end at the 1476 * index specified by the {@code end} parameter. 1477 * 1478 * <p>Depending on the transparency and anchoring being used (see 1479 * {@link #useTransparentBounds useTransparentBounds} and 1480 * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such 1481 * as anchors may behave differently at or around the boundaries of the 1482 * region. 1483 * 1484 * @param start 1485 * The index to start searching at (inclusive) 1486 * @param end 1487 * The index to end searching at (exclusive) 1488 * @throws IndexOutOfBoundsException 1489 * If start or end is less than zero, if 1490 * start is greater than the length of the input sequence, if 1491 * end is greater than the length of the input sequence, or if 1492 * start is greater than end. 1493 * @return this matcher 1494 * @since 1.5 1495 */ 1496 public Matcher region(int start, int end) { 1497 if ((start < 0) || (start > getTextLength())) 1498 throw new IndexOutOfBoundsException("start"); 1499 if ((end < 0) || (end > getTextLength())) 1500 throw new IndexOutOfBoundsException("end"); 1501 if (start > end) 1502 throw new IndexOutOfBoundsException("start > end"); 1503 reset(); 1504 from = start; 1505 to = end; 1506 return this; 1507 } 1508 1509 /** 1510 * Reports the start index of this matcher's region. The 1511 * searches this matcher conducts are limited to finding matches 1512 * within {@link #regionStart regionStart} (inclusive) and 1513 * {@link #regionEnd regionEnd} (exclusive). 1514 * 1515 * @return The starting point of this matcher's region 1516 * @since 1.5 1517 */ 1518 public int regionStart() { 1519 return from; 1520 } 1521 1522 /** 1523 * Reports the end index (exclusive) of this matcher's region. 1524 * The searches this matcher conducts are limited to finding matches 1525 * within {@link #regionStart regionStart} (inclusive) and 1526 * {@link #regionEnd regionEnd} (exclusive). 1527 * 1528 * @return the ending point of this matcher's region 1529 * @since 1.5 1530 */ 1531 public int regionEnd() { 1532 return to; 1533 } 1534 1535 /** 1536 * Queries the transparency of region bounds for this matcher. 1537 * 1538 * <p> This method returns {@code true} if this matcher uses 1539 * <i>transparent</i> bounds, {@code false} if it uses <i>opaque</i> 1540 * bounds. 1541 * 1542 * <p> See {@link #useTransparentBounds useTransparentBounds} for a 1543 * description of transparent and opaque bounds. 1544 * 1545 * <p> By default, a matcher uses opaque region boundaries. 1546 * 1547 * @return {@code true} iff this matcher is using transparent bounds, 1548 * {@code false} otherwise. 1549 * @see java.util.regex.Matcher#useTransparentBounds(boolean) 1550 * @since 1.5 1551 */ 1552 public boolean hasTransparentBounds() { 1553 return transparentBounds; 1554 } 1555 1556 /** 1557 * Sets the transparency of region bounds for this matcher. 1558 * 1559 * <p> Invoking this method with an argument of {@code true} will set this 1560 * matcher to use <i>transparent</i> bounds. If the boolean 1561 * argument is {@code false}, then <i>opaque</i> bounds will be used. 1562 * 1563 * <p> Using transparent bounds, the boundaries of this 1564 * matcher's region are transparent to lookahead, lookbehind, 1565 * and boundary matching constructs. Those constructs can see beyond the 1566 * boundaries of the region to see if a match is appropriate. 1567 * 1568 * <p> Using opaque bounds, the boundaries of this matcher's 1569 * region are opaque to lookahead, lookbehind, and boundary matching 1570 * constructs that may try to see beyond them. Those constructs cannot 1571 * look past the boundaries so they will fail to match anything outside 1572 * of the region. 1573 * 1574 * <p> By default, a matcher uses opaque bounds. 1575 * 1576 * @param b a boolean indicating whether to use opaque or transparent 1577 * regions 1578 * @return this matcher 1579 * @see java.util.regex.Matcher#hasTransparentBounds 1580 * @since 1.5 1581 */ 1582 public Matcher useTransparentBounds(boolean b) { 1583 transparentBounds = b; 1584 return this; 1585 } 1586 1587 /** 1588 * Queries the anchoring of region bounds for this matcher. 1589 * 1590 * <p> This method returns {@code true} if this matcher uses 1591 * <i>anchoring</i> bounds, {@code false} otherwise. 1592 * 1593 * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a 1594 * description of anchoring bounds. 1595 * 1596 * <p> By default, a matcher uses anchoring region boundaries. 1597 * 1598 * @return {@code true} iff this matcher is using anchoring bounds, 1599 * {@code false} otherwise. 1600 * @see java.util.regex.Matcher#useAnchoringBounds(boolean) 1601 * @since 1.5 1602 */ 1603 public boolean hasAnchoringBounds() { 1604 return anchoringBounds; 1605 } 1606 1607 /** 1608 * Sets the anchoring of region bounds for this matcher. 1609 * 1610 * <p> Invoking this method with an argument of {@code true} will set this 1611 * matcher to use <i>anchoring</i> bounds. If the boolean 1612 * argument is {@code false}, then <i>non-anchoring</i> bounds will be 1613 * used. 1614 * 1615 * <p> Using anchoring bounds, the boundaries of this 1616 * matcher's region match anchors such as ^ and $. 1617 * 1618 * <p> Without anchoring bounds, the boundaries of this 1619 * matcher's region will not match anchors such as ^ and $. 1620 * 1621 * <p> By default, a matcher uses anchoring region boundaries. 1622 * 1623 * @param b a boolean indicating whether or not to use anchoring bounds. 1624 * @return this matcher 1625 * @see java.util.regex.Matcher#hasAnchoringBounds 1626 * @since 1.5 1627 */ 1628 public Matcher useAnchoringBounds(boolean b) { 1629 anchoringBounds = b; 1630 return this; 1631 } 1632 1633 /** 1634 * <p>Returns the string representation of this matcher. The 1635 * string representation of a {@code Matcher} contains information 1636 * that may be useful for debugging. The exact format is unspecified. 1637 * 1638 * @return The string representation of this matcher 1639 * @since 1.5 1640 */ 1641 public String toString() { 1642 StringBuilder sb = new StringBuilder(); 1643 sb.append("java.util.regex.Matcher") 1644 .append("[pattern=").append(pattern()) 1645 .append(" region=") 1646 .append(regionStart()).append(',').append(regionEnd()) 1647 .append(" lastmatch="); 1648 if ((first >= 0) && (group() != null)) { 1649 sb.append(group()); 1650 } 1651 sb.append(']'); 1652 return sb.toString(); 1653 } 1654 1655 /** 1656 * <p>Returns true if the end of input was hit by the search engine in 1657 * the last match operation performed by this matcher. 1658 * 1659 * <p>When this method returns true, then it is possible that more input 1660 * would have changed the result of the last search. 1661 * 1662 * @return true iff the end of input was hit in the last match; false 1663 * otherwise 1664 * @since 1.5 1665 */ 1666 public boolean hitEnd() { 1667 return hitEnd; 1668 } 1669 1670 /** 1671 * <p>Returns true if more input could change a positive match into a 1672 * negative one. 1673 * 1674 * <p>If this method returns true, and a match was found, then more 1675 * input could cause the match to be lost. If this method returns false 1676 * and a match was found, then more input might change the match but the 1677 * match won't be lost. If a match was not found, then requireEnd has no 1678 * meaning. 1679 * 1680 * @return true iff more input could change a positive match into a 1681 * negative one. 1682 * @since 1.5 1683 */ 1684 public boolean requireEnd() { 1685 return requireEnd; 1686 } 1687 1688 /** 1689 * Initiates a search to find a Pattern within the given bounds. 1690 * The groups are filled with default values and the match of the root 1691 * of the state machine is called. The state machine will hold the state 1692 * of the match as it proceeds in this matcher. 1693 * 1694 * Matcher.from is not set here, because it is the "hard" boundary 1695 * of the start of the search which anchors will set to. The from param 1696 * is the "soft" boundary of the start of the search, meaning that the 1697 * regex tries to match at that index but ^ won't match there. Subsequent 1698 * calls to the search methods start at a new "soft" boundary which is 1699 * the end of the previous match. 1700 */ 1701 boolean search(int from) { 1702 this.hitEnd = false; 1703 this.requireEnd = false; 1704 from = from < 0 ? 0 : from; 1705 this.first = from; 1706 this.oldLast = oldLast < 0 ? from : oldLast; 1707 for (int i = 0; i < groups.length; i++) 1708 groups[i] = -1; 1709 acceptMode = NOANCHOR; 1710 boolean result = parentPattern.root.match(this, from, text); 1711 if (!result) 1712 this.first = -1; 1713 this.oldLast = this.last; 1714 this.modCount++; 1715 return result; 1716 } 1717 1718 /** 1719 * Initiates a search for an anchored match to a Pattern within the given 1720 * bounds. The groups are filled with default values and the match of the 1721 * root of the state machine is called. The state machine will hold the 1722 * state of the match as it proceeds in this matcher. 1723 */ 1724 boolean match(int from, int anchor) { 1725 this.hitEnd = false; 1726 this.requireEnd = false; 1727 from = from < 0 ? 0 : from; 1728 this.first = from; 1729 this.oldLast = oldLast < 0 ? from : oldLast; 1730 for (int i = 0; i < groups.length; i++) 1731 groups[i] = -1; 1732 acceptMode = anchor; 1733 boolean result = parentPattern.matchRoot.match(this, from, text); 1734 if (!result) 1735 this.first = -1; 1736 this.oldLast = this.last; 1737 this.modCount++; 1738 return result; 1739 } 1740 1741 /** 1742 * Returns the end index of the text. 1743 * 1744 * @return the index after the last character in the text 1745 */ 1746 int getTextLength() { 1747 return text.length(); 1748 } 1749 1750 /** 1751 * Generates a String from this Matcher's input in the specified range. 1752 * 1753 * @param beginIndex the beginning index, inclusive 1754 * @param endIndex the ending index, exclusive 1755 * @return A String generated from this Matcher's input 1756 */ 1757 CharSequence getSubSequence(int beginIndex, int endIndex) { 1758 return text.subSequence(beginIndex, endIndex); 1759 } 1760 1761 /** 1762 * Returns this Matcher's input character at index i. 1763 * 1764 * @return A char from the specified index 1765 */ 1766 char charAt(int i) { 1767 return text.charAt(i); 1768 } 1769 1770 /** 1771 * Returns the group index of the matched capturing group. 1772 * 1773 * @return the index of the named-capturing group 1774 */ 1775 int getMatchedGroupIndex(String name) { 1776 Objects.requireNonNull(name, "Group name"); 1777 if (first < 0) 1778 throw new IllegalStateException("No match found"); 1779 if (!parentPattern.namedGroups().containsKey(name)) 1780 throw new IllegalArgumentException("No group with name <" + name + ">"); 1781 return parentPattern.namedGroups().get(name); 1782 } 1783 }