1 /* 2 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util.regex; 27 28 import java.util.ConcurrentModificationException; 29 import java.util.Iterator; 30 import java.util.NoSuchElementException; 31 import java.util.Objects; 32 import java.util.Spliterator; 33 import java.util.Spliterators; 34 import java.util.function.Consumer; 35 import java.util.function.Function; 36 import java.util.stream.Stream; 37 import java.util.stream.StreamSupport; 38 39 /** 40 * An engine that performs match operations on a {@linkplain 41 * java.lang.CharSequence character sequence} by interpreting a {@link Pattern}. 42 * 43 * <p> A matcher is created from a pattern by invoking the pattern's {@link 44 * Pattern#matcher matcher} method. 
Once created, a matcher can be used to 45 * perform three different kinds of match operations: 46 * 47 * <ul> 48 * 49 * <li><p> The {@link #matches matches} method attempts to match the entire 50 * input sequence against the pattern. </p></li> 51 * 52 * <li><p> The {@link #lookingAt lookingAt} method attempts to match the 53 * input sequence, starting at the beginning, against the pattern. </p></li> 54 * 55 * <li><p> The {@link #find find} method scans the input sequence looking 56 * for the next subsequence that matches the pattern. </p></li> 57 * 58 * </ul> 59 * 60 * <p> Each of these methods returns a boolean indicating success or failure. 61 * More information about a successful match can be obtained by querying the 62 * state of the matcher. 63 * 64 * <p> A matcher finds matches in a subset of its input called the 65 * <i>region</i>. By default, the region contains all of the matcher's input. 66 * The region can be modified via the {@link #region(int, int) region} method 67 * and queried via the {@link #regionStart() regionStart} and {@link 68 * #regionEnd() regionEnd} methods. The way that the region boundaries interact 69 * with some pattern constructs can be changed. See {@link 70 * #useAnchoringBounds(boolean) useAnchoringBounds} and {@link 71 * #useTransparentBounds(boolean) useTransparentBounds} for more details. 72 * 73 * <p> This class also defines methods for replacing matched subsequences with 74 * new strings whose contents can, if desired, be computed from the match 75 * result. The {@link #appendReplacement appendReplacement} and {@link 76 * #appendTail appendTail} methods can be used in tandem in order to collect 77 * the result into an existing string buffer or string builder. Alternatively, 78 * the more convenient {@link #replaceAll replaceAll} method can be used to 79 * create a string in which every matching subsequence in the input sequence 80 * is replaced. 
81 * 82 * <p> The explicit state of a matcher includes the start and end indices of 83 * the most recent successful match. It also includes the start and end 84 * indices of the input subsequence captured by each <a 85 * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total 86 * count of such subsequences. As a convenience, methods are also provided for 87 * returning these captured subsequences in string form. 88 * 89 * <p> The explicit state of a matcher is initially undefined; attempting to 90 * query any part of it before a successful match will cause an {@link 91 * IllegalStateException} to be thrown. The explicit state of a matcher is 92 * recomputed by every match operation. 93 * 94 * <p> The implicit state of a matcher includes the input character sequence as 95 * well as the <i>append position</i>, which is initially zero and is updated 96 * by the {@link #appendReplacement appendReplacement} method. 97 * 98 * <p> A matcher may be reset explicitly by invoking its {@link #reset()} 99 * method or, if a new input sequence is desired, its {@link 100 * #reset(java.lang.CharSequence) reset(CharSequence)} method. Resetting a 101 * matcher discards its explicit state information and sets the append position 102 * to zero. 103 * 104 * <p> Instances of this class are not safe for use by multiple concurrent 105 * threads. </p> 106 * 107 * 108 * @author Mike McCloskey 109 * @author Mark Reinhold 110 * @author JSR-51 Expert Group 111 * @since 1.4 112 * @spec JSR-51 113 */ 114 115 public final class Matcher implements MatchResult { 116 117 /** 118 * The Pattern object that created this Matcher. 119 */ 120 Pattern parentPattern; 121 122 /** 123 * The storage used by groups. They may contain invalid values if 124 * a group was skipped during the matching. 125 */ 126 int[] groups; 127 128 /** 129 * The range within the sequence that is to be matched. Anchors 130 * will match at these "hard" boundaries. Changing the region 131 * changes these values. 
132 */ 133 int from, to; 134 135 /** 136 * Lookbehind uses this value to ensure that the subexpression 137 * match ends at the point where the lookbehind was encountered. 138 */ 139 int lookbehindTo; 140 141 /** 142 * The original string being matched. 143 */ 144 CharSequence text; 145 146 /** 147 * Matcher state used by the last node. NOANCHOR is used when a 148 * match does not have to consume all of the input. ENDANCHOR is 149 * the mode used for matching all the input. 150 */ 151 static final int ENDANCHOR = 1; 152 static final int NOANCHOR = 0; 153 int acceptMode = NOANCHOR; 154 155 /** 156 * The range of string that last matched the pattern. If the last 157 * match failed then first is -1; last initially holds 0 then it 158 * holds the index of the end of the last match (which is where the 159 * next search starts). 160 */ 161 int first = -1, last = 0; 162 163 /** 164 * The end index of what matched in the last match operation. 165 */ 166 int oldLast = -1; 167 168 /** 169 * The index of the last position appended in a substitution. 170 */ 171 int lastAppendPosition = 0; 172 173 /** 174 * Storage used by nodes to tell what repetition they are on in 175 * a pattern, and where groups begin. The nodes themselves are stateless, 176 * so they rely on this field to hold state during a match. 177 */ 178 int[] locals; 179 180 /** 181 * Storage used by top greedy Loop node to store a specific hash set to 182 * keep the beginning index of the failed repetition match. The nodes 183 * themselves are stateless, so they rely on this field to hold state 184 * during a match. 185 */ 186 IntHashSet[] localsPos; 187 188 /** 189 * Boolean indicating whether or not more input could change 190 * the results of the last match. 191 * 192 * If hitEnd is true, and a match was found, then more input 193 * might cause a different match to be found. 194 * If hitEnd is true and a match was not found, then more 195 * input could cause a match to be found. 
196 * If hitEnd is false and a match was found, then more input 197 * will not change the match. 198 * If hitEnd is false and a match was not found, then more 199 * input will not cause a match to be found. 200 */ 201 boolean hitEnd; 202 203 /** 204 * Boolean indicating whether or not more input could change 205 * a positive match into a negative one. 206 * 207 * If requireEnd is true, and a match was found, then more 208 * input could cause the match to be lost. 209 * If requireEnd is false and a match was found, then more 210 * input might change the match but the match won't be lost. 211 * If a match was not found, then requireEnd has no meaning. 212 */ 213 boolean requireEnd; 214 215 /** 216 * If transparentBounds is true then the boundaries of this 217 * matcher's region are transparent to lookahead, lookbehind, 218 * and boundary matching constructs that try to see beyond them. 219 */ 220 boolean transparentBounds = false; 221 222 /** 223 * If anchoringBounds is true then the boundaries of this 224 * matcher's region match anchors such as ^ and $. 225 */ 226 boolean anchoringBounds = true; 227 228 /** 229 * Number of times this matcher's state has been modified 230 */ 231 int modCount; 232 233 /** 234 * No default constructor. 235 */ 236 Matcher() { 237 } 238 239 /** 240 * All matchers have the state used by Pattern during a match. 241 */ 242 Matcher(Pattern parent, CharSequence text) { 243 this.parentPattern = parent; 244 this.text = text; 245 246 // Allocate state storage 247 if (parent.capturingGroupCount > 10) { 248 groups = new int[parent.capturingGroupCount * 2]; 249 } else { 250 groups = new int[20]; 251 } 252 locals = new int[parent.localCount]; 253 localsPos = new IntHashSet[parent.localTCNCount]; 254 255 // Put fields into initial states 256 reset(); 257 } 258 259 /** 260 * Returns the pattern that is interpreted by this matcher. 
261 * 262 * @return The pattern for which this matcher was created 263 */ 264 public Pattern pattern() { 265 return parentPattern; 266 } 267 268 /** 269 * Returns the match state of this matcher as a {@link MatchResult}. 270 * The result is unaffected by subsequent operations performed upon this 271 * matcher. 272 * 273 * @return a {@code MatchResult} with the state of this matcher 274 * @since 1.5 275 */ 276 public MatchResult toMatchResult() { 277 return toMatchResult(text.toString()); 278 } 279 280 private MatchResult toMatchResult(String text) { 281 return new ImmutableMatchResult(this.first, 282 this.last, 283 groupCount(), 284 this.groups.clone(), 285 text); 286 } 287 288 private static class ImmutableMatchResult implements MatchResult { 289 private final int first; 290 private final int last; 291 private final int[] groups; 292 private final int groupCount; 293 private final String text; 294 295 ImmutableMatchResult(int first, int last, int groupCount, 296 int groups[], String text) 297 { 298 this.first = first; 299 this.last = last; 300 this.groupCount = groupCount; 301 this.groups = groups; 302 this.text = text; 303 } 304 305 @Override 306 public int start() { 307 checkMatch(); 308 return first; 309 } 310 311 @Override 312 public int start(int group) { 313 checkMatch(); 314 if (group < 0 || group > groupCount) 315 throw new IndexOutOfBoundsException("No group " + group); 316 return groups[group * 2]; 317 } 318 319 @Override 320 public int end() { 321 checkMatch(); 322 return last; 323 } 324 325 @Override 326 public int end(int group) { 327 checkMatch(); 328 if (group < 0 || group > groupCount) 329 throw new IndexOutOfBoundsException("No group " + group); 330 return groups[group * 2 + 1]; 331 } 332 333 @Override 334 public int groupCount() { 335 return groupCount; 336 } 337 338 @Override 339 public String group() { 340 checkMatch(); 341 return group(0); 342 } 343 344 @Override 345 public String group(int group) { 346 checkMatch(); 347 if (group < 0 || group 
> groupCount) 348 throw new IndexOutOfBoundsException("No group " + group); 349 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 350 return null; 351 return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 352 } 353 354 private void checkMatch() { 355 if (first < 0) 356 throw new IllegalStateException("No match found"); 357 358 } 359 } 360 361 /** 362 * Changes the {@code Pattern} that this {@code Matcher} uses to 363 * find matches with. 364 * 365 * <p> This method causes this matcher to lose information 366 * about the groups of the last match that occurred. The 367 * matcher's position in the input is maintained and its 368 * last append position is unaffected.</p> 369 * 370 * @param newPattern 371 * The new pattern used by this matcher 372 * @return This matcher 373 * @throws IllegalArgumentException 374 * If newPattern is {@code null} 375 * @since 1.5 376 */ 377 public Matcher usePattern(Pattern newPattern) { 378 if (newPattern == null) 379 throw new IllegalArgumentException("Pattern cannot be null"); 380 parentPattern = newPattern; 381 382 // Reallocate state storage 383 int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10); 384 groups = new int[parentGroupCount * 2]; 385 locals = new int[newPattern.localCount]; 386 for (int i = 0; i < groups.length; i++) 387 groups[i] = -1; 388 for (int i = 0; i < locals.length; i++) 389 locals[i] = -1; 390 localsPos = new IntHashSet[parentPattern.localTCNCount]; 391 modCount++; 392 return this; 393 } 394 395 /** 396 * Resets this matcher. 397 * 398 * <p> Resetting a matcher discards all of its explicit state information 399 * and sets its append position to zero. The matcher's region is set to the 400 * default region, which is its entire character sequence. The anchoring 401 * and transparency of this matcher's region boundaries are unaffected. 
402 * 403 * @return This matcher 404 */ 405 public Matcher reset() { 406 first = -1; 407 last = 0; 408 oldLast = -1; 409 for(int i=0; i<groups.length; i++) 410 groups[i] = -1; 411 for(int i=0; i<locals.length; i++) 412 locals[i] = -1; 413 for (int i = 0; i < localsPos.length; i++) { 414 if (localsPos[i] != null) 415 localsPos[i].clear(); 416 } 417 lastAppendPosition = 0; 418 from = 0; 419 to = getTextLength(); 420 modCount++; 421 return this; 422 } 423 424 /** 425 * Resets this matcher with a new input sequence. 426 * 427 * <p> Resetting a matcher discards all of its explicit state information 428 * and sets its append position to zero. The matcher's region is set to 429 * the default region, which is its entire character sequence. The 430 * anchoring and transparency of this matcher's region boundaries are 431 * unaffected. 432 * 433 * @param input 434 * The new input character sequence 435 * 436 * @return This matcher 437 */ 438 public Matcher reset(CharSequence input) { 439 text = input; 440 return reset(); 441 } 442 443 /** 444 * Returns the start index of the previous match. 445 * 446 * @return The index of the first character matched 447 * 448 * @throws IllegalStateException 449 * If no match has yet been attempted, 450 * or if the previous match operation failed 451 */ 452 public int start() { 453 if (first < 0) 454 throw new IllegalStateException("No match available"); 455 return first; 456 } 457 458 /** 459 * Returns the start index of the subsequence captured by the given group 460 * during the previous match operation. 461 * 462 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 463 * to right, starting at one. Group zero denotes the entire pattern, so 464 * the expression <i>m.</i>{@code start(0)} is equivalent to 465 * <i>m.</i>{@code start()}. 
</p> 466 * 467 * @param group 468 * The index of a capturing group in this matcher's pattern 469 * 470 * @return The index of the first character captured by the group, 471 * or {@code -1} if the match was successful but the group 472 * itself did not match anything 473 * 474 * @throws IllegalStateException 475 * If no match has yet been attempted, 476 * or if the previous match operation failed 477 * 478 * @throws IndexOutOfBoundsException 479 * If there is no capturing group in the pattern 480 * with the given index 481 */ 482 public int start(int group) { 483 if (first < 0) 484 throw new IllegalStateException("No match available"); 485 if (group < 0 || group > groupCount()) 486 throw new IndexOutOfBoundsException("No group " + group); 487 return groups[group * 2]; 488 } 489 490 /** 491 * Returns the start index of the subsequence captured by the given 492 * <a href="Pattern.html#groupname">named-capturing group</a> during the 493 * previous match operation. 494 * 495 * @param name 496 * The name of a named-capturing group in this matcher's pattern 497 * 498 * @return The index of the first character captured by the group, 499 * or {@code -1} if the match was successful but the group 500 * itself did not match anything 501 * 502 * @throws IllegalStateException 503 * If no match has yet been attempted, 504 * or if the previous match operation failed 505 * 506 * @throws IllegalArgumentException 507 * If there is no capturing group in the pattern 508 * with the given name 509 * @since 1.8 510 */ 511 public int start(String name) { 512 return groups[getMatchedGroupIndex(name) * 2]; 513 } 514 515 /** 516 * Returns the offset after the last character matched. 
517 * 518 * @return The offset after the last character matched 519 * 520 * @throws IllegalStateException 521 * If no match has yet been attempted, 522 * or if the previous match operation failed 523 */ 524 public int end() { 525 if (first < 0) 526 throw new IllegalStateException("No match available"); 527 return last; 528 } 529 530 /** 531 * Returns the offset after the last character of the subsequence 532 * captured by the given group during the previous match operation. 533 * 534 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 535 * to right, starting at one. Group zero denotes the entire pattern, so 536 * the expression <i>m.</i>{@code end(0)} is equivalent to 537 * <i>m.</i>{@code end()}. </p> 538 * 539 * @param group 540 * The index of a capturing group in this matcher's pattern 541 * 542 * @return The offset after the last character captured by the group, 543 * or {@code -1} if the match was successful 544 * but the group itself did not match anything 545 * 546 * @throws IllegalStateException 547 * If no match has yet been attempted, 548 * or if the previous match operation failed 549 * 550 * @throws IndexOutOfBoundsException 551 * If there is no capturing group in the pattern 552 * with the given index 553 */ 554 public int end(int group) { 555 if (first < 0) 556 throw new IllegalStateException("No match available"); 557 if (group < 0 || group > groupCount()) 558 throw new IndexOutOfBoundsException("No group " + group); 559 return groups[group * 2 + 1]; 560 } 561 562 /** 563 * Returns the offset after the last character of the subsequence 564 * captured by the given <a href="Pattern.html#groupname">named-capturing 565 * group</a> during the previous match operation. 
566 * 567 * @param name 568 * The name of a named-capturing group in this matcher's pattern 569 * 570 * @return The offset after the last character captured by the group, 571 * or {@code -1} if the match was successful 572 * but the group itself did not match anything 573 * 574 * @throws IllegalStateException 575 * If no match has yet been attempted, 576 * or if the previous match operation failed 577 * 578 * @throws IllegalArgumentException 579 * If there is no capturing group in the pattern 580 * with the given name 581 * @since 1.8 582 */ 583 public int end(String name) { 584 return groups[getMatchedGroupIndex(name) * 2 + 1]; 585 } 586 587 /** 588 * Returns the input subsequence matched by the previous match. 589 * 590 * <p> For a matcher <i>m</i> with input sequence <i>s</i>, 591 * the expressions <i>m.</i>{@code group()} and 592 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),} <i>m.</i> 593 * {@code end())} are equivalent. </p> 594 * 595 * <p> Note that some patterns, for example {@code a*}, match the empty 596 * string. This method will return the empty string when the pattern 597 * successfully matches the empty string in the input. </p> 598 * 599 * @return The (possibly empty) subsequence matched by the previous match, 600 * in string form 601 * 602 * @throws IllegalStateException 603 * If no match has yet been attempted, 604 * or if the previous match operation failed 605 */ 606 public String group() { 607 return group(0); 608 } 609 610 /** 611 * Returns the input subsequence captured by the given group during the 612 * previous match operation. 613 * 614 * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index 615 * <i>g</i>, the expressions <i>m.</i>{@code group(}<i>g</i>{@code )} and 616 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(}<i>g</i>{@code 617 * ),} <i>m.</i>{@code end(}<i>g</i>{@code ))} 618 * are equivalent. 
</p> 619 * 620 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 621 * to right, starting at one. Group zero denotes the entire pattern, so 622 * the expression {@code m.group(0)} is equivalent to {@code m.group()}. 623 * </p> 624 * 625 * <p> If the match was successful but the group specified failed to match 626 * any part of the input sequence, then {@code null} is returned. Note 627 * that some groups, for example {@code (a*)}, match the empty string. 628 * This method will return the empty string when such a group successfully 629 * matches the empty string in the input. </p> 630 * 631 * @param group 632 * The index of a capturing group in this matcher's pattern 633 * 634 * @return The (possibly empty) subsequence captured by the group 635 * during the previous match, or {@code null} if the group 636 * failed to match part of the input 637 * 638 * @throws IllegalStateException 639 * If no match has yet been attempted, 640 * or if the previous match operation failed 641 * 642 * @throws IndexOutOfBoundsException 643 * If there is no capturing group in the pattern 644 * with the given index 645 */ 646 public String group(int group) { 647 if (first < 0) 648 throw new IllegalStateException("No match found"); 649 if (group < 0 || group > groupCount()) 650 throw new IndexOutOfBoundsException("No group " + group); 651 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 652 return null; 653 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 654 } 655 656 /** 657 * Returns the input subsequence captured by the given 658 * <a href="Pattern.html#groupname">named-capturing group</a> during the 659 * previous match operation. 660 * 661 * <p> If the match was successful but the group specified failed to match 662 * any part of the input sequence, then {@code null} is returned. Note 663 * that some groups, for example {@code (a*)}, match the empty string. 
664 * This method will return the empty string when such a group successfully 665 * matches the empty string in the input. </p> 666 * 667 * @param name 668 * The name of a named-capturing group in this matcher's pattern 669 * 670 * @return The (possibly empty) subsequence captured by the named group 671 * during the previous match, or {@code null} if the group 672 * failed to match part of the input 673 * 674 * @throws IllegalStateException 675 * If no match has yet been attempted, 676 * or if the previous match operation failed 677 * 678 * @throws IllegalArgumentException 679 * If there is no capturing group in the pattern 680 * with the given name 681 * @since 1.7 682 */ 683 public String group(String name) { 684 int group = getMatchedGroupIndex(name); 685 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 686 return null; 687 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 688 } 689 690 /** 691 * Returns the number of capturing groups in this matcher's pattern. 692 * 693 * <p> Group zero denotes the entire pattern by convention. It is not 694 * included in this count. 695 * 696 * <p> Any non-negative integer smaller than or equal to the value 697 * returned by this method is guaranteed to be a valid group index for 698 * this matcher. </p> 699 * 700 * @return The number of capturing groups in this matcher's pattern 701 */ 702 public int groupCount() { 703 return parentPattern.capturingGroupCount - 1; 704 } 705 706 /** 707 * Attempts to match the entire region against the pattern. 708 * 709 * <p> If the match succeeds then more information can be obtained via the 710 * {@code start}, {@code end}, and {@code group} methods. </p> 711 * 712 * @return {@code true} if, and only if, the entire region sequence 713 * matches this matcher's pattern 714 */ 715 public boolean matches() { 716 return match(from, ENDANCHOR); 717 } 718 719 /** 720 * Attempts to find the next subsequence of the input sequence that matches 721 * the pattern. 
722 * 723 * <p> This method starts at the beginning of this matcher's region, or, if 724 * a previous invocation of the method was successful and the matcher has 725 * not since been reset, at the first character not matched by the previous 726 * match. 727 * 728 * <p> If the match succeeds then more information can be obtained via the 729 * {@code start}, {@code end}, and {@code group} methods. </p> 730 * 731 * @return {@code true} if, and only if, a subsequence of the input 732 * sequence matches this matcher's pattern 733 */ 734 public boolean find() { 735 int nextSearchIndex = last; 736 if (nextSearchIndex == first) 737 nextSearchIndex++; 738 739 // If next search starts before region, start it at region 740 if (nextSearchIndex < from) 741 nextSearchIndex = from; 742 743 // If next search starts beyond region then it fails 744 if (nextSearchIndex > to) { 745 for (int i = 0; i < groups.length; i++) 746 groups[i] = -1; 747 return false; 748 } 749 return search(nextSearchIndex); 750 } 751 752 /** 753 * Resets this matcher and then attempts to find the next subsequence of 754 * the input sequence that matches the pattern, starting at the specified 755 * index. 756 * 757 * <p> If the match succeeds then more information can be obtained via the 758 * {@code start}, {@code end}, and {@code group} methods, and subsequent 759 * invocations of the {@link #find()} method will start at the first 760 * character not matched by this match. </p> 761 * 762 * @param start the index to start searching for a match 763 * @throws IndexOutOfBoundsException 764 * If start is less than zero or if start is greater than the 765 * length of the input sequence. 
766 * 767 * @return {@code true} if, and only if, a subsequence of the input 768 * sequence starting at the given index matches this matcher's 769 * pattern 770 */ 771 public boolean find(int start) { 772 int limit = getTextLength(); 773 if ((start < 0) || (start > limit)) 774 throw new IndexOutOfBoundsException("Illegal start index"); 775 reset(); 776 return search(start); 777 } 778 779 /** 780 * Attempts to match the input sequence, starting at the beginning of the 781 * region, against the pattern. 782 * 783 * <p> Like the {@link #matches matches} method, this method always starts 784 * at the beginning of the region; unlike that method, it does not 785 * require that the entire region be matched. 786 * 787 * <p> If the match succeeds then more information can be obtained via the 788 * {@code start}, {@code end}, and {@code group} methods. </p> 789 * 790 * @return {@code true} if, and only if, a prefix of the input 791 * sequence matches this matcher's pattern 792 */ 793 public boolean lookingAt() { 794 return match(from, NOANCHOR); 795 } 796 797 /** 798 * Returns a literal replacement {@code String} for the specified 799 * {@code String}. 800 * 801 * This method produces a {@code String} that will work 802 * as a literal replacement {@code s} in the 803 * {@code appendReplacement} method of the {@link Matcher} class. 804 * The {@code String} produced will match the sequence of characters 805 * in {@code s} treated as a literal sequence. Slashes ('\') and 806 * dollar signs ('$') will be given no special meaning. 
807 * 808 * @param s The string to be literalized 809 * @return A literal string replacement 810 * @since 1.5 811 */ 812 public static String quoteReplacement(String s) { 813 if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1)) 814 return s; 815 StringBuilder sb = new StringBuilder(); 816 for (int i=0; i<s.length(); i++) { 817 char c = s.charAt(i); 818 if (c == '\\' || c == '$') { 819 sb.append('\\'); 820 } 821 sb.append(c); 822 } 823 return sb.toString(); 824 } 825 826 /** 827 * Implements a non-terminal append-and-replace step. 828 * 829 * <p> This method performs the following actions: </p> 830 * 831 * <ol> 832 * 833 * <li><p> It reads characters from the input sequence, starting at the 834 * append position, and appends them to the given string buffer. It 835 * stops after reading the last character preceding the previous match, 836 * that is, the character at index {@link 837 * #start()} {@code -} {@code 1}. </p></li> 838 * 839 * <li><p> It appends the given replacement string to the string buffer. 840 * </p></li> 841 * 842 * <li><p> It sets the append position of this matcher to the index of 843 * the last character matched, plus one, that is, to {@link #end()}. 844 * </p></li> 845 * 846 * </ol> 847 * 848 * <p> The replacement string may contain references to subsequences 849 * captured during the previous match: Each occurrence of 850 * <code>${</code><i>name</i><code>}</code> or {@code $}<i>g</i> 851 * will be replaced by the result of evaluating the corresponding 852 * {@link #group(String) group(name)} or {@link #group(int) group(g)} 853 * respectively. For {@code $}<i>g</i>, 854 * the first number after the {@code $} is always treated as part of 855 * the group reference. Subsequent numbers are incorporated into g if 856 * they would form a legal group reference. Only the numerals '0' 857 * through '9' are considered as potential components of the group 858 * reference. 
If the second group matched the string {@code "foo"}, for 859 * example, then passing the replacement string {@code "$2bar"} would 860 * cause {@code "foobar"} to be appended to the string buffer. A dollar 861 * sign ({@code $}) may be included as a literal in the replacement 862 * string by preceding it with a backslash ({@code \$}). 863 * 864 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 865 * the replacement string may cause the results to be different than if it 866 * were being treated as a literal replacement string. Dollar signs may be 867 * treated as references to captured subsequences as described above, and 868 * backslashes are used to escape literal characters in the replacement 869 * string. 870 * 871 * <p> This method is intended to be used in a loop together with the 872 * {@link #appendTail(StringBuffer) appendTail} and {@link #find() find} 873 * methods. The following code, for example, writes {@code one dog two dogs 874 * in the yard} to the standard-output stream: </p> 875 * 876 * <blockquote><pre> 877 * Pattern p = Pattern.compile("cat"); 878 * Matcher m = p.matcher("one cat two cats in the yard"); 879 * StringBuffer sb = new StringBuffer(); 880 * while (m.find()) { 881 * m.appendReplacement(sb, "dog"); 882 * } 883 * m.appendTail(sb); 884 * System.out.println(sb.toString());</pre></blockquote> 885 * 886 * @param sb 887 * The target string buffer 888 * 889 * @param replacement 890 * The replacement string 891 * 892 * @return This matcher 893 * 894 * @throws IllegalStateException 895 * If no match has yet been attempted, 896 * or if the previous match operation failed 897 * 898 * @throws IllegalArgumentException 899 * If the replacement string refers to a named-capturing 900 * group that does not exist in the pattern 901 * 902 * @throws IndexOutOfBoundsException 903 * If the replacement string refers to a capturing group 904 * that does not exist in the pattern 905 */ 906 public Matcher appendReplacement(StringBuffer sb, 
String replacement) { 907 // If no match, return error 908 if (first < 0) 909 throw new IllegalStateException("No match available"); 910 StringBuilder result = new StringBuilder(); 911 appendExpandedReplacement(replacement, result); 912 // Append the intervening text 913 sb.append(text, lastAppendPosition, first); 914 // Append the match substitution 915 sb.append(result); 916 lastAppendPosition = last; 917 modCount++; 918 return this; 919 } 920 921 /** 922 * Implements a non-terminal append-and-replace step. 923 * 924 * <p> This method performs the following actions: </p> 925 * 926 * <ol> 927 * 928 * <li><p> It reads characters from the input sequence, starting at the 929 * append position, and appends them to the given string builder. It 930 * stops after reading the last character preceding the previous match, 931 * that is, the character at index {@link 932 * #start()} {@code -} {@code 1}. </p></li> 933 * 934 * <li><p> It appends the given replacement string to the string builder. 935 * </p></li> 936 * 937 * <li><p> It sets the append position of this matcher to the index of 938 * the last character matched, plus one, that is, to {@link #end()}. 939 * </p></li> 940 * 941 * </ol> 942 * 943 * <p> The replacement string may contain references to subsequences 944 * captured during the previous match: Each occurrence of 945 * {@code $}<i>g</i> will be replaced by the result of 946 * evaluating {@link #group(int) group}{@code (}<i>g</i>{@code )}. 947 * The first number after the {@code $} is always treated as part of 948 * the group reference. Subsequent numbers are incorporated into g if 949 * they would form a legal group reference. Only the numerals '0' 950 * through '9' are considered as potential components of the group 951 * reference. If the second group matched the string {@code "foo"}, for 952 * example, then passing the replacement string {@code "$2bar"} would 953 * cause {@code "foobar"} to be appended to the string builder. 
A dollar 954 * sign ({@code $}) may be included as a literal in the replacement 955 * string by preceding it with a backslash ({@code \$}). 956 * 957 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 958 * the replacement string may cause the results to be different than if it 959 * were being treated as a literal replacement string. Dollar signs may be 960 * treated as references to captured subsequences as described above, and 961 * backslashes are used to escape literal characters in the replacement 962 * string. 963 * 964 * <p> This method is intended to be used in a loop together with the 965 * {@link #appendTail(StringBuilder) appendTail} and 966 * {@link #find() find} methods. The following code, for example, writes 967 * {@code one dog two dogs in the yard} to the standard-output stream: </p> 968 * 969 * <blockquote><pre> 970 * Pattern p = Pattern.compile("cat"); 971 * Matcher m = p.matcher("one cat two cats in the yard"); 972 * StringBuilder sb = new StringBuilder(); 973 * while (m.find()) { 974 * m.appendReplacement(sb, "dog"); 975 * } 976 * m.appendTail(sb); 977 * System.out.println(sb.toString());</pre></blockquote> 978 * 979 * @param sb 980 * The target string builder 981 * @param replacement 982 * The replacement string 983 * @return This matcher 984 * 985 * @throws IllegalStateException 986 * If no match has yet been attempted, 987 * or if the previous match operation failed 988 * @throws IllegalArgumentException 989 * If the replacement string refers to a named-capturing 990 * group that does not exist in the pattern 991 * @throws IndexOutOfBoundsException 992 * If the replacement string refers to a capturing group 993 * that does not exist in the pattern 994 * @since 9 995 */ 996 public Matcher appendReplacement(StringBuilder sb, String replacement) { 997 // If no match, return error 998 if (first < 0) 999 throw new IllegalStateException("No match available"); 1000 StringBuilder result = new StringBuilder(); 1001 
appendExpandedReplacement(replacement, result); 1002 // Append the intervening text 1003 sb.append(text, lastAppendPosition, first); 1004 // Append the match substitution 1005 sb.append(result); 1006 lastAppendPosition = last; 1007 modCount++; 1008 return this; 1009 } 1010 1011 /** 1012 * Processes replacement string to replace group references with 1013 * groups. 1014 */ 1015 private StringBuilder appendExpandedReplacement( 1016 String replacement, StringBuilder result) { 1017 int cursor = 0; 1018 while (cursor < replacement.length()) { 1019 char nextChar = replacement.charAt(cursor); 1020 if (nextChar == '\\') { 1021 cursor++; 1022 if (cursor == replacement.length()) 1023 throw new IllegalArgumentException( 1024 "character to be escaped is missing"); 1025 nextChar = replacement.charAt(cursor); 1026 result.append(nextChar); 1027 cursor++; 1028 } else if (nextChar == '$') { 1029 // Skip past $ 1030 cursor++; 1031 // Throw IAE if this "$" is the last character in replacement 1032 if (cursor == replacement.length()) 1033 throw new IllegalArgumentException( 1034 "Illegal group reference: group index is missing"); 1035 nextChar = replacement.charAt(cursor); 1036 int refNum = -1; 1037 if (nextChar == '{') { 1038 cursor++; 1039 StringBuilder gsb = new StringBuilder(); 1040 while (cursor < replacement.length()) { 1041 nextChar = replacement.charAt(cursor); 1042 if (ASCII.isLower(nextChar) || 1043 ASCII.isUpper(nextChar) || 1044 ASCII.isDigit(nextChar)) { 1045 gsb.append(nextChar); 1046 cursor++; 1047 } else { 1048 break; 1049 } 1050 } 1051 if (gsb.length() == 0) 1052 throw new IllegalArgumentException( 1053 "named capturing group has 0 length name"); 1054 if (nextChar != '}') 1055 throw new IllegalArgumentException( 1056 "named capturing group is missing trailing '}'"); 1057 String gname = gsb.toString(); 1058 if (ASCII.isDigit(gname.charAt(0))) 1059 throw new IllegalArgumentException( 1060 "capturing group name {" + gname + 1061 "} starts with digit character"); 1062 if 
(!parentPattern.namedGroups().containsKey(gname)) 1063 throw new IllegalArgumentException( 1064 "No group with name {" + gname + "}"); 1065 refNum = parentPattern.namedGroups().get(gname); 1066 cursor++; 1067 } else { 1068 // The first number is always a group 1069 refNum = nextChar - '0'; 1070 if ((refNum < 0) || (refNum > 9)) 1071 throw new IllegalArgumentException( 1072 "Illegal group reference"); 1073 cursor++; 1074 // Capture the largest legal group string 1075 boolean done = false; 1076 while (!done) { 1077 if (cursor >= replacement.length()) { 1078 break; 1079 } 1080 int nextDigit = replacement.charAt(cursor) - '0'; 1081 if ((nextDigit < 0) || (nextDigit > 9)) { // not a number 1082 break; 1083 } 1084 int newRefNum = (refNum * 10) + nextDigit; 1085 if (groupCount() < newRefNum) { 1086 done = true; 1087 } else { 1088 refNum = newRefNum; 1089 cursor++; 1090 } 1091 } 1092 } 1093 // Append group 1094 if (start(refNum) != -1 && end(refNum) != -1) 1095 result.append(text, start(refNum), end(refNum)); 1096 } else { 1097 result.append(nextChar); 1098 cursor++; 1099 } 1100 } 1101 return result; 1102 } 1103 1104 /** 1105 * Implements a terminal append-and-replace step. 1106 * 1107 * <p> This method reads characters from the input sequence, starting at 1108 * the append position, and appends them to the given string buffer. It is 1109 * intended to be invoked after one or more invocations of the {@link 1110 * #appendReplacement(StringBuffer, String) appendReplacement} method in 1111 * order to copy the remainder of the input sequence. </p> 1112 * 1113 * @param sb 1114 * The target string buffer 1115 * 1116 * @return The target string buffer 1117 */ 1118 public StringBuffer appendTail(StringBuffer sb) { 1119 sb.append(text, lastAppendPosition, getTextLength()); 1120 return sb; 1121 } 1122 1123 /** 1124 * Implements a terminal append-and-replace step. 
1125 * 1126 * <p> This method reads characters from the input sequence, starting at 1127 * the append position, and appends them to the given string builder. It is 1128 * intended to be invoked after one or more invocations of the {@link 1129 * #appendReplacement(StringBuilder, String) 1130 * appendReplacement} method in order to copy the remainder of the input 1131 * sequence. </p> 1132 * 1133 * @param sb 1134 * The target string builder 1135 * 1136 * @return The target string builder 1137 * 1138 * @since 9 1139 */ 1140 public StringBuilder appendTail(StringBuilder sb) { 1141 sb.append(text, lastAppendPosition, getTextLength()); 1142 return sb; 1143 } 1144 1145 /** 1146 * Replaces every subsequence of the input sequence that matches the 1147 * pattern with the given replacement string. 1148 * 1149 * <p> This method first resets this matcher. It then scans the input 1150 * sequence looking for matches of the pattern. Characters that are not 1151 * part of any match are appended directly to the result string; each match 1152 * is replaced in the result by the replacement string. The replacement 1153 * string may contain references to captured subsequences as in the {@link 1154 * #appendReplacement appendReplacement} method. 1155 * 1156 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1157 * the replacement string may cause the results to be different than if it 1158 * were being treated as a literal replacement string. Dollar signs may be 1159 * treated as references to captured subsequences as described above, and 1160 * backslashes are used to escape literal characters in the replacement 1161 * string. 1162 * 1163 * <p> Given the regular expression {@code a*b}, the input 1164 * {@code "aabfooaabfooabfoob"}, and the replacement string 1165 * {@code "-"}, an invocation of this method on a matcher for that 1166 * expression would yield the string {@code "-foo-foo-foo-"}. 1167 * 1168 * <p> Invoking this method changes this matcher's state. 
If the matcher 1169 * is to be used in further matching operations then it should first be 1170 * reset. </p> 1171 * 1172 * @param replacement 1173 * The replacement string 1174 * 1175 * @return The string constructed by replacing each matching subsequence 1176 * by the replacement string, substituting captured subsequences 1177 * as needed 1178 */ 1179 public String replaceAll(String replacement) { 1180 reset(); 1181 boolean result = find(); 1182 if (result) { 1183 StringBuilder sb = new StringBuilder(); 1184 do { 1185 appendReplacement(sb, replacement); 1186 result = find(); 1187 } while (result); 1188 appendTail(sb); 1189 return sb.toString(); 1190 } 1191 return text.toString(); 1192 } 1193 1194 /** 1195 * Replaces every subsequence of the input sequence that matches the 1196 * pattern with the result of applying the given replacer function to the 1197 * match result of this matcher corresponding to that subsequence. 1198 * Exceptions thrown by the function are relayed to the caller. 1199 * 1200 * <p> This method first resets this matcher. It then scans the input 1201 * sequence looking for matches of the pattern. Characters that are not 1202 * part of any match are appended directly to the result string; each match 1203 * is replaced in the result by the applying the replacer function that 1204 * returns a replacement string. Each replacement string may contain 1205 * references to captured subsequences as in the {@link #appendReplacement 1206 * appendReplacement} method. 1207 * 1208 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1209 * a replacement string may cause the results to be different than if it 1210 * were being treated as a literal replacement string. Dollar signs may be 1211 * treated as references to captured subsequences as described above, and 1212 * backslashes are used to escape literal characters in the replacement 1213 * string. 
1214 * 1215 * <p> Given the regular expression {@code dog}, the input 1216 * {@code "zzzdogzzzdogzzz"}, and the function 1217 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on 1218 * a matcher for that expression would yield the string 1219 * {@code "zzzDOGzzzDOGzzz"}. 1220 * 1221 * <p> Invoking this method changes this matcher's state. If the matcher 1222 * is to be used in further matching operations then it should first be 1223 * reset. </p> 1224 * 1225 * <p> The replacer function should not modify this matcher's state during 1226 * replacement. This method will, on a best-effort basis, throw a 1227 * {@link java.util.ConcurrentModificationException} if such modification is 1228 * detected. 1229 * 1230 * <p> The state of each match result passed to the replacer function is 1231 * guaranteed to be constant only for the duration of the replacer function 1232 * call and only if the replacer function does not modify this matcher's 1233 * state. 1234 * 1235 * @implNote 1236 * This implementation applies the replacer function to this matcher, which 1237 * is an instance of {@code MatchResult}. 1238 * 1239 * @param replacer 1240 * The function to be applied to the match result of this matcher 1241 * that returns a replacement string. 1242 * @return The string constructed by replacing each matching subsequence 1243 * with the result of applying the replacer function to that 1244 * matched subsequence, substituting captured subsequences as 1245 * needed. 
1246 * @throws NullPointerException if the replacer function is null 1247 * @throws ConcurrentModificationException if it is detected, on a 1248 * best-effort basis, that the replacer function modified this 1249 * matcher's state 1250 * @since 9 1251 */ 1252 public String replaceAll(Function<MatchResult, String> replacer) { 1253 Objects.requireNonNull(replacer); 1254 reset(); 1255 boolean result = find(); 1256 if (result) { 1257 StringBuilder sb = new StringBuilder(); 1258 do { 1259 int ec = modCount; 1260 String replacement = replacer.apply(this); 1261 if (ec != modCount) 1262 throw new ConcurrentModificationException(); 1263 appendReplacement(sb, replacement); 1264 result = find(); 1265 } while (result); 1266 appendTail(sb); 1267 return sb.toString(); 1268 } 1269 return text.toString(); 1270 } 1271 1272 /** 1273 * Returns a stream of match results for each subsequence of the input 1274 * sequence that matches the pattern. The match results occur in the 1275 * same order as the matching subsequences in the input sequence. 1276 * 1277 * <p> Each match result is produced as if by {@link #toMatchResult()}. 1278 * 1279 * <p> This method does not reset this matcher. Matching starts on 1280 * initiation of the terminal stream operation either at the beginning of 1281 * this matcher's region, or, if the matcher has not since been reset, at 1282 * the first character not matched by a previous match. 1283 * 1284 * <p> If the matcher is to be used for further matching operations after 1285 * the terminal stream operation completes then it should be first reset. 1286 * 1287 * <p> This matcher's state should not be modified during execution of the 1288 * returned stream's pipeline. The returned stream's source 1289 * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort 1290 * basis, throw a {@link java.util.ConcurrentModificationException} if such 1291 * modification is detected. 1292 * 1293 * @return a sequential stream of match results. 
1294 * @since 9 1295 */ 1296 public Stream<MatchResult> results() { 1297 class MatchResultIterator implements Iterator<MatchResult> { 1298 // -ve for call to find, 0 for not found, 1 for found 1299 int state = -1; 1300 // State for concurrent modification checking 1301 // -1 for uninitialized 1302 int expectedCount = -1; 1303 // The input sequence as a string, set once only after first find 1304 // Avoids repeated conversion from CharSequence for each match 1305 String textAsString; 1306 1307 @Override 1308 public MatchResult next() { 1309 if (expectedCount >= 0 && expectedCount != modCount) 1310 throw new ConcurrentModificationException(); 1311 1312 if (!hasNext()) 1313 throw new NoSuchElementException(); 1314 1315 state = -1; 1316 return toMatchResult(textAsString); 1317 } 1318 1319 @Override 1320 public boolean hasNext() { 1321 if (state >= 0) 1322 return state == 1; 1323 1324 // Defer throwing ConcurrentModificationException to when next 1325 // or forEachRemaining is called. The is consistent with other 1326 // fail-fast implementations. 1327 if (expectedCount >= 0 && expectedCount != modCount) 1328 return true; 1329 1330 boolean found = find(); 1331 // Capture the input sequence as a string on first find 1332 if (found && state < 0) 1333 textAsString = text.toString(); 1334 state = found ? 1 : 0; 1335 expectedCount = modCount; 1336 return found; 1337 } 1338 1339 @Override 1340 public void forEachRemaining(Consumer<? 
super MatchResult> action) { 1341 if (expectedCount >= 0 && expectedCount != modCount) 1342 throw new ConcurrentModificationException(); 1343 1344 int s = state; 1345 if (s == 0) 1346 return; 1347 1348 // Set state to report no more elements on further operations 1349 state = 0; 1350 expectedCount = -1; 1351 1352 // Perform a first find if required 1353 if (s < 0 && !find()) 1354 return; 1355 1356 // Capture the input sequence as a string on first find 1357 textAsString = text.toString(); 1358 1359 do { 1360 int ec = modCount; 1361 action.accept(toMatchResult(textAsString)); 1362 if (ec != modCount) 1363 throw new ConcurrentModificationException(); 1364 } while (find()); 1365 } 1366 } 1367 return StreamSupport.stream(Spliterators.spliteratorUnknownSize( 1368 new MatchResultIterator(), Spliterator.ORDERED | Spliterator.NONNULL), false); 1369 } 1370 1371 /** 1372 * Replaces the first subsequence of the input sequence that matches the 1373 * pattern with the given replacement string. 1374 * 1375 * <p> This method first resets this matcher. It then scans the input 1376 * sequence looking for a match of the pattern. Characters that are not 1377 * part of the match are appended directly to the result string; the match 1378 * is replaced in the result by the replacement string. The replacement 1379 * string may contain references to captured subsequences as in the {@link 1380 * #appendReplacement appendReplacement} method. 1381 * 1382 * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1383 * the replacement string may cause the results to be different than if it 1384 * were being treated as a literal replacement string. Dollar signs may be 1385 * treated as references to captured subsequences as described above, and 1386 * backslashes are used to escape literal characters in the replacement 1387 * string. 
1388 * 1389 * <p> Given the regular expression {@code dog}, the input 1390 * {@code "zzzdogzzzdogzzz"}, and the replacement string 1391 * {@code "cat"}, an invocation of this method on a matcher for that 1392 * expression would yield the string {@code "zzzcatzzzdogzzz"}. </p> 1393 * 1394 * <p> Invoking this method changes this matcher's state. If the matcher 1395 * is to be used in further matching operations then it should first be 1396 * reset. </p> 1397 * 1398 * @param replacement 1399 * The replacement string 1400 * @return The string constructed by replacing the first matching 1401 * subsequence by the replacement string, substituting captured 1402 * subsequences as needed 1403 */ 1404 public String replaceFirst(String replacement) { 1405 if (replacement == null) 1406 throw new NullPointerException("replacement"); 1407 reset(); 1408 if (!find()) 1409 return text.toString(); 1410 StringBuilder sb = new StringBuilder(); 1411 appendReplacement(sb, replacement); 1412 appendTail(sb); 1413 return sb.toString(); 1414 } 1415 1416 /** 1417 * Replaces the first subsequence of the input sequence that matches the 1418 * pattern with the result of applying the given replacer function to the 1419 * match result of this matcher corresponding to that subsequence. 1420 * Exceptions thrown by the replace function are relayed to the caller. 1421 * 1422 * <p> This method first resets this matcher. It then scans the input 1423 * sequence looking for a match of the pattern. Characters that are not 1424 * part of the match are appended directly to the result string; the match 1425 * is replaced in the result by the applying the replacer function that 1426 * returns a replacement string. The replacement string may contain 1427 * references to captured subsequences as in the {@link #appendReplacement 1428 * appendReplacement} method. 
1429 * 1430 * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1431 * the replacement string may cause the results to be different than if it 1432 * were being treated as a literal replacement string. Dollar signs may be 1433 * treated as references to captured subsequences as described above, and 1434 * backslashes are used to escape literal characters in the replacement 1435 * string. 1436 * 1437 * <p> Given the regular expression {@code dog}, the input 1438 * {@code "zzzdogzzzdogzzz"}, and the function 1439 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on 1440 * a matcher for that expression would yield the string 1441 * {@code "zzzDOGzzzdogzzz"}. 1442 * 1443 * <p> Invoking this method changes this matcher's state. If the matcher 1444 * is to be used in further matching operations then it should first be 1445 * reset. 1446 * 1447 * <p> The replacer function should not modify this matcher's state during 1448 * replacement. This method will, on a best-effort basis, throw a 1449 * {@link java.util.ConcurrentModificationException} if such modification is 1450 * detected. 1451 * 1452 * <p> The state of the match result passed to the replacer function is 1453 * guaranteed to be constant only for the duration of the replacer function 1454 * call and only if the replacer function does not modify this matcher's 1455 * state. 1456 * 1457 * @implNote 1458 * This implementation applies the replacer function to this matcher, which 1459 * is an instance of {@code MatchResult}. 1460 * 1461 * @param replacer 1462 * The function to be applied to the match result of this matcher 1463 * that returns a replacement string. 1464 * @return The string constructed by replacing the first matching 1465 * subsequence with the result of applying the replacer function to 1466 * the matched subsequence, substituting captured subsequences as 1467 * needed. 
1468 * @throws NullPointerException if the replacer function is null 1469 * @throws ConcurrentModificationException if it is detected, on a 1470 * best-effort basis, that the replacer function modified this 1471 * matcher's state 1472 * @since 9 1473 */ 1474 public String replaceFirst(Function<MatchResult, String> replacer) { 1475 Objects.requireNonNull(replacer); 1476 reset(); 1477 if (!find()) 1478 return text.toString(); 1479 StringBuilder sb = new StringBuilder(); 1480 int ec = modCount; 1481 String replacement = replacer.apply(this); 1482 if (ec != modCount) 1483 throw new ConcurrentModificationException(); 1484 appendReplacement(sb, replacement); 1485 appendTail(sb); 1486 return sb.toString(); 1487 } 1488 1489 /** 1490 * Sets the limits of this matcher's region. The region is the part of the 1491 * input sequence that will be searched to find a match. Invoking this 1492 * method resets the matcher, and then sets the region to start at the 1493 * index specified by the {@code start} parameter and end at the 1494 * index specified by the {@code end} parameter. 1495 * 1496 * <p>Depending on the transparency and anchoring being used (see 1497 * {@link #useTransparentBounds(boolean) useTransparentBounds} and 1498 * {@link #useAnchoringBounds(boolean) useAnchoringBounds}), certain 1499 * constructs such as anchors may behave differently at or around the 1500 * boundaries of the region. 1501 * 1502 * @param start 1503 * The index to start searching at (inclusive) 1504 * @param end 1505 * The index to end searching at (exclusive) 1506 * @throws IndexOutOfBoundsException 1507 * If start or end is less than zero, if 1508 * start is greater than the length of the input sequence, if 1509 * end is greater than the length of the input sequence, or if 1510 * start is greater than end. 
1511 * @return this matcher 1512 * @since 1.5 1513 */ 1514 public Matcher region(int start, int end) { 1515 if ((start < 0) || (start > getTextLength())) 1516 throw new IndexOutOfBoundsException("start"); 1517 if ((end < 0) || (end > getTextLength())) 1518 throw new IndexOutOfBoundsException("end"); 1519 if (start > end) 1520 throw new IndexOutOfBoundsException("start > end"); 1521 reset(); 1522 from = start; 1523 to = end; 1524 return this; 1525 } 1526 1527 /** 1528 * Reports the start index of this matcher's region. The 1529 * searches this matcher conducts are limited to finding matches 1530 * within {@link #regionStart() regionStart} (inclusive) and 1531 * {@link #regionEnd() regionEnd} (exclusive). 1532 * 1533 * @return The starting point of this matcher's region 1534 * @since 1.5 1535 */ 1536 public int regionStart() { 1537 return from; 1538 } 1539 1540 /** 1541 * Reports the end index (exclusive) of this matcher's region. 1542 * The searches this matcher conducts are limited to finding matches 1543 * within {@link #regionStart() regionStart} (inclusive) and 1544 * {@link #regionEnd() regionEnd} (exclusive). 1545 * 1546 * @return the ending point of this matcher's region 1547 * @since 1.5 1548 */ 1549 public int regionEnd() { 1550 return to; 1551 } 1552 1553 /** 1554 * Queries the transparency of region bounds for this matcher. 1555 * 1556 * <p> This method returns {@code true} if this matcher uses 1557 * <i>transparent</i> bounds, {@code false} if it uses <i>opaque</i> 1558 * bounds. 1559 * 1560 * <p> See {@link #useTransparentBounds(boolean) useTransparentBounds} for a 1561 * description of transparent and opaque bounds. 1562 * 1563 * <p> By default, a matcher uses opaque region boundaries. 1564 * 1565 * @return {@code true} iff this matcher is using transparent bounds, 1566 * {@code false} otherwise. 
1567 * @see java.util.regex.Matcher#useTransparentBounds(boolean) 1568 * @since 1.5 1569 */ 1570 public boolean hasTransparentBounds() { 1571 return transparentBounds; 1572 } 1573 1574 /** 1575 * Sets the transparency of region bounds for this matcher. 1576 * 1577 * <p> Invoking this method with an argument of {@code true} will set this 1578 * matcher to use <i>transparent</i> bounds. If the boolean 1579 * argument is {@code false}, then <i>opaque</i> bounds will be used. 1580 * 1581 * <p> Using transparent bounds, the boundaries of this 1582 * matcher's region are transparent to lookahead, lookbehind, 1583 * and boundary matching constructs. Those constructs can see beyond the 1584 * boundaries of the region to see if a match is appropriate. 1585 * 1586 * <p> Using opaque bounds, the boundaries of this matcher's 1587 * region are opaque to lookahead, lookbehind, and boundary matching 1588 * constructs that may try to see beyond them. Those constructs cannot 1589 * look past the boundaries so they will fail to match anything outside 1590 * of the region. 1591 * 1592 * <p> By default, a matcher uses opaque bounds. 1593 * 1594 * @param b a boolean indicating whether to use opaque or transparent 1595 * regions 1596 * @return this matcher 1597 * @see java.util.regex.Matcher#hasTransparentBounds 1598 * @since 1.5 1599 */ 1600 public Matcher useTransparentBounds(boolean b) { 1601 transparentBounds = b; 1602 return this; 1603 } 1604 1605 /** 1606 * Queries the anchoring of region bounds for this matcher. 1607 * 1608 * <p> This method returns {@code true} if this matcher uses 1609 * <i>anchoring</i> bounds, {@code false} otherwise. 1610 * 1611 * <p> See {@link #useAnchoringBounds(boolean) useAnchoringBounds} for a 1612 * description of anchoring bounds. 1613 * 1614 * <p> By default, a matcher uses anchoring region boundaries. 1615 * 1616 * @return {@code true} iff this matcher is using anchoring bounds, 1617 * {@code false} otherwise. 
1618 * @see java.util.regex.Matcher#useAnchoringBounds(boolean) 1619 * @since 1.5 1620 */ 1621 public boolean hasAnchoringBounds() { 1622 return anchoringBounds; 1623 } 1624 1625 /** 1626 * Sets the anchoring of region bounds for this matcher. 1627 * 1628 * <p> Invoking this method with an argument of {@code true} will set this 1629 * matcher to use <i>anchoring</i> bounds. If the boolean 1630 * argument is {@code false}, then <i>non-anchoring</i> bounds will be 1631 * used. 1632 * 1633 * <p> Using anchoring bounds, the boundaries of this 1634 * matcher's region match anchors such as ^ and $. 1635 * 1636 * <p> Without anchoring bounds, the boundaries of this 1637 * matcher's region will not match anchors such as ^ and $. 1638 * 1639 * <p> By default, a matcher uses anchoring region boundaries. 1640 * 1641 * @param b a boolean indicating whether or not to use anchoring bounds. 1642 * @return this matcher 1643 * @see java.util.regex.Matcher#hasAnchoringBounds 1644 * @since 1.5 1645 */ 1646 public Matcher useAnchoringBounds(boolean b) { 1647 anchoringBounds = b; 1648 return this; 1649 } 1650 1651 /** 1652 * <p>Returns the string representation of this matcher. The 1653 * string representation of a {@code Matcher} contains information 1654 * that may be useful for debugging. The exact format is unspecified. 1655 * 1656 * @return The string representation of this matcher 1657 * @since 1.5 1658 */ 1659 public String toString() { 1660 StringBuilder sb = new StringBuilder(); 1661 sb.append("java.util.regex.Matcher") 1662 .append("[pattern=").append(pattern()) 1663 .append(" region=") 1664 .append(regionStart()).append(',').append(regionEnd()) 1665 .append(" lastmatch="); 1666 if ((first >= 0) && (group() != null)) { 1667 sb.append(group()); 1668 } 1669 sb.append(']'); 1670 return sb.toString(); 1671 } 1672 1673 /** 1674 * <p>Returns true if the end of input was hit by the search engine in 1675 * the last match operation performed by this matcher. 
1676 * 1677 * <p>When this method returns true, then it is possible that more input 1678 * would have changed the result of the last search. 1679 * 1680 * @return true iff the end of input was hit in the last match; false 1681 * otherwise 1682 * @since 1.5 1683 */ 1684 public boolean hitEnd() { 1685 return hitEnd; 1686 } 1687 1688 /** 1689 * <p>Returns true if more input could change a positive match into a 1690 * negative one. 1691 * 1692 * <p>If this method returns true, and a match was found, then more 1693 * input could cause the match to be lost. If this method returns false 1694 * and a match was found, then more input might change the match but the 1695 * match won't be lost. If a match was not found, then requireEnd has no 1696 * meaning. 1697 * 1698 * @return true iff more input could change a positive match into a 1699 * negative one. 1700 * @since 1.5 1701 */ 1702 public boolean requireEnd() { 1703 return requireEnd; 1704 } 1705 1706 /** 1707 * Initiates a search to find a Pattern within the given bounds. 1708 * The groups are filled with default values and the match of the root 1709 * of the state machine is called. The state machine will hold the state 1710 * of the match as it proceeds in this matcher. 1711 * 1712 * Matcher.from is not set here, because it is the "hard" boundary 1713 * of the start of the search which anchors will set to. The from param 1714 * is the "soft" boundary of the start of the search, meaning that the 1715 * regex tries to match at that index but ^ won't match there. Subsequent 1716 * calls to the search methods start at a new "soft" boundary which is 1717 * the end of the previous match. 1718 */ 1719 boolean search(int from) { 1720 this.hitEnd = false; 1721 this.requireEnd = false; 1722 from = from < 0 ? 0 : from; 1723 this.first = from; 1724 this.oldLast = oldLast < 0 ? 
from : oldLast; 1725 for (int i = 0; i < groups.length; i++) 1726 groups[i] = -1; 1727 for (int i = 0; i < localsPos.length; i++) { 1728 if (localsPos[i] != null) 1729 localsPos[i].clear(); 1730 } 1731 acceptMode = NOANCHOR; 1732 boolean result = parentPattern.root.match(this, from, text); 1733 if (!result) 1734 this.first = -1; 1735 this.oldLast = this.last; 1736 this.modCount++; 1737 return result; 1738 } 1739 1740 /** 1741 * Initiates a search for an anchored match to a Pattern within the given 1742 * bounds. The groups are filled with default values and the match of the 1743 * root of the state machine is called. The state machine will hold the 1744 * state of the match as it proceeds in this matcher. 1745 */ 1746 boolean match(int from, int anchor) { 1747 this.hitEnd = false; 1748 this.requireEnd = false; 1749 from = from < 0 ? 0 : from; 1750 this.first = from; 1751 this.oldLast = oldLast < 0 ? from : oldLast; 1752 for (int i = 0; i < groups.length; i++) 1753 groups[i] = -1; 1754 for (int i = 0; i < localsPos.length; i++) { 1755 if (localsPos[i] != null) 1756 localsPos[i].clear(); 1757 } 1758 acceptMode = anchor; 1759 boolean result = parentPattern.matchRoot.match(this, from, text); 1760 if (!result) 1761 this.first = -1; 1762 this.oldLast = this.last; 1763 this.modCount++; 1764 return result; 1765 } 1766 1767 /** 1768 * Returns the end index of the text. 1769 * 1770 * @return the index after the last character in the text 1771 */ 1772 int getTextLength() { 1773 return text.length(); 1774 } 1775 1776 /** 1777 * Generates a String from this matcher's input in the specified range. 1778 * 1779 * @param beginIndex the beginning index, inclusive 1780 * @param endIndex the ending index, exclusive 1781 * @return A String generated from this matcher's input 1782 */ 1783 CharSequence getSubSequence(int beginIndex, int endIndex) { 1784 return text.subSequence(beginIndex, endIndex); 1785 } 1786 1787 /** 1788 * Returns this matcher's input character at index i. 
*
     * @return A char from the specified index
     */
    char charAt(int i) {
        return this.text.charAt(i);
    }

    /**
     * Returns the group index of the matched capturing group.
     *
     * @param  name the name of a named-capturing group
     * @return the index of the named-capturing group
     * @throws NullPointerException if {@code name} is null
     * @throws IllegalStateException if there is no current match
     * @throws IllegalArgumentException if the pattern has no group with
     *         the given name
     */
    int getMatchedGroupIndex(String name) {
        Objects.requireNonNull(name, "Group name");
        // A negative 'first' means there is no current match
        if (first < 0) {
            throw new IllegalStateException("No match found");
        }
        if (!parentPattern.namedGroups().containsKey(name)) {
            throw new IllegalArgumentException("No group with name <" + name + ">");
        }
        return parentPattern.namedGroups().get(name);
    }
}