1 /* 2 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util.regex; 27 28 import java.util.ConcurrentModificationException; 29 import java.util.Iterator; 30 import java.util.NoSuchElementException; 31 import java.util.Objects; 32 import java.util.Spliterator; 33 import java.util.Spliterators; 34 import java.util.function.Consumer; 35 import java.util.function.Function; 36 import java.util.stream.Stream; 37 import java.util.stream.StreamSupport; 38 39 /** 40 * An engine that performs match operations on a {@linkplain java.lang.CharSequence 41 * character sequence} by interpreting a {@link Pattern}. 42 * 43 * <p> A matcher is created from a pattern by invoking the pattern's {@link 44 * Pattern#matcher matcher} method. 
Once created, a matcher can be used to 45 * perform three different kinds of match operations: 46 * 47 * <ul> 48 * 49 * <li><p> The {@link #matches matches} method attempts to match the entire 50 * input sequence against the pattern. </p></li> 51 * 52 * <li><p> The {@link #lookingAt lookingAt} method attempts to match the 53 * input sequence, starting at the beginning, against the pattern. </p></li> 54 * 55 * <li><p> The {@link #find find} method scans the input sequence looking for 56 * the next subsequence that matches the pattern. </p></li> 57 * 58 * </ul> 59 * 60 * <p> Each of these methods returns a boolean indicating success or failure. 61 * More information about a successful match can be obtained by querying the 62 * state of the matcher. 63 * 64 * <p> A matcher finds matches in a subset of its input called the 65 * <i>region</i>. By default, the region contains all of the matcher's input. 66 * The region can be modified via the {@link #region region} method and queried 67 * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd} 68 * methods. The way that the region boundaries interact with some pattern 69 * constructs can be changed. See {@link #useAnchoringBounds 70 * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds} 71 * for more details. 72 * 73 * <p> This class also defines methods for replacing matched subsequences with 74 * new strings whose contents can, if desired, be computed from the match 75 * result. The {@link #appendReplacement appendReplacement} and {@link 76 * #appendTail appendTail} methods can be used in tandem in order to collect 77 * the result into an existing string buffer or string builder. Alternatively, 78 * the more convenient {@link #replaceAll replaceAll} method can be used to 79 * create a string in which every matching subsequence in the input sequence 80 * is replaced. 
*
 * <p> The explicit state of a matcher includes the start and end indices of
 * the most recent successful match.  It also includes the start and end
 * indices of the input subsequence captured by each <a
 * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total
 * count of such subsequences.  As a convenience, methods are also provided for
 * returning these captured subsequences in string form.
 *
 * <p> The explicit state of a matcher is initially undefined; attempting to
 * query any part of it before a successful match will cause an {@link
 * IllegalStateException} to be thrown.  The explicit state of a matcher is
 * recomputed by every match operation.
 *
 * <p> The implicit state of a matcher includes the input character sequence as
 * well as the <i>append position</i>, which is initially zero and is updated
 * by the {@link #appendReplacement appendReplacement} method.
 *
 * <p> A matcher may be reset explicitly by invoking its {@link #reset()}
 * method or, if a new input sequence is desired, its {@link
 * #reset(java.lang.CharSequence) reset(CharSequence)} method.  Resetting a
 * matcher discards its explicit state information and sets the append position
 * to zero.
 *
 * <p> Instances of this class are not safe for use by multiple concurrent
 * threads. </p>
 *
 *
 * @author      Mike McCloskey
 * @author      Mark Reinhold
 * @author      JSR-51 Expert Group
 * @since       1.4
 * @spec        JSR-51
 */

public final class Matcher implements MatchResult {

    /**
     * The Pattern object that created this Matcher. It is replaced when
     * {@link #usePattern usePattern} installs a different pattern.
     */
    Pattern parentPattern;

    /**
     * The storage used by groups. They may contain invalid values if
     * a group was skipped during the matching.
     *
     * Group {@code g} occupies two consecutive slots: index {@code 2*g}
     * holds its start offset and index {@code 2*g + 1} its end offset
     * (see {@link #start(int)} and {@link #end(int)}). {@link #reset()}
     * and {@link #usePattern usePattern} set every slot to -1.
     */
    int[] groups;

    /**
     * The range within the sequence that is to be matched. Anchors
     * will match at these "hard" boundaries. Changing the region
     * changes these values.
*/
    // reset() restores these to 0 and the input length respectively;
    // find() never starts before 'from' and fails once past 'to'.
    int from, to;

    /**
     * Lookbehind uses this value to ensure that the subexpression
     * match ends at the point where the lookbehind was encountered.
     */
    int lookbehindTo;

    /**
     * The original character sequence being matched. It is replaced by
     * {@link #reset(CharSequence)}.
     */
    CharSequence text;

    /**
     * Matcher state used by the last node. NOANCHOR is used when a
     * match does not have to consume all of the input. ENDANCHOR is
     * the mode used for matching all the input.
     *
     * {@link #matches()} requests ENDANCHOR and {@link #lookingAt()}
     * requests NOANCHOR when they start a match.
     */
    static final int ENDANCHOR = 1;
    static final int NOANCHOR = 0;
    int acceptMode = NOANCHOR;

    /**
     * The range of string that last matched the pattern. If the last
     * match failed then first is -1; last initially holds 0 then it
     * holds the index of the end of the last match (which is where the
     * next search starts).
     *
     * {@link #find()} begins its next search at last, stepping one
     * position further when last equals first.
     */
    int first = -1, last = 0;

    /**
     * The end index of what matched in the last match operation.
     * {@link #reset()} sets it back to -1.
     */
    int oldLast = -1;

    /**
     * The index of the last position appended in a substitution.
     * {@code appendReplacement} advances it to last and {@link #reset()}
     * returns it to zero.
     */
    int lastAppendPosition = 0;

    /**
     * Storage used by nodes to tell what repetition they are on in
     * a pattern, and where groups begin. The nodes themselves are stateless,
     * so they rely on this field to hold state during a match.
     * {@link #reset()} sets every entry to -1.
     */
    int[] locals;

    /**
     * Storage used by top greedy Loop node to store a specific hash set to
     * keep the beginning index of the failed repetition match. The nodes
     * themselves are stateless, so they rely on this field to hold state
     * during a match.
     *
     * Entries may be null; {@link #reset()} clears only the non-null sets.
     */
    IntHashSet[] localsPos;

    /**
     * Boolean indicating whether or not more input could change
     * the results of the last match.
     *
     * If hitEnd is true, and a match was found, then more input
     * might cause a different match to be found.
     * If hitEnd is true and a match was not found, then more
     * input could cause a match to be found.
196 * If hitEnd is false and a match was found, then more input 197 * will not change the match. 198 * If hitEnd is false and a match was not found, then more 199 * input will not cause a match to be found. 200 */ 201 boolean hitEnd; 202 203 /** 204 * Boolean indicating whether or not more input could change 205 * a positive match into a negative one. 206 * 207 * If requireEnd is true, and a match was found, then more 208 * input could cause the match to be lost. 209 * If requireEnd is false and a match was found, then more 210 * input might change the match but the match won't be lost. 211 * If a match was not found, then requireEnd has no meaning. 212 */ 213 boolean requireEnd; 214 215 /** 216 * If transparentBounds is true then the boundaries of this 217 * matcher's region are transparent to lookahead, lookbehind, 218 * and boundary matching constructs that try to see beyond them. 219 */ 220 boolean transparentBounds = false; 221 222 /** 223 * If anchoringBounds is true then the boundaries of this 224 * matcher's region match anchors such as ^ and $. 225 */ 226 boolean anchoringBounds = true; 227 228 /** 229 * Number of times this matcher's state has been modified 230 */ 231 int modCount; 232 233 /** 234 * No default constructor. 235 */ 236 Matcher() { 237 } 238 239 /** 240 * All matchers have the state used by Pattern during a match. 241 */ 242 Matcher(Pattern parent, CharSequence text) { 243 this.parentPattern = parent; 244 this.text = text; 245 246 // Allocate state storage 247 int parentGroupCount = Math.max(parent.capturingGroupCount, 10); 248 groups = new int[parentGroupCount * 2]; 249 locals = new int[parent.localCount]; 250 localsPos = new IntHashSet[parent.localTCNCount]; 251 252 // Put fields into initial states 253 reset(); 254 } 255 256 /** 257 * Returns the pattern that is interpreted by this matcher. 
258 * 259 * @return The pattern for which this matcher was created 260 */ 261 public Pattern pattern() { 262 return parentPattern; 263 } 264 265 /** 266 * Returns the match state of this matcher as a {@link MatchResult}. 267 * The result is unaffected by subsequent operations performed upon this 268 * matcher. 269 * 270 * @return a {@code MatchResult} with the state of this matcher 271 * @since 1.5 272 */ 273 public MatchResult toMatchResult() { 274 return toMatchResult(text.toString()); 275 } 276 277 private MatchResult toMatchResult(String text) { 278 return new ImmutableMatchResult(this.first, 279 this.last, 280 groupCount(), 281 this.groups.clone(), 282 text); 283 } 284 285 private static class ImmutableMatchResult implements MatchResult { 286 private final int first; 287 private final int last; 288 private final int[] groups; 289 private final int groupCount; 290 private final String text; 291 292 ImmutableMatchResult(int first, int last, int groupCount, 293 int groups[], String text) 294 { 295 this.first = first; 296 this.last = last; 297 this.groupCount = groupCount; 298 this.groups = groups; 299 this.text = text; 300 } 301 302 @Override 303 public int start() { 304 checkMatch(); 305 return first; 306 } 307 308 @Override 309 public int start(int group) { 310 checkMatch(); 311 if (group < 0 || group > groupCount) 312 throw new IndexOutOfBoundsException("No group " + group); 313 return groups[group * 2]; 314 } 315 316 @Override 317 public int end() { 318 checkMatch(); 319 return last; 320 } 321 322 @Override 323 public int end(int group) { 324 checkMatch(); 325 if (group < 0 || group > groupCount) 326 throw new IndexOutOfBoundsException("No group " + group); 327 return groups[group * 2 + 1]; 328 } 329 330 @Override 331 public int groupCount() { 332 return groupCount; 333 } 334 335 @Override 336 public String group() { 337 checkMatch(); 338 return group(0); 339 } 340 341 @Override 342 public String group(int group) { 343 checkMatch(); 344 if (group < 0 || group 
> groupCount) 345 throw new IndexOutOfBoundsException("No group " + group); 346 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 347 return null; 348 return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 349 } 350 351 private void checkMatch() { 352 if (first < 0) 353 throw new IllegalStateException("No match found"); 354 355 } 356 } 357 358 /** 359 * Changes the {@code Pattern} that this {@code Matcher} uses to 360 * find matches with. 361 * 362 * <p> This method causes this matcher to lose information 363 * about the groups of the last match that occurred. The 364 * matcher's position in the input is maintained and its 365 * last append position is unaffected.</p> 366 * 367 * @param newPattern 368 * The new pattern used by this matcher 369 * @return This matcher 370 * @throws IllegalArgumentException 371 * If newPattern is {@code null} 372 * @since 1.5 373 */ 374 public Matcher usePattern(Pattern newPattern) { 375 if (newPattern == null) 376 throw new IllegalArgumentException("Pattern cannot be null"); 377 parentPattern = newPattern; 378 379 // Reallocate state storage 380 int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10); 381 groups = new int[parentGroupCount * 2]; 382 locals = new int[newPattern.localCount]; 383 for (int i = 0; i < groups.length; i++) 384 groups[i] = -1; 385 for (int i = 0; i < locals.length; i++) 386 locals[i] = -1; 387 localsPos = new IntHashSet[parentPattern.localTCNCount]; 388 modCount++; 389 return this; 390 } 391 392 /** 393 * Resets this matcher. 394 * 395 * <p> Resetting a matcher discards all of its explicit state information 396 * and sets its append position to zero. The matcher's region is set to the 397 * default region, which is its entire character sequence. The anchoring 398 * and transparency of this matcher's region boundaries are unaffected. 
399 * 400 * @return This matcher 401 */ 402 public Matcher reset() { 403 first = -1; 404 last = 0; 405 oldLast = -1; 406 for(int i=0; i<groups.length; i++) 407 groups[i] = -1; 408 for(int i=0; i<locals.length; i++) 409 locals[i] = -1; 410 for (int i = 0; i < localsPos.length; i++) { 411 if (localsPos[i] != null) 412 localsPos[i].clear(); 413 } 414 lastAppendPosition = 0; 415 from = 0; 416 to = getTextLength(); 417 modCount++; 418 return this; 419 } 420 421 /** 422 * Resets this matcher with a new input sequence. 423 * 424 * <p> Resetting a matcher discards all of its explicit state information 425 * and sets its append position to zero. The matcher's region is set to 426 * the default region, which is its entire character sequence. The 427 * anchoring and transparency of this matcher's region boundaries are 428 * unaffected. 429 * 430 * @param input 431 * The new input character sequence 432 * 433 * @return This matcher 434 */ 435 public Matcher reset(CharSequence input) { 436 text = input; 437 return reset(); 438 } 439 440 /** 441 * Returns the start index of the previous match. 442 * 443 * @return The index of the first character matched 444 * 445 * @throws IllegalStateException 446 * If no match has yet been attempted, 447 * or if the previous match operation failed 448 */ 449 public int start() { 450 if (first < 0) 451 throw new IllegalStateException("No match available"); 452 return first; 453 } 454 455 /** 456 * Returns the start index of the subsequence captured by the given group 457 * during the previous match operation. 458 * 459 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 460 * to right, starting at one. Group zero denotes the entire pattern, so 461 * the expression <i>m.</i>{@code start(0)} is equivalent to 462 * <i>m.</i>{@code start()}. 
</p> 463 * 464 * @param group 465 * The index of a capturing group in this matcher's pattern 466 * 467 * @return The index of the first character captured by the group, 468 * or {@code -1} if the match was successful but the group 469 * itself did not match anything 470 * 471 * @throws IllegalStateException 472 * If no match has yet been attempted, 473 * or if the previous match operation failed 474 * 475 * @throws IndexOutOfBoundsException 476 * If there is no capturing group in the pattern 477 * with the given index 478 */ 479 public int start(int group) { 480 if (first < 0) 481 throw new IllegalStateException("No match available"); 482 if (group < 0 || group > groupCount()) 483 throw new IndexOutOfBoundsException("No group " + group); 484 return groups[group * 2]; 485 } 486 487 /** 488 * Returns the start index of the subsequence captured by the given 489 * <a href="Pattern.html#groupname">named-capturing group</a> during the 490 * previous match operation. 491 * 492 * @param name 493 * The name of a named-capturing group in this matcher's pattern 494 * 495 * @return The index of the first character captured by the group, 496 * or {@code -1} if the match was successful but the group 497 * itself did not match anything 498 * 499 * @throws IllegalStateException 500 * If no match has yet been attempted, 501 * or if the previous match operation failed 502 * 503 * @throws IllegalArgumentException 504 * If there is no capturing group in the pattern 505 * with the given name 506 * @since 1.8 507 */ 508 public int start(String name) { 509 return groups[getMatchedGroupIndex(name) * 2]; 510 } 511 512 /** 513 * Returns the offset after the last character matched. 
514 * 515 * @return The offset after the last character matched 516 * 517 * @throws IllegalStateException 518 * If no match has yet been attempted, 519 * or if the previous match operation failed 520 */ 521 public int end() { 522 if (first < 0) 523 throw new IllegalStateException("No match available"); 524 return last; 525 } 526 527 /** 528 * Returns the offset after the last character of the subsequence 529 * captured by the given group during the previous match operation. 530 * 531 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 532 * to right, starting at one. Group zero denotes the entire pattern, so 533 * the expression <i>m.</i>{@code end(0)} is equivalent to 534 * <i>m.</i>{@code end()}. </p> 535 * 536 * @param group 537 * The index of a capturing group in this matcher's pattern 538 * 539 * @return The offset after the last character captured by the group, 540 * or {@code -1} if the match was successful 541 * but the group itself did not match anything 542 * 543 * @throws IllegalStateException 544 * If no match has yet been attempted, 545 * or if the previous match operation failed 546 * 547 * @throws IndexOutOfBoundsException 548 * If there is no capturing group in the pattern 549 * with the given index 550 */ 551 public int end(int group) { 552 if (first < 0) 553 throw new IllegalStateException("No match available"); 554 if (group < 0 || group > groupCount()) 555 throw new IndexOutOfBoundsException("No group " + group); 556 return groups[group * 2 + 1]; 557 } 558 559 /** 560 * Returns the offset after the last character of the subsequence 561 * captured by the given <a href="Pattern.html#groupname">named-capturing 562 * group</a> during the previous match operation. 
563 * 564 * @param name 565 * The name of a named-capturing group in this matcher's pattern 566 * 567 * @return The offset after the last character captured by the group, 568 * or {@code -1} if the match was successful 569 * but the group itself did not match anything 570 * 571 * @throws IllegalStateException 572 * If no match has yet been attempted, 573 * or if the previous match operation failed 574 * 575 * @throws IllegalArgumentException 576 * If there is no capturing group in the pattern 577 * with the given name 578 * @since 1.8 579 */ 580 public int end(String name) { 581 return groups[getMatchedGroupIndex(name) * 2 + 1]; 582 } 583 584 /** 585 * Returns the input subsequence matched by the previous match. 586 * 587 * <p> For a matcher <i>m</i> with input sequence <i>s</i>, 588 * the expressions <i>m.</i>{@code group()} and 589 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),} <i>m.</i>{@code end())} 590 * are equivalent. </p> 591 * 592 * <p> Note that some patterns, for example {@code a*}, match the empty 593 * string. This method will return the empty string when the pattern 594 * successfully matches the empty string in the input. </p> 595 * 596 * @return The (possibly empty) subsequence matched by the previous match, 597 * in string form 598 * 599 * @throws IllegalStateException 600 * If no match has yet been attempted, 601 * or if the previous match operation failed 602 */ 603 public String group() { 604 return group(0); 605 } 606 607 /** 608 * Returns the input subsequence captured by the given group during the 609 * previous match operation. 610 * 611 * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index 612 * <i>g</i>, the expressions <i>m.</i>{@code group(}<i>g</i>{@code )} and 613 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(}<i>g</i>{@code 614 * ),} <i>m.</i>{@code end(}<i>g</i>{@code ))} 615 * are equivalent. 
</p> 616 * 617 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 618 * to right, starting at one. Group zero denotes the entire pattern, so 619 * the expression {@code m.group(0)} is equivalent to {@code m.group()}. 620 * </p> 621 * 622 * <p> If the match was successful but the group specified failed to match 623 * any part of the input sequence, then {@code null} is returned. Note 624 * that some groups, for example {@code (a*)}, match the empty string. 625 * This method will return the empty string when such a group successfully 626 * matches the empty string in the input. </p> 627 * 628 * @param group 629 * The index of a capturing group in this matcher's pattern 630 * 631 * @return The (possibly empty) subsequence captured by the group 632 * during the previous match, or {@code null} if the group 633 * failed to match part of the input 634 * 635 * @throws IllegalStateException 636 * If no match has yet been attempted, 637 * or if the previous match operation failed 638 * 639 * @throws IndexOutOfBoundsException 640 * If there is no capturing group in the pattern 641 * with the given index 642 */ 643 public String group(int group) { 644 if (first < 0) 645 throw new IllegalStateException("No match found"); 646 if (group < 0 || group > groupCount()) 647 throw new IndexOutOfBoundsException("No group " + group); 648 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 649 return null; 650 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 651 } 652 653 /** 654 * Returns the input subsequence captured by the given 655 * <a href="Pattern.html#groupname">named-capturing group</a> during the previous 656 * match operation. 657 * 658 * <p> If the match was successful but the group specified failed to match 659 * any part of the input sequence, then {@code null} is returned. Note 660 * that some groups, for example {@code (a*)}, match the empty string. 
661 * This method will return the empty string when such a group successfully 662 * matches the empty string in the input. </p> 663 * 664 * @param name 665 * The name of a named-capturing group in this matcher's pattern 666 * 667 * @return The (possibly empty) subsequence captured by the named group 668 * during the previous match, or {@code null} if the group 669 * failed to match part of the input 670 * 671 * @throws IllegalStateException 672 * If no match has yet been attempted, 673 * or if the previous match operation failed 674 * 675 * @throws IllegalArgumentException 676 * If there is no capturing group in the pattern 677 * with the given name 678 * @since 1.7 679 */ 680 public String group(String name) { 681 int group = getMatchedGroupIndex(name); 682 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 683 return null; 684 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 685 } 686 687 /** 688 * Returns the number of capturing groups in this matcher's pattern. 689 * 690 * <p> Group zero denotes the entire pattern by convention. It is not 691 * included in this count. 692 * 693 * <p> Any non-negative integer smaller than or equal to the value 694 * returned by this method is guaranteed to be a valid group index for 695 * this matcher. </p> 696 * 697 * @return The number of capturing groups in this matcher's pattern 698 */ 699 public int groupCount() { 700 return parentPattern.capturingGroupCount - 1; 701 } 702 703 /** 704 * Attempts to match the entire region against the pattern. 705 * 706 * <p> If the match succeeds then more information can be obtained via the 707 * {@code start}, {@code end}, and {@code group} methods. </p> 708 * 709 * @return {@code true} if, and only if, the entire region sequence 710 * matches this matcher's pattern 711 */ 712 public boolean matches() { 713 return match(from, ENDANCHOR); 714 } 715 716 /** 717 * Attempts to find the next subsequence of the input sequence that matches 718 * the pattern. 
719 * 720 * <p> This method starts at the beginning of this matcher's region, or, if 721 * a previous invocation of the method was successful and the matcher has 722 * not since been reset, at the first character not matched by the previous 723 * match. 724 * 725 * <p> If the match succeeds then more information can be obtained via the 726 * {@code start}, {@code end}, and {@code group} methods. </p> 727 * 728 * @return {@code true} if, and only if, a subsequence of the input 729 * sequence matches this matcher's pattern 730 */ 731 public boolean find() { 732 int nextSearchIndex = last; 733 if (nextSearchIndex == first) 734 nextSearchIndex++; 735 736 // If next search starts before region, start it at region 737 if (nextSearchIndex < from) 738 nextSearchIndex = from; 739 740 // If next search starts beyond region then it fails 741 if (nextSearchIndex > to) { 742 for (int i = 0; i < groups.length; i++) 743 groups[i] = -1; 744 return false; 745 } 746 return search(nextSearchIndex); 747 } 748 749 /** 750 * Resets this matcher and then attempts to find the next subsequence of 751 * the input sequence that matches the pattern, starting at the specified 752 * index. 753 * 754 * <p> If the match succeeds then more information can be obtained via the 755 * {@code start}, {@code end}, and {@code group} methods, and subsequent 756 * invocations of the {@link #find()} method will start at the first 757 * character not matched by this match. </p> 758 * 759 * @param start the index to start searching for a match 760 * @throws IndexOutOfBoundsException 761 * If start is less than zero or if start is greater than the 762 * length of the input sequence. 
763 * 764 * @return {@code true} if, and only if, a subsequence of the input 765 * sequence starting at the given index matches this matcher's 766 * pattern 767 */ 768 public boolean find(int start) { 769 int limit = getTextLength(); 770 if ((start < 0) || (start > limit)) 771 throw new IndexOutOfBoundsException("Illegal start index"); 772 reset(); 773 return search(start); 774 } 775 776 /** 777 * Attempts to match the input sequence, starting at the beginning of the 778 * region, against the pattern. 779 * 780 * <p> Like the {@link #matches matches} method, this method always starts 781 * at the beginning of the region; unlike that method, it does not 782 * require that the entire region be matched. 783 * 784 * <p> If the match succeeds then more information can be obtained via the 785 * {@code start}, {@code end}, and {@code group} methods. </p> 786 * 787 * @return {@code true} if, and only if, a prefix of the input 788 * sequence matches this matcher's pattern 789 */ 790 public boolean lookingAt() { 791 return match(from, NOANCHOR); 792 } 793 794 /** 795 * Returns a literal replacement {@code String} for the specified 796 * {@code String}. 797 * 798 * This method produces a {@code String} that will work 799 * as a literal replacement {@code s} in the 800 * {@code appendReplacement} method of the {@link Matcher} class. 801 * The {@code String} produced will match the sequence of characters 802 * in {@code s} treated as a literal sequence. Slashes ('\') and 803 * dollar signs ('$') will be given no special meaning. 
804 * 805 * @param s The string to be literalized 806 * @return A literal string replacement 807 * @since 1.5 808 */ 809 public static String quoteReplacement(String s) { 810 if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1)) 811 return s; 812 StringBuilder sb = new StringBuilder(); 813 for (int i=0; i<s.length(); i++) { 814 char c = s.charAt(i); 815 if (c == '\\' || c == '$') { 816 sb.append('\\'); 817 } 818 sb.append(c); 819 } 820 return sb.toString(); 821 } 822 823 /** 824 * Implements a non-terminal append-and-replace step. 825 * 826 * <p> This method performs the following actions: </p> 827 * 828 * <ol> 829 * 830 * <li><p> It reads characters from the input sequence, starting at the 831 * append position, and appends them to the given string buffer. It 832 * stops after reading the last character preceding the previous match, 833 * that is, the character at index {@link 834 * #start()} {@code -} {@code 1}. </p></li> 835 * 836 * <li><p> It appends the given replacement string to the string buffer. 837 * </p></li> 838 * 839 * <li><p> It sets the append position of this matcher to the index of 840 * the last character matched, plus one, that is, to {@link #end()}. 841 * </p></li> 842 * 843 * </ol> 844 * 845 * <p> The replacement string may contain references to subsequences 846 * captured during the previous match: Each occurrence of 847 * <code>${</code><i>name</i><code>}</code> or {@code $}<i>g</i> 848 * will be replaced by the result of evaluating the corresponding 849 * {@link #group(String) group(name)} or {@link #group(int) group(g)} 850 * respectively. For {@code $}<i>g</i>, 851 * the first number after the {@code $} is always treated as part of 852 * the group reference. Subsequent numbers are incorporated into g if 853 * they would form a legal group reference. Only the numerals '0' 854 * through '9' are considered as potential components of the group 855 * reference. 
If the second group matched the string {@code "foo"}, for 856 * example, then passing the replacement string {@code "$2bar"} would 857 * cause {@code "foobar"} to be appended to the string buffer. A dollar 858 * sign ({@code $}) may be included as a literal in the replacement 859 * string by preceding it with a backslash ({@code \$}). 860 * 861 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 862 * the replacement string may cause the results to be different than if it 863 * were being treated as a literal replacement string. Dollar signs may be 864 * treated as references to captured subsequences as described above, and 865 * backslashes are used to escape literal characters in the replacement 866 * string. 867 * 868 * <p> This method is intended to be used in a loop together with the 869 * {@link #appendTail appendTail} and {@link #find find} methods. The 870 * following code, for example, writes {@code one dog two dogs in the 871 * yard} to the standard-output stream: </p> 872 * 873 * <blockquote><pre> 874 * Pattern p = Pattern.compile("cat"); 875 * Matcher m = p.matcher("one cat two cats in the yard"); 876 * StringBuffer sb = new StringBuffer(); 877 * while (m.find()) { 878 * m.appendReplacement(sb, "dog"); 879 * } 880 * m.appendTail(sb); 881 * System.out.println(sb.toString());</pre></blockquote> 882 * 883 * @param sb 884 * The target string buffer 885 * 886 * @param replacement 887 * The replacement string 888 * 889 * @return This matcher 890 * 891 * @throws IllegalStateException 892 * If no match has yet been attempted, 893 * or if the previous match operation failed 894 * 895 * @throws IllegalArgumentException 896 * If the replacement string refers to a named-capturing 897 * group that does not exist in the pattern 898 * 899 * @throws IndexOutOfBoundsException 900 * If the replacement string refers to a capturing group 901 * that does not exist in the pattern 902 */ 903 public Matcher appendReplacement(StringBuffer sb, String 
replacement) { 904 // If no match, return error 905 if (first < 0) 906 throw new IllegalStateException("No match available"); 907 StringBuilder result = new StringBuilder(); 908 appendExpandedReplacement(replacement, result); 909 // Append the intervening text 910 sb.append(text, lastAppendPosition, first); 911 // Append the match substitution 912 sb.append(result); 913 lastAppendPosition = last; 914 modCount++; 915 return this; 916 } 917 918 /** 919 * Implements a non-terminal append-and-replace step. 920 * 921 * <p> This method performs the following actions: </p> 922 * 923 * <ol> 924 * 925 * <li><p> It reads characters from the input sequence, starting at the 926 * append position, and appends them to the given string builder. It 927 * stops after reading the last character preceding the previous match, 928 * that is, the character at index {@link 929 * #start()} {@code -} {@code 1}. </p></li> 930 * 931 * <li><p> It appends the given replacement string to the string builder. 932 * </p></li> 933 * 934 * <li><p> It sets the append position of this matcher to the index of 935 * the last character matched, plus one, that is, to {@link #end()}. 936 * </p></li> 937 * 938 * </ol> 939 * 940 * <p> The replacement string may contain references to subsequences 941 * captured during the previous match: Each occurrence of 942 * {@code $}<i>g</i> will be replaced by the result of 943 * evaluating {@link #group(int) group}{@code (}<i>g</i>{@code )}. 944 * The first number after the {@code $} is always treated as part of 945 * the group reference. Subsequent numbers are incorporated into g if 946 * they would form a legal group reference. Only the numerals '0' 947 * through '9' are considered as potential components of the group 948 * reference. If the second group matched the string {@code "foo"}, for 949 * example, then passing the replacement string {@code "$2bar"} would 950 * cause {@code "foobar"} to be appended to the string builder. 
A dollar 951 * sign ({@code $}) may be included as a literal in the replacement 952 * string by preceding it with a backslash ({@code \$}). 953 * 954 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 955 * the replacement string may cause the results to be different than if it 956 * were being treated as a literal replacement string. Dollar signs may be 957 * treated as references to captured subsequences as described above, and 958 * backslashes are used to escape literal characters in the replacement 959 * string. 960 * 961 * <p> This method is intended to be used in a loop together with the 962 * {@link #appendTail appendTail} and {@link #find find} methods. The 963 * following code, for example, writes {@code one dog two dogs in the 964 * yard} to the standard-output stream: </p> 965 * 966 * <blockquote><pre> 967 * Pattern p = Pattern.compile("cat"); 968 * Matcher m = p.matcher("one cat two cats in the yard"); 969 * StringBuilder sb = new StringBuilder(); 970 * while (m.find()) { 971 * m.appendReplacement(sb, "dog"); 972 * } 973 * m.appendTail(sb); 974 * System.out.println(sb.toString());</pre></blockquote> 975 * 976 * @param sb 977 * The target string builder 978 * @param replacement 979 * The replacement string 980 * @return This matcher 981 * 982 * @throws IllegalStateException 983 * If no match has yet been attempted, 984 * or if the previous match operation failed 985 * @throws IllegalArgumentException 986 * If the replacement string refers to a named-capturing 987 * group that does not exist in the pattern 988 * @throws IndexOutOfBoundsException 989 * If the replacement string refers to a capturing group 990 * that does not exist in the pattern 991 * @since 9 992 */ 993 public Matcher appendReplacement(StringBuilder sb, String replacement) { 994 // If no match, return error 995 if (first < 0) 996 997 throw new IllegalStateException("No match available"); 998 StringBuilder result = new StringBuilder(); 999 
appendExpandedReplacement(replacement, result); 1000 // Append the intervening text 1001 sb.append(text, lastAppendPosition, first); 1002 // Append the match substitution 1003 sb.append(result); 1004 lastAppendPosition = last; 1005 modCount++; 1006 return this; 1007 } 1008 1009 /** 1010 * Processes replacement string to replace group references with 1011 * groups. 1012 */ 1013 private StringBuilder appendExpandedReplacement( 1014 String replacement, StringBuilder result) { 1015 int cursor = 0; 1016 while (cursor < replacement.length()) { 1017 char nextChar = replacement.charAt(cursor); 1018 if (nextChar == '\\') { 1019 cursor++; 1020 if (cursor == replacement.length()) 1021 throw new IllegalArgumentException( 1022 "character to be escaped is missing"); 1023 nextChar = replacement.charAt(cursor); 1024 result.append(nextChar); 1025 cursor++; 1026 } else if (nextChar == '$') { 1027 // Skip past $ 1028 cursor++; 1029 // Throw IAE if this "$" is the last character in replacement 1030 if (cursor == replacement.length()) 1031 throw new IllegalArgumentException( 1032 "Illegal group reference: group index is missing"); 1033 nextChar = replacement.charAt(cursor); 1034 int refNum = -1; 1035 if (nextChar == '{') { 1036 cursor++; 1037 StringBuilder gsb = new StringBuilder(); 1038 while (cursor < replacement.length()) { 1039 nextChar = replacement.charAt(cursor); 1040 if (ASCII.isLower(nextChar) || 1041 ASCII.isUpper(nextChar) || 1042 ASCII.isDigit(nextChar)) { 1043 gsb.append(nextChar); 1044 cursor++; 1045 } else { 1046 break; 1047 } 1048 } 1049 if (gsb.length() == 0) 1050 throw new IllegalArgumentException( 1051 "named capturing group has 0 length name"); 1052 if (nextChar != '}') 1053 throw new IllegalArgumentException( 1054 "named capturing group is missing trailing '}'"); 1055 String gname = gsb.toString(); 1056 if (ASCII.isDigit(gname.charAt(0))) 1057 throw new IllegalArgumentException( 1058 "capturing group name {" + gname + 1059 "} starts with digit character"); 1060 if 
(!parentPattern.namedGroups().containsKey(gname)) 1061 throw new IllegalArgumentException( 1062 "No group with name {" + gname + "}"); 1063 refNum = parentPattern.namedGroups().get(gname); 1064 cursor++; 1065 } else { 1066 // The first number is always a group 1067 refNum = nextChar - '0'; 1068 if ((refNum < 0) || (refNum > 9)) 1069 throw new IllegalArgumentException( 1070 "Illegal group reference"); 1071 cursor++; 1072 // Capture the largest legal group string 1073 boolean done = false; 1074 while (!done) { 1075 if (cursor >= replacement.length()) { 1076 break; 1077 } 1078 int nextDigit = replacement.charAt(cursor) - '0'; 1079 if ((nextDigit < 0) || (nextDigit > 9)) { // not a number 1080 break; 1081 } 1082 int newRefNum = (refNum * 10) + nextDigit; 1083 if (groupCount() < newRefNum) { 1084 done = true; 1085 } else { 1086 refNum = newRefNum; 1087 cursor++; 1088 } 1089 } 1090 } 1091 // Append group 1092 if (start(refNum) != -1 && end(refNum) != -1) 1093 result.append(text, start(refNum), end(refNum)); 1094 } else { 1095 result.append(nextChar); 1096 cursor++; 1097 } 1098 } 1099 return result; 1100 } 1101 1102 /** 1103 * Implements a terminal append-and-replace step. 1104 * 1105 * <p> This method reads characters from the input sequence, starting at 1106 * the append position, and appends them to the given string buffer. It is 1107 * intended to be invoked after one or more invocations of the {@link 1108 * #appendReplacement appendReplacement} method in order to copy the 1109 * remainder of the input sequence. </p> 1110 * 1111 * @param sb 1112 * The target string buffer 1113 * 1114 * @return The target string buffer 1115 */ 1116 public StringBuffer appendTail(StringBuffer sb) { 1117 sb.append(text, lastAppendPosition, getTextLength()); 1118 return sb; 1119 } 1120 1121 /** 1122 * Implements a terminal append-and-replace step. 
1123 * 1124 * <p> This method reads characters from the input sequence, starting at 1125 * the append position, and appends them to the given string builder. It is 1126 * intended to be invoked after one or more invocations of the {@link 1127 * #appendReplacement appendReplacement} method in order to copy the 1128 * remainder of the input sequence. </p> 1129 * 1130 * @param sb 1131 * The target string builder 1132 * 1133 * @return The target string builder 1134 * 1135 * @since 9 1136 */ 1137 public StringBuilder appendTail(StringBuilder sb) { 1138 sb.append(text, lastAppendPosition, getTextLength()); 1139 return sb; 1140 } 1141 1142 /** 1143 * Replaces every subsequence of the input sequence that matches the 1144 * pattern with the given replacement string. 1145 * 1146 * <p> This method first resets this matcher. It then scans the input 1147 * sequence looking for matches of the pattern. Characters that are not 1148 * part of any match are appended directly to the result string; each match 1149 * is replaced in the result by the replacement string. The replacement 1150 * string may contain references to captured subsequences as in the {@link 1151 * #appendReplacement appendReplacement} method. 1152 * 1153 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1154 * the replacement string may cause the results to be different than if it 1155 * were being treated as a literal replacement string. Dollar signs may be 1156 * treated as references to captured subsequences as described above, and 1157 * backslashes are used to escape literal characters in the replacement 1158 * string. 1159 * 1160 * <p> Given the regular expression {@code a*b}, the input 1161 * {@code "aabfooaabfooabfoob"}, and the replacement string 1162 * {@code "-"}, an invocation of this method on a matcher for that 1163 * expression would yield the string {@code "-foo-foo-foo-"}. 1164 * 1165 * <p> Invoking this method changes this matcher's state. 
If the matcher 1166 * is to be used in further matching operations then it should first be 1167 * reset. </p> 1168 * 1169 * @param replacement 1170 * The replacement string 1171 * 1172 * @return The string constructed by replacing each matching subsequence 1173 * by the replacement string, substituting captured subsequences 1174 * as needed 1175 */ 1176 public String replaceAll(String replacement) { 1177 reset(); 1178 boolean result = find(); 1179 if (result) { 1180 StringBuilder sb = new StringBuilder(); 1181 do { 1182 sb.append(text, lastAppendPosition, first); 1183 appendExpandedReplacement(replacement, sb); 1184 lastAppendPosition = last; 1185 result = find(); 1186 } while (result); 1187 sb.append(text, lastAppendPosition, getTextLength()); 1188 return sb.toString(); 1189 } 1190 return text.toString(); 1191 } 1192 1193 /** 1194 * Replaces every subsequence of the input sequence that matches the 1195 * pattern with the result of applying the given replacer function to the 1196 * match result of this matcher corresponding to that subsequence. 1197 * Exceptions thrown by the function are relayed to the caller. 1198 * 1199 * <p> This method first resets this matcher. It then scans the input 1200 * sequence looking for matches of the pattern. Characters that are not 1201 * part of any match are appended directly to the result string; each match 1202 * is replaced in the result by the applying the replacer function that 1203 * returns a replacement string. Each replacement string may contain 1204 * references to captured subsequences as in the {@link #appendReplacement 1205 * appendReplacement} method. 1206 * 1207 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1208 * a replacement string may cause the results to be different than if it 1209 * were being treated as a literal replacement string. 
Dollar signs may be 1210 * treated as references to captured subsequences as described above, and 1211 * backslashes are used to escape literal characters in the replacement 1212 * string. 1213 * 1214 * <p> Given the regular expression {@code dog}, the input 1215 * {@code "zzzdogzzzdogzzz"}, and the function 1216 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on 1217 * a matcher for that expression would yield the string 1218 * {@code "zzzDOGzzzDOGzzz"}. 1219 * 1220 * <p> Invoking this method changes this matcher's state. If the matcher 1221 * is to be used in further matching operations then it should first be 1222 * reset. </p> 1223 * 1224 * <p> The replacer function should not modify this matcher's state during 1225 * replacement. This method will, on a best-effort basis, throw a 1226 * {@link java.util.ConcurrentModificationException} if such modification is 1227 * detected. 1228 * 1229 * <p> The state of each match result passed to the replacer function is 1230 * guaranteed to be constant only for the duration of the replacer function 1231 * call and only if the replacer function does not modify this matcher's 1232 * state. 1233 * 1234 * @implNote 1235 * This implementation applies the replacer function to this matcher, which 1236 * is an instance of {@code MatchResult}. 1237 * 1238 * @param replacer 1239 * The function to be applied to the match result of this matcher 1240 * that returns a replacement string. 1241 * @return The string constructed by replacing each matching subsequence 1242 * with the result of applying the replacer function to that 1243 * matched subsequence, substituting captured subsequences as 1244 * needed. 
1245 * @throws NullPointerException if the replacer function is null 1246 * @throws ConcurrentModificationException if it is detected, on a 1247 * best-effort basis, that the replacer function modified this 1248 * matcher's state 1249 * @since 9 1250 */ 1251 public String replaceAll(Function<MatchResult, String> replacer) { 1252 Objects.requireNonNull(replacer); 1253 reset(); 1254 boolean result = find(); 1255 if (result) { 1256 StringBuilder sb = new StringBuilder(); 1257 do { 1258 int ec = modCount; 1259 String replacement = replacer.apply(this); 1260 if (ec != modCount) 1261 throw new ConcurrentModificationException(); 1262 appendReplacement(sb, replacement); 1263 result = find(); 1264 } while (result); 1265 appendTail(sb); 1266 return sb.toString(); 1267 } 1268 return text.toString(); 1269 } 1270 1271 /** 1272 * Returns a stream of match results for each subsequence of the input 1273 * sequence that matches the pattern. The match results occur in the 1274 * same order as the matching subsequences in the input sequence. 1275 * 1276 * <p> Each match result is produced as if by {@link #toMatchResult()}. 1277 * 1278 * <p> This method does not reset this matcher. Matching starts on 1279 * initiation of the terminal stream operation either at the beginning of 1280 * this matcher's region, or, if the matcher has not since been reset, at 1281 * the first character not matched by a previous match. 1282 * 1283 * <p> If the matcher is to be used for further matching operations after 1284 * the terminal stream operation completes then it should be first reset. 1285 * 1286 * <p> This matcher's state should not be modified during execution of the 1287 * returned stream's pipeline. The returned stream's source 1288 * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort 1289 * basis, throw a {@link java.util.ConcurrentModificationException} if such 1290 * modification is detected. 1291 * 1292 * @return a sequential stream of match results. 
1293 * @since 9 1294 */ 1295 public Stream<MatchResult> results() { 1296 class MatchResultIterator implements Iterator<MatchResult> { 1297 // -ve for call to find, 0 for not found, 1 for found 1298 int state = -1; 1299 // State for concurrent modification checking 1300 // -1 for uninitialized 1301 int expectedCount = -1; 1302 // The input sequence as a string, set once only after first find 1303 // Avoids repeated conversion from CharSequence for each match 1304 String textAsString; 1305 1306 @Override 1307 public MatchResult next() { 1308 if (expectedCount >= 0 && expectedCount != modCount) 1309 throw new ConcurrentModificationException(); 1310 1311 if (!hasNext()) 1312 throw new NoSuchElementException(); 1313 1314 state = -1; 1315 return toMatchResult(textAsString); 1316 } 1317 1318 @Override 1319 public boolean hasNext() { 1320 if (state >= 0) 1321 return state == 1; 1322 1323 // Defer throwing ConcurrentModificationException to when next 1324 // or forEachRemaining is called. The is consistent with other 1325 // fail-fast implementations. 1326 if (expectedCount >= 0 && expectedCount != modCount) 1327 return true; 1328 1329 boolean found = find(); 1330 // Capture the input sequence as a string on first find 1331 if (found && state < 0) 1332 textAsString = text.toString(); 1333 state = found ? 1 : 0; 1334 expectedCount = modCount; 1335 return found; 1336 } 1337 1338 @Override 1339 public void forEachRemaining(Consumer<? 
super MatchResult> action) { 1340 if (expectedCount >= 0 && expectedCount != modCount) 1341 throw new ConcurrentModificationException(); 1342 1343 int s = state; 1344 if (s == 0) 1345 return; 1346 1347 // Set state to report no more elements on further operations 1348 state = 0; 1349 expectedCount = -1; 1350 1351 // Perform a first find if required 1352 if (s < 0 && !find()) 1353 return; 1354 1355 // Capture the input sequence as a string on first find 1356 textAsString = text.toString(); 1357 1358 do { 1359 int ec = modCount; 1360 action.accept(toMatchResult(textAsString)); 1361 if (ec != modCount) 1362 throw new ConcurrentModificationException(); 1363 } while (find()); 1364 } 1365 } 1366 return StreamSupport.stream(Spliterators.spliteratorUnknownSize( 1367 new MatchResultIterator(), Spliterator.ORDERED | Spliterator.NONNULL), false); 1368 } 1369 1370 /** 1371 * Replaces the first subsequence of the input sequence that matches the 1372 * pattern with the given replacement string. 1373 * 1374 * <p> This method first resets this matcher. It then scans the input 1375 * sequence looking for a match of the pattern. Characters that are not 1376 * part of the match are appended directly to the result string; the match 1377 * is replaced in the result by the replacement string. The replacement 1378 * string may contain references to captured subsequences as in the {@link 1379 * #appendReplacement appendReplacement} method. 1380 * 1381 * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1382 * the replacement string may cause the results to be different than if it 1383 * were being treated as a literal replacement string. Dollar signs may be 1384 * treated as references to captured subsequences as described above, and 1385 * backslashes are used to escape literal characters in the replacement 1386 * string. 
1387 * 1388 * <p> Given the regular expression {@code dog}, the input 1389 * {@code "zzzdogzzzdogzzz"}, and the replacement string 1390 * {@code "cat"}, an invocation of this method on a matcher for that 1391 * expression would yield the string {@code "zzzcatzzzdogzzz"}. </p> 1392 * 1393 * <p> Invoking this method changes this matcher's state. If the matcher 1394 * is to be used in further matching operations then it should first be 1395 * reset. </p> 1396 * 1397 * @param replacement 1398 * The replacement string 1399 * @return The string constructed by replacing the first matching 1400 * subsequence by the replacement string, substituting captured 1401 * subsequences as needed 1402 */ 1403 public String replaceFirst(String replacement) { 1404 if (replacement == null) 1405 throw new NullPointerException("replacement"); 1406 reset(); 1407 if (!find()) 1408 return text.toString(); 1409 StringBuilder sb = new StringBuilder(); 1410 sb.append(text, 0, first); 1411 appendExpandedReplacement(replacement, sb); 1412 sb.append(text, last, getTextLength()); 1413 return sb.toString(); 1414 } 1415 1416 /** 1417 * Replaces the first subsequence of the input sequence that matches the 1418 * pattern with the result of applying the given replacer function to the 1419 * match result of this matcher corresponding to that subsequence. 1420 * Exceptions thrown by the replace function are relayed to the caller. 1421 * 1422 * <p> This method first resets this matcher. It then scans the input 1423 * sequence looking for a match of the pattern. Characters that are not 1424 * part of the match are appended directly to the result string; the match 1425 * is replaced in the result by the applying the replacer function that 1426 * returns a replacement string. The replacement string may contain 1427 * references to captured subsequences as in the {@link #appendReplacement 1428 * appendReplacement} method. 
1429 * 1430 * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in 1431 * the replacement string may cause the results to be different than if it 1432 * were being treated as a literal replacement string. Dollar signs may be 1433 * treated as references to captured subsequences as described above, and 1434 * backslashes are used to escape literal characters in the replacement 1435 * string. 1436 * 1437 * <p> Given the regular expression {@code dog}, the input 1438 * {@code "zzzdogzzzdogzzz"}, and the function 1439 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on 1440 * a matcher for that expression would yield the string 1441 * {@code "zzzDOGzzzdogzzz"}. 1442 * 1443 * <p> Invoking this method changes this matcher's state. If the matcher 1444 * is to be used in further matching operations then it should first be 1445 * reset. 1446 * 1447 * <p> The replacer function should not modify this matcher's state during 1448 * replacement. This method will, on a best-effort basis, throw a 1449 * {@link java.util.ConcurrentModificationException} if such modification is 1450 * detected. 1451 * 1452 * <p> The state of the match result passed to the replacer function is 1453 * guaranteed to be constant only for the duration of the replacer function 1454 * call and only if the replacer function does not modify this matcher's 1455 * state. 1456 * 1457 * @implNote 1458 * This implementation applies the replacer function to this matcher, which 1459 * is an instance of {@code MatchResult}. 1460 * 1461 * @param replacer 1462 * The function to be applied to the match result of this matcher 1463 * that returns a replacement string. 1464 * @return The string constructed by replacing the first matching 1465 * subsequence with the result of applying the replacer function to 1466 * the matched subsequence, substituting captured subsequences as 1467 * needed. 
1468 * @throws NullPointerException if the replacer function is null 1469 * @throws ConcurrentModificationException if it is detected, on a 1470 * best-effort basis, that the replacer function modified this 1471 * matcher's state 1472 * @since 9 1473 */ 1474 public String replaceFirst(Function<MatchResult, String> replacer) { 1475 Objects.requireNonNull(replacer); 1476 reset(); 1477 if (!find()) 1478 return text.toString(); 1479 StringBuilder sb = new StringBuilder(); 1480 int ec = modCount; 1481 String replacement = replacer.apply(this); 1482 if (ec != modCount) 1483 throw new ConcurrentModificationException(); 1484 appendReplacement(sb, replacement); 1485 appendTail(sb); 1486 return sb.toString(); 1487 } 1488 1489 /** 1490 * Sets the limits of this matcher's region. The region is the part of the 1491 * input sequence that will be searched to find a match. Invoking this 1492 * method resets the matcher, and then sets the region to start at the 1493 * index specified by the {@code start} parameter and end at the 1494 * index specified by the {@code end} parameter. 1495 * 1496 * <p>Depending on the transparency and anchoring being used (see 1497 * {@link #useTransparentBounds useTransparentBounds} and 1498 * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such 1499 * as anchors may behave differently at or around the boundaries of the 1500 * region. 1501 * 1502 * @param start 1503 * The index to start searching at (inclusive) 1504 * @param end 1505 * The index to end searching at (exclusive) 1506 * @throws IndexOutOfBoundsException 1507 * If start or end is less than zero, if 1508 * start is greater than the length of the input sequence, if 1509 * end is greater than the length of the input sequence, or if 1510 * start is greater than end. 
1511 * @return this matcher 1512 * @since 1.5 1513 */ 1514 public Matcher region(int start, int end) { 1515 if ((start < 0) || (start > getTextLength())) 1516 throw new IndexOutOfBoundsException("start"); 1517 if ((end < 0) || (end > getTextLength())) 1518 throw new IndexOutOfBoundsException("end"); 1519 if (start > end) 1520 throw new IndexOutOfBoundsException("start > end"); 1521 reset(); 1522 from = start; 1523 to = end; 1524 return this; 1525 } 1526 1527 /** 1528 * Reports the start index of this matcher's region. The 1529 * searches this matcher conducts are limited to finding matches 1530 * within {@link #regionStart regionStart} (inclusive) and 1531 * {@link #regionEnd regionEnd} (exclusive). 1532 * 1533 * @return The starting point of this matcher's region 1534 * @since 1.5 1535 */ 1536 public int regionStart() { 1537 return from; 1538 } 1539 1540 /** 1541 * Reports the end index (exclusive) of this matcher's region. 1542 * The searches this matcher conducts are limited to finding matches 1543 * within {@link #regionStart regionStart} (inclusive) and 1544 * {@link #regionEnd regionEnd} (exclusive). 1545 * 1546 * @return the ending point of this matcher's region 1547 * @since 1.5 1548 */ 1549 public int regionEnd() { 1550 return to; 1551 } 1552 1553 /** 1554 * Queries the transparency of region bounds for this matcher. 1555 * 1556 * <p> This method returns {@code true} if this matcher uses 1557 * <i>transparent</i> bounds, {@code false} if it uses <i>opaque</i> 1558 * bounds. 1559 * 1560 * <p> See {@link #useTransparentBounds useTransparentBounds} for a 1561 * description of transparent and opaque bounds. 1562 * 1563 * <p> By default, a matcher uses opaque region boundaries. 1564 * 1565 * @return {@code true} iff this matcher is using transparent bounds, 1566 * {@code false} otherwise. 
1567 * @see java.util.regex.Matcher#useTransparentBounds(boolean) 1568 * @since 1.5 1569 */ 1570 public boolean hasTransparentBounds() { 1571 return transparentBounds; 1572 } 1573 1574 /** 1575 * Sets the transparency of region bounds for this matcher. 1576 * 1577 * <p> Invoking this method with an argument of {@code true} will set this 1578 * matcher to use <i>transparent</i> bounds. If the boolean 1579 * argument is {@code false}, then <i>opaque</i> bounds will be used. 1580 * 1581 * <p> Using transparent bounds, the boundaries of this 1582 * matcher's region are transparent to lookahead, lookbehind, 1583 * and boundary matching constructs. Those constructs can see beyond the 1584 * boundaries of the region to see if a match is appropriate. 1585 * 1586 * <p> Using opaque bounds, the boundaries of this matcher's 1587 * region are opaque to lookahead, lookbehind, and boundary matching 1588 * constructs that may try to see beyond them. Those constructs cannot 1589 * look past the boundaries so they will fail to match anything outside 1590 * of the region. 1591 * 1592 * <p> By default, a matcher uses opaque bounds. 1593 * 1594 * @param b a boolean indicating whether to use opaque or transparent 1595 * regions 1596 * @return this matcher 1597 * @see java.util.regex.Matcher#hasTransparentBounds 1598 * @since 1.5 1599 */ 1600 public Matcher useTransparentBounds(boolean b) { 1601 transparentBounds = b; 1602 return this; 1603 } 1604 1605 /** 1606 * Queries the anchoring of region bounds for this matcher. 1607 * 1608 * <p> This method returns {@code true} if this matcher uses 1609 * <i>anchoring</i> bounds, {@code false} otherwise. 1610 * 1611 * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a 1612 * description of anchoring bounds. 1613 * 1614 * <p> By default, a matcher uses anchoring region boundaries. 1615 * 1616 * @return {@code true} iff this matcher is using anchoring bounds, 1617 * {@code false} otherwise. 
1618 * @see java.util.regex.Matcher#useAnchoringBounds(boolean) 1619 * @since 1.5 1620 */ 1621 public boolean hasAnchoringBounds() { 1622 return anchoringBounds; 1623 } 1624 1625 /** 1626 * Sets the anchoring of region bounds for this matcher. 1627 * 1628 * <p> Invoking this method with an argument of {@code true} will set this 1629 * matcher to use <i>anchoring</i> bounds. If the boolean 1630 * argument is {@code false}, then <i>non-anchoring</i> bounds will be 1631 * used. 1632 * 1633 * <p> Using anchoring bounds, the boundaries of this 1634 * matcher's region match anchors such as ^ and $. 1635 * 1636 * <p> Without anchoring bounds, the boundaries of this 1637 * matcher's region will not match anchors such as ^ and $. 1638 * 1639 * <p> By default, a matcher uses anchoring region boundaries. 1640 * 1641 * @param b a boolean indicating whether or not to use anchoring bounds. 1642 * @return this matcher 1643 * @see java.util.regex.Matcher#hasAnchoringBounds 1644 * @since 1.5 1645 */ 1646 public Matcher useAnchoringBounds(boolean b) { 1647 anchoringBounds = b; 1648 return this; 1649 } 1650 1651 /** 1652 * <p>Returns the string representation of this matcher. The 1653 * string representation of a {@code Matcher} contains information 1654 * that may be useful for debugging. The exact format is unspecified. 1655 * 1656 * @return The string representation of this matcher 1657 * @since 1.5 1658 */ 1659 public String toString() { 1660 StringBuilder sb = new StringBuilder(); 1661 sb.append("java.util.regex.Matcher") 1662 .append("[pattern=").append(pattern()) 1663 .append(" region=") 1664 .append(regionStart()).append(',').append(regionEnd()) 1665 .append(" lastmatch="); 1666 if ((first >= 0) && (group() != null)) { 1667 sb.append(group()); 1668 } 1669 sb.append(']'); 1670 return sb.toString(); 1671 } 1672 1673 /** 1674 * <p>Returns true if the end of input was hit by the search engine in 1675 * the last match operation performed by this matcher. 
1676 * 1677 * <p>When this method returns true, then it is possible that more input 1678 * would have changed the result of the last search. 1679 * 1680 * @return true iff the end of input was hit in the last match; false 1681 * otherwise 1682 * @since 1.5 1683 */ 1684 public boolean hitEnd() { 1685 return hitEnd; 1686 } 1687 1688 /** 1689 * <p>Returns true if more input could change a positive match into a 1690 * negative one. 1691 * 1692 * <p>If this method returns true, and a match was found, then more 1693 * input could cause the match to be lost. If this method returns false 1694 * and a match was found, then more input might change the match but the 1695 * match won't be lost. If a match was not found, then requireEnd has no 1696 * meaning. 1697 * 1698 * @return true iff more input could change a positive match into a 1699 * negative one. 1700 * @since 1.5 1701 */ 1702 public boolean requireEnd() { 1703 return requireEnd; 1704 } 1705 1706 /** 1707 * Initiates a search to find a Pattern within the given bounds. 1708 * The groups are filled with default values and the match of the root 1709 * of the state machine is called. The state machine will hold the state 1710 * of the match as it proceeds in this matcher. 1711 * 1712 * Matcher.from is not set here, because it is the "hard" boundary 1713 * of the start of the search which anchors will set to. The from param 1714 * is the "soft" boundary of the start of the search, meaning that the 1715 * regex tries to match at that index but ^ won't match there. Subsequent 1716 * calls to the search methods start at a new "soft" boundary which is 1717 * the end of the previous match. 1718 */ 1719 boolean search(int from) { 1720 this.hitEnd = false; 1721 this.requireEnd = false; 1722 from = from < 0 ? 0 : from; 1723 this.first = from; 1724 this.oldLast = oldLast < 0 ? 
from : oldLast; 1725 for (int i = 0; i < groups.length; i++) 1726 groups[i] = -1; 1727 for (int i = 0; i < localsPos.length; i++) { 1728 if (localsPos[i] != null) 1729 localsPos[i].clear(); 1730 } 1731 acceptMode = NOANCHOR; 1732 boolean result = parentPattern.root.match(this, from, text); 1733 if (!result) 1734 this.first = -1; 1735 this.oldLast = this.last; 1736 this.modCount++; 1737 return result; 1738 } 1739 1740 /** 1741 * Initiates a search for an anchored match to a Pattern within the given 1742 * bounds. The groups are filled with default values and the match of the 1743 * root of the state machine is called. The state machine will hold the 1744 * state of the match as it proceeds in this matcher. 1745 */ 1746 boolean match(int from, int anchor) { 1747 this.hitEnd = false; 1748 this.requireEnd = false; 1749 from = from < 0 ? 0 : from; 1750 this.first = from; 1751 this.oldLast = oldLast < 0 ? from : oldLast; 1752 for (int i = 0; i < groups.length; i++) 1753 groups[i] = -1; 1754 for (int i = 0; i < localsPos.length; i++) { 1755 if (localsPos[i] != null) 1756 localsPos[i].clear(); 1757 } 1758 acceptMode = anchor; 1759 boolean result = parentPattern.matchRoot.match(this, from, text); 1760 if (!result) 1761 this.first = -1; 1762 this.oldLast = this.last; 1763 this.modCount++; 1764 return result; 1765 } 1766 1767 /** 1768 * Returns the end index of the text. 1769 * 1770 * @return the index after the last character in the text 1771 */ 1772 int getTextLength() { 1773 return text.length(); 1774 } 1775 1776 /** 1777 * Generates a String from this Matcher's input in the specified range. 1778 * 1779 * @param beginIndex the beginning index, inclusive 1780 * @param endIndex the ending index, exclusive 1781 * @return A String generated from this Matcher's input 1782 */ 1783 CharSequence getSubSequence(int beginIndex, int endIndex) { 1784 return text.subSequence(beginIndex, endIndex); 1785 } 1786 1787 /** 1788 * Returns this Matcher's input character at index i. 
*
     * @return A char from the specified index
     */
    char charAt(int i) {
        return this.text.charAt(i);
    }

    /**
     * Returns the group index of the matched capturing group.
     *
     * @param name the name of the named-capturing group
     * @return the index of the named-capturing group
     * @throws NullPointerException if {@code name} is null
     * @throws IllegalStateException if there is no current match
     * @throws IllegalArgumentException if the pattern has no group with the
     *         given name
     */
    int getMatchedGroupIndex(String name) {
        Objects.requireNonNull(name, "Group name");
        if (first < 0) {
            throw new IllegalStateException("No match found");
        }
        if (!parentPattern.namedGroups().containsKey(name)) {
            throw new IllegalArgumentException("No group with name <" + name + ">");
        }
        return parentPattern.namedGroups().get(name);
    }
}