1 /* 2 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.util.regex; 27 28 import java.util.Objects; 29 30 /** 31 * An engine that performs match operations on a {@linkplain java.lang.CharSequence 32 * character sequence} by interpreting a {@link Pattern}. 33 * 34 * <p> A matcher is created from a pattern by invoking the pattern's {@link 35 * Pattern#matcher matcher} method. Once created, a matcher can be used to 36 * perform three different kinds of match operations: 37 * 38 * <ul> 39 * 40 * <li><p> The {@link #matches matches} method attempts to match the entire 41 * input sequence against the pattern. </p></li> 42 * 43 * <li><p> The {@link #lookingAt lookingAt} method attempts to match the 44 * input sequence, starting at the beginning, against the pattern. 
</p></li> 45 * 46 * <li><p> The {@link #find find} method scans the input sequence looking for 47 * the next subsequence that matches the pattern. </p></li> 48 * 49 * </ul> 50 * 51 * <p> Each of these methods returns a boolean indicating success or failure. 52 * More information about a successful match can be obtained by querying the 53 * state of the matcher. 54 * 55 * <p> A matcher finds matches in a subset of its input called the 56 * <i>region</i>. By default, the region contains all of the matcher's input. 57 * The region can be modified via the {@link #region region} method and queried 58 * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd} 59 * methods. The way that the region boundaries interact with some pattern 60 * constructs can be changed. See {@link #useAnchoringBounds 61 * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds} 62 * for more details. 63 * 64 * <p> This class also defines methods for replacing matched subsequences with 65 * new strings whose contents can, if desired, be computed from the match 66 * result. The {@link #appendReplacement appendReplacement} and {@link 67 * #appendTail appendTail} methods can be used in tandem in order to collect 68 * the result into an existing string buffer or string builder. Alternatively, 69 * the more convenient {@link #replaceAll replaceAll} method can be used to 70 * create a string in which every matching subsequence in the input sequence 71 * is replaced. 72 * 73 * <p> The explicit state of a matcher includes the start and end indices of 74 * the most recent successful match. It also includes the start and end 75 * indices of the input subsequence captured by each <a 76 * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total 77 * count of such subsequences. As a convenience, methods are also provided for 78 * returning these captured subsequences in string form. 
79 * 80 * <p> The explicit state of a matcher is initially undefined; attempting to 81 * query any part of it before a successful match will cause an {@link 82 * IllegalStateException} to be thrown. The explicit state of a matcher is 83 * recomputed by every match operation. 84 * 85 * <p> The implicit state of a matcher includes the input character sequence as 86 * well as the <i>append position</i>, which is initially zero and is updated 87 * by the {@link #appendReplacement appendReplacement} method. 88 * 89 * <p> A matcher may be reset explicitly by invoking its {@link #reset()} 90 * method or, if a new input sequence is desired, its {@link 91 * #reset(java.lang.CharSequence) reset(CharSequence)} method. Resetting a 92 * matcher discards its explicit state information and sets the append position 93 * to zero. 94 * 95 * <p> Instances of this class are not safe for use by multiple concurrent 96 * threads. </p> 97 * 98 * 99 * @author Mike McCloskey 100 * @author Mark Reinhold 101 * @author JSR-51 Expert Group 102 * @since 1.4 103 * @spec JSR-51 104 */ 105 106 public final class Matcher implements MatchResult { 107 108 /** 109 * The Pattern object that created this Matcher. 110 */ 111 Pattern parentPattern; 112 113 /** 114 * The storage used by groups. They may contain invalid values if 115 * a group was skipped during the matching. 116 */ 117 int[] groups; 118 119 /** 120 * The range within the sequence that is to be matched. Anchors 121 * will match at these "hard" boundaries. Changing the region 122 * changes these values. 123 */ 124 int from, to; 125 126 /** 127 * Lookbehind uses this value to ensure that the subexpression 128 * match ends at the point where the lookbehind was encountered. 129 */ 130 int lookbehindTo; 131 132 /** 133 * The original string being matched. 134 */ 135 CharSequence text; 136 137 /** 138 * Matcher state used by the last node. NOANCHOR is used when a 139 * match does not have to consume all of the input. 
ENDANCHOR is 140 * the mode used for matching all the input. 141 */ 142 static final int ENDANCHOR = 1; 143 static final int NOANCHOR = 0; 144 int acceptMode = NOANCHOR; 145 146 /** 147 * The range of string that last matched the pattern. If the last 148 * match failed then first is -1; last initially holds 0 then it 149 * holds the index of the end of the last match (which is where the 150 * next search starts). 151 */ 152 int first = -1, last = 0; 153 154 /** 155 * The end index of what matched in the last match operation. 156 */ 157 int oldLast = -1; 158 159 /** 160 * The index of the last position appended in a substitution. 161 */ 162 int lastAppendPosition = 0; 163 164 /** 165 * Storage used by nodes to tell what repetition they are on in 166 * a pattern, and where groups begin. The nodes themselves are stateless, 167 * so they rely on this field to hold state during a match. 168 */ 169 int[] locals; 170 171 /** 172 * Boolean indicating whether or not more input could change 173 * the results of the last match. 174 * 175 * If hitEnd is true, and a match was found, then more input 176 * might cause a different match to be found. 177 * If hitEnd is true and a match was not found, then more 178 * input could cause a match to be found. 179 * If hitEnd is false and a match was found, then more input 180 * will not change the match. 181 * If hitEnd is false and a match was not found, then more 182 * input will not cause a match to be found. 183 */ 184 boolean hitEnd; 185 186 /** 187 * Boolean indicating whether or not more input could change 188 * a positive match into a negative one. 189 * 190 * If requireEnd is true, and a match was found, then more 191 * input could cause the match to be lost. 192 * If requireEnd is false and a match was found, then more 193 * input might change the match but the match won't be lost. 194 * If a match was not found, then requireEnd has no meaning. 
195 */ 196 boolean requireEnd; 197 198 /** 199 * If transparentBounds is true then the boundaries of this 200 * matcher's region are transparent to lookahead, lookbehind, 201 * and boundary matching constructs that try to see beyond them. 202 */ 203 boolean transparentBounds = false; 204 205 /** 206 * If anchoringBounds is true then the boundaries of this 207 * matcher's region match anchors such as ^ and $. 208 */ 209 boolean anchoringBounds = true; 210 211 /** 212 * No default constructor. 213 */ 214 Matcher() { 215 } 216 217 /** 218 * All matchers have the state used by Pattern during a match. 219 */ 220 Matcher(Pattern parent, CharSequence text) { 221 this.parentPattern = parent; 222 this.text = text; 223 224 // Allocate state storage 225 int parentGroupCount = Math.max(parent.capturingGroupCount, 10); 226 groups = new int[parentGroupCount * 2]; 227 locals = new int[parent.localCount]; 228 229 // Put fields into initial states 230 reset(); 231 } 232 233 /** 234 * Returns the pattern that is interpreted by this matcher. 235 * 236 * @return The pattern for which this matcher was created 237 */ 238 public Pattern pattern() { 239 return parentPattern; 240 } 241 242 /** 243 * Returns the match state of this matcher as a {@link MatchResult}. 244 * The result is unaffected by subsequent operations performed upon this 245 * matcher. 246 * 247 * @return a <code>MatchResult</code> with the state of this matcher 248 * @since 1.5 249 */ 250 public MatchResult toMatchResult() { 251 Matcher result = new Matcher(this.parentPattern, text.toString()); 252 result.first = this.first; 253 result.last = this.last; 254 result.groups = this.groups.clone(); 255 return result; 256 } 257 258 /** 259 * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to 260 * find matches with. 261 * 262 * <p> This method causes this matcher to lose information 263 * about the groups of the last match that occurred. 
The 264 * matcher's position in the input is maintained and its 265 * last append position is unaffected.</p> 266 * 267 * @param newPattern 268 * The new pattern used by this matcher 269 * @return This matcher 270 * @throws IllegalArgumentException 271 * If newPattern is <tt>null</tt> 272 * @since 1.5 273 */ 274 public Matcher usePattern(Pattern newPattern) { 275 if (newPattern == null) 276 throw new IllegalArgumentException("Pattern cannot be null"); 277 parentPattern = newPattern; 278 279 // Reallocate state storage 280 int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10); 281 groups = new int[parentGroupCount * 2]; 282 locals = new int[newPattern.localCount]; 283 for (int i = 0; i < groups.length; i++) 284 groups[i] = -1; 285 for (int i = 0; i < locals.length; i++) 286 locals[i] = -1; 287 return this; 288 } 289 290 /** 291 * Resets this matcher. 292 * 293 * <p> Resetting a matcher discards all of its explicit state information 294 * and sets its append position to zero. The matcher's region is set to the 295 * default region, which is its entire character sequence. The anchoring 296 * and transparency of this matcher's region boundaries are unaffected. 297 * 298 * @return This matcher 299 */ 300 public Matcher reset() { 301 first = -1; 302 last = 0; 303 oldLast = -1; 304 for(int i=0; i<groups.length; i++) 305 groups[i] = -1; 306 for(int i=0; i<locals.length; i++) 307 locals[i] = -1; 308 lastAppendPosition = 0; 309 from = 0; 310 to = getTextLength(); 311 return this; 312 } 313 314 /** 315 * Resets this matcher with a new input sequence. 316 * 317 * <p> Resetting a matcher discards all of its explicit state information 318 * and sets its append position to zero. The matcher's region is set to 319 * the default region, which is its entire character sequence. The 320 * anchoring and transparency of this matcher's region boundaries are 321 * unaffected. 
322 * 323 * @param input 324 * The new input character sequence 325 * 326 * @return This matcher 327 */ 328 public Matcher reset(CharSequence input) { 329 text = input; 330 return reset(); 331 } 332 333 /** 334 * Returns the start index of the previous match. 335 * 336 * @return The index of the first character matched 337 * 338 * @throws IllegalStateException 339 * If no match has yet been attempted, 340 * or if the previous match operation failed 341 */ 342 public int start() { 343 if (first < 0) 344 throw new IllegalStateException("No match available"); 345 return first; 346 } 347 348 /** 349 * Returns the start index of the subsequence captured by the given group 350 * during the previous match operation. 351 * 352 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 353 * to right, starting at one. Group zero denotes the entire pattern, so 354 * the expression <i>m.</i><tt>start(0)</tt> is equivalent to 355 * <i>m.</i><tt>start()</tt>. </p> 356 * 357 * @param group 358 * The index of a capturing group in this matcher's pattern 359 * 360 * @return The index of the first character captured by the group, 361 * or <tt>-1</tt> if the match was successful but the group 362 * itself did not match anything 363 * 364 * @throws IllegalStateException 365 * If no match has yet been attempted, 366 * or if the previous match operation failed 367 * 368 * @throws IndexOutOfBoundsException 369 * If there is no capturing group in the pattern 370 * with the given index 371 */ 372 public int start(int group) { 373 if (first < 0) 374 throw new IllegalStateException("No match available"); 375 if (group < 0 || group > groupCount()) 376 throw new IndexOutOfBoundsException("No group " + group); 377 return groups[group * 2]; 378 } 379 380 /** 381 * Returns the start index of the subsequence captured by the given 382 * <a href="Pattern.html#groupname">named-capturing group</a> during the 383 * previous match operation. 
384 * 385 * @param name 386 * The name of a named-capturing group in this matcher's pattern 387 * 388 * @return The index of the first character captured by the group, 389 * or {@code -1} if the match was successful but the group 390 * itself did not match anything 391 * 392 * @throws IllegalStateException 393 * If no match has yet been attempted, 394 * or if the previous match operation failed 395 * 396 * @throws IllegalArgumentException 397 * If there is no capturing group in the pattern 398 * with the given name 399 * @since 1.8 400 */ 401 public int start(String name) { 402 return groups[getMatchedGroupIndex(name) * 2]; 403 } 404 405 /** 406 * Returns the offset after the last character matched. 407 * 408 * @return The offset after the last character matched 409 * 410 * @throws IllegalStateException 411 * If no match has yet been attempted, 412 * or if the previous match operation failed 413 */ 414 public int end() { 415 if (first < 0) 416 throw new IllegalStateException("No match available"); 417 return last; 418 } 419 420 /** 421 * Returns the offset after the last character of the subsequence 422 * captured by the given group during the previous match operation. 423 * 424 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 425 * to right, starting at one. Group zero denotes the entire pattern, so 426 * the expression <i>m.</i><tt>end(0)</tt> is equivalent to 427 * <i>m.</i><tt>end()</tt>. 
</p> 428 * 429 * @param group 430 * The index of a capturing group in this matcher's pattern 431 * 432 * @return The offset after the last character captured by the group, 433 * or <tt>-1</tt> if the match was successful 434 * but the group itself did not match anything 435 * 436 * @throws IllegalStateException 437 * If no match has yet been attempted, 438 * or if the previous match operation failed 439 * 440 * @throws IndexOutOfBoundsException 441 * If there is no capturing group in the pattern 442 * with the given index 443 */ 444 public int end(int group) { 445 if (first < 0) 446 throw new IllegalStateException("No match available"); 447 if (group < 0 || group > groupCount()) 448 throw new IndexOutOfBoundsException("No group " + group); 449 return groups[group * 2 + 1]; 450 } 451 452 /** 453 * Returns the offset after the last character of the subsequence 454 * captured by the given <a href="Pattern.html#groupname">named-capturing 455 * group</a> during the previous match operation. 456 * 457 * @param name 458 * The name of a named-capturing group in this matcher's pattern 459 * 460 * @return The offset after the last character captured by the group, 461 * or {@code -1} if the match was successful 462 * but the group itself did not match anything 463 * 464 * @throws IllegalStateException 465 * If no match has yet been attempted, 466 * or if the previous match operation failed 467 * 468 * @throws IllegalArgumentException 469 * If there is no capturing group in the pattern 470 * with the given name 471 * @since 1.8 472 */ 473 public int end(String name) { 474 return groups[getMatchedGroupIndex(name) * 2 + 1]; 475 } 476 477 /** 478 * Returns the input subsequence matched by the previous match. 479 * 480 * <p> For a matcher <i>m</i> with input sequence <i>s</i>, 481 * the expressions <i>m.</i><tt>group()</tt> and 482 * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt> <i>m.</i><tt>end())</tt> 483 * are equivalent. 
</p> 484 * 485 * <p> Note that some patterns, for example <tt>a*</tt>, match the empty 486 * string. This method will return the empty string when the pattern 487 * successfully matches the empty string in the input. </p> 488 * 489 * @return The (possibly empty) subsequence matched by the previous match, 490 * in string form 491 * 492 * @throws IllegalStateException 493 * If no match has yet been attempted, 494 * or if the previous match operation failed 495 */ 496 public String group() { 497 return group(0); 498 } 499 500 /** 501 * Returns the input subsequence captured by the given group during the 502 * previous match operation. 503 * 504 * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index 505 * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and 506 * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt> <i>m.</i><tt>end(</tt><i>g</i><tt>))</tt> 507 * are equivalent. </p> 508 * 509 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left 510 * to right, starting at one. Group zero denotes the entire pattern, so 511 * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>. 512 * </p> 513 * 514 * <p> If the match was successful but the group specified failed to match 515 * any part of the input sequence, then <tt>null</tt> is returned. Note 516 * that some groups, for example <tt>(a*)</tt>, match the empty string. 517 * This method will return the empty string when such a group successfully 518 * matches the empty string in the input. 
</p> 519 * 520 * @param group 521 * The index of a capturing group in this matcher's pattern 522 * 523 * @return The (possibly empty) subsequence captured by the group 524 * during the previous match, or <tt>null</tt> if the group 525 * failed to match part of the input 526 * 527 * @throws IllegalStateException 528 * If no match has yet been attempted, 529 * or if the previous match operation failed 530 * 531 * @throws IndexOutOfBoundsException 532 * If there is no capturing group in the pattern 533 * with the given index 534 */ 535 public String group(int group) { 536 if (first < 0) 537 throw new IllegalStateException("No match found"); 538 if (group < 0 || group > groupCount()) 539 throw new IndexOutOfBoundsException("No group " + group); 540 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 541 return null; 542 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 543 } 544 545 /** 546 * Returns the input subsequence captured by the given 547 * <a href="Pattern.html#groupname">named-capturing group</a> during the previous 548 * match operation. 549 * 550 * <p> If the match was successful but the group specified failed to match 551 * any part of the input sequence, then <tt>null</tt> is returned. Note 552 * that some groups, for example <tt>(a*)</tt>, match the empty string. 553 * This method will return the empty string when such a group successfully 554 * matches the empty string in the input. 
</p> 555 * 556 * @param name 557 * The name of a named-capturing group in this matcher's pattern 558 * 559 * @return The (possibly empty) subsequence captured by the named group 560 * during the previous match, or <tt>null</tt> if the group 561 * failed to match part of the input 562 * 563 * @throws IllegalStateException 564 * If no match has yet been attempted, 565 * or if the previous match operation failed 566 * 567 * @throws IllegalArgumentException 568 * If there is no capturing group in the pattern 569 * with the given name 570 * @since 1.7 571 */ 572 public String group(String name) { 573 int group = getMatchedGroupIndex(name); 574 if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) 575 return null; 576 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); 577 } 578 579 /** 580 * Returns the number of capturing groups in this matcher's pattern. 581 * 582 * <p> Group zero denotes the entire pattern by convention. It is not 583 * included in this count. 584 * 585 * <p> Any non-negative integer smaller than or equal to the value 586 * returned by this method is guaranteed to be a valid group index for 587 * this matcher. </p> 588 * 589 * @return The number of capturing groups in this matcher's pattern 590 */ 591 public int groupCount() { 592 return parentPattern.capturingGroupCount - 1; 593 } 594 595 /** 596 * Attempts to match the entire region against the pattern. 597 * 598 * <p> If the match succeeds then more information can be obtained via the 599 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p> 600 * 601 * @return <tt>true</tt> if, and only if, the entire region sequence 602 * matches this matcher's pattern 603 */ 604 public boolean matches() { 605 return match(from, ENDANCHOR); 606 } 607 608 /** 609 * Attempts to find the next subsequence of the input sequence that matches 610 * the pattern. 
611 * 612 * <p> This method starts at the beginning of this matcher's region, or, if 613 * a previous invocation of the method was successful and the matcher has 614 * not since been reset, at the first character not matched by the previous 615 * match. 616 * 617 * <p> If the match succeeds then more information can be obtained via the 618 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p> 619 * 620 * @return <tt>true</tt> if, and only if, a subsequence of the input 621 * sequence matches this matcher's pattern 622 */ 623 public boolean find() { 624 int nextSearchIndex = last; 625 if (nextSearchIndex == first) 626 nextSearchIndex++; 627 628 // If next search starts before region, start it at region 629 if (nextSearchIndex < from) 630 nextSearchIndex = from; 631 632 // If next search starts beyond region then it fails 633 if (nextSearchIndex > to) { 634 for (int i = 0; i < groups.length; i++) 635 groups[i] = -1; 636 return false; 637 } 638 return search(nextSearchIndex); 639 } 640 641 /** 642 * Resets this matcher and then attempts to find the next subsequence of 643 * the input sequence that matches the pattern, starting at the specified 644 * index. 645 * 646 * <p> If the match succeeds then more information can be obtained via the 647 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent 648 * invocations of the {@link #find()} method will start at the first 649 * character not matched by this match. </p> 650 * 651 * @param start the index to start searching for a match 652 * @throws IndexOutOfBoundsException 653 * If start is less than zero or if start is greater than the 654 * length of the input sequence. 
655 * 656 * @return <tt>true</tt> if, and only if, a subsequence of the input 657 * sequence starting at the given index matches this matcher's 658 * pattern 659 */ 660 public boolean find(int start) { 661 int limit = getTextLength(); 662 if ((start < 0) || (start > limit)) 663 throw new IndexOutOfBoundsException("Illegal start index"); 664 reset(); 665 return search(start); 666 } 667 668 /** 669 * Attempts to match the input sequence, starting at the beginning of the 670 * region, against the pattern. 671 * 672 * <p> Like the {@link #matches matches} method, this method always starts 673 * at the beginning of the region; unlike that method, it does not 674 * require that the entire region be matched. 675 * 676 * <p> If the match succeeds then more information can be obtained via the 677 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p> 678 * 679 * @return <tt>true</tt> if, and only if, a prefix of the input 680 * sequence matches this matcher's pattern 681 */ 682 public boolean lookingAt() { 683 return match(from, NOANCHOR); 684 } 685 686 /** 687 * Returns a literal replacement <code>String</code> for the specified 688 * <code>String</code>. 689 * 690 * This method produces a <code>String</code> that will work 691 * as a literal replacement <code>s</code> in the 692 * <code>appendReplacement</code> method of the {@link Matcher} class. 693 * The <code>String</code> produced will match the sequence of characters 694 * in <code>s</code> treated as a literal sequence. Slashes ('\') and 695 * dollar signs ('$') will be given no special meaning. 
696 * 697 * @param s The string to be literalized 698 * @return A literal string replacement 699 * @since 1.5 700 */ 701 public static String quoteReplacement(String s) { 702 if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1)) 703 return s; 704 StringBuilder sb = new StringBuilder(); 705 for (int i=0; i<s.length(); i++) { 706 char c = s.charAt(i); 707 if (c == '\\' || c == '$') { 708 sb.append('\\'); 709 } 710 sb.append(c); 711 } 712 return sb.toString(); 713 } 714 715 /** 716 * Implements a non-terminal append-and-replace step. 717 * 718 * <p> This method performs the following actions: </p> 719 * 720 * <ol> 721 * 722 * <li><p> It reads characters from the input sequence, starting at the 723 * append position, and appends them to the given string buffer. It 724 * stops after reading the last character preceding the previous match, 725 * that is, the character at index {@link 726 * #start()} <tt>-</tt> <tt>1</tt>. </p></li> 727 * 728 * <li><p> It appends the given replacement string to the string buffer. 729 * </p></li> 730 * 731 * <li><p> It sets the append position of this matcher to the index of 732 * the last character matched, plus one, that is, to {@link #end()}. 733 * </p></li> 734 * 735 * </ol> 736 * 737 * <p> The replacement string may contain references to subsequences 738 * captured during the previous match: Each occurrence of 739 * <tt>${</tt><i>name</i><tt>}</tt> or <tt>$</tt><i>g</i> 740 * will be replaced by the result of evaluating the corresponding 741 * {@link #group(String) group(name)} or {@link #group(int) group(g)} 742 * respectively. For <tt>$</tt><i>g</i>, 743 * the first number after the <tt>$</tt> is always treated as part of 744 * the group reference. Subsequent numbers are incorporated into g if 745 * they would form a legal group reference. Only the numerals '0' 746 * through '9' are considered as potential components of the group 747 * reference. 
If the second group matched the string <tt>"foo"</tt>, for 748 * example, then passing the replacement string <tt>"$2bar"</tt> would 749 * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar 750 * sign (<tt>$</tt>) may be included as a literal in the replacement 751 * string by preceding it with a backslash (<tt>\$</tt>). 752 * 753 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 754 * the replacement string may cause the results to be different than if it 755 * were being treated as a literal replacement string. Dollar signs may be 756 * treated as references to captured subsequences as described above, and 757 * backslashes are used to escape literal characters in the replacement 758 * string. 759 * 760 * <p> This method is intended to be used in a loop together with the 761 * {@link #appendTail appendTail} and {@link #find find} methods. The 762 * following code, for example, writes <tt>one dog two dogs in the 763 * yard</tt> to the standard-output stream: </p> 764 * 765 * <blockquote><pre> 766 * Pattern p = Pattern.compile("cat"); 767 * Matcher m = p.matcher("one cat two cats in the yard"); 768 * StringBuffer sb = new StringBuffer(); 769 * while (m.find()) { 770 * m.appendReplacement(sb, "dog"); 771 * } 772 * m.appendTail(sb); 773 * System.out.println(sb.toString());</pre></blockquote> 774 * 775 * @param sb 776 * The target string buffer 777 * 778 * @param replacement 779 * The replacement string 780 * 781 * @return This matcher 782 * 783 * @throws IllegalStateException 784 * If no match has yet been attempted, 785 * or if the previous match operation failed 786 * 787 * @throws IllegalArgumentException 788 * If the replacement string refers to a named-capturing 789 * group that does not exist in the pattern 790 * 791 * @throws IndexOutOfBoundsException 792 * If the replacement string refers to a capturing group 793 * that does not exist in the pattern 794 */ 795 public Matcher appendReplacement(StringBuffer sb, String 
replacement) { 796 // If no match, return error 797 if (first < 0) 798 throw new IllegalStateException("No match available"); 799 StringBuilder result = new StringBuilder(); 800 appendExpandedReplacement(replacement, result); 801 // Append the intervening text 802 sb.append(text, lastAppendPosition, first); 803 // Append the match substitution 804 sb.append(result); 805 lastAppendPosition = last; 806 return this; 807 } 808 809 /** 810 * Implements a non-terminal append-and-replace step. 811 * 812 * <p> This method performs the following actions: </p> 813 * 814 * <ol> 815 * 816 * <li><p> It reads characters from the input sequence, starting at the 817 * append position, and appends them to the given string builder. It 818 * stops after reading the last character preceding the previous match, 819 * that is, the character at index {@link 820 * #start()} <tt>-</tt> <tt>1</tt>. </p></li> 821 * 822 * <li><p> It appends the given replacement string to the string builder. 823 * </p></li> 824 * 825 * <li><p> It sets the append position of this matcher to the index of 826 * the last character matched, plus one, that is, to {@link #end()}. 827 * </p></li> 828 * 829 * </ol> 830 * 831 * <p> The replacement string may contain references to subsequences 832 * captured during the previous match: Each occurrence of 833 * <tt>$</tt><i>g</i><tt></tt> will be replaced by the result of 834 * evaluating {@link #group(int) group}<tt>(</tt><i>g</i><tt>)</tt>. 835 * The first number after the <tt>$</tt> is always treated as part of 836 * the group reference. Subsequent numbers are incorporated into g if 837 * they would form a legal group reference. Only the numerals '0' 838 * through '9' are considered as potential components of the group 839 * reference. If the second group matched the string <tt>"foo"</tt>, for 840 * example, then passing the replacement string <tt>"$2bar"</tt> would 841 * cause <tt>"foobar"</tt> to be appended to the string builder. 
A dollar 842 * sign (<tt>$</tt>) may be included as a literal in the replacement 843 * string by preceding it with a backslash (<tt>\$</tt>). 844 * 845 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 846 * the replacement string may cause the results to be different than if it 847 * were being treated as a literal replacement string. Dollar signs may be 848 * treated as references to captured subsequences as described above, and 849 * backslashes are used to escape literal characters in the replacement 850 * string. 851 * 852 * <p> This method is intended to be used in a loop together with the 853 * {@link #appendTail appendTail} and {@link #find find} methods. The 854 * following code, for example, writes <tt>one dog two dogs in the 855 * yard</tt> to the standard-output stream: </p> 856 * 857 * <blockquote><pre> 858 * Pattern p = Pattern.compile("cat"); 859 * Matcher m = p.matcher("one cat two cats in the yard"); 860 * StringBuilder sb = new StringBuilder(); 861 * while (m.find()) { 862 * m.appendReplacement(sb, "dog"); 863 * } 864 * m.appendTail(sb); 865 * System.out.println(sb.toString());</pre></blockquote> 866 * 867 * @param sb 868 * The target string builder 869 * @param replacement 870 * The replacement string 871 * @return This matcher 872 * 873 * @throws IllegalStateException 874 * If no match has yet been attempted, 875 * or if the previous match operation failed 876 * @throws IllegalArgumentException 877 * If the replacement string refers to a named-capturing 878 * group that does not exist in the pattern 879 * @throws IndexOutOfBoundsException 880 * If the replacement string refers to a capturing group 881 * that does not exist in the pattern 882 * @since 1.9 883 */ 884 public Matcher appendReplacement(StringBuilder sb, String replacement) { 885 // If no match, return error 886 if (first < 0) 887 throw new IllegalStateException("No match available"); 888 StringBuilder result = new StringBuilder(); 889 
appendExpandedReplacement(replacement, result); 890 // Append the intervening text 891 sb.append(text, lastAppendPosition, first); 892 // Append the match substitution 893 sb.append(result); 894 lastAppendPosition = last; 895 return this; 896 } 897 898 /** 899 * Processes replacement string to replace group references with 900 * groups. 901 */ 902 private StringBuilder appendExpandedReplacement( 903 String replacement, StringBuilder result) { 904 int cursor = 0; 905 while (cursor < replacement.length()) { 906 char nextChar = replacement.charAt(cursor); 907 if (nextChar == '\\') { 908 cursor++; 909 if (cursor == replacement.length()) 910 throw new IllegalArgumentException( 911 "character to be escaped is missing"); 912 nextChar = replacement.charAt(cursor); 913 result.append(nextChar); 914 cursor++; 915 } else if (nextChar == '$') { 916 // Skip past $ 917 cursor++; 918 // Throw IAE if this "$" is the last character in replacement 919 if (cursor == replacement.length()) 920 throw new IllegalArgumentException( 921 "Illegal group reference: group index is missing"); 922 nextChar = replacement.charAt(cursor); 923 int refNum = -1; 924 if (nextChar == '{') { 925 cursor++; 926 StringBuilder gsb = new StringBuilder(); 927 while (cursor < replacement.length()) { 928 nextChar = replacement.charAt(cursor); 929 if (ASCII.isLower(nextChar) || 930 ASCII.isUpper(nextChar) || 931 ASCII.isDigit(nextChar)) { 932 gsb.append(nextChar); 933 cursor++; 934 } else { 935 break; 936 } 937 } 938 if (gsb.length() == 0) 939 throw new IllegalArgumentException( 940 "named capturing group has 0 length name"); 941 if (nextChar != '}') 942 throw new IllegalArgumentException( 943 "named capturing group is missing trailing '}'"); 944 String gname = gsb.toString(); 945 if (ASCII.isDigit(gname.charAt(0))) 946 throw new IllegalArgumentException( 947 "capturing group name {" + gname + 948 "} starts with digit character"); 949 if (!parentPattern.namedGroups().containsKey(gname)) 950 throw new 
IllegalArgumentException( 951 "No group with name {" + gname + "}"); 952 refNum = parentPattern.namedGroups().get(gname); 953 cursor++; 954 } else { 955 // The first number is always a group 956 refNum = nextChar - '0'; 957 if ((refNum < 0) || (refNum > 9)) 958 throw new IllegalArgumentException( 959 "Illegal group reference"); 960 cursor++; 961 // Capture the largest legal group string 962 boolean done = false; 963 while (!done) { 964 if (cursor >= replacement.length()) { 965 break; 966 } 967 int nextDigit = replacement.charAt(cursor) - '0'; 968 if ((nextDigit < 0) || (nextDigit > 9)) { // not a number 969 break; 970 } 971 int newRefNum = (refNum * 10) + nextDigit; 972 if (groupCount() < newRefNum) { 973 done = true; 974 } else { 975 refNum = newRefNum; 976 cursor++; 977 } 978 } 979 } 980 // Append group 981 if (start(refNum) != -1 && end(refNum) != -1) 982 result.append(text, start(refNum), end(refNum)); 983 } else { 984 result.append(nextChar); 985 cursor++; 986 } 987 } 988 return result; 989 } 990 991 /** 992 * Implements a terminal append-and-replace step. 993 * 994 * <p> This method reads characters from the input sequence, starting at 995 * the append position, and appends them to the given string buffer. It is 996 * intended to be invoked after one or more invocations of the {@link 997 * #appendReplacement appendReplacement} method in order to copy the 998 * remainder of the input sequence. </p> 999 * 1000 * @param sb 1001 * The target string buffer 1002 * 1003 * @return The target string buffer 1004 */ 1005 public StringBuffer appendTail(StringBuffer sb) { 1006 sb.append(text, lastAppendPosition, getTextLength()); 1007 return sb; 1008 } 1009 1010 /** 1011 * Implements a terminal append-and-replace step. 1012 * 1013 * <p> This method reads characters from the input sequence, starting at 1014 * the append position, and appends them to the given string builder. 
It is 1015 * intended to be invoked after one or more invocations of the {@link 1016 * #appendReplacement appendReplacement} method in order to copy the 1017 * remainder of the input sequence. </p> 1018 * 1019 * @param sb 1020 * The target string builder 1021 * 1022 * @return The target string builder 1023 * 1024 * @since 1.9 1025 */ 1026 public StringBuilder appendTail(StringBuilder sb) { 1027 sb.append(text, lastAppendPosition, getTextLength()); 1028 return sb; 1029 } 1030 1031 /** 1032 * Replaces every subsequence of the input sequence that matches the 1033 * pattern with the given replacement string. 1034 * 1035 * <p> This method first resets this matcher. It then scans the input 1036 * sequence looking for matches of the pattern. Characters that are not 1037 * part of any match are appended directly to the result string; each match 1038 * is replaced in the result by the replacement string. The replacement 1039 * string may contain references to captured subsequences as in the {@link 1040 * #appendReplacement appendReplacement} method. 1041 * 1042 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 1043 * the replacement string may cause the results to be different than if it 1044 * were being treated as a literal replacement string. Dollar signs may be 1045 * treated as references to captured subsequences as described above, and 1046 * backslashes are used to escape literal characters in the replacement 1047 * string. 1048 * 1049 * <p> Given the regular expression <tt>a*b</tt>, the input 1050 * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string 1051 * <tt>"-"</tt>, an invocation of this method on a matcher for that 1052 * expression would yield the string <tt>"-foo-foo-foo-"</tt>. 1053 * 1054 * <p> Invoking this method changes this matcher's state. If the matcher 1055 * is to be used in further matching operations then it should first be 1056 * reset. 
</p> 1057 * 1058 * @param replacement 1059 * The replacement string 1060 * 1061 * @return The string constructed by replacing each matching subsequence 1062 * by the replacement string, substituting captured subsequences 1063 * as needed 1064 */ 1065 public String replaceAll(String replacement) { 1066 reset(); 1067 boolean result = find(); 1068 if (result) { 1069 StringBuilder sb = new StringBuilder(); 1070 do { 1071 appendReplacement(sb, replacement); 1072 result = find(); 1073 } while (result); 1074 appendTail(sb); 1075 return sb.toString(); 1076 } 1077 return text.toString(); 1078 } 1079 1080 /** 1081 * Replaces the first subsequence of the input sequence that matches the 1082 * pattern with the given replacement string. 1083 * 1084 * <p> This method first resets this matcher. It then scans the input 1085 * sequence looking for a match of the pattern. Characters that are not 1086 * part of the match are appended directly to the result string; the match 1087 * is replaced in the result by the replacement string. The replacement 1088 * string may contain references to captured subsequences as in the {@link 1089 * #appendReplacement appendReplacement} method. 1090 * 1091 * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in 1092 * the replacement string may cause the results to be different than if it 1093 * were being treated as a literal replacement string. Dollar signs may be 1094 * treated as references to captured subsequences as described above, and 1095 * backslashes are used to escape literal characters in the replacement 1096 * string. 1097 * 1098 * <p> Given the regular expression <tt>dog</tt>, the input 1099 * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string 1100 * <tt>"cat"</tt>, an invocation of this method on a matcher for that 1101 * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>. </p> 1102 * 1103 * <p> Invoking this method changes this matcher's state. 
If the matcher 1104 * is to be used in further matching operations then it should first be 1105 * reset. </p> 1106 * 1107 * @param replacement 1108 * The replacement string 1109 * @return The string constructed by replacing the first matching 1110 * subsequence by the replacement string, substituting captured 1111 * subsequences as needed 1112 */ 1113 public String replaceFirst(String replacement) { 1114 if (replacement == null) 1115 throw new NullPointerException("replacement"); 1116 reset(); 1117 if (!find()) 1118 return text.toString(); 1119 StringBuilder sb = new StringBuilder(); 1120 appendReplacement(sb, replacement); 1121 appendTail(sb); 1122 return sb.toString(); 1123 } 1124 1125 /** 1126 * Sets the limits of this matcher's region. The region is the part of the 1127 * input sequence that will be searched to find a match. Invoking this 1128 * method resets the matcher, and then sets the region to start at the 1129 * index specified by the <code>start</code> parameter and end at the 1130 * index specified by the <code>end</code> parameter. 1131 * 1132 * <p>Depending on the transparency and anchoring being used (see 1133 * {@link #useTransparentBounds useTransparentBounds} and 1134 * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such 1135 * as anchors may behave differently at or around the boundaries of the 1136 * region. 1137 * 1138 * @param start 1139 * The index to start searching at (inclusive) 1140 * @param end 1141 * The index to end searching at (exclusive) 1142 * @throws IndexOutOfBoundsException 1143 * If start or end is less than zero, if 1144 * start is greater than the length of the input sequence, if 1145 * end is greater than the length of the input sequence, or if 1146 * start is greater than end. 
1147 * @return this matcher 1148 * @since 1.5 1149 */ 1150 public Matcher region(int start, int end) { 1151 if ((start < 0) || (start > getTextLength())) 1152 throw new IndexOutOfBoundsException("start"); 1153 if ((end < 0) || (end > getTextLength())) 1154 throw new IndexOutOfBoundsException("end"); 1155 if (start > end) 1156 throw new IndexOutOfBoundsException("start > end"); 1157 reset(); 1158 from = start; 1159 to = end; 1160 return this; 1161 } 1162 1163 /** 1164 * Reports the start index of this matcher's region. The 1165 * searches this matcher conducts are limited to finding matches 1166 * within {@link #regionStart regionStart} (inclusive) and 1167 * {@link #regionEnd regionEnd} (exclusive). 1168 * 1169 * @return The starting point of this matcher's region 1170 * @since 1.5 1171 */ 1172 public int regionStart() { 1173 return from; 1174 } 1175 1176 /** 1177 * Reports the end index (exclusive) of this matcher's region. 1178 * The searches this matcher conducts are limited to finding matches 1179 * within {@link #regionStart regionStart} (inclusive) and 1180 * {@link #regionEnd regionEnd} (exclusive). 1181 * 1182 * @return the ending point of this matcher's region 1183 * @since 1.5 1184 */ 1185 public int regionEnd() { 1186 return to; 1187 } 1188 1189 /** 1190 * Queries the transparency of region bounds for this matcher. 1191 * 1192 * <p> This method returns <tt>true</tt> if this matcher uses 1193 * <i>transparent</i> bounds, <tt>false</tt> if it uses <i>opaque</i> 1194 * bounds. 1195 * 1196 * <p> See {@link #useTransparentBounds useTransparentBounds} for a 1197 * description of transparent and opaque bounds. 1198 * 1199 * <p> By default, a matcher uses opaque region boundaries. 1200 * 1201 * @return <tt>true</tt> iff this matcher is using transparent bounds, 1202 * <tt>false</tt> otherwise. 
1203 * @see java.util.regex.Matcher#useTransparentBounds(boolean) 1204 * @since 1.5 1205 */ 1206 public boolean hasTransparentBounds() { 1207 return transparentBounds; 1208 } 1209 1210 /** 1211 * Sets the transparency of region bounds for this matcher. 1212 * 1213 * <p> Invoking this method with an argument of <tt>true</tt> will set this 1214 * matcher to use <i>transparent</i> bounds. If the boolean 1215 * argument is <tt>false</tt>, then <i>opaque</i> bounds will be used. 1216 * 1217 * <p> Using transparent bounds, the boundaries of this 1218 * matcher's region are transparent to lookahead, lookbehind, 1219 * and boundary matching constructs. Those constructs can see beyond the 1220 * boundaries of the region to see if a match is appropriate. 1221 * 1222 * <p> Using opaque bounds, the boundaries of this matcher's 1223 * region are opaque to lookahead, lookbehind, and boundary matching 1224 * constructs that may try to see beyond them. Those constructs cannot 1225 * look past the boundaries so they will fail to match anything outside 1226 * of the region. 1227 * 1228 * <p> By default, a matcher uses opaque bounds. 1229 * 1230 * @param b a boolean indicating whether to use opaque or transparent 1231 * regions 1232 * @return this matcher 1233 * @see java.util.regex.Matcher#hasTransparentBounds 1234 * @since 1.5 1235 */ 1236 public Matcher useTransparentBounds(boolean b) { 1237 transparentBounds = b; 1238 return this; 1239 } 1240 1241 /** 1242 * Queries the anchoring of region bounds for this matcher. 1243 * 1244 * <p> This method returns <tt>true</tt> if this matcher uses 1245 * <i>anchoring</i> bounds, <tt>false</tt> otherwise. 1246 * 1247 * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a 1248 * description of anchoring bounds. 1249 * 1250 * <p> By default, a matcher uses anchoring region boundaries. 1251 * 1252 * @return <tt>true</tt> iff this matcher is using anchoring bounds, 1253 * <tt>false</tt> otherwise. 
1254 * @see java.util.regex.Matcher#useAnchoringBounds(boolean) 1255 * @since 1.5 1256 */ 1257 public boolean hasAnchoringBounds() { 1258 return anchoringBounds; 1259 } 1260 1261 /** 1262 * Sets the anchoring of region bounds for this matcher. 1263 * 1264 * <p> Invoking this method with an argument of <tt>true</tt> will set this 1265 * matcher to use <i>anchoring</i> bounds. If the boolean 1266 * argument is <tt>false</tt>, then <i>non-anchoring</i> bounds will be 1267 * used. 1268 * 1269 * <p> Using anchoring bounds, the boundaries of this 1270 * matcher's region match anchors such as ^ and $. 1271 * 1272 * <p> Without anchoring bounds, the boundaries of this 1273 * matcher's region will not match anchors such as ^ and $. 1274 * 1275 * <p> By default, a matcher uses anchoring region boundaries. 1276 * 1277 * @param b a boolean indicating whether or not to use anchoring bounds. 1278 * @return this matcher 1279 * @see java.util.regex.Matcher#hasAnchoringBounds 1280 * @since 1.5 1281 */ 1282 public Matcher useAnchoringBounds(boolean b) { 1283 anchoringBounds = b; 1284 return this; 1285 } 1286 1287 /** 1288 * <p>Returns the string representation of this matcher. The 1289 * string representation of a <code>Matcher</code> contains information 1290 * that may be useful for debugging. The exact format is unspecified. 1291 * 1292 * @return The string representation of this matcher 1293 * @since 1.5 1294 */ 1295 public String toString() { 1296 StringBuilder sb = new StringBuilder(); 1297 sb.append("java.util.regex.Matcher"); 1298 sb.append("[pattern=" + pattern()); 1299 sb.append(" region="); 1300 sb.append(regionStart() + "," + regionEnd()); 1301 sb.append(" lastmatch="); 1302 if ((first >= 0) && (group() != null)) { 1303 sb.append(group()); 1304 } 1305 sb.append("]"); 1306 return sb.toString(); 1307 } 1308 1309 /** 1310 * <p>Returns true if the end of input was hit by the search engine in 1311 * the last match operation performed by this matcher. 
1312 * 1313 * <p>When this method returns true, then it is possible that more input 1314 * would have changed the result of the last search. 1315 * 1316 * @return true iff the end of input was hit in the last match; false 1317 * otherwise 1318 * @since 1.5 1319 */ 1320 public boolean hitEnd() { 1321 return hitEnd; 1322 } 1323 1324 /** 1325 * <p>Returns true if more input could change a positive match into a 1326 * negative one. 1327 * 1328 * <p>If this method returns true, and a match was found, then more 1329 * input could cause the match to be lost. If this method returns false 1330 * and a match was found, then more input might change the match but the 1331 * match won't be lost. If a match was not found, then requireEnd has no 1332 * meaning. 1333 * 1334 * @return true iff more input could change a positive match into a 1335 * negative one. 1336 * @since 1.5 1337 */ 1338 public boolean requireEnd() { 1339 return requireEnd; 1340 } 1341 1342 /** 1343 * Initiates a search to find a Pattern within the given bounds. 1344 * The groups are filled with default values and the match of the root 1345 * of the state machine is called. The state machine will hold the state 1346 * of the match as it proceeds in this matcher. 1347 * 1348 * Matcher.from is not set here, because it is the "hard" boundary 1349 * of the start of the search which anchors will set to. The from param 1350 * is the "soft" boundary of the start of the search, meaning that the 1351 * regex tries to match at that index but ^ won't match there. Subsequent 1352 * calls to the search methods start at a new "soft" boundary which is 1353 * the end of the previous match. 1354 */ 1355 boolean search(int from) { 1356 this.hitEnd = false; 1357 this.requireEnd = false; 1358 from = from < 0 ? 0 : from; 1359 this.first = from; 1360 this.oldLast = oldLast < 0 ? 
from : oldLast; 1361 for (int i = 0; i < groups.length; i++) 1362 groups[i] = -1; 1363 acceptMode = NOANCHOR; 1364 boolean result = parentPattern.root.match(this, from, text); 1365 if (!result) 1366 this.first = -1; 1367 this.oldLast = this.last; 1368 return result; 1369 } 1370 1371 /** 1372 * Initiates a search for an anchored match to a Pattern within the given 1373 * bounds. The groups are filled with default values and the match of the 1374 * root of the state machine is called. The state machine will hold the 1375 * state of the match as it proceeds in this matcher. 1376 */ 1377 boolean match(int from, int anchor) { 1378 this.hitEnd = false; 1379 this.requireEnd = false; 1380 from = from < 0 ? 0 : from; 1381 this.first = from; 1382 this.oldLast = oldLast < 0 ? from : oldLast; 1383 for (int i = 0; i < groups.length; i++) 1384 groups[i] = -1; 1385 acceptMode = anchor; 1386 boolean result = parentPattern.matchRoot.match(this, from, text); 1387 if (!result) 1388 this.first = -1; 1389 this.oldLast = this.last; 1390 return result; 1391 } 1392 1393 /** 1394 * Returns the end index of the text. 1395 * 1396 * @return the index after the last character in the text 1397 */ 1398 int getTextLength() { 1399 return text.length(); 1400 } 1401 1402 /** 1403 * Generates a String from this Matcher's input in the specified range. 1404 * 1405 * @param beginIndex the beginning index, inclusive 1406 * @param endIndex the ending index, exclusive 1407 * @return A String generated from this Matcher's input 1408 */ 1409 CharSequence getSubSequence(int beginIndex, int endIndex) { 1410 return text.subSequence(beginIndex, endIndex); 1411 } 1412 1413 /** 1414 * Returns this Matcher's input character at index i. 1415 * 1416 * @return A char from the specified index 1417 */ 1418 char charAt(int i) { 1419 return text.charAt(i); 1420 } 1421 1422 /** 1423 * Returns the group index of the matched capturing group. 
1424 * 1425 * @return the index of the named-capturing group 1426 */ 1427 int getMatchedGroupIndex(String name) { 1428 Objects.requireNonNull(name, "Group name"); 1429 if (first < 0) 1430 throw new IllegalStateException("No match found"); 1431 if (!parentPattern.namedGroups().containsKey(name)) 1432 throw new IllegalArgumentException("No group with name <" + name + ">"); 1433 return parentPattern.namedGroups().get(name); 1434 } 1435 }