1 /*
   2  * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.util.regex;
  27 
  28 import java.util.ConcurrentModificationException;
  29 import java.util.Iterator;
  30 import java.util.NoSuchElementException;
  31 import java.util.Objects;
  32 import java.util.Spliterator;
  33 import java.util.Spliterators;
  34 import java.util.function.Consumer;
  35 import java.util.function.Function;
  36 import java.util.stream.Stream;
  37 import java.util.stream.StreamSupport;
  38 
  39 /**
  40  * An engine that performs match operations on a {@linkplain java.lang.CharSequence
  41  * character sequence} by interpreting a {@link Pattern}.
  42  *
  43  * <p> A matcher is created from a pattern by invoking the pattern's {@link
  44  * Pattern#matcher matcher} method.  Once created, a matcher can be used to
  45  * perform three different kinds of match operations:
  46  *
  47  * <ul>
  48  *
  49  *   <li><p> The {@link #matches matches} method attempts to match the entire
  50  *   input sequence against the pattern.  </p></li>
  51  *
  52  *   <li><p> The {@link #lookingAt lookingAt} method attempts to match the
  53  *   input sequence, starting at the beginning, against the pattern.  </p></li>
  54  *
  55  *   <li><p> The {@link #find find} method scans the input sequence looking for
  56  *   the next subsequence that matches the pattern.  </p></li>
  57  *
  58  * </ul>
  59  *
  60  * <p> Each of these methods returns a boolean indicating success or failure.
  61  * More information about a successful match can be obtained by querying the
  62  * state of the matcher.
  63  *
  64  * <p> A matcher finds matches in a subset of its input called the
  65  * <i>region</i>. By default, the region contains all of the matcher's input.
  66  * The region can be modified via the {@link #region region} method and queried
  67  * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd}
  68  * methods. The way that the region boundaries interact with some pattern
  69  * constructs can be changed. See {@link #useAnchoringBounds
  70  * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds}
  71  * for more details.
  72  *
  73  * <p> This class also defines methods for replacing matched subsequences with
  74  * new strings whose contents can, if desired, be computed from the match
  75  * result.  The {@link #appendReplacement appendReplacement} and {@link
  76  * #appendTail appendTail} methods can be used in tandem in order to collect
  77  * the result into an existing string buffer or string builder. Alternatively,
  78  * the more convenient {@link #replaceAll replaceAll} method can be used to
  79  * create a string in which every matching subsequence in the input sequence
  80  * is replaced.
  81  *
  82  * <p> The explicit state of a matcher includes the start and end indices of
  83  * the most recent successful match.  It also includes the start and end
  84  * indices of the input subsequence captured by each <a
  85  * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total
  86  * count of such subsequences.  As a convenience, methods are also provided for
  87  * returning these captured subsequences in string form.
  88  *
  89  * <p> The explicit state of a matcher is initially undefined; attempting to
  90  * query any part of it before a successful match will cause an {@link
  91  * IllegalStateException} to be thrown.  The explicit state of a matcher is
  92  * recomputed by every match operation.
  93  *
  94  * <p> The implicit state of a matcher includes the input character sequence as
  95  * well as the <i>append position</i>, which is initially zero and is updated
  96  * by the {@link #appendReplacement appendReplacement} method.
  97  *
  98  * <p> A matcher may be reset explicitly by invoking its {@link #reset()}
  99  * method or, if a new input sequence is desired, its {@link
 100  * #reset(java.lang.CharSequence) reset(CharSequence)} method.  Resetting a
 101  * matcher discards its explicit state information and sets the append position
 102  * to zero.
 103  *
 104  * <p> Instances of this class are not safe for use by multiple concurrent
 105  * threads. </p>
 106  *
 107  *
 108  * @author      Mike McCloskey
 109  * @author      Mark Reinhold
 110  * @author      JSR-51 Expert Group
 111  * @since       1.4
 112  * @spec        JSR-51
 113  */
 114 
 115 public final class Matcher implements MatchResult {
 116 
 117     /**
 118      * The Pattern object that created this Matcher.
 119      */
 120     Pattern parentPattern;
 121 
 122     /**
 123      * The storage used by groups. They may contain invalid values if
 124      * a group was skipped during the matching.
 125      */
 126     int[] groups;
 127 
 128     /**
 129      * The range within the sequence that is to be matched. Anchors
 130      * will match at these "hard" boundaries. Changing the region
 131      * changes these values.
 132      */
 133     int from, to;
 134 
 135     /**
 136      * Lookbehind uses this value to ensure that the subexpression
 137      * match ends at the point where the lookbehind was encountered.
 138      */
 139     int lookbehindTo;
 140 
 141     /**
 142      * The original string being matched.
 143      */
 144     CharSequence text;
 145 
 146     /**
 147      * Matcher state used by the last node. NOANCHOR is used when a
 148      * match does not have to consume all of the input. ENDANCHOR is
 149      * the mode used for matching all the input.
 150      */
 151     static final int ENDANCHOR = 1;
 152     static final int NOANCHOR = 0;
 153     int acceptMode = NOANCHOR;
 154 
 155     /**
 156      * The range of string that last matched the pattern. If the last
 157      * match failed then first is -1; last initially holds 0 then it
 158      * holds the index of the end of the last match (which is where the
 159      * next search starts).
 160      */
 161     int first = -1, last = 0;
 162 
 163     /**
 164      * The end index of what matched in the last match operation.
 165      */
 166     int oldLast = -1;
 167 
 168     /**
 169      * The index of the last position appended in a substitution.
 170      */
 171     int lastAppendPosition = 0;
 172 
 173     /**
 174      * Storage used by nodes to tell what repetition they are on in
 175      * a pattern, and where groups begin. The nodes themselves are stateless,
 176      * so they rely on this field to hold state during a match.
 177      */
 178     int[] locals;
 179 
 180     /**
 181      * Boolean indicating whether or not more input could change
 182      * the results of the last match.
 183      *
 184      * If hitEnd is true, and a match was found, then more input
 185      * might cause a different match to be found.
 186      * If hitEnd is true and a match was not found, then more
 187      * input could cause a match to be found.
 188      * If hitEnd is false and a match was found, then more input
 189      * will not change the match.
 190      * If hitEnd is false and a match was not found, then more
 191      * input will not cause a match to be found.
 192      */
 193     boolean hitEnd;
 194 
 195     /**
 196      * Boolean indicating whether or not more input could change
 197      * a positive match into a negative one.
 198      *
 199      * If requireEnd is true, and a match was found, then more
 200      * input could cause the match to be lost.
 201      * If requireEnd is false and a match was found, then more
 202      * input might change the match but the match won't be lost.
 203      * If a match was not found, then requireEnd has no meaning.
 204      */
 205     boolean requireEnd;
 206 
 207     /**
 208      * If transparentBounds is true then the boundaries of this
 209      * matcher's region are transparent to lookahead, lookbehind,
 210      * and boundary matching constructs that try to see beyond them.
 211      */
 212     boolean transparentBounds = false;
 213 
 214     /**
 215      * If anchoringBounds is true then the boundaries of this
 216      * matcher's region match anchors such as ^ and $.
 217      */
 218     boolean anchoringBounds = true;
 219 
 220     /**
 221      * Number of times this matcher's state has been modified
 222      */
 223     int modCount;
 224 
 225     /**
     * Package-private no-argument constructor; it initializes no state.
 227      */
 228     Matcher() {
 229     }
 230 
 231     /**
 232      * All matchers have the state used by Pattern during a match.
 233      */
 234     Matcher(Pattern parent, CharSequence text) {
 235         this.parentPattern = parent;
 236         this.text = text;
 237 
 238         // Allocate state storage
 239         int parentGroupCount = Math.max(parent.capturingGroupCount, 10);
 240         groups = new int[parentGroupCount * 2];
 241         locals = new int[parent.localCount];
 242 
 243         // Put fields into initial states
 244         reset();
 245     }
 246 
 247     /**
 248      * Returns the pattern that is interpreted by this matcher.
 249      *
 250      * @return  The pattern for which this matcher was created
 251      */
 252     public Pattern pattern() {
 253         return parentPattern;
 254     }
 255 
 256     /**
 257      * Returns the match state of this matcher as a {@link MatchResult}.
 258      * The result is unaffected by subsequent operations performed upon this
 259      * matcher.
 260      *
 261      * @return  a <code>MatchResult</code> with the state of this matcher
 262      * @since 1.5
 263      */
 264     public MatchResult toMatchResult() {
 265         return toMatchResult(text.toString());
 266     }
 267 
 268     private MatchResult toMatchResult(String text) {
 269         return new ImmutableMatchResult(this.first,
 270                                         this.last,
 271                                         groupCount(),
 272                                         this.groups.clone(),
 273                                         text);
 274     }
 275 
 276     private static class ImmutableMatchResult implements MatchResult {
 277         private final int first;
 278         private final int last;
 279         private final int[] groups;
 280         private final int groupCount;
 281         private final String text;
 282 
 283         ImmutableMatchResult(int first, int last, int groupCount,
 284                              int groups[], String text)
 285         {
 286             this.first = first;
 287             this.last = last;
 288             this.groupCount = groupCount;
 289             this.groups = groups;
 290             this.text = text;
 291         }
 292 
 293         @Override
 294         public int start() {
 295             checkMatch();
 296             return first;
 297         }
 298 
 299         @Override
 300         public int start(int group) {
 301             checkMatch();
 302             if (group < 0 || group > groupCount)
 303                 throw new IndexOutOfBoundsException("No group " + group);
 304             return groups[group * 2];
 305         }
 306 
 307         @Override
 308         public int end() {
 309             checkMatch();
 310             return last;
 311         }
 312 
 313         @Override
 314         public int end(int group) {
 315             checkMatch();
 316             if (group < 0 || group > groupCount)
 317                 throw new IndexOutOfBoundsException("No group " + group);
 318             return groups[group * 2 + 1];
 319         }
 320 
 321         @Override
 322         public int groupCount() {
 323             return groupCount;
 324         }
 325 
 326         @Override
 327         public String group() {
 328             checkMatch();
 329             return group(0);
 330         }
 331 
 332         @Override
 333         public String group(int group) {
 334             checkMatch();
 335             if (group < 0 || group > groupCount)
 336                 throw new IndexOutOfBoundsException("No group " + group);
 337             if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
 338                 return null;
 339             return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString();
 340         }
 341 
 342         private void checkMatch() {
 343             if (first < 0)
 344                 throw new IllegalStateException("No match found");
 345 
 346         }
 347     }
 348 
 349     /**
 350       * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to
 351       * find matches with.
 352       *
 353       * <p> This method causes this matcher to lose information
 354       * about the groups of the last match that occurred. The
 355       * matcher's position in the input is maintained and its
 356       * last append position is unaffected.</p>
 357       *
 358       * @param  newPattern
 359       *         The new pattern used by this matcher
 360       * @return  This matcher
 361       * @throws  IllegalArgumentException
 362       *          If newPattern is <tt>null</tt>
 363       * @since 1.5
 364       */
 365     public Matcher usePattern(Pattern newPattern) {
 366         if (newPattern == null)
 367             throw new IllegalArgumentException("Pattern cannot be null");
 368         parentPattern = newPattern;
 369 
 370         // Reallocate state storage
 371         int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10);
 372         groups = new int[parentGroupCount * 2];
 373         locals = new int[newPattern.localCount];
 374         for (int i = 0; i < groups.length; i++)
 375             groups[i] = -1;
 376         for (int i = 0; i < locals.length; i++)
 377             locals[i] = -1;
 378         modCount++;
 379         return this;
 380     }
 381 
 382     /**
 383      * Resets this matcher.
 384      *
 385      * <p> Resetting a matcher discards all of its explicit state information
 386      * and sets its append position to zero. The matcher's region is set to the
 387      * default region, which is its entire character sequence. The anchoring
 388      * and transparency of this matcher's region boundaries are unaffected.
 389      *
 390      * @return  This matcher
 391      */
 392     public Matcher reset() {
 393         first = -1;
 394         last = 0;
 395         oldLast = -1;
 396         for(int i=0; i<groups.length; i++)
 397             groups[i] = -1;
 398         for(int i=0; i<locals.length; i++)
 399             locals[i] = -1;
 400         lastAppendPosition = 0;
 401         from = 0;
 402         to = getTextLength();
 403         modCount++;
 404         return this;
 405     }
 406 
 407     /**
 408      * Resets this matcher with a new input sequence.
 409      *
 410      * <p> Resetting a matcher discards all of its explicit state information
 411      * and sets its append position to zero.  The matcher's region is set to
 412      * the default region, which is its entire character sequence.  The
 413      * anchoring and transparency of this matcher's region boundaries are
 414      * unaffected.
 415      *
 416      * @param  input
 417      *         The new input character sequence
 418      *
 419      * @return  This matcher
 420      */
 421     public Matcher reset(CharSequence input) {
 422         text = input;
 423         return reset();
 424     }
 425 
 426     /**
 427      * Returns the start index of the previous match.
 428      *
 429      * @return  The index of the first character matched
 430      *
 431      * @throws  IllegalStateException
 432      *          If no match has yet been attempted,
 433      *          or if the previous match operation failed
 434      */
 435     public int start() {
 436         if (first < 0)
 437             throw new IllegalStateException("No match available");
 438         return first;
 439     }
 440 
 441     /**
 442      * Returns the start index of the subsequence captured by the given group
 443      * during the previous match operation.
 444      *
 445      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
 446      * to right, starting at one.  Group zero denotes the entire pattern, so
 447      * the expression <i>m.</i><tt>start(0)</tt> is equivalent to
 448      * <i>m.</i><tt>start()</tt>.  </p>
 449      *
 450      * @param  group
 451      *         The index of a capturing group in this matcher's pattern
 452      *
 453      * @return  The index of the first character captured by the group,
 454      *          or <tt>-1</tt> if the match was successful but the group
 455      *          itself did not match anything
 456      *
 457      * @throws  IllegalStateException
 458      *          If no match has yet been attempted,
 459      *          or if the previous match operation failed
 460      *
 461      * @throws  IndexOutOfBoundsException
 462      *          If there is no capturing group in the pattern
 463      *          with the given index
 464      */
 465     public int start(int group) {
 466         if (first < 0)
 467             throw new IllegalStateException("No match available");
 468         if (group < 0 || group > groupCount())
 469             throw new IndexOutOfBoundsException("No group " + group);
 470         return groups[group * 2];
 471     }
 472 
 473     /**
 474      * Returns the start index of the subsequence captured by the given
 475      * <a href="Pattern.html#groupname">named-capturing group</a> during the
 476      * previous match operation.
 477      *
 478      * @param  name
 479      *         The name of a named-capturing group in this matcher's pattern
 480      *
 481      * @return  The index of the first character captured by the group,
 482      *          or {@code -1} if the match was successful but the group
 483      *          itself did not match anything
 484      *
 485      * @throws  IllegalStateException
 486      *          If no match has yet been attempted,
 487      *          or if the previous match operation failed
 488      *
 489      * @throws  IllegalArgumentException
 490      *          If there is no capturing group in the pattern
 491      *          with the given name
 492      * @since 1.8
 493      */
 494     public int start(String name) {
 495         return groups[getMatchedGroupIndex(name) * 2];
 496     }
 497 
 498     /**
 499      * Returns the offset after the last character matched.
 500      *
 501      * @return  The offset after the last character matched
 502      *
 503      * @throws  IllegalStateException
 504      *          If no match has yet been attempted,
 505      *          or if the previous match operation failed
 506      */
 507     public int end() {
 508         if (first < 0)
 509             throw new IllegalStateException("No match available");
 510         return last;
 511     }
 512 
 513     /**
 514      * Returns the offset after the last character of the subsequence
 515      * captured by the given group during the previous match operation.
 516      *
 517      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
 518      * to right, starting at one.  Group zero denotes the entire pattern, so
 519      * the expression <i>m.</i><tt>end(0)</tt> is equivalent to
 520      * <i>m.</i><tt>end()</tt>.  </p>
 521      *
 522      * @param  group
 523      *         The index of a capturing group in this matcher's pattern
 524      *
 525      * @return  The offset after the last character captured by the group,
 526      *          or <tt>-1</tt> if the match was successful
 527      *          but the group itself did not match anything
 528      *
 529      * @throws  IllegalStateException
 530      *          If no match has yet been attempted,
 531      *          or if the previous match operation failed
 532      *
 533      * @throws  IndexOutOfBoundsException
 534      *          If there is no capturing group in the pattern
 535      *          with the given index
 536      */
 537     public int end(int group) {
 538         if (first < 0)
 539             throw new IllegalStateException("No match available");
 540         if (group < 0 || group > groupCount())
 541             throw new IndexOutOfBoundsException("No group " + group);
 542         return groups[group * 2 + 1];
 543     }
 544 
 545     /**
 546      * Returns the offset after the last character of the subsequence
 547      * captured by the given <a href="Pattern.html#groupname">named-capturing
 548      * group</a> during the previous match operation.
 549      *
 550      * @param  name
 551      *         The name of a named-capturing group in this matcher's pattern
 552      *
 553      * @return  The offset after the last character captured by the group,
 554      *          or {@code -1} if the match was successful
 555      *          but the group itself did not match anything
 556      *
 557      * @throws  IllegalStateException
 558      *          If no match has yet been attempted,
 559      *          or if the previous match operation failed
 560      *
 561      * @throws  IllegalArgumentException
 562      *          If there is no capturing group in the pattern
 563      *          with the given name
 564      * @since 1.8
 565      */
 566     public int end(String name) {
 567         return groups[getMatchedGroupIndex(name) * 2 + 1];
 568     }
 569 
 570     /**
 571      * Returns the input subsequence matched by the previous match.
 572      *
 573      * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
 574      * the expressions <i>m.</i><tt>group()</tt> and
 575      * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt>&nbsp;<i>m.</i><tt>end())</tt>
 576      * are equivalent.  </p>
 577      *
 578      * <p> Note that some patterns, for example <tt>a*</tt>, match the empty
 579      * string.  This method will return the empty string when the pattern
 580      * successfully matches the empty string in the input.  </p>
 581      *
 582      * @return The (possibly empty) subsequence matched by the previous match,
 583      *         in string form
 584      *
 585      * @throws  IllegalStateException
 586      *          If no match has yet been attempted,
 587      *          or if the previous match operation failed
 588      */
 589     public String group() {
 590         return group(0);
 591     }
 592 
 593     /**
 594      * Returns the input subsequence captured by the given group during the
 595      * previous match operation.
 596      *
 597      * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index
 598      * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and
 599      * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt>&nbsp;<i>m.</i><tt>end(</tt><i>g</i><tt>))</tt>
 600      * are equivalent.  </p>
 601      *
 602      * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
 603      * to right, starting at one.  Group zero denotes the entire pattern, so
 604      * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>.
 605      * </p>
 606      *
 607      * <p> If the match was successful but the group specified failed to match
 608      * any part of the input sequence, then <tt>null</tt> is returned. Note
 609      * that some groups, for example <tt>(a*)</tt>, match the empty string.
 610      * This method will return the empty string when such a group successfully
 611      * matches the empty string in the input.  </p>
 612      *
 613      * @param  group
 614      *         The index of a capturing group in this matcher's pattern
 615      *
 616      * @return  The (possibly empty) subsequence captured by the group
 617      *          during the previous match, or <tt>null</tt> if the group
 618      *          failed to match part of the input
 619      *
 620      * @throws  IllegalStateException
 621      *          If no match has yet been attempted,
 622      *          or if the previous match operation failed
 623      *
 624      * @throws  IndexOutOfBoundsException
 625      *          If there is no capturing group in the pattern
 626      *          with the given index
 627      */
 628     public String group(int group) {
 629         if (first < 0)
 630             throw new IllegalStateException("No match found");
 631         if (group < 0 || group > groupCount())
 632             throw new IndexOutOfBoundsException("No group " + group);
 633         if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
 634             return null;
 635         return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
 636     }
 637 
 638     /**
 639      * Returns the input subsequence captured by the given
 640      * <a href="Pattern.html#groupname">named-capturing group</a> during the previous
 641      * match operation.
 642      *
 643      * <p> If the match was successful but the group specified failed to match
 644      * any part of the input sequence, then <tt>null</tt> is returned. Note
 645      * that some groups, for example <tt>(a*)</tt>, match the empty string.
 646      * This method will return the empty string when such a group successfully
 647      * matches the empty string in the input.  </p>
 648      *
 649      * @param  name
 650      *         The name of a named-capturing group in this matcher's pattern
 651      *
 652      * @return  The (possibly empty) subsequence captured by the named group
 653      *          during the previous match, or <tt>null</tt> if the group
 654      *          failed to match part of the input
 655      *
 656      * @throws  IllegalStateException
 657      *          If no match has yet been attempted,
 658      *          or if the previous match operation failed
 659      *
 660      * @throws  IllegalArgumentException
 661      *          If there is no capturing group in the pattern
 662      *          with the given name
 663      * @since 1.7
 664      */
 665     public String group(String name) {
 666         int group = getMatchedGroupIndex(name);
 667         if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
 668             return null;
 669         return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
 670     }
 671 
 672     /**
 673      * Returns the number of capturing groups in this matcher's pattern.
 674      *
 675      * <p> Group zero denotes the entire pattern by convention. It is not
 676      * included in this count.
 677      *
 678      * <p> Any non-negative integer smaller than or equal to the value
 679      * returned by this method is guaranteed to be a valid group index for
 680      * this matcher.  </p>
 681      *
 682      * @return The number of capturing groups in this matcher's pattern
 683      */
 684     public int groupCount() {
 685         return parentPattern.capturingGroupCount - 1;
 686     }
 687 
 688     /**
 689      * Attempts to match the entire region against the pattern.
 690      *
 691      * <p> If the match succeeds then more information can be obtained via the
 692      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
 693      *
 694      * @return  <tt>true</tt> if, and only if, the entire region sequence
 695      *          matches this matcher's pattern
 696      */
 697     public boolean matches() {
 698         return match(from, ENDANCHOR);
 699     }
 700 
 701     /**
 702      * Attempts to find the next subsequence of the input sequence that matches
 703      * the pattern.
 704      *
 705      * <p> This method starts at the beginning of this matcher's region, or, if
 706      * a previous invocation of the method was successful and the matcher has
 707      * not since been reset, at the first character not matched by the previous
 708      * match.
 709      *
 710      * <p> If the match succeeds then more information can be obtained via the
 711      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
 712      *
 713      * @return  <tt>true</tt> if, and only if, a subsequence of the input
 714      *          sequence matches this matcher's pattern
 715      */
 716     public boolean find() {
 717         int nextSearchIndex = last;
 718         if (nextSearchIndex == first)
 719             nextSearchIndex++;
 720 
 721         // If next search starts before region, start it at region
 722         if (nextSearchIndex < from)
 723             nextSearchIndex = from;
 724 
 725         // If next search starts beyond region then it fails
 726         if (nextSearchIndex > to) {
 727             for (int i = 0; i < groups.length; i++)
 728                 groups[i] = -1;
 729             return false;
 730         }
 731         return search(nextSearchIndex);
 732     }
 733 
 734     /**
 735      * Resets this matcher and then attempts to find the next subsequence of
 736      * the input sequence that matches the pattern, starting at the specified
 737      * index.
 738      *
 739      * <p> If the match succeeds then more information can be obtained via the
 740      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent
 741      * invocations of the {@link #find()} method will start at the first
 742      * character not matched by this match.  </p>
 743      *
 744      * @param start the index to start searching for a match
 745      * @throws  IndexOutOfBoundsException
 746      *          If start is less than zero or if start is greater than the
 747      *          length of the input sequence.
 748      *
 749      * @return  <tt>true</tt> if, and only if, a subsequence of the input
 750      *          sequence starting at the given index matches this matcher's
 751      *          pattern
 752      */
 753     public boolean find(int start) {
 754         int limit = getTextLength();
 755         if ((start < 0) || (start > limit))
 756             throw new IndexOutOfBoundsException("Illegal start index");
 757         reset();
 758         return search(start);
 759     }
 760 
 761     /**
 762      * Attempts to match the input sequence, starting at the beginning of the
 763      * region, against the pattern.
 764      *
 765      * <p> Like the {@link #matches matches} method, this method always starts
 766      * at the beginning of the region; unlike that method, it does not
 767      * require that the entire region be matched.
 768      *
 769      * <p> If the match succeeds then more information can be obtained via the
 770      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>
 771      *
 772      * @return  <tt>true</tt> if, and only if, a prefix of the input
 773      *          sequence matches this matcher's pattern
 774      */
 775     public boolean lookingAt() {
 776         return match(from, NOANCHOR);
 777     }
 778 
 779     /**
 780      * Returns a literal replacement <code>String</code> for the specified
 781      * <code>String</code>.
 782      *
 783      * This method produces a <code>String</code> that will work
 784      * as a literal replacement <code>s</code> in the
 785      * <code>appendReplacement</code> method of the {@link Matcher} class.
 786      * The <code>String</code> produced will match the sequence of characters
     * in <code>s</code> treated as a literal sequence. Backslashes ('\') and
 788      * dollar signs ('$') will be given no special meaning.
 789      *
 790      * @param  s The string to be literalized
 791      * @return  A literal string replacement
 792      * @since 1.5
 793      */
 794     public static String quoteReplacement(String s) {
 795         if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1))
 796             return s;
 797         StringBuilder sb = new StringBuilder();
 798         for (int i=0; i<s.length(); i++) {
 799             char c = s.charAt(i);
 800             if (c == '\\' || c == '$') {
 801                 sb.append('\\');
 802             }
 803             sb.append(c);
 804         }
 805         return sb.toString();
 806     }
 807 
 808     /**
 809      * Implements a non-terminal append-and-replace step.
 810      *
 811      * <p> This method performs the following actions: </p>
 812      *
 813      * <ol>
 814      *
 815      *   <li><p> It reads characters from the input sequence, starting at the
 816      *   append position, and appends them to the given string buffer.  It
 817      *   stops after reading the last character preceding the previous match,
 818      *   that is, the character at index {@link
 819      *   #start()}&nbsp;<tt>-</tt>&nbsp;<tt>1</tt>.  </p></li>
 820      *
 821      *   <li><p> It appends the given replacement string to the string buffer.
 822      *   </p></li>
 823      *
 824      *   <li><p> It sets the append position of this matcher to the index of
 825      *   the last character matched, plus one, that is, to {@link #end()}.
 826      *   </p></li>
 827      *
 828      * </ol>
 829      *
 830      * <p> The replacement string may contain references to subsequences
 831      * captured during the previous match: Each occurrence of
 832      * <tt>${</tt><i>name</i><tt>}</tt> or <tt>$</tt><i>g</i>
 833      * will be replaced by the result of evaluating the corresponding
 834      * {@link #group(String) group(name)} or {@link #group(int) group(g)}
 835      * respectively. For  <tt>$</tt><i>g</i>,
 836      * the first number after the <tt>$</tt> is always treated as part of
 837      * the group reference. Subsequent numbers are incorporated into g if
 838      * they would form a legal group reference. Only the numerals '0'
 839      * through '9' are considered as potential components of the group
 840      * reference. If the second group matched the string <tt>"foo"</tt>, for
 841      * example, then passing the replacement string <tt>"$2bar"</tt> would
 842      * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar
 843      * sign (<tt>$</tt>) may be included as a literal in the replacement
 844      * string by preceding it with a backslash (<tt>\$</tt>).
 845      *
 846      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
 847      * the replacement string may cause the results to be different than if it
 848      * were being treated as a literal replacement string. Dollar signs may be
 849      * treated as references to captured subsequences as described above, and
 850      * backslashes are used to escape literal characters in the replacement
 851      * string.
 852      *
 853      * <p> This method is intended to be used in a loop together with the
 854      * {@link #appendTail appendTail} and {@link #find find} methods.  The
 855      * following code, for example, writes <tt>one dog two dogs in the
 856      * yard</tt> to the standard-output stream: </p>
 857      *
 858      * <blockquote><pre>
 859      * Pattern p = Pattern.compile("cat");
 860      * Matcher m = p.matcher("one cat two cats in the yard");
 861      * StringBuffer sb = new StringBuffer();
 862      * while (m.find()) {
 863      *     m.appendReplacement(sb, "dog");
 864      * }
 865      * m.appendTail(sb);
 866      * System.out.println(sb.toString());</pre></blockquote>
 867      *
 868      * @param  sb
 869      *         The target string buffer
 870      *
 871      * @param  replacement
 872      *         The replacement string
 873      *
 874      * @return  This matcher
 875      *
 876      * @throws  IllegalStateException
 877      *          If no match has yet been attempted,
 878      *          or if the previous match operation failed
 879      *
 880      * @throws  IllegalArgumentException
 881      *          If the replacement string refers to a named-capturing
 882      *          group that does not exist in the pattern
 883      *
 884      * @throws  IndexOutOfBoundsException
 885      *          If the replacement string refers to a capturing group
 886      *          that does not exist in the pattern
 887      */
 888     public Matcher appendReplacement(StringBuffer sb, String replacement) {
 889         // If no match, return error
 890         if (first < 0)
 891             throw new IllegalStateException("No match available");
 892         StringBuilder result = new StringBuilder();
 893         appendExpandedReplacement(replacement, result);
 894         // Append the intervening text
 895         sb.append(text, lastAppendPosition, first);
 896         // Append the match substitution
 897         sb.append(result);
 898         lastAppendPosition = last;
 899         modCount++;
 900         return this;
 901     }
 902 
 903     /**
 904      * Implements a non-terminal append-and-replace step.
 905      *
 906      * <p> This method performs the following actions: </p>
 907      *
 908      * <ol>
 909      *
 910      *   <li><p> It reads characters from the input sequence, starting at the
 911      *   append position, and appends them to the given string builder.  It
 912      *   stops after reading the last character preceding the previous match,
 913      *   that is, the character at index {@link
 914      *   #start()}&nbsp;<tt>-</tt>&nbsp;<tt>1</tt>.  </p></li>
 915      *
 916      *   <li><p> It appends the given replacement string to the string builder.
 917      *   </p></li>
 918      *
 919      *   <li><p> It sets the append position of this matcher to the index of
 920      *   the last character matched, plus one, that is, to {@link #end()}.
 921      *   </p></li>
 922      *
 923      * </ol>
 924      *
     * <p> The replacement string may contain references to subsequences
     * captured during the previous match: Each occurrence of
     * <tt>${</tt><i>name</i><tt>}</tt> or <tt>$</tt><i>g</i>
     * will be replaced by the result of evaluating the corresponding
     * {@link #group(String) group(name)} or {@link #group(int) group(g)}
     * respectively. For  <tt>$</tt><i>g</i>,
     * the first number after the <tt>$</tt> is always treated as part of
 930      * the group reference. Subsequent numbers are incorporated into g if
 931      * they would form a legal group reference. Only the numerals '0'
 932      * through '9' are considered as potential components of the group
 933      * reference. If the second group matched the string <tt>"foo"</tt>, for
 934      * example, then passing the replacement string <tt>"$2bar"</tt> would
 935      * cause <tt>"foobar"</tt> to be appended to the string builder. A dollar
 936      * sign (<tt>$</tt>) may be included as a literal in the replacement
 937      * string by preceding it with a backslash (<tt>\$</tt>).
 938      *
 939      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
 940      * the replacement string may cause the results to be different than if it
 941      * were being treated as a literal replacement string. Dollar signs may be
 942      * treated as references to captured subsequences as described above, and
 943      * backslashes are used to escape literal characters in the replacement
 944      * string.
 945      *
 946      * <p> This method is intended to be used in a loop together with the
 947      * {@link #appendTail appendTail} and {@link #find find} methods.  The
 948      * following code, for example, writes <tt>one dog two dogs in the
 949      * yard</tt> to the standard-output stream: </p>
 950      *
 951      * <blockquote><pre>
 952      * Pattern p = Pattern.compile("cat");
 953      * Matcher m = p.matcher("one cat two cats in the yard");
 954      * StringBuilder sb = new StringBuilder();
 955      * while (m.find()) {
 956      *     m.appendReplacement(sb, "dog");
 957      * }
 958      * m.appendTail(sb);
 959      * System.out.println(sb.toString());</pre></blockquote>
 960      *
 961      * @param  sb
 962      *         The target string builder
 963      * @param  replacement
 964      *         The replacement string
 965      * @return  This matcher
 966      *
 967      * @throws  IllegalStateException
 968      *          If no match has yet been attempted,
 969      *          or if the previous match operation failed
 970      * @throws  IllegalArgumentException
 971      *          If the replacement string refers to a named-capturing
 972      *          group that does not exist in the pattern
 973      * @throws  IndexOutOfBoundsException
 974      *          If the replacement string refers to a capturing group
 975      *          that does not exist in the pattern
 976      * @since 1.9
 977      */
 978     public Matcher appendReplacement(StringBuilder sb, String replacement) {
 979         // If no match, return error
 980         if (first < 0)
 981             throw new IllegalStateException("No match available");
 982         StringBuilder result = new StringBuilder();
 983         appendExpandedReplacement(replacement, result);
 984         // Append the intervening text
 985         sb.append(text, lastAppendPosition, first);
 986         // Append the match substitution
 987         sb.append(result);
 988         lastAppendPosition = last;
 989         modCount++;
 990         return this;
 991     }
 992 
 993     /**
 994      * Processes replacement string to replace group references with
 995      * groups.
 996      */
 997     private StringBuilder appendExpandedReplacement(
 998         String replacement, StringBuilder result) {
 999         int cursor = 0;
1000         while (cursor < replacement.length()) {
1001             char nextChar = replacement.charAt(cursor);
1002             if (nextChar == '\\') {
1003                 cursor++;
1004                 if (cursor == replacement.length())
1005                     throw new IllegalArgumentException(
1006                         "character to be escaped is missing");
1007                 nextChar = replacement.charAt(cursor);
1008                 result.append(nextChar);
1009                 cursor++;
1010             } else if (nextChar == '$') {
1011                 // Skip past $
1012                 cursor++;
1013                 // Throw IAE if this "$" is the last character in replacement
1014                 if (cursor == replacement.length())
1015                    throw new IllegalArgumentException(
1016                         "Illegal group reference: group index is missing");
1017                 nextChar = replacement.charAt(cursor);
1018                 int refNum = -1;
1019                 if (nextChar == '{') {
1020                     cursor++;
1021                     StringBuilder gsb = new StringBuilder();
1022                     while (cursor < replacement.length()) {
1023                         nextChar = replacement.charAt(cursor);
1024                         if (ASCII.isLower(nextChar) ||
1025                             ASCII.isUpper(nextChar) ||
1026                             ASCII.isDigit(nextChar)) {
1027                             gsb.append(nextChar);
1028                             cursor++;
1029                         } else {
1030                             break;
1031                         }
1032                     }
1033                     if (gsb.length() == 0)
1034                         throw new IllegalArgumentException(
1035                             "named capturing group has 0 length name");
1036                     if (nextChar != '}')
1037                         throw new IllegalArgumentException(
1038                             "named capturing group is missing trailing '}'");
1039                     String gname = gsb.toString();
1040                     if (ASCII.isDigit(gname.charAt(0)))
1041                         throw new IllegalArgumentException(
1042                             "capturing group name {" + gname +
1043                             "} starts with digit character");
1044                     if (!parentPattern.namedGroups().containsKey(gname))
1045                         throw new IllegalArgumentException(
1046                             "No group with name {" + gname + "}");
1047                     refNum = parentPattern.namedGroups().get(gname);
1048                     cursor++;
1049                 } else {
1050                     // The first number is always a group
1051                     refNum = nextChar - '0';
1052                     if ((refNum < 0) || (refNum > 9))
1053                         throw new IllegalArgumentException(
1054                             "Illegal group reference");
1055                     cursor++;
1056                     // Capture the largest legal group string
1057                     boolean done = false;
1058                     while (!done) {
1059                         if (cursor >= replacement.length()) {
1060                             break;
1061                         }
1062                         int nextDigit = replacement.charAt(cursor) - '0';
1063                         if ((nextDigit < 0) || (nextDigit > 9)) { // not a number
1064                             break;
1065                         }
1066                         int newRefNum = (refNum * 10) + nextDigit;
1067                         if (groupCount() < newRefNum) {
1068                             done = true;
1069                         } else {
1070                             refNum = newRefNum;
1071                             cursor++;
1072                         }
1073                     }
1074                 }
1075                 // Append group
1076                 if (start(refNum) != -1 && end(refNum) != -1)
1077                     result.append(text, start(refNum), end(refNum));
1078             } else {
1079                 result.append(nextChar);
1080                 cursor++;
1081             }
1082         }
1083         return result;
1084     }
1085 
1086     /**
1087      * Implements a terminal append-and-replace step.
1088      *
1089      * <p> This method reads characters from the input sequence, starting at
1090      * the append position, and appends them to the given string buffer.  It is
1091      * intended to be invoked after one or more invocations of the {@link
1092      * #appendReplacement appendReplacement} method in order to copy the
1093      * remainder of the input sequence.  </p>
1094      *
1095      * @param  sb
1096      *         The target string buffer
1097      *
1098      * @return  The target string buffer
1099      */
1100     public StringBuffer appendTail(StringBuffer sb) {
1101         sb.append(text, lastAppendPosition, getTextLength());
1102         return sb;
1103     }
1104 
1105     /**
1106      * Implements a terminal append-and-replace step.
1107      *
1108      * <p> This method reads characters from the input sequence, starting at
1109      * the append position, and appends them to the given string builder.  It is
1110      * intended to be invoked after one or more invocations of the {@link
1111      * #appendReplacement appendReplacement} method in order to copy the
1112      * remainder of the input sequence.  </p>
1113      *
1114      * @param  sb
1115      *         The target string builder
1116      *
1117      * @return  The target string builder
1118      *
1119      * @since 1.9
1120      */
1121     public StringBuilder appendTail(StringBuilder sb) {
1122         sb.append(text, lastAppendPosition, getTextLength());
1123         return sb;
1124     }
1125 
1126     /**
1127      * Replaces every subsequence of the input sequence that matches the
1128      * pattern with the given replacement string.
1129      *
1130      * <p> This method first resets this matcher.  It then scans the input
1131      * sequence looking for matches of the pattern.  Characters that are not
1132      * part of any match are appended directly to the result string; each match
1133      * is replaced in the result by the replacement string.  The replacement
1134      * string may contain references to captured subsequences as in the {@link
1135      * #appendReplacement appendReplacement} method.
1136      *
1137      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
1138      * the replacement string may cause the results to be different than if it
1139      * were being treated as a literal replacement string. Dollar signs may be
1140      * treated as references to captured subsequences as described above, and
1141      * backslashes are used to escape literal characters in the replacement
1142      * string.
1143      *
1144      * <p> Given the regular expression <tt>a*b</tt>, the input
1145      * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string
1146      * <tt>"-"</tt>, an invocation of this method on a matcher for that
1147      * expression would yield the string <tt>"-foo-foo-foo-"</tt>.
1148      *
1149      * <p> Invoking this method changes this matcher's state.  If the matcher
1150      * is to be used in further matching operations then it should first be
1151      * reset.  </p>
1152      *
1153      * @param  replacement
1154      *         The replacement string
1155      *
1156      * @return  The string constructed by replacing each matching subsequence
1157      *          by the replacement string, substituting captured subsequences
1158      *          as needed
1159      */
1160     public String replaceAll(String replacement) {
1161         reset();
1162         boolean result = find();
1163         if (result) {
1164             StringBuilder sb = new StringBuilder();
1165             do {
1166                 appendReplacement(sb, replacement);
1167                 result = find();
1168             } while (result);
1169             appendTail(sb);
1170             return sb.toString();
1171         }
1172         return text.toString();
1173     }
1174 
1175     /**
1176      * Replaces every subsequence of the input sequence that matches the
1177      * pattern with the result of applying the given replacer function to the
1178      * match result of this matcher corresponding to that subsequence.
1179      * Exceptions thrown by the function are relayed to the caller.
1180      *
1181      * <p> This method first resets this matcher.  It then scans the input
1182      * sequence looking for matches of the pattern.  Characters that are not
1183      * part of any match are appended directly to the result string; each match
     * is replaced in the result by applying the replacer function that
1185      * returns a replacement string.  Each replacement string may contain
1186      * references to captured subsequences as in the {@link #appendReplacement
1187      * appendReplacement} method.
1188      *
1189      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
1190      * a replacement string may cause the results to be different than if it
1191      * were being treated as a literal replacement string. Dollar signs may be
1192      * treated as references to captured subsequences as described above, and
1193      * backslashes are used to escape literal characters in the replacement
1194      * string.
1195      *
1196      * <p> Given the regular expression <tt>dog</tt>, the input
1197      * <tt>"zzzdogzzzdogzzz"</tt>, and the function
1198      * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
1199      * a matcher for that expression would yield the string
1200      * <tt>"zzzDOGzzzDOGzzz"</tt>.
1201      *
1202      * <p> Invoking this method changes this matcher's state.  If the matcher
1203      * is to be used in further matching operations then it should first be
1204      * reset.  </p>
1205      *
1206      * <p> The replacer function should not modify this matcher's state during
1207      * replacement.  This method will, on a best-effort basis, throw a
1208      * {@link java.util.ConcurrentModificationException} if such modification is
1209      * detected.
1210      *
1211      * <p> The state of each match result passed to the replacer function is
1212      * guaranteed to be constant only for the duration of the replacer function
1213      * call and only if the replacer function does not modify this matcher's
1214      * state.
1215      *
1216      * @implNote
1217      * This implementation applies the replacer function to this matcher, which
1218      * is an instance of {@code MatchResult}.
1219      *
1220      * @param  replacer
1221      *         The function to be applied to the match result of this matcher
1222      *         that returns a replacement string.
1223      * @return  The string constructed by replacing each matching subsequence
1224      *          with the result of applying the replacer function to that
1225      *          matched subsequence, substituting captured subsequences as
1226      *          needed.
1227      * @throws NullPointerException if the replacer function is null
1228      * @throws ConcurrentModificationException if it is detected, on a
1229      *         best-effort basis, that the replacer function modified this
1230      *         matcher's state
1231      * @since 1.9
1232      */
1233     public String replaceAll(Function<MatchResult, String> replacer) {
1234         Objects.requireNonNull(replacer);
1235         reset();
1236         boolean result = find();
1237         if (result) {
1238             StringBuilder sb = new StringBuilder();
1239             do {
1240                 int ec = modCount;
1241                 String replacement =  replacer.apply(this);
1242                 if (ec != modCount)
1243                     throw new ConcurrentModificationException();
1244                 appendReplacement(sb, replacement);
1245                 result = find();
1246             } while (result);
1247             appendTail(sb);
1248             return sb.toString();
1249         }
1250         return text.toString();
1251     }
1252 
1253     /**
1254      * Returns a stream of match results for each subsequence of the input
1255      * sequence that matches the pattern.  The match results occur in the
1256      * same order as the matching subsequences in the input sequence.
1257      *
1258      * <p> Each match result is produced as if by {@link #toMatchResult()}.
1259      *
1260      * <p> This method does not reset this matcher.  Matching starts on
1261      * initiation of the terminal stream operation either at the beginning of
1262      * this matcher's region, or, if the matcher has not since been reset, at
1263      * the first character not matched by a previous match.
1264      *
1265      * <p> If the matcher is to be used for further matching operations after
1266      * the terminal stream operation completes then it should be first reset.
1267      *
1268      * <p> This matcher's state should not be modified during execution of the
1269      * returned stream's pipeline.  The returned stream's source
1270      * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort
1271      * basis, throw a {@link java.util.ConcurrentModificationException} if such
1272      * modification is detected.
1273      *
1274      * @return a sequential stream of match results.
1275      * @since 1.9
1276      */
1277     public Stream<MatchResult> results() {
1278         class MatchResultIterator implements Iterator<MatchResult> {
1279             // -ve for call to find, 0 for not found, 1 for found
1280             int state = -1;
1281             // State for concurrent modification checking
1282             // -1 for uninitialized
1283             int expectedCount = -1;
1284             // The input sequence as a string, set once only after first find
1285             // Avoids repeated conversion from CharSequence for each match
1286             String textAsString;
1287 
1288             @Override
1289             public MatchResult next() {
1290                 if (expectedCount >= 0 && expectedCount != modCount)
1291                     throw new ConcurrentModificationException();
1292 
1293                 if (!hasNext())
1294                     throw new NoSuchElementException();
1295 
1296                 state = -1;
1297                 return toMatchResult(textAsString);
1298             }
1299 
1300             @Override
1301             public boolean hasNext() {
1302                 if (state >= 0)
1303                     return state == 1;
1304 
1305                 // Defer throwing ConcurrentModificationException to when next
1306                 // or forEachRemaining is called.  The is consistent with other
1307                 // fail-fast implementations.
1308                 if (expectedCount >= 0 && expectedCount != modCount)
1309                     return true;
1310 
1311                 boolean found = find();
1312                 // Capture the input sequence as a string on first find
1313                 if (found && state < 0)
1314                     textAsString = text.toString();
1315                 state = found ? 1 : 0;
1316                 expectedCount = modCount;
1317                 return found;
1318             }
1319 
1320             @Override
1321             public void forEachRemaining(Consumer<? super MatchResult> action) {
1322                 if (expectedCount >= 0 && expectedCount != modCount)
1323                     throw new ConcurrentModificationException();
1324 
1325                 int s = state;
1326                 if (s == 0)
1327                     return;
1328 
1329                 // Set state to report no more elements on further operations
1330                 state = 0;
1331                 expectedCount = -1;
1332 
1333                 // Perform a first find if required
1334                 if (s < 0 && !find())
1335                     return;
1336 
1337                 // Capture the input sequence as a string on first find
1338                 textAsString = text.toString();
1339 
1340                 do {
1341                     int ec = modCount;
1342                     action.accept(toMatchResult(textAsString));
1343                     if (ec != modCount)
1344                         throw new ConcurrentModificationException();
1345                 } while (find());
1346             }
1347         }
1348         return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
1349                 new MatchResultIterator(), Spliterator.ORDERED | Spliterator.NONNULL), false);
1350     }
1351 
1352     /**
1353      * Replaces the first subsequence of the input sequence that matches the
1354      * pattern with the given replacement string.
1355      *
1356      * <p> This method first resets this matcher.  It then scans the input
1357      * sequence looking for a match of the pattern.  Characters that are not
1358      * part of the match are appended directly to the result string; the match
1359      * is replaced in the result by the replacement string.  The replacement
1360      * string may contain references to captured subsequences as in the {@link
1361      * #appendReplacement appendReplacement} method.
1362      *
1363      * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
1364      * the replacement string may cause the results to be different than if it
1365      * were being treated as a literal replacement string. Dollar signs may be
1366      * treated as references to captured subsequences as described above, and
1367      * backslashes are used to escape literal characters in the replacement
1368      * string.
1369      *
1370      * <p> Given the regular expression <tt>dog</tt>, the input
1371      * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string
1372      * <tt>"cat"</tt>, an invocation of this method on a matcher for that
1373      * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>.  </p>
1374      *
1375      * <p> Invoking this method changes this matcher's state.  If the matcher
1376      * is to be used in further matching operations then it should first be
1377      * reset.  </p>
1378      *
1379      * @param  replacement
1380      *         The replacement string
1381      * @return  The string constructed by replacing the first matching
1382      *          subsequence by the replacement string, substituting captured
1383      *          subsequences as needed
1384      */
1385     public String replaceFirst(String replacement) {
1386         if (replacement == null)
1387             throw new NullPointerException("replacement");
1388         reset();
1389         if (!find())
1390             return text.toString();
1391         StringBuilder sb = new StringBuilder();
1392         appendReplacement(sb, replacement);
1393         appendTail(sb);
1394         return sb.toString();
1395     }
1396 
1397     /**
1398      * Replaces the first subsequence of the input sequence that matches the
1399      * pattern with the result of applying the given replacer function to the
1400      * match result of this matcher corresponding to that subsequence.
     * Exceptions thrown by the replacer function are relayed to the caller.
1402      *
1403      * <p> This method first resets this matcher.  It then scans the input
1404      * sequence looking for a match of the pattern.  Characters that are not
1405      * part of the match are appended directly to the result string; the match
     * is replaced in the result by applying the replacer function that
1407      * returns a replacement string.  The replacement string may contain
1408      * references to captured subsequences as in the {@link #appendReplacement
1409      * appendReplacement} method.
1410      *
1411      * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
1412      * the replacement string may cause the results to be different than if it
1413      * were being treated as a literal replacement string. Dollar signs may be
1414      * treated as references to captured subsequences as described above, and
1415      * backslashes are used to escape literal characters in the replacement
1416      * string.
1417      *
1418      * <p> Given the regular expression <tt>dog</tt>, the input
1419      * <tt>"zzzdogzzzdogzzz"</tt>, and the function
1420      * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
1421      * a matcher for that expression would yield the string
1422      * <tt>"zzzDOGzzzdogzzz"</tt>.
1423      *
1424      * <p> Invoking this method changes this matcher's state.  If the matcher
1425      * is to be used in further matching operations then it should first be
1426      * reset.
1427      *
1428      * <p> The replacer function should not modify this matcher's state during
1429      * replacement.  This method will, on a best-effort basis, throw a
1430      * {@link java.util.ConcurrentModificationException} if such modification is
1431      * detected.
1432      *
1433      * <p> The state of the match result passed to the replacer function is
1434      * guaranteed to be constant only for the duration of the replacer function
1435      * call and only if the replacer function does not modify this matcher's
1436      * state.
1437      *
1438      * @implNote
1439      * This implementation applies the replacer function to this matcher, which
1440      * is an instance of {@code MatchResult}.
1441      *
1442      * @param  replacer
1443      *         The function to be applied to the match result of this matcher
1444      *         that returns a replacement string.
1445      * @return  The string constructed by replacing the first matching
1446      *          subsequence with the result of applying the replacer function to
1447      *          the matched subsequence, substituting captured subsequences as
1448      *          needed.
1449      * @throws NullPointerException if the replacer function is null
1450      * @throws ConcurrentModificationException if it is detected, on a
1451      *         best-effort basis, that the replacer function modified this
1452      *         matcher's state
1453      * @since 1.9
1454      */
1455     public String replaceFirst(Function<MatchResult, String> replacer) {
1456         Objects.requireNonNull(replacer);
1457         reset();
1458         if (!find())
1459             return text.toString();
1460         StringBuilder sb = new StringBuilder();
1461         int ec = modCount;
1462         String replacement = replacer.apply(this);
1463         if (ec != modCount)
1464             throw new ConcurrentModificationException();
1465         appendReplacement(sb, replacement);
1466         appendTail(sb);
1467         return sb.toString();
1468     }
1469 
1470     /**
1471      * Sets the limits of this matcher's region. The region is the part of the
1472      * input sequence that will be searched to find a match. Invoking this
1473      * method resets the matcher, and then sets the region to start at the
1474      * index specified by the <code>start</code> parameter and end at the
1475      * index specified by the <code>end</code> parameter.
1476      *
1477      * <p>Depending on the transparency and anchoring being used (see
1478      * {@link #useTransparentBounds useTransparentBounds} and
1479      * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such
1480      * as anchors may behave differently at or around the boundaries of the
1481      * region.
1482      *
1483      * @param  start
1484      *         The index to start searching at (inclusive)
1485      * @param  end
1486      *         The index to end searching at (exclusive)
1487      * @throws  IndexOutOfBoundsException
1488      *          If start or end is less than zero, if
1489      *          start is greater than the length of the input sequence, if
1490      *          end is greater than the length of the input sequence, or if
1491      *          start is greater than end.
1492      * @return  this matcher
1493      * @since 1.5
1494      */
1495     public Matcher region(int start, int end) {
1496         if ((start < 0) || (start > getTextLength()))
1497             throw new IndexOutOfBoundsException("start");
1498         if ((end < 0) || (end > getTextLength()))
1499             throw new IndexOutOfBoundsException("end");
1500         if (start > end)
1501             throw new IndexOutOfBoundsException("start > end");
1502         reset();
1503         from = start;
1504         to = end;
1505         return this;
1506     }
1507 
1508     /**
1509      * Reports the start index of this matcher's region. The
1510      * searches this matcher conducts are limited to finding matches
1511      * within {@link #regionStart regionStart} (inclusive) and
1512      * {@link #regionEnd regionEnd} (exclusive).
1513      *
1514      * @return  The starting point of this matcher's region
1515      * @since 1.5
1516      */
1517     public int regionStart() {
1518         return from;
1519     }
1520 
1521     /**
1522      * Reports the end index (exclusive) of this matcher's region.
1523      * The searches this matcher conducts are limited to finding matches
1524      * within {@link #regionStart regionStart} (inclusive) and
1525      * {@link #regionEnd regionEnd} (exclusive).
1526      *
1527      * @return  the ending point of this matcher's region
1528      * @since 1.5
1529      */
1530     public int regionEnd() {
1531         return to;
1532     }
1533 
1534     /**
1535      * Queries the transparency of region bounds for this matcher.
1536      *
1537      * <p> This method returns <tt>true</tt> if this matcher uses
1538      * <i>transparent</i> bounds, <tt>false</tt> if it uses <i>opaque</i>
1539      * bounds.
1540      *
1541      * <p> See {@link #useTransparentBounds useTransparentBounds} for a
1542      * description of transparent and opaque bounds.
1543      *
1544      * <p> By default, a matcher uses opaque region boundaries.
1545      *
1546      * @return <tt>true</tt> iff this matcher is using transparent bounds,
1547      *         <tt>false</tt> otherwise.
1548      * @see java.util.regex.Matcher#useTransparentBounds(boolean)
1549      * @since 1.5
1550      */
1551     public boolean hasTransparentBounds() {
1552         return transparentBounds;
1553     }
1554 
1555     /**
1556      * Sets the transparency of region bounds for this matcher.
1557      *
1558      * <p> Invoking this method with an argument of <tt>true</tt> will set this
1559      * matcher to use <i>transparent</i> bounds. If the boolean
1560      * argument is <tt>false</tt>, then <i>opaque</i> bounds will be used.
1561      *
1562      * <p> Using transparent bounds, the boundaries of this
1563      * matcher's region are transparent to lookahead, lookbehind,
1564      * and boundary matching constructs. Those constructs can see beyond the
1565      * boundaries of the region to see if a match is appropriate.
1566      *
1567      * <p> Using opaque bounds, the boundaries of this matcher's
1568      * region are opaque to lookahead, lookbehind, and boundary matching
1569      * constructs that may try to see beyond them. Those constructs cannot
1570      * look past the boundaries so they will fail to match anything outside
1571      * of the region.
1572      *
1573      * <p> By default, a matcher uses opaque bounds.
1574      *
1575      * @param  b a boolean indicating whether to use opaque or transparent
1576      *         regions
1577      * @return this matcher
1578      * @see java.util.regex.Matcher#hasTransparentBounds
1579      * @since 1.5
1580      */
1581     public Matcher useTransparentBounds(boolean b) {
1582         transparentBounds = b;
1583         return this;
1584     }
1585 
1586     /**
1587      * Queries the anchoring of region bounds for this matcher.
1588      *
1589      * <p> This method returns <tt>true</tt> if this matcher uses
1590      * <i>anchoring</i> bounds, <tt>false</tt> otherwise.
1591      *
1592      * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a
1593      * description of anchoring bounds.
1594      *
1595      * <p> By default, a matcher uses anchoring region boundaries.
1596      *
1597      * @return <tt>true</tt> iff this matcher is using anchoring bounds,
1598      *         <tt>false</tt> otherwise.
1599      * @see java.util.regex.Matcher#useAnchoringBounds(boolean)
1600      * @since 1.5
1601      */
1602     public boolean hasAnchoringBounds() {
1603         return anchoringBounds;
1604     }
1605 
1606     /**
1607      * Sets the anchoring of region bounds for this matcher.
1608      *
1609      * <p> Invoking this method with an argument of <tt>true</tt> will set this
1610      * matcher to use <i>anchoring</i> bounds. If the boolean
1611      * argument is <tt>false</tt>, then <i>non-anchoring</i> bounds will be
1612      * used.
1613      *
1614      * <p> Using anchoring bounds, the boundaries of this
1615      * matcher's region match anchors such as ^ and $.
1616      *
1617      * <p> Without anchoring bounds, the boundaries of this
1618      * matcher's region will not match anchors such as ^ and $.
1619      *
1620      * <p> By default, a matcher uses anchoring region boundaries.
1621      *
1622      * @param  b a boolean indicating whether or not to use anchoring bounds.
1623      * @return this matcher
1624      * @see java.util.regex.Matcher#hasAnchoringBounds
1625      * @since 1.5
1626      */
1627     public Matcher useAnchoringBounds(boolean b) {
1628         anchoringBounds = b;
1629         return this;
1630     }
1631 
1632     /**
1633      * <p>Returns the string representation of this matcher. The
1634      * string representation of a <code>Matcher</code> contains information
1635      * that may be useful for debugging. The exact format is unspecified.
1636      *
1637      * @return  The string representation of this matcher
1638      * @since 1.5
1639      */
1640     public String toString() {
1641         StringBuilder sb = new StringBuilder();
1642         sb.append("java.util.regex.Matcher")
1643                 .append("[pattern=").append(pattern())
1644                 .append(" region=")
1645                 .append(regionStart()).append(',').append(regionEnd())
1646                 .append(" lastmatch=");
1647         if ((first >= 0) && (group() != null)) {
1648             sb.append(group());
1649         }
1650         sb.append(']');
1651         return sb.toString();
1652     }
1653 
1654     /**
1655      * <p>Returns true if the end of input was hit by the search engine in
1656      * the last match operation performed by this matcher.
1657      *
1658      * <p>When this method returns true, then it is possible that more input
1659      * would have changed the result of the last search.
1660      *
1661      * @return  true iff the end of input was hit in the last match; false
1662      *          otherwise
1663      * @since 1.5
1664      */
1665     public boolean hitEnd() {
1666         return hitEnd;
1667     }
1668 
1669     /**
1670      * <p>Returns true if more input could change a positive match into a
1671      * negative one.
1672      *
1673      * <p>If this method returns true, and a match was found, then more
1674      * input could cause the match to be lost. If this method returns false
1675      * and a match was found, then more input might change the match but the
1676      * match won't be lost. If a match was not found, then requireEnd has no
1677      * meaning.
1678      *
1679      * @return  true iff more input could change a positive match into a
1680      *          negative one.
1681      * @since 1.5
1682      */
1683     public boolean requireEnd() {
1684         return requireEnd;
1685     }
1686 
1687     /**
1688      * Initiates a search to find a Pattern within the given bounds.
1689      * The groups are filled with default values and the match of the root
1690      * of the state machine is called. The state machine will hold the state
1691      * of the match as it proceeds in this matcher.
1692      *
1693      * Matcher.from is not set here, because it is the "hard" boundary
1694      * of the start of the search which anchors will set to. The from param
1695      * is the "soft" boundary of the start of the search, meaning that the
1696      * regex tries to match at that index but ^ won't match there. Subsequent
1697      * calls to the search methods start at a new "soft" boundary which is
1698      * the end of the previous match.
1699      */
1700     boolean search(int from) {
1701         this.hitEnd = false;
1702         this.requireEnd = false;
1703         from        = from < 0 ? 0 : from;
1704         this.first  = from;
1705         this.oldLast = oldLast < 0 ? from : oldLast;
1706         for (int i = 0; i < groups.length; i++)
1707             groups[i] = -1;
1708         acceptMode = NOANCHOR;
1709         boolean result = parentPattern.root.match(this, from, text);
1710         if (!result)
1711             this.first = -1;
1712         this.oldLast = this.last;
1713         this.modCount++;
1714         return result;
1715     }
1716 
1717     /**
1718      * Initiates a search for an anchored match to a Pattern within the given
1719      * bounds. The groups are filled with default values and the match of the
1720      * root of the state machine is called. The state machine will hold the
1721      * state of the match as it proceeds in this matcher.
1722      */
1723     boolean match(int from, int anchor) {
1724         this.hitEnd = false;
1725         this.requireEnd = false;
1726         from        = from < 0 ? 0 : from;
1727         this.first  = from;
1728         this.oldLast = oldLast < 0 ? from : oldLast;
1729         for (int i = 0; i < groups.length; i++)
1730             groups[i] = -1;
1731         acceptMode = anchor;
1732         boolean result = parentPattern.matchRoot.match(this, from, text);
1733         if (!result)
1734             this.first = -1;
1735         this.oldLast = this.last;
1736         this.modCount++;
1737         return result;
1738     }
1739 
1740     /**
1741      * Returns the end index of the text.
1742      *
1743      * @return the index after the last character in the text
1744      */
1745     int getTextLength() {
1746         return text.length();
1747     }
1748 
1749     /**
1750      * Generates a String from this Matcher's input in the specified range.
1751      *
1752      * @param  beginIndex   the beginning index, inclusive
1753      * @param  endIndex     the ending index, exclusive
1754      * @return A String generated from this Matcher's input
1755      */
1756     CharSequence getSubSequence(int beginIndex, int endIndex) {
1757         return text.subSequence(beginIndex, endIndex);
1758     }
1759 
1760     /**
1761      * Returns this Matcher's input character at index i.
1762      *
1763      * @return A char from the specified index
1764      */
1765     char charAt(int i) {
1766         return text.charAt(i);
1767     }
1768 
1769     /**
1770      * Returns the group index of the matched capturing group.
1771      *
1772      * @return the index of the named-capturing group
1773      */
1774     int getMatchedGroupIndex(String name) {
1775         Objects.requireNonNull(name, "Group name");
1776         if (first < 0)
1777             throw new IllegalStateException("No match found");
1778         if (!parentPattern.namedGroups().containsKey(name))
1779             throw new IllegalArgumentException("No group with name <" + name + ">");
1780         return parentPattern.namedGroups().get(name);
1781     }
1782 }