src/java.base/share/classes/java/util/regex/Matcher.java

Print this page




   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.util.regex;
  27 
  28 import java.util.ConcurrentModificationException;

  29 import java.util.Iterator;
  30 import java.util.NoSuchElementException;
  31 import java.util.Objects;
  32 import java.util.Spliterator;
  33 import java.util.Spliterators;
  34 import java.util.function.Consumer;
  35 import java.util.function.Function;
  36 import java.util.stream.Stream;
  37 import java.util.stream.StreamSupport;
  38 
  39 /**
  40  * An engine that performs match operations on a {@linkplain java.lang.CharSequence
  41  * character sequence} by interpreting a {@link Pattern}.
  42  *
  43  * <p> A matcher is created from a pattern by invoking the pattern's {@link
  44  * Pattern#matcher matcher} method.  Once created, a matcher can be used to
  45  * perform three different kinds of match operations:
  46  *
  47  * <ul>
  48  *


 161     int first = -1, last = 0;
 162 
 163     /**
 164      * The end index of what matched in the last match operation.
 165      */
 166     int oldLast = -1;
 167 
 168     /**
 169      * The index of the last position appended in a substitution.
 170      */
 171     int lastAppendPosition = 0;
 172 
 173     /**
 174      * Storage used by nodes to tell what repetition they are on in
 175      * a pattern, and where groups begin. The nodes themselves are stateless,
 176      * so they rely on this field to hold state during a match.
 177      */
 178     int[] locals;
 179 
 180     /**








 181      * Boolean indicating whether or not more input could change
 182      * the results of the last match.
 183      *
 184      * If hitEnd is true, and a match was found, then more input
 185      * might cause a different match to be found.
 186      * If hitEnd is true and a match was not found, then more
 187      * input could cause a match to be found.
 188      * If hitEnd is false and a match was found, then more input
 189      * will not change the match.
 190      * If hitEnd is false and a match was not found, then more
 191      * input will not cause a match to be found.
 192      */
 193     boolean hitEnd;
 194 
 195     /**
 196      * Boolean indicating whether or not more input could change
 197      * a positive match into a negative one.
 198      *
 199      * If requireEnd is true, and a match was found, then more
 200      * input could cause the match to be lost.


 222      */
 223     int modCount;
 224 
 225     /**
 226      * Package-private no-argument constructor; it initializes no state.
 227      */
 228     Matcher() {
 229     }
 230 
 231     /**
 232      * All matchers have the state used by Pattern during a match.
 233      */
 234     Matcher(Pattern parent, CharSequence text) {
 235         this.parentPattern = parent;
 236         this.text = text;
 237 
 238         // Allocate state storage
 239         int parentGroupCount = Math.max(parent.capturingGroupCount, 10);
 240         groups = new int[parentGroupCount * 2];
 241         locals = new int[parent.localCount];

 242 
 243         // Put fields into initial states
 244         reset();
 245     }
 246 
 247     /**
 248      * Returns the pattern that is interpreted by this matcher.
 249      *
 250      * @return  The pattern for which this matcher was created
 251      */
 252     public Pattern pattern() {
 253         return parentPattern;
 254     }
 255 
 256     /**
 257      * Returns the match state of this matcher as a {@link MatchResult}.
 258      * The result is unaffected by subsequent operations performed upon this
 259      * matcher.
 260      *
 261      * @return  a {@code MatchResult} with the state of this matcher


 358       * @param  newPattern
 359       *         The new pattern used by this matcher
 360       * @return  This matcher
 361       * @throws  IllegalArgumentException
 362       *          If newPattern is {@code null}
 363       * @since 1.5
 364       */
 365     public Matcher usePattern(Pattern newPattern) {
 366         if (newPattern == null)
 367             throw new IllegalArgumentException("Pattern cannot be null");
 368         parentPattern = newPattern;
 369 
 370         // Reallocate state storage
 371         int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10);
 372         groups = new int[parentGroupCount * 2];
 373         locals = new int[newPattern.localCount];
 374         for (int i = 0; i < groups.length; i++)
 375             groups[i] = -1;
 376         for (int i = 0; i < locals.length; i++)
 377             locals[i] = -1;

 378         modCount++;
 379         return this;
 380     }
 381 
 382     /**
 383      * Resets this matcher.
 384      *
 385      * <p> Resetting a matcher discards all of its explicit state information
 386      * and sets its append position to zero. The matcher's region is set to the
 387      * default region, which is its entire character sequence. The anchoring
 388      * and transparency of this matcher's region boundaries are unaffected.
 389      *
 390      * @return  This matcher
 391      */
 392     public Matcher reset() {
 393         first = -1;
 394         last = 0;
 395         oldLast = -1;
 396         for(int i=0; i<groups.length; i++)
 397             groups[i] = -1;




   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.util.regex;
  27 
  28 import java.util.ConcurrentModificationException;
  29 import java.util.HashSet;
  30 import java.util.Iterator;
  31 import java.util.NoSuchElementException;
  32 import java.util.Objects;
  33 import java.util.Spliterator;
  34 import java.util.Spliterators;
  35 import java.util.function.Consumer;
  36 import java.util.function.Function;
  37 import java.util.stream.Stream;
  38 import java.util.stream.StreamSupport;
  39 
  40 /**
  41  * An engine that performs match operations on a {@linkplain java.lang.CharSequence
  42  * character sequence} by interpreting a {@link Pattern}.
  43  *
  44  * <p> A matcher is created from a pattern by invoking the pattern's {@link
  45  * Pattern#matcher matcher} method.  Once created, a matcher can be used to
  46  * perform three different kinds of match operations:
  47  *
  48  * <ul>
  49  *


 162     int first = -1, last = 0;
 163 
 164     /**
 165      * The end index of what matched in the last match operation.
 166      */
 167     int oldLast = -1;
 168 
 169     /**
 170      * The index of the last position appended in a substitution.
 171      */
 172     int lastAppendPosition = 0;
 173 
 174     /**
 175      * Storage used by nodes to tell what repetition they are on in
 176      * a pattern, and where groups begin. The nodes themselves are stateless,
 177      * so they rely on this field to hold state during a match.
 178      */
 179     int[] locals;
 180 
 181     /**
 182      * Storage used by top greedy Loop nodes: each node keeps a hash set
 183      * here that records the beginning indices of failed repetition
 184      * matches. The nodes themselves are stateless, so they rely on this
 185      * field to hold state during a match.
 186      */
 187     HashSetInt[] localsPos;
 188 
 189     /**
 190      * Boolean indicating whether or not more input could change
 191      * the results of the last match.
 192      *
 193      * If hitEnd is true, and a match was found, then more input
 194      * might cause a different match to be found.
 195      * If hitEnd is true and a match was not found, then more
 196      * input could cause a match to be found.
 197      * If hitEnd is false and a match was found, then more input
 198      * will not change the match.
 199      * If hitEnd is false and a match was not found, then more
 200      * input will not cause a match to be found.
 201      */
 202     boolean hitEnd;
 203 
 204     /**
 205      * Boolean indicating whether or not more input could change
 206      * a positive match into a negative one.
 207      *
 208      * If requireEnd is true, and a match was found, then more
 209      * input could cause the match to be lost.


 231      */
 232     int modCount;
 233 
 234     /**
 235      * Package-private no-argument constructor; it initializes no state.
 236      */
 237     Matcher() {
 238     }
 239 
 240     /**
 241      * All matchers have the state used by Pattern during a match.
 242      */
 243     Matcher(Pattern parent, CharSequence text) {
 244         this.parentPattern = parent;
 245         this.text = text;
 246 
 247         // Allocate state storage
 248         int parentGroupCount = Math.max(parent.capturingGroupCount, 10);
 249         groups = new int[parentGroupCount * 2];
 250         locals = new int[parent.localCount];
 251         localsPos = new HashSetInt[parent.localTGRGroupCount];
 252 
 253         // Put fields into initial states
 254         reset();
 255     }
 256 
 257     /**
 258      * Returns the pattern that is interpreted by this matcher.
 259      *
 260      * @return  The pattern for which this matcher was created
 261      */
 262     public Pattern pattern() {
 263         return parentPattern;
 264     }
 265 
 266     /**
 267      * Returns the match state of this matcher as a {@link MatchResult}.
 268      * The result is unaffected by subsequent operations performed upon this
 269      * matcher.
 270      *
 271      * @return  a {@code MatchResult} with the state of this matcher


 368       * @param  newPattern
 369       *         The new pattern used by this matcher
 370       * @return  This matcher
 371       * @throws  IllegalArgumentException
 372       *          If newPattern is {@code null}
 373       * @since 1.5
 374       */
 375     public Matcher usePattern(Pattern newPattern) {
 376         if (newPattern == null)
 377             throw new IllegalArgumentException("Pattern cannot be null");
 378         parentPattern = newPattern;
 379 
 380         // Reallocate state storage
 381         int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10);
 382         groups = new int[parentGroupCount * 2];
 383         locals = new int[newPattern.localCount];
 384         for (int i = 0; i < groups.length; i++)
 385             groups[i] = -1;
 386         for (int i = 0; i < locals.length; i++)
 387             locals[i] = -1;
 388         localsPos = new HashSetInt[parentPattern.localTGRGroupCount];
 389         modCount++;
 390         return this;
 391     }
 392 
 393     /**
 394      * Resets this matcher.
 395      *
 396      * <p> Resetting a matcher discards all of its explicit state information
 397      * and sets its append position to zero. The matcher's region is set to the
 398      * default region, which is its entire character sequence. The anchoring
 399      * and transparency of this matcher's region boundaries are unaffected.
 400      *
 401      * @return  This matcher
 402      */
 403     public Matcher reset() {
 404         first = -1;
 405         last = 0;
 406         oldLast = -1;
 407         for(int i=0; i<groups.length; i++)
 408             groups[i] = -1;