241 locals = new int[parent.localCount];
242
243 // Put fields into initial states
244 reset();
245 }
246
247 /**
248 * Returns the pattern that is interpreted by this matcher.
249 *
250 * @return The pattern for which this matcher was created
251 */
252 public Pattern pattern() {
253 return parentPattern;
254 }
255
256 /**
257 * Returns the match state of this matcher as a {@link MatchResult}.
258 * The result is unaffected by subsequent operations performed upon this
259 * matcher.
260 *
261 * @return a <code>MatchResult</code> with the state of this matcher
262 * @since 1.5
263 */
264 public MatchResult toMatchResult() {
265 return toMatchResult(text.toString());
266 }
267
268 private MatchResult toMatchResult(String text) {
269 return new ImmutableMatchResult(this.first,
270 this.last,
271 groupCount(),
272 this.groups.clone(),
273 text);
274 }
275
276 private static class ImmutableMatchResult implements MatchResult {
277 private final int first;
278 private final int last;
279 private final int[] groups;
280 private final int groupCount;
281 private final String text;
330 }
331
332 @Override
333 public String group(int group) {
334 checkMatch();
335 if (group < 0 || group > groupCount)
336 throw new IndexOutOfBoundsException("No group " + group);
337 if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
338 return null;
339 return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString();
340 }
341
342 private void checkMatch() {
343 if (first < 0)
344 throw new IllegalStateException("No match found");
345
346 }
347 }
348
349 /**
350 * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to
351 * find matches with.
352 *
353 * <p> This method causes this matcher to lose information
354 * about the groups of the last match that occurred. The
355 * matcher's position in the input is maintained and its
356 * last append position is unaffected.</p>
357 *
358 * @param newPattern
359 * The new pattern used by this matcher
360 * @return This matcher
361 * @throws IllegalArgumentException
362 * If newPattern is <tt>null</tt>
363 * @since 1.5
364 */
365 public Matcher usePattern(Pattern newPattern) {
366 if (newPattern == null)
367 throw new IllegalArgumentException("Pattern cannot be null");
368 parentPattern = newPattern;
369
370 // Reallocate state storage
371 int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10);
372 groups = new int[parentGroupCount * 2];
373 locals = new int[newPattern.localCount];
374 for (int i = 0; i < groups.length; i++)
375 groups[i] = -1;
376 for (int i = 0; i < locals.length; i++)
377 locals[i] = -1;
378 modCount++;
379 return this;
380 }
381
382 /**
427 * Returns the start index of the previous match.
428 *
429 * @return The index of the first character matched
430 *
431 * @throws IllegalStateException
432 * If no match has yet been attempted,
433 * or if the previous match operation failed
434 */
435 public int start() {
436 if (first < 0)
437 throw new IllegalStateException("No match available");
438 return first;
439 }
440
441 /**
442 * Returns the start index of the subsequence captured by the given group
443 * during the previous match operation.
444 *
445 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
446 * to right, starting at one. Group zero denotes the entire pattern, so
447 * the expression <i>m.</i><tt>start(0)</tt> is equivalent to
448 * <i>m.</i><tt>start()</tt>. </p>
449 *
450 * @param group
451 * The index of a capturing group in this matcher's pattern
452 *
453 * @return The index of the first character captured by the group,
454 * or <tt>-1</tt> if the match was successful but the group
455 * itself did not match anything
456 *
457 * @throws IllegalStateException
458 * If no match has yet been attempted,
459 * or if the previous match operation failed
460 *
461 * @throws IndexOutOfBoundsException
462 * If there is no capturing group in the pattern
463 * with the given index
464 */
465 public int start(int group) {
466 if (first < 0)
467 throw new IllegalStateException("No match available");
468 if (group < 0 || group > groupCount())
469 throw new IndexOutOfBoundsException("No group " + group);
470 return groups[group * 2];
471 }
472
473 /**
474 * Returns the start index of the subsequence captured by the given
499 * Returns the offset after the last character matched.
500 *
501 * @return The offset after the last character matched
502 *
503 * @throws IllegalStateException
504 * If no match has yet been attempted,
505 * or if the previous match operation failed
506 */
507 public int end() {
508 if (first < 0)
509 throw new IllegalStateException("No match available");
510 return last;
511 }
512
513 /**
514 * Returns the offset after the last character of the subsequence
515 * captured by the given group during the previous match operation.
516 *
517 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
518 * to right, starting at one. Group zero denotes the entire pattern, so
519 * the expression <i>m.</i><tt>end(0)</tt> is equivalent to
520 * <i>m.</i><tt>end()</tt>. </p>
521 *
522 * @param group
523 * The index of a capturing group in this matcher's pattern
524 *
525 * @return The offset after the last character captured by the group,
526 * or <tt>-1</tt> if the match was successful
527 * but the group itself did not match anything
528 *
529 * @throws IllegalStateException
530 * If no match has yet been attempted,
531 * or if the previous match operation failed
532 *
533 * @throws IndexOutOfBoundsException
534 * If there is no capturing group in the pattern
535 * with the given index
536 */
537 public int end(int group) {
538 if (first < 0)
539 throw new IllegalStateException("No match available");
540 if (group < 0 || group > groupCount())
541 throw new IndexOutOfBoundsException("No group " + group);
542 return groups[group * 2 + 1];
543 }
544
545 /**
546 * Returns the offset after the last character of the subsequence
554 * or {@code -1} if the match was successful
555 * but the group itself did not match anything
556 *
557 * @throws IllegalStateException
558 * If no match has yet been attempted,
559 * or if the previous match operation failed
560 *
561 * @throws IllegalArgumentException
562 * If there is no capturing group in the pattern
563 * with the given name
564 * @since 1.8
565 */
566 public int end(String name) {
567 return groups[getMatchedGroupIndex(name) * 2 + 1];
568 }
569
570 /**
571 * Returns the input subsequence matched by the previous match.
572 *
573 * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
574 * the expressions <i>m.</i><tt>group()</tt> and
575 * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt> <i>m.</i><tt>end())</tt>
576 * are equivalent. </p>
577 *
578 * <p> Note that some patterns, for example <tt>a*</tt>, match the empty
579 * string. This method will return the empty string when the pattern
580 * successfully matches the empty string in the input. </p>
581 *
582 * @return The (possibly empty) subsequence matched by the previous match,
583 * in string form
584 *
585 * @throws IllegalStateException
586 * If no match has yet been attempted,
587 * or if the previous match operation failed
588 */
589 public String group() {
590 return group(0);
591 }
592
593 /**
594 * Returns the input subsequence captured by the given group during the
595 * previous match operation.
596 *
597 * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index
598 * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and
599 * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt> <i>m.</i><tt>end(</tt><i>g</i><tt>))</tt>
600 * are equivalent. </p>
601 *
602 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
603 * to right, starting at one. Group zero denotes the entire pattern, so
604 * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>.
605 * </p>
606 *
607 * <p> If the match was successful but the group specified failed to match
608 * any part of the input sequence, then <tt>null</tt> is returned. Note
609 * that some groups, for example <tt>(a*)</tt>, match the empty string.
610 * This method will return the empty string when such a group successfully
611 * matches the empty string in the input. </p>
612 *
613 * @param group
614 * The index of a capturing group in this matcher's pattern
615 *
616 * @return The (possibly empty) subsequence captured by the group
617 * during the previous match, or <tt>null</tt> if the group
618 * failed to match part of the input
619 *
620 * @throws IllegalStateException
621 * If no match has yet been attempted,
622 * or if the previous match operation failed
623 *
624 * @throws IndexOutOfBoundsException
625 * If there is no capturing group in the pattern
626 * with the given index
627 */
628 public String group(int group) {
629 if (first < 0)
630 throw new IllegalStateException("No match found");
631 if (group < 0 || group > groupCount())
632 throw new IndexOutOfBoundsException("No group " + group);
633 if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
634 return null;
635 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
636 }
637
638 /**
639 * Returns the input subsequence captured by the given
640 * <a href="Pattern.html#groupname">named-capturing group</a> during the previous
641 * match operation.
642 *
643 * <p> If the match was successful but the group specified failed to match
644 * any part of the input sequence, then <tt>null</tt> is returned. Note
645 * that some groups, for example <tt>(a*)</tt>, match the empty string.
646 * This method will return the empty string when such a group successfully
647 * matches the empty string in the input. </p>
648 *
649 * @param name
650 * The name of a named-capturing group in this matcher's pattern
651 *
652 * @return The (possibly empty) subsequence captured by the named group
653 * during the previous match, or <tt>null</tt> if the group
654 * failed to match part of the input
655 *
656 * @throws IllegalStateException
657 * If no match has yet been attempted,
658 * or if the previous match operation failed
659 *
660 * @throws IllegalArgumentException
661 * If there is no capturing group in the pattern
662 * with the given name
663 * @since 1.7
664 */
665 public String group(String name) {
666 int group = getMatchedGroupIndex(name);
667 if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
668 return null;
669 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
670 }
671
672 /**
673 * Returns the number of capturing groups in this matcher's pattern.
674 *
675 * <p> Group zero denotes the entire pattern by convention. It is not
676 * included in this count.
677 *
678 * <p> Any non-negative integer smaller than or equal to the value
679 * returned by this method is guaranteed to be a valid group index for
680 * this matcher. </p>
681 *
682 * @return The number of capturing groups in this matcher's pattern
683 */
684 public int groupCount() {
685 return parentPattern.capturingGroupCount - 1;
686 }
687
688 /**
689 * Attempts to match the entire region against the pattern.
690 *
691 * <p> If the match succeeds then more information can be obtained via the
692 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p>
693 *
694 * @return <tt>true</tt> if, and only if, the entire region sequence
695 * matches this matcher's pattern
696 */
697 public boolean matches() {
698 return match(from, ENDANCHOR);
699 }
700
701 /**
702 * Attempts to find the next subsequence of the input sequence that matches
703 * the pattern.
704 *
705 * <p> This method starts at the beginning of this matcher's region, or, if
706 * a previous invocation of the method was successful and the matcher has
707 * not since been reset, at the first character not matched by the previous
708 * match.
709 *
710 * <p> If the match succeeds then more information can be obtained via the
711 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p>
712 *
713 * @return <tt>true</tt> if, and only if, a subsequence of the input
714 * sequence matches this matcher's pattern
715 */
716 public boolean find() {
717 int nextSearchIndex = last;
718 if (nextSearchIndex == first)
719 nextSearchIndex++;
720
721 // If next search starts before region, start it at region
722 if (nextSearchIndex < from)
723 nextSearchIndex = from;
724
725 // If next search starts beyond region then it fails
726 if (nextSearchIndex > to) {
727 for (int i = 0; i < groups.length; i++)
728 groups[i] = -1;
729 return false;
730 }
731 return search(nextSearchIndex);
732 }
733
734 /**
735 * Resets this matcher and then attempts to find the next subsequence of
736 * the input sequence that matches the pattern, starting at the specified
737 * index.
738 *
739 * <p> If the match succeeds then more information can be obtained via the
740 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent
741 * invocations of the {@link #find()} method will start at the first
742 * character not matched by this match. </p>
743 *
744 * @param start the index to start searching for a match
745 * @throws IndexOutOfBoundsException
746 * If start is less than zero or if start is greater than the
747 * length of the input sequence.
748 *
749 * @return <tt>true</tt> if, and only if, a subsequence of the input
750 * sequence starting at the given index matches this matcher's
751 * pattern
752 */
753 public boolean find(int start) {
754 int limit = getTextLength();
755 if ((start < 0) || (start > limit))
756 throw new IndexOutOfBoundsException("Illegal start index");
757 reset();
758 return search(start);
759 }
760
761 /**
762 * Attempts to match the input sequence, starting at the beginning of the
763 * region, against the pattern.
764 *
765 * <p> Like the {@link #matches matches} method, this method always starts
766 * at the beginning of the region; unlike that method, it does not
767 * require that the entire region be matched.
768 *
769 * <p> If the match succeeds then more information can be obtained via the
770 * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p>
771 *
772 * @return <tt>true</tt> if, and only if, a prefix of the input
773 * sequence matches this matcher's pattern
774 */
775 public boolean lookingAt() {
776 return match(from, NOANCHOR);
777 }
778
779 /**
780 * Returns a literal replacement <code>String</code> for the specified
781 * <code>String</code>.
782 *
783 * This method produces a <code>String</code> that will work
784 * as a literal replacement <code>s</code> in the
785 * <code>appendReplacement</code> method of the {@link Matcher} class.
786 * The <code>String</code> produced will match the sequence of characters
787 * in <code>s</code> treated as a literal sequence. Slashes ('\') and
788 * dollar signs ('$') will be given no special meaning.
789 *
790 * @param s The string to be literalized
791 * @return A literal string replacement
792 * @since 1.5
793 */
794 public static String quoteReplacement(String s) {
795 if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1))
796 return s;
797 StringBuilder sb = new StringBuilder();
798 for (int i=0; i<s.length(); i++) {
799 char c = s.charAt(i);
800 if (c == '\\' || c == '$') {
801 sb.append('\\');
802 }
803 sb.append(c);
804 }
805 return sb.toString();
806 }
807
808 /**
809 * Implements a non-terminal append-and-replace step.
810 *
811 * <p> This method performs the following actions: </p>
812 *
813 * <ol>
814 *
815 * <li><p> It reads characters from the input sequence, starting at the
816 * append position, and appends them to the given string buffer. It
817 * stops after reading the last character preceding the previous match,
818 * that is, the character at index {@link
819 * #start()} <tt>-</tt> <tt>1</tt>. </p></li>
820 *
821 * <li><p> It appends the given replacement string to the string buffer.
822 * </p></li>
823 *
824 * <li><p> It sets the append position of this matcher to the index of
825 * the last character matched, plus one, that is, to {@link #end()}.
826 * </p></li>
827 *
828 * </ol>
829 *
830 * <p> The replacement string may contain references to subsequences
831 * captured during the previous match: Each occurrence of
832 * <tt>${</tt><i>name</i><tt>}</tt> or <tt>$</tt><i>g</i>
833 * will be replaced by the result of evaluating the corresponding
834 * {@link #group(String) group(name)} or {@link #group(int) group(g)}
835 * respectively. For <tt>$</tt><i>g</i>,
836 * the first number after the <tt>$</tt> is always treated as part of
837 * the group reference. Subsequent numbers are incorporated into g if
838 * they would form a legal group reference. Only the numerals '0'
839 * through '9' are considered as potential components of the group
840 * reference. If the second group matched the string <tt>"foo"</tt>, for
841 * example, then passing the replacement string <tt>"$2bar"</tt> would
842 * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar
843 * sign (<tt>$</tt>) may be included as a literal in the replacement
844 * string by preceding it with a backslash (<tt>\$</tt>).
845 *
846 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
847 * the replacement string may cause the results to be different than if it
848 * were being treated as a literal replacement string. Dollar signs may be
849 * treated as references to captured subsequences as described above, and
850 * backslashes are used to escape literal characters in the replacement
851 * string.
852 *
853 * <p> This method is intended to be used in a loop together with the
854 * {@link #appendTail appendTail} and {@link #find find} methods. The
855 * following code, for example, writes <tt>one dog two dogs in the
856 * yard</tt> to the standard-output stream: </p>
857 *
858 * <blockquote><pre>
859 * Pattern p = Pattern.compile("cat");
860 * Matcher m = p.matcher("one cat two cats in the yard");
861 * StringBuffer sb = new StringBuffer();
862 * while (m.find()) {
863 * m.appendReplacement(sb, "dog");
864 * }
865 * m.appendTail(sb);
866 * System.out.println(sb.toString());</pre></blockquote>
867 *
868 * @param sb
869 * The target string buffer
870 *
871 * @param replacement
872 * The replacement string
873 *
874 * @return This matcher
875 *
876 * @throws IllegalStateException
894 // Append the intervening text
895 sb.append(text, lastAppendPosition, first);
896 // Append the match substitution
897 sb.append(result);
898 lastAppendPosition = last;
899 modCount++;
900 return this;
901 }
902
903 /**
904 * Implements a non-terminal append-and-replace step.
905 *
906 * <p> This method performs the following actions: </p>
907 *
908 * <ol>
909 *
910 * <li><p> It reads characters from the input sequence, starting at the
911 * append position, and appends them to the given string builder. It
912 * stops after reading the last character preceding the previous match,
913 * that is, the character at index {@link
914 * #start()} <tt>-</tt> <tt>1</tt>. </p></li>
915 *
916 * <li><p> It appends the given replacement string to the string builder.
917 * </p></li>
918 *
919 * <li><p> It sets the append position of this matcher to the index of
920 * the last character matched, plus one, that is, to {@link #end()}.
921 * </p></li>
922 *
923 * </ol>
924 *
925 * <p> The replacement string may contain references to subsequences
926 * captured during the previous match: Each occurrence of
927 * <tt>$</tt><i>g</i> will be replaced by the result of
928 * evaluating {@link #group(int) group}<tt>(</tt><i>g</i><tt>)</tt>.
929 * The first number after the <tt>$</tt> is always treated as part of
930 * the group reference. Subsequent numbers are incorporated into g if
931 * they would form a legal group reference. Only the numerals '0'
932 * through '9' are considered as potential components of the group
933 * reference. If the second group matched the string <tt>"foo"</tt>, for
934 * example, then passing the replacement string <tt>"$2bar"</tt> would
935 * cause <tt>"foobar"</tt> to be appended to the string builder. A dollar
936 * sign (<tt>$</tt>) may be included as a literal in the replacement
937 * string by preceding it with a backslash (<tt>\$</tt>).
938 *
939 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
940 * the replacement string may cause the results to be different than if it
941 * were being treated as a literal replacement string. Dollar signs may be
942 * treated as references to captured subsequences as described above, and
943 * backslashes are used to escape literal characters in the replacement
944 * string.
945 *
946 * <p> This method is intended to be used in a loop together with the
947 * {@link #appendTail appendTail} and {@link #find find} methods. The
948 * following code, for example, writes <tt>one dog two dogs in the
949 * yard</tt> to the standard-output stream: </p>
950 *
951 * <blockquote><pre>
952 * Pattern p = Pattern.compile("cat");
953 * Matcher m = p.matcher("one cat two cats in the yard");
954 * StringBuilder sb = new StringBuilder();
955 * while (m.find()) {
956 * m.appendReplacement(sb, "dog");
957 * }
958 * m.appendTail(sb);
959 * System.out.println(sb.toString());</pre></blockquote>
960 *
961 * @param sb
962 * The target string builder
963 * @param replacement
964 * The replacement string
965 * @return This matcher
966 *
967 * @throws IllegalStateException
968 * If no match has yet been attempted,
969 * or if the previous match operation failed
1117 * @return The target string builder
1118 *
1119 * @since 1.9
1120 */
1121 public StringBuilder appendTail(StringBuilder sb) {
1122 sb.append(text, lastAppendPosition, getTextLength());
1123 return sb;
1124 }
1125
1126 /**
1127 * Replaces every subsequence of the input sequence that matches the
1128 * pattern with the given replacement string.
1129 *
1130 * <p> This method first resets this matcher. It then scans the input
1131 * sequence looking for matches of the pattern. Characters that are not
1132 * part of any match are appended directly to the result string; each match
1133 * is replaced in the result by the replacement string. The replacement
1134 * string may contain references to captured subsequences as in the {@link
1135 * #appendReplacement appendReplacement} method.
1136 *
1137 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
1138 * the replacement string may cause the results to be different than if it
1139 * were being treated as a literal replacement string. Dollar signs may be
1140 * treated as references to captured subsequences as described above, and
1141 * backslashes are used to escape literal characters in the replacement
1142 * string.
1143 *
1144 * <p> Given the regular expression <tt>a*b</tt>, the input
1145 * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string
1146 * <tt>"-"</tt>, an invocation of this method on a matcher for that
1147 * expression would yield the string <tt>"-foo-foo-foo-"</tt>.
1148 *
1149 * <p> Invoking this method changes this matcher's state. If the matcher
1150 * is to be used in further matching operations then it should first be
1151 * reset. </p>
1152 *
1153 * @param replacement
1154 * The replacement string
1155 *
1156 * @return The string constructed by replacing each matching subsequence
1157 * by the replacement string, substituting captured subsequences
1158 * as needed
1159 */
1160 public String replaceAll(String replacement) {
1161 reset();
1162 boolean result = find();
1163 if (result) {
1164 StringBuilder sb = new StringBuilder();
1165 do {
1166 appendReplacement(sb, replacement);
1167 result = find();
1169 appendTail(sb);
1170 return sb.toString();
1171 }
1172 return text.toString();
1173 }
1174
1175 /**
1176 * Replaces every subsequence of the input sequence that matches the
1177 * pattern with the result of applying the given replacer function to the
1178 * match result of this matcher corresponding to that subsequence.
1179 * Exceptions thrown by the function are relayed to the caller.
1180 *
1181 * <p> This method first resets this matcher. It then scans the input
1182 * sequence looking for matches of the pattern. Characters that are not
1183 * part of any match are appended directly to the result string; each match
1184 * is replaced in the result by applying the replacer function that
1185 * returns a replacement string. Each replacement string may contain
1186 * references to captured subsequences as in the {@link #appendReplacement
1187 * appendReplacement} method.
1188 *
1189 * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
1190 * a replacement string may cause the results to be different than if it
1191 * were being treated as a literal replacement string. Dollar signs may be
1192 * treated as references to captured subsequences as described above, and
1193 * backslashes are used to escape literal characters in the replacement
1194 * string.
1195 *
1196 * <p> Given the regular expression <tt>dog</tt>, the input
1197 * <tt>"zzzdogzzzdogzzz"</tt>, and the function
1198 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
1199 * a matcher for that expression would yield the string
1200 * <tt>"zzzDOGzzzDOGzzz"</tt>.
1201 *
1202 * <p> Invoking this method changes this matcher's state. If the matcher
1203 * is to be used in further matching operations then it should first be
1204 * reset. </p>
1205 *
1206 * <p> The replacer function should not modify this matcher's state during
1207 * replacement. This method will, on a best-effort basis, throw a
1208 * {@link java.util.ConcurrentModificationException} if such modification is
1209 * detected.
1210 *
1211 * <p> The state of each match result passed to the replacer function is
1212 * guaranteed to be constant only for the duration of the replacer function
1213 * call and only if the replacer function does not modify this matcher's
1214 * state.
1215 *
1216 * @implNote
1217 * This implementation applies the replacer function to this matcher, which
1218 * is an instance of {@code MatchResult}.
1219 *
1220 * @param replacer
1343 if (ec != modCount)
1344 throw new ConcurrentModificationException();
1345 } while (find());
1346 }
1347 }
1348 return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
1349 new MatchResultIterator(), Spliterator.ORDERED | Spliterator.NONNULL), false);
1350 }
1351
1352 /**
1353 * Replaces the first subsequence of the input sequence that matches the
1354 * pattern with the given replacement string.
1355 *
1356 * <p> This method first resets this matcher. It then scans the input
1357 * sequence looking for a match of the pattern. Characters that are not
1358 * part of the match are appended directly to the result string; the match
1359 * is replaced in the result by the replacement string. The replacement
1360 * string may contain references to captured subsequences as in the {@link
1361 * #appendReplacement appendReplacement} method.
1362 *
1363 * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
1364 * the replacement string may cause the results to be different than if it
1365 * were being treated as a literal replacement string. Dollar signs may be
1366 * treated as references to captured subsequences as described above, and
1367 * backslashes are used to escape literal characters in the replacement
1368 * string.
1369 *
1370 * <p> Given the regular expression <tt>dog</tt>, the input
1371 * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string
1372 * <tt>"cat"</tt>, an invocation of this method on a matcher for that
1373 * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>. </p>
1374 *
1375 * <p> Invoking this method changes this matcher's state. If the matcher
1376 * is to be used in further matching operations then it should first be
1377 * reset. </p>
1378 *
1379 * @param replacement
1380 * The replacement string
1381 * @return The string constructed by replacing the first matching
1382 * subsequence by the replacement string, substituting captured
1383 * subsequences as needed
1384 */
1385 public String replaceFirst(String replacement) {
1386 if (replacement == null)
1387 throw new NullPointerException("replacement");
1388 reset();
1389 if (!find())
1390 return text.toString();
1391 StringBuilder sb = new StringBuilder();
1392 appendReplacement(sb, replacement);
1393 appendTail(sb);
1394 return sb.toString();
1395 }
1396
1397 /**
1398 * Replaces the first subsequence of the input sequence that matches the
1399 * pattern with the result of applying the given replacer function to the
1400 * match result of this matcher corresponding to that subsequence.
1401 * Exceptions thrown by the replacer function are relayed to the caller.
1402 *
1403 * <p> This method first resets this matcher. It then scans the input
1404 * sequence looking for a match of the pattern. Characters that are not
1405 * part of the match are appended directly to the result string; the match
1406 * is replaced in the result by applying the replacer function that
1407 * returns a replacement string. The replacement string may contain
1408 * references to captured subsequences as in the {@link #appendReplacement
1409 * appendReplacement} method.
1410 *
1411 * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
1412 * the replacement string may cause the results to be different than if it
1413 * were being treated as a literal replacement string. Dollar signs may be
1414 * treated as references to captured subsequences as described above, and
1415 * backslashes are used to escape literal characters in the replacement
1416 * string.
1417 *
1418 * <p> Given the regular expression <tt>dog</tt>, the input
1419 * <tt>"zzzdogzzzdogzzz"</tt>, and the function
1420 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
1421 * a matcher for that expression would yield the string
1422 * <tt>"zzzDOGzzzdogzzz"</tt>.
1423 *
1424 * <p> Invoking this method changes this matcher's state. If the matcher
1425 * is to be used in further matching operations then it should first be
1426 * reset.
1427 *
1428 * <p> The replacer function should not modify this matcher's state during
1429 * replacement. This method will, on a best-effort basis, throw a
1430 * {@link java.util.ConcurrentModificationException} if such modification is
1431 * detected.
1432 *
1433 * <p> The state of the match result passed to the replacer function is
1434 * guaranteed to be constant only for the duration of the replacer function
1435 * call and only if the replacer function does not modify this matcher's
1436 * state.
1437 *
1438 * @implNote
1439 * This implementation applies the replacer function to this matcher, which
1440 * is an instance of {@code MatchResult}.
1441 *
1442 * @param replacer
1454 */
1455 public String replaceFirst(Function<MatchResult, String> replacer) {
1456 Objects.requireNonNull(replacer);
1457 reset();
1458 if (!find())
1459 return text.toString();
1460 StringBuilder sb = new StringBuilder();
1461 int ec = modCount;
1462 String replacement = replacer.apply(this);
1463 if (ec != modCount)
1464 throw new ConcurrentModificationException();
1465 appendReplacement(sb, replacement);
1466 appendTail(sb);
1467 return sb.toString();
1468 }
1469
1470 /**
1471 * Sets the limits of this matcher's region. The region is the part of the
1472 * input sequence that will be searched to find a match. Invoking this
1473 * method resets the matcher, and then sets the region to start at the
1474 * index specified by the <code>start</code> parameter and end at the
1475 * index specified by the <code>end</code> parameter.
1476 *
1477 * <p>Depending on the transparency and anchoring being used (see
1478 * {@link #useTransparentBounds useTransparentBounds} and
1479 * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such
1480 * as anchors may behave differently at or around the boundaries of the
1481 * region.
1482 *
1483 * @param start
1484 * The index to start searching at (inclusive)
1485 * @param end
1486 * The index to end searching at (exclusive)
1487 * @throws IndexOutOfBoundsException
1488 * If start or end is less than zero, if
1489 * start is greater than the length of the input sequence, if
1490 * end is greater than the length of the input sequence, or if
1491 * start is greater than end.
1492 * @return this matcher
1493 * @since 1.5
1494 */
1495 public Matcher region(int start, int end) {
1517 public int regionStart() {
1518 return from;
1519 }
1520
1521 /**
1522 * Reports the end index (exclusive) of this matcher's region.
1523 * The searches this matcher conducts are limited to finding matches
1524 * within {@link #regionStart regionStart} (inclusive) and
1525 * {@link #regionEnd regionEnd} (exclusive).
1526 *
1527 * @return the ending point of this matcher's region
1528 * @since 1.5
1529 */
1530 public int regionEnd() {
1531 return to;
1532 }
1533
1534 /**
1535 * Queries the transparency of region bounds for this matcher.
1536 *
1537 * <p> This method returns <tt>true</tt> if this matcher uses
1538 * <i>transparent</i> bounds, <tt>false</tt> if it uses <i>opaque</i>
1539 * bounds.
1540 *
1541 * <p> See {@link #useTransparentBounds useTransparentBounds} for a
1542 * description of transparent and opaque bounds.
1543 *
1544 * <p> By default, a matcher uses opaque region boundaries.
1545 *
1546 * @return <tt>true</tt> iff this matcher is using transparent bounds,
1547 * <tt>false</tt> otherwise.
1548 * @see java.util.regex.Matcher#useTransparentBounds(boolean)
1549 * @since 1.5
1550 */
1551 public boolean hasTransparentBounds() {
1552 return transparentBounds;
1553 }
1554
1555 /**
1556 * Sets the transparency of region bounds for this matcher.
1557 *
1558 * <p> Invoking this method with an argument of <tt>true</tt> will set this
1559 * matcher to use <i>transparent</i> bounds. If the boolean
1560 * argument is <tt>false</tt>, then <i>opaque</i> bounds will be used.
1561 *
1562 * <p> Using transparent bounds, the boundaries of this
1563 * matcher's region are transparent to lookahead, lookbehind,
1564 * and boundary matching constructs. Those constructs can see beyond the
1565 * boundaries of the region to see if a match is appropriate.
1566 *
1567 * <p> Using opaque bounds, the boundaries of this matcher's
1568 * region are opaque to lookahead, lookbehind, and boundary matching
1569 * constructs that may try to see beyond them. Those constructs cannot
1570 * look past the boundaries so they will fail to match anything outside
1571 * of the region.
1572 *
1573 * <p> By default, a matcher uses opaque bounds.
1574 *
1575 * @param b a boolean indicating whether to use opaque or transparent
1576 * regions
1577 * @return this matcher
1578 * @see java.util.regex.Matcher#hasTransparentBounds
1579 * @since 1.5
1580 */
1581 public Matcher useTransparentBounds(boolean b) {
1582 transparentBounds = b;
1583 return this;
1584 }
1585
1586 /**
1587 * Queries the anchoring of region bounds for this matcher.
1588 *
1589 * <p> This method returns <tt>true</tt> if this matcher uses
1590 * <i>anchoring</i> bounds, <tt>false</tt> otherwise.
1591 *
1592 * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a
1593 * description of anchoring bounds.
1594 *
1595 * <p> By default, a matcher uses anchoring region boundaries.
1596 *
1597 * @return <tt>true</tt> iff this matcher is using anchoring bounds,
1598 * <tt>false</tt> otherwise.
1599 * @see java.util.regex.Matcher#useAnchoringBounds(boolean)
1600 * @since 1.5
1601 */
1602 public boolean hasAnchoringBounds() {
1603 return anchoringBounds;
1604 }
1605
1606 /**
1607 * Sets the anchoring of region bounds for this matcher.
1608 *
1609 * <p> Invoking this method with an argument of <tt>true</tt> will set this
1610 * matcher to use <i>anchoring</i> bounds. If the boolean
1611 * argument is <tt>false</tt>, then <i>non-anchoring</i> bounds will be
1612 * used.
1613 *
1614 * <p> Using anchoring bounds, the boundaries of this
1615 * matcher's region match anchors such as ^ and $.
1616 *
1617 * <p> Without anchoring bounds, the boundaries of this
1618 * matcher's region will not match anchors such as ^ and $.
1619 *
1620 * <p> By default, a matcher uses anchoring region boundaries.
1621 *
1622 * @param b a boolean indicating whether or not to use anchoring bounds.
1623 * @return this matcher
1624 * @see java.util.regex.Matcher#hasAnchoringBounds
1625 * @since 1.5
1626 */
1627 public Matcher useAnchoringBounds(boolean b) {
1628 anchoringBounds = b;
1629 return this;
1630 }
1631
1632 /**
1633 * <p>Returns the string representation of this matcher. The
1634 * string representation of a <code>Matcher</code> contains information
1635 * that may be useful for debugging. The exact format is unspecified.
1636 *
1637 * @return The string representation of this matcher
1638 * @since 1.5
1639 */
1640 public String toString() {
1641 StringBuilder sb = new StringBuilder();
1642 sb.append("java.util.regex.Matcher")
1643 .append("[pattern=").append(pattern())
1644 .append(" region=")
1645 .append(regionStart()).append(',').append(regionEnd())
1646 .append(" lastmatch=");
1647 if ((first >= 0) && (group() != null)) {
1648 sb.append(group());
1649 }
1650 sb.append(']');
1651 return sb.toString();
1652 }
1653
1654 /**
|
241 locals = new int[parent.localCount];
242
243 // Put fields into initial states
244 reset();
245 }
246
247 /**
248 * Returns the pattern that is interpreted by this matcher.
249 *
250 * @return The pattern for which this matcher was created
251 */
252 public Pattern pattern() {
253 return parentPattern;
254 }
255
256 /**
257 * Returns the match state of this matcher as a {@link MatchResult}.
258 * The result is unaffected by subsequent operations performed upon this
259 * matcher.
260 *
261 * @return a {@code MatchResult} with the state of this matcher
262 * @since 1.5
263 */
264 public MatchResult toMatchResult() {
265 return toMatchResult(text.toString());
266 }
267
268 private MatchResult toMatchResult(String text) {
269 return new ImmutableMatchResult(this.first,
270 this.last,
271 groupCount(),
272 this.groups.clone(),
273 text);
274 }
275
276 private static class ImmutableMatchResult implements MatchResult {
277 private final int first;
278 private final int last;
279 private final int[] groups;
280 private final int groupCount;
281 private final String text;
330 }
331
332 @Override
333 public String group(int group) {
334 checkMatch();
335 if (group < 0 || group > groupCount)
336 throw new IndexOutOfBoundsException("No group " + group);
337 if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
338 return null;
339 return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString();
340 }
341
342 private void checkMatch() {
343 if (first < 0)
344 throw new IllegalStateException("No match found");
345
346 }
347 }
348
349 /**
350 * Changes the {@code Pattern} that this {@code Matcher} uses to
351 * find matches with.
352 *
353 * <p> This method causes this matcher to lose information
354 * about the groups of the last match that occurred. The
355 * matcher's position in the input is maintained and its
356 * last append position is unaffected.</p>
357 *
358 * @param newPattern
359 * The new pattern used by this matcher
360 * @return This matcher
361 * @throws IllegalArgumentException
362 * If newPattern is {@code null}
363 * @since 1.5
364 */
365 public Matcher usePattern(Pattern newPattern) {
366 if (newPattern == null)
367 throw new IllegalArgumentException("Pattern cannot be null");
368 parentPattern = newPattern;
369
370 // Reallocate state storage
371 int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10);
372 groups = new int[parentGroupCount * 2];
373 locals = new int[newPattern.localCount];
374 for (int i = 0; i < groups.length; i++)
375 groups[i] = -1;
376 for (int i = 0; i < locals.length; i++)
377 locals[i] = -1;
378 modCount++;
379 return this;
380 }
381
382 /**
427 * Returns the start index of the previous match.
428 *
429 * @return The index of the first character matched
430 *
431 * @throws IllegalStateException
432 * If no match has yet been attempted,
433 * or if the previous match operation failed
434 */
435 public int start() {
436 if (first < 0)
437 throw new IllegalStateException("No match available");
438 return first;
439 }
440
441 /**
442 * Returns the start index of the subsequence captured by the given group
443 * during the previous match operation.
444 *
445 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
446 * to right, starting at one. Group zero denotes the entire pattern, so
447 * the expression <i>m.</i>{@code start(0)} is equivalent to
448 * <i>m.</i>{@code start()}. </p>
449 *
450 * @param group
451 * The index of a capturing group in this matcher's pattern
452 *
453 * @return The index of the first character captured by the group,
454 * or {@code -1} if the match was successful but the group
455 * itself did not match anything
456 *
457 * @throws IllegalStateException
458 * If no match has yet been attempted,
459 * or if the previous match operation failed
460 *
461 * @throws IndexOutOfBoundsException
462 * If there is no capturing group in the pattern
463 * with the given index
464 */
465 public int start(int group) {
466 if (first < 0)
467 throw new IllegalStateException("No match available");
468 if (group < 0 || group > groupCount())
469 throw new IndexOutOfBoundsException("No group " + group);
470 return groups[group * 2];
471 }
472
473 /**
474 * Returns the start index of the subsequence captured by the given
499 * Returns the offset after the last character matched.
500 *
501 * @return The offset after the last character matched
502 *
503 * @throws IllegalStateException
504 * If no match has yet been attempted,
505 * or if the previous match operation failed
506 */
507 public int end() {
508 if (first < 0)
509 throw new IllegalStateException("No match available");
510 return last;
511 }
512
513 /**
514 * Returns the offset after the last character of the subsequence
515 * captured by the given group during the previous match operation.
516 *
517 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
518 * to right, starting at one. Group zero denotes the entire pattern, so
519 * the expression <i>m.</i>{@code end(0)} is equivalent to
520 * <i>m.</i>{@code end()}. </p>
521 *
522 * @param group
523 * The index of a capturing group in this matcher's pattern
524 *
525 * @return The offset after the last character captured by the group,
526 * or {@code -1} if the match was successful
527 * but the group itself did not match anything
528 *
529 * @throws IllegalStateException
530 * If no match has yet been attempted,
531 * or if the previous match operation failed
532 *
533 * @throws IndexOutOfBoundsException
534 * If there is no capturing group in the pattern
535 * with the given index
536 */
537 public int end(int group) {
538 if (first < 0)
539 throw new IllegalStateException("No match available");
540 if (group < 0 || group > groupCount())
541 throw new IndexOutOfBoundsException("No group " + group);
542 return groups[group * 2 + 1];
543 }
544
545 /**
546 * Returns the offset after the last character of the subsequence
554 * or {@code -1} if the match was successful
555 * but the group itself did not match anything
556 *
557 * @throws IllegalStateException
558 * If no match has yet been attempted,
559 * or if the previous match operation failed
560 *
561 * @throws IllegalArgumentException
562 * If there is no capturing group in the pattern
563 * with the given name
564 * @since 1.8
565 */
566 public int end(String name) {
567 return groups[getMatchedGroupIndex(name) * 2 + 1];
568 }
569
570 /**
571 * Returns the input subsequence matched by the previous match.
572 *
573 * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
574 * the expressions <i>m.</i>{@code group()} and
575 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),} <i>m.</i>{@code end())}
576 * are equivalent. </p>
577 *
578 * <p> Note that some patterns, for example {@code a*}, match the empty
579 * string. This method will return the empty string when the pattern
580 * successfully matches the empty string in the input. </p>
581 *
582 * @return The (possibly empty) subsequence matched by the previous match,
583 * in string form
584 *
585 * @throws IllegalStateException
586 * If no match has yet been attempted,
587 * or if the previous match operation failed
588 */
589 public String group() {
590 return group(0);
591 }
592
593 /**
594 * Returns the input subsequence captured by the given group during the
595 * previous match operation.
596 *
597 * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index
598 * <i>g</i>, the expressions <i>m.</i>{@code group(}<i>g</i>{@code )} and
599 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(}<i>g</i>{@code
600 * ),} <i>m.</i>{@code end(}<i>g</i>{@code ))}
601 * are equivalent. </p>
602 *
603 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
604 * to right, starting at one. Group zero denotes the entire pattern, so
605 * the expression {@code m.group(0)} is equivalent to {@code m.group()}.
606 * </p>
607 *
608 * <p> If the match was successful but the group specified failed to match
609 * any part of the input sequence, then {@code null} is returned. Note
610 * that some groups, for example {@code (a*)}, match the empty string.
611 * This method will return the empty string when such a group successfully
612 * matches the empty string in the input. </p>
613 *
614 * @param group
615 * The index of a capturing group in this matcher's pattern
616 *
617 * @return The (possibly empty) subsequence captured by the group
618 * during the previous match, or {@code null} if the group
619 * failed to match part of the input
620 *
621 * @throws IllegalStateException
622 * If no match has yet been attempted,
623 * or if the previous match operation failed
624 *
625 * @throws IndexOutOfBoundsException
626 * If there is no capturing group in the pattern
627 * with the given index
628 */
629 public String group(int group) {
630 if (first < 0)
631 throw new IllegalStateException("No match found");
632 if (group < 0 || group > groupCount())
633 throw new IndexOutOfBoundsException("No group " + group);
634 if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
635 return null;
636 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
637 }
638
639 /**
640 * Returns the input subsequence captured by the given
641 * <a href="Pattern.html#groupname">named-capturing group</a> during the previous
642 * match operation.
643 *
644 * <p> If the match was successful but the group specified failed to match
645 * any part of the input sequence, then {@code null} is returned. Note
646 * that some groups, for example {@code (a*)}, match the empty string.
647 * This method will return the empty string when such a group successfully
648 * matches the empty string in the input. </p>
649 *
650 * @param name
651 * The name of a named-capturing group in this matcher's pattern
652 *
653 * @return The (possibly empty) subsequence captured by the named group
654 * during the previous match, or {@code null} if the group
655 * failed to match part of the input
656 *
657 * @throws IllegalStateException
658 * If no match has yet been attempted,
659 * or if the previous match operation failed
660 *
661 * @throws IllegalArgumentException
662 * If there is no capturing group in the pattern
663 * with the given name
664 * @since 1.7
665 */
666 public String group(String name) {
667 int group = getMatchedGroupIndex(name);
668 if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
669 return null;
670 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
671 }
672
673 /**
674 * Returns the number of capturing groups in this matcher's pattern.
675 *
676 * <p> Group zero denotes the entire pattern by convention. It is not
677 * included in this count.
678 *
679 * <p> Any non-negative integer smaller than or equal to the value
680 * returned by this method is guaranteed to be a valid group index for
681 * this matcher. </p>
682 *
683 * @return The number of capturing groups in this matcher's pattern
684 */
685 public int groupCount() {
686 return parentPattern.capturingGroupCount - 1;
687 }
688
689 /**
690 * Attempts to match the entire region against the pattern.
691 *
692 * <p> If the match succeeds then more information can be obtained via the
693 * {@code start}, {@code end}, and {@code group} methods. </p>
694 *
695 * @return {@code true} if, and only if, the entire region sequence
696 * matches this matcher's pattern
697 */
698 public boolean matches() {
699 return match(from, ENDANCHOR);
700 }
701
702 /**
703 * Attempts to find the next subsequence of the input sequence that matches
704 * the pattern.
705 *
706 * <p> This method starts at the beginning of this matcher's region, or, if
707 * a previous invocation of the method was successful and the matcher has
708 * not since been reset, at the first character not matched by the previous
709 * match.
710 *
711 * <p> If the match succeeds then more information can be obtained via the
712 * {@code start}, {@code end}, and {@code group} methods. </p>
713 *
714 * @return {@code true} if, and only if, a subsequence of the input
715 * sequence matches this matcher's pattern
716 */
717 public boolean find() {
718 int nextSearchIndex = last;
719 if (nextSearchIndex == first)
720 nextSearchIndex++;
721
722 // If next search starts before region, start it at region
723 if (nextSearchIndex < from)
724 nextSearchIndex = from;
725
726 // If next search starts beyond region then it fails
727 if (nextSearchIndex > to) {
728 for (int i = 0; i < groups.length; i++)
729 groups[i] = -1;
730 return false;
731 }
732 return search(nextSearchIndex);
733 }
734
735 /**
736 * Resets this matcher and then attempts to find the next subsequence of
737 * the input sequence that matches the pattern, starting at the specified
738 * index.
739 *
740 * <p> If the match succeeds then more information can be obtained via the
741 * {@code start}, {@code end}, and {@code group} methods, and subsequent
742 * invocations of the {@link #find()} method will start at the first
743 * character not matched by this match. </p>
744 *
745 * @param start the index to start searching for a match
746 * @throws IndexOutOfBoundsException
747 * If start is less than zero or if start is greater than the
748 * length of the input sequence.
749 *
750 * @return {@code true} if, and only if, a subsequence of the input
751 * sequence starting at the given index matches this matcher's
752 * pattern
753 */
754 public boolean find(int start) {
755 int limit = getTextLength();
756 if ((start < 0) || (start > limit))
757 throw new IndexOutOfBoundsException("Illegal start index");
758 reset();
759 return search(start);
760 }
761
762 /**
763 * Attempts to match the input sequence, starting at the beginning of the
764 * region, against the pattern.
765 *
766 * <p> Like the {@link #matches matches} method, this method always starts
767 * at the beginning of the region; unlike that method, it does not
768 * require that the entire region be matched.
769 *
770 * <p> If the match succeeds then more information can be obtained via the
771 * {@code start}, {@code end}, and {@code group} methods. </p>
772 *
773 * @return {@code true} if, and only if, a prefix of the input
774 * sequence matches this matcher's pattern
775 */
776 public boolean lookingAt() {
777 return match(from, NOANCHOR);
778 }
779
780 /**
781 * Returns a literal replacement {@code String} for the specified
782 * {@code String}.
783 *
784 * This method produces a {@code String} that will work
785 * as a literal replacement {@code s} in the
786 * {@code appendReplacement} method of the {@link Matcher} class.
787 * The {@code String} produced will match the sequence of characters
788 * in {@code s} treated as a literal sequence. Slashes ('\') and
789 * dollar signs ('$') will be given no special meaning.
790 *
791 * @param s The string to be literalized
792 * @return A literal string replacement
793 * @since 1.5
794 */
795 public static String quoteReplacement(String s) {
796 if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1))
797 return s;
798 StringBuilder sb = new StringBuilder();
799 for (int i=0; i<s.length(); i++) {
800 char c = s.charAt(i);
801 if (c == '\\' || c == '$') {
802 sb.append('\\');
803 }
804 sb.append(c);
805 }
806 return sb.toString();
807 }
808
809 /**
810 * Implements a non-terminal append-and-replace step.
811 *
812 * <p> This method performs the following actions: </p>
813 *
814 * <ol>
815 *
816 * <li><p> It reads characters from the input sequence, starting at the
817 * append position, and appends them to the given string buffer. It
818 * stops after reading the last character preceding the previous match,
819 * that is, the character at index {@link
820 * #start()} {@code -} {@code 1}. </p></li>
821 *
822 * <li><p> It appends the given replacement string to the string buffer.
823 * </p></li>
824 *
825 * <li><p> It sets the append position of this matcher to the index of
826 * the last character matched, plus one, that is, to {@link #end()}.
827 * </p></li>
828 *
829 * </ol>
830 *
831 * <p> The replacement string may contain references to subsequences
832 * captured during the previous match: Each occurrence of
833 * <code>${</code><i>name</i><code>}</code> or {@code $}<i>g</i>
834 * will be replaced by the result of evaluating the corresponding
835 * {@link #group(String) group(name)} or {@link #group(int) group(g)}
836 * respectively. For {@code $}<i>g</i>,
837 * the first number after the {@code $} is always treated as part of
838 * the group reference. Subsequent numbers are incorporated into g if
839 * they would form a legal group reference. Only the numerals '0'
840 * through '9' are considered as potential components of the group
841 * reference. If the second group matched the string {@code "foo"}, for
842 * example, then passing the replacement string {@code "$2bar"} would
843 * cause {@code "foobar"} to be appended to the string buffer. A dollar
844 * sign ({@code $}) may be included as a literal in the replacement
845 * string by preceding it with a backslash ({@code \$}).
846 *
847 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
848 * the replacement string may cause the results to be different than if it
849 * were being treated as a literal replacement string. Dollar signs may be
850 * treated as references to captured subsequences as described above, and
851 * backslashes are used to escape literal characters in the replacement
852 * string.
853 *
854 * <p> This method is intended to be used in a loop together with the
855 * {@link #appendTail appendTail} and {@link #find find} methods. The
856 * following code, for example, writes {@code one dog two dogs in the
857 * yard} to the standard-output stream: </p>
858 *
859 * <blockquote><pre>
860 * Pattern p = Pattern.compile("cat");
861 * Matcher m = p.matcher("one cat two cats in the yard");
862 * StringBuffer sb = new StringBuffer();
863 * while (m.find()) {
864 * m.appendReplacement(sb, "dog");
865 * }
866 * m.appendTail(sb);
867 * System.out.println(sb.toString());</pre></blockquote>
868 *
869 * @param sb
870 * The target string buffer
871 *
872 * @param replacement
873 * The replacement string
874 *
875 * @return This matcher
876 *
877 * @throws IllegalStateException
895 // Append the intervening text
896 sb.append(text, lastAppendPosition, first);
897 // Append the match substitution
898 sb.append(result);
899 lastAppendPosition = last;
900 modCount++;
901 return this;
902 }
903
904 /**
905 * Implements a non-terminal append-and-replace step.
906 *
907 * <p> This method performs the following actions: </p>
908 *
909 * <ol>
910 *
911 * <li><p> It reads characters from the input sequence, starting at the
912 * append position, and appends them to the given string builder. It
913 * stops after reading the last character preceding the previous match,
914 * that is, the character at index {@link
915 * #start()} {@code -} {@code 1}. </p></li>
916 *
917 * <li><p> It appends the given replacement string to the string builder.
918 * </p></li>
919 *
920 * <li><p> It sets the append position of this matcher to the index of
921 * the last character matched, plus one, that is, to {@link #end()}.
922 * </p></li>
923 *
924 * </ol>
925 *
926 * <p> The replacement string may contain references to subsequences
927 * captured during the previous match: Each occurrence of
928 * {@code $}<i>g</i> will be replaced by the result of
929 * evaluating {@link #group(int) group}{@code (}<i>g</i>{@code )}.
930 * The first number after the {@code $} is always treated as part of
931 * the group reference. Subsequent numbers are incorporated into g if
932 * they would form a legal group reference. Only the numerals '0'
933 * through '9' are considered as potential components of the group
934 * reference. If the second group matched the string {@code "foo"}, for
935 * example, then passing the replacement string {@code "$2bar"} would
936 * cause {@code "foobar"} to be appended to the string builder. A dollar
937 * sign ({@code $}) may be included as a literal in the replacement
938 * string by preceding it with a backslash ({@code \$}).
939 *
940 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
941 * the replacement string may cause the results to be different than if it
942 * were being treated as a literal replacement string. Dollar signs may be
943 * treated as references to captured subsequences as described above, and
944 * backslashes are used to escape literal characters in the replacement
945 * string.
946 *
947 * <p> This method is intended to be used in a loop together with the
948 * {@link #appendTail appendTail} and {@link #find find} methods. The
949 * following code, for example, writes {@code one dog two dogs in the
950 * yard} to the standard-output stream: </p>
951 *
952 * <blockquote><pre>
953 * Pattern p = Pattern.compile("cat");
954 * Matcher m = p.matcher("one cat two cats in the yard");
955 * StringBuilder sb = new StringBuilder();
956 * while (m.find()) {
957 * m.appendReplacement(sb, "dog");
958 * }
959 * m.appendTail(sb);
960 * System.out.println(sb.toString());</pre></blockquote>
961 *
962 * @param sb
963 * The target string builder
964 * @param replacement
965 * The replacement string
966 * @return This matcher
967 *
968 * @throws IllegalStateException
969 * If no match has yet been attempted,
970 * or if the previous match operation failed
1118 * @return The target string builder
1119 *
1120 * @since 1.9
1121 */
1122 public StringBuilder appendTail(StringBuilder sb) {
1123 sb.append(text, lastAppendPosition, getTextLength());
1124 return sb;
1125 }
1126
1127 /**
1128 * Replaces every subsequence of the input sequence that matches the
1129 * pattern with the given replacement string.
1130 *
1131 * <p> This method first resets this matcher. It then scans the input
1132 * sequence looking for matches of the pattern. Characters that are not
1133 * part of any match are appended directly to the result string; each match
1134 * is replaced in the result by the replacement string. The replacement
1135 * string may contain references to captured subsequences as in the {@link
1136 * #appendReplacement appendReplacement} method.
1137 *
1138 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1139 * the replacement string may cause the results to be different than if it
1140 * were being treated as a literal replacement string. Dollar signs may be
1141 * treated as references to captured subsequences as described above, and
1142 * backslashes are used to escape literal characters in the replacement
1143 * string.
1144 *
1145 * <p> Given the regular expression {@code a*b}, the input
1146 * {@code "aabfooaabfooabfoob"}, and the replacement string
1147 * {@code "-"}, an invocation of this method on a matcher for that
1148 * expression would yield the string {@code "-foo-foo-foo-"}.
1149 *
1150 * <p> Invoking this method changes this matcher's state. If the matcher
1151 * is to be used in further matching operations then it should first be
1152 * reset. </p>
1153 *
1154 * @param replacement
1155 * The replacement string
1156 *
1157 * @return The string constructed by replacing each matching subsequence
1158 * by the replacement string, substituting captured subsequences
1159 * as needed
1160 */
1161 public String replaceAll(String replacement) {
1162 reset();
1163 boolean result = find();
1164 if (result) {
1165 StringBuilder sb = new StringBuilder();
1166 do {
1167 appendReplacement(sb, replacement);
1168 result = find();
1170 appendTail(sb);
1171 return sb.toString();
1172 }
1173 return text.toString();
1174 }
1175
1176 /**
1177 * Replaces every subsequence of the input sequence that matches the
1178 * pattern with the result of applying the given replacer function to the
1179 * match result of this matcher corresponding to that subsequence.
1180 * Exceptions thrown by the function are relayed to the caller.
1181 *
1182 * <p> This method first resets this matcher. It then scans the input
1183 * sequence looking for matches of the pattern. Characters that are not
1184 * part of any match are appended directly to the result string; each match
1185 * is replaced in the result by applying the replacer function that
1186 * returns a replacement string. Each replacement string may contain
1187 * references to captured subsequences as in the {@link #appendReplacement
1188 * appendReplacement} method.
1189 *
1190 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1191 * a replacement string may cause the results to be different than if it
1192 * were being treated as a literal replacement string. Dollar signs may be
1193 * treated as references to captured subsequences as described above, and
1194 * backslashes are used to escape literal characters in the replacement
1195 * string.
1196 *
1197 * <p> Given the regular expression {@code dog}, the input
1198 * {@code "zzzdogzzzdogzzz"}, and the function
1199 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
1200 * a matcher for that expression would yield the string
1201 * {@code "zzzDOGzzzDOGzzz"}.
1202 *
1203 * <p> Invoking this method changes this matcher's state. If the matcher
1204 * is to be used in further matching operations then it should first be
1205 * reset. </p>
1206 *
1207 * <p> The replacer function should not modify this matcher's state during
1208 * replacement. This method will, on a best-effort basis, throw a
1209 * {@link java.util.ConcurrentModificationException} if such modification is
1210 * detected.
1211 *
1212 * <p> The state of each match result passed to the replacer function is
1213 * guaranteed to be constant only for the duration of the replacer function
1214 * call and only if the replacer function does not modify this matcher's
1215 * state.
1216 *
1217 * @implNote
1218 * This implementation applies the replacer function to this matcher, which
1219 * is an instance of {@code MatchResult}.
1220 *
1221 * @param replacer
1344 if (ec != modCount)
1345 throw new ConcurrentModificationException();
1346 } while (find());
1347 }
1348 }
1349 return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
1350 new MatchResultIterator(), Spliterator.ORDERED | Spliterator.NONNULL), false);
1351 }
1352
1353 /**
1354 * Replaces the first subsequence of the input sequence that matches the
1355 * pattern with the given replacement string.
1356 *
1357 * <p> This method first resets this matcher. It then scans the input
1358 * sequence looking for a match of the pattern. Characters that are not
1359 * part of the match are appended directly to the result string; the match
1360 * is replaced in the result by the replacement string. The replacement
1361 * string may contain references to captured subsequences as in the {@link
1362 * #appendReplacement appendReplacement} method.
1363 *
1364 * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1365 * the replacement string may cause the results to be different than if it
1366 * were being treated as a literal replacement string. Dollar signs may be
1367 * treated as references to captured subsequences as described above, and
1368 * backslashes are used to escape literal characters in the replacement
1369 * string.
1370 *
1371 * <p> Given the regular expression {@code dog}, the input
1372 * {@code "zzzdogzzzdogzzz"}, and the replacement string
1373 * {@code "cat"}, an invocation of this method on a matcher for that
1374 * expression would yield the string {@code "zzzcatzzzdogzzz"}. </p>
1375 *
1376 * <p> Invoking this method changes this matcher's state. If the matcher
1377 * is to be used in further matching operations then it should first be
1378 * reset. </p>
1379 *
1380 * @param replacement
1381 * The replacement string
1382 * @return The string constructed by replacing the first matching
1383 * subsequence by the replacement string, substituting captured
1384 * subsequences as needed
1385 */
1386 public String replaceFirst(String replacement) {
1387 if (replacement == null)
1388 throw new NullPointerException("replacement");
1389 reset();
1390 if (!find())
1391 return text.toString();
1392 StringBuilder sb = new StringBuilder();
1393 appendReplacement(sb, replacement);
1394 appendTail(sb);
1395 return sb.toString();
1396 }
1397
1398 /**
1399 * Replaces the first subsequence of the input sequence that matches the
1400 * pattern with the result of applying the given replacer function to the
1401 * match result of this matcher corresponding to that subsequence.
1402 * Exceptions thrown by the replacer function are relayed to the caller.
1403 *
1404 * <p> This method first resets this matcher. It then scans the input
1405 * sequence looking for a match of the pattern. Characters that are not
1406 * part of the match are appended directly to the result string; the match
1407 * is replaced in the result by applying the replacer function that
1408 * returns a replacement string. The replacement string may contain
1409 * references to captured subsequences as in the {@link #appendReplacement
1410 * appendReplacement} method.
1411 *
1412 * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1413 * the replacement string may cause the results to be different than if it
1414 * were being treated as a literal replacement string. Dollar signs may be
1415 * treated as references to captured subsequences as described above, and
1416 * backslashes are used to escape literal characters in the replacement
1417 * string.
1418 *
1419 * <p> Given the regular expression {@code dog}, the input
1420 * {@code "zzzdogzzzdogzzz"}, and the function
1421 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
1422 * a matcher for that expression would yield the string
1423 * {@code "zzzDOGzzzdogzzz"}.
1424 *
1425 * <p> Invoking this method changes this matcher's state. If the matcher
1426 * is to be used in further matching operations then it should first be
1427 * reset.
1428 *
1429 * <p> The replacer function should not modify this matcher's state during
1430 * replacement. This method will, on a best-effort basis, throw a
1431 * {@link java.util.ConcurrentModificationException} if such modification is
1432 * detected.
1433 *
1434 * <p> The state of the match result passed to the replacer function is
1435 * guaranteed to be constant only for the duration of the replacer function
1436 * call and only if the replacer function does not modify this matcher's
1437 * state.
1438 *
1439 * @implNote
1440 * This implementation applies the replacer function to this matcher, which
1441 * is an instance of {@code MatchResult}.
1442 *
1443 * @param replacer
1455 */
1456 public String replaceFirst(Function<MatchResult, String> replacer) {
1457 Objects.requireNonNull(replacer);
1458 reset();
1459 if (!find())
1460 return text.toString();
1461 StringBuilder sb = new StringBuilder();
1462 int ec = modCount;
1463 String replacement = replacer.apply(this);
1464 if (ec != modCount)
1465 throw new ConcurrentModificationException();
1466 appendReplacement(sb, replacement);
1467 appendTail(sb);
1468 return sb.toString();
1469 }
1470
1471 /**
1472 * Sets the limits of this matcher's region. The region is the part of the
1473 * input sequence that will be searched to find a match. Invoking this
1474 * method resets the matcher, and then sets the region to start at the
1475 * index specified by the {@code start} parameter and end at the
1476 * index specified by the {@code end} parameter.
1477 *
1478 * <p>Depending on the transparency and anchoring being used (see
1479 * {@link #useTransparentBounds useTransparentBounds} and
1480 * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such
1481 * as anchors may behave differently at or around the boundaries of the
1482 * region.
1483 *
1484 * @param start
1485 * The index to start searching at (inclusive)
1486 * @param end
1487 * The index to end searching at (exclusive)
1488 * @throws IndexOutOfBoundsException
1489 * If start or end is less than zero, if
1490 * start is greater than the length of the input sequence, if
1491 * end is greater than the length of the input sequence, or if
1492 * start is greater than end.
1493 * @return this matcher
1494 * @since 1.5
1495 */
1496 public Matcher region(int start, int end) {
1518 public int regionStart() {
1519 return from;
1520 }
1521
1522 /**
1523 * Reports the end index (exclusive) of this matcher's region.
1524 * The searches this matcher conducts are limited to finding matches
1525 * within {@link #regionStart regionStart} (inclusive) and
1526 * {@link #regionEnd regionEnd} (exclusive).
1527 *
1528 * @return the ending point of this matcher's region
1529 * @since 1.5
1530 */
1531 public int regionEnd() {
1532 return to;
1533 }
1534
1535 /**
1536 * Queries the transparency of region bounds for this matcher.
1537 *
1538 * <p> This method returns {@code true} if this matcher uses
1539 * <i>transparent</i> bounds, {@code false} if it uses <i>opaque</i>
1540 * bounds.
1541 *
1542 * <p> See {@link #useTransparentBounds useTransparentBounds} for a
1543 * description of transparent and opaque bounds.
1544 *
1545 * <p> By default, a matcher uses opaque region boundaries.
1546 *
1547 * @return {@code true} iff this matcher is using transparent bounds,
1548 * {@code false} otherwise.
1549 * @see java.util.regex.Matcher#useTransparentBounds(boolean)
1550 * @since 1.5
1551 */
1552 public boolean hasTransparentBounds() {
1553 return transparentBounds;
1554 }
1555
1556 /**
1557 * Sets the transparency of region bounds for this matcher.
1558 *
1559 * <p> Invoking this method with an argument of {@code true} will set this
1560 * matcher to use <i>transparent</i> bounds. If the boolean
1561 * argument is {@code false}, then <i>opaque</i> bounds will be used.
1562 *
1563 * <p> Using transparent bounds, the boundaries of this
1564 * matcher's region are transparent to lookahead, lookbehind,
1565 * and boundary matching constructs. Those constructs can see beyond the
1566 * boundaries of the region to see if a match is appropriate.
1567 *
1568 * <p> Using opaque bounds, the boundaries of this matcher's
1569 * region are opaque to lookahead, lookbehind, and boundary matching
1570 * constructs that may try to see beyond them. Those constructs cannot
1571 * look past the boundaries so they will fail to match anything outside
1572 * of the region.
1573 *
1574 * <p> By default, a matcher uses opaque bounds.
1575 *
1576 * @param b {@code true} to use transparent bounds, {@code false} to use
1577 * opaque bounds
1578 * @return this matcher
1579 * @see java.util.regex.Matcher#hasTransparentBounds
1580 * @since 1.5
1581 */
1582 public Matcher useTransparentBounds(boolean b) {
1583 transparentBounds = b;
1584 return this;
1585 }
1586
1587 /**
1588 * Queries the anchoring of region bounds for this matcher.
1589 *
1590 * <p> This method returns {@code true} if this matcher uses
1591 * <i>anchoring</i> bounds, {@code false} otherwise.
1592 *
1593 * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a
1594 * description of anchoring bounds.
1595 *
1596 * <p> By default, a matcher uses anchoring region boundaries.
1597 *
1598 * @return {@code true} iff this matcher is using anchoring bounds,
1599 * {@code false} otherwise.
1600 * @see java.util.regex.Matcher#useAnchoringBounds(boolean)
1601 * @since 1.5
1602 */
1603 public boolean hasAnchoringBounds() {
1604 return anchoringBounds;
1605 }
1606
1607 /**
1608 * Sets the anchoring of region bounds for this matcher.
1609 *
1610 * <p> Invoking this method with an argument of {@code true} will set this
1611 * matcher to use <i>anchoring</i> bounds. If the boolean
1612 * argument is {@code false}, then <i>non-anchoring</i> bounds will be
1613 * used.
1614 *
1615 * <p> Using anchoring bounds, the boundaries of this
1616 * matcher's region match anchors such as ^ and $.
1617 *
1618 * <p> Without anchoring bounds, the boundaries of this
1619 * matcher's region will not match anchors such as ^ and $.
1620 *
1621 * <p> By default, a matcher uses anchoring region boundaries.
1622 *
1623 * @param b a boolean indicating whether or not to use anchoring bounds.
1624 * @return this matcher
1625 * @see java.util.regex.Matcher#hasAnchoringBounds
1626 * @since 1.5
1627 */
1628 public Matcher useAnchoringBounds(boolean b) {
1629 anchoringBounds = b;
1630 return this;
1631 }
1632
1633 /**
1634 * <p>Returns the string representation of this matcher. The
1635 * string representation of a {@code Matcher} contains information
1636 * that may be useful for debugging. The exact format is unspecified.
1637 *
1638 * @return The string representation of this matcher
1639 * @since 1.5
1640 */
1641 public String toString() {
1642 StringBuilder sb = new StringBuilder();
1643 sb.append("java.util.regex.Matcher")
1644 .append("[pattern=").append(pattern())
1645 .append(" region=")
1646 .append(regionStart()).append(',').append(regionEnd())
1647 .append(" lastmatch=");
1648 if ((first >= 0) && (group() != null)) {
1649 sb.append(group());
1650 }
1651 sb.append(']');
1652 return sb.toString();
1653 }
1654
1655 /**
|