src/share/classes/java/util/regex/Matcher.java

Print this page




 474      *
 475      * @throws  IllegalStateException
 476      *          If no match has yet been attempted,
 477      *          or if the previous match operation failed
 478      *
 479      * @throws  IndexOutOfBoundsException
 480      *          If there is no capturing group in the pattern
 481      *          with the given index
 482      */
 483     public String group(int group) {
 484         if (first < 0)
 485             throw new IllegalStateException("No match found");
 486         if (group < 0 || group > groupCount())
 487             throw new IndexOutOfBoundsException("No group " + group);
 488         if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
 489             return null;
 490         return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
 491     }
 492 
 493     /**







































 494      * Returns the number of capturing groups in this matcher's pattern.
 495      *
 496      * <p> Group zero denotes the entire pattern by convention. It is not
 497      * included in this count.
 498      *
 499      * <p> Any non-negative integer smaller than or equal to the value
 500      * returned by this method is guaranteed to be a valid group index for
 501      * this matcher.  </p>
 502      *
 503      * @return The number of capturing groups in this matcher's pattern
 504      */
 505     public int groupCount() {
 506         return parentPattern.capturingGroupCount - 1;
 507     }
 508 
 509     /**
 510      * Attempts to match the entire region against the pattern.
 511      *
 512      * <p> If the match succeeds then more information can be obtained via the
 513      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>


 632      *
 633      * <ol>
 634      *
 635      *   <li><p> It reads characters from the input sequence, starting at the
 636      *   append position, and appends them to the given string buffer.  It
 637      *   stops after reading the last character preceding the previous match,
 638      *   that is, the character at index {@link
 639      *   #start()}&nbsp;<tt>-</tt>&nbsp;<tt>1</tt>.  </p></li>
 640      *
 641      *   <li><p> It appends the given replacement string to the string buffer.
 642      *   </p></li>
 643      *
 644      *   <li><p> It sets the append position of this matcher to the index of
 645      *   the last character matched, plus one, that is, to {@link #end()}.
 646      *   </p></li>
 647      *
 648      * </ol>
 649      *
 650      * <p> The replacement string may contain references to subsequences
 651      * captured during the previous match: Each occurrence of
 652      * <tt>$</tt><i>g</i> will be replaced by the result of
 653      * evaluating {@link #group(int) group}<tt>(</tt><i>g</i><tt>)</tt>.
 654      * The first number after the <tt>$</tt> is always treated as part of


 655      * the group reference. Subsequent numbers are incorporated into g if
 656      * they would form a legal group reference. Only the numerals '0'
 657      * through '9' are considered as potential components of the group
 658      * reference. If the second group matched the string <tt>"foo"</tt>, for
 659      * example, then passing the replacement string <tt>"$2bar"</tt> would
 660      * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar
 661      * sign (<tt>$</tt>) may be included as a literal in the replacement
 662      * string by preceding it with a backslash (<tt>\$</tt>).
 663      *
 664      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
 665      * the replacement string may cause the results to be different than if it
 666      * were being treated as a literal replacement string. Dollar signs may be
 667      * treated as references to captured subsequences as described above, and
 668      * backslashes are used to escape literal characters in the replacement
 669      * string.
 670      *
 671      * <p> This method is intended to be used in a loop together with the
 672      * {@link #appendTail appendTail} and {@link #find find} methods.  The
 673      * following code, for example, writes <tt>one dog two dogs in the
 674      * yard</tt> to the standard-output stream: </p>


 678      * Matcher m = p.matcher("one cat two cats in the yard");
 679      * StringBuffer sb = new StringBuffer();
 680      * while (m.find()) {
 681      *     m.appendReplacement(sb, "dog");
 682      * }
 683      * m.appendTail(sb);
 684      * System.out.println(sb.toString());</pre></blockquote>
 685      *
 686      * @param  sb
 687      *         The target string buffer
 688      *
 689      * @param  replacement
 690      *         The replacement string
 691      *
 692      * @return  This matcher
 693      *
 694      * @throws  IllegalStateException
 695      *          If no match has yet been attempted,
 696      *          or if the previous match operation failed
 697      *




 698      * @throws  IndexOutOfBoundsException
 699      *          If the replacement string refers to a capturing group
 700      *          that does not exist in the pattern
 701      */
 702     public Matcher appendReplacement(StringBuffer sb, String replacement) {
 703 
 704         // If no match, return error
 705         if (first < 0)
 706             throw new IllegalStateException("No match available");
 707 
 708         // Process substitution string to replace group references with groups
 709         int cursor = 0;
 710         StringBuilder result = new StringBuilder();
 711 
 712         while (cursor < replacement.length()) {
 713             char nextChar = replacement.charAt(cursor);
 714             if (nextChar == '\\') {
 715                 cursor++;
 716                 nextChar = replacement.charAt(cursor);
 717                 result.append(nextChar);
 718                 cursor++;
 719             } else if (nextChar == '$') {
 720                 // Skip past $
 721                 cursor++;

































 722                 // The first number is always a group
 723                 int refNum = (int)replacement.charAt(cursor) - '0';
 724                 if ((refNum < 0)||(refNum > 9))
 725                     throw new IllegalArgumentException(
 726                         "Illegal group reference");
 727                 cursor++;
 728 
 729                 // Capture the largest legal group string
 730                 boolean done = false;
 731                 while (!done) {
 732                     if (cursor >= replacement.length()) {
 733                         break;
 734                     }
 735                     int nextDigit = replacement.charAt(cursor) - '0';
 736                     if ((nextDigit < 0)||(nextDigit > 9)) { // not a number
 737                         break;
 738                     }
 739                     int newRefNum = (refNum * 10) + nextDigit;
 740                     if (groupCount() < newRefNum) {
 741                         done = true;
 742                     } else {
 743                         refNum = newRefNum;
 744                         cursor++;
 745                     }
 746                 }

 747                 // Append group
 748                 if (start(refNum) != -1 && end(refNum) != -1)
 749                     result.append(text, start(refNum), end(refNum));
 750             } else {
 751                 result.append(nextChar);
 752                 cursor++;
 753             }
 754         }
 755         // Append the intervening text
 756         sb.append(text, lastAppendPosition, first);
 757         // Append the match substitution
 758         sb.append(result);
 759 
 760         lastAppendPosition = last;
 761         return this;
 762     }
 763 
 764     /**
 765      * Implements a terminal append-and-replace step.
 766      *




 474      *
 475      * @throws  IllegalStateException
 476      *          If no match has yet been attempted,
 477      *          or if the previous match operation failed
 478      *
 479      * @throws  IndexOutOfBoundsException
 480      *          If there is no capturing group in the pattern
 481      *          with the given index
 482      */
 483     public String group(int group) {
 484         if (first < 0)
 485             throw new IllegalStateException("No match found");
 486         if (group < 0 || group > groupCount())
 487             throw new IndexOutOfBoundsException("No group " + group);
 488         if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
 489             return null;
 490         return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
 491     }
 492 
 493     /**
 494      * Returns the input subsequence captured by the given
 495      * <a href="Pattern.html#groupname">named-capturing group</a> during the previous
 496      * match operation.
 497      *
 498      * <p> If the match was successful but the group specified failed to match
 499      * any part of the input sequence, then <tt>null</tt> is returned. Note
 500      * that some groups, for example <tt>(a*)</tt>, match the empty string.
 501      * This method will return the empty string when such a group successfully
 502      * matches the empty string in the input.  </p>
 503      *
 504      * @param  name
 505      *         The name of a named-capturing group in this matcher's pattern
 506      *
 507      * @return  The (possibly empty) subsequence captured by the named group
 508      *          during the previous match, or <tt>null</tt> if the group
 509      *          failed to match part of the input
 510      *
 511      * @throws  IllegalStateException
 512      *          If no match has yet been attempted,
 513      *          or if the previous match operation failed
 514      *
 515      * @throws  IllegalArgumentException
 516      *          If there is no capturing group in the pattern
 517      *          with the given name
 518      */
 519     public String group(String name) {
 520         if (name == null)
 521             throw new NullPointerException("Null group name");
 522         if (first < 0)
 523             throw new IllegalStateException("No match found");
 524         if (!parentPattern.namedGroups().containsKey(name))
 525             throw new IllegalArgumentException("No group with name <" + name + ">");
 526         int group = parentPattern.namedGroups().get(name);
 527         if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
 528             return null;
 529         return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
 530     }
 531 
 532     /**
 533      * Returns the number of capturing groups in this matcher's pattern.
 534      *
 535      * <p> Group zero denotes the entire pattern by convention. It is not
 536      * included in this count.
 537      *
 538      * <p> Any non-negative integer smaller than or equal to the value
 539      * returned by this method is guaranteed to be a valid group index for
 540      * this matcher.  </p>
 541      *
 542      * @return The number of capturing groups in this matcher's pattern
 543      */
 544     public int groupCount() {
 545         return parentPattern.capturingGroupCount - 1;
 546     }
 547 
 548     /**
 549      * Attempts to match the entire region against the pattern.
 550      *
 551      * <p> If the match succeeds then more information can be obtained via the
 552      * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods.  </p>


 671      *
 672      * <ol>
 673      *
 674      *   <li><p> It reads characters from the input sequence, starting at the
 675      *   append position, and appends them to the given string buffer.  It
 676      *   stops after reading the last character preceding the previous match,
 677      *   that is, the character at index {@link
 678      *   #start()}&nbsp;<tt>-</tt>&nbsp;<tt>1</tt>.  </p></li>
 679      *
 680      *   <li><p> It appends the given replacement string to the string buffer.
 681      *   </p></li>
 682      *
 683      *   <li><p> It sets the append position of this matcher to the index of
 684      *   the last character matched, plus one, that is, to {@link #end()}.
 685      *   </p></li>
 686      *
 687      * </ol>
 688      *
 689      * <p> The replacement string may contain references to subsequences
 690      * captured during the previous match: Each occurrence of
 691      * <tt>$</tt>&lt;<i>name</i>&gt; or <tt>$</tt><i>g</i>
 692      * will be replaced by the result of evaluating the corresponding
 693      * {@link #group(String) group(name)} or {@link #group(int) group(g)}
 694      * respectively. For <tt>$</tt><i>g</i>,
 695      * the first number after the <tt>$</tt> is always treated as part of
 696      * the group reference. Subsequent numbers are incorporated into g if
 697      * they would form a legal group reference. Only the numerals '0'
 698      * through '9' are considered as potential components of the group
 699      * reference. If the second group matched the string <tt>"foo"</tt>, for
 700      * example, then passing the replacement string <tt>"$2bar"</tt> would
 701      * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar
 702      * sign (<tt>$</tt>) may be included as a literal in the replacement
 703      * string by preceding it with a backslash (<tt>\$</tt>).
 704      *
 705      * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in
 706      * the replacement string may cause the results to be different than if it
 707      * were being treated as a literal replacement string. Dollar signs may be
 708      * treated as references to captured subsequences as described above, and
 709      * backslashes are used to escape literal characters in the replacement
 710      * string.
 711      *
 712      * <p> This method is intended to be used in a loop together with the
 713      * {@link #appendTail appendTail} and {@link #find find} methods.  The
 714      * following code, for example, writes <tt>one dog two dogs in the
 715      * yard</tt> to the standard-output stream: </p>


 719      * Matcher m = p.matcher("one cat two cats in the yard");
 720      * StringBuffer sb = new StringBuffer();
 721      * while (m.find()) {
 722      *     m.appendReplacement(sb, "dog");
 723      * }
 724      * m.appendTail(sb);
 725      * System.out.println(sb.toString());</pre></blockquote>
 726      *
 727      * @param  sb
 728      *         The target string buffer
 729      *
 730      * @param  replacement
 731      *         The replacement string
 732      *
 733      * @return  This matcher
 734      *
 735      * @throws  IllegalStateException
 736      *          If no match has yet been attempted,
 737      *          or if the previous match operation failed
 738      *
 739      * @throws  IllegalArgumentException
 740      *          If the replacement string refers to a named-capturing
 741      *          group that does not exist in the pattern
 742      *
 743      * @throws  IndexOutOfBoundsException
 744      *          If the replacement string refers to a capturing group
 745      *          that does not exist in the pattern
 746      */
 747     public Matcher appendReplacement(StringBuffer sb, String replacement) {
 748 
 749         // If no match, return error
 750         if (first < 0)
 751             throw new IllegalStateException("No match available");
 752 
 753         // Process substitution string to replace group references with groups
 754         int cursor = 0;
 755         StringBuilder result = new StringBuilder();
 756 
 757         while (cursor < replacement.length()) {
 758             char nextChar = replacement.charAt(cursor);
 759             if (nextChar == '\\') {
 760                 cursor++;
 761                 nextChar = replacement.charAt(cursor);
 762                 result.append(nextChar);
 763                 cursor++;
 764             } else if (nextChar == '$') {
 765                 // Skip past $
 766                 cursor++;
 767                 // A StringIndexOutOfBoundsException is thrown if
 768                 // this "$" is the last character in replacement
 769                 // string in current implementation, a IAE might be
 770                 // more appropriate.
 771                 nextChar = replacement.charAt(cursor);
 772                 int refNum = -1;
 773                 if (nextChar == '<') {
 774                     cursor++;
 775                     StringBuilder gsb = new StringBuilder();
 776                     while (cursor < replacement.length()) {
 777                         nextChar = replacement.charAt(cursor);
 778                         if (ASCII.isLower(nextChar) ||
 779                             ASCII.isUpper(nextChar) || 
 780                             ASCII.isDigit(nextChar)) {
 781                             gsb.append(nextChar);
 782                             cursor++;
 783                         } else {
 784                             break;
 785                         }
 786                     }
 787                     if (gsb.length() == 0)
 788                         throw new IllegalArgumentException(
 789                             "named capturing group has 0 length name");
 790                     if (nextChar != '>')
 791                         throw new IllegalArgumentException(
 792                             "named capturing group is missing trailing '>'");
 793                     String gname = gsb.toString();
 794                     if (!parentPattern.namedGroups().containsKey(gname))
 795                         throw new IllegalArgumentException(
 796                             "No group with name <" + gname + ">");
 797                     refNum = parentPattern.namedGroups().get(gname);
 798                     cursor++;
 799                 } else {
 800                     // The first number is always a group
 801                     refNum = (int)nextChar - '0';
 802                     if ((refNum < 0)||(refNum > 9))
 803                         throw new IllegalArgumentException(
 804                             "Illegal group reference");
 805                     cursor++;

 806                     // Capture the largest legal group string
 807                     boolean done = false;
 808                     while (!done) {
 809                         if (cursor >= replacement.length()) {
 810                             break;
 811                         }
 812                         int nextDigit = replacement.charAt(cursor) - '0';
 813                         if ((nextDigit < 0)||(nextDigit > 9)) { // not a number
 814                             break;
 815                         }
 816                         int newRefNum = (refNum * 10) + nextDigit;
 817                         if (groupCount() < newRefNum) {
 818                             done = true;
 819                         } else {
 820                             refNum = newRefNum;
 821                             cursor++;
 822                         }
 823                     }
 824                 }
 825                 // Append group
 826                 if (start(refNum) != -1 && end(refNum) != -1)
 827                     result.append(text, start(refNum), end(refNum));
 828             } else {
 829                 result.append(nextChar);
 830                 cursor++;
 831             }
 832         }
 833         // Append the intervening text
 834         sb.append(text, lastAppendPosition, first);
 835         // Append the match substitution
 836         sb.append(result);
 837 
 838         lastAppendPosition = last;
 839         return this;
 840     }
 841 
 842     /**
 843      * Implements a terminal append-and-replace step.
 844      *