< prev index next >

src/java.base/share/classes/java/util/StringTokenizer.java

Print this page




  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.util;
  27 
  28 import java.lang.*;
  29 
  30 /**
  31  * The string tokenizer class allows an application to break a
  32  * string into tokens. The tokenization method is much simpler than
  33  * the one used by the <code>StreamTokenizer</code> class. The
  34  * <code>StringTokenizer</code> methods do not distinguish among
  35  * identifiers, numbers, and quoted strings, nor do they recognize
  36  * and skip comments.
  37  * <p>
  38  * The set of delimiters (the characters that separate tokens) may
  39  * be specified either at creation time or on a per-token basis.
  40  * <p>
  41  * An instance of <code>StringTokenizer</code> behaves in one of two
  42  * ways, depending on whether it was created with the
  43  * <code>returnDelims</code> flag having the value <code>true</code>
  44  * or <code>false</code>:
  45  * <ul>
  46  * <li>If the flag is <code>false</code>, delimiter characters serve to
  47  *     separate tokens. A token is a maximal sequence of consecutive
  48  *     characters that are not delimiters.
  49  * <li>If the flag is <code>true</code>, delimiter characters are themselves
  50  *     considered to be tokens. A token is thus either one delimiter
  51  *     character, or a maximal sequence of consecutive characters that are
  52  *     not delimiters.
  53  * </ul><p>
  54  * A <tt>StringTokenizer</tt> object internally maintains a current
  55  * position within the string to be tokenized. Some operations advance this
  56  * current position past the characters processed.<p>
  57  * A token is returned by taking a substring of the string that was used to
  58  * create the <tt>StringTokenizer</tt> object.
  59  * <p>
  60  * The following is one example of the use of the tokenizer. The code:
  61  * <blockquote><pre>
  62  *     StringTokenizer st = new StringTokenizer("this is a test");
  63  *     while (st.hasMoreTokens()) {
  64  *         System.out.println(st.nextToken());
  65  *     }
  66  * </pre></blockquote>
  67  * <p>
  68  * prints the following output:
  69  * <blockquote><pre>
  70  *     this
  71  *     is
  72  *     a
  73  *     test
  74  * </pre></blockquote>
  75  *
  76  * <p>
  77  * <tt>StringTokenizer</tt> is a legacy class that is retained for
  78  * compatibility reasons although its use is discouraged in new code. It is
  79  * recommended that anyone seeking this functionality use the <tt>split</tt>
  80  * method of <tt>String</tt> or the java.util.regex package instead.
  81  * <p>
  82  * The following example illustrates how the <tt>String.split</tt>
  83  * method can be used to break up a string into its basic tokens:
  84  * <blockquote><pre>
  85  *     String[] result = "this is a test".split("\\s");
  86  *     for (int x=0; x&lt;result.length; x++)
  87  *         System.out.println(result[x]);
  88  * </pre></blockquote>
  89  * <p>
  90  * prints the following output:
  91  * <blockquote><pre>
  92  *     this
  93  *     is
  94  *     a
  95  *     test
  96  * </pre></blockquote>
  97  *
  98  * @author  unascribed
  99  * @see     java.io.StreamTokenizer
 100  * @since   1.0
 101  */
 102 public


 154                 c = delimiters.codePointAt(i);
 155                 hasSurrogates = true;
 156             }
 157             if (m < c)
 158                 m = c;
 159             count++;
 160         }
 161         maxDelimCodePoint = m;
 162 
 163         if (hasSurrogates) {
 164             delimiterCodePoints = new int[count];
 165             for (int i = 0, j = 0; i < count; i++, j += Character.charCount(c)) {
 166                 c = delimiters.codePointAt(j);
 167                 delimiterCodePoints[i] = c;
 168             }
 169         }
 170     }
 171 
 172     /**
 173      * Constructs a string tokenizer for the specified string. All
 174      * characters in the <code>delim</code> argument are the delimiters
 175      * for separating tokens.
 176      * <p>
 177      * If the <code>returnDelims</code> flag is <code>true</code>, then
 178      * the delimiter characters are also returned as tokens. Each
 179      * delimiter is returned as a string holding a single code point (one
 180      * or two chars). If the flag is <code>false</code>, the delimiter
 181      * characters are skipped and only serve as separators between tokens.
 182      * <p>
 183      * Note that if <tt>delim</tt> is <tt>null</tt>, this constructor does
 184      * not throw an exception. However, trying to invoke other methods on the
 185      * resulting <tt>StringTokenizer</tt> may result in a
 186      * <tt>NullPointerException</tt>.
 187      *
 188      * @param   str            a string to be parsed.
 189      * @param   delim          the delimiters.
 190      * @param   returnDelims   flag indicating whether to return the delimiters
 191      *                         as tokens.
 192      * @exception NullPointerException if str is <CODE>null</CODE>
 193      */
 194     public StringTokenizer(String str, String delim, boolean returnDelims) {
 195         this.str = str;
 196         this.maxPosition = str.length();    // throws NullPointerException when str is null
 197         this.delimiters = delim;            // may be null; the constructor does not reject it
 198         this.retDelims = returnDelims;
 199         this.currentPosition = 0;           // scanning begins at the first char
 200         this.newPosition = -1;              // negative: no start position cached yet
 201         this.delimsChanged = false;
 202         setMaxDelimCodePoint();             // derive lookup state from the delimiters
 203     }
 204 
 205     /**
 206      * Constructs a string tokenizer for the specified string. The
 207      * characters in the <code>delim</code> argument are the delimiters
 208      * for separating tokens. Delimiter characters themselves will not
 209      * be treated as tokens.
 210      * <p>
 211      * Note that if <tt>delim</tt> is <tt>null</tt>, this constructor does
 212      * not throw an exception. However, trying to invoke other methods on the
 213      * resulting <tt>StringTokenizer</tt> may result in a
 214      * <tt>NullPointerException</tt>.
 215      *
 216      * @param   str     a string to be parsed.
 217      * @param   delim   the delimiters.
 218      * @exception NullPointerException if str is <CODE>null</CODE>
 219      */
 220     public StringTokenizer(String str, String delim) {
 221         this(str, delim, false);    // false: delimiters only separate tokens and are never returned
 222     }
 223 
 224     /**
 225      * Constructs a string tokenizer for the specified string. The
 226      * tokenizer uses the default delimiter set, which is
 227      * <code>"&nbsp;\t\n\r\f"</code>: the space character,
 228      * the tab character, the newline character, the carriage-return character,
 229      * and the form-feed character. Delimiter characters themselves will
 230      * not be treated as tokens.
 231      *
 232      * @param   str   a string to be parsed.
 233      * @exception NullPointerException if str is <CODE>null</CODE>
 234      */
 235     public StringTokenizer(String str) {
 236         this(str, " \t\n\r\f", false);    // default set: space, tab, LF, CR, FF; delimiters are not returned
 237     }
 238 
 239     /**
 240      * Skips delimiters starting from the specified position. If retDelims
 241      * is false, returns the index of the first non-delimiter character at or
 242      * after startPos. If retDelims is true, startPos is returned.
 243      */
 244     private int skipDelimiters(int startPos) {
 245         if (delimiters == null)
 246             throw new NullPointerException();
 247 
 248         int position = startPos;
 249         while (!retDelims && position < maxPosition) {
 250             if (!hasSurrogates) {
 251                 char c = str.charAt(position);
 252                 if ((c > maxDelimCodePoint) || (delimiters.indexOf(c) < 0))
 253                     break;


 290             } else {
 291                 int c = str.codePointAt(position);
 292                 if ((c <= maxDelimCodePoint) && isDelimiter(c))
 293                     position += Character.charCount(c);
 294             }
 295         }
 296         return position;
 297     }
 298 
 299     private boolean isDelimiter(int codePoint) {
 300         for (int delimiterCodePoint : delimiterCodePoints) {
 301             if (delimiterCodePoint == codePoint) {
 302                 return true;
 303             }
 304         }
 305         return false;
 306     }
 307 
 308     /**
 309      * Tests if there are more tokens available from this tokenizer's string.
 310      * If this method returns <tt>true</tt>, then a subsequent call to
 311      * <tt>nextToken</tt> with no argument will successfully return a token.
 312      *
 313      * @return  <code>true</code> if and only if there is at least one token
 314      *          in the string after the current position; <code>false</code>
 315      *          otherwise.
 316      */
 317     public boolean hasMoreTokens() {
 318         // Cache where the next token would begin. nextToken() reuses this
 319         // value unless nextToken(String) has switched the delimiter set, in
 320         // which case the cached position is ignored and recomputed.
 321         final int tokenStart = skipDelimiters(currentPosition);
 322         newPosition = tokenStart;
 323         final boolean exhausted = (tokenStart >= maxPosition);
 324         return !exhausted;
 325     }
 326 
 327     /**
 328      * Returns the next token from this string tokenizer.
 329      *
 330      * @return     the next token from this string tokenizer.
 331      * @exception  NoSuchElementException  if there are no more tokens in this
 332      *               tokenizer's string.
 333      */
 334     public String nextToken() {


 338          * then use the computed value.
 339          */
 340 
 341         currentPosition = (newPosition >= 0 && !delimsChanged) ?
 342             newPosition : skipDelimiters(currentPosition);
 343 
 344         /* Reset these anyway */
 345         delimsChanged = false;
 346         newPosition = -1;
 347 
 348         if (currentPosition >= maxPosition)
 349             throw new NoSuchElementException();
 350         int start = currentPosition;
 351         currentPosition = scanToken(currentPosition);
 352         return str.substring(start, currentPosition);
 353     }
 354 
 355     /**
 356      * Returns the next token in this string tokenizer's string. First,
 357      * the set of characters considered to be delimiters by this
 358      * <tt>StringTokenizer</tt> object is changed to be the characters in
 359      * the string <tt>delim</tt>. Then the next token in the string
 360      * after the current position is returned. The current position is
 361      * advanced beyond the recognized token.  The new delimiter set
 362      * remains the default after this call.
 363      *
 364      * @param      delim   the new delimiters.
 365      * @return     the next token, after switching to the new delimiter set.
 366      * @exception  NoSuchElementException  if there are no more tokens in this
 367      *               tokenizer's string.
 368      * @exception NullPointerException if delim is <CODE>null</CODE>
 369      */
 370     public String nextToken(String delim) {
 371         // Raise the flag so the no-arg nextToken() discards any start
 372         // position cached by an earlier hasMoreTokens() call.
 373         delimsChanged = true;
 374         this.delimiters = delim;
 375         setMaxDelimCodePoint();    // refresh the state derived from the delimiter set
 376         final String token = nextToken();
 377         return token;
 378     }
 379 
 380     /**
 381      * Returns the same value as the <code>hasMoreTokens</code>
 382      * method. It exists so that this class can implement the
 383      * <code>Enumeration</code> interface.
 384      *
 385      * @return  <code>true</code> if there are more tokens;
 386      *          <code>false</code> otherwise.
 387      * @see     java.util.Enumeration
 388      * @see     java.util.StringTokenizer#hasMoreTokens()
 389      */
 390     public boolean hasMoreElements() {
 391         return hasMoreTokens();    // delegates unchanged; hasMoreTokens() also caches the next start in newPosition
 392     }
 393 
 394     /**
 395      * Returns the same value as the <code>nextToken</code> method,
 396      * except that its declared return value is <code>Object</code> rather than
 397      * <code>String</code>. It exists so that this class can implement the
 398      * <code>Enumeration</code> interface.
 399      *
 400      * @return     the next token in the string.
 401      * @exception  NoSuchElementException  if there are no more tokens in this
 402      *               tokenizer's string.
 403      * @see        java.util.Enumeration
 404      * @see        java.util.StringTokenizer#nextToken()
 405      */
 406     public Object nextElement() {
 407         return nextToken();    // the String token is handed back as-is, merely typed as Object
 408     }
 409 
 410     /**
 411      * Calculates the number of times that this tokenizer's
 412      * <code>nextToken</code> method can be called before it generates an
 413      * exception. The current position is not advanced.
 414      *
 415      * @return  the number of tokens remaining in the string using the current
 416      *          delimiter set.
 417      * @see     java.util.StringTokenizer#nextToken()
 418      */
 419     public int countTokens() {
 420         int remaining = 0;
 421         // Walk a private cursor; this method never assigns currentPosition.
 422         for (int cursor = currentPosition; cursor < maxPosition; remaining++) {
 423             cursor = skipDelimiters(cursor);
 424             if (cursor >= maxPosition) {
 425                 break;                     // no further token starts before the end
 426             }
 427             cursor = scanToken(cursor);    // advance past one token, then count it
 428         }
 429         return remaining;
 430     }
 431 }


  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.util;
  27 
  28 import java.lang.*;
  29 
  30 /**
  31  * The string tokenizer class allows an application to break a
  32  * string into tokens. The tokenization method is much simpler than
  33  * the one used by the {@code StreamTokenizer} class. The
  34  * {@code StringTokenizer} methods do not distinguish among
  35  * identifiers, numbers, and quoted strings, nor do they recognize
  36  * and skip comments.
  37  * <p>
  38  * The set of delimiters (the characters that separate tokens) may
  39  * be specified either at creation time or on a per-token basis.
  40  * <p>
  41  * An instance of {@code StringTokenizer} behaves in one of two
  42  * ways, depending on whether it was created with the
  43  * {@code returnDelims} flag having the value {@code true}
  44  * or {@code false}:
  45  * <ul>
  46  * <li>If the flag is {@code false}, delimiter characters serve to
  47  *     separate tokens. A token is a maximal sequence of consecutive
  48  *     characters that are not delimiters.
  49  * <li>If the flag is {@code true}, delimiter characters are themselves
  50  *     considered to be tokens. A token is thus either one delimiter
  51  *     character, or a maximal sequence of consecutive characters that are
  52  *     not delimiters.
  53  * </ul><p>
  54  * A {@code StringTokenizer} object internally maintains a current
  55  * position within the string to be tokenized. Some operations advance this
  56  * current position past the characters processed.<p>
  57  * A token is returned by taking a substring of the string that was used to
  58  * create the {@code StringTokenizer} object.
  59  * <p>
  60  * The following is one example of the use of the tokenizer. The code:
  61  * <blockquote><pre>
  62  *     StringTokenizer st = new StringTokenizer("this is a test");
  63  *     while (st.hasMoreTokens()) {
  64  *         System.out.println(st.nextToken());
  65  *     }
  66  * </pre></blockquote>
  67  * <p>
  68  * prints the following output:
  69  * <blockquote><pre>
  70  *     this
  71  *     is
  72  *     a
  73  *     test
  74  * </pre></blockquote>
  75  *
  76  * <p>
  77  * {@code StringTokenizer} is a legacy class that is retained for
  78  * compatibility reasons although its use is discouraged in new code. It is
  79  * recommended that anyone seeking this functionality use the {@code split}
  80  * method of {@code String} or the java.util.regex package instead.
  81  * <p>
  82  * The following example illustrates how the {@code String.split}
  83  * method can be used to break up a string into its basic tokens:
  84  * <blockquote><pre>
  85  *     String[] result = "this is a test".split("\\s");
  86  *     for (int x=0; x&lt;result.length; x++)
  87  *         System.out.println(result[x]);
  88  * </pre></blockquote>
  89  * <p>
  90  * prints the following output:
  91  * <blockquote><pre>
  92  *     this
  93  *     is
  94  *     a
  95  *     test
  96  * </pre></blockquote>
  97  *
  98  * @author  unascribed
  99  * @see     java.io.StreamTokenizer
 100  * @since   1.0
 101  */
 102 public


 154                 c = delimiters.codePointAt(i);
 155                 hasSurrogates = true;
 156             }
 157             if (m < c)
 158                 m = c;
 159             count++;
 160         }
 161         maxDelimCodePoint = m;
 162 
 163         if (hasSurrogates) {
 164             delimiterCodePoints = new int[count];
 165             for (int i = 0, j = 0; i < count; i++, j += Character.charCount(c)) {
 166                 c = delimiters.codePointAt(j);
 167                 delimiterCodePoints[i] = c;
 168             }
 169         }
 170     }
 171 
 172     /**
 173      * Constructs a string tokenizer for the specified string. All
 174      * characters in the {@code delim} argument are the delimiters
 175      * for separating tokens.
 176      * <p>
 177      * If the {@code returnDelims} flag is {@code true}, then
 178      * the delimiter characters are also returned as tokens. Each
 179      * delimiter is returned as a string holding a single code point (one
 180      * or two chars). If the flag is {@code false}, the delimiter
 181      * characters are skipped and only serve as separators between tokens.
 182      * <p>
 183      * Note that if {@code delim} is {@code null}, this constructor does
 184      * not throw an exception. However, trying to invoke other methods on the
 185      * resulting {@code StringTokenizer} may result in a
 186      * {@code NullPointerException}.
 187      *
 188      * @param   str            a string to be parsed.
 189      * @param   delim          the delimiters.
 190      * @param   returnDelims   flag indicating whether to return the delimiters
 191      *                         as tokens.
 192      * @exception NullPointerException if str is {@code null}
 193      */
 194     public StringTokenizer(String str, String delim, boolean returnDelims) {
 195         this.str = str;
 196         this.maxPosition = str.length();    // throws NullPointerException when str is null
 197         this.delimiters = delim;            // may be null; the constructor does not reject it
 198         this.retDelims = returnDelims;
 199         this.currentPosition = 0;           // scanning begins at the first char
 200         this.newPosition = -1;              // negative: no start position cached yet
 201         this.delimsChanged = false;
 202         setMaxDelimCodePoint();             // derive lookup state from the delimiters
 203     }
 204 
 205     /**
 206      * Constructs a string tokenizer for the specified string. The
 207      * characters in the {@code delim} argument are the delimiters
 208      * for separating tokens. Delimiter characters themselves will not
 209      * be treated as tokens.
 210      * <p>
 211      * Note that if {@code delim} is {@code null}, this constructor does
 212      * not throw an exception. However, trying to invoke other methods on the
 213      * resulting {@code StringTokenizer} may result in a
 214      * {@code NullPointerException}.
 215      *
 216      * @param   str     a string to be parsed.
 217      * @param   delim   the delimiters.
 218      * @exception NullPointerException if str is {@code null}
 219      */
 220     public StringTokenizer(String str, String delim) {
 221         this(str, delim, false);    // false: delimiters only separate tokens and are never returned
 222     }
 223 
 224     /**
 225      * Constructs a string tokenizer for the specified string. The
 226      * tokenizer uses the default delimiter set, which is
 227      * <code>"&nbsp;\t\n\r\f"</code>: the space character,
 228      * the tab character, the newline character, the carriage-return character,
 229      * and the form-feed character. Delimiter characters themselves will
 230      * not be treated as tokens.
 231      *
 232      * @param   str   a string to be parsed.
 233      * @exception NullPointerException if str is {@code null}
 234      */
 235     public StringTokenizer(String str) {
 236         this(str, " \t\n\r\f", false);    // default set: space, tab, LF, CR, FF; delimiters are not returned
 237     }
 238 
 239     /**
 240      * Skips delimiters starting from the specified position. If retDelims
 241      * is false, returns the index of the first non-delimiter character at or
 242      * after startPos. If retDelims is true, startPos is returned.
 243      */
 244     private int skipDelimiters(int startPos) {
 245         if (delimiters == null)
 246             throw new NullPointerException();
 247 
 248         int position = startPos;
 249         while (!retDelims && position < maxPosition) {
 250             if (!hasSurrogates) {
 251                 char c = str.charAt(position);
 252                 if ((c > maxDelimCodePoint) || (delimiters.indexOf(c) < 0))
 253                     break;


 290             } else {
 291                 int c = str.codePointAt(position);
 292                 if ((c <= maxDelimCodePoint) && isDelimiter(c))
 293                     position += Character.charCount(c);
 294             }
 295         }
 296         return position;
 297     }
 298 
 299     private boolean isDelimiter(int codePoint) {
 300         for (int delimiterCodePoint : delimiterCodePoints) {
 301             if (delimiterCodePoint == codePoint) {
 302                 return true;
 303             }
 304         }
 305         return false;
 306     }
 307 
 308     /**
 309      * Tests if there are more tokens available from this tokenizer's string.
 310      * If this method returns {@code true}, then a subsequent call to
 311      * {@code nextToken} with no argument will successfully return a token.
 312      *
 313      * @return  {@code true} if and only if there is at least one token
 314      *          in the string after the current position; {@code false}
 315      *          otherwise.
 316      */
 317     public boolean hasMoreTokens() {
 318         // Cache where the next token would begin. nextToken() reuses this
 319         // value unless nextToken(String) has switched the delimiter set, in
 320         // which case the cached position is ignored and recomputed.
 321         final int tokenStart = skipDelimiters(currentPosition);
 322         newPosition = tokenStart;
 323         final boolean exhausted = (tokenStart >= maxPosition);
 324         return !exhausted;
 325     }
 326 
 327     /**
 328      * Returns the next token from this string tokenizer.
 329      *
 330      * @return     the next token from this string tokenizer.
 331      * @exception  NoSuchElementException  if there are no more tokens in this
 332      *               tokenizer's string.
 333      */
 334     public String nextToken() {


 338          * then use the computed value.
 339          */
 340 
 341         currentPosition = (newPosition >= 0 && !delimsChanged) ?
 342             newPosition : skipDelimiters(currentPosition);
 343 
 344         /* Reset these anyway */
 345         delimsChanged = false;
 346         newPosition = -1;
 347 
 348         if (currentPosition >= maxPosition)
 349             throw new NoSuchElementException();
 350         int start = currentPosition;
 351         currentPosition = scanToken(currentPosition);
 352         return str.substring(start, currentPosition);
 353     }
 354 
 355     /**
 356      * Returns the next token in this string tokenizer's string. First,
 357      * the set of characters considered to be delimiters by this
 358      * {@code StringTokenizer} object is changed to be the characters in
 359      * the string {@code delim}. Then the next token in the string
 360      * after the current position is returned. The current position is
 361      * advanced beyond the recognized token.  The new delimiter set
 362      * remains the default after this call.
 363      *
 364      * @param      delim   the new delimiters.
 365      * @return     the next token, after switching to the new delimiter set.
 366      * @exception  NoSuchElementException  if there are no more tokens in this
 367      *               tokenizer's string.
 368      * @exception NullPointerException if delim is {@code null}
 369      */
 370     public String nextToken(String delim) {
 371         // Raise the flag so the no-arg nextToken() discards any start
 372         // position cached by an earlier hasMoreTokens() call.
 373         delimsChanged = true;
 374         this.delimiters = delim;
 375         setMaxDelimCodePoint();    // refresh the state derived from the delimiter set
 376         final String token = nextToken();
 377         return token;
 378     }
 379 
 380     /**
 381      * Returns the same value as the {@code hasMoreTokens}
 382      * method. It exists so that this class can implement the
 383      * {@code Enumeration} interface.
 384      *
 385      * @return  {@code true} if there are more tokens;
 386      *          {@code false} otherwise.
 387      * @see     java.util.Enumeration
 388      * @see     java.util.StringTokenizer#hasMoreTokens()
 389      */
 390     public boolean hasMoreElements() {
 391         return hasMoreTokens();    // delegates unchanged; hasMoreTokens() also caches the next start in newPosition
 392     }
 393 
 394     /**
 395      * Returns the same value as the {@code nextToken} method,
 396      * except that its declared return value is {@code Object} rather than
 397      * {@code String}. It exists so that this class can implement the
 398      * {@code Enumeration} interface.
 399      *
 400      * @return     the next token in the string.
 401      * @exception  NoSuchElementException  if there are no more tokens in this
 402      *               tokenizer's string.
 403      * @see        java.util.Enumeration
 404      * @see        java.util.StringTokenizer#nextToken()
 405      */
 406     public Object nextElement() {
 407         return nextToken();    // the String token is handed back as-is, merely typed as Object
 408     }
 409 
 410     /**
 411      * Calculates the number of times that this tokenizer's
 412      * {@code nextToken} method can be called before it generates an
 413      * exception. The current position is not advanced.
 414      *
 415      * @return  the number of tokens remaining in the string using the current
 416      *          delimiter set.
 417      * @see     java.util.StringTokenizer#nextToken()
 418      */
 419     public int countTokens() {
 420         int remaining = 0;
 421         // Walk a private cursor; this method never assigns currentPosition.
 422         for (int cursor = currentPosition; cursor < maxPosition; remaining++) {
 423             cursor = skipDelimiters(cursor);
 424             if (cursor >= maxPosition) {
 425                 break;                     // no further token starts before the end
 426             }
 427             cursor = scanToken(cursor);    // advance past one token, then count it
 428         }
 429         return remaining;
 430     }
 431 }
< prev index next >