1 /*
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
78 */
79 public static final int EOF = -4;
80
81 /**
82 * Constructor.
83 * @param type Token type
84 * @param value Token value
85 */
public Token(int type, String value) {
    // Record the caller-supplied text and type code on this token;
    // the two assignments are independent of each other.
    this.value = value;
    this.type = type;
}
90
91 /**
92 * Return the type of the token. If the token represents a
93 * delimiter or a control character, the type is that character
94 * itself, converted to an integer. Otherwise, its value is
95 * one of the following:
96 * <ul>
97 * <li><code>ATOM</code> A sequence of ASCII characters
98 * delimited by either SPACE, CTL, "(", <"> or the
99 * specified SPECIALS
100 * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters
101 * within quotes
102 * <li><code>COMMENT</code> A sequence of ASCII characters
103 * within "(" and ")".
104 * <li><code>EOF</code> End of header
105 * </ul>
106 */
107 public int getType() {
108 return type;
109 }
110
111 /**
112 * Returns the value of the token just read. When the current
113 * token is a quoted string, this field contains the body of the
114 * string, without the quotes. When the current token is a comment,
115 * this field contains the body of the comment.
116 *
117 * @return token value
118 */
119 public String getValue() {
120 return value;
121 }
122 }
123
124 private String string; // the string to be tokenized
125 private boolean skipComments; // should comments be skipped ?
159 this.skipComments = skipComments;
160 this.delimiters = delimiters;
161 currentPos = nextPos = peekPos = 0;
162 maxPos = string.length();
163 }
164
165 /**
166 * Constructor. Comments are ignored and not returned as tokens
167 *
168 * @param header The header that is tokenized
169 * @param delimiters The delimiters to be used
170 */
public HeaderTokenizer(String header, String delimiters) {
    // Delegate to the three-argument constructor. The trailing 'true'
    // matches this constructor's documented contract: comments are
    // ignored and not returned as tokens.
    this(
        header,
        delimiters,
        true
    );
}
174
175 /**
176 * Constructor. The RFC822 defined delimiters - RFC822 - are
177 * used to delimit ATOMS. Also comments are skipped and not
178 * returned as tokens
179 */
public HeaderTokenizer(String header) {
    // Use the RFC822 delimiter set and skip comments. This is the same
    // call the two-argument constructor would make on our behalf, issued
    // directly instead of through that intermediate hop.
    this(header, RFC822, true);
}
183
184 /**
185 * Parses the next token from this String. <p>
186 *
187 * Clients sit in a loop calling next() to parse successive
188 * tokens until an EOF Token is returned.
189 *
190 * @return the next Token
191 * @exception ParseException if the parse fails
192 */
193 public Token next() throws ParseException {
194 Token tk;
195
196 currentPos = nextPos; // setup currentPos
197 tk = getNext();
198 nextPos = peekPos = currentPos; // update currentPos and peekPos
300 else if (c == '"') {
301 currentPos++;
302 String s;
303
304 if (filter)
305 s = filterToken(string, start, currentPos-1);
306 else
307 s = string.substring(start,currentPos-1);
308
309 return new Token(Token.QUOTEDSTRING, s);
310 }
311 }
312 throw new ParseException("Unbalanced quoted string");
313 }
314
315 // Check for SPECIAL or CTL
316 if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
317 currentPos++; // re-position currentPos
318 char ch[] = new char[1];
319 ch[0] = c;
320 return new Token((int)c, new String(ch));
321 }
322
323 // Check for ATOM
324 for (start = currentPos; currentPos < maxPos; currentPos++) {
325 c = string.charAt(currentPos);
326 // ATOM is delimited by either SPACE, CTL, "(", <">
327 // or the specified SPECIALS
328 if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||
329 c == '"' || delimiters.indexOf(c) >= 0)
330 break;
331 }
332 return new Token(Token.ATOM, string.substring(start, currentPos));
333 }
334
335 // Skip SPACE, HT, CR and NL
336 private int skipWhiteSpace() {
337 char c;
338 for (; currentPos < maxPos; currentPos++)
339 if (((c = string.charAt(currentPos)) != ' ') &&
340 (c != '\t') && (c != '\r') && (c != '\n'))
|
1 /*
2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
78 */
79 public static final int EOF = -4;
80
81 /**
82 * Constructor.
83 * @param type Token type
84 * @param value Token value
85 */
public Token(int type, String value) {
    // Record the caller-supplied text and type code on this token;
    // the two assignments are independent of each other.
    this.value = value;
    this.type = type;
}
90
91 /**
92 * Return the type of the token. If the token represents a
93 * delimiter or a control character, the type is that character
94 * itself, converted to an integer. Otherwise, its value is
95 * one of the following:
96 * <ul>
97 * <li><code>ATOM</code> A sequence of ASCII characters
98 * delimited by either SPACE, CTL, "(", <"> or the
99 * specified SPECIALS</li>
100 * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters
101 * within quotes</li>
102 * <li><code>COMMENT</code> A sequence of ASCII characters
103 * within "(" and ")".</li>
104 * <li><code>EOF</code> End of header</li>
105 * </ul>
106 * @return type
107 */
108 public int getType() {
109 return type;
110 }
111
112 /**
113 * Returns the value of the token just read. When the current
114 * token is a quoted string, this field contains the body of the
115 * string, without the quotes. When the current token is a comment,
116 * this field contains the body of the comment.
117 *
118 * @return token value
119 */
120 public String getValue() {
121 return value;
122 }
123 }
124
125 private String string; // the string to be tokenized
126 private boolean skipComments; // should comments be skipped ?
160 this.skipComments = skipComments;
161 this.delimiters = delimiters;
162 currentPos = nextPos = peekPos = 0;
163 maxPos = string.length();
164 }
165
166 /**
167 * Constructor. Comments are ignored and not returned as tokens
168 *
169 * @param header The header that is tokenized
170 * @param delimiters The delimiters to be used
171 */
public HeaderTokenizer(String header, String delimiters) {
    // Delegate to the three-argument constructor. The trailing 'true'
    // matches this constructor's documented contract: comments are
    // ignored and not returned as tokens.
    this(
        header,
        delimiters,
        true
    );
}
175
176 /**
177 * Constructor. The RFC822 defined delimiters - RFC822 - are
178 * used to delimit ATOMS. Also comments are skipped and not
179 * returned as tokens
180 * @param header The header that is tokenized.
181 */
public HeaderTokenizer(String header) {
    // Use the RFC822 delimiter set and skip comments. This is the same
    // call the two-argument constructor would make on our behalf, issued
    // directly instead of through that intermediate hop.
    this(header, RFC822, true);
}
185
186 /**
187 * Parses the next token from this String. <p>
188 *
189 * Clients sit in a loop calling next() to parse successive
190 * tokens until an EOF Token is returned.
191 *
192 * @return the next Token
193 * @exception ParseException if the parse fails
194 */
195 public Token next() throws ParseException {
196 Token tk;
197
198 currentPos = nextPos; // setup currentPos
199 tk = getNext();
200 nextPos = peekPos = currentPos; // update currentPos and peekPos
302 else if (c == '"') {
303 currentPos++;
304 String s;
305
306 if (filter)
307 s = filterToken(string, start, currentPos-1);
308 else
309 s = string.substring(start,currentPos-1);
310
311 return new Token(Token.QUOTEDSTRING, s);
312 }
313 }
314 throw new ParseException("Unbalanced quoted string");
315 }
316
317 // Check for SPECIAL or CTL
318 if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {
319 currentPos++; // re-position currentPos
320 char ch[] = new char[1];
321 ch[0] = c;
322 return new Token(c, new String(ch));
323 }
324
325 // Check for ATOM
326 for (start = currentPos; currentPos < maxPos; currentPos++) {
327 c = string.charAt(currentPos);
328 // ATOM is delimited by either SPACE, CTL, "(", <">
329 // or the specified SPECIALS
330 if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||
331 c == '"' || delimiters.indexOf(c) >= 0)
332 break;
333 }
334 return new Token(Token.ATOM, string.substring(start, currentPos));
335 }
336
337 // Skip SPACE, HT, CR and NL
338 private int skipWhiteSpace() {
339 char c;
340 for (; currentPos < maxPos; currentPos++)
341 if (((c = string.charAt(currentPos)) != ' ') &&
342 (c != '\t') && (c != '\r') && (c != '\n'))
|