47 * This code and its internal interfaces are subject to change or
48 * deletion without notice.</b>
49 */
50 public class UnicodeReader {
51
52 /** The input buffer, index of next character to be read,
53 * index of one past last character in buffer.
54 */
55 protected char[] buf;
56 protected int bp;
57 protected final int buflen;
58
59 /** The current character.
60 */
61 protected char ch;
62
63 /** The buffer index of the last converted unicode character
64 */
65 protected int unicodeConversionBp = -1;
66
67 protected Log log;
68 protected Names names;
69
70 /** A character buffer for saved chars.
71 */
72 protected char[] sbuf = new char[128];
73 protected int realLength;
74 protected int sp;
75
76 /**
77 * Create a scanner from the input array. This method might
78 * modify the array. To avoid copying the input array, ensure
79 * that {@code inputLength < input.length} or
80 * {@code input[input.length -1]} is a white space character.
81 *
82 * @param sf the factory which created this Scanner
83 * @param buffer the input, might be modified
84 * Must be positive and less than or equal to input.length.
85 */
86 protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {
137 if (scan)
138 scanChar();
139 }
140
141 protected void putChar(char ch) {
142 putChar(ch, false);
143 }
144
145 protected void putChar(boolean scan) {
146 putChar(ch, scan);
147 }
148
149 Name name() {
150 return names.fromChars(sbuf, 0, sp);
151 }
152
153 String chars() {
154 return new String(sbuf, 0, sp);
155 }
156
157 /** Convert unicode escape; bp points to initial '\' character
158 * (Spec 3.3).
159 */
160 protected void convertUnicode() {
161 if (ch == '\\' && unicodeConversionBp != bp) {
162 bp++; ch = buf[bp];
163 if (ch == 'u') {
164 do {
165 bp++; ch = buf[bp];
166 } while (ch == 'u');
167 int limit = bp + 3;
168 if (limit < buflen) {
169 int d = digit(bp, 16);
170 int code = d;
171 while (bp < limit && d >= 0) {
172 bp++; ch = buf[bp];
173 d = digit(bp, 16);
174 code = (code << 4) + d;
175 }
176 if (d >= 0) {
177 ch = (char)code;
178 unicodeConversionBp = bp;
179 return;
180 }
181 }
237 log.error(pos + 1, Errors.IllegalNonasciiDigit);
238 if (codePoint >= 0)
239 scanChar();
240 ch = "0123456789abcdef".charAt(result);
241 }
242 return result;
243 }
244
245 protected boolean isUnicode() {
246 return unicodeConversionBp == bp;
247 }
248
249 protected void skipChar() {
250 bp++;
251 }
252
253 protected char peekChar() {
254 return buf[bp + 1];
255 }
256
257 /**
258 * Returns a copy of the input buffer, up to its inputLength.
259 * Unicode escape sequences are not translated.
260 */
261 public char[] getRawCharacters() {
262 char[] chars = new char[buflen];
263 System.arraycopy(buf, 0, chars, 0, buflen);
264 return chars;
265 }
266
267 /**
268 * Returns a copy of a character array subset of the input buffer.
269 * The returned array begins at the {@code beginIndex} and
270 * extends to the character at index {@code endIndex - 1}.
271 * Thus the length of the substring is {@code endIndex-beginIndex}.
272 * This behavior is like
273 * {@code String.substring(beginIndex, endIndex)}.
274 * Unicode escape sequences are not translated.
275 *
276 * @param beginIndex the beginning index, inclusive.
|
47 * This code and its internal interfaces are subject to change or
48 * deletion without notice.</b>
49 */
50 public class UnicodeReader {
51
52 /** The input buffer, index of next character to be read,
53 * index of one past last character in buffer.
54 */
55 protected char[] buf;
56 protected int bp;
57 protected final int buflen;
58
59 /** The current character.
60 */
61 protected char ch;
62
63 /** The buffer index of the last converted unicode character
64 */
65 protected int unicodeConversionBp = -1;
66
67 /** Control conversion of unicode characters
68 */
69 protected boolean unicodeConversion = true;
70
71 protected Log log;
72 protected Names names;
73
74 /** A character buffer for saved chars.
75 */
76 protected char[] sbuf = new char[128];
77 protected int realLength;
78 protected int sp;
79
80 /**
81 * Create a scanner from the input array. This method might
82 * modify the array. To avoid copying the input array, ensure
83 * that {@code inputLength < input.length} or
84 * {@code input[input.length -1]} is a white space character.
85 *
86 * @param sf the factory which created this Scanner
87 * @param buffer the input, might be modified
88 * Must be positive and less than or equal to input.length.
89 */
90 protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {
141 if (scan)
142 scanChar();
143 }
144
145 protected void putChar(char ch) {
146 putChar(ch, false);
147 }
148
149 protected void putChar(boolean scan) {
150 putChar(ch, scan);
151 }
152
153 Name name() {
154 return names.fromChars(sbuf, 0, sp);
155 }
156
157 String chars() {
158 return new String(sbuf, 0, sp);
159 }
160
161 protected boolean setUnicodeConversion(boolean newState) {
162 boolean oldState = unicodeConversion;
163 unicodeConversion = newState;
164 return oldState;
165 }
166
167 /** Convert unicode escape; bp points to initial '\' character
168 * (Spec 3.3).
169 */
170 protected void convertUnicode() {
171 if (ch == '\\' && unicodeConversion && unicodeConversionBp != bp ) {
172 bp++; ch = buf[bp];
173 if (ch == 'u') {
174 do {
175 bp++; ch = buf[bp];
176 } while (ch == 'u');
177 int limit = bp + 3;
178 if (limit < buflen) {
179 int d = digit(bp, 16);
180 int code = d;
181 while (bp < limit && d >= 0) {
182 bp++; ch = buf[bp];
183 d = digit(bp, 16);
184 code = (code << 4) + d;
185 }
186 if (d >= 0) {
187 ch = (char)code;
188 unicodeConversionBp = bp;
189 return;
190 }
191 }
247 log.error(pos + 1, Errors.IllegalNonasciiDigit);
248 if (codePoint >= 0)
249 scanChar();
250 ch = "0123456789abcdef".charAt(result);
251 }
252 return result;
253 }
254
255 protected boolean isUnicode() {
256 return unicodeConversionBp == bp;
257 }
258
259 protected void skipChar() {
260 bp++;
261 }
262
263 protected char peekChar() {
264 return buf[bp + 1];
265 }
266
267 protected char peekBack() {
268 return buf[bp];
269 }
270
271 /**
272 * Skips consecutive occurrences of the current character, leaving bp positioned
273 * at the last occurrence. Returns the occurrence count.
274 */
275 protected int skipRepeats() {
276 int start = bp;
277 while (bp < buflen) {
278 if (buf[bp] != buf[bp + 1])
279 break;
280 bp++;
281 }
282 return bp - start;
283 }
284
285 /**
286 * Returns a copy of the input buffer, up to its inputLength.
287 * Unicode escape sequences are not translated.
288 */
289 public char[] getRawCharacters() {
290 char[] chars = new char[buflen];
291 System.arraycopy(buf, 0, chars, 0, buflen);
292 return chars;
293 }
294
295 /**
296 * Returns a copy of a character array subset of the input buffer.
297 * The returned array begins at the {@code beginIndex} and
298 * extends to the character at index {@code endIndex - 1}.
299 * Thus the length of the substring is {@code endIndex-beginIndex}.
300 * This behavior is like
301 * {@code String.substring(beginIndex, endIndex)}.
302 * Unicode escape sequences are not translated.
303 *
304 * @param beginIndex the beginning index, inclusive.
|