1 /*
2 * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 /*
26 *******************************************************************************
27 * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved *
28 * *
29 * The original version of this source code and documentation is copyrighted *
30 * and owned by IBM, These materials are provided under terms of a License *
31 * Agreement between IBM and Sun. This technology is protected by multiple *
32 * US and International patents. This notice and attribution to IBM may not *
33 * to removed. *
34 *******************************************************************************
35 */
36
37 package sun.text.normalizer;
38
39 public final class Utility {
40
41 /**
42 * Convenience utility to compare two Object[]s
43 * Ought to be in System.
44 * @param len the length to compare.
45 * The start indices and start+len must be valid.
46 */
47 public final static boolean arrayRegionMatches(char[] source, int sourceStart,
48 char[] target, int targetStart,
49 int len)
50 {
51 int sourceEnd = sourceStart + len;
52 int delta = targetStart - sourceStart;
53 for (int i = sourceStart; i < sourceEnd; i++) {
54 if (source[i]!=target[i + delta])
55 return false;
56 }
57 return true;
58 }
59
60 /**
61 * Convert characters outside the range U+0020 to U+007F to
62 * Unicode escapes, and convert backslash to a double backslash.
63 */
64 public static final String escape(String s) {
65 StringBuffer buf = new StringBuffer();
66 for (int i=0; i<s.length(); ) {
67 int c = UTF16.charAt(s, i);
68 i += UTF16.getCharCount(c);
69 if (c >= ' ' && c <= 0x007F) {
70 if (c == '\\') {
71 buf.append("\\\\"); // That is, "\\"
72 } else {
73 buf.append((char)c);
74 }
75 } else {
76 boolean four = c <= 0xFFFF;
77 buf.append(four ? "\\u" : "\\U");
78 hex(c, four ? 4 : 8, buf);
79 }
80 }
81 return buf.toString();
82 }
83
84 /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
85 static private final char[] UNESCAPE_MAP = {
86 /*" 0x22, 0x22 */
87 /*' 0x27, 0x27 */
88 /*? 0x3F, 0x3F */
89 /*\ 0x5C, 0x5C */
90 /*a*/ 0x61, 0x07,
91 /*b*/ 0x62, 0x08,
92 /*e*/ 0x65, 0x1b,
93 /*f*/ 0x66, 0x0c,
94 /*n*/ 0x6E, 0x0a,
95 /*r*/ 0x72, 0x0d,
96 /*t*/ 0x74, 0x09,
97 /*v*/ 0x76, 0x0b
98 };
107 */
108 public static int unescapeAt(String s, int[] offset16) {
109 int c;
110 int result = 0;
111 int n = 0;
112 int minDig = 0;
113 int maxDig = 0;
114 int bitsPerDigit = 4;
115 int dig;
116 int i;
117 boolean braces = false;
118
119 /* Check that offset is in range */
120 int offset = offset16[0];
121 int length = s.length();
122 if (offset < 0 || offset >= length) {
123 return -1;
124 }
125
126 /* Fetch first UChar after '\\' */
127 c = UTF16.charAt(s, offset);
128 offset += UTF16.getCharCount(c);
129
130 /* Convert hexadecimal and octal escapes */
131 switch (c) {
132 case 'u':
133 minDig = maxDig = 4;
134 break;
135 case 'U':
136 minDig = maxDig = 8;
137 break;
138 case 'x':
139 minDig = 1;
140 if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
141 ++offset;
142 braces = true;
143 maxDig = 8;
144 } else {
145 maxDig = 2;
146 }
147 break;
209 return UNESCAPE_MAP[i+1];
210 } else if (c < UNESCAPE_MAP[i]) {
211 break;
212 }
213 }
214
215 /* Map \cX to control-X: X & 0x1F */
216 if (c == 'c' && offset < length) {
217 c = UTF16.charAt(s, offset);
218 offset16[0] = offset + UTF16.getCharCount(c);
219 return 0x1F & c;
220 }
221
222 /* If no special forms are recognized, then consider
223 * the backslash to generically escape the next character. */
224 offset16[0] = offset;
225 return c;
226 }
227
228 /**
229 * Convert a integer to size width hex uppercase digits.
230 * E.g., {@code hex('a', 4, str) => "0041"}.
231 * Append the output to the given StringBuffer.
232 * If width is too small to fit, nothing will be appended to output.
233 */
234 public static StringBuffer hex(int ch, int width, StringBuffer output) {
235 return appendNumber(output, ch, 16, width);
236 }
237
238 /**
239 * Convert a integer to size width (minimum) hex uppercase digits.
240 * E.g., {@code hex('a', 4, str) => "0041"}. If the integer requires more
241 * than width digits, more will be used.
242 */
243 public static String hex(int ch, int width) {
244 StringBuffer buf = new StringBuffer();
245 return appendNumber(buf, ch, 16, width).toString();
246 }
247
248 /**
249 * Skip over a sequence of zero or more white space characters
250 * at pos. Return the index of the first non-white-space character
251 * at or after pos, or str.length(), if there is none.
252 */
253 public static int skipWhitespace(String str, int pos) {
254 while (pos < str.length()) {
255 int c = UTF16.charAt(str, pos);
256 if (!UCharacterProperty.isRuleWhiteSpace(c)) {
257 break;
258 }
259 pos += UTF16.getCharCount(c);
260 }
261 return pos;
262 }
263
264 static final char DIGITS[] = {
265 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
266 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
267 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
268 'U', 'V', 'W', 'X', 'Y', 'Z'
269 };
270
271 /**
272 * Append the digits of a positive integer to the given
273 * <code>StringBuffer</code> in the given radix. This is
274 * done recursively since it is easiest to generate the low-
275 * order digit first, but it must be appended last.
276 *
277 * @param result is the <code>StringBuffer</code> to append to
278 * @param n is the positive integer
279 * @param radix is the radix, from 2 to 36 inclusive
280 * @param minDigits is the minimum number of digits to append.
281 */
282 private static void recursiveAppendNumber(StringBuffer result, int n,
283 int radix, int minDigits)
284 {
285 int digit = n % radix;
286
287 if (n >= radix || minDigits > 1) {
288 recursiveAppendNumber(result, n / radix, radix, minDigits - 1);
289 }
290
291 result.append(DIGITS[digit]);
292 }
293
294 /**
295 * Append a number to the given StringBuffer in the given radix.
296 * Standard digits '0'-'9' are used and letters 'A'-'Z' for
297 * radices 11 through 36.
298 * @param result the digits of the number are appended here
299 * @param n the number to be converted to digits; may be negative.
300 * If negative, a '-' is prepended to the digits.
301 * @param radix a radix from 2 to 36 inclusive.
302 * @param minDigits the minimum number of digits, not including
303 * any '-', to produce. Values less than 2 have no effect. One
304 * digit is always emitted regardless of this parameter.
305 * @return a reference to result
306 */
307 public static StringBuffer appendNumber(StringBuffer result, int n,
308 int radix, int minDigits)
309 throws IllegalArgumentException
310 {
311 if (radix < 2 || radix > 36) {
312 throw new IllegalArgumentException("Illegal radix " + radix);
313 }
314
315
316 int abs = n;
317
318 if (n < 0) {
319 abs = -n;
320 result.append("-");
321 }
322
323 recursiveAppendNumber(result, abs, radix, minDigits);
324
325 return result;
326 }
327
328 /**
329 * Return true if the character is NOT printable ASCII. The tab,
330 * newline and linefeed characters are considered unprintable.
331 */
332 public static boolean isUnprintable(int c) {
333 return !(c >= 0x20 && c <= 0x7E);
334 }
335
336 /**
337 * Escape unprintable characters using {@code <backslash>uxxxx} notation
338 * for U+0000 to U+FFFF and {@code <backslash>Uxxxxxxxx} for U+10000 and
339 * above. If the character is printable ASCII, then do nothing
340 * and return FALSE. Otherwise, append the escaped notation and
341 * return TRUE.
342 */
343 public static boolean escapeUnprintable(StringBuffer result, int c) {
344 if (isUnprintable(c)) {
345 result.append('\\');
346 if ((c & ~0xFFFF) != 0) {
347 result.append('U');
348 result.append(DIGITS[0xF&(c>>28)]);
349 result.append(DIGITS[0xF&(c>>24)]);
350 result.append(DIGITS[0xF&(c>>20)]);
351 result.append(DIGITS[0xF&(c>>16)]);
352 } else {
353 result.append('u');
354 }
355 result.append(DIGITS[0xF&(c>>12)]);
356 result.append(DIGITS[0xF&(c>>8)]);
357 result.append(DIGITS[0xF&(c>>4)]);
358 result.append(DIGITS[0xF&c]);
359 return true;
360 }
361 return false;
362 }
363
364 /**
365 * Similar to StringBuffer.getChars, version 1.3.
366 * Since JDK 1.2 implements StringBuffer.getChars differently, this method
367 * is here to provide consistent results.
368 * To be removed after JDK 1.2 ceased to be the reference platform.
369 * @param src source string buffer
370 * @param srcBegin offset to the start of the src to retrieve from
371 * @param srcEnd offset to the end of the src to retrieve from
372 * @param dst char array to store the retrieved chars
373 * @param dstBegin offset to the start of the destination char array to
374 * store the retrieved chars
375 */
376 public static void getChars(StringBuffer src, int srcBegin, int srcEnd,
377 char dst[], int dstBegin)
378 {
379 if (srcBegin == srcEnd) {
380 return;
381 }
382 src.getChars(srcBegin, srcEnd, dst, dstBegin);
383 }
384
385 }
|
1 /*
2 * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25 /*
26 *******************************************************************************
27 * Copyright (C) 1996-2011, International Business Machines Corporation and *
28 * others. All Rights Reserved. *
29 *******************************************************************************
30 */
31
32 package sun.text.normalizer;
33
34 import java.io.IOException;
35 import java.util.Locale;
36
37 final class Utility {
38
39 /**
40 * Convert characters outside the range U+0020 to U+007F to
41 * Unicode escapes, and convert backslash to a double backslash.
42 */
43 public static final String escape(String s) {
44 StringBuilder buf = new StringBuilder();
45 for (int i=0; i<s.length(); ) {
46 int c = Character.codePointAt(s, i);
47 i += UTF16.getCharCount(c);
48 if (c >= ' ' && c <= 0x007F) {
49 if (c == '\\') {
50 buf.append("\\\\"); // That is, "\\"
51 } else {
52 buf.append((char)c);
53 }
54 } else {
55 boolean four = c <= 0xFFFF;
56 buf.append(four ? "\\u" : "\\U");
57 buf.append(hex(c, four ? 4 : 8));
58 }
59 }
60 return buf.toString();
61 }
62
63 /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
64 static private final char[] UNESCAPE_MAP = {
65 /*" 0x22, 0x22 */
66 /*' 0x27, 0x27 */
67 /*? 0x3F, 0x3F */
68 /*\ 0x5C, 0x5C */
69 /*a*/ 0x61, 0x07,
70 /*b*/ 0x62, 0x08,
71 /*e*/ 0x65, 0x1b,
72 /*f*/ 0x66, 0x0c,
73 /*n*/ 0x6E, 0x0a,
74 /*r*/ 0x72, 0x0d,
75 /*t*/ 0x74, 0x09,
76 /*v*/ 0x76, 0x0b
77 };
86 */
87 public static int unescapeAt(String s, int[] offset16) {
88 int c;
89 int result = 0;
90 int n = 0;
91 int minDig = 0;
92 int maxDig = 0;
93 int bitsPerDigit = 4;
94 int dig;
95 int i;
96 boolean braces = false;
97
98 /* Check that offset is in range */
99 int offset = offset16[0];
100 int length = s.length();
101 if (offset < 0 || offset >= length) {
102 return -1;
103 }
104
105 /* Fetch first UChar after '\\' */
106 c = Character.codePointAt(s, offset);
107 offset += UTF16.getCharCount(c);
108
109 /* Convert hexadecimal and octal escapes */
110 switch (c) {
111 case 'u':
112 minDig = maxDig = 4;
113 break;
114 case 'U':
115 minDig = maxDig = 8;
116 break;
117 case 'x':
118 minDig = 1;
119 if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
120 ++offset;
121 braces = true;
122 maxDig = 8;
123 } else {
124 maxDig = 2;
125 }
126 break;
188 return UNESCAPE_MAP[i+1];
189 } else if (c < UNESCAPE_MAP[i]) {
190 break;
191 }
192 }
193
194 /* Map \cX to control-X: X & 0x1F */
195 if (c == 'c' && offset < length) {
196 c = UTF16.charAt(s, offset);
197 offset16[0] = offset + UTF16.getCharCount(c);
198 return 0x1F & c;
199 }
200
201 /* If no special forms are recognized, then consider
202 * the backslash to generically escape the next character. */
203 offset16[0] = offset;
204 return c;
205 }
206
207 /**
208 * Supplies a zero-padded hex representation of an integer (without 0x)
209 */
210 static public String hex(long i, int places) {
211 if (i == Long.MIN_VALUE) return "-8000000000000000";
212 boolean negative = i < 0;
213 if (negative) {
214 i = -i;
215 }
216 String result = Long.toString(i, 16).toUpperCase(Locale.ENGLISH);
217 if (result.length() < places) {
218 result = "0000000000000000".substring(result.length(),places) + result;
219 }
220 if (negative) {
221 return '-' + result;
222 }
223 return result;
224 }
225
226 static final char DIGITS[] = {
227 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
228 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
229 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
230 'U', 'V', 'W', 'X', 'Y', 'Z'
231 };
232
233 /**
234 * Return true if the character is NOT printable ASCII. The tab,
235 * newline and linefeed characters are considered unprintable.
236 */
237 public static boolean isUnprintable(int c) {
238 //0x20 = 32 and 0x7E = 126
239 return !(c >= 0x20 && c <= 0x7E);
240 }
241
242 /**
243 * Escape unprintable characters using <backslash>uxxxx notation
244 * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
245 * above. If the character is printable ASCII, then do nothing
246 * and return FALSE. Otherwise, append the escaped notation and
247 * return TRUE.
248 */
249 public static <T extends Appendable> boolean escapeUnprintable(T result, int c) {
250 try {
251 if (isUnprintable(c)) {
252 result.append('\\');
253 if ((c & ~0xFFFF) != 0) {
254 result.append('U');
255 result.append(DIGITS[0xF&(c>>28)]);
256 result.append(DIGITS[0xF&(c>>24)]);
257 result.append(DIGITS[0xF&(c>>20)]);
258 result.append(DIGITS[0xF&(c>>16)]);
259 } else {
260 result.append('u');
261 }
262 result.append(DIGITS[0xF&(c>>12)]);
263 result.append(DIGITS[0xF&(c>>8)]);
264 result.append(DIGITS[0xF&(c>>4)]);
265 result.append(DIGITS[0xF&c]);
266 return true;
267 }
268 return false;
269 } catch (IOException e) {
270 throw new IllegalArgumentException(e);
271 }
272 }
273 }
|