/* * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ /* ******************************************************************************* * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved * * * * The original version of this source code and documentation is copyrighted * * and owned by IBM, These materials are provided under terms of a License * * Agreement between IBM and Sun. This technology is protected by multiple * * US and International patents. This notice and attribution to IBM may not * * to removed. * ******************************************************************************* */ package sun.text.normalizer; public final class Utility { /** * Convenience utility to compare two Object[]s * Ought to be in System. * @param len the length to compare. * The start indices and start+len must be valid. */ public final static boolean arrayRegionMatches(char[] source, int sourceStart, char[] target, int targetStart, int len) { int sourceEnd = sourceStart + len; int delta = targetStart - sourceStart; for (int i = sourceStart; i < sourceEnd; i++) { if (source[i]!=target[i + delta]) return false; } return true; } /** * Convert characters outside the range U+0020 to U+007F to * Unicode escapes, and convert backslash to a double backslash. */ public static final String escape(String s) { StringBuffer buf = new StringBuffer(); for (int i=0; i= ' ' && c <= 0x007F) { if (c == '\\') { buf.append("\\\\"); // That is, "\\" } else { buf.append((char)c); } } else { boolean four = c <= 0xFFFF; buf.append(four ? "\\u" : "\\U"); hex(c, four ? 4 : 8, buf); } } return buf.toString(); } /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */ static private final char[] UNESCAPE_MAP = { /*" 0x22, 0x22 */ /*' 0x27, 0x27 */ /*? 0x3F, 0x3F */ /*\ 0x5C, 0x5C */ /*a*/ 0x61, 0x07, /*b*/ 0x62, 0x08, /*e*/ 0x65, 0x1b, /*f*/ 0x66, 0x0c, /*n*/ 0x6E, 0x0a, /*r*/ 0x72, 0x0d, /*t*/ 0x74, 0x09, /*v*/ 0x76, 0x0b }; /** * Convert an escape to a 32-bit code point value. We attempt * to parallel the icu4c unescapeAt() function. * @param offset16 an array containing offset to the character * after the backslash. Upon return offset16[0] will * be updated to point after the escape sequence. * @return character value from 0 to 10FFFF, or -1 on error. */ public static int unescapeAt(String s, int[] offset16) { int c; int result = 0; int n = 0; int minDig = 0; int maxDig = 0; int bitsPerDigit = 4; int dig; int i; boolean braces = false; /* Check that offset is in range */ int offset = offset16[0]; int length = s.length(); if (offset < 0 || offset >= length) { return -1; } /* Fetch first UChar after '\\' */ c = UTF16.charAt(s, offset); offset += UTF16.getCharCount(c); /* Convert hexadecimal and octal escapes */ switch (c) { case 'u': minDig = maxDig = 4; break; case 'U': minDig = maxDig = 8; break; case 'x': minDig = 1; if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) { ++offset; braces = true; maxDig = 8; } else { maxDig = 2; } break; default: dig = UCharacter.digit(c, 8); if (dig >= 0) { minDig = 1; maxDig = 3; n = 1; /* Already have first octal digit */ bitsPerDigit = 3; result = dig; } break; } if (minDig != 0) { while (offset < length && n < maxDig) { c = UTF16.charAt(s, offset); dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16); if (dig < 0) { break; } result = (result << bitsPerDigit) | dig; offset += UTF16.getCharCount(c); ++n; } if (n < minDig) { return -1; } if (braces) { if (c != 0x7D /*}*/) { return -1; } ++offset; } if (result < 0 || result >= 0x110000) { return -1; } // If an escape sequence specifies a lead surrogate, see // if there is a trail surrogate after it, either as an // escape or as a literal. If so, join them up into a // supplementary. if (offset < length && UTF16.isLeadSurrogate((char) result)) { int ahead = offset+1; c = s.charAt(offset); // [sic] get 16-bit code unit if (c == '\\' && ahead < length) { int o[] = new int[] { ahead }; c = unescapeAt(s, o); ahead = o[0]; } if (UTF16.isTrailSurrogate((char) c)) { offset = ahead; result = UCharacterProperty.getRawSupplementary( (char) result, (char) c); } } offset16[0] = offset; return result; } /* Convert C-style escapes in table */ for (i=0; i "0041"}. * Append the output to the given StringBuffer. * If width is too small to fit, nothing will be appended to output. */ public static StringBuffer hex(int ch, int width, StringBuffer output) { return appendNumber(output, ch, 16, width); } /** * Convert a integer to size width (minimum) hex uppercase digits. * E.g., {@code hex('a', 4, str) => "0041"}. If the integer requires more * than width digits, more will be used. */ public static String hex(int ch, int width) { StringBuffer buf = new StringBuffer(); return appendNumber(buf, ch, 16, width).toString(); } /** * Skip over a sequence of zero or more white space characters * at pos. Return the index of the first non-white-space character * at or after pos, or str.length(), if there is none. */ public static int skipWhitespace(String str, int pos) { while (pos < str.length()) { int c = UTF16.charAt(str, pos); if (!UCharacterProperty.isRuleWhiteSpace(c)) { break; } pos += UTF16.getCharCount(c); } return pos; } static final char DIGITS[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' }; /** * Append the digits of a positive integer to the given * StringBuffer in the given radix. This is * done recursively since it is easiest to generate the low- * order digit first, but it must be appended last. * * @param result is the StringBuffer to append to * @param n is the positive integer * @param radix is the radix, from 2 to 36 inclusive * @param minDigits is the minimum number of digits to append. */ private static void recursiveAppendNumber(StringBuffer result, int n, int radix, int minDigits) { int digit = n % radix; if (n >= radix || minDigits > 1) { recursiveAppendNumber(result, n / radix, radix, minDigits - 1); } result.append(DIGITS[digit]); } /** * Append a number to the given StringBuffer in the given radix. * Standard digits '0'-'9' are used and letters 'A'-'Z' for * radices 11 through 36. * @param result the digits of the number are appended here * @param n the number to be converted to digits; may be negative. * If negative, a '-' is prepended to the digits. * @param radix a radix from 2 to 36 inclusive. * @param minDigits the minimum number of digits, not including * any '-', to produce. Values less than 2 have no effect. One * digit is always emitted regardless of this parameter. * @return a reference to result */ public static StringBuffer appendNumber(StringBuffer result, int n, int radix, int minDigits) throws IllegalArgumentException { if (radix < 2 || radix > 36) { throw new IllegalArgumentException("Illegal radix " + radix); } int abs = n; if (n < 0) { abs = -n; result.append("-"); } recursiveAppendNumber(result, abs, radix, minDigits); return result; } /** * Return true if the character is NOT printable ASCII. The tab, * newline and linefeed characters are considered unprintable. */ public static boolean isUnprintable(int c) { return !(c >= 0x20 && c <= 0x7E); } /** * Escape unprintable characters using {@code uxxxx} notation * for U+0000 to U+FFFF and {@code Uxxxxxxxx} for U+10000 and * above. If the character is printable ASCII, then do nothing * and return FALSE. Otherwise, append the escaped notation and * return TRUE. */ public static boolean escapeUnprintable(StringBuffer result, int c) { if (isUnprintable(c)) { result.append('\\'); if ((c & ~0xFFFF) != 0) { result.append('U'); result.append(DIGITS[0xF&(c>>28)]); result.append(DIGITS[0xF&(c>>24)]); result.append(DIGITS[0xF&(c>>20)]); result.append(DIGITS[0xF&(c>>16)]); } else { result.append('u'); } result.append(DIGITS[0xF&(c>>12)]); result.append(DIGITS[0xF&(c>>8)]); result.append(DIGITS[0xF&(c>>4)]); result.append(DIGITS[0xF&c]); return true; } return false; } /** * Similar to StringBuffer.getChars, version 1.3. * Since JDK 1.2 implements StringBuffer.getChars differently, this method * is here to provide consistent results. * To be removed after JDK 1.2 ceased to be the reference platform. * @param src source string buffer * @param srcBegin offset to the start of the src to retrieve from * @param srcEnd offset to the end of the src to retrieve from * @param dst char array to store the retrieved chars * @param dstBegin offset to the start of the destination char array to * store the retrieved chars */ public static void getChars(StringBuffer src, int srcBegin, int srcEnd, char dst[], int dstBegin) { if (srcBegin == srcEnd) { return; } src.getChars(srcBegin, srcEnd, dst, dstBegin); } }