< prev index next >

jdk/src/java.base/share/classes/sun/text/normalizer/Utility.java

Print this page


   1 /*
   2  * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 /*
  26  *******************************************************************************
  27  * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
  28  *                                                                             *
  29  * The original version of this source code and documentation is copyrighted   *
  30  * and owned by IBM, These materials are provided under terms of a License     *
  31  * Agreement between IBM and Sun. This technology is protected by multiple     *
  32  * US and International patents. This notice and attribution to IBM may not    *
  33  * to removed.                                                                 *
  34  *******************************************************************************
  35  */
  36 
  37 package sun.text.normalizer;
  38 
  39 public final class Utility {

  40 
  41     /**
  42      * Convenience utility to compare two Object[]s
  43      * Ought to be in System.
  44      * @param len the length to compare.
  45      * The start indices and start+len must be valid.
  46      */
  47     public final static boolean arrayRegionMatches(char[] source, int sourceStart,
  48                                             char[] target, int targetStart,
  49                                             int len)
  50     {
  51         int sourceEnd = sourceStart + len;
  52         int delta = targetStart - sourceStart;
  53         for (int i = sourceStart; i < sourceEnd; i++) {
  54             if (source[i]!=target[i + delta])
  55             return false;
  56         }
  57         return true;
  58     }
  59 
  60     /**
  61      * Convert characters outside the range U+0020 to U+007F to
  62      * Unicode escapes, and convert backslash to a double backslash.
  63      */
  64     public static final String escape(String s) {
  65         StringBuffer buf = new StringBuffer();
  66         for (int i=0; i<s.length(); ) {
  67             int c = UTF16.charAt(s, i);
  68             i += UTF16.getCharCount(c);
  69             if (c >= ' ' && c <= 0x007F) {
  70                 if (c == '\\') {
  71                     buf.append("\\\\"); // That is, "\\"
  72                 } else {
  73                     buf.append((char)c);
  74                 }
  75             } else {
  76                 boolean four = c <= 0xFFFF;
  77                 buf.append(four ? "\\u" : "\\U");
  78                 hex(c, four ? 4 : 8, buf);
  79             }
  80         }
  81         return buf.toString();
  82     }
  83 
  84     /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE.
          *
          * The array is a flat list of pairs: the letter that follows the
          * backslash, then the character that the escape stands for.
          * The lookup in unescapeAt returns UNESCAPE_MAP[i+1] on a match and
          * stops as soon as the character is smaller than the key at i, which
          * is why the keys have to be sorted.
          *
          * The commented-out entries map a character to itself; unescapeAt's
          * final fall-through already returns an unrecognized character
          * unchanged, so they do not need to be stored here.
          */
  85     static private final char[] UNESCAPE_MAP = {
  86         /*"   0x22, 0x22 */
  87         /*'   0x27, 0x27 */
  88         /*?   0x3F, 0x3F */
  89         /*\   0x5C, 0x5C */
  90         /*a*/ 0x61, 0x07,   // BEL (alert)
  91         /*b*/ 0x62, 0x08,   // BS  (backspace)
  92         /*e*/ 0x65, 0x1b,   // ESC (escape)
  93         /*f*/ 0x66, 0x0c,   // FF  (form feed)
  94         /*n*/ 0x6E, 0x0a,   // LF  (line feed)
  95         /*r*/ 0x72, 0x0d,   // CR  (carriage return)
  96         /*t*/ 0x74, 0x09,   // HT  (horizontal tab)
  97         /*v*/ 0x76, 0x0b    // VT  (vertical tab)
  98     };


 107      */
 108     public static int unescapeAt(String s, int[] offset16) {
 109         int c;
 110         int result = 0;
 111         int n = 0;
 112         int minDig = 0;
 113         int maxDig = 0;
 114         int bitsPerDigit = 4;
 115         int dig;
 116         int i;
 117         boolean braces = false;
 118 
 119         /* Check that offset is in range */
 120         int offset = offset16[0];
 121         int length = s.length();
 122         if (offset < 0 || offset >= length) {
 123             return -1;
 124         }
 125 
 126         /* Fetch first UChar after '\\' */
 127         c = UTF16.charAt(s, offset);
 128         offset += UTF16.getCharCount(c);
 129 
 130         /* Convert hexadecimal and octal escapes */
 131         switch (c) {
 132         case 'u':
 133             minDig = maxDig = 4;
 134             break;
 135         case 'U':
 136             minDig = maxDig = 8;
 137             break;
 138         case 'x':
 139             minDig = 1;
 140             if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
 141                 ++offset;
 142                 braces = true;
 143                 maxDig = 8;
 144             } else {
 145                 maxDig = 2;
 146             }
 147             break;


 209                 return UNESCAPE_MAP[i+1];
 210             } else if (c < UNESCAPE_MAP[i]) {
 211                 break;
 212             }
 213         }
 214 
 215         /* Map \cX to control-X: X & 0x1F */
 216         if (c == 'c' && offset < length) {
 217             c = UTF16.charAt(s, offset);
 218             offset16[0] = offset + UTF16.getCharCount(c);
 219             return 0x1F & c;
 220         }
 221 
 222         /* If no special forms are recognized, then consider
 223          * the backslash to generically escape the next character. */
 224         offset16[0] = offset;
 225         return c;
 226     }
 227 
 228     /**
 229      * Convert a integer to size width hex uppercase digits.
 230      * E.g., {@code hex('a', 4, str) => "0041"}.
 231      * Append the output to the given StringBuffer.
 232      * If width is too small to fit, nothing will be appended to output.
 233      */
 234     public static StringBuffer hex(int ch, int width, StringBuffer output) {
 235         return appendNumber(output, ch, 16, width);







 236     }
 237 
 238     /**
 239      * Convert a integer to size width (minimum) hex uppercase digits.
 240      * E.g., {@code hex('a', 4, str) => "0041"}.  If the integer requires more
 241      * than width digits, more will be used.
 242      */
 243     public static String hex(int ch, int width) {
 244         StringBuffer buf = new StringBuffer();
 245         return appendNumber(buf, ch, 16, width).toString();
 246     }
 247 
 248     /**
 249      * Skip over a sequence of zero or more white space characters
 250      * at pos.  Return the index of the first non-white-space character
 251      * at or after pos, or str.length(), if there is none.
 252      */
 253     public static int skipWhitespace(String str, int pos) {
 254         while (pos < str.length()) {
 255             int c = UTF16.charAt(str, pos);
 256             if (!UCharacterProperty.isRuleWhiteSpace(c)) {
 257                 break;
 258             }
 259             pos += UTF16.getCharCount(c);
 260         }
 261         return pos;
 262     }
 263 
 /**
  * Digit characters indexed by digit value: '0'-'9' for 0-9, then
  * 'A'-'Z' for 10-35.
  */
 static final char DIGITS[] = {
     '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
     'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
     'U', 'V', 'W', 'X', 'Y', 'Z'
 };

 /**
  * Append the digits of a non-negative integer to the given
  * <code>StringBuffer</code> in the given radix. This is
  * done recursively since it is easiest to generate the low-
  * order digit first, but it must be appended last.
  *
  * @param result is the <code>StringBuffer</code> to append to
  * @param n is the non-negative value; a <code>long</code> so that the
  * magnitude of <code>Integer.MIN_VALUE</code> can be represented
  * @param radix is the radix, from 2 to 36 inclusive
  * @param minDigits is the minimum number of digits to append.
  */
 private static void recursiveAppendNumber(StringBuffer result, long n,
                                           int radix, int minDigits)
 {
     int digit = (int) (n % radix);

     // Emit the higher-order digits (or padding zeros) first.
     if (n >= radix || minDigits > 1) {
         recursiveAppendNumber(result, n / radix, radix, minDigits - 1);
     }

     result.append(DIGITS[digit]);
 }

 /**
  * Append a number to the given StringBuffer in the given radix.
  * Standard digits '0'-'9' are used and letters 'A'-'Z' for
  * radices 11 through 36.
  * @param result the digits of the number are appended here
  * @param n the number to be converted to digits; may be negative,
  * including <code>Integer.MIN_VALUE</code>.
  * If negative, a '-' is prepended to the digits.
  * @param radix a radix from 2 to 36 inclusive.
  * @param minDigits the minimum number of digits, not including
  * any '-', to produce.  Values less than 2 have no effect.  One
  * digit is always emitted regardless of this parameter.
  * @return a reference to result
  * @throws IllegalArgumentException if radix is outside 2..36
  */
 public static StringBuffer appendNumber(StringBuffer result, int n,
                                         int radix, int minDigits)
     throws IllegalArgumentException
 {
     if (radix < 2 || radix > 36) {
         throw new IllegalArgumentException("Illegal radix " + radix);
     }

     // Widen before negating: -Integer.MIN_VALUE overflows in int arithmetic
     // and would stay negative, producing a negative index into DIGITS.
     long magnitude = n;

     if (n < 0) {
         magnitude = -magnitude;
         result.append("-");
     }

     recursiveAppendNumber(result, magnitude, radix, minDigits);

     return result;
 }
 327 
 328     /**
 329      * Return true if the character is NOT printable ASCII.  The tab,
 330      * newline and linefeed characters are considered unprintable.
 331      */
 332     public static boolean isUnprintable(int c) {

 333         return !(c >= 0x20 && c <= 0x7E);
 334     }
 335 
 336     /**
 337      * Escape unprintable characters using {@code <backslash>uxxxx} notation
 338      * for U+0000 to U+FFFF and {@code <backslash>Uxxxxxxxx} for U+10000 and
 339      * above.  If the character is printable ASCII, then do nothing
 340      * and return FALSE.  Otherwise, append the escaped notation and
 341      * return TRUE.
 342      */
 343     public static boolean escapeUnprintable(StringBuffer result, int c) {

 344         if (isUnprintable(c)) {
 345             result.append('\\');
 346             if ((c & ~0xFFFF) != 0) {
 347                 result.append('U');
 348                 result.append(DIGITS[0xF&(c>>28)]);
 349                 result.append(DIGITS[0xF&(c>>24)]);
 350                 result.append(DIGITS[0xF&(c>>20)]);
 351                 result.append(DIGITS[0xF&(c>>16)]);
 352             } else {
 353                 result.append('u');
 354             }
 355             result.append(DIGITS[0xF&(c>>12)]);
 356             result.append(DIGITS[0xF&(c>>8)]);
 357             result.append(DIGITS[0xF&(c>>4)]);
 358             result.append(DIGITS[0xF&c]);
 359             return true;
 360         }
 361         return false;


 362     }
 363 
 364     /**
 365     * Similar to StringBuffer.getChars, version 1.3.
 366     * Since JDK 1.2 implements StringBuffer.getChars differently, this method
 367     * is here to provide consistent results.
 368     * To be removed after JDK 1.2 ceased to be the reference platform.
 369     * @param src source string buffer
 370     * @param srcBegin offset to the start of the src to retrieve from
 371     * @param srcEnd offset to the end of the src to retrieve from
 372     * @param dst char array to store the retrieved chars
 373     * @param dstBegin offset to the start of the destination char array to
 374     *                 store the retrieved chars
 375     */
 376     public static void getChars(StringBuffer src, int srcBegin, int srcEnd,
 377                                 char dst[], int dstBegin)
 378     {
 379         if (srcBegin == srcEnd) {
 380             return;
 381         }
 382         src.getChars(srcBegin, srcEnd, dst, dstBegin);
 383     }
 384 
 385 }
   1 /*
   2  * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 /*
  26  *******************************************************************************
  27  * Copyright (C) 1996-2011, International Business Machines Corporation and    *
  28  * others. All Rights Reserved.                                                *





  29  *******************************************************************************
  30  */
  31 
  32 package sun.text.normalizer;
  33 
  34 import java.io.IOException;
  35 import java.util.Locale;
  36 
  37 final class Utility {

















  38 
  39     /**
  40      * Convert characters outside the range U+0020 to U+007F to
  41      * Unicode escapes, and convert backslash to a double backslash.
  42      */
  43     public static final String escape(String s) {
  44         StringBuilder buf = new StringBuilder();
  45         for (int i=0; i<s.length(); ) {
  46             int c = Character.codePointAt(s, i);
  47             i += UTF16.getCharCount(c);
  48             if (c >= ' ' && c <= 0x007F) {
  49                 if (c == '\\') {
  50                     buf.append("\\\\"); // That is, "\\"
  51                 } else {
  52                     buf.append((char)c);
  53                 }
  54             } else {
  55                 boolean four = c <= 0xFFFF;
  56                 buf.append(four ? "\\u" : "\\U");
  57                 buf.append(hex(c, four ? 4 : 8));
  58             }
  59         }
  60         return buf.toString();
  61     }
  62 
  63     /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE.
          *
          * The array is a flat list of pairs: the letter that follows the
          * backslash, then the character that the escape stands for.
          * The lookup in unescapeAt returns UNESCAPE_MAP[i+1] on a match and
          * stops as soon as the character is smaller than the key at i, which
          * is why the keys have to be sorted.
          *
          * The commented-out entries map a character to itself; unescapeAt's
          * final fall-through already returns an unrecognized character
          * unchanged, so they do not need to be stored here.
          */
  64     static private final char[] UNESCAPE_MAP = {
  65         /*"   0x22, 0x22 */
  66         /*'   0x27, 0x27 */
  67         /*?   0x3F, 0x3F */
  68         /*\   0x5C, 0x5C */
  69         /*a*/ 0x61, 0x07,   // BEL (alert)
  70         /*b*/ 0x62, 0x08,   // BS  (backspace)
  71         /*e*/ 0x65, 0x1b,   // ESC (escape)
  72         /*f*/ 0x66, 0x0c,   // FF  (form feed)
  73         /*n*/ 0x6E, 0x0a,   // LF  (line feed)
  74         /*r*/ 0x72, 0x0d,   // CR  (carriage return)
  75         /*t*/ 0x74, 0x09,   // HT  (horizontal tab)
  76         /*v*/ 0x76, 0x0b    // VT  (vertical tab)
  77     };


  86      */
  87     public static int unescapeAt(String s, int[] offset16) {
  88         int c;
  89         int result = 0;
  90         int n = 0;
  91         int minDig = 0;
  92         int maxDig = 0;
  93         int bitsPerDigit = 4;
  94         int dig;
  95         int i;
  96         boolean braces = false;
  97 
  98         /* Check that offset is in range */
  99         int offset = offset16[0];
 100         int length = s.length();
 101         if (offset < 0 || offset >= length) {
 102             return -1;
 103         }
 104 
 105         /* Fetch first UChar after '\\' */
 106         c = Character.codePointAt(s, offset);
 107         offset += UTF16.getCharCount(c);
 108 
 109         /* Convert hexadecimal and octal escapes */
 110         switch (c) {
 111         case 'u':
 112             minDig = maxDig = 4;
 113             break;
 114         case 'U':
 115             minDig = maxDig = 8;
 116             break;
 117         case 'x':
 118             minDig = 1;
 119             if (offset < length && UTF16.charAt(s, offset) == 0x7B /*{*/) {
 120                 ++offset;
 121                 braces = true;
 122                 maxDig = 8;
 123             } else {
 124                 maxDig = 2;
 125           }
 126             break;


 188                 return UNESCAPE_MAP[i+1];
 189             } else if (c < UNESCAPE_MAP[i]) {
 190                 break;
 191             }
 192         }
 193 
 194         /* Map \cX to control-X: X & 0x1F */
 195         if (c == 'c' && offset < length) {
 196             c = UTF16.charAt(s, offset);
 197             offset16[0] = offset + UTF16.getCharCount(c);
 198             return 0x1F & c;
 199         }
 200 
 201         /* If no special forms are recognized, then consider
 202          * the backslash to generically escape the next character. */
 203         offset16[0] = offset;
 204         return c;
 205     }
 206 
 207     /**
 208      * Supplies a zero-padded hex representation of an integer (without 0x)



 209      */
 210     static public String hex(long i, int places) {
 211         if (i == Long.MIN_VALUE) return "-8000000000000000";
 212         boolean negative = i < 0;
 213         if (negative) {
 214             i = -i;
 215         }
 216         String result = Long.toString(i, 16).toUpperCase(Locale.ENGLISH);
 217         if (result.length() < places) {
 218             result = "0000000000000000".substring(result.length(),places) + result;
 219         }
 220         if (negative) {
 221             return '-' + result;







 222         }
 223         return result;














 224     }
 225 
 226     static final char DIGITS[] = {
             // Digit characters indexed by digit value: '0'-'9' for 0-9, then
             // 'A'-'Z' for 10-35.  escapeUnprintable indexes this table with
             // 4-bit values, so it only ever reads the first sixteen entries.
 227         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
 228         'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
 229         'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
 230         'U', 'V', 'W', 'X', 'Y', 'Z'
 231     };
 232 
 233     /**

























































 234      * Return true if the character is NOT printable ASCII.  The tab,
 235      * newline and linefeed characters are considered unprintable.
 236      */
 237     public static boolean isUnprintable(int c) {
 238         //0x20 = 32 and 0x7E = 126
 239         return !(c >= 0x20 && c <= 0x7E);
 240     }
 241 
 242     /**
 243      * Escape unprintable characters using <backslash>uxxxx notation
 244      * for U+0000 to U+FFFF and <backslash>Uxxxxxxxx for U+10000 and
 245      * above.  If the character is printable ASCII, then do nothing
 246      * and return FALSE.  Otherwise, append the escaped notation and
 247      * return TRUE.
 248      */
 249     public static <T extends Appendable> boolean escapeUnprintable(T result, int c) {
 250         try {
 251             if (isUnprintable(c)) {
 252                 result.append('\\');
 253                 if ((c & ~0xFFFF) != 0) {
 254                     result.append('U');
 255                     result.append(DIGITS[0xF&(c>>28)]);
 256                     result.append(DIGITS[0xF&(c>>24)]);
 257                     result.append(DIGITS[0xF&(c>>20)]);
 258                     result.append(DIGITS[0xF&(c>>16)]);
 259                 } else {
 260                     result.append('u');
 261                 }
 262                 result.append(DIGITS[0xF&(c>>12)]);
 263                 result.append(DIGITS[0xF&(c>>8)]);
 264                 result.append(DIGITS[0xF&(c>>4)]);
 265                 result.append(DIGITS[0xF&c]);
 266                 return true;
 267             }
 268             return false;
 269         } catch (IOException e) {
 270             throw new IllegalArgumentException(e);
 271         }


















 272     }



 273 }
< prev index next >