< prev index next >

jdk/src/java.base/share/classes/sun/text/normalizer/UTF16.java

Print this page

        

*** 1,7 **** /* ! * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this --- 1,7 ---- /* ! * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this
*** 20,38 **** * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ ! /* ******************************************************************************* ! * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved * ! * * ! * The original version of this source code and documentation is copyrighted * ! * and owned by IBM, These materials are provided under terms of a License * ! * Agreement between IBM and Sun. This technology is protected by multiple * ! * US and International patents. This notice and attribution to IBM may not * ! * to removed. * ******************************************************************************* */ package sun.text.normalizer; --- 20,33 ---- * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ ! /** ******************************************************************************* ! * Copyright (C) 1996-2014, International Business Machines Corporation and ! * others. All Rights Reserved. ******************************************************************************* */ package sun.text.normalizer;
*** 55,79 **** * doSomethingWith(ch); * } * * // iteration forwards: Changes for UTF-32 * int ch; ! * for (int i = 0; i < s.length(); i+=UTF16.getCharCount(ch)) { ! * ch = UTF16.charAt(s,i); * doSomethingWith(ch); * } * * // iteration backwards: Original ! * for (int i = s.length() -1; i >= 0; --i) { * char ch = s.charAt(i); * doSomethingWith(ch); * } * * // iteration backwards: Changes for UTF-32 * int ch; ! * for (int i = s.length() -1; i > 0; i-=UTF16.getCharCount(ch)) { ! * ch = UTF16.charAt(s,i); * doSomethingWith(ch); * } * }</pre> * <strong>Notes:</strong> * <ul> --- 50,74 ---- * doSomethingWith(ch); * } * * // iteration forwards: Changes for UTF-32 * int ch; ! * for (int i = 0; i < s.length(); i += UTF16.getCharCount(ch)) { ! * ch = UTF16.charAt(s, i); * doSomethingWith(ch); * } * * // iteration backwards: Original ! * for (int i = s.length() - 1; i >= 0; --i) { * char ch = s.charAt(i); * doSomethingWith(ch); * } * * // iteration backwards: Changes for UTF-32 * int ch; ! * for (int i = s.length() - 1; i > 0; i -= UTF16.getCharCount(ch)) { ! * ch = UTF16.charAt(s, i); * doSomethingWith(ch); * } * }</pre> * <strong>Notes:</strong> * <ul>
*** 159,169 **** --- 154,198 ---- /** * Surrogate minimum value * @stable ICU 2.1 */ public static final int SURROGATE_MIN_VALUE = LEAD_SURROGATE_MIN_VALUE; + /** + * Lead surrogate bitmask + */ + private static final int LEAD_SURROGATE_BITMASK = 0xFFFFFC00; + /** + * Trail surrogate bitmask + */ + private static final int TRAIL_SURROGATE_BITMASK = 0xFFFFFC00; + /** + * Surrogate bitmask + */ + private static final int SURROGATE_BITMASK = 0xFFFFF800; + /** + * Lead surrogate bits + */ + private static final int LEAD_SURROGATE_BITS = 0xD800; + /** + * Trail surrogate bits + */ + private static final int TRAIL_SURROGATE_BITS = 0xDC00; + /** + * Surrogate bits + */ + private static final int SURROGATE_BITS = 0xD800; + + // constructor -------------------------------------------------------- + // /CLOVER:OFF + /** + * Prevent instance from being created. + */ + private UTF16() { + } + + // /CLOVER:ON // public method ------------------------------------------------------ /** * Extract a single UTF-32 value from a string. * Used when iterating forwards or backwards (with
*** 220,252 **** } return single; // return unmatched surrogate } /** ! * Extract a single UTF-32 value from a substring. * Used when iterating forwards or backwards (with * <code>UTF16.getCharCount()</code>, as well as random access. If a * validity check is required, use * <code><a href="../lang/UCharacter.html#isLegal(char)">UCharacter.isLegal() * </a></code> on the return value. * If the char retrieved is part of a surrogate pair, its supplementary * character will be returned. If a complete supplementary character is * not found the incomplete character will be returned * @param source array of UTF-16 chars ! * @param start offset to substring in the source array for analyzing ! * @param limit offset to substring in the source array for analyzing ! * @param offset16 UTF-16 offset relative to start * @return UTF-32 value for the UTF-32 value that contains the char at * offset16. The boundaries of that codepoint are the same as in * <code>bounds32()</code>. ! * @exception IndexOutOfBoundsException thrown if offset16 is not within ! * the range of start and limit. * @stable ICU 2.1 */ ! public static int charAt(char source[], int start, int limit, ! int offset16) ! { offset16 += start; if (offset16 < start || offset16 >= limit) { throw new ArrayIndexOutOfBoundsException(offset16); } --- 249,334 ---- } return single; // return unmatched surrogate } /** ! * Extract a single UTF-32 value from a string. * Used when iterating forwards or backwards (with * <code>UTF16.getCharCount()</code>, as well as random access. If a * validity check is required, use * <code><a href="../lang/UCharacter.html#isLegal(char)">UCharacter.isLegal() * </a></code> on the return value. * If the char retrieved is part of a surrogate pair, its supplementary * character will be returned. If a complete supplementary character is * not found the incomplete character will be returned * @param source array of UTF-16 chars ! * @param offset16 UTF-16 offset to the start of the character. * @return UTF-32 value for the UTF-32 value that contains the char at * offset16. The boundaries of that codepoint are the same as in * <code>bounds32()</code>. ! * @exception IndexOutOfBoundsException thrown if offset16 is out of bounds. * @stable ICU 2.1 */ ! public static int charAt(CharSequence source, int offset16) { ! char single = source.charAt(offset16); ! if (single < UTF16.LEAD_SURROGATE_MIN_VALUE) { ! return single; ! } ! return _charAt(source, offset16, single); ! } ! ! private static int _charAt(CharSequence source, int offset16, char single) { ! if (single > UTF16.TRAIL_SURROGATE_MAX_VALUE) { ! return single; ! } ! ! // Convert the UTF-16 surrogate pair if necessary. ! // For simplicity in usage, and because the frequency of pairs is ! // low, look both directions. ! ! if (single <= UTF16.LEAD_SURROGATE_MAX_VALUE) { ! ++offset16; ! if (source.length() != offset16) { ! char trail = source.charAt(offset16); ! if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE ! && trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) { ! return UCharacterProperty.getRawSupplementary(single, trail); ! } ! } ! } else { ! --offset16; ! if (offset16 >= 0) { ! // single is a trail surrogate so ! char lead = source.charAt(offset16); ! if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE ! && lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) { ! return UCharacterProperty.getRawSupplementary(lead, single); ! } ! } ! } ! return single; // return unmatched surrogate ! } ! ! /** ! * Extract a single UTF-32 value from a substring. Used when iterating forwards or backwards ! * (with <code>UTF16.getCharCount()</code>, as well as random access. If a validity check is ! * required, use <code><a href="../lang/UCharacter.html#isLegal(char)">UCharacter.isLegal() ! * </a></code> ! * on the return value. If the char retrieved is part of a surrogate pair, its supplementary ! * character will be returned. If a complete supplementary character is not found the incomplete ! * character will be returned ! * ! * @param source Array of UTF-16 chars ! * @param start Offset to substring in the source array for analyzing ! * @param limit Offset to substring in the source array for analyzing ! * @param offset16 UTF-16 offset relative to start ! * @return UTF-32 value for the UTF-32 value that contains the char at offset16. The boundaries ! * of that codepoint are the same as in <code>bounds32()</code>. ! * @exception IndexOutOfBoundsException Thrown if offset16 is not within the range of start and limit. ! * @stable ICU 2.1 ! */ ! public static int charAt(char source[], int start, int limit, int offset16) { offset16 += start; if (offset16 < start || offset16 >= limit) { throw new ArrayIndexOutOfBoundsException(offset16); }
*** 257,267 **** // Convert the UTF-16 surrogate pair if necessary. // For simplicity in usage, and because the frequency of pairs is // low, look both directions. if (single <= LEAD_SURROGATE_MAX_VALUE) { ! offset16 ++; if (offset16 >= limit) { return single; } char trail = source[offset16]; if (isTrailSurrogate(trail)) { --- 339,349 ---- // Convert the UTF-16 surrogate pair if necessary. // For simplicity in usage, and because the frequency of pairs is // low, look both directions. if (single <= LEAD_SURROGATE_MAX_VALUE) { ! offset16++; if (offset16 >= limit) { return single; } char trail = source[offset16]; if (isTrailSurrogate(trail)) {
*** 270,280 **** } else { // isTrailSurrogate(single), so if (offset16 == start) { return single; } ! offset16 --; char lead = source[offset16]; if (isLeadSurrogate(lead)) return UCharacterProperty.getRawSupplementary(lead, single); } return single; // return unmatched surrogate --- 352,362 ---- } else { // isTrailSurrogate(single), so if (offset16 == start) { return single; } ! offset16--; char lead = source[offset16]; if (isLeadSurrogate(lead)) return UCharacterProperty.getRawSupplementary(lead, single); } return single; // return unmatched surrogate
*** 298,338 **** } /** * Determines whether the code value is a surrogate. * @param char16 the input character. ! * @return true iff the input character is a surrogate. * @stable ICU 2.1 */ public static boolean isSurrogate(char char16) { ! return LEAD_SURROGATE_MIN_VALUE <= char16 && ! char16 <= TRAIL_SURROGATE_MAX_VALUE; } /** * Determines whether the character is a trail surrogate. * @param char16 the input character. ! * @return true iff the input character is a trail surrogate. * @stable ICU 2.1 */ public static boolean isTrailSurrogate(char char16) { ! return (TRAIL_SURROGATE_MIN_VALUE <= char16 && ! char16 <= TRAIL_SURROGATE_MAX_VALUE); } /** * Determines whether the character is a lead surrogate. * @param char16 the input character. ! * @return true iff the input character is a lead surrogate * @stable ICU 2.1 */ public static boolean isLeadSurrogate(char char16) { ! return LEAD_SURROGATE_MIN_VALUE <= char16 && ! char16 <= LEAD_SURROGATE_MAX_VALUE; } /** * Returns the lead surrogate. * If a validity check is required, use --- 380,417 ---- } /** * Determines whether the code value is a surrogate. * @param char16 the input character. ! * @return true if the input character is a surrogate. * @stable ICU 2.1 */ public static boolean isSurrogate(char char16) { ! return (char16 & SURROGATE_BITMASK) == SURROGATE_BITS; } /** * Determines whether the character is a trail surrogate. * @param char16 the input character. ! * @return true if the input character is a trail surrogate. * @stable ICU 2.1 */ public static boolean isTrailSurrogate(char char16) { ! return (char16 & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS; } /** * Determines whether the character is a lead surrogate. * @param char16 the input character. ! * @return true if the input character is a lead surrogate * @stable ICU 2.1 */ public static boolean isLeadSurrogate(char char16) { ! return (char16 & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS; } /** * Returns the lead surrogate. * If a validity check is required, use
*** 357,378 **** * Returns the trail surrogate. * If a validity check is required, use * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> * on char32 before calling. * @param char32 the input character. ! * @return the trail surrogate if the getCharCount(ch) is 2; <br>otherwise * the character itself * @stable ICU 2.1 */ public static char getTrailSurrogate(int char32) { if (char32 >= SUPPLEMENTARY_MIN_VALUE) { return (char)(TRAIL_SURROGATE_MIN_VALUE + (char32 & TRAIL_SURROGATE_MASK_)); } ! return (char)char32; } /** * Convenience method corresponding to String.valueOf(char). Returns a one * or two char string containing the UTF-32 value in UTF16 format. If a --- 436,457 ---- * Returns the trail surrogate. * If a validity check is required, use * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code> * on char32 before calling. * @param char32 the input character. ! * @return the trail surrogate if the getCharCount(ch) is 2; <br> otherwise * the character itself * @stable ICU 2.1 */ public static char getTrailSurrogate(int char32) { if (char32 >= SUPPLEMENTARY_MIN_VALUE) { return (char)(TRAIL_SURROGATE_MIN_VALUE + (char32 & TRAIL_SURROGATE_MASK_)); } ! return (char) char32; } /** * Convenience method corresponding to String.valueOf(char). Returns a one * or two char string containing the UTF-32 value in UTF16 format. If a
*** 417,432 **** { target.append(getLeadSurrogate(char32)); target.append(getTrailSurrogate(char32)); } else { ! target.append((char)char32); } return target; } - //// for StringPrep /** * Shifts offset16 by the argument number of codepoints within a subarray. * @param source char array * @param start position of the subarray to be performed on * @param limit position of the subarray to be performed on --- 496,510 ---- { target.append(getLeadSurrogate(char32)); target.append(getTrailSurrogate(char32)); } else { ! target.append((char) char32); } return target; } /** * Shifts offset16 by the argument number of codepoints within a subarray. * @param source char array * @param start position of the subarray to be performed on * @param limit position of the subarray to be performed on
*** 443,487 **** { int size = source.length; int count; char ch; int result = offset16 + start; ! if (start<0 || limit<start) { throw new StringIndexOutOfBoundsException(start); } ! if (limit>size) { throw new StringIndexOutOfBoundsException(limit); } ! if (offset16<0 || result>limit) { throw new StringIndexOutOfBoundsException(offset16); } ! if (shift32 > 0 ) { if (shift32 + result > size) { throw new StringIndexOutOfBoundsException(result); } count = shift32; while (result < limit && count > 0) { ch = source[result]; ! if (isLeadSurrogate(ch) && (result+1 < limit) && ! isTrailSurrogate(source[result+1])) { ! result ++; } ! count --; ! result ++; } } else { if (result + shift32 < start) { throw new StringIndexOutOfBoundsException(result); } ! for (count=-shift32; count>0; count--) { result--; ! if (result<start) { break; } ch = source[result]; ! if (isTrailSurrogate(ch) && result>start && isLeadSurrogate(source[result-1])) { result--; } } } if (count != 0) { --- 521,565 ---- { int size = source.length; int count; char ch; int result = offset16 + start; ! if (start < 0 || limit < start) { throw new StringIndexOutOfBoundsException(start); } ! if (limit > size) { throw new StringIndexOutOfBoundsException(limit); } ! if (offset16 < 0 || result > limit) { throw new StringIndexOutOfBoundsException(offset16); } ! if (shift32 > 0) { if (shift32 + result > size) { throw new StringIndexOutOfBoundsException(result); } count = shift32; while (result < limit && count > 0) { ch = source[result]; ! if (isLeadSurrogate(ch) && (result + 1 < limit) && ! isTrailSurrogate(source[result + 1])) { ! result++; } ! count--; ! result++; } } else { if (result + shift32 < start) { throw new StringIndexOutOfBoundsException(result); } ! for (count = -shift32; count > 0; count--) { result--; ! if (result < start) { break; } ch = source[result]; ! if (isTrailSurrogate(ch) && result > start && isLeadSurrogate(source[result - 1])) { result--; } } } if (count != 0) {
*** 525,535 **** * @return string representation of the code point */ private static String toString(int ch) { if (ch < SUPPLEMENTARY_MIN_VALUE) { ! return String.valueOf((char)ch); } StringBuilder result = new StringBuilder(); result.append(getLeadSurrogate(ch)); result.append(getTrailSurrogate(ch)); --- 603,613 ---- * @return string representation of the code point */ private static String toString(int ch) { if (ch < SUPPLEMENTARY_MIN_VALUE) { ! return String.valueOf((char) ch); } StringBuilder result = new StringBuilder(); result.append(getLeadSurrogate(ch)); result.append(getTrailSurrogate(ch));
< prev index next >