--- old/jdk/src/java.base/share/classes/sun/text/normalizer/UTF16.java 2015-07-13 16:11:56.000000000 +0900
+++ new/jdk/src/java.base/share/classes/sun/text/normalizer/UTF16.java 2015-07-13 16:11:56.000000000 +0900
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -22,15 +22,10 @@
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
-/*
+/**
*******************************************************************************
- * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved *
- * *
- * The original version of this source code and documentation is copyrighted *
- * and owned by IBM, These materials are provided under terms of a License *
- * Agreement between IBM and Sun. This technology is protected by multiple *
- * US and International patents. This notice and attribution to IBM may not *
- * to removed. *
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
@@ -57,21 +52,21 @@
*
* // iteration forwards: Changes for UTF-32
* int ch;
- * for (int i = 0; i < s.length(); i+=UTF16.getCharCount(ch)) {
- * ch = UTF16.charAt(s,i);
+ * for (int i = 0; i < s.length(); i += UTF16.getCharCount(ch)) {
+ * ch = UTF16.charAt(s, i);
* doSomethingWith(ch);
* }
*
* // iteration backwards: Original
- * for (int i = s.length() -1; i >= 0; --i) {
+ * for (int i = s.length() - 1; i >= 0; --i) {
* char ch = s.charAt(i);
* doSomethingWith(ch);
* }
*
* // iteration backwards: Changes for UTF-32
* int ch;
- * for (int i = s.length() -1; i > 0; i-=UTF16.getCharCount(ch)) {
- * ch = UTF16.charAt(s,i);
+ * for (int i = s.length() - 1; i >= 0; i -= UTF16.getCharCount(ch)) {
+ * ch = UTF16.charAt(s, i);
* doSomethingWith(ch);
* }
* }
@@ -93,7 +88,7 @@
* back if and only if <code>bounds(string, offset16) != TRAIL</code>.
*
*
UCharacter.isLegal()
can be used to check
@@ -106,10 +101,10 @@
* practice as missing glyphs (see the Unicode Standard Section 5.4, 5.5).
* <code>UTF16.getCharCount()</code>, as well as random access. If a
* validity check is required, use
@@ -232,19 +261,72 @@
* character will be returned. If a complete supplementary character is
* not found the incomplete character will be returned
* @param source array of UTF-16 chars
- * @param start offset to substring in the source array for analyzing
- * @param limit offset to substring in the source array for analyzing
- * @param offset16 UTF-16 offset relative to start
+ * @param offset16 UTF-16 offset to the start of the character.
* @return UTF-32 value for the UTF-32 value that contains the char at
* offset16. The boundaries of that codepoint are the same as in
* <code>bounds32()</code>.
- * @exception IndexOutOfBoundsException thrown if offset16 is not within
- * the range of start and limit.
+ * @exception IndexOutOfBoundsException thrown if offset16 is out of bounds.
* @stable ICU 2.1
*/
- public static int charAt(char source[], int start, int limit,
- int offset16)
- {
+ public static int charAt(CharSequence source, int offset16) {
+ char single = source.charAt(offset16); // no explicit range check here: CharSequence.charAt rejects a bad offset16
+ if (single < UTF16.LEAD_SURROGATE_MIN_VALUE) { // fast path: code unit lies below the surrogate range
+ return single;
+ }
+ return _charAt(source, offset16, single); // slow path: either above the surrogate range or a surrogate to pair up
+ }
+
+ private static int _charAt(CharSequence source, int offset16, char single) { // slow path of charAt(CharSequence, int); single is the code unit already read at offset16
+ if (single > UTF16.TRAIL_SURROGATE_MAX_VALUE) { // above the surrogate range: not part of any pair
+ return single;
+ }
+
+ // Convert the UTF-16 surrogate pair if necessary.
+ // For simplicity in usage, and because the frequency of pairs is
+ // low, look in both directions: forwards from a lead, backwards from a trail.
+
+ if (single <= UTF16.LEAD_SURROGATE_MAX_VALUE) { // lead surrogate (charAt has already ruled out values below LEAD_SURROGATE_MIN_VALUE)
+ ++offset16;
+ if (source.length() != offset16) { // a following code unit exists
+ char trail = source.charAt(offset16);
+ if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE
+ && trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
+ return UCharacterProperty.getRawSupplementary(single, trail); // presumably combines lead + trail into the supplementary code point (helper not shown here)
+ }
+ }
+ } else {
+ --offset16;
+ if (offset16 >= 0) { // a preceding code unit exists
+ // single is a trail surrogate, so look backwards for its lead
+ char lead = source.charAt(offset16);
+ if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE
+ && lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
+ return UCharacterProperty.getRawSupplementary(lead, single);
+ }
+ }
+ }
+ return single; // no partner found: return the unmatched surrogate as-is
+ }
+
+ /**
+ * Extract a single UTF-32 value from a substring. Used when iterating forwards or backwards
+ * (with <code>UTF16.getCharCount()</code>), as well as random access. If a validity check is
+ * required, use <code>UCharacter.isLegal()</code>
+ * on the return value. If the char retrieved is part of a surrogate pair, its supplementary
+ * character will be returned. If a complete supplementary character is not found, the incomplete
+ * character will be returned.
+ *
+ * @param source Array of UTF-16 chars
+ * @param start Offset to substring in the source array for analyzing
+ * @param limit Offset to substring in the source array for analyzing
+ * @param offset16 UTF-16 offset relative to start
+ * @return UTF-32 value of the code point that contains the char at offset16. The boundaries
+ * of that codepoint are the same as in <code>bounds32()</code>.
+ * @exception IndexOutOfBoundsException Thrown if offset16 is not within the range of start and limit.
+ * @stable ICU 2.1
+ */
+ public static int charAt(char source[], int start, int limit, int offset16) {
offset16 += start;
if (offset16 < start || offset16 >= limit) {
throw new ArrayIndexOutOfBoundsException(offset16);
@@ -259,7 +341,7 @@
// For simplicity in usage, and because the frequency of pairs is
// low, look both directions.
if (single <= LEAD_SURROGATE_MAX_VALUE) {
- offset16 ++;
+ offset16++;
if (offset16 >= limit) {
return single;
}
@@ -272,7 +354,7 @@
if (offset16 == start) {
return single;
}
- offset16 --;
+ offset16--;
char lead = source[offset16];
if (isLeadSurrogate(lead))
return UCharacterProperty.getRawSupplementary(lead, single);
@@ -300,37 +382,34 @@
/**
* Determines whether the code value is a surrogate.
* @param char16 the input character.
- * @return true iff the input character is a surrogate.
+ * @return true if the input character is a surrogate.
* @stable ICU 2.1
*/
public static boolean isSurrogate(char char16)
{
- return LEAD_SURROGATE_MIN_VALUE <= char16 &&
- char16 <= TRAIL_SURROGATE_MAX_VALUE;
+ return (char16 & SURROGATE_BITMASK) == SURROGATE_BITS; // one mask-and-compare; presumably equivalent to LEAD_SURROGATE_MIN_VALUE <= char16 <= TRAIL_SURROGATE_MAX_VALUE (mask constants not shown here)
}
/**
* Determines whether the character is a trail surrogate.
* @param char16 the input character.
- * @return true iff the input character is a trail surrogate.
+ * @return true if the input character is a trail surrogate.
* @stable ICU 2.1
*/
public static boolean isTrailSurrogate(char char16)
{
- return (TRAIL_SURROGATE_MIN_VALUE <= char16 &&
- char16 <= TRAIL_SURROGATE_MAX_VALUE);
+ return (char16 & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS; // one mask-and-compare; presumably equivalent to TRAIL_SURROGATE_MIN_VALUE <= char16 <= TRAIL_SURROGATE_MAX_VALUE (mask constants not shown here)
}
/**
* Determines whether the character is a lead surrogate.
* @param char16 the input character.
- * @return true iff the input character is a lead surrogate
+ * @return true if the input character is a lead surrogate
* @stable ICU 2.1
*/
public static boolean isLeadSurrogate(char char16)
{
- return LEAD_SURROGATE_MIN_VALUE <= char16 &&
- char16 <= LEAD_SURROGATE_MAX_VALUE;
+ return (char16 & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS; // one mask-and-compare; presumably equivalent to LEAD_SURROGATE_MIN_VALUE <= char16 <= LEAD_SURROGATE_MAX_VALUE (mask constants not shown here)
}
/**
@@ -359,7 +438,7 @@
* isLegal()
* on char32 before calling.
* @param char32 the input character.
- * @return the trail surrogate if the getCharCount(ch) is 2;