< prev index next >

jdk/src/java.base/share/classes/sun/text/normalizer/UTF16.java

Print this page

        

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this

@@ -20,19 +20,14 @@
  *
  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
-/*
+/**
  *******************************************************************************
- * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
- *                                                                             *
- * The original version of this source code and documentation is copyrighted   *
- * and owned by IBM, These materials are provided under terms of a License     *
- * Agreement between IBM and Sun. This technology is protected by multiple     *
- * US and International patents. This notice and attribution to IBM may not    *
- * to removed.                                                                 *
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
  *******************************************************************************
  */
 
 package sun.text.normalizer;
 

@@ -55,25 +50,25 @@
  *     doSomethingWith(ch);
  * }
  *
  * // iteration forwards: Changes for UTF-32
  * int ch;
- * for (int i = 0; i < s.length(); i+=UTF16.getCharCount(ch)) {
- *     ch = UTF16.charAt(s,i);
+ * for (int i = 0; i < s.length(); i += UTF16.getCharCount(ch)) {
+ *     ch = UTF16.charAt(s, i);
  *     doSomethingWith(ch);
  * }
  *
  * // iteration backwards: Original
- * for (int i = s.length() -1; i >= 0; --i) {
+ * for (int i = s.length() - 1; i >= 0; --i) {
  *     char ch = s.charAt(i);
  *     doSomethingWith(ch);
  * }
  *
  * // iteration backwards: Changes for UTF-32
  * int ch;
- * for (int i = s.length() -1; i > 0; i-=UTF16.getCharCount(ch)) {
- *     ch = UTF16.charAt(s,i);
+ * for (int i = s.length() - 1; i >= 0; i -= UTF16.getCharCount(ch)) {
+ *     ch = UTF16.charAt(s, i);
  *     doSomethingWith(ch);
  * }
  * }</pre>
  * <strong>Notes:</strong>
  * <ul>

@@ -159,11 +154,45 @@
     /**
      * Surrogate minimum value
      * @stable ICU 2.1
      */
     public static final int SURROGATE_MIN_VALUE = LEAD_SURROGATE_MIN_VALUE;
+    /**
+     * Lead surrogate bitmask
+     */
+    private static final int LEAD_SURROGATE_BITMASK = 0xFFFFFC00;
+    /**
+     * Trail surrogate bitmask
+     */
+    private static final int TRAIL_SURROGATE_BITMASK = 0xFFFFFC00;
+    /**
+     * Surrogate bitmask
+     */
+    private static final int SURROGATE_BITMASK = 0xFFFFF800;
+    /**
+     * Lead surrogate bits
+     */
+    private static final int LEAD_SURROGATE_BITS = 0xD800;
+    /**
+     * Trail surrogate bits
+     */
+    private static final int TRAIL_SURROGATE_BITS = 0xDC00;
+    /**
+     * Surrogate bits
+     */
+    private static final int SURROGATE_BITS = 0xD800;
+
+    // constructor --------------------------------------------------------
 
+    // /CLOVER:OFF
+    /**
+     * Prevent instance from being created.
+     */
+    private UTF16() {
+    }
+
+    // /CLOVER:ON
     // public method ------------------------------------------------------
 
     /**
      * Extract a single UTF-32 value from a string.
      * Used when iterating forwards or backwards (with

@@ -220,33 +249,86 @@
         }
         return single; // return unmatched surrogate
     }
 
     /**
-     * Extract a single UTF-32 value from a substring.
+     * Extract a single UTF-32 value from a string.
      * Used when iterating forwards or backwards (with
      * <code>UTF16.getCharCount()</code>, as well as random access. If a
      * validity check is required, use
      * <code><a href="../lang/UCharacter.html#isLegal(char)">UCharacter.isLegal()
      * </a></code> on the return value.
      * If the char retrieved is part of a surrogate pair, its supplementary
      * character will be returned. If a complete supplementary character is
      * not found the incomplete character will be returned
      * @param source array of UTF-16 chars
-     * @param start offset to substring in the source array for analyzing
-     * @param limit offset to substring in the source array for analyzing
-     * @param offset16 UTF-16 offset relative to start
+     * @param offset16 UTF-16 offset to the start of the character.
      * @return UTF-32 value for the UTF-32 value that contains the char at
      *         offset16. The boundaries of that codepoint are the same as in
      *         <code>bounds32()</code>.
-     * @exception IndexOutOfBoundsException thrown if offset16 is not within
-     *            the range of start and limit.
+     * @exception IndexOutOfBoundsException thrown if offset16 is out of bounds.
      * @stable ICU 2.1
      */
-    public static int charAt(char source[], int start, int limit,
-                             int offset16)
-    {
+    public static int charAt(CharSequence source, int offset16) {
+        char single = source.charAt(offset16);
+        if (single < UTF16.LEAD_SURROGATE_MIN_VALUE) {
+            return single;
+        }
+        return _charAt(source, offset16, single);
+    }
+
+    private static int _charAt(CharSequence source, int offset16, char single) {
+        if (single > UTF16.TRAIL_SURROGATE_MAX_VALUE) {
+            return single;
+        }
+
+        // Convert the UTF-16 surrogate pair if necessary.
+        // For simplicity in usage, and because the frequency of pairs is
+        // low, look both directions.
+
+        if (single <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
+            ++offset16;
+            if (source.length() != offset16) {
+                char trail = source.charAt(offset16);
+                if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE
+                        && trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
+                    return UCharacterProperty.getRawSupplementary(single, trail);
+                }
+            }
+        } else {
+            --offset16;
+            if (offset16 >= 0) {
+                // single is a trail surrogate, so look back for its lead
+                char lead = source.charAt(offset16);
+                if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE
+                        && lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
+                    return UCharacterProperty.getRawSupplementary(lead, single);
+                }
+            }
+        }
+        return single; // return unmatched surrogate
+    }
+
+    /**
+     * Extract a single UTF-32 value from a substring. Used when iterating forwards or backwards
+     * (with <code>UTF16.getCharCount()</code>), as well as random access. If a validity check is
+     * required, use <code><a href="../lang/UCharacter.html#isLegal(char)">UCharacter.isLegal()
+     * </a></code>
+     * on the return value. If the char retrieved is part of a surrogate pair, its supplementary
+     * character will be returned. If a complete supplementary character is not found, the
+     * incomplete character will be returned.
+     *
+     * @param source Array of UTF-16 chars
+     * @param start Offset to substring in the source array for analyzing
+     * @param limit Offset to substring in the source array for analyzing
+     * @param offset16 UTF-16 offset relative to start
+     * @return UTF-32 value of the code point that contains the char at offset16. The boundaries
+     *         of that code point are the same as in <code>bounds32()</code>.
+     * @exception IndexOutOfBoundsException Thrown if offset16 is not within the range of start and limit.
+     * @stable ICU 2.1
+     */
+    public static int charAt(char source[], int start, int limit, int offset16) {
         offset16 += start;
         if (offset16 < start || offset16 >= limit) {
             throw new ArrayIndexOutOfBoundsException(offset16);
         }
 

@@ -257,11 +339,11 @@
 
         // Convert the UTF-16 surrogate pair if necessary.
         // For simplicity in usage, and because the frequency of pairs is
         // low, look both directions.
         if (single <= LEAD_SURROGATE_MAX_VALUE) {
-            offset16 ++;
+            offset16++;
             if (offset16 >= limit) {
                 return single;
             }
             char trail = source[offset16];
             if (isTrailSurrogate(trail)) {

@@ -270,11 +352,11 @@
         }
         else { // isTrailSurrogate(single), so
             if (offset16 == start) {
                 return single;
             }
-            offset16 --;
+            offset16--;
             char lead = source[offset16];
             if (isLeadSurrogate(lead))
                 return UCharacterProperty.getRawSupplementary(lead, single);
         }
         return single; // return unmatched surrogate

@@ -298,41 +380,38 @@
     }
 
     /**
      * Determines whether the code value is a surrogate.
      * @param char16 the input character.
-     * @return true iff the input character is a surrogate.
+     * @return true if the input character is a surrogate.
      * @stable ICU 2.1
      */
     public static boolean isSurrogate(char char16)
     {
-        return LEAD_SURROGATE_MIN_VALUE <= char16 &&
-            char16 <= TRAIL_SURROGATE_MAX_VALUE;
+        return (char16 & SURROGATE_BITMASK) == SURROGATE_BITS;
     }
 
     /**
      * Determines whether the character is a trail surrogate.
      * @param char16 the input character.
-     * @return true iff the input character is a trail surrogate.
+     * @return true if the input character is a trail surrogate.
      * @stable ICU 2.1
      */
     public static boolean isTrailSurrogate(char char16)
     {
-        return (TRAIL_SURROGATE_MIN_VALUE <= char16 &&
-                char16 <= TRAIL_SURROGATE_MAX_VALUE);
+        return (char16 & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
     }
 
     /**
      * Determines whether the character is a lead surrogate.
      * @param char16 the input character.
-     * @return true iff the input character is a lead surrogate
+     * @return true if the input character is a lead surrogate
      * @stable ICU 2.1
      */
     public static boolean isLeadSurrogate(char char16)
     {
-        return LEAD_SURROGATE_MIN_VALUE <= char16 &&
-            char16 <= LEAD_SURROGATE_MAX_VALUE;
+        return (char16 & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
     }
 
     /**
      * Returns the lead surrogate.
      * If a validity check is required, use

@@ -357,22 +436,22 @@
      * Returns the trail surrogate.
      * If a validity check is required, use
      * <code><a href="../lang/UCharacter.html#isLegal(char)">isLegal()</a></code>
      * on char32 before calling.
      * @param char32 the input character.
-     * @return the trail surrogate if the getCharCount(ch) is 2; <br>otherwise
+     * @return the trail surrogate if the getCharCount(ch) is 2; <br> otherwise
      *         the character itself
      * @stable ICU 2.1
      */
     public static char getTrailSurrogate(int char32)
     {
         if (char32 >= SUPPLEMENTARY_MIN_VALUE) {
             return (char)(TRAIL_SURROGATE_MIN_VALUE +
                           (char32 & TRAIL_SURROGATE_MASK_));
         }
 
-        return (char)char32;
+        return (char) char32;
     }
 
     /**
      * Convenience method corresponding to String.valueOf(char). Returns a one
      * or two char string containing the UTF-32 value in UTF16 format. If a

@@ -417,16 +496,15 @@
             {
                 target.append(getLeadSurrogate(char32));
                 target.append(getTrailSurrogate(char32));
             }
         else {
-            target.append((char)char32);
+            target.append((char) char32);
         }
         return target;
     }
 
-    //// for StringPrep
     /**
      * Shifts offset16 by the argument number of codepoints within a subarray.
      * @param source char array
      * @param start position of the subarray to be performed on
      * @param limit position of the subarray to be performed on

@@ -443,45 +521,45 @@
     {
         int         size = source.length;
         int         count;
         char        ch;
         int         result = offset16 + start;
-        if (start<0 || limit<start) {
+        if (start < 0 || limit < start) {
             throw new StringIndexOutOfBoundsException(start);
         }
-        if (limit>size) {
+        if (limit > size) {
             throw new StringIndexOutOfBoundsException(limit);
         }
-        if (offset16<0 || result>limit) {
+        if (offset16 < 0 || result > limit) {
             throw new StringIndexOutOfBoundsException(offset16);
         }
-        if (shift32 > 0 ) {
+        if (shift32 > 0) {
             if (shift32 + result > size) {
                 throw new StringIndexOutOfBoundsException(result);
             }
             count = shift32;
             while (result < limit && count > 0)
             {
                 ch = source[result];
-                if (isLeadSurrogate(ch) && (result+1 < limit) &&
-                        isTrailSurrogate(source[result+1])) {
-                    result ++;
+                if (isLeadSurrogate(ch) && (result + 1 < limit) &&
+                    isTrailSurrogate(source[result + 1])) {
+                    result++;
                 }
-                count --;
-                result ++;
+                count--;
+                result++;
             }
         } else {
             if (result + shift32 < start) {
                 throw new StringIndexOutOfBoundsException(result);
             }
-            for (count=-shift32; count>0; count--) {
+            for (count = -shift32; count > 0; count--) {
                 result--;
-                if (result<start) {
+                if (result < start) {
                     break;
                 }
                 ch = source[result];
-                if (isTrailSurrogate(ch) && result>start && isLeadSurrogate(source[result-1])) {
+                if (isTrailSurrogate(ch) && result > start && isLeadSurrogate(source[result - 1])) {
                     result--;
                 }
             }
         }
         if (count != 0)  {

@@ -525,11 +603,11 @@
      * @return string representation of the code point
      */
     private static String toString(int ch)
     {
         if (ch < SUPPLEMENTARY_MIN_VALUE) {
-            return String.valueOf((char)ch);
+            return String.valueOf((char) ch);
         }
 
         StringBuilder result = new StringBuilder();
         result.append(getLeadSurrogate(ch));
         result.append(getTrailSurrogate(ch));
< prev index next >