--- /dev/null 2015-07-13 16:12:16.000000000 +0900 +++ new/jdk/src/java.base/share/classes/sun/text/normalizer/Trie2_16.java 2015-07-13 16:12:16.000000000 +0900 @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + ******************************************************************************* + * Copyright (C) 2009-2014, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************************* + */ + +package sun.text.normalizer; + +import java.io.IOException; +import java.nio.ByteBuffer; + + +/** + * @author aheninger + * + * A read-only Trie2, holding 16 bit data values. + * + * A Trie2 is a highly optimized data structure for mapping from Unicode + * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value. + * + * See class Trie2 for descriptions of the API for accessing the contents of a trie. + * + * The fundamental data access methods are declared final in this class, with + * the intent that applications might gain a little extra performance, when compared + * with calling the same methods via the abstract UTrie2 base class. + */ +public final class Trie2_16 extends Trie2 { + + /** + * Internal constructor, not for general use. + */ + Trie2_16() { + } + + + /** + * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). + * The serialized format is identical between ICU4C and ICU4J, so this function + * will work with serialized Trie2s from either. + * + * The serialized Trie2 in the bytes may be in either little or big endian byte order. + * This allows using serialized Tries from ICU4C without needing to consider the + * byte order of the system that created them. + * + * @param bytes a byte buffer to the serialized form of a UTrie2. + * @return An unserialized Trie2_16, ready for use. + * @throws IllegalArgumentException if the buffer does not contain a serialized Trie2. + * @throws IOException if a read error occurs in the buffer. + * @throws ClassCastException if the bytes contain a serialized Trie2_32 + */ + public static Trie2_16 createFromSerialized(ByteBuffer bytes) throws IOException { + return (Trie2_16) Trie2.createFromSerialized(bytes); + } + + /** + * Get the value for a code point as stored in the Trie2. + * + * @param codePoint the code point + * @return the value + */ + @Override + public final int get(int codePoint) { + int value; + int ix; + + if (codePoint >= 0) { + if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) { + // Ordinary BMP code point, excluding leading surrogates. + // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. + // 16 bit data is stored in the index array itself. + ix = index[codePoint >> UTRIE2_SHIFT_2]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); + value = index[ix]; + return value; + } + if (codePoint <= 0xffff) { + // Lead Surrogate Code Point. A Separate index section is stored for + // lead surrogate code units and code points. + // The main index has the code unit data. + // For this function, we need the code point data. + // Note: this expression could be refactored for slightly improved efficiency, but + // surrogate code points will be so rare in practice that it's not worth it. + ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); + value = index[ix]; + return value; + } + if (codePoint < highStart) { + // Supplemental code point, use two-level lookup. + ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1); + ix = index[ix]; + ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK; + ix = index[ix]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); + value = index[ix]; + return value; + } + if (codePoint <= 0x10ffff) { + value = index[highValueIndex]; + return value; + } + } + + // Fall through. The code point is outside of the legal range of 0..0x10ffff. + return errorValue; + } + + + /** + * Get a Trie2 value for a UTF-16 code unit. + * + * This function returns the same value as get() if the input + * character is outside of the lead surrogate range + * + * There are two values stored in a Trie2 for inputs in the lead + * surrogate range. This function returns the alternate value, + * while Trie2.get() returns the main value. + * + * @param codeUnit a 16 bit code unit or lead surrogate value. + * @return the value + */ + @Override + public int getFromU16SingleLead(char codeUnit) { + int value; + int ix; + + // Because the input is a 16 bit char, we can skip the tests for it being in + // the BMP range. It is. + ix = index[codeUnit >> UTRIE2_SHIFT_2]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK); + value = index[ix]; + return value; + } + + /** + * @return the number of bytes of the serialized trie + */ + public int getSerializedLength() { + return 16+(header.indexLength+dataLength)*2; + } +}