--- old/src/java.base/share/classes/sun/text/normalizer/Trie2_16.java 2020-01-10 15:57:39.000000000 -0800 +++ /dev/null 2020-01-10 15:57:39.000000000 -0800 @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -/* - ******************************************************************************* - * Copyright (C) 2009-2014, International Business Machines Corporation and - * others. All Rights Reserved. - ******************************************************************************* - */ - -package sun.text.normalizer; - -import java.io.IOException; -import java.nio.ByteBuffer; - - -/** - * @author aheninger - * - * A read-only Trie2, holding 16 bit data values. - * - * A Trie2 is a highly optimized data structure for mapping from Unicode - * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value. - * - * See class Trie2 for descriptions of the API for accessing the contents of a trie. - * - * The fundamental data access methods are declared final in this class, with - * the intent that applications might gain a little extra performance, when compared - * with calling the same methods via the abstract UTrie2 base class. - */ -public final class Trie2_16 extends Trie2 { - - /** - * Internal constructor, not for general use. - */ - Trie2_16() { - } - - - /** - * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). - * The serialized format is identical between ICU4C and ICU4J, so this function - * will work with serialized Trie2s from either. - * - * The serialized Trie2 in the bytes may be in either little or big endian byte order. - * This allows using serialized Tries from ICU4C without needing to consider the - * byte order of the system that created them. - * - * @param bytes a byte buffer to the serialized form of a UTrie2. - * @return An unserialized Trie2_16, ready for use. - * @throws IllegalArgumentException if the buffer does not contain a serialized Trie2. - * @throws IOException if a read error occurs in the buffer. - * @throws ClassCastException if the bytes contain a serialized Trie2_32 - */ - public static Trie2_16 createFromSerialized(ByteBuffer bytes) throws IOException { - return (Trie2_16) Trie2.createFromSerialized(bytes); - } - - /** - * Get the value for a code point as stored in the Trie2. - * - * @param codePoint the code point - * @return the value - */ - @Override - public final int get(int codePoint) { - int value; - int ix; - - if (codePoint >= 0) { - if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) { - // Ordinary BMP code point, excluding leading surrogates. - // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. - // 16 bit data is stored in the index array itself. - ix = index[codePoint >> UTRIE2_SHIFT_2]; - ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); - value = index[ix]; - return value; - } - if (codePoint <= 0xffff) { - // Lead Surrogate Code Point. A Separate index section is stored for - // lead surrogate code units and code points. - // The main index has the code unit data. - // For this function, we need the code point data. - // Note: this expression could be refactored for slightly improved efficiency, but - // surrogate code points will be so rare in practice that it's not worth it. - ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)]; - ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); - value = index[ix]; - return value; - } - if (codePoint < highStart) { - // Supplemental code point, use two-level lookup. - ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1); - ix = index[ix]; - ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK; - ix = index[ix]; - ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); - value = index[ix]; - return value; - } - if (codePoint <= 0x10ffff) { - value = index[highValueIndex]; - return value; - } - } - - // Fall through. The code point is outside of the legal range of 0..0x10ffff. - return errorValue; - } - - - /** - * Get a Trie2 value for a UTF-16 code unit. - * - * This function returns the same value as get() if the input - * character is outside of the lead surrogate range - * - * There are two values stored in a Trie2 for inputs in the lead - * surrogate range. This function returns the alternate value, - * while Trie2.get() returns the main value. - * - * @param codeUnit a 16 bit code unit or lead surrogate value. - * @return the value - */ - @Override - public int getFromU16SingleLead(char codeUnit) { - int value; - int ix; - - // Because the input is a 16 bit char, we can skip the tests for it being in - // the BMP range. It is. - ix = index[codeUnit >> UTRIE2_SHIFT_2]; - ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK); - value = index[ix]; - return value; - } - - /** - * @return the number of bytes of the serialized trie - */ - public int getSerializedLength() { - return 16+(header.indexLength+dataLength)*2; - } -} --- /dev/null 2020-01-10 15:57:39.000000000 -0800 +++ new/src/java.base/share/classes/jdk/internal/icu/impl/Trie2_16.java 2020-01-10 15:57:38.000000000 -0800 @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + ******************************************************************************* + * Copyright (C) 2009-2014, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************************* + */ + +package jdk.internal.icu.impl; + +import java.io.IOException; +import java.nio.ByteBuffer; + + +/** + * @author aheninger + * + * A read-only Trie2, holding 16 bit data values. + * + * A Trie2 is a highly optimized data structure for mapping from Unicode + * code points (values ranging from 0 to 0x10ffff) to a 16 or 32 bit value. + * + * See class Trie2 for descriptions of the API for accessing the contents of a trie. + * + * The fundamental data access methods are declared final in this class, with + * the intent that applications might gain a little extra performance, when compared + * with calling the same methods via the abstract UTrie2 base class. + */ +public final class Trie2_16 extends Trie2 { + + /** + * Internal constructor, not for general use. + */ + Trie2_16() { + } + + + /** + * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). + * The serialized format is identical between ICU4C and ICU4J, so this function + * will work with serialized Trie2s from either. + * + * The serialized Trie2 in the bytes may be in either little or big endian byte order. + * This allows using serialized Tries from ICU4C without needing to consider the + * byte order of the system that created them. + * + * @param bytes a byte buffer to the serialized form of a UTrie2. + * @return An unserialized Trie2_16, ready for use. + * @throws IllegalArgumentException if the buffer does not contain a serialized Trie2. + * @throws IOException if a read error occurs in the buffer. + * @throws ClassCastException if the bytes contain a serialized Trie2_32 + */ + public static Trie2_16 createFromSerialized(ByteBuffer bytes) throws IOException { + return (Trie2_16) Trie2.createFromSerialized(bytes); + } + + /** + * Get the value for a code point as stored in the Trie2. + * + * @param codePoint the code point + * @return the value + */ + @Override + public final int get(int codePoint) { + int value; + int ix; + + if (codePoint >= 0) { + if (codePoint < 0x0d800 || (codePoint > 0x0dbff && codePoint <= 0x0ffff)) { + // Ordinary BMP code point, excluding leading surrogates. + // BMP uses a single level lookup. BMP index starts at offset 0 in the Trie2 index. + // 16 bit data is stored in the index array itself. + ix = index[codePoint >> UTRIE2_SHIFT_2]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); + value = index[ix]; + return value; + } + if (codePoint <= 0xffff) { + // Lead Surrogate Code Point. A Separate index section is stored for + // lead surrogate code units and code points. + // The main index has the code unit data. + // For this function, we need the code point data. + // Note: this expression could be refactored for slightly improved efficiency, but + // surrogate code points will be so rare in practice that it's not worth it. + ix = index[UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2)]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); + value = index[ix]; + return value; + } + if (codePoint < highStart) { + // Supplemental code point, use two-level lookup. + ix = (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH) + (codePoint >> UTRIE2_SHIFT_1); + ix = index[ix]; + ix += (codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK; + ix = index[ix]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codePoint & UTRIE2_DATA_MASK); + value = index[ix]; + return value; + } + if (codePoint <= 0x10ffff) { + value = index[highValueIndex]; + return value; + } + } + + // Fall through. The code point is outside of the legal range of 0..0x10ffff. + return errorValue; + } + + + /** + * Get a Trie2 value for a UTF-16 code unit. + * + * This function returns the same value as get() if the input + * character is outside of the lead surrogate range + * + * There are two values stored in a Trie2 for inputs in the lead + * surrogate range. This function returns the alternate value, + * while Trie2.get() returns the main value. + * + * @param codeUnit a 16 bit code unit or lead surrogate value. + * @return the value + */ + @Override + public int getFromU16SingleLead(char codeUnit) { + int value; + int ix; + + // Because the input is a 16 bit char, we can skip the tests for it being in + // the BMP range. It is. + ix = index[codeUnit >> UTRIE2_SHIFT_2]; + ix = (ix << UTRIE2_INDEX_SHIFT) + (codeUnit & UTRIE2_DATA_MASK); + value = index[ix]; + return value; + } + + /** + * @return the number of bytes of the serialized trie + */ + public int getSerializedLength() { + return 16+(header.indexLength+dataLength)*2; + } +}