/*
 * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 *******************************************************************************
 * Copyright (C) 2009-2014, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */

package jdk.internal.icu.impl;

import java.io.IOException;
import java.nio.ByteBuffer;


/**
 * A read-only Trie2 holding 16 bit data values.
 *
 * <p>A Trie2 is a highly optimized data structure that maps Unicode code
 * points (0 through 0x10ffff) to a 16 or 32 bit value. The API for reading
 * the contents of a trie is described on class {@link Trie2}.
 *
 * <p>The fundamental data access method is declared final here so that
 * applications may gain a little extra performance compared with calling
 * the same method through the abstract Trie2 base class.
 *
 * @author aheninger
 */
public final class Trie2_16 extends Trie2 {

    /**
     * Internal constructor, not for general use.
     */
    Trie2_16() {
    }


    /**
     * Creates a Trie2_16 from its serialized form; the inverse of utrie2_serialize().
     *
     * <p>ICU4C and ICU4J share one serialized format, so serialized tries from
     * either can be read. The serialized data may be in little or big endian
     * byte order, which means tries produced by ICU4C can be used without
     * regard to the byte order of the system that created them.
     *
     * @param bytes a byte buffer positioned at the serialized form of a UTrie2.
     * @return the unserialized Trie2_16, ready for use.
     * @throws IllegalArgumentException if the buffer does not contain a serialized Trie2.
     * @throws IOException if a read error occurs in the buffer.
     * @throws ClassCastException if the bytes contain a serialized Trie2_32
     */
    public static Trie2_16 createFromSerialized(ByteBuffer bytes) throws IOException {
        final Trie2 trie = Trie2.createFromSerialized(bytes);
        // The base factory decides the concrete type; anything other than a
        // 16 bit trie surfaces here as a ClassCastException.
        return (Trie2_16) trie;
    }

    /**
     * Returns the value stored in this Trie2 for a code point.
     *
     * @param codePoint the code point
     * @return the value, or the trie's error value when {@code codePoint}
     *         lies outside the legal range 0..0x10ffff
     */
    @Override
    public final int get(int codePoint) {
        if (codePoint < 0) {
            // Below the legal range of 0..0x10ffff.
            return errorValue;
        }

        if (codePoint <= 0xffff) {
            // BMP: a single-level lookup. Only the index-2 slot differs between
            // ordinary code points and lead surrogate code points.
            final int index2Slot;
            if (codePoint >= 0xd800 && codePoint <= 0xdbff) {
                // Lead surrogate code point. The main BMP index holds the code
                // *unit* data for this range; the code *point* data asked for
                // here lives in a separate index section.
                index2Slot = UTRIE2_LSCP_INDEX_2_OFFSET + ((codePoint - 0xd800) >> UTRIE2_SHIFT_2);
            } else {
                // Ordinary BMP code point; the BMP index starts at offset 0.
                index2Slot = codePoint >> UTRIE2_SHIFT_2;
            }
            return dataFromIndex2Slot(index2Slot, codePoint);
        }

        if (codePoint < highStart) {
            // Supplementary code point: two-level lookup (index-1, then index-2).
            final int index1Slot =
                    (UTRIE2_INDEX_1_OFFSET - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)
                    + (codePoint >> UTRIE2_SHIFT_1);
            final int index2Slot =
                    index[index1Slot] + ((codePoint >> UTRIE2_SHIFT_2) & UTRIE2_INDEX_2_MASK);
            return dataFromIndex2Slot(index2Slot, codePoint);
        }

        if (codePoint <= 0x10ffff) {
            // At or above highStart: every such code point shares one value.
            return index[highValueIndex];
        }

        // Above the legal range of 0..0x10ffff.
        return errorValue;
    }


    /**
     * Returns the Trie2 value for a single UTF-16 code unit.
     *
     * <p>For input outside the lead surrogate range the result equals that of
     * {@link #get(int)}. A Trie2 stores two values for inputs in the lead
     * surrogate range: this method returns the alternate (code unit) value,
     * while {@code get()} returns the main (code point) value.
     *
     * @param codeUnit a 16 bit code unit or lead surrogate value.
     * @return the value
     */
    @Override
    public int getFromU16SingleLead(char codeUnit) {
        // A char is always within the BMP, so no range tests are needed and the
        // main BMP index is used directly.
        return dataFromIndex2Slot(codeUnit >> UTRIE2_SHIFT_2, codeUnit);
    }

    /**
     * Returns the size of this trie in serialized form.
     *
     * @return the number of bytes of the serialized trie
     */
    public int getSerializedLength() {
        // Index and data entries are 16 bit units (2 bytes each); the leading 16
        // is presumably the fixed size of the serialized header.
        final int unitCount = header.indexLength + dataLength;
        return 16 + unitCount * 2;
    }

    /**
     * Final lookup step shared by all paths: reads the data block start from
     * the given index-2 slot and returns the 16 bit value for {@code c} within
     * that block. The 16 bit data is stored in the index array itself.
     */
    private int dataFromIndex2Slot(int index2Slot, int c) {
        final int dataBlock = index[index2Slot];
        return index[(dataBlock << UTRIE2_INDEX_SHIFT) + (c & UTRIE2_DATA_MASK)];
    }
}