--- /dev/null 2019-05-22 15:33:48.000000000 -0700 +++ new/src/java.base/share/classes/sun/text/normalizer/CodePointTrie.java 2019-05-22 15:33:48.000000000 -0700 @@ -0,0 +1,1310 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +// (c) 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License + +// created: 2018may04 Markus W. Scherer + +package sun.text.normalizer; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import static sun.text.normalizer.NormalizerImpl.UTF16Plus; + +/** + * Immutable Unicode code point trie. + * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values. + * For details see http://site.icu-project.org/design/struct/utrie + * + *

This class is not intended for public subclassing. + * + * @see MutableCodePointTrie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ +@SuppressWarnings("deprecation") +public abstract class CodePointTrie extends CodePointMap { + /** + * Selectors for the type of a CodePointTrie. + * Different trade-offs for size vs. speed. + * + *

Use null for {@link #fromBinary} to accept any type; + * {@link #getType} will return the actual type. + * + * @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth) + * @see #fromBinary + * @see #getType + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public enum Type { + /** + * Fast/simple/larger BMP data structure. + * The {@link Fast} subclasses have additional functions for lookup for BMP and supplementary code points. + * + * @see Fast + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + FAST, + /** + * Small/slower BMP data structure. + * + * @see Small + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + SMALL + } + + /** + * Selectors for the number of bits in a CodePointTrie data value. + * + *

Use null for {@link #fromBinary} to accept any data value width; + * {@link #getValueWidth} will return the actual data value width. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public enum ValueWidth { + /** + * The trie stores 16 bits per data value. + * It returns them as unsigned values 0..0xffff=65535. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + BITS_16, + /** + * The trie stores 32 bits per data value. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + BITS_32, + /** + * The trie stores 8 bits per data value. + * It returns them as unsigned values 0..0xff=255. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + BITS_8 + } + + private CodePointTrie(char[] index, Data data, int highStart, + int index3NullOffset, int dataNullOffset) { + this.ascii = new int[ASCII_LIMIT]; + this.index = index; + this.data = data; + this.dataLength = data.getDataLength(); + this.highStart = highStart; + this.index3NullOffset = index3NullOffset; + this.dataNullOffset = dataNullOffset; + + for (int c = 0; c < ASCII_LIMIT; ++c) { + ascii[c] = data.getFromIndex(c); + } + + int nullValueOffset = dataNullOffset; + if (nullValueOffset >= dataLength) { + nullValueOffset = dataLength - HIGH_VALUE_NEG_DATA_OFFSET; + } + nullValue = data.getFromIndex(nullValueOffset); + } + + /** + * Creates a trie from its binary form, + * stored in the ByteBuffer starting at the current position. + * Advances the buffer position to just after the trie data. + * Inverse of {@link #toBinary(OutputStream)}. + * + *

The data is copied from the buffer; + * later modification of the buffer will not affect the trie. + * + * @param type selects the trie type; this method throws an exception + * if the type does not match the binary data; + * use null to accept any type + * @param valueWidth selects the number of bits in a data value; this method throws an exception + * if the valueWidth does not match the binary data; + * use null to accept any data value width + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @see MutableCodePointTrie#MutableCodePointTrie(int, int) + * @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth) + * @see #toBinary(OutputStream) + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static CodePointTrie fromBinary(Type type, ValueWidth valueWidth, ByteBuffer bytes) { + ByteOrder outerByteOrder = bytes.order(); + try { + // Enough data for a trie header? + if (bytes.remaining() < 16 /* sizeof(UCPTrieHeader) */) { + throw new InternalError("Buffer too short for a CodePointTrie header"); + } + + // struct UCPTrieHeader + /** "Tri3" in big-endian US-ASCII (0x54726933) */ + int signature = bytes.getInt(); + + // Check the signature. + switch (signature) { + case 0x54726933: + // The buffer is already set to the trie data byte order. + break; + case 0x33697254: + // Temporarily reverse the byte order. + boolean isBigEndian = outerByteOrder == ByteOrder.BIG_ENDIAN; + bytes.order(isBigEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN); + signature = 0x54726933; + break; + default: + throw new InternalError("Buffer does not contain a serialized CodePointTrie"); + } + + // struct UCPTrieHeader continued + /** + * Options bit field: + * Bits 15..12: Data length bits 19..16. + * Bits 11..8: Data null block offset bits 19..16. + * Bits 7..6: UCPTrieType + * Bits 5..3: Reserved (0). + * Bits 2..0: UCPTrieValueWidth + */ + int options = bytes.getChar(); + + /** Total length of the index tables. */ + int indexLength = bytes.getChar(); + + /** Data length bits 15..0. */ + int dataLength = bytes.getChar(); + + /** Index-3 null block offset, 0x7fff or 0xffff if none. */ + int index3NullOffset = bytes.getChar(); + + /** Data null block offset bits 15..0, 0xfffff if none. */ + int dataNullOffset = bytes.getChar(); + + /** + * First code point of the single-value range ending with U+10ffff, + * rounded up and then shifted right by SHIFT_2. + */ + int shiftedHighStart = bytes.getChar(); + // struct UCPTrieHeader end + + int typeInt = (options >> 6) & 3; + Type actualType; + switch (typeInt) { + case 0: actualType = Type.FAST; break; + case 1: actualType = Type.SMALL; break; + default: + throw new InternalError("CodePointTrie data header has an unsupported type"); + } + + int valueWidthInt = options & OPTIONS_VALUE_BITS_MASK; + ValueWidth actualValueWidth; + switch (valueWidthInt) { + case 0: actualValueWidth = ValueWidth.BITS_16; break; + case 1: actualValueWidth = ValueWidth.BITS_32; break; + case 2: actualValueWidth = ValueWidth.BITS_8; break; + default: + throw new InternalError("CodePointTrie data header has an unsupported value width"); + } + + if ((options & OPTIONS_RESERVED_MASK) != 0) { + throw new InternalError("CodePointTrie data header has unsupported options"); + } + + if (type == null) { + type = actualType; + } + if (valueWidth == null) { + valueWidth = actualValueWidth; + } + if (type != actualType || valueWidth != actualValueWidth) { + throw new InternalError("CodePointTrie data header has a different type or value width than required"); + } + + // Get the length values and offsets. + dataLength |= ((options & OPTIONS_DATA_LENGTH_MASK) << 4); + dataNullOffset |= ((options & OPTIONS_DATA_NULL_OFFSET_MASK) << 8); + + int highStart = shiftedHighStart << SHIFT_2; + + // Calculate the actual length, minus the header. + int actualLength = indexLength * 2; + if (valueWidth == ValueWidth.BITS_16) { + actualLength += dataLength * 2; + } else if (valueWidth == ValueWidth.BITS_32) { + actualLength += dataLength * 4; + } else { + actualLength += dataLength; + } + if (bytes.remaining() < actualLength) { + throw new InternalError("Buffer too short for the CodePointTrie data"); + } + + char[] index = ICUBinary.getChars(bytes, indexLength, 0); + switch (valueWidth) { + case BITS_16: { + char[] data16 = ICUBinary.getChars(bytes, dataLength, 0); + return type == Type.FAST ? + new Fast16(index, data16, highStart, index3NullOffset, dataNullOffset) : + new Small16(index, data16, highStart, index3NullOffset, dataNullOffset); + } + case BITS_32: { + int[] data32 = ICUBinary.getInts(bytes, dataLength, 0); + return type == Type.FAST ? + new Fast32(index, data32, highStart, index3NullOffset, dataNullOffset) : + new Small32(index, data32, highStart, index3NullOffset, dataNullOffset); + } + case BITS_8: { + byte[] data8 = ICUBinary.getBytes(bytes, dataLength, 0); + return type == Type.FAST ? + new Fast8(index, data8, highStart, index3NullOffset, dataNullOffset) : + new Small8(index, data8, highStart, index3NullOffset, dataNullOffset); + } + default: + throw new AssertionError("should be unreachable"); + } + } finally { + bytes.order(outerByteOrder); + } + } + + /** + * Returns the trie type. + * + * @return the trie type + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public abstract Type getType(); + /** + * Returns the number of bits in a trie data value. + * + * @return the number of bits in a trie data value + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public final ValueWidth getValueWidth() { return data.getValueWidth(); } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public int get(int c) { + return data.getFromIndex(cpIndex(c)); + } + + /** + * Returns a trie value for an ASCII code point, without range checking. + * + * @param c the input code point; must be U+0000..U+007F + * @return The ASCII code point's trie value. + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public final int asciiGet(int c) { + return ascii[c]; + } + + private static final int MAX_UNICODE = 0x10ffff; + + private static final int ASCII_LIMIT = 0x80; + + private static final int maybeFilterValue(int value, int trieNullValue, int nullValue, + ValueFilter filter) { + if (value == trieNullValue) { + value = nullValue; + } else if (filter != null) { + value = filter.apply(value); + } + return value; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final boolean getRange(int start, ValueFilter filter, Range range) { + if (start < 0 || MAX_UNICODE < start) { + return false; + } + if (start >= highStart) { + int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET; + int value = data.getFromIndex(di); + if (filter != null) { value = filter.apply(value); } + range.set(start, MAX_UNICODE, value); + return true; + } + + int nullValue = this.nullValue; + if (filter != null) { nullValue = filter.apply(nullValue); } + Type type = getType(); + + int prevI3Block = -1; + int prevBlock = -1; + int c = start; + // Initialize to make compiler happy. Real value when haveValue is true. + int trieValue = 0, value = 0; + boolean haveValue = false; + do { + int i3Block; + int i3; + int i3BlockLength; + int dataBlockLength; + if (c <= 0xffff && (type == Type.FAST || c <= SMALL_MAX)) { + i3Block = 0; + i3 = c >> FAST_SHIFT; + i3BlockLength = type == Type.FAST ? BMP_INDEX_LENGTH : SMALL_INDEX_LENGTH; + dataBlockLength = FAST_DATA_BLOCK_LENGTH; + } else { + // Use the multi-stage index. + int i1 = c >> SHIFT_1; + if (type == Type.FAST) { + assert(0xffff < c && c < highStart); + i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH; + } else { + assert(c < highStart && highStart > SMALL_LIMIT); + i1 += SMALL_INDEX_LENGTH; + } + i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)]; + if (i3Block == prevI3Block && (c - start) >= CP_PER_INDEX_2_ENTRY) { + // The index-3 block is the same as the previous one, and filled with value. + assert((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0); + c += CP_PER_INDEX_2_ENTRY; + continue; + } + prevI3Block = i3Block; + if (i3Block == index3NullOffset) { + // This is the index-3 null block. + if (haveValue) { + if (nullValue != value) { + range.set(start, c - 1, value); + return true; + } + } else { + trieValue = this.nullValue; + value = nullValue; + haveValue = true; + } + prevBlock = dataNullOffset; + c = (c + CP_PER_INDEX_2_ENTRY) & ~(CP_PER_INDEX_2_ENTRY - 1); + continue; + } + i3 = (c >> SHIFT_3) & INDEX_3_MASK; + i3BlockLength = INDEX_3_BLOCK_LENGTH; + dataBlockLength = SMALL_DATA_BLOCK_LENGTH; + } + // Enumerate data blocks for one index-3 block. + do { + int block; + if ((i3Block & 0x8000) == 0) { + block = index[i3Block + i3]; + } else { + // 18-bit indexes stored in groups of 9 entries per 8 indexes. + int group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); + int gi = i3 & 7; + block = (index[group++] << (2 + (2 * gi))) & 0x30000; + block |= index[group + gi]; + } + if (block == prevBlock && (c - start) >= dataBlockLength) { + // The block is the same as the previous one, and filled with value. + assert((c & (dataBlockLength - 1)) == 0); + c += dataBlockLength; + } else { + int dataMask = dataBlockLength - 1; + prevBlock = block; + if (block == dataNullOffset) { + // This is the data null block. + if (haveValue) { + if (nullValue != value) { + range.set(start, c - 1, value); + return true; + } + } else { + trieValue = this.nullValue; + value = nullValue; + haveValue = true; + } + c = (c + dataBlockLength) & ~dataMask; + } else { + int di = block + (c & dataMask); + int trieValue2 = data.getFromIndex(di); + if (haveValue) { + if (trieValue2 != trieValue) { + if (filter == null || + maybeFilterValue(trieValue2, this.nullValue, nullValue, + filter) != value) { + range.set(start, c - 1, value); + return true; + } + trieValue = trieValue2; // may or may not help + } + } else { + trieValue = trieValue2; + value = maybeFilterValue(trieValue2, this.nullValue, nullValue, filter); + haveValue = true; + } + while ((++c & dataMask) != 0) { + trieValue2 = data.getFromIndex(++di); + if (trieValue2 != trieValue) { + if (filter == null || + maybeFilterValue(trieValue2, this.nullValue, nullValue, + filter) != value) { + range.set(start, c - 1, value); + return true; + } + trieValue = trieValue2; // may or may not help + } + } + } + } + } while (++i3 < i3BlockLength); + } while (c < highStart); + assert(haveValue); + int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET; + int highValue = data.getFromIndex(di); + if (maybeFilterValue(highValue, this.nullValue, nullValue, filter) != value) { + --c; + } else { + c = MAX_UNICODE; + } + range.set(start, c, value); + return true; + } + + /** + * Writes a representation of the trie to the output stream. + * Inverse of {@link #fromBinary}. + * + * @param os the output stream + * @return the number of bytes written + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public final int toBinary(OutputStream os) { + try { + DataOutputStream dos = new DataOutputStream(os); + + // Write the UCPTrieHeader + dos.writeInt(0x54726933); // signature="Tri3" + dos.writeChar( // options + ((dataLength & 0xf0000) >> 4) | + ((dataNullOffset & 0xf0000) >> 8) | + (getType().ordinal() << 6) | + getValueWidth().ordinal()); + dos.writeChar(index.length); + dos.writeChar(dataLength); + dos.writeChar(index3NullOffset); + dos.writeChar(dataNullOffset); + dos.writeChar(highStart >> SHIFT_2); // shiftedHighStart + int length = 16; // sizeof(UCPTrieHeader) + + for (char i : index) { dos.writeChar(i); } + length += index.length * 2; + length += data.write(dos); + return length; + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + /** @internal */ + static final int FAST_SHIFT = 6; + + /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */ + static final int FAST_DATA_BLOCK_LENGTH = 1 << FAST_SHIFT; + + /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */ + private static final int FAST_DATA_MASK = FAST_DATA_BLOCK_LENGTH - 1; + + /** @internal */ + private static final int SMALL_MAX = 0xfff; + + /** + * Offset from dataLength (to be subtracted) for fetching the + * value returned for out-of-range code points and ill-formed UTF-8/16. + * @internal + */ + private static final int ERROR_VALUE_NEG_DATA_OFFSET = 1; + /** + * Offset from dataLength (to be subtracted) for fetching the + * value returned for code points highStart..U+10FFFF. + * @internal + */ + private static final int HIGH_VALUE_NEG_DATA_OFFSET = 2; + + // ucptrie_impl.h + + /** The length of the BMP index table. 1024=0x400 */ + private static final int BMP_INDEX_LENGTH = 0x10000 >> FAST_SHIFT; + + static final int SMALL_LIMIT = 0x1000; + private static final int SMALL_INDEX_LENGTH = SMALL_LIMIT >> FAST_SHIFT; + + /** Shift size for getting the index-3 table offset. */ + static final int SHIFT_3 = 4; + + /** Shift size for getting the index-2 table offset. */ + private static final int SHIFT_2 = 5 + SHIFT_3; + + /** Shift size for getting the index-1 table offset. */ + private static final int SHIFT_1 = 5 + SHIFT_2; + + /** + * Difference between two shift sizes, + * for getting an index-2 offset from an index-3 offset. 5=9-4 + */ + static final int SHIFT_2_3 = SHIFT_2 - SHIFT_3; + + /** + * Difference between two shift sizes, + * for getting an index-1 offset from an index-2 offset. 5=14-9 + */ + static final int SHIFT_1_2 = SHIFT_1 - SHIFT_2; + + /** + * Number of index-1 entries for the BMP. (4) + * This part of the index-1 table is omitted from the serialized form. + */ + private static final int OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> SHIFT_1; + + /** Number of entries in an index-2 block. 32=0x20 */ + static final int INDEX_2_BLOCK_LENGTH = 1 << SHIFT_1_2; + + /** Mask for getting the lower bits for the in-index-2-block offset. */ + static final int INDEX_2_MASK = INDEX_2_BLOCK_LENGTH - 1; + + /** Number of code points per index-2 table entry. 512=0x200 */ + static final int CP_PER_INDEX_2_ENTRY = 1 << SHIFT_2; + + /** Number of entries in an index-3 block. 32=0x20 */ + static final int INDEX_3_BLOCK_LENGTH = 1 << SHIFT_2_3; + + /** Mask for getting the lower bits for the in-index-3-block offset. */ + private static final int INDEX_3_MASK = INDEX_3_BLOCK_LENGTH - 1; + + /** Number of entries in a small data block. 16=0x10 */ + static final int SMALL_DATA_BLOCK_LENGTH = 1 << SHIFT_3; + + /** Mask for getting the lower bits for the in-small-data-block offset. */ + static final int SMALL_DATA_MASK = SMALL_DATA_BLOCK_LENGTH - 1; + + // ucptrie_impl.h: Constants for use with UCPTrieHeader.options. + private static final int OPTIONS_DATA_LENGTH_MASK = 0xf000; + private static final int OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00; + private static final int OPTIONS_RESERVED_MASK = 0x38; + private static final int OPTIONS_VALUE_BITS_MASK = 7; + /** + * Value for index3NullOffset which indicates that there is no index-3 null block. + * Bit 15 is unused for this value because this bit is used if the index-3 contains + * 18-bit indexes. + */ + static final int NO_INDEX3_NULL_OFFSET = 0x7fff; + static final int NO_DATA_NULL_OFFSET = 0xfffff; + + private static abstract class Data { + abstract ValueWidth getValueWidth(); + abstract int getDataLength(); + abstract int getFromIndex(int index); + abstract int write(DataOutputStream dos) throws IOException; + } + + private static final class Data16 extends Data { + char[] array; + Data16(char[] a) { array = a; } + @Override ValueWidth getValueWidth() { return ValueWidth.BITS_16; } + @Override int getDataLength() { return array.length; } + @Override int getFromIndex(int index) { return array[index]; } + @Override int write(DataOutputStream dos) throws IOException { + for (char v : array) { dos.writeChar(v); } + return array.length * 2; + } + } + + private static final class Data32 extends Data { + int[] array; + Data32(int[] a) { array = a; } + @Override ValueWidth getValueWidth() { return ValueWidth.BITS_32; } + @Override int getDataLength() { return array.length; } + @Override int getFromIndex(int index) { return array[index]; } + @Override int write(DataOutputStream dos) throws IOException { + for (int v : array) { dos.writeInt(v); } + return array.length * 4; + } + } + + private static final class Data8 extends Data { + byte[] array; + Data8(byte[] a) { array = a; } + @Override ValueWidth getValueWidth() { return ValueWidth.BITS_8; } + @Override int getDataLength() { return array.length; } + @Override int getFromIndex(int index) { return array[index] & 0xff; } + @Override int write(DataOutputStream dos) throws IOException { + for (byte v : array) { dos.writeByte(v); } + return array.length; + } + } + + /** @internal */ + private final int[] ascii; + + /** @internal */ + private final char[] index; + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected final Data data; + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected final int dataLength; + /** + * Start of the last range which ends at U+10FFFF. + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected final int highStart; + + /** + * Internal index-3 null block offset. + * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block. + * @internal + */ + private final int index3NullOffset; + /** + * Internal data null block offset, not shifted. + * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block. + * @internal + */ + private final int dataNullOffset; + /** @internal */ + private final int nullValue; + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected final int fastIndex(int c) { + return index[c >> FAST_SHIFT] + (c & FAST_DATA_MASK); + } + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected final int smallIndex(Type type, int c) { + // Split into two methods to make this part inline-friendly. + // In C, this part is a macro. + if (c >= highStart) { + return dataLength - HIGH_VALUE_NEG_DATA_OFFSET; + } + return internalSmallIndex(type, c); + } + + private final int internalSmallIndex(Type type, int c) { + int i1 = c >> SHIFT_1; + if (type == Type.FAST) { + assert(0xffff < c && c < highStart); + i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH; + } else { + assert(0 <= c && c < highStart && highStart > SMALL_LIMIT); + i1 += SMALL_INDEX_LENGTH; + } + int i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)]; + int i3 = (c >> SHIFT_3) & INDEX_3_MASK; + int dataBlock; + if ((i3Block & 0x8000) == 0) { + // 16-bit indexes + dataBlock = index[i3Block + i3]; + } else { + // 18-bit indexes stored in groups of 9 entries per 8 indexes. + i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); + i3 &= 7; + dataBlock = (index[i3Block++] << (2 + (2 * i3))) & 0x30000; + dataBlock |= index[i3Block + i3]; + } + return dataBlock + (c & SMALL_DATA_MASK); + } + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + protected abstract int cpIndex(int c); + + /** + * A CodePointTrie with {@link Type#FAST}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static abstract class Fast extends CodePointTrie { + private Fast(char[] index, Data data, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, data, highStart, index3NullOffset, dataNullOffset); + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@link Type#FAST}. + * + * @param valueWidth selects the number of bits in a data value; this method throws an exception + * if the valueWidth does not match the binary data; + * use null to accept any data value width + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Fast fromBinary(ValueWidth valueWidth, ByteBuffer bytes) { + return (Fast) CodePointTrie.fromBinary(Type.FAST, valueWidth, bytes); + } + + /** + * @return {@link Type#FAST} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final Type getType() { return Type.FAST; } + + /** + * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking. + * Can be used to look up a value for a UTF-16 code unit if other parts of + * the string processing check for surrogates. + * + * @param c the input code point, must be U+0000..U+FFFF + * @return The BMP code point's trie value. + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public abstract int bmpGet(int c); + + /** + * Returns a trie value for a supplementary code point (U+10000..U+10FFFF), + * without range checking. + * + * @param c the input code point, must be U+10000..U+10FFFF + * @return The supplementary code point's trie value. + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public abstract int suppGet(int c); + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + @Override + protected final int cpIndex(int c) { + if (c >= 0) { + if (c <= 0xffff) { + return fastIndex(c); + } else if (c <= 0x10ffff) { + return smallIndex(Type.FAST, c); + } + } + return dataLength - ERROR_VALUE_NEG_DATA_OFFSET; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final StringIterator stringIterator(CharSequence s, int sIndex) { + return new FastStringIterator(s, sIndex); + } + + private final class FastStringIterator extends StringIterator { + private FastStringIterator(CharSequence s, int sIndex) { + super(s, sIndex); + } + + @Override + public boolean next() { + if (sIndex >= s.length()) { + return false; + } + char lead = s.charAt(sIndex++); + c = lead; + int dataIndex; + if (!Character.isSurrogate(lead)) { + dataIndex = fastIndex(c); + } else { + char trail; + if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() && + Character.isLowSurrogate(trail = s.charAt(sIndex))) { + ++sIndex; + c = Character.toCodePoint(lead, trail); + dataIndex = smallIndex(Type.FAST, c); + } else { + dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; + } + } + value = data.getFromIndex(dataIndex); + return true; + } + + @Override + public boolean previous() { + if (sIndex <= 0) { + return false; + } + char trail = s.charAt(--sIndex); + c = trail; + int dataIndex; + if (!Character.isSurrogate(trail)) { + dataIndex = fastIndex(c); + } else { + char lead; + if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 && + Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) { + --sIndex; + c = Character.toCodePoint(lead, trail); + dataIndex = smallIndex(Type.FAST, c); + } else { + dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; + } + } + value = data.getFromIndex(dataIndex); + return true; + } + } + } + + /** + * A CodePointTrie with {@link Type#SMALL}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static abstract class Small extends CodePointTrie { + private Small(char[] index, Data data, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, data, highStart, index3NullOffset, dataNullOffset); + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@link Type#SMALL}. + * + * @param valueWidth selects the number of bits in a data value; this method throws an exception + * if the valueWidth does not match the binary data; + * use null to accept any data value width + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Small fromBinary(ValueWidth valueWidth, ByteBuffer bytes) { + return (Small) CodePointTrie.fromBinary(Type.SMALL, valueWidth, bytes); + } + + /** + * @return {@link Type#SMALL} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final Type getType() { return Type.SMALL; } + + /** + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + @Override + protected final int cpIndex(int c) { + if (c >= 0) { + if (c <= SMALL_MAX) { + return fastIndex(c); + } else if (c <= 0x10ffff) { + return smallIndex(Type.SMALL, c); + } + } + return dataLength - ERROR_VALUE_NEG_DATA_OFFSET; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final StringIterator stringIterator(CharSequence s, int sIndex) { + return new SmallStringIterator(s, sIndex); + } + + private final class SmallStringIterator extends StringIterator { + private SmallStringIterator(CharSequence s, int sIndex) { + super(s, sIndex); + } + + @Override + public boolean next() { + if (sIndex >= s.length()) { + return false; + } + char lead = s.charAt(sIndex++); + c = lead; + int dataIndex; + if (!Character.isSurrogate(lead)) { + dataIndex = cpIndex(c); + } else { + char trail; + if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() && + Character.isLowSurrogate(trail = s.charAt(sIndex))) { + ++sIndex; + c = Character.toCodePoint(lead, trail); + dataIndex = smallIndex(Type.SMALL, c); + } else { + dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; + } + } + value = data.getFromIndex(dataIndex); + return true; + } + + @Override + public boolean previous() { + if (sIndex <= 0) { + return false; + } + char trail = s.charAt(--sIndex); + c = trail; + int dataIndex; + if (!Character.isSurrogate(trail)) { + dataIndex = cpIndex(c); + } else { + char lead; + if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 && + Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) { + --sIndex; + c = Character.toCodePoint(lead, trail); + dataIndex = smallIndex(Type.SMALL, c); + } else { + dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; + } + } + value = data.getFromIndex(dataIndex); + return true; + } + } + } + + /** + * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_16}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static final class Fast16 extends Fast { + private final char[] dataArray; + + Fast16(char[] index, char[] data16, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset); + this.dataArray = data16; + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@link Type#FAST} and {@link ValueWidth#BITS_16}. + * + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Fast16 fromBinary(ByteBuffer bytes) { + return (Fast16) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_16, bytes); + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int get(int c) { + return dataArray[cpIndex(c)]; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int bmpGet(int c) { + assert 0 <= c && c <= 0xffff; + return dataArray[fastIndex(c)]; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int suppGet(int c) { + assert 0x10000 <= c && c <= 0x10ffff; + return dataArray[smallIndex(Type.FAST, c)]; + } + } + + /** + * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_32}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static final class Fast32 extends Fast { + private final int[] dataArray; + + Fast32(char[] index, int[] data32, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset); + this.dataArray = data32; + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@link Type#FAST} and {@link ValueWidth#BITS_32}. + * + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Fast32 fromBinary(ByteBuffer bytes) { + return (Fast32) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_32, bytes); + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int get(int c) { + return dataArray[cpIndex(c)]; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int bmpGet(int c) { + assert 0 <= c && c <= 0xffff; + return dataArray[fastIndex(c)]; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int suppGet(int c) { + assert 0x10000 <= c && c <= 0x10ffff; + return dataArray[smallIndex(Type.FAST, c)]; + } + } + + /** + * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_8}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static final class Fast8 extends Fast { + private final byte[] dataArray; + + Fast8(char[] index, byte[] data8, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset); + this.dataArray = data8; + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@link Type#FAST} and {@link ValueWidth#BITS_8}. + * + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Fast8 fromBinary(ByteBuffer bytes) { + return (Fast8) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_8, bytes); + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int get(int c) { + return dataArray[cpIndex(c)] & 0xff; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int bmpGet(int c) { + assert 0 <= c && c <= 0xffff; + return dataArray[fastIndex(c)] & 0xff; + } + + /** + * {@inheritDoc} + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + @Override + public final int suppGet(int c) { + assert 0x10000 <= c && c <= 0x10ffff; + return dataArray[smallIndex(Type.FAST, c)] & 0xff; + } + } + + /** + * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_16}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static final class Small16 extends Small { + Small16(char[] index, char[] data16, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset); + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@link Type#SMALL} and {@link ValueWidth#BITS_16}. + * + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Small16 fromBinary(ByteBuffer bytes) { + return (Small16) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_16, bytes); + } + } + + /** + * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_32}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static final class Small32 extends Small { + Small32(char[] index, int[] data32, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset); + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@link Type#SMALL} and {@link ValueWidth#BITS_32}. + * + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Small32 fromBinary(ByteBuffer bytes) { + return (Small32) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_32, bytes); + } + } + + /** + * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_8}. + * + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static final class Small8 extends Small { + Small8(char[] index, byte[] data8, int highStart, + int index3NullOffset, int dataNullOffset) { + super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset); + } + + /** + * Creates a trie from its binary form. + * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} + * with {@link Type#SMALL} and {@link ValueWidth#BITS_8}. + * + * @param bytes a buffer containing the binary data of a CodePointTrie + * @return the trie + * @draft ICU 63 + * @provisional This API might change or be removed in a future release. + */ + public static Small8 fromBinary(ByteBuffer bytes) { + return (Small8) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_8, bytes); + } + } +}