--- old/src/java.base/share/classes/sun/text/normalizer/UCharacterProperty.java 2020-01-10 15:57:41.000000000 -0800 +++ /dev/null 2020-01-10 15:57:41.000000000 -0800 @@ -1,607 +0,0 @@ -/* - * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ -/* - ******************************************************************************* - * Copyright (C) 1996-2014, International Business Machines Corporation and - * others. All Rights Reserved. - ******************************************************************************* - */ - -package sun.text.normalizer; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Iterator; -import java.util.MissingResourceException; - -import sun.text.normalizer.UCharacter.HangulSyllableType; -import sun.text.normalizer.UCharacter.NumericType; - -/** -*

Internal class used for Unicode character property database.

-*

This classes store binary data read from uprops.icu. -* It does not have the capability to parse the data into more high-level -* information. It only returns bytes of information when required.

-*

Due to the form most commonly used for retrieval, array of char is used -* to store the binary data.

-*

UCharacterPropertyDB also contains information on accessing indexes to -* significant points in the binary data.

-*

Responsibility for molding the binary data into more meaning form lies on -* UCharacter.

-* @author Syn Wee Quek -* @since release 2.1, february 1st 2002 -*/ - -final class UCharacterProperty -{ - // public data members ----------------------------------------------- - - /* - * public singleton instance - */ - public static final UCharacterProperty INSTANCE; - - /** - * Trie data - */ - public Trie2_16 m_trie_; - - /** - * Unicode version - */ - public VersionInfo m_unicodeVersion_; - - /** - * Character type mask - */ - public static final int TYPE_MASK = 0x1F; - - // uprops.h enum UPropertySource --------------------------------------- *** - - /** From uchar.c/uprops.icu main trie */ - public static final int SRC_CHAR=1; - /** From uchar.c/uprops.icu properties vectors trie */ - public static final int SRC_PROPSVEC=2; - /** From ubidi_props.c/ubidi.icu */ - public static final int SRC_BIDI=5; - /** From normalizer2impl.cpp/nfc.nrm */ - public static final int SRC_NFC=8; - /** From normalizer2impl.cpp/nfkc.nrm */ - public static final int SRC_NFKC=9; - - // public methods ---------------------------------------------------- - - /** - * Gets the main property value for code point ch. - * @param ch code point whose property value is to be retrieved - * @return property value of code point - */ - public final int getProperty(int ch) - { - return m_trie_.get(ch); - } - - /** - * Gets the unicode additional properties. - * Java version of C u_getUnicodeProperties(). - * @param codepoint codepoint whose additional properties is to be - * retrieved - * @param column The column index. - * @return unicode properties - */ - public int getAdditional(int codepoint, int column) { - assert column >= 0; - if (column >= m_additionalColumnsCount_) { - return 0; - } - return m_additionalVectors_[m_additionalTrie_.get(codepoint) + column]; - } - - /** - *

Get the "age" of the code point.

- *

The "age" is the Unicode version when the code point was first - * designated (as a non-character or for Private Use) or assigned a - * character.

- *

This can be useful to avoid emitting code points to receiving - * processes that do not accept newer characters.

- *

The data is from the UCD file DerivedAge.txt.

- *

This API does not check the validity of the codepoint.

- * @param codepoint The code point. - * @return the Unicode version number - */ - public VersionInfo getAge(int codepoint) - { - int version = getAdditional(codepoint, 0) >> AGE_SHIFT_; - return VersionInfo.getInstance( - (version >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_, - version & LAST_NIBBLE_MASK_, 0, 0); - } - - // int-value and enumerated properties --------------------------------- *** - - public int getType(int c) { - return getProperty(c)&TYPE_MASK; - } - - /* - * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. - * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. - */ - private static final int /* UHangulSyllableType */ gcbToHst[]={ - HangulSyllableType.NOT_APPLICABLE, /* U_GCB_OTHER */ - HangulSyllableType.NOT_APPLICABLE, /* U_GCB_CONTROL */ - HangulSyllableType.NOT_APPLICABLE, /* U_GCB_CR */ - HangulSyllableType.NOT_APPLICABLE, /* U_GCB_EXTEND */ - HangulSyllableType.LEADING_JAMO, /* U_GCB_L */ - HangulSyllableType.NOT_APPLICABLE, /* U_GCB_LF */ - HangulSyllableType.LV_SYLLABLE, /* U_GCB_LV */ - HangulSyllableType.LVT_SYLLABLE, /* U_GCB_LVT */ - HangulSyllableType.TRAILING_JAMO, /* U_GCB_T */ - HangulSyllableType.VOWEL_JAMO /* U_GCB_V */ - /* - * Omit GCB values beyond what we need for hst. - * The code below checks for the array length. - */ - }; - - private class IntProperty { - int column; // SRC_PROPSVEC column, or "source" if mask==0 - int mask; - int shift; - - IntProperty(int column, int mask, int shift) { - this.column=column; - this.mask=mask; - this.shift=shift; - } - - IntProperty(int source) { - this.column=source; - this.mask=0; - } - - int getValue(int c) { - // systematic, directly stored properties - return (getAdditional(c, column)&mask)>>>shift; - } - } - - private class BiDiIntProperty extends IntProperty { - BiDiIntProperty() { - super(SRC_BIDI); - } - } - - private class CombiningClassIntProperty extends IntProperty { - CombiningClassIntProperty(int source) { - super(source); - } - } - - private class NormQuickCheckIntProperty extends IntProperty { // UCHAR_NF*_QUICK_CHECK properties - int which; - int max; - - NormQuickCheckIntProperty(int source, int which, int max) { - super(source); - this.which=which; - this.max=max; - } - } - - private IntProperty intProp = new BiDiIntProperty() { // BIDI_PAIRED_BRACKET_TYPE - int getValue(int c) { - return UBiDiProps.INSTANCE.getPairedBracketType(c); - } - }; - - public int getIntPropertyValue(int c, int which) { - if (which == BIDI_PAIRED_BRACKET_TYPE) { - return intProp.getValue(c); - } - return 0; // undefined - } - - /** - * Forms a supplementary code point from the argument character
- * Note this is for internal use hence no checks for the validity of the - * surrogate characters are done - * @param lead lead surrogate character - * @param trail trailing surrogate character - * @return code point of the supplementary character - */ - public static int getRawSupplementary(char lead, char trail) - { - return (lead << LEAD_SURROGATE_SHIFT_) + trail + SURROGATE_OFFSET_; - } - - /** - * Gets the type mask - * @param type character type - * @return mask - */ - public static final int getMask(int type) - { - return 1 << type; - } - - /** - * Returns the digit values of characters like 'A' - 'Z', normal, - * half-width and full-width. This method assumes that the other digit - * characters are checked by the calling method. - * @param ch character to test - * @return -1 if ch is not a character of the form 'A' - 'Z', otherwise - * its corresponding digit will be returned. - */ - public static int getEuropeanDigit(int ch) { - if ((ch > 0x7a && ch < 0xff21) - || ch < 0x41 || (ch > 0x5a && ch < 0x61) - || ch > 0xff5a || (ch > 0xff3a && ch < 0xff41)) { - return -1; - } - if (ch <= 0x7a) { - // ch >= 0x41 or ch < 0x61 - return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61); - } - // ch >= 0xff21 - if (ch <= 0xff3a) { - return ch + 10 - 0xff21; - } - // ch >= 0xff41 && ch <= 0xff5a - return ch + 10 - 0xff41; - } - - public int digit(int c) { - int value = getNumericTypeValue(getProperty(c)) - NTV_DECIMAL_START_; - if(value<=9) { - return value; - } else { - return -1; - } - } - - // protected variables ----------------------------------------------- - - /** - * Extra property trie - */ - Trie2_16 m_additionalTrie_; - /** - * Extra property vectors, 1st column for age and second for binary - * properties. - */ - int m_additionalVectors_[]; - /** - * Number of additional columns - */ - int m_additionalColumnsCount_; - /** - * Maximum values for block, bits used as in vector word - * 0 - */ - int m_maxBlockScriptValue_; - /** - * Maximum values for script, bits used as in vector word - * 0 - */ - int m_maxJTGValue_; - /** - * Script_Extensions data - */ - public char[] m_scriptExtensions_; - - // private variables ------------------------------------------------- - - /** - * Default name of the datafile - */ - private static final String DATA_FILE_NAME_ = "/sun/text/resources/uprops.icu"; - - /** - * Shift value for lead surrogate to form a supplementary character. - */ - private static final int LEAD_SURROGATE_SHIFT_ = 10; - /** - * Offset to add to combined surrogate pair to avoid masking. - */ - private static final int SURROGATE_OFFSET_ = - UTF16.SUPPLEMENTARY_MIN_VALUE - - (UTF16.SURROGATE_MIN_VALUE << - LEAD_SURROGATE_SHIFT_) - - UTF16.TRAIL_SURROGATE_MIN_VALUE; - - - // property data constants ------------------------------------------------- - - /** - * Numeric types and values in the main properties words. - */ - private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6; - private static final int getNumericTypeValue(int props) { - return props >> NUMERIC_TYPE_VALUE_SHIFT_; - } - - /* constants for the storage form of numeric types and values */ - /** No numeric value. */ - private static final int NTV_NONE_ = 0; - /** Decimal digits: nv=0..9 */ - private static final int NTV_DECIMAL_START_ = 1; - /** Other digits: nv=0..9 */ - private static final int NTV_DIGIT_START_ = 11; - /** Small integers: nv=0..154 */ - private static final int NTV_NUMERIC_START_ = 21; - - private static final int ntvGetType(int ntv) { - return - (ntv==NTV_NONE_) ? NumericType.NONE : - (ntv expectedTrieLength) { - throw new IOException("uprops.icu: not enough bytes for main trie"); - } - // skip padding after trie bytes - ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength); - - // skip unused intervening data structures - ICUBinary.skipBytes(bytes, (additionalOffset - propertyOffset) * 4); - - if(m_additionalColumnsCount_ > 0) { - // reads the additional property block - m_additionalTrie_ = Trie2_16.createFromSerialized(bytes); - expectedTrieLength = (additionalVectorsOffset-additionalOffset)*4; - trieLength = m_additionalTrie_.getSerializedLength(); - if(trieLength > expectedTrieLength) { - throw new IOException("uprops.icu: not enough bytes for additional-properties trie"); - } - // skip padding after trie bytes - ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength); - - // additional properties - int size = scriptExtensionsOffset - additionalVectorsOffset; - m_additionalVectors_ = new int[size]; - for (int i = 0; i < size; i ++) { - m_additionalVectors_[i] = bytes.getInt(); - } - } - - // Script_Extensions - int numChars = (reservedOffset7 - scriptExtensionsOffset) * 2; - if(numChars > 0) { - m_scriptExtensions_ = new char[numChars]; - for(int i = 0; i < numChars; ++i) { - m_scriptExtensions_[i] = bytes.getChar(); - } - } - } - - private static final class IsAcceptable implements ICUBinary.Authenticate { - // @Override when we switch to Java 6 - public boolean isDataVersionAcceptable(byte version[]) { - return version[0] == 7; - } - } - - private static final int DATA_FORMAT = 0x5550726F; // "UPro" - - public void upropsvec_addPropertyStarts(UnicodeSet set) { - /* add the start code point of each same-value range of the properties vectors trie */ - if(m_additionalColumnsCount_>0) { - /* if m_additionalColumnsCount_==0 then the properties vectors trie may not be there at all */ - Iterator trieIterator = m_additionalTrie_.iterator(); - Trie2.Range range; - while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { - set.add(range.startCodePoint); - } - } - } - - // This static initializer block must be placed after - // other static member initialization - static { - try { - INSTANCE = new UCharacterProperty(); - } - catch (IOException e) { - throw new MissingResourceException(e.getMessage(),DATA_FILE_NAME_,""); - } - } - - - // Moved from UProperty.java - /** - * Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). - * Used in UAX #9: Unicode Bidirectional Algorithm - * (http://www.unicode.org/reports/tr9/) - * Returns UCharacter.BidiPairedBracketType values. - * @stable ICU 52 - */ - public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015; - -} --- /dev/null 2020-01-10 15:57:41.000000000 -0800 +++ new/src/java.base/share/classes/jdk/internal/icu/impl/UCharacterProperty.java 2020-01-10 15:57:41.000000000 -0800 @@ -0,0 +1,614 @@ +/* + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +/* + ******************************************************************************* + * Copyright (C) 1996-2014, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************************* + */ + +package jdk.internal.icu.impl; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Iterator; +import java.util.MissingResourceException; + +import jdk.internal.icu.lang.UCharacter.HangulSyllableType; +import jdk.internal.icu.lang.UCharacter.NumericType; +import jdk.internal.icu.text.UTF16; +import jdk.internal.icu.text.UnicodeSet; +import jdk.internal.icu.util.VersionInfo; + +/** +*

Internal class used for Unicode character property database.

+*

This classes store binary data read from uprops.icu. +* It does not have the capability to parse the data into more high-level +* information. It only returns bytes of information when required.

+*

Due to the form most commonly used for retrieval, array of char is used +* to store the binary data.

+*

UCharacterPropertyDB also contains information on accessing indexes to +* significant points in the binary data.

+*

Responsibility for molding the binary data into more meaning form lies on +* UCharacter.

+* @author Syn Wee Quek +* @since release 2.1, february 1st 2002 +*/ + +public final class UCharacterProperty +{ + // public data members ----------------------------------------------- + + /* + * public singleton instance + */ + public static final UCharacterProperty INSTANCE; + + /** + * Trie data + */ + public Trie2_16 m_trie_; + + /** + * Unicode version + */ + public VersionInfo m_unicodeVersion_; + + /** + * Character type mask + */ + public static final int TYPE_MASK = 0x1F; + + // uprops.h enum UPropertySource --------------------------------------- *** + + /** From uchar.c/uprops.icu main trie */ + public static final int SRC_CHAR=1; + /** From uchar.c/uprops.icu properties vectors trie */ + public static final int SRC_PROPSVEC=2; + /** From ubidi_props.c/ubidi.icu */ + public static final int SRC_BIDI=5; + /** From normalizer2impl.cpp/nfc.nrm */ + public static final int SRC_NFC=8; + /** From normalizer2impl.cpp/nfkc.nrm */ + public static final int SRC_NFKC=9; + + // public methods ---------------------------------------------------- + + /** + * Gets the main property value for code point ch. + * @param ch code point whose property value is to be retrieved + * @return property value of code point + */ + public final int getProperty(int ch) + { + return m_trie_.get(ch); + } + + /** + * Gets the unicode additional properties. + * Java version of C u_getUnicodeProperties(). + * @param codepoint codepoint whose additional properties is to be + * retrieved + * @param column The column index. + * @return unicode properties + */ + public int getAdditional(int codepoint, int column) { + assert column >= 0; + if (column >= m_additionalColumnsCount_) { + return 0; + } + return m_additionalVectors_[m_additionalTrie_.get(codepoint) + column]; + } + + /** + *

Get the "age" of the code point.

+ *

The "age" is the Unicode version when the code point was first + * designated (as a non-character or for Private Use) or assigned a + * character.

+ *

This can be useful to avoid emitting code points to receiving + * processes that do not accept newer characters.

+ *

The data is from the UCD file DerivedAge.txt.

+ *

This API does not check the validity of the codepoint.

+ * @param codepoint The code point. + * @return the Unicode version number + */ + public VersionInfo getAge(int codepoint) + { + int version = getAdditional(codepoint, 0) >> AGE_SHIFT_; + return VersionInfo.getInstance( + (version >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_, + version & LAST_NIBBLE_MASK_, 0, 0); + } + + // int-value and enumerated properties --------------------------------- *** + + public int getType(int c) { + return getProperty(c)&TYPE_MASK; + } + + /* + * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. + * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. + */ + private static final int /* UHangulSyllableType */ gcbToHst[]={ + HangulSyllableType.NOT_APPLICABLE, /* U_GCB_OTHER */ + HangulSyllableType.NOT_APPLICABLE, /* U_GCB_CONTROL */ + HangulSyllableType.NOT_APPLICABLE, /* U_GCB_CR */ + HangulSyllableType.NOT_APPLICABLE, /* U_GCB_EXTEND */ + HangulSyllableType.LEADING_JAMO, /* U_GCB_L */ + HangulSyllableType.NOT_APPLICABLE, /* U_GCB_LF */ + HangulSyllableType.LV_SYLLABLE, /* U_GCB_LV */ + HangulSyllableType.LVT_SYLLABLE, /* U_GCB_LVT */ + HangulSyllableType.TRAILING_JAMO, /* U_GCB_T */ + HangulSyllableType.VOWEL_JAMO /* U_GCB_V */ + /* + * Omit GCB values beyond what we need for hst. + * The code below checks for the array length. + */ + }; + + private class IntProperty { + int column; // SRC_PROPSVEC column, or "source" if mask==0 + int mask; + int shift; + + IntProperty(int column, int mask, int shift) { + this.column=column; + this.mask=mask; + this.shift=shift; + } + + IntProperty(int source) { + this.column=source; + this.mask=0; + } + + int getValue(int c) { + // systematic, directly stored properties + return (getAdditional(c, column)&mask)>>>shift; + } + } + + private class BiDiIntProperty extends IntProperty { + BiDiIntProperty() { + super(SRC_BIDI); + } + } + + private class CombiningClassIntProperty extends IntProperty { + CombiningClassIntProperty(int source) { + super(source); + } + } + + private class NormQuickCheckIntProperty extends IntProperty { // UCHAR_NF*_QUICK_CHECK properties + int which; + int max; + + NormQuickCheckIntProperty(int source, int which, int max) { + super(source); + this.which=which; + this.max=max; + } + } + + private IntProperty intProp = new BiDiIntProperty() { // BIDI_PAIRED_BRACKET_TYPE + int getValue(int c) { + return UBiDiProps.INSTANCE.getPairedBracketType(c); + } + }; + + public int getIntPropertyValue(int c, int which) { + if (which == BIDI_PAIRED_BRACKET_TYPE) { + return intProp.getValue(c); + } + return 0; // undefined + } + + /** + * Forms a supplementary code point from the argument character
+ * Note this is for internal use hence no checks for the validity of the + * surrogate characters are done + * @param lead lead surrogate character + * @param trail trailing surrogate character + * @return code point of the supplementary character + */ + public static int getRawSupplementary(char lead, char trail) + { + return (lead << LEAD_SURROGATE_SHIFT_) + trail + SURROGATE_OFFSET_; + } + + /** + * Gets the type mask + * @param type character type + * @return mask + */ + public static final int getMask(int type) + { + return 1 << type; + } + + /** + * Returns the digit values of characters like 'A' - 'Z', normal, + * half-width and full-width. This method assumes that the other digit + * characters are checked by the calling method. + * @param ch character to test + * @return -1 if ch is not a character of the form 'A' - 'Z', otherwise + * its corresponding digit will be returned. + */ + public static int getEuropeanDigit(int ch) { + if ((ch > 0x7a && ch < 0xff21) + || ch < 0x41 || (ch > 0x5a && ch < 0x61) + || ch > 0xff5a || (ch > 0xff3a && ch < 0xff41)) { + return -1; + } + if (ch <= 0x7a) { + // ch >= 0x41 or ch < 0x61 + return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61); + } + // ch >= 0xff21 + if (ch <= 0xff3a) { + return ch + 10 - 0xff21; + } + // ch >= 0xff41 && ch <= 0xff5a + return ch + 10 - 0xff41; + } + + public int digit(int c) { + int value = getNumericTypeValue(getProperty(c)) - NTV_DECIMAL_START_; + if(value<=9) { + return value; + } else { + return -1; + } + } + + // protected variables ----------------------------------------------- + + /** + * Extra property trie + */ + Trie2_16 m_additionalTrie_; + /** + * Extra property vectors, 1st column for age and second for binary + * properties. + */ + int m_additionalVectors_[]; + /** + * Number of additional columns + */ + int m_additionalColumnsCount_; + /** + * Maximum values for block, bits used as in vector word + * 0 + */ + int m_maxBlockScriptValue_; + /** + * Maximum values for script, bits used as in vector word + * 0 + */ + int m_maxJTGValue_; + /** + * Script_Extensions data + */ + public char[] m_scriptExtensions_; + + // private variables ------------------------------------------------- + + /** + * Default name of the datafile + */ + @SuppressWarnings("deprecation") + private static final String DATA_FILE_NAME_ = + "/jdk/internal/icu/impl/data/icudt" + + VersionInfo.ICU_DATA_VERSION_PATH + + "/uprops.icu"; + + /** + * Shift value for lead surrogate to form a supplementary character. + */ + private static final int LEAD_SURROGATE_SHIFT_ = 10; + /** + * Offset to add to combined surrogate pair to avoid masking. + */ + private static final int SURROGATE_OFFSET_ = + UTF16.SUPPLEMENTARY_MIN_VALUE - + (UTF16.SURROGATE_MIN_VALUE << + LEAD_SURROGATE_SHIFT_) - + UTF16.TRAIL_SURROGATE_MIN_VALUE; + + + // property data constants ------------------------------------------------- + + /** + * Numeric types and values in the main properties words. + */ + private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6; + private static final int getNumericTypeValue(int props) { + return props >> NUMERIC_TYPE_VALUE_SHIFT_; + } + + /* constants for the storage form of numeric types and values */ + /** No numeric value. */ + private static final int NTV_NONE_ = 0; + /** Decimal digits: nv=0..9 */ + private static final int NTV_DECIMAL_START_ = 1; + /** Other digits: nv=0..9 */ + private static final int NTV_DIGIT_START_ = 11; + /** Small integers: nv=0..154 */ + private static final int NTV_NUMERIC_START_ = 21; + + private static final int ntvGetType(int ntv) { + return + (ntv==NTV_NONE_) ? NumericType.NONE : + (ntv expectedTrieLength) { + throw new IOException("uprops.icu: not enough bytes for main trie"); + } + // skip padding after trie bytes + ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength); + + // skip unused intervening data structures + ICUBinary.skipBytes(bytes, (additionalOffset - propertyOffset) * 4); + + if(m_additionalColumnsCount_ > 0) { + // reads the additional property block + m_additionalTrie_ = Trie2_16.createFromSerialized(bytes); + expectedTrieLength = (additionalVectorsOffset-additionalOffset)*4; + trieLength = m_additionalTrie_.getSerializedLength(); + if(trieLength > expectedTrieLength) { + throw new IOException("uprops.icu: not enough bytes for additional-properties trie"); + } + // skip padding after trie bytes + ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength); + + // additional properties + int size = scriptExtensionsOffset - additionalVectorsOffset; + m_additionalVectors_ = new int[size]; + for (int i = 0; i < size; i ++) { + m_additionalVectors_[i] = bytes.getInt(); + } + } + + // Script_Extensions + int numChars = (reservedOffset7 - scriptExtensionsOffset) * 2; + if(numChars > 0) { + m_scriptExtensions_ = new char[numChars]; + for(int i = 0; i < numChars; ++i) { + m_scriptExtensions_[i] = bytes.getChar(); + } + } + } + + private static final class IsAcceptable implements ICUBinary.Authenticate { + // @Override when we switch to Java 6 + public boolean isDataVersionAcceptable(byte version[]) { + return version[0] == 7; + } + } + + private static final int DATA_FORMAT = 0x5550726F; // "UPro" + + public void upropsvec_addPropertyStarts(UnicodeSet set) { + /* add the start code point of each same-value range of the properties vectors trie */ + if(m_additionalColumnsCount_>0) { + /* if m_additionalColumnsCount_==0 then the properties vectors trie may not be there at all */ + Iterator trieIterator = m_additionalTrie_.iterator(); + Trie2.Range range; + while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { + set.add(range.startCodePoint); + } + } + } + + // This static initializer block must be placed after + // other static member initialization + static { + try { + INSTANCE = new UCharacterProperty(); + } + catch (IOException e) { + throw new MissingResourceException(e.getMessage(),DATA_FILE_NAME_,""); + } + } + + + // Moved from UProperty.java + /** + * Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). + * Used in UAX #9: Unicode Bidirectional Algorithm + * (http://www.unicode.org/reports/tr9/) + * Returns UCharacter.BidiPairedBracketType values. + * @stable ICU 52 + */ + public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015; + +}