< prev index next >
jdk/src/java.base/share/classes/sun/text/normalizer/UCharacterProperty.java
Print this page
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
@@ -22,27 +22,25 @@
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
*******************************************************************************
- * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved *
- * *
- * The original version of this source code and documentation is copyrighted *
- * and owned by IBM, These materials are provided under terms of a License *
- * Agreement between IBM and Sun. This technology is protected by multiple *
- * US and International patents. This notice and attribution to IBM may not *
- * to removed. *
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
package sun.text.normalizer;
-import java.io.BufferedInputStream;
-import java.io.InputStream;
import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Iterator;
import java.util.MissingResourceException;
+import sun.text.normalizer.UCharacter.HangulSyllableType;
+import sun.text.normalizer.UCharacter.NumericType;
+
/**
* <p>Internal class used for Unicode character property database.</p>
* <p>This class stores binary data read from uprops.icu.
* It does not have the capability to parse the data into more high-level
* information. It only returns bytes of information when required.</p>
@@ -54,135 +52,73 @@
* <a href=UCharacter.html>UCharacter</a>.</p>
* @author Syn Wee Quek
* @since release 2.1, february 1st 2002
*/
-public final class UCharacterProperty
+final class UCharacterProperty
{
// public data members -----------------------------------------------
- /**
- * Trie data
- */
- public CharTrie m_trie_;
- /**
- * Optimization
- * CharTrie index array
- */
- public char[] m_trieIndex_;
- /**
- * Optimization
- * CharTrie data array
+ /*
+ * public singleton instance
*/
- public char[] m_trieData_;
+ public static final UCharacterProperty INSTANCE;
+
/**
- * Optimization
- * CharTrie data offset
+ * Trie data
*/
- public int m_trieInitialValue_;
+ public Trie2_16 m_trie_;
+
/**
* Unicode version
*/
public VersionInfo m_unicodeVersion_;
+ /**
+ * Character type mask
+ */
+ public static final int TYPE_MASK = 0x1F;
+
// uprops.h enum UPropertySource --------------------------------------- ***
+ /** From uchar.c/uprops.icu main trie */
+ public static final int SRC_CHAR=1;
/** From uchar.c/uprops.icu properties vectors trie */
public static final int SRC_PROPSVEC=2;
- /** One more than the highest UPropertySource (SRC_) constant. */
- public static final int SRC_COUNT=9;
+ /** From ubidi_props.c/ubidi.icu */
+ public static final int SRC_BIDI=5;
+ /** From normalizer2impl.cpp/nfc.nrm */
+ public static final int SRC_NFC=8;
+ /** From normalizer2impl.cpp/nfkc.nrm */
+ public static final int SRC_NFKC=9;
// public methods ----------------------------------------------------
/**
- * Java friends implementation
- */
- public void setIndexData(CharTrie.FriendAgent friendagent)
- {
- m_trieIndex_ = friendagent.getPrivateIndex();
- m_trieData_ = friendagent.getPrivateData();
- m_trieInitialValue_ = friendagent.getPrivateInitialValue();
- }
-
- /**
- * Gets the property value at the index.
- * This is optimized.
- * Note this is alittle different from CharTrie the index m_trieData_
- * is never negative.
+ * Gets the main property value for code point ch.
* @param ch code point whose property value is to be retrieved
* @return property value of code point
*/
public final int getProperty(int ch)
{
- if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE
- || (ch > UTF16.LEAD_SURROGATE_MAX_VALUE
- && ch < UTF16.SUPPLEMENTARY_MIN_VALUE)) {
- // BMP codepoint 0000..D7FF or DC00..FFFF
- // optimized
- try { // using try for ch < 0 is faster than using an if statement
- return m_trieData_[
- (m_trieIndex_[ch >> Trie.INDEX_STAGE_1_SHIFT_]
- << Trie.INDEX_STAGE_2_SHIFT_)
- + (ch & Trie.INDEX_STAGE_3_MASK_)];
- } catch (ArrayIndexOutOfBoundsException e) {
- return m_trieInitialValue_;
- }
- }
- if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
- // lead surrogate D800..DBFF
- return m_trieData_[
- (m_trieIndex_[Trie.LEAD_INDEX_OFFSET_
- + (ch >> Trie.INDEX_STAGE_1_SHIFT_)]
- << Trie.INDEX_STAGE_2_SHIFT_)
- + (ch & Trie.INDEX_STAGE_3_MASK_)];
- }
- if (ch <= UTF16.CODEPOINT_MAX_VALUE) {
- // supplementary code point 10000..10FFFF
- // look at the construction of supplementary characters
- // trail forms the ends of it.
- return m_trie_.getSurrogateValue(
- UTF16.getLeadSurrogate(ch),
- (char)(ch & Trie.SURROGATE_MASK_));
- }
- // ch is out of bounds
- // return m_dataOffset_ if there is an error, in this case we return
- // the default value: m_initialValue_
- // we cannot assume that m_initialValue_ is at offset 0
- // this is for optimization.
- return m_trieInitialValue_;
-
- // this all is an inlined form of return m_trie_.getCodePointValue(ch);
- }
-
- /**
- * Getting the unsigned numeric value of a character embedded in the property
- * argument
- * @param prop the character
- * @return unsigned numberic value
- */
- public static int getUnsignedValue(int prop)
- {
- return (prop >> VALUE_SHIFT_) & UNSIGNED_VALUE_MASK_AFTER_SHIFT_;
+ return m_trie_.get(ch);
}
/**
* Gets the unicode additional properties.
- * C version getUnicodeProperties.
+ * Java version of C u_getUnicodeProperties().
* @param codepoint codepoint whose additional properties are to be
* retrieved
- * @param column
+ * @param column The column index.
* @return unicode properties
*/
public int getAdditional(int codepoint, int column) {
- if (column == -1) {
- return getProperty(codepoint);
- }
- if (column < 0 || column >= m_additionalColumnsCount_) {
+ assert column >= 0; // NOTE(review): only enforced when assertions are enabled; the removed code returned 0 for a negative column
+ if (column >= m_additionalColumnsCount_) { // column is not stored in the data
return 0;
}
- return m_additionalVectors_[
- m_additionalTrie_.getCodePointValue(codepoint) + column];
+ return m_additionalVectors_[m_additionalTrie_.get(codepoint) + column]; // the trie value is the base index into the vectors array; column is added to it
}
/**
* <p>Get the "age" of the code point.</p>
* <p>The "age" is the Unicode version when the code point was first
@@ -201,10 +137,95 @@
return VersionInfo.getInstance(
(version >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_,
version & LAST_NIBBLE_MASK_, 0, 0);
}
+ // int-value and enumerated properties --------------------------------- ***
+ /**
+ * Returns the main property value of code point c reduced to the bits
+ * selected by TYPE_MASK.
+ * @param c code point
+ * @return getProperty(c) & TYPE_MASK
+ */
+ public int getType(int c) {
+ return getProperty(c)&TYPE_MASK;
+ }
+
+ /*
+ * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
+ * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
+ * Each entry is annotated with the break value it stands for; only the
+ * first ten break values have an entry.
+ */
+ private static final int /* UHangulSyllableType */ gcbToHst[]={
+ HangulSyllableType.NOT_APPLICABLE, /* U_GCB_OTHER */
+ HangulSyllableType.NOT_APPLICABLE, /* U_GCB_CONTROL */
+ HangulSyllableType.NOT_APPLICABLE, /* U_GCB_CR */
+ HangulSyllableType.NOT_APPLICABLE, /* U_GCB_EXTEND */
+ HangulSyllableType.LEADING_JAMO, /* U_GCB_L */
+ HangulSyllableType.NOT_APPLICABLE, /* U_GCB_LF */
+ HangulSyllableType.LV_SYLLABLE, /* U_GCB_LV */
+ HangulSyllableType.LVT_SYLLABLE, /* U_GCB_LVT */
+ HangulSyllableType.TRAILING_JAMO, /* U_GCB_T */
+ HangulSyllableType.VOWEL_JAMO /* U_GCB_V */
+ /*
+ * Omit GCB values beyond what we need for hst.
+ * The code below checks for the array length.
+ * NOTE(review): no reader of this table is visible in this part of
+ * the patch; confirm the length check exists wherever it is used.
+ */
+ };
+ /**
+ * A property whose int value is looked up per code point.
+ * Declared as an inner (non-static) class because getValue() calls
+ * getAdditional() on the enclosing UCharacterProperty instance.
+ */
+ private class IntProperty {
+ int column; // SRC_PROPSVEC column, or "source" if mask==0
+ int mask;
+ int shift;
+ /** Creates a property read from a properties-vector column as (word & mask) >>> shift. */
+ IntProperty(int column, int mask, int shift) {
+ this.column=column;
+ this.mask=mask;
+ this.shift=shift;
+ }
+ /** Creates a property identified by its source only: mask is 0 and shift keeps its default of 0. */
+ IntProperty(int source) {
+ this.column=source;
+ this.mask=0;
+ }
+ /** Base lookup; evaluates to 0 whenever mask is 0, so a source-only property must override this to return anything else. */
+ int getValue(int c) {
+ // systematic, directly stored properties
+ return (getAdditional(c, column)&mask)>>>shift;
+ }
+ }
+ /** Source-only IntProperty whose source is fixed to SRC_BIDI. */
+ private class BiDiIntProperty extends IntProperty {
+ BiDiIntProperty() {
+ super(SRC_BIDI);
+ }
+ }
+ /** Source-only IntProperty; it defines no getValue() of its own, so the inherited mask-0 lookup applies. */
+ private class CombiningClassIntProperty extends IntProperty {
+ CombiningClassIntProperty(int source) {
+ super(source);
+ }
+ }
+ /** Source-only IntProperty that also records a 'which' and a 'max' value; neither field is read inside this class. */
+ private class NormQuickCheckIntProperty extends IntProperty { // UCHAR_NF*_QUICK_CHECK properties
+ int which;
+ int max;
+
+ NormQuickCheckIntProperty(int source, int which, int max) {
+ super(source);
+ this.which=which;
+ this.max=max;
+ }
+ }
+ /** Paired-bracket-type lookup: an anonymous BiDiIntProperty whose getValue() forwards to UBiDiProps.INSTANCE.getPairedBracketType(c). */
+ private IntProperty intProp = new BiDiIntProperty() { // BIDI_PAIRED_BRACKET_TYPE
+ int getValue(int c) {
+ return UBiDiProps.INSTANCE.getPairedBracketType(c);
+ }
+ };
+ /**
+ * Returns the value of the int property selected by 'which' for code point c.
+ * Only BIDI_PAIRED_BRACKET_TYPE is handled; every other selector yields 0.
+ * @param c code point
+ * @param which property selector
+ * @return intProp.getValue(c) for BIDI_PAIRED_BRACKET_TYPE, otherwise 0
+ */
+ public int getIntPropertyValue(int c, int which) {
+ if (which == BIDI_PAIRED_BRACKET_TYPE) {
+ return intProp.getValue(c);
+ }
+ return 0; // undefined
+ }
+
/**
* Forms a supplementary code point from the argument character<br>
* Note this is for internal use hence no checks for the validity of the
* surrogate characters are done
* @param lead lead surrogate character
@@ -215,54 +236,60 @@
{
return (lead << LEAD_SURROGATE_SHIFT_) + trail + SURROGATE_OFFSET_;
}
/**
- * Loads the property data and initialize the UCharacterProperty instance.
- * @throws MissingResourceException when data is missing or data has been corrupted
+ * Gets the type mask
+ * @param type character type
+ * @return mask
*/
- public static UCharacterProperty getInstance()
+ public static final int getMask(int type)
{
- if(INSTANCE_ == null) {
- try {
- INSTANCE_ = new UCharacterProperty();
+ return 1 << type; // an int with only bit number 'type' set
}
- catch (Exception e) {
- throw new MissingResourceException(e.getMessage(),"","");
+
+ /**
+ * Returns the digit value of a Latin letter used as a digit in radixes
+ * above 10. Four ranges are recognised: U+0041..U+005A, U+0061..U+007A,
+ * U+FF21..U+FF3A and U+FF41..U+FF5A. The first code point of each range
+ * maps to 10 and the last to 35, so upper and lower case give the same
+ * value. This method assumes that the other digit
+ * characters are checked by the calling method.
+ * @param ch character to test
+ * @return -1 if ch lies outside the four ranges above, otherwise
+ * its corresponding digit value (10..35).
+ */
+ public static int getEuropeanDigit(int ch) {
+ if ((ch > 0x7a && ch < 0xff21)
+ || ch < 0x41 || (ch > 0x5a && ch < 0x61)
+ || ch > 0xff5a || (ch > 0xff3a && ch < 0xff41)) {
+ return -1;
+ }
+ if (ch <= 0x7a) {
+ // ch is in 0x41..0x5a or in 0x61..0x7a
+ return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61);
}
+ // ch >= 0xff21
+ if (ch <= 0xff3a) {
+ return ch + 10 - 0xff21;
}
- return INSTANCE_;
+ // ch >= 0xff41 && ch <= 0xff5a
+ return ch + 10 - 0xff41;
}
- /**
- * Checks if the argument c is to be treated as a white space in ICU
- * rules. Usually ICU rule white spaces are ignored unless quoted.
- * Equivalent to test for Pattern_White_Space Unicode property.
- * Stable set of characters, won't change.
- * See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
- * @param c codepoint to check
- * @return true if c is a ICU white space
- */
- public static boolean isRuleWhiteSpace(int c)
- {
- /* "white space" in the sense of ICU rule parsers
- This is a FIXED LIST that is NOT DEPENDENT ON UNICODE PROPERTIES.
- See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
- U+0009..U+000D, U+0020, U+0085, U+200E..U+200F, and U+2028..U+2029
- Equivalent to test for Pattern_White_Space Unicode property.
- */
- return (c >= 0x0009 && c <= 0x2029 &&
- (c <= 0x000D || c == 0x0020 || c == 0x0085 ||
- c == 0x200E || c == 0x200F || c >= 0x2028));
+ public int digit(int c) { // returns the decimal digit value stored for c, or -1
+ int value = getNumericTypeValue(getProperty(c)) - NTV_DECIMAL_START_; // rebase so that the decimal range begins at 0
+ if(value<=9) { // no lower-bound test: a numeric type value of NTV_NONE_ (0) gives -1 here and is returned unchanged
+ return value;
+ } else {
+ return -1;
+ }
}
// protected variables -----------------------------------------------
/**
* Extra property trie
*/
- CharTrie m_additionalTrie_;
+ Trie2_16 m_additionalTrie_;
/**
* Extra property vectors, 1st column for age and second for binary
* properties.
*/
int m_additionalVectors_[];
@@ -278,52 +305,182 @@
/**
* Maximum values for script, bits used as in vector word
* 0
*/
int m_maxJTGValue_;
-
- // private variables -------------------------------------------------
-
/**
- * UnicodeData.txt property object
+ * Script_Extensions data
*/
- private static UCharacterProperty INSTANCE_ = null;
+ public char[] m_scriptExtensions_;
+
+ // private variables -------------------------------------------------
/**
* Default name of the datafile
*/
private static final String DATA_FILE_NAME_ = "/sun/text/resources/uprops.icu";
/**
- * Default buffer size of datafile
+ * Shift value for lead surrogate to form a supplementary character.
*/
- private static final int DATA_BUFFER_SIZE_ = 25000;
-
+ private static final int LEAD_SURROGATE_SHIFT_ = 10;
/**
- * Numeric value shift
+ * Offset to add to combined surrogate pair to avoid masking.
*/
- private static final int VALUE_SHIFT_ = 8;
+ private static final int SURROGATE_OFFSET_ =
+ UTF16.SUPPLEMENTARY_MIN_VALUE -
+ (UTF16.SURROGATE_MIN_VALUE <<
+ LEAD_SURROGATE_SHIFT_) -
+ UTF16.TRAIL_SURROGATE_MIN_VALUE;
+
+
+ // property data constants -------------------------------------------------
/**
- * Mask to be applied after shifting to obtain an unsigned numeric value
+ * Numeric types and values in the main properties words.
*/
- private static final int UNSIGNED_VALUE_MASK_AFTER_SHIFT_ = 0xFF;
+ private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6;
+ private static final int getNumericTypeValue(int props) { // extracts the numeric type/value field of a main properties word
+ return props >> NUMERIC_TYPE_VALUE_SHIFT_; // discards the low 6 bits (signed shift)
+ }
+ /* constants for the storage form of numeric types and values */
+ /** No numeric value. */
+ private static final int NTV_NONE_ = 0;
+ /** Decimal digits: nv=0..9 */
+ private static final int NTV_DECIMAL_START_ = 1;
+ /** Other digits: nv=0..9 */
+ private static final int NTV_DIGIT_START_ = 11;
+ /** Small integers: nv=0..154 */
+ private static final int NTV_NUMERIC_START_ = 21;
+ /**
+ * Maps a stored numeric type/value (ntv) to a NumericType constant:
+ * NTV_NONE_ gives NONE, values below NTV_DIGIT_START_ give DECIMAL,
+ * values below NTV_NUMERIC_START_ give DIGIT, and anything else NUMERIC.
+ */
+ private static final int ntvGetType(int ntv) {
+ return
+ (ntv==NTV_NONE_) ? NumericType.NONE :
+ (ntv<NTV_DIGIT_START_) ? NumericType.DECIMAL :
+ (ntv<NTV_NUMERIC_START_) ? NumericType.DIGIT :
+ NumericType.NUMERIC;
+ }
+
+ /*
+ * Properties in vector word 0
+ * Bits
+ * 31..24 DerivedAge version major/minor one nibble each
+ * 23..22 3..1: Bits 7..0 = Script_Extensions index
+ * 3: Script value from Script_Extensions
+ * 2: Script=Inherited
+ * 1: Script=Common
+ * 0: Script=bits 7..0
+ * 21..20 reserved
+ * 19..17 East Asian Width
+ * 16.. 8 UBlockCode
+ * 7.. 0 UScriptCode
+ */
/**
- * Shift value for lead surrogate to form a supplementary character.
+ * Script_Extensions: mask includes Script
*/
- private static final int LEAD_SURROGATE_SHIFT_ = 10;
+ public static final int SCRIPT_X_MASK = 0x00c000ff;
+ //private static final int SCRIPT_X_SHIFT = 22;
/**
- * Offset to add to combined surrogate pair to avoid msking.
+ * Integer properties mask and shift values for East Asian cell width.
+ * Equivalent to icu4c UPROPS_EA_MASK
*/
- private static final int SURROGATE_OFFSET_ =
- UTF16.SUPPLEMENTARY_MIN_VALUE -
- (UTF16.SURROGATE_MIN_VALUE <<
- LEAD_SURROGATE_SHIFT_) -
- UTF16.TRAIL_SURROGATE_MIN_VALUE;
+ private static final int EAST_ASIAN_MASK_ = 0x000e0000;
+ /**
+ * Integer properties mask and shift values for East Asian cell width.
+ * Equivalent to icu4c UPROPS_EA_SHIFT
+ */
+ private static final int EAST_ASIAN_SHIFT_ = 17;
+ /**
+ * Integer properties mask and shift values for blocks.
+ * Equivalent to icu4c UPROPS_BLOCK_MASK
+ */
+ private static final int BLOCK_MASK_ = 0x0001ff00;
+ /**
+ * Integer properties mask and shift values for blocks.
+ * Equivalent to icu4c UPROPS_BLOCK_SHIFT
+ */
+ private static final int BLOCK_SHIFT_ = 8;
+ /**
+ * Integer properties mask and shift values for scripts.
+ * Equivalent to icu4c UPROPS_SCRIPT_MASK
+ */
+ public static final int SCRIPT_MASK_ = 0x000000ff;
- // additional properties ----------------------------------------------
+ /**
+ * Additional properties used in internal trie data
+ */
+ /*
+ * Properties in vector word 1
+ * Each bit encodes one binary property.
+ * The following constants represent the bit number, use 1<<UPROPS_XYZ.
+ * UPROPS_BINARY_1_TOP<=32!
+ *
+ * Keep this list of property enums in sync with
+ * propListNames[] in icu/source/tools/genprops/props2.c!
+ *
+ * ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
+ */
+ private static final int WHITE_SPACE_PROPERTY_ = 0;
+ private static final int DASH_PROPERTY_ = 1;
+ private static final int HYPHEN_PROPERTY_ = 2;
+ private static final int QUOTATION_MARK_PROPERTY_ = 3;
+ private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 4;
+ private static final int MATH_PROPERTY_ = 5;
+ private static final int HEX_DIGIT_PROPERTY_ = 6;
+ private static final int ASCII_HEX_DIGIT_PROPERTY_ = 7;
+ private static final int ALPHABETIC_PROPERTY_ = 8;
+ private static final int IDEOGRAPHIC_PROPERTY_ = 9;
+ private static final int DIACRITIC_PROPERTY_ = 10;
+ private static final int EXTENDER_PROPERTY_ = 11;
+ private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 12;
+ private static final int GRAPHEME_EXTEND_PROPERTY_ = 13;
+ private static final int GRAPHEME_LINK_PROPERTY_ = 14;
+ private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 15;
+ private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 16;
+ private static final int RADICAL_PROPERTY_ = 17;
+ private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 18;
+ private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 19;
+ private static final int DEPRECATED_PROPERTY_ = 20;
+ private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 21;
+ private static final int XID_START_PROPERTY_ = 22;
+ private static final int XID_CONTINUE_PROPERTY_ = 23;
+ private static final int ID_START_PROPERTY_ = 24;
+ private static final int ID_CONTINUE_PROPERTY_ = 25;
+ private static final int GRAPHEME_BASE_PROPERTY_ = 26;
+ private static final int S_TERM_PROPERTY_ = 27;
+ private static final int VARIATION_SELECTOR_PROPERTY_ = 28;
+ private static final int PATTERN_SYNTAX = 29; /* new in ICU 3.4 and Unicode 4.1 */
+ private static final int PATTERN_WHITE_SPACE = 30;
+
+ /*
+ * Properties in vector word 2
+ * Bits
+ * 31..26 reserved
+ * 25..20 Line Break
+ * 19..15 Sentence Break
+ * 14..10 Word Break
+ * 9.. 5 Grapheme Cluster Break
+ * 4.. 0 Decomposition Type
+ */
+ private static final int LB_MASK = 0x03f00000;
+ private static final int LB_SHIFT = 20;
+
+ private static final int SB_MASK = 0x000f8000;
+ private static final int SB_SHIFT = 15;
+
+ private static final int WB_MASK = 0x00007c00;
+ private static final int WB_SHIFT = 10;
+
+ private static final int GCB_MASK = 0x000003e0;
+ private static final int GCB_SHIFT = 5;
+
+ /**
+ * Integer properties mask for decomposition type.
+ * Equivalent to icu4c UPROPS_DT_MASK.
+ */
+ private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;
/**
* First nibble shift
*/
private static final int FIRST_NIBBLE_SHIFT_ = 0x4;
@@ -343,27 +500,108 @@
* @exception IOException thrown when data reading fails or data corrupted
*/
private UCharacterProperty() throws IOException
{
// jar access
- InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME_);
- BufferedInputStream b = new BufferedInputStream(is, DATA_BUFFER_SIZE_);
- UCharacterPropertyReader reader = new UCharacterPropertyReader(b);
- reader.read(this);
- b.close();
-
- m_trie_.putIndexData(this);
+ ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_);
+ m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable()); // the version returned by the header check is kept as the Unicode version
+ // Read or skip the 16 indexes.
+ int propertyOffset = bytes.getInt();
+ /* exceptionOffset = */ bytes.getInt();
+ /* caseOffset = */ bytes.getInt();
+ int additionalOffset = bytes.getInt();
+ int additionalVectorsOffset = bytes.getInt();
+ m_additionalColumnsCount_ = bytes.getInt();
+ int scriptExtensionsOffset = bytes.getInt();
+ int reservedOffset7 = bytes.getInt();
+ /* reservedOffset8 = */ bytes.getInt();
+ /* dataTopOffset = */ bytes.getInt();
+ m_maxBlockScriptValue_ = bytes.getInt();
+ m_maxJTGValue_ = bytes.getInt();
+ ICUBinary.skipBytes(bytes, (16 - 12) << 2); // 12 indexes were consumed above; skip the other 4 ints (4 bytes each)
+
+ // read the main properties trie
+ m_trie_ = Trie2_16.createFromSerialized(bytes);
+ int expectedTrieLength = (propertyOffset - 16) * 4; // offsets are multiplied by 4 to get bytes; the 16 index ints come before the trie
+ int trieLength = m_trie_.getSerializedLength();
+ if(trieLength > expectedTrieLength) {
+ throw new IOException("uprops.icu: not enough bytes for main trie");
+ }
+ // skip padding after trie bytes
+ ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);
+
+ // skip unused intervening data structures
+ ICUBinary.skipBytes(bytes, (additionalOffset - propertyOffset) * 4);
+
+ if(m_additionalColumnsCount_ > 0) { // otherwise m_additionalTrie_ and m_additionalVectors_ are left unassigned
+ // reads the additional property block
+ m_additionalTrie_ = Trie2_16.createFromSerialized(bytes);
+ expectedTrieLength = (additionalVectorsOffset-additionalOffset)*4;
+ trieLength = m_additionalTrie_.getSerializedLength();
+ if(trieLength > expectedTrieLength) {
+ throw new IOException("uprops.icu: not enough bytes for additional-properties trie");
+ }
+ // skip padding after trie bytes
+ ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);
+
+ // additional properties
+ int size = scriptExtensionsOffset - additionalVectorsOffset; // number of ints, not bytes: one getInt() per element below
+ m_additionalVectors_ = new int[size];
+ for (int i = 0; i < size; i ++) {
+ m_additionalVectors_[i] = bytes.getInt();
+ }
+ }
+
+ // Script_Extensions
+ int numChars = (reservedOffset7 - scriptExtensionsOffset) * 2; // two 16-bit chars per offset unit
+ if(numChars > 0) { // m_scriptExtensions_ stays unassigned when the section is empty
+ m_scriptExtensions_ = new char[numChars];
+ for(int i = 0; i < numChars; ++i) {
+ m_scriptExtensions_[i] = bytes.getChar();
+ }
+ }
+ }
+ /** Version check handed to ICUBinary.readHeaderAndDataVersion by the constructor: accepts data whose first version byte is 7. */
+ private static final class IsAcceptable implements ICUBinary.Authenticate {
+ // @Override when we switch to Java 6
+ public boolean isDataVersionAcceptable(byte version[]) {
+ return version[0] == 7; // only the first version byte is examined
+ }
}
+ private static final int DATA_FORMAT = 0x5550726F; // "UPro"
+
public void upropsvec_addPropertyStarts(UnicodeSet set) {
/* add the start code point of each same-value range of the properties vectors trie */
if(m_additionalColumnsCount_>0) {
/* if m_additionalColumnsCount_==0 then the properties vectors trie may not be there at all */
- TrieIterator propsVectorsIter = new TrieIterator(m_additionalTrie_);
- RangeValueIterator.Element propsVectorsResult = new RangeValueIterator.Element();
- while(propsVectorsIter.next(propsVectorsResult)){
- set.add(propsVectorsResult.start);
+ Iterator<Trie2.Range> trieIterator = m_additionalTrie_.iterator();
+ Trie2.Range range;
+ while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { // iteration ends at the first range flagged leadSurrogate; that range is not added
+ set.add(range.startCodePoint);
+ }
+ }
}
+
+ // This static initializer block must be placed after
+ // other static member initialization
+ static { // creates the singleton when the class is initialized
+ try {
+ INSTANCE = new UCharacterProperty();
}
+ catch (IOException e) { // a data loading failure is reported as an unchecked MissingResourceException
+ throw new MissingResourceException(e.getMessage(),DATA_FILE_NAME_,""); // NOTE(review): only the message is carried over; the IOException is not attached as the cause
}
+ }
+
+
+ // Moved from UProperty.java
+ /**
+ * Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
+ * Used in UAX #9: Unicode Bidirectional Algorithm
+ * (http://www.unicode.org/reports/tr9/)
+ * Returns UCharacter.BidiPairedBracketType values.
+ * @stable ICU 52
+ */
+ public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015;
}
< prev index next >