< prev index next >

jdk/src/java.base/share/classes/sun/text/normalizer/UCharacterProperty.java

Print this page

        

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this

@@ -22,27 +22,25 @@
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
 /*
  *******************************************************************************
- * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
- *                                                                             *
- * The original version of this source code and documentation is copyrighted   *
- * and owned by IBM, These materials are provided under terms of a License     *
- * Agreement between IBM and Sun. This technology is protected by multiple     *
- * US and International patents. This notice and attribution to IBM may not    *
- * to removed.                                                                 *
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
  *******************************************************************************
  */
 
 package sun.text.normalizer;
 
-import java.io.BufferedInputStream;
-import java.io.InputStream;
 import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Iterator;
 import java.util.MissingResourceException;
 
+import sun.text.normalizer.UCharacter.HangulSyllableType;
+import sun.text.normalizer.UCharacter.NumericType;
+
 /**
 * <p>Internal class used for Unicode character property database.</p>
 * <p>This classes store binary data read from uprops.icu.
 * It does not have the capability to parse the data into more high-level
 * information. It only returns bytes of information when required.</p>

@@ -54,135 +52,73 @@
 * <a href=UCharacter.html>UCharacter</a>.</p>
 * @author Syn Wee Quek
 * @since release 2.1, february 1st 2002
 */
 
-public final class UCharacterProperty
+final class UCharacterProperty
 {
     // public data members -----------------------------------------------
 
-    /**
-    * Trie data
-    */
-    public CharTrie m_trie_;
-    /**
-     * Optimization
-     * CharTrie index array
-     */
-    public char[] m_trieIndex_;
-    /**
-     * Optimization
-     * CharTrie data array
+    /*
+     * public singleton instance
      */
-    public char[] m_trieData_;
+    public static final UCharacterProperty INSTANCE;
+
     /**
-     * Optimization
-     * CharTrie data offset
+    * Trie data
      */
-    public int m_trieInitialValue_;
+    public Trie2_16 m_trie_;
+
     /**
     * Unicode version
     */
     public VersionInfo m_unicodeVersion_;
 
+    /**
+    * Character type mask
+    */
+    public static final int TYPE_MASK = 0x1F;
+
     // uprops.h enum UPropertySource --------------------------------------- ***
 
+    /** From uchar.c/uprops.icu main trie */
+    public static final int SRC_CHAR=1;
     /** From uchar.c/uprops.icu properties vectors trie */
     public static final int SRC_PROPSVEC=2;
-    /** One more than the highest UPropertySource (SRC_) constant. */
-    public static final int SRC_COUNT=9;
+    /** From ubidi_props.c/ubidi.icu */
+    public static final int SRC_BIDI=5;
+    /** From normalizer2impl.cpp/nfc.nrm */
+    public static final int SRC_NFC=8;
+    /** From normalizer2impl.cpp/nfkc.nrm */
+    public static final int SRC_NFKC=9;
 
     // public methods ----------------------------------------------------
 
     /**
-     * Java friends implementation
-     */
-    public void setIndexData(CharTrie.FriendAgent friendagent)
-    {
-        m_trieIndex_ = friendagent.getPrivateIndex();
-        m_trieData_ = friendagent.getPrivateData();
-        m_trieInitialValue_ = friendagent.getPrivateInitialValue();
-    }
-
-    /**
-    * Gets the property value at the index.
-    * This is optimized.
-    * Note this is alittle different from CharTrie the index m_trieData_
-    * is never negative.
+    * Gets the main property value for code point ch.
     * @param ch code point whose property value is to be retrieved
     * @return property value of code point
     */
     public final int getProperty(int ch)
     {
-        if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE
-            || (ch > UTF16.LEAD_SURROGATE_MAX_VALUE
-                && ch < UTF16.SUPPLEMENTARY_MIN_VALUE)) {
-            // BMP codepoint 0000..D7FF or DC00..FFFF
-            // optimized
-            try { // using try for ch < 0 is faster than using an if statement
-                return m_trieData_[
-                    (m_trieIndex_[ch >> Trie.INDEX_STAGE_1_SHIFT_]
-                          << Trie.INDEX_STAGE_2_SHIFT_)
-                    + (ch & Trie.INDEX_STAGE_3_MASK_)];
-            } catch (ArrayIndexOutOfBoundsException e) {
-                return m_trieInitialValue_;
-            }
-        }
-        if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
-            // lead surrogate D800..DBFF
-            return m_trieData_[
-                    (m_trieIndex_[Trie.LEAD_INDEX_OFFSET_
-                                  + (ch >> Trie.INDEX_STAGE_1_SHIFT_)]
-                          << Trie.INDEX_STAGE_2_SHIFT_)
-                    + (ch & Trie.INDEX_STAGE_3_MASK_)];
-        }
-        if (ch <= UTF16.CODEPOINT_MAX_VALUE) {
-            // supplementary code point 10000..10FFFF
-            // look at the construction of supplementary characters
-            // trail forms the ends of it.
-            return m_trie_.getSurrogateValue(
-                                          UTF16.getLeadSurrogate(ch),
-                                          (char)(ch & Trie.SURROGATE_MASK_));
-        }
-        // ch is out of bounds
-        // return m_dataOffset_ if there is an error, in this case we return
-        // the default value: m_initialValue_
-        // we cannot assume that m_initialValue_ is at offset 0
-        // this is for optimization.
-        return m_trieInitialValue_;
-
-        // this all is an inlined form of return m_trie_.getCodePointValue(ch);
-    }
-
-    /**
-    * Getting the unsigned numeric value of a character embedded in the property
-    * argument
-    * @param prop the character
-    * @return unsigned numberic value
-    */
-    public static int getUnsignedValue(int prop)
-    {
-        return (prop >> VALUE_SHIFT_) & UNSIGNED_VALUE_MASK_AFTER_SHIFT_;
+        return m_trie_.get(ch);
     }
 
     /**
      * Gets the unicode additional properties.
-     * C version getUnicodeProperties.
+     * Java version of C u_getUnicodeProperties().
      * @param codepoint codepoint whose additional properties is to be
      *                  retrieved
-     * @param column
+     * @param column The column index.
      * @return unicode properties
      */
        public int getAdditional(int codepoint, int column) {
-        if (column == -1) {
-            return getProperty(codepoint);
-        }
-           if (column < 0 || column >= m_additionalColumnsCount_) {
+        assert column >= 0;
+        if (column >= m_additionalColumnsCount_) {
            return 0;
        }
-       return m_additionalVectors_[
-                     m_additionalTrie_.getCodePointValue(codepoint) + column];
+        return m_additionalVectors_[m_additionalTrie_.get(codepoint) + column];
        }
 
        /**
      * <p>Get the "age" of the code point.</p>
      * <p>The "age" is the Unicode version when the code point was first

@@ -201,10 +137,95 @@
         return VersionInfo.getInstance(
                            (version >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_,
                            version & LAST_NIBBLE_MASK_, 0, 0);
     }
 
+    // int-value and enumerated properties --------------------------------- ***
+
+    public int getType(int c) {
+        return getProperty(c)&TYPE_MASK;
+    }
+
+    /*
+     * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
+     * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
+     */
+    private static final int /* UHangulSyllableType */ gcbToHst[]={
+        HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_OTHER */
+        HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_CONTROL */
+        HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_CR */
+        HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_EXTEND */
+        HangulSyllableType.LEADING_JAMO,     /* U_GCB_L */
+        HangulSyllableType.NOT_APPLICABLE,   /* U_GCB_LF */
+        HangulSyllableType.LV_SYLLABLE,      /* U_GCB_LV */
+        HangulSyllableType.LVT_SYLLABLE,     /* U_GCB_LVT */
+        HangulSyllableType.TRAILING_JAMO,    /* U_GCB_T */
+        HangulSyllableType.VOWEL_JAMO        /* U_GCB_V */
+        /*
+         * Omit GCB values beyond what we need for hst.
+         * The code below checks for the array length.
+         */
+    };
+
+    private class IntProperty {
+        int column;  // SRC_PROPSVEC column, or "source" if mask==0
+        int mask;
+        int shift;
+
+        IntProperty(int column, int mask, int shift) {
+            this.column=column;
+            this.mask=mask;
+            this.shift=shift;
+        }
+
+        IntProperty(int source) {
+            this.column=source;
+            this.mask=0;
+        }
+
+        int getValue(int c) {
+            // systematic, directly stored properties
+            return (getAdditional(c, column)&mask)>>>shift;
+        }
+    }
+
+    private class BiDiIntProperty extends IntProperty {
+        BiDiIntProperty() {
+            super(SRC_BIDI);
+        }
+    }
+
+    private class CombiningClassIntProperty extends IntProperty {
+        CombiningClassIntProperty(int source) {
+            super(source);
+        }
+    }
+
+    private class NormQuickCheckIntProperty extends IntProperty {  // UCHAR_NF*_QUICK_CHECK properties
+        int which;
+        int max;
+
+        NormQuickCheckIntProperty(int source, int which, int max) {
+            super(source);
+            this.which=which;
+            this.max=max;
+        }
+    }
+
+    private IntProperty intProp =  new BiDiIntProperty() {  // BIDI_PAIRED_BRACKET_TYPE
+        int getValue(int c) {
+            return UBiDiProps.INSTANCE.getPairedBracketType(c);
+        }
+    };
+
+    public int getIntPropertyValue(int c, int which) {
+        if (which == BIDI_PAIRED_BRACKET_TYPE) {
+            return intProp.getValue(c);
+        }
+        return 0; // undefined
+    }
+
     /**
     * Forms a supplementary code point from the argument character<br>
     * Note this is for internal use hence no checks for the validity of the
     * surrogate characters are done
     * @param lead lead surrogate character

@@ -215,54 +236,60 @@
     {
         return (lead << LEAD_SURROGATE_SHIFT_) + trail + SURROGATE_OFFSET_;
     }
 
     /**
-    * Loads the property data and initialize the UCharacterProperty instance.
-    * @throws MissingResourceException when data is missing or data has been corrupted
+     * Gets the type mask
+     * @param type character type
+     * @return mask
     */
-    public static UCharacterProperty getInstance()
+    public static final int getMask(int type)
     {
-        if(INSTANCE_ == null) {
-            try {
-                INSTANCE_ = new UCharacterProperty();
+        return 1 << type;
             }
-            catch (Exception e) {
-                throw new MissingResourceException(e.getMessage(),"","");
+
+    /**
+     * Returns the digit values of characters like 'A' - 'Z', normal,
+     * half-width and full-width. This method assumes that the other digit
+     * characters are checked by the calling method.
+     * @param ch character to test
+     * @return -1 if ch is not a character of the form 'A' - 'Z', otherwise
+     *         its corresponding digit will be returned.
+     */
+    public static int getEuropeanDigit(int ch) {
+        if ((ch > 0x7a && ch < 0xff21)
+            || ch < 0x41 || (ch > 0x5a && ch < 0x61)
+            || ch > 0xff5a || (ch > 0xff3a && ch < 0xff41)) {
+            return -1;
+        }
+        if (ch <= 0x7a) {
+            // ch >= 0x41 or ch < 0x61
+            return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61);
             }
+        // ch >= 0xff21
+        if (ch <= 0xff3a) {
+            return ch + 10 - 0xff21;
         }
-        return INSTANCE_;
+        // ch >= 0xff41 && ch <= 0xff5a
+        return ch + 10 - 0xff41;
     }
 
-    /**
-     * Checks if the argument c is to be treated as a white space in ICU
-     * rules. Usually ICU rule white spaces are ignored unless quoted.
-     * Equivalent to test for Pattern_White_Space Unicode property.
-     * Stable set of characters, won't change.
-     * See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
-     * @param c codepoint to check
-     * @return true if c is a ICU white space
-     */
-    public static boolean isRuleWhiteSpace(int c)
-    {
-        /* "white space" in the sense of ICU rule parsers
-           This is a FIXED LIST that is NOT DEPENDENT ON UNICODE PROPERTIES.
-           See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
-           U+0009..U+000D, U+0020, U+0085, U+200E..U+200F, and U+2028..U+2029
-           Equivalent to test for Pattern_White_Space Unicode property.
-        */
-        return (c >= 0x0009 && c <= 0x2029 &&
-                (c <= 0x000D || c == 0x0020 || c == 0x0085 ||
-                 c == 0x200E || c == 0x200F || c >= 0x2028));
+    public int digit(int c) {
+        int value = getNumericTypeValue(getProperty(c)) - NTV_DECIMAL_START_;
+        if(value<=9) {
+            return value;
+        } else {
+            return -1;
+        }
     }
 
     // protected variables -----------------------------------------------
 
     /**
      * Extra property trie
      */
-    CharTrie m_additionalTrie_;
+    Trie2_16 m_additionalTrie_;
     /**
      * Extra property vectors, 1st column for age and second for binary
      * properties.
      */
     int m_additionalVectors_[];

@@ -278,52 +305,182 @@
     /**
      * Maximum values for script, bits used as in vector word
      * 0
      */
      int m_maxJTGValue_;
-
-    // private variables -------------------------------------------------
-
       /**
-     * UnicodeData.txt property object
+     * Script_Extensions data
      */
-    private static UCharacterProperty INSTANCE_ = null;
+    public char[] m_scriptExtensions_;
+
+    // private variables -------------------------------------------------
 
     /**
     * Default name of the datafile
     */
     private static final String DATA_FILE_NAME_ = "/sun/text/resources/uprops.icu";
 
     /**
-    * Default buffer size of datafile
+    * Shift value for lead surrogate to form a supplementary character.
     */
-    private static final int DATA_BUFFER_SIZE_ = 25000;
-
+    private static final int LEAD_SURROGATE_SHIFT_ = 10;
     /**
-    * Numeric value shift
+    * Offset to add to combined surrogate pair to avoid masking.
     */
-    private static final int VALUE_SHIFT_ = 8;
+    private static final int SURROGATE_OFFSET_ =
+                           UTF16.SUPPLEMENTARY_MIN_VALUE -
+                           (UTF16.SURROGATE_MIN_VALUE <<
+                           LEAD_SURROGATE_SHIFT_) -
+                           UTF16.TRAIL_SURROGATE_MIN_VALUE;
+
+
+    // property data constants -------------------------------------------------
 
     /**
-    * Mask to be applied after shifting to obtain an unsigned numeric value
+     * Numeric types and values in the main properties words.
     */
-    private static final int UNSIGNED_VALUE_MASK_AFTER_SHIFT_ = 0xFF;
+    private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6;
+    private static final int getNumericTypeValue(int props) {
+        return props >> NUMERIC_TYPE_VALUE_SHIFT_;
+    }
 
+    /* constants for the storage form of numeric types and values */
+    /** No numeric value. */
+    private static final int NTV_NONE_ = 0;
+    /** Decimal digits: nv=0..9 */
+    private static final int NTV_DECIMAL_START_ = 1;
+    /** Other digits: nv=0..9 */
+    private static final int NTV_DIGIT_START_ = 11;
+    /** Small integers: nv=0..154 */
+    private static final int NTV_NUMERIC_START_ = 21;
+
+    private static final int ntvGetType(int ntv) {
+        return
+            (ntv==NTV_NONE_) ? NumericType.NONE :
+            (ntv<NTV_DIGIT_START_) ?  NumericType.DECIMAL :
+            (ntv<NTV_NUMERIC_START_) ? NumericType.DIGIT :
+            NumericType.NUMERIC;
+    }
+
+    /*
+     * Properties in vector word 0
+     * Bits
+     * 31..24   DerivedAge version major/minor one nibble each
+     * 23..22   3..1: Bits 7..0 = Script_Extensions index
+     *             3: Script value from Script_Extensions
+     *             2: Script=Inherited
+     *             1: Script=Common
+     *             0: Script=bits 7..0
+     * 21..20   reserved
+     * 19..17   East Asian Width
+     * 16.. 8   UBlockCode
+     *  7.. 0   UScriptCode
+     */
     /**
-    * Shift value for lead surrogate to form a supplementary character.
+     * Script_Extensions: mask includes Script
     */
-    private static final int LEAD_SURROGATE_SHIFT_ = 10;
+    public static final int SCRIPT_X_MASK = 0x00c000ff;
+    //private static final int SCRIPT_X_SHIFT = 22;
     /**
-    * Offset to add to combined surrogate pair to avoid msking.
+     * Integer properties mask and shift values for East Asian cell width.
+     * Equivalent to icu4c UPROPS_EA_MASK
     */
-    private static final int SURROGATE_OFFSET_ =
-                           UTF16.SUPPLEMENTARY_MIN_VALUE -
-                           (UTF16.SURROGATE_MIN_VALUE <<
-                           LEAD_SURROGATE_SHIFT_) -
-                           UTF16.TRAIL_SURROGATE_MIN_VALUE;
+    private static final int EAST_ASIAN_MASK_ = 0x000e0000;
+    /**
+     * Integer properties mask and shift values for East Asian cell width.
+     * Equivalent to icu4c UPROPS_EA_SHIFT
+     */
+    private static final int EAST_ASIAN_SHIFT_ = 17;
+    /**
+     * Integer properties mask and shift values for blocks.
+     * Equivalent to icu4c UPROPS_BLOCK_MASK
+     */
+    private static final int BLOCK_MASK_ = 0x0001ff00;
+    /**
+     * Integer properties mask and shift values for blocks.
+     * Equivalent to icu4c UPROPS_BLOCK_SHIFT
+     */
+    private static final int BLOCK_SHIFT_ = 8;
+    /**
+     * Integer properties mask and shift values for scripts.
+     * Equivalent to icu4c UPROPS_SHIFT_MASK
+     */
+    public static final int SCRIPT_MASK_ = 0x000000ff;
 
-    // additional properties ----------------------------------------------
+    /**
+     * Additional properties used in internal trie data
+     */
+    /*
+     * Properties in vector word 1
+     * Each bit encodes one binary property.
+     * The following constants represent the bit number, use 1<<UPROPS_XYZ.
+     * UPROPS_BINARY_1_TOP<=32!
+     *
+     * Keep this list of property enums in sync with
+     * propListNames[] in icu/source/tools/genprops/props2.c!
+     *
+     * ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
+     */
+    private static final int WHITE_SPACE_PROPERTY_ = 0;
+    private static final int DASH_PROPERTY_ = 1;
+    private static final int HYPHEN_PROPERTY_ = 2;
+    private static final int QUOTATION_MARK_PROPERTY_ = 3;
+    private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 4;
+    private static final int MATH_PROPERTY_ = 5;
+    private static final int HEX_DIGIT_PROPERTY_ = 6;
+    private static final int ASCII_HEX_DIGIT_PROPERTY_ = 7;
+    private static final int ALPHABETIC_PROPERTY_ = 8;
+    private static final int IDEOGRAPHIC_PROPERTY_ = 9;
+    private static final int DIACRITIC_PROPERTY_ = 10;
+    private static final int EXTENDER_PROPERTY_ = 11;
+    private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 12;
+    private static final int GRAPHEME_EXTEND_PROPERTY_ = 13;
+    private static final int GRAPHEME_LINK_PROPERTY_ = 14;
+    private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 15;
+    private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 16;
+    private static final int RADICAL_PROPERTY_ = 17;
+    private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 18;
+    private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 19;
+    private static final int DEPRECATED_PROPERTY_ = 20;
+    private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 21;
+    private static final int XID_START_PROPERTY_ = 22;
+    private static final int XID_CONTINUE_PROPERTY_ = 23;
+    private static final int ID_START_PROPERTY_    = 24;
+    private static final int ID_CONTINUE_PROPERTY_ = 25;
+    private static final int GRAPHEME_BASE_PROPERTY_ = 26;
+    private static final int S_TERM_PROPERTY_ = 27;
+    private static final int VARIATION_SELECTOR_PROPERTY_ = 28;
+    private static final int PATTERN_SYNTAX = 29;                   /* new in ICU 3.4 and Unicode 4.1 */
+    private static final int PATTERN_WHITE_SPACE = 30;
+
+    /*
+     * Properties in vector word 2
+     * Bits
+     * 31..26   reserved
+     * 25..20   Line Break
+     * 19..15   Sentence Break
+     * 14..10   Word Break
+     *  9.. 5   Grapheme Cluster Break
+     *  4.. 0   Decomposition Type
+     */
+    private static final int LB_MASK          = 0x03f00000;
+    private static final int LB_SHIFT         = 20;
+
+    private static final int SB_MASK          = 0x000f8000;
+    private static final int SB_SHIFT         = 15;
+
+    private static final int WB_MASK          = 0x00007c00;
+    private static final int WB_SHIFT         = 10;
+
+    private static final int GCB_MASK         = 0x000003e0;
+    private static final int GCB_SHIFT        = 5;
+
+    /**
+     * Integer properties mask for decomposition type.
+     * Equivalent to icu4c UPROPS_DT_MASK.
+     */
+    private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;
 
     /**
      * First nibble shift
      */
     private static final int FIRST_NIBBLE_SHIFT_ = 0x4;

@@ -343,27 +500,108 @@
     * @exception IOException thrown when data reading fails or data corrupted
     */
     private UCharacterProperty() throws IOException
     {
         // jar access
-        InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME_);
-        BufferedInputStream b = new BufferedInputStream(is, DATA_BUFFER_SIZE_);
-        UCharacterPropertyReader reader = new UCharacterPropertyReader(b);
-        reader.read(this);
-        b.close();
-
-        m_trie_.putIndexData(this);
+        ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_);
+        m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable());
+        // Read or skip the 16 indexes.
+        int propertyOffset = bytes.getInt();
+        /* exceptionOffset = */ bytes.getInt();
+        /* caseOffset = */ bytes.getInt();
+        int additionalOffset = bytes.getInt();
+        int additionalVectorsOffset = bytes.getInt();
+        m_additionalColumnsCount_ = bytes.getInt();
+        int scriptExtensionsOffset = bytes.getInt();
+        int reservedOffset7 = bytes.getInt();
+        /* reservedOffset8 = */ bytes.getInt();
+        /* dataTopOffset = */ bytes.getInt();
+        m_maxBlockScriptValue_ = bytes.getInt();
+        m_maxJTGValue_ = bytes.getInt();
+        ICUBinary.skipBytes(bytes, (16 - 12) << 2);
+
+        // read the main properties trie
+        m_trie_ = Trie2_16.createFromSerialized(bytes);
+        int expectedTrieLength = (propertyOffset - 16) * 4;
+        int trieLength = m_trie_.getSerializedLength();
+        if(trieLength > expectedTrieLength) {
+            throw new IOException("uprops.icu: not enough bytes for main trie");
+        }
+        // skip padding after trie bytes
+        ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);
+
+        // skip unused intervening data structures
+        ICUBinary.skipBytes(bytes, (additionalOffset - propertyOffset) * 4);
+
+        if(m_additionalColumnsCount_ > 0) {
+            // reads the additional property block
+            m_additionalTrie_ = Trie2_16.createFromSerialized(bytes);
+            expectedTrieLength = (additionalVectorsOffset-additionalOffset)*4;
+            trieLength = m_additionalTrie_.getSerializedLength();
+            if(trieLength > expectedTrieLength) {
+                throw new IOException("uprops.icu: not enough bytes for additional-properties trie");
+            }
+            // skip padding after trie bytes
+            ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength);
+
+            // additional properties
+            int size = scriptExtensionsOffset - additionalVectorsOffset;
+            m_additionalVectors_ = new int[size];
+            for (int i = 0; i < size; i ++) {
+                m_additionalVectors_[i] = bytes.getInt();
+            }
+        }
+
+        // Script_Extensions
+        int numChars = (reservedOffset7 - scriptExtensionsOffset) * 2;
+        if(numChars > 0) {
+            m_scriptExtensions_ = new char[numChars];
+            for(int i = 0; i < numChars; ++i) {
+                m_scriptExtensions_[i] = bytes.getChar();
+            }
+        }
+    }
+
+    private static final class IsAcceptable implements ICUBinary.Authenticate {
+        // @Override when we switch to Java 6
+        public boolean isDataVersionAcceptable(byte version[]) {
+            return version[0] == 7;
+        }
     }
 
+    private static final int DATA_FORMAT = 0x5550726F;  // "UPro"
+
     public void upropsvec_addPropertyStarts(UnicodeSet set) {
         /* add the start code point of each same-value range of the properties vectors trie */
         if(m_additionalColumnsCount_>0) {
             /* if m_additionalColumnsCount_==0 then the properties vectors trie may not be there at all */
-            TrieIterator propsVectorsIter = new TrieIterator(m_additionalTrie_);
-            RangeValueIterator.Element propsVectorsResult = new RangeValueIterator.Element();
-            while(propsVectorsIter.next(propsVectorsResult)){
-                set.add(propsVectorsResult.start);
+            Iterator<Trie2.Range> trieIterator = m_additionalTrie_.iterator();
+            Trie2.Range range;
+            while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
+                set.add(range.startCodePoint);
+            }
+        }
             }
+
+    // This static initializer block must be placed after
+    // other static member initialization
+    static {
+        try {
+            INSTANCE = new UCharacterProperty();
         }
+        catch (IOException e) {
+            throw new MissingResourceException(e.getMessage(),DATA_FILE_NAME_,"");
     }
+    }
+
+
+    // Moved from UProperty.java
+    /**
+     * Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
+     * Used in UAX #9: Unicode Bidirectional Algorithm
+     * (http://www.unicode.org/reports/tr9/)
+     * Returns UCharacter.BidiPairedBracketType values.
+     * @stable ICU 52
+     */
+    public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015;
 
 }
< prev index next >