< prev index next >

src/java.base/share/classes/sun/text/normalizer/Trie.java

Print this page




 143     *                       trie data
 144     */
 145     protected Trie(char index[], int options, DataManipulate dataManipulate)
 146     { // initializes the options, data manipulator, Latin-1 flag, index array and data offset
 147         m_options_ = options;
 148         if(dataManipulate != null) {
 149             m_dataManipulate_ = dataManipulate;
 150         } else {
 151             m_dataManipulate_ = new DefaultGetFoldingOffset(); // default used when the caller passes null
 152         }
 153         m_isLatin1Linear_ = (m_options_ &
 154                              HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0; // true when the masked option bit is set
 155         m_index_ = index; // keeps the caller's array reference; no copy is made
 156         m_dataOffset_ = m_index_.length; // data offset is set to the length of the index array
 157     }
 158 
 159     // protected data members ------------------------------------------
 160 
 161     /**
 162     * Lead surrogate code points' index displacement in the index array.

 163     * 0x10000-0xd800=0x2800
 164     * 0x2800 >> INDEX_STAGE_1_SHIFT_

 165     */
 166     protected static final int LEAD_INDEX_OFFSET_ = 0x2800 >> 5; // == 0x140; the literal 5 equals INDEX_STAGE_1_SHIFT_
 167     /**
 168     * Shift size for shifting right the input index. 1..9
 169     */
 170     protected static final int INDEX_STAGE_1_SHIFT_ = 5;
 171     /**
 172     * Shift size for shifting left the index array values.
 173     * Increases possible data size with 16-bit index values at the cost
 174     * of compactability.
 175     * This requires blocks of stage 2 data to be aligned by
 176     * DATA_GRANULARITY.
 177     * 0..INDEX_STAGE_1_SHIFT_
 178     */
 179     protected static final int INDEX_STAGE_2_SHIFT_ = 2;
 180     /**
 181      * Number of data values in a stage 2 (data array) block.
 182      */
 183     protected static final int DATA_BLOCK_LENGTH=1<<INDEX_STAGE_1_SHIFT_; // 1 << 5 == 32
 184     /**
 185     * Mask for getting the lower bits from the input index.
 186     * DATA_BLOCK_LENGTH - 1.
 187     */
 188     protected static final int INDEX_STAGE_3_MASK_ = DATA_BLOCK_LENGTH - 1; // 32 - 1 == 0x1F
 189     /** Number of bits of a trail surrogate that are used in index table lookups. */
 190     protected static final int SURROGATE_BLOCK_BITS=10-INDEX_STAGE_1_SHIFT_; // 10 - 5 == 5
 191     /**
 192      * Number of index (stage 1) entries per lead surrogate.
 193      * Same as number of index entries for 1024 trail surrogates,
 194      * ==0x400>>INDEX_STAGE_1_SHIFT_
 195      */
 196     protected static final int SURROGATE_BLOCK_COUNT=(1<<SURROGATE_BLOCK_BITS); // 1 << 5 == 32
 197     /** Length of the BMP portion of the index (stage 1) array. */
 198     protected static final int BMP_INDEX_LENGTH=0x10000>>INDEX_STAGE_1_SHIFT_; // 0x10000 >> 5 == 0x800
 199     /**
 200     * Surrogate mask to use when shifting offset to retrieve supplementary
 201     * values
 202     */
 203     protected static final int SURROGATE_MASK_ = 0x3FF; // low 10 bits set
 204     /**
 205     * Index or UTF16 characters
 206     */
 207     protected char m_index_[];
 208     /**
 209     * Internal TrieValue which handles the parsing of the data value.
 210     * This class is to be implemented by the user
 211     */
 212     protected DataManipulate m_dataManipulate_;
 213     /**
 214     * Start index of the data portion of the trie. CharTrie combines


 280                 : getRawOffset(0, ch);
 281                 // using a getRawOffset(ch) makes no diff
 282     }
 283 
 284     /**
 285     * Gets the offset to the data which this lead surrogate character points
 286     * to.
 287     * Data at the returned offset may contain folding offset information for
 288     * the next trailing surrogate character.
 289     * @param ch lead surrogate character
 290     * @return offset to data
 291     */
 292     protected final int getLeadOffset(char ch)
 293     {
 294        return getRawOffset(0, ch); // delegates to getRawOffset (defined outside this excerpt) with 0 as first argument and ch unchanged
 295     }
 296 
 297     /**
 298     * Internal trie getter from a code point.
 299     * Could be faster(?) but longer with
 300     *   if((c32)<=0xd7ff) { (result)=_TRIE_GET_RAW(trie, data, 0, c32); }
 301     * Gets the offset to data which the codepoint points to
 302     * @param ch codepoint
 303     * @return offset to data
 304     */
 305     protected final int getCodePointOffset(int ch)
 306     {
 307         // if ((ch >> 16) == 0) slower
 308         if (ch < 0) {
 309             return -1;
 310         } else if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
 311             // fastpath for the part of the BMP below surrogates (D800) where getRawOffset() works
 312             return getRawOffset(0, (char)ch);
 313         } else if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) {
 314             // BMP codepoint
 315             return getBMPOffset((char)ch);
 316         } else if (ch <= UCharacter.MAX_VALUE) {
 317             // look at the construction of supplementary characters
 318             // trail forms the ends of it.
 319             return getSurrogateOffset(UTF16.getLeadSurrogate(ch),
 320                                       (char)(ch & SURROGATE_MASK_));




 143     *                       trie data
 144     */
 145     protected Trie(char index[], int options, DataManipulate dataManipulate)
 146     { // initializes the options, data manipulator, Latin-1 flag, index array and data offset
 147         m_options_ = options;
 148         if(dataManipulate != null) {
 149             m_dataManipulate_ = dataManipulate;
 150         } else {
 151             m_dataManipulate_ = new DefaultGetFoldingOffset(); // default used when the caller passes null
 152         }
 153         m_isLatin1Linear_ = (m_options_ &
 154                              HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0; // true when the masked option bit is set
 155         m_index_ = index; // keeps the caller's array reference; no copy is made
 156         m_dataOffset_ = m_index_.length; // data offset is set to the length of the index array
 157     }
 158 
 159     // protected data members ------------------------------------------
 160 
 161     /**
 162     * Lead surrogate code points' index displacement in the index array.
 163     * <pre>{@code
 164     * 0x10000-0xd800=0x2800
 165     * 0x2800 >> INDEX_STAGE_1_SHIFT_
 166     * }</pre>
 167     */
 168     protected static final int LEAD_INDEX_OFFSET_ = 0x2800 >> 5; // == 0x140; the literal 5 equals INDEX_STAGE_1_SHIFT_
 169     /**
 170     * Shift size for shifting right the input index. 1..9
 171     */
 172     protected static final int INDEX_STAGE_1_SHIFT_ = 5;
 173     /**
 174     * Shift size for shifting left the index array values.
 175     * Increases possible data size with 16-bit index values at the cost
 176     * of compactability.
 177     * This requires blocks of stage 2 data to be aligned by
 178     * DATA_GRANULARITY.
 179     * 0..INDEX_STAGE_1_SHIFT_
 180     */
 181     protected static final int INDEX_STAGE_2_SHIFT_ = 2;
 182     /**
 183      * Number of data values in a stage 2 (data array) block.
 184      */
 185     protected static final int DATA_BLOCK_LENGTH=1<<INDEX_STAGE_1_SHIFT_; // 1 << 5 == 32
 186     /**
 187     * Mask for getting the lower bits from the input index.
 188     * DATA_BLOCK_LENGTH - 1.
 189     */
 190     protected static final int INDEX_STAGE_3_MASK_ = DATA_BLOCK_LENGTH - 1; // 32 - 1 == 0x1F
 191     /** Number of bits of a trail surrogate that are used in index table lookups. */
 192     protected static final int SURROGATE_BLOCK_BITS=10-INDEX_STAGE_1_SHIFT_; // 10 - 5 == 5
 193     /**
 194      * Number of index (stage 1) entries per lead surrogate.
 195      * Same as number of index entries for 1024 trail surrogates,
 196      * {@code ==0x400>>INDEX_STAGE_1_SHIFT_}
 197      */
 198     protected static final int SURROGATE_BLOCK_COUNT=(1<<SURROGATE_BLOCK_BITS); // 1 << 5 == 32
 199     /** Length of the BMP portion of the index (stage 1) array. */
 200     protected static final int BMP_INDEX_LENGTH=0x10000>>INDEX_STAGE_1_SHIFT_; // 0x10000 >> 5 == 0x800
 201     /**
 202     * Surrogate mask to use when shifting offset to retrieve supplementary
 203     * values
 204     */
 205     protected static final int SURROGATE_MASK_ = 0x3FF; // low 10 bits set
 206     /**
 207     * Index or UTF16 characters
 208     */
 209     protected char m_index_[];
 210     /**
 211     * Internal TrieValue which handles the parsing of the data value.
 212     * This class is to be implemented by the user
 213     */
 214     protected DataManipulate m_dataManipulate_;
 215     /**
 216     * Start index of the data portion of the trie. CharTrie combines


 282                 : getRawOffset(0, ch);
 283                 // using a getRawOffset(ch) makes no diff
 284     }
 285 
 286     /**
 287     * Gets the offset to the data which this lead surrogate character points
 288     * to.
 289     * Data at the returned offset may contain folding offset information for
 290     * the next trailing surrogate character.
 291     * @param ch lead surrogate character
 292     * @return offset to data
 293     */
 294     protected final int getLeadOffset(char ch)
 295     {
 296        return getRawOffset(0, ch); // delegates to getRawOffset (defined outside this excerpt) with 0 as first argument and ch unchanged
 297     }
 298 
 299     /**
 300     * Internal trie getter from a code point.
 301     * Could be faster(?) but longer with
 302     * {@code if((c32)<=0xd7ff) { (result)=_TRIE_GET_RAW(trie, data, 0, c32); }}
 303     * Gets the offset to data which the codepoint points to
 304     * @param ch codepoint
 305     * @return offset to data
 306     */
 307     protected final int getCodePointOffset(int ch)
 308     {
 309         // if ((ch >> 16) == 0) slower
 310         if (ch < 0) {
 311             return -1;
 312         } else if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
 313             // fastpath for the part of the BMP below surrogates (D800) where getRawOffset() works
 314             return getRawOffset(0, (char)ch);
 315         } else if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) {
 316             // BMP codepoint
 317             return getBMPOffset((char)ch);
 318         } else if (ch <= UCharacter.MAX_VALUE) {
 319             // look at the construction of supplementary characters
 320             // trail forms the ends of it.
 321             return getSurrogateOffset(UTF16.getLeadSurrogate(ch),
 322                                       (char)(ch & SURROGATE_MASK_));


< prev index next >