143 * trie data
144 */
145 protected Trie(char index[], int options, DataManipulate dataManipulate)
146 {
147 m_options_ = options;
148 if(dataManipulate != null) {
149 m_dataManipulate_ = dataManipulate;
150 } else { // no manipulator supplied: fall back to DefaultGetFoldingOffset
151 m_dataManipulate_ = new DefaultGetFoldingOffset();
152 }
153 m_isLatin1Linear_ = (m_options_ & // true when the Latin-1-linear option bit is set
154 HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0;
155 m_index_ = index;
156 m_dataOffset_ = m_index_.length; // dereferences index: a null index throws NullPointerException here
157 }
158
159 // protected data members ------------------------------------------
160
161 /**
162 * Index displacement of lead surrogate code points in the index array:
163 * {@code 0x10000 - 0xd800 = 0x2800}, shifted as
164 * {@code 0x2800 >> INDEX_STAGE_1_SHIFT_} (written with the literal shift 5).
165 */
166 protected static final int LEAD_INDEX_OFFSET_ = 0x2800 >> 5;
167 /**
168 * Shift size for shifting right the input index. Allowed range: 1..9
169 */
170 protected static final int INDEX_STAGE_1_SHIFT_ = 5;
171 /**
172 * Shift size for shifting left the index array values.
173 * Increases the possible data size with 16-bit index values at the cost
174 * of compactness.
175 * This requires blocks of stage 2 data to be aligned by
176 * DATA_GRANULARITY.
177 * Allowed range: 0..INDEX_STAGE_1_SHIFT_
178 */
179 protected static final int INDEX_STAGE_2_SHIFT_ = 2;
180 /**
181 * Number of data values in a stage 2 (data array) block: {@code 1 << INDEX_STAGE_1_SHIFT_}.
182 */
183 protected static final int DATA_BLOCK_LENGTH=1<<INDEX_STAGE_1_SHIFT_;
184 /**
185 * Mask for getting the lower bits from the input index,
186 * i.e. {@code DATA_BLOCK_LENGTH - 1}.
187 */
188 protected static final int INDEX_STAGE_3_MASK_ = DATA_BLOCK_LENGTH - 1;
189 /** Number of bits of a trail surrogate that are used in index table lookups: {@code 10 - INDEX_STAGE_1_SHIFT_}. */
190 protected static final int SURROGATE_BLOCK_BITS=10-INDEX_STAGE_1_SHIFT_;
191 /**
192 * Number of index (stage 1) entries per lead surrogate.
193 * Same as the number of index entries for 1024 trail surrogates,
194 * {@code == 0x400 >> INDEX_STAGE_1_SHIFT_}
195 */
196 protected static final int SURROGATE_BLOCK_COUNT=(1<<SURROGATE_BLOCK_BITS);
197 /** Length of the BMP portion of the index (stage 1) array: {@code 0x10000 >> INDEX_STAGE_1_SHIFT_}. */
198 protected static final int BMP_INDEX_LENGTH=0x10000>>INDEX_STAGE_1_SHIFT_;
199 /**
200 * Mask selecting the low 10 bits (0x3FF) of a value; applied to a code
201 * point when computing the offset of supplementary values.
202 */
203 protected static final int SURROGATE_MASK_ = 0x3FF;
204 /**
205 * Index array of the trie, held as 16-bit char values.
206 */
207 protected char m_index_[];
208 /**
209 * DataManipulate implementation which handles the parsing of the data value.
210 * Supplied by the user; the constructor installs a default when given null.
211 */
212 protected DataManipulate m_dataManipulate_;
213 /**
214 * Start index of the data portion of the trie. CharTrie combines
280 : getRawOffset(0, ch);
281 // using a getRawOffset(ch) makes no diff
282 }
283
284 /**
285 * Returns the offset of the data that the given lead surrogate character
286 * points to, obtained by a raw offset lookup.
287 * Data at the returned offset may contain folding offset information for
288 * the next trailing surrogate character.
289 * @param ch lead surrogate character
290 * @return offset to data
291 */
292 protected final int getLeadOffset(char ch)
293 {
294 return getRawOffset(0, ch); // first argument 0: presumably "no extra index offset" - confirm against getRawOffset
295 }
296
297 /**
298 * Internal trie getter from a code point.
299 * Could be faster(?) but longer with
300 * {@code if((c32)<=0xd7ff) { (result)=_TRIE_GET_RAW(trie, data, 0, c32); }}
301 * Gets the offset to data which the codepoint points to
302 * @param ch codepoint
303 * @return offset to data
304 */
305 protected final int getCodePointOffset(int ch)
306 {
307 // if ((ch >> 16) == 0) slower
308 if (ch < 0) {
309 return -1;
310 } else if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
311 // fastpath for the part of the BMP below surrogates (D800) where getRawOffset() works
312 return getRawOffset(0, (char)ch);
313 } else if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) {
314 // BMP codepoint
315 return getBMPOffset((char)ch);
316 } else if (ch <= UCharacter.MAX_VALUE) {
317 // look at the construction of supplementary characters
318 // trail forms the ends of it.
319 return getSurrogateOffset(UTF16.getLeadSurrogate(ch),
320 (char)(ch & SURROGATE_MASK_));
|
143 * trie data
144 */
145 protected Trie(char index[], int options, DataManipulate dataManipulate)
146 {
147 m_options_ = options;
148 if(dataManipulate != null) {
149 m_dataManipulate_ = dataManipulate;
150 } else { // no manipulator supplied: fall back to DefaultGetFoldingOffset
151 m_dataManipulate_ = new DefaultGetFoldingOffset();
152 }
153 m_isLatin1Linear_ = (m_options_ & // true when the Latin-1-linear option bit is set
154 HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0;
155 m_index_ = index;
156 m_dataOffset_ = m_index_.length; // dereferences index: a null index throws NullPointerException here
157 }
158
159 // protected data members ------------------------------------------
160
161 /**
162 * Index displacement of lead surrogate code points in the index array.
163 * <pre>{@code
164 * 0x10000-0xd800=0x2800
165 * 0x2800 >> INDEX_STAGE_1_SHIFT_   (written with the literal shift 5)
166 * }</pre>
167 */
168 protected static final int LEAD_INDEX_OFFSET_ = 0x2800 >> 5;
169 /**
170 * Shift size for shifting right the input index. Allowed range: 1..9
171 */
172 protected static final int INDEX_STAGE_1_SHIFT_ = 5;
173 /**
174 * Shift size for shifting left the index array values.
175 * Increases the possible data size with 16-bit index values at the cost
176 * of compactness.
177 * This requires blocks of stage 2 data to be aligned by
178 * DATA_GRANULARITY.
179 * Allowed range: 0..INDEX_STAGE_1_SHIFT_
180 */
181 protected static final int INDEX_STAGE_2_SHIFT_ = 2;
182 /**
183 * Number of data values in a stage 2 (data array) block: {@code 1 << INDEX_STAGE_1_SHIFT_}.
184 */
185 protected static final int DATA_BLOCK_LENGTH=1<<INDEX_STAGE_1_SHIFT_;
186 /**
187 * Mask for getting the lower bits from the input index,
188 * i.e. {@code DATA_BLOCK_LENGTH - 1}.
189 */
190 protected static final int INDEX_STAGE_3_MASK_ = DATA_BLOCK_LENGTH - 1;
191 /** Number of bits of a trail surrogate that are used in index table lookups: {@code 10 - INDEX_STAGE_1_SHIFT_}. */
192 protected static final int SURROGATE_BLOCK_BITS=10-INDEX_STAGE_1_SHIFT_;
193 /**
194 * Number of index (stage 1) entries per lead surrogate.
195 * Same as the number of index entries for 1024 trail surrogates,
196 * {@code == 0x400 >> INDEX_STAGE_1_SHIFT_}
197 */
198 protected static final int SURROGATE_BLOCK_COUNT=(1<<SURROGATE_BLOCK_BITS);
199 /** Length of the BMP portion of the index (stage 1) array: {@code 0x10000 >> INDEX_STAGE_1_SHIFT_}. */
200 protected static final int BMP_INDEX_LENGTH=0x10000>>INDEX_STAGE_1_SHIFT_;
201 /**
202 * Mask selecting the low 10 bits (0x3FF) of a value; applied to a code
203 * point when computing the offset of supplementary values.
204 */
205 protected static final int SURROGATE_MASK_ = 0x3FF;
206 /**
207 * Index array of the trie, held as 16-bit char values.
208 */
209 protected char m_index_[];
210 /**
211 * DataManipulate implementation which handles the parsing of the data value.
212 * Supplied by the user; the constructor installs a default when given null.
213 */
214 protected DataManipulate m_dataManipulate_;
215 /**
216 * Start index of the data portion of the trie. CharTrie combines
282 : getRawOffset(0, ch);
283 // using a getRawOffset(ch) makes no diff
284 }
285
286 /**
287 * Returns the offset of the data that the given lead surrogate character
288 * points to, obtained by a raw offset lookup.
289 * Data at the returned offset may contain folding offset information for
290 * the next trailing surrogate character.
291 * @param ch lead surrogate character
292 * @return offset to data
293 */
294 protected final int getLeadOffset(char ch)
295 {
296 return getRawOffset(0, ch); // first argument 0: presumably "no extra index offset" - confirm against getRawOffset
297 }
298
299 /**
300 * Internal trie getter from a code point.
301 * Could be faster(?) but longer with
302 * {@code if((c32)<=0xd7ff) { (result)=_TRIE_GET_RAW(trie, data, 0, c32); }}
303 * Gets the offset to data which the codepoint points to
304 * @param ch codepoint
305 * @return offset to data
306 */
307 protected final int getCodePointOffset(int ch)
308 {
309 // if ((ch >> 16) == 0) slower
310 if (ch < 0) {
311 return -1;
312 } else if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
313 // fastpath for the part of the BMP below surrogates (D800) where getRawOffset() works
314 return getRawOffset(0, (char)ch);
315 } else if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) {
316 // BMP codepoint
317 return getBMPOffset((char)ch);
318 } else if (ch <= UCharacter.MAX_VALUE) {
319 // look at the construction of supplementary characters
320 // trail forms the ends of it.
321 return getSurrogateOffset(UTF16.getLeadSurrogate(ch),
322 (char)(ch & SURROGATE_MASK_));
|