--- old/jdk/src/java.base/share/classes/sun/text/normalizer/TrieIterator.java 2015-07-13 16:12:10.000000000 +0900 +++ /dev/null 2015-07-13 16:12:10.000000000 +0900 @@ -1,547 +0,0 @@ -/* - * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ -/* - ******************************************************************************* - * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved * - * * - * The original version of this source code and documentation is copyrighted * - * and owned by IBM, These materials are provided under terms of a License * - * Agreement between IBM and Sun. This technology is protected by multiple * - * US and International patents. This notice and attribution to IBM may not * - * to removed. * - ******************************************************************************* - */ - -package sun.text.normalizer; - -/** - * Class enabling iteration of the values in a Trie. - *

Result of each iteration contains the interval of codepoints that have - * the same value type and the value type itself. - *

The comparison of each codepoint value is done via extract(), which the - * default implementation is to return the value as it is. - *

Method extract() can be overwritten to perform manipulations on - * codepoint values in order to perform specialized comparison. - *

TrieIterator is designed to be a generic iterator for the CharTrie - * and the IntTrie, hence to accommodate both types of data, the return - * result will be in terms of int (32 bit) values. - *

See com.ibm.icu.text.UCharacterTypeIterator for examples of use. - *

Notes for porting utrie_enum from icu4c to icu4j:
- * Internally, icu4c's utrie_enum performs all iterations in its body. In Java - * sense, the caller will have to pass a object with a callback function - * UTrieEnumRange(const void *context, UChar32 start, UChar32 limit, - * uint32_t value) into utrie_enum. utrie_enum will then find ranges of - * codepoints with the same value as determined by - * UTrieEnumValue(const void *context, uint32_t value). for each range, - * utrie_enum calls the callback function to perform a task. In this way, - * icu4c performs the iteration within utrie_enum. - * To follow the JDK model, icu4j is slightly different from icu4c. - * Instead of requesting the caller to implement an object for a callback. - * The caller will have to implement a subclass of TrieIterator, fleshing out - * the method extract(int) (equivalent to UTrieEnumValue). Independent of icu4j, - * the caller will have to code his own iteration and flesh out the task - * (equivalent to UTrieEnumRange) to be performed in the iteration loop. - * - *

There are basically 3 usage scenarios for porting: - *

1) UTrieEnumValue is the only implemented callback then just implement a - * subclass of TrieIterator and override the extract(int) method. The - * extract(int) method is analogus to UTrieEnumValue callback. - * - *

2) UTrieEnumValue and UTrieEnumRange both are implemented then implement - * a subclass of TrieIterator, override the extract method and iterate, e.g.
- * {@code utrie_enum(&normTrie, _enumPropertyStartsValue, _enumPropertyStartsRange, - * set);}
- * In Java:
- *

- * class TrieIteratorImpl extends TrieIterator{
- *     public TrieIteratorImpl(Trie data){
- *         super(data);
- *     }
- *     public int extract(int value){
- *         // port the implementation of _enumPropertyStartsValue here
- *     }
- * }
- * ....
- * TrieIterator fcdIter  = new TrieIteratorImpl(fcdTrieImpl.fcdTrie);
- * while(fcdIter.next(result)) {
- *     // port the implementation of _enumPropertyStartsRange
- * }
- * 
- * - *

3) UTrieEnumRange is the only implemented callback then just implement - * the while loop, when utrie_enum is called - *

{@code
- * // utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set);
- * TrieIterator fcdIter  = new TrieIterator(fcdTrieImpl.fcdTrie);
- * while(fcdIter.next(result)){
- *     set.add(result.start);
- * }
- * }
- * - * @author synwee - * @see com.ibm.icu.impl.Trie - * @see com.ibm.icu.lang.UCharacterTypeIterator - * @since release 2.1, Jan 17 2002 - */ -public class TrieIterator implements RangeValueIterator -{ - - // public constructor --------------------------------------------- - - /** - * TrieEnumeration constructor - * @param trie to be used - * @exception IllegalArgumentException throw when argument is null. - */ - public TrieIterator(Trie trie) - { - if (trie == null) { - throw new IllegalArgumentException( - "Argument trie cannot be null"); - } - m_trie_ = trie; - // synwee: check that extract belongs to the child class - m_initialValue_ = extract(m_trie_.getInitialValue()); - reset(); - } - - // public methods ------------------------------------------------- - - /** - *

Returns true if we are not at the end of the iteration, false - * otherwise.

- *

The next set of codepoints with the same value type will be - * calculated during this call and returned in the arguement element.

- * @param element return result - * @return true if we are not at the end of the iteration, false otherwise. - * @exception NoSuchElementException - if no more elements exist. - * @see com.ibm.icu.util.RangeValueIterator.Element - */ - public final boolean next(Element element) - { - if (m_nextCodepoint_ > UCharacter.MAX_VALUE) { - return false; - } - if (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE && - calculateNextBMPElement(element)) { - return true; - } - calculateNextSupplementaryElement(element); - return true; - } - - /** - * Resets the iterator to the beginning of the iteration - */ - public final void reset() - { - m_currentCodepoint_ = 0; - m_nextCodepoint_ = 0; - m_nextIndex_ = 0; - m_nextBlock_ = m_trie_.m_index_[0] << Trie.INDEX_STAGE_2_SHIFT_; - if (m_nextBlock_ == 0) { - m_nextValue_ = m_initialValue_; - } - else { - m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_)); - } - m_nextBlockIndex_ = 0; - m_nextTrailIndexOffset_ = TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_; - } - - // protected methods ---------------------------------------------- - - /** - * Called by next() to extracts a 32 bit value from a trie value - * used for comparison. - * This method is to be overwritten if special manipulation is to be done - * to retrieve a relevant comparison. - * The default function is to return the value as it is. - * @param value a value from the trie - * @return extracted value - */ - protected int extract(int value) - { - return value; - } - - // private methods ------------------------------------------------ - - /** - * Set the result values - * @param element return result object - * @param start codepoint of range - * @param limit (end + 1) codepoint of range - * @param value common value of range - */ - private final void setResult(Element element, int start, int limit, - int value) - { - element.start = start; - element.limit = limit; - element.value = value; - } - - /** - * Finding the next element. - * This method is called just before returning the result of - * next(). - * We always store the next element before it is requested. - * In the case that we have to continue calculations into the - * supplementary planes, a false will be returned. - * @param element return result object - * @return true if the next range is found, false if we have to proceed to - * the supplementary range. - */ - private final boolean calculateNextBMPElement(Element element) - { - int currentBlock = m_nextBlock_; - int currentValue = m_nextValue_; - m_currentCodepoint_ = m_nextCodepoint_; - m_nextCodepoint_ ++; - m_nextBlockIndex_ ++; - if (!checkBlockDetail(currentValue)) { - setResult(element, m_currentCodepoint_, m_nextCodepoint_, - currentValue); - return true; - } - // synwee check that next block index == 0 here - // enumerate BMP - the main loop enumerates data blocks - while (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE) { - m_nextIndex_ ++; - // because of the way the character is split to form the index - // the lead surrogate and trail surrogate can not be in the - // mid of a block - if (m_nextCodepoint_ == LEAD_SURROGATE_MIN_VALUE_) { - // skip lead surrogate code units, - // go to lead surrogate codepoints - m_nextIndex_ = BMP_INDEX_LENGTH_; - } - else if (m_nextCodepoint_ == TRAIL_SURROGATE_MIN_VALUE_) { - // go back to regular BMP code points - m_nextIndex_ = m_nextCodepoint_ >> Trie.INDEX_STAGE_1_SHIFT_; - } - - m_nextBlockIndex_ = 0; - if (!checkBlock(currentBlock, currentValue)) { - setResult(element, m_currentCodepoint_, m_nextCodepoint_, - currentValue); - return true; - } - } - m_nextCodepoint_ --; // step one back since this value has not been - m_nextBlockIndex_ --; // retrieved yet. - return false; - } - - /** - * Finds the next supplementary element. - * For each entry in the trie, the value to be delivered is passed through - * extract(). - * We always store the next element before it is requested. - * Called after calculateNextBMP() completes its round of BMP characters. - * There is a slight difference in the usage of m_currentCodepoint_ - * here as compared to calculateNextBMP(). Though both represents the - * lower bound of the next element, in calculateNextBMP() it gets set - * at the start of any loop, where-else, in calculateNextSupplementary() - * since m_currentCodepoint_ already contains the lower bound of the - * next element (passed down from calculateNextBMP()), we keep it till - * the end before resetting it to the new value. - * Note, if there are no more iterations, it will never get to here. - * Blocked out by next(). - * @param element return result object - */ - private final void calculateNextSupplementaryElement(Element element) - { - int currentValue = m_nextValue_; - int currentBlock = m_nextBlock_; - m_nextCodepoint_ ++; - m_nextBlockIndex_ ++; - - if (UTF16.getTrailSurrogate(m_nextCodepoint_) - != UTF16.TRAIL_SURROGATE_MIN_VALUE) { - // this piece is only called when we are in the middle of a lead - // surrogate block - if (!checkNullNextTrailIndex() && !checkBlockDetail(currentValue)) { - setResult(element, m_currentCodepoint_, m_nextCodepoint_, - currentValue); - m_currentCodepoint_ = m_nextCodepoint_; - return; - } - // we have cleared one block - m_nextIndex_ ++; - m_nextTrailIndexOffset_ ++; - if (!checkTrailBlock(currentBlock, currentValue)) { - setResult(element, m_currentCodepoint_, m_nextCodepoint_, - currentValue); - m_currentCodepoint_ = m_nextCodepoint_; - return; - } - } - int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_); - // enumerate supplementary code points - while (nextLead < TRAIL_SURROGATE_MIN_VALUE_) { - // lead surrogate access - int leadBlock = - m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] << - Trie.INDEX_STAGE_2_SHIFT_; - if (leadBlock == m_trie_.m_dataOffset_) { - // no entries for a whole block of lead surrogates - if (currentValue != m_initialValue_) { - m_nextValue_ = m_initialValue_; - m_nextBlock_ = 0; - m_nextBlockIndex_ = 0; - setResult(element, m_currentCodepoint_, m_nextCodepoint_, - currentValue); - m_currentCodepoint_ = m_nextCodepoint_; - return; - } - - nextLead += DATA_BLOCK_LENGTH_; - // number of total affected supplementary codepoints in one - // block - // this is not a simple addition of - // DATA_BLOCK_SUPPLEMENTARY_LENGTH since we need to consider - // that we might have moved some of the codepoints - m_nextCodepoint_ = UCharacterProperty.getRawSupplementary( - (char)nextLead, - (char)UTF16.TRAIL_SURROGATE_MIN_VALUE); - continue; - } - if (m_trie_.m_dataManipulate_ == null) { - throw new NullPointerException( - "The field DataManipulate in this Trie is null"); - } - // enumerate trail surrogates for this lead surrogate - m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset( - m_trie_.getValue(leadBlock + - (nextLead & Trie.INDEX_STAGE_3_MASK_))); - if (m_nextIndex_ <= 0) { - // no data for this lead surrogate - if (currentValue != m_initialValue_) { - m_nextValue_ = m_initialValue_; - m_nextBlock_ = 0; - m_nextBlockIndex_ = 0; - setResult(element, m_currentCodepoint_, m_nextCodepoint_, - currentValue); - m_currentCodepoint_ = m_nextCodepoint_; - return; - } - m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_; - } else { - m_nextTrailIndexOffset_ = 0; - if (!checkTrailBlock(currentBlock, currentValue)) { - setResult(element, m_currentCodepoint_, m_nextCodepoint_, - currentValue); - m_currentCodepoint_ = m_nextCodepoint_; - return; - } - } - nextLead ++; - } - - // deliver last range - setResult(element, m_currentCodepoint_, UCharacter.MAX_VALUE + 1, - currentValue); - } - - /** - * Internal block value calculations - * Performs calculations on a data block to find codepoints in m_nextBlock_ - * after the index m_nextBlockIndex_ that has the same value. - * Note m_*_ variables at this point is the next codepoint whose value - * has not been calculated. - * But when returned with false, it will be the last codepoint whose - * value has been calculated. - * @param currentValue the value which other codepoints are tested against - * @return true if the whole block has the same value as currentValue or if - * the whole block has been calculated, false otherwise. - */ - private final boolean checkBlockDetail(int currentValue) - { - while (m_nextBlockIndex_ < DATA_BLOCK_LENGTH_) { - m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_ + - m_nextBlockIndex_)); - if (m_nextValue_ != currentValue) { - return false; - } - ++ m_nextBlockIndex_; - ++ m_nextCodepoint_; - } - return true; - } - - /** - * Internal block value calculations - * Performs calculations on a data block to find codepoints in m_nextBlock_ - * that has the same value. - * Will call checkBlockDetail() if highlevel check fails. - * Note m_*_ variables at this point is the next codepoint whose value - * has not been calculated. - * @param currentBlock the initial block containing all currentValue - * @param currentValue the value which other codepoints are tested against - * @return true if the whole block has the same value as currentValue or if - * the whole block has been calculated, false otherwise. - */ - private final boolean checkBlock(int currentBlock, int currentValue) - { - m_nextBlock_ = m_trie_.m_index_[m_nextIndex_] << - Trie.INDEX_STAGE_2_SHIFT_; - if (m_nextBlock_ == currentBlock && - (m_nextCodepoint_ - m_currentCodepoint_) >= DATA_BLOCK_LENGTH_) { - // the block is the same as the previous one, filled with - // currentValue - m_nextCodepoint_ += DATA_BLOCK_LENGTH_; - } - else if (m_nextBlock_ == 0) { - // this is the all-initial-value block - if (currentValue != m_initialValue_) { - m_nextValue_ = m_initialValue_; - m_nextBlockIndex_ = 0; - return false; - } - m_nextCodepoint_ += DATA_BLOCK_LENGTH_; - } - else { - if (!checkBlockDetail(currentValue)) { - return false; - } - } - return true; - } - - /** - * Internal block value calculations - * Performs calculations on multiple data blocks for a set of trail - * surrogates to find codepoints in m_nextBlock_ that has the same value. - * Will call checkBlock() for internal block checks. - * Note m_*_ variables at this point is the next codepoint whose value - * has not been calculated. - * @param currentBlock the initial block containing all currentValue - * @param currentValue the value which other codepoints are tested against - * @return true if the whole block has the same value as currentValue or if - * the whole block has been calculated, false otherwise. - */ - private final boolean checkTrailBlock(int currentBlock, - int currentValue) - { - // enumerate code points for this lead surrogate - while (m_nextTrailIndexOffset_ < TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_) - { - // if we ever reach here, we are at the start of a new block - m_nextBlockIndex_ = 0; - // copy of most of the body of the BMP loop - if (!checkBlock(currentBlock, currentValue)) { - return false; - } - m_nextTrailIndexOffset_ ++; - m_nextIndex_ ++; - } - return true; - } - - /** - * Checks if we are beginning at the start of a initial block. - * If we are then the rest of the codepoints in this initial block - * has the same values. - * We increment m_nextCodepoint_ and relevant data members if so. - * This is used only in for the supplementary codepoints because - * the offset to the trail indexes could be 0. - * @return true if we are at the start of a initial block. - */ - private final boolean checkNullNextTrailIndex() - { - if (m_nextIndex_ <= 0) { - m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1; - int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_); - int leadBlock = - m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] << - Trie.INDEX_STAGE_2_SHIFT_; - if (m_trie_.m_dataManipulate_ == null) { - throw new NullPointerException( - "The field DataManipulate in this Trie is null"); - } - m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset( - m_trie_.getValue(leadBlock + - (nextLead & Trie.INDEX_STAGE_3_MASK_))); - m_nextIndex_ --; - m_nextBlockIndex_ = DATA_BLOCK_LENGTH_; - return true; - } - return false; - } - - // private data members -------------------------------------------- - - /** - * Size of the stage 1 BMP indexes - */ - private static final int BMP_INDEX_LENGTH_ = - 0x10000 >> Trie.INDEX_STAGE_1_SHIFT_; - /** - * Lead surrogate minimum value - */ - private static final int LEAD_SURROGATE_MIN_VALUE_ = 0xD800; - /** - * Trail surrogate minimum value - */ - private static final int TRAIL_SURROGATE_MIN_VALUE_ = 0xDC00; - /** - * Number of trail surrogate - */ - private static final int TRAIL_SURROGATE_COUNT_ = 0x400; - /** - * Number of stage 1 indexes for supplementary calculations that maps to - * each lead surrogate character. - * See second pass into getRawOffset for the trail surrogate character. - * 10 for significant number of bits for trail surrogates, 5 for what we - * discard during shifting. - */ - private static final int TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_ = - 1 << (10 - Trie.INDEX_STAGE_1_SHIFT_); - /** - * Number of data values in a stage 2 (data array) block. - */ - private static final int DATA_BLOCK_LENGTH_ = - 1 << Trie.INDEX_STAGE_1_SHIFT_; - /** - * Trie instance - */ - private Trie m_trie_; - /** - * Initial value for trie values - */ - private int m_initialValue_; - /** - * Next element results and data. - */ - private int m_currentCodepoint_; - private int m_nextCodepoint_; - private int m_nextValue_; - private int m_nextIndex_; - private int m_nextBlock_; - private int m_nextBlockIndex_; - private int m_nextTrailIndexOffset_; -}