< prev index next >

jdk/src/java.base/share/classes/sun/text/normalizer/CharTrie.java

Print this page


   1 /*
   2  * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */

  25 /*
  26  *******************************************************************************
  27  * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
  28  *                                                                             *
  29  * The original version of this source code and documentation is copyrighted   *
  30  * and owned by IBM, These materials are provided under terms of a License     *
  31  * Agreement between IBM and Sun. This technology is protected by multiple     *
  32  * US and International patents. This notice and attribution to IBM may not    *
  33  * to removed.                                                                 *
  34  *******************************************************************************
  35  */
  36 
  37 package sun.text.normalizer;
  38 
  39 import java.io.InputStream;
  40 import java.io.DataInputStream;

  41 import java.io.IOException;
  42 
  43 /**
  44  * Trie implementation which stores data in char, 16 bits.
  45  * @author synwee
  46  * @see com.ibm.icu.impl.Trie
  47  * @since release 2.1, Jan 01 2002
  48  */
  49 
  50  // note that i need to handle the block calculations later, since chartrie
  51  // in icu4c uses the same index array.
  52 public class CharTrie extends Trie
  53 {
  54     // public constructors ---------------------------------------------
  55 
  56     /**
  57     * <p>Creates a new Trie with the settings for the trie data.</p>
  58     * <p>Unserialize the 32-bit-aligned input stream and use the data for the
  59     * trie.</p>
  60     * @param inputStream file input stream to a ICU data file, containing
  61     *                    the trie
  62     * @param dataManipulate object which provides methods to parse the char
  63     *                        data
  64     * @throws IOException thrown when data reading fails
  65     * @draft 2.1
  66     */
  67     public CharTrie(InputStream inputStream,
  68                     DataManipulate dataManipulate) throws IOException
  69     {
  70         super(inputStream, dataManipulate);
  71 
  72         if (!isCharTrie()) {
  73             throw new IllegalArgumentException(
  74                                "Data given does not belong to a char trie.");
  75         }
  76         m_friendAgent_ = new FriendAgent();
  77     }
  78 
  79     /**
  80      * Make a dummy CharTrie.
  81      * A dummy trie is an empty runtime trie, used when a real data trie cannot
  82      * be loaded.
  83      *
  84      * The trie always returns the initialValue,
  85      * or the leadUnitValue for lead surrogate code points.
  86      * The Latin-1 part is always set up to be linear.
  87      *
  88      * @param initialValue the initial value that is set for all code points
  89      * @param leadUnitValue the value for lead surrogate code _units_ that do not
  90      *                      have associated supplementary data
  91      * @param dataManipulate object which provides methods to parse the char data
  92      */
  93     public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
  94         super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
  95 
  96         int dataLength, latin1Length, i, limit;
  97         char block;
  98 
  99         /* calculate the actual size of the dummy trie data */
 100 
 101         /* max(Latin-1, block 0) */
 102         dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
 103         if(leadUnitValue!=initialValue) {
 104             dataLength+=DATA_BLOCK_LENGTH;
 105         }
 106         m_data_=new char[dataLength];
 107         m_dataLength_=dataLength;
 108 
 109         m_initialValue_=(char)initialValue;
 110 
 111         /* fill the index and data arrays */
 112 
 113         /* indexes are preset to 0 (block 0) */
 114 
 115         /* Latin-1 data */
 116         for(i=0; i<latin1Length; ++i) {
 117             m_data_[i]=(char)initialValue;
 118         }
 119 
 120         if(leadUnitValue!=initialValue) {
 121             /* indexes for lead surrogate code units to the block after Latin-1 */
 122             block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);
 123             i=0xd800>>INDEX_STAGE_1_SHIFT_;
 124             limit=0xdc00>>INDEX_STAGE_1_SHIFT_;
 125             for(; i<limit; ++i) {
 126                 m_index_[i]=block;
 127             }
 128 
 129             /* data for lead surrogate code units */
 130             limit=latin1Length+DATA_BLOCK_LENGTH;
 131             for(i=latin1Length; i<limit; ++i) {
 132                 m_data_[i]=(char)leadUnitValue;
 133             }
 134         }
 135 
 136         m_friendAgent_ = new FriendAgent();
 137     }
 138 
 139     /**
 140      * Java friend implementation
 141      */
 142     public class FriendAgent
 143     {
 144         /**
 145          * Gives out the index array of the trie
 146          * @return index array of trie
 147          */
 148         public char[] getPrivateIndex()
 149         {
 150             return m_index_;
 151         }
 152         /**
 153          * Gives out the data array of the trie
 154          * @return data array of trie
 155          */
 156         public char[] getPrivateData()
 157         {
 158             return m_data_;
 159         }
 160         /**
 161          * Gives out the data offset in the trie
 162          * @return data offset in the trie
 163          */
 164         public int getPrivateInitialValue()
 165         {
 166             return m_initialValue_;
 167         }
 168     }
 169 
 170     // public methods --------------------------------------------------
 171 
 172     /**
 173      * Java friend implementation
 174      * To store the index and data array into the argument.
 175      * @param friend java friend UCharacterProperty object to store the array
 176      */
 177     public void putIndexData(UCharacterProperty friend)
 178     {
 179         friend.setIndexData(m_friendAgent_);
 180     }
 181 
 182     /**
 183     * Gets the value associated with the codepoint.
 184     * If no value is associated with the codepoint, a default value will be
 185     * returned.
 186     * @param ch codepoint
 187     * @return offset to data
 188     * @draft 2.1
 189     */
 190     public final char getCodePointValue(int ch)
 191     {
 192         int offset;
 193 
 194         // fastpath for U+0000..U+D7FF
 195         if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
 196             // copy of getRawOffset()
 197             offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
 198                     + (ch & INDEX_STAGE_3_MASK_);
 199             return m_data_[offset];
 200         }
 201 
 202         // handle U+D800..U+10FFFF
 203         offset = getCodePointOffset(ch);
 204 
 205         // return -1 if there is an error, in this case we return the default
 206         // value: m_initialValue_
 207         return (offset >= 0) ? m_data_[offset] : m_initialValue_;
 208     }
 209 
 210     /**
 211     * Gets the value to the data which this lead surrogate character points
 212     * to.
 213     * Returned data may contain folding offset information for the next
 214     * trailing surrogate character.
 215     * This method does not guarantee correct results for trail surrogates.
 216     * @param ch lead surrogate character
 217     * @return data value
 218     * @draft 2.1
 219     */
 220     public final char getLeadValue(char ch)
 221     {
 222        return m_data_[getLeadOffset(ch)];
 223     }
 224 
 225     /**
 226     * Get the value associated with a pair of surrogates.
 227     * @param lead a lead surrogate
 228     * @param trail a trail surrogate
 229     * @draft 2.1
 230     */
 231     public final char getSurrogateValue(char lead, char trail)
 232     {
 233         int offset = getSurrogateOffset(lead, trail);
 234         if (offset > 0) {
 235             return m_data_[offset];
 236         }
 237         return m_initialValue_;
 238     }
 239 
 240     /**
 241     * <p>Get a value from a folding offset (from the value of a lead surrogate)
 242     * and a trail surrogate.</p>
 243     * <p>If the
 244     * @param leadvalue value associated with the lead surrogate which contains
 245     *        the folding offset
 246     * @param trail surrogate
 247     * @return trie data value associated with the trail character
 248     * @draft 2.1
 249     */
 250     public final char getTrailValue(int leadvalue, char trail)
 251     {
 252         if (m_dataManipulate_ == null) {
 253             throw new NullPointerException(
 254                              "The field DataManipulate in this Trie is null");
 255         }
 256         int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
 257         if (offset > 0) {
 258             return m_data_[getRawOffset(offset,
 259                                         (char)(trail & SURROGATE_MASK_))];
 260         }
 261         return m_initialValue_;
 262     }
 263 
 264     // protected methods -----------------------------------------------
 265 
 266     /**
 267     * <p>Parses the input stream and stores its trie content into a index and
 268     * data array</p>
 269     * @param inputStream data input stream containing trie data
 270     * @exception IOException thrown when data reading fails
 271     */
 272     protected final void unserialize(InputStream inputStream)
 273                                                 throws IOException
 274     {
 275         DataInputStream input = new DataInputStream(inputStream);
 276         int indexDataLength = m_dataOffset_ + m_dataLength_;
 277         m_index_ = new char[indexDataLength];
 278         for (int i = 0; i < indexDataLength; i ++) {
 279             m_index_[i] = input.readChar();
 280         }
 281         m_data_           = m_index_;
 282         m_initialValue_   = m_data_[m_dataOffset_];
 283     }


 292     protected final int getSurrogateOffset(char lead, char trail)
 293     {
 294         if (m_dataManipulate_ == null) {
 295             throw new NullPointerException(
 296                              "The field DataManipulate in this Trie is null");
 297         }
 298 
 299         // get fold position for the next trail surrogate
 300         int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
 301 
 302         // get the real data from the folded lead/trail units
 303         if (offset > 0) {
 304             return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
 305         }
 306 
 307         // return -1 if there is an error, in this case we return the default
 308         // value: m_initialValue_
 309         return -1;
 310     }
 311 
 312     /**
 313     * Gets the value at the argument index.
 314     * For use internally in TrieIterator.
 315     * @param index value at index will be retrieved
 316     * @return 32 bit value
 317     * @see com.ibm.icu.impl.TrieIterator
 318     * @draft 2.1
 319     */
 320     protected final int getValue(int index)
 321     {
 322         return m_data_[index];
 323     }
 324 
 325     /**
 326     * Gets the default initial value
 327     * @return 32 bit value
 328     * @draft 2.1
 329     */
 330     protected final int getInitialValue()
 331     {
 332         return m_initialValue_;
 333     }
 334 
 335     // private data members --------------------------------------------
 336 
 337     /**
 338     * Default value
 339     */
 340     private char m_initialValue_;
 341     /**
 342     * Array of char data
 343     */
 344     private char m_data_[];
 345     /**
 346      * Agent for friends
 347      */
 348     private FriendAgent m_friendAgent_;
 349 }
   1 /*
   2  * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  ******************************************************************************
  28  * Copyright (C) 1996-2014, International Business Machines Corporation and
  29  * others. All Rights Reserved.
  30  ******************************************************************************





  31  */
  32 
  33 package sun.text.normalizer;
  34 

  35 import java.io.DataInputStream;
  36 import java.io.InputStream;
  37 import java.io.IOException;
  38 
  39 /**
  40  * Trie implementation which stores data in char, 16 bits.
  41  * @author synwee
  42  * @see com.ibm.icu.impl.Trie
  43  * @since release 2.1, Jan 01 2002
  44  */
  45 
  46  // note that i need to handle the block calculations later, since chartrie
  47  // in icu4c uses the same index array.
  48 public class CharTrie extends Trie
  49 {
  50     // public constructors ---------------------------------------------
  51 
  52     /**
  53     * <p>Creates a new Trie with the settings for the trie data.</p>
  54     * <p>Unserialize the 32-bit-aligned input stream and use the data for the
  55     * trie.</p>
  56     * @param inputStream file input stream to a ICU data file, containing
  57     *                    the trie
  58     * @param dataManipulate object which provides methods to parse the char
  59     *                        data
  60     * @throws IOException thrown when data reading fails
  61     * @draft 2.1
  62     */
  63     public CharTrie(InputStream inputStream,
  64                     DataManipulate dataManipulate) throws IOException
  65     {
  66         super(inputStream, dataManipulate);
  67 
  68         if (!isCharTrie()) {
  69             throw new IllegalArgumentException(
  70                                "Data given does not belong to a char trie.");
  71         }




























































































  72     }
  73 
  74     // public methods --------------------------------------------------
  75 
  76     /**










  77      * Gets the value associated with the codepoint.
  78      * If no value is associated with the codepoint, a default value will be
  79      * returned.
  80      * @param ch codepoint
  81      * @return offset to data

  82      */
  83     public final char getCodePointValue(int ch)
  84     {
  85         int offset;
  86 
  87         // fastpath for U+0000..U+D7FF
  88         if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
  89             // copy of getRawOffset()
  90             offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
  91                     + (ch & INDEX_STAGE_3_MASK_);
  92             return m_data_[offset];
  93         }
  94 
  95         // handle U+D800..U+10FFFF
  96         offset = getCodePointOffset(ch);
  97 
  98         // return -1 if there is an error, in this case we return the default
  99         // value: m_initialValue_
 100         return (offset >= 0) ? m_data_[offset] : m_initialValue_;
 101     }
 102 
 103     /**
 104     * Gets the value to the data which this lead surrogate character points
 105     * to.
 106     * Returned data may contain folding offset information for the next
 107     * trailing surrogate character.
 108     * This method does not guarantee correct results for trail surrogates.
 109     * @param ch lead surrogate character
 110     * @return data value

 111     */
 112     public final char getLeadValue(char ch)
 113     {
 114        return m_data_[getLeadOffset(ch)];
 115     }
 116 







































 117     // protected methods -----------------------------------------------
 118 
 119     /**
 120     * <p>Parses the input stream and stores its trie content into a index and
 121     * data array</p>
 122     * @param inputStream data input stream containing trie data
 123     * @exception IOException thrown when data reading fails
 124     */
 125     protected final void unserialize(InputStream inputStream)
 126                                                 throws IOException
 127     {
 128         DataInputStream input = new DataInputStream(inputStream);
 129         int indexDataLength = m_dataOffset_ + m_dataLength_;
 130         m_index_ = new char[indexDataLength];
 131         for (int i = 0; i < indexDataLength; i ++) {
 132             m_index_[i] = input.readChar();
 133         }
 134         m_data_           = m_index_;
 135         m_initialValue_   = m_data_[m_dataOffset_];
 136     }


 145     protected final int getSurrogateOffset(char lead, char trail)
 146     {
 147         if (m_dataManipulate_ == null) {
 148             throw new NullPointerException(
 149                              "The field DataManipulate in this Trie is null");
 150         }
 151 
 152         // get fold position for the next trail surrogate
 153         int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
 154 
 155         // get the real data from the folded lead/trail units
 156         if (offset > 0) {
 157             return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
 158         }
 159 
 160         // return -1 if there is an error, in this case we return the default
 161         // value: m_initialValue_
 162         return -1;
 163     }
 164 























 165     // private data members --------------------------------------------
 166 
 167     /**
 168      * Default value
 169      */
 170     private char m_initialValue_;
 171     /**
 172      * Array of char data
 173      */
 174     private char m_data_[];




 175 }
< prev index next >