1 /*
   2  * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  ******************************************************************************
  28  * Copyright (C) 1996-2014, International Business Machines Corporation and
  29  * others. All Rights Reserved.
  30  ******************************************************************************
  31  */
  32 
  33 package sun.text.normalizer;
  34 
  35 import java.io.DataInputStream;
  36 import java.io.InputStream;
  37 import java.io.IOException;
  38 
  39 /**
  40  * Trie implementation which stores data in char, 16 bits.
  41  * @author synwee
  42  * @see com.ibm.icu.impl.Trie
  43  * @since release 2.1, Jan 01 2002
  44  */
  45 
 // Note: the block calculations still need to be handled later, since CharTrie
 // in ICU4C uses the same index array.
  48 public class CharTrie extends Trie
  49 {
  50     // public constructors ---------------------------------------------
  51 
  52     /**
  53     * <p>Creates a new Trie with the settings for the trie data.</p>
  54     * <p>Unserialize the 32-bit-aligned input stream and use the data for the
  55     * trie.</p>
  56     * @param inputStream file input stream to a ICU data file, containing
  57     *                    the trie
  58     * @param dataManipulate object which provides methods to parse the char
  59     *                        data
  60     * @throws IOException thrown when data reading fails
  61     * @draft 2.1
  62     */
  63     public CharTrie(InputStream inputStream,
  64                     DataManipulate dataManipulate) throws IOException
  65     {
  66         super(inputStream, dataManipulate);
  67 
  68         if (!isCharTrie()) {
  69             throw new IllegalArgumentException(
  70                                "Data given does not belong to a char trie.");
  71         }
  72     }
  73 
  74     // public methods --------------------------------------------------
  75 
  76     /**
  77      * Gets the value associated with the codepoint.
  78      * If no value is associated with the codepoint, a default value will be
  79      * returned.
  80      * @param ch codepoint
  81      * @return offset to data
  82      */
  83     public final char getCodePointValue(int ch)
  84     {
  85         int offset;
  86 
  87         // fastpath for U+0000..U+D7FF
  88         if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
  89             // copy of getRawOffset()
  90             offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
  91                     + (ch & INDEX_STAGE_3_MASK_);
  92             return m_data_[offset];
  93         }
  94 
  95         // handle U+D800..U+10FFFF
  96         offset = getCodePointOffset(ch);
  97 
  98         // return -1 if there is an error, in this case we return the default
  99         // value: m_initialValue_
 100         return (offset >= 0) ? m_data_[offset] : m_initialValue_;
 101     }
 102 
 103     /**
 104     * Gets the value to the data which this lead surrogate character points
 105     * to.
 106     * Returned data may contain folding offset information for the next
 107     * trailing surrogate character.
 108     * This method does not guarantee correct results for trail surrogates.
 109     * @param ch lead surrogate character
 110     * @return data value
 111     */
 112     public final char getLeadValue(char ch)
 113     {
 114        return m_data_[getLeadOffset(ch)];
 115     }
 116 
 117     // protected methods -----------------------------------------------
 118 
 119     /**
 120     * <p>Parses the input stream and stores its trie content into a index and
 121     * data array</p>
 122     * @param inputStream data input stream containing trie data
 123     * @exception IOException thrown when data reading fails
 124     */
 125     protected final void unserialize(InputStream inputStream)
 126                                                 throws IOException
 127     {
 128         DataInputStream input = new DataInputStream(inputStream);
 129         int indexDataLength = m_dataOffset_ + m_dataLength_;
 130         m_index_ = new char[indexDataLength];
 131         for (int i = 0; i < indexDataLength; i ++) {
 132             m_index_[i] = input.readChar();
 133         }
 134         m_data_           = m_index_;
 135         m_initialValue_   = m_data_[m_dataOffset_];
 136     }
 137 
 138     /**
 139     * Gets the offset to the data which the surrogate pair points to.
 140     * @param lead lead surrogate
 141     * @param trail trailing surrogate
 142     * @return offset to data
 143     * @draft 2.1
 144     */
 145     protected final int getSurrogateOffset(char lead, char trail)
 146     {
 147         if (m_dataManipulate_ == null) {
 148             throw new NullPointerException(
 149                              "The field DataManipulate in this Trie is null");
 150         }
 151 
 152         // get fold position for the next trail surrogate
 153         int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
 154 
 155         // get the real data from the folded lead/trail units
 156         if (offset > 0) {
 157             return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
 158         }
 159 
 160         // return -1 if there is an error, in this case we return the default
 161         // value: m_initialValue_
 162         return -1;
 163     }
 164 
 165     // private data members --------------------------------------------
 166 
 167     /**
 168      * Default value
 169      */
 170     private char m_initialValue_;
 171     /**
 172      * Array of char data
 173      */
 174     private char m_data_[];
 175 }