1 /*
   2  * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  ******************************************************************************
  28  * Copyright (C) 1996-2014, International Business Machines Corporation and
  29  * others. All Rights Reserved.
  30  ******************************************************************************
  31  */
  32 
  33 package jdk.internal.icu.impl;
  34 
  35 import jdk.internal.icu.text.UTF16;
  36 
  37 import java.io.DataInputStream;
  38 import java.io.InputStream;
  39 import java.io.IOException;
  40 
  41 /**
  42  * Trie implementation which stores data in char, 16 bits.
  43  * @author synwee
  44  * @see com.ibm.icu.impl.Trie
  45  * @since release 2.1, Jan 01 2002
  46  */
  47 
  48  // note that i need to handle the block calculations later, since chartrie
  49  // in icu4c uses the same index array.
  50 public class CharTrie extends Trie
  51 {
  52     // public constructors ---------------------------------------------
  53 
  54     /**
  55     * <p>Creates a new Trie with the settings for the trie data.</p>
  56     * <p>Unserialize the 32-bit-aligned input stream and use the data for the
  57     * trie.</p>
  58     * @param inputStream file input stream to a ICU data file, containing
  59     *                    the trie
  60     * @param dataManipulate object which provides methods to parse the char
  61     *                        data
  62     * @throws IOException thrown when data reading fails
  63     * @draft 2.1
  64     */
  65     public CharTrie(InputStream inputStream,
  66                     DataManipulate dataManipulate) throws IOException
  67     {
  68         super(inputStream, dataManipulate);
  69 
  70         if (!isCharTrie()) {
  71             throw new IllegalArgumentException(
  72                                "Data given does not belong to a char trie.");
  73         }
  74     }
  75 
  76     // public methods --------------------------------------------------
  77 
  78     /**
  79      * Gets the value associated with the codepoint.
  80      * If no value is associated with the codepoint, a default value will be
  81      * returned.
  82      * @param ch codepoint
  83      * @return offset to data
  84      */
  85     public final char getCodePointValue(int ch)
  86     {
  87         int offset;
  88 
  89         // fastpath for U+0000..U+D7FF
  90         if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
  91             // copy of getRawOffset()
  92             offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
  93                     + (ch & INDEX_STAGE_3_MASK_);
  94             return m_data_[offset];
  95         }
  96 
  97         // handle U+D800..U+10FFFF
  98         offset = getCodePointOffset(ch);
  99 
 100         // return -1 if there is an error, in this case we return the default
 101         // value: m_initialValue_
 102         return (offset >= 0) ? m_data_[offset] : m_initialValue_;
 103     }
 104 
 105     /**
 106     * Gets the value to the data which this lead surrogate character points
 107     * to.
 108     * Returned data may contain folding offset information for the next
 109     * trailing surrogate character.
 110     * This method does not guarantee correct results for trail surrogates.
 111     * @param ch lead surrogate character
 112     * @return data value
 113     */
 114     public final char getLeadValue(char ch)
 115     {
 116        return m_data_[getLeadOffset(ch)];
 117     }
 118 
 119     // protected methods -----------------------------------------------
 120 
 121     /**
 122     * <p>Parses the input stream and stores its trie content into a index and
 123     * data array</p>
 124     * @param inputStream data input stream containing trie data
 125     * @exception IOException thrown when data reading fails
 126     */
 127     protected final void unserialize(InputStream inputStream)
 128                                                 throws IOException
 129     {
 130         DataInputStream input = new DataInputStream(inputStream);
 131         int indexDataLength = m_dataOffset_ + m_dataLength_;
 132         m_index_ = new char[indexDataLength];
 133         for (int i = 0; i < indexDataLength; i ++) {
 134             m_index_[i] = input.readChar();
 135         }
 136         m_data_           = m_index_;
 137         m_initialValue_   = m_data_[m_dataOffset_];
 138     }
 139 
 140     /**
 141     * Gets the offset to the data which the surrogate pair points to.
 142     * @param lead lead surrogate
 143     * @param trail trailing surrogate
 144     * @return offset to data
 145     * @draft 2.1
 146     */
 147     protected final int getSurrogateOffset(char lead, char trail)
 148     {
 149         if (m_dataManipulate_ == null) {
 150             throw new NullPointerException(
 151                              "The field DataManipulate in this Trie is null");
 152         }
 153 
 154         // get fold position for the next trail surrogate
 155         int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
 156 
 157         // get the real data from the folded lead/trail units
 158         if (offset > 0) {
 159             return getRawOffset(offset, (char)(trail & SURROGATE_MASK_));
 160         }
 161 
 162         // return -1 if there is an error, in this case we return the default
 163         // value: m_initialValue_
 164         return -1;
 165     }
 166 
 167     // private data members --------------------------------------------
 168 
 169     /**
 170      * Default value
 171      */
 172     private char m_initialValue_;
 173     /**
 174      * Array of char data
 175      */
 176     private char m_data_[];
 177 }