1 /* 2 * Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 /* 26 ******************************************************************************* 27 * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved * 28 * * 29 * The original version of this source code and documentation is copyrighted * 30 * and owned by IBM, These materials are provided under terms of a License * 31 * Agreement between IBM and Sun. This technology is protected by multiple * 32 * US and International patents. This notice and attribution to IBM may not * 33 * to removed. * 34 ******************************************************************************* 35 */ 36 37 package sun.text.normalizer; 38 39 import java.io.InputStream; 40 import java.io.DataInputStream; 41 import java.io.IOException; 42 43 /** 44 * Trie implementation which stores data in char, 16 bits. 45 * @author synwee 46 * @see com.ibm.icu.impl.Trie 47 * @since release 2.1, Jan 01 2002 48 */ 49 50 // note that i need to handle the block calculations later, since chartrie 51 // in icu4c uses the same index array. 52 public class CharTrie extends Trie 53 { 54 // public constructors --------------------------------------------- 55 56 /** 57 * <p>Creates a new Trie with the settings for the trie data.</p> 58 * <p>Unserialize the 32-bit-aligned input stream and use the data for the 59 * trie.</p> 60 * @param inputStream file input stream to a ICU data file, containing 61 * the trie 62 * @param dataManipulate object which provides methods to parse the char 63 * data 64 * @throws IOException thrown when data reading fails 65 * @draft 2.1 66 */ 67 public CharTrie(InputStream inputStream, 68 DataManipulate dataManipulate) throws IOException 69 { 70 super(inputStream, dataManipulate); 71 72 if (!isCharTrie()) { 73 throw new IllegalArgumentException( 74 "Data given does not belong to a char trie."); 75 } 76 m_friendAgent_ = new FriendAgent(); 77 } 78 79 /** 80 * Make a dummy CharTrie. 81 * A dummy trie is an empty runtime trie, used when a real data trie cannot 82 * be loaded. 83 * 84 * The trie always returns the initialValue, 85 * or the leadUnitValue for lead surrogate code points. 86 * The Latin-1 part is always set up to be linear. 87 * 88 * @param initialValue the initial value that is set for all code points 89 * @param leadUnitValue the value for lead surrogate code _units_ that do not 90 * have associated supplementary data 91 * @param dataManipulate object which provides methods to parse the char data 92 */ 93 public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) { 94 super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate); 95 96 int dataLength, latin1Length, i, limit; 97 char block; 98 99 /* calculate the actual size of the dummy trie data */ 100 101 /* max(Latin-1, block 0) */ 102 dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH; 103 if(leadUnitValue!=initialValue) { 104 dataLength+=DATA_BLOCK_LENGTH; 105 } 106 m_data_=new char[dataLength]; 107 m_dataLength_=dataLength; 108 109 m_initialValue_=(char)initialValue; 110 111 /* fill the index and data arrays */ 112 113 /* indexes are preset to 0 (block 0) */ 114 115 /* Latin-1 data */ 116 for(i=0; i<latin1Length; ++i) { 117 m_data_[i]=(char)initialValue; 118 } 119 120 if(leadUnitValue!=initialValue) { 121 /* indexes for lead surrogate code units to the block after Latin-1 */ 122 block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_); 123 i=0xd800>>INDEX_STAGE_1_SHIFT_; 124 limit=0xdc00>>INDEX_STAGE_1_SHIFT_; 125 for(; i<limit; ++i) { 126 m_index_[i]=block; 127 } 128 129 /* data for lead surrogate code units */ 130 limit=latin1Length+DATA_BLOCK_LENGTH; 131 for(i=latin1Length; i<limit; ++i) { 132 m_data_[i]=(char)leadUnitValue; 133 } 134 } 135 136 m_friendAgent_ = new FriendAgent(); 137 } 138 139 /** 140 * Java friend implementation 141 */ 142 public class FriendAgent 143 { 144 /** 145 * Gives out the index array of the trie 146 * @return index array of trie 147 */ 148 public char[] getPrivateIndex() 149 { 150 return m_index_; 151 } 152 /** 153 * Gives out the data array of the trie 154 * @return data array of trie 155 */ 156 public char[] getPrivateData() 157 { 158 return m_data_; 159 } 160 /** 161 * Gives out the data offset in the trie 162 * @return data offset in the trie 163 */ 164 public int getPrivateInitialValue() 165 { 166 return m_initialValue_; 167 } 168 } 169 170 // public methods -------------------------------------------------- 171 172 /** 173 * Java friend implementation 174 * To store the index and data array into the argument. 175 * @param friend java friend UCharacterProperty object to store the array 176 */ 177 public void putIndexData(UCharacterProperty friend) 178 { 179 friend.setIndexData(m_friendAgent_); 180 } 181 182 /** 183 * Gets the value associated with the codepoint. 184 * If no value is associated with the codepoint, a default value will be 185 * returned. 186 * @param ch codepoint 187 * @return offset to data 188 * @draft 2.1 189 */ 190 public final char getCodePointValue(int ch) 191 { 192 int offset; 193 194 // fastpath for U+0000..U+D7FF 195 if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { 196 // copy of getRawOffset() 197 offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) 198 + (ch & INDEX_STAGE_3_MASK_); 199 return m_data_[offset]; 200 } 201 202 // handle U+D800..U+10FFFF 203 offset = getCodePointOffset(ch); 204 205 // return -1 if there is an error, in this case we return the default 206 // value: m_initialValue_ 207 return (offset >= 0) ? m_data_[offset] : m_initialValue_; 208 } 209 210 /** 211 * Gets the value to the data which this lead surrogate character points 212 * to. 213 * Returned data may contain folding offset information for the next 214 * trailing surrogate character. 215 * This method does not guarantee correct results for trail surrogates. 216 * @param ch lead surrogate character 217 * @return data value 218 * @draft 2.1 219 */ 220 public final char getLeadValue(char ch) 221 { 222 return m_data_[getLeadOffset(ch)]; 223 } 224 225 /** 226 * Get the value associated with a pair of surrogates. 227 * @param lead a lead surrogate 228 * @param trail a trail surrogate 229 * @draft 2.1 230 */ 231 public final char getSurrogateValue(char lead, char trail) 232 { 233 int offset = getSurrogateOffset(lead, trail); 234 if (offset > 0) { 235 return m_data_[offset]; 236 } 237 return m_initialValue_; 238 } 239 240 /** 241 * <p>Get a value from a folding offset (from the value of a lead surrogate) 242 * and a trail surrogate.</p> 243 * <p>If the 244 * @param leadvalue value associated with the lead surrogate which contains 245 * the folding offset 246 * @param trail surrogate 247 * @return trie data value associated with the trail character 248 * @draft 2.1 249 */ 250 public final char getTrailValue(int leadvalue, char trail) 251 { 252 if (m_dataManipulate_ == null) { 253 throw new NullPointerException( 254 "The field DataManipulate in this Trie is null"); 255 } 256 int offset = m_dataManipulate_.getFoldingOffset(leadvalue); 257 if (offset > 0) { 258 return m_data_[getRawOffset(offset, 259 (char)(trail & SURROGATE_MASK_))]; 260 } 261 return m_initialValue_; 262 } 263 264 // protected methods ----------------------------------------------- 265 266 /** 267 * <p>Parses the input stream and stores its trie content into a index and 268 * data array</p> 269 * @param inputStream data input stream containing trie data 270 * @exception IOException thrown when data reading fails 271 */ 272 protected final void unserialize(InputStream inputStream) 273 throws IOException 274 { 275 DataInputStream input = new DataInputStream(inputStream); 276 int indexDataLength = m_dataOffset_ + m_dataLength_; 277 m_index_ = new char[indexDataLength]; 278 for (int i = 0; i < indexDataLength; i ++) { 279 m_index_[i] = input.readChar(); 280 } 281 m_data_ = m_index_; 282 m_initialValue_ = m_data_[m_dataOffset_]; 283 } 292 protected final int getSurrogateOffset(char lead, char trail) 293 { 294 if (m_dataManipulate_ == null) { 295 throw new NullPointerException( 296 "The field DataManipulate in this Trie is null"); 297 } 298 299 // get fold position for the next trail surrogate 300 int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead)); 301 302 // get the real data from the folded lead/trail units 303 if (offset > 0) { 304 return getRawOffset(offset, (char)(trail & SURROGATE_MASK_)); 305 } 306 307 // return -1 if there is an error, in this case we return the default 308 // value: m_initialValue_ 309 return -1; 310 } 311 312 /** 313 * Gets the value at the argument index. 314 * For use internally in TrieIterator. 315 * @param index value at index will be retrieved 316 * @return 32 bit value 317 * @see com.ibm.icu.impl.TrieIterator 318 * @draft 2.1 319 */ 320 protected final int getValue(int index) 321 { 322 return m_data_[index]; 323 } 324 325 /** 326 * Gets the default initial value 327 * @return 32 bit value 328 * @draft 2.1 329 */ 330 protected final int getInitialValue() 331 { 332 return m_initialValue_; 333 } 334 335 // private data members -------------------------------------------- 336 337 /** 338 * Default value 339 */ 340 private char m_initialValue_; 341 /** 342 * Array of char data 343 */ 344 private char m_data_[]; 345 /** 346 * Agent for friends 347 */ 348 private FriendAgent m_friendAgent_; 349 } | 1 /* 2 * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 ****************************************************************************** 28 * Copyright (C) 1996-2014, International Business Machines Corporation and 29 * others. All Rights Reserved. 30 ****************************************************************************** 31 */ 32 33 package sun.text.normalizer; 34 35 import java.io.DataInputStream; 36 import java.io.InputStream; 37 import java.io.IOException; 38 39 /** 40 * Trie implementation which stores data in char, 16 bits. 41 * @author synwee 42 * @see com.ibm.icu.impl.Trie 43 * @since release 2.1, Jan 01 2002 44 */ 45 46 // note that i need to handle the block calculations later, since chartrie 47 // in icu4c uses the same index array. 48 public class CharTrie extends Trie 49 { 50 // public constructors --------------------------------------------- 51 52 /** 53 * <p>Creates a new Trie with the settings for the trie data.</p> 54 * <p>Unserialize the 32-bit-aligned input stream and use the data for the 55 * trie.</p> 56 * @param inputStream file input stream to a ICU data file, containing 57 * the trie 58 * @param dataManipulate object which provides methods to parse the char 59 * data 60 * @throws IOException thrown when data reading fails 61 * @draft 2.1 62 */ 63 public CharTrie(InputStream inputStream, 64 DataManipulate dataManipulate) throws IOException 65 { 66 super(inputStream, dataManipulate); 67 68 if (!isCharTrie()) { 69 throw new IllegalArgumentException( 70 "Data given does not belong to a char trie."); 71 } 72 } 73 74 // public methods -------------------------------------------------- 75 76 /** 77 * Gets the value associated with the codepoint. 78 * If no value is associated with the codepoint, a default value will be 79 * returned. 80 * @param ch codepoint 81 * @return offset to data 82 */ 83 public final char getCodePointValue(int ch) 84 { 85 int offset; 86 87 // fastpath for U+0000..U+D7FF 88 if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { 89 // copy of getRawOffset() 90 offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) 91 + (ch & INDEX_STAGE_3_MASK_); 92 return m_data_[offset]; 93 } 94 95 // handle U+D800..U+10FFFF 96 offset = getCodePointOffset(ch); 97 98 // return -1 if there is an error, in this case we return the default 99 // value: m_initialValue_ 100 return (offset >= 0) ? m_data_[offset] : m_initialValue_; 101 } 102 103 /** 104 * Gets the value to the data which this lead surrogate character points 105 * to. 106 * Returned data may contain folding offset information for the next 107 * trailing surrogate character. 108 * This method does not guarantee correct results for trail surrogates. 109 * @param ch lead surrogate character 110 * @return data value 111 */ 112 public final char getLeadValue(char ch) 113 { 114 return m_data_[getLeadOffset(ch)]; 115 } 116 117 // protected methods ----------------------------------------------- 118 119 /** 120 * <p>Parses the input stream and stores its trie content into a index and 121 * data array</p> 122 * @param inputStream data input stream containing trie data 123 * @exception IOException thrown when data reading fails 124 */ 125 protected final void unserialize(InputStream inputStream) 126 throws IOException 127 { 128 DataInputStream input = new DataInputStream(inputStream); 129 int indexDataLength = m_dataOffset_ + m_dataLength_; 130 m_index_ = new char[indexDataLength]; 131 for (int i = 0; i < indexDataLength; i ++) { 132 m_index_[i] = input.readChar(); 133 } 134 m_data_ = m_index_; 135 m_initialValue_ = m_data_[m_dataOffset_]; 136 } 145 protected final int getSurrogateOffset(char lead, char trail) 146 { 147 if (m_dataManipulate_ == null) { 148 throw new NullPointerException( 149 "The field DataManipulate in this Trie is null"); 150 } 151 152 // get fold position for the next trail surrogate 153 int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead)); 154 155 // get the real data from the folded lead/trail units 156 if (offset > 0) { 157 return getRawOffset(offset, (char)(trail & SURROGATE_MASK_)); 158 } 159 160 // return -1 if there is an error, in this case we return the default 161 // value: m_initialValue_ 162 return -1; 163 } 164 165 // private data members -------------------------------------------- 166 167 /** 168 * Default value 169 */ 170 private char m_initialValue_; 171 /** 172 * Array of char data 173 */ 174 private char m_data_[]; 175 } |