1 /* 2 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Copyright 2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.util; 22 23 /** 24 * This class is a symbol table implementation that guarantees that 25 * strings used as identifiers are unique references. Multiple calls 26 * to <code>addSymbol</code> will always return the same string 27 * reference. 28 * <p> 29 * The symbol table performs the same task as <code>String.intern()</code> 30 * with the following differences: 31 * <ul> 32 * <li> 33 * A new string object does not need to be created in order to 34 * retrieve a unique reference. Symbols can be added by using 35 * a series of characters in a character array. 36 * </li> 37 * <li> 38 * Users of the symbol table can provide their own symbol hashing 39 * implementation. For example, a simple string hashing algorithm 40 * may fail to produce a balanced set of hashcodes for symbols 41 * that are <em>mostly</em> unique. Strings with similar leading 42 * characters are especially prone to this poor hashing behavior. 43 * </li> 44 * </ul> 45 * 46 * @see SymbolHash 47 * 48 * @author Andy Clark 49 * 50 */ 51 public class SymbolTable { 52 53 // 54 // Constants 55 // 56 57 /** Default table size. */ 58 protected static final int TABLE_SIZE = 173; 59 60 61 /** Buckets. */ 62 protected Entry[] fBuckets = null; 63 64 // actual table size 65 protected int fTableSize; 66 67 // 68 // Constructors 69 // 70 71 /** Constructs a symbol table with a default number of buckets. */ 72 public SymbolTable() { 73 this(TABLE_SIZE); 74 } 75 76 /** Constructs a symbol table with a specified number of buckets. */ 77 public SymbolTable(int tableSize) { 78 fTableSize = tableSize; 79 fBuckets = new Entry[fTableSize]; 80 } 81 82 // 83 // Public methods 84 // 85 86 /** 87 * Adds the specified symbol to the symbol table and returns a 88 * reference to the unique symbol. If the symbol already exists, 89 * the previous symbol reference is returned instead, in order 90 * guarantee that symbol references remain unique. 91 * 92 * @param symbol The new symbol. 93 */ 94 public String addSymbol(String symbol) { 95 96 // search for identical symbol 97 final int hash = hash(symbol); 98 final int bucket = hash % fTableSize; 99 final int length = symbol.length(); 100 OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { 101 if (length == entry.characters.length && hash == entry.hashCode) { 102 if(symbol.regionMatches(0,entry.symbol,0,length)){ 103 return entry.symbol; 104 } 105 else{ 106 continue OUTER; 107 } 108 /** 109 for (int i = 0; i < length; i++) { 110 if (symbol.charAt(i) != entry.characters[i]) { 111 continue OUTER; 112 } 113 } 114 symbolAsArray = entry.characters; 115 return entry.symbol; 116 */ 117 } 118 } 119 120 // create new entry 121 Entry entry = new Entry(symbol, fBuckets[bucket]); 122 entry.hashCode = hash; 123 fBuckets[bucket] = entry; 124 return entry.symbol; 125 126 } // addSymbol(String):String 127 128 /** 129 * Adds the specified symbol to the symbol table and returns a 130 * reference to the unique symbol. If the symbol already exists, 131 * the previous symbol reference is returned instead, in order 132 * guarantee that symbol references remain unique. 133 * 134 * @param buffer The buffer containing the new symbol. 135 * @param offset The offset into the buffer of the new symbol. 136 * @param length The length of the new symbol in the buffer. 137 */ 138 public String addSymbol(char[] buffer, int offset, int length) { 139 // search for identical symbol 140 int hash = hash(buffer, offset, length); 141 int bucket = hash % fTableSize; 142 OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { 143 if (length == entry.characters.length && hash ==entry.hashCode) { 144 for (int i = 0; i < length; i++) { 145 if (buffer[offset + i] != entry.characters[i]) { 146 continue OUTER; 147 } 148 } 149 return entry.symbol; 150 } 151 } 152 153 // add new entry 154 Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]); 155 fBuckets[bucket] = entry; 156 entry.hashCode = hash; 157 return entry.symbol; 158 159 } // addSymbol(char[],int,int):String 160 161 /** 162 * Returns a hashcode value for the specified symbol. The value 163 * returned by this method must be identical to the value returned 164 * by the <code>hash(char[],int,int)</code> method when called 165 * with the character array that comprises the symbol string. 166 * 167 * @param symbol The symbol to hash. 168 */ 169 public int hash(String symbol) { 170 171 int code = 0; 172 int length = symbol.length(); 173 for (int i = 0; i < length; i++) { 174 code = code * 37 + symbol.charAt(i); 175 } 176 return code & 0x7FFFFFFF; 177 178 } // hash(String):int 179 180 /** 181 * Returns a hashcode value for the specified symbol information. 182 * The value returned by this method must be identical to the value 183 * returned by the <code>hash(String)</code> method when called 184 * with the string object created from the symbol information. 185 * 186 * @param buffer The character buffer containing the symbol. 187 * @param offset The offset into the character buffer of the start 188 * of the symbol. 189 * @param length The length of the symbol. 190 */ 191 public int hash(char[] buffer, int offset, int length) { 192 193 int code = 0; 194 for (int i = 0; i < length; i++) { 195 code = code * 37 + buffer[offset + i]; 196 } 197 return code & 0x7FFFFFFF; 198 199 } // hash(char[],int,int):int 200 201 /** 202 * Returns true if the symbol table already contains the specified 203 * symbol. 204 * 205 * @param symbol The symbol to look for. 206 */ 207 public boolean containsSymbol(String symbol) { 208 209 // search for identical symbol 210 int hash = hash(symbol); 211 int bucket = hash % fTableSize; 212 int length = symbol.length(); 213 OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { 214 if (length == entry.characters.length && hash == entry.hashCode) { 215 if(symbol.regionMatches(0,entry.symbol,0,length)){ 216 return true; 217 } 218 else { 219 continue OUTER; 220 } 221 /** 222 for (int i = 0; i < length; i++) { 223 if (symbol.charAt(i) != entry.characters[i]) { 224 continue OUTER; 225 } 226 } 227 return true; 228 */ 229 } 230 } 231 232 return false; 233 234 } // containsSymbol(String):boolean 235 236 /** 237 * Returns true if the symbol table already contains the specified 238 * symbol. 239 * 240 * @param buffer The buffer containing the symbol to look for. 241 * @param offset The offset into the buffer. 242 * @param length The length of the symbol in the buffer. 243 */ 244 public boolean containsSymbol(char[] buffer, int offset, int length) { 245 246 // search for identical symbol 247 int hash = hash(buffer, offset, length) ; 248 int bucket = hash % fTableSize; 249 OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) { 250 if (length == entry.characters.length && hash == entry.hashCode) { 251 for (int i = 0; i < length; i++) { 252 if (buffer[offset + i] != entry.characters[i]) { 253 continue OUTER; 254 } 255 } 256 return true; 257 } 258 } 259 260 return false; 261 262 } // containsSymbol(char[],int,int):boolean 263 264 265 // 266 // Classes 267 // 268 269 /** 270 * This class is a symbol table entry. Each entry acts as a node 271 * in a linked list. 272 */ 273 protected static final class Entry { 274 275 // 276 // Data 277 // 278 279 /** Symbol. */ 280 public String symbol; 281 int hashCode = 0; 282 283 /** 284 * Symbol characters. This information is duplicated here for 285 * comparison performance. 286 */ 287 public char[] characters; 288 289 /** The next entry. */ 290 public Entry next; 291 292 // 293 // Constructors 294 // 295 296 /** 297 * Constructs a new entry from the specified symbol and next entry 298 * reference. 299 */ 300 public Entry(String symbol, Entry next) { 301 this.symbol = symbol.intern(); 302 characters = new char[symbol.length()]; 303 symbol.getChars(0, characters.length, characters, 0); 304 this.next = next; 305 } 306 307 /** 308 * Constructs a new entry from the specified symbol information and 309 * next entry reference. 310 */ 311 public Entry(char[] ch, int offset, int length, Entry next) { 312 characters = new char[length]; 313 System.arraycopy(ch, offset, characters, 0, length); 314 symbol = new String(characters).intern(); 315 this.next = next; 316 } 317 318 } // class Entry 319 320 } // class SymbolTable