1 /*
   2  * Copyright (c) 2003, 2005, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 
   5 /*
   6  * Copyright 2005 The Apache Software Foundation.
   7  *
   8  * Licensed under the Apache License, Version 2.0 (the "License");
   9  * you may not use this file except in compliance with the License.
  10  * You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xerces.internal.util;
  22 
  23 /**
  24  * This class is a symbol table implementation that guarantees that
  25  * strings used as identifiers are unique references. Multiple calls
  26  * to <code>addSymbol</code> will always return the same string
  27  * reference.
  28  * <p>
  29  * The symbol table performs the same task as <code>String.intern()</code>
  30  * with the following differences:
  31  * <ul>
  32  *  <li>
  33  *   A new string object does not need to be created in order to
  34  *   retrieve a unique reference. Symbols can be added by using
  35  *   a series of characters in a character array.
  36  *  </li>
  37  *  <li>
  38  *   Users of the symbol table can provide their own symbol hashing
  39  *   implementation. For example, a simple string hashing algorithm
  40  *   may fail to produce a balanced set of hashcodes for symbols
  41  *   that are <em>mostly</em> unique. Strings with similar leading
  42  *   characters are especially prone to this poor hashing behavior.
  43  *  </li>
  44  * </ul>
  45  *
  46  * @see SymbolHash
  47  *
  48  * @author Andy Clark
  49  *
  50  */
  51 public class SymbolTable {
  52 
  53     //
  54     // Constants
  55     //
  56 
  57     /** Default table size. */
  58     protected static final int TABLE_SIZE = 173;
  59 
  60 
  61     /** Buckets. */
  62     protected Entry[] fBuckets = null;
  63 
  64     // actual table size
  65     protected int fTableSize;
  66 
  67     //
  68     // Constructors
  69     //
  70 
  71     /** Constructs a symbol table with a default number of buckets. */
  72     public SymbolTable() {
  73         this(TABLE_SIZE);
  74     }
  75 
  76     /** Constructs a symbol table with a specified number of buckets. */
  77     public SymbolTable(int tableSize) {
  78         fTableSize = tableSize;
  79         fBuckets = new Entry[fTableSize];
  80     }
  81 
  82     //
  83     // Public methods
  84     //
  85 
  86     /**
  87      * Adds the specified symbol to the symbol table and returns a
  88      * reference to the unique symbol. If the symbol already exists,
  89      * the previous symbol reference is returned instead, in order
  90      * guarantee that symbol references remain unique.
  91      *
  92      * @param symbol The new symbol.
  93      */
  94     public String addSymbol(String symbol) {
  95 
  96         // search for identical symbol
  97         final int hash = hash(symbol);
  98         final int bucket = hash % fTableSize;
  99         final int length = symbol.length();
 100         OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
 101             if (length == entry.characters.length && hash == entry.hashCode) {
 102                 if(symbol.regionMatches(0,entry.symbol,0,length)){
 103                     return entry.symbol;
 104                 }
 105                 else{
 106                     continue OUTER;
 107                 }
 108                 /**
 109                 for (int i = 0; i < length; i++) {
 110                     if (symbol.charAt(i) != entry.characters[i]) {
 111                         continue OUTER;
 112                     }
 113                 }
 114                 symbolAsArray = entry.characters;
 115                 return entry.symbol;
 116                  */
 117             }
 118         }
 119 
 120         // create new entry
 121         Entry entry = new Entry(symbol, fBuckets[bucket]);
 122         entry.hashCode = hash;
 123         fBuckets[bucket] = entry;
 124         return entry.symbol;
 125 
 126     } // addSymbol(String):String
 127 
 128     /**
 129      * Adds the specified symbol to the symbol table and returns a
 130      * reference to the unique symbol. If the symbol already exists,
 131      * the previous symbol reference is returned instead, in order
 132      * guarantee that symbol references remain unique.
 133      *
 134      * @param buffer The buffer containing the new symbol.
 135      * @param offset The offset into the buffer of the new symbol.
 136      * @param length The length of the new symbol in the buffer.
 137      */
 138     public String addSymbol(char[] buffer, int offset, int length) {
 139         // search for identical symbol
 140         int hash = hash(buffer, offset, length);
 141         int bucket = hash % fTableSize;
 142         OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
 143             if (length == entry.characters.length && hash ==entry.hashCode) {
 144                 for (int i = 0; i < length; i++) {
 145                     if (buffer[offset + i] != entry.characters[i]) {
 146                         continue OUTER;
 147                     }
 148                 }
 149                 return entry.symbol;
 150             }
 151         }
 152 
 153         // add new entry
 154         Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]);
 155         fBuckets[bucket] = entry;
 156         entry.hashCode = hash;
 157         return entry.symbol;
 158 
 159     } // addSymbol(char[],int,int):String
 160 
 161     /**
 162      * Returns a hashcode value for the specified symbol. The value
 163      * returned by this method must be identical to the value returned
 164      * by the <code>hash(char[],int,int)</code> method when called
 165      * with the character array that comprises the symbol string.
 166      *
 167      * @param symbol The symbol to hash.
 168      */
 169     public int hash(String symbol) {
 170 
 171         int code = 0;
 172         int length = symbol.length();
 173         for (int i = 0; i < length; i++) {
 174             code = code * 37 + symbol.charAt(i);
 175         }
 176         return code & 0x7FFFFFFF;
 177 
 178     } // hash(String):int
 179 
 180     /**
 181      * Returns a hashcode value for the specified symbol information.
 182      * The value returned by this method must be identical to the value
 183      * returned by the <code>hash(String)</code> method when called
 184      * with the string object created from the symbol information.
 185      *
 186      * @param buffer The character buffer containing the symbol.
 187      * @param offset The offset into the character buffer of the start
 188      *               of the symbol.
 189      * @param length The length of the symbol.
 190      */
 191     public int hash(char[] buffer, int offset, int length) {
 192 
 193         int code = 0;
 194         for (int i = 0; i < length; i++) {
 195             code = code * 37 + buffer[offset + i];
 196         }
 197         return code & 0x7FFFFFFF;
 198 
 199     } // hash(char[],int,int):int
 200 
 201     /**
 202      * Returns true if the symbol table already contains the specified
 203      * symbol.
 204      *
 205      * @param symbol The symbol to look for.
 206      */
 207     public boolean containsSymbol(String symbol) {
 208 
 209         // search for identical symbol
 210         int hash = hash(symbol);
 211         int bucket = hash % fTableSize;
 212         int length = symbol.length();
 213         OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
 214             if (length == entry.characters.length && hash == entry.hashCode) {
 215                 if(symbol.regionMatches(0,entry.symbol,0,length)){
 216                     return true;
 217                 }
 218                 else {
 219                     continue OUTER;
 220                 }
 221                 /**
 222                 for (int i = 0; i < length; i++) {
 223                     if (symbol.charAt(i) != entry.characters[i]) {
 224                         continue OUTER;
 225                     }
 226                 }
 227                  return true;
 228                  */
 229             }
 230         }
 231 
 232         return false;
 233 
 234     } // containsSymbol(String):boolean
 235 
 236     /**
 237      * Returns true if the symbol table already contains the specified
 238      * symbol.
 239      *
 240      * @param buffer The buffer containing the symbol to look for.
 241      * @param offset The offset into the buffer.
 242      * @param length The length of the symbol in the buffer.
 243      */
 244     public boolean containsSymbol(char[] buffer, int offset, int length) {
 245 
 246         // search for identical symbol
 247         int hash = hash(buffer, offset, length) ;
 248         int bucket = hash % fTableSize;
 249         OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry = entry.next) {
 250             if (length == entry.characters.length && hash == entry.hashCode) {
 251                 for (int i = 0; i < length; i++) {
 252                     if (buffer[offset + i] != entry.characters[i]) {
 253                         continue OUTER;
 254                     }
 255                 }
 256                 return true;
 257             }
 258         }
 259 
 260         return false;
 261 
 262     } // containsSymbol(char[],int,int):boolean
 263 
 264 
 265     //
 266     // Classes
 267     //
 268 
 269     /**
 270      * This class is a symbol table entry. Each entry acts as a node
 271      * in a linked list.
 272      */
 273     protected static final class Entry {
 274 
 275         //
 276         // Data
 277         //
 278 
 279         /** Symbol. */
 280         public String symbol;
 281         int hashCode = 0;
 282 
 283         /**
 284          * Symbol characters. This information is duplicated here for
 285          * comparison performance.
 286          */
 287         public char[] characters;
 288 
 289         /** The next entry. */
 290         public Entry next;
 291 
 292         //
 293         // Constructors
 294         //
 295 
 296         /**
 297          * Constructs a new entry from the specified symbol and next entry
 298          * reference.
 299          */
 300         public Entry(String symbol, Entry next) {
 301             this.symbol = symbol.intern();
 302             characters = new char[symbol.length()];
 303             symbol.getChars(0, characters.length, characters, 0);
 304             this.next = next;
 305         }
 306 
 307         /**
 308          * Constructs a new entry from the specified symbol information and
 309          * next entry reference.
 310          */
 311         public Entry(char[] ch, int offset, int length, Entry next) {
 312             characters = new char[length];
 313             System.arraycopy(ch, offset, characters, 0, length);
 314             symbol = new String(characters).intern();
 315             this.next = next;
 316         }
 317 
 318     } // class Entry
 319 
 320 } // class SymbolTable