1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * @LastModified: Oct 2017
   4  */
   5 /*
   6  * Licensed to the Apache Software Foundation (ASF) under one or more
   7  * contributor license agreements.  See the NOTICE file distributed with
   8  * this work for additional information regarding copyright ownership.
   9  * The ASF licenses this file to You under the Apache License, Version 2.0
  10  * (the "License"); you may not use this file except in compliance with
  11  * the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  */
  21 
  22 package com.sun.org.apache.xml.internal.dtm.ref;
  23 
  24 import com.sun.org.apache.xml.internal.utils.IntVector;
  25 import java.util.ArrayList;
  26 import java.util.List;
  27 
  28 /** <p>DTMStringPool is an "interning" mechanism for strings. It will
  29  * create a stable 1:1 mapping between a set of string values and a set of
  30  * integer index values, so the integers can be used to reliably and
  31  * uniquely identify (and when necessary retrieve) the strings.</p>
  32  *
  33  * <p>Design Priorities:
  34  * <ul>
  35  * <li>String-to-index lookup speed is critical.</li>
  36  * <li>Index-to-String lookup speed is slightly less so.</li>
  37  * <li>Threadsafety is not guaranteed at this level.
  38  * Enforce that in the application if needed.</li>
  39  * <li>Storage efficiency is an issue but not a huge one.
  40  * It is expected that string pools won't exceed about 2000 entries.</li>
  41  * </ul>
  42  * </p>
  43  *
  44  * <p>Implementation detail: A standard Hashtable is relatively
  45  * inefficient when looking up primitive int values, especially when
  46  * we're already maintaining an int-to-string vector.  So I'm
  47  * maintaining a simple hash chain within this class.</p>
  48  *
  49  * <p>NOTE: There is nothing in the code that has a real dependency upon
  50  * String. It would work with any object type that implements reliable
  51  * .hashCode() and .equals() operations. The API enforces Strings because
  52  * it's safer that way, but this could trivially be turned into a general
  53  * ObjectPool if one was needed.</p>
  54  *
  55  * <p>Status: Passed basic test in main().</p>
  56  * */
  57 public class DTMStringPool
  58 {
  59   List<String> m_intToString;
  60   static final int HASHPRIME=101;
  61   int[] m_hashStart=new int[HASHPRIME];
  62   IntVector m_hashChain;
  63   public static final int NULL=-1;
  64 
  65   /**
  66    * Create a DTMStringPool using the given chain size
  67    *
  68    * @param chainSize The size of the hash chain vector
  69    */
  70   public DTMStringPool(int chainSize)
  71     {
  72       m_intToString = new ArrayList<>();
  73       m_hashChain= new IntVector(chainSize);
  74       removeAllElements();
  75 
  76       // -sb Add this to force empty strings to be index 0.
  77       stringToIndex("");
  78     }
  79 
  80   public DTMStringPool()
  81     {
  82       this(512);
  83     }
  84 
  85   public void removeAllElements()
  86     {
  87       m_intToString.clear();
  88       for(int i=0;i<HASHPRIME;++i)
  89         m_hashStart[i]=NULL;
  90       m_hashChain.removeAllElements();
  91     }
  92 
  93   /** @return string whose value is uniquely identified by this integer index.
  94    * @throws java.lang.IndexOutOfBoundsException
  95    *  if index doesn't map to a string.
  96    * */
  97   public String indexToString(int i)
  98     throws java.lang.IndexOutOfBoundsException
  99     {
 100       if(i==NULL) return null;
 101       return m_intToString.get(i);
 102     }
 103 
 104   /** @return integer index uniquely identifying the value of this string. */
 105   public int stringToIndex(String s)
 106     {
 107       if(s==null) return NULL;
 108 
 109       int hashslot=s.hashCode()%HASHPRIME;
 110       if(hashslot<0) hashslot=-hashslot;
 111 
 112       // Is it one we already know?
 113       int hashlast=m_hashStart[hashslot];
 114       int hashcandidate=hashlast;
 115       while(hashcandidate!=NULL)
 116         {
 117           if(m_intToString.get(hashcandidate).equals(s))
 118             return hashcandidate;
 119 
 120           hashlast=hashcandidate;
 121           hashcandidate=m_hashChain.elementAt(hashcandidate);
 122         }
 123 
 124       // New value. Add to tables.
 125       int newIndex=m_intToString.size();
 126       m_intToString.add(s);
 127 
 128       m_hashChain.addElement(NULL);     // Initialize to no-following-same-hash
 129       if(hashlast==NULL)  // First for this hash
 130         m_hashStart[hashslot]=newIndex;
 131       else // Link from previous with same hash
 132         m_hashChain.setElementAt(newIndex,hashlast);
 133 
 134       return newIndex;
 135     }
 136 
 137   /** Command-line unit test driver. This test relies on the fact that
 138    * this version of the pool assigns indices consecutively, starting
 139    * from zero, as new unique strings are encountered.
 140    */
 141   public static void _main(String[] args)
 142   {
 143     String[] word={
 144       "Zero","One","Two","Three","Four","Five",
 145       "Six","Seven","Eight","Nine","Ten",
 146       "Eleven","Twelve","Thirteen","Fourteen","Fifteen",
 147       "Sixteen","Seventeen","Eighteen","Nineteen","Twenty",
 148       "Twenty-One","Twenty-Two","Twenty-Three","Twenty-Four",
 149       "Twenty-Five","Twenty-Six","Twenty-Seven","Twenty-Eight",
 150       "Twenty-Nine","Thirty","Thirty-One","Thirty-Two",
 151       "Thirty-Three","Thirty-Four","Thirty-Five","Thirty-Six",
 152       "Thirty-Seven","Thirty-Eight","Thirty-Nine"};
 153 
 154     DTMStringPool pool=new DTMStringPool();
 155 
 156     System.out.println("If no complaints are printed below, we passed initial test.");
 157 
 158     for(int pass=0;pass<=1;++pass)
 159       {
 160         int i;
 161 
 162         for(i=0;i<word.length;++i)
 163           {
 164             int j=pool.stringToIndex(word[i]);
 165             if(j!=i)
 166               System.out.println("\tMismatch populating pool: assigned "+
 167                                  j+" for create "+i);
 168           }
 169 
 170         for(i=0;i<word.length;++i)
 171           {
 172             int j=pool.stringToIndex(word[i]);
 173             if(j!=i)
 174               System.out.println("\tMismatch in stringToIndex: returned "+
 175                                  j+" for lookup "+i);
 176           }
 177 
 178         for(i=0;i<word.length;++i)
 179           {
 180             String w=pool.indexToString(i);
 181             if(!word[i].equals(w))
 182               System.out.println("\tMismatch in indexToString: returned"+
 183                                  w+" for lookup "+i);
 184           }
 185 
 186         pool.removeAllElements();
 187 
 188         System.out.println("\nPass "+pass+" complete\n");
 189       } // end pass loop
 190   }
 191 }