1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xml.internal.dtm.ref;
  22 
  23 import com.sun.org.apache.xml.internal.utils.IntVector;
  24 import java.util.ArrayList;
  25 import java.util.List;
  26 
  27 /** <p>DTMStringPool is an "interning" mechanism for strings. It will
  28  * create a stable 1:1 mapping between a set of string values and a set of
  29  * integer index values, so the integers can be used to reliably and
  30  * uniquely identify (and when necessary retrieve) the strings.</p>
  31  *
  32  * <p>Design Priorities:
  33  * <ul>
  34  * <li>String-to-index lookup speed is critical.</li>
  35  * <li>Index-to-String lookup speed is slightly less so.</li>
  36  * <li>Threadsafety is not guaranteed at this level.
  37  * Enforce that in the application if needed.</li>
  38  * <li>Storage efficiency is an issue but not a huge one.
  39  * It is expected that string pools won't exceed about 2000 entries.</li>
  40  * </ul>
  41  * </p>
  42  *
  43  * <p>Implementation detail: A standard Hashtable is relatively
  44  * inefficient when looking up primitive int values, especially when
  45  * we're already maintaining an int-to-string vector.  So I'm
  46  * maintaining a simple hash chain within this class.</p>
  47  *
  48  * <p>NOTE: There is nothing in the code that has a real dependency upon
  49  * String. It would work with any object type that implements reliable
  50  * .hashCode() and .equals() operations. The API enforces Strings because
  51  * it's safer that way, but this could trivially be turned into a general
  52  * ObjectPool if one was needed.</p>
  53  *
  54  * <p>Status: Passed basic test in main().</p>
  55  *
  56  * @LastModified: Oct 2017
  57  */
  58 public class DTMStringPool
  59 {
  60   List<String> m_intToString;
  61   static final int HASHPRIME=101;
  62   int[] m_hashStart=new int[HASHPRIME];
  63   IntVector m_hashChain;
  64   public static final int NULL=-1;
  65 
  66   /**
  67    * Create a DTMStringPool using the given chain size
  68    *
  69    * @param chainSize The size of the hash chain vector
  70    */
  71   public DTMStringPool(int chainSize)
  72     {
  73       m_intToString = new ArrayList<>();
  74       m_hashChain= new IntVector(chainSize);
  75       removeAllElements();
  76 
  77       // -sb Add this to force empty strings to be index 0.
  78       stringToIndex("");
  79     }
  80 
  81   public DTMStringPool()
  82     {
  83       this(512);
  84     }
  85 
  86   public void removeAllElements()
  87     {
  88       m_intToString.clear();
  89       for(int i=0;i<HASHPRIME;++i)
  90         m_hashStart[i]=NULL;
  91       m_hashChain.removeAllElements();
  92     }
  93 
  94   /** @return string whose value is uniquely identified by this integer index.
  95    * @throws java.lang.IndexOutOfBoundsException
  96    *  if index doesn't map to a string.
  97    * */
  98   public String indexToString(int i)
  99     throws java.lang.IndexOutOfBoundsException
 100     {
 101       if(i==NULL) return null;
 102       return m_intToString.get(i);
 103     }
 104 
 105   /** @return integer index uniquely identifying the value of this string. */
 106   public int stringToIndex(String s)
 107     {
 108       if(s==null) return NULL;
 109 
 110       int hashslot=s.hashCode()%HASHPRIME;
 111       if(hashslot<0) hashslot=-hashslot;
 112 
 113       // Is it one we already know?
 114       int hashlast=m_hashStart[hashslot];
 115       int hashcandidate=hashlast;
 116       while(hashcandidate!=NULL)
 117         {
 118           if(m_intToString.get(hashcandidate).equals(s))
 119             return hashcandidate;
 120 
 121           hashlast=hashcandidate;
 122           hashcandidate=m_hashChain.elementAt(hashcandidate);
 123         }
 124 
 125       // New value. Add to tables.
 126       int newIndex=m_intToString.size();
 127       m_intToString.add(s);
 128 
 129       m_hashChain.addElement(NULL);     // Initialize to no-following-same-hash
 130       if(hashlast==NULL)  // First for this hash
 131         m_hashStart[hashslot]=newIndex;
 132       else // Link from previous with same hash
 133         m_hashChain.setElementAt(newIndex,hashlast);
 134 
 135       return newIndex;
 136     }
 137 
 138   /** Command-line unit test driver. This test relies on the fact that
 139    * this version of the pool assigns indices consecutively, starting
 140    * from zero, as new unique strings are encountered.
 141    */
 142   public static void _main(String[] args)
 143   {
 144     String[] word={
 145       "Zero","One","Two","Three","Four","Five",
 146       "Six","Seven","Eight","Nine","Ten",
 147       "Eleven","Twelve","Thirteen","Fourteen","Fifteen",
 148       "Sixteen","Seventeen","Eighteen","Nineteen","Twenty",
 149       "Twenty-One","Twenty-Two","Twenty-Three","Twenty-Four",
 150       "Twenty-Five","Twenty-Six","Twenty-Seven","Twenty-Eight",
 151       "Twenty-Nine","Thirty","Thirty-One","Thirty-Two",
 152       "Thirty-Three","Thirty-Four","Thirty-Five","Thirty-Six",
 153       "Thirty-Seven","Thirty-Eight","Thirty-Nine"};
 154 
 155     DTMStringPool pool=new DTMStringPool();
 156 
 157     System.out.println("If no complaints are printed below, we passed initial test.");
 158 
 159     for(int pass=0;pass<=1;++pass)
 160       {
 161         int i;
 162 
 163         for(i=0;i<word.length;++i)
 164           {
 165             int j=pool.stringToIndex(word[i]);
 166             if(j!=i)
 167               System.out.println("\tMismatch populating pool: assigned "+
 168                                  j+" for create "+i);
 169           }
 170 
 171         for(i=0;i<word.length;++i)
 172           {
 173             int j=pool.stringToIndex(word[i]);
 174             if(j!=i)
 175               System.out.println("\tMismatch in stringToIndex: returned "+
 176                                  j+" for lookup "+i);
 177           }
 178 
 179         for(i=0;i<word.length;++i)
 180           {
 181             String w=pool.indexToString(i);
 182             if(!word[i].equals(w))
 183               System.out.println("\tMismatch in indexToString: returned"+
 184                                  w+" for lookup "+i);
 185           }
 186 
 187         pool.removeAllElements();
 188 
 189         System.out.println("\nPass "+pass+" complete\n");
 190       } // end pass loop
 191   }
 192 }