1 /* 2 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xml.internal.dtm.ref; 22 23 import com.sun.org.apache.xml.internal.utils.IntVector; 24 import java.util.ArrayList; 25 import java.util.List; 26 27 /** <p>DTMStringPool is an "interning" mechanism for strings. It will 28 * create a stable 1:1 mapping between a set of string values and a set of 29 * integer index values, so the integers can be used to reliably and 30 * uniquely identify (and when necessary retrieve) the strings.</p> 31 * 32 * <p>Design Priorities: 33 * <ul> 34 * <li>String-to-index lookup speed is critical.</li> 35 * <li>Index-to-String lookup speed is slightly less so.</li> 36 * <li>Threadsafety is not guaranteed at this level. 37 * Enforce that in the application if needed.</li> 38 * <li>Storage efficiency is an issue but not a huge one. 39 * It is expected that string pools won't exceed about 2000 entries.</li> 40 * </ul> 41 * </p> 42 * 43 * <p>Implementation detail: A standard Hashtable is relatively 44 * inefficient when looking up primitive int values, especially when 45 * we're already maintaining an int-to-string vector. So I'm 46 * maintaining a simple hash chain within this class.</p> 47 * 48 * <p>NOTE: There is nothing in the code that has a real dependency upon 49 * String. It would work with any object type that implements reliable 50 * .hashCode() and .equals() operations. The API enforces Strings because 51 * it's safer that way, but this could trivially be turned into a general 52 * ObjectPool if one was needed.</p> 53 * 54 * <p>Status: Passed basic test in main().</p> 55 * 56 * @LastModified: Oct 2017 57 */ 58 public class DTMStringPool 59 { 60 List<String> m_intToString; 61 static final int HASHPRIME=101; 62 int[] m_hashStart=new int[HASHPRIME]; 63 IntVector m_hashChain; 64 public static final int NULL=-1; 65 66 /** 67 * Create a DTMStringPool using the given chain size 68 * 69 * @param chainSize The size of the hash chain vector 70 */ 71 public DTMStringPool(int chainSize) 72 { 73 m_intToString = new ArrayList<>(); 74 m_hashChain= new IntVector(chainSize); 75 removeAllElements(); 76 77 // -sb Add this to force empty strings to be index 0. 78 stringToIndex(""); 79 } 80 81 public DTMStringPool() 82 { 83 this(512); 84 } 85 86 public void removeAllElements() 87 { 88 m_intToString.clear(); 89 for(int i=0;i<HASHPRIME;++i) 90 m_hashStart[i]=NULL; 91 m_hashChain.removeAllElements(); 92 } 93 94 /** @return string whose value is uniquely identified by this integer index. 95 * @throws java.lang.IndexOutOfBoundsException 96 * if index doesn't map to a string. 97 * */ 98 public String indexToString(int i) 99 throws java.lang.IndexOutOfBoundsException 100 { 101 if(i==NULL) return null; 102 return m_intToString.get(i); 103 } 104 105 /** @return integer index uniquely identifying the value of this string. */ 106 public int stringToIndex(String s) 107 { 108 if(s==null) return NULL; 109 110 int hashslot=s.hashCode()%HASHPRIME; 111 if(hashslot<0) hashslot=-hashslot; 112 113 // Is it one we already know? 114 int hashlast=m_hashStart[hashslot]; 115 int hashcandidate=hashlast; 116 while(hashcandidate!=NULL) 117 { 118 if(m_intToString.get(hashcandidate).equals(s)) 119 return hashcandidate; 120 121 hashlast=hashcandidate; 122 hashcandidate=m_hashChain.elementAt(hashcandidate); 123 } 124 125 // New value. Add to tables. 126 int newIndex=m_intToString.size(); 127 m_intToString.add(s); 128 129 m_hashChain.addElement(NULL); // Initialize to no-following-same-hash 130 if(hashlast==NULL) // First for this hash 131 m_hashStart[hashslot]=newIndex; 132 else // Link from previous with same hash 133 m_hashChain.setElementAt(newIndex,hashlast); 134 135 return newIndex; 136 } 137 138 /** Command-line unit test driver. This test relies on the fact that 139 * this version of the pool assigns indices consecutively, starting 140 * from zero, as new unique strings are encountered. 141 */ 142 public static void _main(String[] args) 143 { 144 String[] word={ 145 "Zero","One","Two","Three","Four","Five", 146 "Six","Seven","Eight","Nine","Ten", 147 "Eleven","Twelve","Thirteen","Fourteen","Fifteen", 148 "Sixteen","Seventeen","Eighteen","Nineteen","Twenty", 149 "Twenty-One","Twenty-Two","Twenty-Three","Twenty-Four", 150 "Twenty-Five","Twenty-Six","Twenty-Seven","Twenty-Eight", 151 "Twenty-Nine","Thirty","Thirty-One","Thirty-Two", 152 "Thirty-Three","Thirty-Four","Thirty-Five","Thirty-Six", 153 "Thirty-Seven","Thirty-Eight","Thirty-Nine"}; 154 155 DTMStringPool pool=new DTMStringPool(); 156 157 System.out.println("If no complaints are printed below, we passed initial test."); 158 159 for(int pass=0;pass<=1;++pass) 160 { 161 int i; 162 163 for(i=0;i<word.length;++i) 164 { 165 int j=pool.stringToIndex(word[i]); 166 if(j!=i) 167 System.out.println("\tMismatch populating pool: assigned "+ 168 j+" for create "+i); 169 } 170 171 for(i=0;i<word.length;++i) 172 { 173 int j=pool.stringToIndex(word[i]); 174 if(j!=i) 175 System.out.println("\tMismatch in stringToIndex: returned "+ 176 j+" for lookup "+i); 177 } 178 179 for(i=0;i<word.length;++i) 180 { 181 String w=pool.indexToString(i); 182 if(!word[i].equals(w)) 183 System.out.println("\tMismatch in indexToString: returned"+ 184 w+" for lookup "+i); 185 } 186 187 pool.removeAllElements(); 188 189 System.out.println("\nPass "+pass+" complete\n"); 190 } // end pass loop 191 } 192 }