1 /*
   2  * Copyright (c) 1999, 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  28  * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
  29  *
  30  *   The original version of this source code and documentation is copyrighted
  31  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  32  * materials are provided under terms of a License Agreement between Taligent
  33  * and Sun. This technology is protected by multiple US and International
  34  * patents. This notice and attribution to Taligent may not be removed.
  35  *   Taligent is a registered trademark of Taligent, Inc.
  36  *
  37  */
  38 
  39 package java.text;
  40 
  41 import java.util.Vector;
  42 import sun.text.UCompactIntArray;
  43 import sun.text.IntHashtable;
  44 
  45 /**
  46  * This class contains the static state of a RuleBasedCollator: The various
  47  * tables that are used by the collation routines.  Several RuleBasedCollators
  48  * can share a single RBCollationTables object, easing memory requirements and
  49  * improving performance.
  50  */
  51 final class RBCollationTables {
  52     //===========================================================================================
  53     //  The following diagram shows the data structure of the RBCollationTables object.
  54     //  Suppose we have the rule, where 'o-umlaut' is the unicode char 0x00F6.
  55     //  "a, A < b, B < c, C, ch, cH, Ch, CH < d, D ... < o, O; 'o-umlaut'/E, 'O-umlaut'/E ...".
  56     //  What the rule says is, sorts 'ch'ligatures and 'c' only with tertiary difference and
  57     //  sorts 'o-umlaut' as if it's always expanded with 'e'.
  58     //
  59     // mapping table                     contracting list           expanding list
  60     // (contains all unicode char
  61     //  entries)                   ___    ____________       _________________________
  62     //  ________                +>|_*_|->|'c' |v('c') |  +>|v('o')|v('umlaut')|v('e')|
  63     // |_\u0001_|-> v('\u0001') | |_:_|  |------------|  | |-------------------------|
  64     // |_\u0002_|-> v('\u0002') | |_:_|  |'ch'|v('ch')|  | |             :           |
  65     // |____:___|               | |_:_|  |------------|  | |-------------------------|
  66     // |____:___|               |        |'cH'|v('cH')|  | |             :           |
  67     // |__'a'___|-> v('a')      |        |------------|  | |-------------------------|
  68     // |__'b'___|-> v('b')      |        |'Ch'|v('Ch')|  | |             :           |
  69     // |____:___|               |        |------------|  | |-------------------------|
  70     // |____:___|               |        |'CH'|v('CH')|  | |             :           |
  71     // |___'c'__|----------------         ------------   | |-------------------------|
  72     // |____:___|                                        | |             :           |
  73     // |o-umlaut|----------------------------------------  |_________________________|
  74     // |____:___|
  75     //
  76     // Noted by Helena Shih on 6/23/97
  77     //============================================================================================
  78 
  79     public RBCollationTables(String rules, int decmp) throws ParseException {
  80         this.rules = rules;
  81 
  82         RBTableBuilder builder = new RBTableBuilder(new BuildAPI());
  83         builder.build(rules, decmp); // this object is filled in through
  84                                             // the BuildAPI object
  85     }
  86 
  87     final class BuildAPI {
  88         /**
  89          * Private constructor.  Prevents anyone else besides RBTableBuilder
  90          * from gaining direct access to the internals of this class.
  91          */
  92         private BuildAPI() {
  93         }
  94 
  95         /**
  96          * This function is used by RBTableBuilder to fill in all the members of this
  97          * object.  (Effectively, the builder class functions as a "friend" of this
  98          * class, but to avoid changing too much of the logic, it carries around "shadow"
  99          * copies of all these variables until the end of the build process and then
 100          * copies them en masse into the actual tables object once all the construction
 101          * logic is complete.  This function does that "copying en masse".
 102          * @param f2ary The value for frenchSec (the French-secondary flag)
 103          * @param swap The value for SE Asian swapping rule
 104          * @param map The collator's character-mapping table (the value for mapping)
 105          * @param cTbl The collator's contracting-character table (the value for contractTable)
 106          * @param eTbl The collator's expanding-character table (the value for expandTable)
 107          * @param cFlgs The hash table of characters that participate in contracting-
 108          *              character sequences (the value for contractFlags)
 109          * @param mso The value for maxSecOrder
 110          * @param mto The value for maxTerOrder
 111          */
 112         void fillInTables(boolean f2ary,
 113                           boolean swap,
 114                           UCompactIntArray map,
 115                           Vector<Vector<EntryPair>> cTbl,
 116                           Vector<int[]> eTbl,
 117                           IntHashtable cFlgs,
 118                           short mso,
 119                           short mto) {
 120             frenchSec = f2ary;
 121             seAsianSwapping = swap;
 122             mapping = map;
 123             contractTable = cTbl;
 124             expandTable = eTbl;
 125             contractFlags = cFlgs;
 126             maxSecOrder = mso;
 127             maxTerOrder = mto;
 128         }
 129     }
 130 
 131     /**
 132      * Gets the table-based rules for the collation object.
 133      * @return returns the collation rules that the table collation object
 134      * was created from.
 135      */
 136     public String getRules()
 137     {
 138         return rules;
 139     }
 140 
 141     public boolean isFrenchSec() {
 142         return frenchSec;
 143     }
 144 
 145     public boolean isSEAsianSwapping() {
 146         return seAsianSwapping;
 147     }
 148 
 149     // ==============================================================
 150     // internal (for use by CollationElementIterator)
 151     // ==============================================================
 152 
 153     /**
 154      *  Get the entry of hash table of the contracting string in the collation
 155      *  table.
 156      *  @param ch the starting character of the contracting string
 157      */
 158     Vector<EntryPair> getContractValues(int ch)
 159     {
 160         int index = mapping.elementAt(ch);
 161         return getContractValuesImpl(index - CONTRACTCHARINDEX);
 162     }
 163 
 164     //get contract values from contractTable by index
 165     private Vector<EntryPair> getContractValuesImpl(int index)
 166     {
 167         if (index >= 0)
 168         {
 169             return contractTable.elementAt(index);
 170         }
 171         else // not found
 172         {
 173             return null;
 174         }
 175     }
 176 
 177     /**
 178      * Returns true if this character appears anywhere in a contracting
 179      * character sequence.  (Used by CollationElementIterator.setOffset().)
 180      */
 181     boolean usedInContractSeq(int c) {
 182         return contractFlags.get(c) == 1;
 183     }
 184 
 185     /**
 186       * Return the maximum length of any expansion sequences that end
 187       * with the specified comparison order.
 188       *
 189       * @param order a collation order returned by previous or next.
 190       * @return the maximum length of any expansion seuences ending
 191       *         with the specified order.
 192       *
 193       * @see CollationElementIterator#getMaxExpansion
 194       */
 195     int getMaxExpansion(int order) {
 196         int result = 1;
 197 
 198         if (expandTable != null) {
 199             // Right now this does a linear search through the entire
 200             // expansion table.  If a collator had a large number of expansions,
 201             // this could cause a performance problem, but in practise that
 202             // rarely happens
 203             for (int i = 0; i < expandTable.size(); i++) {
 204                 int[] valueList = expandTable.elementAt(i);
 205                 int length = valueList.length;
 206 
 207                 if (length > result && valueList[length-1] == order) {
 208                     result = length;
 209                 }
 210             }
 211         }
 212 
 213         return result;
 214     }
 215 
 216     /**
 217      * Get the entry of hash table of the expanding string in the collation
 218      * table.
 219      * @param idx the index of the expanding string value list
 220      */
 221     final int[] getExpandValueList(int idx) {
 222         return expandTable.elementAt(idx - EXPANDCHARINDEX);
 223     }
 224 
 225     /**
 226      * Get the comarison order of a character from the collation table.
 227      * @return the comparison order of a character.
 228      */
 229     int getUnicodeOrder(int ch) {
 230         return mapping.elementAt(ch);
 231     }
 232 
 233     short getMaxSecOrder() {
 234         return maxSecOrder;
 235     }
 236 
 237     short getMaxTerOrder() {
 238         return maxTerOrder;
 239     }
 240 
 241     /**
 242      * Reverse a string.
 243      */
 244     //shemran/Note: this is used for secondary order value reverse, no
 245     //              need to consider supplementary pair.
 246     static void reverse (StringBuffer result, int from, int to)
 247     {
 248         int i = from;
 249         char swap;
 250 
 251         int j = to - 1;
 252         while (i < j) {
 253             swap =  result.charAt(i);
 254             result.setCharAt(i, result.charAt(j));
 255             result.setCharAt(j, swap);
 256             i++;
 257             j--;
 258         }
 259     }
 260 
 261     final static int getEntry(Vector<EntryPair> list, String name, boolean fwd) {
 262         for (int i = 0; i < list.size(); i++) {
 263             EntryPair pair = list.elementAt(i);
 264             if (pair.fwd == fwd && pair.entryName.equals(name)) {
 265                 return i;
 266             }
 267         }
 268         return UNMAPPED;
 269     }
 270 
 271     // ==============================================================
 272     // constants
 273     // ==============================================================
 274     //sherman/Todo: is the value big enough?????
 275     final static int EXPANDCHARINDEX = 0x7E000000; // Expand index follows
 276     final static int CONTRACTCHARINDEX = 0x7F000000;  // contract indexes follow
 277     final static int UNMAPPED = 0xFFFFFFFF;
 278 
 279     final static int PRIMARYORDERMASK = 0xffff0000;
 280     final static int SECONDARYORDERMASK = 0x0000ff00;
 281     final static int TERTIARYORDERMASK = 0x000000ff;
 282     final static int PRIMARYDIFFERENCEONLY = 0xffff0000;
 283     final static int SECONDARYDIFFERENCEONLY = 0xffffff00;
 284     final static int PRIMARYORDERSHIFT = 16;
 285     final static int SECONDARYORDERSHIFT = 8;
 286 
 287     // ==============================================================
 288     // instance variables
 289     // ==============================================================
 290     private String rules = null;
 291     private boolean frenchSec = false;
 292     private boolean seAsianSwapping = false;
 293 
 294     private UCompactIntArray mapping = null;
 295     private Vector<Vector<EntryPair>> contractTable = null;
 296     private Vector<int[]> expandTable = null;
 297     private IntHashtable contractFlags = null;
 298 
 299     private short maxSecOrder = 0;
 300     private short maxTerOrder = 0;
 301 }