1 /*
   2  * Copyright (c) 1996, 2000, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /*
  27  * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  28  * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
  29  *
  30  *   The original version of this source code and documentation is copyrighted
  31  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  32  * materials are provided under terms of a License Agreement between Taligent
  33  * and Sun. This technology is protected by multiple US and International
  34  * patents. This notice and attribution to Taligent may not be removed.
  35  *   Taligent is a registered trademark of Taligent, Inc.
  36  *
  37  */
  38 
  39 package java.text;
  40 
  41 import java.lang.Character;
  42 
  43 /**
  44  * Utility class for normalizing and merging patterns for collation.
  45  * This is to be used with MergeCollation for adding patterns to an
  46  * existing rule table.
  47  * @see        MergeCollation
  48  * @author     Mark Davis, Helena Shih
  49  */
  50 
  51 class PatternEntry {
  52     /**
  53      * Gets the current extension, quoted
  54      */
  55     public void appendQuotedExtension(StringBuffer toAddTo) {
  56         appendQuoted(extension,toAddTo);
  57     }
  58 
  59     /**
  60      * Gets the current chars, quoted
  61      */
  62     public void appendQuotedChars(StringBuffer toAddTo) {
  63         appendQuoted(chars,toAddTo);
  64     }
  65 
  66     /**
  67      * WARNING this is used for searching in a Vector.
  68      * Because Vector.indexOf doesn't take a comparator,
  69      * this method is ill-defined and ignores strength.
  70      */
  71     public boolean equals(Object obj) {
  72         if (obj == null) return false;
  73         PatternEntry other = (PatternEntry) obj;
  74         boolean result = chars.equals(other.chars);
  75         return result;
  76     }
  77 
  78     public int hashCode() {
  79         return chars.hashCode();
  80     }
  81 
  82     /**
  83      * For debugging.
  84      */
  85     public String toString() {
  86         StringBuffer result = new StringBuffer();
  87         addToBuffer(result, true, false, null);
  88         return result.toString();
  89     }
  90 
  91     /**
  92      * Gets the strength of the entry.
  93      */
  94     final int getStrength() {
  95         return strength;
  96     }
  97 
  98     /**
  99      * Gets the expanding characters of the entry.
 100      */
 101     final String getExtension() {
 102         return extension;
 103     }
 104 
 105     /**
 106      * Gets the core characters of the entry.
 107      */
 108     final String getChars() {
 109         return chars;
 110     }
 111 
 112     // ===== privates =====
 113 
 114     void addToBuffer(StringBuffer toAddTo,
 115                      boolean showExtension,
 116                      boolean showWhiteSpace,
 117                      PatternEntry lastEntry)
 118     {
 119         if (showWhiteSpace && toAddTo.length() > 0)
 120             if (strength == Collator.PRIMARY || lastEntry != null)
 121                 toAddTo.append('\n');
 122             else
 123                 toAddTo.append(' ');
 124         if (lastEntry != null) {
 125             toAddTo.append('&');
 126             if (showWhiteSpace)
 127                 toAddTo.append(' ');
 128             lastEntry.appendQuotedChars(toAddTo);
 129             appendQuotedExtension(toAddTo);
 130             if (showWhiteSpace)
 131                 toAddTo.append(' ');
 132         }
 133         switch (strength) {
 134         case Collator.IDENTICAL: toAddTo.append('='); break;
 135         case Collator.TERTIARY:  toAddTo.append(','); break;
 136         case Collator.SECONDARY: toAddTo.append(';'); break;
 137         case Collator.PRIMARY:   toAddTo.append('<'); break;
 138         case RESET: toAddTo.append('&'); break;
 139         case UNSET: toAddTo.append('?'); break;
 140         }
 141         if (showWhiteSpace)
 142             toAddTo.append(' ');
 143         appendQuoted(chars,toAddTo);
 144         if (showExtension && !extension.isEmpty()) {
 145             toAddTo.append('/');
 146             appendQuoted(extension,toAddTo);
 147         }
 148     }
 149 
 150     static void appendQuoted(String chars, StringBuffer toAddTo) {
 151         boolean inQuote = false;
 152         char ch = chars.charAt(0);
 153         if (Character.isSpaceChar(ch)) {
 154             inQuote = true;
 155             toAddTo.append('\'');
 156         } else {
 157           if (PatternEntry.isSpecialChar(ch)) {
 158                 inQuote = true;
 159                 toAddTo.append('\'');
 160             } else {
 161                 switch (ch) {
 162                     case 0x0010: case '\f': case '\r':
 163                     case '\t': case '\n':  case '@':
 164                     inQuote = true;
 165                     toAddTo.append('\'');
 166                     break;
 167                 case '\'':
 168                     inQuote = true;
 169                     toAddTo.append('\'');
 170                     break;
 171                 default:
 172                     if (inQuote) {
 173                         inQuote = false; toAddTo.append('\'');
 174                     }
 175                     break;
 176                 }
 177            }
 178         }
 179         toAddTo.append(chars);
 180         if (inQuote)
 181             toAddTo.append('\'');
 182     }
 183 
 184     //========================================================================
 185     // Parsing a pattern into a list of PatternEntries....
 186     //========================================================================
 187 
 188     PatternEntry(int strength,
 189                  StringBuffer chars,
 190                  StringBuffer extension)
 191     {
 192         this.strength = strength;
 193         this.chars = chars.toString();
 194         this.extension = (extension.length() > 0) ? extension.toString()
 195                                                   : "";
 196     }
 197 
 198     static class Parser {
 199         private String pattern;
 200         private int i;
 201 
 202         public Parser(String pattern) {
 203             this.pattern = pattern;
 204             this.i = 0;
 205         }
 206 
 207         public PatternEntry next() throws ParseException {
 208             int newStrength = UNSET;
 209 
 210             newChars.setLength(0);
 211             newExtension.setLength(0);
 212 
 213             boolean inChars = true;
 214             boolean inQuote = false;
 215         mainLoop:
 216             while (i < pattern.length()) {
 217                 char ch = pattern.charAt(i);
 218                 if (inQuote) {
 219                     if (ch == '\'') {
 220                         inQuote = false;
 221                     } else {
 222                         if (newChars.length() == 0) newChars.append(ch);
 223                         else if (inChars) newChars.append(ch);
 224                         else newExtension.append(ch);
 225                     }
 226                 } else switch (ch) {
 227                 case '=': if (newStrength != UNSET) break mainLoop;
 228                     newStrength = Collator.IDENTICAL; break;
 229                 case ',': if (newStrength != UNSET) break mainLoop;
 230                     newStrength = Collator.TERTIARY; break;
 231                 case ';': if (newStrength != UNSET) break mainLoop;
 232                     newStrength = Collator.SECONDARY; break;
 233                 case '<': if (newStrength != UNSET) break mainLoop;
 234                     newStrength = Collator.PRIMARY; break;
 235                 case '&': if (newStrength != UNSET) break mainLoop;
 236                     newStrength = RESET; break;
 237                 case '\t':
 238                 case '\n':
 239                 case '\f':
 240                 case '\r':
 241                 case ' ': break; // skip whitespace TODO use Character
 242                 case '/': inChars = false; break;
 243                 case '\'':
 244                     inQuote = true;
 245                     ch = pattern.charAt(++i);
 246                     if (newChars.length() == 0) newChars.append(ch);
 247                     else if (inChars) newChars.append(ch);
 248                     else newExtension.append(ch);
 249                     break;
 250                 default:
 251                     if (newStrength == UNSET) {
 252                         throw new ParseException
 253                             ("missing char (=,;<&) : " +
 254                              pattern.substring(i,
 255                                 (i+10 < pattern.length()) ?
 256                                  i+10 : pattern.length()),
 257                              i);
 258                     }
 259                     if (PatternEntry.isSpecialChar(ch) && (inQuote == false))
 260                         throw new ParseException
 261                             ("Unquoted punctuation character : " + Integer.toString(ch, 16), i);
 262                     if (inChars) {
 263                         newChars.append(ch);
 264                     } else {
 265                         newExtension.append(ch);
 266                     }
 267                     break;
 268                 }
 269                 i++;
 270             }
 271             if (newStrength == UNSET)
 272                 return null;
 273             if (newChars.length() == 0) {
 274                 throw new ParseException
 275                     ("missing chars (=,;<&): " +
 276                       pattern.substring(i,
 277                           (i+10 < pattern.length()) ?
 278                            i+10 : pattern.length()),
 279                      i);
 280             }
 281 
 282             return new PatternEntry(newStrength, newChars, newExtension);
 283         }
 284 
 285         // We re-use these objects in order to improve performance
 286         private StringBuffer newChars = new StringBuffer();
 287         private StringBuffer newExtension = new StringBuffer();
 288 
 289     }
 290 
 291     static boolean isSpecialChar(char ch) {
 292         return ((ch == '\u0020') ||
 293                 ((ch <= '\u002F') && (ch >= '\u0022')) ||
 294                 ((ch <= '\u003F') && (ch >= '\u003A')) ||
 295                 ((ch <= '\u0060') && (ch >= '\u005B')) ||
 296                 ((ch <= '\u007E') && (ch >= '\u007B')));
 297     }
 298 
 299 
 300     static final int RESET = -2;
 301     static final int UNSET = -1;
 302 
 303     int strength = UNSET;
 304     String chars = "";
 305     String extension = "";
 306 }