1 /*
   2  * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.xml.internal.bind.api.impl;
  27 
  28 import java.util.ArrayList;
  29 import java.util.Collections;
  30 import java.util.HashSet;
  31 import java.util.List;
  32 import java.util.Locale;
  33 
  34 /**
  35  * Methods that convert strings into various formats.
  36  *
  37  * <p>
  38  * What JAX-RPC name binding tells us is that even such basic method
  39  * like "isLetter" can be different depending on the situation.
  40  *
  41  * For this reason, a whole lot of methods are made non-static,
  42  * even though they look like they should be static.
  43  */
  44 class NameUtil {
  45     protected boolean isPunct(char c) {
  46         return c == '-' || c == '.' || c == ':' || c == '_' || c == '\u00b7' || c == '\u0387' || c == '\u06dd' || c == '\u06de';
  47     }
  48 
  49     protected static boolean isDigit(char c) {
  50         return c >= '0' && c <= '9' || Character.isDigit(c);
  51     }
  52 
  53     protected static boolean isUpper(char c) {
  54         return c >= 'A' && c <= 'Z' || Character.isUpperCase(c);
  55     }
  56 
  57     protected static boolean isLower(char c) {
  58         return c >= 'a' && c <= 'z' || Character.isLowerCase(c);
  59     }
  60 
  61     protected boolean isLetter(char c) {
  62         return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || Character.isLetter(c);
  63     }
  64 
  65     private String toLowerCase(String s)
  66     {
  67         return s.toLowerCase(Locale.ENGLISH);
  68     }
  69 
  70     private String toUpperCase(char c)
  71     {
  72         return String.valueOf(c).toUpperCase(Locale.ENGLISH);
  73     }
  74 
  75     private String toUpperCase(String s)
  76     {
  77         return s.toUpperCase(Locale.ENGLISH);
  78     }
  79 
  80     /**
  81      * Capitalizes the first character of the specified string,
  82      * and de-capitalize the rest of characters.
  83      */
  84     public String capitalize(String s) {
  85         if (!isLower(s.charAt(0)))
  86             return s;
  87         StringBuilder sb = new StringBuilder(s.length());
  88         sb.append(toUpperCase(s.charAt(0)));
  89         sb.append(toLowerCase(s.substring(1)));
  90         return sb.toString();
  91     }
  92 
  93     // Precondition: s[start] is not punctuation
  94     private int nextBreak(String s, int start) {
  95         int n = s.length();
  96 
  97         char c1 = s.charAt(start);
  98         int t1 = classify(c1);
  99 
 100         for (int i=start+1; i<n; i++) {
 101             // shift (c1,t1) into (c0,t0)
 102             // char c0 = c1;  --- conceptually, but c0 won't be used
 103             int t0 = t1;
 104 
 105             c1 = s.charAt(i);
 106             t1 = classify(c1);
 107 
 108             switch(actionTable[t0*5+t1]) {
 109             case ACTION_CHECK_PUNCT:
 110                 if(isPunct(c1)) return i;
 111                 break;
 112             case ACTION_CHECK_C2:
 113                 if (i < n-1) {
 114                     char c2 = s.charAt(i+1);
 115                     if (isLower(c2))
 116                         return i;
 117                 }
 118                 break;
 119             case ACTION_BREAK:
 120                 return i;
 121             }
 122         }
 123         return -1;
 124     }
 125 
 126     // the 5-category classification that we use in this code
 127     // to find work breaks
 128     static protected final int UPPER_LETTER = 0;
 129     static protected final int LOWER_LETTER = 1;
 130     static protected final int OTHER_LETTER = 2;
 131     static protected final int DIGIT = 3;
 132     static protected final int OTHER = 4;
 133 
 134     /**
 135      * Look up table for actions.
 136      * type0*5+type1 would yield the action to be taken.
 137      */
 138     private static final byte[] actionTable = new byte[5*5];
 139 
 140     // action constants. see nextBreak for the meaning
 141     static private final byte ACTION_CHECK_PUNCT = 0;
 142     static private final byte ACTION_CHECK_C2 = 1;
 143     static private final byte ACTION_BREAK = 2;
 144     static private final byte ACTION_NOBREAK = 3;
 145 
 146     /**
 147      * Decide the action to be taken given
 148      * the classification of the preceding character 't0' and
 149      * the classification of the next character 't1'.
 150      */
 151     private static byte decideAction( int t0, int t1 ) {
 152         if(t0==OTHER && t1==OTHER)  return ACTION_CHECK_PUNCT;
 153         if(!xor(t0==DIGIT,t1==DIGIT))  return ACTION_BREAK;
 154         if(t0==LOWER_LETTER && t1!=LOWER_LETTER)    return ACTION_BREAK;
 155         if(!xor(t0<=OTHER_LETTER,t1<=OTHER_LETTER)) return ACTION_BREAK;
 156         if(!xor(t0==OTHER_LETTER,t1==OTHER_LETTER)) return ACTION_BREAK;
 157 
 158         if(t0==UPPER_LETTER && t1==UPPER_LETTER)    return ACTION_CHECK_C2;
 159 
 160         return ACTION_NOBREAK;
 161     }
 162 
 163     private static boolean xor(boolean x,boolean y) {
 164         return (x&&y) || (!x&&!y);
 165     }
 166 
 167     static {
 168         // initialize the action table
 169         for( int t0=0; t0<5; t0++ )
 170             for( int t1=0; t1<5; t1++ )
 171                 actionTable[t0*5+t1] = decideAction(t0,t1);
 172     }
 173 
 174     /**
 175      * Classify a character into 5 categories that determine the word break.
 176      */
 177     protected int classify(char c0) {
 178         switch(Character.getType(c0)) {
 179         case Character.UPPERCASE_LETTER:        return UPPER_LETTER;
 180         case Character.LOWERCASE_LETTER:        return LOWER_LETTER;
 181         case Character.TITLECASE_LETTER:
 182         case Character.MODIFIER_LETTER:
 183         case Character.OTHER_LETTER:            return OTHER_LETTER;
 184         case Character.DECIMAL_DIGIT_NUMBER:    return DIGIT;
 185         default:                                return OTHER;
 186         }
 187     }
 188 
 189 
 190     /**
 191      * Tokenizes a string into words and capitalizes the first
 192      * character of each word.
 193      *
 194      * <p>
 195      * This method uses a change in character type as a splitter
 196      * of two words. For example, "abc100ghi" will be splitted into
 197      * {"Abc", "100","Ghi"}.
 198      */
 199     public List<String> toWordList(String s) {
 200         ArrayList<String> ss = new ArrayList<String>();
 201         int n = s.length();
 202         for (int i = 0; i < n;) {
 203 
 204             // Skip punctuation
 205             while (i < n) {
 206                 if (!isPunct(s.charAt(i)))
 207                     break;
 208                 i++;
 209             }
 210             if (i >= n) break;
 211 
 212             // Find next break and collect word
 213             int b = nextBreak(s, i);
 214             String w = (b == -1) ? s.substring(i) : s.substring(i, b);
 215             ss.add(escape(capitalize(w)));
 216             if (b == -1) break;
 217             i = b;
 218         }
 219 
 220 //      we can't guarantee a valid Java identifier anyway,
 221 //      so there's not much point in rejecting things in this way.
 222 //        if (ss.size() == 0)
 223 //            throw new IllegalArgumentException("Zero-length identifier");
 224         return ss;
 225     }
 226 
 227     protected String toMixedCaseName(List<String> ss, boolean startUpper) {
 228         StringBuilder sb = new StringBuilder();
 229         if(!ss.isEmpty()) {
 230             sb.append(startUpper ? ss.get(0) : toLowerCase(ss.get(0)));
 231             for (int i = 1; i < ss.size(); i++)
 232                 sb.append(ss.get(i));
 233         }
 234         return sb.toString();
 235     }
 236 
 237     protected String toMixedCaseVariableName(String[] ss,
 238                                                   boolean startUpper,
 239                                                   boolean cdrUpper) {
 240         if (cdrUpper)
 241             for (int i = 1; i < ss.length; i++)
 242                 ss[i] = capitalize(ss[i]);
 243         StringBuilder sb = new StringBuilder();
 244         if( ss.length>0 ) {
 245             sb.append(startUpper ? ss[0] : toLowerCase(ss[0]));
 246             for (int i = 1; i < ss.length; i++)
 247                 sb.append(ss[i]);
 248         }
 249         return sb.toString();
 250     }
 251 
 252 
 253     /**
 254      * Formats a string into "THIS_KIND_OF_FORMAT_ABC_DEF".
 255      *
 256      * @return
 257      *      Always return a string but there's no guarantee that
 258      *      the generated code is a valid Java identifier.
 259      */
 260     public String toConstantName(String s) {
 261         return toConstantName(toWordList(s));
 262     }
 263 
 264     /**
 265      * Formats a string into "THIS_KIND_OF_FORMAT_ABC_DEF".
 266      *
 267      * @return
 268      *      Always return a string but there's no guarantee that
 269      *      the generated code is a valid Java identifier.
 270      */
 271     public String toConstantName(List<String> ss) {
 272         StringBuilder sb = new StringBuilder();
 273         if( !ss.isEmpty() ) {
 274             sb.append(toUpperCase(ss.get(0)));
 275             for (int i = 1; i < ss.size(); i++) {
 276                 sb.append('_');
 277                 sb.append(toUpperCase(ss.get(i)));
 278             }
 279         }
 280         return sb.toString();
 281     }
 282 
 283 
 284 
 285     /**
 286      * Escapes characters is the given string so that they can be
 287      * printed by only using US-ASCII characters.
 288      *
 289      * The escaped characters will be appended to the given
 290      * StringBuffer.
 291      *
 292      * @param sb
 293      *      StringBuffer that receives escaped string.
 294      * @param s
 295      *      String to be escaped. <code>s.substring(start)</code>
 296      *      will be escaped and copied to the string buffer.
 297      */
 298     public static void escape(StringBuilder sb, String s, int start) {
 299         int n = s.length();
 300         for (int i = start; i < n; i++) {
 301             char c = s.charAt(i);
 302             if (Character.isJavaIdentifierPart(c))
 303                 sb.append(c);
 304             else {
 305                 sb.append('_');
 306                 if (c <= '\u000f') sb.append("000");
 307                 else if (c <= '\u00ff') sb.append("00");
 308                 else if (c <= '\u0fff') sb.append('0');
 309                 sb.append(Integer.toString(c, 16));
 310             }
 311         }
 312     }
 313 
 314     /**
 315      * Escapes characters that are unusable as Java identifiers
 316      * by replacing unsafe characters with safe characters.
 317      */
 318     private static String escape(String s) {
 319         int n = s.length();
 320         for (int i = 0; i < n; i++)
 321             if (!Character.isJavaIdentifierPart(s.charAt(i))) {
 322                 StringBuilder sb = new StringBuilder(s.substring(0, i));
 323                 escape(sb, s, i);
 324                 return sb.toString();
 325             }
 326         return s;
 327     }
 328 }