1 /* 2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.xml.internal.bind.api.impl; 27 28 import java.util.ArrayList; 29 import java.util.Collections; 30 import java.util.HashSet; 31 import java.util.List; 32 import java.util.Locale; 33 34 /** 35 * Methods that convert strings into various formats. 36 * 37 * <p> 38 * What JAX-RPC name binding tells us is that even such basic method 39 * like "isLetter" can be different depending on the situation. 40 * 41 * For this reason, a whole lot of methods are made non-static, 42 * even though they look like they should be static. 43 */ 44 class NameUtil { 45 protected boolean isPunct(char c) { 46 return c == '-' || c == '.' || c == ':' || c == '_' || c == '\u00b7' || c == '\u0387' || c == '\u06dd' || c == '\u06de'; 47 } 48 49 protected static boolean isDigit(char c) { 50 return c >= '0' && c <= '9' || Character.isDigit(c); 51 } 52 53 protected static boolean isUpper(char c) { 54 return c >= 'A' && c <= 'Z' || Character.isUpperCase(c); 55 } 56 57 protected static boolean isLower(char c) { 58 return c >= 'a' && c <= 'z' || Character.isLowerCase(c); 59 } 60 61 protected boolean isLetter(char c) { 62 return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || Character.isLetter(c); 63 } 64 65 private String toLowerCase(String s) 66 { 67 return s.toLowerCase(Locale.ENGLISH); 68 } 69 70 private String toUpperCase(char c) 71 { 72 return String.valueOf(c).toUpperCase(Locale.ENGLISH); 73 } 74 75 private String toUpperCase(String s) 76 { 77 return s.toUpperCase(Locale.ENGLISH); 78 } 79 80 /** 81 * Capitalizes the first character of the specified string, 82 * and de-capitalize the rest of characters. 83 */ 84 public String capitalize(String s) { 85 if (!isLower(s.charAt(0))) 86 return s; 87 StringBuilder sb = new StringBuilder(s.length()); 88 sb.append(toUpperCase(s.charAt(0))); 89 sb.append(toLowerCase(s.substring(1))); 90 return sb.toString(); 91 } 92 93 // Precondition: s[start] is not punctuation 94 private int nextBreak(String s, int start) { 95 int n = s.length(); 96 97 char c1 = s.charAt(start); 98 int t1 = classify(c1); 99 100 for (int i=start+1; i<n; i++) { 101 // shift (c1,t1) into (c0,t0) 102 // char c0 = c1; --- conceptually, but c0 won't be used 103 int t0 = t1; 104 105 c1 = s.charAt(i); 106 t1 = classify(c1); 107 108 switch(actionTable[t0*5+t1]) { 109 case ACTION_CHECK_PUNCT: 110 if(isPunct(c1)) return i; 111 break; 112 case ACTION_CHECK_C2: 113 if (i < n-1) { 114 char c2 = s.charAt(i+1); 115 if (isLower(c2)) 116 return i; 117 } 118 break; 119 case ACTION_BREAK: 120 return i; 121 } 122 } 123 return -1; 124 } 125 126 // the 5-category classification that we use in this code 127 // to find work breaks 128 static protected final int UPPER_LETTER = 0; 129 static protected final int LOWER_LETTER = 1; 130 static protected final int OTHER_LETTER = 2; 131 static protected final int DIGIT = 3; 132 static protected final int OTHER = 4; 133 134 /** 135 * Look up table for actions. 136 * type0*5+type1 would yield the action to be taken. 137 */ 138 private static final byte[] actionTable = new byte[5*5]; 139 140 // action constants. see nextBreak for the meaning 141 static private final byte ACTION_CHECK_PUNCT = 0; 142 static private final byte ACTION_CHECK_C2 = 1; 143 static private final byte ACTION_BREAK = 2; 144 static private final byte ACTION_NOBREAK = 3; 145 146 /** 147 * Decide the action to be taken given 148 * the classification of the preceding character 't0' and 149 * the classification of the next character 't1'. 150 */ 151 private static byte decideAction( int t0, int t1 ) { 152 if(t0==OTHER && t1==OTHER) return ACTION_CHECK_PUNCT; 153 if(!xor(t0==DIGIT,t1==DIGIT)) return ACTION_BREAK; 154 if(t0==LOWER_LETTER && t1!=LOWER_LETTER) return ACTION_BREAK; 155 if(!xor(t0<=OTHER_LETTER,t1<=OTHER_LETTER)) return ACTION_BREAK; 156 if(!xor(t0==OTHER_LETTER,t1==OTHER_LETTER)) return ACTION_BREAK; 157 158 if(t0==UPPER_LETTER && t1==UPPER_LETTER) return ACTION_CHECK_C2; 159 160 return ACTION_NOBREAK; 161 } 162 163 private static boolean xor(boolean x,boolean y) { 164 return (x&&y) || (!x&&!y); 165 } 166 167 static { 168 // initialize the action table 169 for( int t0=0; t0<5; t0++ ) 170 for( int t1=0; t1<5; t1++ ) 171 actionTable[t0*5+t1] = decideAction(t0,t1); 172 } 173 174 /** 175 * Classify a character into 5 categories that determine the word break. 176 */ 177 protected int classify(char c0) { 178 switch(Character.getType(c0)) { 179 case Character.UPPERCASE_LETTER: return UPPER_LETTER; 180 case Character.LOWERCASE_LETTER: return LOWER_LETTER; 181 case Character.TITLECASE_LETTER: 182 case Character.MODIFIER_LETTER: 183 case Character.OTHER_LETTER: return OTHER_LETTER; 184 case Character.DECIMAL_DIGIT_NUMBER: return DIGIT; 185 default: return OTHER; 186 } 187 } 188 189 190 /** 191 * Tokenizes a string into words and capitalizes the first 192 * character of each word. 193 * 194 * <p> 195 * This method uses a change in character type as a splitter 196 * of two words. For example, "abc100ghi" will be splitted into 197 * {"Abc", "100","Ghi"}. 198 */ 199 public List<String> toWordList(String s) { 200 ArrayList<String> ss = new ArrayList<String>(); 201 int n = s.length(); 202 for (int i = 0; i < n;) { 203 204 // Skip punctuation 205 while (i < n) { 206 if (!isPunct(s.charAt(i))) 207 break; 208 i++; 209 } 210 if (i >= n) break; 211 212 // Find next break and collect word 213 int b = nextBreak(s, i); 214 String w = (b == -1) ? s.substring(i) : s.substring(i, b); 215 ss.add(escape(capitalize(w))); 216 if (b == -1) break; 217 i = b; 218 } 219 220 // we can't guarantee a valid Java identifier anyway, 221 // so there's not much point in rejecting things in this way. 222 // if (ss.size() == 0) 223 // throw new IllegalArgumentException("Zero-length identifier"); 224 return ss; 225 } 226 227 protected String toMixedCaseName(List<String> ss, boolean startUpper) { 228 StringBuilder sb = new StringBuilder(); 229 if(!ss.isEmpty()) { 230 sb.append(startUpper ? ss.get(0) : toLowerCase(ss.get(0))); 231 for (int i = 1; i < ss.size(); i++) 232 sb.append(ss.get(i)); 233 } 234 return sb.toString(); 235 } 236 237 protected String toMixedCaseVariableName(String[] ss, 238 boolean startUpper, 239 boolean cdrUpper) { 240 if (cdrUpper) 241 for (int i = 1; i < ss.length; i++) 242 ss[i] = capitalize(ss[i]); 243 StringBuilder sb = new StringBuilder(); 244 if( ss.length>0 ) { 245 sb.append(startUpper ? ss[0] : toLowerCase(ss[0])); 246 for (int i = 1; i < ss.length; i++) 247 sb.append(ss[i]); 248 } 249 return sb.toString(); 250 } 251 252 253 /** 254 * Formats a string into "THIS_KIND_OF_FORMAT_ABC_DEF". 255 * 256 * @return 257 * Always return a string but there's no guarantee that 258 * the generated code is a valid Java identifier. 259 */ 260 public String toConstantName(String s) { 261 return toConstantName(toWordList(s)); 262 } 263 264 /** 265 * Formats a string into "THIS_KIND_OF_FORMAT_ABC_DEF". 266 * 267 * @return 268 * Always return a string but there's no guarantee that 269 * the generated code is a valid Java identifier. 270 */ 271 public String toConstantName(List<String> ss) { 272 StringBuilder sb = new StringBuilder(); 273 if( !ss.isEmpty() ) { 274 sb.append(toUpperCase(ss.get(0))); 275 for (int i = 1; i < ss.size(); i++) { 276 sb.append('_'); 277 sb.append(toUpperCase(ss.get(i))); 278 } 279 } 280 return sb.toString(); 281 } 282 283 284 285 /** 286 * Escapes characters is the given string so that they can be 287 * printed by only using US-ASCII characters. 288 * 289 * The escaped characters will be appended to the given 290 * StringBuffer. 291 * 292 * @param sb 293 * StringBuffer that receives escaped string. 294 * @param s 295 * String to be escaped. <code>s.substring(start)</code> 296 * will be escaped and copied to the string buffer. 297 */ 298 public static void escape(StringBuilder sb, String s, int start) { 299 int n = s.length(); 300 for (int i = start; i < n; i++) { 301 char c = s.charAt(i); 302 if (Character.isJavaIdentifierPart(c)) 303 sb.append(c); 304 else { 305 sb.append('_'); 306 if (c <= '\u000f') sb.append("000"); 307 else if (c <= '\u00ff') sb.append("00"); 308 else if (c <= '\u0fff') sb.append('0'); 309 sb.append(Integer.toString(c, 16)); 310 } 311 } 312 } 313 314 /** 315 * Escapes characters that are unusable as Java identifiers 316 * by replacing unsafe characters with safe characters. 317 */ 318 private static String escape(String s) { 319 int n = s.length(); 320 for (int i = 0; i < n; i++) 321 if (!Character.isJavaIdentifierPart(s.charAt(i))) { 322 StringBuilder sb = new StringBuilder(s.substring(0, i)); 323 escape(sb, s, i); 324 return sb.toString(); 325 } 326 return s; 327 } 328 }