/*
 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package com.sun.activation.registries;

/**
 * A tokenizer for strings in the form of "foo/bar; prop1=val1; ... ".
 * Useful for parsing MIME content types.
// */
// NOTE: the comment terminator on the line above closes the class-level
// Javadoc that opens just before this class declaration.
public class MailcapTokenizer {

    /** Token type for a character that does not belong to any other token. */
    public static final int UNKNOWN_TOKEN = 0;
    /** Token type reported before {@link #nextToken()} has been called. */
    public static final int START_TOKEN = 1;
    /** Token type for a run of string characters or an auto-quoted string. */
    public static final int STRING_TOKEN = 2;
    /** Token type reported once the end of the input has been reached. */
    public static final int EOI_TOKEN = 5;
    /** Token type for a single {@code '/'}. */
    public static final int SLASH_TOKEN = '/';
    /** Token type for a single {@code ';'}. */
    public static final int SEMICOLON_TOKEN = ';';
    /** Token type for a single {@code '='}. */
    public static final int EQUALS_TOKEN = '=';

    /** Characters that can never be part of a (non auto-quoted) string token. */
    private static final String SPECIAL_CHARS = "()<>@,;:\\\"/[]?=";

    /** The text being tokenized; never {@code null}. */
    private final String data;
    /** Cached length of {@link #data}. */
    private final int dataLength;
    /** Character that terminates an auto-quoted token. */
    private final char autoquoteChar;

    /** Index in {@link #data} of the next character to examine. */
    private int dataIndex;
    /** Type of the most recently produced token. */
    private int currentToken;
    /** Text of the most recently produced token; {@code null} at end of input. */
    private String currentTokenValue;
    /** Whether auto-quoting is currently enabled. */
    private boolean isAutoquoting;

    /**
     * Creates a tokenizer positioned at the start of the given string, with
     * auto-quoting switched off. Until {@link #nextToken()} is called the
     * current token is {@link #START_TOKEN} and its value is the empty string.
     *
     * @param inputString the string to tokenize
     * @throws NullPointerException if {@code inputString} is {@code null}
     */
    public MailcapTokenizer(String inputString) {
        if (inputString == null) {
            throw new NullPointerException("inputString must not be null");
        }

        data = inputString;
        dataIndex = 0;
        dataLength = inputString.length();

        currentToken = START_TOKEN;
        currentTokenValue = "";

        isAutoquoting = false;
        autoquoteChar = ';';
    }

    /**
     * Sets whether auto-quoting is on or off.
     *
     * <p>While auto-quoting is on, {@link #nextToken()} skips leading
     * whitespace and then, unless the next character is {@code ';'} or
     * {@code '='} (which are returned as tokens of their own), treats
     * everything up to the next unescaped {@code ';'} or the end of the
     * input as a single {@link #STRING_TOKEN}. Whitespace in front of the
     * terminator is kept as part of the token, and backslash escape
     * sequences in the token value are resolved.
     *
     * <p>This is required for handling command strings in a mailcap entry.
     *
     * @param value {@code true} to enable auto-quoting, {@code false} to
     *              disable it
     */
    public void setIsAutoquoting(boolean value) {
        isAutoquoting = value;
    }

    /**
     * Retrieves the type of the current token.
     *
     * @return the type of the token most recently produced by
     *         {@link #nextToken()}, or {@link #START_TOKEN} if it has not
     *         been called yet
     */
    public int getCurrentToken() {
        return currentToken;
    }

    /**
     * Gets a String that describes the given token type.
     *
     * @param token one of the {@code *_TOKEN} constants
     * @return a short name for the token type, or {@code "really unknown"}
     *         if the value is not one of the token constants
     */
    public static String nameForToken(int token) {
        String name = "really unknown";

        switch (token) {
            case UNKNOWN_TOKEN:
                name = "unknown";
                break;
            case START_TOKEN:
                name = "start";
                break;
            case STRING_TOKEN:
                name = "string";
                break;
            case EOI_TOKEN:
                name = "EOI";
                break;
            case SLASH_TOKEN:
                name = "'/'";
                break;
            case SEMICOLON_TOKEN:
                name = "';'";
                break;
            case EQUALS_TOKEN:
                name = "'='";
                break;
            default:
                break;
        }

        return name;
    }

    /**
     * Retrieves the text of the current token.
     *
     * @return the current token value; the empty string before the first
     *         call to {@link #nextToken()} and {@code null} once
     *         {@link #EOI_TOKEN} has been returned
     */
    public String getCurrentTokenValue() {
        return currentTokenValue;
    }

    /**
     * Advances to the next token, skipping any leading whitespace.
     *
     * @return the type of the new current token; {@link #EOI_TOKEN} when
     *         nothing but whitespace is left in the input
     */
    public int nextToken() {
        // skip white space
        while (dataIndex < dataLength
                && isWhiteSpaceChar(data.charAt(dataIndex))) {
            ++dataIndex;
        }

        if (dataIndex >= dataLength) {
            currentToken = EOI_TOKEN;
            currentTokenValue = null;
            return currentToken;
        }

        // examine the current character and see what kind of token we have
        char c = data.charAt(dataIndex);
        if (isAutoquoting) {
            if (c == ';' || c == '=') {
                processSingleCharToken(c, c);
            } else {
                processAutoquoteToken();
            }
        } else if (isStringTokenChar(c)) {
            processStringToken();
        } else if (c == '/' || c == ';' || c == '=') {
            processSingleCharToken(c, c);
        } else {
            processSingleCharToken(UNKNOWN_TOKEN, c);
        }

        return currentToken;
    }

    /**
     * Consumes the character at the current position and makes it the
     * current token with the given type.
     */
    private void processSingleCharToken(int tokenType, char c) {
        currentToken = tokenType;
        currentTokenValue = String.valueOf(c);
        ++dataIndex;
    }

    /**
     * Consumes the run of string-token characters starting at the current
     * position and makes it the current {@link #STRING_TOKEN}.
     */
    private void processStringToken() {
        // capture the initial index
        int initialIndex = dataIndex;

        // skip to 1st non string token character
        while (dataIndex < dataLength
                && isStringTokenChar(data.charAt(dataIndex))) {
            ++dataIndex;
        }

        currentToken = STRING_TOKEN;
        currentTokenValue = data.substring(initialIndex, dataIndex);
    }

    /**
     * Consumes everything from the current position up to (but excluding)
     * the first unescaped auto-quote terminator, or the end of the input,
     * and makes it the current {@link #STRING_TOKEN} with its escape
     * sequences resolved.
     */
    private void processAutoquoteToken() {
        // capture the initial index
        int initialIndex = dataIndex;

        // now skip to the 1st non-escaped autoquote termination character
        while (dataIndex < dataLength) {
            char c = data.charAt(dataIndex);
            if (c == autoquoteChar) {
                break;
            }
            if (c == '\\') {
                // A backslash escapes the following character, so step over
                // both; this keeps "\;" inside the token. Clamp so that a
                // trailing lone backslash does not run past the input.
                dataIndex = Math.min(dataIndex + 2, dataLength);
            } else {
                ++dataIndex;
            }
        }

        currentToken = STRING_TOKEN;
        currentTokenValue =
            fixEscapeSequences(data.substring(initialIndex, dataIndex));
    }

    /** Returns whether {@code c} is one of the characters in {@link #SPECIAL_CHARS}. */
    private static boolean isSpecialChar(char c) {
        return SPECIAL_CHARS.indexOf(c) >= 0;
    }

    /** Returns whether {@code c} is an ISO control character. */
    private static boolean isControlChar(char c) {
        return Character.isISOControl(c);
    }

    /** Returns whether {@code c} is whitespace according to {@link Character#isWhitespace(char)}. */
    private static boolean isWhiteSpaceChar(char c) {
        return Character.isWhitespace(c);
    }

    /**
     * Returns whether {@code c} may appear in a string token, i.e. it is
     * neither a special character, a control character nor whitespace.
     */
    private static boolean isStringTokenChar(char c) {
        return !isSpecialChar(c) && !isControlChar(c) && !isWhiteSpaceChar(c);
    }

    /**
     * Resolves backslash escapes: each backslash is dropped and the
     * character following it is copied literally. A backslash that is the
     * last character of the input is kept as is.
     *
     * @param inputString the raw token text
     * @return the text with escape sequences resolved
     */
    private static String fixEscapeSequences(String inputString) {
        int inputLength = inputString.length();
        StringBuilder buffer = new StringBuilder(inputLength);

        for (int i = 0; i < inputLength; ++i) {
            char currentChar = inputString.charAt(i);
            if (currentChar == '\\' && i < inputLength - 1) {
                // copy the escaped character and skip over it
                ++i;
                buffer.append(inputString.charAt(i));
            } else {
                buffer.append(currentChar);
            }
        }

        return buffer.toString();
    }
}