/* * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package com.sun.activation.registries; /** * A tokenizer for strings in the form of "foo/bar; prop1=val1; ... ". * Useful for parsing MIME content types. */ public class MailcapTokenizer { public static final int UNKNOWN_TOKEN = 0; public static final int START_TOKEN = 1; public static final int STRING_TOKEN = 2; public static final int EOI_TOKEN = 5; public static final int SLASH_TOKEN = '/'; public static final int SEMICOLON_TOKEN = ';'; public static final int EQUALS_TOKEN = '='; /** * Constructor * * @parameter inputString the string to tokenize */ public MailcapTokenizer(String inputString) { data = inputString; dataIndex = 0; dataLength = inputString.length(); currentToken = START_TOKEN; currentTokenValue = ""; isAutoquoting = false; autoquoteChar = ';'; } /** * Set whether auto-quoting is on or off. * * Auto-quoting means that all characters after the first * non-whitespace, non-control character up to the auto-quote * terminator character or EOI (minus any whitespace immediatley * preceeding it) is considered a token. * * This is required for handling command strings in a mailcap entry. */ public void setIsAutoquoting(boolean value) { isAutoquoting = value; } /** * Retrieve current token. * * @returns The current token value */ public int getCurrentToken() { return currentToken; } /* * Get a String that describes the given token. */ public static String nameForToken(int token) { String name = "really unknown"; switch(token) { case UNKNOWN_TOKEN: name = "unknown"; break; case START_TOKEN: name = "start"; break; case STRING_TOKEN: name = "string"; break; case EOI_TOKEN: name = "EOI"; break; case SLASH_TOKEN: name = "'/'"; break; case SEMICOLON_TOKEN: name = "';'"; break; case EQUALS_TOKEN: name = "'='"; break; } return name; } /* * Retrieve current token value. * * @returns A String containing the current token value */ public String getCurrentTokenValue() { return currentTokenValue; } /* * Process the next token. * * @returns the next token */ public int nextToken() { if (dataIndex < dataLength) { // skip white space while ((dataIndex < dataLength) && (isWhiteSpaceChar(data.charAt(dataIndex)))) { ++dataIndex; } if (dataIndex < dataLength) { // examine the current character and see what kind of token we have char c = data.charAt(dataIndex); if (isAutoquoting) { if (c == ';' || c == '=') { currentToken = c; currentTokenValue = new Character(c).toString(); ++dataIndex; } else { processAutoquoteToken(); } } else { if (isStringTokenChar(c)) { processStringToken(); } else if ((c == '/') || (c == ';') || (c == '=')) { currentToken = c; currentTokenValue = new Character(c).toString(); ++dataIndex; } else { currentToken = UNKNOWN_TOKEN; currentTokenValue = new Character(c).toString(); ++dataIndex; } } } else { currentToken = EOI_TOKEN; currentTokenValue = null; } } else { currentToken = EOI_TOKEN; currentTokenValue = null; } return currentToken; } private void processStringToken() { // capture the initial index int initialIndex = dataIndex; // skip to 1st non string token character while ((dataIndex < dataLength) && isStringTokenChar(data.charAt(dataIndex))) { ++dataIndex; } currentToken = STRING_TOKEN; currentTokenValue = data.substring(initialIndex, dataIndex); } private void processAutoquoteToken() { // capture the initial index int initialIndex = dataIndex; // now skip to the 1st non-escaped autoquote termination character // XXX - doesn't actually consider escaping boolean foundTerminator = false; while ((dataIndex < dataLength) && !foundTerminator) { char c = data.charAt(dataIndex); if (c != autoquoteChar) { ++dataIndex; } else { foundTerminator = true; } } currentToken = STRING_TOKEN; currentTokenValue = fixEscapeSequences(data.substring(initialIndex, dataIndex)); } private static boolean isSpecialChar(char c) { boolean lAnswer = false; switch(c) { case '(': case ')': case '<': case '>': case '@': case ',': case ';': case ':': case '\\': case '"': case '/': case '[': case ']': case '?': case '=': lAnswer = true; break; } return lAnswer; } private static boolean isControlChar(char c) { return Character.isISOControl(c); } private static boolean isWhiteSpaceChar(char c) { return Character.isWhitespace(c); } private static boolean isStringTokenChar(char c) { return !isSpecialChar(c) && !isControlChar(c) && !isWhiteSpaceChar(c); } private static String fixEscapeSequences(String inputString) { int inputLength = inputString.length(); StringBuffer buffer = new StringBuffer(); buffer.ensureCapacity(inputLength); for (int i = 0; i < inputLength; ++i) { char currentChar = inputString.charAt(i); if (currentChar != '\\') { buffer.append(currentChar); } else { if (i < inputLength - 1) { char nextChar = inputString.charAt(i + 1); buffer.append(nextChar); // force a skip over the next character too ++i; } else { buffer.append(currentChar); } } } return buffer.toString(); } private String data; private int dataIndex; private int dataLength; private int currentToken; private String currentTokenValue; private boolean isAutoquoting; private char autoquoteChar; /* public static void main(String[] args) { for (int i = 0; i < args.length; ++i) { MailcapTokenizer tokenizer = new MailcapTokenizer(args[i]); System.out.println("Original: |" + args[i] + "|"); int currentToken = tokenizer.nextToken(); while (currentToken != EOI_TOKEN) { switch(currentToken) { case UNKNOWN_TOKEN: System.out.println(" Unknown Token: |" + tokenizer.getCurrentTokenValue() + "|"); break; case START_TOKEN: System.out.println(" Start Token: |" + tokenizer.getCurrentTokenValue() + "|"); break; case STRING_TOKEN: System.out.println(" String Token: |" + tokenizer.getCurrentTokenValue() + "|"); break; case EOI_TOKEN: System.out.println(" EOI Token: |" + tokenizer.getCurrentTokenValue() + "|"); break; case SLASH_TOKEN: System.out.println(" Slash Token: |" + tokenizer.getCurrentTokenValue() + "|"); break; case SEMICOLON_TOKEN: System.out.println(" Semicolon Token: |" + tokenizer.getCurrentTokenValue() + "|"); break; case EQUALS_TOKEN: System.out.println(" Equals Token: |" + tokenizer.getCurrentTokenValue() + "|"); break; default: System.out.println(" Really Unknown Token: |" + tokenizer.getCurrentTokenValue() + "|"); break; } currentToken = tokenizer.nextToken(); } System.out.println(""); } } */ }