/* * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ /* * @(#)MimeUtility.java 1.45 03/03/10 */ package com.sun.xml.internal.messaging.saaj.packaging.mime.internet; import java.io.*; import java.util.*; import javax.activation.DataHandler; import javax.activation.DataSource; import com.sun.xml.internal.messaging.saaj.packaging.mime.MessagingException; import com.sun.xml.internal.messaging.saaj.packaging.mime.util.*; import com.sun.xml.internal.messaging.saaj.util.SAAJUtil; /** * This is a utility class that provides various MIME related * functionality.
* * There are a set of methods to encode and decode MIME headers as * per RFC 2047. A brief description on handling such headers is * given below:
* * RFC 822 mail headers must contain only US-ASCII * characters. Headers that contain non US-ASCII characters must be * encoded so that they contain only US-ASCII characters. Basically, * this process involves using either BASE64 or QP to encode certain * characters. RFC 2047 describes this in detail.
* * In Java, Strings contain (16 bit) Unicode characters. ASCII is a * subset of Unicode (and occupies the range 0 - 127). A String * that contains only ASCII characters is already mail-safe. If the * String contains non US-ASCII characters, it must be encoded. An * additional complexity in this step is that since Unicode is not * yet a widely used charset, one might want to first charset-encode * the String into another charset and then do the transfer-encoding. *
* Note that to get the actual bytes of a mail-safe String (say, * for sending over SMTP), one must do *
* * byte[] bytes = string.getBytes("iso-8859-1"); * *
*
* The setHeader
and addHeader
methods
* on MimeMessage and MimeBodyPart assume that the given header values
* are Unicode strings that contain only US-ASCII characters. Hence
* the callers of those methods must ensure that the values they pass
* do not contain non US-ASCII characters. The methods in this class
* help do this.
*
* The getHeader
family of methods on MimeMessage and
* MimeBodyPart return the raw header value. These might be encoded
* as per RFC 2047, and if so, must be decoded into Unicode Strings.
* The methods in this class help to do this.
* * Several System properties control strict conformance to the MIME * spec. Note that these are not session properties but must be set * globally as System properties.
*
* The mail.mime.decodetext.strict
property controls
* decoding of MIME encoded words. The MIME spec requires that encoded
* words start at the beginning of a whitespace separated word. Some
* mailers incorrectly include encoded words in the middle of a word.
* If the mail.mime.decodetext.strict
System property is
* set to "false"
, an attempt will be made to decode these
* illegal encoded words. The default is true.
*
* The mail.mime.encodeeol.strict
property controls the
* choice of Content-Transfer-Encoding for MIME parts that are not of
* type "text". Often such parts will contain textual data for which
* an encoding that allows normal end of line conventions is appropriate.
* In rare cases, such a part will appear to contain entirely textual
* data, but will require an encoding that preserves CR and LF characters
* without change. If the mail.mime.encodeeol.strict
* System property is set to "true"
, such an encoding will
* be used when necessary. The default is false.
*
* In addition, the mail.mime.charset
System property can
* be used to specify the default MIME charset to use for encoded words
* and text parts that don't otherwise specify a charset. Normally, the
* default MIME charset is derived from the default Java charset, as
* specified in the file.encoding
System property. Most
* applications will have no need to explicitly set the default MIME
* charset. In cases where the default MIME charset to be used for
* mail messages is different than the charset used for files stored on
* the system, this property should be set.
*
* @version 1.45, 03/03/10
* @author John Mani
* @author Bill Shannon
*/
public class MimeUtility {
// This class cannot be instantiated
private MimeUtility() { }
// Special "max" value; per the Javadoc further down in this file it means
// "check all available bytes" (the consumer is outside this excerpt).
public static final int ALL = -1;
// Initial capacity of the ByteArrayOutputStream used by doEncode().
private static final int BUFFER_SIZE = 1024;
// Set from the "mail.mime.decodetext.strict" System property; when false,
// decodeText()/decodeWord() also try to decode encoded words embedded
// inside other words.
private static boolean decodeStrict = true;
// Set from the "mail.mime.encodeeol.strict" System property; passed to the
// AsciiOutputStream used by getEncoding(DataHandler) for non-text parts.
private static boolean encodeEolStrict = false;
// Set from the "mail.mime.foldencodedwords" System property; when true,
// doEncode() separates consecutive encoded words with CRLF + space instead
// of a single space.
private static boolean foldEncodedWords = false;
// Set from the "mail.mime.foldtext" System property; when false, fold()
// and unfold() return their argument unchanged.
private static boolean foldText = true;
// Read the MIME behaviour switches from System properties once, when the
// class is initialized.
static {
    try {
        // Strict decoding stays on unless the property is exactly "false"
        // (case-insensitive); an unset property keeps the default of true.
        decodeStrict = !"false".equalsIgnoreCase(
            SAAJUtil.getSystemProperty("mail.mime.decodetext.strict"));

        // Strict EOL encoding is off unless the property is "true".
        encodeEolStrict = "true".equalsIgnoreCase(
            SAAJUtil.getSystemProperty("mail.mime.encodeeol.strict"));

        // Folding of encoded words is off unless the property is "true".
        foldEncodedWords = "true".equalsIgnoreCase(
            SAAJUtil.getSystemProperty("mail.mime.foldencodedwords"));

        // Text folding stays on unless the property is exactly "false".
        foldText = !"false".equalsIgnoreCase(
            SAAJUtil.getSystemProperty("mail.mime.foldtext"));
    } catch (SecurityException sex) {
        // Best effort: switches that could not be read keep the values
        // assigned so far (their declared defaults).
    }
}
/**
 * Get the content-transfer-encoding that should be applied to the data
 * of the given DataHandler to make it mail-safe.
 * <p>
 * Same as {@code getEncoding(DataSource)} except that instead of reading
 * the data from an {@code InputStream} it uses the {@code writeTo} method
 * to examine the data. This is more efficient in the common case of a
 * {@code DataHandler} created with an object and a MIME type (for
 * example, a "text/plain" String) because all the I/O is done in this
 * thread. In the case requiring an {@code InputStream} the
 * {@code DataHandler} uses a thread, a pair of pipe streams, and the
 * {@code writeTo} method to produce the data.
 *
 * @param dh the DataHandler whose content is examined
 * @return the result of {@code getEncoding(DataSource)} when the handler
 *         has a name; otherwise "7bit", "quoted-printable" or "base64"
 * @since JavaMail 1.2
 */
public static String getEncoding(DataHandler dh) {
    /*
     * Heuristic for picking the cheaper strategy: a DataHandler created
     * with an object always has a null name, while one created with a
     * DataSource usually has a non-null name. For the latter,
     * getEncoding(DataSource) is typically faster.
     *
     * XXX - This is actually quite a disgusting hack, but it makes
     * a common case run over twice as fast.
     */
    if (dh.getName() != null) {
        return getEncoding(dh.getDataSource());
    }

    ContentType cType;
    try {
        cType = new ContentType(dh.getContentType());
    } catch (Exception ex) {
        return "base64"; // what else ?!
    }

    // "text/*" content is scanned completely; anything else stops at the
    // first non-US-ASCII character and honours the strict-EOL switch.
    boolean isText = cType.match("text/*");
    AsciiOutputStream probe = isText
        ? new AsciiOutputStream(false, false)
        : new AsciiOutputStream(true, encodeEolStrict);
    try {
        dh.writeTo(probe);
    } catch (IOException ex) {
        // ignore it
    }

    int verdict = probe.getAscii();
    if (verdict == ALL_ASCII) {
        return "7bit"; // all ascii
    }
    if (isText && verdict == MOSTLY_ASCII) {
        return "quoted-printable"; // mostly ascii
    }
    // text: mostly binary; non-text: found at least one non-ascii character
    return "base64";
}
/**
 * Decode the given input stream. The Input stream returned is
 * the decoded input stream. All the encodings defined in RFC 2045
 * are supported here. They include "base64", "quoted-printable",
 * "7bit", "8bit", and "binary". In addition, "uuencode" is also
 * supported.
 *
 * @param is input stream
 * @param encoding the encoding of the stream; matched
 *                 case-insensitively. A null encoding is treated
 *                 like the identity encodings and the stream is
 *                 returned unchanged, mirroring {@code encode}.
 * @return decoded input stream.
 * @exception MessagingException if the encoding is not one of the
 *                 supported names
 */
public static InputStream decode(InputStream is, String encoding)
        throws MessagingException {
    // No encoding given: nothing to undo (same convention as encode()).
    if (encoding == null) {
        return is;
    }
    if (encoding.equalsIgnoreCase("base64")) {
        return new BASE64DecoderStream(is);
    }
    if (encoding.equalsIgnoreCase("quoted-printable")) {
        return new QPDecoderStream(is);
    }
    if (encoding.equalsIgnoreCase("uuencode")
            || encoding.equalsIgnoreCase("x-uuencode")
            || encoding.equalsIgnoreCase("x-uue")) {
        return new UUDecoderStream(is);
    }
    // Identity encodings: the bytes are already in their final form.
    if (encoding.equalsIgnoreCase("binary")
            || encoding.equalsIgnoreCase("7bit")
            || encoding.equalsIgnoreCase("8bit")) {
        return is;
    }
    throw new MessagingException("Unknown encoding: " + encoding);
}
/**
 * Wrap an encoder around the given output stream.
 * All the encodings defined in RFC 2045 are supported here.
 * They include "base64", "quoted-printable", "7bit", "8bit" and
 * "binary". In addition, "uuencode" is also supported.
 *
 * @param os output stream
 * @param encoding the encoding of the stream; matched
 *                 case-insensitively. If null, {@code os} is
 *                 returned unchanged.
 * @return output stream that applies the
 *         specified encoding.
 * @exception MessagingException if the encoding is not one of the
 *                 supported names
 */
public static OutputStream encode(OutputStream os, String encoding)
        throws MessagingException {
    if (encoding == null) {
        return os;
    }
    if (encoding.equalsIgnoreCase("base64")) {
        return new BASE64EncoderStream(os);
    }
    if (encoding.equalsIgnoreCase("quoted-printable")) {
        return new QPEncoderStream(os);
    }

    boolean uuencoded = encoding.equalsIgnoreCase("uuencode")
        || encoding.equalsIgnoreCase("x-uuencode")
        || encoding.equalsIgnoreCase("x-uue");
    if (uuencoded) {
        return new UUEncoderStream(os);
    }

    // Identity encodings need no wrapper at all.
    boolean identity = encoding.equalsIgnoreCase("binary")
        || encoding.equalsIgnoreCase("7bit")
        || encoding.equalsIgnoreCase("8bit");
    if (identity) {
        return os;
    }

    throw new MessagingException("Unknown encoding: " +encoding);
}
/**
 * Wrap an encoder around the given output stream.
 * All the encodings defined in RFC 2045 are supported here.
 * They include "base64", "quoted-printable", "7bit", "8bit" and
 * "binary". In addition, "uuencode" is also supported.
 * The {@code filename} parameter is used with the "uuencode"
 * encoding and is included in the encoded output.
 *
 * @param os output stream
 * @param encoding the encoding of the stream; matched
 *                 case-insensitively. If null, {@code os} is
 *                 returned unchanged.
 * @param filename name for the file being encoded (only used
 *                 with uuencode)
 * @return output stream that applies the
 *         specified encoding.
 * @exception MessagingException if the encoding is not one of the
 *                 supported names
 * @since JavaMail 1.2
 */
public static OutputStream encode(OutputStream os, String encoding,
                                  String filename)
        throws MessagingException {
    final OutputStream result;
    if (encoding == null) {
        result = os;
    } else if (encoding.equalsIgnoreCase("base64")) {
        result = new BASE64EncoderStream(os);
    } else if (encoding.equalsIgnoreCase("quoted-printable")) {
        result = new QPEncoderStream(os);
    } else if (encoding.equalsIgnoreCase("uuencode")
            || encoding.equalsIgnoreCase("x-uuencode")
            || encoding.equalsIgnoreCase("x-uue")) {
        // Only this branch makes use of the file name.
        result = new UUEncoderStream(os, filename);
    } else if (encoding.equalsIgnoreCase("binary")
            || encoding.equalsIgnoreCase("7bit")
            || encoding.equalsIgnoreCase("8bit")) {
        result = os;
    } else {
        throw new MessagingException("Unknown encoding: " +encoding);
    }
    return result;
}
/**
* Encode a RFC 822 "text" token into mail-safe form as per
* RFC 2047.
* * The given Unicode string is examined for non US-ASCII * characters. If the string contains only US-ASCII characters, * it is returned as-is. If the string contains non US-ASCII * characters, it is first character-encoded using the platform's * default charset, then transfer-encoded using either the B or * Q encoding. The resulting bytes are then returned as a Unicode * string containing only ASCII characters.
* * Note that this method should be used to encode only * "unstructured" RFC 822 headers.
* * Example of usage: *
* * MimeBodyPart part = ... * String rawvalue = "FooBar Mailer, Japanese version 1.1" * try { * // If we know for sure that rawvalue contains only US-ASCII * // characters, we can skip the encoding part * part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue)); * } catch (UnsupportedEncodingException e) { * // encoding failure * } catch (MessagingException me) { * // setHeader() failure * } * *
* * @param text unicode string * @return Unicode string containing only US-ASCII characters * @exception UnsupportedEncodingException if the encoding fails */ public static String encodeText(String text) throws UnsupportedEncodingException { return encodeText(text, null, null); } /** * Encode a RFC 822 "text" token into mail-safe form as per * RFC 2047.
* * The given Unicode string is examined for non US-ASCII * characters. If the string contains only US-ASCII characters, * it is returned as-is. If the string contains non US-ASCII * characters, it is first character-encoded using the specified * charset, then transfer-encoded using either the B or Q encoding. * The resulting bytes are then returned as a Unicode string * containing only ASCII characters.
* * Note that this method should be used to encode only * "unstructured" RFC 822 headers. * * @param text the header value * @param charset the charset. If this parameter is null, the * platform's default chatset is used. * @param encoding the encoding to be used. Currently supported * values are "B" and "Q". If this parameter is null, then * the "Q" encoding is used if most of characters to be * encoded are in the ASCII charset, otherwise "B" encoding * is used. * @return Unicode string containing only US-ASCII characters */ public static String encodeText(String text, String charset, String encoding) throws UnsupportedEncodingException { return encodeWord(text, charset, encoding, false); } /** * Decode "unstructured" headers, that is, headers that are defined * as '*text' as per RFC 822.
* * The string is decoded using the algorithm specified in * RFC 2047, Section 6.1.1. If the charset-conversion fails * for any sequence, an UnsupportedEncodingException is thrown. * If the String is not an RFC 2047 style encoded header, it is * returned as-is
* * Example of usage: *
* * MimeBodyPart part = ... * String rawvalue = null; * String value = null; * try { * if ((rawvalue = part.getHeader("X-mailer")[0]) != null) * value = MimeUtility.decodeText(rawvalue); * } catch (UnsupportedEncodingException e) { * // Don't care * value = rawvalue; * } catch (MessagingException me) { } * * return value; * *
* * @param etext the possibly encoded value * @exception UnsupportedEncodingException if the charset * conversion failed. */ public static String decodeText(String etext) throws UnsupportedEncodingException { /* * We look for sequences separated by "linear-white-space". * (as per RFC 2047, Section 6.1.1) * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL. */ String lwsp = " \t\n\r"; StringTokenizer st; /* * First, lets do a quick run thru the string and check * whether the sequence "=?" exists at all. If none exists, * we know there are no encoded-words in here and we can just * return the string as-is, without suffering thru the later * decoding logic. * This handles the most common case of unencoded headers * efficiently. */ if (etext.indexOf("=?") == -1) return etext; // Encoded words found. Start decoding ... st = new StringTokenizer(etext, lwsp, true); StringBuilder sb = new StringBuilder(); // decode buffer StringBuilder wsb = new StringBuilder(); // white space buffer boolean prevWasEncoded = false; while (st.hasMoreTokens()) { char c; String s = st.nextToken(); // If whitespace, append it to the whitespace buffer if (((c = s.charAt(0)) == ' ') || (c == '\t') || (c == '\r') || (c == '\n')) wsb.append(c); else { // Check if token is an 'encoded-word' .. String word; try { word = decodeWord(s); // Yes, this IS an 'encoded-word'. if (!prevWasEncoded && wsb.length() > 0) { // if the previous word was also encoded, we // should ignore the collected whitespace. Else // we include the whitespace as well. sb.append(wsb); } prevWasEncoded = true; } catch (ParseException pex) { // This is NOT an 'encoded-word'. word = s; // possibly decode inner encoded words if (!decodeStrict) word = decodeInnerWords(word); // include colleced whitespace .. 
if (wsb.length() > 0) sb.append(wsb); prevWasEncoded = false; } sb.append(word); // append the actual word wsb.setLength(0); // reset wsb for reuse } } return sb.toString(); } /** * Encode a RFC 822 "word" token into mail-safe form as per * RFC 2047.
* * The given Unicode string is examined for non US-ASCII * characters. If the string contains only US-ASCII characters, * it is returned as-is. If the string contains non US-ASCII * characters, it is first character-encoded using the platform's * default charset, then transfer-encoded using either the B or * Q encoding. The resulting bytes are then returned as a Unicode * string containing only ASCII characters.
* * This method is meant to be used when creating RFC 822 "phrases". * The InternetAddress class, for example, uses this to encode * it's 'phrase' component. * * @param text unicode string * @return Array of Unicode strings containing only US-ASCII * characters. * @exception UnsupportedEncodingException if the encoding fails */ public static String encodeWord(String word) throws UnsupportedEncodingException { return encodeWord(word, null, null); } /** * Encode a RFC 822 "word" token into mail-safe form as per * RFC 2047.
* * The given Unicode string is examined for non US-ASCII * characters. If the string contains only US-ASCII characters, * it is returned as-is. If the string contains non US-ASCII * characters, it is first character-encoded using the specified * charset, then transfer-encoded using either the B or Q encoding. * The resulting bytes are then returned as a Unicode string * containing only ASCII characters.
* * @param text unicode string * @param charset the MIME charset * @param encoding the encoding to be used. Currently supported * values are "B" and "Q". If this parameter is null, then * the "Q" encoding is used if most of characters to be * encoded are in the ASCII charset, otherwise "B" encoding * is used. * @return Unicode string containing only US-ASCII characters * @exception UnsupportedEncodingException if the encoding fails */ public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException { return encodeWord(word, charset, encoding, true); } /* * Encode the given string. The parameter 'encodingWord' should * be true if a RFC 822 "word" token is being encoded and false if a * RFC 822 "text" token is being encoded. This is because the * "Q" encoding defined in RFC 2047 has more restrictions when * encoding "word" tokens. (Sigh) */ private static String encodeWord(String string, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException { // If 'string' contains only US-ASCII characters, just // return it. int ascii = checkAscii(string); if (ascii == ALL_ASCII) return string; // Else, apply the specified charset conversion. String jcharset; if (charset == null) { // use default charset jcharset = getDefaultJavaCharset(); // the java charset charset = getDefaultMIMECharset(); // the MIME equivalent } else // MIME charset -> java charset jcharset = javaCharset(charset); // If no transfer-encoding is specified, figure one out. 
if (encoding == null) { if (ascii != MOSTLY_NONASCII) encoding = "Q"; else encoding = "B"; } boolean b64; if (encoding.equalsIgnoreCase("B")) b64 = true; else if (encoding.equalsIgnoreCase("Q")) b64 = false; else throw new UnsupportedEncodingException( "Unknown transfer encoding: " + encoding); StringBuilder outb = new StringBuilder(); // the output buffer doEncode(string, b64, jcharset, // As per RFC 2047, size of an encoded string should not // exceed 75 bytes. // 7 = size of "=?", '?', 'B'/'Q', '?', "?=" 75 - 7 - charset.length(), // the available space "=?" + charset + "?" + encoding + "?", // prefix true, encodingWord, outb); return outb.toString(); } private static void doEncode(String string, boolean b64, String jcharset, int avail, String prefix, boolean first, boolean encodingWord, StringBuilder buf) throws UnsupportedEncodingException { // First find out what the length of the encoded version of // 'string' would be. byte[] bytes = string.getBytes(jcharset); int len; if (b64) // "B" encoding len = BEncoderStream.encodedLength(bytes); else // "Q" len = QEncoderStream.encodedLength(bytes, encodingWord); int size; if ((len > avail) && ((size = string.length()) > 1)) { // If the length is greater than 'avail', split 'string' // into two and recurse. doEncode(string.substring(0, size/2), b64, jcharset, avail, prefix, first, encodingWord, buf); doEncode(string.substring(size/2, size), b64, jcharset, avail, prefix, false, encodingWord, buf); } else { // length <= than 'avail'. 
Encode the given string ByteArrayOutputStream os = new ByteArrayOutputStream(BUFFER_SIZE); OutputStream eos; // the encoder if (b64) // "B" encoding eos = new BEncoderStream(os); else // "Q" encoding eos = new QEncoderStream(os, encodingWord); try { // do the encoding eos.write(bytes); eos.close(); } catch (IOException ioex) { } byte[] encodedBytes = os.toByteArray(); // the encoded stuff // Now write out the encoded (all ASCII) bytes into our // StringBuffer if (!first) // not the first line of this sequence if (foldEncodedWords) buf.append("\r\n "); // start a continuation line else buf.append(" "); // line will be folded later buf.append(prefix); for (int i = 0; i < encodedBytes.length; i++) buf.append((char)encodedBytes[i]); buf.append("?="); // terminate the current sequence } } /** * The string is parsed using the rules in RFC 2047 for parsing * an "encoded-word". If the parse fails, a ParseException is * thrown. Otherwise, it is transfer-decoded, and then * charset-converted into Unicode. If the charset-conversion * fails, an UnsupportedEncodingException is thrown.
* * @param eword the possibly encoded value * @exception ParseException if the string is not an * encoded-word as per RFC 2047. * @exception UnsupportedEncodingException if the charset * conversion failed. */ public static String decodeWord(String eword) throws ParseException, UnsupportedEncodingException { if (!eword.startsWith("=?")) // not an encoded word throw new ParseException(); // get charset int start = 2; int pos; if ((pos = eword.indexOf('?', start)) == -1) throw new ParseException(); String charset = javaCharset(eword.substring(start, pos)); // get encoding start = pos+1; if ((pos = eword.indexOf('?', start)) == -1) throw new ParseException(); String encoding = eword.substring(start, pos); // get encoded-sequence start = pos+1; if ((pos = eword.indexOf("?=", start)) == -1) throw new ParseException(); String word = eword.substring(start, pos); try { // Extract the bytes from word ByteArrayInputStream bis = new ByteArrayInputStream(ASCIIUtility.getBytes(word)); // Get the appropriate decoder InputStream is; if (encoding.equalsIgnoreCase("B")) is = new BASE64DecoderStream(bis); else if (encoding.equalsIgnoreCase("Q")) is = new QDecoderStream(bis); else throw new UnsupportedEncodingException( "unknown encoding: " + encoding); // For b64 & q, size of decoded word <= size of word. So // the decoded bytes must fit into the 'bytes' array. This // is certainly more efficient than writing bytes into a // ByteArrayOutputStream and then pulling out the byte[] // from it. 
int count = bis.available(); byte[] bytes = new byte[count]; // count is set to the actual number of decoded bytes count = is.read(bytes, 0, count); // Finally, convert the decoded bytes into a String using // the specified charset String s = new String(bytes, 0, count, charset); if (pos + 2 < eword.length()) { // there's still more text in the string String rest = eword.substring(pos + 2); if (!decodeStrict) rest = decodeInnerWords(rest); s += rest; } return s; } catch (UnsupportedEncodingException uex) { // explicitly catch and rethrow this exception, otherwise // the below IOException catch will swallow this up! throw uex; } catch (IOException ioex) { // Shouldn't happen. throw new ParseException(); } catch (IllegalArgumentException iex) { /* An unknown charset of the form ISO-XXX-XXX, will cause * the JDK to throw an IllegalArgumentException ... Since the * JDK will attempt to create a classname using this string, * but valid classnames must not contain the character '-', * and this results in an IllegalArgumentException, rather than * the expected UnsupportedEncodingException. Yikes */ throw new UnsupportedEncodingException(); } } /** * Look for encoded words within a word. The MIME spec doesn't * allow this, but many broken mailers, especially Japanese mailers, * produce such incorrect encodings. 
*/ private static String decodeInnerWords(String word) throws UnsupportedEncodingException { int start = 0, i; StringBuilder buf = new StringBuilder(); while ((i = word.indexOf("=?", start)) >= 0) { buf.append(word.substring(start, i)); int end = word.indexOf("?=", i); if (end < 0) break; String s = word.substring(i, end + 2); try { s = decodeWord(s); } catch (ParseException pex) { // ignore it, just use the original string } buf.append(s); start = end + 2; } if (start == 0) return word; if (start < word.length()) buf.append(word.substring(start)); return buf.toString(); } /** * A utility method to quote a word, if the word contains any * characters from the specified 'specials' list.
*
* The HeaderTokenizer
class defines two special
* sets of delimiters - MIME and RFC 822.
*
* This method is typically used during the generation of
* RFC 822 and MIME header fields.
*
* @param word word to be quoted
* @param specials the set of special characters
* @return the possibly quoted word
* @see javax.mail.internet.HeaderTokenizer#MIME
* @see javax.mail.internet.HeaderTokenizer#RFC822
*/
public static String quote(String word, String specials) {
    int len = word.length();
    if (len == 0) {
        // An empty value is neither a legal RFC 822 atom nor a legal
        // MIME token, so it must be written as an empty quoted-string.
        return "\"\"";
    }

    /*
     * Look for any "bad" characters, Escape and
     * quote the entire string if necessary.
     */
    boolean needQuoting = false;
    for (int i = 0; i < len; i++) {
        char c = word.charAt(i);
        if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
            // need to escape them and then quote the whole string;
            // everything before index i is known to need no escaping
            StringBuilder sb = new StringBuilder(len + 3);
            sb.append('"');
            sb.append(word, 0, i);
            int lastc = 0;
            for (int j = i; j < len; j++) {
                char cc = word.charAt(j);
                boolean escapable = (cc == '"') || (cc == '\\') ||
                                    (cc == '\r') || (cc == '\n');
                // The LF of a CRLF pair is not escaped again: the
                // backslash written before the CR covers the pair.
                if (escapable && !(cc == '\n' && lastc == '\r')) {
                    sb.append('\\'); // Escape the character
                }
                sb.append(cc);
                lastc = cc;
            }
            sb.append('"');
            return sb.toString();
        }
        if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0) {
            // These characters cause the string to be quoted
            needQuoting = true;
        }
    }

    if (!needQuoting) {
        return word;
    }
    StringBuilder sb = new StringBuilder(len + 2);
    sb.append('"').append(word).append('"');
    return sb.toString();
}
/**
* Fold a string at linear whitespace so that each line is no longer
* than 76 characters, if possible. If there are more than 76
* non-whitespace characters consecutively, the string is folded at
* the first whitespace after that sequence. The parameter
* used
indicates how many characters have been used in
* the current line; it is usually the length of the header name.
* * Note that line breaks in the string aren't escaped; they probably * should be. * * @param used characters used in line so far * @param s the string to fold * @return the folded string */ /*public*/ static String fold(int used, String s) { if (!foldText) return s; int end; char c; // Strip trailing spaces for (end = s.length() - 1; end >= 0; end--) { c = s.charAt(end); if (c != ' ' && c != '\t') break; } if (end != s.length() - 1) s = s.substring(0, end + 1); // if the string fits now, just return it if (used + s.length() <= 76) return s; // have to actually fold the string StringBuilder sb = new StringBuilder(s.length() + 4); char lastc = 0; while (used + s.length() > 76) { int lastspace = -1; for (int i = 0; i < s.length(); i++) { if (lastspace != -1 && used + i > 76) break; c = s.charAt(i); if (c == ' ' || c == '\t') if (!(lastc == ' ' || lastc == '\t')) lastspace = i; lastc = c; } if (lastspace == -1) { // no space, use the whole thing sb.append(s); s = ""; used = 0; break; } sb.append(s.substring(0, lastspace)); sb.append("\r\n"); lastc = s.charAt(lastspace); sb.append(lastc); s = s.substring(lastspace + 1); used = 1; } sb.append(s); return sb.toString(); } /** * Unfold a folded header. Any line breaks that aren't escaped and * are followed by whitespace are removed. * * @param s the string to unfold * @return the unfolded string */ /*public*/ static String unfold(String s) { if (!foldText) return s; StringBuilder sb = null; int i; while ((i = indexOfAny(s, "\r\n")) >= 0) { int start = i; int l = s.length(); i++; // skip CR or NL if (i < l && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n') i++; // skip LF if (start == 0 || s.charAt(start - 1) != '\\') { char c; // if next line starts with whitespace, skip all of it // XXX - always has to be true? 
if (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) { i++; // skip whitespace while (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) i++; if (sb == null) sb = new StringBuilder(s.length()); if (start != 0) { sb.append(s.substring(0, start)); sb.append(' '); } s = s.substring(i); continue; } // it's not a continuation line, just leave it in if (sb == null) sb = new StringBuilder(s.length()); sb.append(s.substring(0, i)); s = s.substring(i); } else { // there's a backslash at "start - 1" // strip it out, but leave in the line break if (sb == null) sb = new StringBuilder(s.length()); sb.append(s.substring(0, start - 1)); sb.append(s.substring(start, i)); s = s.substring(i); } } if (sb != null) { sb.append(s); return sb.toString(); } else return s; } /** * Return the first index of any of the characters in "any" in "s", * or -1 if none are found. * * This should be a method on String. */ private static int indexOfAny(String s, String any) { return indexOfAny(s, any, 0); } private static int indexOfAny(String s, String any, int start) { try { int len = s.length(); for (int i = start; i < len; i++) { if (any.indexOf(s.charAt(i)) >= 0) return i; } return -1; } catch (StringIndexOutOfBoundsException e) { return -1; } } /** * Convert a MIME charset name into a valid Java charset name.
* * @param charset the MIME charset name * @return the Java charset equivalent. If a suitable mapping is * not available, the passed in charset is itself returned. */ public static String javaCharset(String charset) { if (mime2java == null || charset == null) // no mapping table, or charset parameter is null return charset; String alias = mime2java.get(charset.toLowerCase()); return alias == null ? charset : alias; } /** * Convert a java charset into its MIME charset name.
*
* Note that a future version of JDK (post 1.2) might provide
* this functionality, in which case, we may deprecate this
* method then.
*
* @param charset the JDK charset
* @return the MIME/IANA equivalent. If a mapping
* is not possible, the passed in charset itself
* is returned.
* @since JavaMail 1.1
*/
public static String mimeCharset(String charset) {
    if (java2mime == null || charset == null) {
        // no mapping table or charset param is null
        return charset;
    }

    // Lower-case the lookup key with a fixed locale so the mapping does
    // not depend on the JVM's default locale (e.g. the Turkish dotless i).
    String alias = java2mime.get(charset.toLowerCase(Locale.ENGLISH));
    return alias == null ? charset : alias;
}
// Cache for getDefaultJavaCharset(); null until that method first resolves it.
private static String defaultJavaCharset;
// Cache for getDefaultMIMECharset(); null until that method first resolves it.
private static String defaultMIMECharset;
/**
* Return the Java charset name that is used by default on this system.
* The value is computed once and then cached.
*
* If the System property <code>mail.mime.charset</code> is set to a
* non-empty value, the Java charset corresponding to that MIME charset
* is returned. Otherwise the <code>file.encoding</code> System property
* is used, with <code>8859_1</code> as the last resort.
*
* @return the default charset of the system's default locale,
* as a Java charset. (NOT a MIME charset)
* @since JavaMail 1.1
*/
public static String getDefaultJavaCharset() {
    // already worked out on an earlier call
    if (defaultJavaCharset != null) {
        return defaultJavaCharset;
    }

    // mail.mime.charset, when present, decides the Java charset as well
    final String mimeName = SAAJUtil.getSystemProperty("mail.mime.charset");
    if (mimeName != null && mimeName.length() > 0) {
        defaultJavaCharset = javaCharset(mimeName);
        return defaultJavaCharset;
    }

    try {
        defaultJavaCharset = System.getProperty("file.encoding", "8859_1");
    } catch (SecurityException denied) {
        // Not allowed to read file.encoding: ask an InputStreamReader
        // built without an explicit charset which encoding it chose.
        InputStream dummy = new InputStream() {
            public int read() {
                return 0;
            }
        };
        String detected = new InputStreamReader(dummy).getEncoding();
        defaultJavaCharset = (detected != null) ? detected : "8859_1";
    }
    return defaultJavaCharset;
}
/*
* Get the default MIME charset for this locale.
*
* The mail.mime.charset System property wins when it is set to a
* non-empty value; otherwise the MIME name of the default Java charset
* is used. The answer is cached after the first call.
*/
static String getDefaultMIMECharset() {
    if (defaultMIMECharset == null) {
        String configured = SAAJUtil.getSystemProperty("mail.mime.charset");
        // An empty property value counts as "not set", the same way
        // getDefaultJavaCharset() reads this property; otherwise ""
        // would be cached and handed out as a charset name.
        if (configured != null && configured.length() > 0)
            defaultMIMECharset = configured;
        else
            defaultMIMECharset = mimeCharset(getDefaultJavaCharset());
    }
    return defaultMIMECharset;
}
// Tables to map MIME charset names to Java names and vice versa.
// XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset
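private static Hashtable<String, String> mime2java;
private static Hashtable<String, String> java2mime;
// NOTE(review): the text that followed these declarations appears to be
// missing from this copy of the file; no code that fills the two tables
// is visible here -- confirm against the upstream source.

/**
* Check if the given input stream contains non US-ASCII chars.
* Up to <code>max</code> bytes are checked. If <code>max</code> is
* set to <code>ALL</code>, then all the bytes available in this
* input stream are checked. If <code>breakOnNonAscii</code> is true
* the check terminates when the first non-US-ASCII character is
* found and MOSTLY_NONASCII is returned. Else, the check continues
* till <code>max</code> bytes or till the end of stream.
*
* @param is the input stream
* @param max maximum bytes to check for. The special value
* ALL indicates that all the bytes in this input
* stream must be checked.
* @param breakOnNonAscii if <code>true</code>, then terminate
* the check when the first non-US-ASCII character
* is found.
* @return ALL_ASCII if all characters in the stream
* belong to the US-ASCII charset. MOSTLY_ASCII
* if more than half of the available characters
* are US-ASCII characters. Else MOSTLY_NONASCII.
*/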
static int checkAscii(InputStream is, int max, boolean breakOnNonAscii) {
    // Classifies the bytes read from "is" as ALL_ASCII, MOSTLY_ASCII or
    // MOSTLY_NONASCII, looking at no more than "max" bytes (ALL means
    // no limit). An IOException ends the scan early and whatever was
    // read up to that point is classified.
    int ascii = 0, non_ascii = 0;
    int len;
    int block = 4096;
    int linelen = 0;
    boolean longLine = false, badEOL = false;
    boolean checkEOL = encodeEolStrict && breakOnNonAscii;
    // Previous byte examined. It lives outside the read loop so that a
    // CR ending one buffer and an LF starting the next are still seen
    // as a CRLF pair, and a CR at the end of a buffer that is NOT
    // followed by LF is still reported.
    int lastb = 0;
    byte[] buf = null;
    if (max != 0) {
        block = (max == ALL) ? 4096 : Math.min(max, 4096);
        buf = new byte[block];
    }
    while (max != 0) {
        // Never ask for more than the remaining budget. Otherwise a
        // short read followed by a full one would push "max" below zero
        // and the loop would run on past the caller's limit.
        int want = (max == ALL) ? block : Math.min(block, max);
        try {
            if ((len = is.read(buf, 0, want)) == -1)
                break;
            for (int i = 0; i < len; i++) {
                // The '&' operator promotes buf[i] to an int, and the
                // mask drops the sign extension so the value is never
                // negative.
                int b = buf[i] & 0xff;
                // CR without a following LF, or LF without a leading CR
                if (checkEOL &&
                        ((lastb == '\r' && b != '\n') ||
                         (lastb != '\r' && b == '\n')))
                    badEOL = true;
                if (b == '\r' || b == '\n')
                    linelen = 0;
                else {
                    linelen++;
                    if (linelen > 998) // 1000 - CRLF
                        longLine = true;
                }
                if (nonascii(b)) { // non-ascii
                    if (breakOnNonAscii) // we are done
                        return MOSTLY_NONASCII;
                    else
                        non_ascii++;
                } else
                    ascii++;
                lastb = b;
            }
        } catch (IOException ioex) {
            // best effort: classify what has been seen so far
            break;
        }
        if (max != ALL)
            max -= len;
    }

    if (max == 0 && breakOnNonAscii)
        // We have been told to break on the first non-ascii character.
        // We haven't got any non-ascii character yet, but then we
        // have not checked all of the available bytes either. So we
        // cannot say for sure that this input stream is ALL_ASCII,
        // and hence we must play safe and return MOSTLY_NONASCII
        return MOSTLY_NONASCII;

    if (non_ascii == 0) { // no non-us-ascii characters so far
        // If we're looking at non-text data, and we saw CR without LF
        // or vice versa, consider this mostly non-ASCII so that it
        // will be base64 encoded (since the quoted-printable encoder
        // doesn't encode this case properly).
        if (badEOL)
            return MOSTLY_NONASCII;
        // if we've seen a long line, we degrade to mostly ascii
        else if (longLine)
            return MOSTLY_ASCII;
        else
            return ALL_ASCII;
    }
    if (ascii > non_ascii) // mostly ascii
        return MOSTLY_ASCII;
    return MOSTLY_NONASCII;
}
static final boolean nonascii(int b) {
    // Tells whether a byte value falls outside what this class counts
    // as plain ASCII text.
    // DEL (0x7f) and everything above it
    if (b >= 0x7f) {
        return true;
    }
    // space (0x20) up to '~'
    if (b >= 0x20) {
        return false;
    }
    // below space: only CR, LF and TAB are accepted
    switch (b) {
        case '\r':
        case '\n':
        case '\t':
            return false;
        default:
            return true;
    }
}
}
/**
* An OutputStream that looks at every byte written to it and reports
* whether the data is all ASCII, mostly ASCII, or mostly non-ASCII.
* The bytes themselves are not kept; only counters and flags are updated.
*/
class AsciiOutputStream extends OutputStream {
    // give up (with EOFException) at the first non-ASCII byte
    private final boolean stopAtFirstNonAscii;
    // also watch for CR without LF and LF without CR
    private final boolean checkEOL;
    private int asciiCount = 0;
    private int nonAsciiCount = 0;
    private int lineLength = 0;
    private boolean sawLongLine = false;
    private boolean sawBadEol = false;
    // last byte that was fully processed
    private int previous = 0;
    // non-zero once the answer has been decided early
    private int verdict = 0;

    public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) {
        this.stopAtFirstNonAscii = breakOnNonAscii;
        this.checkEOL = encodeEolStrict && breakOnNonAscii;
    }

    @Override
    public void write(int b) throws IOException {
        check(b);
    }

    @Override
    public void write(byte b[]) throws IOException {
        write(b, 0, b.length);
    }

    @Override
    public void write(byte b[], int off, int len) throws IOException {
        final int end = off + len;
        int pos = off;
        while (pos < end) {
            check(b[pos]);
            pos++;
        }
    }

    /**
    * Account for one byte. Throws EOFException at the first non-ASCII
    * byte when the stream was created with breakOnNonAscii set.
    */
    private void check(int b) throws IOException {
        final int ch = b & 0xff;

        if (checkEOL) {
            // bad when exactly one of "previous was CR" / "this is LF" holds
            boolean afterCr = (previous == '\r');
            boolean isLf = (ch == '\n');
            if (afterCr != isLf) {
                sawBadEol = true;
            }
        }

        if (ch == '\r' || ch == '\n') {
            lineLength = 0;
        } else if (++lineLength > 998) { // 1000 - CRLF
            sawLongLine = true;
        }

        if (!MimeUtility.nonascii(ch)) {
            asciiCount++;
        } else {
            nonAsciiCount++;
            if (stopAtFirstNonAscii) { // we are done
                verdict = MimeUtility.MOSTLY_NONASCII;
                throw new EOFException();
            }
        }
        previous = ch;
    }

    /**
    * Return ASCII-ness of data stream.
    */
    public int getAscii() {
        if (verdict != 0) {
            return verdict;
        }
        // If we're looking at non-text data, and we saw CR without LF
        // or vice versa, consider this mostly non-ASCII so that it
        // will be base64 encoded (since the quoted-printable encoder
        // doesn't encode this case properly).
        if (sawBadEol) {
            return MimeUtility.MOSTLY_NONASCII;
        }
        if (nonAsciiCount == 0) {
            // a long line degrades pure ASCII to mostly ASCII
            return sawLongLine ? MimeUtility.MOSTLY_ASCII
                               : MimeUtility.ALL_ASCII;
        }
        return (asciiCount > nonAsciiCount) ? MimeUtility.MOSTLY_ASCII
                                            : MimeUtility.MOSTLY_NONASCII;
    }
}