< prev index next >

src/java.xml.ws/share/classes/com/sun/xml/internal/messaging/saaj/packaging/mime/internet/MimeUtility.java

Print this page


   1 /*
   2  * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


 493          * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL.
 494          */
 495         String lwsp = " \t\n\r";
 496         StringTokenizer st;
 497 
 498         /*
 499          * First, lets do a quick run thru the string and check
 500          * whether the sequence "=?"  exists at all. If none exists,
 501          * we know there are no encoded-words in here and we can just
 502          * return the string as-is, without suffering thru the later
 503          * decoding logic.
 504          * This handles the most common case of unencoded headers
 505          * efficiently.
 506          */
 507         if (etext.indexOf("=?") == -1)
 508             return etext;
 509 
 510         // Encoded words found. Start decoding ...
 511 
 512         st = new StringTokenizer(etext, lwsp, true);
 513         StringBuffer sb = new StringBuffer();  // decode buffer
 514         StringBuffer wsb = new StringBuffer(); // white space buffer
 515         boolean prevWasEncoded = false;
 516 
 517         while (st.hasMoreTokens()) {
 518             char c;
 519             String s = st.nextToken();
 520             // If whitespace, append it to the whitespace buffer
 521             if (((c = s.charAt(0)) == ' ') || (c == '\t') ||
 522                 (c == '\r') || (c == '\n'))
 523                 wsb.append(c);
 524             else {
 525                 // Check if token is an 'encoded-word' ..
 526                 String word;
 527                 try {
 528                     word = decodeWord(s);
 529                     // Yes, this IS an 'encoded-word'.
 530                     if (!prevWasEncoded && wsb.length() > 0) {
 531                         // if the previous word was also encoded, we
 532                         // should ignore the collected whitespace. Else
 533                         // we include the whitespace as well.
 534                         sb.append(wsb);


 631         } else // MIME charset -> java charset
 632             jcharset = javaCharset(charset);
 633 
 634         // If no transfer-encoding is specified, figure one out.
 635         if (encoding == null) {
 636             if (ascii != MOSTLY_NONASCII)
 637                 encoding = "Q";
 638             else
 639                 encoding = "B";
 640         }
 641 
 642         boolean b64;
 643         if (encoding.equalsIgnoreCase("B"))
 644             b64 = true;
 645         else if (encoding.equalsIgnoreCase("Q"))
 646             b64 = false;
 647         else
 648             throw new UnsupportedEncodingException(
 649                         "Unknown transfer encoding: " + encoding);
 650 
 651         StringBuffer outb = new StringBuffer(); // the output buffer
 652         doEncode(string, b64, jcharset,
 653                  // As per RFC 2047, size of an encoded string should not
 654                  // exceed 75 bytes.
 655                  // 7 = size of "=?", '?', 'B'/'Q', '?', "?="
 656                  75 - 7 - charset.length(), // the available space
 657                  "=?" + charset + "?" + encoding + "?", // prefix
 658                  true, encodingWord, outb);
 659 
 660         return outb.toString();
 661     }
 662 
 663     private static void doEncode(String string, boolean b64,
 664                 String jcharset, int avail, String prefix,
 665                 boolean first, boolean encodingWord, StringBuffer buf)
 666                         throws UnsupportedEncodingException {
 667 
 668         // First find out what the length of the encoded version of
 669         // 'string' would be.
 670         byte[] bytes = string.getBytes(jcharset);
 671         int len;
 672         if (b64) // "B" encoding
 673             len = BEncoderStream.encodedLength(bytes);
 674         else // "Q"
 675             len = QEncoderStream.encodedLength(bytes, encodingWord);
 676 
 677         int size;
 678         if ((len > avail) && ((size = string.length()) > 1)) {
 679             // If the length is greater than 'avail', split 'string'
 680             // into two and recurse.
 681             doEncode(string.substring(0, size/2), b64, jcharset,
 682                      avail, prefix, first, encodingWord, buf);
 683             doEncode(string.substring(size/2, size), b64, jcharset,
 684                      avail, prefix, false, encodingWord, buf);
 685         } else {


 795         } catch (IllegalArgumentException iex) {
 796             /* An unknown charset of the form ISO-XXX-XXX, will cause
 797              * the JDK to throw an IllegalArgumentException ... Since the
 798              * JDK will attempt to create a classname using this string,
 799              * but valid classnames must not contain the character '-',
 800              * and this results in an IllegalArgumentException, rather than
 801              * the expected UnsupportedEncodingException. Yikes
 802              */
 803             throw new UnsupportedEncodingException();
 804         }
 805     }
 806 
 807     /**
 808      * Look for encoded words within a word.  The MIME spec doesn't
 809      * allow this, but many broken mailers, especially Japanese mailers,
 810      * produce such incorrect encodings.
 811      */
 812     private static String decodeInnerWords(String word)
 813                                 throws UnsupportedEncodingException {
 814         int start = 0, i;
 815         StringBuffer buf = new StringBuffer();
 816         while ((i = word.indexOf("=?", start)) >= 0) {
 817             buf.append(word.substring(start, i));
 818             int end = word.indexOf("?=", i);
 819             if (end < 0)
 820                 break;
 821             String s = word.substring(i, end + 2);
 822             try {
 823                 s = decodeWord(s);
 824             } catch (ParseException pex) {
 825                 // ignore it, just use the original string
 826             }
 827             buf.append(s);
 828             start = end + 2;
 829         }
 830         if (start == 0)
 831             return word;
 832         if (start < word.length())
 833             buf.append(word.substring(start));
 834         return buf.toString();
 835     }


 845      * RFC 822 and MIME header fields.
 846      *
 847      * @param   word    word to be quoted
 848      * @param   specials the set of special characters
 849      * @return          the possibly quoted word
 850      * @see     javax.mail.internet.HeaderTokenizer#MIME
 851      * @see     javax.mail.internet.HeaderTokenizer#RFC822
 852      */
 853     public static String quote(String word, String specials) {
 854         int len = word.length();
 855 
 856         /*
 857          * Look for any "bad" characters, Escape and
 858          *  quote the entire string if necessary.
 859          */
 860         boolean needQuoting = false;
 861         for (int i = 0; i < len; i++) {
 862             char c = word.charAt(i);
 863             if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
 864                 // need to escape them and then quote the whole string
 865                 StringBuffer sb = new StringBuffer(len + 3);
 866                 sb.append('"');
 867                 sb.append(word.substring(0, i));
 868                 int lastc = 0;
 869                 for (int j = i; j < len; j++) {
 870                     char cc = word.charAt(j);
 871                     if ((cc == '"') || (cc == '\\') ||
 872                         (cc == '\r') || (cc == '\n'))
 873                         if (cc == '\n' && lastc == '\r')
 874                             ;   // do nothing, CR was already escaped
 875                         else
 876                             sb.append('\\');    // Escape the character
 877                     sb.append(cc);
 878                     lastc = cc;
 879                 }
 880                 sb.append('"');
 881                 return sb.toString();
 882             } else if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0)
 883                 // These characters cause the string to be quoted
 884                 needQuoting = true;
 885         }
 886 
 887         if (needQuoting) {
 888             StringBuffer sb = new StringBuffer(len + 2);
 889             sb.append('"').append(word).append('"');
 890             return sb.toString();
 891         } else
 892             return word;
 893     }
 894 
 895     /**
 896      * Fold a string at linear whitespace so that each line is no longer
 897      * than 76 characters, if possible.  If there are more than 76
 898      * non-whitespace characters consecutively, the string is folded at
 899      * the first whitespace after that sequence.  The parameter
 900      * <code>used</code> indicates how many characters have been used in
 901      * the current line; it is usually the length of the header name. <p>
 902      *
 903      * Note that line breaks in the string aren't escaped; they probably
 904      * should be.
 905      *
 906      * @param   used    characters used in line so far
 907      * @param   s       the string to fold
 908      * @return          the folded string


 910     /*public*/ static String fold(int used, String s) {
 911         if (!foldText)
 912             return s;
 913 
 914         int end;
 915         char c;
 916         // Strip trailing spaces
 917         for (end = s.length() - 1; end >= 0; end--) {
 918             c = s.charAt(end);
 919             if (c != ' ' && c != '\t')
 920                 break;
 921         }
 922         if (end != s.length() - 1)
 923             s = s.substring(0, end + 1);
 924 
 925         // if the string fits now, just return it
 926         if (used + s.length() <= 76)
 927             return s;
 928 
 929         // have to actually fold the string
 930         StringBuffer sb = new StringBuffer(s.length() + 4);
 931         char lastc = 0;
 932         while (used + s.length() > 76) {
 933             int lastspace = -1;
 934             for (int i = 0; i < s.length(); i++) {
 935                 if (lastspace != -1 && used + i > 76)
 936                     break;
 937                 c = s.charAt(i);
 938                 if (c == ' ' || c == '\t')
 939                     if (!(lastc == ' ' || lastc == '\t'))
 940                         lastspace = i;
 941                 lastc = c;
 942             }
 943             if (lastspace == -1) {
 944                 // no space, use the whole thing
 945                 sb.append(s);
 946                 s = "";
 947                 used = 0;
 948                 break;
 949             }
 950             sb.append(s.substring(0, lastspace));


 952             lastc = s.charAt(lastspace);
 953             sb.append(lastc);
 954             s = s.substring(lastspace + 1);
 955             used = 1;
 956         }
 957         sb.append(s);
 958         return sb.toString();
 959     }
 960 
 961     /**
 962      * Unfold a folded header.  Any line breaks that aren't escaped and
 963      * are followed by whitespace are removed.
 964      *
 965      * @param   s       the string to unfold
 966      * @return          the unfolded string
 967      */
 968     /*public*/ static String unfold(String s) {
 969         if (!foldText)
 970             return s;
 971 
 972         StringBuffer sb = null;
 973         int i;
 974         while ((i = indexOfAny(s, "\r\n")) >= 0) {
 975             int start = i;
 976             int l = s.length();
 977             i++;                // skip CR or NL
 978             if (i < l && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n')
 979                 i++;    // skip LF
 980             if (start == 0 || s.charAt(start - 1) != '\\') {
 981                 char c;
 982                 // if next line starts with whitespace, skip all of it
 983                 // XXX - always has to be true?
 984                 if (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) {
 985                     i++;        // skip whitespace
 986                     while (i < l && ((c = s.charAt(i)) == ' ' || c == '\t'))
 987                         i++;
 988                     if (sb == null)
 989                         sb = new StringBuffer(s.length());
 990                     if (start != 0) {
 991                         sb.append(s.substring(0, start));
 992                         sb.append(' ');
 993                     }
 994                     s = s.substring(i);
 995                     continue;
 996                 }
 997                 // it's not a continuation line, just leave it in
 998                 if (sb == null)
 999                     sb = new StringBuffer(s.length());
1000                 sb.append(s.substring(0, i));
1001                 s = s.substring(i);
1002             } else {
1003                 // there's a backslash at "start - 1"
1004                 // strip it out, but leave in the line break
1005                 if (sb == null)
1006                     sb = new StringBuffer(s.length());
1007                 sb.append(s.substring(0, start - 1));
1008                 sb.append(s.substring(start, i));
1009                 s = s.substring(i);
1010             }
1011         }
1012         if (sb != null) {
1013             sb.append(s);
1014             return sb.toString();
1015         } else
1016             return s;
1017     }
1018 
1019     /**
1020      * Return the first index of any of the characters in "any" in "s",
1021      * or -1 if none are found.
1022      *
1023      * This should be a method on String.
1024      */
1025     private static int indexOfAny(String s, String any) {
1026         return indexOfAny(s, any, 0);


1034                     return i;
1035             }
1036             return -1;
1037         } catch (StringIndexOutOfBoundsException e) {
1038             return -1;
1039         }
1040     }
1041 
1042     /**
1043      * Convert a MIME charset name into a valid Java charset name. <p>
1044      *
1045      * @param charset   the MIME charset name
1046      * @return  the Java charset equivalent. If a suitable mapping is
1047      *          not available, the passed in charset is itself returned.
1048      */
1049     public static String javaCharset(String charset) {
1050         if (mime2java == null || charset == null)
1051             // no mapping table, or charset parameter is null
1052             return charset;
1053 
1054         String alias = (String)mime2java.get(charset.toLowerCase());
1055         return alias == null ? charset : alias;
1056     }
1057 
1058     /**
1059      * Convert a java charset into its MIME charset name. <p>
1060      *
1061      * Note that a future version of JDK (post 1.2) might provide
1062      * this functionality, in which case, we may deprecate this
1063      * method then.
1064      *
1065      * @param   charset    the JDK charset
1066      * @return          the MIME/IANA equivalent. If a mapping
1067      *                  is not possible, the passed in charset itself
1068      *                  is returned.
1069      * @since           JavaMail 1.1
1070      */
1071     public static String mimeCharset(String charset) {
1072         if (java2mime == null || charset == null)
1073             // no mapping table or charset param is null
1074             return charset;
1075 
1076         String alias = (String)java2mime.get(charset.toLowerCase());
1077         return alias == null ? charset : alias;
1078     }
1079 
1080     private static String defaultJavaCharset;
1081     private static String defaultMIMECharset;
1082 
1083     /**
1084      * Get the default charset corresponding to the system's current
1085      * default locale.  If the System property <code>mail.mime.charset</code>
1086      * is set, a system charset corresponding to this MIME charset will be
1087      * returned. <p>
1088      *
1089      * @return  the default charset of the system's default locale,
1090      *          as a Java charset. (NOT a MIME charset)
1091      * @since   JavaMail 1.1
1092      */
1093     public static String getDefaultJavaCharset() {
1094         if (defaultJavaCharset == null) {
1095             /*
1096              * If mail.mime.charset is set, it controls the default


1123             }
1124         }
1125 
1126         return defaultJavaCharset;
1127     }
1128 
1129     /*
1130      * Get the default MIME charset for this locale.
1131      */
1132     static String getDefaultMIMECharset() {
1133         if (defaultMIMECharset == null) {
1134                 defaultMIMECharset = SAAJUtil.getSystemProperty("mail.mime.charset");
1135         }
1136         if (defaultMIMECharset == null)
1137             defaultMIMECharset = mimeCharset(getDefaultJavaCharset());
1138         return defaultMIMECharset;
1139     }
1140 
1141     // Tables to map MIME charset names to Java names and vice versa.
1142     // XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset
1143     private static Hashtable mime2java;
1144     private static Hashtable java2mime;
1145 
1146     static {
1147         java2mime = new Hashtable(40);
1148         mime2java = new Hashtable(10);
1149 
1150         try {
1151             // Use this class's classloader to load the mapping file
1152             // XXX - we should use SecuritySupport, but it's in another package
1153             InputStream is =
1154                     com.sun.xml.internal.messaging.saaj.packaging.mime.internet.MimeUtility.class.getResourceAsStream(
1155                     "/META-INF/javamail.charset.map");
1156 
1157             if (is != null) {
1158                 is = new LineInputStream(is);
1159 
1160                 // Load the JDK-to-MIME charset mapping table
1161                 loadMappings((LineInputStream)is, java2mime);
1162 
1163                 // Load the MIME-to-JDK charset mapping table
1164                 loadMappings((LineInputStream)is, mime2java);
1165             }
1166         } catch (Exception ex) { }
1167 
1168         // If we didn't load the tables, e.g., because we didn't have


1212             java2mime.put("EUC_JP", "euc-jp");
1213             java2mime.put("KOI8_R", "koi8-r");
1214             java2mime.put("EUC_CN", "euc-cn");
1215             java2mime.put("EUC_TW", "euc-tw");
1216             java2mime.put("EUC_KR", "euc-kr");
1217         }
1218         if (mime2java.isEmpty()) {
1219             mime2java.put("iso-2022-cn", "ISO2022CN");
1220             mime2java.put("iso-2022-kr", "ISO2022KR");
1221             mime2java.put("utf-8", "UTF8");
1222             mime2java.put("utf8", "UTF8");
1223             mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
1224             mime2java.put("ja_jp.eucjp", "EUCJIS");
1225             mime2java.put("euc-kr", "KSC5601");
1226             mime2java.put("euckr", "KSC5601");
1227             mime2java.put("us-ascii", "ISO-8859-1");
1228             mime2java.put("x-us-ascii", "ISO-8859-1");
1229         }
1230     }
1231 
1232     private static void loadMappings(LineInputStream is, Hashtable table) {
1233         String currLine;
1234 
1235         while (true) {
1236             try {
1237                 currLine = is.readLine();
1238             } catch (IOException ioex) {
1239                 break; // error in reading, stop
1240             }
1241 
1242             if (currLine == null) // end of file, stop
1243                 break;
1244             if (currLine.startsWith("--") && currLine.endsWith("--"))
1245                 // end of this table
1246                 break;
1247 
1248             // ignore empty lines and comments
1249             if (currLine.trim().length() == 0 || currLine.startsWith("#"))
1250                 continue;
1251 
1252             // A valid entry is of the form <key><separator><value>


   1 /*
   2  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


 493          * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL.
 494          */
 495         String lwsp = " \t\n\r";
 496         StringTokenizer st;
 497 
 498         /*
 499          * First, lets do a quick run thru the string and check
 500          * whether the sequence "=?"  exists at all. If none exists,
 501          * we know there are no encoded-words in here and we can just
 502          * return the string as-is, without suffering thru the later
 503          * decoding logic.
 504          * This handles the most common case of unencoded headers
 505          * efficiently.
 506          */
 507         if (etext.indexOf("=?") == -1)
 508             return etext;
 509 
 510         // Encoded words found. Start decoding ...
 511 
 512         st = new StringTokenizer(etext, lwsp, true);
 513         StringBuilder sb = new StringBuilder();  // decode buffer
 514         StringBuilder wsb = new StringBuilder(); // white space buffer
 515         boolean prevWasEncoded = false;
 516 
 517         while (st.hasMoreTokens()) {
 518             char c;
 519             String s = st.nextToken();
 520             // If whitespace, append it to the whitespace buffer
 521             if (((c = s.charAt(0)) == ' ') || (c == '\t') ||
 522                 (c == '\r') || (c == '\n'))
 523                 wsb.append(c);
 524             else {
 525                 // Check if token is an 'encoded-word' ..
 526                 String word;
 527                 try {
 528                     word = decodeWord(s);
 529                     // Yes, this IS an 'encoded-word'.
 530                     if (!prevWasEncoded && wsb.length() > 0) {
 531                         // if the previous word was also encoded, we
 532                         // should ignore the collected whitespace. Else
 533                         // we include the whitespace as well.
 534                         sb.append(wsb);


 631         } else // MIME charset -> java charset
 632             jcharset = javaCharset(charset);
 633 
 634         // If no transfer-encoding is specified, figure one out.
 635         if (encoding == null) {
 636             if (ascii != MOSTLY_NONASCII)
 637                 encoding = "Q";
 638             else
 639                 encoding = "B";
 640         }
 641 
 642         boolean b64;
 643         if (encoding.equalsIgnoreCase("B"))
 644             b64 = true;
 645         else if (encoding.equalsIgnoreCase("Q"))
 646             b64 = false;
 647         else
 648             throw new UnsupportedEncodingException(
 649                         "Unknown transfer encoding: " + encoding);
 650 
 651         StringBuilder outb = new StringBuilder(); // the output buffer
 652         doEncode(string, b64, jcharset,
 653                  // As per RFC 2047, size of an encoded string should not
 654                  // exceed 75 bytes.
 655                  // 7 = size of "=?", '?', 'B'/'Q', '?', "?="
 656                  75 - 7 - charset.length(), // the available space
 657                  "=?" + charset + "?" + encoding + "?", // prefix
 658                  true, encodingWord, outb);
 659 
 660         return outb.toString();
 661     }
 662 
 663     private static void doEncode(String string, boolean b64,
 664                 String jcharset, int avail, String prefix,
 665                 boolean first, boolean encodingWord, StringBuilder buf)
 666                         throws UnsupportedEncodingException {
 667 
 668         // First find out what the length of the encoded version of
 669         // 'string' would be.
 670         byte[] bytes = string.getBytes(jcharset);
 671         int len;
 672         if (b64) // "B" encoding
 673             len = BEncoderStream.encodedLength(bytes);
 674         else // "Q"
 675             len = QEncoderStream.encodedLength(bytes, encodingWord);
 676 
 677         int size;
 678         if ((len > avail) && ((size = string.length()) > 1)) {
 679             // If the length is greater than 'avail', split 'string'
 680             // into two and recurse.
 681             doEncode(string.substring(0, size/2), b64, jcharset,
 682                      avail, prefix, first, encodingWord, buf);
 683             doEncode(string.substring(size/2, size), b64, jcharset,
 684                      avail, prefix, false, encodingWord, buf);
 685         } else {


 795         } catch (IllegalArgumentException iex) {
 796             /* An unknown charset of the form ISO-XXX-XXX, will cause
 797              * the JDK to throw an IllegalArgumentException ... Since the
 798              * JDK will attempt to create a classname using this string,
 799              * but valid classnames must not contain the character '-',
 800              * and this results in an IllegalArgumentException, rather than
 801              * the expected UnsupportedEncodingException. Yikes
 802              */
 803             throw new UnsupportedEncodingException();
 804         }
 805     }
 806 
 807     /**
 808      * Look for encoded words within a word.  The MIME spec doesn't
 809      * allow this, but many broken mailers, especially Japanese mailers,
 810      * produce such incorrect encodings.
 811      */
 812     private static String decodeInnerWords(String word)
 813                                 throws UnsupportedEncodingException {
 814         int start = 0, i;
 815         StringBuilder buf = new StringBuilder();
 816         while ((i = word.indexOf("=?", start)) >= 0) {
 817             buf.append(word.substring(start, i));
 818             int end = word.indexOf("?=", i);
 819             if (end < 0)
 820                 break;
 821             String s = word.substring(i, end + 2);
 822             try {
 823                 s = decodeWord(s);
 824             } catch (ParseException pex) {
 825                 // ignore it, just use the original string
 826             }
 827             buf.append(s);
 828             start = end + 2;
 829         }
 830         if (start == 0)
 831             return word;
 832         if (start < word.length())
 833             buf.append(word.substring(start));
 834         return buf.toString();
 835     }


 845      * RFC 822 and MIME header fields.
 846      *
 847      * @param   word    word to be quoted
 848      * @param   specials the set of special characters
 849      * @return          the possibly quoted word
 850      * @see     javax.mail.internet.HeaderTokenizer#MIME
 851      * @see     javax.mail.internet.HeaderTokenizer#RFC822
 852      */
 853     public static String quote(String word, String specials) {
 854         int len = word.length();
 855 
 856         /*
 857          * Look for any "bad" characters, Escape and
 858          *  quote the entire string if necessary.
 859          */
 860         boolean needQuoting = false;
 861         for (int i = 0; i < len; i++) {
 862             char c = word.charAt(i);
 863             if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
 864                 // need to escape them and then quote the whole string
 865                 StringBuilder sb = new StringBuilder(len + 3);
 866                 sb.append('"');
 867                 sb.append(word.substring(0, i));
 868                 int lastc = 0;
 869                 for (int j = i; j < len; j++) {
 870                     char cc = word.charAt(j);
 871                     if ((cc == '"') || (cc == '\\') ||
 872                         (cc == '\r') || (cc == '\n'))
 873                         if (cc == '\n' && lastc == '\r')
 874                             ;   // do nothing, CR was already escaped
 875                         else
 876                             sb.append('\\');    // Escape the character
 877                     sb.append(cc);
 878                     lastc = cc;
 879                 }
 880                 sb.append('"');
 881                 return sb.toString();
 882             } else if (c < 040 || c >= 0177 || specials.indexOf(c) >= 0)
 883                 // These characters cause the string to be quoted
 884                 needQuoting = true;
 885         }
 886 
 887         if (needQuoting) {
 888             StringBuilder sb = new StringBuilder(len + 2);
 889             sb.append('"').append(word).append('"');
 890             return sb.toString();
 891         } else
 892             return word;
 893     }
 894 
 895     /**
 896      * Fold a string at linear whitespace so that each line is no longer
 897      * than 76 characters, if possible.  If there are more than 76
 898      * non-whitespace characters consecutively, the string is folded at
 899      * the first whitespace after that sequence.  The parameter
 900      * <code>used</code> indicates how many characters have been used in
 901      * the current line; it is usually the length of the header name. <p>
 902      *
 903      * Note that line breaks in the string aren't escaped; they probably
 904      * should be.
 905      *
 906      * @param   used    characters used in line so far
 907      * @param   s       the string to fold
 908      * @return          the folded string


 910     /*public*/ static String fold(int used, String s) {
 911         if (!foldText)
 912             return s;
 913 
 914         int end;
 915         char c;
 916         // Strip trailing spaces
 917         for (end = s.length() - 1; end >= 0; end--) {
 918             c = s.charAt(end);
 919             if (c != ' ' && c != '\t')
 920                 break;
 921         }
 922         if (end != s.length() - 1)
 923             s = s.substring(0, end + 1);
 924 
 925         // if the string fits now, just return it
 926         if (used + s.length() <= 76)
 927             return s;
 928 
 929         // have to actually fold the string
 930         StringBuilder sb = new StringBuilder(s.length() + 4);
 931         char lastc = 0;
 932         while (used + s.length() > 76) {
 933             int lastspace = -1;
 934             for (int i = 0; i < s.length(); i++) {
 935                 if (lastspace != -1 && used + i > 76)
 936                     break;
 937                 c = s.charAt(i);
 938                 if (c == ' ' || c == '\t')
 939                     if (!(lastc == ' ' || lastc == '\t'))
 940                         lastspace = i;
 941                 lastc = c;
 942             }
 943             if (lastspace == -1) {
 944                 // no space, use the whole thing
 945                 sb.append(s);
 946                 s = "";
 947                 used = 0;
 948                 break;
 949             }
 950             sb.append(s.substring(0, lastspace));


 952             lastc = s.charAt(lastspace);
 953             sb.append(lastc);
 954             s = s.substring(lastspace + 1);
 955             used = 1;
 956         }
 957         sb.append(s);
 958         return sb.toString();
 959     }
 960 
 961     /**
 962      * Unfold a folded header.  Any line breaks that aren't escaped and
 963      * are followed by whitespace are removed.
 964      *
 965      * @param   s       the string to unfold
 966      * @return          the unfolded string
 967      */
 968     /*public*/ static String unfold(String s) {
 969         if (!foldText)
 970             return s;
 971 
 972         StringBuilder sb = null;
 973         int i;
 974         while ((i = indexOfAny(s, "\r\n")) >= 0) {
 975             int start = i;
 976             int l = s.length();
 977             i++;                // skip CR or NL
 978             if (i < l && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n')
 979                 i++;    // skip LF
 980             if (start == 0 || s.charAt(start - 1) != '\\') {
 981                 char c;
 982                 // if next line starts with whitespace, skip all of it
 983                 // XXX - always has to be true?
 984                 if (i < l && ((c = s.charAt(i)) == ' ' || c == '\t')) {
 985                     i++;        // skip whitespace
 986                     while (i < l && ((c = s.charAt(i)) == ' ' || c == '\t'))
 987                         i++;
 988                     if (sb == null)
 989                         sb = new StringBuilder(s.length());
 990                     if (start != 0) {
 991                         sb.append(s.substring(0, start));
 992                         sb.append(' ');
 993                     }
 994                     s = s.substring(i);
 995                     continue;
 996                 }
 997                 // it's not a continuation line, just leave it in
 998                 if (sb == null)
 999                     sb = new StringBuilder(s.length());
1000                 sb.append(s.substring(0, i));
1001                 s = s.substring(i);
1002             } else {
1003                 // there's a backslash at "start - 1"
1004                 // strip it out, but leave in the line break
1005                 if (sb == null)
1006                     sb = new StringBuilder(s.length());
1007                 sb.append(s.substring(0, start - 1));
1008                 sb.append(s.substring(start, i));
1009                 s = s.substring(i);
1010             }
1011         }
1012         if (sb != null) {
1013             sb.append(s);
1014             return sb.toString();
1015         } else
1016             return s;
1017     }
1018 
1019     /**
1020      * Return the first index of any of the characters in "any" in "s",
1021      * or -1 if none are found.
1022      *
1023      * This should be a method on String.
1024      */
1025     private static int indexOfAny(String s, String any) {
1026         return indexOfAny(s, any, 0);


1034                     return i;
1035             }
1036             return -1;
1037         } catch (StringIndexOutOfBoundsException e) {
1038             return -1;
1039         }
1040     }
1041 
1042     /**
1043      * Convert a MIME charset name into a valid Java charset name. <p>
1044      *
1045      * @param charset   the MIME charset name
1046      * @return  the Java charset equivalent. If a suitable mapping is
1047      *          not available, the passed in charset is itself returned.
1048      */
1049     public static String javaCharset(String charset) {
1050         if (mime2java == null || charset == null)
1051             // no mapping table, or charset parameter is null
1052             return charset;
1053 
1054         String alias = mime2java.get(charset.toLowerCase());
1055         return alias == null ? charset : alias;
1056     }
1057 
1058     /**
1059      * Convert a java charset into its MIME charset name. <p>
1060      *
1061      * Note that a future version of JDK (post 1.2) might provide
1062      * this functionality, in which case, we may deprecate this
1063      * method then.
1064      *
1065      * @param   charset    the JDK charset
1066      * @return          the MIME/IANA equivalent. If a mapping
1067      *                  is not possible, the passed in charset itself
1068      *                  is returned.
1069      * @since           JavaMail 1.1
1070      */
1071     public static String mimeCharset(String charset) {
1072         if (java2mime == null || charset == null)
1073             // no mapping table or charset param is null
1074             return charset;
1075 
1076         String alias = java2mime.get(charset.toLowerCase());
1077         return alias == null ? charset : alias;
1078     }
1079 
1080     private static String defaultJavaCharset;
1081     private static String defaultMIMECharset;
1082 
1083     /**
1084      * Get the default charset corresponding to the system's current
1085      * default locale.  If the System property <code>mail.mime.charset</code>
1086      * is set, a system charset corresponding to this MIME charset will be
1087      * returned. <p>
1088      *
1089      * @return  the default charset of the system's default locale,
1090      *          as a Java charset. (NOT a MIME charset)
1091      * @since   JavaMail 1.1
1092      */
1093     public static String getDefaultJavaCharset() {
1094         if (defaultJavaCharset == null) {
1095             /*
1096              * If mail.mime.charset is set, it controls the default


1123             }
1124         }
1125 
1126         return defaultJavaCharset;
1127     }
1128 
1129     /*
1130      * Get the default MIME charset for this locale.
1131      */
1132     static String getDefaultMIMECharset() {
1133         if (defaultMIMECharset == null) {
1134                 defaultMIMECharset = SAAJUtil.getSystemProperty("mail.mime.charset");
1135         }
1136         if (defaultMIMECharset == null)
1137             defaultMIMECharset = mimeCharset(getDefaultJavaCharset());
1138         return defaultMIMECharset;
1139     }
1140 
1141     // Tables to map MIME charset names to Java names and vice versa.
1142     // XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset
1143     private static Hashtable<String, String> mime2java;
1144     private static Hashtable<String, String> java2mime;
1145 
1146     static {
1147         java2mime = new Hashtable<String, String>(40);
1148         mime2java = new Hashtable<String, String>(10);
1149 
1150         try {
1151             // Use this class's classloader to load the mapping file
1152             // XXX - we should use SecuritySupport, but it's in another package
1153             InputStream is =
1154                     com.sun.xml.internal.messaging.saaj.packaging.mime.internet.MimeUtility.class.getResourceAsStream(
1155                     "/META-INF/javamail.charset.map");
1156 
1157             if (is != null) {
1158                 is = new LineInputStream(is);
1159 
1160                 // Load the JDK-to-MIME charset mapping table
1161                 loadMappings((LineInputStream)is, java2mime);
1162 
1163                 // Load the MIME-to-JDK charset mapping table
1164                 loadMappings((LineInputStream)is, mime2java);
1165             }
1166         } catch (Exception ex) { }
1167 
1168         // If we didn't load the tables, e.g., because we didn't have


1212             java2mime.put("EUC_JP", "euc-jp");
1213             java2mime.put("KOI8_R", "koi8-r");
1214             java2mime.put("EUC_CN", "euc-cn");
1215             java2mime.put("EUC_TW", "euc-tw");
1216             java2mime.put("EUC_KR", "euc-kr");
1217         }
1218         if (mime2java.isEmpty()) {
1219             mime2java.put("iso-2022-cn", "ISO2022CN");
1220             mime2java.put("iso-2022-kr", "ISO2022KR");
1221             mime2java.put("utf-8", "UTF8");
1222             mime2java.put("utf8", "UTF8");
1223             mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
1224             mime2java.put("ja_jp.eucjp", "EUCJIS");
1225             mime2java.put("euc-kr", "KSC5601");
1226             mime2java.put("euckr", "KSC5601");
1227             mime2java.put("us-ascii", "ISO-8859-1");
1228             mime2java.put("x-us-ascii", "ISO-8859-1");
1229         }
1230     }
1231 
1232     private static void loadMappings(LineInputStream is, Hashtable<String, String> table) {
1233         String currLine;
1234 
1235         while (true) {
1236             try {
1237                 currLine = is.readLine();
1238             } catch (IOException ioex) {
1239                 break; // error in reading, stop
1240             }
1241 
1242             if (currLine == null) // end of file, stop
1243                 break;
1244             if (currLine.startsWith("--") && currLine.endsWith("--"))
1245                 // end of this table
1246                 break;
1247 
1248             // ignore empty lines and comments
1249             if (currLine.trim().length() == 0 || currLine.startsWith("#"))
1250                 continue;
1251 
1252             // A valid entry is of the form <key><separator><value>


< prev index next >