< prev index next >

jaxws/src/java.xml.ws/share/classes/com/sun/xml/internal/messaging/saaj/packaging/mime/internet/MimeUtility.java

Print this page


   1 /*
   2  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


  48  * There is a set of methods to encode and decode MIME headers as
  49  * per RFC 2047. A brief description on handling such headers is
  50  * given below: <p>
  51  *
  52  * RFC 822 mail headers <strong>must</strong> contain only US-ASCII
  53  * characters. Headers that contain non US-ASCII characters must be
  54  * encoded so that they contain only US-ASCII characters. Basically,
  55  * this process involves using either BASE64 or QP to encode certain
  56  * characters. RFC 2047 describes this in detail. <p>
  57  *
  58  * In Java, Strings contain (16 bit) Unicode characters. ASCII is a
  59  * subset of Unicode (and occupies the range 0 - 127). A String
  60  * that contains only ASCII characters is already mail-safe. If the
  61  * String contains non US-ASCII characters, it must be encoded. An
  62  * additional complexity in this step is that since Unicode is not
  63  * yet a widely used charset, one might want to first charset-encode
  64  * the String into another charset and then do the transfer-encoding.
  65  * <p>
  66  * Note that to get the actual bytes of a mail-safe String (say,
  67  * for sending over SMTP), one must do
  68  * <p><blockquote><pre>
  69  *
  70  *      byte[] bytes = string.getBytes("iso-8859-1");
  71  *
  72  * </pre></blockquote><p>
  73  *
  74  * The <code>setHeader</code> and <code>addHeader</code> methods
  75  * on MimeMessage and MimeBodyPart assume that the given header values
  76  * are Unicode strings that contain only US-ASCII characters. Hence
  77  * the callers of those methods must ensure that the values they pass
  78  * do not contain non US-ASCII characters. The methods in this class
  79  * help do this. <p>
  80  *
  81  * The <code>getHeader</code> family of methods on MimeMessage and
  82  * MimeBodyPart return the raw header value. These might be encoded
  83  * as per RFC 2047, and if so, must be decoded into Unicode Strings.
  84  * The methods in this class help to do this. <p>
  85  *
  86  * Several System properties control strict conformance to the MIME
  87  * spec.  Note that these are not session properties but must be set
  88  * globally as System properties. <p>
  89  *
  90  * The <code>mail.mime.decodetext.strict</code> property controls
  91  * decoding of MIME encoded words.  The MIME spec requires that encoded
  92  * words start at the beginning of a whitespace separated word.  Some


 205 
 206         // Close the input stream
 207         try {
 208             is.close();
 209         } catch (IOException ioex) { }
 210 
 211         return encoding;
 212     }
 213 
 214     /**
 215      * Same as <code>getEncoding(DataSource)</code> except that instead
 216      * of reading the data from an <code>InputStream</code> it uses the
 217      * <code>writeTo</code> method to examine the data.  This is more
 218      * efficient in the common case of a <code>DataHandler</code>
 219      * created with an object and a MIME type (for example, a
 220      * "text/plain" String) because all the I/O is done in this
 221      * thread.  In the case requiring an <code>InputStream</code> the
 222      * <code>DataHandler</code> uses a thread, a pair of pipe streams,
 223      * and the <code>writeTo</code> method to produce the data. <p>
 224      *




 225      * @since   JavaMail 1.2
 226      */
 227     public static String getEncoding(DataHandler dh) {
 228         ContentType cType = null;
 229         String encoding = null;
 230 
 231         /*
 232          * Try to pick the most efficient means of determining the
 233          * encoding.  If this DataHandler was created using a DataSource,
 234          * the getEncoding(DataSource) method is typically faster.  If
 235          * the DataHandler was created with an object, this method is
 236          * much faster.  To distinguish the two cases, we use a heuristic.
 237          * A DataHandler created with an object will always have a null name.
 238          * A DataHandler created with a DataSource will usually have a
 239          * non-null name.
 240          *
 241          * XXX - This is actually quite a disgusting hack, but it makes
 242          *       a common case run over twice as fast.
 243          */
 244         if (dh.getName() != null)


 277             } catch (IOException ex) { }        // ignore it
 278             if (aos.getAscii() == ALL_ASCII) // all ascii
 279                 encoding = "7bit";
 280             else // found atleast one non-ascii character, use b64
 281                 encoding = "base64";
 282         }
 283 
 284         return encoding;
 285     }
 286 
 287     /**
 288      * Decode the given input stream. The Input stream returned is
 289      * the decoded input stream. All the encodings defined in RFC 2045
 290      * are supported here. They include "base64", "quoted-printable",
 291      * "7bit", "8bit", and "binary". In addition, "uuencode" (also
 292      * spelled "x-uuencode" or "x-uue") is supported; case is ignored.
 293      *
 294      * @param   is              input stream
 295      * @param   encoding        the encoding of the stream; not null-checked.
 296      * @return                  decoded input stream.
          * @exception MessagingException if the encoding is not recognized
 297      */
 298     public static InputStream decode(InputStream is, String encoding)
 299                 throws MessagingException {
 300         if (encoding.equalsIgnoreCase("base64"))
 301             return new BASE64DecoderStream(is);
 302         else if (encoding.equalsIgnoreCase("quoted-printable"))
 303             return new QPDecoderStream(is);
 304         else if (encoding.equalsIgnoreCase("uuencode") ||
 305                  encoding.equalsIgnoreCase("x-uuencode") ||
 306                  encoding.equalsIgnoreCase("x-uue"))
 307             return new UUDecoderStream(is);
 308         else if (encoding.equalsIgnoreCase("binary") ||
 309                  encoding.equalsIgnoreCase("7bit") ||
 310                  encoding.equalsIgnoreCase("8bit"))
 311             return is; // identity encodings: the stream is returned unwrapped
 312         else
 313             throw new MessagingException("Unknown encoding: " + encoding);
 314     }
 315 
 316     /**
 317      * Wrap an encoder around the given output stream.
 318      * All the encodings defined in RFC 2045 are supported here.
 319      * They include "base64", "quoted-printable", "7bit", "8bit" and
 320      * "binary". In addition, "uuencode" is also supported.
 321      *
 322      * @param   os              output stream
 323      * @param   encoding        the encoding of the stream (may be null).
 324      * @return                  output stream that applies the
 325      *                          specified encoding.
          * @exception MessagingException if the encoding is not recognized
 326      */
 327     public static OutputStream encode(OutputStream os, String encoding)
 328                 throws MessagingException {
 329         if (encoding == null)
 330             return os; // no encoding requested: hand back the stream as-is
 331         else if (encoding.equalsIgnoreCase("base64"))
 332             return new BASE64EncoderStream(os);
 333         else if (encoding.equalsIgnoreCase("quoted-printable"))
 334             return new QPEncoderStream(os);
 335         else if (encoding.equalsIgnoreCase("uuencode") ||
 336                  encoding.equalsIgnoreCase("x-uuencode") ||
 337                  encoding.equalsIgnoreCase("x-uue"))
 338             return new UUEncoderStream(os);
 339         else if (encoding.equalsIgnoreCase("binary") ||
 340                  encoding.equalsIgnoreCase("7bit") ||
 341                  encoding.equalsIgnoreCase("8bit"))
 342             return os; // identity encodings: nothing to wrap
 343         else
 344             throw new MessagingException("Unknown encoding: " +encoding);
 345     }
 346 
 347     /**
 348      * Wrap an encoder around the given output stream.
 349      * All the encodings defined in RFC 2045 are supported here.
 350      * They include "base64", "quoted-printable", "7bit", "8bit" and
 351      * "binary". In addition, "uuencode" is also supported.
 352      * The <code>filename</code> parameter is used with the "uuencode"
 353      * encoding and is included in the encoded output.
 354      *
 355      * @param   os              output stream
 356      * @param   encoding        the encoding of the stream (may be null).
 357      * @param   filename        name for the file being encoded (only used
 358      *                          with uuencode)
 359      * @return                  output stream that applies the
 360      *                          specified encoding.
          * @exception MessagingException if the encoding is not recognized
 361      * @since                   JavaMail 1.2
 362      */
 363     public static OutputStream encode(OutputStream os, String encoding,
 364                                       String filename)
 365                 throws MessagingException {
 366         if (encoding == null)
 367             return os; // no encoding requested: hand back the stream as-is
 368         else if (encoding.equalsIgnoreCase("base64"))
 369             return new BASE64EncoderStream(os);
 370         else if (encoding.equalsIgnoreCase("quoted-printable"))
 371             return new QPEncoderStream(os);
 372         else if (encoding.equalsIgnoreCase("uuencode") ||
 373                  encoding.equalsIgnoreCase("x-uuencode") ||
 374                  encoding.equalsIgnoreCase("x-uue"))
 375             return new UUEncoderStream(os, filename); // the only branch that uses filename
 376         else if (encoding.equalsIgnoreCase("binary") ||
 377                  encoding.equalsIgnoreCase("7bit") ||
 378                  encoding.equalsIgnoreCase("8bit"))
 379             return os; // identity encodings: nothing to wrap
 380         else
 381             throw new MessagingException("Unknown encoding: " +encoding);
 382     }
 383 
 384     /**
 385      * Encode a RFC 822 "text" token into mail-safe form as per
 386      * RFC 2047. <p>
 387      *
 388      * The given Unicode string is examined for non US-ASCII
 389      * characters. If the string contains only US-ASCII characters,
 390      * it is returned as-is.  If the string contains non US-ASCII
 391      * characters, it is first character-encoded using the platform's
 392      * default charset, then transfer-encoded using either the B or
 393      * Q encoding. The resulting bytes are then returned as a Unicode
 394      * string containing only ASCII characters. <p>
 395      *
 396      * Note that this method should be used to encode only
 397      * "unstructured" RFC 822 headers. <p>
 398      *
 399      * Example of usage:
 400      * <p><blockquote><pre>
 401      *
 402      *  MimeBodyPart part = ...
 403      *  String rawvalue = "FooBar Mailer, Japanese version 1.1";
 404      *  try {
 405      *    // If we know for sure that rawvalue contains only US-ASCII
 406      *    // characters, we can skip the encoding part
 407      *    part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));
 408      *  } catch (UnsupportedEncodingException e) {
 409      *    // encoding failure
 410      *  } catch (MessagingException me) {
 411      *   // setHeader() failure
 412      *  }
 413      *
 414      * </pre></blockquote><p>
 415      *
 416      * @param   text    unicode string
 417      * @return  Unicode string containing only US-ASCII characters
 418      * @exception UnsupportedEncodingException if the encoding fails
 419      */
 420     public static String encodeText(String text)
 421                         throws UnsupportedEncodingException {
 422         return encodeText(text, null, null); // null charset and null encoding: use the defaults
 423     }
 424 
 425     /**
 426      * Encode a RFC 822 "text" token into mail-safe form as per
 427      * RFC 2047. <p>
 428      *
 429      * The given Unicode string is examined for non US-ASCII
 430      * characters. If the string contains only US-ASCII characters,
 431      * it is returned as-is.  If the string contains non US-ASCII
 432      * characters, it is first character-encoded using the specified
 433      * charset, then transfer-encoded using either the B or Q encoding.
 434      * The resulting bytes are then returned as a Unicode string
 435      * containing only ASCII characters. <p>
 436      *
 437      * Note that this method should be used to encode only
 438      * "unstructured" RFC 822 headers.
 439      *
 440      * @param   text    the header value
 441      * @param   charset the charset. If this parameter is null, the
 442      *          platform's default charset is used.
 443      * @param   encoding the encoding to be used. Currently supported
 444      *          values are "B" and "Q". If this parameter is null, then
 445      *          the "Q" encoding is used if most of characters to be
 446      *          encoded are in the ASCII charset, otherwise "B" encoding
 447      *          is used.
 448      * @return  Unicode string containing only US-ASCII characters
          * @exception UnsupportedEncodingException if the encoding fails
 449      */
 450     public static String encodeText(String text, String charset,
 451                                     String encoding)
 452                         throws UnsupportedEncodingException {
 453         return encodeWord(text, charset, encoding, false); // false: encode as an RFC 822 "text" token, not a "word"
 454     }
 455 
 456     /**
 457      * Decode "unstructured" headers, that is, headers that are defined
 458      * as '*text' as per RFC 822. <p>
 459      *
 460      * The string is decoded using the algorithm specified in
 461      * RFC 2047, Section 6.1.1. If the charset-conversion fails
 462      * for any sequence, an UnsupportedEncodingException is thrown.
 463      * If the String is not an RFC 2047 style encoded header, it is
 464      * returned as-is. <p>
 465      *
 466      * Example of usage:
 467      * <p><blockquote><pre>
 468      *
 469      *  MimeBodyPart part = ...
 470      *  String rawvalue = null;
 471      *  String  value = null;
 472      *  try {
 473      *    if ((rawvalue = part.getHeader("X-mailer")[0]) != null)
 474      *      value = MimeUtility.decodeText(rawvalue);
 475      *  } catch (UnsupportedEncodingException e) {
 476      *      // Don't care
 477      *      value = rawvalue;
 478      *  } catch (MessagingException me) { }
 479      *
 480      *  return value;
 481      *
 482      * </pre></blockquote><p>
 483      *
 484      * @param   etext   the possibly encoded value

 485      * @exception       UnsupportedEncodingException if the charset
 486      *                  conversion failed.
 487      */
 488     public static String decodeText(String etext)
 489                 throws UnsupportedEncodingException {
 490         /*
 491          * We look for sequences separated by "linear-white-space".
 492          * (as per RFC 2047, Section 6.1.1)
 493          * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL.
 494          */
 495         String lwsp = " \t\n\r";
 496         StringTokenizer st;
 497 
 498         /*
 499          * First, lets do a quick run thru the string and check
 500          * whether the sequence "=?"  exists at all. If none exists,
 501          * we know there are no encoded-words in here and we can just
 502          * return the string as-is, without suffering thru the later
 503          * decoding logic.
 504          * This handles the most common case of unencoded headers


 551         }
 552         return sb.toString();
 553     }
 554 
 555     /**
 556      * Encode a RFC 822 "word" token into mail-safe form as per
 557      * RFC 2047. <p>
 558      *
 559      * The given Unicode string is examined for non US-ASCII
 560      * characters. If the string contains only US-ASCII characters,
 561      * it is returned as-is.  If the string contains non US-ASCII
 562      * characters, it is first character-encoded using the platform's
 563      * default charset, then transfer-encoded using either the B or
 564      * Q encoding. The resulting bytes are then returned as a Unicode
 565      * string containing only ASCII characters. <p>
 566      *
 567      * This method is meant to be used when creating RFC 822 "phrases".
 568      * The InternetAddress class, for example, uses this to encode
 569      * its 'phrase' component.
 570      *
 571      * @param   word    unicode string
 572      * @return  Unicode string containing only US-ASCII
 573      *          characters.
 574      * @exception UnsupportedEncodingException if the encoding fails
 575      */
 576     public static String encodeWord(String word)
 577                         throws UnsupportedEncodingException {
 578         return encodeWord(word, null, null); // null charset and null encoding: use the defaults
 579     }
 580 
 581     /**
 582      * Encode a RFC 822 "word" token into mail-safe form as per
 583      * RFC 2047. <p>
 584      *
 585      * The given Unicode string is examined for non US-ASCII
 586      * characters. If the string contains only US-ASCII characters,
 587      * it is returned as-is.  If the string contains non US-ASCII
 588      * characters, it is first character-encoded using the specified
 589      * charset, then transfer-encoded using either the B or Q encoding.
 590      * The resulting bytes are then returned as a Unicode string
 591      * containing only ASCII characters. <p>
 592      *
 593      * @param   word    unicode string
 594      * @param   charset the MIME charset
 595      * @param   encoding the encoding to be used. Currently supported
 596      *          values are "B" and "Q". If this parameter is null, then
 597      *          the "Q" encoding is used if most of characters to be
 598      *          encoded are in the ASCII charset, otherwise "B" encoding
 599      *          is used.
 600      * @return  Unicode string containing only US-ASCII characters
 601      * @exception UnsupportedEncodingException if the encoding fails
 602      */
 603     public static String encodeWord(String word, String charset,
 604                                     String encoding)
 605                         throws UnsupportedEncodingException {
 606         return encodeWord(word, charset, encoding, true); // true: encode as an RFC 822 "word" token
 607     }
 608 
 609     /*
 610      * Encode the given string. The parameter 'encodingWord' should
 611      * be true if a RFC 822 "word" token is being encoded and false if a
 612      * RFC 822 "text" token is being encoded. This is because the
 613      * "Q" encoding defined in RFC 2047 has more restrictions when


 703                 if (foldEncodedWords)
 704                     buf.append("\r\n "); // start a continuation line
 705                 else
 706                     buf.append(" "); // line will be folded later
 707 
 708             buf.append(prefix);
 709             for (int i = 0; i < encodedBytes.length; i++)
 710                 buf.append((char)encodedBytes[i]);
 711             buf.append("?="); // terminate the current sequence
 712         }
 713     }
 714 
 715     /**
 716      * The string is parsed using the rules in RFC 2047 for parsing
 717      * an "encoded-word". If the parse fails, a ParseException is
 718      * thrown. Otherwise, it is transfer-decoded, and then
 719      * charset-converted into Unicode. If the charset-conversion
 720      * fails, an UnsupportedEncodingException is thrown.<p>
 721      *
 722      * @param   eword   the possibly encoded value

 723      * @exception       ParseException if the string is not an
 724      *                  encoded-word as per RFC 2047.
 725      * @exception       UnsupportedEncodingException if the charset
 726      *                  conversion failed.
 727      */
 728     public static String decodeWord(String eword)
 729                 throws ParseException, UnsupportedEncodingException {
 730 
 731         if (!eword.startsWith("=?")) // not an encoded word
 732             throw new ParseException();
 733 
 734         // get charset
 735         int start = 2; int pos;
 736         if ((pos = eword.indexOf('?', start)) == -1)
 737             throw new ParseException();
 738         String charset = javaCharset(eword.substring(start, pos));
 739 
 740         // get encoding
 741         start = pos+1;
 742         if ((pos = eword.indexOf('?', start)) == -1)


 830         if (start == 0)
 831             return word;
 832         if (start < word.length())
 833             buf.append(word.substring(start));
 834         return buf.toString();
 835     }
 836 
 837     /**
 838      * A utility method to quote a word, if the word contains any
 839      * characters from the specified 'specials' list.<p>
 840      *
 841      * The <code>HeaderTokenizer</code> class defines two special
 842      * sets of delimiters - MIME and RFC 822. <p>
 843      *
 844      * This method is typically used during the generation of
 845      * RFC 822 and MIME header fields.
 846      *
 847      * @param   word    word to be quoted
 848      * @param   specials the set of special characters
 849      * @return          the possibly quoted word
 850      * @see     javax.mail.internet.HeaderTokenizer#MIME
 851      * @see     javax.mail.internet.HeaderTokenizer#RFC822
 852      */
 853     public static String quote(String word, String specials) {
 854         int len = word.length();
 855 
 856         /*
 857          * Look for any "bad" characters, Escape and
 858          *  quote the entire string if necessary.
 859          */
 860         boolean needQuoting = false;
 861         for (int i = 0; i < len; i++) {
 862             char c = word.charAt(i);
 863             if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
 864                 // need to escape them and then quote the whole string
 865                 StringBuilder sb = new StringBuilder(len + 3);
 866                 sb.append('"');
 867                 sb.append(word.substring(0, i));
 868                 int lastc = 0;
 869                 for (int j = i; j < len; j++) {
 870                     char cc = word.charAt(j);
 871                     if ((cc == '"') || (cc == '\\') ||


1094         if (defaultJavaCharset == null) {
1095             /*
1096              * If mail.mime.charset is set, it controls the default
1097              * Java charset as well.
1098              */
1099             String mimecs = null;
1100 
1101             mimecs = SAAJUtil.getSystemProperty("mail.mime.charset");
1102 
1103             if (mimecs != null && mimecs.length() > 0) {
1104                 defaultJavaCharset = javaCharset(mimecs);
1105                 return defaultJavaCharset;
1106             }
1107 
1108             try {
1109                 defaultJavaCharset = System.getProperty("file.encoding",
1110                                                         "8859_1");
1111             } catch (SecurityException sex) {
1112 
1113                 class NullInputStream extends InputStream {

1114                     public int read() {
1115                         return 0;
1116                     }
1117                 }
1118                 InputStreamReader reader =
1119                         new InputStreamReader(new NullInputStream());
1120                 defaultJavaCharset = reader.getEncoding();
1121                 if (defaultJavaCharset == null)
1122                     defaultJavaCharset = "8859_1";
1123             }
1124         }
1125 
1126         return defaultJavaCharset;
1127     }
1128 
1129     /*
1130      * Get the default MIME charset for this locale.
1131      */
1132     static String getDefaultMIMECharset() {
1133         if (defaultMIMECharset == null) {


1260         }
1261     }
1262 
1263     static final int ALL_ASCII          = 1; // checkAscii: no non US-ASCII character found
1264     static final int MOSTLY_ASCII       = 2; // checkAscii: US-ASCII characters outnumber the rest
1265     static final int MOSTLY_NONASCII    = 3; // checkAscii: every other case, including a tie
1266 
1267     /**
1268      * Check if the given string contains non US-ASCII characters.
1269      * @param   s       string; must not be null
1270      * @return          ALL_ASCII if all characters in the string
1271      *                  belong to the US-ASCII charset. MOSTLY_ASCII
1272      *                  if more than half of the available characters
1273      *                  are US-ASCII characters. Else MOSTLY_NONASCII.
1274      */
1275     static int checkAscii(String s) {
1276         int ascii = 0, non_ascii = 0;
1277         int l = s.length();
1278 
1279         for (int i = 0; i < l; i++) {
1280             if (nonascii((int)s.charAt(i))) // non-ascii
1281                 non_ascii++;
1282             else
1283                 ascii++;
1284         }
1285 
1286         if (non_ascii == 0) // also holds for the empty string
1287             return ALL_ASCII;
1288         if (ascii > non_ascii)
1289             return MOSTLY_ASCII;
1290 
1291         return MOSTLY_NONASCII; // reached on a tie (ascii == non_ascii) as well
1292     }
1293 
1294     /**
1295      * Check if the given byte array contains non US-ASCII characters.
1296      * @param   b       byte array
1297      * @return          ALL_ASCII if all characters in the string
1298      *                  belong to the US-ASCII charset. MOSTLY_ASCII
1299      *                  if more than half of the available characters
1300      *                  are US-ASCII characters. Else MOSTLY_NONASCII.


1427 
1428 /**
1429  * An OutputStream that determines whether the data written to
1430  * it is all ASCII, mostly ASCII, or mostly non-ASCII.
1431  */
1432 class AsciiOutputStream extends OutputStream {
1433     private boolean breakOnNonAscii;
1434     private int ascii = 0, non_ascii = 0;
1435     private int linelen = 0;
1436     private boolean longLine = false;
1437     private boolean badEOL = false;
1438     private boolean checkEOL = false;
1439     private int lastb = 0;
1440     private int ret = 0;
1441 
1442     public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) {
1443         this.breakOnNonAscii = breakOnNonAscii;
1444         checkEOL = encodeEolStrict && breakOnNonAscii; // EOL checking is enabled only when both flags are set
1445     }
1446 

1447     public void write(int b) throws IOException {
1448         check(b); // every byte written goes through check()
1449     }
1450 

1451     public void write(byte b[]) throws IOException {
1452         write(b, 0, b.length); // whole array, via the ranged overload
1453     }
1454 

1455     public void write(byte b[], int off, int len) throws IOException {
1456         len += off; // len now holds the exclusive end index
1457         for (int i = off; i < len ; i++)
1458             check(b[i]); // check() masks its argument with 0xff, so the sign extension of b[i] is harmless
1459     }
1460 
1461     private final void check(int b) throws IOException {
1462         b &= 0xff;
1463         if (checkEOL &&
1464                 ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n')))
1465             badEOL = true;
1466         if (b == '\r' || b == '\n')
1467             linelen = 0;
1468         else {
1469             linelen++;
1470             if (linelen > 998)  // 1000 - CRLF
1471                 longLine = true;
1472         }
1473         if (MimeUtility.nonascii(b)) { // non-ascii
1474             non_ascii++;


   1 /*
   2  * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any


  48  * There is a set of methods to encode and decode MIME headers as
  49  * per RFC 2047. A brief description on handling such headers is
  50  * given below: <p>
  51  *
  52  * RFC 822 mail headers <strong>must</strong> contain only US-ASCII
  53  * characters. Headers that contain non US-ASCII characters must be
  54  * encoded so that they contain only US-ASCII characters. Basically,
  55  * this process involves using either BASE64 or QP to encode certain
  56  * characters. RFC 2047 describes this in detail. <p>
  57  *
  58  * In Java, Strings contain (16 bit) Unicode characters. ASCII is a
  59  * subset of Unicode (and occupies the range 0 - 127). A String
  60  * that contains only ASCII characters is already mail-safe. If the
  61  * String contains non US-ASCII characters, it must be encoded. An
  62  * additional complexity in this step is that since Unicode is not
  63  * yet a widely used charset, one might want to first charset-encode
  64  * the String into another charset and then do the transfer-encoding.
  65  * <p>
  66  * Note that to get the actual bytes of a mail-safe String (say,
  67  * for sending over SMTP), one must do
  68  * <blockquote><pre>
  69  *
  70  *      byte[] bytes = string.getBytes("iso-8859-1");
  71  *
  72  * </pre></blockquote>
  73  *
  74  * The <code>setHeader</code> and <code>addHeader</code> methods
  75  * on MimeMessage and MimeBodyPart assume that the given header values
  76  * are Unicode strings that contain only US-ASCII characters. Hence
  77  * the callers of those methods must ensure that the values they pass
  78  * do not contain non US-ASCII characters. The methods in this class
  79  * help do this. <p>
  80  *
  81  * The <code>getHeader</code> family of methods on MimeMessage and
  82  * MimeBodyPart return the raw header value. These might be encoded
  83  * as per RFC 2047, and if so, must be decoded into Unicode Strings.
  84  * The methods in this class help to do this. <p>
  85  *
  86  * Several System properties control strict conformance to the MIME
  87  * spec.  Note that these are not session properties but must be set
  88  * globally as System properties. <p>
  89  *
  90  * The <code>mail.mime.decodetext.strict</code> property controls
  91  * decoding of MIME encoded words.  The MIME spec requires that encoded
  92  * words start at the beginning of a whitespace separated word.  Some


 205 
 206         // Close the input stream
 207         try {
 208             is.close();
 209         } catch (IOException ioex) { }
 210 
 211         return encoding;
 212     }
 213 
 214     /**
 215      * Same as <code>getEncoding(DataSource)</code> except that instead
 216      * of reading the data from an <code>InputStream</code> it uses the
 217      * <code>writeTo</code> method to examine the data.  This is more
 218      * efficient in the common case of a <code>DataHandler</code>
 219      * created with an object and a MIME type (for example, a
 220      * "text/plain" String) because all the I/O is done in this
 221      * thread.  In the case requiring an <code>InputStream</code> the
 222      * <code>DataHandler</code> uses a thread, a pair of pipe streams,
 223      * and the <code>writeTo</code> method to produce the data. <p>
 224      *
 225      * @param dh data handler
 226      *
 227      * @return encoding
 228      *
 229      * @since   JavaMail 1.2
 230      */
 231     public static String getEncoding(DataHandler dh) {
 232         ContentType cType = null;
 233         String encoding = null;
 234 
 235         /*
 236          * Try to pick the most efficient means of determining the
 237          * encoding.  If this DataHandler was created using a DataSource,
 238          * the getEncoding(DataSource) method is typically faster.  If
 239          * the DataHandler was created with an object, this method is
 240          * much faster.  To distinguish the two cases, we use a heuristic.
 241          * A DataHandler created with an object will always have a null name.
 242          * A DataHandler created with a DataSource will usually have a
 243          * non-null name.
 244          *
 245          * XXX - This is actually quite a disgusting hack, but it makes
 246          *       a common case run over twice as fast.
 247          */
 248         if (dh.getName() != null)


 281             } catch (IOException ex) { }        // ignore it
 282             if (aos.getAscii() == ALL_ASCII) // all ascii
 283                 encoding = "7bit";
 284             else // found atleast one non-ascii character, use b64
 285                 encoding = "base64";
 286         }
 287 
 288         return encoding;
 289     }
 290 
 291     /**
 292      * Decode the given input stream. The Input stream returned is
 293      * the decoded input stream. All the encodings defined in RFC 2045
 294      * are supported here. They include "base64", "quoted-printable",
 295      * "7bit", "8bit", and "binary". In addition, "uuencode" is also
 296      * supported.
 297      *
 298      * @param   is              input stream
 299      * @param   encoding        the encoding of the stream.
 300      * @return                  decoded input stream.
 301      * @exception MessagingException in case of error
 302      */
 303     public static InputStream decode(InputStream is, String encoding)
 304                 throws MessagingException {
 305         if (encoding.equalsIgnoreCase("base64"))
 306             return new BASE64DecoderStream(is);
 307         else if (encoding.equalsIgnoreCase("quoted-printable"))
 308             return new QPDecoderStream(is);
 309         else if (encoding.equalsIgnoreCase("uuencode") ||
 310                  encoding.equalsIgnoreCase("x-uuencode") ||
 311                  encoding.equalsIgnoreCase("x-uue"))
 312             return new UUDecoderStream(is);
 313         else if (encoding.equalsIgnoreCase("binary") ||
 314                  encoding.equalsIgnoreCase("7bit") ||
 315                  encoding.equalsIgnoreCase("8bit"))
 316             return is;
 317         else
 318             throw new MessagingException("Unknown encoding: " + encoding);
 319     }
 320 
 321     /**
 322      * Wrap an encoder around the given output stream.
 323      * All the encodings defined in RFC 2045 are supported here.
 324      * They include "base64", "quoted-printable", "7bit", "8bit" and
 325      * "binary". In addition, "uuencode" is also supported.
 326      *
 327      * @param   os              output stream
 328      * @param   encoding        the encoding of the stream.
 329      * @return                  output stream that applies the
 330      *                          specified encoding.
 331      * @exception MessagingException in case of error
 332      */
 333     public static OutputStream encode(OutputStream os, String encoding)
 334                 throws MessagingException {
 335         if (encoding == null)
 336             return os;
 337         else if (encoding.equalsIgnoreCase("base64"))
 338             return new BASE64EncoderStream(os);
 339         else if (encoding.equalsIgnoreCase("quoted-printable"))
 340             return new QPEncoderStream(os);
 341         else if (encoding.equalsIgnoreCase("uuencode") ||
 342                  encoding.equalsIgnoreCase("x-uuencode") ||
 343                  encoding.equalsIgnoreCase("x-uue"))
 344             return new UUEncoderStream(os);
 345         else if (encoding.equalsIgnoreCase("binary") ||
 346                  encoding.equalsIgnoreCase("7bit") ||
 347                  encoding.equalsIgnoreCase("8bit"))
 348             return os;
 349         else
 350             throw new MessagingException("Unknown encoding: " +encoding);
 351     }
 352 
 353     /**
 354      * Wrap an encoder around the given output stream.
 355      * All the encodings defined in RFC 2045 are supported here.
 356      * They include "base64", "quoted-printable", "7bit", "8bit" and
 357      * "binary". In addition, "uuencode" is also supported.
 358      * The <code>filename</code> parameter is used with the "uuencode"
 359      * encoding and is included in the encoded output.
 360      *
 361      * @param   os              output stream
 362      * @param   encoding        the encoding of the stream.
 363      * @param   filename        name for the file being encoded (only used
 364      *                          with uuencode)
 365      * @return                  output stream that applies the
 366      *                          specified encoding.
 367      * @exception MessagingException in case of error
 368      * @since                   JavaMail 1.2
 369      */
 370     public static OutputStream encode(OutputStream os, String encoding,
 371                                       String filename)
 372                 throws MessagingException {
 373         if (encoding == null)
 374             return os;
 375         else if (encoding.equalsIgnoreCase("base64"))
 376             return new BASE64EncoderStream(os);
 377         else if (encoding.equalsIgnoreCase("quoted-printable"))
 378             return new QPEncoderStream(os);
 379         else if (encoding.equalsIgnoreCase("uuencode") ||
 380                  encoding.equalsIgnoreCase("x-uuencode") ||
 381                  encoding.equalsIgnoreCase("x-uue"))
 382             return new UUEncoderStream(os, filename);
 383         else if (encoding.equalsIgnoreCase("binary") ||
 384                  encoding.equalsIgnoreCase("7bit") ||
 385                  encoding.equalsIgnoreCase("8bit"))
 386             return os;
 387         else
 388             throw new MessagingException("Unknown encoding: " +encoding);
 389     }
 390 
 391     /**
 392      * Encode a RFC 822 "text" token into mail-safe form as per
 393      * RFC 2047. <p>
 394      *
 395      * The given Unicode string is examined for non US-ASCII
 396      * characters. If the string contains only US-ASCII characters,
 397      * it is returned as-is.  If the string contains non US-ASCII
 398      * characters, it is first character-encoded using the platform's
 399      * default charset, then transfer-encoded using either the B or
 400      * Q encoding. The resulting bytes are then returned as a Unicode
 401      * string containing only ASCII  characters. <p>
 402      *
 403      * Note that this method should be used to encode only
 404      * "unstructured" RFC 822 headers. <p>
 405      *
 406      * Example of usage:
 407      * <blockquote><pre>
 408      *
 409      *  MimeBodyPart part = ...
 410      *  String rawvalue = "FooBar Mailer, Japanese version 1.1"
 411      *  try {
 412      *    // If we know for sure that rawvalue contains only US-ASCII
 413      *    // characters, we can skip the encoding part
 414      *    part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));
 415      *  } catch (UnsupportedEncodingException e) {
 416      *    // encoding failure
 417      *  } catch (MessagingException me) {
 418      *   // setHeader() failure
 419      *  }
 420      *
 421      * </pre></blockquote>
 422      *
 423      * @param   text    unicode string
 424      * @return  Unicode string containing only US-ASCII characters
 425      * @exception UnsupportedEncodingException if the encoding fails
 426      */
 427     public static String encodeText(String text)
 428                         throws UnsupportedEncodingException {
 429         return encodeText(text, null, null);
 430     }
 431 
 432     /**
 433      * Encode a RFC 822 "text" token into mail-safe form as per
 434      * RFC 2047. <p>
 435      *
 436      * The given Unicode string is examined for non US-ASCII
 437      * characters. If the string contains only US-ASCII characters,
 438      * it is returned as-is.  If the string contains non US-ASCII
 439      * characters, it is first character-encoded using the specified
 440      * charset, then transfer-encoded using either the B or Q encoding.
 441      * The resulting bytes are then returned as a Unicode string
 442      * containing only ASCII characters. <p>
 443      *
 444      * Note that this method should be used to encode only
 445      * "unstructured" RFC 822 headers.
 446      *
 447      * @param   text    the header value
 448      * @param   charset the charset. If this parameter is null, the
 449      *          platform's default chatset is used.
 450      * @param   encoding the encoding to be used. Currently supported
 451      *          values are "B" and "Q". If this parameter is null, then
 452      *          the "Q" encoding is used if most of characters to be
 453      *          encoded are in the ASCII charset, otherwise "B" encoding
 454      *          is used.
 455      * @return  Unicode string containing only US-ASCII characters
 456      * @exception UnsupportedEncodingException in case of unsupported encoding
 457      */
 458     public static String encodeText(String text, String charset,
 459                                     String encoding)
 460                         throws UnsupportedEncodingException {
 461         return encodeWord(text, charset, encoding, false);
 462     }
 463 
 464     /**
 465      * Decode "unstructured" headers, that is, headers that are defined
 466      * as '*text' as per RFC 822. <p>
 467      *
 468      * The string is decoded using the algorithm specified in
 469      * RFC 2047, Section 6.1.1. If the charset-conversion fails
 470      * for any sequence, an UnsupportedEncodingException is thrown.
 471      * If the String is not an RFC 2047 style encoded header, it is
 472      * returned as-is <p>
 473      *
 474      * Example of usage:
 475      * <blockquote><pre>
 476      *
 477      *  MimeBodyPart part = ...
 478      *  String rawvalue = null;
 479      *  String  value = null;
 480      *  try {
 481      *    if ((rawvalue = part.getHeader("X-mailer")[0]) != null)
 482      *      value = MimeUtility.decodeText(rawvalue);
 483      *  } catch (UnsupportedEncodingException e) {
 484      *      // Don't care
 485      *      value = rawvalue;
 486      *  } catch (MessagingException me) { }
 487      *
 488      *  return value;
 489      *
 490      * </pre></blockquote>
 491      *
 492      * @param   etext   the possibly encoded value
 493      * @return decoded text
 494      * @exception       UnsupportedEncodingException if the charset
 495      *                  conversion failed.
 496      */
 497     public static String decodeText(String etext)
 498                 throws UnsupportedEncodingException {
 499         /*
 500          * We look for sequences separated by "linear-white-space".
 501          * (as per RFC 2047, Section 6.1.1)
 502          * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL.
 503          */
 504         String lwsp = " \t\n\r";
 505         StringTokenizer st;
 506 
 507         /*
 508          * First, lets do a quick run thru the string and check
 509          * whether the sequence "=?"  exists at all. If none exists,
 510          * we know there are no encoded-words in here and we can just
 511          * return the string as-is, without suffering thru the later
 512          * decoding logic.
 513          * This handles the most common case of unencoded headers


 560         }
 561         return sb.toString();
 562     }
 563 
 564     /**
 565      * Encode a RFC 822 "word" token into mail-safe form as per
 566      * RFC 2047. <p>
 567      *
 568      * The given Unicode string is examined for non US-ASCII
 569      * characters. If the string contains only US-ASCII characters,
 570      * it is returned as-is.  If the string contains non US-ASCII
 571      * characters, it is first character-encoded using the platform's
 572      * default charset, then transfer-encoded using either the B or
 573      * Q encoding. The resulting bytes are then returned as a Unicode
 574      * string containing only ASCII  characters. <p>
 575      *
 576      * This method is meant to be used when creating RFC 822 "phrases".
 577      * The InternetAddress class, for example, uses this to encode
 578      * it's 'phrase' component.
 579      *
 580      * @param   word    unicode string
 581      * @return  Array of Unicode strings containing only US-ASCII
 582      *          characters.
 583      * @exception UnsupportedEncodingException if the encoding fails
 584      */
 585     public static String encodeWord(String word)
 586                         throws UnsupportedEncodingException {
 587         return encodeWord(word, null, null);
 588     }
 589 
 590     /**
 591      * Encode a RFC 822 "word" token into mail-safe form as per
 592      * RFC 2047. <p>
 593      *
 594      * The given Unicode string is examined for non US-ASCII
 595      * characters. If the string contains only US-ASCII characters,
 596      * it is returned as-is.  If the string contains non US-ASCII
 597      * characters, it is first character-encoded using the specified
 598      * charset, then transfer-encoded using either the B or Q encoding.
 599      * The resulting bytes are then returned as a Unicode string
 600      * containing only ASCII characters. <p>
 601      *
 602      * @param   word    unicode string
 603      * @param   charset the MIME charset
 604      * @param   encoding the encoding to be used. Currently supported
 605      *          values are "B" and "Q". If this parameter is null, then
 606      *          the "Q" encoding is used if most of characters to be
 607      *          encoded are in the ASCII charset, otherwise "B" encoding
 608      *          is used.
 609      * @return  Unicode string containing only US-ASCII characters
 610      * @exception UnsupportedEncodingException if the encoding fails
 611      */
 612     public static String encodeWord(String word, String charset,
 613                                     String encoding)
 614                         throws UnsupportedEncodingException {
 615         return encodeWord(word, charset, encoding, true);
 616     }
 617 
 618     /*
 619      * Encode the given string. The parameter 'encodingWord' should
 620      * be true if a RFC 822 "word" token is being encoded and false if a
 621      * RFC 822 "text" token is being encoded. This is because the
 622      * "Q" encoding defined in RFC 2047 has more restrictions when


 712                 if (foldEncodedWords)
 713                     buf.append("\r\n "); // start a continuation line
 714                 else
 715                     buf.append(" "); // line will be folded later
 716 
 717             buf.append(prefix);
 718             for (int i = 0; i < encodedBytes.length; i++)
 719                 buf.append((char)encodedBytes[i]);
 720             buf.append("?="); // terminate the current sequence
 721         }
 722     }
 723 
 724     /**
 725      * The string is parsed using the rules in RFC 2047 for parsing
 726      * an "encoded-word". If the parse fails, a ParseException is
 727      * thrown. Otherwise, it is transfer-decoded, and then
 728      * charset-converted into Unicode. If the charset-conversion
 729      * fails, an UnsupportedEncodingException is thrown.<p>
 730      *
 731      * @param   eword   the possibly encoded value
 732      * @return decoded word
 733      * @exception       ParseException if the string is not an
 734      *                  encoded-word as per RFC 2047.
 735      * @exception       UnsupportedEncodingException if the charset
 736      *                  conversion failed.
 737      */
 738     public static String decodeWord(String eword)
 739                 throws ParseException, UnsupportedEncodingException {
 740 
 741         if (!eword.startsWith("=?")) // not an encoded word
 742             throw new ParseException();
 743 
 744         // get charset
 745         int start = 2; int pos;
 746         if ((pos = eword.indexOf('?', start)) == -1)
 747             throw new ParseException();
 748         String charset = javaCharset(eword.substring(start, pos));
 749 
 750         // get encoding
 751         start = pos+1;
 752         if ((pos = eword.indexOf('?', start)) == -1)


 840         if (start == 0)
 841             return word;
 842         if (start < word.length())
 843             buf.append(word.substring(start));
 844         return buf.toString();
 845     }
 846 
 847     /**
 848      * A utility method to quote a word, if the word contains any
 849      * characters from the specified 'specials' list.<p>
 850      *
 851      * The <code>HeaderTokenizer</code> class defines two special
 852      * sets of delimiters - MIME and RFC 822. <p>
 853      *
 854      * This method is typically used during the generation of
 855      * RFC 822 and MIME header fields.
 856      *
 857      * @param   word    word to be quoted
 858      * @param   specials the set of special characters
 859      * @return          the possibly quoted word
 860      * @see     com.sun.xml.internal.messaging.saaj.packaging.mime.internet.HeaderTokenizer#MIME
 861      * @see     com.sun.xml.internal.messaging.saaj.packaging.mime.internet.HeaderTokenizer#RFC822
 862      */
 863     public static String quote(String word, String specials) {
 864         int len = word.length();
 865 
 866         /*
 867          * Look for any "bad" characters, Escape and
 868          *  quote the entire string if necessary.
 869          */
 870         boolean needQuoting = false;
 871         for (int i = 0; i < len; i++) {
 872             char c = word.charAt(i);
 873             if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
 874                 // need to escape them and then quote the whole string
 875                 StringBuilder sb = new StringBuilder(len + 3);
 876                 sb.append('"');
 877                 sb.append(word.substring(0, i));
 878                 int lastc = 0;
 879                 for (int j = i; j < len; j++) {
 880                     char cc = word.charAt(j);
 881                     if ((cc == '"') || (cc == '\\') ||


1104         if (defaultJavaCharset == null) {
1105             /*
1106              * If mail.mime.charset is set, it controls the default
1107              * Java charset as well.
1108              */
1109             String mimecs = null;
1110 
1111             mimecs = SAAJUtil.getSystemProperty("mail.mime.charset");
1112 
1113             if (mimecs != null && mimecs.length() > 0) {
1114                 defaultJavaCharset = javaCharset(mimecs);
1115                 return defaultJavaCharset;
1116             }
1117 
1118             try {
1119                 defaultJavaCharset = System.getProperty("file.encoding",
1120                                                         "8859_1");
1121             } catch (SecurityException sex) {
1122 
1123                 class NullInputStream extends InputStream {
1124                     @Override
1125                    public int read() {
1126                         return 0;
1127                     }
1128                 }
1129                 InputStreamReader reader =
1130                         new InputStreamReader(new NullInputStream());
1131                 defaultJavaCharset = reader.getEncoding();
1132                 if (defaultJavaCharset == null)
1133                     defaultJavaCharset = "8859_1";
1134             }
1135         }
1136 
1137         return defaultJavaCharset;
1138     }
1139 
1140     /*
1141      * Get the default MIME charset for this locale.
1142      */
1143     static String getDefaultMIMECharset() {
1144         if (defaultMIMECharset == null) {


1271         }
1272     }
1273 
1274     static final int ALL_ASCII          = 1;
1275     static final int MOSTLY_ASCII       = 2;
1276     static final int MOSTLY_NONASCII    = 3;
1277 
1278     /**
1279      * Check if the given string contains non US-ASCII characters.
1280      * @param   s       string
1281      * @return          ALL_ASCII if all characters in the string
1282      *                  belong to the US-ASCII charset. MOSTLY_ASCII
1283      *                  if more than half of the available characters
1284      *                  are US-ASCII characters. Else MOSTLY_NONASCII.
1285      */
1286     static int checkAscii(String s) {
1287         int ascii = 0, non_ascii = 0;
1288         int l = s.length();
1289 
1290         for (int i = 0; i < l; i++) {
1291             if (nonascii(s.charAt(i))) // non-ascii
1292                 non_ascii++;
1293             else
1294                 ascii++;
1295         }
1296 
1297         if (non_ascii == 0)
1298             return ALL_ASCII;
1299         if (ascii > non_ascii)
1300             return MOSTLY_ASCII;
1301 
1302         return MOSTLY_NONASCII;
1303     }
1304 
1305     /**
1306      * Check if the given byte array contains non US-ASCII characters.
1307      * @param   b       byte array
1308      * @return          ALL_ASCII if all characters in the string
1309      *                  belong to the US-ASCII charset. MOSTLY_ASCII
1310      *                  if more than half of the available characters
1311      *                  are US-ASCII characters. Else MOSTLY_NONASCII.


1438 
1439 /**
1440  * An OutputStream that determines whether the data written to
1441  * it is all ASCII, mostly ASCII, or mostly non-ASCII.
1442  */
1443 class AsciiOutputStream extends OutputStream {
1444     private boolean breakOnNonAscii;
1445     private int ascii = 0, non_ascii = 0;
1446     private int linelen = 0;
1447     private boolean longLine = false;
1448     private boolean badEOL = false;
1449     private boolean checkEOL = false;
1450     private int lastb = 0;
1451     private int ret = 0;
1452 
1453     public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) {
1454         this.breakOnNonAscii = breakOnNonAscii;
1455         checkEOL = encodeEolStrict && breakOnNonAscii;
1456     }
1457 
1458     @Override
1459     public void write(int b) throws IOException {
1460         check(b);
1461     }
1462 
1463     @Override
1464     public void write(byte b[]) throws IOException {
1465         write(b, 0, b.length);
1466     }
1467 
1468     @Override
1469     public void write(byte b[], int off, int len) throws IOException {
1470         len += off;
1471         for (int i = off; i < len ; i++)
1472             check(b[i]);
1473     }
1474 
1475     private final void check(int b) throws IOException {
1476         b &= 0xff;
1477         if (checkEOL &&
1478                 ((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n')))
1479             badEOL = true;
1480         if (b == '\r' || b == '\n')
1481             linelen = 0;
1482         else {
1483             linelen++;
1484             if (linelen > 998)  // 1000 - CRLF
1485                 longLine = true;
1486         }
1487         if (MimeUtility.nonascii(b)) { // non-ascii
1488             non_ascii++;


< prev index next >