1 /*
   2  * Copyright (c) 2006, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.xml.internal.stream.writers;
  27 
  28 import java.io.Writer;
  29 import java.io.OutputStream;
  30 import java.io.IOException;
  31 
  32 import com.sun.org.apache.xerces.internal.util.XMLChar;
  33 
  34 /**
  35  * <p>This class is used to write a stream of chars as a stream of
  36  * bytes using the UTF8 encoding. It assumes that the underlying
  37  * output stream is buffered or does not need additional buffering.</p>
  38  *
  39  * <p>It is more efficient than using a <code>java.io.OutputStreamWriter</code>
  40  * because it does not need to be wrapped in a
  41  * <code>java.io.BufferedWriter</code>. Creating multiple instances
  42  * of <code>java.io.BufferedWriter</code> has been shown to be very
  43  * expensive in JAX-WS.</p>
  44  *
  45  * @author Santiago PericasGeertsen
  46  */
  47 public final class UTF8OutputStreamWriter extends Writer {
  48 
  49     /**
  50      * Undelying output stream. This class assumes that this
  51      * output stream does not need buffering.
  52      */
  53     OutputStream out;
  54 
  55     /**
  56      * Java represents chars that are not in the Basic Multilingual
  57      * Plane (BMP) in UTF-16. This int stores the first code unit
  58      * for a code point encoded in two UTF-16 code units.
  59      */
  60     int lastUTF16CodePoint = 0;
  61 
  62     public UTF8OutputStreamWriter(OutputStream out) {
  63         this.out = out;
  64     }
  65 
  66     public String getEncoding() {
  67         return "UTF-8";
  68     }
  69 
  70     public void write(int c) throws IOException {
  71         // Check in we are encoding at high and low surrogates
  72         if (lastUTF16CodePoint != 0) {
  73             final int uc =
  74                 (((lastUTF16CodePoint & 0x3ff) << 10) | (c & 0x3ff)) + 0x10000;
  75 
  76             if (uc < 0 || uc >= 0x200000) {
  77                 throw new IOException("Atttempting to write invalid Unicode code point '" + uc + "'");
  78             }
  79 
  80             out.write(0xF0 | (uc >> 18));
  81             out.write(0x80 | ((uc >> 12) & 0x3F));
  82             out.write(0x80 | ((uc >> 6) & 0x3F));
  83             out.write(0x80 | (uc & 0x3F));
  84 
  85             lastUTF16CodePoint = 0;
  86             return;
  87         }
  88 
  89         // Otherwise, encode char as defined in UTF-8
  90         if (c < 0x80) {
  91             // 1 byte, 7 bits
  92             out.write(c);
  93         }
  94         else if (c < 0x800) {
  95             // 2 bytes, 11 bits
  96             out.write(0xC0 | (c >> 6));    // first 5
  97             out.write(0x80 | (c & 0x3F));  // second 6
  98         }
  99         else if (c <= '\uFFFF') {
 100             if (!XMLChar.isHighSurrogate(c) && !XMLChar.isLowSurrogate(c)) {
 101                 // 3 bytes, 16 bits
 102                 out.write(0xE0 | (c >> 12));   // first 4
 103                 out.write(0x80 | ((c >> 6) & 0x3F));  // second 6
 104                 out.write(0x80 | (c & 0x3F));  // third 6
 105             }
 106             else {
 107                 lastUTF16CodePoint = c;
 108             }
 109         }
 110     }
 111 
 112     public void write(char cbuf[]) throws IOException {
 113         for (int i = 0; i < cbuf.length; i++) {
 114             write(cbuf[i]);
 115         }
 116     }
 117 
 118     public void write(char cbuf[], int off, int len) throws IOException {
 119         for (int i = 0; i < len; i++) {
 120             write(cbuf[off + i]);
 121         }
 122     }
 123 
 124     public void write(String str) throws IOException {
 125         final int len = str.length();
 126         for (int i = 0; i < len; i++) {
 127             write(str.charAt(i));
 128         }
 129     }
 130 
 131     public void write(String str, int off, int len) throws IOException {
 132         for (int i = 0; i < len; i++) {
 133             write(str.charAt(off + i));
 134         }
 135     }
 136 
 137     public void flush() throws IOException {
 138         out.flush();
 139     }
 140 
 141     public void close() throws IOException {
 142         if (lastUTF16CodePoint != 0) {
 143             throw new IllegalStateException("Attempting to close a UTF8OutputStreamWriter"
 144                 + " while awaiting for a UTF-16 code unit");
 145         }
 146         out.close();
 147     }
 148 
 149 }