1 /* 2 * Copyright (c) 2006, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.xml.internal.stream.writers; 27 28 import java.io.Writer; 29 import java.io.OutputStream; 30 import java.io.IOException; 31 32 import com.sun.org.apache.xerces.internal.util.XMLChar; 33 34 /** 35 * <p>This class is used to write a stream of chars as a stream of 36 * bytes using the UTF8 encoding. It assumes that the underlying 37 * output stream is buffered or does not need additional buffering.</p> 38 * 39 * <p>It is more efficient than using a <code>java.io.OutputStreamWriter</code> 40 * because it does not need to be wrapped in a 41 * <code>java.io.BufferedWriter</code>. 
/**
 * Writes chars to an {@link OutputStream} as UTF-8 bytes without any
 * intermediate buffering.
 *
 * <p>Creating multiple instances of <code>java.io.BufferedWriter</code> has
 * been shown to be very expensive in JAX-WS, so every char is encoded and
 * handed to the underlying stream immediately. That stream is expected to be
 * buffered already, or not to need buffering.
 *
 * <p>Code points outside the Basic Multilingual Plane arrive as two UTF-16
 * code units. The high surrogate is held back until the matching low
 * surrogate is written, at which point the pair is emitted as one four-byte
 * sequence. A surrogate that is not part of a well-formed pair is rejected
 * with an {@link IOException} instead of being encoded.
 *
 * <p>This class performs no synchronization of its own.
 *
 * @author Santiago PericasGeertsen
 */
public final class UTF8OutputStreamWriter extends Writer {

    /**
     * Underlying output stream. This class assumes that this
     * output stream does not need buffering.
     */
    OutputStream out;

    /**
     * High surrogate of a pair whose low surrogate has not been written
     * yet, or 0 when no pair is in progress.
     */
    int lastUTF16CodePoint = 0;

    /**
     * Creates a writer that encodes onto the given stream.
     *
     * @param out the stream receiving the UTF-8 bytes
     */
    public UTF8OutputStreamWriter(OutputStream out) {
        this.out = out;
    }

    /**
     * Returns the name of the encoding produced by this writer.
     *
     * @return always {@code "UTF-8"}
     */
    public String getEncoding() {
        return "UTF-8";
    }

    /**
     * Encodes a single UTF-16 code unit.
     *
     * <p>As specified by {@link Writer#write(int)}, only the 16 low-order
     * bits of {@code c} are used. A high surrogate produces no output until
     * the following low surrogate is written.
     *
     * @param c the code unit to write, in the 16 low-order bits
     * @throws IOException if the underlying stream fails, or if {@code c}
     *         is a low surrogate with no pending high surrogate, or is not
     *         a low surrogate while a high surrogate is pending
     */
    @Override
    public void write(int c) throws IOException {
        // Writer.write(int) ignores the 16 high-order bits.
        final char unit = (char) c;

        if (lastUTF16CodePoint != 0) {
            final char high = (char) lastUTF16CodePoint;
            // Clear the pending state first so the writer stays usable
            // whether or not the pair turns out to be well formed.
            lastUTF16CodePoint = 0;

            if (!Character.isLowSurrogate(unit)) {
                throw new IOException("Attempting to write invalid UTF-16 sequence:"
                        + " high surrogate 0x" + Integer.toHexString(high)
                        + " followed by 0x" + Integer.toHexString(unit));
            }

            // 4 bytes, 21 bits
            final int codePoint = Character.toCodePoint(high, unit);
            out.write(0xF0 | (codePoint >> 18));
            out.write(0x80 | ((codePoint >> 12) & 0x3F));
            out.write(0x80 | ((codePoint >> 6) & 0x3F));
            out.write(0x80 | (codePoint & 0x3F));
            return;
        }

        if (unit < 0x80) {
            // 1 byte, 7 bits
            out.write(unit);
        }
        else if (unit < 0x800) {
            // 2 bytes, 11 bits
            out.write(0xC0 | (unit >> 6));
            out.write(0x80 | (unit & 0x3F));
        }
        else if (Character.isHighSurrogate(unit)) {
            // Wait for the second half of the pair before emitting anything.
            lastUTF16CodePoint = unit;
        }
        else if (Character.isLowSurrogate(unit)) {
            throw new IOException("Attempting to write unpaired UTF-16 low surrogate 0x"
                    + Integer.toHexString(unit));
        }
        else {
            // 3 bytes, 16 bits
            out.write(0xE0 | (unit >> 12));
            out.write(0x80 | ((unit >> 6) & 0x3F));
            out.write(0x80 | (unit & 0x3F));
        }
    }

    /**
     * Encodes every char of the array.
     *
     * @param cbuf the chars to write
     * @throws IOException if encoding or the underlying stream fails
     */
    @Override
    public void write(char cbuf[]) throws IOException {
        write(cbuf, 0, cbuf.length);
    }

    /**
     * Encodes {@code len} chars of the array starting at {@code off}.
     *
     * @param cbuf the source array
     * @param off index of the first char to write
     * @param len number of chars to write
     * @throws IOException if encoding or the underlying stream fails
     */
    @Override
    public void write(char cbuf[], int off, int len) throws IOException {
        for (int i = 0; i < len; i++) {
            write(cbuf[off + i]);
        }
    }

    /**
     * Encodes every char of the string.
     *
     * @param str the string to write
     * @throws IOException if encoding or the underlying stream fails
     */
    @Override
    public void write(String str) throws IOException {
        write(str, 0, str.length());
    }

    /**
     * Encodes {@code len} chars of the string starting at {@code off}.
     *
     * @param str the source string
     * @param off index of the first char to write
     * @param len number of chars to write
     * @throws IOException if encoding or the underlying stream fails
     */
    @Override
    public void write(String str, int off, int len) throws IOException {
        for (int i = 0; i < len; i++) {
            write(str.charAt(off + i));
        }
    }

    /**
     * Flushes the underlying stream; this writer holds no bytes of its own.
     *
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void flush() throws IOException {
        out.flush();
    }

    /**
     * Closes the underlying stream.
     *
     * <p>The stream is closed even when a high surrogate is still waiting
     * for its low surrogate, so that the resource is not leaked; the
     * inconsistent state is reported afterwards.
     *
     * @throws IOException if the underlying stream fails to close
     * @throws IllegalStateException if a high surrogate was still pending
     */
    @Override
    public void close() throws IOException {
        final boolean danglingSurrogate = lastUTF16CodePoint != 0;
        lastUTF16CodePoint = 0;
        out.close();
        if (danglingSurrogate) {
            throw new IllegalStateException("Attempting to close a UTF8OutputStreamWriter"
                    + " while awaiting for a UTF-16 code unit");
        }
    }

}