--- /dev/null 2012-12-05 14:31:32.000000000 +0000 +++ new/src/share/classes/jdk/internal/util/xml/impl/ReaderUTF8.java 2012-12-05 14:31:31.000000000 +0000 @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package jdk.internal.util.xml.impl; + +import java.io.Reader; +import java.io.InputStream; +import java.io.IOException; +import java.io.UnsupportedEncodingException; + +/** + * UTF-8 transformed UCS-2 character stream reader. + * + * This reader converts UTF-8 transformed UCS-2 characters to Java characters. + * The UCS-2 subset of UTF-8 transformation is described in RFC-2279 #2 + * "UTF-8 definition": + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * + * This reader will return incorrect last character on broken UTF-8 stream. + */ +public class ReaderUTF8 + extends Reader +{ + private InputStream is; + + /** + * Constructor. + * + * @param is A byte input stream. + */ + public ReaderUTF8(InputStream is) + { + this.is = is; + } + + /** + * Reads characters into a portion of an array. + * + * @param cbuf Destination buffer. + * @param off Offset at which to start storing characters. + * @param len Maximum number of characters to read. + * @exception IOException If any IO errors occur. + * @exception UnsupportedEncodingException If UCS-4 character occur in the stream. + */ + public int read(char[] cbuf, int off, int len) + throws IOException + { + int num = 0; + int val; + while (num < len) { + if ((val = is.read()) < 0) + return (num != 0)? num: -1; + switch (val & 0xf0) { + case 0xc0: + case 0xd0: + cbuf[off++] = (char)(((val & 0x1f) << 6) | (is.read() & 0x3f)); + break; + + case 0xe0: + cbuf[off++] = (char)(((val & 0x0f) << 12) | + ((is.read() & 0x3f) << 6) | (is.read() & 0x3f)); + break; + + case 0xf0: // UCS-4 character + throw new UnsupportedEncodingException("UTF-32 (or UCS-4) encoding not supported."); + + default: + cbuf[off++] = (char)val; + break; + } + num++; + } + return num; + } + + /** + * Reads a single character. + * + * @return The character read, as an integer in the range 0 to 65535 + * (0x00-0xffff), or -1 if the end of the stream has been reached. + * @exception IOException If any IO errors occur. + * @exception UnsupportedEncodingException If UCS-4 character occur in the stream. + */ + public int read() + throws IOException + { + int val; + if ((val = is.read()) < 0) + return -1; + switch (val & 0xf0) { + case 0xc0: + case 0xd0: + val = ((val & 0x1f) << 6) | (is.read() & 0x3f); + break; + + case 0xe0: + val = ((val & 0x0f) << 12) | + ((is.read() & 0x3f) << 6) | (is.read() & 0x3f); + break; + + case 0xf0: // UCS-4 character + throw new UnsupportedEncodingException(); + + default: + break; + } + return val; + } + + /** + * Closes the stream. + * + * @exception IOException If any IO errors occur. + */ + public void close() + throws IOException + { + is.close(); + } +}