/*
 * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

import java.io.Reader;
import java.io.InputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;

/**
 * UTF-8 transformed UCS-2 character stream reader.
 *
 * This reader converts UTF-8 transformed UCS-2 characters to Java characters.
 * The UCS-2 subset of UTF-8 transformation is described in RFC-2279 #2
 * "UTF-8 definition":
 *  0000 0000-0000 007F   0xxxxxxx
 *  0000 0080-0000 07FF   110xxxxx 10xxxxxx
 *  0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
 *
 * Only the high four bits of the first byte of a sequence are inspected.
 * Bytes 0x80-0xBF met where a first byte is expected are passed through
 * unchanged, and the bytes following a multi-byte lead are not checked for
 * the 10xxxxxx form.
 *
 * This reader will return incorrect last character on broken UTF-8 stream.
 */
public class ReaderUTF8
    extends Reader
{
    /** Message carried by the exception raised for a four-byte sequence. */
    private static final String UCS4_MESSAGE =
        "UTF-32 (or UCS-4) encoding not supported.";

    private final InputStream is;

    /**
     * Constructor.
     *
     * @param is A byte input stream.
     */
    public ReaderUTF8(InputStream is)
    {
        this.is = is;
    }

    /**
     * Reads characters into a portion of an array.
     *
     * Characters are decoded one at a time until <code>len</code> characters
     * have been stored or the underlying stream reports its end. The range
     * is validated before any byte is taken from the stream, so a rejected
     * call does not consume input.
     *
     * @param cbuf Destination buffer.
     * @param off Offset at which to start storing characters.
     * @param len Maximum number of characters to read.
     * @return The number of characters stored, or -1 if the end of the
     *   stream was reached before any character could be stored.
     * @exception IndexOutOfBoundsException If <code>off</code> or
     *   <code>len</code> is negative, or <code>len</code> is greater than
     *   <code>cbuf.length - off</code>.
     * @exception IOException If any IO errors occur.
     * @exception UnsupportedEncodingException If UCS-4 character occur in the stream.
     */
    @Override
    public int read(char[] cbuf, int off, int len)
        throws IOException
    {
        if (off < 0 || len < 0 || len > cbuf.length - off) {
            throw new IndexOutOfBoundsException(
                "off=" + off + ", len=" + len + ", cbuf.length=" + cbuf.length);
        }
        int num = 0;
        while (num < len) {
            int val = is.read();
            if (val < 0) {
                // End of stream: report what was stored so far, or -1 if nothing.
                return (num != 0) ? num : -1;
            }
            cbuf[off + num] = (char) decode(val);
            num++;
        }
        return num;
    }

    /**
     * Reads a single character.
     *
     * @return The character read, as an integer in the range 0 to 65535
     *   (0x00-0xffff), or -1 if the end of the stream has been reached.
     * @exception IOException If any IO errors occur.
     * @exception UnsupportedEncodingException If UCS-4 character occur in the stream.
     */
    @Override
    public int read()
        throws IOException
    {
        int val = is.read();
        return (val < 0) ? -1 : decode(val);
    }

    /**
     * Closes the stream.
     *
     * @exception IOException If any IO errors occur.
     */
    @Override
    public void close()
        throws IOException
    {
        is.close();
    }

    /**
     * Completes the character that starts with the given first byte, taking
     * any further bytes of the sequence from the underlying stream.
     *
     * @param lead First byte of the sequence, in the range 0 to 255.
     * @return The decoded character value.
     * @exception IOException If any IO errors occur.
     * @exception UnsupportedEncodingException If the first byte announces a
     *   four-byte (UCS-4) sequence.
     */
    private int decode(int lead)
        throws IOException
    {
        switch (lead & 0xf0) {
        case 0xc0:
        case 0xd0:
            // 110xxxxx 10xxxxxx
            return ((lead & 0x1f) << 6) | (is.read() & 0x3f);

        case 0xe0: {
            // 1110xxxx 10xxxxxx 10xxxxxx
            // A byte missing at end of stream is read as -1; the mask turns
            // it into 0x3f, which is the documented "incorrect last character".
            int middle = is.read() & 0x3f;
            int last = is.read() & 0x3f;
            return ((lead & 0x0f) << 12) | (middle << 6) | last;
        }

        case 0xf0:  // UCS-4 character
            throw new UnsupportedEncodingException(UCS4_MESSAGE);

        default:
            // Single byte: returned as is.
            return lead;
        }
    }
}