1 /* 2 * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 package sun.io; 26 27 28 /** 29 * UCS Transformation Format 8 (UTF-8) -> UCS2 (UTF16) converter 30 * 31 * see CharToByteUTF8.java about UTF-8 format 32 */ 33 34 public class ByteToCharUTF8 extends ByteToCharConverter { 35 36 private int savedSize; 37 private byte[] savedBytes; 38 39 public ByteToCharUTF8() { 40 super(); 41 savedSize = 0; 42 savedBytes = new byte[5]; 43 } 44 45 public int flush(char[] output, int outStart, int outEnd) 46 throws MalformedInputException 47 { 48 if (savedSize != 0) { 49 savedSize = 0; 50 badInputLength = 0; 51 throw new MalformedInputException(); 52 } 53 byteOff = charOff = 0; 54 return 0; 55 } 56 57 /** 58 * Character converson 59 */ 60 public int convert(byte[] input, int inOff, int inEnd, 61 char[] output, int outOff, int outEnd) 62 throws MalformedInputException, ConversionBufferFullException 63 { 64 int byte1, byte2, byte3, byte4; 65 char[] outputChar = new char[2]; 66 int outputSize; 67 int byteOffAdjustment = 0; 68 69 if (savedSize != 0) { 70 byte[] newBuf; 71 newBuf = new byte[inEnd - inOff + savedSize]; 72 for (int i = 0; i < savedSize; i++) { 73 newBuf[i] = savedBytes[i]; 74 } 75 System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff); 76 input = newBuf; 77 inOff = 0; 78 inEnd = newBuf.length; 79 byteOffAdjustment = -savedSize; 80 savedSize = 0; 81 } 82 83 charOff = outOff; 84 byteOff = inOff; 85 int startByteOff; 86 87 while(byteOff < inEnd) { 88 89 startByteOff = byteOff; 90 byte1 = input[byteOff++] & 0xff; 91 92 if ((byte1 & 0x80) == 0){ 93 outputChar[0] = (char)byte1; 94 outputSize = 1; 95 } else if ((byte1 & 0xe0) == 0xc0) { 96 if (byteOff >= inEnd) { 97 savedSize = 1; 98 savedBytes[0] = (byte)byte1; 99 break; 100 } 101 byte2 = input[byteOff++] & 0xff; 102 if ((byte2 & 0xc0) != 0x80) { 103 badInputLength = 2; 104 byteOff += byteOffAdjustment; 105 throw new MalformedInputException(); 106 } 107 outputChar[0] = (char)(((byte1 & 0x1f) << 6) | (byte2 & 0x3f)); 108 outputSize = 1; 109 } else if ((byte1 & 0xf0) == 0xe0){ 110 if (byteOff + 1 >= inEnd) { 111 savedBytes[0] = (byte)byte1; 112 if (byteOff >= inEnd) { 113 savedSize = 1; 114 } else { 115 savedSize = 2; 116 savedBytes[1] = input[byteOff++]; 117 } 118 break; 119 } 120 byte2 = input[byteOff++] & 0xff; 121 byte3 = input[byteOff++] & 0xff; 122 if ((byte2 & 0xc0) != 0x80 || (byte3 & 0xc0) != 0x80) { 123 badInputLength = 3; 124 byteOff += byteOffAdjustment; 125 throw new MalformedInputException(); 126 } 127 outputChar[0] = (char)(((byte1 & 0x0f) << 12) 128 | ((byte2 & 0x3f) << 6) 129 | (byte3 & 0x3f)); 130 outputSize = 1; 131 } else if ((byte1 & 0xf8) == 0xf0) { 132 if (byteOff + 2 >= inEnd) { 133 savedBytes[0] = (byte)byte1; 134 if (byteOff >= inEnd) { 135 savedSize = 1; 136 } else if (byteOff + 1 >= inEnd) { 137 savedSize = 2; 138 savedBytes[1] = input[byteOff++]; 139 } else { 140 savedSize = 3; 141 savedBytes[1] = input[byteOff++]; 142 savedBytes[2] = input[byteOff++]; 143 } 144 break; 145 } 146 byte2 = input[byteOff++] & 0xff; 147 byte3 = input[byteOff++] & 0xff; 148 byte4 = input[byteOff++] & 0xff; 149 if ((byte2 & 0xc0) != 0x80 || 150 (byte3 & 0xc0) != 0x80 || 151 (byte4 & 0xc0) != 0x80) { 152 badInputLength = 4; 153 byteOff += byteOffAdjustment; 154 throw new MalformedInputException(); 155 } 156 // this byte sequence is UTF16 character 157 int ucs4 = (0x07 & byte1) << 18 | 158 (0x3f & byte2) << 12 | 159 (0x3f & byte3) << 6 | 160 (0x3f & byte4); 161 outputChar[0] = (char)((ucs4 - 0x10000) / 0x400 + 0xd800); 162 outputChar[1] = (char)((ucs4 - 0x10000) % 0x400 + 0xdc00); 163 outputSize = 2; 164 } else { 165 badInputLength = 1; 166 byteOff += byteOffAdjustment; 167 throw new MalformedInputException(); 168 } 169 170 if (charOff + outputSize > outEnd) { 171 byteOff = startByteOff; 172 byteOff += byteOffAdjustment; 173 throw new ConversionBufferFullException(); 174 } 175 176 for (int i = 0; i < outputSize; i++) { 177 output[charOff + i] = outputChar[i]; 178 } 179 charOff += outputSize; 180 } 181 182 byteOff += byteOffAdjustment; 183 return charOff - outOff; 184 } 185 186 /* 187 * Return the character set id 188 */ 189 public String getCharacterEncoding() { 190 return "UTF8"; 191 } 192 193 /* 194 * Reset after finding bad input 195 */ 196 public void reset() { 197 byteOff = charOff = 0; 198 savedSize = 0; 199 } 200 }