1 /*
   2  * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package sun.io;
  26 
  27 
  28 /**
  29  * UCS Transformation Format 8 (UTF-8) -> UCS2 (UTF16) converter
  30  *
  31  * see CharToByteUTF8.java about UTF-8 format
  32  */
  33 
  34 public class ByteToCharUTF8 extends ByteToCharConverter {
  35 
  36     private int savedSize;
  37     private byte[] savedBytes;
  38 
  39     public ByteToCharUTF8() {
  40         super();
  41         savedSize = 0;
  42         savedBytes = new byte[5];
  43     }
  44 
  45     public int flush(char[] output, int outStart, int outEnd)
  46         throws MalformedInputException
  47     {
  48         if (savedSize != 0) {
  49             savedSize = 0;
  50             badInputLength = 0;
  51             throw new MalformedInputException();
  52         }
  53         byteOff = charOff = 0;
  54         return 0;
  55     }
  56 
  57     /**
  58      * Character converson
  59      */
  60     public int convert(byte[] input, int inOff, int inEnd,
  61                        char[] output, int outOff, int outEnd)
  62         throws MalformedInputException, ConversionBufferFullException
  63     {
  64         int byte1, byte2, byte3, byte4;
  65         char[] outputChar = new char[2];
  66         int outputSize;
  67         int byteOffAdjustment = 0;
  68 
  69         if (savedSize != 0) {
  70             byte[] newBuf;
  71             newBuf = new byte[inEnd - inOff + savedSize];
  72             for (int i = 0; i < savedSize; i++) {
  73                 newBuf[i] = savedBytes[i];
  74             }
  75             System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff);
  76             input = newBuf;
  77             inOff = 0;
  78             inEnd = newBuf.length;
  79             byteOffAdjustment = -savedSize;
  80             savedSize = 0;
  81         }
  82 
  83         charOff = outOff;
  84         byteOff = inOff;
  85         int startByteOff;
  86 
  87         while(byteOff < inEnd) {
  88 
  89             startByteOff = byteOff;
  90             byte1 = input[byteOff++] & 0xff;
  91 
  92             if ((byte1 & 0x80) == 0){
  93                 outputChar[0] = (char)byte1;
  94                 outputSize = 1;
  95             } else if ((byte1 & 0xe0) == 0xc0) {
  96                 if (byteOff >= inEnd) {
  97                     savedSize = 1;
  98                     savedBytes[0] = (byte)byte1;
  99                     break;
 100                 }
 101                 byte2 = input[byteOff++] & 0xff;
 102                 if ((byte2 & 0xc0) != 0x80) {
 103                     badInputLength = 2;
 104                     byteOff += byteOffAdjustment;
 105                     throw new MalformedInputException();
 106                 }
 107                 outputChar[0] = (char)(((byte1 & 0x1f) << 6) | (byte2 & 0x3f));
 108                 outputSize = 1;
 109             } else if ((byte1 & 0xf0) == 0xe0){
 110                 if (byteOff + 1 >= inEnd) {
 111                         savedBytes[0] = (byte)byte1;
 112                     if (byteOff >= inEnd) {
 113                         savedSize = 1;
 114                     } else {
 115                         savedSize = 2;
 116                         savedBytes[1] = input[byteOff++];
 117                     }
 118                     break;
 119                 }
 120                 byte2 = input[byteOff++] & 0xff;
 121                 byte3 = input[byteOff++] & 0xff;
 122                 if ((byte2 & 0xc0) != 0x80 || (byte3 & 0xc0) != 0x80) {
 123                     badInputLength = 3;
 124                     byteOff += byteOffAdjustment;
 125                     throw new MalformedInputException();
 126                 }
 127                 outputChar[0] = (char)(((byte1 & 0x0f) << 12)
 128                                        | ((byte2 & 0x3f) << 6)
 129                                        | (byte3 & 0x3f));
 130                 outputSize = 1;
 131             } else if ((byte1 & 0xf8) == 0xf0) {
 132                 if (byteOff + 2 >= inEnd) {
 133                     savedBytes[0] = (byte)byte1;
 134                     if (byteOff >= inEnd) {
 135                         savedSize = 1;
 136                     } else if (byteOff + 1 >= inEnd) {
 137                         savedSize = 2;
 138                         savedBytes[1] = input[byteOff++];
 139                     } else {
 140                         savedSize = 3;
 141                         savedBytes[1] = input[byteOff++];
 142                         savedBytes[2] = input[byteOff++];
 143                     }
 144                     break;
 145                 }
 146                 byte2 = input[byteOff++] & 0xff;
 147                 byte3 = input[byteOff++] & 0xff;
 148                 byte4 = input[byteOff++] & 0xff;
 149                 if ((byte2 & 0xc0) != 0x80 ||
 150                     (byte3 & 0xc0) != 0x80 ||
 151                     (byte4 & 0xc0) != 0x80) {
 152                     badInputLength = 4;
 153                     byteOff += byteOffAdjustment;
 154                     throw new MalformedInputException();
 155                 }
 156                 // this byte sequence is UTF16 character
 157                 int ucs4 = (0x07 & byte1) << 18 |
 158                            (0x3f & byte2) << 12 |
 159                            (0x3f & byte3) <<  6 |
 160                            (0x3f & byte4);
 161                 outputChar[0] = (char)((ucs4 - 0x10000) / 0x400 + 0xd800);
 162                 outputChar[1] = (char)((ucs4 - 0x10000) % 0x400 + 0xdc00);
 163                 outputSize = 2;
 164             } else {
 165                 badInputLength = 1;
 166                 byteOff += byteOffAdjustment;
 167                 throw new MalformedInputException();
 168             }
 169 
 170             if (charOff + outputSize > outEnd) {
 171                 byteOff = startByteOff;
 172                 byteOff += byteOffAdjustment;
 173                 throw new ConversionBufferFullException();
 174             }
 175 
 176             for (int i = 0; i < outputSize; i++) {
 177                 output[charOff + i] = outputChar[i];
 178             }
 179             charOff += outputSize;
 180         }
 181 
 182         byteOff += byteOffAdjustment;
 183         return charOff - outOff;
 184     }
 185 
 186     /*
 187      * Return the character set id
 188      */
 189     public String getCharacterEncoding() {
 190         return "UTF8";
 191     }
 192 
 193     /*
 194      *   Reset after finding bad input
 195      */
 196     public void reset() {
 197         byteOff = charOff = 0;
 198         savedSize = 0;
 199     }
 200 }