1 /*
   2  * Copyright 1997-2003 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Sun designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Sun in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 package sun.io;
  26 
  27 import sun.nio.cs.ext.IBM949;
  28 
  29 /**
  30 * @author Malcolm Ayres
  31 */
  32 
  33 public class CharToByteCp949 extends CharToByteConverter
  34 {
  35     private static final char SBase = '\uAC00';
  36     private static final char LBase = '\u1100';
  37     private static final char VBase = '\u1161';
  38     private static final char TBase = '\u11A7';
  39     private static final int  VCount = 21;
  40     private static final int  TCount = 28;
  41     private static final byte G0 = 0;
  42     private static final byte G1 = 1;
  43     private static final byte G2 = 2;
  44     private static final byte G3 = 3;
  45     private byte   charState = G0;
  46     private char   l, v, t;
  47 
  48     private byte[] outputByte;
  49 
  50     private char highHalfZoneCode;
  51     private int  mask1;
  52     private int  mask2;
  53     private int  shift;
  54     private short[] index1;
  55     private String index2;
  56     private String index2a;
  57 
  58     private final static IBM949 nioCoder = new IBM949();
  59 
  60     public CharToByteCp949() {
  61        super();
  62        highHalfZoneCode = 0;
  63        outputByte = new byte[2];
  64        mask1 = 0xFFF8;
  65        mask2 = 0x0007;
  66        shift = 3;
  67        index1 = nioCoder.getEncoderIndex1();
  68        index2 = nioCoder.getEncoderIndex2();
  69        index2a = nioCoder.getEncoderIndex2a();
  70     }
  71 
  72     /**
  73       * flush out any residual data and reset the buffer state
  74       */
  75     public int flush(byte[] output, int outStart, int outEnd)
  76         throws MalformedInputException,
  77                ConversionBufferFullException
  78     {
  79        int bytesOut;
  80 
  81        byteOff = outStart;
  82 
  83        if (highHalfZoneCode != 0) {
  84            reset();
  85            badInputLength = 0;
  86            throw new MalformedInputException();
  87        }
  88 
  89        if (charState != G0) {
  90            try {
  91               unicodeToBuffer(composeHangul() ,output, outEnd);
  92            }
  93            catch(UnknownCharacterException e) {
  94               reset();
  95               badInputLength = 0;
  96               throw new MalformedInputException();
  97            }
  98            charState = G0;
  99        }
 100 
 101        bytesOut = byteOff - outStart;
 102 
 103        reset();
 104        return bytesOut;
 105     }
 106 
 107     /**
 108      * Resets converter to its initial state.
 109      */
 110     public void reset() {
 111        highHalfZoneCode = 0;
 112        charState = G0;
 113        charOff = byteOff = 0;
 114     }
 115 
 116     /**
 117      * Returns true if the given character can be converted to the
 118      * target character encoding.
 119      */
 120     public boolean canConvert(char ch) {
 121        int  index;
 122        int  theBytes;
 123 
 124        index = index1[((ch & mask1) >> shift)] + (ch & mask2);
 125        if (index < 15000)
 126          theBytes = (int)(index2.charAt(index));
 127        else
 128          theBytes = (int)(index2a.charAt(index-15000));
 129 
 130        if (theBytes != 0)
 131           return (true);
 132 
 133        // only return true if input char was unicode null - all others are
 134        //    undefined
 135        return( ch == '\u0000');
 136     }
 137 
 138     /**
 139      * Character conversion
 140      */
 141 
 142     public int convert(char[] input, int inOff, int inEnd,
 143                        byte[] output, int outOff, int outEnd)
 144         throws UnknownCharacterException, MalformedInputException,
 145                ConversionBufferFullException
 146     {
 147        char    inputChar;
 148        int     inputSize;
 149 
 150        charOff = inOff;
 151        byteOff = outOff;
 152 
 153        while (charOff < inEnd) {
 154 
 155           if (highHalfZoneCode == 0) {
 156              inputChar = input[charOff];
 157              inputSize = 1;
 158           } else {
 159              inputChar = highHalfZoneCode;
 160              inputSize = 0;
 161              highHalfZoneCode = 0;
 162           }
 163 
 164           switch (charState) {
 165           case G0:
 166 
 167              l = LBase;
 168              v = VBase;
 169              t = TBase;
 170 
 171              if ( isLeadingC(inputChar) ) {     // Leading Consonant
 172                 l = inputChar;
 173                 charState = G1;
 174                 break;
 175              }
 176 
 177              if ( isVowel(inputChar) ) {        // Vowel
 178                 v = inputChar;
 179                 charState = G2;
 180                 break;
 181              }
 182 
 183              if ( isTrailingC(inputChar) ) {    // Trailing Consonant
 184                 t = inputChar;
 185                 charState = G3;
 186                 break;
 187              }
 188 
 189              break;
 190 
 191           case G1:
 192              if ( isLeadingC(inputChar) ) {     // Leading Consonant
 193                 l = composeLL(l, inputChar);
 194                 break;
 195              }
 196 
 197              if ( isVowel(inputChar) ) {        // Vowel
 198                 v = inputChar;
 199                 charState = G2;
 200                 break;
 201              }
 202 
 203              if ( isTrailingC(inputChar) ) {    // Trailing Consonant
 204                 t = inputChar;
 205                 charState = G3;
 206                 break;
 207              }
 208 
 209              unicodeToBuffer(composeHangul(), output, outEnd);
 210 
 211              charState = G0;
 212              break;
 213 
 214           case G2:
 215              if ( isLeadingC(inputChar) ) {     // Leading Consonant
 216 
 217                 unicodeToBuffer(composeHangul(), output, outEnd);
 218 
 219                 l = inputChar;
 220                 v = VBase;
 221                 t = TBase;
 222                 charState = G1;
 223                 break;
 224              }
 225 
 226              if ( isVowel(inputChar) ) {        // Vowel
 227                 v = composeVV(l, inputChar);
 228                 charState = G2;
 229                 break;
 230              }
 231 
 232              if ( isTrailingC(inputChar) ) {    // Trailing Consonant
 233                 t = inputChar;
 234                 charState = G3;
 235                 break;
 236              }
 237 
 238              unicodeToBuffer(composeHangul(), output, outEnd);
 239 
 240              charState = G0;
 241 
 242              break;
 243 
 244           case G3:
 245              if ( isTrailingC(inputChar) ) {    // Trailing Consonant
 246                 t = composeTT(t, inputChar);
 247                 charState = G3;
 248                 break;
 249              }
 250 
 251              unicodeToBuffer(composeHangul(), output, outEnd);
 252 
 253              charState = G0;
 254 
 255              break;
 256           }
 257 
 258           if (charState != G0)
 259              charOff++;
 260           else {
 261 
 262              // Is this a high surrogate?
 263              if(inputChar >= '\ud800' && inputChar <= '\udbff') {
 264                 // Is this the last character of the input?
 265                 if (charOff + inputSize >= inEnd) {
 266                    highHalfZoneCode = inputChar;
 267                    charOff += inputSize;
 268                    break;
 269                 }
 270 
 271                 // Is there a low surrogate following?
 272                 inputChar = input[charOff + inputSize];
 273                 if (inputChar >= '\udc00' && inputChar <= '\udfff') {
 274                    // We have a valid surrogate pair.  Too bad we don't do
 275                    // surrogates.  Is substitution enabled?
 276                    if (subMode) {
 277                       if (subBytes.length == 1) {
 278                          outputByte[0] = 0x00;
 279                          outputByte[1] = subBytes[0];
 280                       } else {
 281                          outputByte[0] = subBytes[0];
 282                          outputByte[1] = subBytes[1];
 283                       }
 284 
 285                       bytesToBuffer(outputByte, output, outEnd);
 286                       inputSize++;
 287                    } else {
 288                       badInputLength = 2;
 289                       throw new UnknownCharacterException();
 290                    }
 291                 } else {
 292                    // We have a malformed surrogate pair
 293                    badInputLength = 1;
 294                    throw new MalformedInputException();
 295                 }
 296              }
 297 
 298                // Is this an unaccompanied low surrogate?
 299              else
 300                 if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
 301                    badInputLength = 1;
 302                    throw new MalformedInputException();
 303                 } else {
 304                    unicodeToBuffer(inputChar, output, outEnd);
 305                 }
 306 
 307              charOff += inputSize;
 308 
 309           }
 310 
 311        }
 312 
 313        return byteOff - outOff;
 314 
 315     }
 316 
 317     private char composeHangul() {
 318        int lIndex, vIndex, tIndex;
 319 
 320        lIndex = l - LBase;
 321        vIndex = v - VBase;
 322        tIndex = t - TBase;
 323 
 324        return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase);
 325     }
 326 
 327     private char composeLL(char l1, char l2) {
 328        return l2;
 329     }
 330 
 331     private char composeVV(char v1, char v2) {
 332        return v2;
 333     }
 334 
 335     private char composeTT(char t1, char t2) {
 336        return t2;
 337     }
 338 
 339     private boolean isLeadingC(char c) {
 340        return (c >= LBase && c <= '\u1159');
 341     }
 342 
 343     private boolean isVowel(char c) {
 344        return (c >= VBase && c <= '\u11a2');
 345     }
 346 
 347     private boolean isTrailingC(char c) {
 348        return (c >= TBase && c <= '\u11f9');
 349     }
 350 
 351     /**
 352      * returns the maximum number of bytes needed to convert a char
 353      */
 354     public int getMaxBytesPerChar() {
 355        return 2;
 356     }
 357 
 358 
 359     /**
 360      * Return the character set ID
 361      */
 362     public String getCharacterEncoding() {
 363        return "Cp949";
 364     }
 365 
 366     /**
 367      * private function to add the bytes to the output buffer
 368      */
 369     private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd)
 370         throws ConversionBufferFullException,
 371                UnknownCharacterException {
 372 
 373        int spaceNeeded;
 374 
 375        // ensure sufficient space for the bytes(s)
 376 
 377        if (theBytes[0] == 0x00)
 378           spaceNeeded = 1;
 379        else
 380           spaceNeeded = 2;
 381 
 382        if (byteOff + spaceNeeded > outEnd)
 383           throw new ConversionBufferFullException();
 384 
 385        // move the data into the buffer
 386 
 387        if (spaceNeeded == 1)
 388           output[byteOff++] = theBytes[1];
 389        else {
 390           output[byteOff++] = theBytes[0];
 391           output[byteOff++] = theBytes[1];
 392        }
 393 
 394     }
 395 
 396     /**
 397      * private function to add a unicode character to the output buffer
 398      */
 399     private void unicodeToBuffer(char unicode, byte[] output, int outEnd)
 400         throws ConversionBufferFullException,
 401                UnknownCharacterException {
 402 
 403        int index;
 404        int theBytes;
 405 
 406        // first we convert the unicode to its byte representation
 407 
 408        index = index1[((unicode & mask1) >> shift)] + (unicode & mask2);
 409        if (index < 15000)
 410          theBytes = (int)(index2.charAt(index));
 411        else
 412          theBytes = (int)(index2a.charAt(index-15000));
 413        outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8);
 414        outputByte[1] = (byte)(theBytes & 0x000000ff);
 415 
 416        // if the unicode was not mappable - look for the substitution bytes
 417 
 418        if (outputByte[0] == 0x00 && outputByte[1] == 0x00
 419                           && unicode != '\u0000') {
 420           if (subMode) {
 421              if (subBytes.length == 1) {
 422                 outputByte[0] = 0x00;
 423                 outputByte[1] = subBytes[0];
 424              } else {
 425                 outputByte[0] = subBytes[0];
 426                 outputByte[1] = subBytes[1];
 427              }
 428           } else {
 429              badInputLength = 1;
 430              throw new UnknownCharacterException();
 431           }
 432        }
 433 
 434        // now put the bytes in the buffer
 435 
 436        bytesToBuffer(outputByte, output, outEnd);
 437 
 438     }
 439 }