--- old/src/share/classes/sun/io/CharToByteCp933.java Wed May 20 19:00:03 2009 +++ new/src/share/classes/sun/io/CharToByteCp933.java Wed May 20 19:00:02 2009 @@ -1,5 +1,5 @@ /* - * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,462 +24,16 @@ */ package sun.io; -import sun.nio.cs.ext.IBM933; +import sun.nio.cs.ext.*; -/** -* @author Malcolm Ayres -*/ +public class CharToByteCp933 extends CharToByteDBCS_EBCDIC { -public class CharToByteCp933 extends CharToByteConverter -{ - private static final char SBase = '\uAC00'; - private static final char LBase = '\u1100'; - private static final char VBase = '\u1161'; - private static final char TBase = '\u11A7'; - private static final int VCount = 21; - private static final int TCount = 28; - private static final byte G0 = 0; - private static final byte G1 = 1; - private static final byte G2 = 2; - private static final byte G3 = 3; - private byte charState = G0; - private char l, v, t; - - private int byteState; - private byte[] outputByte; - private static final int SBCS = 0; - private static final int DBCS = 1; - private static final byte SO = 0x0e; - private static final byte SI = 0x0f; - - private char highHalfZoneCode; - - private short[] index1; - private String index2; - private String index2a; - private int mask1; - private int mask2; - private int shift; - - private final static IBM933 nioCoder = new IBM933(); - - - public CharToByteCp933() { - super(); - byteState = doSBCS()?SBCS:DBCS; - highHalfZoneCode = 0; - outputByte = new byte[2]; - mask1 = 0xFFF8; - mask2 = 0x0007; - shift = 3; - index1 = nioCoder.getEncoderIndex1(); - index2 = nioCoder.getEncoderIndex2(); - index2a = nioCoder.getEncoderIndex2a(); - subBytes = new byte[1]; - subBytes[0] = 0x6f; - } - - /** - * flush out any residual data and reset the buffer state - */ - public int flush(byte[] output, int outStart, int outEnd) - throws MalformedInputException, - ConversionBufferFullException - { - int bytesOut; - - byteOff = outStart; - - if (highHalfZoneCode != 0) { - reset(); - badInputLength = 0; - throw new MalformedInputException(); - } - - if (charState != G0) { - try { - unicodeToBuffer(composeHangul() ,output, outEnd); - } - catch(UnknownCharacterException e) { - reset(); - badInputLength = 0; - throw new MalformedInputException(); - } - charState = G0; - } - - if (byteState == DBCS && doSBCS()) { - if (byteOff >= outEnd) - throw new ConversionBufferFullException(); - output[byteOff++] = SI; - byteState = SBCS; - } - - bytesOut = byteOff - outStart; - - reset(); - return bytesOut; - } - - /** - * Resets converter to its initial state. - */ - public void reset() { - byteState = doSBCS()?SBCS:DBCS; - highHalfZoneCode = 0; - charState = G0; - charOff = byteOff = 0; - } - - /** - * Returns true if the given character can be converted to the - * target character encoding. - */ - public boolean canConvert(char ch) { - return encodeHangul(ch) != -1; - } - - /** - * Sets the substitution bytes to use when the converter is in - * substitution mode. The given bytes should represent a valid - * character in the target character encoding. - */ - - public void setSubstitutionBytes( byte[] newSubBytes ) - throws IllegalArgumentException - { - if( newSubBytes.length > 2 || newSubBytes.length == 0) { - throw new IllegalArgumentException(); - } - - subBytes = new byte[ newSubBytes.length ]; - System.arraycopy( newSubBytes, 0, subBytes, 0, newSubBytes.length ); - - } - - /** - * Character conversion - */ - - public int convert(char[] input, int inOff, int inEnd, - byte[] output, int outOff, int outEnd) - throws UnknownCharacterException, MalformedInputException, - ConversionBufferFullException - { - char inputChar; - int inputSize; - - charOff = inOff; - byteOff = outOff; - - while (charOff < inEnd) { - - if (highHalfZoneCode == 0) { - inputChar = input[charOff]; - inputSize = 1; - } else { - inputChar = highHalfZoneCode; - inputSize = 0; - highHalfZoneCode = 0; - } - - switch (charState) { - case G0: - - l = LBase; - v = VBase; - t = TBase; - - if ( isLeadingC(inputChar) ) { // Leading Consonant - l = inputChar; - charState = G1; - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = inputChar; - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - break; - - case G1: - if ( isLeadingC(inputChar) ) { // Leading Consonant - l = composeLL(l, inputChar); - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = inputChar; - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - break; - - case G2: - if ( isLeadingC(inputChar) ) { // Leading Consonant - - unicodeToBuffer(composeHangul(), output, outEnd); - - l = inputChar; - v = VBase; - t = TBase; - charState = G1; - break; - } - - if ( isVowel(inputChar) ) { // Vowel - v = composeVV(l, inputChar); - charState = G2; - break; - } - - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = inputChar; - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - - break; - - case G3: - if ( isTrailingC(inputChar) ) { // Trailing Consonant - t = composeTT(t, inputChar); - charState = G3; - break; - } - - unicodeToBuffer(composeHangul(), output, outEnd); - - charState = G0; - - break; - } - - if (charState != G0) - charOff++; - else { - - // Is this a high surrogate? - if(inputChar >= '\ud800' && inputChar <= '\udbff') { - // Is this the last character of the input? - if (charOff + inputSize >= inEnd) { - highHalfZoneCode = inputChar; - charOff += inputSize; - break; - } - - // Is there a low surrogate following? - inputChar = input[charOff + inputSize]; - if (inputChar >= '\udc00' && inputChar <= '\udfff') { - // We have a valid surrogate pair. Too bad we don't do - // surrogates. Is substitution enabled? - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; - } else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; - } - - bytesToBuffer(outputByte, output, outEnd); - inputSize++; - } else { - badInputLength = 2; - throw new UnknownCharacterException(); - } - } else { - // We have a malformed surrogate pair - badInputLength = 1; - throw new MalformedInputException(); - } - } - - // Is this an unaccompanied low surrogate? - else - if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { - badInputLength = 1; - throw new MalformedInputException(); - } else { - unicodeToBuffer(inputChar, output, outEnd); - } - - charOff += inputSize; - - } - - } - - return byteOff - outOff; - - } - - private char composeHangul() { - int lIndex, vIndex, tIndex; - - lIndex = l - LBase; - vIndex = v - VBase; - tIndex = t - TBase; - - return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase); - } - - private char composeLL(char l1, char l2) { - return l2; - } - - private char composeVV(char v1, char v2) { - return v2; - } - - private char composeTT(char t1, char t2) { - return t2; - } - - private boolean isLeadingC(char c) { - return (c >= LBase && c <= '\u1159'); - } - - private boolean isVowel(char c) { - return (c >= VBase && c <= '\u11a2'); - } - - private boolean isTrailingC(char c) { - return (c >= TBase && c <= '\u11f9'); - } - - /** - * returns the maximum number of bytes needed to convert a char - */ - public int getMaxBytesPerChar() { - return 4; - } - - /** - * Return the character set ID - */ + // Return the character set id public String getCharacterEncoding() { - return "Cp933"; + return "Cp933"; } - /** - * private function to add the bytes to the output buffer - */ - private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd) - throws ConversionBufferFullException, - UnknownCharacterException { - - int spaceNeeded; - - // Set the output buffer into the correct state - - if (byteState == DBCS && theBytes[0] == 0x00) { - if (byteOff >= outEnd) - throw new ConversionBufferFullException(); - byteState = SBCS; - output[byteOff++] = SI; - } else - if (byteState == SBCS && theBytes[0] != 0x00) { - if (byteOff >= outEnd) - throw new ConversionBufferFullException(); - byteState = DBCS; - output[byteOff++] = SO; - } - - - // ensure sufficient space for the bytes(s) - - if (byteState == DBCS) - spaceNeeded = 2; - else - spaceNeeded = 1; - - if (byteOff + spaceNeeded > outEnd) - throw new ConversionBufferFullException(); - - // move the data into the buffer - - if (byteState == SBCS) - output[byteOff++] = theBytes[1]; - else { - output[byteOff++] = theBytes[0]; - output[byteOff++] = theBytes[1]; - } + public CharToByteCp933() { + super((DoubleByte.Encoder)new IBM933().newEncoder()); } - - // return -1 for unmappable character - protected int encodeHangul(char unicode) { - int theBytes; - int index; - index = index1[((unicode & mask1) >> shift)] + (unicode & mask2); - if (index < 15000) - theBytes = (int)(index2.charAt(index)); - else - theBytes = (int)(index2a.charAt(index-15000)); - - // The input char is undefined if theBytes is 0 and the char is NOT unicode 0 - if (theBytes == 0 && unicode != '\u0000') - return -1; - return theBytes; - } - - /** - * private function to add a unicode character to the output buffer - */ - private void unicodeToBuffer(char unicode, byte[] output, int outEnd) - throws ConversionBufferFullException, - UnknownCharacterException { - - // first we convert the unicode to its byte representation - int theBytes = encodeHangul(unicode); - - // if the unicode was not mappable - look for the substitution bytes - if (theBytes == -1) { - if (subMode) { - if (subBytes.length == 1) { - outputByte[0] = 0x00; - outputByte[1] = subBytes[0]; - } else { - outputByte[0] = subBytes[0]; - outputByte[1] = subBytes[1]; - } - } else { - badInputLength = 1; - throw new UnknownCharacterException(); - } - } else { - outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8); - outputByte[1] = (byte)(theBytes & 0x000000ff); - } - - // now put the bytes in the buffer - bytesToBuffer(outputByte, output, outEnd); - } - - //Methods below are for subclass Cp834 - protected boolean doSBCS() { - return true; - } }