src/share/classes/sun/io/CharToByteCp933.java
Print this page
*** 1,7 ****
/*
! * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
--- 1,7 ----
/*
! * Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Sun designates this
*** 22,485 ****
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package sun.io;
! import sun.nio.cs.ext.IBM933;
! /**
! * @author Malcolm Ayres
! */
! public class CharToByteCp933 extends CharToByteConverter
! {
! private static final char SBase = '\uAC00';
! private static final char LBase = '\u1100';
! private static final char VBase = '\u1161';
! private static final char TBase = '\u11A7';
! private static final int VCount = 21;
! private static final int TCount = 28;
! private static final byte G0 = 0;
! private static final byte G1 = 1;
! private static final byte G2 = 2;
! private static final byte G3 = 3;
! private byte charState = G0;
! private char l, v, t;
!
! private int byteState;
! private byte[] outputByte;
! private static final int SBCS = 0;
! private static final int DBCS = 1;
! private static final byte SO = 0x0e;
! private static final byte SI = 0x0f;
!
! private char highHalfZoneCode;
!
! private short[] index1;
! private String index2;
! private String index2a;
! private int mask1;
! private int mask2;
! private int shift;
!
! private final static IBM933 nioCoder = new IBM933();
!
!
! public CharToByteCp933() {
! super();
! byteState = doSBCS()?SBCS:DBCS;
! highHalfZoneCode = 0;
! outputByte = new byte[2];
! mask1 = 0xFFF8;
! mask2 = 0x0007;
! shift = 3;
! index1 = nioCoder.getEncoderIndex1();
! index2 = nioCoder.getEncoderIndex2();
! index2a = nioCoder.getEncoderIndex2a();
! subBytes = new byte[1];
! subBytes[0] = 0x6f;
! }
!
! /**
! * flush out any residual data and reset the buffer state
! */
! public int flush(byte[] output, int outStart, int outEnd)
! throws MalformedInputException,
! ConversionBufferFullException
! {
! int bytesOut;
!
! byteOff = outStart;
!
! if (highHalfZoneCode != 0) {
! reset();
! badInputLength = 0;
! throw new MalformedInputException();
! }
!
! if (charState != G0) {
! try {
! unicodeToBuffer(composeHangul() ,output, outEnd);
! }
! catch(UnknownCharacterException e) {
! reset();
! badInputLength = 0;
! throw new MalformedInputException();
! }
! charState = G0;
! }
!
! if (byteState == DBCS && doSBCS()) {
! if (byteOff >= outEnd)
! throw new ConversionBufferFullException();
! output[byteOff++] = SI;
! byteState = SBCS;
! }
!
! bytesOut = byteOff - outStart;
!
! reset();
! return bytesOut;
! }
!
! /**
! * Resets converter to its initial state.
! */
! public void reset() {
! byteState = doSBCS()?SBCS:DBCS;
! highHalfZoneCode = 0;
! charState = G0;
! charOff = byteOff = 0;
! }
!
! /**
! * Returns true if the given character can be converted to the
! * target character encoding.
! */
! public boolean canConvert(char ch) {
! return encodeHangul(ch) != -1;
! }
!
! /**
! * Sets the substitution bytes to use when the converter is in
! * substitution mode. The given bytes should represent a valid
! * character in the target character encoding.
! */
!
! public void setSubstitutionBytes( byte[] newSubBytes )
! throws IllegalArgumentException
! {
! if( newSubBytes.length > 2 || newSubBytes.length == 0) {
! throw new IllegalArgumentException();
! }
!
! subBytes = new byte[ newSubBytes.length ];
! System.arraycopy( newSubBytes, 0, subBytes, 0, newSubBytes.length );
!
! }
!
! /**
! * Character conversion
! */
!
! public int convert(char[] input, int inOff, int inEnd,
! byte[] output, int outOff, int outEnd)
! throws UnknownCharacterException, MalformedInputException,
! ConversionBufferFullException
! {
! char inputChar;
! int inputSize;
!
! charOff = inOff;
! byteOff = outOff;
!
! while (charOff < inEnd) {
!
! if (highHalfZoneCode == 0) {
! inputChar = input[charOff];
! inputSize = 1;
! } else {
! inputChar = highHalfZoneCode;
! inputSize = 0;
! highHalfZoneCode = 0;
! }
!
! switch (charState) {
! case G0:
!
! l = LBase;
! v = VBase;
! t = TBase;
!
! if ( isLeadingC(inputChar) ) { // Leading Consonant
! l = inputChar;
! charState = G1;
! break;
! }
!
! if ( isVowel(inputChar) ) { // Vowel
! v = inputChar;
! charState = G2;
! break;
! }
!
! if ( isTrailingC(inputChar) ) { // Trailing Consonant
! t = inputChar;
! charState = G3;
! break;
! }
!
! break;
!
! case G1:
! if ( isLeadingC(inputChar) ) { // Leading Consonant
! l = composeLL(l, inputChar);
! break;
! }
!
! if ( isVowel(inputChar) ) { // Vowel
! v = inputChar;
! charState = G2;
! break;
! }
!
! if ( isTrailingC(inputChar) ) { // Trailing Consonant
! t = inputChar;
! charState = G3;
! break;
! }
!
! unicodeToBuffer(composeHangul(), output, outEnd);
!
! charState = G0;
! break;
!
! case G2:
! if ( isLeadingC(inputChar) ) { // Leading Consonant
!
! unicodeToBuffer(composeHangul(), output, outEnd);
!
! l = inputChar;
! v = VBase;
! t = TBase;
! charState = G1;
! break;
! }
!
! if ( isVowel(inputChar) ) { // Vowel
! v = composeVV(l, inputChar);
! charState = G2;
! break;
! }
!
! if ( isTrailingC(inputChar) ) { // Trailing Consonant
! t = inputChar;
! charState = G3;
! break;
! }
!
! unicodeToBuffer(composeHangul(), output, outEnd);
!
! charState = G0;
!
! break;
!
! case G3:
! if ( isTrailingC(inputChar) ) { // Trailing Consonant
! t = composeTT(t, inputChar);
! charState = G3;
! break;
! }
!
! unicodeToBuffer(composeHangul(), output, outEnd);
!
! charState = G0;
!
! break;
! }
!
! if (charState != G0)
! charOff++;
! else {
!
! // Is this a high surrogate?
! if(inputChar >= '\ud800' && inputChar <= '\udbff') {
! // Is this the last character of the input?
! if (charOff + inputSize >= inEnd) {
! highHalfZoneCode = inputChar;
! charOff += inputSize;
! break;
! }
!
! // Is there a low surrogate following?
! inputChar = input[charOff + inputSize];
! if (inputChar >= '\udc00' && inputChar <= '\udfff') {
! // We have a valid surrogate pair. Too bad we don't do
! // surrogates. Is substitution enabled?
! if (subMode) {
! if (subBytes.length == 1) {
! outputByte[0] = 0x00;
! outputByte[1] = subBytes[0];
! } else {
! outputByte[0] = subBytes[0];
! outputByte[1] = subBytes[1];
! }
!
! bytesToBuffer(outputByte, output, outEnd);
! inputSize++;
! } else {
! badInputLength = 2;
! throw new UnknownCharacterException();
! }
! } else {
! // We have a malformed surrogate pair
! badInputLength = 1;
! throw new MalformedInputException();
! }
! }
!
! // Is this an unaccompanied low surrogate?
! else
! if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
! badInputLength = 1;
! throw new MalformedInputException();
! } else {
! unicodeToBuffer(inputChar, output, outEnd);
! }
!
! charOff += inputSize;
!
! }
!
! }
!
! return byteOff - outOff;
!
! }
!
! private char composeHangul() {
! int lIndex, vIndex, tIndex;
!
! lIndex = l - LBase;
! vIndex = v - VBase;
! tIndex = t - TBase;
!
! return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase);
! }
!
! private char composeLL(char l1, char l2) {
! return l2;
! }
!
! private char composeVV(char v1, char v2) {
! return v2;
! }
!
! private char composeTT(char t1, char t2) {
! return t2;
! }
!
! private boolean isLeadingC(char c) {
! return (c >= LBase && c <= '\u1159');
! }
!
! private boolean isVowel(char c) {
! return (c >= VBase && c <= '\u11a2');
! }
!
! private boolean isTrailingC(char c) {
! return (c >= TBase && c <= '\u11f9');
! }
!
! /**
! * returns the maximum number of bytes needed to convert a char
! */
! public int getMaxBytesPerChar() {
! return 4;
! }
!
! /**
! * Return the character set ID
! */
public String getCharacterEncoding() {
return "Cp933";
}
! /**
! * private function to add the bytes to the output buffer
! */
! private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd)
! throws ConversionBufferFullException,
! UnknownCharacterException {
!
! int spaceNeeded;
!
! // Set the output buffer into the correct state
!
! if (byteState == DBCS && theBytes[0] == 0x00) {
! if (byteOff >= outEnd)
! throw new ConversionBufferFullException();
! byteState = SBCS;
! output[byteOff++] = SI;
! } else
! if (byteState == SBCS && theBytes[0] != 0x00) {
! if (byteOff >= outEnd)
! throw new ConversionBufferFullException();
! byteState = DBCS;
! output[byteOff++] = SO;
}
-
-
- // ensure sufficient space for the bytes(s)
-
- if (byteState == DBCS)
- spaceNeeded = 2;
- else
- spaceNeeded = 1;
-
- if (byteOff + spaceNeeded > outEnd)
- throw new ConversionBufferFullException();
-
- // move the data into the buffer
-
- if (byteState == SBCS)
- output[byteOff++] = theBytes[1];
- else {
- output[byteOff++] = theBytes[0];
- output[byteOff++] = theBytes[1];
- }
- }
-
- // return -1 for unmappable character
- protected int encodeHangul(char unicode) {
- int theBytes;
- int index;
- index = index1[((unicode & mask1) >> shift)] + (unicode & mask2);
- if (index < 15000)
- theBytes = (int)(index2.charAt(index));
- else
- theBytes = (int)(index2a.charAt(index-15000));
-
- // The input char is undefined if theBytes is 0 and the char is NOT unicode 0
- if (theBytes == 0 && unicode != '\u0000')
- return -1;
- return theBytes;
- }
-
- /**
- * private function to add a unicode character to the output buffer
- */
- private void unicodeToBuffer(char unicode, byte[] output, int outEnd)
- throws ConversionBufferFullException,
- UnknownCharacterException {
-
- // first we convert the unicode to its byte representation
- int theBytes = encodeHangul(unicode);
-
- // if the unicode was not mappable - look for the substitution bytes
- if (theBytes == -1) {
- if (subMode) {
- if (subBytes.length == 1) {
- outputByte[0] = 0x00;
- outputByte[1] = subBytes[0];
- } else {
- outputByte[0] = subBytes[0];
- outputByte[1] = subBytes[1];
- }
- } else {
- badInputLength = 1;
- throw new UnknownCharacterException();
- }
- } else {
- outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8);
- outputByte[1] = (byte)(theBytes & 0x000000ff);
- }
-
- // now put the bytes in the buffer
- bytesToBuffer(outputByte, output, outEnd);
- }
-
- //Methods below are for subclass Cp834
- protected boolean doSBCS() {
- return true;
- }
}
--- 22,39 ----
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package sun.io;
! import sun.nio.cs.ext.*;
! public class CharToByteCp933 extends CharToByteDBCS_EBCDIC {
! // Return the character set id
public String getCharacterEncoding() {
return "Cp933";
}
! public CharToByteCp933() {
! super((DoubleByte.Encoder)new IBM933().newEncoder());
}
}