src/share/classes/sun/io/CharToByteCp970.java
Print this page
*** 22,442 ****
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package sun.io;
! import sun.nio.cs.ext.IBM970;
! /**
! * @author Malcolm Ayres
! */
! public class CharToByteCp970 extends CharToByteConverter
! {
! private static final char SBase = '\uAC00';
! private static final char LBase = '\u1100';
! private static final char VBase = '\u1161';
! private static final char TBase = '\u11A7';
! private static final int VCount = 21;
! private static final int TCount = 28;
! private static final byte G0 = 0;
! private static final byte G1 = 1;
! private static final byte G2 = 2;
! private static final byte G3 = 3;
! private byte charState = G0;
! private char l, v, t;
!
! private byte[] outputByte;
!
! private char highHalfZoneCode;
! private int mask1;
! private int mask2;
! private int shift;
!
! private short[] index1;
! private String index2;
! private String index2a;
!
! private final static IBM970 nioCoder = new IBM970();
!
! public CharToByteCp970() {
! super();
! highHalfZoneCode = 0;
! outputByte = new byte[2];
! mask1 = 0xFFF8;
! mask2 = 0x0007;
! shift = 3;
! index1 = nioCoder.getEncoderIndex1();
! index2 = nioCoder.getEncoderIndex2();
! index2a = nioCoder.getEncoderIndex2a();
! }
!
! /**
! * flush out any residual data and reset the buffer state
! */
! public int flush(byte[] output, int outStart, int outEnd)
! throws MalformedInputException,
! ConversionBufferFullException
! {
! int bytesOut;
!
! byteOff = outStart;
!
! if (highHalfZoneCode != 0) {
! reset();
! badInputLength = 0;
! throw new MalformedInputException();
! }
!
! if (charState != G0) {
! try {
! unicodeToBuffer(composeHangul() ,output, outEnd);
! }
! catch(UnknownCharacterException e) {
! reset();
! badInputLength = 0;
! throw new MalformedInputException();
! }
! charState = G0;
! }
!
! bytesOut = byteOff - outStart;
!
! reset();
! return bytesOut;
! }
!
! /**
! * Resets converter to its initial state.
! */
! public void reset() {
! highHalfZoneCode = 0;
! charState = G0;
! charOff = byteOff = 0;
! }
!
! /**
! * Returns true if the given character can be converted to the
! * target character encoding.
! */
! public boolean canConvert(char ch) {
! int index;
! int theBytes;
!
! index = index1[((ch & mask1) >> shift)] + (ch & mask2);
! if (index < 15000)
! theBytes = (int)(index2.charAt(index));
! else
! theBytes = (int)(index2a.charAt(index-15000));
!
! if (theBytes != 0)
! return (true);
!
! // only return true if input char was unicode null - all others are
! // undefined
! return( ch == '\u0000');
! }
!
! /**
! * Character conversion
! */
!
! public int convert(char[] input, int inOff, int inEnd,
! byte[] output, int outOff, int outEnd)
! throws UnknownCharacterException, MalformedInputException,
! ConversionBufferFullException
! {
! char inputChar;
! int inputSize;
!
! charOff = inOff;
! byteOff = outOff;
!
! while (charOff < inEnd) {
!
! if (highHalfZoneCode == 0) {
! inputChar = input[charOff];
! inputSize = 1;
! } else {
! inputChar = highHalfZoneCode;
! inputSize = 0;
! highHalfZoneCode = 0;
! }
!
! switch (charState) {
! case G0:
!
! l = LBase;
! v = VBase;
! t = TBase;
!
! if ( isLeadingC(inputChar) ) { // Leading Consonant
! l = inputChar;
! charState = G1;
! break;
! }
!
! if ( isVowel(inputChar) ) { // Vowel
! v = inputChar;
! charState = G2;
! break;
! }
!
! if ( isTrailingC(inputChar) ) { // Trailing Consonant
! t = inputChar;
! charState = G3;
! break;
! }
!
! break;
!
! case G1:
! if ( isLeadingC(inputChar) ) { // Leading Consonant
! l = composeLL(l, inputChar);
! break;
! }
!
! if ( isVowel(inputChar) ) { // Vowel
! v = inputChar;
! charState = G2;
! break;
! }
!
! if ( isTrailingC(inputChar) ) { // Trailing Consonant
! t = inputChar;
! charState = G3;
! break;
! }
!
! unicodeToBuffer(composeHangul(), output, outEnd);
!
! charState = G0;
! break;
!
! case G2:
! if ( isLeadingC(inputChar) ) { // Leading Consonant
!
! unicodeToBuffer(composeHangul(), output, outEnd);
!
! l = inputChar;
! v = VBase;
! t = TBase;
! charState = G1;
! break;
! }
!
! if ( isVowel(inputChar) ) { // Vowel
! v = composeVV(l, inputChar);
! charState = G2;
! break;
! }
!
! if ( isTrailingC(inputChar) ) { // Trailing Consonant
! t = inputChar;
! charState = G3;
! break;
! }
!
! unicodeToBuffer(composeHangul(), output, outEnd);
!
! charState = G0;
!
! break;
!
! case G3:
! if ( isTrailingC(inputChar) ) { // Trailing Consonant
! t = composeTT(t, inputChar);
! charState = G3;
! break;
! }
!
! unicodeToBuffer(composeHangul(), output, outEnd);
!
! charState = G0;
!
! break;
! }
!
! if (charState != G0)
! charOff++;
! else {
!
! // Is this a high surrogate?
! if(inputChar >= '\ud800' && inputChar <= '\udbff') {
! // Is this the last character of the input?
! if (charOff + inputSize >= inEnd) {
! highHalfZoneCode = inputChar;
! charOff += inputSize;
! break;
! }
!
! // Is there a low surrogate following?
! inputChar = input[charOff + inputSize];
! if (inputChar >= '\udc00' && inputChar <= '\udfff') {
! // We have a valid surrogate pair. Too bad we don't do
! // surrogates. Is substitution enabled?
! if (subMode) {
! if (subBytes.length == 1) {
! outputByte[0] = 0x00;
! outputByte[1] = subBytes[0];
! } else {
! outputByte[0] = subBytes[0];
! outputByte[1] = subBytes[1];
! }
!
! bytesToBuffer(outputByte, output, outEnd);
! inputSize++;
! } else {
! badInputLength = 2;
! throw new UnknownCharacterException();
! }
! } else {
! // We have a malformed surrogate pair
! badInputLength = 1;
! throw new MalformedInputException();
! }
! }
!
! // Is this an unaccompanied low surrogate?
! else
! if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
! badInputLength = 1;
! throw new MalformedInputException();
! } else {
! unicodeToBuffer(inputChar, output, outEnd);
! }
!
! charOff += inputSize;
!
! }
!
! }
!
! return byteOff - outOff;
!
! }
!
! private char composeHangul() {
! int lIndex, vIndex, tIndex;
!
! lIndex = l - LBase;
! vIndex = v - VBase;
! tIndex = t - TBase;
!
! return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase);
! }
!
! private char composeLL(char l1, char l2) {
! return l2;
! }
!
! private char composeVV(char v1, char v2) {
! return v2;
! }
!
! private char composeTT(char t1, char t2) {
! return t2;
! }
!
! private boolean isLeadingC(char c) {
! return (c >= LBase && c <= '\u1159');
! }
!
! private boolean isVowel(char c) {
! return (c >= VBase && c <= '\u11a2');
! }
!
! private boolean isTrailingC(char c) {
! return (c >= TBase && c <= '\u11f9');
! }
!
! /**
! * returns the maximum number of bytes needed to convert a char
! */
! public int getMaxBytesPerChar() {
! return 2;
! }
!
!
! /**
! * Return the character set ID
! */
public String getCharacterEncoding() {
return "Cp970";
}
! /**
! * private function to add the bytes to the output buffer
! */
! private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd)
! throws ConversionBufferFullException,
! UnknownCharacterException {
!
! int spaceNeeded;
!
! // ensure sufficient space for the bytes(s)
!
! if (theBytes[0] == 0x00)
! spaceNeeded = 1;
! else
! spaceNeeded = 2;
!
! if (byteOff + spaceNeeded > outEnd)
! throw new ConversionBufferFullException();
!
! // move the data into the buffer
!
! if (spaceNeeded == 1)
! output[byteOff++] = theBytes[1];
! else {
! output[byteOff++] = theBytes[0];
! output[byteOff++] = theBytes[1];
}
-
- }
-
- /**
- * private function to add a unicode character to the output buffer
- */
- private void unicodeToBuffer(char unicode, byte[] output, int outEnd)
- throws ConversionBufferFullException,
- UnknownCharacterException {
-
- int index;
- int theBytes;
-
- // first we convert the unicode to its byte representation
-
- index = index1[((unicode & mask1) >> shift)] + (unicode & mask2);
- if (index < 15000) {
- theBytes = (int)(index2.charAt(index));
- } else {
- theBytes = (int)(index2a.charAt(index-15000));
- }
- outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8);
- outputByte[1] = (byte)(theBytes & 0x000000ff);
-
- // if the unicode was not mappable - look for the substitution bytes
-
- if (outputByte[0] == 0x00 && outputByte[1] == 0x00
- && unicode != '\u0000') {
- if (subMode) {
- if (subBytes.length == 1) {
- outputByte[0] = 0x00;
- outputByte[1] = subBytes[0];
- } else {
- outputByte[0] = subBytes[0];
- outputByte[1] = subBytes[1];
- }
- } else {
- badInputLength = 1;
- throw new UnknownCharacterException();
- }
- }
-
- // now put the bytes in the buffer
-
- bytesToBuffer(outputByte, output, outEnd);
-
- }
-
}
--- 22,40 ----
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
package sun.io;
! import sun.nio.cs.ext.*;
! // EUC_Simple is the same as DBCS_ASCII
! public class CharToByteCp970 extends CharToByteDBCS_ASCII {
! // Return the character set id
public String getCharacterEncoding() {
return "Cp970";
}
! public CharToByteCp970() {
! super((DoubleByte.Encoder)new IBM970().newEncoder());
}
}