1 /* 2 * Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Sun designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Sun in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 22 * CA 95054 USA or visit www.sun.com if you need additional information or 23 * have any questions. 24 */ 25 package sun.io; 26 27 import sun.nio.cs.ext.IBM949; 28 29 /** 30 * @author Malcolm Ayres 31 */ 32 33 public class CharToByteCp949 extends CharToByteConverter 34 { 35 private static final char SBase = '\uAC00'; 36 private static final char LBase = '\u1100'; 37 private static final char VBase = '\u1161'; 38 private static final char TBase = '\u11A7'; 39 private static final int VCount = 21; 40 private static final int TCount = 28; 41 private static final byte G0 = 0; 42 private static final byte G1 = 1; 43 private static final byte G2 = 2; 44 private static final byte G3 = 3; 45 private byte charState = G0; 46 private char l, v, t; 47 48 private byte[] outputByte; 49 50 private char highHalfZoneCode; 51 private int mask1; 52 private int mask2; 53 private int shift; 54 private short[] index1; 55 private String index2; 56 private String index2a; 57 58 private final static IBM949 nioCoder = new IBM949(); 59 60 public CharToByteCp949() { 61 super(); 62 highHalfZoneCode = 0; 63 outputByte = new byte[2]; 64 mask1 = 0xFFF8; 65 mask2 = 0x0007; 66 shift = 3; 67 index1 = nioCoder.getEncoderIndex1(); 68 index2 = nioCoder.getEncoderIndex2(); 69 index2a = nioCoder.getEncoderIndex2a(); 70 } 71 72 /** 73 * flush out any residual data and reset the buffer state 74 */ 75 public int flush(byte[] output, int outStart, int outEnd) 76 throws MalformedInputException, 77 ConversionBufferFullException 78 { 79 int bytesOut; 80 81 byteOff = outStart; 82 83 if (highHalfZoneCode != 0) { 84 reset(); 85 badInputLength = 0; 86 throw new MalformedInputException(); 87 } 88 89 if (charState != G0) { 90 try { 91 unicodeToBuffer(composeHangul() ,output, outEnd); 92 } 93 catch(UnknownCharacterException e) { 94 reset(); 95 badInputLength = 0; 96 throw new MalformedInputException(); 97 } 98 charState = G0; 99 } 100 101 bytesOut = byteOff - outStart; 102 103 reset(); 104 return bytesOut; 105 } 106 107 /** 108 * Resets converter to its initial state. 109 */ 110 public void reset() { 111 highHalfZoneCode = 0; 112 charState = G0; 113 charOff = byteOff = 0; 114 } 115 116 /** 117 * Returns true if the given character can be converted to the 118 * target character encoding. 119 */ 120 public boolean canConvert(char ch) { 121 int index; 122 int theBytes; 123 124 index = index1[((ch & mask1) >> shift)] + (ch & mask2); 125 if (index < 15000) 126 theBytes = (int)(index2.charAt(index)); 127 else 128 theBytes = (int)(index2a.charAt(index-15000)); 129 130 if (theBytes != 0) 131 return (true); 132 133 // only return true if input char was unicode null - all others are 134 // undefined 135 return( ch == '\u0000'); 136 } 137 138 /** 139 * Character conversion 140 */ 141 142 public int convert(char[] input, int inOff, int inEnd, 143 byte[] output, int outOff, int outEnd) 144 throws UnknownCharacterException, MalformedInputException, 145 ConversionBufferFullException 146 { 147 char inputChar; 148 int inputSize; 149 150 charOff = inOff; 151 byteOff = outOff; 152 153 while (charOff < inEnd) { 154 155 if (highHalfZoneCode == 0) { 156 inputChar = input[charOff]; 157 inputSize = 1; 158 } else { 159 inputChar = highHalfZoneCode; 160 inputSize = 0; 161 highHalfZoneCode = 0; 162 } 163 164 switch (charState) { 165 case G0: 166 167 l = LBase; 168 v = VBase; 169 t = TBase; 170 171 if ( isLeadingC(inputChar) ) { // Leading Consonant 172 l = inputChar; 173 charState = G1; 174 break; 175 } 176 177 if ( isVowel(inputChar) ) { // Vowel 178 v = inputChar; 179 charState = G2; 180 break; 181 } 182 183 if ( isTrailingC(inputChar) ) { // Trailing Consonant 184 t = inputChar; 185 charState = G3; 186 break; 187 } 188 189 break; 190 191 case G1: 192 if ( isLeadingC(inputChar) ) { // Leading Consonant 193 l = composeLL(l, inputChar); 194 break; 195 } 196 197 if ( isVowel(inputChar) ) { // Vowel 198 v = inputChar; 199 charState = G2; 200 break; 201 } 202 203 if ( isTrailingC(inputChar) ) { // Trailing Consonant 204 t = inputChar; 205 charState = G3; 206 break; 207 } 208 209 unicodeToBuffer(composeHangul(), output, outEnd); 210 211 charState = G0; 212 break; 213 214 case G2: 215 if ( isLeadingC(inputChar) ) { // Leading Consonant 216 217 unicodeToBuffer(composeHangul(), output, outEnd); 218 219 l = inputChar; 220 v = VBase; 221 t = TBase; 222 charState = G1; 223 break; 224 } 225 226 if ( isVowel(inputChar) ) { // Vowel 227 v = composeVV(l, inputChar); 228 charState = G2; 229 break; 230 } 231 232 if ( isTrailingC(inputChar) ) { // Trailing Consonant 233 t = inputChar; 234 charState = G3; 235 break; 236 } 237 238 unicodeToBuffer(composeHangul(), output, outEnd); 239 240 charState = G0; 241 242 break; 243 244 case G3: 245 if ( isTrailingC(inputChar) ) { // Trailing Consonant 246 t = composeTT(t, inputChar); 247 charState = G3; 248 break; 249 } 250 251 unicodeToBuffer(composeHangul(), output, outEnd); 252 253 charState = G0; 254 255 break; 256 } 257 258 if (charState != G0) 259 charOff++; 260 else { 261 262 // Is this a high surrogate? 263 if(inputChar >= '\ud800' && inputChar <= '\udbff') { 264 // Is this the last character of the input? 265 if (charOff + inputSize >= inEnd) { 266 highHalfZoneCode = inputChar; 267 charOff += inputSize; 268 break; 269 } 270 271 // Is there a low surrogate following? 272 inputChar = input[charOff + inputSize]; 273 if (inputChar >= '\udc00' && inputChar <= '\udfff') { 274 // We have a valid surrogate pair. Too bad we don't do 275 // surrogates. Is substitution enabled? 276 if (subMode) { 277 if (subBytes.length == 1) { 278 outputByte[0] = 0x00; 279 outputByte[1] = subBytes[0]; 280 } else { 281 outputByte[0] = subBytes[0]; 282 outputByte[1] = subBytes[1]; 283 } 284 285 bytesToBuffer(outputByte, output, outEnd); 286 inputSize++; 287 } else { 288 badInputLength = 2; 289 throw new UnknownCharacterException(); 290 } 291 } else { 292 // We have a malformed surrogate pair 293 badInputLength = 1; 294 throw new MalformedInputException(); 295 } 296 } 297 298 // Is this an unaccompanied low surrogate? 299 else 300 if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { 301 badInputLength = 1; 302 throw new MalformedInputException(); 303 } else { 304 unicodeToBuffer(inputChar, output, outEnd); 305 } 306 307 charOff += inputSize; 308 309 } 310 311 } 312 313 return byteOff - outOff; 314 315 } 316 317 private char composeHangul() { 318 int lIndex, vIndex, tIndex; 319 320 lIndex = l - LBase; 321 vIndex = v - VBase; 322 tIndex = t - TBase; 323 324 return (char)((lIndex * VCount + vIndex) * TCount + tIndex + SBase); 325 } 326 327 private char composeLL(char l1, char l2) { 328 return l2; 329 } 330 331 private char composeVV(char v1, char v2) { 332 return v2; 333 } 334 335 private char composeTT(char t1, char t2) { 336 return t2; 337 } 338 339 private boolean isLeadingC(char c) { 340 return (c >= LBase && c <= '\u1159'); 341 } 342 343 private boolean isVowel(char c) { 344 return (c >= VBase && c <= '\u11a2'); 345 } 346 347 private boolean isTrailingC(char c) { 348 return (c >= TBase && c <= '\u11f9'); 349 } 350 351 /** 352 * returns the maximum number of bytes needed to convert a char 353 */ 354 public int getMaxBytesPerChar() { 355 return 2; 356 } 357 358 359 /** 360 * Return the character set ID 361 */ 362 public String getCharacterEncoding() { 363 return "Cp949"; 364 } 365 366 /** 367 * private function to add the bytes to the output buffer 368 */ 369 private void bytesToBuffer(byte[] theBytes, byte[] output, int outEnd) 370 throws ConversionBufferFullException, 371 UnknownCharacterException { 372 373 int spaceNeeded; 374 375 // ensure sufficient space for the bytes(s) 376 377 if (theBytes[0] == 0x00) 378 spaceNeeded = 1; 379 else 380 spaceNeeded = 2; 381 382 if (byteOff + spaceNeeded > outEnd) 383 throw new ConversionBufferFullException(); 384 385 // move the data into the buffer 386 387 if (spaceNeeded == 1) 388 output[byteOff++] = theBytes[1]; 389 else { 390 output[byteOff++] = theBytes[0]; 391 output[byteOff++] = theBytes[1]; 392 } 393 394 } 395 396 /** 397 * private function to add a unicode character to the output buffer 398 */ 399 private void unicodeToBuffer(char unicode, byte[] output, int outEnd) 400 throws ConversionBufferFullException, 401 UnknownCharacterException { 402 403 int index; 404 int theBytes; 405 406 // first we convert the unicode to its byte representation 407 408 index = index1[((unicode & mask1) >> shift)] + (unicode & mask2); 409 if (index < 15000) 410 theBytes = (int)(index2.charAt(index)); 411 else 412 theBytes = (int)(index2a.charAt(index-15000)); 413 outputByte[0] = (byte)((theBytes & 0x0000ff00)>>8); 414 outputByte[1] = (byte)(theBytes & 0x000000ff); 415 416 // if the unicode was not mappable - look for the substitution bytes 417 418 if (outputByte[0] == 0x00 && outputByte[1] == 0x00 419 && unicode != '\u0000') { 420 if (subMode) { 421 if (subBytes.length == 1) { 422 outputByte[0] = 0x00; 423 outputByte[1] = subBytes[0]; 424 } else { 425 outputByte[0] = subBytes[0]; 426 outputByte[1] = subBytes[1]; 427 } 428 } else { 429 badInputLength = 1; 430 throw new UnknownCharacterException(); 431 } 432 } 433 434 // now put the bytes in the buffer 435 436 bytesToBuffer(outputByte, output, outEnd); 437 438 } 439 }