1 /* 2 * Copyright 2003-2006 Sun Microsystems, Inc. All Rights Reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Sun designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Sun in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 22 * CA 95054 USA or visit www.sun.com if you need additional information or 23 * have any questions. 24 */ 25 26 /* 27 */ 28 29 package sun.nio.cs.ext; 30 31 import java.nio.ByteBuffer; 32 import java.nio.CharBuffer; 33 import java.nio.charset.Charset; 34 import java.nio.charset.CharsetDecoder; 35 import java.nio.charset.CharsetEncoder; 36 import java.nio.charset.CoderResult; 37 import java.nio.charset.CharacterCodingException; 38 import sun.nio.cs.HistoricallyNamedCharset; 39 import sun.nio.cs.US_ASCII; 40 41 public class ISO2022_CN 42 extends Charset 43 implements HistoricallyNamedCharset 44 { 45 private static final byte ISO_ESC = 0x1b; 46 private static final byte ISO_SI = 0x0f; 47 private static final byte ISO_SO = 0x0e; 48 private static final byte ISO_SS2_7 = 0x4e; 49 private static final byte ISO_SS3_7 = 0x4f; 50 private static final byte MSB = (byte)0x80; 51 private static final char REPLACE_CHAR = '\uFFFD'; 52 53 private static final byte SODesigGB = 0; 54 private static final byte SODesigCNS = 1; 55 56 public ISO2022_CN() { 57 super("ISO-2022-CN", ExtendedCharsets.aliasesFor("ISO-2022-CN")); 58 } 59 60 public String historicalName() { 61 return "ISO2022CN"; 62 } 63 64 public boolean contains(Charset cs) { 65 return ((cs instanceof EUC_CN) // GB2312-80 repertoire 66 || (cs instanceof US_ASCII) 67 || (cs instanceof EUC_TW) // CNS11643 repertoire 68 || (cs instanceof ISO2022_CN)); 69 } 70 71 public CharsetDecoder newDecoder() { 72 return new Decoder(this); 73 } 74 75 public CharsetEncoder newEncoder() { 76 throw new UnsupportedOperationException(); 77 } 78 79 public boolean canEncode() { 80 return false; 81 } 82 83 static class Decoder extends CharsetDecoder { 84 private boolean shiftOut; 85 private byte currentSODesig; 86 87 private static final Charset gb2312 = new EUC_CN(); 88 private static final Charset cns = new EUC_TW(); 89 private final EUC_CN.Decoder gb2312Decoder; 90 private final EUC_TW.Decoder cnsDecoder; 91 92 Decoder(Charset cs) { 93 super(cs, 1.0f, 1.0f); 94 shiftOut = false; 95 currentSODesig = SODesigGB; 96 gb2312Decoder = (EUC_CN.Decoder)gb2312.newDecoder(); 97 cnsDecoder = (EUC_TW.Decoder)cns.newDecoder(); 98 } 99 100 protected void implReset() { 101 shiftOut= false; 102 currentSODesig = SODesigGB; 103 } 104 105 private char cnsDecode(byte byte1, byte byte2, byte SS) { 106 byte1 |= MSB; 107 byte2 |= MSB; 108 int p = 0; 109 if (SS == ISO_SS2_7) 110 p = 1; //plane 2, index -- 1 111 else if (SS == ISO_SS3_7) 112 p = 2; //plane 3, index -- 2 113 else 114 return REPLACE_CHAR; //never happen. 115 char[] ret = cnsDecoder.toUnicode(byte1 & 0xff, 116 byte2 & 0xff, 117 p); 118 if (ret == null || ret.length == 2) 119 return REPLACE_CHAR; 120 return ret[0]; 121 } 122 123 private char SODecode(byte byte1, byte byte2, byte SOD) { 124 byte1 |= MSB; 125 byte2 |= MSB; 126 if (SOD == SODesigGB) { 127 return gb2312Decoder.decodeDouble(byte1 & 0xff, 128 byte2 & 0xff); 129 } else { // SOD == SODesigCNS 130 char[] ret = cnsDecoder.toUnicode(byte1 & 0xff, 131 byte2 & 0xff, 132 0); 133 if (ret == null) 134 return REPLACE_CHAR; 135 return ret[0]; 136 } 137 } 138 139 private CoderResult decodeBufferLoop(ByteBuffer src, 140 CharBuffer dst) 141 { 142 int mark = src.position(); 143 byte b1 = 0, b2 = 0, b3 = 0, b4 = 0; 144 int inputSize = 0; 145 char c = REPLACE_CHAR; 146 try { 147 while (src.hasRemaining()) { 148 b1 = src.get(); 149 inputSize = 1; 150 151 while (b1 == ISO_ESC || 152 b1 == ISO_SO || 153 b1 == ISO_SI) { 154 if (b1 == ISO_ESC) { // ESC 155 currentSODesig = SODesigGB; 156 157 if (src.remaining() < 1) 158 return CoderResult.UNDERFLOW; 159 160 b2 = src.get(); 161 inputSize++; 162 163 if ((b2 & (byte)0x80) != 0) 164 return CoderResult.malformedForLength(inputSize); 165 166 if (b2 == (byte)0x24) { 167 if (src.remaining() < 1) 168 return CoderResult.UNDERFLOW; 169 170 b3 = src.get(); 171 inputSize++; 172 173 if ((b3 & (byte)0x80) != 0) 174 return CoderResult.malformedForLength(inputSize); 175 if (b3 == 'A'){ // "$A" 176 currentSODesig = SODesigGB; 177 } else if (b3 == ')') { 178 if (src.remaining() < 1) 179 return CoderResult.UNDERFLOW; 180 b4 = src.get(); 181 inputSize++; 182 if (b4 == 'A'){ // "$)A" 183 currentSODesig = SODesigGB; 184 } else if (b4 == 'G'){ // "$)G" 185 currentSODesig = SODesigCNS; 186 } else { 187 return CoderResult.malformedForLength(inputSize); 188 } 189 } else if (b3 == '*') { 190 if (src.remaining() < 1) 191 return CoderResult.UNDERFLOW; 192 b4 = src.get(); 193 inputSize++; 194 if (b4 != 'H') { // "$*H" 195 //SS2Desig -> CNS-P1 196 return CoderResult.malformedForLength(inputSize); 197 } 198 } else if (b3 == '+') { 199 if (src.remaining() < 1) 200 return CoderResult.UNDERFLOW; 201 b4 = src.get(); 202 inputSize++; 203 if (b4 != 'I'){ // "$+I" 204 //SS3Desig -> CNS-P2. 205 return CoderResult.malformedForLength(inputSize); 206 } 207 } else { 208 return CoderResult.malformedForLength(inputSize); 209 } 210 } else if (b2 == ISO_SS2_7 || b2 == ISO_SS3_7) { 211 if (src.remaining() < 2) 212 return CoderResult.UNDERFLOW; 213 b3 = src.get(); 214 b4 = src.get(); 215 inputSize += 2; 216 if (dst.remaining() < 1) 217 return CoderResult.OVERFLOW; 218 //SS2->CNS-P2, SS3->CNS-P3 219 c = cnsDecode(b3, b4, b2); 220 if (c == REPLACE_CHAR) 221 return CoderResult.unmappableForLength(inputSize); 222 dst.put(c); 223 } else { 224 return CoderResult.malformedForLength(inputSize); 225 } 226 } else if (b1 == ISO_SO) { 227 shiftOut = true; 228 } else if (b1 == ISO_SI) { // shift back in 229 shiftOut = false; 230 } 231 mark += inputSize; 232 if (src.remaining() < 1) 233 return CoderResult.UNDERFLOW; 234 b1 = src.get(); 235 inputSize = 1; 236 } 237 238 if (dst.remaining() < 1) 239 return CoderResult.OVERFLOW; 240 241 if (!shiftOut) { 242 dst.put((char)(b1 & 0xff)); //clear the upper byte 243 mark += inputSize; 244 } else { 245 if (src.remaining() < 1) 246 return CoderResult.UNDERFLOW; 247 b2 = src.get(); 248 inputSize++; 249 c = SODecode(b1, b2, currentSODesig); 250 if (c == REPLACE_CHAR) 251 return CoderResult.unmappableForLength(inputSize); 252 dst.put(c); 253 mark += inputSize; 254 } 255 } 256 return CoderResult.UNDERFLOW; 257 } finally { 258 src.position(mark); 259 } 260 } 261 262 private CoderResult decodeArrayLoop(ByteBuffer src, 263 CharBuffer dst) 264 { 265 int inputSize = 0; 266 byte b1 = 0, b2 = 0, b3 = 0, b4 = 0; 267 char c = REPLACE_CHAR; 268 269 byte[] sa = src.array(); 270 int sp = src.arrayOffset() + src.position(); 271 int sl = src.arrayOffset() + src.limit(); 272 assert (sp <= sl); 273 sp = (sp <= sl ? sp : sl); 274 275 char[] da = dst.array(); 276 int dp = dst.arrayOffset() + dst.position(); 277 int dl = dst.arrayOffset() + dst.limit(); 278 assert (dp <= dl); 279 dp = (dp <= dl ? dp : dl); 280 281 try { 282 while (sp < sl) { 283 b1 = sa[sp]; 284 inputSize = 1; 285 286 while (b1 == ISO_ESC || b1 == ISO_SO || b1 == ISO_SI) { 287 if (b1 == ISO_ESC) { // ESC 288 currentSODesig = SODesigGB; 289 290 if (sp + 2 > sl) 291 return CoderResult.UNDERFLOW; 292 293 b2 = sa[sp + 1]; 294 inputSize++; 295 296 if ((b2 & (byte)0x80) != 0) 297 return CoderResult.malformedForLength(inputSize); 298 if (b2 == (byte)0x24) { 299 if (sp + 3 > sl) 300 return CoderResult.UNDERFLOW; 301 302 b3 = sa[sp + 2]; 303 inputSize++; 304 305 if ((b3 & (byte)0x80) != 0) 306 return CoderResult.malformedForLength(inputSize); 307 if (b3 == 'A'){ // "$A" 308 /* <ESC>$A is not a legal designator sequence for 309 ISO2022_CN, it is listed as an escape sequence 310 for GB2312 in ISO2022-JP-2. Keep it here just for 311 the sake of "compatibility". 312 */ 313 currentSODesig = SODesigGB; 314 } else if (b3 == ')') { 315 if (sp + 4 > sl) 316 return CoderResult.UNDERFLOW; 317 b4 = sa[sp + 3]; 318 inputSize++; 319 320 if (b4 == 'A'){ // "$)A" 321 currentSODesig = SODesigGB; 322 } else if (b4 == 'G'){ // "$)G" 323 currentSODesig = SODesigCNS; 324 } else { 325 return CoderResult.malformedForLength(inputSize); 326 } 327 } else if (b3 == '*') { 328 if (sp + 4 > sl) 329 return CoderResult.UNDERFLOW; 330 b4 = sa[sp + 3]; 331 inputSize++; 332 if (b4 != 'H'){ // "$*H" 333 return CoderResult.malformedForLength(inputSize); 334 } 335 } else if (b3 == '+') { 336 if (sp + 4 > sl) 337 return CoderResult.UNDERFLOW; 338 b4 = sa[sp + 3]; 339 inputSize++; 340 if (b4 != 'I'){ // "$+I" 341 return CoderResult.malformedForLength(inputSize); 342 } 343 } else { 344 return CoderResult.malformedForLength(inputSize); 345 } 346 } else if (b2 == ISO_SS2_7 || b2 == ISO_SS3_7) { 347 if (sp + 4 > sl) { 348 return CoderResult.UNDERFLOW; 349 } 350 b3 = sa[sp + 2]; 351 b4 = sa[sp + 3]; 352 if (dl - dp < 1) { 353 return CoderResult.OVERFLOW; 354 } 355 inputSize += 2; 356 c = cnsDecode(b3, b4, b2); 357 if (c == REPLACE_CHAR) 358 return CoderResult.unmappableForLength(inputSize); 359 da[dp++] = c; 360 } else { 361 return CoderResult.malformedForLength(inputSize); 362 } 363 } else if (b1 == ISO_SO) { 364 shiftOut = true; 365 } else if (b1 == ISO_SI) { // shift back in 366 shiftOut = false; 367 } 368 sp += inputSize; 369 if (sp + 1 > sl) 370 return CoderResult.UNDERFLOW; 371 b1 = sa[sp]; 372 inputSize = 1; 373 } 374 375 if (dl - dp < 1) { 376 return CoderResult.OVERFLOW; 377 } 378 379 if (!shiftOut) { 380 da[dp++] = (char)(b1 & 0xff); //clear the upper byte 381 } else { 382 if (sp + 2 > sl) 383 return CoderResult.UNDERFLOW; 384 b2 = sa[sp + 1]; 385 inputSize++; 386 c = SODecode(b1, b2, currentSODesig); 387 if (c == REPLACE_CHAR) 388 return CoderResult.unmappableForLength(inputSize); 389 da[dp++] = c; 390 } 391 sp += inputSize; 392 } 393 return CoderResult.UNDERFLOW; 394 } finally { 395 src.position(sp - src.arrayOffset()); 396 dst.position(dp - dst.arrayOffset()); 397 } 398 } 399 400 protected CoderResult decodeLoop(ByteBuffer src, 401 CharBuffer dst) 402 { 403 if (src.hasArray() && dst.hasArray()) 404 return decodeArrayLoop(src, dst); 405 else 406 return decodeBufferLoop(src, dst); 407 } 408 } 409 }