/*
 * Copyright (c) 2001, 2008, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.awt.motif;

import java.nio.CharBuffer;
import java.nio.ByteBuffer;
import java.nio.charset.*;
import java.util.Arrays;
import static sun.awt.motif.DoubleByte.*;

/**
 * Base class for the X11 CNS 11643 font charsets. Each instance is bound to
 * a single plane (1, 2 or 3) and converts between Unicode characters and the
 * two 7-bit code bytes of that plane, using the EUC_TW mapping tables.
 */
public abstract class X11CNS11643 extends Charset {

    /**
     * Plane identifier in the form used by {@link Encoder} and
     * {@link Decoder}: 0 for plane 1, otherwise the plane number itself.
     */
    private final int plane;

    /**
     * Creates a charset bound to one CNS 11643 plane.
     *
     * @param plane the plane number; only 1, 2 and 3 are accepted
     * @param name  the canonical charset name
     * @throws IllegalArgumentException if {@code plane} is not 1, 2 or 3
     */
    public X11CNS11643 (int plane, String name) {
        super(name, null);
        switch (plane) {
        case 1:
            this.plane = 0; // CS1
            break;
        case 2:
        case 3:
            this.plane = plane;
            break;
        default:
            throw new IllegalArgumentException
                ("Only planes 1, 2, and 3 supported");
        }
    }

    /** Returns a new encoder restricted to this charset's plane. */
    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this, plane);
    }

    /** Returns a new decoder restricted to this charset's plane. */
    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this, plane);
    }

    /** Reports containment only for other {@code X11CNS11643} charsets. */
    @Override
    public boolean contains(Charset cs) {
        return cs instanceof X11CNS11643;
    }

    /**
     * Full EUC_TW encoder (this class was copied from sun.nio). ASCII is
     * written as one byte, plane-index-0 characters as two bytes, and all
     * other planes as four bytes: SS2, a plane byte, then the two code bytes.
     */
    static class EUC_TW_Encoder extends CharsetEncoder {

        /** Single-shift-2 byte that introduces a four byte sequence. */
        static final int SS2 = 0x8E;

        /** Scratch buffer that receives the bytes of one encoded character. */
        private final byte[] bb = new byte[4];

        public EUC_TW_Encoder(Charset cs) {
            super(cs, 4.0f, 4.0f);
        }

        /** Returns true for ASCII and for any BMP character with a mapping. */
        @Override
        public boolean canEncode(char c) {
            return (c <= '\u007f' || toEUC(c, bb) != -1);
        }

        /**
         * Returns true if every character of {@code cs} is encodable.
         * A high surrogate must be followed by a low surrogate and the
         * resulting pair must have a mapping.
         */
        @Override
        public boolean canEncode(CharSequence cs) {
            int i = 0;
            while (i < cs.length()) {
                char c = cs.charAt(i++);
                if (Character.isHighSurrogate(c)) {
                    if (i == cs.length()) {
                        return false;
                    }
                    char low = cs.charAt(i++);
                    if (!Character.isLowSurrogate(low) || toEUC(c, low, bb) == -1) {
                        return false;
                    }
                } else if (!canEncode(c)) {
                    return false;
                }
            }
            return true;
        }

        /**
         * Encodes a surrogate pair into {@code bb}.
         *
         * @return the number of bytes written, or -1 if unmappable
         */
        public int toEUC(char hi, char low, byte[] bb) {
            return encode(hi, low, bb);
        }

        /**
         * Encodes a single BMP character into {@code bb}.
         *
         * @return the number of bytes written, or -1 if unmappable
         */
        public int toEUC(char c, byte[] bb) {
            return encode(c, bb);
        }

        /** Encoding loop for array-backed source and destination buffers. */
        private CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
            char[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            byte[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            int inSize;
            int outSize;

            try {
                while (sp < sl) {
                    char c = sa[sp];
                    inSize = 1;
                    if (c < 0x80) {  // ASCII
                        bb[0] = (byte)c;
                        outSize = 1;
                    } else {
                        outSize = toEUC(c, bb);
                        if (outSize == -1) {
                            // to check surrogates only after BMP failed
                            // has the benefit of improving the BMP encoding
                            // 10% faster, with the price of the slowdown of
                            // supplementary character encoding. given the use
                            // of supplementary characters is really rare, this
                            // is something worth doing.
                            if (Character.isHighSurrogate(c)) {
                                if ((sp + 1) == sl) {
                                    return CoderResult.UNDERFLOW;
                                }
                                if (!Character.isLowSurrogate(sa[sp + 1])) {
                                    return CoderResult.malformedForLength(1);
                                }
                                outSize = toEUC(c, sa[sp + 1], bb);
                                inSize = 2;
                            } else if (Character.isLowSurrogate(c)) {
                                return CoderResult.malformedForLength(1);
                            }
                        }
                    }
                    if (outSize == -1) {
                        return CoderResult.unmappableForLength(inSize);
                    }
                    if (dl - dp < outSize) {
                        return CoderResult.OVERFLOW;
                    }
                    System.arraycopy(bb, 0, da, dp, outSize);
                    dp += outSize;
                    sp += inSize;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                // publish only what was fully consumed / produced
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /** Encoding loop for buffers that are not both array-backed. */
        private CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
            int outSize;
            int inSize;
            // position of the first character not yet fully encoded
            int mark = src.position();

            try {
                while (src.hasRemaining()) {
                    inSize = 1;
                    char c = src.get();
                    if (c < 0x80) {   // ASCII
                        outSize = 1;
                        bb[0] = (byte)c;
                    } else {
                        outSize = toEUC(c, bb);
                        if (outSize == -1) {
                            if (Character.isHighSurrogate(c)) {
                                if (!src.hasRemaining()) {
                                    return CoderResult.UNDERFLOW;
                                }
                                char c2 = src.get();
                                if (!Character.isLowSurrogate(c2)) {
                                    return CoderResult.malformedForLength(1);
                                }
                                outSize = toEUC(c, c2, bb);
                                inSize = 2;
                            } else if (Character.isLowSurrogate(c)) {
                                return CoderResult.malformedForLength(1);
                            }
                        }
                    }
                    if (outSize == -1) {
                        return CoderResult.unmappableForLength(inSize);
                    }
                    if (dst.remaining() < outSize) {
                        return CoderResult.OVERFLOW;
                    }
                    for (int i = 0; i < outSize; i++) {
                        dst.put(bb[i]);
                    }
                    mark += inSize;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                // rewind over any character that was read but not encoded
                src.position(mark);
            }
        }

        @Override
        protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
            if (src.hasArray() && dst.hasArray()) {
                return encodeArrayLoop(src, dst);
            }
            return encodeBufferLoop(src, dst);
        }

        /**
         * Encodes the supplementary character formed by {@code hi} and
         * {@code low}. Only code points whose bits 16-19 equal 0x2 are
         * looked up; all others are unmappable.
         *
         * @return 4 after writing SS2, the plane byte and the two code bytes
         *         into {@code bb}, or -1 if there is no mapping
         */
        static int encode(char hi, char low, byte[] bb) {
            int c = Character.toCodePoint(hi, low);
            if ((c & 0xf0000) != 0x20000) {
                return -1;
            }
            c -= 0x20000;
            int index = c2bSuppIndex[c >> 8];
            if (index == UNMAPPABLE_ENCODING) {
                return -1;
            }
            index = index + (c & 0xff);
            int db = c2bSupp[index];
            if (db == UNMAPPABLE_ENCODING) {
                return -1;
            }
            // supplementary plane info lives in the high nibble
            int p = (c2bPlane[index] >> 4) & 0xf;
            bb[0] = (byte)SS2;
            bb[1] = (byte)(0xa0 | p);
            bb[2] = (byte)(db >> 8);
            bb[3] = (byte)db;
            return 4;
        }

        /**
         * Encodes the BMP character {@code c}.
         *
         * @return 2 when the character maps to plane index 0 (two code bytes
         *         written), 4 when it maps to another plane (SS2, plane byte
         *         and two code bytes written), or -1 if there is no mapping
         */
        static int encode(char c, byte[] bb) {
            int index = c2bIndex[c >> 8];
            if (index == UNMAPPABLE_ENCODING) {
                return -1;
            }
            index = index + (c & 0xff);
            int db = c2b[index];
            if (db == UNMAPPABLE_ENCODING) {
                return -1;
            }
            // BMP plane info lives in the low nibble
            int p = c2bPlane[index] & 0xf;
            if (p == 0) {
                bb[0] = (byte)(db >> 8);
                bb[1] = (byte)db;
                return 2;
            } else {
                bb[0] = (byte)SS2;
                bb[1] = (byte)(0xa0 | p);
                bb[2] = (byte)(db >> 8);
                bb[3] = (byte)db;
                return 4;
            }
        }

        /** BMP char to double-byte code table, addressed through c2bIndex. */
        static final char[] c2b;
        /** Per high-byte segment offsets into {@link #c2b}. */
        static final char[] c2bIndex;
        /** Supplementary char to double-byte code table. */
        static final char[] c2bSupp;
        /** Per high-byte segment offsets into {@link #c2bSupp}. */
        static final char[] c2bSuppIndex;
        /**
         * Plane nibbles: the low nibble belongs to the {@link #c2b} entry at
         * the same index, the high nibble to the {@link #c2bSupp} entry.
         */
        static final byte[] c2bPlane;

        // Build the char-to-byte tables by inverting the decoder's b2c tables.
        static {
            int b1Min = EUC_TW_Decoder.b1Min;
            int b1Max = EUC_TW_Decoder.b1Max;
            int b2Min = EUC_TW_Decoder.b2Min;
            int b2Max = EUC_TW_Decoder.b2Max;
            String[] b2c = EUC_TW_Decoder.b2c;
            byte[] b2cIsSupp = EUC_TW_Decoder.b2cIsSupp;

            c2bIndex = EUC_TWMapping.c2bIndex;
            c2bSuppIndex = EUC_TWMapping.c2bSuppIndex;
            char[] c2b0 = new char[EUC_TWMapping.C2BSIZE];
            char[] c2bSupp0 = new char[EUC_TWMapping.C2BSUPPSIZE];
            byte[] c2bPlane0 = new byte[Math.max(EUC_TWMapping.C2BSIZE,
                                                 EUC_TWMapping.C2BSUPPSIZE)];

            Arrays.fill(c2b0, (char)UNMAPPABLE_ENCODING);
            Arrays.fill(c2bSupp0, (char)UNMAPPABLE_ENCODING);

            for (int p = 0; p < b2c.length; p++) {
                String db = b2c[p];
                /*
                 adjust the "plane" from 0..7 to 0, 2, 3, 4, 5, 6, 7, 0xf,
                 which helps balance between footprint (to save the plane
                 info in 4 bits) and runtime performance (to require only
                 one operation "0xa0 | plane" to encode the plane byte)
                */
                int planeNibble = p;
                if (planeNibble == 7) {
                    planeNibble = 0xf;
                } else if (planeNibble != 0) {
                    planeNibble = p + 1;
                }

                int off = 0;
                for (int b1 = b1Min; b1 <= b1Max; b1++) {
                    for (int b2 = b2Min; b2 <= b2Max; b2++) {
                        char c = db.charAt(off);
                        if (c != UNMAPPABLE_DECODING) {
                            if ((b2cIsSupp[off] & (1 << p)) != 0) {
                                int index = c2bSuppIndex[c >> 8] + (c & 0xff);
                                c2bSupp0[index] = (char)((b1 << 8) + b2);
                                c2bPlane0[index] |= (byte)(planeNibble << 4);
                            } else {
                                int index = c2bIndex[c >> 8] + (c & 0xff);
                                c2b0[index] = (char)((b1 << 8) + b2);
                                c2bPlane0[index] |= (byte)planeNibble;
                            }
                        }
                        off++;
                    }
                }
            }
            c2b = c2b0;
            c2bSupp = c2bSupp0;
            c2bPlane = c2bPlane0;
        }
    }

    /**
     * Encoder for a single plane. A character is encodable only when its
     * EUC_TW encoding belongs to the configured plane; the output is the two
     * code bytes with the high bit cleared. ASCII and surrogates are never
     * encodable.
     */
    private static class Encoder extends EUC_TW_Encoder {

        /** Plane to accept: 0 for two byte EUC_TW codes, else the plane number. */
        private final int plane;

        /** Scratch buffer for the EUC_TW bytes of the current character. */
        private final byte[] bb = new byte[4];

        public Encoder(Charset cs, int plane) {
            super(cs);
            this.plane = plane;
        }

        /**
         * Returns true if {@code c} is non-ASCII and its EUC_TW encoding
         * belongs to this encoder's plane.
         */
        @Override
        public boolean canEncode(char c) {
            if (c <= 0x7F) {
                return false;
            }
            int nb = toEUC(c, bb);
            if (nb == -1) {
                return false;
            }
            int p = 0;
            if (nb == 4) {
                p = (bb[1] & 0xff) - 0xa0;
            }
            return (p == plane);
        }

        /** Accepts any replacement byte sequence without validating it. */
        @Override
        public boolean isLegalReplacement(byte[] repl) {
            return true;
        }

        /**
         * Computes the output for {@code c} in this encoder's plane.
         *
         * @return the two 7-bit code bytes packed as {@code (b1 << 8) | b2},
         *         or -1 if {@code c} cannot be encoded in this plane
         */
        private int encodeInPlane(char c) {
            if (c <= '\u007f' || c >= '\uFFFE') {
                return -1;
            }
            int nb = toEUC(c, bb);
            if (nb == -1) {
                return -1;
            }
            // two byte encodings are plane 0; four byte ones carry the plane
            // number in the byte after SS2
            int p = (nb == 4) ? (bb[1] & 0xff) - 0xa0 : 0;
            if (p != plane) {
                return -1;
            }
            int off = (nb == 2) ? 0 : 2;
            return ((bb[off] & 0x7f) << 8) | (bb[off + 1] & 0x7f);
        }

        /**
         * Encodes as many characters as possible. Array-backed buffers take
         * the direct array path; any other buffer pair falls back to
         * relative get/put so that no exception is raised by
         * {@code array()}.
         */
        @Override
        protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
            if (src.hasArray() && dst.hasArray()) {
                return encodePlaneArrayLoop(src, dst);
            }
            return encodePlaneBufferLoop(src, dst);
        }

        /** Plane encoding loop for array-backed buffers. */
        private CoderResult encodePlaneArrayLoop(CharBuffer src, ByteBuffer dst) {
            char[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();
            byte[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl) {
                    int code = encodeInPlane(sa[sp]);
                    if (code == -1) {
                        return CoderResult.unmappableForLength(1);
                    }
                    if (dl - dp < 2) {
                        return CoderResult.OVERFLOW;
                    }
                    da[dp++] = (byte)(code >> 8);
                    da[dp++] = (byte)code;
                    sp++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /** Plane encoding loop for buffers that are not both array-backed. */
        private CoderResult encodePlaneBufferLoop(CharBuffer src, ByteBuffer dst) {
            // position of the first character not yet encoded
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    int code = encodeInPlane(src.get());
                    if (code == -1) {
                        return CoderResult.unmappableForLength(1);
                    }
                    if (dst.remaining() < 2) {
                        return CoderResult.OVERFLOW;
                    }
                    dst.put((byte)(code >> 8));
                    dst.put((byte)code);
                    mark++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                // rewind over a character that was read but not encoded
                src.position(mark);
            }
        }
    }

    /**
     * Full EUC_TW decoder (copied from sun.nio). Bytes below 0x80 are ASCII,
     * SS2 introduces a four byte sequence carrying a plane byte, and every
     * other lead byte starts a two byte sequence looked up in plane index 0.
     */
    static class EUC_TW_Decoder extends CharsetDecoder {

        /** Single-shift-2 byte that introduces a four byte sequence. */
        static final int SS2 = 0x8E;

        public EUC_TW_Decoder(Charset cs) {
            super(cs, 2.0f, 2.0f);
        }

        // Reusable result holders for toUnicode(): one char or a surrogate pair.
        char[] c1 = new char[1];
        char[] c2 = new char[2];

        /**
         * Decodes the code bytes {@code b1}, {@code b2} in plane index
         * {@code p}. The returned array is one of this decoder's reusable
         * holders and is overwritten by the next call.
         *
         * @return one char, a surrogate pair, or null if undecodable
         */
        public char[] toUnicode(int b1, int b2, int p) {
            return decode(b1, b2, p, c1, c2);
        }

        static final String[] b2c =  EUC_TWMapping.b2c;
        static final int b1Min    =  EUC_TWMapping.b1Min;
        static final int b1Max    =  EUC_TWMapping.b1Max;
        static final int b2Min    =  EUC_TWMapping.b2Min;
        static final int b2Max    =  EUC_TWMapping.b2Max;
        /** Number of second-byte slots per first byte in each b2c string. */
        static final int dbSegSize = b2Max - b2Min + 1;
        /**
         * Per table position flags: bit {@code p} set means the entry of
         * plane index {@code p} at that position is a supplementary
         * character stored as an offset from 0x20000.
         */
        static final byte[] b2cIsSupp;

        // adjust from cns planeNo to the plane index of b2c;
        // plane bytes without an entry map to -1
        static final byte[] cnspToIndex = new byte[0x100];
        static {
            Arrays.fill(cnspToIndex, (byte)-1);
            cnspToIndex[0xa2] = 1; cnspToIndex[0xa3] = 2; cnspToIndex[0xa4] = 3;
            cnspToIndex[0xa5] = 4; cnspToIndex[0xa6] = 5; cnspToIndex[0xa7] = 6;
            cnspToIndex[0xaf] = 7;
        }

        // Unpack the flag string: each char yields two flag bytes, high first.
        static {
            String b2cIsSuppStr = EUC_TWMapping.b2cIsSuppStr;
            // work on a local copy is much faster than operate
            // directly on b2cIsSupp
            byte[] flag = new byte[b2cIsSuppStr.length() << 1];
            int off = 0;
            for (int i = 0; i < b2cIsSuppStr.length(); i++) {
                char c = b2cIsSuppStr.charAt(i);
                flag[off++] = (byte)(c >> 8);
                flag[off++] = (byte)(c & 0xff);
            }
            b2cIsSupp = flag;
        }

        /** Returns true if {@code b} lies within [b1Min, b1Max]. */
        static boolean isLegalDB(int b) {
            return b >= b1Min && b <= b1Max;
        }

        /**
         * Looks up the code bytes {@code b1}, {@code b2} in plane index
         * {@code p}.
         *
         * @param c1 holder filled and returned for a BMP result
         * @param c2 holder filled and returned for a supplementary result
         * @return {@code c1}, {@code c2}, or null if the bytes are out of
         *         range or have no mapping
         */
        static char[] decode(int b1, int b2, int p, char[] c1, char[] c2) {
            if (b1 < b1Min || b1 > b1Max || b2 < b2Min || b2 > b2Max) {
                return null;
            }
            int index = (b1 - b1Min) * dbSegSize + b2 - b2Min;
            char c = b2c[p].charAt(index);
            if (c == UNMAPPABLE_DECODING) {
                return null;
            }
            if ((b2cIsSupp[index] & (1 << p)) == 0) {
                c1[0] = c;
                return c1;
            } else {
                c2[0] = Character.highSurrogate(0x20000 + c);
                c2[1] = Character.lowSurrogate(0x20000 + c);
                return c2;
            }
        }

        /** Decoding loop for array-backed source and destination buffers. */
        private CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
            byte[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            char[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();
            try {
                while (sp < sl) {
                    int byte1 = sa[sp] & 0xff;
                    if (byte1 == SS2) { // Codeset 2  G2
                        if (sl - sp < 4) {
                            return CoderResult.UNDERFLOW;
                        }
                        int cnsPlane = cnspToIndex[sa[sp + 1] & 0xff];
                        if (cnsPlane < 0) {
                            return CoderResult.malformedForLength(2);
                        }
                        byte1 = sa[sp + 2] & 0xff;
                        int byte2 = sa[sp + 3] & 0xff;
                        char[] cc = toUnicode(byte1, byte2, cnsPlane);
                        if (cc == null) {
                            if (!isLegalDB(byte1) || !isLegalDB(byte2)) {
                                return CoderResult.malformedForLength(4);
                            }
                            return CoderResult.unmappableForLength(4);
                        }
                        if (dl - dp < cc.length) {
                            return CoderResult.OVERFLOW;
                        }
                        if (cc.length == 1) {
                            da[dp++] = cc[0];
                        } else {
                            da[dp++] = cc[0];
                            da[dp++] = cc[1];
                        }
                        sp += 4;
                    } else if (byte1 < 0x80) {  // ASCII      G0
                        if (dl - dp < 1) {
                            return CoderResult.OVERFLOW;
                        }
                        da[dp++] = (char) byte1;
                        sp++;
                    } else {                    // Codeset 1  G1
                        if (sl - sp < 2) {
                            return CoderResult.UNDERFLOW;
                        }
                        int byte2 = sa[sp + 1] & 0xff;
                        char[] cc = toUnicode(byte1, byte2, 0);
                        if (cc == null) {
                            if (!isLegalDB(byte1) || !isLegalDB(byte2)) {
                                return CoderResult.malformedForLength(1);
                            }
                            return CoderResult.unmappableForLength(2);
                        }
                        if (dl - dp < 1) {
                            return CoderResult.OVERFLOW;
                        }
                        da[dp++] = cc[0];
                        sp += 2;
                    }
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /** Decoding loop for buffers that are not both array-backed. */
        private CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
            // position of the first byte not yet fully decoded
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    int byte1 = src.get() & 0xff;
                    if (byte1 == SS2) {            // Codeset 2  G2
                        if (src.remaining() < 3) {
                            return CoderResult.UNDERFLOW;
                        }
                        int cnsPlane = cnspToIndex[src.get() & 0xff];
                        if (cnsPlane < 0) {
                            return CoderResult.malformedForLength(2);
                        }
                        byte1 = src.get() & 0xff;
                        int byte2 = src.get() & 0xff;
                        char[] cc = toUnicode(byte1, byte2, cnsPlane);
                        if (cc == null) {
                            if (!isLegalDB(byte1) || !isLegalDB(byte2)) {
                                return CoderResult.malformedForLength(4);
                            }
                            return CoderResult.unmappableForLength(4);
                        }
                        if (dst.remaining() < cc.length) {
                            return CoderResult.OVERFLOW;
                        }
                        if (cc.length == 1) {
                            dst.put(cc[0]);
                        } else {
                            dst.put(cc[0]);
                            dst.put(cc[1]);
                        }
                        mark += 4;
                    } else if (byte1 < 0x80) {     // ASCII      G0
                        if (!dst.hasRemaining()) {
                            return CoderResult.OVERFLOW;
                        }
                        dst.put((char) byte1);
                        mark++;
                    } else {                       // Codeset 1  G1
                        if (!src.hasRemaining()) {
                            return CoderResult.UNDERFLOW;
                        }
                        int byte2 = src.get() & 0xff;
                        char[] cc = toUnicode(byte1, byte2, 0);
                        if (cc == null) {
                            if (!isLegalDB(byte1) || !isLegalDB(byte2)) {
                                return CoderResult.malformedForLength(1);
                            }
                            return CoderResult.unmappableForLength(2);
                        }
                        if (!dst.hasRemaining()) {
                            return CoderResult.OVERFLOW;
                        }
                        dst.put(cc[0]);
                        mark += 2;
                    }
                }
                return CoderResult.UNDERFLOW;
            } finally {
                // rewind over bytes that were read but not decoded
                src.position(mark);
            }
        }

        @Override
        protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
            if (src.hasArray() && dst.hasArray()) {
                return decodeArrayLoop(src, dst);
            }
            return decodeBufferLoop(src, dst);
        }
    }

    /**
     * Decoder for a single plane. Input is a stream of two byte codes; the
     * high bit of each byte is forced on before the lookup in the plane's
     * table. Codes that decode to a supplementary character are reported as
     * unmappable.
     */
    private static class Decoder extends EUC_TW_Decoder {

        /** Index of this decoder's plane in {@code b2c}. */
        private final int plane;

        /**
         * @param plane 0 for plane 1, or the plane number 2 or 3
         * @throws IllegalArgumentException for any other value
         */
        protected Decoder(Charset cs, int plane) {
            super(cs);
            if (plane == 0) {
                this.plane = plane;
            } else if (plane == 2 || plane == 3) {
                this.plane = plane - 1;
            } else {
                throw new IllegalArgumentException
                    ("Only planes 1, 2, and 3 supported");
            }
        }

        /**
         * Decodes as many two byte codes as possible. Array-backed buffers
         * take the direct array path; any other buffer pair falls back to
         * relative get/put so that no exception is raised by
         * {@code array()}.
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
            if (src.hasArray() && dst.hasArray()) {
                return decodePlaneArrayLoop(src, dst);
            }
            return decodePlaneBufferLoop(src, dst);
        }

        /** Plane decoding loop for array-backed buffers. */
        private CoderResult decodePlaneArrayLoop(ByteBuffer src, CharBuffer dst) {
            byte[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            char[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl) {
                    if (sl - sp < 2) {
                        return CoderResult.UNDERFLOW;
                    }
                    int b1 = (sa[sp] & 0xff) | 0x80;
                    int b2 = (sa[sp + 1] & 0xff) | 0x80;
                    char[] cc = toUnicode(b1, b2, plane);
                    // plane3 has non-bmp characters(added), x11cnsp3
                    // however does not support them
                    if (cc == null || cc.length == 2) {
                        return CoderResult.unmappableForLength(2);
                    }
                    if (dl - dp < 1) {
                        return CoderResult.OVERFLOW;
                    }
                    da[dp++] = cc[0];
                    sp += 2;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /** Plane decoding loop for buffers that are not both array-backed. */
        private CoderResult decodePlaneBufferLoop(ByteBuffer src, CharBuffer dst) {
            // position of the first byte not yet fully decoded
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    if (src.remaining() < 2) {
                        return CoderResult.UNDERFLOW;
                    }
                    int b1 = (src.get() & 0xff) | 0x80;
                    int b2 = (src.get() & 0xff) | 0x80;
                    char[] cc = toUnicode(b1, b2, plane);
                    // supplementary results are not supported here either
                    if (cc == null || cc.length == 2) {
                        return CoderResult.unmappableForLength(2);
                    }
                    if (!dst.hasRemaining()) {
                        return CoderResult.OVERFLOW;
                    }
                    dst.put(cc[0]);
                    mark += 2;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                // rewind over bytes that were read but not decoded
                src.position(mark);
            }
        }
    }
}