/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.nio.cs.ext;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.util.Arrays;
import sun.nio.cs.Surrogate;
import sun.nio.cs.ArrayDecoder;
import sun.nio.cs.ArrayEncoder;
import static sun.nio.cs.CharsetMapping.*;

/*
 * Four types of "DoubleByte" charsets are implemented in this class
 * (1)DoubleByte
 *    The "most widely used" multibyte charset, a combination of
 *    a singlebyte character set (usually the ASCII charset) and a
 *    doublebyte character set. The codepoint values of singlebyte
 *    and doublebyte don't overlap. Microsoft's multibyte charsets
 *    and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943,
 *    948, 949 and 950 are such charsets.
 *
 * (2)DoubleByte_EBCDIC
 *    IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch)
 *    in and out between the singlebyte character set and doublebyte
 *    character set.
 *
 * (3)DoubleByte_SIMPLE_EUC
 *    It's a "simple" form of the EUC encoding scheme: only the
 *    singlebyte character set G0 and one doublebyte character set
 *    G1 are defined, G2 (with SS2) and G3 (with SS3) are not used.
 *    So it is actually the same as the "typical" type (1) mentioned
 *    above, except it returns "malformed" for the SS2 and SS3 when
 *    decoding.
 *
 * (4)DoubleByte ONLY
 *    A "pure" doublebyte only character set. From the implementation
 *    point of view, this is the type (1) with "decodeSingle" always
 *    returning unmappable.
 *
 * For simplicity, all implementations share the same decoding and
 * encoding data structure.
 *
 * Decoding:
 *
 *    char[][] b2c;
 *    char[] b2cSB;
 *    int b2Min, b2Max
 *
 *    public char decodeSingle(int b) {
 *        return b2cSB[b];
 *    }
 *
 *    public char decodeDouble(int b1, int b2) {
 *        if (b2 < b2Min || b2 > b2Max)
 *            return UNMAPPABLE_DECODING;
 *         return b2c[b1][b2 - b2Min];
 *    }
 *
 *    (1)b2Min, b2Max are the corresponding min and max value of the
 *       low-half of the double-byte.
 *    (2)The high 8-bit/b1 of the double-byte is used to index into
 *       the b2c array.
 *
 * Encoding:
 *
 *    char[] c2b;
 *    char[] c2bIndex;
 *
 *    public int encodeChar(char ch) {
 *        return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
 *    }
 *
 */

public class DoubleByte {

    /** A 256-entry table in which every entry is UNMAPPABLE_DECODING. */
    public final static char[] B2C_UNMAPPABLE;
    static {
        B2C_UNMAPPABLE = new char[0x100];
        Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING);
    }

    /**
     * Decoder for the plain "DoubleByte" layout: a byte that maps through
     * b2cSB is a single-byte character, any other byte is treated as the
     * lead byte of a two-byte sequence looked up in b2c.
     */
    public static class Decoder extends CharsetDecoder
                                implements DelegatableDecoder, ArrayDecoder
    {

        // b2c[b1][b2 - b2Min] gives the char for the byte pair (b1, b2)
        final char[][] b2c;
        // b2cSB[b] gives the char for the single byte b
        final char[] b2cSB;
        // inclusive range of valid second-byte values
        final int b2Min;
        final int b2Max;

        // for SimpleEUC override

        /**
         * Result reported when byte b needs a second byte but the input
         * ends after it. This implementation always reports UNDERFLOW.
         */
        protected CoderResult crMalformedOrUnderFlow(int b) {
            return CoderResult.UNDERFLOW;
        }

        /**
         * Result reported when the byte pair starting with b has no
         * mapping. This implementation always reports a two-byte
         * unmappable sequence.
         */
        protected CoderResult crMalformedOrUnmappable(int b) {
            return CoderResult.unmappableForLength(2);
        }

        /**
         * @param cs      the charset this decoder belongs to
         * @param avgcpb  average chars per byte, passed to CharsetDecoder
         * @param maxcpb  maximum chars per byte, passed to CharsetDecoder
         * @param b2c     double-byte table, indexed [b1][b2 - b2Min]
         * @param b2cSB   single-byte table, indexed by byte value
         * @param b2Min   smallest valid second byte
         * @param b2Max   largest valid second byte
         */
        Decoder(Charset cs, float avgcpb, float maxcpb,
                char[][] b2c, char[] b2cSB,
                int b2Min, int b2Max) {
            super(cs, avgcpb, maxcpb);
            this.b2c = b2c;
            this.b2cSB = b2cSB;
            this.b2Min = b2Min;
            this.b2Max = b2Max;
        }

        /** Same as the full constructor with avgcpb = 0.5 and maxcpb = 1.0. */
        Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max);
        }

        /**
         * Decode loop used when both buffers are array-backed. Works on the
         * backing arrays directly and writes the final positions back to
         * both buffers in the finally clause, whatever result is returned.
         */
        protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
            byte[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            char[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl && dp < dl) {
                    // inline the decodeSingle/Double() for better performance
                    int inSize = 1;
                    int b1 = sa[sp] & 0xff;
                    char c = b2cSB[b1];
                    if (c == UNMAPPABLE_DECODING) {
                        // not a single-byte char: needs a second byte
                        if (sl - sp < 2)
                            return crMalformedOrUnderFlow(b1);
                        int b2 = sa[sp + 1] & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                            return crMalformedOrUnmappable(b1);
                        }
                        inSize++;
                    }
                    da[dp++] = c;
                    sp += inSize;
                }
                return (sp >= sl) ? CoderResult.UNDERFLOW
                                  : CoderResult.OVERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /**
         * Decode loop for buffers without accessible arrays. "mark" tracks
         * the position just past the last fully decoded character; the
         * source position is rewound to it on exit, so bytes read for a
         * sequence that was not completed are not consumed.
         */
        protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
            int mark = src.position();
            try {

                while (src.hasRemaining() && dst.hasRemaining()) {
                    int b1 = src.get() & 0xff;
                    char c = b2cSB[b1];
                    int inSize = 1;
                    if (c == UNMAPPABLE_DECODING) {
                        if (src.remaining() < 1)
                            return crMalformedOrUnderFlow(b1);
                        int b2 = src.get() & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING)
                            return crMalformedOrUnmappable(b1);
                        inSize++;
                    }
                    dst.put(c);
                    mark += inSize;
                }
                return src.hasRemaining()? CoderResult.OVERFLOW
                                         : CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        // Make some protected methods public for use by JISAutoDetect
        @Override
        public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
            if (src.hasArray() && dst.hasArray())
                return decodeArrayLoop(src, dst);
            else
                return decodeBufferLoop(src, dst);
        }

        /**
         * Decodes len bytes of src starting at sp into dst, starting at
         * dst[0], substituting the first char of replacement() for
         * anything that cannot be decoded.
         *
         * No bounds checks are made on dst; the caller must size it.
         *
         * @return the number of chars written to dst
         */
        public int decode(byte[] src, int sp, int len, char[] dst) {
            int dp = 0;
            int sl = sp + len;
            char repl = replacement().charAt(0);
            while (sp < sl) {
                int b1 = src[sp++] & 0xff;
                char c = b2cSB[b1];
                if (c == UNMAPPABLE_DECODING) {
                    if (sp < sl) {
                        // the second byte is consumed even if the pair
                        // turns out to be unmappable
                        int b2 = src[sp++] & 0xff;
                        if (b2 >= b2Min && b2 <= b2Max) {
                            c = b2c[b1][b2 - b2Min];
                        }
                    }
                    if (c == UNMAPPABLE_DECODING) {
                        c = repl;
                    }
                }
                dst[dp++] = c;
            }
            return dp;
        }

        @Override
        public void implReset() {
            super.implReset();
        }

        @Override
        public CoderResult implFlush(CharBuffer out) {
            return super.implFlush(out);
        }

        // decode loops are not using decodeSingle/Double() for performance
        // reason.

        /** Looks up single byte b; UNMAPPABLE_DECODING if it has no mapping. */
        public char decodeSingle(int b) {
            return b2cSB[b];
        }

        /**
         * Looks up the byte pair (b1, b2); UNMAPPABLE_DECODING if b2 is
         * outside [b2Min, b2Max] or the pair has no mapping.
         */
        public char decodeDouble(int b1, int b2) {
            if (b2 < b2Min || b2 > b2Max)
                return UNMAPPABLE_DECODING;
            return  b2c[b1][b2 - b2Min];
        }

    }

    // IBM_EBCDIC_DBCS
    /**
     * Stateful decoder: the SO byte switches from single-byte to
     * double-byte state and the SI byte switches back.
     */
    public static class Decoder_EBCDIC extends Decoder {
        private static final int SBCS = 0;
        private static final int DBCS = 1;
        private static final int SO = 0x0e;
        private static final int SI = 0x0f;
        private int  currentState;

        Decoder_EBCDIC(Charset cs,
                       char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            super(cs, b2c, b2cSB, b2Min, b2Max);
        }

        @Override
        public void implReset() {
            currentState = SBCS;
        }

        // Check validity of dbcs ebcdic byte pair values
        //
        // First byte : 0x41 -- 0xFE
        // Second byte: 0x41 -- 0xFE
        // Doublebyte blank: 0x4040
        //
        // The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io
        // as
        //            if ((b1 != 0x40 || b2 != 0x40) &&
        //                (b2 < 0x41 || b2 > 0xfe)) {...}
        // is not correct/complete (range check for b1)
        //
        private static boolean isDoubleByte(int b1, int b2) {
            return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe)
                   || (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE
        }

        /**
         * Array-backed decode loop. A shift byte that does not change the
         * state (SO while in DBCS, SI while in SBCS) is malformed. In DBCS
         * state a pair with no mapping is malformed when isDoubleByte()
         * rejects it, unmappable otherwise.
         */
        @Override
        protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
            byte[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();
            char[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                // don't check dp/dl together here, it's possible to
                // decode a SO/SI without space in output buffer.
                while (sp < sl) {
                    int b1 = sa[sp] & 0xff;
                    int inSize = 1;
                    if (b1 == SO) {  // Shift out
                        if (currentState != SBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = DBCS;
                    } else if (b1 == SI) {
                        if (currentState != DBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = SBCS;
                    } else {
                        char c =  UNMAPPABLE_DECODING;
                        if (currentState == SBCS) {
                            c = b2cSB[b1];
                            if (c == UNMAPPABLE_DECODING)
                                return CoderResult.unmappableForLength(1);
                        } else {
                            if (sl - sp < 2)
                                return CoderResult.UNDERFLOW;
                            int b2 = sa[sp + 1] & 0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                if (!isDoubleByte(b1, b2))
                                    return CoderResult.malformedForLength(2);
                                return CoderResult.unmappableForLength(2);
                            }
                            inSize++;
                        }
                        if (dl - dp < 1)
                            return CoderResult.OVERFLOW;

                        da[dp++] = c;
                    }
                    sp += inSize;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /**
         * Same decoding rules as decodeArrayLoop, using relative buffer
         * operations. The source position is rewound to "mark" (the end of
         * the last fully processed byte or pair) on exit.
         */
        @Override
        protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    int b1 = src.get() & 0xff;
                    int inSize = 1;
                    if (b1 == SO) {  // Shift out
                        if (currentState != SBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = DBCS;
                    } else if (b1 == SI) {
                        if (currentState != DBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = SBCS;
                    } else {
                        char c = UNMAPPABLE_DECODING;
                        if (currentState == SBCS) {
                            c = b2cSB[b1];
                            if (c == UNMAPPABLE_DECODING)
                                return CoderResult.unmappableForLength(1);
                        } else {
                            if (src.remaining() < 1)
                                return CoderResult.UNDERFLOW;
                            int b2 = src.get()&0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                if (!isDoubleByte(b1, b2))
                                    return CoderResult.malformedForLength(2);
                                return CoderResult.unmappableForLength(2);
                            }
                            inSize++;
                        }

                        if (dst.remaining() < 1)
                            return CoderResult.OVERFLOW;

                        dst.put(c);
                    }
                    mark += inSize;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        /**
         * Array-to-array decode that always starts in SBCS state and emits
         * the replacement char for misplaced shift bytes, unmappable bytes
         * or pairs, and a lead byte with no second byte.
         *
         * @return the number of chars written to dst
         */
        @Override
        public int decode(byte[] src, int sp, int len, char[] dst) {
            int dp = 0;
            int sl = sp + len;
            currentState = SBCS;
            char repl = replacement().charAt(0);
            while (sp < sl) {
                int b1 = src[sp++] & 0xff;
                if (b1 == SO) {  // Shift out
                    if (currentState != SBCS)
                        dst[dp++] = repl;
                    else
                        currentState = DBCS;
                } else if (b1 == SI) {
                    if (currentState != DBCS)
                        dst[dp++] = repl;
                    else
                        currentState = SBCS;
                } else {
                    char c =  UNMAPPABLE_DECODING;
                    if (currentState == SBCS) {
                        c = b2cSB[b1];
                        if (c == UNMAPPABLE_DECODING)
                            c = repl;
                    } else {
                        if (sl == sp) {
                            c = repl;
                        } else {
                            int b2 = src[sp++] & 0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                c = repl;
                            }
                        }
                    }
                    dst[dp++] = c;
                }
            }
            return dp;
        }
    }

    // EBCDIC_DBCS_ONLY
    /**
     * Decoder whose single-byte table is entirely unmappable, so every
     * byte is treated as the first byte of a pair.
     */
    public static class Decoder_EBCDIC_DBCSONLY extends Decoder {
        static final char[] b2cSB;
        static {
            b2cSB = new char[0x100];
            Arrays.fill(b2cSB, UNMAPPABLE_DECODING);
        }
        Decoder_EBCDIC_DBCSONLY(Charset cs, char[][] b2c, int b2Min, int b2Max) {
            super(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max);
        }
    }

    // EUC_SIMPLE
    // The only thing we need to "override" is to check SS2/SS3 and
    // return "malformed" if found
    public static class Decoder_EUC_SIM extends Decoder {
        // constants shared by all instances
        private static final int SS2 = 0x8E;
        private static final int SS3 = 0x8F;

        Decoder_EUC_SIM(Charset cs,
                        char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            super(cs, b2c, b2cSB, b2Min, b2Max);
        }

        // No support provided for G2/G3 for SimpleEUC
        @Override
        protected CoderResult crMalformedOrUnderFlow(int b) {
            if (b == SS2 || b == SS3 )
                return CoderResult.malformedForLength(1);
            return CoderResult.UNDERFLOW;
        }

        @Override
        protected CoderResult crMalformedOrUnmappable(int b) {
            if (b == SS2 || b == SS3 )
                return CoderResult.malformedForLength(1);
            return CoderResult.unmappableForLength(2);
        }

        /**
         * Array-to-array decode with replacement. When an undecodable pair
         * starts with SS2 or SS3, only that one byte is replaced: the
         * second byte is pushed back and decoded again on its own.
         *
         * @return the number of chars written to dst
         */
        @Override
        public int decode(byte[] src, int sp, int len, char[] dst) {
            int dp = 0;
            int sl = sp + len;
            char repl = replacement().charAt(0);
            while (sp < sl) {
                int b1 = src[sp++] & 0xff;
                char c = b2cSB[b1];
                if (c == UNMAPPABLE_DECODING) {
                    if (sp < sl) {
                        int b2 = src[sp++] & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                            if (b1 == SS2 || b1 == SS3) {
                                sp--;
                            }
                            c = repl;
                        }
                    } else {
                        c = repl;
                    }
                }
                dst[dp++] = c;
            }
            return dp;
        }
    }

    /**
     * Encoder for the plain "DoubleByte" layout. encodeChar() yields
     * UNMAPPABLE_ENCODING, a value up to MAX_SINGLEBYTE (written as one
     * byte) or a larger value (written as two bytes, high byte first).
     */
    public static class Encoder extends CharsetEncoder
                                implements ArrayEncoder
    {
        final int MAX_SINGLEBYTE = 0xff;
        private final char[] c2b;
        private final char[] c2bIndex;
        // created lazily by sgp()
        Surrogate.Parser sgp;

        /** Creates an encoder with average and maximum bytes per char of 2.0. */
        protected Encoder(Charset cs, char[] c2b, char[] c2bIndex) {
            super(cs, 2.0f, 2.0f);
            this.c2b = c2b;
            this.c2bIndex = c2bIndex;
        }

        /**
         * @param avg   average bytes per char, passed to CharsetEncoder
         * @param max   maximum bytes per char, passed to CharsetEncoder
         * @param repl  replacement byte sequence, passed to CharsetEncoder
         */
        Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex) {
            super(cs, avg, max, repl);
            this.c2b = c2b;
            this.c2bIndex = c2bIndex;
        }

        @Override
        public boolean canEncode(char c) {
            return encodeChar(c) != UNMAPPABLE_ENCODING;
        }

        /** Returns the surrogate parser, creating it on first use. */
        Surrogate.Parser sgp() {
            if (sgp == null)
                sgp = new Surrogate.Parser();
            return sgp;
        }

        /**
         * Encode loop used when both buffers are array-backed. The final
         * positions are written back to both buffers in the finally clause.
         */
        protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
            char[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            byte[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl) {
                    char c = sa[sp];
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            // the parser supplies the error for a bad
                            // surrogate, else the unmappable result
                            if (sgp().parse(c, sa, sp, sl) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }

                    if (bb > MAX_SINGLEBYTE) {    // DoubleByte
                        if (dl - dp < 2)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)(bb >> 8);
                        da[dp++] = (byte)bb;
                    } else {                      // SingleByte
                        if (dl - dp < 1)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)bb;
                    }

                    sp++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /**
         * Encode loop for buffers without accessible arrays. The source
         * position is rewound to "mark" (just past the last char whose
         * bytes were written) on exit.
         */
        protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    char c = src.get();
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            if (sgp().parse(c, src) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
                        if (dst.remaining() < 2)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)(bb >> 8));
                        dst.put((byte)(bb));
                    } else {
                        if (dst.remaining() < 1)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)bb);
                    }
                    mark++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        @Override
        protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
            if (src.hasArray() && dst.hasArray())
                return encodeArrayLoop(src, dst);
            else
                return encodeBufferLoop(src, dst);
        }

        /**
         * Encodes len chars of src starting at sp into dst, starting at
         * dst[0]. An unmappable char is replaced by the first one or two
         * bytes of replacement(); a high surrogate followed by a low
         * surrogate is replaced as a single unit.
         *
         * No bounds checks are made on dst; the caller must size it.
         *
         * @return the number of bytes written to dst
         */
        public int encode(char[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = src[sp++];
                int bb = encodeChar(c);
                if (bb == UNMAPPABLE_ENCODING) {
                    if (Character.isHighSurrogate(c) && sp < sl &&
                        Character.isLowSurrogate(src[sp])) {
                        sp++;
                    }
                    byte[] repl = replacement();
                    dst[dp++] = repl[0];
                    if (repl.length > 1)
                        dst[dp++] = repl[1];
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) { // DoubleByte
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                          // SingleByte
                    dst[dp++] = (byte)bb;
                }

            }
            return dp;
        }

        /**
         * Two-level lookup: c2bIndex[ch >> 8] is the offset in c2b of the
         * 256-entry page for the high byte of ch, and the low byte selects
         * the entry within that page.
         */
        public int encodeChar(char ch) {
            return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
        }

        // init the c2b and c2bIndex tables from b2c.
        /**
         * Fills c2b and c2bIndex by inverting the decoding tables.
         *
         * @param b2c      double-byte decoding strings, indexed by first
         *                 byte; null entries are skipped
         * @param b2cSB    single-byte decoding string, may be null
         * @param b2cNR    (byte value, char) pairs, may be null; a decoding
         *                 entry that matches a pair is left out of c2b.
         *                 NOTE(review): "NR" presumably means
         *                 non-roundtrip -- confirm
         * @param c2bNR    (byte value, char) pairs, may be null, added to
         *                 c2b as char-to-byte mappings only
         * @param b2Min    smallest valid second byte
         * @param b2Max    largest valid second byte
         * @param c2b      output table; first filled with
         *                 UNMAPPABLE_ENCODING, then given one 256-entry
         *                 page per high byte in use, starting at 0x100
         * @param c2bIndex output index of page offsets per high byte; an
         *                 entry of 0 is read as "no page allocated yet"
         */
        static void initC2B(String[] b2c, String b2cSB, String b2cNR,  String c2bNR,
                            int b2Min, int b2Max,
                            char[] c2b, char[] c2bIndex)
        {
            Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
            int off = 0x100;

            char[][] b2c_ca = new char[b2c.length][];
            char[] b2cSB_ca = null;
            if (b2cSB != null)
                b2cSB_ca = b2cSB.toCharArray();

            for (int i = 0; i < b2c.length; i++) {
                if (b2c[i] == null)
                    continue;
                b2c_ca[i] = b2c[i].toCharArray();
            }

            if (b2cNR != null) {
                int j = 0;
                while (j < b2cNR.length()) {
                    char b  = b2cNR.charAt(j++);
                    char c  = b2cNR.charAt(j++);
                    if (b < 0x100 && b2cSB_ca != null) {
                        if (b2cSB_ca[b] == c)
                            b2cSB_ca[b] = UNMAPPABLE_DECODING;
                    } else {
                        if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c)
                            b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING;
                    }
                }
            }

            if (b2cSB_ca != null) {      // SingleByte
                for (int b = 0; b < b2cSB_ca.length; b++) {
                    char c = b2cSB_ca[b];
                    if (c == UNMAPPABLE_DECODING)
                        continue;
                    int index = c2bIndex[c >> 8];
                    if (index == 0) {
                        index = off;
                        off += 0x100;
                        c2bIndex[c >> 8] = (char)index;
                    }
                    c2b[index + (c & 0xff)] = (char)b;
                }
            }

            for (int b1 = 0; b1 < b2c.length; b1++) {  // DoubleByte
                char[] db = b2c_ca[b1];
                if (db == null)
                    continue;
                for (int b2 = b2Min; b2 <= b2Max; b2++) {
                    char c = db[b2 - b2Min];
                    if (c == UNMAPPABLE_DECODING)
                        continue;
                    int index = c2bIndex[c >> 8];
                    if (index == 0) {
                        index = off;
                        off += 0x100;
                        c2bIndex[c >> 8] = (char)index;
                    }
                    c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2);
                }
            }

            if (c2bNR != null) {
                // add c->b only nr entries
                for (int i = 0; i < c2bNR.length(); i += 2) {
                    char b = c2bNR.charAt(i);
                    char c = c2bNR.charAt(i + 1);
                    int index = (c >> 8);
                    if (c2bIndex[index] == 0) {
                        c2bIndex[index] = (char)off;
                        off += 0x100;
                    }
                    index = c2bIndex[index] + (c & 0xff);
                    c2b[index] = b;
                }
            }
        }
    }

    /** Encoder that reports every single-byte mapping as unmappable. */
    public static class Encoder_EBCDIC_DBCSONLY extends Encoder {
        Encoder_EBCDIC_DBCSONLY(Charset cs, byte[] repl,
                                char[] c2b, char[] c2bIndex) {
            super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex);
        }

        @Override
        public int encodeChar(char ch) {
            int bb = super.encodeChar(ch);
            if (bb <= MAX_SINGLEBYTE)
                return UNMAPPABLE_ENCODING;
            return bb;
        }
    }

    /**
     * Stateful encoder: writes SO before the first double-byte char of a
     * run and SI before returning to single-byte output.
     */
    public static class Encoder_EBCDIC extends Encoder {
        static final int SBCS = 0;
        static final int DBCS = 1;
        static final byte SO = 0x0e;
        static final byte SI = 0x0f;

        protected int  currentState = SBCS;

        Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex) {
            super(cs, 4.0f, 5.0f, new byte[] {(byte)0x6f}, c2b, c2bIndex);
        }

        @Override
        protected void implReset() {
            currentState = SBCS;
        }

        /** Writes the closing SI if the encoder is in DBCS state, then resets. */
        @Override
        protected CoderResult implFlush(ByteBuffer out) {
            if (currentState == DBCS) {
                if (out.remaining() < 1)
                    return CoderResult.OVERFLOW;
                out.put(SI);
            }
            implReset();
            return CoderResult.UNDERFLOW;
        }

        /**
         * Array-backed encode loop. A shift byte is written, and the state
         * changed, before the room check for the character itself, so an
         * OVERFLOW can leave the shift byte already emitted.
         */
        @Override
        protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
            char[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();
            byte[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl) {
                    char c = sa[sp];
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            if (sgp().parse(c, sa, sp, sl) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
                        if (currentState == SBCS) {
                            if (dl - dp < 1)
                                return CoderResult.OVERFLOW;
                            currentState = DBCS;
                            da[dp++] = SO;
                        }
                        if (dl - dp < 2)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)(bb >> 8);
                        da[dp++] = (byte)bb;
                    } else {                    // SingleByte
                        if (currentState == DBCS) {
                            if (dl - dp < 1)
                                return CoderResult.OVERFLOW;
                            currentState = SBCS;
                            da[dp++] = SI;
                        }
                        if (dl - dp < 1)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)bb;

                    }
                    sp++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /**
         * Same encoding rules as encodeArrayLoop, using relative buffer
         * operations. The source position is rewound to "mark" on exit.
         */
        @Override
        protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    char c = src.get();
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            if (sgp().parse(c, src) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
                        if (currentState == SBCS) {
                            if (dst.remaining() < 1)
                                return CoderResult.OVERFLOW;
                            currentState = DBCS;
                            dst.put(SO);
                        }
                        if (dst.remaining() < 2)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)(bb >> 8));
                        dst.put((byte)(bb));
                    } else {                  // Single-byte
                        if (currentState == DBCS) {
                            if (dst.remaining() < 1)
                                return CoderResult.OVERFLOW;
                            currentState = SBCS;
                            dst.put(SI);
                        }
                        if (dst.remaining() < 1)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)bb);
                    }
                    mark++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        /**
         * Array-to-array encode with replacement. Shift bytes are inserted
         * as needed and a closing SI is appended if the output ends in DBCS
         * state. Replacement bytes are written without any shift byte and
         * without changing the state.
         *
         * No bounds checks are made on dst; the caller must size it.
         *
         * @return the number of bytes written to dst
         */
        @Override
        public int encode(char[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = src[sp++];
                int bb = encodeChar(c);

                if (bb == UNMAPPABLE_ENCODING) {
                    if (Character.isHighSurrogate(c) && sp < sl &&
                        Character.isLowSurrogate(src[sp])) {
                        sp++;
                    }
                    byte[] repl = replacement();
                    dst[dp++] = repl[0];
                    if (repl.length > 1)
                        dst[dp++] = repl[1];
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) {           // DoubleByte
                    if (currentState == SBCS) {
                        currentState = DBCS;
                        dst[dp++] = SO;
                    }
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                             // SingleByte
                    if (currentState == DBCS) {
                         currentState = SBCS;
                         dst[dp++] = SI;
                    }
                    dst[dp++] = (byte)bb;
                }
            }

            if (currentState == DBCS) {
                 currentState = SBCS;
                 dst[dp++] = SI;
            }
            return dp;
        }
    }

    // EUC_SIMPLE
    public static class Encoder_EUC_SIM extends Encoder {
        Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex) {
            super(cs, c2b, c2bIndex);
        }
    }
}