1 /* 2 * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.nio.cs; 27 28 import java.nio.ByteBuffer; 29 import java.nio.CharBuffer; 30 import java.nio.charset.Charset; 31 import java.nio.charset.CharsetDecoder; 32 import java.nio.charset.CharsetEncoder; 33 import java.nio.charset.CoderResult; 34 import java.util.Arrays; 35 import sun.nio.cs.Surrogate; 36 import sun.nio.cs.ArrayDecoder; 37 import sun.nio.cs.ArrayEncoder; 38 import static sun.nio.cs.CharsetMapping.*; 39 40 /* 41 * Four types of "DoubleByte" charsets are implemented in this class 42 * (1)DoubleByte 43 * The "mostly widely used" multibyte charset, a combination of 44 * a singlebyte character set (usually the ASCII charset) and a 45 * doublebyte character set. The codepoint values of singlebyte 46 * and doublebyte don't overlap. Microsoft's multibyte charsets 47 * and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943, 48 * 948, 949 and 950 are such charsets. 49 * 50 * (2)DoubleByte_EBCDIC 51 * IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch) 52 * in and out between the singlebyte character set and doublebyte 53 * character set. 54 * 55 * (3)DoubleByte_SIMPLE_EUC 56 * It's a "simple" form of EUC encoding scheme, only have the 57 * singlebyte character set G0 and one doublebyte character set 58 * G1 are defined, G2 (with SS2) and G3 (with SS3) are not used. 59 * So it is actually the same as the "typical" type (1) mentioned 60 * above, except it return "malformed" for the SS2 and SS3 when 61 * decoding. 62 * 63 * (4)DoubleByte ONLY 64 * A "pure" doublebyte only character set. From implementation 65 * point of view, this is the type (1) with "decodeSingle" always 66 * returns unmappable. 67 * 68 * For simplicity, all implementations share the same decoding and 69 * encoding data structure. 70 * 71 * Decoding: 72 * 73 * char[][] b2c; 74 * char[] b2cSB; 75 * int b2Min, b2Max 76 * 77 * public char decodeSingle(int b) { 78 * return b2cSB.[b]; 79 * } 80 * 81 * public char decodeDouble(int b1, int b2) { 82 * if (b2 < b2Min || b2 > b2Max) 83 * return UNMAPPABLE_DECODING; 84 * return b2c[b1][b2 - b2Min]; 85 * } 86 * 87 * (1)b2Min, b2Max are the corresponding min and max value of the 88 * low-half of the double-byte. 89 * (2)The high 8-bit/b1 of the double-byte are used to indexed into 90 * b2c array. 91 * 92 * Encoding: 93 * 94 * char[] c2b; 95 * char[] c2bIndex; 96 * 97 * public int encodeChar(char ch) { 98 * return c2b[c2bIndex[ch >> 8] + (ch & 0xff)]; 99 * } 100 * 101 */ 102 103 public class DoubleByte { 104 105 public final static char[] B2C_UNMAPPABLE; 106 static { 107 B2C_UNMAPPABLE = new char[0x100]; 108 Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING); 109 } 110 111 public static class Decoder extends CharsetDecoder 112 implements DelegatableDecoder, ArrayDecoder 113 { 114 final char[][] b2c; 115 final char[] b2cSB; 116 final int b2Min; 117 final int b2Max; 118 119 // for SimpleEUC override 120 protected CoderResult crMalformedOrUnderFlow(int b) { 121 return CoderResult.UNDERFLOW; 122 } 123 124 protected CoderResult crMalformedOrUnmappable(int b1, int b2) { 125 if (b2c[b1] == B2C_UNMAPPABLE || // isNotLeadingByte(b1) 126 b2c[b2] != B2C_UNMAPPABLE || // isLeadingByte(b2) 127 decodeSingle(b2) != UNMAPPABLE_DECODING) { // isSingle(b2) 128 return CoderResult.malformedForLength(1); 129 } 130 return CoderResult.unmappableForLength(2); 131 } 132 133 public Decoder(Charset cs, float avgcpb, float maxcpb, 134 char[][] b2c, char[] b2cSB, 135 int b2Min, int b2Max) { 136 super(cs, avgcpb, maxcpb); 137 this.b2c = b2c; 138 this.b2cSB = b2cSB; 139 this.b2Min = b2Min; 140 this.b2Max = b2Max; 141 } 142 143 public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) { 144 this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max); 145 } 146 147 protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) { 148 byte[] sa = src.array(); 149 int sp = src.arrayOffset() + src.position(); 150 int sl = src.arrayOffset() + src.limit(); 151 152 char[] da = dst.array(); 153 int dp = dst.arrayOffset() + dst.position(); 154 int dl = dst.arrayOffset() + dst.limit(); 155 156 try { 157 while (sp < sl && dp < dl) { 158 // inline the decodeSingle/Double() for better performance 159 int inSize = 1; 160 int b1 = sa[sp] & 0xff; 161 char c = b2cSB[b1]; 162 if (c == UNMAPPABLE_DECODING) { 163 if (sl - sp < 2) 164 return crMalformedOrUnderFlow(b1); 165 int b2 = sa[sp + 1] & 0xff; 166 if (b2 < b2Min || b2 > b2Max || 167 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { 168 return crMalformedOrUnmappable(b1, b2); 169 } 170 inSize++; 171 } 172 da[dp++] = c; 173 sp += inSize; 174 } 175 return (sp >= sl) ? CoderResult.UNDERFLOW 176 : CoderResult.OVERFLOW; 177 } finally { 178 src.position(sp - src.arrayOffset()); 179 dst.position(dp - dst.arrayOffset()); 180 } 181 } 182 183 protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) { 184 int mark = src.position(); 185 try { 186 187 while (src.hasRemaining() && dst.hasRemaining()) { 188 int b1 = src.get() & 0xff; 189 char c = b2cSB[b1]; 190 int inSize = 1; 191 if (c == UNMAPPABLE_DECODING) { 192 if (src.remaining() < 1) 193 return crMalformedOrUnderFlow(b1); 194 int b2 = src.get() & 0xff; 195 if (b2 < b2Min || b2 > b2Max || 196 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) 197 return crMalformedOrUnmappable(b1, b2); 198 inSize++; 199 } 200 dst.put(c); 201 mark += inSize; 202 } 203 return src.hasRemaining()? CoderResult.OVERFLOW 204 : CoderResult.UNDERFLOW; 205 } finally { 206 src.position(mark); 207 } 208 } 209 210 // Make some protected methods public for use by JISAutoDetect 211 public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) { 212 if (src.hasArray() && dst.hasArray()) 213 return decodeArrayLoop(src, dst); 214 else 215 return decodeBufferLoop(src, dst); 216 } 217 218 public int decode(byte[] src, int sp, int len, char[] dst) { 219 int dp = 0; 220 int sl = sp + len; 221 char repl = replacement().charAt(0); 222 while (sp < sl) { 223 int b1 = src[sp++] & 0xff; 224 char c = b2cSB[b1]; 225 if (c == UNMAPPABLE_DECODING) { 226 if (sp < sl) { 227 int b2 = src[sp++] & 0xff; 228 if (b2 < b2Min || b2 > b2Max || 229 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { 230 if (b2c[b1] == B2C_UNMAPPABLE || // isNotLeadingByte 231 b2c[b2] != B2C_UNMAPPABLE || // isLeadingByte 232 decodeSingle(b2) != UNMAPPABLE_DECODING) { 233 sp--; 234 } 235 } 236 } 237 if (c == UNMAPPABLE_DECODING) { 238 c = repl; 239 } 240 } 241 dst[dp++] = c; 242 } 243 return dp; 244 } 245 246 public void implReset() { 247 super.implReset(); 248 } 249 250 public CoderResult implFlush(CharBuffer out) { 251 return super.implFlush(out); 252 } 253 254 // decode loops are not using decodeSingle/Double() for performance 255 // reason. 256 public char decodeSingle(int b) { 257 return b2cSB[b]; 258 } 259 260 public char decodeDouble(int b1, int b2) { 261 if (b1 < 0 || b1 > b2c.length || 262 b2 < b2Min || b2 > b2Max) 263 return UNMAPPABLE_DECODING; 264 return b2c[b1][b2 - b2Min]; 265 } 266 } 267 268 // IBM_EBCDIC_DBCS 269 public static class Decoder_EBCDIC extends Decoder { 270 private static final int SBCS = 0; 271 private static final int DBCS = 1; 272 private static final int SO = 0x0e; 273 private static final int SI = 0x0f; 274 private int currentState; 275 276 public Decoder_EBCDIC(Charset cs, 277 char[][] b2c, char[] b2cSB, int b2Min, int b2Max) { 278 super(cs, b2c, b2cSB, b2Min, b2Max); 279 } 280 281 public void implReset() { 282 currentState = SBCS; 283 } 284 285 // Check validity of dbcs ebcdic byte pair values 286 // 287 // First byte : 0x41 -- 0xFE 288 // Second byte: 0x41 -- 0xFE 289 // Doublebyte blank: 0x4040 290 // 291 // The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io 292 // as 293 // if ((b1 != 0x40 || b2 != 0x40) && 294 // (b2 < 0x41 || b2 > 0xfe)) {...} 295 // is not correct/complete (range check for b1) 296 // 297 private static boolean isDoubleByte(int b1, int b2) { 298 return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe) 299 || (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE 300 } 301 302 protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) { 303 byte[] sa = src.array(); 304 int sp = src.arrayOffset() + src.position(); 305 int sl = src.arrayOffset() + src.limit(); 306 char[] da = dst.array(); 307 int dp = dst.arrayOffset() + dst.position(); 308 int dl = dst.arrayOffset() + dst.limit(); 309 310 try { 311 // don't check dp/dl together here, it's possible to 312 // decdoe a SO/SI without space in output buffer. 313 while (sp < sl) { 314 int b1 = sa[sp] & 0xff; 315 int inSize = 1; 316 if (b1 == SO) { // Shift out 317 if (currentState != SBCS) 318 return CoderResult.malformedForLength(1); 319 else 320 currentState = DBCS; 321 } else if (b1 == SI) { 322 if (currentState != DBCS) 323 return CoderResult.malformedForLength(1); 324 else 325 currentState = SBCS; 326 } else { 327 char c = UNMAPPABLE_DECODING; 328 if (currentState == SBCS) { 329 c = b2cSB[b1]; 330 if (c == UNMAPPABLE_DECODING) 331 return CoderResult.unmappableForLength(1); 332 } else { 333 if (sl - sp < 2) 334 return CoderResult.UNDERFLOW; 335 int b2 = sa[sp + 1] & 0xff; 336 if (b2 < b2Min || b2 > b2Max || 337 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { 338 if (!isDoubleByte(b1, b2)) 339 return CoderResult.malformedForLength(2); 340 return CoderResult.unmappableForLength(2); 341 } 342 inSize++; 343 } 344 if (dl - dp < 1) 345 return CoderResult.OVERFLOW; 346 347 da[dp++] = c; 348 } 349 sp += inSize; 350 } 351 return CoderResult.UNDERFLOW; 352 } finally { 353 src.position(sp - src.arrayOffset()); 354 dst.position(dp - dst.arrayOffset()); 355 } 356 } 357 358 protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) { 359 int mark = src.position(); 360 try { 361 while (src.hasRemaining()) { 362 int b1 = src.get() & 0xff; 363 int inSize = 1; 364 if (b1 == SO) { // Shift out 365 if (currentState != SBCS) 366 return CoderResult.malformedForLength(1); 367 else 368 currentState = DBCS; 369 } else if (b1 == SI) { 370 if (currentState != DBCS) 371 return CoderResult.malformedForLength(1); 372 else 373 currentState = SBCS; 374 } else { 375 char c = UNMAPPABLE_DECODING; 376 if (currentState == SBCS) { 377 c = b2cSB[b1]; 378 if (c == UNMAPPABLE_DECODING) 379 return CoderResult.unmappableForLength(1); 380 } else { 381 if (src.remaining() < 1) 382 return CoderResult.UNDERFLOW; 383 int b2 = src.get()&0xff; 384 if (b2 < b2Min || b2 > b2Max || 385 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { 386 if (!isDoubleByte(b1, b2)) 387 return CoderResult.malformedForLength(2); 388 return CoderResult.unmappableForLength(2); 389 } 390 inSize++; 391 } 392 393 if (dst.remaining() < 1) 394 return CoderResult.OVERFLOW; 395 396 dst.put(c); 397 } 398 mark += inSize; 399 } 400 return CoderResult.UNDERFLOW; 401 } finally { 402 src.position(mark); 403 } 404 } 405 406 public int decode(byte[] src, int sp, int len, char[] dst) { 407 int dp = 0; 408 int sl = sp + len; 409 currentState = SBCS; 410 char repl = replacement().charAt(0); 411 while (sp < sl) { 412 int b1 = src[sp++] & 0xff; 413 if (b1 == SO) { // Shift out 414 if (currentState != SBCS) 415 dst[dp++] = repl; 416 else 417 currentState = DBCS; 418 } else if (b1 == SI) { 419 if (currentState != DBCS) 420 dst[dp++] = repl; 421 else 422 currentState = SBCS; 423 } else { 424 char c = UNMAPPABLE_DECODING; 425 if (currentState == SBCS) { 426 c = b2cSB[b1]; 427 if (c == UNMAPPABLE_DECODING) 428 c = repl; 429 } else { 430 if (sl == sp) { 431 c = repl; 432 } else { 433 int b2 = src[sp++] & 0xff; 434 if (b2 < b2Min || b2 > b2Max || 435 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { 436 c = repl; 437 } 438 } 439 } 440 dst[dp++] = c; 441 } 442 } 443 return dp; 444 } 445 } 446 447 // DBCS_ONLY 448 public static class Decoder_DBCSONLY extends Decoder { 449 static final char[] b2cSB_UNMAPPABLE; 450 static { 451 b2cSB_UNMAPPABLE = new char[0x100]; 452 Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING); 453 } 454 public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) { 455 super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max); 456 } 457 } 458 459 // EUC_SIMPLE 460 // The only thing we need to "override" is to check SS2/SS3 and 461 // return "malformed" if found 462 public static class Decoder_EUC_SIM extends Decoder { 463 private final int SS2 = 0x8E; 464 private final int SS3 = 0x8F; 465 466 public Decoder_EUC_SIM(Charset cs, 467 char[][] b2c, char[] b2cSB, int b2Min, int b2Max) { 468 super(cs, b2c, b2cSB, b2Min, b2Max); 469 } 470 471 // No support provided for G2/G3 for SimpleEUC 472 protected CoderResult crMalformedOrUnderFlow(int b) { 473 if (b == SS2 || b == SS3 ) 474 return CoderResult.malformedForLength(1); 475 return CoderResult.UNDERFLOW; 476 } 477 478 protected CoderResult crMalformedOrUnmappable(int b1, int b2) { 479 if (b1 == SS2 || b1 == SS3 ) 480 return CoderResult.malformedForLength(1); 481 return CoderResult.unmappableForLength(2); 482 } 483 484 public int decode(byte[] src, int sp, int len, char[] dst) { 485 int dp = 0; 486 int sl = sp + len; 487 char repl = replacement().charAt(0); 488 while (sp < sl) { 489 int b1 = src[sp++] & 0xff; 490 char c = b2cSB[b1]; 491 if (c == UNMAPPABLE_DECODING) { 492 if (sp < sl) { 493 int b2 = src[sp++] & 0xff; 494 if (b2 < b2Min || b2 > b2Max || 495 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { 496 if (b1 == SS2 || b1 == SS3) { 497 sp--; 498 } 499 c = repl; 500 } 501 } else { 502 c = repl; 503 } 504 } 505 dst[dp++] = c; 506 } 507 return dp; 508 } 509 } 510 511 public static class Encoder extends CharsetEncoder 512 implements ArrayEncoder 513 { 514 protected final int MAX_SINGLEBYTE = 0xff; 515 private final char[] c2b; 516 private final char[] c2bIndex; 517 protected Surrogate.Parser sgp; 518 519 public Encoder(Charset cs, char[] c2b, char[] c2bIndex) { 520 super(cs, 2.0f, 2.0f); 521 this.c2b = c2b; 522 this.c2bIndex = c2bIndex; 523 } 524 525 public Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex) { 526 super(cs, avg, max, repl); 527 this.c2b = c2b; 528 this.c2bIndex = c2bIndex; 529 } 530 531 public boolean canEncode(char c) { 532 return encodeChar(c) != UNMAPPABLE_ENCODING; 533 } 534 535 protected Surrogate.Parser sgp() { 536 if (sgp == null) 537 sgp = new Surrogate.Parser(); 538 return sgp; 539 } 540 541 protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) { 542 char[] sa = src.array(); 543 int sp = src.arrayOffset() + src.position(); 544 int sl = src.arrayOffset() + src.limit(); 545 546 byte[] da = dst.array(); 547 int dp = dst.arrayOffset() + dst.position(); 548 int dl = dst.arrayOffset() + dst.limit(); 549 550 try { 551 while (sp < sl) { 552 char c = sa[sp]; 553 int bb = encodeChar(c); 554 if (bb == UNMAPPABLE_ENCODING) { 555 if (Character.isSurrogate(c)) { 556 if (sgp().parse(c, sa, sp, sl) < 0) 557 return sgp.error(); 558 return sgp.unmappableResult(); 559 } 560 return CoderResult.unmappableForLength(1); 561 } 562 563 if (bb > MAX_SINGLEBYTE) { // DoubleByte 564 if (dl - dp < 2) 565 return CoderResult.OVERFLOW; 566 da[dp++] = (byte)(bb >> 8); 567 da[dp++] = (byte)bb; 568 } else { // SingleByte 569 if (dl - dp < 1) 570 return CoderResult.OVERFLOW; 571 da[dp++] = (byte)bb; 572 } 573 574 sp++; 575 } 576 return CoderResult.UNDERFLOW; 577 } finally { 578 src.position(sp - src.arrayOffset()); 579 dst.position(dp - dst.arrayOffset()); 580 } 581 } 582 583 protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) { 584 int mark = src.position(); 585 try { 586 while (src.hasRemaining()) { 587 char c = src.get(); 588 int bb = encodeChar(c); 589 if (bb == UNMAPPABLE_ENCODING) { 590 if (Character.isSurrogate(c)) { 591 if (sgp().parse(c, src) < 0) 592 return sgp.error(); 593 return sgp.unmappableResult(); 594 } 595 return CoderResult.unmappableForLength(1); 596 } 597 if (bb > MAX_SINGLEBYTE) { // DoubleByte 598 if (dst.remaining() < 2) 599 return CoderResult.OVERFLOW; 600 dst.put((byte)(bb >> 8)); 601 dst.put((byte)(bb)); 602 } else { 603 if (dst.remaining() < 1) 604 return CoderResult.OVERFLOW; 605 dst.put((byte)bb); 606 } 607 mark++; 608 } 609 return CoderResult.UNDERFLOW; 610 } finally { 611 src.position(mark); 612 } 613 } 614 615 protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) { 616 if (src.hasArray() && dst.hasArray()) 617 return encodeArrayLoop(src, dst); 618 else 619 return encodeBufferLoop(src, dst); 620 } 621 622 protected byte[] repl = replacement(); 623 protected void implReplaceWith(byte[] newReplacement) { 624 repl = newReplacement; 625 } 626 627 public int encode(char[] src, int sp, int len, byte[] dst) { 628 int dp = 0; 629 int sl = sp + len; 630 int dl = dst.length; 631 while (sp < sl) { 632 char c = src[sp++]; 633 int bb = encodeChar(c); 634 if (bb == UNMAPPABLE_ENCODING) { 635 if (Character.isHighSurrogate(c) && sp < sl && 636 Character.isLowSurrogate(src[sp])) { 637 sp++; 638 } 639 dst[dp++] = repl[0]; 640 if (repl.length > 1) 641 dst[dp++] = repl[1]; 642 continue; 643 } //else 644 if (bb > MAX_SINGLEBYTE) { // DoubleByte 645 dst[dp++] = (byte)(bb >> 8); 646 dst[dp++] = (byte)bb; 647 } else { // SingleByte 648 dst[dp++] = (byte)bb; 649 } 650 651 } 652 return dp; 653 } 654 655 public int encodeChar(char ch) { 656 return c2b[c2bIndex[ch >> 8] + (ch & 0xff)]; 657 } 658 659 // init the c2b and c2bIndex tables from b2c. 660 public static void initC2B(String[] b2c, String b2cSB, String b2cNR, String c2bNR, 661 int b2Min, int b2Max, 662 char[] c2b, char[] c2bIndex) 663 { 664 Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING); 665 int off = 0x100; 666 667 char[][] b2c_ca = new char[b2c.length][]; 668 char[] b2cSB_ca = null; 669 if (b2cSB != null) 670 b2cSB_ca = b2cSB.toCharArray(); 671 672 for (int i = 0; i < b2c.length; i++) { 673 if (b2c[i] == null) 674 continue; 675 b2c_ca[i] = b2c[i].toCharArray(); 676 } 677 678 if (b2cNR != null) { 679 int j = 0; 680 while (j < b2cNR.length()) { 681 char b = b2cNR.charAt(j++); 682 char c = b2cNR.charAt(j++); 683 if (b < 0x100 && b2cSB_ca != null) { 684 if (b2cSB_ca[b] == c) 685 b2cSB_ca[b] = UNMAPPABLE_DECODING; 686 } else { 687 if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c) 688 b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING; 689 } 690 } 691 } 692 693 if (b2cSB_ca != null) { // SingleByte 694 for (int b = 0; b < b2cSB_ca.length; b++) { 695 char c = b2cSB_ca[b]; 696 if (c == UNMAPPABLE_DECODING) 697 continue; 698 int index = c2bIndex[c >> 8]; 699 if (index == 0) { 700 index = off; 701 off += 0x100; 702 c2bIndex[c >> 8] = (char)index; 703 } 704 c2b[index + (c & 0xff)] = (char)b; 705 } 706 } 707 708 for (int b1 = 0; b1 < b2c.length; b1++) { // DoubleByte 709 char[] db = b2c_ca[b1]; 710 if (db == null) 711 continue; 712 for (int b2 = b2Min; b2 <= b2Max; b2++) { 713 char c = db[b2 - b2Min]; 714 if (c == UNMAPPABLE_DECODING) 715 continue; 716 int index = c2bIndex[c >> 8]; 717 if (index == 0) { 718 index = off; 719 off += 0x100; 720 c2bIndex[c >> 8] = (char)index; 721 } 722 c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2); 723 } 724 } 725 726 if (c2bNR != null) { 727 // add c->b only nr entries 728 for (int i = 0; i < c2bNR.length(); i += 2) { 729 char b = c2bNR.charAt(i); 730 char c = c2bNR.charAt(i + 1); 731 int index = (c >> 8); 732 if (c2bIndex[index] == 0) { 733 c2bIndex[index] = (char)off; 734 off += 0x100; 735 } 736 index = c2bIndex[index] + (c & 0xff); 737 c2b[index] = b; 738 } 739 } 740 } 741 } 742 743 public static class Encoder_DBCSONLY extends Encoder { 744 public Encoder_DBCSONLY(Charset cs, byte[] repl, 745 char[] c2b, char[] c2bIndex) { 746 super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex); 747 } 748 749 public int encodeChar(char ch) { 750 int bb = super.encodeChar(ch); 751 if (bb <= MAX_SINGLEBYTE) 752 return UNMAPPABLE_ENCODING; 753 return bb; 754 } 755 } 756 757 758 759 public static class Encoder_EBCDIC extends Encoder { 760 static final int SBCS = 0; 761 static final int DBCS = 1; 762 static final byte SO = 0x0e; 763 static final byte SI = 0x0f; 764 765 protected int currentState = SBCS; 766 767 public Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex) { 768 super(cs, 4.0f, 5.0f, new byte[] {(byte)0x6f}, c2b, c2bIndex); 769 } 770 771 protected void implReset() { 772 currentState = SBCS; 773 } 774 775 protected CoderResult implFlush(ByteBuffer out) { 776 if (currentState == DBCS) { 777 if (out.remaining() < 1) 778 return CoderResult.OVERFLOW; 779 out.put(SI); 780 } 781 implReset(); 782 return CoderResult.UNDERFLOW; 783 } 784 785 protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) { 786 char[] sa = src.array(); 787 int sp = src.arrayOffset() + src.position(); 788 int sl = src.arrayOffset() + src.limit(); 789 byte[] da = dst.array(); 790 int dp = dst.arrayOffset() + dst.position(); 791 int dl = dst.arrayOffset() + dst.limit(); 792 793 try { 794 while (sp < sl) { 795 char c = sa[sp]; 796 int bb = encodeChar(c); 797 if (bb == UNMAPPABLE_ENCODING) { 798 if (Character.isSurrogate(c)) { 799 if (sgp().parse(c, sa, sp, sl) < 0) 800 return sgp.error(); 801 return sgp.unmappableResult(); 802 } 803 return CoderResult.unmappableForLength(1); 804 } 805 if (bb > MAX_SINGLEBYTE) { // DoubleByte 806 if (currentState == SBCS) { 807 if (dl - dp < 1) 808 return CoderResult.OVERFLOW; 809 currentState = DBCS; 810 da[dp++] = SO; 811 } 812 if (dl - dp < 2) 813 return CoderResult.OVERFLOW; 814 da[dp++] = (byte)(bb >> 8); 815 da[dp++] = (byte)bb; 816 } else { // SingleByte 817 if (currentState == DBCS) { 818 if (dl - dp < 1) 819 return CoderResult.OVERFLOW; 820 currentState = SBCS; 821 da[dp++] = SI; 822 } 823 if (dl - dp < 1) 824 return CoderResult.OVERFLOW; 825 da[dp++] = (byte)bb; 826 827 } 828 sp++; 829 } 830 return CoderResult.UNDERFLOW; 831 } finally { 832 src.position(sp - src.arrayOffset()); 833 dst.position(dp - dst.arrayOffset()); 834 } 835 } 836 837 protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) { 838 int mark = src.position(); 839 try { 840 while (src.hasRemaining()) { 841 char c = src.get(); 842 int bb = encodeChar(c); 843 if (bb == UNMAPPABLE_ENCODING) { 844 if (Character.isSurrogate(c)) { 845 if (sgp().parse(c, src) < 0) 846 return sgp.error(); 847 return sgp.unmappableResult(); 848 } 849 return CoderResult.unmappableForLength(1); 850 } 851 if (bb > MAX_SINGLEBYTE) { // DoubleByte 852 if (currentState == SBCS) { 853 if (dst.remaining() < 1) 854 return CoderResult.OVERFLOW; 855 currentState = DBCS; 856 dst.put(SO); 857 } 858 if (dst.remaining() < 2) 859 return CoderResult.OVERFLOW; 860 dst.put((byte)(bb >> 8)); 861 dst.put((byte)(bb)); 862 } else { // Single-byte 863 if (currentState == DBCS) { 864 if (dst.remaining() < 1) 865 return CoderResult.OVERFLOW; 866 currentState = SBCS; 867 dst.put(SI); 868 } 869 if (dst.remaining() < 1) 870 return CoderResult.OVERFLOW; 871 dst.put((byte)bb); 872 } 873 mark++; 874 } 875 return CoderResult.UNDERFLOW; 876 } finally { 877 src.position(mark); 878 } 879 } 880 881 public int encode(char[] src, int sp, int len, byte[] dst) { 882 int dp = 0; 883 int sl = sp + len; 884 while (sp < sl) { 885 char c = src[sp++]; 886 int bb = encodeChar(c); 887 888 if (bb == UNMAPPABLE_ENCODING) { 889 if (Character.isHighSurrogate(c) && sp < sl && 890 Character.isLowSurrogate(src[sp])) { 891 sp++; 892 } 893 dst[dp++] = repl[0]; 894 if (repl.length > 1) 895 dst[dp++] = repl[1]; 896 continue; 897 } //else 898 if (bb > MAX_SINGLEBYTE) { // DoubleByte 899 if (currentState == SBCS) { 900 currentState = DBCS; 901 dst[dp++] = SO; 902 } 903 dst[dp++] = (byte)(bb >> 8); 904 dst[dp++] = (byte)bb; 905 } else { // SingleByte 906 if (currentState == DBCS) { 907 currentState = SBCS; 908 dst[dp++] = SI; 909 } 910 dst[dp++] = (byte)bb; 911 } 912 } 913 914 if (currentState == DBCS) { 915 currentState = SBCS; 916 dst[dp++] = SI; 917 } 918 return dp; 919 } 920 } 921 922 // EUC_SIMPLE 923 public static class Encoder_EUC_SIM extends Encoder { 924 public Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex) { 925 super(cs, c2b, c2bIndex); 926 } 927 } 928 929 }