1 /* 2 * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.nio.cs; 27 28 import java.nio.ByteBuffer; 29 import java.nio.CharBuffer; 30 import java.nio.charset.Charset; 31 import java.nio.charset.CharsetDecoder; 32 import java.nio.charset.CharsetEncoder; 33 import java.nio.charset.CoderResult; 34 import java.util.Arrays; 35 import sun.nio.cs.Surrogate; 36 import sun.nio.cs.ArrayDecoder; 37 import sun.nio.cs.ArrayEncoder; 38 import static sun.nio.cs.CharsetMapping.*; 39 40 /* 41 * Four types of "DoubleByte" charsets are implemented in this class 42 * (1)DoubleByte 43 * The "mostly widely used" multibyte charset, a combination of 44 * a singlebyte character set (usually the ASCII charset) and a 45 * doublebyte character set. The codepoint values of singlebyte 46 * and doublebyte don't overlap. Microsoft's multibyte charsets 47 * and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943, 48 * 948, 949 and 950 are such charsets. 49 * 50 * (2)DoubleByte_EBCDIC 51 * IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch) 52 * in and out between the singlebyte character set and doublebyte 53 * character set. 54 * 55 * (3)DoubleByte_SIMPLE_EUC 56 * It's a "simple" form of EUC encoding scheme, only have the 57 * singlebyte character set G0 and one doublebyte character set 58 * G1 are defined, G2 (with SS2) and G3 (with SS3) are not used. 59 * So it is actually the same as the "typical" type (1) mentioned 60 * above, except it return "malformed" for the SS2 and SS3 when 61 * decoding. 62 * 63 * (4)DoubleByte ONLY 64 * A "pure" doublebyte only character set. From implementation 65 * point of view, this is the type (1) with "decodeSingle" always 66 * returns unmappable. 67 * 68 * For simplicity, all implementations share the same decoding and 69 * encoding data structure. 70 * 71 * Decoding: 72 * 73 * char[][] b2c; 74 * char[] b2cSB; 75 * int b2Min, b2Max 76 * 77 * public char decodeSingle(int b) { 78 * return b2cSB.[b]; 79 * } 80 * 81 * public char decodeDouble(int b1, int b2) { 82 * if (b2 < b2Min || b2 > b2Max) 83 * return UNMAPPABLE_DECODING; 84 * return b2c[b1][b2 - b2Min]; 85 * } 86 * 87 * (1)b2Min, b2Max are the corresponding min and max value of the 88 * low-half of the double-byte. 89 * (2)The high 8-bit/b1 of the double-byte are used to indexed into 90 * b2c array. 91 * 92 * Encoding: 93 * 94 * char[] c2b; 95 * char[] c2bIndex; 96 * 97 * public int encodeChar(char ch) { 98 * return c2b[c2bIndex[ch >> 8] + (ch & 0xff)]; 99 * } 100 * 101 */ 102 103 public class DoubleByte { 104 105 public static final char[] B2C_UNMAPPABLE; 106 static { 107 B2C_UNMAPPABLE = new char[0x100]; 108 Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING); 109 } 110 111 public static class Decoder extends CharsetDecoder 112 implements DelegatableDecoder, ArrayDecoder 113 { 114 final char[][] b2c; 115 final char[] b2cSB; 116 final int b2Min; 117 final int b2Max; 118 final boolean isASCIICompatible; 119 120 // for SimpleEUC override 121 protected CoderResult crMalformedOrUnderFlow(int b) { 122 return CoderResult.UNDERFLOW; 123 } 124 125 protected CoderResult crMalformedOrUnmappable(int b1, int b2) { 126 if (b2c[b1] == B2C_UNMAPPABLE || // isNotLeadingByte(b1) 127 b2c[b2] != B2C_UNMAPPABLE || // isLeadingByte(b2) 128 decodeSingle(b2) != UNMAPPABLE_DECODING) { // isSingle(b2) 129 return CoderResult.malformedForLength(1); 130 } 131 return CoderResult.unmappableForLength(2); 132 } 133 134 public Decoder(Charset cs, float avgcpb, float maxcpb, 135 char[][] b2c, char[] b2cSB, 136 int b2Min, int b2Max, 137 boolean isASCIICompatible) { 138 super(cs, avgcpb, maxcpb); 139 this.b2c = b2c; 140 this.b2cSB = b2cSB; 141 this.b2Min = b2Min; 142 this.b2Max = b2Max; 143 this.isASCIICompatible = isASCIICompatible; 144 } 145 146 public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max, 147 boolean isASCIICompatible) { 148 this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, isASCIICompatible); 149 } 150 151 public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) { 152 this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, false); 153 } 154 155 protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) { 156 byte[] sa = src.array(); 157 int sp = src.arrayOffset() + src.position(); 158 int sl = src.arrayOffset() + src.limit(); 159 160 char[] da = dst.array(); 161 int dp = dst.arrayOffset() + dst.position(); 162 int dl = dst.arrayOffset() + dst.limit(); 163 164 try { 165 while (sp < sl && dp < dl) { 166 // inline the decodeSingle/Double() for better performance 167 int inSize = 1; 168 int b1 = sa[sp] & 0xff; 169 char c = b2cSB[b1]; 170 if (c == UNMAPPABLE_DECODING) { 171 if (sl - sp < 2) 172 return crMalformedOrUnderFlow(b1); 173 int b2 = sa[sp + 1] & 0xff; 174 if (b2 < b2Min || b2 > b2Max || 175 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { 176 return crMalformedOrUnmappable(b1, b2); 177 } 178 inSize++; 179 } 180 da[dp++] = c; 181 sp += inSize; 182 } 183 return (sp >= sl) ? CoderResult.UNDERFLOW 184 : CoderResult.OVERFLOW; 185 } finally { 186 src.position(sp - src.arrayOffset()); 187 dst.position(dp - dst.arrayOffset()); 188 } 189 } 190 191 protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) { 192 int mark = src.position(); 193 try { 194 195 while (src.hasRemaining() && dst.hasRemaining()) { 196 int b1 = src.get() & 0xff; 197 char c = b2cSB[b1]; 198 int inSize = 1; 199 if (c == UNMAPPABLE_DECODING) { 200 if (src.remaining() < 1) 201 return crMalformedOrUnderFlow(b1); 202 int b2 = src.get() & 0xff; 203 if (b2 < b2Min || b2 > b2Max || 204 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) 205 return crMalformedOrUnmappable(b1, b2); 206 inSize++; 207 } 208 dst.put(c); 209 mark += inSize; 210 } 211 return src.hasRemaining()? CoderResult.OVERFLOW 212 : CoderResult.UNDERFLOW; 213 } finally { 214 src.position(mark); 215 } 216 } 217 218 // Make some protected methods public for use by JISAutoDetect 219 public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) { 220 if (src.hasArray() && dst.hasArray()) 221 return decodeArrayLoop(src, dst); 222 else 223 return decodeBufferLoop(src, dst); 224 } 225 226 @Override 227 public int decode(byte[] src, int sp, int len, char[] dst) { 228 int dp = 0; 229 int sl = sp + len; 230 char repl = replacement().charAt(0); 231 while (sp < sl) { 232 int b1 = src[sp++] & 0xff; 233 char c = b2cSB[b1]; 234 if (c == UNMAPPABLE_DECODING) { 235 if (sp < sl) { 236 int b2 = src[sp++] & 0xff; 237 if (b2 < b2Min || b2 > b2Max || 238 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { 239 if (b2c[b1] == B2C_UNMAPPABLE || // isNotLeadingByte 240 b2c[b2] != B2C_UNMAPPABLE || // isLeadingByte 241 decodeSingle(b2) != UNMAPPABLE_DECODING) { 242 sp--; 243 } 244 } 245 } 246 if (c == UNMAPPABLE_DECODING) { 247 c = repl; 248 } 249 } 250 dst[dp++] = c; 251 } 252 return dp; 253 } 254 255 @Override 256 public boolean isASCIICompatible() { 257 return isASCIICompatible; 258 } 259 260 public void implReset() { 261 super.implReset(); 262 } 263 264 public CoderResult implFlush(CharBuffer out) { 265 return super.implFlush(out); 266 } 267 268 // decode loops are not using decodeSingle/Double() for performance 269 // reason. 270 public char decodeSingle(int b) { 271 return b2cSB[b]; 272 } 273 274 public char decodeDouble(int b1, int b2) { 275 if (b1 < 0 || b1 > b2c.length || 276 b2 < b2Min || b2 > b2Max) 277 return UNMAPPABLE_DECODING; 278 return b2c[b1][b2 - b2Min]; 279 } 280 } 281 282 // IBM_EBCDIC_DBCS 283 public static class Decoder_EBCDIC extends Decoder { 284 private static final int SBCS = 0; 285 private static final int DBCS = 1; 286 private static final int SO = 0x0e; 287 private static final int SI = 0x0f; 288 private int currentState; 289 290 public Decoder_EBCDIC(Charset cs, 291 char[][] b2c, char[] b2cSB, int b2Min, int b2Max, 292 boolean isASCIICompatible) { 293 super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible); 294 } 295 296 public Decoder_EBCDIC(Charset cs, 297 char[][] b2c, char[] b2cSB, int b2Min, int b2Max) { 298 super(cs, b2c, b2cSB, b2Min, b2Max, false); 299 } 300 301 public void implReset() { 302 currentState = SBCS; 303 } 304 305 // Check validity of dbcs ebcdic byte pair values 306 // 307 // First byte : 0x41 -- 0xFE 308 // Second byte: 0x41 -- 0xFE 309 // Doublebyte blank: 0x4040 310 // 311 // The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io 312 // as 313 // if ((b1 != 0x40 || b2 != 0x40) && 314 // (b2 < 0x41 || b2 > 0xfe)) {...} 315 // is not correct/complete (range check for b1) 316 // 317 private static boolean isDoubleByte(int b1, int b2) { 318 return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe) 319 || (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE 320 } 321 322 protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) { 323 byte[] sa = src.array(); 324 int sp = src.arrayOffset() + src.position(); 325 int sl = src.arrayOffset() + src.limit(); 326 char[] da = dst.array(); 327 int dp = dst.arrayOffset() + dst.position(); 328 int dl = dst.arrayOffset() + dst.limit(); 329 330 try { 331 // don't check dp/dl together here, it's possible to 332 // decdoe a SO/SI without space in output buffer. 333 while (sp < sl) { 334 int b1 = sa[sp] & 0xff; 335 int inSize = 1; 336 if (b1 == SO) { // Shift out 337 if (currentState != SBCS) 338 return CoderResult.malformedForLength(1); 339 else 340 currentState = DBCS; 341 } else if (b1 == SI) { 342 if (currentState != DBCS) 343 return CoderResult.malformedForLength(1); 344 else 345 currentState = SBCS; 346 } else { 347 char c = UNMAPPABLE_DECODING; 348 if (currentState == SBCS) { 349 c = b2cSB[b1]; 350 if (c == UNMAPPABLE_DECODING) 351 return CoderResult.unmappableForLength(1); 352 } else { 353 if (sl - sp < 2) 354 return CoderResult.UNDERFLOW; 355 int b2 = sa[sp + 1] & 0xff; 356 if (b2 < b2Min || b2 > b2Max || 357 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { 358 if (!isDoubleByte(b1, b2)) 359 return CoderResult.malformedForLength(2); 360 return CoderResult.unmappableForLength(2); 361 } 362 inSize++; 363 } 364 if (dl - dp < 1) 365 return CoderResult.OVERFLOW; 366 367 da[dp++] = c; 368 } 369 sp += inSize; 370 } 371 return CoderResult.UNDERFLOW; 372 } finally { 373 src.position(sp - src.arrayOffset()); 374 dst.position(dp - dst.arrayOffset()); 375 } 376 } 377 378 protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) { 379 int mark = src.position(); 380 try { 381 while (src.hasRemaining()) { 382 int b1 = src.get() & 0xff; 383 int inSize = 1; 384 if (b1 == SO) { // Shift out 385 if (currentState != SBCS) 386 return CoderResult.malformedForLength(1); 387 else 388 currentState = DBCS; 389 } else if (b1 == SI) { 390 if (currentState != DBCS) 391 return CoderResult.malformedForLength(1); 392 else 393 currentState = SBCS; 394 } else { 395 char c = UNMAPPABLE_DECODING; 396 if (currentState == SBCS) { 397 c = b2cSB[b1]; 398 if (c == UNMAPPABLE_DECODING) 399 return CoderResult.unmappableForLength(1); 400 } else { 401 if (src.remaining() < 1) 402 return CoderResult.UNDERFLOW; 403 int b2 = src.get()&0xff; 404 if (b2 < b2Min || b2 > b2Max || 405 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { 406 if (!isDoubleByte(b1, b2)) 407 return CoderResult.malformedForLength(2); 408 return CoderResult.unmappableForLength(2); 409 } 410 inSize++; 411 } 412 413 if (dst.remaining() < 1) 414 return CoderResult.OVERFLOW; 415 416 dst.put(c); 417 } 418 mark += inSize; 419 } 420 return CoderResult.UNDERFLOW; 421 } finally { 422 src.position(mark); 423 } 424 } 425 426 @Override 427 public int decode(byte[] src, int sp, int len, char[] dst) { 428 int dp = 0; 429 int sl = sp + len; 430 currentState = SBCS; 431 char repl = replacement().charAt(0); 432 while (sp < sl) { 433 int b1 = src[sp++] & 0xff; 434 if (b1 == SO) { // Shift out 435 if (currentState != SBCS) 436 dst[dp++] = repl; 437 else 438 currentState = DBCS; 439 } else if (b1 == SI) { 440 if (currentState != DBCS) 441 dst[dp++] = repl; 442 else 443 currentState = SBCS; 444 } else { 445 char c = UNMAPPABLE_DECODING; 446 if (currentState == SBCS) { 447 c = b2cSB[b1]; 448 if (c == UNMAPPABLE_DECODING) 449 c = repl; 450 } else { 451 if (sl == sp) { 452 c = repl; 453 } else { 454 int b2 = src[sp++] & 0xff; 455 if (b2 < b2Min || b2 > b2Max || 456 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { 457 c = repl; 458 } 459 } 460 } 461 dst[dp++] = c; 462 } 463 } 464 return dp; 465 } 466 } 467 468 // DBCS_ONLY 469 public static class Decoder_DBCSONLY extends Decoder { 470 static final char[] b2cSB_UNMAPPABLE; 471 static { 472 b2cSB_UNMAPPABLE = new char[0x100]; 473 Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING); 474 } 475 public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max, 476 boolean isASCIICompatible) { 477 super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, isASCIICompatible); 478 } 479 480 public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) { 481 super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, false); 482 } 483 } 484 485 // EUC_SIMPLE 486 // The only thing we need to "override" is to check SS2/SS3 and 487 // return "malformed" if found 488 public static class Decoder_EUC_SIM extends Decoder { 489 private final int SS2 = 0x8E; 490 private final int SS3 = 0x8F; 491 492 public Decoder_EUC_SIM(Charset cs, 493 char[][] b2c, char[] b2cSB, int b2Min, int b2Max, 494 boolean isASCIICompatible) { 495 super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible); 496 } 497 498 // No support provided for G2/G3 for SimpleEUC 499 protected CoderResult crMalformedOrUnderFlow(int b) { 500 if (b == SS2 || b == SS3 ) 501 return CoderResult.malformedForLength(1); 502 return CoderResult.UNDERFLOW; 503 } 504 505 protected CoderResult crMalformedOrUnmappable(int b1, int b2) { 506 if (b1 == SS2 || b1 == SS3 ) 507 return CoderResult.malformedForLength(1); 508 return CoderResult.unmappableForLength(2); 509 } 510 511 @Override 512 public int decode(byte[] src, int sp, int len, char[] dst) { 513 int dp = 0; 514 int sl = sp + len; 515 char repl = replacement().charAt(0); 516 while (sp < sl) { 517 int b1 = src[sp++] & 0xff; 518 char c = b2cSB[b1]; 519 if (c == UNMAPPABLE_DECODING) { 520 if (sp < sl) { 521 int b2 = src[sp++] & 0xff; 522 if (b2 < b2Min || b2 > b2Max || 523 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) { 524 if (b1 == SS2 || b1 == SS3) { 525 sp--; 526 } 527 c = repl; 528 } 529 } else { 530 c = repl; 531 } 532 } 533 dst[dp++] = c; 534 } 535 return dp; 536 } 537 } 538 539 public static class Encoder extends CharsetEncoder 540 implements ArrayEncoder 541 { 542 protected final int MAX_SINGLEBYTE = 0xff; 543 private final char[] c2b; 544 private final char[] c2bIndex; 545 protected Surrogate.Parser sgp; 546 final boolean isASCIICompatible; 547 548 public Encoder(Charset cs, char[] c2b, char[] c2bIndex) { 549 this(cs, c2b, c2bIndex, false); 550 } 551 552 public Encoder(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible) { 553 super(cs, 2.0f, 2.0f); 554 this.c2b = c2b; 555 this.c2bIndex = c2bIndex; 556 this.isASCIICompatible = isASCIICompatible; 557 } 558 559 public Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex, 560 boolean isASCIICompatible) { 561 super(cs, avg, max, repl); 562 this.c2b = c2b; 563 this.c2bIndex = c2bIndex; 564 this.isASCIICompatible = isASCIICompatible; 565 } 566 567 public boolean canEncode(char c) { 568 return encodeChar(c) != UNMAPPABLE_ENCODING; 569 } 570 571 protected Surrogate.Parser sgp() { 572 if (sgp == null) 573 sgp = new Surrogate.Parser(); 574 return sgp; 575 } 576 577 protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) { 578 char[] sa = src.array(); 579 int sp = src.arrayOffset() + src.position(); 580 int sl = src.arrayOffset() + src.limit(); 581 582 byte[] da = dst.array(); 583 int dp = dst.arrayOffset() + dst.position(); 584 int dl = dst.arrayOffset() + dst.limit(); 585 586 try { 587 while (sp < sl) { 588 char c = sa[sp]; 589 int bb = encodeChar(c); 590 if (bb == UNMAPPABLE_ENCODING) { 591 if (Character.isSurrogate(c)) { 592 if (sgp().parse(c, sa, sp, sl) < 0) 593 return sgp.error(); 594 return sgp.unmappableResult(); 595 } 596 return CoderResult.unmappableForLength(1); 597 } 598 599 if (bb > MAX_SINGLEBYTE) { // DoubleByte 600 if (dl - dp < 2) 601 return CoderResult.OVERFLOW; 602 da[dp++] = (byte)(bb >> 8); 603 da[dp++] = (byte)bb; 604 } else { // SingleByte 605 if (dl - dp < 1) 606 return CoderResult.OVERFLOW; 607 da[dp++] = (byte)bb; 608 } 609 610 sp++; 611 } 612 return CoderResult.UNDERFLOW; 613 } finally { 614 src.position(sp - src.arrayOffset()); 615 dst.position(dp - dst.arrayOffset()); 616 } 617 } 618 619 protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) { 620 int mark = src.position(); 621 try { 622 while (src.hasRemaining()) { 623 char c = src.get(); 624 int bb = encodeChar(c); 625 if (bb == UNMAPPABLE_ENCODING) { 626 if (Character.isSurrogate(c)) { 627 if (sgp().parse(c, src) < 0) 628 return sgp.error(); 629 return sgp.unmappableResult(); 630 } 631 return CoderResult.unmappableForLength(1); 632 } 633 if (bb > MAX_SINGLEBYTE) { // DoubleByte 634 if (dst.remaining() < 2) 635 return CoderResult.OVERFLOW; 636 dst.put((byte)(bb >> 8)); 637 dst.put((byte)(bb)); 638 } else { 639 if (dst.remaining() < 1) 640 return CoderResult.OVERFLOW; 641 dst.put((byte)bb); 642 } 643 mark++; 644 } 645 return CoderResult.UNDERFLOW; 646 } finally { 647 src.position(mark); 648 } 649 } 650 651 protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) { 652 if (src.hasArray() && dst.hasArray()) 653 return encodeArrayLoop(src, dst); 654 else 655 return encodeBufferLoop(src, dst); 656 } 657 658 protected byte[] repl = replacement(); 659 protected void implReplaceWith(byte[] newReplacement) { 660 repl = newReplacement; 661 } 662 663 @Override 664 public int encode(char[] src, int sp, int len, byte[] dst) { 665 int dp = 0; 666 int sl = sp + len; 667 int dl = dst.length; 668 while (sp < sl) { 669 char c = src[sp++]; 670 int bb = encodeChar(c); 671 if (bb == UNMAPPABLE_ENCODING) { 672 if (Character.isHighSurrogate(c) && sp < sl && 673 Character.isLowSurrogate(src[sp])) { 674 sp++; 675 } 676 dst[dp++] = repl[0]; 677 if (repl.length > 1) 678 dst[dp++] = repl[1]; 679 continue; 680 } //else 681 if (bb > MAX_SINGLEBYTE) { // DoubleByte 682 dst[dp++] = (byte)(bb >> 8); 683 dst[dp++] = (byte)bb; 684 } else { // SingleByte 685 dst[dp++] = (byte)bb; 686 } 687 } 688 return dp; 689 } 690 691 @Override 692 public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) { 693 int dp = 0; 694 int sl = sp + len; 695 while (sp < sl) { 696 char c = (char)(src[sp++] & 0xff); 697 int bb = encodeChar(c); 698 if (bb == UNMAPPABLE_ENCODING) { 699 // no surrogate pair in latin1 string 700 dst[dp++] = repl[0]; 701 if (repl.length > 1) { 702 dst[dp++] = repl[1]; 703 } 704 continue; 705 } //else 706 if (bb > MAX_SINGLEBYTE) { // DoubleByte 707 dst[dp++] = (byte)(bb >> 8); 708 dst[dp++] = (byte)bb; 709 } else { // SingleByte 710 dst[dp++] = (byte)bb; 711 } 712 713 } 714 return dp; 715 } 716 717 @Override 718 public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) { 719 int dp = 0; 720 int sl = sp + len; 721 while (sp < sl) { 722 char c = StringUTF16.getChar(src, sp++); 723 int bb = encodeChar(c); 724 if (bb == UNMAPPABLE_ENCODING) { 725 if (Character.isHighSurrogate(c) && sp < sl && 726 Character.isLowSurrogate(StringUTF16.getChar(src, sp))) { 727 sp++; 728 } 729 dst[dp++] = repl[0]; 730 if (repl.length > 1) { 731 dst[dp++] = repl[1]; 732 } 733 continue; 734 } //else 735 if (bb > MAX_SINGLEBYTE) { // DoubleByte 736 dst[dp++] = (byte)(bb >> 8); 737 dst[dp++] = (byte)bb; 738 } else { // SingleByte 739 dst[dp++] = (byte)bb; 740 } 741 } 742 return dp; 743 } 744 745 @Override 746 public boolean isASCIICompatible() { 747 return isASCIICompatible; 748 } 749 750 public int encodeChar(char ch) { 751 return c2b[c2bIndex[ch >> 8] + (ch & 0xff)]; 752 } 753 754 // init the c2b and c2bIndex tables from b2c. 755 public static void initC2B(String[] b2c, String b2cSB, String b2cNR, String c2bNR, 756 int b2Min, int b2Max, 757 char[] c2b, char[] c2bIndex) 758 { 759 Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING); 760 int off = 0x100; 761 762 char[][] b2c_ca = new char[b2c.length][]; 763 char[] b2cSB_ca = null; 764 if (b2cSB != null) 765 b2cSB_ca = b2cSB.toCharArray(); 766 767 for (int i = 0; i < b2c.length; i++) { 768 if (b2c[i] == null) 769 continue; 770 b2c_ca[i] = b2c[i].toCharArray(); 771 } 772 773 if (b2cNR != null) { 774 int j = 0; 775 while (j < b2cNR.length()) { 776 char b = b2cNR.charAt(j++); 777 char c = b2cNR.charAt(j++); 778 if (b < 0x100 && b2cSB_ca != null) { 779 if (b2cSB_ca[b] == c) 780 b2cSB_ca[b] = UNMAPPABLE_DECODING; 781 } else { 782 if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c) 783 b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING; 784 } 785 } 786 } 787 788 if (b2cSB_ca != null) { // SingleByte 789 for (int b = 0; b < b2cSB_ca.length; b++) { 790 char c = b2cSB_ca[b]; 791 if (c == UNMAPPABLE_DECODING) 792 continue; 793 int index = c2bIndex[c >> 8]; 794 if (index == 0) { 795 index = off; 796 off += 0x100; 797 c2bIndex[c >> 8] = (char)index; 798 } 799 c2b[index + (c & 0xff)] = (char)b; 800 } 801 } 802 803 for (int b1 = 0; b1 < b2c.length; b1++) { // DoubleByte 804 char[] db = b2c_ca[b1]; 805 if (db == null) 806 continue; 807 for (int b2 = b2Min; b2 <= b2Max; b2++) { 808 char c = db[b2 - b2Min]; 809 if (c == UNMAPPABLE_DECODING) 810 continue; 811 int index = c2bIndex[c >> 8]; 812 if (index == 0) { 813 index = off; 814 off += 0x100; 815 c2bIndex[c >> 8] = (char)index; 816 } 817 c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2); 818 } 819 } 820 821 if (c2bNR != null) { 822 // add c->b only nr entries 823 for (int i = 0; i < c2bNR.length(); i += 2) { 824 char b = c2bNR.charAt(i); 825 char c = c2bNR.charAt(i + 1); 826 int index = (c >> 8); 827 if (c2bIndex[index] == 0) { 828 c2bIndex[index] = (char)off; 829 off += 0x100; 830 } 831 index = c2bIndex[index] + (c & 0xff); 832 c2b[index] = b; 833 } 834 } 835 } 836 } 837 838 public static class Encoder_DBCSONLY extends Encoder { 839 840 public Encoder_DBCSONLY(Charset cs, byte[] repl, 841 char[] c2b, char[] c2bIndex, 842 boolean isASCIICompatible) { 843 super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex, isASCIICompatible); 844 } 845 846 public int encodeChar(char ch) { 847 int bb = super.encodeChar(ch); 848 if (bb <= MAX_SINGLEBYTE) 849 return UNMAPPABLE_ENCODING; 850 return bb; 851 } 852 } 853 854 public static class Encoder_EBCDIC extends Encoder { 855 static final int SBCS = 0; 856 static final int DBCS = 1; 857 static final byte SO = 0x0e; 858 static final byte SI = 0x0f; 859 860 protected int currentState = SBCS; 861 862 public Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex, 863 boolean isASCIICompatible) { 864 super(cs, 4.0f, 5.0f, new byte[] {(byte)0x6f}, c2b, c2bIndex, isASCIICompatible); 865 } 866 867 protected void implReset() { 868 currentState = SBCS; 869 } 870 871 protected CoderResult implFlush(ByteBuffer out) { 872 if (currentState == DBCS) { 873 if (out.remaining() < 1) 874 return CoderResult.OVERFLOW; 875 out.put(SI); 876 } 877 implReset(); 878 return CoderResult.UNDERFLOW; 879 } 880 881 protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) { 882 char[] sa = src.array(); 883 int sp = src.arrayOffset() + src.position(); 884 int sl = src.arrayOffset() + src.limit(); 885 byte[] da = dst.array(); 886 int dp = dst.arrayOffset() + dst.position(); 887 int dl = dst.arrayOffset() + dst.limit(); 888 889 try { 890 while (sp < sl) { 891 char c = sa[sp]; 892 int bb = encodeChar(c); 893 if (bb == UNMAPPABLE_ENCODING) { 894 if (Character.isSurrogate(c)) { 895 if (sgp().parse(c, sa, sp, sl) < 0) 896 return sgp.error(); 897 return sgp.unmappableResult(); 898 } 899 return CoderResult.unmappableForLength(1); 900 } 901 if (bb > MAX_SINGLEBYTE) { // DoubleByte 902 if (currentState == SBCS) { 903 if (dl - dp < 1) 904 return CoderResult.OVERFLOW; 905 currentState = DBCS; 906 da[dp++] = SO; 907 } 908 if (dl - dp < 2) 909 return CoderResult.OVERFLOW; 910 da[dp++] = (byte)(bb >> 8); 911 da[dp++] = (byte)bb; 912 } else { // SingleByte 913 if (currentState == DBCS) { 914 if (dl - dp < 1) 915 return CoderResult.OVERFLOW; 916 currentState = SBCS; 917 da[dp++] = SI; 918 } 919 if (dl - dp < 1) 920 return CoderResult.OVERFLOW; 921 da[dp++] = (byte)bb; 922 923 } 924 sp++; 925 } 926 return CoderResult.UNDERFLOW; 927 } finally { 928 src.position(sp - src.arrayOffset()); 929 dst.position(dp - dst.arrayOffset()); 930 } 931 } 932 933 protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) { 934 int mark = src.position(); 935 try { 936 while (src.hasRemaining()) { 937 char c = src.get(); 938 int bb = encodeChar(c); 939 if (bb == UNMAPPABLE_ENCODING) { 940 if (Character.isSurrogate(c)) { 941 if (sgp().parse(c, src) < 0) 942 return sgp.error(); 943 return sgp.unmappableResult(); 944 } 945 return CoderResult.unmappableForLength(1); 946 } 947 if (bb > MAX_SINGLEBYTE) { // DoubleByte 948 if (currentState == SBCS) { 949 if (dst.remaining() < 1) 950 return CoderResult.OVERFLOW; 951 currentState = DBCS; 952 dst.put(SO); 953 } 954 if (dst.remaining() < 2) 955 return CoderResult.OVERFLOW; 956 dst.put((byte)(bb >> 8)); 957 dst.put((byte)(bb)); 958 } else { // Single-byte 959 if (currentState == DBCS) { 960 if (dst.remaining() < 1) 961 return CoderResult.OVERFLOW; 962 currentState = SBCS; 963 dst.put(SI); 964 } 965 if (dst.remaining() < 1) 966 return CoderResult.OVERFLOW; 967 dst.put((byte)bb); 968 } 969 mark++; 970 } 971 return CoderResult.UNDERFLOW; 972 } finally { 973 src.position(mark); 974 } 975 } 976 977 @Override 978 public int encode(char[] src, int sp, int len, byte[] dst) { 979 int dp = 0; 980 int sl = sp + len; 981 while (sp < sl) { 982 char c = src[sp++]; 983 int bb = encodeChar(c); 984 985 if (bb == UNMAPPABLE_ENCODING) { 986 if (Character.isHighSurrogate(c) && sp < sl && 987 Character.isLowSurrogate(src[sp])) { 988 sp++; 989 } 990 dst[dp++] = repl[0]; 991 if (repl.length > 1) 992 dst[dp++] = repl[1]; 993 continue; 994 } //else 995 if (bb > MAX_SINGLEBYTE) { // DoubleByte 996 if (currentState == SBCS) { 997 currentState = DBCS; 998 dst[dp++] = SO; 999 } 1000 dst[dp++] = (byte)(bb >> 8); 1001 dst[dp++] = (byte)bb; 1002 } else { // SingleByte 1003 if (currentState == DBCS) { 1004 currentState = SBCS; 1005 dst[dp++] = SI; 1006 } 1007 dst[dp++] = (byte)bb; 1008 } 1009 } 1010 1011 if (currentState == DBCS) { 1012 currentState = SBCS; 1013 dst[dp++] = SI; 1014 } 1015 return dp; 1016 } 1017 1018 @Override 1019 public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) { 1020 int dp = 0; 1021 int sl = sp + len; 1022 while (sp < sl) { 1023 char c = (char)(src[sp++] & 0xff); 1024 int bb = encodeChar(c); 1025 if (bb == UNMAPPABLE_ENCODING) { 1026 // no surrogate pair in latin1 string 1027 dst[dp++] = repl[0]; 1028 if (repl.length > 1) 1029 dst[dp++] = repl[1]; 1030 continue; 1031 } //else 1032 if (bb > MAX_SINGLEBYTE) { // DoubleByte 1033 if (currentState == SBCS) { 1034 currentState = DBCS; 1035 dst[dp++] = SO; 1036 } 1037 dst[dp++] = (byte)(bb >> 8); 1038 dst[dp++] = (byte)bb; 1039 } else { // SingleByte 1040 if (currentState == DBCS) { 1041 currentState = SBCS; 1042 dst[dp++] = SI; 1043 } 1044 dst[dp++] = (byte)bb; 1045 } 1046 } 1047 if (currentState == DBCS) { 1048 currentState = SBCS; 1049 dst[dp++] = SI; 1050 } 1051 return dp; 1052 } 1053 1054 @Override 1055 public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) { 1056 int dp = 0; 1057 int sl = sp + len; 1058 while (sp < sl) { 1059 char c = StringUTF16.getChar(src, sp++); 1060 int bb = encodeChar(c); 1061 if (bb == UNMAPPABLE_ENCODING) { 1062 if (Character.isHighSurrogate(c) && sp < sl && 1063 Character.isLowSurrogate(StringUTF16.getChar(src, sp))) { 1064 sp++; 1065 } 1066 dst[dp++] = repl[0]; 1067 if (repl.length > 1) 1068 dst[dp++] = repl[1]; 1069 continue; 1070 } //else 1071 if (bb > MAX_SINGLEBYTE) { // DoubleByte 1072 if (currentState == SBCS) { 1073 currentState = DBCS; 1074 dst[dp++] = SO; 1075 } 1076 dst[dp++] = (byte)(bb >> 8); 1077 dst[dp++] = (byte)bb; 1078 } else { // SingleByte 1079 if (currentState == DBCS) { 1080 currentState = SBCS; 1081 dst[dp++] = SI; 1082 } 1083 dst[dp++] = (byte)bb; 1084 } 1085 } 1086 if (currentState == DBCS) { 1087 currentState = SBCS; 1088 dst[dp++] = SI; 1089 } 1090 return dp; 1091 } 1092 } 1093 1094 // EUC_SIMPLE 1095 public static class Encoder_EUC_SIM extends Encoder { 1096 public Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex, 1097 boolean isASCIICompatible) { 1098 super(cs, c2b, c2bIndex, isASCIICompatible); 1099 } 1100 } 1101 1102 }