1 /*
   2  * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.nio.cs;
  27 
  28 import java.nio.ByteBuffer;
  29 import java.nio.CharBuffer;
  30 import java.nio.charset.Charset;
  31 import java.nio.charset.CharsetDecoder;
  32 import java.nio.charset.CharsetEncoder;
  33 import java.nio.charset.CoderResult;
  34 import java.util.Arrays;
  35 import sun.nio.cs.Surrogate;
  36 import sun.nio.cs.ArrayDecoder;
  37 import sun.nio.cs.ArrayEncoder;
  38 import static sun.nio.cs.CharsetMapping.*;
  39 
  40 /*
  41  * Four types of "DoubleByte" charsets are implemented in this class
  42  * (1)DoubleByte
  43  *    The "most widely used" multibyte charset, a combination of
  44  *    a singlebyte character set (usually the ASCII charset) and a
  45  *    doublebyte character set. The codepoint values of singlebyte
  46  *    and doublebyte don't overlap. Microsoft's multibyte charsets
  47  *    and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943,
  48  *    948, 949 and 950 are such charsets.
  49  *
  50  * (2)DoubleByte_EBCDIC
  51  *    IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch)
  52  *    in and out between the singlebyte character set and doublebyte
  53  *    character set.
  54  *
  55  * (3)DoubleByte_SIMPLE_EUC
  56  *    It's a "simple" form of the EUC encoding scheme: only the
  57  *    singlebyte character set G0 and one doublebyte character set
  58  *    G1 are defined; G2 (with SS2) and G3 (with SS3) are not used.
  59  *    So it is actually the same as the "typical" type (1) mentioned
  60  *    above, except that it returns "malformed" for SS2 and SS3 when
  61  *    decoding.
  62  *
  63  * (4)DoubleByte ONLY
  64  *    A "pure" doublebyte only character set. From an implementation
  65  *    point of view, this is type (1) with "decodeSingle" always
  66  *    returning unmappable.
  67  *
  68  * For simplicity, all implementations share the same decoding and
  69  * encoding data structure.
  70  *
  71  * Decoding:
  72  *
  73  *    char[][] b2c;
  74  *    char[] b2cSB;
  75  *    int b2Min, b2Max
  76  *
  77  *    public char decodeSingle(int b) {
  78  *        return b2cSB[b];
  79  *    }
  80  *
  81  *    public char decodeDouble(int b1, int b2) {
  82  *        if (b2 < b2Min || b2 > b2Max)
  83  *            return UNMAPPABLE_DECODING;
  84  *         return b2c[b1][b2 - b2Min];
  85  *    }
  86  *
  87  *    (1)b2Min, b2Max are the corresponding min and max value of the
  88  *       low-half of the double-byte.
  89  *    (2)The high 8 bits (b1) of the double-byte are used to index into
  90  *       the b2c array.
  91  *
  92  * Encoding:
  93  *
  94  *    char[] c2b;
  95  *    char[] c2bIndex;
  96  *
  97  *    public int encodeChar(char ch) {
  98  *        return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
  99  *    }
 100  *
 101  */
 102 
 103 public class DoubleByte {
 104 
 105     public static final char[] B2C_UNMAPPABLE;
 106     static {
 107         B2C_UNMAPPABLE = new char[0x100];
 108         Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING);
 109     }
 110 
 111     public static class Decoder extends CharsetDecoder
 112                                 implements DelegatableDecoder, ArrayDecoder
 113     {
 114         final char[][] b2c;
 115         final char[] b2cSB;
 116         final int b2Min;
 117         final int b2Max;
 118         final boolean isASCIICompatible;
 119 
 120         // for SimpleEUC override
 121         protected CoderResult crMalformedOrUnderFlow(int b) {
 122             return CoderResult.UNDERFLOW;
 123         }
 124 
 125         protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
 126             if (b2c[b1] == B2C_UNMAPPABLE ||                // isNotLeadingByte(b1)
 127                 b2c[b2] != B2C_UNMAPPABLE ||                // isLeadingByte(b2)
 128                 decodeSingle(b2) != UNMAPPABLE_DECODING) {  // isSingle(b2)
 129                 return CoderResult.malformedForLength(1);
 130             }
 131             return CoderResult.unmappableForLength(2);
 132         }
 133 
 134         public Decoder(Charset cs, float avgcpb, float maxcpb,
 135                        char[][] b2c, char[] b2cSB,
 136                        int b2Min, int b2Max,
 137                        boolean isASCIICompatible) {
 138             super(cs, avgcpb, maxcpb);
 139             this.b2c = b2c;
 140             this.b2cSB = b2cSB;
 141             this.b2Min = b2Min;
 142             this.b2Max = b2Max;
 143             this.isASCIICompatible = isASCIICompatible;
 144         }
 145 
 146         public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
 147                        boolean isASCIICompatible) {
 148             this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
 149         }
 150 
 151         public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
 152             this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, false);
 153         }
 154 
 155         protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
 156             byte[] sa = src.array();
 157             int sp = src.arrayOffset() + src.position();
 158             int sl = src.arrayOffset() + src.limit();
 159 
 160             char[] da = dst.array();
 161             int dp = dst.arrayOffset() + dst.position();
 162             int dl = dst.arrayOffset() + dst.limit();
 163 
 164             try {
 165                 while (sp < sl && dp < dl) {
 166                     // inline the decodeSingle/Double() for better performance
 167                     int inSize = 1;
 168                     int b1 = sa[sp] & 0xff;
 169                     char c = b2cSB[b1];
 170                     if (c == UNMAPPABLE_DECODING) {
 171                         if (sl - sp < 2)
 172                             return crMalformedOrUnderFlow(b1);
 173                         int b2 = sa[sp + 1] & 0xff;
 174                         if (b2 < b2Min || b2 > b2Max ||
 175                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
 176                             return crMalformedOrUnmappable(b1, b2);
 177                         }
 178                         inSize++;
 179                     }
 180                     da[dp++] = c;
 181                     sp += inSize;
 182                 }
 183                 return (sp >= sl) ? CoderResult.UNDERFLOW
 184                                   : CoderResult.OVERFLOW;
 185             } finally {
 186                 src.position(sp - src.arrayOffset());
 187                 dst.position(dp - dst.arrayOffset());
 188             }
 189         }
 190 
 191         protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
 192             int mark = src.position();
 193             try {
 194 
 195                 while (src.hasRemaining() && dst.hasRemaining()) {
 196                     int b1 = src.get() & 0xff;
 197                     char c = b2cSB[b1];
 198                     int inSize = 1;
 199                     if (c == UNMAPPABLE_DECODING) {
 200                         if (src.remaining() < 1)
 201                             return crMalformedOrUnderFlow(b1);
 202                         int b2 = src.get() & 0xff;
 203                         if (b2 < b2Min || b2 > b2Max ||
 204                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING)
 205                             return crMalformedOrUnmappable(b1, b2);
 206                         inSize++;
 207                     }
 208                     dst.put(c);
 209                     mark += inSize;
 210                 }
 211                 return src.hasRemaining()? CoderResult.OVERFLOW
 212                                          : CoderResult.UNDERFLOW;
 213             } finally {
 214                 src.position(mark);
 215             }
 216         }
 217 
 218         // Make some protected methods public for use by JISAutoDetect
 219         public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
 220             if (src.hasArray() && dst.hasArray())
 221                 return decodeArrayLoop(src, dst);
 222             else
 223                 return decodeBufferLoop(src, dst);
 224         }
 225 
 226         @Override
 227         public int decode(byte[] src, int sp, int len, char[] dst) {
 228             int dp = 0;
 229             int sl = sp + len;
 230             char repl = replacement().charAt(0);
 231             while (sp < sl) {
 232                 int b1 = src[sp++] & 0xff;
 233                 char c = b2cSB[b1];
 234                 if (c == UNMAPPABLE_DECODING) {
 235                     if (sp < sl) {
 236                         int b2 = src[sp++] & 0xff;
 237                         if (b2 < b2Min || b2 > b2Max ||
 238                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
 239                             if (crMalformedOrUnmappable(b1, b2).length() == 1) {
 240                                 sp--;
 241                             }
 242                         }
 243                     }
 244                     if (c == UNMAPPABLE_DECODING) {
 245                          c = repl;
 246                     }
 247                 }
 248                 dst[dp++] = c;
 249             }
 250             return dp;
 251         }
 252 
 253         @Override
 254         public boolean isASCIICompatible() {
 255             return isASCIICompatible;
 256         }
 257 
 258         public void implReset() {
 259             super.implReset();
 260         }
 261 
 262         public CoderResult implFlush(CharBuffer out) {
 263             return super.implFlush(out);
 264         }
 265 
 266         // decode loops are not using decodeSingle/Double() for performance
 267         // reason.
 268         public char decodeSingle(int b) {
 269             return b2cSB[b];
 270         }
 271 
 272         public char decodeDouble(int b1, int b2) {
 273             if (b1 < 0 || b1 > b2c.length ||
 274                 b2 < b2Min || b2 > b2Max)
 275                 return UNMAPPABLE_DECODING;
 276             return  b2c[b1][b2 - b2Min];
 277         }
 278     }
 279 
 280     // IBM_EBCDIC_DBCS
 281     public static class Decoder_EBCDIC extends Decoder {
 282         private static final int SBCS = 0;
 283         private static final int DBCS = 1;
 284         private static final int SO = 0x0e;
 285         private static final int SI = 0x0f;
 286         private int  currentState;
 287 
 288         public Decoder_EBCDIC(Charset cs,
 289                               char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
 290                               boolean isASCIICompatible) {
 291             super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
 292         }
 293 
 294         public Decoder_EBCDIC(Charset cs,
 295                               char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
 296             super(cs, b2c, b2cSB, b2Min, b2Max, false);
 297         }
 298 
 299         public void implReset() {
 300             currentState = SBCS;
 301         }
 302 
 303         // Check validity of dbcs ebcdic byte pair values
 304         //
 305         // First byte : 0x41 -- 0xFE
 306         // Second byte: 0x41 -- 0xFE
 307         // Doublebyte blank: 0x4040
 308         //
 309         // The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io
 310         // as
 311         //            if ((b1 != 0x40 || b2 != 0x40) &&
 312         //                (b2 < 0x41 || b2 > 0xfe)) {...}
 313         // is not correct/complete (range check for b1)
 314         //
 315         private static boolean isDoubleByte(int b1, int b2) {
 316             return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe)
 317                    || (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE
 318         }
 319 
 320         protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
 321             byte[] sa = src.array();
 322             int sp = src.arrayOffset() + src.position();
 323             int sl = src.arrayOffset() + src.limit();
 324             char[] da = dst.array();
 325             int dp = dst.arrayOffset() + dst.position();
 326             int dl = dst.arrayOffset() + dst.limit();
 327 
 328             try {
 329                 // don't check dp/dl together here, it's possible to
 330                 // decdoe a SO/SI without space in output buffer.
 331                 while (sp < sl) {
 332                     int b1 = sa[sp] & 0xff;
 333                     int inSize = 1;
 334                     if (b1 == SO) {  // Shift out
 335                         if (currentState != SBCS)
 336                             return CoderResult.malformedForLength(1);
 337                         else
 338                             currentState = DBCS;
 339                     } else if (b1 == SI) {
 340                         if (currentState != DBCS)
 341                             return CoderResult.malformedForLength(1);
 342                         else
 343                             currentState = SBCS;
 344                     } else {
 345                         char c =  UNMAPPABLE_DECODING;
 346                         if (currentState == SBCS) {
 347                             c = b2cSB[b1];
 348                             if (c == UNMAPPABLE_DECODING)
 349                                 return CoderResult.unmappableForLength(1);
 350                         } else {
 351                             if (sl - sp < 2)
 352                                 return CoderResult.UNDERFLOW;
 353                             int b2 = sa[sp + 1] & 0xff;
 354                             if (b2 < b2Min || b2 > b2Max ||
 355                                 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
 356                                 if (!isDoubleByte(b1, b2))
 357                                     return CoderResult.malformedForLength(2);
 358                                 return CoderResult.unmappableForLength(2);
 359                             }
 360                             inSize++;
 361                         }
 362                         if (dl - dp < 1)
 363                             return CoderResult.OVERFLOW;
 364 
 365                         da[dp++] = c;
 366                     }
 367                     sp += inSize;
 368                 }
 369                 return CoderResult.UNDERFLOW;
 370             } finally {
 371                 src.position(sp - src.arrayOffset());
 372                 dst.position(dp - dst.arrayOffset());
 373             }
 374         }
 375 
 376         protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
 377             int mark = src.position();
 378             try {
 379                 while (src.hasRemaining()) {
 380                     int b1 = src.get() & 0xff;
 381                     int inSize = 1;
 382                     if (b1 == SO) {  // Shift out
 383                         if (currentState != SBCS)
 384                             return CoderResult.malformedForLength(1);
 385                         else
 386                             currentState = DBCS;
 387                     } else if (b1 == SI) {
 388                         if (currentState != DBCS)
 389                             return CoderResult.malformedForLength(1);
 390                         else
 391                             currentState = SBCS;
 392                     } else {
 393                         char c = UNMAPPABLE_DECODING;
 394                         if (currentState == SBCS) {
 395                             c = b2cSB[b1];
 396                             if (c == UNMAPPABLE_DECODING)
 397                                 return CoderResult.unmappableForLength(1);
 398                         } else {
 399                             if (src.remaining() < 1)
 400                                 return CoderResult.UNDERFLOW;
 401                             int b2 = src.get()&0xff;
 402                             if (b2 < b2Min || b2 > b2Max ||
 403                                 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
 404                                 if (!isDoubleByte(b1, b2))
 405                                     return CoderResult.malformedForLength(2);
 406                                 return CoderResult.unmappableForLength(2);
 407                             }
 408                             inSize++;
 409                         }
 410 
 411                         if (dst.remaining() < 1)
 412                             return CoderResult.OVERFLOW;
 413 
 414                         dst.put(c);
 415                     }
 416                     mark += inSize;
 417                 }
 418                 return CoderResult.UNDERFLOW;
 419             } finally {
 420                 src.position(mark);
 421             }
 422         }
 423 
 424         @Override
 425         public int decode(byte[] src, int sp, int len, char[] dst) {
 426             int dp = 0;
 427             int sl = sp + len;
 428             currentState = SBCS;
 429             char repl = replacement().charAt(0);
 430             while (sp < sl) {
 431                 int b1 = src[sp++] & 0xff;
 432                 if (b1 == SO) {  // Shift out
 433                     if (currentState != SBCS)
 434                         dst[dp++] = repl;
 435                     else
 436                         currentState = DBCS;
 437                 } else if (b1 == SI) {
 438                     if (currentState != DBCS)
 439                         dst[dp++] = repl;
 440                     else
 441                         currentState = SBCS;
 442                 } else {
 443                     char c =  UNMAPPABLE_DECODING;
 444                     if (currentState == SBCS) {
 445                         c = b2cSB[b1];
 446                         if (c == UNMAPPABLE_DECODING)
 447                             c = repl;
 448                     } else {
 449                         if (sl == sp) {
 450                             c = repl;
 451                         } else {
 452                             int b2 = src[sp++] & 0xff;
 453                             if (b2 < b2Min || b2 > b2Max ||
 454                                 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
 455                                 c = repl;
 456                             }
 457                         }
 458                     }
 459                     dst[dp++] = c;
 460                 }
 461             }
 462             return dp;
 463         }
 464     }
 465 
 466     // DBCS_ONLY
 467     public static class Decoder_DBCSONLY extends Decoder {
 468         static final char[] b2cSB_UNMAPPABLE;
 469         static {
 470             b2cSB_UNMAPPABLE = new char[0x100];
 471             Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING);
 472         }
 473 
 474         // always returns unmappableForLenth(2) for doublebyte_only
 475         @Override
 476         protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
 477             return CoderResult.unmappableForLength(2);
 478         }
 479 
 480         public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
 481                                 boolean isASCIICompatible) {
 482             super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, isASCIICompatible);
 483         }
 484 
 485         public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
 486             super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, false);
 487         }
 488     }
 489 
 490     // EUC_SIMPLE
 491     // The only thing we need to "override" is to check SS2/SS3 and
 492     // return "malformed" if found
 493     public static class Decoder_EUC_SIM extends Decoder {
 494         private final int SS2 =  0x8E;
 495         private final int SS3 =  0x8F;
 496 
 497         public Decoder_EUC_SIM(Charset cs,
 498                                char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
 499                                boolean isASCIICompatible) {
 500             super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
 501         }
 502 
 503         // No support provided for G2/G3 for SimpleEUC
 504         protected CoderResult crMalformedOrUnderFlow(int b) {
 505             if (b == SS2 || b == SS3 )
 506                 return CoderResult.malformedForLength(1);
 507             return CoderResult.UNDERFLOW;
 508         }
 509 
 510         protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
 511             if (b1 == SS2 || b1 == SS3 )
 512                 return CoderResult.malformedForLength(1);
 513             return CoderResult.unmappableForLength(2);
 514         }
 515 
 516         @Override
 517         public int decode(byte[] src, int sp, int len, char[] dst) {
 518             int dp = 0;
 519             int sl = sp + len;
 520             char repl = replacement().charAt(0);
 521             while (sp < sl) {
 522                 int b1 = src[sp++] & 0xff;
 523                 char c = b2cSB[b1];
 524                 if (c == UNMAPPABLE_DECODING) {
 525                     if (sp < sl) {
 526                         int b2 = src[sp++] & 0xff;
 527                         if (b2 < b2Min || b2 > b2Max ||
 528                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
 529                             if (b1 == SS2 || b1 == SS3) {
 530                                 sp--;
 531                             }
 532                             c = repl;
 533                         }
 534                     } else {
 535                         c = repl;
 536                     }
 537                 }
 538                 dst[dp++] = c;
 539             }
 540             return dp;
 541         }
 542     }
 543 
 544     public static class Encoder extends CharsetEncoder
 545                                 implements ArrayEncoder
 546     {
             // Largest encoded value written as one byte; anything larger is
             // written as two bytes by the encode methods of this class.
 547         protected final int MAX_SINGLEBYTE = 0xff;
             // char-to-bytes table, read by encodeChar() at index
             // c2bIndex[ch >> 8] + (ch & 0xff)
 548         private final char[] c2b;
             // offset into c2b for each value of the high 8 bits of a char
 549         private final char[] c2bIndex;
             // surrogate parser, created on first use by sgp()
 550         protected Surrogate.Parser sgp;
             // value reported by isASCIICompatible()
 551         final boolean isASCIICompatible;
 552 
             /**
              * Creates an encoder that is not flagged as ASCII compatible;
              * delegates to the four-argument constructor.
              */
 553         public Encoder(Charset cs, char[] c2b, char[] c2bIndex) {
 554             this(cs, c2b, c2bIndex, false);
 555         }
 556 
             /**
              * Creates an encoder whose average and maximum bytes-per-char are
              * both 2.0 and which uses the superclass's default replacement.
              *
              * @param cs       the charset this encoder belongs to
              * @param c2b      char-to-bytes table
              * @param c2bIndex offsets into c2b, indexed by the high 8 bits of a char
              * @param isASCIICompatible value returned by isASCIICompatible()
              */
 557         public Encoder(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible) {
 558             super(cs, 2.0f, 2.0f);
 559             this.c2b = c2b;
 560             this.c2bIndex = c2bIndex;
 561             this.isASCIICompatible = isASCIICompatible;
 562         }
 563 
             /**
              * Creates an encoder with caller-supplied bytes-per-char ratios and
              * replacement bytes, all of which are passed to the superclass.
              *
              * @param avg  average bytes per char
              * @param max  maximum bytes per char
              * @param repl initial replacement bytes
              */
 564         public Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex,
 565                        boolean isASCIICompatible) {
 566             super(cs, avg, max, repl);
 567             this.c2b = c2b;
 568             this.c2bIndex = c2bIndex;
 569             this.isASCIICompatible = isASCIICompatible;
 570         }
 571 
 572         public boolean canEncode(char c) {
 573             return encodeChar(c) != UNMAPPABLE_ENCODING;
 574         }
 575 
 576         protected Surrogate.Parser sgp() {
 577             if (sgp == null)
 578                 sgp = new Surrogate.Parser();
 579             return sgp;
 580         }
 581 
 582         protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
 583             char[] sa = src.array();
 584             int sp = src.arrayOffset() + src.position();
 585             int sl = src.arrayOffset() + src.limit();
 586 
 587             byte[] da = dst.array();
 588             int dp = dst.arrayOffset() + dst.position();
 589             int dl = dst.arrayOffset() + dst.limit();
 590 
 591             try {
 592                 while (sp < sl) {
 593                     char c = sa[sp];
 594                     int bb = encodeChar(c);
 595                     if (bb == UNMAPPABLE_ENCODING) {
 596                         if (Character.isSurrogate(c)) {
 597                             if (sgp().parse(c, sa, sp, sl) < 0)
 598                                 return sgp.error();
 599                             return sgp.unmappableResult();
 600                         }
 601                         return CoderResult.unmappableForLength(1);
 602                     }
 603 
 604                     if (bb > MAX_SINGLEBYTE) {    // DoubleByte
 605                         if (dl - dp < 2)
 606                             return CoderResult.OVERFLOW;
 607                         da[dp++] = (byte)(bb >> 8);
 608                         da[dp++] = (byte)bb;
 609                     } else {                      // SingleByte
 610                         if (dl - dp < 1)
 611                             return CoderResult.OVERFLOW;
 612                         da[dp++] = (byte)bb;
 613                     }
 614 
 615                     sp++;
 616                 }
 617                 return CoderResult.UNDERFLOW;
 618             } finally {
 619                 src.position(sp - src.arrayOffset());
 620                 dst.position(dp - dst.arrayOffset());
 621             }
 622         }
 623 
 624         protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
 625             int mark = src.position();
 626             try {
 627                 while (src.hasRemaining()) {
 628                     char c = src.get();
 629                     int bb = encodeChar(c);
 630                     if (bb == UNMAPPABLE_ENCODING) {
 631                         if (Character.isSurrogate(c)) {
 632                             if (sgp().parse(c, src) < 0)
 633                                 return sgp.error();
 634                             return sgp.unmappableResult();
 635                         }
 636                         return CoderResult.unmappableForLength(1);
 637                     }
 638                     if (bb > MAX_SINGLEBYTE) {  // DoubleByte
 639                         if (dst.remaining() < 2)
 640                             return CoderResult.OVERFLOW;
 641                         dst.put((byte)(bb >> 8));
 642                         dst.put((byte)(bb));
 643                     } else {
 644                         if (dst.remaining() < 1)
 645                         return CoderResult.OVERFLOW;
 646                         dst.put((byte)bb);
 647                     }
 648                     mark++;
 649                 }
 650                 return CoderResult.UNDERFLOW;
 651             } finally {
 652                 src.position(mark);
 653             }
 654         }
 655 
 656         protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
 657             if (src.hasArray() && dst.hasArray())
 658                 return encodeArrayLoop(src, dst);
 659             else
 660                 return encodeBufferLoop(src, dst);
 661         }
 662 
             // Replacement bytes written by the array-based encode methods for
             // chars that cannot be encoded; initialised from replacement().
 663         protected byte[] repl = replacement();
             // Records the new replacement bytes in the repl field.
 664         protected void implReplaceWith(byte[] newReplacement) {
 665             repl = newReplacement;
 666         }
 667 
 668         @Override
 669         public int encode(char[] src, int sp, int len, byte[] dst) {
 670             int dp = 0;
 671             int sl = sp + len;
 672             int dl = dst.length;
 673             while (sp < sl) {
 674                 char c = src[sp++];
 675                 int bb = encodeChar(c);
 676                 if (bb == UNMAPPABLE_ENCODING) {
 677                     if (Character.isHighSurrogate(c) && sp < sl &&
 678                         Character.isLowSurrogate(src[sp])) {
 679                         sp++;
 680                     }
 681                     dst[dp++] = repl[0];
 682                     if (repl.length > 1)
 683                         dst[dp++] = repl[1];
 684                     continue;
 685                 } //else
 686                 if (bb > MAX_SINGLEBYTE) { // DoubleByte
 687                     dst[dp++] = (byte)(bb >> 8);
 688                     dst[dp++] = (byte)bb;
 689                 } else {                          // SingleByte
 690                     dst[dp++] = (byte)bb;
 691                 }
 692             }
 693             return dp;
 694         }
 695 
 696         @Override
 697         public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
 698             int dp = 0;
 699             int sl = sp + len;
 700             while (sp < sl) {
 701                 char c = (char)(src[sp++] & 0xff);
 702                 int bb = encodeChar(c);
 703                 if (bb == UNMAPPABLE_ENCODING) {
 704                     // no surrogate pair in latin1 string
 705                     dst[dp++] = repl[0];
 706                     if (repl.length > 1) {
 707                         dst[dp++] = repl[1];
 708                     }
 709                     continue;
 710                 } //else
 711                 if (bb > MAX_SINGLEBYTE) { // DoubleByte
 712                     dst[dp++] = (byte)(bb >> 8);
 713                     dst[dp++] = (byte)bb;
 714                 } else {                   // SingleByte
 715                     dst[dp++] = (byte)bb;
 716                 }
 717 
 718             }
 719             return dp;
 720         }
 721 
 722         @Override
 723         public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
 724             int dp = 0;
 725             int sl = sp + len;
 726             while (sp < sl) {
 727                 char c = StringUTF16.getChar(src, sp++);
 728                 int bb = encodeChar(c);
 729                 if (bb == UNMAPPABLE_ENCODING) {
 730                     if (Character.isHighSurrogate(c) && sp < sl &&
 731                         Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
 732                         sp++;
 733                     }
 734                     dst[dp++] = repl[0];
 735                     if (repl.length > 1) {
 736                         dst[dp++] = repl[1];
 737                     }
 738                     continue;
 739                 } //else
 740                 if (bb > MAX_SINGLEBYTE) { // DoubleByte
 741                     dst[dp++] = (byte)(bb >> 8);
 742                     dst[dp++] = (byte)bb;
 743                 } else {                   // SingleByte
 744                     dst[dp++] = (byte)bb;
 745                 }
 746             }
 747             return dp;
 748         }
 749 
             // Returns the flag supplied to the constructor.
 750         @Override
 751         public boolean isASCIICompatible() {
 752             return isASCIICompatible;
 753         }
 754 
 755         public int encodeChar(char ch) {
 756             return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
 757         }
 758 
 759         // init the c2b and c2bIndex tables from b2c.
 760         public static void initC2B(String[] b2c, String b2cSB, String b2cNR,  String c2bNR,
 761                             int b2Min, int b2Max,
 762                             char[] c2b, char[] c2bIndex)
 763         {
 764             Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
 765             int off = 0x100;
 766 
 767             char[][] b2c_ca = new char[b2c.length][];
 768             char[] b2cSB_ca = null;
 769             if (b2cSB != null)
 770                 b2cSB_ca = b2cSB.toCharArray();
 771 
 772             for (int i = 0; i < b2c.length; i++) {
 773                 if (b2c[i] == null)
 774                     continue;
 775                 b2c_ca[i] = b2c[i].toCharArray();
 776             }
 777 
 778             if (b2cNR != null) {
 779                 int j = 0;
 780                 while (j < b2cNR.length()) {
 781                     char b  = b2cNR.charAt(j++);
 782                     char c  = b2cNR.charAt(j++);
 783                     if (b < 0x100 && b2cSB_ca != null) {
 784                         if (b2cSB_ca[b] == c)
 785                             b2cSB_ca[b] = UNMAPPABLE_DECODING;
 786                     } else {
 787                         if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c)
 788                             b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING;
 789                     }
 790                 }
 791             }
 792 
 793             if (b2cSB_ca != null) {      // SingleByte
 794                 for (int b = 0; b < b2cSB_ca.length; b++) {
 795                     char c = b2cSB_ca[b];
 796                     if (c == UNMAPPABLE_DECODING)
 797                         continue;
 798                     int index = c2bIndex[c >> 8];
 799                     if (index == 0) {
 800                         index = off;
 801                         off += 0x100;
 802                         c2bIndex[c >> 8] = (char)index;
 803                     }
 804                     c2b[index + (c & 0xff)] = (char)b;
 805                 }
 806             }
 807 
 808             for (int b1 = 0; b1 < b2c.length; b1++) {  // DoubleByte
 809                 char[] db = b2c_ca[b1];
 810                 if (db == null)
 811                     continue;
 812                 for (int b2 = b2Min; b2 <= b2Max; b2++) {
 813                     char c = db[b2 - b2Min];
 814                     if (c == UNMAPPABLE_DECODING)
 815                         continue;
 816                     int index = c2bIndex[c >> 8];
 817                     if (index == 0) {
 818                         index = off;
 819                         off += 0x100;
 820                         c2bIndex[c >> 8] = (char)index;
 821                     }
 822                     c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2);
 823                 }
 824             }
 825 
 826             if (c2bNR != null) {
 827                 // add c->b only nr entries
 828                 for (int i = 0; i < c2bNR.length(); i += 2) {
 829                     char b = c2bNR.charAt(i);
 830                     char c = c2bNR.charAt(i + 1);
 831                     int index = (c >> 8);
 832                     if (c2bIndex[index] == 0) {
 833                         c2bIndex[index] = (char)off;
 834                         off += 0x100;
 835                     }
 836                     index = c2bIndex[index] + (c & 0xff);
 837                     c2b[index] = b;
 838                 }
 839             }
 840         }
 841     }
 842 
 843     public static class Encoder_DBCSONLY extends Encoder {
 844 
 845         public Encoder_DBCSONLY(Charset cs, byte[] repl,
 846                                 char[] c2b, char[] c2bIndex,
 847                                 boolean isASCIICompatible) {
 848             super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex, isASCIICompatible);
 849         }
 850 
 851         public int encodeChar(char ch) {
 852             int bb = super.encodeChar(ch);
 853             if (bb <= MAX_SINGLEBYTE)
 854                 return UNMAPPABLE_ENCODING;
 855             return bb;
 856         }
 857     }
 858 
 859     public static class Encoder_EBCDIC extends Encoder {
 860         static final int SBCS = 0;
 861         static final int DBCS = 1;
 862         static final byte SO = 0x0e;
 863         static final byte SI = 0x0f;
 864 
 865         protected int  currentState = SBCS;
 866 
 867         public Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex,
 868                               boolean isASCIICompatible) {
 869             super(cs, 4.0f, 5.0f, new byte[] {(byte)0x6f}, c2b, c2bIndex, isASCIICompatible);
 870         }
 871 
 872         protected void implReset() {
 873             currentState = SBCS;
 874         }
 875 
 876         protected CoderResult implFlush(ByteBuffer out) {
 877             if (currentState == DBCS) {
 878                 if (out.remaining() < 1)
 879                     return CoderResult.OVERFLOW;
 880                 out.put(SI);
 881             }
 882             implReset();
 883             return CoderResult.UNDERFLOW;
 884         }
 885 
 886         protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
 887             char[] sa = src.array();
 888             int sp = src.arrayOffset() + src.position();
 889             int sl = src.arrayOffset() + src.limit();
 890             byte[] da = dst.array();
 891             int dp = dst.arrayOffset() + dst.position();
 892             int dl = dst.arrayOffset() + dst.limit();
 893 
 894             try {
 895                 while (sp < sl) {
 896                     char c = sa[sp];
 897                     int bb = encodeChar(c);
 898                     if (bb == UNMAPPABLE_ENCODING) {
 899                         if (Character.isSurrogate(c)) {
 900                             if (sgp().parse(c, sa, sp, sl) < 0)
 901                                 return sgp.error();
 902                             return sgp.unmappableResult();
 903                         }
 904                         return CoderResult.unmappableForLength(1);
 905                     }
 906                     if (bb > MAX_SINGLEBYTE) {  // DoubleByte
 907                         if (currentState == SBCS) {
 908                             if (dl - dp < 1)
 909                                 return CoderResult.OVERFLOW;
 910                             currentState = DBCS;
 911                             da[dp++] = SO;
 912                         }
 913                         if (dl - dp < 2)
 914                             return CoderResult.OVERFLOW;
 915                         da[dp++] = (byte)(bb >> 8);
 916                         da[dp++] = (byte)bb;
 917                     } else {                    // SingleByte
 918                         if (currentState == DBCS) {
 919                             if (dl - dp < 1)
 920                                 return CoderResult.OVERFLOW;
 921                             currentState = SBCS;
 922                             da[dp++] = SI;
 923                         }
 924                         if (dl - dp < 1)
 925                             return CoderResult.OVERFLOW;
 926                         da[dp++] = (byte)bb;
 927 
 928                     }
 929                     sp++;
 930                 }
 931                 return CoderResult.UNDERFLOW;
 932             } finally {
 933                 src.position(sp - src.arrayOffset());
 934                 dst.position(dp - dst.arrayOffset());
 935             }
 936         }
 937 
 938         protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
 939             int mark = src.position();
 940             try {
 941                 while (src.hasRemaining()) {
 942                     char c = src.get();
 943                     int bb = encodeChar(c);
 944                     if (bb == UNMAPPABLE_ENCODING) {
 945                         if (Character.isSurrogate(c)) {
 946                             if (sgp().parse(c, src) < 0)
 947                                 return sgp.error();
 948                             return sgp.unmappableResult();
 949                         }
 950                         return CoderResult.unmappableForLength(1);
 951                     }
 952                     if (bb > MAX_SINGLEBYTE) {  // DoubleByte
 953                         if (currentState == SBCS) {
 954                             if (dst.remaining() < 1)
 955                                 return CoderResult.OVERFLOW;
 956                             currentState = DBCS;
 957                             dst.put(SO);
 958                         }
 959                         if (dst.remaining() < 2)
 960                             return CoderResult.OVERFLOW;
 961                         dst.put((byte)(bb >> 8));
 962                         dst.put((byte)(bb));
 963                     } else {                  // Single-byte
 964                         if (currentState == DBCS) {
 965                             if (dst.remaining() < 1)
 966                                 return CoderResult.OVERFLOW;
 967                             currentState = SBCS;
 968                             dst.put(SI);
 969                         }
 970                         if (dst.remaining() < 1)
 971                             return CoderResult.OVERFLOW;
 972                         dst.put((byte)bb);
 973                     }
 974                     mark++;
 975                 }
 976                 return CoderResult.UNDERFLOW;
 977             } finally {
 978                 src.position(mark);
 979             }
 980         }
 981 
 982         @Override
 983         public int encode(char[] src, int sp, int len, byte[] dst) {
 984             int dp = 0;
 985             int sl = sp + len;
 986             while (sp < sl) {
 987                 char c = src[sp++];
 988                 int bb = encodeChar(c);
 989 
 990                 if (bb == UNMAPPABLE_ENCODING) {
 991                     if (Character.isHighSurrogate(c) && sp < sl &&
 992                         Character.isLowSurrogate(src[sp])) {
 993                         sp++;
 994                     }
 995                     dst[dp++] = repl[0];
 996                     if (repl.length > 1)
 997                         dst[dp++] = repl[1];
 998                     continue;
 999                 } //else
1000                 if (bb > MAX_SINGLEBYTE) {           // DoubleByte
1001                     if (currentState == SBCS) {
1002                         currentState = DBCS;
1003                         dst[dp++] = SO;
1004                     }
1005                     dst[dp++] = (byte)(bb >> 8);
1006                     dst[dp++] = (byte)bb;
1007                 } else {                             // SingleByte
1008                     if (currentState == DBCS) {
1009                          currentState = SBCS;
1010                          dst[dp++] = SI;
1011                     }
1012                     dst[dp++] = (byte)bb;
1013                 }
1014             }
1015 
1016             if (currentState == DBCS) {
1017                  currentState = SBCS;
1018                  dst[dp++] = SI;
1019             }
1020             return dp;
1021         }
1022 
1023         @Override
1024         public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
1025             int dp = 0;
1026             int sl = sp + len;
1027             while (sp < sl) {
1028                 char c = (char)(src[sp++] & 0xff);
1029                 int bb = encodeChar(c);
1030                 if (bb == UNMAPPABLE_ENCODING) {
1031                     // no surrogate pair in latin1 string
1032                     dst[dp++] = repl[0];
1033                     if (repl.length > 1)
1034                         dst[dp++] = repl[1];
1035                     continue;
1036                 } //else
1037                 if (bb > MAX_SINGLEBYTE) {           // DoubleByte
1038                     if (currentState == SBCS) {
1039                         currentState = DBCS;
1040                         dst[dp++] = SO;
1041                     }
1042                     dst[dp++] = (byte)(bb >> 8);
1043                     dst[dp++] = (byte)bb;
1044                 } else {                             // SingleByte
1045                     if (currentState == DBCS) {
1046                          currentState = SBCS;
1047                          dst[dp++] = SI;
1048                     }
1049                     dst[dp++] = (byte)bb;
1050                 }
1051             }
1052             if (currentState == DBCS) {
1053                  currentState = SBCS;
1054                  dst[dp++] = SI;
1055             }
1056             return dp;
1057         }
1058 
1059         @Override
1060         public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
1061             int dp = 0;
1062             int sl = sp + len;
1063             while (sp < sl) {
1064                 char c = StringUTF16.getChar(src, sp++);
1065                 int bb = encodeChar(c);
1066                 if (bb == UNMAPPABLE_ENCODING) {
1067                     if (Character.isHighSurrogate(c) && sp < sl &&
1068                         Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
1069                         sp++;
1070                     }
1071                     dst[dp++] = repl[0];
1072                     if (repl.length > 1)
1073                         dst[dp++] = repl[1];
1074                     continue;
1075                 } //else
1076                 if (bb > MAX_SINGLEBYTE) {           // DoubleByte
1077                     if (currentState == SBCS) {
1078                         currentState = DBCS;
1079                         dst[dp++] = SO;
1080                     }
1081                     dst[dp++] = (byte)(bb >> 8);
1082                     dst[dp++] = (byte)bb;
1083                 } else {                             // SingleByte
1084                     if (currentState == DBCS) {
1085                          currentState = SBCS;
1086                          dst[dp++] = SI;
1087                     }
1088                     dst[dp++] = (byte)bb;
1089                 }
1090             }
1091             if (currentState == DBCS) {
1092                  currentState = SBCS;
1093                  dst[dp++] = SI;
1094             }
1095             return dp;
1096         }
1097     }
1098 
    /**
     * Encoder for the EUC_SIMPLE flavour of double-byte charsets.
     * It adds no state or behaviour of its own: the constructor simply
     * forwards its arguments to the four-argument {@link Encoder}
     * constructor.
     */
    public static class Encoder_EUC_SIM extends Encoder {
        public Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex,
                               boolean isASCIICompatible) {
            super(cs, c2b, c2bIndex, isASCIICompatible);
        }
    }
1106 
1107 }