1 /*
   2  * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.nio.cs;
  27 
  28 import java.nio.ByteBuffer;
  29 import java.nio.CharBuffer;
  30 import java.nio.charset.Charset;
  31 import java.nio.charset.CharsetDecoder;
  32 import java.nio.charset.CharsetEncoder;
  33 import java.nio.charset.CoderResult;
  34 import java.util.Arrays;
  35 import sun.nio.cs.Surrogate;
  36 import sun.nio.cs.ArrayDecoder;
  37 import sun.nio.cs.ArrayEncoder;
  38 import static sun.nio.cs.CharsetMapping.*;
  39 
  40 /*
  41  * Four types of "DoubleByte" charsets are implemented in this class
  42  * (1)DoubleByte
  43  *    The "most widely used" multibyte charset, a combination of
  44  *    a singlebyte character set (usually the ASCII charset) and a
  45  *    doublebyte character set. The codepoint values of singlebyte
  46  *    and doublebyte don't overlap. Microsoft's multibyte charsets
  47  *    and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943,
  48  *    948, 949 and 950 are such charsets.
  49  *
  50  * (2)DoubleByte_EBCDIC
  51  *    IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch)
  52  *    in and out between the singlebyte character set and doublebyte
  53  *    character set.
  54  *
  55  * (3)DoubleByte_SIMPLE_EUC
  56  *    A "simple" form of the EUC encoding scheme: only the
  57  *    singlebyte character set G0 and one doublebyte character set
  58  *    G1 are defined; G2 (with SS2) and G3 (with SS3) are not used.
  59  *    So it is actually the same as the "typical" type (1) mentioned
  60  *    above, except that it returns "malformed" for SS2 and SS3 when
  61  *    decoding.
  62  *
  63  * (4)DoubleByte ONLY
  64  *    A "pure" doublebyte only character set. From an implementation
  65  *    point of view, this is type (1) with "decodeSingle" always
  66  *    returning unmappable.
  67  *
  68  * For simplicity, all implementations share the same decoding and
  69  * encoding data structure.
  70  *
  71  * Decoding:
  72  *
  73  *    char[][] b2c;
  74  *    char[] b2cSB;
  75  *    int b2Min, b2Max
  76  *
  77  *    public char decodeSingle(int b) {
  78  *        return b2cSB[b];
  79  *    }
  80  *
  81  *    public char decodeDouble(int b1, int b2) {
  82  *        if (b2 < b2Min || b2 > b2Max)
  83  *            return UNMAPPABLE_DECODING;
  84  *         return b2c[b1][b2 - b2Min];
  85  *    }
  86  *
  87  *    (1)b2Min, b2Max are the corresponding min and max value of the
  88  *       low-half of the double-byte.
  89  *    (2)The high 8-bit/b1 of the double-byte are used to index into
  90  *       the b2c array.
  91  *
  92  * Encoding:
  93  *
  94  *    char[] c2b;
  95  *    char[] c2bIndex;
  96  *
  97  *    public int encodeChar(char ch) {
  98  *        return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
  99  *    }
 100  *
 101  */
 102 
 103 public class DoubleByte {
 104 
 105     public static final char[] B2C_UNMAPPABLE;
 106     static {
 107         B2C_UNMAPPABLE = new char[0x100];
 108         Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING);
 109     }
 110 
 111     public static class Decoder extends CharsetDecoder
 112                                 implements DelegatableDecoder, ArrayDecoder
 113     {
 114         final char[][] b2c;
 115         final char[] b2cSB;
 116         final int b2Min;
 117         final int b2Max;
 118         final boolean isASCIICompatible;
 119 
 120         // for SimpleEUC override
 121         protected CoderResult crMalformedOrUnderFlow(int b) {
 122             return CoderResult.UNDERFLOW;
 123         }
 124 
 125         protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
 126             if (b2c[b1] == B2C_UNMAPPABLE ||                // isNotLeadingByte(b1)
 127                 b2c[b2] != B2C_UNMAPPABLE ||                // isLeadingByte(b2)
 128                 decodeSingle(b2) != UNMAPPABLE_DECODING) {  // isSingle(b2)
 129                 return CoderResult.malformedForLength(1);
 130             }
 131             return CoderResult.unmappableForLength(2);
 132         }
 133 
 134         public Decoder(Charset cs, float avgcpb, float maxcpb,
 135                        char[][] b2c, char[] b2cSB,
 136                        int b2Min, int b2Max,
 137                        boolean isASCIICompatible) {
 138             super(cs, avgcpb, maxcpb);
 139             this.b2c = b2c;
 140             this.b2cSB = b2cSB;
 141             this.b2Min = b2Min;
 142             this.b2Max = b2Max;
 143             this.isASCIICompatible = isASCIICompatible;
 144         }
 145 
 146         public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
 147                        boolean isASCIICompatible) {
 148             this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
 149         }
 150 
 151         public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
 152             this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, false);
 153         }
 154 
 155         protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
 156             byte[] sa = src.array();
 157             int sp = src.arrayOffset() + src.position();
 158             int sl = src.arrayOffset() + src.limit();
 159 
 160             char[] da = dst.array();
 161             int dp = dst.arrayOffset() + dst.position();
 162             int dl = dst.arrayOffset() + dst.limit();
 163 
 164             try {
 165                 while (sp < sl && dp < dl) {
 166                     // inline the decodeSingle/Double() for better performance
 167                     int inSize = 1;
 168                     int b1 = sa[sp] & 0xff;
 169                     char c = b2cSB[b1];
 170                     if (c == UNMAPPABLE_DECODING) {
 171                         if (sl - sp < 2)
 172                             return crMalformedOrUnderFlow(b1);
 173                         int b2 = sa[sp + 1] & 0xff;
 174                         if (b2 < b2Min || b2 > b2Max ||
 175                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
 176                             return crMalformedOrUnmappable(b1, b2);
 177                         }
 178                         inSize++;
 179                     }
 180                     da[dp++] = c;
 181                     sp += inSize;
 182                 }
 183                 return (sp >= sl) ? CoderResult.UNDERFLOW
 184                                   : CoderResult.OVERFLOW;
 185             } finally {
 186                 src.position(sp - src.arrayOffset());
 187                 dst.position(dp - dst.arrayOffset());
 188             }
 189         }
 190 
 191         protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
 192             int mark = src.position();
 193             try {
 194 
 195                 while (src.hasRemaining() && dst.hasRemaining()) {
 196                     int b1 = src.get() & 0xff;
 197                     char c = b2cSB[b1];
 198                     int inSize = 1;
 199                     if (c == UNMAPPABLE_DECODING) {
 200                         if (src.remaining() < 1)
 201                             return crMalformedOrUnderFlow(b1);
 202                         int b2 = src.get() & 0xff;
 203                         if (b2 < b2Min || b2 > b2Max ||
 204                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING)
 205                             return crMalformedOrUnmappable(b1, b2);
 206                         inSize++;
 207                     }
 208                     dst.put(c);
 209                     mark += inSize;
 210                 }
 211                 return src.hasRemaining()? CoderResult.OVERFLOW
 212                                          : CoderResult.UNDERFLOW;
 213             } finally {
 214                 src.position(mark);
 215             }
 216         }
 217 
 218         // Make some protected methods public for use by JISAutoDetect
 219         public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
 220             if (src.hasArray() && dst.hasArray())
 221                 return decodeArrayLoop(src, dst);
 222             else
 223                 return decodeBufferLoop(src, dst);
 224         }
 225 
 226         @Override
 227         public int decode(byte[] src, int sp, int len, char[] dst) {
 228             int dp = 0;
 229             int sl = sp + len;
 230             char repl = replacement().charAt(0);
 231             while (sp < sl) {
 232                 int b1 = src[sp++] & 0xff;
 233                 char c = b2cSB[b1];
 234                 if (c == UNMAPPABLE_DECODING) {
 235                     if (sp < sl) {
 236                         int b2 = src[sp++] & 0xff;
 237                         if (b2 < b2Min || b2 > b2Max ||
 238                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
 239                             if (b2c[b1] == B2C_UNMAPPABLE ||  // isNotLeadingByte
 240                                 b2c[b2] != B2C_UNMAPPABLE ||  // isLeadingByte
 241                                 decodeSingle(b2) != UNMAPPABLE_DECODING) {
 242                                sp--;
 243                             }
 244                         }
 245                     }
 246                     if (c == UNMAPPABLE_DECODING) {
 247                          c = repl;
 248                     }
 249                 }
 250                 dst[dp++] = c;
 251             }
 252             return dp;
 253         }
 254 
 255         @Override
 256         public boolean isASCIICompatible() {
 257             return isASCIICompatible;
 258         }
 259 
 260         public void implReset() {
 261             super.implReset();
 262         }
 263 
 264         public CoderResult implFlush(CharBuffer out) {
 265             return super.implFlush(out);
 266         }
 267 
 268         // decode loops are not using decodeSingle/Double() for performance
 269         // reason.
 270         public char decodeSingle(int b) {
 271             return b2cSB[b];
 272         }
 273 
 274         public char decodeDouble(int b1, int b2) {
 275             if (b1 < 0 || b1 > b2c.length ||
 276                 b2 < b2Min || b2 > b2Max)
 277                 return UNMAPPABLE_DECODING;
 278             return  b2c[b1][b2 - b2Min];
 279         }
 280     }
 281 
 282     // IBM_EBCDIC_DBCS
 283     public static class Decoder_EBCDIC extends Decoder {
 284         private static final int SBCS = 0;
 285         private static final int DBCS = 1;
 286         private static final int SO = 0x0e;
 287         private static final int SI = 0x0f;
 288         private int  currentState;
 289 
 290         public Decoder_EBCDIC(Charset cs,
 291                               char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
 292                               boolean isASCIICompatible) {
 293             super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
 294         }
 295 
 296         public Decoder_EBCDIC(Charset cs,
 297                               char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
 298             super(cs, b2c, b2cSB, b2Min, b2Max, false);
 299         }
 300 
 301         public void implReset() {
 302             currentState = SBCS;
 303         }
 304 
 305         // Check validity of dbcs ebcdic byte pair values
 306         //
 307         // First byte : 0x41 -- 0xFE
 308         // Second byte: 0x41 -- 0xFE
 309         // Doublebyte blank: 0x4040
 310         //
 311         // The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io
 312         // as
 313         //            if ((b1 != 0x40 || b2 != 0x40) &&
 314         //                (b2 < 0x41 || b2 > 0xfe)) {...}
 315         // is not correct/complete (range check for b1)
 316         //
 317         private static boolean isDoubleByte(int b1, int b2) {
 318             return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe)
 319                    || (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE
 320         }
 321 
 322         protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
 323             byte[] sa = src.array();
 324             int sp = src.arrayOffset() + src.position();
 325             int sl = src.arrayOffset() + src.limit();
 326             char[] da = dst.array();
 327             int dp = dst.arrayOffset() + dst.position();
 328             int dl = dst.arrayOffset() + dst.limit();
 329 
 330             try {
 331                 // don't check dp/dl together here, it's possible to
 332                 // decdoe a SO/SI without space in output buffer.
 333                 while (sp < sl) {
 334                     int b1 = sa[sp] & 0xff;
 335                     int inSize = 1;
 336                     if (b1 == SO) {  // Shift out
 337                         if (currentState != SBCS)
 338                             return CoderResult.malformedForLength(1);
 339                         else
 340                             currentState = DBCS;
 341                     } else if (b1 == SI) {
 342                         if (currentState != DBCS)
 343                             return CoderResult.malformedForLength(1);
 344                         else
 345                             currentState = SBCS;
 346                     } else {
 347                         char c =  UNMAPPABLE_DECODING;
 348                         if (currentState == SBCS) {
 349                             c = b2cSB[b1];
 350                             if (c == UNMAPPABLE_DECODING)
 351                                 return CoderResult.unmappableForLength(1);
 352                         } else {
 353                             if (sl - sp < 2)
 354                                 return CoderResult.UNDERFLOW;
 355                             int b2 = sa[sp + 1] & 0xff;
 356                             if (b2 < b2Min || b2 > b2Max ||
 357                                 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
 358                                 if (!isDoubleByte(b1, b2))
 359                                     return CoderResult.malformedForLength(2);
 360                                 return CoderResult.unmappableForLength(2);
 361                             }
 362                             inSize++;
 363                         }
 364                         if (dl - dp < 1)
 365                             return CoderResult.OVERFLOW;
 366 
 367                         da[dp++] = c;
 368                     }
 369                     sp += inSize;
 370                 }
 371                 return CoderResult.UNDERFLOW;
 372             } finally {
 373                 src.position(sp - src.arrayOffset());
 374                 dst.position(dp - dst.arrayOffset());
 375             }
 376         }
 377 
 378         protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
 379             int mark = src.position();
 380             try {
 381                 while (src.hasRemaining()) {
 382                     int b1 = src.get() & 0xff;
 383                     int inSize = 1;
 384                     if (b1 == SO) {  // Shift out
 385                         if (currentState != SBCS)
 386                             return CoderResult.malformedForLength(1);
 387                         else
 388                             currentState = DBCS;
 389                     } else if (b1 == SI) {
 390                         if (currentState != DBCS)
 391                             return CoderResult.malformedForLength(1);
 392                         else
 393                             currentState = SBCS;
 394                     } else {
 395                         char c = UNMAPPABLE_DECODING;
 396                         if (currentState == SBCS) {
 397                             c = b2cSB[b1];
 398                             if (c == UNMAPPABLE_DECODING)
 399                                 return CoderResult.unmappableForLength(1);
 400                         } else {
 401                             if (src.remaining() < 1)
 402                                 return CoderResult.UNDERFLOW;
 403                             int b2 = src.get()&0xff;
 404                             if (b2 < b2Min || b2 > b2Max ||
 405                                 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
 406                                 if (!isDoubleByte(b1, b2))
 407                                     return CoderResult.malformedForLength(2);
 408                                 return CoderResult.unmappableForLength(2);
 409                             }
 410                             inSize++;
 411                         }
 412 
 413                         if (dst.remaining() < 1)
 414                             return CoderResult.OVERFLOW;
 415 
 416                         dst.put(c);
 417                     }
 418                     mark += inSize;
 419                 }
 420                 return CoderResult.UNDERFLOW;
 421             } finally {
 422                 src.position(mark);
 423             }
 424         }
 425 
 426         @Override
 427         public int decode(byte[] src, int sp, int len, char[] dst) {
 428             int dp = 0;
 429             int sl = sp + len;
 430             currentState = SBCS;
 431             char repl = replacement().charAt(0);
 432             while (sp < sl) {
 433                 int b1 = src[sp++] & 0xff;
 434                 if (b1 == SO) {  // Shift out
 435                     if (currentState != SBCS)
 436                         dst[dp++] = repl;
 437                     else
 438                         currentState = DBCS;
 439                 } else if (b1 == SI) {
 440                     if (currentState != DBCS)
 441                         dst[dp++] = repl;
 442                     else
 443                         currentState = SBCS;
 444                 } else {
 445                     char c =  UNMAPPABLE_DECODING;
 446                     if (currentState == SBCS) {
 447                         c = b2cSB[b1];
 448                         if (c == UNMAPPABLE_DECODING)
 449                             c = repl;
 450                     } else {
 451                         if (sl == sp) {
 452                             c = repl;
 453                         } else {
 454                             int b2 = src[sp++] & 0xff;
 455                             if (b2 < b2Min || b2 > b2Max ||
 456                                 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
 457                                 c = repl;
 458                             }
 459                         }
 460                     }
 461                     dst[dp++] = c;
 462                 }
 463             }
 464             return dp;
 465         }
 466     }
 467 
 468     // DBCS_ONLY
 469     public static class Decoder_DBCSONLY extends Decoder {
 470         static final char[] b2cSB_UNMAPPABLE;
 471         static {
 472             b2cSB_UNMAPPABLE = new char[0x100];
 473             Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING);
 474         }
 475         public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
 476                                 boolean isASCIICompatible) {
 477             super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, isASCIICompatible);
 478         }
 479 
 480         public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
 481             super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, false);
 482         }
 483     }
 484 
 485     // EUC_SIMPLE
 486     // The only thing we need to "override" is to check SS2/SS3 and
 487     // return "malformed" if found
 488     public static class Decoder_EUC_SIM extends Decoder {
 489         private final int SS2 =  0x8E;
 490         private final int SS3 =  0x8F;
 491 
 492         public Decoder_EUC_SIM(Charset cs,
 493                                char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
 494                                boolean isASCIICompatible) {
 495             super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
 496         }
 497 
 498         // No support provided for G2/G3 for SimpleEUC
 499         protected CoderResult crMalformedOrUnderFlow(int b) {
 500             if (b == SS2 || b == SS3 )
 501                 return CoderResult.malformedForLength(1);
 502             return CoderResult.UNDERFLOW;
 503         }
 504 
 505         protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
 506             if (b1 == SS2 || b1 == SS3 )
 507                 return CoderResult.malformedForLength(1);
 508             return CoderResult.unmappableForLength(2);
 509         }
 510 
 511         @Override
 512         public int decode(byte[] src, int sp, int len, char[] dst) {
 513             int dp = 0;
 514             int sl = sp + len;
 515             char repl = replacement().charAt(0);
 516             while (sp < sl) {
 517                 int b1 = src[sp++] & 0xff;
 518                 char c = b2cSB[b1];
 519                 if (c == UNMAPPABLE_DECODING) {
 520                     if (sp < sl) {
 521                         int b2 = src[sp++] & 0xff;
 522                         if (b2 < b2Min || b2 > b2Max ||
 523                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
 524                             if (b1 == SS2 || b1 == SS3) {
 525                                 sp--;
 526                             }
 527                             c = repl;
 528                         }
 529                     } else {
 530                         c = repl;
 531                     }
 532                 }
 533                 dst[dp++] = c;
 534             }
 535             return dp;
 536         }
 537     }
 538 
 539     public static class Encoder extends CharsetEncoder
 540                                 implements ArrayEncoder
 541     {
        // Encoded values above this limit are written as two bytes.
        protected final int MAX_SINGLEBYTE = 0xff;
        // Encoding table, read as c2b[c2bIndex[ch >> 8] + (ch & 0xff)].
        private final char[] c2b;
        // Offset into c2b for each value of (ch >> 8).
        private final char[] c2bIndex;
        // Surrogate parser, created on first use by sgp().
        protected Surrogate.Parser sgp;
        // Value returned by isASCIICompatible().
        final boolean isASCIICompatible;
 547 
        // Convenience constructor: delegates to the four-argument form with
        // isASCIICompatible set to false.
        public Encoder(Charset cs, char[] c2b, char[] c2bIndex) {
            this(cs, c2b, c2bIndex, false);
        }
 551 
 552         public Encoder(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible) {
 553             super(cs, 2.0f, 2.0f);
 554             this.c2b = c2b;
 555             this.c2bIndex = c2bIndex;
 556             this.isASCIICompatible = isASCIICompatible;
 557         }
 558 
 559         public Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex,
 560                        boolean isASCIICompatible) {
 561             super(cs, avg, max, repl);
 562             this.c2b = c2b;
 563             this.c2bIndex = c2bIndex;
 564             this.isASCIICompatible = isASCIICompatible;
 565         }
 566 
 567         public boolean canEncode(char c) {
 568             return encodeChar(c) != UNMAPPABLE_ENCODING;
 569         }
 570 
 571         protected Surrogate.Parser sgp() {
 572             if (sgp == null)
 573                 sgp = new Surrogate.Parser();
 574             return sgp;
 575         }
 576 
 577         protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
 578             char[] sa = src.array();
 579             int sp = src.arrayOffset() + src.position();
 580             int sl = src.arrayOffset() + src.limit();
 581 
 582             byte[] da = dst.array();
 583             int dp = dst.arrayOffset() + dst.position();
 584             int dl = dst.arrayOffset() + dst.limit();
 585 
 586             try {
 587                 while (sp < sl) {
 588                     char c = sa[sp];
 589                     int bb = encodeChar(c);
 590                     if (bb == UNMAPPABLE_ENCODING) {
 591                         if (Character.isSurrogate(c)) {
 592                             if (sgp().parse(c, sa, sp, sl) < 0)
 593                                 return sgp.error();
 594                             return sgp.unmappableResult();
 595                         }
 596                         return CoderResult.unmappableForLength(1);
 597                     }
 598 
 599                     if (bb > MAX_SINGLEBYTE) {    // DoubleByte
 600                         if (dl - dp < 2)
 601                             return CoderResult.OVERFLOW;
 602                         da[dp++] = (byte)(bb >> 8);
 603                         da[dp++] = (byte)bb;
 604                     } else {                      // SingleByte
 605                         if (dl - dp < 1)
 606                             return CoderResult.OVERFLOW;
 607                         da[dp++] = (byte)bb;
 608                     }
 609 
 610                     sp++;
 611                 }
 612                 return CoderResult.UNDERFLOW;
 613             } finally {
 614                 src.position(sp - src.arrayOffset());
 615                 dst.position(dp - dst.arrayOffset());
 616             }
 617         }
 618 
 619         protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
 620             int mark = src.position();
 621             try {
 622                 while (src.hasRemaining()) {
 623                     char c = src.get();
 624                     int bb = encodeChar(c);
 625                     if (bb == UNMAPPABLE_ENCODING) {
 626                         if (Character.isSurrogate(c)) {
 627                             if (sgp().parse(c, src) < 0)
 628                                 return sgp.error();
 629                             return sgp.unmappableResult();
 630                         }
 631                         return CoderResult.unmappableForLength(1);
 632                     }
 633                     if (bb > MAX_SINGLEBYTE) {  // DoubleByte
 634                         if (dst.remaining() < 2)
 635                             return CoderResult.OVERFLOW;
 636                         dst.put((byte)(bb >> 8));
 637                         dst.put((byte)(bb));
 638                     } else {
 639                         if (dst.remaining() < 1)
 640                         return CoderResult.OVERFLOW;
 641                         dst.put((byte)bb);
 642                     }
 643                     mark++;
 644                 }
 645                 return CoderResult.UNDERFLOW;
 646             } finally {
 647                 src.position(mark);
 648             }
 649         }
 650 
 651         protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
 652             if (src.hasArray() && dst.hasArray())
 653                 return encodeArrayLoop(src, dst);
 654             else
 655                 return encodeBufferLoop(src, dst);
 656         }
 657 
 658         protected byte[] repl = replacement();
 659         protected void implReplaceWith(byte[] newReplacement) {
 660             repl = newReplacement;
 661         }
 662 
 663         @Override
 664         public int encode(char[] src, int sp, int len, byte[] dst) {
 665             int dp = 0;
 666             int sl = sp + len;
 667             int dl = dst.length;
 668             while (sp < sl) {
 669                 char c = src[sp++];
 670                 int bb = encodeChar(c);
 671                 if (bb == UNMAPPABLE_ENCODING) {
 672                     if (Character.isHighSurrogate(c) && sp < sl &&
 673                         Character.isLowSurrogate(src[sp])) {
 674                         sp++;
 675                     }
 676                     dst[dp++] = repl[0];
 677                     if (repl.length > 1)
 678                         dst[dp++] = repl[1];
 679                     continue;
 680                 } //else
 681                 if (bb > MAX_SINGLEBYTE) { // DoubleByte
 682                     dst[dp++] = (byte)(bb >> 8);
 683                     dst[dp++] = (byte)bb;
 684                 } else {                          // SingleByte
 685                     dst[dp++] = (byte)bb;
 686                 }
 687             }
 688             return dp;
 689         }
 690 
 691         @Override
 692         public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
 693             int dp = 0;
 694             int sl = sp + len;
 695             while (sp < sl) {
 696                 char c = (char)(src[sp++] & 0xff);
 697                 int bb = encodeChar(c);
 698                 if (bb == UNMAPPABLE_ENCODING) {
 699                     // no surrogate pair in latin1 string
 700                     dst[dp++] = repl[0];
 701                     if (repl.length > 1) {
 702                         dst[dp++] = repl[1];
 703                     }
 704                     continue;
 705                 } //else
 706                 if (bb > MAX_SINGLEBYTE) { // DoubleByte
 707                     dst[dp++] = (byte)(bb >> 8);
 708                     dst[dp++] = (byte)bb;
 709                 } else {                   // SingleByte
 710                     dst[dp++] = (byte)bb;
 711                 }
 712 
 713             }
 714             return dp;
 715         }
 716 
 717         @Override
 718         public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
 719             int dp = 0;
 720             int sl = sp + len;
 721             while (sp < sl) {
 722                 char c = StringUTF16.getChar(src, sp++);
 723                 int bb = encodeChar(c);
 724                 if (bb == UNMAPPABLE_ENCODING) {
 725                     if (Character.isHighSurrogate(c) && sp < sl &&
 726                         Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
 727                         sp++;
 728                     }
 729                     dst[dp++] = repl[0];
 730                     if (repl.length > 1) {
 731                         dst[dp++] = repl[1];
 732                     }
 733                     continue;
 734                 } //else
 735                 if (bb > MAX_SINGLEBYTE) { // DoubleByte
 736                     dst[dp++] = (byte)(bb >> 8);
 737                     dst[dp++] = (byte)bb;
 738                 } else {                   // SingleByte
 739                     dst[dp++] = (byte)bb;
 740                 }
 741             }
 742             return dp;
 743         }
 744 
        // Returns the flag that was passed to the constructor.
        @Override
        public boolean isASCIICompatible() {
            return isASCIICompatible;
        }
 749 
 750         public int encodeChar(char ch) {
 751             return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
 752         }
 753 
 754         // init the c2b and c2bIndex tables from b2c.
 755         public static void initC2B(String[] b2c, String b2cSB, String b2cNR,  String c2bNR,
 756                             int b2Min, int b2Max,
 757                             char[] c2b, char[] c2bIndex)
 758         {
 759             Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
 760             int off = 0x100;
 761 
 762             char[][] b2c_ca = new char[b2c.length][];
 763             char[] b2cSB_ca = null;
 764             if (b2cSB != null)
 765                 b2cSB_ca = b2cSB.toCharArray();
 766 
 767             for (int i = 0; i < b2c.length; i++) {
 768                 if (b2c[i] == null)
 769                     continue;
 770                 b2c_ca[i] = b2c[i].toCharArray();
 771             }
 772 
 773             if (b2cNR != null) {
 774                 int j = 0;
 775                 while (j < b2cNR.length()) {
 776                     char b  = b2cNR.charAt(j++);
 777                     char c  = b2cNR.charAt(j++);
 778                     if (b < 0x100 && b2cSB_ca != null) {
 779                         if (b2cSB_ca[b] == c)
 780                             b2cSB_ca[b] = UNMAPPABLE_DECODING;
 781                     } else {
 782                         if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c)
 783                             b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING;
 784                     }
 785                 }
 786             }
 787 
 788             if (b2cSB_ca != null) {      // SingleByte
 789                 for (int b = 0; b < b2cSB_ca.length; b++) {
 790                     char c = b2cSB_ca[b];
 791                     if (c == UNMAPPABLE_DECODING)
 792                         continue;
 793                     int index = c2bIndex[c >> 8];
 794                     if (index == 0) {
 795                         index = off;
 796                         off += 0x100;
 797                         c2bIndex[c >> 8] = (char)index;
 798                     }
 799                     c2b[index + (c & 0xff)] = (char)b;
 800                 }
 801             }
 802 
 803             for (int b1 = 0; b1 < b2c.length; b1++) {  // DoubleByte
 804                 char[] db = b2c_ca[b1];
 805                 if (db == null)
 806                     continue;
 807                 for (int b2 = b2Min; b2 <= b2Max; b2++) {
 808                     char c = db[b2 - b2Min];
 809                     if (c == UNMAPPABLE_DECODING)
 810                         continue;
 811                     int index = c2bIndex[c >> 8];
 812                     if (index == 0) {
 813                         index = off;
 814                         off += 0x100;
 815                         c2bIndex[c >> 8] = (char)index;
 816                     }
 817                     c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2);
 818                 }
 819             }
 820 
 821             if (c2bNR != null) {
 822                 // add c->b only nr entries
 823                 for (int i = 0; i < c2bNR.length(); i += 2) {
 824                     char b = c2bNR.charAt(i);
 825                     char c = c2bNR.charAt(i + 1);
 826                     int index = (c >> 8);
 827                     if (c2bIndex[index] == 0) {
 828                         c2bIndex[index] = (char)off;
 829                         off += 0x100;
 830                     }
 831                     index = c2bIndex[index] + (c & 0xff);
 832                     c2b[index] = b;
 833                 }
 834             }
 835         }
 836     }
 837 
 838     public static class Encoder_DBCSONLY extends Encoder {
 839 
 840         public Encoder_DBCSONLY(Charset cs, byte[] repl,
 841                                 char[] c2b, char[] c2bIndex,
 842                                 boolean isASCIICompatible) {
 843             super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex, isASCIICompatible);
 844         }
 845 
 846         public int encodeChar(char ch) {
 847             int bb = super.encodeChar(ch);
 848             if (bb <= MAX_SINGLEBYTE)
 849                 return UNMAPPABLE_ENCODING;
 850             return bb;
 851         }
 852     }
 853 
 854     public static class Encoder_EBCDIC extends Encoder {
 855         static final int SBCS = 0;
 856         static final int DBCS = 1;
 857         static final byte SO = 0x0e;
 858         static final byte SI = 0x0f;
 859 
 860         protected int  currentState = SBCS;
 861 
 862         public Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex,
 863                               boolean isASCIICompatible) {
 864             super(cs, 4.0f, 5.0f, new byte[] {(byte)0x6f}, c2b, c2bIndex, isASCIICompatible);
 865         }
 866 
 867         protected void implReset() {
 868             currentState = SBCS;
 869         }
 870 
 871         protected CoderResult implFlush(ByteBuffer out) {
 872             if (currentState == DBCS) {
 873                 if (out.remaining() < 1)
 874                     return CoderResult.OVERFLOW;
 875                 out.put(SI);
 876             }
 877             implReset();
 878             return CoderResult.UNDERFLOW;
 879         }
 880 
 881         protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
 882             char[] sa = src.array();
 883             int sp = src.arrayOffset() + src.position();
 884             int sl = src.arrayOffset() + src.limit();
 885             byte[] da = dst.array();
 886             int dp = dst.arrayOffset() + dst.position();
 887             int dl = dst.arrayOffset() + dst.limit();
 888 
 889             try {
 890                 while (sp < sl) {
 891                     char c = sa[sp];
 892                     int bb = encodeChar(c);
 893                     if (bb == UNMAPPABLE_ENCODING) {
 894                         if (Character.isSurrogate(c)) {
 895                             if (sgp().parse(c, sa, sp, sl) < 0)
 896                                 return sgp.error();
 897                             return sgp.unmappableResult();
 898                         }
 899                         return CoderResult.unmappableForLength(1);
 900                     }
 901                     if (bb > MAX_SINGLEBYTE) {  // DoubleByte
 902                         if (currentState == SBCS) {
 903                             if (dl - dp < 1)
 904                                 return CoderResult.OVERFLOW;
 905                             currentState = DBCS;
 906                             da[dp++] = SO;
 907                         }
 908                         if (dl - dp < 2)
 909                             return CoderResult.OVERFLOW;
 910                         da[dp++] = (byte)(bb >> 8);
 911                         da[dp++] = (byte)bb;
 912                     } else {                    // SingleByte
 913                         if (currentState == DBCS) {
 914                             if (dl - dp < 1)
 915                                 return CoderResult.OVERFLOW;
 916                             currentState = SBCS;
 917                             da[dp++] = SI;
 918                         }
 919                         if (dl - dp < 1)
 920                             return CoderResult.OVERFLOW;
 921                         da[dp++] = (byte)bb;
 922 
 923                     }
 924                     sp++;
 925                 }
 926                 return CoderResult.UNDERFLOW;
 927             } finally {
 928                 src.position(sp - src.arrayOffset());
 929                 dst.position(dp - dst.arrayOffset());
 930             }
 931         }
 932 
 933         protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
 934             int mark = src.position();
 935             try {
 936                 while (src.hasRemaining()) {
 937                     char c = src.get();
 938                     int bb = encodeChar(c);
 939                     if (bb == UNMAPPABLE_ENCODING) {
 940                         if (Character.isSurrogate(c)) {
 941                             if (sgp().parse(c, src) < 0)
 942                                 return sgp.error();
 943                             return sgp.unmappableResult();
 944                         }
 945                         return CoderResult.unmappableForLength(1);
 946                     }
 947                     if (bb > MAX_SINGLEBYTE) {  // DoubleByte
 948                         if (currentState == SBCS) {
 949                             if (dst.remaining() < 1)
 950                                 return CoderResult.OVERFLOW;
 951                             currentState = DBCS;
 952                             dst.put(SO);
 953                         }
 954                         if (dst.remaining() < 2)
 955                             return CoderResult.OVERFLOW;
 956                         dst.put((byte)(bb >> 8));
 957                         dst.put((byte)(bb));
 958                     } else {                  // Single-byte
 959                         if (currentState == DBCS) {
 960                             if (dst.remaining() < 1)
 961                                 return CoderResult.OVERFLOW;
 962                             currentState = SBCS;
 963                             dst.put(SI);
 964                         }
 965                         if (dst.remaining() < 1)
 966                             return CoderResult.OVERFLOW;
 967                         dst.put((byte)bb);
 968                     }
 969                     mark++;
 970                 }
 971                 return CoderResult.UNDERFLOW;
 972             } finally {
 973                 src.position(mark);
 974             }
 975         }
 976 
 977         @Override
 978         public int encode(char[] src, int sp, int len, byte[] dst) {
 979             int dp = 0;
 980             int sl = sp + len;
 981             while (sp < sl) {
 982                 char c = src[sp++];
 983                 int bb = encodeChar(c);
 984 
 985                 if (bb == UNMAPPABLE_ENCODING) {
 986                     if (Character.isHighSurrogate(c) && sp < sl &&
 987                         Character.isLowSurrogate(src[sp])) {
 988                         sp++;
 989                     }
 990                     dst[dp++] = repl[0];
 991                     if (repl.length > 1)
 992                         dst[dp++] = repl[1];
 993                     continue;
 994                 } //else
 995                 if (bb > MAX_SINGLEBYTE) {           // DoubleByte
 996                     if (currentState == SBCS) {
 997                         currentState = DBCS;
 998                         dst[dp++] = SO;
 999                     }
1000                     dst[dp++] = (byte)(bb >> 8);
1001                     dst[dp++] = (byte)bb;
1002                 } else {                             // SingleByte
1003                     if (currentState == DBCS) {
1004                          currentState = SBCS;
1005                          dst[dp++] = SI;
1006                     }
1007                     dst[dp++] = (byte)bb;
1008                 }
1009             }
1010 
1011             if (currentState == DBCS) {
1012                  currentState = SBCS;
1013                  dst[dp++] = SI;
1014             }
1015             return dp;
1016         }
1017 
1018         @Override
1019         public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
1020             int dp = 0;
1021             int sl = sp + len;
1022             while (sp < sl) {
1023                 char c = (char)(src[sp++] & 0xff);
1024                 int bb = encodeChar(c);
1025                 if (bb == UNMAPPABLE_ENCODING) {
1026                     // no surrogate pair in latin1 string
1027                     dst[dp++] = repl[0];
1028                     if (repl.length > 1)
1029                         dst[dp++] = repl[1];
1030                     continue;
1031                 } //else
1032                 if (bb > MAX_SINGLEBYTE) {           // DoubleByte
1033                     if (currentState == SBCS) {
1034                         currentState = DBCS;
1035                         dst[dp++] = SO;
1036                     }
1037                     dst[dp++] = (byte)(bb >> 8);
1038                     dst[dp++] = (byte)bb;
1039                 } else {                             // SingleByte
1040                     if (currentState == DBCS) {
1041                          currentState = SBCS;
1042                          dst[dp++] = SI;
1043                     }
1044                     dst[dp++] = (byte)bb;
1045                 }
1046             }
1047             if (currentState == DBCS) {
1048                  currentState = SBCS;
1049                  dst[dp++] = SI;
1050             }
1051             return dp;
1052         }
1053 
1054         @Override
1055         public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
1056             int dp = 0;
1057             int sl = sp + len;
1058             while (sp < sl) {
1059                 char c = StringUTF16.getChar(src, sp++);
1060                 int bb = encodeChar(c);
1061                 if (bb == UNMAPPABLE_ENCODING) {
1062                     if (Character.isHighSurrogate(c) && sp < sl &&
1063                         Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
1064                         sp++;
1065                     }
1066                     dst[dp++] = repl[0];
1067                     if (repl.length > 1)
1068                         dst[dp++] = repl[1];
1069                     continue;
1070                 } //else
1071                 if (bb > MAX_SINGLEBYTE) {           // DoubleByte
1072                     if (currentState == SBCS) {
1073                         currentState = DBCS;
1074                         dst[dp++] = SO;
1075                     }
1076                     dst[dp++] = (byte)(bb >> 8);
1077                     dst[dp++] = (byte)bb;
1078                 } else {                             // SingleByte
1079                     if (currentState == DBCS) {
1080                          currentState = SBCS;
1081                          dst[dp++] = SI;
1082                     }
1083                     dst[dp++] = (byte)bb;
1084                 }
1085             }
1086             if (currentState == DBCS) {
1087                  currentState = SBCS;
1088                  dst[dp++] = SI;
1089             }
1090             return dp;
1091         }
1092     }
1093 
1094     // EUC_SIMPLE
1095     public static class Encoder_EUC_SIM extends Encoder {
1096         public Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex,
1097                                boolean isASCIICompatible) {
1098             super(cs, c2b, c2bIndex, isASCIICompatible);
1099         }
1100     }
1101 
1102 }