1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.nio.cs;
  27 
  28 import java.nio.ByteBuffer;
  29 import java.nio.CharBuffer;
  30 import java.nio.charset.Charset;
  31 import java.nio.charset.CharsetDecoder;
  32 import java.nio.charset.CharsetEncoder;
  33 import java.nio.charset.CoderResult;
  34 import java.util.Arrays;
  35 import sun.nio.cs.DoubleByte;
  36 import sun.nio.cs.Surrogate;
  37 import static sun.nio.cs.CharsetMapping.*;
  38 
  39 public class HKSCS {
  40 
  41     public static class Decoder extends DoubleByte.Decoder {
  42         static int b2Min = 0x40;
  43         static int b2Max = 0xfe;
  44 
  45         private char[][] b2cBmp;
  46         private char[][] b2cSupp;
  47         private DoubleByte.Decoder big5Dec;
  48 
  49         protected Decoder(Charset cs,
  50                           DoubleByte.Decoder big5Dec,
  51                           char[][] b2cBmp, char[][] b2cSupp)
  52         {
  53             // super(cs, 0.5f, 1.0f);
  54             // need to extends DoubleByte.Decoder so the
  55             // sun.io can use it. this implementation
  56             super(cs, 0.5f, 1.0f, null, null, 0, 0, true);
  57             this.big5Dec = big5Dec;
  58             this.b2cBmp = b2cBmp;
  59             this.b2cSupp = b2cSupp;
  60         }
  61 
  62         public char decodeSingle(int b) {
  63             return big5Dec.decodeSingle(b);
  64         }
  65 
  66         public char decodeBig5(int b1, int b2) {
  67             return big5Dec.decodeDouble(b1, b2);
  68         }
  69 
  70         public char decodeDouble(int b1, int b2) {
  71             return b2cBmp[b1][b2 - b2Min];
  72         }
  73 
  74         public char decodeDoubleEx(int b1, int b2) {
  75             /* if the b2cSupp is null, the subclass need
  76                to override the methold
  77             if (b2cSupp == null)
  78                 return UNMAPPABLE_DECODING;
  79              */
  80             return b2cSupp[b1][b2 - b2Min];
  81         }
  82 
  83         protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
  84             byte[] sa = src.array();
  85             int sp = src.arrayOffset() + src.position();
  86             int sl = src.arrayOffset() + src.limit();
  87 
  88             char[] da = dst.array();
  89             int dp = dst.arrayOffset() + dst.position();
  90             int dl = dst.arrayOffset() + dst.limit();
  91 
  92             try {
  93                 while (sp < sl) {
  94                     int b1 = sa[sp] & 0xff;
  95                     char c = decodeSingle(b1);
  96                     int inSize = 1, outSize = 1;
  97                     char[] cc = null;
  98                     if (c == UNMAPPABLE_DECODING) {
  99                         if (sl - sp < 2)
 100                             return CoderResult.UNDERFLOW;
 101                         int b2 = sa[sp + 1] & 0xff;
 102                         inSize++;
 103                         if (b2 < b2Min || b2 > b2Max)
 104                             return CoderResult.unmappableForLength(2);
 105                         c = decodeDouble(b1, b2);           //bmp
 106                         if (c == UNMAPPABLE_DECODING) {
 107                             c = decodeDoubleEx(b1, b2);     //supp
 108                             if (c == UNMAPPABLE_DECODING) {
 109                                 c = decodeBig5(b1, b2);     //big5
 110                                 if (c == UNMAPPABLE_DECODING)
 111                                     return CoderResult.unmappableForLength(2);
 112                             } else {
 113                                 // supplementary character in u+2xxxx area
 114                                 outSize = 2;
 115                             }
 116                         }
 117                     }
 118                     if (dl - dp < outSize)
 119                         return CoderResult.OVERFLOW;
 120                     if (outSize == 2) {
 121                         // supplementary characters
 122                         da[dp++] = Surrogate.high(0x20000 + c);
 123                         da[dp++] = Surrogate.low(0x20000 + c);
 124                     } else {
 125                         da[dp++] = c;
 126                     }
 127                     sp += inSize;
 128                 }
 129                 return CoderResult.UNDERFLOW;
 130             } finally {
 131                 src.position(sp - src.arrayOffset());
 132                 dst.position(dp - dst.arrayOffset());
 133             }
 134         }
 135 
 136         protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
 137             int mark = src.position();
 138             try {
 139                 while (src.hasRemaining()) {
 140                     char[] cc = null;
 141                     int b1 = src.get() & 0xff;
 142                     int inSize = 1, outSize = 1;
 143                     char c = decodeSingle(b1);
 144                     if (c == UNMAPPABLE_DECODING) {
 145                         if (src.remaining() < 1)
 146                             return CoderResult.UNDERFLOW;
 147                         int b2 = src.get() & 0xff;
 148                         inSize++;
 149                         if (b2 < b2Min || b2 > b2Max)
 150                             return CoderResult.unmappableForLength(2);
 151                         c = decodeDouble(b1, b2);           //bmp
 152                         if (c == UNMAPPABLE_DECODING) {
 153                             c = decodeDoubleEx(b1, b2);     //supp
 154                             if (c == UNMAPPABLE_DECODING) {
 155                                 c = decodeBig5(b1, b2);     //big5
 156                                 if (c == UNMAPPABLE_DECODING)
 157                                     return CoderResult.unmappableForLength(2);
 158                             } else {
 159                                 outSize = 2;
 160                             }
 161                         }
 162                     }
 163                     if (dst.remaining() < outSize)
 164                         return CoderResult.OVERFLOW;
 165                     if (outSize == 2) {
 166                         dst.put(Surrogate.high(0x20000 + c));
 167                         dst.put(Surrogate.low(0x20000 + c));
 168                     } else {
 169                         dst.put(c);
 170                     }
 171                     mark += inSize;
 172                 }
 173                 return CoderResult.UNDERFLOW;
 174             } finally {
 175                 src.position(mark);
 176             }
 177         }
 178 
 179         public int decode(byte[] src, int sp, int len, char[] dst) {
 180             int dp = 0;
 181             int sl = sp + len;
 182             char repl = replacement().charAt(0);
 183             while (sp < sl) {
 184                 int b1 = src[sp++] & 0xff;
 185                 char c = decodeSingle(b1);
 186                 if (c == UNMAPPABLE_DECODING) {
 187                     if (sl == sp) {
 188                         c = repl;
 189                     } else {
 190                         int b2 = src[sp++] & 0xff;
 191                         if (b2 < b2Min || b2 > b2Max) {
 192                             c = repl;
 193                         } else if ((c = decodeDouble(b1, b2)) == UNMAPPABLE_DECODING) {
 194                             c = decodeDoubleEx(b1, b2);     //supp
 195                             if (c == UNMAPPABLE_DECODING) {
 196                                 c = decodeBig5(b1, b2);     //big5
 197                                 if (c == UNMAPPABLE_DECODING)
 198                                     c = repl;
 199                             } else {
 200                                 // supplementary character in u+2xxxx area
 201                                 dst[dp++] = Surrogate.high(0x20000 + c);
 202                                 dst[dp++] = Surrogate.low(0x20000 + c);
 203                                 continue;
 204                             }
 205                         }
 206                     }
 207                 }
 208                 dst[dp++] = c;
 209             }
 210             return dp;
 211         }
 212 
 213         public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
 214             if (src.hasArray() && dst.hasArray())
 215                 return decodeArrayLoop(src, dst);
 216             else
 217                 return decodeBufferLoop(src, dst);
 218         }
 219 
 220         public static void initb2c(char[][]b2c, String[] b2cStr)
 221         {
 222             for (int i = 0; i < b2cStr.length; i++) {
 223                 if (b2cStr[i] == null)
 224                     b2c[i] = DoubleByte.B2C_UNMAPPABLE;
 225                 else
 226                     b2c[i] = b2cStr[i].toCharArray();
 227             }
 228         }
 229 
 230     }
 231 
 232     public static class Encoder extends DoubleByte.Encoder {
 233         private DoubleByte.Encoder big5Enc;
 234         private char[][] c2bBmp;
 235         private char[][] c2bSupp;
 236 
 237         protected Encoder(Charset cs,
 238                           DoubleByte.Encoder big5Enc,
 239                           char[][] c2bBmp,
 240                           char[][] c2bSupp)
 241         {
 242             super(cs, null, null, true);
 243             this.big5Enc = big5Enc;
 244             this.c2bBmp = c2bBmp;
 245             this.c2bSupp = c2bSupp;
 246         }
 247 
 248         public int encodeBig5(char ch) {
 249             return big5Enc.encodeChar(ch);
 250         }
 251 
 252         public int encodeChar(char ch) {
 253             int bb = c2bBmp[ch >> 8][ch & 0xff];
 254             if (bb == UNMAPPABLE_ENCODING)
 255                 return encodeBig5(ch);
 256             return bb;
 257         }
 258 
 259         public int encodeSupp(int cp) {
 260             if ((cp & 0xf0000) != 0x20000)
 261                 return UNMAPPABLE_ENCODING;
 262             return c2bSupp[(cp >> 8) & 0xff][cp & 0xff];
 263         }
 264 
 265         public boolean canEncode(char c) {
 266             return encodeChar(c) != UNMAPPABLE_ENCODING;
 267         }
 268 
 269         protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
 270             char[] sa = src.array();
 271             int sp = src.arrayOffset() + src.position();
 272             int sl = src.arrayOffset() + src.limit();
 273 
 274             byte[] da = dst.array();
 275             int dp = dst.arrayOffset() + dst.position();
 276             int dl = dst.arrayOffset() + dst.limit();
 277 
 278             try {
 279                 while (sp < sl) {
 280                     char c = sa[sp];
 281                     int inSize = 1;
 282                     int bb = encodeChar(c);
 283                     if (bb == UNMAPPABLE_ENCODING) {
 284                         if (Character.isSurrogate(c)) {
 285                             int cp;
 286                             if ((cp = sgp().parse(c, sa, sp, sl)) < 0)
 287                                 return sgp.error();
 288                             bb = encodeSupp(cp);
 289                             if (bb == UNMAPPABLE_ENCODING)
 290                                 return CoderResult.unmappableForLength(2);
 291                             inSize = 2;
 292                         } else {
 293                             return CoderResult.unmappableForLength(1);
 294                         }
 295                     }
 296                     if (bb > MAX_SINGLEBYTE) {    // DoubleByte
 297                         if (dl - dp < 2)
 298                             return CoderResult.OVERFLOW;
 299                         da[dp++] = (byte)(bb >> 8);
 300                         da[dp++] = (byte)bb;
 301                     } else {                      // SingleByte
 302                         if (dl - dp < 1)
 303                             return CoderResult.OVERFLOW;
 304                         da[dp++] = (byte)bb;
 305                     }
 306                     sp += inSize;
 307                 }
 308                 return CoderResult.UNDERFLOW;
 309             } finally {
 310                 src.position(sp - src.arrayOffset());
 311                 dst.position(dp - dst.arrayOffset());
 312             }
 313         }
 314 
 315         protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
 316             int mark = src.position();
 317             try {
 318                 while (src.hasRemaining()) {
 319                     int inSize = 1;
 320                     char c = src.get();
 321                     int bb = encodeChar(c);
 322                     if (bb == UNMAPPABLE_ENCODING) {
 323                         if (Character.isSurrogate(c)) {
 324                             int cp;
 325                             if ((cp = sgp().parse(c, src)) < 0)
 326                                 return sgp.error();
 327                             bb = encodeSupp(cp);
 328                             if (bb == UNMAPPABLE_ENCODING)
 329                                 return CoderResult.unmappableForLength(2);
 330                             inSize = 2;
 331                         } else {
 332                             return CoderResult.unmappableForLength(1);
 333                         }
 334                     }
 335                     if (bb > MAX_SINGLEBYTE) {  // DoubleByte
 336                         if (dst.remaining() < 2)
 337                             return CoderResult.OVERFLOW;
 338                         dst.put((byte)(bb >> 8));
 339                         dst.put((byte)(bb));
 340                     } else {
 341                         if (dst.remaining() < 1)
 342                         return CoderResult.OVERFLOW;
 343                         dst.put((byte)bb);
 344                     }
 345                     mark += inSize;
 346                 }
 347                 return CoderResult.UNDERFLOW;
 348             } finally {
 349                 src.position(mark);
 350             }
 351         }
 352 
 353         protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
 354             if (src.hasArray() && dst.hasArray())
 355                 return encodeArrayLoop(src, dst);
 356             else
 357                 return encodeBufferLoop(src, dst);
 358         }
 359 
 360         private byte[] repl = replacement();
 361         protected void implReplaceWith(byte[] newReplacement) {
 362             repl = newReplacement;
 363         }
 364 
 365         public int encode(char[] src, int sp, int len, byte[] dst) {
 366             int dp = 0;
 367             int sl = sp + len;
 368             while (sp < sl) {
 369                 char c = src[sp++];
 370                 int bb = encodeChar(c);
 371                 if (bb == UNMAPPABLE_ENCODING) {
 372                     if (!Character.isHighSurrogate(c) || sp == sl ||
 373                         !Character.isLowSurrogate(src[sp]) ||
 374                         (bb = encodeSupp(Character.toCodePoint(c, src[sp++])))
 375                         == UNMAPPABLE_ENCODING) {
 376                         dst[dp++] = repl[0];
 377                         if (repl.length > 1)
 378                             dst[dp++] = repl[1];
 379                         continue;
 380                     }
 381                 }
 382                 if (bb > MAX_SINGLEBYTE) {        // DoubleByte
 383                     dst[dp++] = (byte)(bb >> 8);
 384                     dst[dp++] = (byte)bb;
 385                 } else {                          // SingleByte
 386                     dst[dp++] = (byte)bb;
 387                 }
 388             }
 389             return dp;
 390         }
 391 
 392         public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
 393             int dp = 0;
 394             int sl = sp + len;
 395             int dl = dst.length;
 396             while (sp < sl) {
 397                 char c = StringUTF16.getChar(src, sp++);
 398                 int bb = encodeChar(c);
 399                 if (bb == UNMAPPABLE_ENCODING) {
 400                     if (!Character.isHighSurrogate(c) || sp == sl ||
 401                         !Character.isLowSurrogate(StringUTF16.getChar(src,sp)) ||
 402                         (bb = encodeSupp(Character.toCodePoint(c, StringUTF16.getChar(src, sp++))))
 403                         == UNMAPPABLE_ENCODING) {
 404                         dst[dp++] = repl[0];
 405                         if (repl.length > 1)
 406                             dst[dp++] = repl[1];
 407                         continue;
 408                     }
 409                 }
 410                 if (bb > MAX_SINGLEBYTE) { // DoubleByte
 411                     dst[dp++] = (byte)(bb >> 8);
 412                     dst[dp++] = (byte)bb;
 413                 } else {                   // SingleByte
 414                     dst[dp++] = (byte)bb;
 415                 }
 416             }
 417             return dp;
 418         }
 419 
 420         static char[] C2B_UNMAPPABLE = new char[0x100];
 421         static {
 422             Arrays.fill(C2B_UNMAPPABLE, (char)UNMAPPABLE_ENCODING);
 423         }
 424 
 425         public static void initc2b(char[][] c2b, String[] b2cStr, String pua) {
 426             // init c2b/c2bSupp from b2cStr and supp
 427             int b2Min = 0x40;
 428             Arrays.fill(c2b, C2B_UNMAPPABLE);
 429             for (int b1 = 0; b1 < 0x100; b1++) {
 430                 String s = b2cStr[b1];
 431                 if (s == null)
 432                     continue;
 433                 for (int i = 0; i < s.length(); i++) {
 434                     char c = s.charAt(i);
 435                     int hi = c >> 8;
 436                     if (c2b[hi] == C2B_UNMAPPABLE) {
 437                         c2b[hi] = new char[0x100];
 438                         Arrays.fill(c2b[hi], (char)UNMAPPABLE_ENCODING);
 439                     }
 440                     c2b[hi][c & 0xff] = (char)((b1 << 8) | (i + b2Min));
 441                 }
 442             }
 443             if (pua != null) {        // add the compatibility pua entries
 444                 char c = '\ue000';    //first pua character
 445                 for (int i = 0; i < pua.length(); i++) {
 446                     char bb = pua.charAt(i);
 447                     if (bb != UNMAPPABLE_DECODING) {
 448                         int hi = c >> 8;
 449                         if (c2b[hi] == C2B_UNMAPPABLE) {
 450                             c2b[hi] = new char[0x100];
 451                             Arrays.fill(c2b[hi], (char)UNMAPPABLE_ENCODING);
 452                         }
 453                         c2b[hi][c & 0xff] = bb;
 454                     }
 455                     c++;
 456                 }
 457             }
 458         }
 459     }
 460 }