1 /* 2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.nio.cs; 27 28 import java.nio.ByteBuffer; 29 import java.nio.CharBuffer; 30 import java.nio.charset.Charset; 31 import java.nio.charset.CharsetDecoder; 32 import java.nio.charset.CharsetEncoder; 33 import java.nio.charset.CoderResult; 34 import java.util.Arrays; 35 import sun.nio.cs.DoubleByte; 36 import sun.nio.cs.Surrogate; 37 import static sun.nio.cs.CharsetMapping.*; 38 39 public class HKSCS { 40 41 public static class Decoder extends DoubleByte.Decoder { 42 static int b2Min = 0x40; 43 static int b2Max = 0xfe; 44 45 private char[][] b2cBmp; 46 private char[][] b2cSupp; 47 private DoubleByte.Decoder big5Dec; 48 49 protected Decoder(Charset cs, 50 DoubleByte.Decoder big5Dec, 51 char[][] b2cBmp, char[][] b2cSupp) 52 { 53 // super(cs, 0.5f, 1.0f); 54 // need to extends DoubleByte.Decoder so the 55 // sun.io can use it. this implementation 56 super(cs, 0.5f, 1.0f, null, null, 0, 0, true); 57 this.big5Dec = big5Dec; 58 this.b2cBmp = b2cBmp; 59 this.b2cSupp = b2cSupp; 60 } 61 62 public char decodeSingle(int b) { 63 return big5Dec.decodeSingle(b); 64 } 65 66 public char decodeBig5(int b1, int b2) { 67 return big5Dec.decodeDouble(b1, b2); 68 } 69 70 public char decodeDouble(int b1, int b2) { 71 return b2cBmp[b1][b2 - b2Min]; 72 } 73 74 public char decodeDoubleEx(int b1, int b2) { 75 /* if the b2cSupp is null, the subclass need 76 to override the methold 77 if (b2cSupp == null) 78 return UNMAPPABLE_DECODING; 79 */ 80 return b2cSupp[b1][b2 - b2Min]; 81 } 82 83 protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) { 84 byte[] sa = src.array(); 85 int sp = src.arrayOffset() + src.position(); 86 int sl = src.arrayOffset() + src.limit(); 87 88 char[] da = dst.array(); 89 int dp = dst.arrayOffset() + dst.position(); 90 int dl = dst.arrayOffset() + dst.limit(); 91 92 try { 93 while (sp < sl) { 94 int b1 = sa[sp] & 0xff; 95 char c = decodeSingle(b1); 96 int inSize = 1, outSize = 1; 97 char[] cc = null; 98 if (c == UNMAPPABLE_DECODING) { 99 if (sl - sp < 2) 100 return CoderResult.UNDERFLOW; 101 int b2 = sa[sp + 1] & 0xff; 102 inSize++; 103 if (b2 < b2Min || b2 > b2Max) 104 return CoderResult.unmappableForLength(2); 105 c = decodeDouble(b1, b2); //bmp 106 if (c == UNMAPPABLE_DECODING) { 107 c = decodeDoubleEx(b1, b2); //supp 108 if (c == UNMAPPABLE_DECODING) { 109 c = decodeBig5(b1, b2); //big5 110 if (c == UNMAPPABLE_DECODING) 111 return CoderResult.unmappableForLength(2); 112 } else { 113 // supplementary character in u+2xxxx area 114 outSize = 2; 115 } 116 } 117 } 118 if (dl - dp < outSize) 119 return CoderResult.OVERFLOW; 120 if (outSize == 2) { 121 // supplementary characters 122 da[dp++] = Surrogate.high(0x20000 + c); 123 da[dp++] = Surrogate.low(0x20000 + c); 124 } else { 125 da[dp++] = c; 126 } 127 sp += inSize; 128 } 129 return CoderResult.UNDERFLOW; 130 } finally { 131 src.position(sp - src.arrayOffset()); 132 dst.position(dp - dst.arrayOffset()); 133 } 134 } 135 136 protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) { 137 int mark = src.position(); 138 try { 139 while (src.hasRemaining()) { 140 char[] cc = null; 141 int b1 = src.get() & 0xff; 142 int inSize = 1, outSize = 1; 143 char c = decodeSingle(b1); 144 if (c == UNMAPPABLE_DECODING) { 145 if (src.remaining() < 1) 146 return CoderResult.UNDERFLOW; 147 int b2 = src.get() & 0xff; 148 inSize++; 149 if (b2 < b2Min || b2 > b2Max) 150 return CoderResult.unmappableForLength(2); 151 c = decodeDouble(b1, b2); //bmp 152 if (c == UNMAPPABLE_DECODING) { 153 c = decodeDoubleEx(b1, b2); //supp 154 if (c == UNMAPPABLE_DECODING) { 155 c = decodeBig5(b1, b2); //big5 156 if (c == UNMAPPABLE_DECODING) 157 return CoderResult.unmappableForLength(2); 158 } else { 159 outSize = 2; 160 } 161 } 162 } 163 if (dst.remaining() < outSize) 164 return CoderResult.OVERFLOW; 165 if (outSize == 2) { 166 dst.put(Surrogate.high(0x20000 + c)); 167 dst.put(Surrogate.low(0x20000 + c)); 168 } else { 169 dst.put(c); 170 } 171 mark += inSize; 172 } 173 return CoderResult.UNDERFLOW; 174 } finally { 175 src.position(mark); 176 } 177 } 178 179 public int decode(byte[] src, int sp, int len, char[] dst) { 180 int dp = 0; 181 int sl = sp + len; 182 char repl = replacement().charAt(0); 183 while (sp < sl) { 184 int b1 = src[sp++] & 0xff; 185 char c = decodeSingle(b1); 186 if (c == UNMAPPABLE_DECODING) { 187 if (sl == sp) { 188 c = repl; 189 } else { 190 int b2 = src[sp++] & 0xff; 191 if (b2 < b2Min || b2 > b2Max) { 192 c = repl; 193 } else if ((c = decodeDouble(b1, b2)) == UNMAPPABLE_DECODING) { 194 c = decodeDoubleEx(b1, b2); //supp 195 if (c == UNMAPPABLE_DECODING) { 196 c = decodeBig5(b1, b2); //big5 197 if (c == UNMAPPABLE_DECODING) 198 c = repl; 199 } else { 200 // supplementary character in u+2xxxx area 201 dst[dp++] = Surrogate.high(0x20000 + c); 202 dst[dp++] = Surrogate.low(0x20000 + c); 203 continue; 204 } 205 } 206 } 207 } 208 dst[dp++] = c; 209 } 210 return dp; 211 } 212 213 public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) { 214 if (src.hasArray() && dst.hasArray()) 215 return decodeArrayLoop(src, dst); 216 else 217 return decodeBufferLoop(src, dst); 218 } 219 220 public static void initb2c(char[][]b2c, String[] b2cStr) 221 { 222 for (int i = 0; i < b2cStr.length; i++) { 223 if (b2cStr[i] == null) 224 b2c[i] = DoubleByte.B2C_UNMAPPABLE; 225 else 226 b2c[i] = b2cStr[i].toCharArray(); 227 } 228 } 229 230 } 231 232 public static class Encoder extends DoubleByte.Encoder { 233 private DoubleByte.Encoder big5Enc; 234 private char[][] c2bBmp; 235 private char[][] c2bSupp; 236 237 protected Encoder(Charset cs, 238 DoubleByte.Encoder big5Enc, 239 char[][] c2bBmp, 240 char[][] c2bSupp) 241 { 242 super(cs, null, null, true); 243 this.big5Enc = big5Enc; 244 this.c2bBmp = c2bBmp; 245 this.c2bSupp = c2bSupp; 246 } 247 248 public int encodeBig5(char ch) { 249 return big5Enc.encodeChar(ch); 250 } 251 252 public int encodeChar(char ch) { 253 int bb = c2bBmp[ch >> 8][ch & 0xff]; 254 if (bb == UNMAPPABLE_ENCODING) 255 return encodeBig5(ch); 256 return bb; 257 } 258 259 public int encodeSupp(int cp) { 260 if ((cp & 0xf0000) != 0x20000) 261 return UNMAPPABLE_ENCODING; 262 return c2bSupp[(cp >> 8) & 0xff][cp & 0xff]; 263 } 264 265 public boolean canEncode(char c) { 266 return encodeChar(c) != UNMAPPABLE_ENCODING; 267 } 268 269 protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) { 270 char[] sa = src.array(); 271 int sp = src.arrayOffset() + src.position(); 272 int sl = src.arrayOffset() + src.limit(); 273 274 byte[] da = dst.array(); 275 int dp = dst.arrayOffset() + dst.position(); 276 int dl = dst.arrayOffset() + dst.limit(); 277 278 try { 279 while (sp < sl) { 280 char c = sa[sp]; 281 int inSize = 1; 282 int bb = encodeChar(c); 283 if (bb == UNMAPPABLE_ENCODING) { 284 if (Character.isSurrogate(c)) { 285 int cp; 286 if ((cp = sgp().parse(c, sa, sp, sl)) < 0) 287 return sgp.error(); 288 bb = encodeSupp(cp); 289 if (bb == UNMAPPABLE_ENCODING) 290 return CoderResult.unmappableForLength(2); 291 inSize = 2; 292 } else { 293 return CoderResult.unmappableForLength(1); 294 } 295 } 296 if (bb > MAX_SINGLEBYTE) { // DoubleByte 297 if (dl - dp < 2) 298 return CoderResult.OVERFLOW; 299 da[dp++] = (byte)(bb >> 8); 300 da[dp++] = (byte)bb; 301 } else { // SingleByte 302 if (dl - dp < 1) 303 return CoderResult.OVERFLOW; 304 da[dp++] = (byte)bb; 305 } 306 sp += inSize; 307 } 308 return CoderResult.UNDERFLOW; 309 } finally { 310 src.position(sp - src.arrayOffset()); 311 dst.position(dp - dst.arrayOffset()); 312 } 313 } 314 315 protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) { 316 int mark = src.position(); 317 try { 318 while (src.hasRemaining()) { 319 int inSize = 1; 320 char c = src.get(); 321 int bb = encodeChar(c); 322 if (bb == UNMAPPABLE_ENCODING) { 323 if (Character.isSurrogate(c)) { 324 int cp; 325 if ((cp = sgp().parse(c, src)) < 0) 326 return sgp.error(); 327 bb = encodeSupp(cp); 328 if (bb == UNMAPPABLE_ENCODING) 329 return CoderResult.unmappableForLength(2); 330 inSize = 2; 331 } else { 332 return CoderResult.unmappableForLength(1); 333 } 334 } 335 if (bb > MAX_SINGLEBYTE) { // DoubleByte 336 if (dst.remaining() < 2) 337 return CoderResult.OVERFLOW; 338 dst.put((byte)(bb >> 8)); 339 dst.put((byte)(bb)); 340 } else { 341 if (dst.remaining() < 1) 342 return CoderResult.OVERFLOW; 343 dst.put((byte)bb); 344 } 345 mark += inSize; 346 } 347 return CoderResult.UNDERFLOW; 348 } finally { 349 src.position(mark); 350 } 351 } 352 353 protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) { 354 if (src.hasArray() && dst.hasArray()) 355 return encodeArrayLoop(src, dst); 356 else 357 return encodeBufferLoop(src, dst); 358 } 359 360 private byte[] repl = replacement(); 361 protected void implReplaceWith(byte[] newReplacement) { 362 repl = newReplacement; 363 } 364 365 public int encode(char[] src, int sp, int len, byte[] dst) { 366 int dp = 0; 367 int sl = sp + len; 368 while (sp < sl) { 369 char c = src[sp++]; 370 int bb = encodeChar(c); 371 if (bb == UNMAPPABLE_ENCODING) { 372 if (!Character.isHighSurrogate(c) || sp == sl || 373 !Character.isLowSurrogate(src[sp]) || 374 (bb = encodeSupp(Character.toCodePoint(c, src[sp++]))) 375 == UNMAPPABLE_ENCODING) { 376 dst[dp++] = repl[0]; 377 if (repl.length > 1) 378 dst[dp++] = repl[1]; 379 continue; 380 } 381 } 382 if (bb > MAX_SINGLEBYTE) { // DoubleByte 383 dst[dp++] = (byte)(bb >> 8); 384 dst[dp++] = (byte)bb; 385 } else { // SingleByte 386 dst[dp++] = (byte)bb; 387 } 388 } 389 return dp; 390 } 391 392 public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) { 393 int dp = 0; 394 int sl = sp + len; 395 int dl = dst.length; 396 while (sp < sl) { 397 char c = StringUTF16.getChar(src, sp++); 398 int bb = encodeChar(c); 399 if (bb == UNMAPPABLE_ENCODING) { 400 if (!Character.isHighSurrogate(c) || sp == sl || 401 !Character.isLowSurrogate(StringUTF16.getChar(src,sp)) || 402 (bb = encodeSupp(Character.toCodePoint(c, StringUTF16.getChar(src, sp++)))) 403 == UNMAPPABLE_ENCODING) { 404 dst[dp++] = repl[0]; 405 if (repl.length > 1) 406 dst[dp++] = repl[1]; 407 continue; 408 } 409 } 410 if (bb > MAX_SINGLEBYTE) { // DoubleByte 411 dst[dp++] = (byte)(bb >> 8); 412 dst[dp++] = (byte)bb; 413 } else { // SingleByte 414 dst[dp++] = (byte)bb; 415 } 416 } 417 return dp; 418 } 419 420 static char[] C2B_UNMAPPABLE = new char[0x100]; 421 static { 422 Arrays.fill(C2B_UNMAPPABLE, (char)UNMAPPABLE_ENCODING); 423 } 424 425 public static void initc2b(char[][] c2b, String[] b2cStr, String pua) { 426 // init c2b/c2bSupp from b2cStr and supp 427 int b2Min = 0x40; 428 Arrays.fill(c2b, C2B_UNMAPPABLE); 429 for (int b1 = 0; b1 < 0x100; b1++) { 430 String s = b2cStr[b1]; 431 if (s == null) 432 continue; 433 for (int i = 0; i < s.length(); i++) { 434 char c = s.charAt(i); 435 int hi = c >> 8; 436 if (c2b[hi] == C2B_UNMAPPABLE) { 437 c2b[hi] = new char[0x100]; 438 Arrays.fill(c2b[hi], (char)UNMAPPABLE_ENCODING); 439 } 440 c2b[hi][c & 0xff] = (char)((b1 << 8) | (i + b2Min)); 441 } 442 } 443 if (pua != null) { // add the compatibility pua entries 444 char c = '\ue000'; //first pua character 445 for (int i = 0; i < pua.length(); i++) { 446 char bb = pua.charAt(i); 447 if (bb != UNMAPPABLE_DECODING) { 448 int hi = c >> 8; 449 if (c2b[hi] == C2B_UNMAPPABLE) { 450 c2b[hi] = new char[0x100]; 451 Arrays.fill(c2b[hi], (char)UNMAPPABLE_ENCODING); 452 } 453 c2b[hi][c & 0xff] = bb; 454 } 455 c++; 456 } 457 } 458 } 459 } 460 }