1 /* 2 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /* 25 * @test 26 * @bug 6831794 6229811 27 * @summary Test EUC_TW charset 28 * @modules java.base/sun.nio.cs 29 */ 30 31 import java.nio.charset.*; 32 import java.nio.*; 33 import java.util.*; 34 35 public class TestEUC_TW { 36 37 static class Time { 38 long t; 39 } 40 static int iteration = 100; 41 42 static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t) 43 throws Exception { 44 String csn = cs.name(); 45 CharsetDecoder dec = cs.newDecoder(); 46 ByteBuffer bbf; 47 CharBuffer cbf; 48 if (testDirect) { 49 bbf = ByteBuffer.allocateDirect(bb.length); 50 cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer(); 51 bbf.put(bb); 52 } else { 53 bbf = ByteBuffer.wrap(bb); 54 cbf = CharBuffer.allocate(bb.length); 55 } 56 CoderResult cr = null; 57 long t1 = System.nanoTime()/1000; 58 for (int i = 0; i < iteration; i++) { 59 bbf.rewind(); 60 cbf.clear(); 61 dec.reset(); 62 cr = dec.decode(bbf, cbf, true); 63 } 64 long t2 = System.nanoTime()/1000; 65 if (t != null) 66 t.t = (t2 - t1)/iteration; 67 if (cr != CoderResult.UNDERFLOW) { 68 System.out.println("DEC-----------------"); 69 int pos = bbf.position(); 70 System.out.printf(" cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n", 71 cr.toString(), pos, 72 bb[pos++]&0xff, bb[pos++]&0xff,bb[pos++]&0xff, bb[pos++]&0xff); 73 throw new RuntimeException("Decoding err: " + csn); 74 } 75 char[] cc = new char[cbf.position()]; 76 cbf.flip(); cbf.get(cc); 77 return cc; 78 79 } 80 81 static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect) 82 throws Exception { 83 CharsetDecoder dec = cs.newDecoder(); 84 ByteBuffer bbf; 85 CharBuffer cbf; 86 if (testDirect) { 87 bbf = ByteBuffer.allocateDirect(bb.length); 88 cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer(); 89 bbf.put(bb).flip(); 90 } else { 91 bbf = ByteBuffer.wrap(bb); 92 cbf = CharBuffer.allocate(bb.length); 93 } 94 return dec.decode(bbf, cbf, true); 95 } 96 97 static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t) 98 throws Exception { 99 ByteBuffer bbf; 100 CharBuffer cbf; 101 CharsetEncoder enc = cs.newEncoder(); 102 String csn = cs.name(); 103 if (testDirect) { 104 bbf = ByteBuffer.allocateDirect(cc.length * 4); 105 cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); 106 cbf.put(cc).flip(); 107 } else { 108 bbf = ByteBuffer.allocate(cc.length * 4); 109 cbf = CharBuffer.wrap(cc); 110 } 111 CoderResult cr = null; 112 long t1 = System.nanoTime()/1000; 113 for (int i = 0; i < iteration; i++) { 114 cbf.rewind(); 115 bbf.clear(); 116 enc.reset(); 117 cr = enc.encode(cbf, bbf, true); 118 } 119 long t2 = System.nanoTime()/1000; 120 if (t != null) 121 t.t = (t2 - t1)/iteration; 122 if (cr != CoderResult.UNDERFLOW) { 123 System.out.println("ENC-----------------"); 124 int pos = cbf.position(); 125 System.out.printf(" cr=%s, cbf.pos=%d, cc[pos]=%x%n", 126 cr.toString(), pos, cc[pos]&0xffff); 127 throw new RuntimeException("Encoding err: " + csn); 128 } 129 byte[] bb = new byte[bbf.position()]; 130 bbf.flip(); bbf.get(bb); 131 return bb; 132 } 133 134 static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect) 135 throws Exception { 136 ByteBuffer bbf; 137 CharBuffer cbf; 138 CharsetEncoder enc = cs.newEncoder(); 139 if (testDirect) { 140 bbf = ByteBuffer.allocateDirect(cc.length * 4); 141 cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); 142 cbf.put(cc).flip(); 143 } else { 144 bbf = ByteBuffer.allocate(cc.length * 4); 145 cbf = CharBuffer.wrap(cc); 146 } 147 return enc.encode(cbf, bbf, true); 148 } 149 150 static char[] getEUC_TWChars(boolean skipNR) { 151 //CharsetEncoder encOLD = Charset.forName("EUC_TW_OLD").newEncoder(); 152 CharsetEncoder encOLD = new EUC_TW_OLD().newEncoder(); 153 CharsetEncoder enc = Charset.forName("EUC_TW").newEncoder(); 154 char[] cc = new char[0x20000]; 155 char[] c2 = new char[2]; 156 int pos = 0; 157 int i = 0; 158 //bmp 159 for (i = 0; i < 0x10000; i++) { 160 //SKIP these 3 NR codepoints if compared to EUC_TW 161 if (skipNR && (i == 0x4ea0 || i == 0x51ab || i == 0x52f9)) 162 continue; 163 if (encOLD.canEncode((char)i) != enc.canEncode((char)i)) { 164 System.out.printf(" Err i=%x: old=%b new=%b%n", i, 165 encOLD.canEncode((char)i), 166 enc.canEncode((char)i)); 167 throw new RuntimeException("canEncode() err!"); 168 } 169 170 if (enc.canEncode((char)i)) { 171 cc[pos++] = (char)i; 172 } 173 } 174 175 //supp 176 CharBuffer cb = CharBuffer.wrap(new char[2]); 177 for (i = 0x20000; i < 0x30000; i++) { 178 Character.toChars(i, c2, 0); 179 cb.clear();cb.put(c2[0]);cb.put(c2[1]);cb.flip(); 180 181 if (encOLD.canEncode(cb) != enc.canEncode(cb)) { 182 throw new RuntimeException("canEncode() err!"); 183 } 184 185 if (enc.canEncode(cb)) { 186 //System.out.printf("cp=%x, (%x, %x) %n", i, c2[0] & 0xffff, c2[1] & 0xffff); 187 cc[pos++] = c2[0]; 188 cc[pos++] = c2[1]; 189 } 190 } 191 192 return Arrays.copyOf(cc, pos); 193 } 194 195 static void checkRoundtrip(Charset cs) throws Exception { 196 char[] cc = getEUC_TWChars(false); 197 System.out.printf("Check roundtrip <%s>...", cs.name()); 198 byte[] bb = encode(cc, cs, false, null); 199 char[] ccO = decode(bb, cs, false, null); 200 201 if (!Arrays.equals(cc, ccO)) { 202 System.out.printf(" non-direct failed"); 203 } 204 bb = encode(cc, cs, true, null); 205 ccO = decode(bb, cs, true, null); 206 if (!Arrays.equals(cc, ccO)) { 207 System.out.printf(" (direct) failed"); 208 } 209 System.out.println(); 210 } 211 212 static void checkInit(String csn) throws Exception { 213 System.out.printf("Check init <%s>...%n", csn); 214 Charset.forName("Big5"); // load in the ExtendedCharsets 215 long t1 = System.nanoTime()/1000; 216 Charset cs = Charset.forName(csn); 217 long t2 = System.nanoTime()/1000; 218 System.out.printf(" charset :%d%n", t2 - t1); 219 t1 = System.nanoTime()/1000; 220 cs.newDecoder(); 221 t2 = System.nanoTime()/1000; 222 System.out.printf(" new Decoder :%d%n", t2 - t1); 223 224 t1 = System.nanoTime()/1000; 225 cs.newEncoder(); 226 t2 = System.nanoTime()/1000; 227 System.out.printf(" new Encoder :%d%n", t2 - t1); 228 } 229 230 static void compare(Charset cs1, Charset cs2) throws Exception { 231 char[] cc = getEUC_TWChars(true); 232 233 String csn1 = cs1.name(); 234 String csn2 = cs2.name(); 235 System.out.printf("Diff <%s> <%s>...%n", csn1, csn2); 236 237 Time t1 = new Time(); 238 Time t2 = new Time(); 239 240 byte[] bb1 = encode(cc, cs1, false, t1); 241 byte[] bb2 = encode(cc, cs2, false, t2); 242 243 System.out.printf(" Encoding TimeRatio %s/%s: %d,%d :%f%n", 244 csn2, csn1, 245 t2.t, t1.t, 246 (double)(t2.t)/(t1.t)); 247 if (!Arrays.equals(bb1, bb2)) { 248 System.out.printf(" encoding failed%n"); 249 } 250 251 char[] cc2 = decode(bb1, cs2, false, t2); 252 char[] cc1 = decode(bb1, cs1, false, t1); 253 System.out.printf(" Decoding TimeRatio %s/%s: %d,%d :%f%n", 254 csn2, csn1, 255 t2.t, t1.t, 256 (double)(t2.t)/(t1.t)); 257 if (!Arrays.equals(cc1, cc2)) { 258 System.out.printf(" decoding failed%n"); 259 } 260 261 bb1 = encode(cc, cs1, true, t1); 262 bb2 = encode(cc, cs2, true, t2); 263 264 System.out.printf(" Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n", 265 csn2, csn1, 266 t2.t, t1.t, 267 (double)(t2.t)/(t1.t)); 268 269 if (!Arrays.equals(bb1, bb2)) 270 System.out.printf(" encoding (direct) failed%n"); 271 272 cc1 = decode(bb1, cs1, true, t1); 273 cc2 = decode(bb1, cs2, true, t2); 274 System.out.printf(" Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n", 275 csn2, csn1, 276 t2.t, t1.t, 277 (double)(t2.t)/(t1.t)); 278 if (!Arrays.equals(cc1, cc2)) { 279 System.out.printf(" decoding (direct) failed%n"); 280 } 281 } 282 283 // The first byte is the length of malformed bytes 284 static byte[][] malformed = { 285 //{5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 }, 286 }; 287 288 static void checkMalformed(Charset cs) throws Exception { 289 boolean failed = false; 290 String csn = cs.name(); 291 System.out.printf("Check malformed <%s>...%n", csn); 292 for (boolean direct: new boolean[] {false, true}) { 293 for (byte[] bins : malformed) { 294 int mlen = bins[0]; 295 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length); 296 CoderResult cr = decodeCR(bin, cs, direct); 297 String ashex = ""; 298 for (int i = 0; i < bin.length; i++) { 299 if (i > 0) ashex += " "; 300 ashex += Integer.toBinaryString((int)bin[i] & 0xff); 301 } 302 if (!cr.isMalformed()) { 303 System.out.printf(" FAIL(direct=%b): [%s] not malformed.\n", direct, ashex); 304 failed = true; 305 } else if (cr.length() != mlen) { 306 System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length()); 307 failed = true; 308 } 309 } 310 } 311 if (failed) 312 throw new RuntimeException("Check malformed failed " + csn); 313 } 314 315 static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) { 316 int inPos = flow[0]; 317 int inLen = flow[1]; 318 int outPos = flow[2]; 319 int outLen = flow[3]; 320 int expedInPos = flow[4]; 321 int expedOutPos = flow[5]; 322 CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW 323 :CoderResult.OVERFLOW; 324 ByteBuffer bbf; 325 CharBuffer cbf; 326 if (direct) { 327 bbf = ByteBuffer.allocateDirect(inPos + bytes.length); 328 cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer(); 329 } else { 330 bbf = ByteBuffer.allocate(inPos + bytes.length); 331 cbf = CharBuffer.allocate(outPos + outLen); 332 } 333 bbf.position(inPos); 334 bbf.put(bytes).flip().position(inPos).limit(inPos + inLen); 335 cbf.position(outPos); 336 dec.reset(); 337 CoderResult cr = dec.decode(bbf, cbf, false); 338 if (cr != expedCR || 339 bbf.position() != expedInPos || 340 cbf.position() != expedOutPos) { 341 System.out.printf("Expected(direct=%5b): [", direct); 342 for (int i:flow) System.out.print(" " + i); 343 System.out.println("] CR=" + cr + 344 ", inPos=" + bbf.position() + 345 ", outPos=" + cbf.position()); 346 return false; 347 } 348 return true; 349 } 350 351 static void checkUnderOverflow(Charset cs) throws Exception { 352 String csn = cs.name(); 353 System.out.printf("Check under/overflow <%s>...%n", csn); 354 CharsetDecoder dec = cs.newDecoder(); 355 boolean failed = false; 356 //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1 357 //0 1 2 3 7 11 358 byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW"); 359 int inlen = bytes.length; 360 361 int MAXOFF = 20; 362 for (int inoff = 0; inoff < MAXOFF; inoff++) { 363 for (int outoff = 0; outoff < MAXOFF; outoff++) { 364 int[][] Flows = { 365 //inpos, inLen, outPos, outLen, inPosEP, outposEP, under(0)/over(1) 366 //overflow 367 {inoff, inlen, outoff, 1, inoff + 1, outoff + 1, 1}, 368 {inoff, inlen, outoff, 2, inoff + 3, outoff + 2, 1}, 369 {inoff, inlen, outoff, 3, inoff + 7, outoff + 3, 1}, 370 {inoff, inlen, outoff, 4, inoff + 11, outoff + 4, 1}, 371 {inoff, inlen, outoff, 5, inoff + 11, outoff + 4, 1}, 372 {inoff, inlen, outoff, 6, inoff + 15, outoff + 6, 0}, 373 //underflow 374 {inoff, 1, outoff, 6, inoff + 1, outoff + 1, 0}, 375 {inoff, 2, outoff, 6, inoff + 1, outoff + 1, 0}, 376 {inoff, 3, outoff, 6, inoff + 3, outoff + 2, 0}, 377 {inoff, 4, outoff, 6, inoff + 3, outoff + 2, 0}, 378 {inoff, 5, outoff, 6, inoff + 3, outoff + 2, 0}, 379 {inoff, 8, outoff, 6, inoff + 7, outoff + 3, 0}, 380 {inoff, 9, outoff, 6, inoff + 7, outoff + 3, 0}, 381 {inoff, 10, outoff, 6, inoff + 7, outoff + 3, 0}, 382 {inoff, 11, outoff, 6, inoff +11, outoff + 4, 0}, 383 {inoff, 12, outoff, 6, inoff +11, outoff + 4, 0}, 384 {inoff, 15, outoff, 6, inoff +15, outoff + 6, 0}, 385 // 2-byte under/overflow 386 {inoff, 2, outoff, 1, inoff + 1, outoff + 1, 0}, 387 {inoff, 3, outoff, 1, inoff + 1, outoff + 1, 1}, 388 {inoff, 3, outoff, 2, inoff + 3, outoff + 2, 0}, 389 // 4-byte under/overflow 390 {inoff, 4, outoff, 2, inoff + 3, outoff + 2, 0}, 391 {inoff, 5, outoff, 2, inoff + 3, outoff + 2, 0}, 392 {inoff, 6, outoff, 2, inoff + 3, outoff + 2, 0}, 393 {inoff, 7, outoff, 2, inoff + 3, outoff + 2, 1}, 394 {inoff, 7, outoff, 3, inoff + 7, outoff + 3, 0}, 395 // 4-byte under/overflow 396 {inoff, 8, outoff, 3, inoff + 7, outoff + 3, 0}, 397 {inoff, 9, outoff, 3, inoff + 7, outoff + 3, 0}, 398 {inoff, 10, outoff, 3, inoff + 7, outoff + 3, 0}, 399 {inoff, 11, outoff, 3, inoff + 7, outoff + 3, 1}, 400 {inoff, 11, outoff, 4, inoff +11, outoff + 4, 0}, 401 // 4-byte/supp under/overflow 402 {inoff, 11, outoff, 4, inoff +11, outoff + 4, 0}, 403 {inoff, 12, outoff, 4, inoff +11, outoff + 4, 0}, 404 {inoff, 13, outoff, 4, inoff +11, outoff + 4, 0}, 405 {inoff, 14, outoff, 4, inoff +11, outoff + 4, 0}, 406 {inoff, 15, outoff, 4, inoff +11, outoff + 4, 1}, 407 {inoff, 15, outoff, 5, inoff +11, outoff + 4, 1}, 408 {inoff, 15, outoff, 6, inoff +15, outoff + 6, 0}, 409 }; 410 for (boolean direct: new boolean[] {false, true}) { 411 for (int[] flow: Flows) { 412 if (!check(dec, bytes, direct, flow)) 413 failed = true; 414 } 415 }}} 416 if (failed) 417 throw new RuntimeException("Check under/overflow failed " + csn); 418 } 419 420 public static void main(String[] args) throws Exception { 421 // be the first one 422 //checkInit("EUC_TW_OLD"); 423 checkInit("EUC_TW"); 424 Charset euctw = Charset.forName("EUC_TW"); 425 checkRoundtrip(euctw); 426 compare(euctw, new EUC_TW_OLD()); 427 checkMalformed(euctw); 428 checkUnderOverflow(euctw); 429 } 430 }