1 /* 2 * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /* 25 * @test 26 * @bug 6233345 6381699 6381702 6381705 6381706 27 * @summary Encode many char sequences in many ways 28 * @library /test/lib 29 * @build jdk.test.lib.RandomFactory 30 * @run main/timeout=1200 FindEncoderBugs 31 * @author Martin Buchholz 32 * @key randomness 33 */ 34 35 import java.util.*; 36 import java.util.regex.*; 37 import java.nio.*; 38 import java.nio.charset.*; 39 import jdk.test.lib.RandomFactory; 40 41 public class FindEncoderBugs { 42 43 static boolean isBroken(String csn) { 44 if (csn.equals("x-COMPOUND_TEXT")) return true; 45 return false; 46 } 47 48 static <T extends Comparable<? super T>> List<T> sort(Collection<T> c) { 49 List<T> list = new ArrayList<T>(c); 50 Collections.sort(list); 51 return list; 52 } 53 54 static class TooManyFailures extends RuntimeException { 55 private static final long serialVersionUID = 0L; 56 } 57 58 static String string(byte[] a) { 59 final StringBuilder sb = new StringBuilder(); 60 for (byte b : a) { 61 if (sb.length() != 0) sb.append(' '); 62 sb.append(String.format("%02x", b & 0xff)); 63 } 64 return sb.toString(); 65 } 66 67 static String string(char[] a) { 68 final StringBuilder sb = new StringBuilder(); 69 for (char c : a) { 70 if (sb.length() != 0) sb.append(' '); 71 sb.append(String.format("\\u%04x", (int) c)); 72 } 73 return sb.toString(); 74 } 75 76 static class Reporter { 77 // Some machinery to make sure only a small number of errors 78 // that are "too similar" are reported. 79 static class Counts extends HashMap<String, Long> { 80 private static final long serialVersionUID = -1; 81 long inc(String signature) { 82 Long count = get(signature); 83 if (count == null) count = 0L; 84 put(signature, count+1); 85 return count+1; 86 } 87 } 88 89 final Counts failureCounts = new Counts(); 90 final static long maxFailures = 2; 91 92 final static Pattern hideBytes = Pattern.compile("\"[0-9a-f ]+\""); 93 final static Pattern hideChars = Pattern.compile("\\\\u[0-9a-f]{4}"); 94 95 boolean bug(String format, Object... args) { 96 String signature = String.format(format, args); 97 // signature = hideBytes.matcher(signature).replaceAll("\"??\""); 98 // signature = hideChars.matcher(signature).replaceAll("\\u????"); 99 failed++; 100 if (failureCounts.inc(signature) <= maxFailures) { 101 System.out.printf(format, args); 102 System.out.println(); 103 return true; 104 } 105 return false; 106 } 107 108 void summarize() { 109 for (String key : sort(failureCounts.keySet())) 110 System.out.printf("-----%n%s%nfailures=%d%n", 111 key, failureCounts.get(key)); 112 } 113 } 114 115 static final Reporter reporter = new Reporter(); 116 117 static class Result { 118 final int limit; 119 final int ipos; 120 final boolean direct; 121 final char[] ia; 122 final byte[] oa; 123 final CoderResult cr; 124 125 private static byte[] toByteArray(ByteBuffer bb) { 126 byte[] bytes = new byte[bb.position()]; 127 for (int i = 0; i < bytes.length; i++) 128 bytes[i] = bb.get(i); 129 return bytes; 130 } 131 132 Result(CharBuffer ib, ByteBuffer ob, CoderResult cr) { 133 ipos = ib.position(); 134 ia = toArray(ib); 135 oa = toArray(ob); 136 direct = ib.isDirect(); 137 limit = ob.limit(); 138 this.cr = cr; 139 } 140 141 static char[] toArray(CharBuffer b) { 142 int pos = b.position(); 143 char[] a = new char[b.limit()]; 144 b.position(0); 145 b.get(a); 146 b.position(pos); 147 return a; 148 } 149 150 static byte[] toArray(ByteBuffer b) { 151 byte[] a = new byte[b.position()]; 152 b.position(0); 153 b.get(a); 154 return a; 155 } 156 157 static boolean eq(Result x, Result y) { 158 return x == y || 159 (x != null && y != null && 160 (Arrays.equals(x.oa, y.oa) && 161 x.ipos == y.ipos && 162 x.cr == y.cr)); 163 } 164 165 public String toString() { 166 return String.format("\"%s\"[%d/%d] => %s \"%s\"[%d/%d]%s", 167 string(ia), ipos, ia.length, 168 cr, string(oa), oa.length, limit, 169 (direct ? " (direct)" : "")); 170 } 171 } 172 173 static class CharsetTester { 174 private final Charset cs; 175 private final boolean hasBom; 176 private static final int maxFailures = 5; 177 private int failures = 0; 178 // private static final long maxCharsetFailures = Long.MAX_VALUE; 179 private static final long maxCharsetFailures = 10000L; 180 private final long failed0 = failed; 181 182 // legend: r=regular d=direct In=Input Ou=Output 183 static final int maxBufSize = 20; 184 static final CharBuffer[] rInBuffers = new CharBuffer[maxBufSize]; 185 static final CharBuffer[] dInBuffers = new CharBuffer[maxBufSize]; 186 187 static final ByteBuffer[] rOuBuffers = new ByteBuffer[maxBufSize]; 188 static final ByteBuffer[] dOuBuffers = new ByteBuffer[maxBufSize]; 189 static { 190 for (int i = 0; i < maxBufSize; i++) { 191 rInBuffers[i] = CharBuffer.allocate(i); 192 dInBuffers[i] = ByteBuffer.allocateDirect(i*2).asCharBuffer(); 193 rOuBuffers[i] = ByteBuffer.allocate(i); 194 dOuBuffers[i] = ByteBuffer.allocateDirect(i); 195 } 196 } 197 198 CharsetTester(Charset cs) { 199 this.cs = cs; 200 this.hasBom = 201 cs.name().matches(".*BOM.*") || 202 cs.name().equals("UTF-16"); 203 } 204 205 static boolean bug(String format, Object... args) { 206 return reporter.bug(format, args); 207 } 208 209 static boolean hasBom(byte[] a) { 210 switch (a.length) { 211 case 2: case 4: 212 int sum = 0; 213 for (byte x : a) 214 sum += x; 215 return sum == (byte) 0xfe + (byte) 0xff; 216 default: return false; 217 } 218 } 219 220 void testSurrogates() { 221 int failures = 0; 222 for (int i = 0; i < 10; i++) { 223 Result r = test(new char[] { randomHighSurrogate() }); 224 if (r == null) break; 225 if (! (r.cr.isUnderflow() && 226 r.ipos == 0)) 227 bug("Lone high surrogate not UNDERFLOW: %s %s", 228 cs, r); 229 } 230 for (int i = 0; i < 10; i++) { 231 Result r = test(new char[] { randomLowSurrogate() }); 232 if (r == null) break; 233 if (! (r.cr.isMalformed() && r.cr.length() == 1)) 234 bug("Lone low surrogate not MALFORMED[1]: %s %s", 235 cs, r); 236 } 237 char[] chars = new char[2]; 238 for (int i = 0; i < 10; i++) { 239 chars[0] = randomLowSurrogate(); // Always illegal 240 chars[1] = randomChar(); 241 Result r = test(chars); 242 if (r == null) break; 243 if (! (r.cr.isMalformed() && 244 r.cr.length() == 1 && 245 (r.ipos == 0 || (hasBom && hasBom(r.oa))))) { 246 if (failures++ > 5) return; 247 bug("Unpaired low surrogate not MALFORMED[1]: %s %s", 248 cs, r); 249 } 250 } 251 for (int i = 0; i < 10; i++) { 252 chars[0] = randomHighSurrogate(); 253 do { 254 chars[1] = randomChar(); 255 } while (Character.isLowSurrogate(chars[1])); 256 Result r = test(chars); 257 if (r == null) break; 258 if (! (r.cr.isMalformed() && 259 r.cr.length() == 1 && 260 (r.ipos == 0 || (hasBom && hasBom(r.oa))))) { 261 if (failures++ > 5) return; 262 bug("Unpaired high surrogate not MALFORMED[1]: %s %s", 263 cs, r); 264 } 265 } 266 for (int i = 0; i < 1000; i++) { 267 chars[0] = randomHighSurrogate(); 268 chars[1] = randomLowSurrogate(); 269 Result r = test(chars); 270 if (r == null) break; 271 if (! ((r.cr.isUnmappable() && 272 r.cr.length() == 2 && 273 r.oa.length == 0) 274 || 275 (r.cr.isUnderflow() && 276 r.oa.length > 0 && 277 r.ipos == 2))) { 278 if (failures++ > 5) return; 279 bug("Legal supplementary character bug: %s %s", 280 cs, r); 281 } 282 } 283 } 284 285 // if (! (r.cr.isMalformed() && 286 // r.cr.length() == 1 && 287 // (rob.position() == 0 || hasBom(rob)))) { 288 // if (failures++ > 5) return; 289 // bug("Unpaired surrogate not malformed: %s %s", 290 // cs, r); 291 // } 292 // } 293 294 // dib.clear(); dib.put(chars); dib.flip(); 295 // rib.position(0); 296 // rob.clear(); rob.limit(lim); 297 // for (CharBuffer ib : new CharBuffer[] { rib, dib }) { 298 // Result r = recode(ib, rob); 299 // if (! (r.cr.isMalformed() && 300 // r.cr.length() == 1 && 301 // (rob.position() == 0 || hasBom(rob)))) { 302 // if (failures++ > 5) return; 303 // bug("Unpaired surrogate not malformed: %s %s", 304 // cs, r); 305 // } 306 // } 307 // //} 308 // for (int i = 0; i < 10000; i++) { 309 // chars[0] = randomHighSurrogate(); 310 // chars[1] = randomLowSurrogate(); 311 // dib.clear(); dib.put(chars); dib.flip(); 312 // rib.position(0); 313 // rob.clear(); rob.limit(lim); 314 // for (CharBuffer ib : new CharBuffer[] { rib, dib }) { 315 // Result r = recode(ib, rob); 316 // if (! ((r.cr.isUnmappable() && 317 // r.cr.length() == 2 && 318 // rob.position() == 0) 319 // || 320 // (r.cr.isUnderflow() && 321 // rob.position() > 0 && 322 // ib.position() == 2))) { 323 // if (failures++ > 5) return; 324 // bug("Legal supplementary character bug: %s %s", 325 // cs, r); 326 // } 327 // } 328 // } 329 // } 330 // } 331 332 Result recode(CharBuffer ib, ByteBuffer ob) { 333 try { 334 byte canary = 22; 335 ib.clear(); // Prepare to read 336 ob.clear(); // Prepare to write 337 for (int i = 0; i < ob.limit(); i++) 338 ob.put(i, canary); 339 CharsetEncoder coder = cs.newEncoder(); 340 CoderResult cr = coder.encode(ib, ob, false); 341 equal(ib.limit(), ib.capacity()); 342 equal(ob.limit(), ob.capacity()); 343 Result r = new Result(ib, ob, cr); 344 if (cr.isError()) 345 check(cr.length() > 0); 346 if (cr.isOverflow() && ob.remaining() > 10) 347 bug("OVERFLOW, but there's lots of room: %s %s", 348 cs, r); 349 // if (cr.isOverflow() && ib.remaining() == 0 && ! hasBom) 350 // bug("OVERFLOW, yet remaining() == 0: %s %s", 351 // cs, r); 352 if (cr.isError() && ib.remaining() < cr.length()) 353 bug("remaining() < CoderResult.length(): %s %s", 354 cs, r); 355 // if (ib.position() == 0 356 // && ob.position() > 0 357 // && ! hasBom(r.oa)) 358 // bug("output only if input consumed: %s %s", 359 // cs, r); 360 CoderResult cr2 = coder.encode(ib, ob, false); 361 if (ib.position() != r.ipos || 362 ob.position() != r.oa.length || 363 cr != cr2) 364 bug("Coding operation not idempotent: %s%n %s%n %s", 365 cs, r, new Result(ib, ob, cr2)); 366 if (ob.position() < ob.limit() && 367 ob.get(ob.position()) != canary) 368 bug("Buffer overrun: %s %s %s", 369 cs, r, ob.get(ob.position())); 370 return r; 371 } catch (Throwable t) { 372 if (bug("Unexpected exception: %s %s %s", 373 cs, t.getClass().getSimpleName(), 374 new Result(ib, ob, null))) 375 t.printStackTrace(); 376 return null; 377 } 378 } 379 380 Result recode2(char[] ia, int n) { 381 int len = ia.length; 382 CharBuffer rib = CharBuffer.wrap(ia); 383 CharBuffer dib = dInBuffers[len]; 384 dib.clear(); dib.put(ia); dib.clear(); 385 ByteBuffer rob = rOuBuffers[n]; 386 ByteBuffer dob = dOuBuffers[n]; 387 equal(rob.limit(), n); 388 equal(dob.limit(), n); 389 check(dib.isDirect()); 390 check(dob.isDirect()); 391 Result r1 = recode(rib, rob); 392 Result r2 = recode(dib, dob); 393 if (r1 != null && r2 != null && ! Result.eq(r1, r2)) 394 bug("Results differ for direct buffers: %s%n %s%n %s", 395 cs, r1, r2); 396 return r1; 397 } 398 399 Result test(char[] ia) { 400 if (failed - failed0 >= maxCharsetFailures) 401 throw new TooManyFailures(); 402 403 Result roomy = recode2(ia, maxBufSize - 1); 404 if (roomy == null) return roomy; 405 int olen = roomy.oa.length; 406 if (olen > 0) { 407 if (roomy.ipos == roomy.ia.length) { 408 Result perfectFit = recode2(ia, olen); 409 if (! Result.eq(roomy, perfectFit)) 410 bug("Results differ: %s%n %s%n %s", 411 cs, roomy, perfectFit); 412 } 413 for (int i = 0; i < olen; i++) { 414 Result claustrophobic = recode2(ia, i); 415 if (claustrophobic == null) return roomy; 416 if (roomy.cr.isUnderflow() && 417 ! claustrophobic.cr.isOverflow()) 418 bug("Expected OVERFLOW: %s%n %s%n %s", 419 cs, roomy, claustrophobic); 420 } 421 } 422 return roomy; 423 } 424 425 void testExhaustively(char[] prefix, int n) { 426 int len = prefix.length; 427 char[] ia = Arrays.copyOf(prefix, len + 1); 428 for (int i = 0; i < 0x10000; i++) { 429 ia[len] = (char) i; 430 if (n == 1) 431 test(ia); 432 else 433 testExhaustively(ia, n - 1); 434 } 435 } 436 437 void testRandomly(char[] prefix, int n) { 438 int len = prefix.length; 439 char[] ia = Arrays.copyOf(prefix, len + n); 440 for (int i = 0; i < 10000; i++) { 441 for (int j = 0; j < n; j++) 442 ia[len + j] = randomChar(); 443 test(ia); 444 } 445 } 446 447 void testPrefix(char[] prefix) { 448 if (prefix.length > 0) 449 System.out.printf("Testing prefix %s%n", string(prefix)); 450 451 test(prefix); 452 453 testExhaustively(prefix, 1); 454 // Can you spare a year of CPU time? 455 //testExhaustively(prefix, 2); 456 457 testRandomly(prefix, 2); 458 testRandomly(prefix, 3); 459 } 460 } 461 462 private final static Random rnd = RandomFactory.getRandom(); 463 private static char randomChar() { 464 return (char) rnd.nextInt(Character.MAX_VALUE); 465 } 466 private static char randomHighSurrogate() { 467 return (char) (Character.MIN_HIGH_SURROGATE + rnd.nextInt(1024)); 468 } 469 private static char randomLowSurrogate() { 470 return (char) (Character.MIN_LOW_SURROGATE + rnd.nextInt(1024)); 471 } 472 473 private static void testCharset(Charset cs) throws Throwable { 474 if (! cs.canEncode()) 475 return; 476 477 final String csn = cs.name(); 478 479 if (isBroken(csn)) { 480 System.out.printf("Skipping possibly broken charset %s%n", csn); 481 return; 482 } 483 System.out.println(csn); 484 485 CharsetTester tester = new CharsetTester(cs); 486 487 tester.testSurrogates(); 488 489 tester.testPrefix(new char[] {}); 490 491 if (csn.equals("x-ISCII91")) { 492 System.out.println("More ISCII testing..."); 493 new CharsetTester(cs).testPrefix(new char[]{'\u094d'}); // Halant 494 new CharsetTester(cs).testPrefix(new char[]{'\u093c'}); // Nukta 495 } 496 } 497 498 private static void realMain(String[] args) { 499 for (Charset cs : sort(Charset.availableCharsets().values())) { 500 try { 501 testCharset(cs); 502 } catch (TooManyFailures e) { 503 System.out.printf("Too many failures for %s%n", cs); 504 } catch (Throwable t) { 505 unexpected(t); 506 } 507 } 508 reporter.summarize(); 509 } 510 511 //--------------------- Infrastructure --------------------------- 512 static volatile long passed = 0, failed = 0; 513 static void pass() {passed++;} 514 static void fail() {failed++; Thread.dumpStack();} 515 static void fail(String format, Object... args) { 516 System.out.println(String.format(format, args)); failed++;} 517 static void fail(String msg) {System.out.println(msg); fail();} 518 static void unexpected(Throwable t) {failed++; t.printStackTrace();} 519 static void check(boolean cond) {if (cond) pass(); else fail();} 520 static void equal(Object x, Object y) { 521 if (x == null ? y == null : x.equals(y)) pass(); 522 else fail(x + " not equal to " + y);} 523 public static void main(String[] args) throws Throwable { 524 try {realMain(args);} catch (Throwable t) {unexpected(t);} 525 System.out.printf("%nPassed = %d, failed = %d%n%n", passed, failed); 526 if (failed > 0) throw new AssertionError("Some tests failed");} 527 }