1 /* 2 * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /* 25 * @test 26 * @bug 6233345 6381699 6381702 6381705 6381706 27 * @summary Encode many char sequences in many ways 28 * @library /test/lib 29 * @run main/timeout=1200 FindEncoderBugs 30 * @author Martin Buchholz 31 * @key randomness 32 */ 33 34 import java.util.*; 35 import java.util.regex.*; 36 import java.nio.*; 37 import java.nio.charset.*; 38 import jdk.test.lib.RandomFactory; 39 40 public class FindEncoderBugs { 41 42 static boolean isBroken(String csn) { 43 if (csn.equals("x-COMPOUND_TEXT")) return true; 44 return false; 45 } 46 47 static <T extends Comparable<? super T>> List<T> sort(Collection<T> c) { 48 List<T> list = new ArrayList<T>(c); 49 Collections.sort(list); 50 return list; 51 } 52 53 static class TooManyFailures extends RuntimeException { 54 private static final long serialVersionUID = 0L; 55 } 56 57 static String string(byte[] a) { 58 final StringBuilder sb = new StringBuilder(); 59 for (byte b : a) { 60 if (sb.length() != 0) sb.append(' '); 61 sb.append(String.format("%02x", b & 0xff)); 62 } 63 return sb.toString(); 64 } 65 66 static String string(char[] a) { 67 final StringBuilder sb = new StringBuilder(); 68 for (char c : a) { 69 if (sb.length() != 0) sb.append(' '); 70 sb.append(String.format("\\u%04x", (int) c)); 71 } 72 return sb.toString(); 73 } 74 75 static class Reporter { 76 // Some machinery to make sure only a small number of errors 77 // that are "too similar" are reported. 78 static class Counts extends HashMap<String, Long> { 79 private static final long serialVersionUID = -1; 80 long inc(String signature) { 81 Long count = get(signature); 82 if (count == null) count = 0L; 83 put(signature, count+1); 84 return count+1; 85 } 86 } 87 88 final Counts failureCounts = new Counts(); 89 final static long maxFailures = 2; 90 91 final static Pattern hideBytes = Pattern.compile("\"[0-9a-f ]+\""); 92 final static Pattern hideChars = Pattern.compile("\\\\u[0-9a-f]{4}"); 93 94 boolean bug(String format, Object... args) { 95 String signature = String.format(format, args); 96 // signature = hideBytes.matcher(signature).replaceAll("\"??\""); 97 // signature = hideChars.matcher(signature).replaceAll("\\u????"); 98 failed++; 99 if (failureCounts.inc(signature) <= maxFailures) { 100 System.out.printf(format, args); 101 System.out.println(); 102 return true; 103 } 104 return false; 105 } 106 107 void summarize() { 108 for (String key : sort(failureCounts.keySet())) 109 System.out.printf("-----%n%s%nfailures=%d%n", 110 key, failureCounts.get(key)); 111 } 112 } 113 114 static final Reporter reporter = new Reporter(); 115 116 static class Result { 117 final int limit; 118 final int ipos; 119 final boolean direct; 120 final char[] ia; 121 final byte[] oa; 122 final CoderResult cr; 123 124 private static byte[] toByteArray(ByteBuffer bb) { 125 byte[] bytes = new byte[bb.position()]; 126 for (int i = 0; i < bytes.length; i++) 127 bytes[i] = bb.get(i); 128 return bytes; 129 } 130 131 Result(CharBuffer ib, ByteBuffer ob, CoderResult cr) { 132 ipos = ib.position(); 133 ia = toArray(ib); 134 oa = toArray(ob); 135 direct = ib.isDirect(); 136 limit = ob.limit(); 137 this.cr = cr; 138 } 139 140 static char[] toArray(CharBuffer b) { 141 int pos = b.position(); 142 char[] a = new char[b.limit()]; 143 b.position(0); 144 b.get(a); 145 b.position(pos); 146 return a; 147 } 148 149 static byte[] toArray(ByteBuffer b) { 150 byte[] a = new byte[b.position()]; 151 b.position(0); 152 b.get(a); 153 return a; 154 } 155 156 static boolean eq(Result x, Result y) { 157 return x == y || 158 (x != null && y != null && 159 (Arrays.equals(x.oa, y.oa) && 160 x.ipos == y.ipos && 161 x.cr == y.cr)); 162 } 163 164 public String toString() { 165 return String.format("\"%s\"[%d/%d] => %s \"%s\"[%d/%d]%s", 166 string(ia), ipos, ia.length, 167 cr, string(oa), oa.length, limit, 168 (direct ? " (direct)" : "")); 169 } 170 } 171 172 static class CharsetTester { 173 private final Charset cs; 174 private final boolean hasBom; 175 private static final int maxFailures = 5; 176 private int failures = 0; 177 // private static final long maxCharsetFailures = Long.MAX_VALUE; 178 private static final long maxCharsetFailures = 10000L; 179 private final long failed0 = failed; 180 181 // legend: r=regular d=direct In=Input Ou=Output 182 static final int maxBufSize = 20; 183 static final CharBuffer[] rInBuffers = new CharBuffer[maxBufSize]; 184 static final CharBuffer[] dInBuffers = new CharBuffer[maxBufSize]; 185 186 static final ByteBuffer[] rOuBuffers = new ByteBuffer[maxBufSize]; 187 static final ByteBuffer[] dOuBuffers = new ByteBuffer[maxBufSize]; 188 static { 189 for (int i = 0; i < maxBufSize; i++) { 190 rInBuffers[i] = CharBuffer.allocate(i); 191 dInBuffers[i] = ByteBuffer.allocateDirect(i*2).asCharBuffer(); 192 rOuBuffers[i] = ByteBuffer.allocate(i); 193 dOuBuffers[i] = ByteBuffer.allocateDirect(i); 194 } 195 } 196 197 CharsetTester(Charset cs) { 198 this.cs = cs; 199 this.hasBom = 200 cs.name().matches(".*BOM.*") || 201 cs.name().equals("UTF-16"); 202 } 203 204 static boolean bug(String format, Object... args) { 205 return reporter.bug(format, args); 206 } 207 208 static boolean hasBom(byte[] a) { 209 switch (a.length) { 210 case 2: case 4: 211 int sum = 0; 212 for (byte x : a) 213 sum += x; 214 return sum == (byte) 0xfe + (byte) 0xff; 215 default: return false; 216 } 217 } 218 219 void testSurrogates() { 220 int failures = 0; 221 for (int i = 0; i < 10; i++) { 222 Result r = test(new char[] { randomHighSurrogate() }); 223 if (r == null) break; 224 if (! (r.cr.isUnderflow() && 225 r.ipos == 0)) 226 bug("Lone high surrogate not UNDERFLOW: %s %s", 227 cs, r); 228 } 229 for (int i = 0; i < 10; i++) { 230 Result r = test(new char[] { randomLowSurrogate() }); 231 if (r == null) break; 232 if (! (r.cr.isMalformed() && r.cr.length() == 1)) 233 bug("Lone low surrogate not MALFORMED[1]: %s %s", 234 cs, r); 235 } 236 char[] chars = new char[2]; 237 for (int i = 0; i < 10; i++) { 238 chars[0] = randomLowSurrogate(); // Always illegal 239 chars[1] = randomChar(); 240 Result r = test(chars); 241 if (r == null) break; 242 if (! (r.cr.isMalformed() && 243 r.cr.length() == 1 && 244 (r.ipos == 0 || (hasBom && hasBom(r.oa))))) { 245 if (failures++ > 5) return; 246 bug("Unpaired low surrogate not MALFORMED[1]: %s %s", 247 cs, r); 248 } 249 } 250 for (int i = 0; i < 10; i++) { 251 chars[0] = randomHighSurrogate(); 252 do { 253 chars[1] = randomChar(); 254 } while (Character.isLowSurrogate(chars[1])); 255 Result r = test(chars); 256 if (r == null) break; 257 if (! (r.cr.isMalformed() && 258 r.cr.length() == 1 && 259 (r.ipos == 0 || (hasBom && hasBom(r.oa))))) { 260 if (failures++ > 5) return; 261 bug("Unpaired high surrogate not MALFORMED[1]: %s %s", 262 cs, r); 263 } 264 } 265 for (int i = 0; i < 1000; i++) { 266 chars[0] = randomHighSurrogate(); 267 chars[1] = randomLowSurrogate(); 268 Result r = test(chars); 269 if (r == null) break; 270 if (! ((r.cr.isUnmappable() && 271 r.cr.length() == 2 && 272 r.oa.length == 0) 273 || 274 (r.cr.isUnderflow() && 275 r.oa.length > 0 && 276 r.ipos == 2))) { 277 if (failures++ > 5) return; 278 bug("Legal supplementary character bug: %s %s", 279 cs, r); 280 } 281 } 282 } 283 284 // if (! (r.cr.isMalformed() && 285 // r.cr.length() == 1 && 286 // (rob.position() == 0 || hasBom(rob)))) { 287 // if (failures++ > 5) return; 288 // bug("Unpaired surrogate not malformed: %s %s", 289 // cs, r); 290 // } 291 // } 292 293 // dib.clear(); dib.put(chars); dib.flip(); 294 // rib.position(0); 295 // rob.clear(); rob.limit(lim); 296 // for (CharBuffer ib : new CharBuffer[] { rib, dib }) { 297 // Result r = recode(ib, rob); 298 // if (! (r.cr.isMalformed() && 299 // r.cr.length() == 1 && 300 // (rob.position() == 0 || hasBom(rob)))) { 301 // if (failures++ > 5) return; 302 // bug("Unpaired surrogate not malformed: %s %s", 303 // cs, r); 304 // } 305 // } 306 // //} 307 // for (int i = 0; i < 10000; i++) { 308 // chars[0] = randomHighSurrogate(); 309 // chars[1] = randomLowSurrogate(); 310 // dib.clear(); dib.put(chars); dib.flip(); 311 // rib.position(0); 312 // rob.clear(); rob.limit(lim); 313 // for (CharBuffer ib : new CharBuffer[] { rib, dib }) { 314 // Result r = recode(ib, rob); 315 // if (! ((r.cr.isUnmappable() && 316 // r.cr.length() == 2 && 317 // rob.position() == 0) 318 // || 319 // (r.cr.isUnderflow() && 320 // rob.position() > 0 && 321 // ib.position() == 2))) { 322 // if (failures++ > 5) return; 323 // bug("Legal supplementary character bug: %s %s", 324 // cs, r); 325 // } 326 // } 327 // } 328 // } 329 // } 330 331 Result recode(CharBuffer ib, ByteBuffer ob) { 332 try { 333 byte canary = 22; 334 ib.clear(); // Prepare to read 335 ob.clear(); // Prepare to write 336 for (int i = 0; i < ob.limit(); i++) 337 ob.put(i, canary); 338 CharsetEncoder coder = cs.newEncoder(); 339 CoderResult cr = coder.encode(ib, ob, false); 340 equal(ib.limit(), ib.capacity()); 341 equal(ob.limit(), ob.capacity()); 342 Result r = new Result(ib, ob, cr); 343 if (cr.isError()) 344 check(cr.length() > 0); 345 if (cr.isOverflow() && ob.remaining() > 10) 346 bug("OVERFLOW, but there's lots of room: %s %s", 347 cs, r); 348 // if (cr.isOverflow() && ib.remaining() == 0 && ! hasBom) 349 // bug("OVERFLOW, yet remaining() == 0: %s %s", 350 // cs, r); 351 if (cr.isError() && ib.remaining() < cr.length()) 352 bug("remaining() < CoderResult.length(): %s %s", 353 cs, r); 354 // if (ib.position() == 0 355 // && ob.position() > 0 356 // && ! hasBom(r.oa)) 357 // bug("output only if input consumed: %s %s", 358 // cs, r); 359 CoderResult cr2 = coder.encode(ib, ob, false); 360 if (ib.position() != r.ipos || 361 ob.position() != r.oa.length || 362 cr != cr2) 363 bug("Coding operation not idempotent: %s%n %s%n %s", 364 cs, r, new Result(ib, ob, cr2)); 365 if (ob.position() < ob.limit() && 366 ob.get(ob.position()) != canary) 367 bug("Buffer overrun: %s %s %s", 368 cs, r, ob.get(ob.position())); 369 return r; 370 } catch (Throwable t) { 371 if (bug("Unexpected exception: %s %s %s", 372 cs, t.getClass().getSimpleName(), 373 new Result(ib, ob, null))) 374 t.printStackTrace(); 375 return null; 376 } 377 } 378 379 Result recode2(char[] ia, int n) { 380 int len = ia.length; 381 CharBuffer rib = CharBuffer.wrap(ia); 382 CharBuffer dib = dInBuffers[len]; 383 dib.clear(); dib.put(ia); dib.clear(); 384 ByteBuffer rob = rOuBuffers[n]; 385 ByteBuffer dob = dOuBuffers[n]; 386 equal(rob.limit(), n); 387 equal(dob.limit(), n); 388 check(dib.isDirect()); 389 check(dob.isDirect()); 390 Result r1 = recode(rib, rob); 391 Result r2 = recode(dib, dob); 392 if (r1 != null && r2 != null && ! Result.eq(r1, r2)) 393 bug("Results differ for direct buffers: %s%n %s%n %s", 394 cs, r1, r2); 395 return r1; 396 } 397 398 Result test(char[] ia) { 399 if (failed - failed0 >= maxCharsetFailures) 400 throw new TooManyFailures(); 401 402 Result roomy = recode2(ia, maxBufSize - 1); 403 if (roomy == null) return roomy; 404 int olen = roomy.oa.length; 405 if (olen > 0) { 406 if (roomy.ipos == roomy.ia.length) { 407 Result perfectFit = recode2(ia, olen); 408 if (! Result.eq(roomy, perfectFit)) 409 bug("Results differ: %s%n %s%n %s", 410 cs, roomy, perfectFit); 411 } 412 for (int i = 0; i < olen; i++) { 413 Result claustrophobic = recode2(ia, i); 414 if (claustrophobic == null) return roomy; 415 if (roomy.cr.isUnderflow() && 416 ! claustrophobic.cr.isOverflow()) 417 bug("Expected OVERFLOW: %s%n %s%n %s", 418 cs, roomy, claustrophobic); 419 } 420 } 421 return roomy; 422 } 423 424 void testExhaustively(char[] prefix, int n) { 425 int len = prefix.length; 426 char[] ia = Arrays.copyOf(prefix, len + 1); 427 for (int i = 0; i < 0x10000; i++) { 428 ia[len] = (char) i; 429 if (n == 1) 430 test(ia); 431 else 432 testExhaustively(ia, n - 1); 433 } 434 } 435 436 void testRandomly(char[] prefix, int n) { 437 int len = prefix.length; 438 char[] ia = Arrays.copyOf(prefix, len + n); 439 for (int i = 0; i < 10000; i++) { 440 for (int j = 0; j < n; j++) 441 ia[len + j] = randomChar(); 442 test(ia); 443 } 444 } 445 446 void testPrefix(char[] prefix) { 447 if (prefix.length > 0) 448 System.out.printf("Testing prefix %s%n", string(prefix)); 449 450 test(prefix); 451 452 testExhaustively(prefix, 1); 453 // Can you spare a year of CPU time? 454 //testExhaustively(prefix, 2); 455 456 testRandomly(prefix, 2); 457 testRandomly(prefix, 3); 458 } 459 } 460 461 private final static Random rnd = RandomFactory.getRandom(); 462 private static char randomChar() { 463 return (char) rnd.nextInt(Character.MAX_VALUE); 464 } 465 private static char randomHighSurrogate() { 466 return (char) (Character.MIN_HIGH_SURROGATE + rnd.nextInt(1024)); 467 } 468 private static char randomLowSurrogate() { 469 return (char) (Character.MIN_LOW_SURROGATE + rnd.nextInt(1024)); 470 } 471 472 private static void testCharset(Charset cs) throws Throwable { 473 if (! cs.canEncode()) 474 return; 475 476 final String csn = cs.name(); 477 478 if (isBroken(csn)) { 479 System.out.printf("Skipping possibly broken charset %s%n", csn); 480 return; 481 } 482 System.out.println(csn); 483 484 CharsetTester tester = new CharsetTester(cs); 485 486 tester.testSurrogates(); 487 488 tester.testPrefix(new char[] {}); 489 490 if (csn.equals("x-ISCII91")) { 491 System.out.println("More ISCII testing..."); 492 new CharsetTester(cs).testPrefix(new char[]{'\u094d'}); // Halant 493 new CharsetTester(cs).testPrefix(new char[]{'\u093c'}); // Nukta 494 } 495 } 496 497 private static void realMain(String[] args) { 498 for (Charset cs : sort(Charset.availableCharsets().values())) { 499 try { 500 testCharset(cs); 501 } catch (TooManyFailures e) { 502 System.out.printf("Too many failures for %s%n", cs); 503 } catch (Throwable t) { 504 unexpected(t); 505 } 506 } 507 reporter.summarize(); 508 } 509 510 //--------------------- Infrastructure --------------------------- 511 static volatile long passed = 0, failed = 0; 512 static void pass() {passed++;} 513 static void fail() {failed++; Thread.dumpStack();} 514 static void fail(String format, Object... args) { 515 System.out.println(String.format(format, args)); failed++;} 516 static void fail(String msg) {System.out.println(msg); fail();} 517 static void unexpected(Throwable t) {failed++; t.printStackTrace();} 518 static void check(boolean cond) {if (cond) pass(); else fail();} 519 static void equal(Object x, Object y) { 520 if (x == null ? y == null : x.equals(y)) pass(); 521 else fail(x + " not equal to " + y);} 522 public static void main(String[] args) throws Throwable { 523 try {realMain(args);} catch (Throwable t) {unexpected(t);} 524 System.out.printf("%nPassed = %d, failed = %d%n%n", passed, failed); 525 if (failed > 0) throw new AssertionError("Some tests failed");} 526 }