1 /* 2 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /** 27 * @test 28 * @summary tests RegExp framework 29 * @author Mike McCloskey 30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 35 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 36 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 37 */ 38 39 import java.util.regex.*; 40 import java.util.Random; 41 import java.io.*; 42 import java.util.*; 43 import java.nio.CharBuffer; 44 import java.util.function.Predicate; 45 46 /** 47 * This is a test class created to check the operation of 48 * the Pattern and Matcher classes. 49 */ 50 public class RegExTest { 51 52 private static Random generator = new Random(); 53 private static boolean failure = false; 54 private static int failCount = 0; 55 private static String firstFailure = null; 56 57 /** 58 * Main to interpret arguments and run several tests. 59 * 60 */ 61 public static void main(String[] args) throws Exception { 62 // Most of the tests are in a file 63 processFile("TestCases.txt"); 64 //processFile("PerlCases.txt"); 65 processFile("BMPTestCases.txt"); 66 processFile("SupplementaryTestCases.txt"); 67 68 // These test many randomly generated char patterns 69 bm(); 70 slice(); 71 72 // These are hard to put into the file 73 escapes(); 74 blankInput(); 75 76 // Substitition tests on randomly generated sequences 77 globalSubstitute(); 78 stringbufferSubstitute(); 79 substitutionBasher(); 80 81 // Canonical Equivalence 82 ceTest(); 83 84 // Anchors 85 anchorTest(); 86 87 // boolean match calls 88 matchesTest(); 89 lookingAtTest(); 90 91 // Pattern API 92 patternMatchesTest(); 93 94 // Misc 95 lookbehindTest(); 96 nullArgumentTest(); 97 backRefTest(); 98 groupCaptureTest(); 99 caretTest(); 100 charClassTest(); 101 emptyPatternTest(); 102 findIntTest(); 103 group0Test(); 104 longPatternTest(); 105 octalTest(); 106 ampersandTest(); 107 negationTest(); 108 splitTest(); 109 appendTest(); 110 caseFoldingTest(); 111 commentsTest(); 112 unixLinesTest(); 113 replaceFirstTest(); 114 gTest(); 115 zTest(); 116 serializeTest(); 117 reluctantRepetitionTest(); 118 multilineDollarTest(); 119 dollarAtEndTest(); 120 caretBetweenTerminatorsTest(); 121 // This RFE rejected in Tiger numOccurrencesTest(); 122 javaCharClassTest(); 123 nonCaptureRepetitionTest(); 124 notCapturedGroupCurlyMatchTest(); 125 escapedSegmentTest(); 126 literalPatternTest(); 127 literalReplacementTest(); 128 regionTest(); 129 toStringTest(); 130 negatedCharClassTest(); 131 findFromTest(); 132 boundsTest(); 133 unicodeWordBoundsTest(); 134 caretAtEndTest(); 135 wordSearchTest(); 136 hitEndTest(); 137 toMatchResultTest(); 138 surrogatesInClassTest(); 139 removeQEQuotingTest(); 140 namedGroupCaptureTest(); 141 nonBmpClassComplementTest(); 142 unicodePropertiesTest(); 143 unicodeHexNotationTest(); 144 unicodeClassesTest(); 145 horizontalAndVerticalWSTest(); 146 linebreakTest(); 147 branchTest(); 148 groupCurlyNotFoundSuppTest(); 149 groupCurlyBackoffTest(); 150 patternAsPredicate(); 151 if (failure) { 152 throw new 153 RuntimeException("RegExTest failed, 1st failure: " + 154 firstFailure); 155 } else { 156 System.err.println("OKAY: All tests passed."); 157 } 158 } 159 160 // Utility functions 161 162 private static String getRandomAlphaString(int length) { 163 StringBuffer buf = new StringBuffer(length); 164 for (int i=0; i<length; i++) { 165 char randChar = (char)(97 + generator.nextInt(26)); 166 buf.append(randChar); 167 } 168 return buf.toString(); 169 } 170 171 private static void check(Matcher m, String expected) { 172 m.find(); 173 if (!m.group().equals(expected)) 174 failCount++; 175 } 176 177 private static void check(Matcher m, String result, boolean expected) { 178 m.find(); 179 if (m.group().equals(result) != expected) 180 failCount++; 181 } 182 183 private static void check(Pattern p, String s, boolean expected) { 184 if (p.matcher(s).find() != expected) 185 failCount++; 186 } 187 188 private static void check(String p, String s, boolean expected) { 189 Matcher matcher = Pattern.compile(p).matcher(s); 190 if (matcher.find() != expected) 191 failCount++; 192 } 193 194 private static void check(String p, char c, boolean expected) { 195 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 196 Pattern pattern = Pattern.compile(propertyPattern); 197 char[] ca = new char[1]; ca[0] = c; 198 Matcher matcher = pattern.matcher(new String(ca)); 199 if (!matcher.find()) 200 failCount++; 201 } 202 203 private static void check(String p, int codePoint, boolean expected) { 204 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 205 Pattern pattern = Pattern.compile(propertyPattern); 206 char[] ca = Character.toChars(codePoint); 207 Matcher matcher = pattern.matcher(new String(ca)); 208 if (!matcher.find()) 209 failCount++; 210 } 211 212 private static void check(String p, int flag, String input, String s, 213 boolean expected) 214 { 215 Pattern pattern = Pattern.compile(p, flag); 216 Matcher matcher = pattern.matcher(input); 217 if (expected) 218 check(matcher, s, expected); 219 else 220 check(pattern, input, false); 221 } 222 223 private static void report(String testName) { 224 int spacesToAdd = 30 - testName.length(); 225 StringBuffer paddedNameBuffer = new StringBuffer(testName); 226 for (int i=0; i<spacesToAdd; i++) 227 paddedNameBuffer.append(" "); 228 String paddedName = paddedNameBuffer.toString(); 229 System.err.println(paddedName + ": " + 230 (failCount==0 ? "Passed":"Failed("+failCount+")")); 231 if (failCount > 0) { 232 failure = true; 233 234 if (firstFailure == null) { 235 firstFailure = testName; 236 } 237 } 238 239 failCount = 0; 240 } 241 242 /** 243 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 244 * supplementary characters. This method does NOT fully take care 245 * of the regex syntax. 246 */ 247 private static String toSupplementaries(String s) { 248 int length = s.length(); 249 StringBuffer sb = new StringBuffer(length * 2); 250 251 for (int i = 0; i < length; ) { 252 char c = s.charAt(i++); 253 if (c == '\\') { 254 sb.append(c); 255 if (i < length) { 256 c = s.charAt(i++); 257 sb.append(c); 258 if (c == 'u') { 259 // assume no syntax error 260 sb.append(s.charAt(i++)); 261 sb.append(s.charAt(i++)); 262 sb.append(s.charAt(i++)); 263 sb.append(s.charAt(i++)); 264 } 265 } 266 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 267 sb.append('\ud800').append((char)('\udc00'+c)); 268 } else { 269 sb.append(c); 270 } 271 } 272 return sb.toString(); 273 } 274 275 // Regular expression tests 276 277 // This is for bug 6178785 278 // Test if an expected NPE gets thrown when passing in a null argument 279 private static boolean check(Runnable test) { 280 try { 281 test.run(); 282 failCount++; 283 return false; 284 } catch (NullPointerException npe) { 285 return true; 286 } 287 } 288 289 private static void nullArgumentTest() { 290 check(new Runnable() { public void run() { Pattern.compile(null); }}); 291 check(new Runnable() { public void run() { Pattern.matches(null, null); }}); 292 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}}); 293 check(new Runnable() { public void run() { Pattern.quote(null);}}); 294 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}}); 295 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}}); 296 297 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 298 m.matches(); 299 check(new Runnable() { public void run() { m.appendTail(null);}}); 300 check(new Runnable() { public void run() { m.replaceAll(null);}}); 301 check(new Runnable() { public void run() { m.replaceFirst(null);}}); 302 check(new Runnable() { public void run() { m.appendReplacement(null, null);}}); 303 check(new Runnable() { public void run() { m.reset(null);}}); 304 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}}); 305 //check(new Runnable() { public void run() { m.usePattern(null);}}); 306 307 report("Null Argument"); 308 } 309 310 // This is for bug6635133 311 // Test if surrogate pair in Unicode escapes can be handled correctly. 312 private static void surrogatesInClassTest() throws Exception { 313 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 314 Matcher matcher = pattern.matcher("\ud834\udd22"); 315 if (!matcher.find()) 316 failCount++; 317 318 report("Surrogate pair in Unicode escape"); 319 } 320 321 // This is for bug6990617 322 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 323 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 324 // char is an octal digit. 325 private static void removeQEQuotingTest() throws Exception { 326 Pattern pattern = 327 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 328 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 329 if (!matcher.find()) 330 failCount++; 331 332 report("Remove Q/E Quoting"); 333 } 334 335 // This is for bug 4988891 336 // Test toMatchResult to see that it is a copy of the Matcher 337 // that is not affected by subsequent operations on the original 338 private static void toMatchResultTest() throws Exception { 339 Pattern pattern = Pattern.compile("squid"); 340 Matcher matcher = pattern.matcher( 341 "agiantsquidofdestinyasmallsquidoffate"); 342 matcher.find(); 343 int matcherStart1 = matcher.start(); 344 MatchResult mr = matcher.toMatchResult(); 345 if (mr == matcher) 346 failCount++; 347 int resultStart1 = mr.start(); 348 if (matcherStart1 != resultStart1) 349 failCount++; 350 matcher.find(); 351 int matcherStart2 = matcher.start(); 352 int resultStart2 = mr.start(); 353 if (matcherStart2 == resultStart2) 354 failCount++; 355 if (resultStart1 != resultStart2) 356 failCount++; 357 MatchResult mr2 = matcher.toMatchResult(); 358 if (mr == mr2) 359 failCount++; 360 if (mr2.start() != matcherStart2) 361 failCount++; 362 report("toMatchResult is a copy"); 363 } 364 365 // This is for bug 5013885 366 // Must test a slice to see if it reports hitEnd correctly 367 private static void hitEndTest() throws Exception { 368 // Basic test of Slice node 369 Pattern p = Pattern.compile("^squidattack"); 370 Matcher m = p.matcher("squack"); 371 m.find(); 372 if (m.hitEnd()) 373 failCount++; 374 m.reset("squid"); 375 m.find(); 376 if (!m.hitEnd()) 377 failCount++; 378 379 // Test Slice, SliceA and SliceU nodes 380 for (int i=0; i<3; i++) { 381 int flags = 0; 382 if (i==1) flags = Pattern.CASE_INSENSITIVE; 383 if (i==2) flags = Pattern.UNICODE_CASE; 384 p = Pattern.compile("^abc", flags); 385 m = p.matcher("ad"); 386 m.find(); 387 if (m.hitEnd()) 388 failCount++; 389 m.reset("ab"); 390 m.find(); 391 if (!m.hitEnd()) 392 failCount++; 393 } 394 395 // Test Boyer-Moore node 396 p = Pattern.compile("catattack"); 397 m = p.matcher("attack"); 398 m.find(); 399 if (!m.hitEnd()) 400 failCount++; 401 402 p = Pattern.compile("catattack"); 403 m = p.matcher("attackattackattackcatatta"); 404 m.find(); 405 if (!m.hitEnd()) 406 failCount++; 407 report("hitEnd from a Slice"); 408 } 409 410 // This is for bug 4997476 411 // It is weird code submitted by customer demonstrating a regression 412 private static void wordSearchTest() throws Exception { 413 String testString = new String("word1 word2 word3"); 414 Pattern p = Pattern.compile("\\b"); 415 Matcher m = p.matcher(testString); 416 int position = 0; 417 int start = 0; 418 while (m.find(position)) { 419 start = m.start(); 420 if (start == testString.length()) 421 break; 422 if (m.find(start+1)) { 423 position = m.start(); 424 } else { 425 position = testString.length(); 426 } 427 if (testString.substring(start, position).equals(" ")) 428 continue; 429 if (!testString.substring(start, position-1).startsWith("word")) 430 failCount++; 431 } 432 report("Customer word search"); 433 } 434 435 // This is for bug 4994840 436 private static void caretAtEndTest() throws Exception { 437 // Problem only occurs with multiline patterns 438 // containing a beginning-of-line caret "^" followed 439 // by an expression that also matches the empty string. 440 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 441 Matcher matcher = pattern.matcher("\r"); 442 matcher.find(); 443 matcher.find(); 444 report("Caret at end"); 445 } 446 447 // This test is for 4979006 448 // Check to see if word boundary construct properly handles unicode 449 // non spacing marks 450 private static void unicodeWordBoundsTest() throws Exception { 451 String spaces = " "; 452 String wordChar = "a"; 453 String nsm = "\u030a"; 454 455 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 456 457 Pattern pattern = Pattern.compile("\\b"); 458 Matcher matcher = pattern.matcher(""); 459 // S=other B=word character N=non spacing mark .=word boundary 460 // SS.BB.SS 461 String input = spaces + wordChar + wordChar + spaces; 462 twoFindIndexes(input, matcher, 2, 4); 463 // SS.BBN.SS 464 input = spaces + wordChar +wordChar + nsm + spaces; 465 twoFindIndexes(input, matcher, 2, 5); 466 // SS.BN.SS 467 input = spaces + wordChar + nsm + spaces; 468 twoFindIndexes(input, matcher, 2, 4); 469 // SS.BNN.SS 470 input = spaces + wordChar + nsm + nsm + spaces; 471 twoFindIndexes(input, matcher, 2, 5); 472 // SSN.BB.SS 473 input = spaces + nsm + wordChar + wordChar + spaces; 474 twoFindIndexes(input, matcher, 3, 5); 475 // SS.BNB.SS 476 input = spaces + wordChar + nsm + wordChar + spaces; 477 twoFindIndexes(input, matcher, 2, 5); 478 // SSNNSS 479 input = spaces + nsm + nsm + spaces; 480 matcher.reset(input); 481 if (matcher.find()) 482 failCount++; 483 // SSN.BBN.SS 484 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 485 twoFindIndexes(input, matcher, 3, 6); 486 487 report("Unicode word boundary"); 488 } 489 490 private static void twoFindIndexes(String input, Matcher matcher, int a, 491 int b) throws Exception 492 { 493 matcher.reset(input); 494 matcher.find(); 495 if (matcher.start() != a) 496 failCount++; 497 matcher.find(); 498 if (matcher.start() != b) 499 failCount++; 500 } 501 502 // This test is for 6284152 503 static void check(String regex, String input, String[] expected) { 504 List<String> result = new ArrayList<String>(); 505 Pattern p = Pattern.compile(regex); 506 Matcher m = p.matcher(input); 507 while (m.find()) { 508 result.add(m.group()); 509 } 510 if (!Arrays.asList(expected).equals(result)) 511 failCount++; 512 } 513 514 private static void lookbehindTest() throws Exception { 515 //Positive 516 check("(?<=%.{0,5})foo\\d", 517 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 518 new String[]{"foo1", "foo2", "foo3"}); 519 520 //boundary at end of the lookbehind sub-regex should work consistently 521 //with the boundary just after the lookbehind sub-regex 522 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 523 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 524 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 525 check("(?<!abc \\b)foo", "abc foo", new String[0]); 526 527 //Negative 528 check("(?<!%.{0,5})foo\\d", 529 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 530 new String[] {"foo4", "foo5"}); 531 532 //Positive greedy 533 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 534 535 //Positive reluctant 536 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 537 538 //supplementary 539 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 540 new String[] {"fo\ud800\udc00o"}); 541 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 542 new String[] {"fo\ud800\udc00o"}); 543 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 544 new String[] {"fo\ud800\udc00o"}); 545 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 546 new String[] {"fo\ud800\udc00o"}); 547 report("Lookbehind"); 548 } 549 550 // This test is for 4938995 551 // Check to see if weak region boundaries are transparent to 552 // lookahead and lookbehind constructs 553 private static void boundsTest() throws Exception { 554 String fullMessage = "catdogcat"; 555 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 556 Matcher matcher = pattern.matcher("catdogca"); 557 matcher.useTransparentBounds(true); 558 if (matcher.find()) 559 failCount++; 560 matcher.reset("atdogcat"); 561 if (matcher.find()) 562 failCount++; 563 matcher.reset(fullMessage); 564 if (!matcher.find()) 565 failCount++; 566 matcher.reset(fullMessage); 567 matcher.region(0,9); 568 if (!matcher.find()) 569 failCount++; 570 matcher.reset(fullMessage); 571 matcher.region(0,6); 572 if (!matcher.find()) 573 failCount++; 574 matcher.reset(fullMessage); 575 matcher.region(3,6); 576 if (!matcher.find()) 577 failCount++; 578 matcher.useTransparentBounds(false); 579 if (matcher.find()) 580 failCount++; 581 582 // Negative lookahead/lookbehind 583 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 584 matcher = pattern.matcher("dogcat"); 585 matcher.useTransparentBounds(true); 586 matcher.region(0,3); 587 if (matcher.find()) 588 failCount++; 589 matcher.reset("catdog"); 590 matcher.region(3,6); 591 if (matcher.find()) 592 failCount++; 593 matcher.useTransparentBounds(false); 594 matcher.reset("dogcat"); 595 matcher.region(0,3); 596 if (!matcher.find()) 597 failCount++; 598 matcher.reset("catdog"); 599 matcher.region(3,6); 600 if (!matcher.find()) 601 failCount++; 602 603 report("Region bounds transparency"); 604 } 605 606 // This test is for 4945394 607 private static void findFromTest() throws Exception { 608 String message = "This is 40 $0 message."; 609 Pattern pat = Pattern.compile("\\$0"); 610 Matcher match = pat.matcher(message); 611 if (!match.find()) 612 failCount++; 613 if (match.find()) 614 failCount++; 615 if (match.find()) 616 failCount++; 617 report("Check for alternating find"); 618 } 619 620 // This test is for 4872664 and 4892980 621 private static void negatedCharClassTest() throws Exception { 622 Pattern pattern = Pattern.compile("[^>]"); 623 Matcher matcher = pattern.matcher("\u203A"); 624 if (!matcher.matches()) 625 failCount++; 626 pattern = Pattern.compile("[^fr]"); 627 matcher = pattern.matcher("a"); 628 if (!matcher.find()) 629 failCount++; 630 matcher.reset("\u203A"); 631 if (!matcher.find()) 632 failCount++; 633 String s = "for"; 634 String result[] = s.split("[^fr]"); 635 if (!result[0].equals("f")) 636 failCount++; 637 if (!result[1].equals("r")) 638 failCount++; 639 s = "f\u203Ar"; 640 result = s.split("[^fr]"); 641 if (!result[0].equals("f")) 642 failCount++; 643 if (!result[1].equals("r")) 644 failCount++; 645 646 // Test adding to bits, subtracting a node, then adding to bits again 647 pattern = Pattern.compile("[^f\u203Ar]"); 648 matcher = pattern.matcher("a"); 649 if (!matcher.find()) 650 failCount++; 651 matcher.reset("f"); 652 if (matcher.find()) 653 failCount++; 654 matcher.reset("\u203A"); 655 if (matcher.find()) 656 failCount++; 657 matcher.reset("r"); 658 if (matcher.find()) 659 failCount++; 660 matcher.reset("\u203B"); 661 if (!matcher.find()) 662 failCount++; 663 664 // Test subtracting a node, adding to bits, subtracting again 665 pattern = Pattern.compile("[^\u203Ar\u203B]"); 666 matcher = pattern.matcher("a"); 667 if (!matcher.find()) 668 failCount++; 669 matcher.reset("\u203A"); 670 if (matcher.find()) 671 failCount++; 672 matcher.reset("r"); 673 if (matcher.find()) 674 failCount++; 675 matcher.reset("\u203B"); 676 if (matcher.find()) 677 failCount++; 678 matcher.reset("\u203C"); 679 if (!matcher.find()) 680 failCount++; 681 682 report("Negated Character Class"); 683 } 684 685 // This test is for 4628291 686 private static void toStringTest() throws Exception { 687 Pattern pattern = Pattern.compile("b+"); 688 if (pattern.toString() != "b+") 689 failCount++; 690 Matcher matcher = pattern.matcher("aaabbbccc"); 691 String matcherString = matcher.toString(); // unspecified 692 matcher.find(); 693 matcherString = matcher.toString(); // unspecified 694 matcher.region(0,3); 695 matcherString = matcher.toString(); // unspecified 696 matcher.reset(); 697 matcherString = matcher.toString(); // unspecified 698 report("toString"); 699 } 700 701 // This test is for 4808962 702 private static void literalPatternTest() throws Exception { 703 int flags = Pattern.LITERAL; 704 705 Pattern pattern = Pattern.compile("abc\\t$^", flags); 706 check(pattern, "abc\\t$^", true); 707 708 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 709 check(pattern, "abc\\t$^", true); 710 711 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 712 check(pattern, "\\Qa^$bcabc\\E", true); 713 check(pattern, "a^$bcabc", false); 714 715 pattern = Pattern.compile("\\\\Q\\\\E"); 716 check(pattern, "\\Q\\E", true); 717 718 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 719 check(pattern, "abcefg\\Q\\Ehij", true); 720 721 pattern = Pattern.compile("\\\\\\Q\\\\E"); 722 check(pattern, "\\\\\\\\", true); 723 724 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 725 check(pattern, "\\Qa^$bcabc\\E", true); 726 check(pattern, "a^$bcabc", false); 727 728 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 729 check(pattern, "\\Qabc\\Edef", true); 730 check(pattern, "abcdef", false); 731 732 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 733 check(pattern, "abc\\Edef", true); 734 check(pattern, "abcdef", false); 735 736 pattern = Pattern.compile(Pattern.quote("\\E")); 737 check(pattern, "\\E", true); 738 739 pattern = Pattern.compile("((((abc.+?:)", flags); 740 check(pattern, "((((abc.+?:)", true); 741 742 flags |= Pattern.MULTILINE; 743 744 pattern = Pattern.compile("^cat$", flags); 745 check(pattern, "abc^cat$def", true); 746 check(pattern, "cat", false); 747 748 flags |= Pattern.CASE_INSENSITIVE; 749 750 pattern = Pattern.compile("abcdef", flags); 751 check(pattern, "ABCDEF", true); 752 check(pattern, "AbCdEf", true); 753 754 flags |= Pattern.DOTALL; 755 756 pattern = Pattern.compile("a...b", flags); 757 check(pattern, "A...b", true); 758 check(pattern, "Axxxb", false); 759 760 flags |= Pattern.CANON_EQ; 761 762 Pattern p = Pattern.compile("testa\u030a", flags); 763 check(pattern, "testa\u030a", false); 764 check(pattern, "test\u00e5", false); 765 766 // Supplementary character test 767 flags = Pattern.LITERAL; 768 769 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 770 check(pattern, toSupplementaries("abc\\t$^"), true); 771 772 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 773 check(pattern, toSupplementaries("abc\\t$^"), true); 774 775 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 776 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 777 check(pattern, toSupplementaries("a^$bcabc"), false); 778 779 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 780 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 781 check(pattern, toSupplementaries("a^$bcabc"), false); 782 783 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 784 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 785 check(pattern, toSupplementaries("abcdef"), false); 786 787 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 788 check(pattern, toSupplementaries("abc\\Edef"), true); 789 check(pattern, toSupplementaries("abcdef"), false); 790 791 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 792 check(pattern, toSupplementaries("((((abc.+?:)"), true); 793 794 flags |= Pattern.MULTILINE; 795 796 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 797 check(pattern, toSupplementaries("abc^cat$def"), true); 798 check(pattern, toSupplementaries("cat"), false); 799 800 flags |= Pattern.DOTALL; 801 802 // note: this is case-sensitive. 803 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 804 check(pattern, toSupplementaries("a...b"), true); 805 check(pattern, toSupplementaries("axxxb"), false); 806 807 flags |= Pattern.CANON_EQ; 808 809 String t = toSupplementaries("test"); 810 p = Pattern.compile(t + "a\u030a", flags); 811 check(pattern, t + "a\u030a", false); 812 check(pattern, t + "\u00e5", false); 813 814 report("Literal pattern"); 815 } 816 817 // This test is for 4803179 818 // This test is also for 4808962, replacement parts 819 private static void literalReplacementTest() throws Exception { 820 int flags = Pattern.LITERAL; 821 822 Pattern pattern = Pattern.compile("abc", flags); 823 Matcher matcher = pattern.matcher("zzzabczzz"); 824 String replaceTest = "$0"; 825 String result = matcher.replaceAll(replaceTest); 826 if (!result.equals("zzzabczzz")) 827 failCount++; 828 829 matcher.reset(); 830 String literalReplacement = matcher.quoteReplacement(replaceTest); 831 result = matcher.replaceAll(literalReplacement); 832 if (!result.equals("zzz$0zzz")) 833 failCount++; 834 835 matcher.reset(); 836 replaceTest = "\\t$\\$"; 837 literalReplacement = matcher.quoteReplacement(replaceTest); 838 result = matcher.replaceAll(literalReplacement); 839 if (!result.equals("zzz\\t$\\$zzz")) 840 failCount++; 841 842 // Supplementary character test 843 pattern = Pattern.compile(toSupplementaries("abc"), flags); 844 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 845 replaceTest = "$0"; 846 result = matcher.replaceAll(replaceTest); 847 if (!result.equals(toSupplementaries("zzzabczzz"))) 848 failCount++; 849 850 matcher.reset(); 851 literalReplacement = matcher.quoteReplacement(replaceTest); 852 result = matcher.replaceAll(literalReplacement); 853 if (!result.equals(toSupplementaries("zzz$0zzz"))) 854 failCount++; 855 856 matcher.reset(); 857 replaceTest = "\\t$\\$"; 858 literalReplacement = matcher.quoteReplacement(replaceTest); 859 result = matcher.replaceAll(literalReplacement); 860 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 861 failCount++; 862 863 // IAE should be thrown if backslash or '$' is the last character 864 // in replacement string 865 try { 866 "\uac00".replaceAll("\uac00", "$"); 867 failCount++; 868 } catch (IllegalArgumentException iie) { 869 } catch (Exception e) { 870 failCount++; 871 } 872 try { 873 "\uac00".replaceAll("\uac00", "\\"); 874 failCount++; 875 } catch (IllegalArgumentException iie) { 876 } catch (Exception e) { 877 failCount++; 878 } 879 report("Literal replacement"); 880 } 881 882 // This test is for 4757029 883 private static void regionTest() throws Exception { 884 Pattern pattern = Pattern.compile("abc"); 885 Matcher matcher = pattern.matcher("abcdefabc"); 886 887 matcher.region(0,9); 888 if (!matcher.find()) 889 failCount++; 890 if (!matcher.find()) 891 failCount++; 892 matcher.region(0,3); 893 if (!matcher.find()) 894 failCount++; 895 matcher.region(3,6); 896 if (matcher.find()) 897 failCount++; 898 matcher.region(0,2); 899 if (matcher.find()) 900 failCount++; 901 902 expectRegionFail(matcher, 1, -1); 903 expectRegionFail(matcher, -1, -1); 904 expectRegionFail(matcher, -1, 1); 905 expectRegionFail(matcher, 5, 3); 906 expectRegionFail(matcher, 5, 12); 907 expectRegionFail(matcher, 12, 12); 908 909 pattern = Pattern.compile("^abc$"); 910 matcher = pattern.matcher("zzzabczzz"); 911 matcher.region(0,9); 912 if (matcher.find()) 913 failCount++; 914 matcher.region(3,6); 915 if (!matcher.find()) 916 failCount++; 917 matcher.region(3,6); 918 matcher.useAnchoringBounds(false); 919 if (matcher.find()) 920 failCount++; 921 922 // Supplementary character test 923 pattern = Pattern.compile(toSupplementaries("abc")); 924 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 925 matcher.region(0,9*2); 926 if (!matcher.find()) 927 failCount++; 928 if (!matcher.find()) 929 failCount++; 930 matcher.region(0,3*2); 931 if (!matcher.find()) 932 failCount++; 933 matcher.region(1,3*2); 934 if (matcher.find()) 935 failCount++; 936 matcher.region(3*2,6*2); 937 if (matcher.find()) 938 failCount++; 939 matcher.region(0,2*2); 940 if (matcher.find()) 941 failCount++; 942 matcher.region(0,2*2+1); 943 if (matcher.find()) 944 failCount++; 945 946 expectRegionFail(matcher, 1*2, -1); 947 expectRegionFail(matcher, -1, -1); 948 expectRegionFail(matcher, -1, 1*2); 949 expectRegionFail(matcher, 5*2, 3*2); 950 expectRegionFail(matcher, 5*2, 12*2); 951 expectRegionFail(matcher, 12*2, 12*2); 952 953 pattern = Pattern.compile(toSupplementaries("^abc$")); 954 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 955 matcher.region(0,9*2); 956 if (matcher.find()) 957 failCount++; 958 matcher.region(3*2,6*2); 959 if (!matcher.find()) 960 failCount++; 961 matcher.region(3*2+1,6*2); 962 if (matcher.find()) 963 failCount++; 964 matcher.region(3*2,6*2-1); 965 if (matcher.find()) 966 failCount++; 967 matcher.region(3*2,6*2); 968 matcher.useAnchoringBounds(false); 969 if (matcher.find()) 970 failCount++; 971 report("Regions"); 972 } 973 974 private static void expectRegionFail(Matcher matcher, int index1, 975 int index2) 976 { 977 try { 978 matcher.region(index1, index2); 979 failCount++; 980 } catch (IndexOutOfBoundsException ioobe) { 981 // Correct result 982 } catch (IllegalStateException ise) { 983 // Correct result 984 } 985 } 986 987 // This test is for 4803197 988 private static void escapedSegmentTest() throws Exception { 989 990 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 991 check(pattern, "dir1\\dir2", true); 992 993 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 994 check(pattern, "dir1\\dir2\\", true); 995 996 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 997 check(pattern, "dir1\\dir2\\", true); 998 999 // Supplementary character test 1000 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1001 check(pattern, toSupplementaries("dir1\\dir2"), true); 1002 1003 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1004 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1005 1006 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1007 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1008 1009 report("Escaped segment"); 1010 } 1011 1012 // This test is for 4792284 1013 private static void nonCaptureRepetitionTest() throws Exception { 1014 String input = "abcdefgh;"; 1015 1016 String[] patterns = new String[] { 1017 "(?:\\w{4})+;", 1018 "(?:\\w{8})*;", 1019 "(?:\\w{2}){2,4};", 1020 "(?:\\w{4}){2,};", // only matches the 1021 ".*?(?:\\w{5})+;", // specified minimum 1022 ".*?(?:\\w{9})*;", // number of reps - OK 1023 "(?:\\w{4})+?;", // lazy repetition - OK 1024 "(?:\\w{4})++;", // possessive repetition - OK 1025 "(?:\\w{2,}?)+;", // non-deterministic - OK 1026 "(\\w{4})+;", // capturing group - OK 1027 }; 1028 1029 for (int i = 0; i < patterns.length; i++) { 1030 // Check find() 1031 check(patterns[i], 0, input, input, true); 1032 // Check matches() 1033 Pattern p = Pattern.compile(patterns[i]); 1034 Matcher m = p.matcher(input); 1035 1036 if (m.matches()) { 1037 if (!m.group(0).equals(input)) 1038 failCount++; 1039 } else { 1040 failCount++; 1041 } 1042 } 1043 1044 report("Non capturing repetition"); 1045 } 1046 1047 // This test is for 6358731 1048 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1049 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1050 Matcher matcher = pattern.matcher("abcd"); 1051 if (!matcher.matches() || 1052 matcher.group(1) != null || 1053 !matcher.group(2).equals("abcd")) { 1054 failCount++; 1055 } 1056 report("Not captured GroupCurly"); 1057 } 1058 1059 // This test is for 4706545 1060 private static void javaCharClassTest() throws Exception { 1061 for (int i=0; i<1000; i++) { 1062 char c = (char)generator.nextInt(); 1063 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1064 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1065 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1066 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1067 check("{javaDigit}", c, Character.isDigit(c)); 1068 check("{javaDefined}", c, Character.isDefined(c)); 1069 check("{javaLetter}", c, Character.isLetter(c)); 1070 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1071 check("{javaJavaIdentifierStart}", c, 1072 Character.isJavaIdentifierStart(c)); 1073 check("{javaJavaIdentifierPart}", c, 1074 Character.isJavaIdentifierPart(c)); 1075 check("{javaUnicodeIdentifierStart}", c, 1076 Character.isUnicodeIdentifierStart(c)); 1077 check("{javaUnicodeIdentifierPart}", c, 1078 Character.isUnicodeIdentifierPart(c)); 1079 check("{javaIdentifierIgnorable}", c, 1080 Character.isIdentifierIgnorable(c)); 1081 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1082 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1083 check("{javaISOControl}", c, Character.isISOControl(c)); 1084 check("{javaMirrored}", c, Character.isMirrored(c)); 1085 1086 } 1087 1088 // Supplementary character test 1089 for (int i=0; i<1000; i++) { 1090 int c = generator.nextInt(Character.MAX_CODE_POINT 1091 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1092 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1093 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1094 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1095 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1096 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1097 check("{javaDigit}", c, Character.isDigit(c)); 1098 check("{javaDefined}", c, Character.isDefined(c)); 1099 check("{javaLetter}", c, Character.isLetter(c)); 1100 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1101 check("{javaJavaIdentifierStart}", c, 1102 Character.isJavaIdentifierStart(c)); 1103 check("{javaJavaIdentifierPart}", c, 1104 Character.isJavaIdentifierPart(c)); 1105 check("{javaUnicodeIdentifierStart}", c, 1106 Character.isUnicodeIdentifierStart(c)); 1107 check("{javaUnicodeIdentifierPart}", c, 1108 Character.isUnicodeIdentifierPart(c)); 1109 check("{javaIdentifierIgnorable}", c, 1110 Character.isIdentifierIgnorable(c)); 1111 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1112 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1113 check("{javaISOControl}", c, Character.isISOControl(c)); 1114 check("{javaMirrored}", c, Character.isMirrored(c)); 1115 } 1116 1117 report("Java character classes"); 1118 } 1119 1120 // This test is for 4523620 1121 /* 1122 private static void numOccurrencesTest() throws Exception { 1123 Pattern pattern = Pattern.compile("aaa"); 1124 1125 if (pattern.numOccurrences("aaaaaa", false) != 2) 1126 failCount++; 1127 if (pattern.numOccurrences("aaaaaa", true) != 4) 1128 failCount++; 1129 1130 pattern = Pattern.compile("^"); 1131 if (pattern.numOccurrences("aaaaaa", false) != 1) 1132 failCount++; 1133 if (pattern.numOccurrences("aaaaaa", true) != 1) 1134 failCount++; 1135 1136 report("Number of Occurrences"); 1137 } 1138 */ 1139 1140 // This test is for 4776374 1141 private static void caretBetweenTerminatorsTest() throws Exception { 1142 int flags1 = Pattern.DOTALL; 1143 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1144 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1145 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1146 1147 check("^....", flags1, "test\ntest", "test", true); 1148 check(".....^", flags1, "test\ntest", "test", false); 1149 check(".....^", flags1, "test\n", "test", false); 1150 check("....^", flags1, "test\r\n", "test", false); 1151 1152 check("^....", flags2, "test\ntest", "test", true); 1153 check("....^", flags2, "test\ntest", "test", false); 1154 check(".....^", flags2, "test\n", "test", false); 1155 check("....^", flags2, "test\r\n", "test", false); 1156 1157 check("^....", flags3, "test\ntest", "test", true); 1158 check(".....^", flags3, "test\ntest", "test\n", true); 1159 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1160 check(".....^", flags3, "test\n", "test", false); 1161 check(".....^", flags3, "test\r\n", "test", false); 1162 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1163 1164 check("^....", flags4, "test\ntest", "test", true); 1165 check(".....^", flags3, "test\ntest", "test\n", true); 1166 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1167 check(".....^", flags4, "test\n", "test\n", false); 1168 check(".....^", flags4, "test\r\n", "test\r", false); 1169 1170 // Supplementary character test 1171 String t = toSupplementaries("test"); 1172 check("^....", flags1, t+"\n"+t, t, true); 1173 check(".....^", flags1, t+"\n"+t, t, false); 1174 check(".....^", flags1, t+"\n", t, false); 1175 check("....^", flags1, t+"\r\n", t, false); 1176 1177 check("^....", flags2, t+"\n"+t, t, true); 1178 check("....^", flags2, t+"\n"+t, t, false); 1179 check(".....^", flags2, t+"\n", t, false); 1180 check("....^", flags2, t+"\r\n", t, false); 1181 1182 check("^....", flags3, t+"\n"+t, t, true); 1183 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1184 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1185 check(".....^", flags3, t+"\n", t, false); 1186 check(".....^", flags3, t+"\r\n", t, false); 1187 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1188 1189 check("^....", flags4, t+"\n"+t, t, true); 1190 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1191 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1192 check(".....^", flags4, t+"\n", t+"\n", false); 1193 check(".....^", flags4, t+"\r\n", t+"\r", false); 1194 1195 report("Caret between terminators"); 1196 } 1197 1198 // This test is for 4727935 1199 private static void dollarAtEndTest() throws Exception { 1200 int flags1 = Pattern.DOTALL; 1201 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1202 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1203 1204 check("....$", flags1, "test\n", "test", true); 1205 check("....$", flags1, "test\r\n", "test", true); 1206 check(".....$", flags1, "test\n", "test\n", true); 1207 check(".....$", flags1, "test\u0085", "test\u0085", true); 1208 check("....$", flags1, "test\u0085", "test", true); 1209 1210 check("....$", flags2, "test\n", "test", true); 1211 check(".....$", flags2, "test\n", "test\n", true); 1212 check(".....$", flags2, "test\u0085", "test\u0085", true); 1213 check("....$", flags2, "test\u0085", "est\u0085", true); 1214 1215 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1216 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1217 check("....$blah", flags3, "test\nblah", "!!!!", false); 1218 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1219 1220 // Supplementary character test 1221 String t = toSupplementaries("test"); 1222 String b = toSupplementaries("blah"); 1223 check("....$", flags1, t+"\n", t, true); 1224 check("....$", flags1, t+"\r\n", t, true); 1225 check(".....$", flags1, t+"\n", t+"\n", true); 1226 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1227 check("....$", flags1, t+"\u0085", t, true); 1228 1229 check("....$", flags2, t+"\n", t, true); 1230 check(".....$", flags2, t+"\n", t+"\n", true); 1231 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1232 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1233 1234 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1235 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1236 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1237 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1238 1239 report("Dollar at End"); 1240 } 1241 1242 // This test is for 4711773 1243 private static void multilineDollarTest() throws Exception { 1244 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1245 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1246 matcher.find(); 1247 if (matcher.start(0) != 9) 1248 failCount++; 1249 matcher.find(); 1250 if (matcher.start(0) != 20) 1251 failCount++; 1252 1253 // Supplementary character test 1254 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1255 matcher.find(); 1256 if (matcher.start(0) != 9*2) 1257 failCount++; 1258 matcher.find(); 1259 if (matcher.start(0) != 20*2) 1260 failCount++; 1261 1262 report("Multiline Dollar"); 1263 } 1264 1265 private static void reluctantRepetitionTest() throws Exception { 1266 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1267 check(p, "1 word word word 2", true); 1268 check(p, "1 wor wo w 2", true); 1269 check(p, "1 word word 2", true); 1270 check(p, "1 word 2", true); 1271 check(p, "1 wo w w 2", true); 1272 check(p, "1 wo w 2", true); 1273 check(p, "1 wor w 2", true); 1274 1275 p = Pattern.compile("([a-z])+?c"); 1276 Matcher m = p.matcher("ababcdefdec"); 1277 check(m, "ababc"); 1278 1279 // Supplementary character test 1280 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1281 m = p.matcher(toSupplementaries("ababcdefdec")); 1282 check(m, toSupplementaries("ababc")); 1283 1284 report("Reluctant Repetition"); 1285 } 1286 1287 private static void serializeTest() throws Exception { 1288 String patternStr = "(b)"; 1289 String matchStr = "b"; 1290 Pattern pattern = Pattern.compile(patternStr); 1291 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1292 ObjectOutputStream oos = new ObjectOutputStream(baos); 1293 oos.writeObject(pattern); 1294 oos.close(); 1295 ObjectInputStream ois = new ObjectInputStream( 1296 new ByteArrayInputStream(baos.toByteArray())); 1297 Pattern serializedPattern = (Pattern)ois.readObject(); 1298 ois.close(); 1299 Matcher matcher = serializedPattern.matcher(matchStr); 1300 if (!matcher.matches()) 1301 failCount++; 1302 if (matcher.groupCount() != 1) 1303 failCount++; 1304 1305 report("Serialization"); 1306 } 1307 1308 private static void gTest() { 1309 Pattern pattern = Pattern.compile("\\G\\w"); 1310 Matcher matcher = pattern.matcher("abc#x#x"); 1311 matcher.find(); 1312 matcher.find(); 1313 matcher.find(); 1314 if (matcher.find()) 1315 failCount++; 1316 1317 pattern = Pattern.compile("\\GA*"); 1318 matcher = pattern.matcher("1A2AA3"); 1319 matcher.find(); 1320 if (matcher.find()) 1321 failCount++; 1322 1323 pattern = Pattern.compile("\\GA*"); 1324 matcher = pattern.matcher("1A2AA3"); 1325 if (!matcher.find(1)) 1326 failCount++; 1327 matcher.find(); 1328 if (matcher.find()) 1329 failCount++; 1330 1331 report("\\G"); 1332 } 1333 1334 private static void zTest() { 1335 Pattern pattern = Pattern.compile("foo\\Z"); 1336 // Positives 1337 check(pattern, "foo\u0085", true); 1338 check(pattern, "foo\u2028", true); 1339 check(pattern, "foo\u2029", true); 1340 check(pattern, "foo\n", true); 1341 check(pattern, "foo\r", true); 1342 check(pattern, "foo\r\n", true); 1343 // Negatives 1344 check(pattern, "fooo", false); 1345 check(pattern, "foo\n\r", false); 1346 1347 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1348 // Positives 1349 check(pattern, "foo", true); 1350 check(pattern, "foo\n", true); 1351 // Negatives 1352 check(pattern, "foo\r", false); 1353 check(pattern, "foo\u0085", false); 1354 check(pattern, "foo\u2028", false); 1355 check(pattern, "foo\u2029", false); 1356 1357 report("\\Z"); 1358 } 1359 1360 private static void replaceFirstTest() { 1361 Pattern pattern = Pattern.compile("(ab)(c*)"); 1362 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1363 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1364 failCount++; 1365 1366 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1367 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1368 failCount++; 1369 1370 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1371 String result = matcher.replaceFirst("$1"); 1372 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1373 failCount++; 1374 1375 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1376 result = matcher.replaceFirst("$2"); 1377 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1378 failCount++; 1379 1380 pattern = Pattern.compile("a*"); 1381 matcher = pattern.matcher("aaaaaaaaaa"); 1382 if (!matcher.replaceFirst("test").equals("test")) 1383 failCount++; 1384 1385 pattern = Pattern.compile("a+"); 1386 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1387 if (!matcher.replaceFirst("test").equals("zzztest")) 1388 failCount++; 1389 1390 // Supplementary character test 1391 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1392 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1393 if (!matcher.replaceFirst(toSupplementaries("test")) 1394 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1395 failCount++; 1396 1397 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1398 if (!matcher.replaceFirst(toSupplementaries("test")). 1399 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1400 failCount++; 1401 1402 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1403 result = matcher.replaceFirst("$1"); 1404 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1405 failCount++; 1406 1407 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1408 result = matcher.replaceFirst("$2"); 1409 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1410 failCount++; 1411 1412 pattern = Pattern.compile(toSupplementaries("a*")); 1413 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1414 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1415 failCount++; 1416 1417 pattern = Pattern.compile(toSupplementaries("a+")); 1418 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1419 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1420 failCount++; 1421 1422 report("Replace First"); 1423 } 1424 1425 private static void unixLinesTest() { 1426 Pattern pattern = Pattern.compile(".*"); 1427 Matcher matcher = pattern.matcher("aa\u2028blah"); 1428 matcher.find(); 1429 if (!matcher.group(0).equals("aa")) 1430 failCount++; 1431 1432 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1433 matcher = pattern.matcher("aa\u2028blah"); 1434 matcher.find(); 1435 if (!matcher.group(0).equals("aa\u2028blah")) 1436 failCount++; 1437 1438 pattern = Pattern.compile("[az]$", 1439 Pattern.MULTILINE | Pattern.UNIX_LINES); 1440 matcher = pattern.matcher("aa\u2028zz"); 1441 check(matcher, "a\u2028", false); 1442 1443 // Supplementary character test 1444 pattern = Pattern.compile(".*"); 1445 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1446 matcher.find(); 1447 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1448 failCount++; 1449 1450 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1451 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1452 matcher.find(); 1453 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1454 failCount++; 1455 1456 pattern = Pattern.compile(toSupplementaries("[az]$"), 1457 Pattern.MULTILINE | Pattern.UNIX_LINES); 1458 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1459 check(matcher, toSupplementaries("a\u2028"), false); 1460 1461 report("Unix Lines"); 1462 } 1463 1464 private static void commentsTest() { 1465 int flags = Pattern.COMMENTS; 1466 1467 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1468 Matcher matcher = pattern.matcher("aa#aa"); 1469 if (!matcher.matches()) 1470 failCount++; 1471 1472 pattern = Pattern.compile("aa # blah", flags); 1473 matcher = pattern.matcher("aa"); 1474 if (!matcher.matches()) 1475 failCount++; 1476 1477 pattern = Pattern.compile("aa blah", flags); 1478 matcher = pattern.matcher("aablah"); 1479 if (!matcher.matches()) 1480 failCount++; 1481 1482 pattern = Pattern.compile("aa # blah blech ", flags); 1483 matcher = pattern.matcher("aa"); 1484 if (!matcher.matches()) 1485 failCount++; 1486 1487 pattern = Pattern.compile("aa # blah\n ", flags); 1488 matcher = pattern.matcher("aa"); 1489 if (!matcher.matches()) 1490 failCount++; 1491 1492 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1493 matcher = pattern.matcher("aabc"); 1494 if (!matcher.matches()) 1495 failCount++; 1496 1497 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1498 matcher = pattern.matcher("aabc"); 1499 if (!matcher.matches()) 1500 failCount++; 1501 1502 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1503 matcher = pattern.matcher("aabc#blech"); 1504 if (!matcher.matches()) 1505 failCount++; 1506 1507 // Supplementary character test 1508 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1509 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1510 if (!matcher.matches()) 1511 failCount++; 1512 1513 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1514 matcher = pattern.matcher(toSupplementaries("aa")); 1515 if (!matcher.matches()) 1516 failCount++; 1517 1518 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1519 matcher = pattern.matcher(toSupplementaries("aablah")); 1520 if (!matcher.matches()) 1521 failCount++; 1522 1523 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1524 matcher = pattern.matcher(toSupplementaries("aa")); 1525 if (!matcher.matches()) 1526 failCount++; 1527 1528 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1529 matcher = pattern.matcher(toSupplementaries("aa")); 1530 if (!matcher.matches()) 1531 failCount++; 1532 1533 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1534 matcher = pattern.matcher(toSupplementaries("aabc")); 1535 if (!matcher.matches()) 1536 failCount++; 1537 1538 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1539 matcher = pattern.matcher(toSupplementaries("aabc")); 1540 if (!matcher.matches()) 1541 failCount++; 1542 1543 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1544 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1545 if (!matcher.matches()) 1546 failCount++; 1547 1548 report("Comments"); 1549 } 1550 1551 private static void caseFoldingTest() { // bug 4504687 1552 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1553 Pattern pattern = Pattern.compile("aa", flags); 1554 Matcher matcher = pattern.matcher("ab"); 1555 if (matcher.matches()) 1556 failCount++; 1557 1558 pattern = Pattern.compile("aA", flags); 1559 matcher = pattern.matcher("ab"); 1560 if (matcher.matches()) 1561 failCount++; 1562 1563 pattern = Pattern.compile("aa", flags); 1564 matcher = pattern.matcher("aB"); 1565 if (matcher.matches()) 1566 failCount++; 1567 matcher = pattern.matcher("Ab"); 1568 if (matcher.matches()) 1569 failCount++; 1570 1571 // ASCII "a" 1572 // Latin-1 Supplement "a" + grave 1573 // Cyrillic "a" 1574 String[] patterns = new String[] { 1575 //single 1576 "a", "\u00e0", "\u0430", 1577 //slice 1578 "ab", "\u00e0\u00e1", "\u0430\u0431", 1579 //class single 1580 "[a]", "[\u00e0]", "[\u0430]", 1581 //class range 1582 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1583 //back reference 1584 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1585 }; 1586 1587 String[] texts = new String[] { 1588 "A", "\u00c0", "\u0410", 1589 "AB", "\u00c0\u00c1", "\u0410\u0411", 1590 "A", "\u00c0", "\u0410", 1591 "B", "\u00c2", "\u0411", 1592 "aA", "\u00e0\u00c0", "\u0430\u0410" 1593 }; 1594 1595 boolean[] expected = new boolean[] { 1596 true, false, false, 1597 true, false, false, 1598 true, false, false, 1599 true, false, false, 1600 true, false, false 1601 }; 1602 1603 flags = Pattern.CASE_INSENSITIVE; 1604 for (int i = 0; i < patterns.length; i++) { 1605 pattern = Pattern.compile(patterns[i], flags); 1606 matcher = pattern.matcher(texts[i]); 1607 if (matcher.matches() != expected[i]) { 1608 System.out.println("<1> Failed at " + i); 1609 failCount++; 1610 } 1611 } 1612 1613 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1614 for (int i = 0; i < patterns.length; i++) { 1615 pattern = Pattern.compile(patterns[i], flags); 1616 matcher = pattern.matcher(texts[i]); 1617 if (!matcher.matches()) { 1618 System.out.println("<2> Failed at " + i); 1619 failCount++; 1620 } 1621 } 1622 // flag unicode_case alone should do nothing 1623 flags = Pattern.UNICODE_CASE; 1624 for (int i = 0; i < patterns.length; i++) { 1625 pattern = Pattern.compile(patterns[i], flags); 1626 matcher = pattern.matcher(texts[i]); 1627 if (matcher.matches()) { 1628 System.out.println("<3> Failed at " + i); 1629 failCount++; 1630 } 1631 } 1632 1633 // Special cases: i, I, u+0131 and u+0130 1634 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1635 pattern = Pattern.compile("[h-j]+", flags); 1636 if (!pattern.matcher("\u0131\u0130").matches()) 1637 failCount++; 1638 report("Case Folding"); 1639 } 1640 1641 private static void appendTest() { 1642 Pattern pattern = Pattern.compile("(ab)(cd)"); 1643 Matcher matcher = pattern.matcher("abcd"); 1644 String result = matcher.replaceAll("$2$1"); 1645 if (!result.equals("cdab")) 1646 failCount++; 1647 1648 String s1 = "Swap all: first = 123, second = 456"; 1649 String s2 = "Swap one: first = 123, second = 456"; 1650 String r = "$3$2$1"; 1651 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1652 matcher = pattern.matcher(s1); 1653 1654 result = matcher.replaceAll(r); 1655 if (!result.equals("Swap all: 123 = first, 456 = second")) 1656 failCount++; 1657 1658 matcher = pattern.matcher(s2); 1659 1660 if (matcher.find()) { 1661 StringBuffer sb = new StringBuffer(); 1662 matcher.appendReplacement(sb, r); 1663 matcher.appendTail(sb); 1664 result = sb.toString(); 1665 if (!result.equals("Swap one: 123 = first, second = 456")) 1666 failCount++; 1667 } 1668 1669 // Supplementary character test 1670 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1671 matcher = pattern.matcher(toSupplementaries("abcd")); 1672 result = matcher.replaceAll("$2$1"); 1673 if (!result.equals(toSupplementaries("cdab"))) 1674 failCount++; 1675 1676 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1677 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1678 r = toSupplementaries("$3$2$1"); 1679 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1680 matcher = pattern.matcher(s1); 1681 1682 result = matcher.replaceAll(r); 1683 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1684 failCount++; 1685 1686 matcher = pattern.matcher(s2); 1687 1688 if (matcher.find()) { 1689 StringBuffer sb = new StringBuffer(); 1690 matcher.appendReplacement(sb, r); 1691 matcher.appendTail(sb); 1692 result = sb.toString(); 1693 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1694 failCount++; 1695 } 1696 report("Append"); 1697 } 1698 1699 private static void splitTest() { 1700 Pattern pattern = Pattern.compile(":"); 1701 String[] result = pattern.split("foo:and:boo", 2); 1702 if (!result[0].equals("foo")) 1703 failCount++; 1704 if (!result[1].equals("and:boo")) 1705 failCount++; 1706 // Supplementary character test 1707 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1708 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1709 if (!result[0].equals(toSupplementaries("foo"))) 1710 failCount++; 1711 if (!result[1].equals(toSupplementaries("andXboo"))) 1712 failCount++; 1713 1714 CharBuffer cb = CharBuffer.allocate(100); 1715 cb.put("foo:and:boo"); 1716 cb.flip(); 1717 result = pattern.split(cb); 1718 if (!result[0].equals("foo")) 1719 failCount++; 1720 if (!result[1].equals("and")) 1721 failCount++; 1722 if (!result[2].equals("boo")) 1723 failCount++; 1724 1725 // Supplementary character test 1726 CharBuffer cbs = CharBuffer.allocate(100); 1727 cbs.put(toSupplementaries("fooXandXboo")); 1728 cbs.flip(); 1729 result = patternX.split(cbs); 1730 if (!result[0].equals(toSupplementaries("foo"))) 1731 failCount++; 1732 if (!result[1].equals(toSupplementaries("and"))) 1733 failCount++; 1734 if (!result[2].equals(toSupplementaries("boo"))) 1735 failCount++; 1736 1737 String source = "0123456789"; 1738 for (int limit=-2; limit<3; limit++) { 1739 for (int x=0; x<10; x++) { 1740 result = source.split(Integer.toString(x), limit); 1741 int expectedLength = limit < 1 ? 2 : limit; 1742 1743 if ((limit == 0) && (x == 9)) { 1744 // expected dropping of "" 1745 if (result.length != 1) 1746 failCount++; 1747 if (!result[0].equals("012345678")) { 1748 failCount++; 1749 } 1750 } else { 1751 if (result.length != expectedLength) { 1752 failCount++; 1753 } 1754 if (!result[0].equals(source.substring(0,x))) { 1755 if (limit != 1) { 1756 failCount++; 1757 } else { 1758 if (!result[0].equals(source.substring(0,10))) { 1759 failCount++; 1760 } 1761 } 1762 } 1763 if (expectedLength > 1) { // Check segment 2 1764 if (!result[1].equals(source.substring(x+1,10))) 1765 failCount++; 1766 } 1767 } 1768 } 1769 } 1770 // Check the case for no match found 1771 for (int limit=-2; limit<3; limit++) { 1772 result = source.split("e", limit); 1773 if (result.length != 1) 1774 failCount++; 1775 if (!result[0].equals(source)) 1776 failCount++; 1777 } 1778 // Check the case for limit == 0, source = ""; 1779 source = ""; 1780 result = source.split("e", 0); 1781 if (result.length != 1) 1782 failCount++; 1783 if (!result[0].equals(source)) 1784 failCount++; 1785 1786 report("Split"); 1787 } 1788 1789 private static void negationTest() { 1790 Pattern pattern = Pattern.compile("[\\[@^]+"); 1791 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1792 if (!matcher.find()) 1793 failCount++; 1794 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1795 failCount++; 1796 pattern = Pattern.compile("[@\\[^]+"); 1797 matcher = pattern.matcher("@@@@[[[[^^^^"); 1798 if (!matcher.find()) 1799 failCount++; 1800 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1801 failCount++; 1802 pattern = Pattern.compile("[@\\[^@]+"); 1803 matcher = pattern.matcher("@@@@[[[[^^^^"); 1804 if (!matcher.find()) 1805 failCount++; 1806 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1807 failCount++; 1808 1809 pattern = Pattern.compile("\\)"); 1810 matcher = pattern.matcher("xxx)xxx"); 1811 if (!matcher.find()) 1812 failCount++; 1813 1814 report("Negation"); 1815 } 1816 1817 private static void ampersandTest() { 1818 Pattern pattern = Pattern.compile("[&@]+"); 1819 check(pattern, "@@@@&&&&", true); 1820 1821 pattern = Pattern.compile("[@&]+"); 1822 check(pattern, "@@@@&&&&", true); 1823 1824 pattern = Pattern.compile("[@\\&]+"); 1825 check(pattern, "@@@@&&&&", true); 1826 1827 report("Ampersand"); 1828 } 1829 1830 private static void octalTest() throws Exception { 1831 Pattern pattern = Pattern.compile("\\u0007"); 1832 Matcher matcher = pattern.matcher("\u0007"); 1833 if (!matcher.matches()) 1834 failCount++; 1835 pattern = Pattern.compile("\\07"); 1836 matcher = pattern.matcher("\u0007"); 1837 if (!matcher.matches()) 1838 failCount++; 1839 pattern = Pattern.compile("\\007"); 1840 matcher = pattern.matcher("\u0007"); 1841 if (!matcher.matches()) 1842 failCount++; 1843 pattern = Pattern.compile("\\0007"); 1844 matcher = pattern.matcher("\u0007"); 1845 if (!matcher.matches()) 1846 failCount++; 1847 pattern = Pattern.compile("\\040"); 1848 matcher = pattern.matcher("\u0020"); 1849 if (!matcher.matches()) 1850 failCount++; 1851 pattern = Pattern.compile("\\0403"); 1852 matcher = pattern.matcher("\u00203"); 1853 if (!matcher.matches()) 1854 failCount++; 1855 pattern = Pattern.compile("\\0103"); 1856 matcher = pattern.matcher("\u0043"); 1857 if (!matcher.matches()) 1858 failCount++; 1859 1860 report("Octal"); 1861 } 1862 1863 private static void longPatternTest() throws Exception { 1864 try { 1865 Pattern pattern = Pattern.compile( 1866 "a 32-character-long pattern xxxx"); 1867 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1868 pattern = Pattern.compile("a thirty four character long regex"); 1869 StringBuffer patternToBe = new StringBuffer(101); 1870 for (int i=0; i<100; i++) 1871 patternToBe.append((char)(97 + i%26)); 1872 pattern = Pattern.compile(patternToBe.toString()); 1873 } catch (PatternSyntaxException e) { 1874 failCount++; 1875 } 1876 1877 // Supplementary character test 1878 try { 1879 Pattern pattern = Pattern.compile( 1880 toSupplementaries("a 32-character-long pattern xxxx")); 1881 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 1882 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 1883 StringBuffer patternToBe = new StringBuffer(101*2); 1884 for (int i=0; i<100; i++) 1885 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 1886 + 97 + i%26)); 1887 pattern = Pattern.compile(patternToBe.toString()); 1888 } catch (PatternSyntaxException e) { 1889 failCount++; 1890 } 1891 report("LongPattern"); 1892 } 1893 1894 private static void group0Test() throws Exception { 1895 Pattern pattern = Pattern.compile("(tes)ting"); 1896 Matcher matcher = pattern.matcher("testing"); 1897 check(matcher, "testing"); 1898 1899 matcher.reset("testing"); 1900 if (matcher.lookingAt()) { 1901 if (!matcher.group(0).equals("testing")) 1902 failCount++; 1903 } else { 1904 failCount++; 1905 } 1906 1907 matcher.reset("testing"); 1908 if (matcher.matches()) { 1909 if (!matcher.group(0).equals("testing")) 1910 failCount++; 1911 } else { 1912 failCount++; 1913 } 1914 1915 pattern = Pattern.compile("(tes)ting"); 1916 matcher = pattern.matcher("testing"); 1917 if (matcher.lookingAt()) { 1918 if (!matcher.group(0).equals("testing")) 1919 failCount++; 1920 } else { 1921 failCount++; 1922 } 1923 1924 pattern = Pattern.compile("^(tes)ting"); 1925 matcher = pattern.matcher("testing"); 1926 if (matcher.matches()) { 1927 if (!matcher.group(0).equals("testing")) 1928 failCount++; 1929 } else { 1930 failCount++; 1931 } 1932 1933 // Supplementary character test 1934 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 1935 matcher = pattern.matcher(toSupplementaries("testing")); 1936 check(matcher, toSupplementaries("testing")); 1937 1938 matcher.reset(toSupplementaries("testing")); 1939 if (matcher.lookingAt()) { 1940 if (!matcher.group(0).equals(toSupplementaries("testing"))) 1941 failCount++; 1942 } else { 1943 failCount++; 1944 } 1945 1946 matcher.reset(toSupplementaries("testing")); 1947 if (matcher.matches()) { 1948 if (!matcher.group(0).equals(toSupplementaries("testing"))) 1949 failCount++; 1950 } else { 1951 failCount++; 1952 } 1953 1954 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 1955 matcher = pattern.matcher(toSupplementaries("testing")); 1956 if (matcher.lookingAt()) { 1957 if (!matcher.group(0).equals(toSupplementaries("testing"))) 1958 failCount++; 1959 } else { 1960 failCount++; 1961 } 1962 1963 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 1964 matcher = pattern.matcher(toSupplementaries("testing")); 1965 if (matcher.matches()) { 1966 if (!matcher.group(0).equals(toSupplementaries("testing"))) 1967 failCount++; 1968 } else { 1969 failCount++; 1970 } 1971 1972 report("Group0"); 1973 } 1974 1975 private static void findIntTest() throws Exception { 1976 Pattern p = Pattern.compile("blah"); 1977 Matcher m = p.matcher("zzzzblahzzzzzblah"); 1978 boolean result = m.find(2); 1979 if (!result) 1980 failCount++; 1981 1982 p = Pattern.compile("$"); 1983 m = p.matcher("1234567890"); 1984 result = m.find(10); 1985 if (!result) 1986 failCount++; 1987 try { 1988 result = m.find(11); 1989 failCount++; 1990 } catch (IndexOutOfBoundsException e) { 1991 // correct result 1992 } 1993 1994 // Supplementary character test 1995 p = Pattern.compile(toSupplementaries("blah")); 1996 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 1997 result = m.find(2); 1998 if (!result) 1999 failCount++; 2000 2001 report("FindInt"); 2002 } 2003 2004 private static void emptyPatternTest() throws Exception { 2005 Pattern p = Pattern.compile(""); 2006 Matcher m = p.matcher("foo"); 2007 2008 // Should find empty pattern at beginning of input 2009 boolean result = m.find(); 2010 if (result != true) 2011 failCount++; 2012 if (m.start() != 0) 2013 failCount++; 2014 2015 // Should not match entire input if input is not empty 2016 m.reset(); 2017 result = m.matches(); 2018 if (result == true) 2019 failCount++; 2020 2021 try { 2022 m.start(0); 2023 failCount++; 2024 } catch (IllegalStateException e) { 2025 // Correct result 2026 } 2027 2028 // Should match entire input if input is empty 2029 m.reset(""); 2030 result = m.matches(); 2031 if (result != true) 2032 failCount++; 2033 2034 result = Pattern.matches("", ""); 2035 if (result != true) 2036 failCount++; 2037 2038 result = Pattern.matches("", "foo"); 2039 if (result == true) 2040 failCount++; 2041 report("EmptyPattern"); 2042 } 2043 2044 private static void charClassTest() throws Exception { 2045 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2046 check(pattern, "blahb]blech", true); 2047 2048 pattern = Pattern.compile("[abc[def]]"); 2049 check(pattern, "b", true); 2050 2051 // Supplementary character tests 2052 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2053 check(pattern, toSupplementaries("blahb]blech"), true); 2054 2055 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2056 check(pattern, toSupplementaries("b"), true); 2057 2058 try { 2059 // u00ff when UNICODE_CASE 2060 pattern = Pattern.compile("[ab\u00ffcd]", 2061 Pattern.CASE_INSENSITIVE| 2062 Pattern.UNICODE_CASE); 2063 check(pattern, "ab\u00ffcd", true); 2064 check(pattern, "Ab\u0178Cd", true); 2065 2066 // u00b5 when UNICODE_CASE 2067 pattern = Pattern.compile("[ab\u00b5cd]", 2068 Pattern.CASE_INSENSITIVE| 2069 Pattern.UNICODE_CASE); 2070 check(pattern, "ab\u00b5cd", true); 2071 check(pattern, "Ab\u039cCd", true); 2072 } catch (Exception e) { failCount++; } 2073 2074 /* Special cases 2075 (1)LatinSmallLetterLongS u+017f 2076 (2)LatinSmallLetterDotlessI u+0131 2077 (3)LatineCapitalLetterIWithDotAbove u+0130 2078 (4)KelvinSign u+212a 2079 (5)AngstromSign u+212b 2080 */ 2081 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2082 pattern = Pattern.compile("[sik\u00c5]+", flags); 2083 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2084 failCount++; 2085 2086 report("CharClass"); 2087 } 2088 2089 private static void caretTest() throws Exception { 2090 Pattern pattern = Pattern.compile("\\w*"); 2091 Matcher matcher = pattern.matcher("a#bc#def##g"); 2092 check(matcher, "a"); 2093 check(matcher, ""); 2094 check(matcher, "bc"); 2095 check(matcher, ""); 2096 check(matcher, "def"); 2097 check(matcher, ""); 2098 check(matcher, ""); 2099 check(matcher, "g"); 2100 check(matcher, ""); 2101 if (matcher.find()) 2102 failCount++; 2103 2104 pattern = Pattern.compile("^\\w*"); 2105 matcher = pattern.matcher("a#bc#def##g"); 2106 check(matcher, "a"); 2107 if (matcher.find()) 2108 failCount++; 2109 2110 pattern = Pattern.compile("\\w"); 2111 matcher = pattern.matcher("abc##x"); 2112 check(matcher, "a"); 2113 check(matcher, "b"); 2114 check(matcher, "c"); 2115 check(matcher, "x"); 2116 if (matcher.find()) 2117 failCount++; 2118 2119 pattern = Pattern.compile("^\\w"); 2120 matcher = pattern.matcher("abc##x"); 2121 check(matcher, "a"); 2122 if (matcher.find()) 2123 failCount++; 2124 2125 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2126 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2127 check(matcher, "abc"); 2128 if (matcher.find()) 2129 failCount++; 2130 2131 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2132 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2133 check(matcher, "abc"); 2134 check(matcher, "jkl"); 2135 if (matcher.find()) 2136 failCount++; 2137 2138 pattern = Pattern.compile("^", Pattern.MULTILINE); 2139 matcher = pattern.matcher("this is some text"); 2140 String result = matcher.replaceAll("X"); 2141 if (!result.equals("Xthis is some text")) 2142 failCount++; 2143 2144 pattern = Pattern.compile("^"); 2145 matcher = pattern.matcher("this is some text"); 2146 result = matcher.replaceAll("X"); 2147 if (!result.equals("Xthis is some text")) 2148 failCount++; 2149 2150 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2151 matcher = pattern.matcher("this is some text\n"); 2152 result = matcher.replaceAll("X"); 2153 if (!result.equals("Xthis is some text\n")) 2154 failCount++; 2155 2156 report("Caret"); 2157 } 2158 2159 private static void groupCaptureTest() throws Exception { 2160 // Independent group 2161 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2162 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2163 matcher.find(); 2164 try { 2165 String blah = matcher.group(1); 2166 failCount++; 2167 } catch (IndexOutOfBoundsException ioobe) { 2168 // Good result 2169 } 2170 // Pure group 2171 pattern = Pattern.compile("x+(?:y+)z+"); 2172 matcher = pattern.matcher("xxxyyyzzz"); 2173 matcher.find(); 2174 try { 2175 String blah = matcher.group(1); 2176 failCount++; 2177 } catch (IndexOutOfBoundsException ioobe) { 2178 // Good result 2179 } 2180 2181 // Supplementary character tests 2182 // Independent group 2183 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2184 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2185 matcher.find(); 2186 try { 2187 String blah = matcher.group(1); 2188 failCount++; 2189 } catch (IndexOutOfBoundsException ioobe) { 2190 // Good result 2191 } 2192 // Pure group 2193 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2194 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2195 matcher.find(); 2196 try { 2197 String blah = matcher.group(1); 2198 failCount++; 2199 } catch (IndexOutOfBoundsException ioobe) { 2200 // Good result 2201 } 2202 2203 report("GroupCapture"); 2204 } 2205 2206 private static void backRefTest() throws Exception { 2207 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2208 check(pattern, "zzzaabcazzz", true); 2209 2210 pattern = Pattern.compile("(a*)bc\\1"); 2211 check(pattern, "zzzaabcaazzz", true); 2212 2213 pattern = Pattern.compile("(abc)(def)\\1"); 2214 check(pattern, "abcdefabc", true); 2215 2216 pattern = Pattern.compile("(abc)(def)\\3"); 2217 check(pattern, "abcdefabc", false); 2218 2219 try { 2220 for (int i = 1; i < 10; i++) { 2221 // Make sure backref 1-9 are always accepted 2222 pattern = Pattern.compile("abcdef\\" + i); 2223 // and fail to match if the target group does not exit 2224 check(pattern, "abcdef", false); 2225 } 2226 } catch(PatternSyntaxException e) { 2227 failCount++; 2228 } 2229 2230 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2231 check(pattern, "abcdefghija", false); 2232 check(pattern, "abcdefghija1", true); 2233 2234 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2235 check(pattern, "abcdefghijkk", true); 2236 2237 pattern = Pattern.compile("(a)bcdefghij\\11"); 2238 check(pattern, "abcdefghija1", true); 2239 2240 // Supplementary character tests 2241 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2242 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2243 2244 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2245 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2246 2247 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2248 check(pattern, toSupplementaries("abcdefabc"), true); 2249 2250 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2251 check(pattern, toSupplementaries("abcdefabc"), false); 2252 2253 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2254 check(pattern, toSupplementaries("abcdefghija"), false); 2255 check(pattern, toSupplementaries("abcdefghija1"), true); 2256 2257 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2258 check(pattern, toSupplementaries("abcdefghijkk"), true); 2259 2260 report("BackRef"); 2261 } 2262 2263 /** 2264 * Unicode Technical Report #18, section 2.6 End of Line 2265 * There is no empty line to be matched in the sequence \u000D\u000A 2266 * but there is an empty line in the sequence \u000A\u000D. 2267 */ 2268 private static void anchorTest() throws Exception { 2269 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2270 Matcher m = p.matcher("blah1\r\nblah2"); 2271 m.find(); 2272 m.find(); 2273 if (!m.group().equals("blah2")) 2274 failCount++; 2275 2276 m.reset("blah1\n\rblah2"); 2277 m.find(); 2278 m.find(); 2279 m.find(); 2280 if (!m.group().equals("blah2")) 2281 failCount++; 2282 2283 // Test behavior of $ with \r\n at end of input 2284 p = Pattern.compile(".+$"); 2285 m = p.matcher("blah1\r\n"); 2286 if (!m.find()) 2287 failCount++; 2288 if (!m.group().equals("blah1")) 2289 failCount++; 2290 if (m.find()) 2291 failCount++; 2292 2293 // Test behavior of $ with \r\n at end of input in multiline 2294 p = Pattern.compile(".+$", Pattern.MULTILINE); 2295 m = p.matcher("blah1\r\n"); 2296 if (!m.find()) 2297 failCount++; 2298 if (m.find()) 2299 failCount++; 2300 2301 // Test for $ recognition of \u0085 for bug 4527731 2302 p = Pattern.compile(".+$", Pattern.MULTILINE); 2303 m = p.matcher("blah1\u0085"); 2304 if (!m.find()) 2305 failCount++; 2306 2307 // Supplementary character test 2308 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2309 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2310 m.find(); 2311 m.find(); 2312 if (!m.group().equals(toSupplementaries("blah2"))) 2313 failCount++; 2314 2315 m.reset(toSupplementaries("blah1\n\rblah2")); 2316 m.find(); 2317 m.find(); 2318 m.find(); 2319 if (!m.group().equals(toSupplementaries("blah2"))) 2320 failCount++; 2321 2322 // Test behavior of $ with \r\n at end of input 2323 p = Pattern.compile(".+$"); 2324 m = p.matcher(toSupplementaries("blah1\r\n")); 2325 if (!m.find()) 2326 failCount++; 2327 if (!m.group().equals(toSupplementaries("blah1"))) 2328 failCount++; 2329 if (m.find()) 2330 failCount++; 2331 2332 // Test behavior of $ with \r\n at end of input in multiline 2333 p = Pattern.compile(".+$", Pattern.MULTILINE); 2334 m = p.matcher(toSupplementaries("blah1\r\n")); 2335 if (!m.find()) 2336 failCount++; 2337 if (m.find()) 2338 failCount++; 2339 2340 // Test for $ recognition of \u0085 for bug 4527731 2341 p = Pattern.compile(".+$", Pattern.MULTILINE); 2342 m = p.matcher(toSupplementaries("blah1\u0085")); 2343 if (!m.find()) 2344 failCount++; 2345 2346 report("Anchors"); 2347 } 2348 2349 /** 2350 * A basic sanity test of Matcher.lookingAt(). 2351 */ 2352 private static void lookingAtTest() throws Exception { 2353 Pattern p = Pattern.compile("(ab)(c*)"); 2354 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2355 2356 if (!m.lookingAt()) 2357 failCount++; 2358 2359 if (!m.group().equals(m.group(0))) 2360 failCount++; 2361 2362 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2363 if (m.lookingAt()) 2364 failCount++; 2365 2366 // Supplementary character test 2367 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2368 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2369 2370 if (!m.lookingAt()) 2371 failCount++; 2372 2373 if (!m.group().equals(m.group(0))) 2374 failCount++; 2375 2376 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2377 if (m.lookingAt()) 2378 failCount++; 2379 2380 report("Looking At"); 2381 } 2382 2383 /** 2384 * A basic sanity test of Matcher.matches(). 2385 */ 2386 private static void matchesTest() throws Exception { 2387 // matches() 2388 Pattern p = Pattern.compile("ulb(c*)"); 2389 Matcher m = p.matcher("ulbcccccc"); 2390 if (!m.matches()) 2391 failCount++; 2392 2393 // find() but not matches() 2394 m.reset("zzzulbcccccc"); 2395 if (m.matches()) 2396 failCount++; 2397 2398 // lookingAt() but not matches() 2399 m.reset("ulbccccccdef"); 2400 if (m.matches()) 2401 failCount++; 2402 2403 // matches() 2404 p = Pattern.compile("a|ad"); 2405 m = p.matcher("ad"); 2406 if (!m.matches()) 2407 failCount++; 2408 2409 // Supplementary character test 2410 // matches() 2411 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2412 m = p.matcher(toSupplementaries("ulbcccccc")); 2413 if (!m.matches()) 2414 failCount++; 2415 2416 // find() but not matches() 2417 m.reset(toSupplementaries("zzzulbcccccc")); 2418 if (m.matches()) 2419 failCount++; 2420 2421 // lookingAt() but not matches() 2422 m.reset(toSupplementaries("ulbccccccdef")); 2423 if (m.matches()) 2424 failCount++; 2425 2426 // matches() 2427 p = Pattern.compile(toSupplementaries("a|ad")); 2428 m = p.matcher(toSupplementaries("ad")); 2429 if (!m.matches()) 2430 failCount++; 2431 2432 report("Matches"); 2433 } 2434 2435 /** 2436 * A basic sanity test of Pattern.matches(). 2437 */ 2438 private static void patternMatchesTest() throws Exception { 2439 // matches() 2440 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2441 toSupplementaries("ulbcccccc"))) 2442 failCount++; 2443 2444 // find() but not matches() 2445 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2446 toSupplementaries("zzzulbcccccc"))) 2447 failCount++; 2448 2449 // lookingAt() but not matches() 2450 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2451 toSupplementaries("ulbccccccdef"))) 2452 failCount++; 2453 2454 // Supplementary character test 2455 // matches() 2456 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2457 toSupplementaries("ulbcccccc"))) 2458 failCount++; 2459 2460 // find() but not matches() 2461 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2462 toSupplementaries("zzzulbcccccc"))) 2463 failCount++; 2464 2465 // lookingAt() but not matches() 2466 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2467 toSupplementaries("ulbccccccdef"))) 2468 failCount++; 2469 2470 report("Pattern Matches"); 2471 } 2472 2473 /** 2474 * Canonical equivalence testing. Tests the ability of the engine 2475 * to match sequences that are not explicitly specified in the 2476 * pattern when they are considered equivalent by the Unicode Standard. 2477 */ 2478 private static void ceTest() throws Exception { 2479 // Decomposed char outside char classes 2480 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2481 Matcher m = p.matcher("test\u00e5"); 2482 if (!m.matches()) 2483 failCount++; 2484 2485 m.reset("testa\u030a"); 2486 if (!m.matches()) 2487 failCount++; 2488 2489 // Composed char outside char classes 2490 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2491 m = p.matcher("test\u00e5"); 2492 if (!m.matches()) 2493 failCount++; 2494 2495 m.reset("testa\u030a"); 2496 if (!m.find()) 2497 failCount++; 2498 2499 // Decomposed char inside a char class 2500 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2501 m = p.matcher("test\u00e5"); 2502 if (!m.find()) 2503 failCount++; 2504 2505 m.reset("testa\u030a"); 2506 if (!m.find()) 2507 failCount++; 2508 2509 // Composed char inside a char class 2510 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2511 m = p.matcher("test\u00e5"); 2512 if (!m.find()) 2513 failCount++; 2514 2515 m.reset("testa\u0300"); 2516 if (!m.find()) 2517 failCount++; 2518 2519 m.reset("testa\u030a"); 2520 if (!m.find()) 2521 failCount++; 2522 2523 // Marks that cannot legally change order and be equivalent 2524 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2525 check(p, "testa\u0308\u0300", true); 2526 check(p, "testa\u0300\u0308", false); 2527 2528 // Marks that can legally change order and be equivalent 2529 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2530 check(p, "testa\u0308\u0323", true); 2531 check(p, "testa\u0323\u0308", true); 2532 2533 // Test all equivalences of the sequence a\u0308\u0323\u0300 2534 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2535 check(p, "testa\u0308\u0323\u0300", true); 2536 check(p, "testa\u0323\u0308\u0300", true); 2537 check(p, "testa\u0308\u0300\u0323", true); 2538 check(p, "test\u00e4\u0323\u0300", true); 2539 check(p, "test\u00e4\u0300\u0323", true); 2540 2541 /* 2542 * The following canonical equivalence tests don't work. Bug id: 4916384. 2543 * 2544 // Decomposed hangul (jamos) 2545 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ); 2546 m = p.matcher("\u1100\u1161"); 2547 if (!m.matches()) 2548 failCount++; 2549 2550 m.reset("\uac00"); 2551 if (!m.matches()) 2552 failCount++; 2553 2554 // Composed hangul 2555 p = Pattern.compile("\uac00", Pattern.CANON_EQ); 2556 m = p.matcher("\u1100\u1161"); 2557 if (!m.matches()) 2558 failCount++; 2559 2560 m.reset("\uac00"); 2561 if (!m.matches()) 2562 failCount++; 2563 2564 // Decomposed supplementary outside char classes 2565 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ); 2566 m = p.matcher("test\ud834\uddc0"); 2567 if (!m.matches()) 2568 failCount++; 2569 2570 m.reset("test\ud834\uddbc\ud834\udd6f"); 2571 if (!m.matches()) 2572 failCount++; 2573 2574 // Composed supplementary outside char classes 2575 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ); 2576 m.reset("test\ud834\uddbc\ud834\udd6f"); 2577 if (!m.matches()) 2578 failCount++; 2579 2580 m = p.matcher("test\ud834\uddc0"); 2581 if (!m.matches()) 2582 failCount++; 2583 2584 */ 2585 2586 report("Canonical Equivalence"); 2587 } 2588 2589 /** 2590 * A basic sanity test of Matcher.replaceAll(). 2591 */ 2592 private static void globalSubstitute() throws Exception { 2593 // Global substitution with a literal 2594 Pattern p = Pattern.compile("(ab)(c*)"); 2595 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2596 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2597 failCount++; 2598 2599 m.reset("zzzabccczzzabcczzzabccczzz"); 2600 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2601 failCount++; 2602 2603 // Global substitution with groups 2604 m.reset("zzzabccczzzabcczzzabccczzz"); 2605 String result = m.replaceAll("$1"); 2606 if (!result.equals("zzzabzzzabzzzabzzz")) 2607 failCount++; 2608 2609 // Supplementary character test 2610 // Global substitution with a literal 2611 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2612 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2613 if (!m.replaceAll(toSupplementaries("test")). 2614 equals(toSupplementaries("testzzztestzzztest"))) 2615 failCount++; 2616 2617 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2618 if (!m.replaceAll(toSupplementaries("test")). 2619 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2620 failCount++; 2621 2622 // Global substitution with groups 2623 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2624 result = m.replaceAll("$1"); 2625 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2626 failCount++; 2627 2628 report("Global Substitution"); 2629 } 2630 2631 /** 2632 * Tests the usage of Matcher.appendReplacement() with literal 2633 * and group substitutions. 2634 */ 2635 private static void stringbufferSubstitute() throws Exception { 2636 // SB substitution with literal 2637 String blah = "zzzblahzzz"; 2638 Pattern p = Pattern.compile("blah"); 2639 Matcher m = p.matcher(blah); 2640 StringBuffer result = new StringBuffer(); 2641 try { 2642 m.appendReplacement(result, "blech"); 2643 failCount++; 2644 } catch (IllegalStateException e) { 2645 } 2646 m.find(); 2647 m.appendReplacement(result, "blech"); 2648 if (!result.toString().equals("zzzblech")) 2649 failCount++; 2650 2651 m.appendTail(result); 2652 if (!result.toString().equals("zzzblechzzz")) 2653 failCount++; 2654 2655 // SB substitution with groups 2656 blah = "zzzabcdzzz"; 2657 p = Pattern.compile("(ab)(cd)*"); 2658 m = p.matcher(blah); 2659 result = new StringBuffer(); 2660 try { 2661 m.appendReplacement(result, "$1"); 2662 failCount++; 2663 } catch (IllegalStateException e) { 2664 } 2665 m.find(); 2666 m.appendReplacement(result, "$1"); 2667 if (!result.toString().equals("zzzab")) 2668 failCount++; 2669 2670 m.appendTail(result); 2671 if (!result.toString().equals("zzzabzzz")) 2672 failCount++; 2673 2674 // SB substitution with 3 groups 2675 blah = "zzzabcdcdefzzz"; 2676 p = Pattern.compile("(ab)(cd)*(ef)"); 2677 m = p.matcher(blah); 2678 result = new StringBuffer(); 2679 try { 2680 m.appendReplacement(result, "$1w$2w$3"); 2681 failCount++; 2682 } catch (IllegalStateException e) { 2683 } 2684 m.find(); 2685 m.appendReplacement(result, "$1w$2w$3"); 2686 if (!result.toString().equals("zzzabwcdwef")) 2687 failCount++; 2688 2689 m.appendTail(result); 2690 if (!result.toString().equals("zzzabwcdwefzzz")) 2691 failCount++; 2692 2693 // SB substitution with groups and three matches 2694 // skipping middle match 2695 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2696 p = Pattern.compile("(ab)(cd*)"); 2697 m = p.matcher(blah); 2698 result = new StringBuffer(); 2699 try { 2700 m.appendReplacement(result, "$1"); 2701 failCount++; 2702 } catch (IllegalStateException e) { 2703 } 2704 m.find(); 2705 m.appendReplacement(result, "$1"); 2706 if (!result.toString().equals("zzzab")) 2707 failCount++; 2708 2709 m.find(); 2710 m.find(); 2711 m.appendReplacement(result, "$2"); 2712 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2713 failCount++; 2714 2715 m.appendTail(result); 2716 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2717 failCount++; 2718 2719 // Check to make sure escaped $ is ignored 2720 blah = "zzzabcdcdefzzz"; 2721 p = Pattern.compile("(ab)(cd)*(ef)"); 2722 m = p.matcher(blah); 2723 result = new StringBuffer(); 2724 m.find(); 2725 m.appendReplacement(result, "$1w\\$2w$3"); 2726 if (!result.toString().equals("zzzabw$2wef")) 2727 failCount++; 2728 2729 m.appendTail(result); 2730 if (!result.toString().equals("zzzabw$2wefzzz")) 2731 failCount++; 2732 2733 // Check to make sure a reference to nonexistent group causes error 2734 blah = "zzzabcdcdefzzz"; 2735 p = Pattern.compile("(ab)(cd)*(ef)"); 2736 m = p.matcher(blah); 2737 result = new StringBuffer(); 2738 m.find(); 2739 try { 2740 m.appendReplacement(result, "$1w$5w$3"); 2741 failCount++; 2742 } catch (IndexOutOfBoundsException ioobe) { 2743 // Correct result 2744 } 2745 2746 // Check double digit group references 2747 blah = "zzz123456789101112zzz"; 2748 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2749 m = p.matcher(blah); 2750 result = new StringBuffer(); 2751 m.find(); 2752 m.appendReplacement(result, "$1w$11w$3"); 2753 if (!result.toString().equals("zzz1w11w3")) 2754 failCount++; 2755 2756 // Check to make sure it backs off $15 to $1 if only three groups 2757 blah = "zzzabcdcdefzzz"; 2758 p = Pattern.compile("(ab)(cd)*(ef)"); 2759 m = p.matcher(blah); 2760 result = new StringBuffer(); 2761 m.find(); 2762 m.appendReplacement(result, "$1w$15w$3"); 2763 if (!result.toString().equals("zzzabwab5wef")) 2764 failCount++; 2765 2766 2767 // Supplementary character test 2768 // SB substitution with literal 2769 blah = toSupplementaries("zzzblahzzz"); 2770 p = Pattern.compile(toSupplementaries("blah")); 2771 m = p.matcher(blah); 2772 result = new StringBuffer(); 2773 try { 2774 m.appendReplacement(result, toSupplementaries("blech")); 2775 failCount++; 2776 } catch (IllegalStateException e) { 2777 } 2778 m.find(); 2779 m.appendReplacement(result, toSupplementaries("blech")); 2780 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2781 failCount++; 2782 2783 m.appendTail(result); 2784 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2785 failCount++; 2786 2787 // SB substitution with groups 2788 blah = toSupplementaries("zzzabcdzzz"); 2789 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2790 m = p.matcher(blah); 2791 result = new StringBuffer(); 2792 try { 2793 m.appendReplacement(result, "$1"); 2794 failCount++; 2795 } catch (IllegalStateException e) { 2796 } 2797 m.find(); 2798 m.appendReplacement(result, "$1"); 2799 if (!result.toString().equals(toSupplementaries("zzzab"))) 2800 failCount++; 2801 2802 m.appendTail(result); 2803 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 2804 failCount++; 2805 2806 // SB substitution with 3 groups 2807 blah = toSupplementaries("zzzabcdcdefzzz"); 2808 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2809 m = p.matcher(blah); 2810 result = new StringBuffer(); 2811 try { 2812 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2813 failCount++; 2814 } catch (IllegalStateException e) { 2815 } 2816 m.find(); 2817 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2818 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 2819 failCount++; 2820 2821 m.appendTail(result); 2822 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 2823 failCount++; 2824 2825 // SB substitution with groups and three matches 2826 // skipping middle match 2827 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 2828 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 2829 m = p.matcher(blah); 2830 result = new StringBuffer(); 2831 try { 2832 m.appendReplacement(result, "$1"); 2833 failCount++; 2834 } catch (IllegalStateException e) { 2835 } 2836 m.find(); 2837 m.appendReplacement(result, "$1"); 2838 if (!result.toString().equals(toSupplementaries("zzzab"))) 2839 failCount++; 2840 2841 m.find(); 2842 m.find(); 2843 m.appendReplacement(result, "$2"); 2844 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 2845 failCount++; 2846 2847 m.appendTail(result); 2848 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 2849 failCount++; 2850 2851 // Check to make sure escaped $ is ignored 2852 blah = toSupplementaries("zzzabcdcdefzzz"); 2853 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2854 m = p.matcher(blah); 2855 result = new StringBuffer(); 2856 m.find(); 2857 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 2858 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 2859 failCount++; 2860 2861 m.appendTail(result); 2862 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 2863 failCount++; 2864 2865 // Check to make sure a reference to nonexistent group causes error 2866 blah = toSupplementaries("zzzabcdcdefzzz"); 2867 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2868 m = p.matcher(blah); 2869 result = new StringBuffer(); 2870 m.find(); 2871 try { 2872 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 2873 failCount++; 2874 } catch (IndexOutOfBoundsException ioobe) { 2875 // Correct result 2876 } 2877 2878 // Check double digit group references 2879 blah = toSupplementaries("zzz123456789101112zzz"); 2880 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2881 m = p.matcher(blah); 2882 result = new StringBuffer(); 2883 m.find(); 2884 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 2885 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 2886 failCount++; 2887 2888 // Check to make sure it backs off $15 to $1 if only three groups 2889 blah = toSupplementaries("zzzabcdcdefzzz"); 2890 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2891 m = p.matcher(blah); 2892 result = new StringBuffer(); 2893 m.find(); 2894 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 2895 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 2896 failCount++; 2897 2898 // Check nothing has been appended into the output buffer if 2899 // the replacement string triggers IllegalArgumentException. 2900 p = Pattern.compile("(abc)"); 2901 m = p.matcher("abcd"); 2902 result = new StringBuffer(); 2903 m.find(); 2904 try { 2905 m.appendReplacement(result, ("xyz$g")); 2906 failCount++; 2907 } catch (IllegalArgumentException iae) { 2908 if (result.length() != 0) 2909 failCount++; 2910 } 2911 2912 report("SB Substitution"); 2913 } 2914 2915 /* 2916 * 5 groups of characters are created to make a substitution string. 2917 * A base string will be created including random lead chars, the 2918 * substitution string, and random trailing chars. 2919 * A pattern containing the 5 groups is searched for and replaced with: 2920 * random group + random string + random group. 2921 * The results are checked for correctness. 2922 */ 2923 private static void substitutionBasher() { 2924 for (int runs = 0; runs<1000; runs++) { 2925 // Create a base string to work in 2926 int leadingChars = generator.nextInt(10); 2927 StringBuffer baseBuffer = new StringBuffer(100); 2928 String leadingString = getRandomAlphaString(leadingChars); 2929 baseBuffer.append(leadingString); 2930 2931 // Create 5 groups of random number of random chars 2932 // Create the string to substitute 2933 // Create the pattern string to search for 2934 StringBuffer bufferToSub = new StringBuffer(25); 2935 StringBuffer bufferToPat = new StringBuffer(50); 2936 String[] groups = new String[5]; 2937 for(int i=0; i<5; i++) { 2938 int aGroupSize = generator.nextInt(5)+1; 2939 groups[i] = getRandomAlphaString(aGroupSize); 2940 bufferToSub.append(groups[i]); 2941 bufferToPat.append('('); 2942 bufferToPat.append(groups[i]); 2943 bufferToPat.append(')'); 2944 } 2945 String stringToSub = bufferToSub.toString(); 2946 String pattern = bufferToPat.toString(); 2947 2948 // Place sub string into working string at random index 2949 baseBuffer.append(stringToSub); 2950 2951 // Append random chars to end 2952 int trailingChars = generator.nextInt(10); 2953 String trailingString = getRandomAlphaString(trailingChars); 2954 baseBuffer.append(trailingString); 2955 String baseString = baseBuffer.toString(); 2956 2957 // Create test pattern and matcher 2958 Pattern p = Pattern.compile(pattern); 2959 Matcher m = p.matcher(baseString); 2960 2961 // Reject candidate if pattern happens to start early 2962 m.find(); 2963 if (m.start() < leadingChars) 2964 continue; 2965 2966 // Reject candidate if more than one match 2967 if (m.find()) 2968 continue; 2969 2970 // Construct a replacement string with : 2971 // random group + random string + random group 2972 StringBuffer bufferToRep = new StringBuffer(); 2973 int groupIndex1 = generator.nextInt(5); 2974 bufferToRep.append("$" + (groupIndex1 + 1)); 2975 String randomMidString = getRandomAlphaString(5); 2976 bufferToRep.append(randomMidString); 2977 int groupIndex2 = generator.nextInt(5); 2978 bufferToRep.append("$" + (groupIndex2 + 1)); 2979 String replacement = bufferToRep.toString(); 2980 2981 // Do the replacement 2982 String result = m.replaceAll(replacement); 2983 2984 // Construct expected result 2985 StringBuffer bufferToRes = new StringBuffer(); 2986 bufferToRes.append(leadingString); 2987 bufferToRes.append(groups[groupIndex1]); 2988 bufferToRes.append(randomMidString); 2989 bufferToRes.append(groups[groupIndex2]); 2990 bufferToRes.append(trailingString); 2991 String expectedResult = bufferToRes.toString(); 2992 2993 // Check results 2994 if (!result.equals(expectedResult)) 2995 failCount++; 2996 } 2997 2998 report("Substitution Basher"); 2999 } 3000 3001 /** 3002 * Checks the handling of some escape sequences that the Pattern 3003 * class should process instead of the java compiler. These are 3004 * not in the file because the escapes should be be processed 3005 * by the Pattern class when the regex is compiled. 3006 */ 3007 private static void escapes() throws Exception { 3008 Pattern p = Pattern.compile("\\043"); 3009 Matcher m = p.matcher("#"); 3010 if (!m.find()) 3011 failCount++; 3012 3013 p = Pattern.compile("\\x23"); 3014 m = p.matcher("#"); 3015 if (!m.find()) 3016 failCount++; 3017 3018 p = Pattern.compile("\\u0023"); 3019 m = p.matcher("#"); 3020 if (!m.find()) 3021 failCount++; 3022 3023 report("Escape sequences"); 3024 } 3025 3026 /** 3027 * Checks the handling of blank input situations. These 3028 * tests are incompatible with my test file format. 3029 */ 3030 private static void blankInput() throws Exception { 3031 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3032 Matcher m = p.matcher(""); 3033 if (m.find()) 3034 failCount++; 3035 3036 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3037 m = p.matcher(""); 3038 if (!m.find()) 3039 failCount++; 3040 3041 p = Pattern.compile("abc"); 3042 m = p.matcher(""); 3043 if (m.find()) 3044 failCount++; 3045 3046 p = Pattern.compile("a*"); 3047 m = p.matcher(""); 3048 if (!m.find()) 3049 failCount++; 3050 3051 report("Blank input"); 3052 } 3053 3054 /** 3055 * Tests the Boyer-Moore pattern matching of a character sequence 3056 * on randomly generated patterns. 3057 */ 3058 private static void bm() throws Exception { 3059 doBnM('a'); 3060 report("Boyer Moore (ASCII)"); 3061 3062 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3063 report("Boyer Moore (Supplementary)"); 3064 } 3065 3066 private static void doBnM(int baseCharacter) throws Exception { 3067 int achar=0; 3068 3069 for (int i=0; i<100; i++) { 3070 // Create a short pattern to search for 3071 int patternLength = generator.nextInt(7) + 4; 3072 StringBuffer patternBuffer = new StringBuffer(patternLength); 3073 for (int x=0; x<patternLength; x++) { 3074 int ch = baseCharacter + generator.nextInt(26); 3075 if (Character.isSupplementaryCodePoint(ch)) { 3076 patternBuffer.append(Character.toChars(ch)); 3077 } else { 3078 patternBuffer.append((char)ch); 3079 } 3080 } 3081 String pattern = patternBuffer.toString(); 3082 Pattern p = Pattern.compile(pattern); 3083 3084 // Create a buffer with random ASCII chars that does 3085 // not match the sample 3086 String toSearch = null; 3087 StringBuffer s = null; 3088 Matcher m = p.matcher(""); 3089 do { 3090 s = new StringBuffer(100); 3091 for (int x=0; x<100; x++) { 3092 int ch = baseCharacter + generator.nextInt(26); 3093 if (Character.isSupplementaryCodePoint(ch)) { 3094 s.append(Character.toChars(ch)); 3095 } else { 3096 s.append((char)ch); 3097 } 3098 } 3099 toSearch = s.toString(); 3100 m.reset(toSearch); 3101 } while (m.find()); 3102 3103 // Insert the pattern at a random spot 3104 int insertIndex = generator.nextInt(99); 3105 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3106 insertIndex++; 3107 s = s.insert(insertIndex, pattern); 3108 toSearch = s.toString(); 3109 3110 // Make sure that the pattern is found 3111 m.reset(toSearch); 3112 if (!m.find()) 3113 failCount++; 3114 3115 // Make sure that the match text is the pattern 3116 if (!m.group().equals(pattern)) 3117 failCount++; 3118 3119 // Make sure match occured at insertion point 3120 if (m.start() != insertIndex) 3121 failCount++; 3122 } 3123 } 3124 3125 /** 3126 * Tests the matching of slices on randomly generated patterns. 3127 * The Boyer-Moore optimization is not done on these patterns 3128 * because it uses unicode case folding. 3129 */ 3130 private static void slice() throws Exception { 3131 doSlice(Character.MAX_VALUE); 3132 report("Slice"); 3133 3134 doSlice(Character.MAX_CODE_POINT); 3135 report("Slice (Supplementary)"); 3136 } 3137 3138 private static void doSlice(int maxCharacter) throws Exception { 3139 Random generator = new Random(); 3140 int achar=0; 3141 3142 for (int i=0; i<100; i++) { 3143 // Create a short pattern to search for 3144 int patternLength = generator.nextInt(7) + 4; 3145 StringBuffer patternBuffer = new StringBuffer(patternLength); 3146 for (int x=0; x<patternLength; x++) { 3147 int randomChar = 0; 3148 while (!Character.isLetterOrDigit(randomChar)) 3149 randomChar = generator.nextInt(maxCharacter); 3150 if (Character.isSupplementaryCodePoint(randomChar)) { 3151 patternBuffer.append(Character.toChars(randomChar)); 3152 } else { 3153 patternBuffer.append((char) randomChar); 3154 } 3155 } 3156 String pattern = patternBuffer.toString(); 3157 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3158 3159 // Create a buffer with random chars that does not match the sample 3160 String toSearch = null; 3161 StringBuffer s = null; 3162 Matcher m = p.matcher(""); 3163 do { 3164 s = new StringBuffer(100); 3165 for (int x=0; x<100; x++) { 3166 int randomChar = 0; 3167 while (!Character.isLetterOrDigit(randomChar)) 3168 randomChar = generator.nextInt(maxCharacter); 3169 if (Character.isSupplementaryCodePoint(randomChar)) { 3170 s.append(Character.toChars(randomChar)); 3171 } else { 3172 s.append((char) randomChar); 3173 } 3174 } 3175 toSearch = s.toString(); 3176 m.reset(toSearch); 3177 } while (m.find()); 3178 3179 // Insert the pattern at a random spot 3180 int insertIndex = generator.nextInt(99); 3181 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3182 insertIndex++; 3183 s = s.insert(insertIndex, pattern); 3184 toSearch = s.toString(); 3185 3186 // Make sure that the pattern is found 3187 m.reset(toSearch); 3188 if (!m.find()) 3189 failCount++; 3190 3191 // Make sure that the match text is the pattern 3192 if (!m.group().equals(pattern)) 3193 failCount++; 3194 3195 // Make sure match occured at insertion point 3196 if (m.start() != insertIndex) 3197 failCount++; 3198 } 3199 } 3200 3201 private static void explainFailure(String pattern, String data, 3202 String expected, String actual) { 3203 System.err.println("----------------------------------------"); 3204 System.err.println("Pattern = "+pattern); 3205 System.err.println("Data = "+data); 3206 System.err.println("Expected = " + expected); 3207 System.err.println("Actual = " + actual); 3208 } 3209 3210 private static void explainFailure(String pattern, String data, 3211 Throwable t) { 3212 System.err.println("----------------------------------------"); 3213 System.err.println("Pattern = "+pattern); 3214 System.err.println("Data = "+data); 3215 t.printStackTrace(System.err); 3216 } 3217 3218 // Testing examples from a file 3219 3220 /** 3221 * Goes through the file "TestCases.txt" and creates many patterns 3222 * described in the file, matching the patterns against input lines in 3223 * the file, and comparing the results against the correct results 3224 * also found in the file. The file format is described in comments 3225 * at the head of the file. 3226 */ 3227 private static void processFile(String fileName) throws Exception { 3228 File testCases = new File(System.getProperty("test.src", "."), 3229 fileName); 3230 FileInputStream in = new FileInputStream(testCases); 3231 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3232 3233 // Process next test case. 3234 String aLine; 3235 while((aLine = r.readLine()) != null) { 3236 // Read a line for pattern 3237 String patternString = grabLine(r); 3238 Pattern p = null; 3239 try { 3240 p = compileTestPattern(patternString); 3241 } catch (PatternSyntaxException e) { 3242 String dataString = grabLine(r); 3243 String expectedResult = grabLine(r); 3244 if (expectedResult.startsWith("error")) 3245 continue; 3246 explainFailure(patternString, dataString, e); 3247 failCount++; 3248 continue; 3249 } 3250 3251 // Read a line for input string 3252 String dataString = grabLine(r); 3253 Matcher m = p.matcher(dataString); 3254 StringBuffer result = new StringBuffer(); 3255 3256 // Check for IllegalStateExceptions before a match 3257 failCount += preMatchInvariants(m); 3258 3259 boolean found = m.find(); 3260 3261 if (found) 3262 failCount += postTrueMatchInvariants(m); 3263 else 3264 failCount += postFalseMatchInvariants(m); 3265 3266 if (found) { 3267 result.append("true "); 3268 result.append(m.group(0) + " "); 3269 } else { 3270 result.append("false "); 3271 } 3272 3273 result.append(m.groupCount()); 3274 3275 if (found) { 3276 for (int i=1; i<m.groupCount()+1; i++) 3277 if (m.group(i) != null) 3278 result.append(" " +m.group(i)); 3279 } 3280 3281 // Read a line for the expected result 3282 String expectedResult = grabLine(r); 3283 3284 if (!result.toString().equals(expectedResult)) { 3285 explainFailure(patternString, dataString, expectedResult, result.toString()); 3286 failCount++; 3287 } 3288 } 3289 3290 report(fileName); 3291 } 3292 3293 private static int preMatchInvariants(Matcher m) { 3294 int failCount = 0; 3295 try { 3296 m.start(); 3297 failCount++; 3298 } catch (IllegalStateException ise) {} 3299 try { 3300 m.end(); 3301 failCount++; 3302 } catch (IllegalStateException ise) {} 3303 try { 3304 m.group(); 3305 failCount++; 3306 } catch (IllegalStateException ise) {} 3307 return failCount; 3308 } 3309 3310 private static int postFalseMatchInvariants(Matcher m) { 3311 int failCount = 0; 3312 try { 3313 m.group(); 3314 failCount++; 3315 } catch (IllegalStateException ise) {} 3316 try { 3317 m.start(); 3318 failCount++; 3319 } catch (IllegalStateException ise) {} 3320 try { 3321 m.end(); 3322 failCount++; 3323 } catch (IllegalStateException ise) {} 3324 return failCount; 3325 } 3326 3327 private static int postTrueMatchInvariants(Matcher m) { 3328 int failCount = 0; 3329 //assert(m.start() = m.start(0); 3330 if (m.start() != m.start(0)) 3331 failCount++; 3332 //assert(m.end() = m.end(0); 3333 if (m.start() != m.start(0)) 3334 failCount++; 3335 //assert(m.group() = m.group(0); 3336 if (!m.group().equals(m.group(0))) 3337 failCount++; 3338 try { 3339 m.group(50); 3340 failCount++; 3341 } catch (IndexOutOfBoundsException ise) {} 3342 3343 return failCount; 3344 } 3345 3346 private static Pattern compileTestPattern(String patternString) { 3347 if (!patternString.startsWith("'")) { 3348 return Pattern.compile(patternString); 3349 } 3350 3351 int break1 = patternString.lastIndexOf("'"); 3352 String flagString = patternString.substring( 3353 break1+1, patternString.length()); 3354 patternString = patternString.substring(1, break1); 3355 3356 if (flagString.equals("i")) 3357 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3358 3359 if (flagString.equals("m")) 3360 return Pattern.compile(patternString, Pattern.MULTILINE); 3361 3362 return Pattern.compile(patternString); 3363 } 3364 3365 /** 3366 * Reads a line from the input file. Keeps reading lines until a non 3367 * empty non comment line is read. If the line contains a \n then 3368 * these two characters are replaced by a newline char. If a \\uxxxx 3369 * sequence is read then the sequence is replaced by the unicode char. 3370 */ 3371 private static String grabLine(BufferedReader r) throws Exception { 3372 int index = 0; 3373 String line = r.readLine(); 3374 while (line.startsWith("//") || line.length() < 1) 3375 line = r.readLine(); 3376 while ((index = line.indexOf("\\n")) != -1) { 3377 StringBuffer temp = new StringBuffer(line); 3378 temp.replace(index, index+2, "\n"); 3379 line = temp.toString(); 3380 } 3381 while ((index = line.indexOf("\\u")) != -1) { 3382 StringBuffer temp = new StringBuffer(line); 3383 String value = temp.substring(index+2, index+6); 3384 char aChar = (char)Integer.parseInt(value, 16); 3385 String unicodeChar = "" + aChar; 3386 temp.replace(index, index+6, unicodeChar); 3387 line = temp.toString(); 3388 } 3389 3390 return line; 3391 } 3392 3393 private static void check(Pattern p, String s, String g, String expected) { 3394 Matcher m = p.matcher(s); 3395 m.find(); 3396 if (!m.group(g).equals(expected) || 3397 s.charAt(m.start(g)) != expected.charAt(0) || 3398 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3399 failCount++; 3400 } 3401 3402 private static void checkReplaceFirst(String p, String s, String r, String expected) 3403 { 3404 if (!expected.equals(Pattern.compile(p) 3405 .matcher(s) 3406 .replaceFirst(r))) 3407 failCount++; 3408 } 3409 3410 private static void checkReplaceAll(String p, String s, String r, String expected) 3411 { 3412 if (!expected.equals(Pattern.compile(p) 3413 .matcher(s) 3414 .replaceAll(r))) 3415 failCount++; 3416 } 3417 3418 private static void checkExpectedFail(String p) { 3419 try { 3420 Pattern.compile(p); 3421 } catch (PatternSyntaxException pse) { 3422 //pse.printStackTrace(); 3423 return; 3424 } 3425 failCount++; 3426 } 3427 3428 private static void checkExpectedIAE(Matcher m, String g) { 3429 m.find(); 3430 try { 3431 m.group(g); 3432 } catch (IllegalArgumentException x) { 3433 //iae.printStackTrace(); 3434 try { 3435 m.start(g); 3436 } catch (IllegalArgumentException xx) { 3437 try { 3438 m.start(g); 3439 } catch (IllegalArgumentException xxx) { 3440 return; 3441 } 3442 } 3443 } 3444 failCount++; 3445 } 3446 3447 private static void checkExpectedNPE(Matcher m) { 3448 m.find(); 3449 try { 3450 m.group(null); 3451 } catch (NullPointerException x) { 3452 try { 3453 m.start(null); 3454 } catch (NullPointerException xx) { 3455 try { 3456 m.end(null); 3457 } catch (NullPointerException xxx) { 3458 return; 3459 } 3460 } 3461 } 3462 failCount++; 3463 } 3464 3465 private static void namedGroupCaptureTest() throws Exception { 3466 check(Pattern.compile("x+(?<gname>y+)z+"), 3467 "xxxyyyzzz", 3468 "gname", 3469 "yyy"); 3470 3471 check(Pattern.compile("x+(?<gname8>y+)z+"), 3472 "xxxyyyzzz", 3473 "gname8", 3474 "yyy"); 3475 3476 //backref 3477 Pattern pattern = Pattern.compile("(a*)bc\\1"); 3478 check(pattern, "zzzaabcazzz", true); // found "abca" 3479 3480 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 3481 "zzzaabcaazzz", true); 3482 3483 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 3484 "abcdefabc", true); 3485 3486 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 3487 "abcdefghijkk", true); 3488 3489 // Supplementary character tests 3490 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3491 toSupplementaries("zzzaabcazzz"), true); 3492 3493 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3494 toSupplementaries("zzzaabcaazzz"), true); 3495 3496 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 3497 toSupplementaries("abcdefabc"), true); 3498 3499 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 3500 "(?<gname>" + 3501 toSupplementaries("k)") + "\\k<gname>"), 3502 toSupplementaries("abcdefghijkk"), true); 3503 3504 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 3505 "xxxyyyzzzyyy", 3506 "gname", 3507 "yyy"); 3508 3509 //replaceFirst/All 3510 checkReplaceFirst("(?<gn>ab)(c*)", 3511 "abccczzzabcczzzabccc", 3512 "${gn}", 3513 "abzzzabcczzzabccc"); 3514 3515 checkReplaceAll("(?<gn>ab)(c*)", 3516 "abccczzzabcczzzabccc", 3517 "${gn}", 3518 "abzzzabzzzab"); 3519 3520 3521 checkReplaceFirst("(?<gn>ab)(c*)", 3522 "zzzabccczzzabcczzzabccczzz", 3523 "${gn}", 3524 "zzzabzzzabcczzzabccczzz"); 3525 3526 checkReplaceAll("(?<gn>ab)(c*)", 3527 "zzzabccczzzabcczzzabccczzz", 3528 "${gn}", 3529 "zzzabzzzabzzzabzzz"); 3530 3531 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 3532 "zzzabccczzzabcczzzabccczzz", 3533 "${gn2}", 3534 "zzzccczzzabcczzzabccczzz"); 3535 3536 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 3537 "zzzabccczzzabcczzzabccczzz", 3538 "${gn2}", 3539 "zzzccczzzcczzzccczzz"); 3540 3541 //toSupplementaries("(ab)(c*)")); 3542 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 3543 ")(?<gn2>" + toSupplementaries("c") + "*)", 3544 toSupplementaries("abccczzzabcczzzabccc"), 3545 "${gn1}", 3546 toSupplementaries("abzzzabcczzzabccc")); 3547 3548 3549 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 3550 ")(?<gn2>" + toSupplementaries("c") + "*)", 3551 toSupplementaries("abccczzzabcczzzabccc"), 3552 "${gn1}", 3553 toSupplementaries("abzzzabzzzab")); 3554 3555 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 3556 ")(?<gn2>" + toSupplementaries("c") + "*)", 3557 toSupplementaries("abccczzzabcczzzabccc"), 3558 "${gn2}", 3559 toSupplementaries("ccczzzabcczzzabccc")); 3560 3561 3562 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 3563 ")(?<gn2>" + toSupplementaries("c") + "*)", 3564 toSupplementaries("abccczzzabcczzzabccc"), 3565 "${gn2}", 3566 toSupplementaries("ccczzzcczzzccc")); 3567 3568 checkReplaceFirst("(?<dog>Dog)AndCat", 3569 "zzzDogAndCatzzzDogAndCatzzz", 3570 "${dog}", 3571 "zzzDogzzzDogAndCatzzz"); 3572 3573 3574 checkReplaceAll("(?<dog>Dog)AndCat", 3575 "zzzDogAndCatzzzDogAndCatzzz", 3576 "${dog}", 3577 "zzzDogzzzDogzzz"); 3578 3579 // backref in Matcher & String 3580 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 3581 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 3582 failCount++; 3583 3584 // negative 3585 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 3586 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 3587 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 3588 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 3589 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 3590 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 3591 "gnameX"); 3592 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 3593 report("NamedGroupCapture"); 3594 } 3595 3596 // This is for bug 6969132 3597 private static void nonBmpClassComplementTest() throws Exception { 3598 Pattern p = Pattern.compile("\\P{Lu}"); 3599 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3600 if (m.find() && m.start() == 1) 3601 failCount++; 3602 3603 // from a unicode category 3604 p = Pattern.compile("\\P{Lu}"); 3605 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3606 if (m.find()) 3607 failCount++; 3608 if (!m.hitEnd()) 3609 failCount++; 3610 3611 // block 3612 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 3613 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3614 if (m.find() && m.start() == 1) 3615 failCount++; 3616 3617 report("NonBmpClassComplement"); 3618 } 3619 3620 private static void unicodePropertiesTest() throws Exception { 3621 // different forms 3622 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 3623 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 3624 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 3625 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 3626 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 3627 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 3628 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 3629 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 3630 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 3631 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 3632 failCount++; 3633 3634 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 3635 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 3636 Matcher lastSM = common; 3637 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 3638 3639 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 3640 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 3641 Matcher lastBM = latin; 3642 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 3643 3644 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 3645 if (cp >= 0x30000 && (cp & 0x70) == 0){ 3646 continue; // only pick couple code points, they are the same 3647 } 3648 3649 // Unicode Script 3650 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 3651 Matcher m; 3652 String str = new String(Character.toChars(cp)); 3653 if (script == lastScript) { 3654 m = lastSM; 3655 m.reset(str); 3656 } else { 3657 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 3658 } 3659 if (!m.matches()) { 3660 failCount++; 3661 } 3662 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 3663 other.reset(str); 3664 if (other.matches()) { 3665 failCount++; 3666 } 3667 lastSM = m; 3668 lastScript = script; 3669 3670 // Unicode Block 3671 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 3672 if (block == null) { 3673 //System.out.printf("Not a Block: cp=%x%n", cp); 3674 continue; 3675 } 3676 if (block == lastBlock) { 3677 m = lastBM; 3678 m.reset(str); 3679 } else { 3680 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 3681 } 3682 if (!m.matches()) { 3683 failCount++; 3684 } 3685 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 3686 other.reset(str); 3687 if (other.matches()) { 3688 failCount++; 3689 } 3690 lastBM = m; 3691 lastBlock = block; 3692 } 3693 report("unicodeProperties"); 3694 } 3695 3696 private static void unicodeHexNotationTest() throws Exception { 3697 3698 // negative 3699 checkExpectedFail("\\x{-23}"); 3700 checkExpectedFail("\\x{110000}"); 3701 checkExpectedFail("\\x{}"); 3702 checkExpectedFail("\\x{AB[ef]"); 3703 3704 // codepoint 3705 check("^\\x{1033c}$", "\uD800\uDF3C", true); 3706 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 3707 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 3708 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 3709 3710 // in class 3711 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 3712 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 3713 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 3714 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 3715 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 3716 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 3717 3718 for (int cp = 0; cp <= 0x10FFFF; cp++) { 3719 String s = "A" + new String(Character.toChars(cp)) + "B"; 3720 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp) 3721 : String.format("\\u%04x\\u%04x", 3722 (int) Character.toChars(cp)[0], 3723 (int) Character.toChars(cp)[1]); 3724 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 3725 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 3726 failCount++; 3727 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 3728 failCount++; 3729 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 3730 failCount++; 3731 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 3732 failCount++; 3733 } 3734 report("unicodeHexNotation"); 3735 } 3736 3737 private static void unicodeClassesTest() throws Exception { 3738 3739 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 3740 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 3741 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 3742 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 3743 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 3744 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 3745 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 3746 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 3747 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 3748 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 3749 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 3750 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 3751 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 3752 Matcher bound = Pattern.compile("\\b").matcher(""); 3753 Matcher word = Pattern.compile("\\w++").matcher(""); 3754 // UNICODE_CHARACTER_CLASS 3755 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3756 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3757 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3758 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3759 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3760 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3761 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3762 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3763 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3764 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3765 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3766 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3767 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3768 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3769 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3770 // embedded flag (?U) 3771 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3772 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3773 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3774 3775 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 3776 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3777 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3778 // properties 3779 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 3780 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 3781 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 3782 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 3783 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher(""); 3784 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 3785 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 3786 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 3787 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 3788 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 3789 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 3790 3791 // javaMethod 3792 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 3793 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 3794 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 3795 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 3796 3797 for (int cp = 1; cp < 0x30000; cp++) { 3798 String str = new String(Character.toChars(cp)); 3799 int type = Character.getType(cp); 3800 if (// lower 3801 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 3802 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 3803 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 3804 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 3805 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 3806 // upper 3807 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 3808 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 3809 Character.isUpperCase(cp) != upperP.reset(str).matches() || 3810 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 3811 // alpha 3812 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 3813 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 3814 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 3815 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 3816 // digit 3817 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 3818 Character.isDigit(cp) != digitU.reset(str).matches() || 3819 // alnum 3820 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 3821 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 3822 // punct 3823 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 3824 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 3825 // graph 3826 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 3827 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 3828 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 3829 // blank 3830 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 3831 != blank.reset(str).matches() || 3832 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 3833 // print 3834 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 3835 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 3836 // cntrl 3837 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 3838 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 3839 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 3840 // hexdigit 3841 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 3842 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 3843 // space 3844 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 3845 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 3846 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 3847 // word 3848 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 3849 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 3850 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 3851 // bwordb 3852 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 3853 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 3854 // properties 3855 Character.isTitleCase(cp) != titleP.reset(str).matches() || 3856 Character.isLetter(cp) != letterP.reset(str).matches()|| 3857 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 3858 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 3859 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 3860 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 3861 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches()) 3862 failCount++; 3863 } 3864 3865 // bounds/word align 3866 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 3867 if (!bwbU.reset("\u0180sherman\u0400").matches()) 3868 failCount++; 3869 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 3870 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 3871 failCount++; 3872 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 3873 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 3874 failCount++; 3875 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 3876 failCount++; 3877 report("unicodePredefinedClasses"); 3878 } 3879 3880 private static void horizontalAndVerticalWSTest() throws Exception { 3881 String hws = new String (new char[] { 3882 0x09, 0x20, 0xa0, 0x1680, 0x180e, 3883 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 3884 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 3885 0x202f, 0x205f, 0x3000 }); 3886 String vws = new String (new char[] { 3887 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 3888 if (!Pattern.compile("\\h+").matcher(hws).matches() || 3889 !Pattern.compile("[\\h]+").matcher(hws).matches()) 3890 failCount++; 3891 if (Pattern.compile("\\H").matcher(hws).find() || 3892 Pattern.compile("[\\H]").matcher(hws).find()) 3893 failCount++; 3894 if (!Pattern.compile("\\v+").matcher(vws).matches() || 3895 !Pattern.compile("[\\v]+").matcher(vws).matches()) 3896 failCount++; 3897 if (Pattern.compile("\\V").matcher(vws).find() || 3898 Pattern.compile("[\\V]").matcher(vws).find()) 3899 failCount++; 3900 String prefix = "abcd"; 3901 String suffix = "efgh"; 3902 String ng = "A"; 3903 for (int i = 0; i < hws.length(); i++) { 3904 String c = String.valueOf(hws.charAt(i)); 3905 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 3906 if (!m.find() || !c.equals(m.group())) 3907 failCount++; 3908 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 3909 if (!m.find() || !c.equals(m.group())) 3910 failCount++; 3911 3912 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 3913 if (!m.find() || !ng.equals(m.group())) 3914 failCount++; 3915 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 3916 if (!m.find() || !ng.equals(m.group())) 3917 failCount++; 3918 } 3919 for (int i = 0; i < vws.length(); i++) { 3920 String c = String.valueOf(vws.charAt(i)); 3921 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 3922 if (!m.find() || !c.equals(m.group())) 3923 failCount++; 3924 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 3925 if (!m.find() || !c.equals(m.group())) 3926 failCount++; 3927 3928 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 3929 if (!m.find() || !ng.equals(m.group())) 3930 failCount++; 3931 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 3932 if (!m.find() || !ng.equals(m.group())) 3933 failCount++; 3934 } 3935 // \v in range is interpreted as 0x0B. This is the undocumented behavior 3936 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 3937 failCount++; 3938 report("horizontalAndVerticalWSTest"); 3939 } 3940 3941 private static void linebreakTest() throws Exception { 3942 String linebreaks = new String (new char[] { 3943 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 3944 String crnl = "\r\n"; 3945 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() || 3946 !Pattern.compile("\\R").matcher(crnl).matches() || 3947 Pattern.compile("\\R\\R").matcher(crnl).matches()) 3948 failCount++; 3949 report("linebreakTest"); 3950 } 3951 3952 // #7189363 3953 private static void branchTest() throws Exception { 3954 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 3955 !Pattern.compile("(a)+bc|d").matcher("d").find() || 3956 !Pattern.compile("(a)*bc|d").matcher("d").find() || 3957 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 3958 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 3959 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 3960 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 3961 !Pattern.compile("(a)++bc|d").matcher("d").find() || 3962 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 3963 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 3964 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 3965 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 3966 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 3967 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 3968 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 3969 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 3970 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 3971 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 3972 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 3973 !Pattern.compile("(a)??bc|de").matcher("de").find() || 3974 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 3975 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 3976 failCount++; 3977 report("branchTest"); 3978 } 3979 3980 // This test is for 8007395 3981 private static void groupCurlyNotFoundSuppTest() throws Exception { 3982 String input = "test this as \ud83d\ude0d"; 3983 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 3984 "test(.)*(@[a-zA-Z.]+)", 3985 "test([^B])+(@[a-zA-Z.]+)", 3986 "test([^B])*(@[a-zA-Z.]+)", 3987 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 3988 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 3989 }) { 3990 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 3991 .matcher(input); 3992 try { 3993 if (m.find()) { 3994 failCount++; 3995 } 3996 } catch (Exception x) { 3997 failCount++; 3998 } 3999 } 4000 report("GroupCurly NotFoundSupp"); 4001 } 4002 4003 // This test is for 8023647 4004 private static void groupCurlyBackoffTest() throws Exception { 4005 if (!"abc1c".matches("(\\w)+1\\1") || 4006 "abc11".matches("(\\w)+1\\1")) { 4007 failCount++; 4008 } 4009 report("GroupCurly backoff"); 4010 } 4011 4012 // This test is for 8012646 4013 private static void patternAsPredicate() throws Exception { 4014 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4015 4016 if (p.test("")) { 4017 failCount++; 4018 } 4019 if (!p.test("word")) { 4020 failCount++; 4021 } 4022 if (p.test("1234")) { 4023 failCount++; 4024 } 4025 report("Pattern.asPredicate"); 4026 } 4027 }