1 /* 2 * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 37 * 6328855 6192895 6345469 6988218 6693451 7006761 8140212 8143282 8158482 38 * 8176029 39 * 40 * @library /test/lib 41 * @run main RegExTest 42 * @key randomness 43 */ 44 45 import java.util.function.Function; 46 import java.util.regex.*; 47 import java.util.Random; 48 import java.util.Scanner; 49 import java.io.*; 50 import java.nio.file.*; 51 import java.util.*; 52 import java.nio.CharBuffer; 53 import java.util.function.Predicate; 54 import jdk.test.lib.RandomFactory; 55 56 /** 57 * This is a test class created to check the operation of 58 * the Pattern and Matcher classes. 59 */ 60 public class RegExTest { 61 62 private static Random generator = RandomFactory.getRandom(); 63 private static boolean failure = false; 64 private static int failCount = 0; 65 private static String firstFailure = null; 66 67 /** 68 * Main to interpret arguments and run several tests. 
69 * 70 */ 71 public static void main(String[] args) throws Exception { 72 // Most of the tests are in a file 73 processFile("TestCases.txt"); 74 //processFile("PerlCases.txt"); 75 processFile("BMPTestCases.txt"); 76 processFile("SupplementaryTestCases.txt"); 77 78 // These test many randomly generated char patterns 79 bm(); 80 slice(); 81 82 // These are hard to put into the file 83 escapes(); 84 blankInput(); 85 86 // Substitition tests on randomly generated sequences 87 globalSubstitute(); 88 stringbufferSubstitute(); 89 stringbuilderSubstitute(); 90 91 substitutionBasher(); 92 substitutionBasher2(); 93 94 // Canonical Equivalence 95 ceTest(); 96 97 // Anchors 98 anchorTest(); 99 100 // boolean match calls 101 matchesTest(); 102 lookingAtTest(); 103 104 // Pattern API 105 patternMatchesTest(); 106 107 // Misc 108 lookbehindTest(); 109 nullArgumentTest(); 110 backRefTest(); 111 groupCaptureTest(); 112 caretTest(); 113 charClassTest(); 114 emptyPatternTest(); 115 findIntTest(); 116 group0Test(); 117 longPatternTest(); 118 octalTest(); 119 ampersandTest(); 120 negationTest(); 121 splitTest(); 122 appendTest(); 123 caseFoldingTest(); 124 commentsTest(); 125 unixLinesTest(); 126 replaceFirstTest(); 127 gTest(); 128 zTest(); 129 serializeTest(); 130 reluctantRepetitionTest(); 131 multilineDollarTest(); 132 dollarAtEndTest(); 133 caretBetweenTerminatorsTest(); 134 // This RFE rejected in Tiger numOccurrencesTest(); 135 javaCharClassTest(); 136 nonCaptureRepetitionTest(); 137 notCapturedGroupCurlyMatchTest(); 138 escapedSegmentTest(); 139 literalPatternTest(); 140 literalReplacementTest(); 141 regionTest(); 142 toStringTest(); 143 negatedCharClassTest(); 144 findFromTest(); 145 boundsTest(); 146 unicodeWordBoundsTest(); 147 caretAtEndTest(); 148 wordSearchTest(); 149 hitEndTest(); 150 toMatchResultTest(); 151 toMatchResultTest2(); 152 surrogatesInClassTest(); 153 removeQEQuotingTest(); 154 namedGroupCaptureTest(); 155 nonBmpClassComplementTest(); 156 
unicodePropertiesTest(); 157 unicodeHexNotationTest(); 158 unicodeClassesTest(); 159 unicodeCharacterNameTest(); 160 horizontalAndVerticalWSTest(); 161 linebreakTest(); 162 branchTest(); 163 groupCurlyNotFoundSuppTest(); 164 groupCurlyBackoffTest(); 165 patternAsPredicate(); 166 invalidFlags(); 167 embeddedFlags(); 168 grapheme(); 169 expoBacktracking(); 170 171 if (failure) { 172 throw new 173 RuntimeException("RegExTest failed, 1st failure: " + 174 firstFailure); 175 } else { 176 System.err.println("OKAY: All tests passed."); 177 } 178 } 179 180 // Utility functions 181 182 private static String getRandomAlphaString(int length) { 183 StringBuffer buf = new StringBuffer(length); 184 for (int i=0; i<length; i++) { 185 char randChar = (char)(97 + generator.nextInt(26)); 186 buf.append(randChar); 187 } 188 return buf.toString(); 189 } 190 191 private static void check(Matcher m, String expected) { 192 m.find(); 193 if (!m.group().equals(expected)) 194 failCount++; 195 } 196 197 private static void check(Matcher m, String result, boolean expected) { 198 m.find(); 199 if (m.group().equals(result) != expected) 200 failCount++; 201 } 202 203 private static void check(Pattern p, String s, boolean expected) { 204 if (p.matcher(s).find() != expected) 205 failCount++; 206 } 207 208 private static void check(String p, String s, boolean expected) { 209 Matcher matcher = Pattern.compile(p).matcher(s); 210 if (matcher.find() != expected) 211 failCount++; 212 } 213 214 private static void check(String p, char c, boolean expected) { 215 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 216 Pattern pattern = Pattern.compile(propertyPattern); 217 char[] ca = new char[1]; ca[0] = c; 218 Matcher matcher = pattern.matcher(new String(ca)); 219 if (!matcher.find()) 220 failCount++; 221 } 222 223 private static void check(String p, int codePoint, boolean expected) { 224 String propertyPattern = expected ? 
"\\p" + p : "\\P" + p; 225 Pattern pattern = Pattern.compile(propertyPattern); 226 char[] ca = Character.toChars(codePoint); 227 Matcher matcher = pattern.matcher(new String(ca)); 228 if (!matcher.find()) 229 failCount++; 230 } 231 232 private static void check(String p, int flag, String input, String s, 233 boolean expected) 234 { 235 Pattern pattern = Pattern.compile(p, flag); 236 Matcher matcher = pattern.matcher(input); 237 if (expected) 238 check(matcher, s, expected); 239 else 240 check(pattern, input, false); 241 } 242 243 private static void report(String testName) { 244 int spacesToAdd = 30 - testName.length(); 245 StringBuffer paddedNameBuffer = new StringBuffer(testName); 246 for (int i=0; i<spacesToAdd; i++) 247 paddedNameBuffer.append(" "); 248 String paddedName = paddedNameBuffer.toString(); 249 System.err.println(paddedName + ": " + 250 (failCount==0 ? "Passed":"Failed("+failCount+")")); 251 if (failCount > 0) { 252 failure = true; 253 254 if (firstFailure == null) { 255 firstFailure = testName; 256 } 257 } 258 259 failCount = 0; 260 } 261 262 /** 263 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 264 * supplementary characters. This method does NOT fully take care 265 * of the regex syntax. 
266 */ 267 private static String toSupplementaries(String s) { 268 int length = s.length(); 269 StringBuffer sb = new StringBuffer(length * 2); 270 271 for (int i = 0; i < length; ) { 272 char c = s.charAt(i++); 273 if (c == '\\') { 274 sb.append(c); 275 if (i < length) { 276 c = s.charAt(i++); 277 sb.append(c); 278 if (c == 'u') { 279 // assume no syntax error 280 sb.append(s.charAt(i++)); 281 sb.append(s.charAt(i++)); 282 sb.append(s.charAt(i++)); 283 sb.append(s.charAt(i++)); 284 } 285 } 286 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 287 sb.append('\ud800').append((char)('\udc00'+c)); 288 } else { 289 sb.append(c); 290 } 291 } 292 return sb.toString(); 293 } 294 295 // Regular expression tests 296 297 // This is for bug 6178785 298 // Test if an expected NPE gets thrown when passing in a null argument 299 private static boolean check(Runnable test) { 300 try { 301 test.run(); 302 failCount++; 303 return false; 304 } catch (NullPointerException npe) { 305 return true; 306 } 307 } 308 309 private static void nullArgumentTest() { 310 check(() -> Pattern.compile(null)); 311 check(() -> Pattern.matches(null, null)); 312 check(() -> Pattern.matches("xyz", null)); 313 check(() -> Pattern.quote(null)); 314 check(() -> Pattern.compile("xyz").split(null)); 315 check(() -> Pattern.compile("xyz").matcher(null)); 316 317 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 318 m.matches(); 319 check(() -> m.appendTail((StringBuffer) null)); 320 check(() -> m.appendTail((StringBuilder)null)); 321 check(() -> m.replaceAll((String) null)); 322 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 323 check(() -> m.replaceFirst((String)null)); 324 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 325 check(() -> m.appendReplacement((StringBuffer)null, null)); 326 check(() -> m.appendReplacement((StringBuilder)null, null)); 327 check(() -> m.reset(null)); 328 check(() -> Matcher.quoteReplacement(null)); 329 //check(() -> 
m.usePattern(null)); 330 331 report("Null Argument"); 332 } 333 334 // This is for bug6635133 335 // Test if surrogate pair in Unicode escapes can be handled correctly. 336 private static void surrogatesInClassTest() throws Exception { 337 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 338 Matcher matcher = pattern.matcher("\ud834\udd22"); 339 if (!matcher.find()) 340 failCount++; 341 342 report("Surrogate pair in Unicode escape"); 343 } 344 345 // This is for bug6990617 346 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 347 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 348 // char is an octal digit. 349 private static void removeQEQuotingTest() throws Exception { 350 Pattern pattern = 351 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 352 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 353 if (!matcher.find()) 354 failCount++; 355 356 report("Remove Q/E Quoting"); 357 } 358 359 // This is for bug 4988891 360 // Test toMatchResult to see that it is a copy of the Matcher 361 // that is not affected by subsequent operations on the original 362 private static void toMatchResultTest() throws Exception { 363 Pattern pattern = Pattern.compile("squid"); 364 Matcher matcher = pattern.matcher( 365 "agiantsquidofdestinyasmallsquidoffate"); 366 matcher.find(); 367 int matcherStart1 = matcher.start(); 368 MatchResult mr = matcher.toMatchResult(); 369 if (mr == matcher) 370 failCount++; 371 int resultStart1 = mr.start(); 372 if (matcherStart1 != resultStart1) 373 failCount++; 374 matcher.find(); 375 int matcherStart2 = matcher.start(); 376 int resultStart2 = mr.start(); 377 if (matcherStart2 == resultStart2) 378 failCount++; 379 if (resultStart1 != resultStart2) 380 failCount++; 381 MatchResult mr2 = matcher.toMatchResult(); 382 if (mr == mr2) 383 failCount++; 384 if (mr2.start() != matcherStart2) 385 failCount++; 386 report("toMatchResult is a copy"); 387 } 388 389 private 
static void checkExpectedISE(Runnable test) { 390 try { 391 test.run(); 392 failCount++; 393 } catch (IllegalStateException x) { 394 } catch (IndexOutOfBoundsException xx) { 395 failCount++; 396 } 397 } 398 399 private static void checkExpectedIOOE(Runnable test) { 400 try { 401 test.run(); 402 failCount++; 403 } catch (IndexOutOfBoundsException x) {} 404 } 405 406 // This is for bug 8074678 407 // Test the result of toMatchResult throws ISE if no match is availble 408 private static void toMatchResultTest2() throws Exception { 409 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 410 matcher.find(); 411 MatchResult mr = matcher.toMatchResult(); 412 413 checkExpectedISE(() -> mr.start()); 414 checkExpectedISE(() -> mr.start(2)); 415 checkExpectedISE(() -> mr.end()); 416 checkExpectedISE(() -> mr.end(2)); 417 checkExpectedISE(() -> mr.group()); 418 checkExpectedISE(() -> mr.group(2)); 419 420 matcher = Pattern.compile("(match)").matcher("there is a match"); 421 matcher.find(); 422 MatchResult mr2 = matcher.toMatchResult(); 423 checkExpectedIOOE(() -> mr2.start(2)); 424 checkExpectedIOOE(() -> mr2.end(2)); 425 checkExpectedIOOE(() -> mr2.group(2)); 426 427 report("toMatchResult2 appropriate exceptions"); 428 } 429 430 // This is for bug 5013885 431 // Must test a slice to see if it reports hitEnd correctly 432 private static void hitEndTest() throws Exception { 433 // Basic test of Slice node 434 Pattern p = Pattern.compile("^squidattack"); 435 Matcher m = p.matcher("squack"); 436 m.find(); 437 if (m.hitEnd()) 438 failCount++; 439 m.reset("squid"); 440 m.find(); 441 if (!m.hitEnd()) 442 failCount++; 443 444 // Test Slice, SliceA and SliceU nodes 445 for (int i=0; i<3; i++) { 446 int flags = 0; 447 if (i==1) flags = Pattern.CASE_INSENSITIVE; 448 if (i==2) flags = Pattern.UNICODE_CASE; 449 p = Pattern.compile("^abc", flags); 450 m = p.matcher("ad"); 451 m.find(); 452 if (m.hitEnd()) 453 failCount++; 454 m.reset("ab"); 455 m.find(); 456 if 
(!m.hitEnd()) 457 failCount++; 458 } 459 460 // Test Boyer-Moore node 461 p = Pattern.compile("catattack"); 462 m = p.matcher("attack"); 463 m.find(); 464 if (!m.hitEnd()) 465 failCount++; 466 467 p = Pattern.compile("catattack"); 468 m = p.matcher("attackattackattackcatatta"); 469 m.find(); 470 if (!m.hitEnd()) 471 failCount++; 472 report("hitEnd from a Slice"); 473 } 474 475 // This is for bug 4997476 476 // It is weird code submitted by customer demonstrating a regression 477 private static void wordSearchTest() throws Exception { 478 String testString = new String("word1 word2 word3"); 479 Pattern p = Pattern.compile("\\b"); 480 Matcher m = p.matcher(testString); 481 int position = 0; 482 int start = 0; 483 while (m.find(position)) { 484 start = m.start(); 485 if (start == testString.length()) 486 break; 487 if (m.find(start+1)) { 488 position = m.start(); 489 } else { 490 position = testString.length(); 491 } 492 if (testString.substring(start, position).equals(" ")) 493 continue; 494 if (!testString.substring(start, position-1).startsWith("word")) 495 failCount++; 496 } 497 report("Customer word search"); 498 } 499 500 // This is for bug 4994840 501 private static void caretAtEndTest() throws Exception { 502 // Problem only occurs with multiline patterns 503 // containing a beginning-of-line caret "^" followed 504 // by an expression that also matches the empty string. 
505 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 506 Matcher matcher = pattern.matcher("\r"); 507 matcher.find(); 508 matcher.find(); 509 report("Caret at end"); 510 } 511 512 // This test is for 4979006 513 // Check to see if word boundary construct properly handles unicode 514 // non spacing marks 515 private static void unicodeWordBoundsTest() throws Exception { 516 String spaces = " "; 517 String wordChar = "a"; 518 String nsm = "\u030a"; 519 520 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 521 522 Pattern pattern = Pattern.compile("\\b"); 523 Matcher matcher = pattern.matcher(""); 524 // S=other B=word character N=non spacing mark .=word boundary 525 // SS.BB.SS 526 String input = spaces + wordChar + wordChar + spaces; 527 twoFindIndexes(input, matcher, 2, 4); 528 // SS.BBN.SS 529 input = spaces + wordChar +wordChar + nsm + spaces; 530 twoFindIndexes(input, matcher, 2, 5); 531 // SS.BN.SS 532 input = spaces + wordChar + nsm + spaces; 533 twoFindIndexes(input, matcher, 2, 4); 534 // SS.BNN.SS 535 input = spaces + wordChar + nsm + nsm + spaces; 536 twoFindIndexes(input, matcher, 2, 5); 537 // SSN.BB.SS 538 input = spaces + nsm + wordChar + wordChar + spaces; 539 twoFindIndexes(input, matcher, 3, 5); 540 // SS.BNB.SS 541 input = spaces + wordChar + nsm + wordChar + spaces; 542 twoFindIndexes(input, matcher, 2, 5); 543 // SSNNSS 544 input = spaces + nsm + nsm + spaces; 545 matcher.reset(input); 546 if (matcher.find()) 547 failCount++; 548 // SSN.BBN.SS 549 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 550 twoFindIndexes(input, matcher, 3, 6); 551 552 report("Unicode word boundary"); 553 } 554 555 private static void twoFindIndexes(String input, Matcher matcher, int a, 556 int b) throws Exception 557 { 558 matcher.reset(input); 559 matcher.find(); 560 if (matcher.start() != a) 561 failCount++; 562 matcher.find(); 563 if (matcher.start() != b) 564 failCount++; 565 } 566 567 // This test is for 6284152 568 static 
void check(String regex, String input, String[] expected) { 569 List<String> result = new ArrayList<String>(); 570 Pattern p = Pattern.compile(regex); 571 Matcher m = p.matcher(input); 572 while (m.find()) { 573 result.add(m.group()); 574 } 575 if (!Arrays.asList(expected).equals(result)) 576 failCount++; 577 } 578 579 private static void lookbehindTest() throws Exception { 580 //Positive 581 check("(?<=%.{0,5})foo\\d", 582 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 583 new String[]{"foo1", "foo2", "foo3"}); 584 585 //boundary at end of the lookbehind sub-regex should work consistently 586 //with the boundary just after the lookbehind sub-regex 587 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 588 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 589 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 590 check("(?<!abc \\b)foo", "abc foo", new String[0]); 591 592 //Negative 593 check("(?<!%.{0,5})foo\\d", 594 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 595 new String[] {"foo4", "foo5"}); 596 597 //Positive greedy 598 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 599 600 //Positive reluctant 601 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 602 603 //supplementary 604 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 605 new String[] {"fo\ud800\udc00o"}); 606 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 607 new String[] {"fo\ud800\udc00o"}); 608 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 609 new String[] {"fo\ud800\udc00o"}); 610 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 611 new String[] {"fo\ud800\udc00o"}); 612 report("Lookbehind"); 613 } 614 615 // This test is for 4938995 616 // Check to see if weak region boundaries are transparent to 617 // lookahead and lookbehind constructs 618 private static void boundsTest() throws Exception { 619 String fullMessage = "catdogcat"; 620 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 621 
Matcher matcher = pattern.matcher("catdogca"); 622 matcher.useTransparentBounds(true); 623 if (matcher.find()) 624 failCount++; 625 matcher.reset("atdogcat"); 626 if (matcher.find()) 627 failCount++; 628 matcher.reset(fullMessage); 629 if (!matcher.find()) 630 failCount++; 631 matcher.reset(fullMessage); 632 matcher.region(0,9); 633 if (!matcher.find()) 634 failCount++; 635 matcher.reset(fullMessage); 636 matcher.region(0,6); 637 if (!matcher.find()) 638 failCount++; 639 matcher.reset(fullMessage); 640 matcher.region(3,6); 641 if (!matcher.find()) 642 failCount++; 643 matcher.useTransparentBounds(false); 644 if (matcher.find()) 645 failCount++; 646 647 // Negative lookahead/lookbehind 648 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 649 matcher = pattern.matcher("dogcat"); 650 matcher.useTransparentBounds(true); 651 matcher.region(0,3); 652 if (matcher.find()) 653 failCount++; 654 matcher.reset("catdog"); 655 matcher.region(3,6); 656 if (matcher.find()) 657 failCount++; 658 matcher.useTransparentBounds(false); 659 matcher.reset("dogcat"); 660 matcher.region(0,3); 661 if (!matcher.find()) 662 failCount++; 663 matcher.reset("catdog"); 664 matcher.region(3,6); 665 if (!matcher.find()) 666 failCount++; 667 668 report("Region bounds transparency"); 669 } 670 671 // This test is for 4945394 672 private static void findFromTest() throws Exception { 673 String message = "This is 40 $0 message."; 674 Pattern pat = Pattern.compile("\\$0"); 675 Matcher match = pat.matcher(message); 676 if (!match.find()) 677 failCount++; 678 if (match.find()) 679 failCount++; 680 if (match.find()) 681 failCount++; 682 report("Check for alternating find"); 683 } 684 685 // This test is for 4872664 and 4892980 686 private static void negatedCharClassTest() throws Exception { 687 Pattern pattern = Pattern.compile("[^>]"); 688 Matcher matcher = pattern.matcher("\u203A"); 689 if (!matcher.matches()) 690 failCount++; 691 pattern = Pattern.compile("[^fr]"); 692 matcher = pattern.matcher("a"); 
693 if (!matcher.find()) 694 failCount++; 695 matcher.reset("\u203A"); 696 if (!matcher.find()) 697 failCount++; 698 String s = "for"; 699 String result[] = s.split("[^fr]"); 700 if (!result[0].equals("f")) 701 failCount++; 702 if (!result[1].equals("r")) 703 failCount++; 704 s = "f\u203Ar"; 705 result = s.split("[^fr]"); 706 if (!result[0].equals("f")) 707 failCount++; 708 if (!result[1].equals("r")) 709 failCount++; 710 711 // Test adding to bits, subtracting a node, then adding to bits again 712 pattern = Pattern.compile("[^f\u203Ar]"); 713 matcher = pattern.matcher("a"); 714 if (!matcher.find()) 715 failCount++; 716 matcher.reset("f"); 717 if (matcher.find()) 718 failCount++; 719 matcher.reset("\u203A"); 720 if (matcher.find()) 721 failCount++; 722 matcher.reset("r"); 723 if (matcher.find()) 724 failCount++; 725 matcher.reset("\u203B"); 726 if (!matcher.find()) 727 failCount++; 728 729 // Test subtracting a node, adding to bits, subtracting again 730 pattern = Pattern.compile("[^\u203Ar\u203B]"); 731 matcher = pattern.matcher("a"); 732 if (!matcher.find()) 733 failCount++; 734 matcher.reset("\u203A"); 735 if (matcher.find()) 736 failCount++; 737 matcher.reset("r"); 738 if (matcher.find()) 739 failCount++; 740 matcher.reset("\u203B"); 741 if (matcher.find()) 742 failCount++; 743 matcher.reset("\u203C"); 744 if (!matcher.find()) 745 failCount++; 746 747 report("Negated Character Class"); 748 } 749 750 // This test is for 4628291 751 private static void toStringTest() throws Exception { 752 Pattern pattern = Pattern.compile("b+"); 753 if (pattern.toString() != "b+") 754 failCount++; 755 Matcher matcher = pattern.matcher("aaabbbccc"); 756 String matcherString = matcher.toString(); // unspecified 757 matcher.find(); 758 matcherString = matcher.toString(); // unspecified 759 matcher.region(0,3); 760 matcherString = matcher.toString(); // unspecified 761 matcher.reset(); 762 matcherString = matcher.toString(); // unspecified 763 report("toString"); 764 } 765 766 // 
This test is for 4808962 767 private static void literalPatternTest() throws Exception { 768 int flags = Pattern.LITERAL; 769 770 Pattern pattern = Pattern.compile("abc\\t$^", flags); 771 check(pattern, "abc\\t$^", true); 772 773 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 774 check(pattern, "abc\\t$^", true); 775 776 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 777 check(pattern, "\\Qa^$bcabc\\E", true); 778 check(pattern, "a^$bcabc", false); 779 780 pattern = Pattern.compile("\\\\Q\\\\E"); 781 check(pattern, "\\Q\\E", true); 782 783 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 784 check(pattern, "abcefg\\Q\\Ehij", true); 785 786 pattern = Pattern.compile("\\\\\\Q\\\\E"); 787 check(pattern, "\\\\\\\\", true); 788 789 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 790 check(pattern, "\\Qa^$bcabc\\E", true); 791 check(pattern, "a^$bcabc", false); 792 793 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 794 check(pattern, "\\Qabc\\Edef", true); 795 check(pattern, "abcdef", false); 796 797 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 798 check(pattern, "abc\\Edef", true); 799 check(pattern, "abcdef", false); 800 801 pattern = Pattern.compile(Pattern.quote("\\E")); 802 check(pattern, "\\E", true); 803 804 pattern = Pattern.compile("((((abc.+?:)", flags); 805 check(pattern, "((((abc.+?:)", true); 806 807 flags |= Pattern.MULTILINE; 808 809 pattern = Pattern.compile("^cat$", flags); 810 check(pattern, "abc^cat$def", true); 811 check(pattern, "cat", false); 812 813 flags |= Pattern.CASE_INSENSITIVE; 814 815 pattern = Pattern.compile("abcdef", flags); 816 check(pattern, "ABCDEF", true); 817 check(pattern, "AbCdEf", true); 818 819 flags |= Pattern.DOTALL; 820 821 pattern = Pattern.compile("a...b", flags); 822 check(pattern, "A...b", true); 823 check(pattern, "Axxxb", false); 824 825 flags |= Pattern.CANON_EQ; 826 827 Pattern p = Pattern.compile("testa\u030a", flags); 828 check(pattern, "testa\u030a", false); 829 
check(pattern, "test\u00e5", false); 830 831 // Supplementary character test 832 flags = Pattern.LITERAL; 833 834 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 835 check(pattern, toSupplementaries("abc\\t$^"), true); 836 837 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 838 check(pattern, toSupplementaries("abc\\t$^"), true); 839 840 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 841 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 842 check(pattern, toSupplementaries("a^$bcabc"), false); 843 844 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 845 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 846 check(pattern, toSupplementaries("a^$bcabc"), false); 847 848 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 849 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 850 check(pattern, toSupplementaries("abcdef"), false); 851 852 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 853 check(pattern, toSupplementaries("abc\\Edef"), true); 854 check(pattern, toSupplementaries("abcdef"), false); 855 856 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 857 check(pattern, toSupplementaries("((((abc.+?:)"), true); 858 859 flags |= Pattern.MULTILINE; 860 861 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 862 check(pattern, toSupplementaries("abc^cat$def"), true); 863 check(pattern, toSupplementaries("cat"), false); 864 865 flags |= Pattern.DOTALL; 866 867 // note: this is case-sensitive. 
868 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 869 check(pattern, toSupplementaries("a...b"), true); 870 check(pattern, toSupplementaries("axxxb"), false); 871 872 flags |= Pattern.CANON_EQ; 873 874 String t = toSupplementaries("test"); 875 p = Pattern.compile(t + "a\u030a", flags); 876 check(pattern, t + "a\u030a", false); 877 check(pattern, t + "\u00e5", false); 878 879 report("Literal pattern"); 880 } 881 882 // This test is for 4803179 883 // This test is also for 4808962, replacement parts 884 private static void literalReplacementTest() throws Exception { 885 int flags = Pattern.LITERAL; 886 887 Pattern pattern = Pattern.compile("abc", flags); 888 Matcher matcher = pattern.matcher("zzzabczzz"); 889 String replaceTest = "$0"; 890 String result = matcher.replaceAll(replaceTest); 891 if (!result.equals("zzzabczzz")) 892 failCount++; 893 894 matcher.reset(); 895 String literalReplacement = matcher.quoteReplacement(replaceTest); 896 result = matcher.replaceAll(literalReplacement); 897 if (!result.equals("zzz$0zzz")) 898 failCount++; 899 900 matcher.reset(); 901 replaceTest = "\\t$\\$"; 902 literalReplacement = matcher.quoteReplacement(replaceTest); 903 result = matcher.replaceAll(literalReplacement); 904 if (!result.equals("zzz\\t$\\$zzz")) 905 failCount++; 906 907 // Supplementary character test 908 pattern = Pattern.compile(toSupplementaries("abc"), flags); 909 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 910 replaceTest = "$0"; 911 result = matcher.replaceAll(replaceTest); 912 if (!result.equals(toSupplementaries("zzzabczzz"))) 913 failCount++; 914 915 matcher.reset(); 916 literalReplacement = matcher.quoteReplacement(replaceTest); 917 result = matcher.replaceAll(literalReplacement); 918 if (!result.equals(toSupplementaries("zzz$0zzz"))) 919 failCount++; 920 921 matcher.reset(); 922 replaceTest = "\\t$\\$"; 923 literalReplacement = matcher.quoteReplacement(replaceTest); 924 result = matcher.replaceAll(literalReplacement); 925 if 
(!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 926 failCount++; 927 928 // IAE should be thrown if backslash or '$' is the last character 929 // in replacement string 930 try { 931 "\uac00".replaceAll("\uac00", "$"); 932 failCount++; 933 } catch (IllegalArgumentException iie) { 934 } catch (Exception e) { 935 failCount++; 936 } 937 try { 938 "\uac00".replaceAll("\uac00", "\\"); 939 failCount++; 940 } catch (IllegalArgumentException iie) { 941 } catch (Exception e) { 942 failCount++; 943 } 944 report("Literal replacement"); 945 } 946 947 // This test is for 4757029 948 private static void regionTest() throws Exception { 949 Pattern pattern = Pattern.compile("abc"); 950 Matcher matcher = pattern.matcher("abcdefabc"); 951 952 matcher.region(0,9); 953 if (!matcher.find()) 954 failCount++; 955 if (!matcher.find()) 956 failCount++; 957 matcher.region(0,3); 958 if (!matcher.find()) 959 failCount++; 960 matcher.region(3,6); 961 if (matcher.find()) 962 failCount++; 963 matcher.region(0,2); 964 if (matcher.find()) 965 failCount++; 966 967 expectRegionFail(matcher, 1, -1); 968 expectRegionFail(matcher, -1, -1); 969 expectRegionFail(matcher, -1, 1); 970 expectRegionFail(matcher, 5, 3); 971 expectRegionFail(matcher, 5, 12); 972 expectRegionFail(matcher, 12, 12); 973 974 pattern = Pattern.compile("^abc$"); 975 matcher = pattern.matcher("zzzabczzz"); 976 matcher.region(0,9); 977 if (matcher.find()) 978 failCount++; 979 matcher.region(3,6); 980 if (!matcher.find()) 981 failCount++; 982 matcher.region(3,6); 983 matcher.useAnchoringBounds(false); 984 if (matcher.find()) 985 failCount++; 986 987 // Supplementary character test 988 pattern = Pattern.compile(toSupplementaries("abc")); 989 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 990 matcher.region(0,9*2); 991 if (!matcher.find()) 992 failCount++; 993 if (!matcher.find()) 994 failCount++; 995 matcher.region(0,3*2); 996 if (!matcher.find()) 997 failCount++; 998 matcher.region(1,3*2); 999 if (matcher.find()) 1000 
failCount++; 1001 matcher.region(3*2,6*2); 1002 if (matcher.find()) 1003 failCount++; 1004 matcher.region(0,2*2); 1005 if (matcher.find()) 1006 failCount++; 1007 matcher.region(0,2*2+1); 1008 if (matcher.find()) 1009 failCount++; 1010 1011 expectRegionFail(matcher, 1*2, -1); 1012 expectRegionFail(matcher, -1, -1); 1013 expectRegionFail(matcher, -1, 1*2); 1014 expectRegionFail(matcher, 5*2, 3*2); 1015 expectRegionFail(matcher, 5*2, 12*2); 1016 expectRegionFail(matcher, 12*2, 12*2); 1017 1018 pattern = Pattern.compile(toSupplementaries("^abc$")); 1019 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1020 matcher.region(0,9*2); 1021 if (matcher.find()) 1022 failCount++; 1023 matcher.region(3*2,6*2); 1024 if (!matcher.find()) 1025 failCount++; 1026 matcher.region(3*2+1,6*2); 1027 if (matcher.find()) 1028 failCount++; 1029 matcher.region(3*2,6*2-1); 1030 if (matcher.find()) 1031 failCount++; 1032 matcher.region(3*2,6*2); 1033 matcher.useAnchoringBounds(false); 1034 if (matcher.find()) 1035 failCount++; 1036 report("Regions"); 1037 } 1038 1039 private static void expectRegionFail(Matcher matcher, int index1, 1040 int index2) 1041 { 1042 try { 1043 matcher.region(index1, index2); 1044 failCount++; 1045 } catch (IndexOutOfBoundsException ioobe) { 1046 // Correct result 1047 } catch (IllegalStateException ise) { 1048 // Correct result 1049 } 1050 } 1051 1052 // This test is for 4803197 1053 private static void escapedSegmentTest() throws Exception { 1054 1055 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1056 check(pattern, "dir1\\dir2", true); 1057 1058 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1059 check(pattern, "dir1\\dir2\\", true); 1060 1061 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1062 check(pattern, "dir1\\dir2\\", true); 1063 1064 // Supplementary character test 1065 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1066 check(pattern, toSupplementaries("dir1\\dir2"), true); 1067 1068 pattern = 
Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1069 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1070 1071 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1072 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1073 1074 report("Escaped segment"); 1075 } 1076 1077 // This test is for 4792284 1078 private static void nonCaptureRepetitionTest() throws Exception { 1079 String input = "abcdefgh;"; 1080 1081 String[] patterns = new String[] { 1082 "(?:\\w{4})+;", 1083 "(?:\\w{8})*;", 1084 "(?:\\w{2}){2,4};", 1085 "(?:\\w{4}){2,};", // only matches the 1086 ".*?(?:\\w{5})+;", // specified minimum 1087 ".*?(?:\\w{9})*;", // number of reps - OK 1088 "(?:\\w{4})+?;", // lazy repetition - OK 1089 "(?:\\w{4})++;", // possessive repetition - OK 1090 "(?:\\w{2,}?)+;", // non-deterministic - OK 1091 "(\\w{4})+;", // capturing group - OK 1092 }; 1093 1094 for (int i = 0; i < patterns.length; i++) { 1095 // Check find() 1096 check(patterns[i], 0, input, input, true); 1097 // Check matches() 1098 Pattern p = Pattern.compile(patterns[i]); 1099 Matcher m = p.matcher(input); 1100 1101 if (m.matches()) { 1102 if (!m.group(0).equals(input)) 1103 failCount++; 1104 } else { 1105 failCount++; 1106 } 1107 } 1108 1109 report("Non capturing repetition"); 1110 } 1111 1112 // This test is for 6358731 1113 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1114 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1115 Matcher matcher = pattern.matcher("abcd"); 1116 if (!matcher.matches() || 1117 matcher.group(1) != null || 1118 !matcher.group(2).equals("abcd")) { 1119 failCount++; 1120 } 1121 report("Not captured GroupCurly"); 1122 } 1123 1124 // This test is for 4706545 1125 private static void javaCharClassTest() throws Exception { 1126 for (int i=0; i<1000; i++) { 1127 char c = (char)generator.nextInt(); 1128 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1129 check("{javaUpperCase}", c, 
Character.isUpperCase(c)); 1130 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1131 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1132 check("{javaDigit}", c, Character.isDigit(c)); 1133 check("{javaDefined}", c, Character.isDefined(c)); 1134 check("{javaLetter}", c, Character.isLetter(c)); 1135 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1136 check("{javaJavaIdentifierStart}", c, 1137 Character.isJavaIdentifierStart(c)); 1138 check("{javaJavaIdentifierPart}", c, 1139 Character.isJavaIdentifierPart(c)); 1140 check("{javaUnicodeIdentifierStart}", c, 1141 Character.isUnicodeIdentifierStart(c)); 1142 check("{javaUnicodeIdentifierPart}", c, 1143 Character.isUnicodeIdentifierPart(c)); 1144 check("{javaIdentifierIgnorable}", c, 1145 Character.isIdentifierIgnorable(c)); 1146 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1147 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1148 check("{javaISOControl}", c, Character.isISOControl(c)); 1149 check("{javaMirrored}", c, Character.isMirrored(c)); 1150 1151 } 1152 1153 // Supplementary character test 1154 for (int i=0; i<1000; i++) { 1155 int c = generator.nextInt(Character.MAX_CODE_POINT 1156 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1157 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1158 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1159 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1160 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1161 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1162 check("{javaDigit}", c, Character.isDigit(c)); 1163 check("{javaDefined}", c, Character.isDefined(c)); 1164 check("{javaLetter}", c, Character.isLetter(c)); 1165 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1166 check("{javaJavaIdentifierStart}", c, 1167 Character.isJavaIdentifierStart(c)); 1168 check("{javaJavaIdentifierPart}", c, 1169 Character.isJavaIdentifierPart(c)); 1170 check("{javaUnicodeIdentifierStart}", c, 1171 
Character.isUnicodeIdentifierStart(c)); 1172 check("{javaUnicodeIdentifierPart}", c, 1173 Character.isUnicodeIdentifierPart(c)); 1174 check("{javaIdentifierIgnorable}", c, 1175 Character.isIdentifierIgnorable(c)); 1176 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1177 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1178 check("{javaISOControl}", c, Character.isISOControl(c)); 1179 check("{javaMirrored}", c, Character.isMirrored(c)); 1180 } 1181 1182 report("Java character classes"); 1183 } 1184 1185 // This test is for 4523620 1186 /* 1187 private static void numOccurrencesTest() throws Exception { 1188 Pattern pattern = Pattern.compile("aaa"); 1189 1190 if (pattern.numOccurrences("aaaaaa", false) != 2) 1191 failCount++; 1192 if (pattern.numOccurrences("aaaaaa", true) != 4) 1193 failCount++; 1194 1195 pattern = Pattern.compile("^"); 1196 if (pattern.numOccurrences("aaaaaa", false) != 1) 1197 failCount++; 1198 if (pattern.numOccurrences("aaaaaa", true) != 1) 1199 failCount++; 1200 1201 report("Number of Occurrences"); 1202 } 1203 */ 1204 1205 // This test is for 4776374 1206 private static void caretBetweenTerminatorsTest() throws Exception { 1207 int flags1 = Pattern.DOTALL; 1208 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1209 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1210 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1211 1212 check("^....", flags1, "test\ntest", "test", true); 1213 check(".....^", flags1, "test\ntest", "test", false); 1214 check(".....^", flags1, "test\n", "test", false); 1215 check("....^", flags1, "test\r\n", "test", false); 1216 1217 check("^....", flags2, "test\ntest", "test", true); 1218 check("....^", flags2, "test\ntest", "test", false); 1219 check(".....^", flags2, "test\n", "test", false); 1220 check("....^", flags2, "test\r\n", "test", false); 1221 1222 check("^....", flags3, "test\ntest", "test", true); 1223 check(".....^", flags3, "test\ntest", "test\n", true); 1224 
check(".....^", flags3, "test\u0085test", "test\u0085", false); 1225 check(".....^", flags3, "test\n", "test", false); 1226 check(".....^", flags3, "test\r\n", "test", false); 1227 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1228 1229 check("^....", flags4, "test\ntest", "test", true); 1230 check(".....^", flags3, "test\ntest", "test\n", true); 1231 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1232 check(".....^", flags4, "test\n", "test\n", false); 1233 check(".....^", flags4, "test\r\n", "test\r", false); 1234 1235 // Supplementary character test 1236 String t = toSupplementaries("test"); 1237 check("^....", flags1, t+"\n"+t, t, true); 1238 check(".....^", flags1, t+"\n"+t, t, false); 1239 check(".....^", flags1, t+"\n", t, false); 1240 check("....^", flags1, t+"\r\n", t, false); 1241 1242 check("^....", flags2, t+"\n"+t, t, true); 1243 check("....^", flags2, t+"\n"+t, t, false); 1244 check(".....^", flags2, t+"\n", t, false); 1245 check("....^", flags2, t+"\r\n", t, false); 1246 1247 check("^....", flags3, t+"\n"+t, t, true); 1248 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1249 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1250 check(".....^", flags3, t+"\n", t, false); 1251 check(".....^", flags3, t+"\r\n", t, false); 1252 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1253 1254 check("^....", flags4, t+"\n"+t, t, true); 1255 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1256 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1257 check(".....^", flags4, t+"\n", t+"\n", false); 1258 check(".....^", flags4, t+"\r\n", t+"\r", false); 1259 1260 report("Caret between terminators"); 1261 } 1262 1263 // This test is for 4727935 1264 private static void dollarAtEndTest() throws Exception { 1265 int flags1 = Pattern.DOTALL; 1266 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1267 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1268 1269 check("....$", flags1, "test\n", "test", true); 1270 
check("....$", flags1, "test\r\n", "test", true); 1271 check(".....$", flags1, "test\n", "test\n", true); 1272 check(".....$", flags1, "test\u0085", "test\u0085", true); 1273 check("....$", flags1, "test\u0085", "test", true); 1274 1275 check("....$", flags2, "test\n", "test", true); 1276 check(".....$", flags2, "test\n", "test\n", true); 1277 check(".....$", flags2, "test\u0085", "test\u0085", true); 1278 check("....$", flags2, "test\u0085", "est\u0085", true); 1279 1280 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1281 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1282 check("....$blah", flags3, "test\nblah", "!!!!", false); 1283 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1284 1285 // Supplementary character test 1286 String t = toSupplementaries("test"); 1287 String b = toSupplementaries("blah"); 1288 check("....$", flags1, t+"\n", t, true); 1289 check("....$", flags1, t+"\r\n", t, true); 1290 check(".....$", flags1, t+"\n", t+"\n", true); 1291 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1292 check("....$", flags1, t+"\u0085", t, true); 1293 1294 check("....$", flags2, t+"\n", t, true); 1295 check(".....$", flags2, t+"\n", t+"\n", true); 1296 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1297 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1298 1299 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1300 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1301 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1302 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1303 1304 report("Dollar at End"); 1305 } 1306 1307 // This test is for 4711773 1308 private static void multilineDollarTest() throws Exception { 1309 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1310 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1311 matcher.find(); 1312 if (matcher.start(0) != 9) 1313 failCount++; 1314 matcher.find(); 1315 if (matcher.start(0) != 
20) 1316 failCount++; 1317 1318 // Supplementary character test 1319 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1320 matcher.find(); 1321 if (matcher.start(0) != 9*2) 1322 failCount++; 1323 matcher.find(); 1324 if (matcher.start(0) != 20*2) 1325 failCount++; 1326 1327 report("Multiline Dollar"); 1328 } 1329 1330 private static void reluctantRepetitionTest() throws Exception { 1331 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1332 check(p, "1 word word word 2", true); 1333 check(p, "1 wor wo w 2", true); 1334 check(p, "1 word word 2", true); 1335 check(p, "1 word 2", true); 1336 check(p, "1 wo w w 2", true); 1337 check(p, "1 wo w 2", true); 1338 check(p, "1 wor w 2", true); 1339 1340 p = Pattern.compile("([a-z])+?c"); 1341 Matcher m = p.matcher("ababcdefdec"); 1342 check(m, "ababc"); 1343 1344 // Supplementary character test 1345 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1346 m = p.matcher(toSupplementaries("ababcdefdec")); 1347 check(m, toSupplementaries("ababc")); 1348 1349 report("Reluctant Repetition"); 1350 } 1351 1352 private static void serializeTest() throws Exception { 1353 String patternStr = "(b)"; 1354 String matchStr = "b"; 1355 Pattern pattern = Pattern.compile(patternStr); 1356 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1357 ObjectOutputStream oos = new ObjectOutputStream(baos); 1358 oos.writeObject(pattern); 1359 oos.close(); 1360 ObjectInputStream ois = new ObjectInputStream( 1361 new ByteArrayInputStream(baos.toByteArray())); 1362 Pattern serializedPattern = (Pattern)ois.readObject(); 1363 ois.close(); 1364 Matcher matcher = serializedPattern.matcher(matchStr); 1365 if (!matcher.matches()) 1366 failCount++; 1367 if (matcher.groupCount() != 1) 1368 failCount++; 1369 1370 report("Serialization"); 1371 } 1372 1373 private static void gTest() { 1374 Pattern pattern = Pattern.compile("\\G\\w"); 1375 Matcher matcher = pattern.matcher("abc#x#x"); 1376 matcher.find(); 
1377 matcher.find(); 1378 matcher.find(); 1379 if (matcher.find()) 1380 failCount++; 1381 1382 pattern = Pattern.compile("\\GA*"); 1383 matcher = pattern.matcher("1A2AA3"); 1384 matcher.find(); 1385 if (matcher.find()) 1386 failCount++; 1387 1388 pattern = Pattern.compile("\\GA*"); 1389 matcher = pattern.matcher("1A2AA3"); 1390 if (!matcher.find(1)) 1391 failCount++; 1392 matcher.find(); 1393 if (matcher.find()) 1394 failCount++; 1395 1396 report("\\G"); 1397 } 1398 1399 private static void zTest() { 1400 Pattern pattern = Pattern.compile("foo\\Z"); 1401 // Positives 1402 check(pattern, "foo\u0085", true); 1403 check(pattern, "foo\u2028", true); 1404 check(pattern, "foo\u2029", true); 1405 check(pattern, "foo\n", true); 1406 check(pattern, "foo\r", true); 1407 check(pattern, "foo\r\n", true); 1408 // Negatives 1409 check(pattern, "fooo", false); 1410 check(pattern, "foo\n\r", false); 1411 1412 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1413 // Positives 1414 check(pattern, "foo", true); 1415 check(pattern, "foo\n", true); 1416 // Negatives 1417 check(pattern, "foo\r", false); 1418 check(pattern, "foo\u0085", false); 1419 check(pattern, "foo\u2028", false); 1420 check(pattern, "foo\u2029", false); 1421 1422 report("\\Z"); 1423 } 1424 1425 private static void replaceFirstTest() { 1426 Pattern pattern = Pattern.compile("(ab)(c*)"); 1427 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1428 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1429 failCount++; 1430 1431 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1432 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1433 failCount++; 1434 1435 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1436 String result = matcher.replaceFirst("$1"); 1437 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1438 failCount++; 1439 1440 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1441 result = matcher.replaceFirst("$2"); 1442 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1443 
failCount++; 1444 1445 pattern = Pattern.compile("a*"); 1446 matcher = pattern.matcher("aaaaaaaaaa"); 1447 if (!matcher.replaceFirst("test").equals("test")) 1448 failCount++; 1449 1450 pattern = Pattern.compile("a+"); 1451 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1452 if (!matcher.replaceFirst("test").equals("zzztest")) 1453 failCount++; 1454 1455 // Supplementary character test 1456 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1457 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1458 if (!matcher.replaceFirst(toSupplementaries("test")) 1459 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1460 failCount++; 1461 1462 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1463 if (!matcher.replaceFirst(toSupplementaries("test")). 1464 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1465 failCount++; 1466 1467 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1468 result = matcher.replaceFirst("$1"); 1469 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1470 failCount++; 1471 1472 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1473 result = matcher.replaceFirst("$2"); 1474 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1475 failCount++; 1476 1477 pattern = Pattern.compile(toSupplementaries("a*")); 1478 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1479 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1480 failCount++; 1481 1482 pattern = Pattern.compile(toSupplementaries("a+")); 1483 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1484 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1485 failCount++; 1486 1487 report("Replace First"); 1488 } 1489 1490 private static void unixLinesTest() { 1491 Pattern pattern = Pattern.compile(".*"); 1492 Matcher matcher = pattern.matcher("aa\u2028blah"); 1493 matcher.find(); 1494 if 
(!matcher.group(0).equals("aa")) 1495 failCount++; 1496 1497 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1498 matcher = pattern.matcher("aa\u2028blah"); 1499 matcher.find(); 1500 if (!matcher.group(0).equals("aa\u2028blah")) 1501 failCount++; 1502 1503 pattern = Pattern.compile("[az]$", 1504 Pattern.MULTILINE | Pattern.UNIX_LINES); 1505 matcher = pattern.matcher("aa\u2028zz"); 1506 check(matcher, "a\u2028", false); 1507 1508 // Supplementary character test 1509 pattern = Pattern.compile(".*"); 1510 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1511 matcher.find(); 1512 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1513 failCount++; 1514 1515 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1516 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1517 matcher.find(); 1518 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1519 failCount++; 1520 1521 pattern = Pattern.compile(toSupplementaries("[az]$"), 1522 Pattern.MULTILINE | Pattern.UNIX_LINES); 1523 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1524 check(matcher, toSupplementaries("a\u2028"), false); 1525 1526 report("Unix Lines"); 1527 } 1528 1529 private static void commentsTest() { 1530 int flags = Pattern.COMMENTS; 1531 1532 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1533 Matcher matcher = pattern.matcher("aa#aa"); 1534 if (!matcher.matches()) 1535 failCount++; 1536 1537 pattern = Pattern.compile("aa # blah", flags); 1538 matcher = pattern.matcher("aa"); 1539 if (!matcher.matches()) 1540 failCount++; 1541 1542 pattern = Pattern.compile("aa blah", flags); 1543 matcher = pattern.matcher("aablah"); 1544 if (!matcher.matches()) 1545 failCount++; 1546 1547 pattern = Pattern.compile("aa # blah blech ", flags); 1548 matcher = pattern.matcher("aa"); 1549 if (!matcher.matches()) 1550 failCount++; 1551 1552 pattern = Pattern.compile("aa # blah\n ", flags); 1553 matcher = pattern.matcher("aa"); 1554 if (!matcher.matches()) 1555 
failCount++; 1556 1557 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1558 matcher = pattern.matcher("aabc"); 1559 if (!matcher.matches()) 1560 failCount++; 1561 1562 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1563 matcher = pattern.matcher("aabc"); 1564 if (!matcher.matches()) 1565 failCount++; 1566 1567 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1568 matcher = pattern.matcher("aabc#blech"); 1569 if (!matcher.matches()) 1570 failCount++; 1571 1572 // Supplementary character test 1573 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1574 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1575 if (!matcher.matches()) 1576 failCount++; 1577 1578 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1579 matcher = pattern.matcher(toSupplementaries("aa")); 1580 if (!matcher.matches()) 1581 failCount++; 1582 1583 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1584 matcher = pattern.matcher(toSupplementaries("aablah")); 1585 if (!matcher.matches()) 1586 failCount++; 1587 1588 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1589 matcher = pattern.matcher(toSupplementaries("aa")); 1590 if (!matcher.matches()) 1591 failCount++; 1592 1593 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1594 matcher = pattern.matcher(toSupplementaries("aa")); 1595 if (!matcher.matches()) 1596 failCount++; 1597 1598 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1599 matcher = pattern.matcher(toSupplementaries("aabc")); 1600 if (!matcher.matches()) 1601 failCount++; 1602 1603 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1604 matcher = pattern.matcher(toSupplementaries("aabc")); 1605 if (!matcher.matches()) 1606 failCount++; 1607 1608 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1609 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1610 if 
(!matcher.matches()) 1611 failCount++; 1612 1613 report("Comments"); 1614 } 1615 1616 private static void caseFoldingTest() { // bug 4504687 1617 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1618 Pattern pattern = Pattern.compile("aa", flags); 1619 Matcher matcher = pattern.matcher("ab"); 1620 if (matcher.matches()) 1621 failCount++; 1622 1623 pattern = Pattern.compile("aA", flags); 1624 matcher = pattern.matcher("ab"); 1625 if (matcher.matches()) 1626 failCount++; 1627 1628 pattern = Pattern.compile("aa", flags); 1629 matcher = pattern.matcher("aB"); 1630 if (matcher.matches()) 1631 failCount++; 1632 matcher = pattern.matcher("Ab"); 1633 if (matcher.matches()) 1634 failCount++; 1635 1636 // ASCII "a" 1637 // Latin-1 Supplement "a" + grave 1638 // Cyrillic "a" 1639 String[] patterns = new String[] { 1640 //single 1641 "a", "\u00e0", "\u0430", 1642 //slice 1643 "ab", "\u00e0\u00e1", "\u0430\u0431", 1644 //class single 1645 "[a]", "[\u00e0]", "[\u0430]", 1646 //class range 1647 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1648 //back reference 1649 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1650 }; 1651 1652 String[] texts = new String[] { 1653 "A", "\u00c0", "\u0410", 1654 "AB", "\u00c0\u00c1", "\u0410\u0411", 1655 "A", "\u00c0", "\u0410", 1656 "B", "\u00c2", "\u0411", 1657 "aA", "\u00e0\u00c0", "\u0430\u0410" 1658 }; 1659 1660 boolean[] expected = new boolean[] { 1661 true, false, false, 1662 true, false, false, 1663 true, false, false, 1664 true, false, false, 1665 true, false, false 1666 }; 1667 1668 flags = Pattern.CASE_INSENSITIVE; 1669 for (int i = 0; i < patterns.length; i++) { 1670 pattern = Pattern.compile(patterns[i], flags); 1671 matcher = pattern.matcher(texts[i]); 1672 if (matcher.matches() != expected[i]) { 1673 System.out.println("<1> Failed at " + i); 1674 failCount++; 1675 } 1676 } 1677 1678 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1679 for (int i = 0; i < patterns.length; i++) { 1680 pattern = 
Pattern.compile(patterns[i], flags); 1681 matcher = pattern.matcher(texts[i]); 1682 if (!matcher.matches()) { 1683 System.out.println("<2> Failed at " + i); 1684 failCount++; 1685 } 1686 } 1687 // flag unicode_case alone should do nothing 1688 flags = Pattern.UNICODE_CASE; 1689 for (int i = 0; i < patterns.length; i++) { 1690 pattern = Pattern.compile(patterns[i], flags); 1691 matcher = pattern.matcher(texts[i]); 1692 if (matcher.matches()) { 1693 System.out.println("<3> Failed at " + i); 1694 failCount++; 1695 } 1696 } 1697 1698 // Special cases: i, I, u+0131 and u+0130 1699 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1700 pattern = Pattern.compile("[h-j]+", flags); 1701 if (!pattern.matcher("\u0131\u0130").matches()) 1702 failCount++; 1703 report("Case Folding"); 1704 } 1705 1706 private static void appendTest() { 1707 Pattern pattern = Pattern.compile("(ab)(cd)"); 1708 Matcher matcher = pattern.matcher("abcd"); 1709 String result = matcher.replaceAll("$2$1"); 1710 if (!result.equals("cdab")) 1711 failCount++; 1712 1713 String s1 = "Swap all: first = 123, second = 456"; 1714 String s2 = "Swap one: first = 123, second = 456"; 1715 String r = "$3$2$1"; 1716 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1717 matcher = pattern.matcher(s1); 1718 1719 result = matcher.replaceAll(r); 1720 if (!result.equals("Swap all: 123 = first, 456 = second")) 1721 failCount++; 1722 1723 matcher = pattern.matcher(s2); 1724 1725 if (matcher.find()) { 1726 StringBuffer sb = new StringBuffer(); 1727 matcher.appendReplacement(sb, r); 1728 matcher.appendTail(sb); 1729 result = sb.toString(); 1730 if (!result.equals("Swap one: 123 = first, second = 456")) 1731 failCount++; 1732 } 1733 1734 // Supplementary character test 1735 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1736 matcher = pattern.matcher(toSupplementaries("abcd")); 1737 result = matcher.replaceAll("$2$1"); 1738 if (!result.equals(toSupplementaries("cdab"))) 1739 failCount++; 1740 1741 s1 = 
toSupplementaries("Swap all: first = 123, second = 456"); 1742 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1743 r = toSupplementaries("$3$2$1"); 1744 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1745 matcher = pattern.matcher(s1); 1746 1747 result = matcher.replaceAll(r); 1748 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1749 failCount++; 1750 1751 matcher = pattern.matcher(s2); 1752 1753 if (matcher.find()) { 1754 StringBuffer sb = new StringBuffer(); 1755 matcher.appendReplacement(sb, r); 1756 matcher.appendTail(sb); 1757 result = sb.toString(); 1758 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1759 failCount++; 1760 } 1761 report("Append"); 1762 } 1763 1764 private static void splitTest() { 1765 Pattern pattern = Pattern.compile(":"); 1766 String[] result = pattern.split("foo:and:boo", 2); 1767 if (!result[0].equals("foo")) 1768 failCount++; 1769 if (!result[1].equals("and:boo")) 1770 failCount++; 1771 // Supplementary character test 1772 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1773 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1774 if (!result[0].equals(toSupplementaries("foo"))) 1775 failCount++; 1776 if (!result[1].equals(toSupplementaries("andXboo"))) 1777 failCount++; 1778 1779 CharBuffer cb = CharBuffer.allocate(100); 1780 cb.put("foo:and:boo"); 1781 cb.flip(); 1782 result = pattern.split(cb); 1783 if (!result[0].equals("foo")) 1784 failCount++; 1785 if (!result[1].equals("and")) 1786 failCount++; 1787 if (!result[2].equals("boo")) 1788 failCount++; 1789 1790 // Supplementary character test 1791 CharBuffer cbs = CharBuffer.allocate(100); 1792 cbs.put(toSupplementaries("fooXandXboo")); 1793 cbs.flip(); 1794 result = patternX.split(cbs); 1795 if (!result[0].equals(toSupplementaries("foo"))) 1796 failCount++; 1797 if (!result[1].equals(toSupplementaries("and"))) 1798 failCount++; 1799 if 
(!result[2].equals(toSupplementaries("boo"))) 1800 failCount++; 1801 1802 String source = "0123456789"; 1803 for (int limit=-2; limit<3; limit++) { 1804 for (int x=0; x<10; x++) { 1805 result = source.split(Integer.toString(x), limit); 1806 int expectedLength = limit < 1 ? 2 : limit; 1807 1808 if ((limit == 0) && (x == 9)) { 1809 // expected dropping of "" 1810 if (result.length != 1) 1811 failCount++; 1812 if (!result[0].equals("012345678")) { 1813 failCount++; 1814 } 1815 } else { 1816 if (result.length != expectedLength) { 1817 failCount++; 1818 } 1819 if (!result[0].equals(source.substring(0,x))) { 1820 if (limit != 1) { 1821 failCount++; 1822 } else { 1823 if (!result[0].equals(source.substring(0,10))) { 1824 failCount++; 1825 } 1826 } 1827 } 1828 if (expectedLength > 1) { // Check segment 2 1829 if (!result[1].equals(source.substring(x+1,10))) 1830 failCount++; 1831 } 1832 } 1833 } 1834 } 1835 // Check the case for no match found 1836 for (int limit=-2; limit<3; limit++) { 1837 result = source.split("e", limit); 1838 if (result.length != 1) 1839 failCount++; 1840 if (!result[0].equals(source)) 1841 failCount++; 1842 } 1843 // Check the case for limit == 0, source = ""; 1844 // split() now returns 0-length for empty source "" see #6559590 1845 source = ""; 1846 result = source.split("e", 0); 1847 if (result.length != 1) 1848 failCount++; 1849 if (!result[0].equals(source)) 1850 failCount++; 1851 1852 // Check both split() and splitAsStraem(), especially for zero-lenth 1853 // input and zero-lenth match cases 1854 String[][] input = new String[][] { 1855 { " ", "Abc Efg Hij" }, // normal non-zero-match 1856 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1857 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1858 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1859 { "(?=\\p{Lu})", "AbcEfg" }, 1860 { "(?=\\p{Lu})", "Abc" }, 1861 { " ", "" }, // zero-length input 1862 { ".*", "" }, 1863 1864 // some tests from 
PatternStreamTest.java 1865 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1866 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1867 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1868 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1869 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1870 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1871 { "\u56da", "" }, 1872 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1873 { "o", "boo:and:foo" }, 1874 { "o", "booooo:and:fooooo" }, 1875 { "o", "fooooo:" }, 1876 }; 1877 1878 String[][] expected = new String[][] { 1879 { "Abc", "Efg", "Hij" }, 1880 { "", "Abc", "Efg", "Hij" }, 1881 { "Abc", "", "Efg", "Hij" }, 1882 { "Abc", "Efg", "Hij" }, 1883 { "Abc", "Efg" }, 1884 { "Abc" }, 1885 { "" }, 1886 { "" }, 1887 1888 { "awgqwefg1fefw", "vssv1vvv1" }, 1889 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1890 { "awgqwefg", "fefw4vssv", "vvv" }, 1891 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1892 { "1", "23", "456", "7890" }, 1893 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1894 { "" }, 1895 { "This", "is", "testing", "", "with", "different", "separators" }, 1896 { "b", "", ":and:f" }, 1897 { "b", "", "", "", "", ":and:f" }, 1898 { "f", "", "", "", "", ":" }, 1899 }; 1900 for (int i = 0; i < input.length; i++) { 1901 pattern = Pattern.compile(input[i][0]); 1902 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1903 failCount++; 1904 } 1905 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1906 // array for zero-length input for now 1907 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1908 expected[i])) { 1909 failCount++; 1910 } 1911 } 1912 report("Split"); 1913 } 1914 1915 private static void negationTest() { 1916 Pattern pattern = Pattern.compile("[\\[@^]+"); 1917 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1918 if (!matcher.find()) 1919 failCount++; 
1920 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1921 failCount++; 1922 pattern = Pattern.compile("[@\\[^]+"); 1923 matcher = pattern.matcher("@@@@[[[[^^^^"); 1924 if (!matcher.find()) 1925 failCount++; 1926 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1927 failCount++; 1928 pattern = Pattern.compile("[@\\[^@]+"); 1929 matcher = pattern.matcher("@@@@[[[[^^^^"); 1930 if (!matcher.find()) 1931 failCount++; 1932 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1933 failCount++; 1934 1935 pattern = Pattern.compile("\\)"); 1936 matcher = pattern.matcher("xxx)xxx"); 1937 if (!matcher.find()) 1938 failCount++; 1939 1940 report("Negation"); 1941 } 1942 1943 private static void ampersandTest() { 1944 Pattern pattern = Pattern.compile("[&@]+"); 1945 check(pattern, "@@@@&&&&", true); 1946 1947 pattern = Pattern.compile("[@&]+"); 1948 check(pattern, "@@@@&&&&", true); 1949 1950 pattern = Pattern.compile("[@\\&]+"); 1951 check(pattern, "@@@@&&&&", true); 1952 1953 report("Ampersand"); 1954 } 1955 1956 private static void octalTest() throws Exception { 1957 Pattern pattern = Pattern.compile("\\u0007"); 1958 Matcher matcher = pattern.matcher("\u0007"); 1959 if (!matcher.matches()) 1960 failCount++; 1961 pattern = Pattern.compile("\\07"); 1962 matcher = pattern.matcher("\u0007"); 1963 if (!matcher.matches()) 1964 failCount++; 1965 pattern = Pattern.compile("\\007"); 1966 matcher = pattern.matcher("\u0007"); 1967 if (!matcher.matches()) 1968 failCount++; 1969 pattern = Pattern.compile("\\0007"); 1970 matcher = pattern.matcher("\u0007"); 1971 if (!matcher.matches()) 1972 failCount++; 1973 pattern = Pattern.compile("\\040"); 1974 matcher = pattern.matcher("\u0020"); 1975 if (!matcher.matches()) 1976 failCount++; 1977 pattern = Pattern.compile("\\0403"); 1978 matcher = pattern.matcher("\u00203"); 1979 if (!matcher.matches()) 1980 failCount++; 1981 pattern = Pattern.compile("\\0103"); 1982 matcher = pattern.matcher("\u0043"); 1983 if (!matcher.matches()) 1984 failCount++; 1985 1986 
report("Octal"); 1987 } 1988 1989 private static void longPatternTest() throws Exception { 1990 try { 1991 Pattern pattern = Pattern.compile( 1992 "a 32-character-long pattern xxxx"); 1993 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1994 pattern = Pattern.compile("a thirty four character long regex"); 1995 StringBuffer patternToBe = new StringBuffer(101); 1996 for (int i=0; i<100; i++) 1997 patternToBe.append((char)(97 + i%26)); 1998 pattern = Pattern.compile(patternToBe.toString()); 1999 } catch (PatternSyntaxException e) { 2000 failCount++; 2001 } 2002 2003 // Supplementary character test 2004 try { 2005 Pattern pattern = Pattern.compile( 2006 toSupplementaries("a 32-character-long pattern xxxx")); 2007 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2008 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2009 StringBuffer patternToBe = new StringBuffer(101*2); 2010 for (int i=0; i<100; i++) 2011 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2012 + 97 + i%26)); 2013 pattern = Pattern.compile(patternToBe.toString()); 2014 } catch (PatternSyntaxException e) { 2015 failCount++; 2016 } 2017 report("LongPattern"); 2018 } 2019 2020 private static void group0Test() throws Exception { 2021 Pattern pattern = Pattern.compile("(tes)ting"); 2022 Matcher matcher = pattern.matcher("testing"); 2023 check(matcher, "testing"); 2024 2025 matcher.reset("testing"); 2026 if (matcher.lookingAt()) { 2027 if (!matcher.group(0).equals("testing")) 2028 failCount++; 2029 } else { 2030 failCount++; 2031 } 2032 2033 matcher.reset("testing"); 2034 if (matcher.matches()) { 2035 if (!matcher.group(0).equals("testing")) 2036 failCount++; 2037 } else { 2038 failCount++; 2039 } 2040 2041 pattern = Pattern.compile("(tes)ting"); 2042 matcher = pattern.matcher("testing"); 2043 if (matcher.lookingAt()) { 2044 if (!matcher.group(0).equals("testing")) 2045 failCount++; 2046 } else { 2047 
failCount++; 2048 } 2049 2050 pattern = Pattern.compile("^(tes)ting"); 2051 matcher = pattern.matcher("testing"); 2052 if (matcher.matches()) { 2053 if (!matcher.group(0).equals("testing")) 2054 failCount++; 2055 } else { 2056 failCount++; 2057 } 2058 2059 // Supplementary character test 2060 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2061 matcher = pattern.matcher(toSupplementaries("testing")); 2062 check(matcher, toSupplementaries("testing")); 2063 2064 matcher.reset(toSupplementaries("testing")); 2065 if (matcher.lookingAt()) { 2066 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2067 failCount++; 2068 } else { 2069 failCount++; 2070 } 2071 2072 matcher.reset(toSupplementaries("testing")); 2073 if (matcher.matches()) { 2074 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2075 failCount++; 2076 } else { 2077 failCount++; 2078 } 2079 2080 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2081 matcher = pattern.matcher(toSupplementaries("testing")); 2082 if (matcher.lookingAt()) { 2083 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2084 failCount++; 2085 } else { 2086 failCount++; 2087 } 2088 2089 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2090 matcher = pattern.matcher(toSupplementaries("testing")); 2091 if (matcher.matches()) { 2092 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2093 failCount++; 2094 } else { 2095 failCount++; 2096 } 2097 2098 report("Group0"); 2099 } 2100 2101 private static void findIntTest() throws Exception { 2102 Pattern p = Pattern.compile("blah"); 2103 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2104 boolean result = m.find(2); 2105 if (!result) 2106 failCount++; 2107 2108 p = Pattern.compile("$"); 2109 m = p.matcher("1234567890"); 2110 result = m.find(10); 2111 if (!result) 2112 failCount++; 2113 try { 2114 result = m.find(11); 2115 failCount++; 2116 } catch (IndexOutOfBoundsException e) { 2117 // correct result 2118 } 2119 2120 // Supplementary 
character test 2121 p = Pattern.compile(toSupplementaries("blah")); 2122 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2123 result = m.find(2); 2124 if (!result) 2125 failCount++; 2126 2127 report("FindInt"); 2128 } 2129 2130 private static void emptyPatternTest() throws Exception { 2131 Pattern p = Pattern.compile(""); 2132 Matcher m = p.matcher("foo"); 2133 2134 // Should find empty pattern at beginning of input 2135 boolean result = m.find(); 2136 if (result != true) 2137 failCount++; 2138 if (m.start() != 0) 2139 failCount++; 2140 2141 // Should not match entire input if input is not empty 2142 m.reset(); 2143 result = m.matches(); 2144 if (result == true) 2145 failCount++; 2146 2147 try { 2148 m.start(0); 2149 failCount++; 2150 } catch (IllegalStateException e) { 2151 // Correct result 2152 } 2153 2154 // Should match entire input if input is empty 2155 m.reset(""); 2156 result = m.matches(); 2157 if (result != true) 2158 failCount++; 2159 2160 result = Pattern.matches("", ""); 2161 if (result != true) 2162 failCount++; 2163 2164 result = Pattern.matches("", "foo"); 2165 if (result == true) 2166 failCount++; 2167 report("EmptyPattern"); 2168 } 2169 2170 private static void charClassTest() throws Exception { 2171 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2172 check(pattern, "blahb]blech", true); 2173 2174 pattern = Pattern.compile("[abc[def]]"); 2175 check(pattern, "b", true); 2176 2177 // Supplementary character tests 2178 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2179 check(pattern, toSupplementaries("blahb]blech"), true); 2180 2181 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2182 check(pattern, toSupplementaries("b"), true); 2183 2184 try { 2185 // u00ff when UNICODE_CASE 2186 pattern = Pattern.compile("[ab\u00ffcd]", 2187 Pattern.CASE_INSENSITIVE| 2188 Pattern.UNICODE_CASE); 2189 check(pattern, "ab\u00ffcd", true); 2190 check(pattern, "Ab\u0178Cd", true); 2191 2192 // u00b5 when UNICODE_CASE 2193 
pattern = Pattern.compile("[ab\u00b5cd]", 2194 Pattern.CASE_INSENSITIVE| 2195 Pattern.UNICODE_CASE); 2196 check(pattern, "ab\u00b5cd", true); 2197 check(pattern, "Ab\u039cCd", true); 2198 } catch (Exception e) { failCount++; } 2199 2200 /* Special cases 2201 (1)LatinSmallLetterLongS u+017f 2202 (2)LatinSmallLetterDotlessI u+0131 2203 (3)LatineCapitalLetterIWithDotAbove u+0130 2204 (4)KelvinSign u+212a 2205 (5)AngstromSign u+212b 2206 */ 2207 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2208 pattern = Pattern.compile("[sik\u00c5]+", flags); 2209 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2210 failCount++; 2211 2212 report("CharClass"); 2213 } 2214 2215 private static void caretTest() throws Exception { 2216 Pattern pattern = Pattern.compile("\\w*"); 2217 Matcher matcher = pattern.matcher("a#bc#def##g"); 2218 check(matcher, "a"); 2219 check(matcher, ""); 2220 check(matcher, "bc"); 2221 check(matcher, ""); 2222 check(matcher, "def"); 2223 check(matcher, ""); 2224 check(matcher, ""); 2225 check(matcher, "g"); 2226 check(matcher, ""); 2227 if (matcher.find()) 2228 failCount++; 2229 2230 pattern = Pattern.compile("^\\w*"); 2231 matcher = pattern.matcher("a#bc#def##g"); 2232 check(matcher, "a"); 2233 if (matcher.find()) 2234 failCount++; 2235 2236 pattern = Pattern.compile("\\w"); 2237 matcher = pattern.matcher("abc##x"); 2238 check(matcher, "a"); 2239 check(matcher, "b"); 2240 check(matcher, "c"); 2241 check(matcher, "x"); 2242 if (matcher.find()) 2243 failCount++; 2244 2245 pattern = Pattern.compile("^\\w"); 2246 matcher = pattern.matcher("abc##x"); 2247 check(matcher, "a"); 2248 if (matcher.find()) 2249 failCount++; 2250 2251 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2252 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2253 check(matcher, "abc"); 2254 if (matcher.find()) 2255 failCount++; 2256 2257 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2258 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2259 
check(matcher, "abc"); 2260 check(matcher, "jkl"); 2261 if (matcher.find()) 2262 failCount++; 2263 2264 pattern = Pattern.compile("^", Pattern.MULTILINE); 2265 matcher = pattern.matcher("this is some text"); 2266 String result = matcher.replaceAll("X"); 2267 if (!result.equals("Xthis is some text")) 2268 failCount++; 2269 2270 pattern = Pattern.compile("^"); 2271 matcher = pattern.matcher("this is some text"); 2272 result = matcher.replaceAll("X"); 2273 if (!result.equals("Xthis is some text")) 2274 failCount++; 2275 2276 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2277 matcher = pattern.matcher("this is some text\n"); 2278 result = matcher.replaceAll("X"); 2279 if (!result.equals("Xthis is some text\n")) 2280 failCount++; 2281 2282 report("Caret"); 2283 } 2284 2285 private static void groupCaptureTest() throws Exception { 2286 // Independent group 2287 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2288 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2289 matcher.find(); 2290 try { 2291 String blah = matcher.group(1); 2292 failCount++; 2293 } catch (IndexOutOfBoundsException ioobe) { 2294 // Good result 2295 } 2296 // Pure group 2297 pattern = Pattern.compile("x+(?:y+)z+"); 2298 matcher = pattern.matcher("xxxyyyzzz"); 2299 matcher.find(); 2300 try { 2301 String blah = matcher.group(1); 2302 failCount++; 2303 } catch (IndexOutOfBoundsException ioobe) { 2304 // Good result 2305 } 2306 2307 // Supplementary character tests 2308 // Independent group 2309 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2310 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2311 matcher.find(); 2312 try { 2313 String blah = matcher.group(1); 2314 failCount++; 2315 } catch (IndexOutOfBoundsException ioobe) { 2316 // Good result 2317 } 2318 // Pure group 2319 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2320 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2321 matcher.find(); 2322 try { 2323 String blah = 
matcher.group(1); 2324 failCount++; 2325 } catch (IndexOutOfBoundsException ioobe) { 2326 // Good result 2327 } 2328 2329 report("GroupCapture"); 2330 } 2331 2332 private static void backRefTest() throws Exception { 2333 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2334 check(pattern, "zzzaabcazzz", true); 2335 2336 pattern = Pattern.compile("(a*)bc\\1"); 2337 check(pattern, "zzzaabcaazzz", true); 2338 2339 pattern = Pattern.compile("(abc)(def)\\1"); 2340 check(pattern, "abcdefabc", true); 2341 2342 pattern = Pattern.compile("(abc)(def)\\3"); 2343 check(pattern, "abcdefabc", false); 2344 2345 try { 2346 for (int i = 1; i < 10; i++) { 2347 // Make sure backref 1-9 are always accepted 2348 pattern = Pattern.compile("abcdef\\" + i); 2349 // and fail to match if the target group does not exit 2350 check(pattern, "abcdef", false); 2351 } 2352 } catch(PatternSyntaxException e) { 2353 failCount++; 2354 } 2355 2356 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2357 check(pattern, "abcdefghija", false); 2358 check(pattern, "abcdefghija1", true); 2359 2360 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2361 check(pattern, "abcdefghijkk", true); 2362 2363 pattern = Pattern.compile("(a)bcdefghij\\11"); 2364 check(pattern, "abcdefghija1", true); 2365 2366 // Supplementary character tests 2367 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2368 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2369 2370 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2371 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2372 2373 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2374 check(pattern, toSupplementaries("abcdefabc"), true); 2375 2376 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2377 check(pattern, toSupplementaries("abcdefabc"), false); 2378 2379 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2380 check(pattern, 
toSupplementaries("abcdefghija"), false); 2381 check(pattern, toSupplementaries("abcdefghija1"), true); 2382 2383 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2384 check(pattern, toSupplementaries("abcdefghijkk"), true); 2385 2386 report("BackRef"); 2387 } 2388 2389 /** 2390 * Unicode Technical Report #18, section 2.6 End of Line 2391 * There is no empty line to be matched in the sequence \u000D\u000A 2392 * but there is an empty line in the sequence \u000A\u000D. 2393 */ 2394 private static void anchorTest() throws Exception { 2395 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2396 Matcher m = p.matcher("blah1\r\nblah2"); 2397 m.find(); 2398 m.find(); 2399 if (!m.group().equals("blah2")) 2400 failCount++; 2401 2402 m.reset("blah1\n\rblah2"); 2403 m.find(); 2404 m.find(); 2405 m.find(); 2406 if (!m.group().equals("blah2")) 2407 failCount++; 2408 2409 // Test behavior of $ with \r\n at end of input 2410 p = Pattern.compile(".+$"); 2411 m = p.matcher("blah1\r\n"); 2412 if (!m.find()) 2413 failCount++; 2414 if (!m.group().equals("blah1")) 2415 failCount++; 2416 if (m.find()) 2417 failCount++; 2418 2419 // Test behavior of $ with \r\n at end of input in multiline 2420 p = Pattern.compile(".+$", Pattern.MULTILINE); 2421 m = p.matcher("blah1\r\n"); 2422 if (!m.find()) 2423 failCount++; 2424 if (m.find()) 2425 failCount++; 2426 2427 // Test for $ recognition of \u0085 for bug 4527731 2428 p = Pattern.compile(".+$", Pattern.MULTILINE); 2429 m = p.matcher("blah1\u0085"); 2430 if (!m.find()) 2431 failCount++; 2432 2433 // Supplementary character test 2434 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2435 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2436 m.find(); 2437 m.find(); 2438 if (!m.group().equals(toSupplementaries("blah2"))) 2439 failCount++; 2440 2441 m.reset(toSupplementaries("blah1\n\rblah2")); 2442 m.find(); 2443 m.find(); 2444 m.find(); 2445 if (!m.group().equals(toSupplementaries("blah2"))) 2446 
failCount++; 2447 2448 // Test behavior of $ with \r\n at end of input 2449 p = Pattern.compile(".+$"); 2450 m = p.matcher(toSupplementaries("blah1\r\n")); 2451 if (!m.find()) 2452 failCount++; 2453 if (!m.group().equals(toSupplementaries("blah1"))) 2454 failCount++; 2455 if (m.find()) 2456 failCount++; 2457 2458 // Test behavior of $ with \r\n at end of input in multiline 2459 p = Pattern.compile(".+$", Pattern.MULTILINE); 2460 m = p.matcher(toSupplementaries("blah1\r\n")); 2461 if (!m.find()) 2462 failCount++; 2463 if (m.find()) 2464 failCount++; 2465 2466 // Test for $ recognition of \u0085 for bug 4527731 2467 p = Pattern.compile(".+$", Pattern.MULTILINE); 2468 m = p.matcher(toSupplementaries("blah1\u0085")); 2469 if (!m.find()) 2470 failCount++; 2471 2472 report("Anchors"); 2473 } 2474 2475 /** 2476 * A basic sanity test of Matcher.lookingAt(). 2477 */ 2478 private static void lookingAtTest() throws Exception { 2479 Pattern p = Pattern.compile("(ab)(c*)"); 2480 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2481 2482 if (!m.lookingAt()) 2483 failCount++; 2484 2485 if (!m.group().equals(m.group(0))) 2486 failCount++; 2487 2488 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2489 if (m.lookingAt()) 2490 failCount++; 2491 2492 // Supplementary character test 2493 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2494 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2495 2496 if (!m.lookingAt()) 2497 failCount++; 2498 2499 if (!m.group().equals(m.group(0))) 2500 failCount++; 2501 2502 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2503 if (m.lookingAt()) 2504 failCount++; 2505 2506 report("Looking At"); 2507 } 2508 2509 /** 2510 * A basic sanity test of Matcher.matches(). 
2511 */ 2512 private static void matchesTest() throws Exception { 2513 // matches() 2514 Pattern p = Pattern.compile("ulb(c*)"); 2515 Matcher m = p.matcher("ulbcccccc"); 2516 if (!m.matches()) 2517 failCount++; 2518 2519 // find() but not matches() 2520 m.reset("zzzulbcccccc"); 2521 if (m.matches()) 2522 failCount++; 2523 2524 // lookingAt() but not matches() 2525 m.reset("ulbccccccdef"); 2526 if (m.matches()) 2527 failCount++; 2528 2529 // matches() 2530 p = Pattern.compile("a|ad"); 2531 m = p.matcher("ad"); 2532 if (!m.matches()) 2533 failCount++; 2534 2535 // Supplementary character test 2536 // matches() 2537 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2538 m = p.matcher(toSupplementaries("ulbcccccc")); 2539 if (!m.matches()) 2540 failCount++; 2541 2542 // find() but not matches() 2543 m.reset(toSupplementaries("zzzulbcccccc")); 2544 if (m.matches()) 2545 failCount++; 2546 2547 // lookingAt() but not matches() 2548 m.reset(toSupplementaries("ulbccccccdef")); 2549 if (m.matches()) 2550 failCount++; 2551 2552 // matches() 2553 p = Pattern.compile(toSupplementaries("a|ad")); 2554 m = p.matcher(toSupplementaries("ad")); 2555 if (!m.matches()) 2556 failCount++; 2557 2558 report("Matches"); 2559 } 2560 2561 /** 2562 * A basic sanity test of Pattern.matches(). 
2563 */ 2564 private static void patternMatchesTest() throws Exception { 2565 // matches() 2566 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2567 toSupplementaries("ulbcccccc"))) 2568 failCount++; 2569 2570 // find() but not matches() 2571 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2572 toSupplementaries("zzzulbcccccc"))) 2573 failCount++; 2574 2575 // lookingAt() but not matches() 2576 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2577 toSupplementaries("ulbccccccdef"))) 2578 failCount++; 2579 2580 // Supplementary character test 2581 // matches() 2582 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2583 toSupplementaries("ulbcccccc"))) 2584 failCount++; 2585 2586 // find() but not matches() 2587 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2588 toSupplementaries("zzzulbcccccc"))) 2589 failCount++; 2590 2591 // lookingAt() but not matches() 2592 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2593 toSupplementaries("ulbccccccdef"))) 2594 failCount++; 2595 2596 report("Pattern Matches"); 2597 } 2598 2599 /** 2600 * Canonical equivalence testing. Tests the ability of the engine 2601 * to match sequences that are not explicitly specified in the 2602 * pattern when they are considered equivalent by the Unicode Standard. 
2603 */ 2604 private static void ceTest() throws Exception { 2605 // Decomposed char outside char classes 2606 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2607 Matcher m = p.matcher("test\u00e5"); 2608 if (!m.matches()) 2609 failCount++; 2610 2611 m.reset("testa\u030a"); 2612 if (!m.matches()) 2613 failCount++; 2614 2615 // Composed char outside char classes 2616 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2617 m = p.matcher("test\u00e5"); 2618 if (!m.matches()) 2619 failCount++; 2620 2621 m.reset("testa\u030a"); 2622 if (!m.find()) 2623 failCount++; 2624 2625 // Decomposed char inside a char class 2626 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2627 m = p.matcher("test\u00e5"); 2628 if (!m.find()) 2629 failCount++; 2630 2631 m.reset("testa\u030a"); 2632 if (!m.find()) 2633 failCount++; 2634 2635 // Composed char inside a char class 2636 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2637 m = p.matcher("test\u00e5"); 2638 if (!m.find()) 2639 failCount++; 2640 2641 m.reset("testa\u0300"); 2642 if (!m.find()) 2643 failCount++; 2644 2645 m.reset("testa\u030a"); 2646 if (!m.find()) 2647 failCount++; 2648 2649 // Marks that cannot legally change order and be equivalent 2650 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2651 check(p, "testa\u0308\u0300", true); 2652 check(p, "testa\u0300\u0308", false); 2653 2654 // Marks that can legally change order and be equivalent 2655 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2656 check(p, "testa\u0308\u0323", true); 2657 check(p, "testa\u0323\u0308", true); 2658 2659 // Test all equivalences of the sequence a\u0308\u0323\u0300 2660 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2661 check(p, "testa\u0308\u0323\u0300", true); 2662 check(p, "testa\u0323\u0308\u0300", true); 2663 check(p, "testa\u0308\u0300\u0323", true); 2664 check(p, "test\u00e4\u0323\u0300", true); 2665 check(p, "test\u00e4\u0300\u0323", true); 2666 
2667 Object[][] data = new Object[][] { 2668 2669 // JDK-4867170 2670 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2671 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2672 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2673 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2674 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2675 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2676 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2677 2678 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2679 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2680 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2681 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2682 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2683 2684 // backtracking, force to match "\u1f80", instead of \u1f82" 2685 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2686 2687 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2688 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2689 2690 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2691 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2692 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2693 2694 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2695 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2696 { "\u1f80", "ab\u1f80cd", "f", true }, 2697 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2698 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2699 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2700 { "\u1f82", "\u1f80\u0300", "m", true }, 2701 2702 // JDK-7080302 # compile failed 2703 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2704 2705 // JDK-6728861, same cause as above one 2706 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2707 2708 // JDK-6995635 2709 { "(\u00e9)", "e\u0301", "m", true }, 2710 2711 // JDK-6736245 2712 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2713 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2714 { "\u2ADC", "\u2ADD\u0338", "m", 
true}, // NFD 2715 2716 // 4916384. 2717 // Decomposed hangul (jamos) works inside clazz 2718 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2719 { "[\u1100\u1161]", "\uac00", "m", true}, 2720 2721 { "[\uac00]", "\u1100\u1161", "m", true}, 2722 { "[\uac00]", "\uac00", "m", true}, 2723 2724 // Decomposed hangul (jamos) 2725 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2726 { "\u1100\u1161", "\uac00", "m", true}, 2727 2728 // Composed hangul 2729 { "\uac00", "\u1100\u1161", "m", true }, 2730 { "\uac00", "\uac00", "m", true }, 2731 2732 /* Need a NFDSlice to nfd the source to solve this issue 2733 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2734 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2735 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2736 2737 // Decomposed supplementary outside char classes 2738 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2739 // Composed supplementary outside char classes 2740 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2741 */ 2742 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2743 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2744 2745 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2746 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2747 }; 2748 2749 int failCount = 0; 2750 for (Object[] d : data) { 2751 String pn = (String)d[0]; 2752 String tt = (String)d[1]; 2753 boolean isFind = "f".equals(((String)d[2])); 2754 boolean expected = (boolean)d[3]; 2755 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2756 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2757 if (ret != expected) { 2758 failCount++; 2759 continue; 2760 } 2761 } 2762 report("Canonical Equivalence"); 2763 } 2764 2765 /** 2766 * A basic sanity test of Matcher.replaceAll(). 
2767 */ 2768 private static void globalSubstitute() throws Exception { 2769 // Global substitution with a literal 2770 Pattern p = Pattern.compile("(ab)(c*)"); 2771 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2772 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2773 failCount++; 2774 2775 m.reset("zzzabccczzzabcczzzabccczzz"); 2776 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2777 failCount++; 2778 2779 // Global substitution with groups 2780 m.reset("zzzabccczzzabcczzzabccczzz"); 2781 String result = m.replaceAll("$1"); 2782 if (!result.equals("zzzabzzzabzzzabzzz")) 2783 failCount++; 2784 2785 // Supplementary character test 2786 // Global substitution with a literal 2787 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2788 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2789 if (!m.replaceAll(toSupplementaries("test")). 2790 equals(toSupplementaries("testzzztestzzztest"))) 2791 failCount++; 2792 2793 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2794 if (!m.replaceAll(toSupplementaries("test")). 2795 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2796 failCount++; 2797 2798 // Global substitution with groups 2799 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2800 result = m.replaceAll("$1"); 2801 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2802 failCount++; 2803 2804 report("Global Substitution"); 2805 } 2806 2807 /** 2808 * Tests the usage of Matcher.appendReplacement() with literal 2809 * and group substitutions. 
*/
    private static void stringbufferSubstitute() throws Exception {
        // This variant appends into a StringBuffer. Each scenario builds a
        // fresh matcher and buffer; a wrong buffer content or a missing
        // expected exception bumps failCount once.

        // SB substitution with literal
        String blah = "zzzblahzzz";
        Pattern p = Pattern.compile("blah");
        Matcher m = p.matcher(blah);
        StringBuffer result = new StringBuffer();
        try {
            m.appendReplacement(result, "blech");
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, "blech");
        if (!result.toString().equals("zzzblech"))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals("zzzblechzzz"))
            failCount++;

        // SB substitution with groups
        blah = "zzzabcdzzz";
        p = Pattern.compile("(ab)(cd)*");
        m = p.matcher(blah);
        result = new StringBuffer();
        try {
            m.appendReplacement(result, "$1");
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, "$1");
        if (!result.toString().equals("zzzab"))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals("zzzabzzz"))
            failCount++;

        // SB substitution with 3 groups
        blah = "zzzabcdcdefzzz";
        p = Pattern.compile("(ab)(cd)*(ef)");
        m = p.matcher(blah);
        result = new StringBuffer();
        try {
            m.appendReplacement(result, "$1w$2w$3");
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, "$1w$2w$3");
        if (!result.toString().equals("zzzabwcdwef"))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals("zzzabwcdwefzzz"))
            failCount++;

        // SB substitution with groups and three matches
        // skipping middle match
        blah = "zzzabcdzzzabcddzzzabcdzzz";
        p = Pattern.compile("(ab)(cd*)");
        m = p.matcher(blah);
        result = new StringBuffer();
        try {
            m.appendReplacement(result, "$1");
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, "$1");
        if (!result.toString().equals("zzzab"))
            failCount++;

        // Two finds in a row: the second match is copied through unreplaced
        m.find();
        m.find();
        m.appendReplacement(result, "$2");
        if (!result.toString().equals("zzzabzzzabcddzzzcd"))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
            failCount++;

        // Check to make sure escaped $ is ignored
        blah = "zzzabcdcdefzzz";
        p = Pattern.compile("(ab)(cd)*(ef)");
        m = p.matcher(blah);
        result = new StringBuffer();
        m.find();
        m.appendReplacement(result, "$1w\\$2w$3");
        if (!result.toString().equals("zzzabw$2wef"))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals("zzzabw$2wefzzz"))
            failCount++;

        // Check to make sure a reference to nonexistent group causes error
        blah = "zzzabcdcdefzzz";
        p = Pattern.compile("(ab)(cd)*(ef)");
        m = p.matcher(blah);
        result = new StringBuffer();
        m.find();
        try {
            m.appendReplacement(result, "$1w$5w$3");
            failCount++;
        } catch (IndexOutOfBoundsException ioobe) {
            // Correct result
        }

        // Check double digit group references
        blah = "zzz123456789101112zzz";
        p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
        m = p.matcher(blah);
        result = new StringBuffer();
        m.find();
        m.appendReplacement(result, "$1w$11w$3");
        if (!result.toString().equals("zzz1w11w3"))
            failCount++;

        // Check to make sure it backs off $15 to $1 if only three groups
        blah = "zzzabcdcdefzzz";
        p = Pattern.compile("(ab)(cd)*(ef)");
        m = p.matcher(blah);
        result = new StringBuffer();
        m.find();
        m.appendReplacement(result, "$1w$15w$3");
        if (!result.toString().equals("zzzabwab5wef"))
            failCount++;


        // Supplementary character test
        // SB substitution with literal
        blah = toSupplementaries("zzzblahzzz");
        p = Pattern.compile(toSupplementaries("blah"));
        m = p.matcher(blah);
        result = new StringBuffer();
        try {
            m.appendReplacement(result, toSupplementaries("blech"));
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, toSupplementaries("blech"));
        if (!result.toString().equals(toSupplementaries("zzzblech")))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
            failCount++;

        // SB substitution with groups
        blah = toSupplementaries("zzzabcdzzz");
        p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
        m = p.matcher(blah);
        result = new StringBuffer();
        try {
            m.appendReplacement(result, "$1");
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, "$1");
        if (!result.toString().equals(toSupplementaries("zzzab")))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals(toSupplementaries("zzzabzzz")))
            failCount++;

        // SB substitution with 3 groups
        blah = toSupplementaries("zzzabcdcdefzzz");
        p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
        m = p.matcher(blah);
        result = new StringBuffer();
        try {
            m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
        if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
            failCount++;

        // SB substitution with groups and three matches
        // skipping middle match
        blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
        p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
        m = p.matcher(blah);
        result = new StringBuffer();
        try {
            m.appendReplacement(result, "$1");
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, "$1");
        if (!result.toString().equals(toSupplementaries("zzzab")))
            failCount++;

        // Two finds in a row: the second match is copied through unreplaced
        m.find();
        m.find();
        m.appendReplacement(result, "$2");
        if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
            failCount++;

        // Check to make sure escaped $ is ignored
        blah = toSupplementaries("zzzabcdcdefzzz");
        p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
        m = p.matcher(blah);
        result = new StringBuffer();
        m.find();
        m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
        if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
            failCount++;

        // Check to make sure a reference to nonexistent group causes error
        blah = toSupplementaries("zzzabcdcdefzzz");
        p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
        m = p.matcher(blah);
        result = new StringBuffer();
        m.find();
        try {
            m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
            failCount++;
        } catch (IndexOutOfBoundsException ioobe) {
            // Correct result
        }

        // Check double digit group references
        // NOTE(review): the pattern is not passed through toSupplementaries
        // here although the input is; presumably digits are left unchanged by
        // that helper. Confirm against its definition.
        blah = toSupplementaries("zzz123456789101112zzz");
        p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
        m = p.matcher(blah);
        result = new StringBuffer();
        m.find();
        m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
        if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
            failCount++;

        // Check to make sure it backs off $15 to $1 if only three groups
        blah = toSupplementaries("zzzabcdcdefzzz");
        p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
        m = p.matcher(blah);
        result = new StringBuffer();
        m.find();
        m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
        if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
            failCount++;

        // Check nothing has been appended into the output buffer if
        // the replacement string triggers IllegalArgumentException.
        p = Pattern.compile("(abc)");
        m = p.matcher("abcd");
        result = new StringBuffer();
        m.find();
        try {
            m.appendReplacement(result, ("xyz$g"));
            failCount++;
        } catch (IllegalArgumentException iae) {
            // The buffer must still be empty after the rejected replacement
            if (result.length() != 0)
                failCount++;
        }

        report("SB Substitution");
    }

    /**
     * Tests the usage of Matcher.appendReplacement() with literal
     * and group substitutions, appending into a StringBuilder.
     */
    private static void stringbuilderSubstitute() throws Exception {
        // SB substitution with literal
        String blah = "zzzblahzzz";
        Pattern p = Pattern.compile("blah");
        Matcher m = p.matcher(blah);
        StringBuilder result = new StringBuilder();
        try {
            m.appendReplacement(result, "blech");
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, "blech");
        if (!result.toString().equals("zzzblech"))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals("zzzblechzzz"))
            failCount++;

        // SB substitution with groups
        blah = "zzzabcdzzz";
        p = Pattern.compile("(ab)(cd)*");
        m = p.matcher(blah);
        result = new StringBuilder();
        try {
            m.appendReplacement(result, "$1");
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, "$1");
        if (!result.toString().equals("zzzab"))
            failCount++;

        m.appendTail(result);
        if
(!result.toString().equals("zzzabzzz")) 3132 failCount++; 3133 3134 // SB substitution with 3 groups 3135 blah = "zzzabcdcdefzzz"; 3136 p = Pattern.compile("(ab)(cd)*(ef)"); 3137 m = p.matcher(blah); 3138 result = new StringBuilder(); 3139 try { 3140 m.appendReplacement(result, "$1w$2w$3"); 3141 failCount++; 3142 } catch (IllegalStateException e) { 3143 } 3144 m.find(); 3145 m.appendReplacement(result, "$1w$2w$3"); 3146 if (!result.toString().equals("zzzabwcdwef")) 3147 failCount++; 3148 3149 m.appendTail(result); 3150 if (!result.toString().equals("zzzabwcdwefzzz")) 3151 failCount++; 3152 3153 // SB substitution with groups and three matches 3154 // skipping middle match 3155 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3156 p = Pattern.compile("(ab)(cd*)"); 3157 m = p.matcher(blah); 3158 result = new StringBuilder(); 3159 try { 3160 m.appendReplacement(result, "$1"); 3161 failCount++; 3162 } catch (IllegalStateException e) { 3163 } 3164 m.find(); 3165 m.appendReplacement(result, "$1"); 3166 if (!result.toString().equals("zzzab")) 3167 failCount++; 3168 3169 m.find(); 3170 m.find(); 3171 m.appendReplacement(result, "$2"); 3172 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3173 failCount++; 3174 3175 m.appendTail(result); 3176 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3177 failCount++; 3178 3179 // Check to make sure escaped $ is ignored 3180 blah = "zzzabcdcdefzzz"; 3181 p = Pattern.compile("(ab)(cd)*(ef)"); 3182 m = p.matcher(blah); 3183 result = new StringBuilder(); 3184 m.find(); 3185 m.appendReplacement(result, "$1w\\$2w$3"); 3186 if (!result.toString().equals("zzzabw$2wef")) 3187 failCount++; 3188 3189 m.appendTail(result); 3190 if (!result.toString().equals("zzzabw$2wefzzz")) 3191 failCount++; 3192 3193 // Check to make sure a reference to nonexistent group causes error 3194 blah = "zzzabcdcdefzzz"; 3195 p = Pattern.compile("(ab)(cd)*(ef)"); 3196 m = p.matcher(blah); 3197 result = new StringBuilder(); 3198 m.find(); 3199 try { 3200 
m.appendReplacement(result, "$1w$5w$3"); 3201 failCount++; 3202 } catch (IndexOutOfBoundsException ioobe) { 3203 // Correct result 3204 } 3205 3206 // Check double digit group references 3207 blah = "zzz123456789101112zzz"; 3208 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3209 m = p.matcher(blah); 3210 result = new StringBuilder(); 3211 m.find(); 3212 m.appendReplacement(result, "$1w$11w$3"); 3213 if (!result.toString().equals("zzz1w11w3")) 3214 failCount++; 3215 3216 // Check to make sure it backs off $15 to $1 if only three groups 3217 blah = "zzzabcdcdefzzz"; 3218 p = Pattern.compile("(ab)(cd)*(ef)"); 3219 m = p.matcher(blah); 3220 result = new StringBuilder(); 3221 m.find(); 3222 m.appendReplacement(result, "$1w$15w$3"); 3223 if (!result.toString().equals("zzzabwab5wef")) 3224 failCount++; 3225 3226 3227 // Supplementary character test 3228 // SB substitution with literal 3229 blah = toSupplementaries("zzzblahzzz"); 3230 p = Pattern.compile(toSupplementaries("blah")); 3231 m = p.matcher(blah); 3232 result = new StringBuilder(); 3233 try { 3234 m.appendReplacement(result, toSupplementaries("blech")); 3235 failCount++; 3236 } catch (IllegalStateException e) { 3237 } 3238 m.find(); 3239 m.appendReplacement(result, toSupplementaries("blech")); 3240 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3241 failCount++; 3242 m.appendTail(result); 3243 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3244 failCount++; 3245 3246 // SB substitution with groups 3247 blah = toSupplementaries("zzzabcdzzz"); 3248 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3249 m = p.matcher(blah); 3250 result = new StringBuilder(); 3251 try { 3252 m.appendReplacement(result, "$1"); 3253 failCount++; 3254 } catch (IllegalStateException e) { 3255 } 3256 m.find(); 3257 m.appendReplacement(result, "$1"); 3258 if (!result.toString().equals(toSupplementaries("zzzab"))) 3259 failCount++; 3260 3261 m.appendTail(result); 3262 if 
(!result.toString().equals(toSupplementaries("zzzabzzz"))) 3263 failCount++; 3264 3265 // SB substitution with 3 groups 3266 blah = toSupplementaries("zzzabcdcdefzzz"); 3267 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3268 m = p.matcher(blah); 3269 result = new StringBuilder(); 3270 try { 3271 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3272 failCount++; 3273 } catch (IllegalStateException e) { 3274 } 3275 m.find(); 3276 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3277 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3278 failCount++; 3279 3280 m.appendTail(result); 3281 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3282 failCount++; 3283 3284 // SB substitution with groups and three matches 3285 // skipping middle match 3286 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3287 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3288 m = p.matcher(blah); 3289 result = new StringBuilder(); 3290 try { 3291 m.appendReplacement(result, "$1"); 3292 failCount++; 3293 } catch (IllegalStateException e) { 3294 } 3295 m.find(); 3296 m.appendReplacement(result, "$1"); 3297 if (!result.toString().equals(toSupplementaries("zzzab"))) 3298 failCount++; 3299 3300 m.find(); 3301 m.find(); 3302 m.appendReplacement(result, "$2"); 3303 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3304 failCount++; 3305 3306 m.appendTail(result); 3307 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3308 failCount++; 3309 3310 // Check to make sure escaped $ is ignored 3311 blah = toSupplementaries("zzzabcdcdefzzz"); 3312 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3313 m = p.matcher(blah); 3314 result = new StringBuilder(); 3315 m.find(); 3316 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3317 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3318 failCount++; 3319 3320 m.appendTail(result); 3321 if 
(!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3322 failCount++; 3323 3324 // Check to make sure a reference to nonexistent group causes error 3325 blah = toSupplementaries("zzzabcdcdefzzz"); 3326 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3327 m = p.matcher(blah); 3328 result = new StringBuilder(); 3329 m.find(); 3330 try { 3331 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3332 failCount++; 3333 } catch (IndexOutOfBoundsException ioobe) { 3334 // Correct result 3335 } 3336 // Check double digit group references 3337 blah = toSupplementaries("zzz123456789101112zzz"); 3338 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3339 m = p.matcher(blah); 3340 result = new StringBuilder(); 3341 m.find(); 3342 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3343 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3344 failCount++; 3345 3346 // Check to make sure it backs off $15 to $1 if only three groups 3347 blah = toSupplementaries("zzzabcdcdefzzz"); 3348 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3349 m = p.matcher(blah); 3350 result = new StringBuilder(); 3351 m.find(); 3352 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3353 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3354 failCount++; 3355 // Check nothing has been appended into the output buffer if 3356 // the replacement string triggers IllegalArgumentException. 3357 p = Pattern.compile("(abc)"); 3358 m = p.matcher("abcd"); 3359 result = new StringBuilder(); 3360 m.find(); 3361 try { 3362 m.appendReplacement(result, ("xyz$g")); 3363 failCount++; 3364 } catch (IllegalArgumentException iae) { 3365 if (result.length() != 0) 3366 failCount++; 3367 } 3368 report("SB Substitution 2"); 3369 } 3370 3371 /* 3372 * 5 groups of characters are created to make a substitution string. 
3373 * A base string will be created including random lead chars, the 3374 * substitution string, and random trailing chars. 3375 * A pattern containing the 5 groups is searched for and replaced with: 3376 * random group + random string + random group. 3377 * The results are checked for correctness. 3378 */ 3379 private static void substitutionBasher() { 3380 for (int runs = 0; runs<1000; runs++) { 3381 // Create a base string to work in 3382 int leadingChars = generator.nextInt(10); 3383 StringBuffer baseBuffer = new StringBuffer(100); 3384 String leadingString = getRandomAlphaString(leadingChars); 3385 baseBuffer.append(leadingString); 3386 3387 // Create 5 groups of random number of random chars 3388 // Create the string to substitute 3389 // Create the pattern string to search for 3390 StringBuffer bufferToSub = new StringBuffer(25); 3391 StringBuffer bufferToPat = new StringBuffer(50); 3392 String[] groups = new String[5]; 3393 for(int i=0; i<5; i++) { 3394 int aGroupSize = generator.nextInt(5)+1; 3395 groups[i] = getRandomAlphaString(aGroupSize); 3396 bufferToSub.append(groups[i]); 3397 bufferToPat.append('('); 3398 bufferToPat.append(groups[i]); 3399 bufferToPat.append(')'); 3400 } 3401 String stringToSub = bufferToSub.toString(); 3402 String pattern = bufferToPat.toString(); 3403 3404 // Place sub string into working string at random index 3405 baseBuffer.append(stringToSub); 3406 3407 // Append random chars to end 3408 int trailingChars = generator.nextInt(10); 3409 String trailingString = getRandomAlphaString(trailingChars); 3410 baseBuffer.append(trailingString); 3411 String baseString = baseBuffer.toString(); 3412 3413 // Create test pattern and matcher 3414 Pattern p = Pattern.compile(pattern); 3415 Matcher m = p.matcher(baseString); 3416 3417 // Reject candidate if pattern happens to start early 3418 m.find(); 3419 if (m.start() < leadingChars) 3420 continue; 3421 3422 // Reject candidate if more than one match 3423 if (m.find()) 3424 continue; 3425 
3426 // Construct a replacement string with : 3427 // random group + random string + random group 3428 StringBuffer bufferToRep = new StringBuffer(); 3429 int groupIndex1 = generator.nextInt(5); 3430 bufferToRep.append("$" + (groupIndex1 + 1)); 3431 String randomMidString = getRandomAlphaString(5); 3432 bufferToRep.append(randomMidString); 3433 int groupIndex2 = generator.nextInt(5); 3434 bufferToRep.append("$" + (groupIndex2 + 1)); 3435 String replacement = bufferToRep.toString(); 3436 3437 // Do the replacement 3438 String result = m.replaceAll(replacement); 3439 3440 // Construct expected result 3441 StringBuffer bufferToRes = new StringBuffer(); 3442 bufferToRes.append(leadingString); 3443 bufferToRes.append(groups[groupIndex1]); 3444 bufferToRes.append(randomMidString); 3445 bufferToRes.append(groups[groupIndex2]); 3446 bufferToRes.append(trailingString); 3447 String expectedResult = bufferToRes.toString(); 3448 3449 // Check results 3450 if (!result.equals(expectedResult)) 3451 failCount++; 3452 } 3453 3454 report("Substitution Basher"); 3455 } 3456 3457 /* 3458 * 5 groups of characters are created to make a substitution string. 3459 * A base string will be created including random lead chars, the 3460 * substitution string, and random trailing chars. 3461 * A pattern containing the 5 groups is searched for and replaced with: 3462 * random group + random string + random group. 3463 * The results are checked for correctness. 
3464 */ 3465 private static void substitutionBasher2() { 3466 for (int runs = 0; runs<1000; runs++) { 3467 // Create a base string to work in 3468 int leadingChars = generator.nextInt(10); 3469 StringBuilder baseBuffer = new StringBuilder(100); 3470 String leadingString = getRandomAlphaString(leadingChars); 3471 baseBuffer.append(leadingString); 3472 3473 // Create 5 groups of random number of random chars 3474 // Create the string to substitute 3475 // Create the pattern string to search for 3476 StringBuilder bufferToSub = new StringBuilder(25); 3477 StringBuilder bufferToPat = new StringBuilder(50); 3478 String[] groups = new String[5]; 3479 for(int i=0; i<5; i++) { 3480 int aGroupSize = generator.nextInt(5)+1; 3481 groups[i] = getRandomAlphaString(aGroupSize); 3482 bufferToSub.append(groups[i]); 3483 bufferToPat.append('('); 3484 bufferToPat.append(groups[i]); 3485 bufferToPat.append(')'); 3486 } 3487 String stringToSub = bufferToSub.toString(); 3488 String pattern = bufferToPat.toString(); 3489 3490 // Place sub string into working string at random index 3491 baseBuffer.append(stringToSub); 3492 3493 // Append random chars to end 3494 int trailingChars = generator.nextInt(10); 3495 String trailingString = getRandomAlphaString(trailingChars); 3496 baseBuffer.append(trailingString); 3497 String baseString = baseBuffer.toString(); 3498 3499 // Create test pattern and matcher 3500 Pattern p = Pattern.compile(pattern); 3501 Matcher m = p.matcher(baseString); 3502 3503 // Reject candidate if pattern happens to start early 3504 m.find(); 3505 if (m.start() < leadingChars) 3506 continue; 3507 3508 // Reject candidate if more than one match 3509 if (m.find()) 3510 continue; 3511 3512 // Construct a replacement string with : 3513 // random group + random string + random group 3514 StringBuilder bufferToRep = new StringBuilder(); 3515 int groupIndex1 = generator.nextInt(5); 3516 bufferToRep.append("$" + (groupIndex1 + 1)); 3517 String randomMidString = 
getRandomAlphaString(5); 3518 bufferToRep.append(randomMidString); 3519 int groupIndex2 = generator.nextInt(5); 3520 bufferToRep.append("$" + (groupIndex2 + 1)); 3521 String replacement = bufferToRep.toString(); 3522 3523 // Do the replacement 3524 String result = m.replaceAll(replacement); 3525 3526 // Construct expected result 3527 StringBuilder bufferToRes = new StringBuilder(); 3528 bufferToRes.append(leadingString); 3529 bufferToRes.append(groups[groupIndex1]); 3530 bufferToRes.append(randomMidString); 3531 bufferToRes.append(groups[groupIndex2]); 3532 bufferToRes.append(trailingString); 3533 String expectedResult = bufferToRes.toString(); 3534 3535 // Check results 3536 if (!result.equals(expectedResult)) { 3537 failCount++; 3538 } 3539 } 3540 3541 report("Substitution Basher 2"); 3542 } 3543 3544 /** 3545 * Checks the handling of some escape sequences that the Pattern 3546 * class should process instead of the java compiler. These are 3547 * not in the file because the escapes should be be processed 3548 * by the Pattern class when the regex is compiled. 3549 */ 3550 private static void escapes() throws Exception { 3551 Pattern p = Pattern.compile("\\043"); 3552 Matcher m = p.matcher("#"); 3553 if (!m.find()) 3554 failCount++; 3555 3556 p = Pattern.compile("\\x23"); 3557 m = p.matcher("#"); 3558 if (!m.find()) 3559 failCount++; 3560 3561 p = Pattern.compile("\\u0023"); 3562 m = p.matcher("#"); 3563 if (!m.find()) 3564 failCount++; 3565 3566 report("Escape sequences"); 3567 } 3568 3569 /** 3570 * Checks the handling of blank input situations. These 3571 * tests are incompatible with my test file format. 
3572 */ 3573 private static void blankInput() throws Exception { 3574 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3575 Matcher m = p.matcher(""); 3576 if (m.find()) 3577 failCount++; 3578 3579 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3580 m = p.matcher(""); 3581 if (!m.find()) 3582 failCount++; 3583 3584 p = Pattern.compile("abc"); 3585 m = p.matcher(""); 3586 if (m.find()) 3587 failCount++; 3588 3589 p = Pattern.compile("a*"); 3590 m = p.matcher(""); 3591 if (!m.find()) 3592 failCount++; 3593 3594 report("Blank input"); 3595 } 3596 3597 /** 3598 * Tests the Boyer-Moore pattern matching of a character sequence 3599 * on randomly generated patterns. 3600 */ 3601 private static void bm() throws Exception { 3602 doBnM('a'); 3603 report("Boyer Moore (ASCII)"); 3604 3605 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3606 report("Boyer Moore (Supplementary)"); 3607 } 3608 3609 private static void doBnM(int baseCharacter) throws Exception { 3610 int achar=0; 3611 3612 for (int i=0; i<100; i++) { 3613 // Create a short pattern to search for 3614 int patternLength = generator.nextInt(7) + 4; 3615 StringBuffer patternBuffer = new StringBuffer(patternLength); 3616 String pattern; 3617 retry: for (;;) { 3618 for (int x=0; x<patternLength; x++) { 3619 int ch = baseCharacter + generator.nextInt(26); 3620 if (Character.isSupplementaryCodePoint(ch)) { 3621 patternBuffer.append(Character.toChars(ch)); 3622 } else { 3623 patternBuffer.append((char)ch); 3624 } 3625 } 3626 pattern = patternBuffer.toString(); 3627 3628 // Avoid patterns that start and end with the same substring 3629 // See JDK-6854417 3630 for (int x=1; x < pattern.length(); x++) { 3631 if (pattern.startsWith(pattern.substring(x))) 3632 continue retry; 3633 } 3634 break; 3635 } 3636 Pattern p = Pattern.compile(pattern); 3637 3638 // Create a buffer with random ASCII chars that does 3639 // not match the sample 3640 String toSearch = null; 3641 StringBuffer s = null; 3642 Matcher m = 
p.matcher(""); 3643 do { 3644 s = new StringBuffer(100); 3645 for (int x=0; x<100; x++) { 3646 int ch = baseCharacter + generator.nextInt(26); 3647 if (Character.isSupplementaryCodePoint(ch)) { 3648 s.append(Character.toChars(ch)); 3649 } else { 3650 s.append((char)ch); 3651 } 3652 } 3653 toSearch = s.toString(); 3654 m.reset(toSearch); 3655 } while (m.find()); 3656 3657 // Insert the pattern at a random spot 3658 int insertIndex = generator.nextInt(99); 3659 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3660 insertIndex++; 3661 s = s.insert(insertIndex, pattern); 3662 toSearch = s.toString(); 3663 3664 // Make sure that the pattern is found 3665 m.reset(toSearch); 3666 if (!m.find()) 3667 failCount++; 3668 3669 // Make sure that the match text is the pattern 3670 if (!m.group().equals(pattern)) 3671 failCount++; 3672 3673 // Make sure match occured at insertion point 3674 if (m.start() != insertIndex) 3675 failCount++; 3676 } 3677 } 3678 3679 /** 3680 * Tests the matching of slices on randomly generated patterns. 3681 * The Boyer-Moore optimization is not done on these patterns 3682 * because it uses unicode case folding. 
3683 */ 3684 private static void slice() throws Exception { 3685 doSlice(Character.MAX_VALUE); 3686 report("Slice"); 3687 3688 doSlice(Character.MAX_CODE_POINT); 3689 report("Slice (Supplementary)"); 3690 } 3691 3692 private static void doSlice(int maxCharacter) throws Exception { 3693 Random generator = new Random(); 3694 int achar=0; 3695 3696 for (int i=0; i<100; i++) { 3697 // Create a short pattern to search for 3698 int patternLength = generator.nextInt(7) + 4; 3699 StringBuffer patternBuffer = new StringBuffer(patternLength); 3700 for (int x=0; x<patternLength; x++) { 3701 int randomChar = 0; 3702 while (!Character.isLetterOrDigit(randomChar)) 3703 randomChar = generator.nextInt(maxCharacter); 3704 if (Character.isSupplementaryCodePoint(randomChar)) { 3705 patternBuffer.append(Character.toChars(randomChar)); 3706 } else { 3707 patternBuffer.append((char) randomChar); 3708 } 3709 } 3710 String pattern = patternBuffer.toString(); 3711 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3712 3713 // Create a buffer with random chars that does not match the sample 3714 String toSearch = null; 3715 StringBuffer s = null; 3716 Matcher m = p.matcher(""); 3717 do { 3718 s = new StringBuffer(100); 3719 for (int x=0; x<100; x++) { 3720 int randomChar = 0; 3721 while (!Character.isLetterOrDigit(randomChar)) 3722 randomChar = generator.nextInt(maxCharacter); 3723 if (Character.isSupplementaryCodePoint(randomChar)) { 3724 s.append(Character.toChars(randomChar)); 3725 } else { 3726 s.append((char) randomChar); 3727 } 3728 } 3729 toSearch = s.toString(); 3730 m.reset(toSearch); 3731 } while (m.find()); 3732 3733 // Insert the pattern at a random spot 3734 int insertIndex = generator.nextInt(99); 3735 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3736 insertIndex++; 3737 s = s.insert(insertIndex, pattern); 3738 toSearch = s.toString(); 3739 3740 // Make sure that the pattern is found 3741 m.reset(toSearch); 3742 if (!m.find()) 3743 failCount++; 3744 3745 // 
Make sure that the match text is the pattern 3746 if (!m.group().equals(pattern)) 3747 failCount++; 3748 3749 // Make sure match occured at insertion point 3750 if (m.start() != insertIndex) 3751 failCount++; 3752 } 3753 } 3754 3755 private static void explainFailure(String pattern, String data, 3756 String expected, String actual) { 3757 System.err.println("----------------------------------------"); 3758 System.err.println("Pattern = "+pattern); 3759 System.err.println("Data = "+data); 3760 System.err.println("Expected = " + expected); 3761 System.err.println("Actual = " + actual); 3762 } 3763 3764 private static void explainFailure(String pattern, String data, 3765 Throwable t) { 3766 System.err.println("----------------------------------------"); 3767 System.err.println("Pattern = "+pattern); 3768 System.err.println("Data = "+data); 3769 t.printStackTrace(System.err); 3770 } 3771 3772 // Testing examples from a file 3773 3774 /** 3775 * Goes through the file "TestCases.txt" and creates many patterns 3776 * described in the file, matching the patterns against input lines in 3777 * the file, and comparing the results against the correct results 3778 * also found in the file. The file format is described in comments 3779 * at the head of the file. 3780 */ 3781 private static void processFile(String fileName) throws Exception { 3782 File testCases = new File(System.getProperty("test.src", "."), 3783 fileName); 3784 FileInputStream in = new FileInputStream(testCases); 3785 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3786 3787 // Process next test case. 
3788 String aLine; 3789 while((aLine = r.readLine()) != null) { 3790 // Read a line for pattern 3791 String patternString = grabLine(r); 3792 Pattern p = null; 3793 try { 3794 p = compileTestPattern(patternString); 3795 } catch (PatternSyntaxException e) { 3796 String dataString = grabLine(r); 3797 String expectedResult = grabLine(r); 3798 if (expectedResult.startsWith("error")) 3799 continue; 3800 explainFailure(patternString, dataString, e); 3801 failCount++; 3802 continue; 3803 } 3804 3805 // Read a line for input string 3806 String dataString = grabLine(r); 3807 Matcher m = p.matcher(dataString); 3808 StringBuffer result = new StringBuffer(); 3809 3810 // Check for IllegalStateExceptions before a match 3811 failCount += preMatchInvariants(m); 3812 3813 boolean found = m.find(); 3814 3815 if (found) 3816 failCount += postTrueMatchInvariants(m); 3817 else 3818 failCount += postFalseMatchInvariants(m); 3819 3820 if (found) { 3821 result.append("true "); 3822 result.append(m.group(0) + " "); 3823 } else { 3824 result.append("false "); 3825 } 3826 3827 result.append(m.groupCount()); 3828 3829 if (found) { 3830 for (int i=1; i<m.groupCount()+1; i++) 3831 if (m.group(i) != null) 3832 result.append(" " +m.group(i)); 3833 } 3834 3835 // Read a line for the expected result 3836 String expectedResult = grabLine(r); 3837 3838 if (!result.toString().equals(expectedResult)) { 3839 explainFailure(patternString, dataString, expectedResult, result.toString()); 3840 failCount++; 3841 } 3842 } 3843 3844 report(fileName); 3845 } 3846 3847 private static int preMatchInvariants(Matcher m) { 3848 int failCount = 0; 3849 try { 3850 m.start(); 3851 failCount++; 3852 } catch (IllegalStateException ise) {} 3853 try { 3854 m.end(); 3855 failCount++; 3856 } catch (IllegalStateException ise) {} 3857 try { 3858 m.group(); 3859 failCount++; 3860 } catch (IllegalStateException ise) {} 3861 return failCount; 3862 } 3863 3864 private static int postFalseMatchInvariants(Matcher m) { 3865 int 
failCount = 0; 3866 try { 3867 m.group(); 3868 failCount++; 3869 } catch (IllegalStateException ise) {} 3870 try { 3871 m.start(); 3872 failCount++; 3873 } catch (IllegalStateException ise) {} 3874 try { 3875 m.end(); 3876 failCount++; 3877 } catch (IllegalStateException ise) {} 3878 return failCount; 3879 } 3880 3881 private static int postTrueMatchInvariants(Matcher m) { 3882 int failCount = 0; 3883 //assert(m.start() = m.start(0); 3884 if (m.start() != m.start(0)) 3885 failCount++; 3886 //assert(m.end() = m.end(0); 3887 if (m.start() != m.start(0)) 3888 failCount++; 3889 //assert(m.group() = m.group(0); 3890 if (!m.group().equals(m.group(0))) 3891 failCount++; 3892 try { 3893 m.group(50); 3894 failCount++; 3895 } catch (IndexOutOfBoundsException ise) {} 3896 3897 return failCount; 3898 } 3899 3900 private static Pattern compileTestPattern(String patternString) { 3901 if (!patternString.startsWith("'")) { 3902 return Pattern.compile(patternString); 3903 } 3904 int break1 = patternString.lastIndexOf("'"); 3905 String flagString = patternString.substring( 3906 break1+1, patternString.length()); 3907 patternString = patternString.substring(1, break1); 3908 3909 if (flagString.equals("i")) 3910 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3911 3912 if (flagString.equals("m")) 3913 return Pattern.compile(patternString, Pattern.MULTILINE); 3914 3915 return Pattern.compile(patternString); 3916 } 3917 3918 /** 3919 * Reads a line from the input file. Keeps reading lines until a non 3920 * empty non comment line is read. If the line contains a \n then 3921 * these two characters are replaced by a newline char. If a \\uxxxx 3922 * sequence is read then the sequence is replaced by the unicode char. 
3923 */ 3924 private static String grabLine(BufferedReader r) throws Exception { 3925 int index = 0; 3926 String line = r.readLine(); 3927 while (line.startsWith("//") || line.length() < 1) 3928 line = r.readLine(); 3929 while ((index = line.indexOf("\\n")) != -1) { 3930 StringBuffer temp = new StringBuffer(line); 3931 temp.replace(index, index+2, "\n"); 3932 line = temp.toString(); 3933 } 3934 while ((index = line.indexOf("\\u")) != -1) { 3935 StringBuffer temp = new StringBuffer(line); 3936 String value = temp.substring(index+2, index+6); 3937 char aChar = (char)Integer.parseInt(value, 16); 3938 String unicodeChar = "" + aChar; 3939 temp.replace(index, index+6, unicodeChar); 3940 line = temp.toString(); 3941 } 3942 3943 return line; 3944 } 3945 3946 private static void check(Pattern p, String s, String g, String expected) { 3947 Matcher m = p.matcher(s); 3948 m.find(); 3949 if (!m.group(g).equals(expected) || 3950 s.charAt(m.start(g)) != expected.charAt(0) || 3951 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3952 failCount++; 3953 } 3954 3955 private static void checkReplaceFirst(String p, String s, String r, String expected) 3956 { 3957 if (!expected.equals(Pattern.compile(p) 3958 .matcher(s) 3959 .replaceFirst(r))) 3960 failCount++; 3961 } 3962 3963 private static void checkReplaceAll(String p, String s, String r, String expected) 3964 { 3965 if (!expected.equals(Pattern.compile(p) 3966 .matcher(s) 3967 .replaceAll(r))) 3968 failCount++; 3969 } 3970 3971 private static void checkExpectedFail(String p) { 3972 try { 3973 Pattern.compile(p); 3974 } catch (PatternSyntaxException pse) { 3975 //pse.printStackTrace(); 3976 return; 3977 } 3978 failCount++; 3979 } 3980 3981 private static void checkExpectedIAE(Matcher m, String g) { 3982 m.find(); 3983 try { 3984 m.group(g); 3985 } catch (IllegalArgumentException x) { 3986 //iae.printStackTrace(); 3987 try { 3988 m.start(g); 3989 } catch (IllegalArgumentException xx) { 3990 try { 3991 m.start(g); 
3992 } catch (IllegalArgumentException xxx) { 3993 return; 3994 } 3995 } 3996 } 3997 failCount++; 3998 } 3999 4000 private static void checkExpectedNPE(Matcher m) { 4001 m.find(); 4002 try { 4003 m.group(null); 4004 } catch (NullPointerException x) { 4005 try { 4006 m.start(null); 4007 } catch (NullPointerException xx) { 4008 try { 4009 m.end(null); 4010 } catch (NullPointerException xxx) { 4011 return; 4012 } 4013 } 4014 } 4015 failCount++; 4016 } 4017 4018 private static void namedGroupCaptureTest() throws Exception { 4019 check(Pattern.compile("x+(?<gname>y+)z+"), 4020 "xxxyyyzzz", 4021 "gname", 4022 "yyy"); 4023 4024 check(Pattern.compile("x+(?<gname8>y+)z+"), 4025 "xxxyyyzzz", 4026 "gname8", 4027 "yyy"); 4028 4029 //backref 4030 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4031 check(pattern, "zzzaabcazzz", true); // found "abca" 4032 4033 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4034 "zzzaabcaazzz", true); 4035 4036 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4037 "abcdefabc", true); 4038 4039 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4040 "abcdefghijkk", true); 4041 4042 // Supplementary character tests 4043 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4044 toSupplementaries("zzzaabcazzz"), true); 4045 4046 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4047 toSupplementaries("zzzaabcaazzz"), true); 4048 4049 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4050 toSupplementaries("abcdefabc"), true); 4051 4052 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4053 "(?<gname>" + 4054 toSupplementaries("k)") + "\\k<gname>"), 4055 toSupplementaries("abcdefghijkk"), true); 4056 4057 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4058 "xxxyyyzzzyyy", 4059 "gname", 4060 "yyy"); 4061 4062 //replaceFirst/All 4063 checkReplaceFirst("(?<gn>ab)(c*)", 4064 
"abccczzzabcczzzabccc", 4065 "${gn}", 4066 "abzzzabcczzzabccc"); 4067 4068 checkReplaceAll("(?<gn>ab)(c*)", 4069 "abccczzzabcczzzabccc", 4070 "${gn}", 4071 "abzzzabzzzab"); 4072 4073 4074 checkReplaceFirst("(?<gn>ab)(c*)", 4075 "zzzabccczzzabcczzzabccczzz", 4076 "${gn}", 4077 "zzzabzzzabcczzzabccczzz"); 4078 4079 checkReplaceAll("(?<gn>ab)(c*)", 4080 "zzzabccczzzabcczzzabccczzz", 4081 "${gn}", 4082 "zzzabzzzabzzzabzzz"); 4083 4084 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4085 "zzzabccczzzabcczzzabccczzz", 4086 "${gn2}", 4087 "zzzccczzzabcczzzabccczzz"); 4088 4089 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4090 "zzzabccczzzabcczzzabccczzz", 4091 "${gn2}", 4092 "zzzccczzzcczzzccczzz"); 4093 4094 //toSupplementaries("(ab)(c*)")); 4095 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4096 ")(?<gn2>" + toSupplementaries("c") + "*)", 4097 toSupplementaries("abccczzzabcczzzabccc"), 4098 "${gn1}", 4099 toSupplementaries("abzzzabcczzzabccc")); 4100 4101 4102 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4103 ")(?<gn2>" + toSupplementaries("c") + "*)", 4104 toSupplementaries("abccczzzabcczzzabccc"), 4105 "${gn1}", 4106 toSupplementaries("abzzzabzzzab")); 4107 4108 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4109 ")(?<gn2>" + toSupplementaries("c") + "*)", 4110 toSupplementaries("abccczzzabcczzzabccc"), 4111 "${gn2}", 4112 toSupplementaries("ccczzzabcczzzabccc")); 4113 4114 4115 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4116 ")(?<gn2>" + toSupplementaries("c") + "*)", 4117 toSupplementaries("abccczzzabcczzzabccc"), 4118 "${gn2}", 4119 toSupplementaries("ccczzzcczzzccc")); 4120 4121 checkReplaceFirst("(?<dog>Dog)AndCat", 4122 "zzzDogAndCatzzzDogAndCatzzz", 4123 "${dog}", 4124 "zzzDogzzzDogAndCatzzz"); 4125 4126 4127 checkReplaceAll("(?<dog>Dog)AndCat", 4128 "zzzDogAndCatzzzDogAndCatzzz", 4129 "${dog}", 4130 "zzzDogzzzDogzzz"); 4131 4132 // backref in Matcher & String 4133 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", 
"${gn}").equals("abefij") || 4134 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4135 failCount++; 4136 4137 // negative 4138 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4139 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4140 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4141 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4142 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4143 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4144 "gnameX"); 4145 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4146 report("NamedGroupCapture"); 4147 } 4148 4149 // This is for bug 6919132 4150 private static void nonBmpClassComplementTest() throws Exception { 4151 Pattern p = Pattern.compile("\\P{Lu}"); 4152 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4153 4154 if (m.find() && m.start() == 1) 4155 failCount++; 4156 4157 // from a unicode category 4158 p = Pattern.compile("\\P{Lu}"); 4159 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4160 if (m.find()) 4161 failCount++; 4162 if (!m.hitEnd()) 4163 failCount++; 4164 4165 // block 4166 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4167 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4168 if (m.find() && m.start() == 1) 4169 failCount++; 4170 4171 p = Pattern.compile("\\P{sc=GRANTHA}"); 4172 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4173 if (m.find() && m.start() == 1) 4174 failCount++; 4175 4176 report("NonBmpClassComplement"); 4177 } 4178 4179 private static void unicodePropertiesTest() throws Exception { 4180 // different forms 4181 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4182 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4183 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4184 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4185 
!Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4186 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4187 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4188 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4189 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4190 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4191 failCount++; 4192 4193 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4194 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4195 Matcher lastSM = common; 4196 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4197 4198 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4199 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4200 Matcher lastBM = latin; 4201 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4202 4203 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4204 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4205 continue; // only pick couple code points, they are the same 4206 } 4207 4208 // Unicode Script 4209 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4210 Matcher m; 4211 String str = new String(Character.toChars(cp)); 4212 if (script == lastScript) { 4213 m = lastSM; 4214 m.reset(str); 4215 } else { 4216 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4217 } 4218 if (!m.matches()) { 4219 failCount++; 4220 } 4221 Matcher other = (script == Character.UnicodeScript.COMMON)? 
unknown : common; 4222 other.reset(str); 4223 if (other.matches()) { 4224 failCount++; 4225 } 4226 lastSM = m; 4227 lastScript = script; 4228 4229 // Unicode Block 4230 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4231 if (block == null) { 4232 //System.out.printf("Not a Block: cp=%x%n", cp); 4233 continue; 4234 } 4235 if (block == lastBlock) { 4236 m = lastBM; 4237 m.reset(str); 4238 } else { 4239 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4240 } 4241 if (!m.matches()) { 4242 failCount++; 4243 } 4244 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4245 other.reset(str); 4246 if (other.matches()) { 4247 failCount++; 4248 } 4249 lastBM = m; 4250 lastBlock = block; 4251 } 4252 report("unicodeProperties"); 4253 } 4254 4255 private static void unicodeHexNotationTest() throws Exception { 4256 4257 // negative 4258 checkExpectedFail("\\x{-23}"); 4259 checkExpectedFail("\\x{110000}"); 4260 checkExpectedFail("\\x{}"); 4261 checkExpectedFail("\\x{AB[ef]"); 4262 4263 // codepoint 4264 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4265 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4266 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4267 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4268 4269 // in class 4270 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4271 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4272 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4273 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4274 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4275 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4276 4277 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4278 String s = "A" + new String(Character.toChars(cp)) + "B"; 4279 String hexUTF16 = (cp <= 0xFFFF)? 
String.format("\\u%04x", cp) 4280 : String.format("\\u%04x\\u%04x", 4281 (int) Character.toChars(cp)[0], 4282 (int) Character.toChars(cp)[1]); 4283 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4284 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4285 failCount++; 4286 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4287 failCount++; 4288 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4289 failCount++; 4290 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4291 failCount++; 4292 } 4293 report("unicodeHexNotation"); 4294 } 4295 4296 private static void unicodeClassesTest() throws Exception { 4297 4298 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4299 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4300 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4301 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4302 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4303 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4304 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4305 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4306 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4307 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4308 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4309 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4310 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4311 Matcher bound = Pattern.compile("\\b").matcher(""); 4312 Matcher word = Pattern.compile("\\w++").matcher(""); 4313 // UNICODE_CHARACTER_CLASS 4314 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4315 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4316 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4317 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 
4318 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4319 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4320 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4321 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4322 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4323 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4324 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4325 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4326 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4327 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4328 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4329 // embedded flag (?U) 4330 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4331 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4332 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4333 4334 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4335 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4336 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4337 // properties 4338 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4339 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4340 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4341 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4342 Matcher alphaP = 
Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4343 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4344 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4345 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4346 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4347 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4348 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4349 // javaMethod 4350 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4351 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4352 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4353 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4354 // GC/C 4355 Matcher gcC = Pattern.compile("\\p{C}").matcher(""); 4356 4357 for (int cp = 1; cp < 0x30000; cp++) { 4358 String str = new String(Character.toChars(cp)); 4359 int type = Character.getType(cp); 4360 if (// lower 4361 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4362 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4363 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4364 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4365 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4366 // upper 4367 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4368 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4369 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4370 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4371 // alpha 4372 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4373 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4374 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4375 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4376 // digit 4377 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4378 
Character.isDigit(cp) != digitU.reset(str).matches() || 4379 // alnum 4380 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4381 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4382 // punct 4383 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4384 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4385 // graph 4386 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4387 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4388 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4389 // blank 4390 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4391 != blank.reset(str).matches() || 4392 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4393 // print 4394 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4395 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4396 // cntrl 4397 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4398 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4399 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4400 // hexdigit 4401 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4402 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4403 // space 4404 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4405 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4406 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4407 // word 4408 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4409 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4410 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4411 // bwordb 4412 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4413 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4414 // properties 4415 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4416 Character.isLetter(cp) != letterP.reset(str).matches()|| 4417 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4418 Character.isIdeographic(cp) != 
ideogJ.reset(str).matches() || 4419 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4420 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4421 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 4422 // gc_C 4423 (Character.CONTROL == type || Character.FORMAT == type || 4424 Character.PRIVATE_USE == type || Character.SURROGATE == type || 4425 Character.UNASSIGNED == type) 4426 != gcC.reset(str).matches()) { 4427 failCount++; 4428 } 4429 } 4430 4431 // bounds/word align 4432 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4433 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4434 failCount++; 4435 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4436 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4437 failCount++; 4438 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4439 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4440 failCount++; 4441 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4442 failCount++; 4443 report("unicodePredefinedClasses"); 4444 } 4445 4446 private static void unicodeCharacterNameTest() throws Exception { 4447 4448 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4449 if (!Character.isValidCodePoint(cp) || 4450 Character.getType(cp) == Character.UNASSIGNED) 4451 continue; 4452 String str = new String(Character.toChars(cp)); 4453 // single 4454 String p = "\\N{" + Character.getName(cp) + "}"; 4455 if (!Pattern.compile(p).matcher(str).matches()) { 4456 failCount++; 4457 } 4458 // class[c] 4459 p = "[\\N{" + Character.getName(cp) + "}]"; 4460 if (!Pattern.compile(p).matcher(str).matches()) { 4461 failCount++; 4462 } 4463 } 4464 4465 // range 4466 for (int i = 0; i < 10; i++) { 4467 int start = generator.nextInt(20); 4468 int end = start + generator.nextInt(200); 4469 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4470 String str; 4471 for (int cp = start; cp < end; cp++) { 4472 str = new 
String(Character.toChars(cp)); 4473 if (!Pattern.compile(p).matcher(str).matches()) { 4474 failCount++; 4475 } 4476 } 4477 str = new String(Character.toChars(end + 10)); 4478 if (Pattern.compile(p).matcher(str).matches()) { 4479 failCount++; 4480 } 4481 } 4482 4483 // slice 4484 for (int i = 0; i < 10; i++) { 4485 int n = generator.nextInt(256); 4486 int[] buf = new int[n]; 4487 StringBuffer sb = new StringBuffer(1024); 4488 for (int j = 0; j < n; j++) { 4489 int cp = generator.nextInt(1000); 4490 if (!Character.isValidCodePoint(cp) || 4491 Character.getType(cp) == Character.UNASSIGNED) 4492 cp = 0x4e00; // just use 4e00 4493 sb.append("\\N{" + Character.getName(cp) + "}"); 4494 buf[j] = cp; 4495 } 4496 String p = sb.toString(); 4497 String str = new String(buf, 0, buf.length); 4498 if (!Pattern.compile(p).matcher(str).matches()) { 4499 failCount++; 4500 } 4501 } 4502 report("unicodeCharacterName"); 4503 } 4504 4505 private static void horizontalAndVerticalWSTest() throws Exception { 4506 String hws = new String (new char[] { 4507 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4508 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4509 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4510 0x202f, 0x205f, 0x3000 }); 4511 String vws = new String (new char[] { 4512 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4513 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4514 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4515 failCount++; 4516 if (Pattern.compile("\\H").matcher(hws).find() || 4517 Pattern.compile("[\\H]").matcher(hws).find()) 4518 failCount++; 4519 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4520 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4521 failCount++; 4522 if (Pattern.compile("\\V").matcher(vws).find() || 4523 Pattern.compile("[\\V]").matcher(vws).find()) 4524 failCount++; 4525 String prefix = "abcd"; 4526 String suffix = "efgh"; 4527 String ng = "A"; 4528 for (int i = 0; i < hws.length(); i++) { 4529 String c = String.valueOf(hws.charAt(i)); 
4530 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4531 if (!m.find() || !c.equals(m.group())) 4532 failCount++; 4533 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4534 if (!m.find() || !c.equals(m.group())) 4535 failCount++; 4536 4537 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4538 if (!m.find() || !ng.equals(m.group())) 4539 failCount++; 4540 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4541 if (!m.find() || !ng.equals(m.group())) 4542 failCount++; 4543 } 4544 for (int i = 0; i < vws.length(); i++) { 4545 String c = String.valueOf(vws.charAt(i)); 4546 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4547 if (!m.find() || !c.equals(m.group())) 4548 failCount++; 4549 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4550 if (!m.find() || !c.equals(m.group())) 4551 failCount++; 4552 4553 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4554 if (!m.find() || !ng.equals(m.group())) 4555 failCount++; 4556 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4557 if (!m.find() || !ng.equals(m.group())) 4558 failCount++; 4559 } 4560 // \v in range is interpreted as 0x0B. 
This is the undocumented behavior 4561 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4562 failCount++; 4563 report("horizontalAndVerticalWSTest"); 4564 } 4565 4566 private static void linebreakTest() throws Exception { 4567 String linebreaks = new String (new char[] { 4568 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4569 String crnl = "\r\n"; 4570 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() && 4571 Pattern.compile("\\R").matcher(crnl).matches() && 4572 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4573 Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4574 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4575 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking 4576 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029 4577 failCount++; 4578 } 4579 report("linebreakTest"); 4580 } 4581 4582 // #7189363 4583 private static void branchTest() throws Exception { 4584 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4585 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4586 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4587 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4588 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4589 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4590 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4591 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4592 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4593 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4594 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4595 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4596 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4597 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4598 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4599 
!Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4600 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4601 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4602 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4603 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4604 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4605 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4606 failCount++; 4607 report("branchTest"); 4608 } 4609 4610 // This test is for 8007395 4611 private static void groupCurlyNotFoundSuppTest() throws Exception { 4612 String input = "test this as \ud83d\ude0d"; 4613 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4614 "test(.)*(@[a-zA-Z.]+)", 4615 "test([^B])+(@[a-zA-Z.]+)", 4616 "test([^B])*(@[a-zA-Z.]+)", 4617 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4618 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4619 }) { 4620 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4621 .matcher(input); 4622 try { 4623 if (m.find()) { 4624 failCount++; 4625 } 4626 } catch (Exception x) { 4627 failCount++; 4628 } 4629 } 4630 report("GroupCurly NotFoundSupp"); 4631 } 4632 4633 // This test is for 8023647 4634 private static void groupCurlyBackoffTest() throws Exception { 4635 if (!"abc1c".matches("(\\w)+1\\1") || 4636 "abc11".matches("(\\w)+1\\1")) { 4637 failCount++; 4638 } 4639 report("GroupCurly backoff"); 4640 } 4641 4642 // This test is for 8012646 4643 private static void patternAsPredicate() throws Exception { 4644 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4645 4646 if (p.test("")) { 4647 failCount++; 4648 } 4649 if (!p.test("word")) { 4650 failCount++; 4651 } 4652 if (p.test("1234")) { 4653 failCount++; 4654 } 4655 report("Pattern.asPredicate"); 4656 } 4657 4658 // This test is for 8035975 4659 private static void invalidFlags() throws Exception { 4660 for (int flag = 1; flag != 0; flag <<= 1) { 4661 switch (flag) { 4662 case Pattern.CASE_INSENSITIVE: 4663 
case Pattern.MULTILINE: 4664 case Pattern.DOTALL: 4665 case Pattern.UNICODE_CASE: 4666 case Pattern.CANON_EQ: 4667 case Pattern.UNIX_LINES: 4668 case Pattern.LITERAL: 4669 case Pattern.UNICODE_CHARACTER_CLASS: 4670 case Pattern.COMMENTS: 4671 // valid flag, continue 4672 break; 4673 default: 4674 try { 4675 Pattern.compile(".", flag); 4676 failCount++; 4677 } catch (IllegalArgumentException expected) { 4678 } 4679 } 4680 } 4681 report("Invalid compile flags"); 4682 } 4683 4684 // This test is for 8158482 4685 private static void embeddedFlags() throws Exception { 4686 try { 4687 Pattern.compile("(?i).(?-i)."); 4688 Pattern.compile("(?m).(?-m)."); 4689 Pattern.compile("(?s).(?-s)."); 4690 Pattern.compile("(?d).(?-d)."); 4691 Pattern.compile("(?u).(?-u)."); 4692 Pattern.compile("(?c).(?-c)."); 4693 Pattern.compile("(?x).(?-x)."); 4694 Pattern.compile("(?U).(?-U)."); 4695 Pattern.compile("(?imsducxU).(?-imsducxU)."); 4696 } catch (PatternSyntaxException x) { 4697 failCount++; 4698 } 4699 report("Embedded flags"); 4700 } 4701 4702 private static void grapheme() throws Exception { 4703 Files.lines(Paths.get(System.getProperty("test.src", "."), 4704 "GraphemeBreakTest.txt")) 4705 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4706 .forEach( ln -> { 4707 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4708 // System.out.println(str); 4709 String[] strs = ln.split("\u00f7|\u00d7"); 4710 StringBuilder src = new StringBuilder(); 4711 ArrayList<String> graphemes = new ArrayList<>(); 4712 StringBuilder buf = new StringBuilder(); 4713 int offBk = 0; 4714 for (String str : strs) { 4715 if (str.length() == 0) // first empty str 4716 continue; 4717 int cp = Integer.parseInt(str, 16); 4718 src.appendCodePoint(cp); 4719 buf.appendCodePoint(cp); 4720 offBk += (str.length() + 1); 4721 if (ln.charAt(offBk) == '\u00f7') { // DIV 4722 graphemes.add(buf.toString()); 4723 buf = new StringBuilder(); 4724 } 4725 } 4726 Pattern p = Pattern.compile("\\X"); 4727 
Matcher m = p.matcher(src.toString()); 4728 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4729 for (String g : graphemes) { 4730 // System.out.printf(" grapheme:=[%s]%n", g); 4731 // (1) test \\X directly 4732 if (!m.find() || !m.group().equals(g)) { 4733 System.out.println("Failed \\X [" + ln + "] : " + g); 4734 failCount++; 4735 } 4736 // (2) test \\b{g} + \\X via Scanner 4737 boolean hasNext = s.hasNext(p); 4738 // if (!s.hasNext() || !s.next().equals(next)) { 4739 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4740 System.out.println("Failed b{g} [" + ln + "] : " + g); 4741 failCount++; 4742 } 4743 } 4744 }); 4745 // some sanity checks 4746 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4747 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4748 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4749 failCount++; 4750 // make sure "\b{n}" still works 4751 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4752 failCount++; 4753 report("Unicode extended grapheme cluster"); 4754 } 4755 4756 // hangup/timeout if go into exponential backtracking 4757 private static void expoBacktracking() throws Exception { 4758 4759 Object[][] patternMatchers = { 4760 // 6328855 4761 { "(.*\n*)*", 4762 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)", 4763 false }, 4764 // 6192895 4765 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4766 "Hello World this is a test this is a test this is a test A", 4767 true }, 4768 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4769 "Hello World this is a test this is a test this is a test \u4e00 ", 4770 false }, 4771 { " *([a-z0-9]+ *)+", 4772 "hello world this is a test this is a test this is a test A", 4773 false }, 4774 // 4771934 [FIXED] #5013651? 
4775 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4776 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4777 true }, 4778 // 4866249 [FIXED] 4779 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4780 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4781 true }, 4782 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4783 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4784 false }, 4785 // 6345469 4786 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4787 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4788 true }, // --> matched 4789 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4790 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4791 false }, 4792 // 5026912 4793 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4794 "156580451111112225588087755221111111566969655555555", 4795 false}, 4796 // 6988218 4797 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4798 "'%)) order by ANGEBOT.ID", 4799 false}, // find 4800 // 6693451 4801 { "^(\\s*foo\\s*)*$", 4802 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4803 true }, 4804 { "^(\\s*foo\\s*)*$", 4805 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo", 4806 false 4807 }, 4808 // 7006761 4809 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4810 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4811 // 8140212 4812 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4813 
"{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4814 false 4815 }, 4816 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4817 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4818 4819 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4820 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4821 4822 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4823 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4824 4825 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4826 4827 /* not fixed 4828 //8132141 ---> second level exponential backtracking 4829 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4830 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4831 */ 4832 }; 4833 4834 for (Object[] pm : patternMatchers) { 4835 String p = (String)pm[0]; 4836 String s = (String)pm[1]; 4837 boolean r = (Boolean)pm[2]; 4838 if (r != Pattern.compile(p).matcher(s).matches()) { 4839 failCount++; 4840 } 4841 } 4842 } 4843 }