1 /* 2 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 37 * 6328855 6192895 6345469 6988218 6693451 7006761 8140212 8143282 8158482 38 * 8176029 39 * 40 * @library /lib/testlibrary 41 * @build jdk.testlibrary.* 42 * @run main RegExTest 43 * @key randomness 44 */ 45 46 import java.util.function.Function; 47 import java.util.regex.*; 48 import java.util.Random; 49 import java.util.Scanner; 50 import java.io.*; 51 import java.nio.file.*; 52 import java.util.*; 53 import java.nio.CharBuffer; 54 import java.util.function.Predicate; 55 import jdk.testlibrary.RandomFactory; 56 57 /** 58 * This is a test class created to check the operation of 59 * the Pattern and Matcher classes. 60 */ 61 public class RegExTest { 62 63 private static Random generator = RandomFactory.getRandom(); 64 private static boolean failure = false; 65 private static int failCount = 0; 66 private static String firstFailure = null; 67 68 /** 69 * Main to interpret arguments and run several tests. 
/**
 * Runs every regex test in a fixed order and reports the overall outcome.
 *
 * <p>{@code args} is not read by this method. After the last test, a set
 * {@code failure} flag is turned into a {@link RuntimeException} that names
 * {@code firstFailure}; otherwise a success line is written to
 * {@code System.err}.
 *
 * <p>NOTE(review): the tests visible in this file draw from one shared
 * random generator, so the call order presumably influences the inputs a
 * given seed produces -- keep the order stable.
 *
 * @param args command-line arguments (unused here)
 * @throws Exception if a test propagates one; a RuntimeException is thrown
 *         when at least one test recorded a failure
 */
public static void main(String[] args) throws Exception {
    // Most of the tests are in a file
    processFile("TestCases.txt");
    //processFile("PerlCases.txt");
    processFile("BMPTestCases.txt");
    processFile("SupplementaryTestCases.txt");

    // These test many randomly generated char patterns
    bm();
    slice();

    // These are hard to put into the file
    escapes();
    blankInput();

    // Substitution tests on randomly generated sequences
    globalSubstitute();
    stringbufferSubstitute();
    stringbuilderSubstitute();

    substitutionBasher();
    substitutionBasher2();

    // Canonical Equivalence
    ceTest();

    // Anchors
    anchorTest();

    // boolean match calls
    matchesTest();
    lookingAtTest();

    // Pattern API
    patternMatchesTest();

    // Misc
    lookbehindTest();
    nullArgumentTest();
    backRefTest();
    groupCaptureTest();
    caretTest();
    charClassTest();
    emptyPatternTest();
    findIntTest();
    group0Test();
    longPatternTest();
    octalTest();
    ampersandTest();
    negationTest();
    splitTest();
    appendTest();
    caseFoldingTest();
    commentsTest();
    unixLinesTest();
    replaceFirstTest();
    gTest();
    zTest();
    serializeTest();
    reluctantRepetitionTest();
    multilineDollarTest();
    dollarAtEndTest();
    caretBetweenTerminatorsTest();
    // This RFE rejected in Tiger numOccurrencesTest();
    javaCharClassTest();
    nonCaptureRepetitionTest();
    notCapturedGroupCurlyMatchTest();
    escapedSegmentTest();
    literalPatternTest();
    literalReplacementTest();
    regionTest();
    toStringTest();
    negatedCharClassTest();
    findFromTest();
    boundsTest();
    unicodeWordBoundsTest();
    caretAtEndTest();
    wordSearchTest();
    hitEndTest();
    toMatchResultTest();
    toMatchResultTest2();
    surrogatesInClassTest();
    removeQEQuotingTest();
    namedGroupCaptureTest();
    nonBmpClassComplementTest();
    unicodePropertiesTest();
    unicodeHexNotationTest();
    unicodeClassesTest();
    unicodeCharacterNameTest();
    horizontalAndVerticalWSTest();
    linebreakTest();
    branchTest();
    groupCurlyNotFoundSuppTest();
    groupCurlyBackoffTest();
    patternAsPredicate();
    invalidFlags();
    embeddedFlags();
    grapheme();
    expoBacktracking();

    // Any failure recorded along the way fails the whole run here.
    if (failure) {
        throw new
            RuntimeException("RegExTest failed, 1st failure: " +
                             firstFailure);
    } else {
        System.err.println("OKAY: All tests passed.");
    }
}
unicodePropertiesTest(); 158 unicodeHexNotationTest(); 159 unicodeClassesTest(); 160 unicodeCharacterNameTest(); 161 horizontalAndVerticalWSTest(); 162 linebreakTest(); 163 branchTest(); 164 groupCurlyNotFoundSuppTest(); 165 groupCurlyBackoffTest(); 166 patternAsPredicate(); 167 invalidFlags(); 168 embeddedFlags(); 169 grapheme(); 170 expoBacktracking(); 171 172 if (failure) { 173 throw new 174 RuntimeException("RegExTest failed, 1st failure: " + 175 firstFailure); 176 } else { 177 System.err.println("OKAY: All tests passed."); 178 } 179 } 180 181 // Utility functions 182 183 private static String getRandomAlphaString(int length) { 184 StringBuffer buf = new StringBuffer(length); 185 for (int i=0; i<length; i++) { 186 char randChar = (char)(97 + generator.nextInt(26)); 187 buf.append(randChar); 188 } 189 return buf.toString(); 190 } 191 192 private static void check(Matcher m, String expected) { 193 m.find(); 194 if (!m.group().equals(expected)) 195 failCount++; 196 } 197 198 private static void check(Matcher m, String result, boolean expected) { 199 m.find(); 200 if (m.group().equals(result) != expected) 201 failCount++; 202 } 203 204 private static void check(Pattern p, String s, boolean expected) { 205 if (p.matcher(s).find() != expected) 206 failCount++; 207 } 208 209 private static void check(String p, String s, boolean expected) { 210 Matcher matcher = Pattern.compile(p).matcher(s); 211 if (matcher.find() != expected) 212 failCount++; 213 } 214 215 private static void check(String p, char c, boolean expected) { 216 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 217 Pattern pattern = Pattern.compile(propertyPattern); 218 char[] ca = new char[1]; ca[0] = c; 219 Matcher matcher = pattern.matcher(new String(ca)); 220 if (!matcher.find()) 221 failCount++; 222 } 223 224 private static void check(String p, int codePoint, boolean expected) { 225 String propertyPattern = expected ? 
"\\p" + p : "\\P" + p; 226 Pattern pattern = Pattern.compile(propertyPattern); 227 char[] ca = Character.toChars(codePoint); 228 Matcher matcher = pattern.matcher(new String(ca)); 229 if (!matcher.find()) 230 failCount++; 231 } 232 233 private static void check(String p, int flag, String input, String s, 234 boolean expected) 235 { 236 Pattern pattern = Pattern.compile(p, flag); 237 Matcher matcher = pattern.matcher(input); 238 if (expected) 239 check(matcher, s, expected); 240 else 241 check(pattern, input, false); 242 } 243 244 private static void report(String testName) { 245 int spacesToAdd = 30 - testName.length(); 246 StringBuffer paddedNameBuffer = new StringBuffer(testName); 247 for (int i=0; i<spacesToAdd; i++) 248 paddedNameBuffer.append(" "); 249 String paddedName = paddedNameBuffer.toString(); 250 System.err.println(paddedName + ": " + 251 (failCount==0 ? "Passed":"Failed("+failCount+")")); 252 if (failCount > 0) { 253 failure = true; 254 255 if (firstFailure == null) { 256 firstFailure = testName; 257 } 258 } 259 260 failCount = 0; 261 } 262 263 /** 264 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 265 * supplementary characters. This method does NOT fully take care 266 * of the regex syntax. 
267 */ 268 private static String toSupplementaries(String s) { 269 int length = s.length(); 270 StringBuffer sb = new StringBuffer(length * 2); 271 272 for (int i = 0; i < length; ) { 273 char c = s.charAt(i++); 274 if (c == '\\') { 275 sb.append(c); 276 if (i < length) { 277 c = s.charAt(i++); 278 sb.append(c); 279 if (c == 'u') { 280 // assume no syntax error 281 sb.append(s.charAt(i++)); 282 sb.append(s.charAt(i++)); 283 sb.append(s.charAt(i++)); 284 sb.append(s.charAt(i++)); 285 } 286 } 287 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 288 sb.append('\ud800').append((char)('\udc00'+c)); 289 } else { 290 sb.append(c); 291 } 292 } 293 return sb.toString(); 294 } 295 296 // Regular expression tests 297 298 // This is for bug 6178785 299 // Test if an expected NPE gets thrown when passing in a null argument 300 private static boolean check(Runnable test) { 301 try { 302 test.run(); 303 failCount++; 304 return false; 305 } catch (NullPointerException npe) { 306 return true; 307 } 308 } 309 310 private static void nullArgumentTest() { 311 check(() -> Pattern.compile(null)); 312 check(() -> Pattern.matches(null, null)); 313 check(() -> Pattern.matches("xyz", null)); 314 check(() -> Pattern.quote(null)); 315 check(() -> Pattern.compile("xyz").split(null)); 316 check(() -> Pattern.compile("xyz").matcher(null)); 317 318 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 319 m.matches(); 320 check(() -> m.appendTail((StringBuffer) null)); 321 check(() -> m.appendTail((StringBuilder)null)); 322 check(() -> m.replaceAll((String) null)); 323 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 324 check(() -> m.replaceFirst((String)null)); 325 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 326 check(() -> m.appendReplacement((StringBuffer)null, null)); 327 check(() -> m.appendReplacement((StringBuilder)null, null)); 328 check(() -> m.reset(null)); 329 check(() -> Matcher.quoteReplacement(null)); 330 //check(() -> 
m.usePattern(null)); 331 332 report("Null Argument"); 333 } 334 335 // This is for bug6635133 336 // Test if surrogate pair in Unicode escapes can be handled correctly. 337 private static void surrogatesInClassTest() throws Exception { 338 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 339 Matcher matcher = pattern.matcher("\ud834\udd22"); 340 if (!matcher.find()) 341 failCount++; 342 343 report("Surrogate pair in Unicode escape"); 344 } 345 346 // This is for bug6990617 347 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 348 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 349 // char is an octal digit. 350 private static void removeQEQuotingTest() throws Exception { 351 Pattern pattern = 352 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 353 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 354 if (!matcher.find()) 355 failCount++; 356 357 report("Remove Q/E Quoting"); 358 } 359 360 // This is for bug 4988891 361 // Test toMatchResult to see that it is a copy of the Matcher 362 // that is not affected by subsequent operations on the original 363 private static void toMatchResultTest() throws Exception { 364 Pattern pattern = Pattern.compile("squid"); 365 Matcher matcher = pattern.matcher( 366 "agiantsquidofdestinyasmallsquidoffate"); 367 matcher.find(); 368 int matcherStart1 = matcher.start(); 369 MatchResult mr = matcher.toMatchResult(); 370 if (mr == matcher) 371 failCount++; 372 int resultStart1 = mr.start(); 373 if (matcherStart1 != resultStart1) 374 failCount++; 375 matcher.find(); 376 int matcherStart2 = matcher.start(); 377 int resultStart2 = mr.start(); 378 if (matcherStart2 == resultStart2) 379 failCount++; 380 if (resultStart1 != resultStart2) 381 failCount++; 382 MatchResult mr2 = matcher.toMatchResult(); 383 if (mr == mr2) 384 failCount++; 385 if (mr2.start() != matcherStart2) 386 failCount++; 387 report("toMatchResult is a copy"); 388 } 389 390 private 
static void checkExpectedISE(Runnable test) { 391 try { 392 test.run(); 393 failCount++; 394 } catch (IllegalStateException x) { 395 } catch (IndexOutOfBoundsException xx) { 396 failCount++; 397 } 398 } 399 400 private static void checkExpectedIOOE(Runnable test) { 401 try { 402 test.run(); 403 failCount++; 404 } catch (IndexOutOfBoundsException x) {} 405 } 406 407 // This is for bug 8074678 408 // Test the result of toMatchResult throws ISE if no match is availble 409 private static void toMatchResultTest2() throws Exception { 410 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 411 matcher.find(); 412 MatchResult mr = matcher.toMatchResult(); 413 414 checkExpectedISE(() -> mr.start()); 415 checkExpectedISE(() -> mr.start(2)); 416 checkExpectedISE(() -> mr.end()); 417 checkExpectedISE(() -> mr.end(2)); 418 checkExpectedISE(() -> mr.group()); 419 checkExpectedISE(() -> mr.group(2)); 420 421 matcher = Pattern.compile("(match)").matcher("there is a match"); 422 matcher.find(); 423 MatchResult mr2 = matcher.toMatchResult(); 424 checkExpectedIOOE(() -> mr2.start(2)); 425 checkExpectedIOOE(() -> mr2.end(2)); 426 checkExpectedIOOE(() -> mr2.group(2)); 427 428 report("toMatchResult2 appropriate exceptions"); 429 } 430 431 // This is for bug 5013885 432 // Must test a slice to see if it reports hitEnd correctly 433 private static void hitEndTest() throws Exception { 434 // Basic test of Slice node 435 Pattern p = Pattern.compile("^squidattack"); 436 Matcher m = p.matcher("squack"); 437 m.find(); 438 if (m.hitEnd()) 439 failCount++; 440 m.reset("squid"); 441 m.find(); 442 if (!m.hitEnd()) 443 failCount++; 444 445 // Test Slice, SliceA and SliceU nodes 446 for (int i=0; i<3; i++) { 447 int flags = 0; 448 if (i==1) flags = Pattern.CASE_INSENSITIVE; 449 if (i==2) flags = Pattern.UNICODE_CASE; 450 p = Pattern.compile("^abc", flags); 451 m = p.matcher("ad"); 452 m.find(); 453 if (m.hitEnd()) 454 failCount++; 455 m.reset("ab"); 456 m.find(); 457 if 
(!m.hitEnd()) 458 failCount++; 459 } 460 461 // Test Boyer-Moore node 462 p = Pattern.compile("catattack"); 463 m = p.matcher("attack"); 464 m.find(); 465 if (!m.hitEnd()) 466 failCount++; 467 468 p = Pattern.compile("catattack"); 469 m = p.matcher("attackattackattackcatatta"); 470 m.find(); 471 if (!m.hitEnd()) 472 failCount++; 473 report("hitEnd from a Slice"); 474 } 475 476 // This is for bug 4997476 477 // It is weird code submitted by customer demonstrating a regression 478 private static void wordSearchTest() throws Exception { 479 String testString = new String("word1 word2 word3"); 480 Pattern p = Pattern.compile("\\b"); 481 Matcher m = p.matcher(testString); 482 int position = 0; 483 int start = 0; 484 while (m.find(position)) { 485 start = m.start(); 486 if (start == testString.length()) 487 break; 488 if (m.find(start+1)) { 489 position = m.start(); 490 } else { 491 position = testString.length(); 492 } 493 if (testString.substring(start, position).equals(" ")) 494 continue; 495 if (!testString.substring(start, position-1).startsWith("word")) 496 failCount++; 497 } 498 report("Customer word search"); 499 } 500 501 // This is for bug 4994840 502 private static void caretAtEndTest() throws Exception { 503 // Problem only occurs with multiline patterns 504 // containing a beginning-of-line caret "^" followed 505 // by an expression that also matches the empty string. 
506 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 507 Matcher matcher = pattern.matcher("\r"); 508 matcher.find(); 509 matcher.find(); 510 report("Caret at end"); 511 } 512 513 // This test is for 4979006 514 // Check to see if word boundary construct properly handles unicode 515 // non spacing marks 516 private static void unicodeWordBoundsTest() throws Exception { 517 String spaces = " "; 518 String wordChar = "a"; 519 String nsm = "\u030a"; 520 521 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 522 523 Pattern pattern = Pattern.compile("\\b"); 524 Matcher matcher = pattern.matcher(""); 525 // S=other B=word character N=non spacing mark .=word boundary 526 // SS.BB.SS 527 String input = spaces + wordChar + wordChar + spaces; 528 twoFindIndexes(input, matcher, 2, 4); 529 // SS.BBN.SS 530 input = spaces + wordChar +wordChar + nsm + spaces; 531 twoFindIndexes(input, matcher, 2, 5); 532 // SS.BN.SS 533 input = spaces + wordChar + nsm + spaces; 534 twoFindIndexes(input, matcher, 2, 4); 535 // SS.BNN.SS 536 input = spaces + wordChar + nsm + nsm + spaces; 537 twoFindIndexes(input, matcher, 2, 5); 538 // SSN.BB.SS 539 input = spaces + nsm + wordChar + wordChar + spaces; 540 twoFindIndexes(input, matcher, 3, 5); 541 // SS.BNB.SS 542 input = spaces + wordChar + nsm + wordChar + spaces; 543 twoFindIndexes(input, matcher, 2, 5); 544 // SSNNSS 545 input = spaces + nsm + nsm + spaces; 546 matcher.reset(input); 547 if (matcher.find()) 548 failCount++; 549 // SSN.BBN.SS 550 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 551 twoFindIndexes(input, matcher, 3, 6); 552 553 report("Unicode word boundary"); 554 } 555 556 private static void twoFindIndexes(String input, Matcher matcher, int a, 557 int b) throws Exception 558 { 559 matcher.reset(input); 560 matcher.find(); 561 if (matcher.start() != a) 562 failCount++; 563 matcher.find(); 564 if (matcher.start() != b) 565 failCount++; 566 } 567 568 // This test is for 6284152 569 static 
void check(String regex, String input, String[] expected) { 570 List<String> result = new ArrayList<String>(); 571 Pattern p = Pattern.compile(regex); 572 Matcher m = p.matcher(input); 573 while (m.find()) { 574 result.add(m.group()); 575 } 576 if (!Arrays.asList(expected).equals(result)) 577 failCount++; 578 } 579 580 private static void lookbehindTest() throws Exception { 581 //Positive 582 check("(?<=%.{0,5})foo\\d", 583 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 584 new String[]{"foo1", "foo2", "foo3"}); 585 586 //boundary at end of the lookbehind sub-regex should work consistently 587 //with the boundary just after the lookbehind sub-regex 588 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 589 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 590 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 591 check("(?<!abc \\b)foo", "abc foo", new String[0]); 592 593 //Negative 594 check("(?<!%.{0,5})foo\\d", 595 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 596 new String[] {"foo4", "foo5"}); 597 598 //Positive greedy 599 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 600 601 //Positive reluctant 602 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 603 604 //supplementary 605 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 606 new String[] {"fo\ud800\udc00o"}); 607 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 608 new String[] {"fo\ud800\udc00o"}); 609 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 610 new String[] {"fo\ud800\udc00o"}); 611 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 612 new String[] {"fo\ud800\udc00o"}); 613 report("Lookbehind"); 614 } 615 616 // This test is for 4938995 617 // Check to see if weak region boundaries are transparent to 618 // lookahead and lookbehind constructs 619 private static void boundsTest() throws Exception { 620 String fullMessage = "catdogcat"; 621 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 622 
Matcher matcher = pattern.matcher("catdogca"); 623 matcher.useTransparentBounds(true); 624 if (matcher.find()) 625 failCount++; 626 matcher.reset("atdogcat"); 627 if (matcher.find()) 628 failCount++; 629 matcher.reset(fullMessage); 630 if (!matcher.find()) 631 failCount++; 632 matcher.reset(fullMessage); 633 matcher.region(0,9); 634 if (!matcher.find()) 635 failCount++; 636 matcher.reset(fullMessage); 637 matcher.region(0,6); 638 if (!matcher.find()) 639 failCount++; 640 matcher.reset(fullMessage); 641 matcher.region(3,6); 642 if (!matcher.find()) 643 failCount++; 644 matcher.useTransparentBounds(false); 645 if (matcher.find()) 646 failCount++; 647 648 // Negative lookahead/lookbehind 649 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 650 matcher = pattern.matcher("dogcat"); 651 matcher.useTransparentBounds(true); 652 matcher.region(0,3); 653 if (matcher.find()) 654 failCount++; 655 matcher.reset("catdog"); 656 matcher.region(3,6); 657 if (matcher.find()) 658 failCount++; 659 matcher.useTransparentBounds(false); 660 matcher.reset("dogcat"); 661 matcher.region(0,3); 662 if (!matcher.find()) 663 failCount++; 664 matcher.reset("catdog"); 665 matcher.region(3,6); 666 if (!matcher.find()) 667 failCount++; 668 669 report("Region bounds transparency"); 670 } 671 672 // This test is for 4945394 673 private static void findFromTest() throws Exception { 674 String message = "This is 40 $0 message."; 675 Pattern pat = Pattern.compile("\\$0"); 676 Matcher match = pat.matcher(message); 677 if (!match.find()) 678 failCount++; 679 if (match.find()) 680 failCount++; 681 if (match.find()) 682 failCount++; 683 report("Check for alternating find"); 684 } 685 686 // This test is for 4872664 and 4892980 687 private static void negatedCharClassTest() throws Exception { 688 Pattern pattern = Pattern.compile("[^>]"); 689 Matcher matcher = pattern.matcher("\u203A"); 690 if (!matcher.matches()) 691 failCount++; 692 pattern = Pattern.compile("[^fr]"); 693 matcher = pattern.matcher("a"); 
694 if (!matcher.find()) 695 failCount++; 696 matcher.reset("\u203A"); 697 if (!matcher.find()) 698 failCount++; 699 String s = "for"; 700 String result[] = s.split("[^fr]"); 701 if (!result[0].equals("f")) 702 failCount++; 703 if (!result[1].equals("r")) 704 failCount++; 705 s = "f\u203Ar"; 706 result = s.split("[^fr]"); 707 if (!result[0].equals("f")) 708 failCount++; 709 if (!result[1].equals("r")) 710 failCount++; 711 712 // Test adding to bits, subtracting a node, then adding to bits again 713 pattern = Pattern.compile("[^f\u203Ar]"); 714 matcher = pattern.matcher("a"); 715 if (!matcher.find()) 716 failCount++; 717 matcher.reset("f"); 718 if (matcher.find()) 719 failCount++; 720 matcher.reset("\u203A"); 721 if (matcher.find()) 722 failCount++; 723 matcher.reset("r"); 724 if (matcher.find()) 725 failCount++; 726 matcher.reset("\u203B"); 727 if (!matcher.find()) 728 failCount++; 729 730 // Test subtracting a node, adding to bits, subtracting again 731 pattern = Pattern.compile("[^\u203Ar\u203B]"); 732 matcher = pattern.matcher("a"); 733 if (!matcher.find()) 734 failCount++; 735 matcher.reset("\u203A"); 736 if (matcher.find()) 737 failCount++; 738 matcher.reset("r"); 739 if (matcher.find()) 740 failCount++; 741 matcher.reset("\u203B"); 742 if (matcher.find()) 743 failCount++; 744 matcher.reset("\u203C"); 745 if (!matcher.find()) 746 failCount++; 747 748 report("Negated Character Class"); 749 } 750 751 // This test is for 4628291 752 private static void toStringTest() throws Exception { 753 Pattern pattern = Pattern.compile("b+"); 754 if (pattern.toString() != "b+") 755 failCount++; 756 Matcher matcher = pattern.matcher("aaabbbccc"); 757 String matcherString = matcher.toString(); // unspecified 758 matcher.find(); 759 matcherString = matcher.toString(); // unspecified 760 matcher.region(0,3); 761 matcherString = matcher.toString(); // unspecified 762 matcher.reset(); 763 matcherString = matcher.toString(); // unspecified 764 report("toString"); 765 } 766 767 // 
This test is for 4808962 768 private static void literalPatternTest() throws Exception { 769 int flags = Pattern.LITERAL; 770 771 Pattern pattern = Pattern.compile("abc\\t$^", flags); 772 check(pattern, "abc\\t$^", true); 773 774 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 775 check(pattern, "abc\\t$^", true); 776 777 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 778 check(pattern, "\\Qa^$bcabc\\E", true); 779 check(pattern, "a^$bcabc", false); 780 781 pattern = Pattern.compile("\\\\Q\\\\E"); 782 check(pattern, "\\Q\\E", true); 783 784 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 785 check(pattern, "abcefg\\Q\\Ehij", true); 786 787 pattern = Pattern.compile("\\\\\\Q\\\\E"); 788 check(pattern, "\\\\\\\\", true); 789 790 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 791 check(pattern, "\\Qa^$bcabc\\E", true); 792 check(pattern, "a^$bcabc", false); 793 794 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 795 check(pattern, "\\Qabc\\Edef", true); 796 check(pattern, "abcdef", false); 797 798 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 799 check(pattern, "abc\\Edef", true); 800 check(pattern, "abcdef", false); 801 802 pattern = Pattern.compile(Pattern.quote("\\E")); 803 check(pattern, "\\E", true); 804 805 pattern = Pattern.compile("((((abc.+?:)", flags); 806 check(pattern, "((((abc.+?:)", true); 807 808 flags |= Pattern.MULTILINE; 809 810 pattern = Pattern.compile("^cat$", flags); 811 check(pattern, "abc^cat$def", true); 812 check(pattern, "cat", false); 813 814 flags |= Pattern.CASE_INSENSITIVE; 815 816 pattern = Pattern.compile("abcdef", flags); 817 check(pattern, "ABCDEF", true); 818 check(pattern, "AbCdEf", true); 819 820 flags |= Pattern.DOTALL; 821 822 pattern = Pattern.compile("a...b", flags); 823 check(pattern, "A...b", true); 824 check(pattern, "Axxxb", false); 825 826 flags |= Pattern.CANON_EQ; 827 828 Pattern p = Pattern.compile("testa\u030a", flags); 829 check(pattern, "testa\u030a", false); 830 
check(pattern, "test\u00e5", false); 831 832 // Supplementary character test 833 flags = Pattern.LITERAL; 834 835 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 836 check(pattern, toSupplementaries("abc\\t$^"), true); 837 838 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 839 check(pattern, toSupplementaries("abc\\t$^"), true); 840 841 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 842 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 843 check(pattern, toSupplementaries("a^$bcabc"), false); 844 845 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 846 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 847 check(pattern, toSupplementaries("a^$bcabc"), false); 848 849 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 850 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 851 check(pattern, toSupplementaries("abcdef"), false); 852 853 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 854 check(pattern, toSupplementaries("abc\\Edef"), true); 855 check(pattern, toSupplementaries("abcdef"), false); 856 857 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 858 check(pattern, toSupplementaries("((((abc.+?:)"), true); 859 860 flags |= Pattern.MULTILINE; 861 862 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 863 check(pattern, toSupplementaries("abc^cat$def"), true); 864 check(pattern, toSupplementaries("cat"), false); 865 866 flags |= Pattern.DOTALL; 867 868 // note: this is case-sensitive. 
869 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 870 check(pattern, toSupplementaries("a...b"), true); 871 check(pattern, toSupplementaries("axxxb"), false); 872 873 flags |= Pattern.CANON_EQ; 874 875 String t = toSupplementaries("test"); 876 p = Pattern.compile(t + "a\u030a", flags); 877 check(pattern, t + "a\u030a", false); 878 check(pattern, t + "\u00e5", false); 879 880 report("Literal pattern"); 881 } 882 883 // This test is for 4803179 884 // This test is also for 4808962, replacement parts 885 private static void literalReplacementTest() throws Exception { 886 int flags = Pattern.LITERAL; 887 888 Pattern pattern = Pattern.compile("abc", flags); 889 Matcher matcher = pattern.matcher("zzzabczzz"); 890 String replaceTest = "$0"; 891 String result = matcher.replaceAll(replaceTest); 892 if (!result.equals("zzzabczzz")) 893 failCount++; 894 895 matcher.reset(); 896 String literalReplacement = matcher.quoteReplacement(replaceTest); 897 result = matcher.replaceAll(literalReplacement); 898 if (!result.equals("zzz$0zzz")) 899 failCount++; 900 901 matcher.reset(); 902 replaceTest = "\\t$\\$"; 903 literalReplacement = matcher.quoteReplacement(replaceTest); 904 result = matcher.replaceAll(literalReplacement); 905 if (!result.equals("zzz\\t$\\$zzz")) 906 failCount++; 907 908 // Supplementary character test 909 pattern = Pattern.compile(toSupplementaries("abc"), flags); 910 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 911 replaceTest = "$0"; 912 result = matcher.replaceAll(replaceTest); 913 if (!result.equals(toSupplementaries("zzzabczzz"))) 914 failCount++; 915 916 matcher.reset(); 917 literalReplacement = matcher.quoteReplacement(replaceTest); 918 result = matcher.replaceAll(literalReplacement); 919 if (!result.equals(toSupplementaries("zzz$0zzz"))) 920 failCount++; 921 922 matcher.reset(); 923 replaceTest = "\\t$\\$"; 924 literalReplacement = matcher.quoteReplacement(replaceTest); 925 result = matcher.replaceAll(literalReplacement); 926 if 
(!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 927 failCount++; 928 929 // IAE should be thrown if backslash or '$' is the last character 930 // in replacement string 931 try { 932 "\uac00".replaceAll("\uac00", "$"); 933 failCount++; 934 } catch (IllegalArgumentException iie) { 935 } catch (Exception e) { 936 failCount++; 937 } 938 try { 939 "\uac00".replaceAll("\uac00", "\\"); 940 failCount++; 941 } catch (IllegalArgumentException iie) { 942 } catch (Exception e) { 943 failCount++; 944 } 945 report("Literal replacement"); 946 } 947 948 // This test is for 4757029 949 private static void regionTest() throws Exception { 950 Pattern pattern = Pattern.compile("abc"); 951 Matcher matcher = pattern.matcher("abcdefabc"); 952 953 matcher.region(0,9); 954 if (!matcher.find()) 955 failCount++; 956 if (!matcher.find()) 957 failCount++; 958 matcher.region(0,3); 959 if (!matcher.find()) 960 failCount++; 961 matcher.region(3,6); 962 if (matcher.find()) 963 failCount++; 964 matcher.region(0,2); 965 if (matcher.find()) 966 failCount++; 967 968 expectRegionFail(matcher, 1, -1); 969 expectRegionFail(matcher, -1, -1); 970 expectRegionFail(matcher, -1, 1); 971 expectRegionFail(matcher, 5, 3); 972 expectRegionFail(matcher, 5, 12); 973 expectRegionFail(matcher, 12, 12); 974 975 pattern = Pattern.compile("^abc$"); 976 matcher = pattern.matcher("zzzabczzz"); 977 matcher.region(0,9); 978 if (matcher.find()) 979 failCount++; 980 matcher.region(3,6); 981 if (!matcher.find()) 982 failCount++; 983 matcher.region(3,6); 984 matcher.useAnchoringBounds(false); 985 if (matcher.find()) 986 failCount++; 987 988 // Supplementary character test 989 pattern = Pattern.compile(toSupplementaries("abc")); 990 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 991 matcher.region(0,9*2); 992 if (!matcher.find()) 993 failCount++; 994 if (!matcher.find()) 995 failCount++; 996 matcher.region(0,3*2); 997 if (!matcher.find()) 998 failCount++; 999 matcher.region(1,3*2); 1000 if (matcher.find()) 1001 
failCount++; 1002 matcher.region(3*2,6*2); 1003 if (matcher.find()) 1004 failCount++; 1005 matcher.region(0,2*2); 1006 if (matcher.find()) 1007 failCount++; 1008 matcher.region(0,2*2+1); 1009 if (matcher.find()) 1010 failCount++; 1011 1012 expectRegionFail(matcher, 1*2, -1); 1013 expectRegionFail(matcher, -1, -1); 1014 expectRegionFail(matcher, -1, 1*2); 1015 expectRegionFail(matcher, 5*2, 3*2); 1016 expectRegionFail(matcher, 5*2, 12*2); 1017 expectRegionFail(matcher, 12*2, 12*2); 1018 1019 pattern = Pattern.compile(toSupplementaries("^abc$")); 1020 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1021 matcher.region(0,9*2); 1022 if (matcher.find()) 1023 failCount++; 1024 matcher.region(3*2,6*2); 1025 if (!matcher.find()) 1026 failCount++; 1027 matcher.region(3*2+1,6*2); 1028 if (matcher.find()) 1029 failCount++; 1030 matcher.region(3*2,6*2-1); 1031 if (matcher.find()) 1032 failCount++; 1033 matcher.region(3*2,6*2); 1034 matcher.useAnchoringBounds(false); 1035 if (matcher.find()) 1036 failCount++; 1037 report("Regions"); 1038 } 1039 1040 private static void expectRegionFail(Matcher matcher, int index1, 1041 int index2) 1042 { 1043 try { 1044 matcher.region(index1, index2); 1045 failCount++; 1046 } catch (IndexOutOfBoundsException ioobe) { 1047 // Correct result 1048 } catch (IllegalStateException ise) { 1049 // Correct result 1050 } 1051 } 1052 1053 // This test is for 4803197 1054 private static void escapedSegmentTest() throws Exception { 1055 1056 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1057 check(pattern, "dir1\\dir2", true); 1058 1059 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1060 check(pattern, "dir1\\dir2\\", true); 1061 1062 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1063 check(pattern, "dir1\\dir2\\", true); 1064 1065 // Supplementary character test 1066 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1067 check(pattern, toSupplementaries("dir1\\dir2"), true); 1068 1069 pattern = 
Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1070 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1071 1072 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1073 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1074 1075 report("Escaped segment"); 1076 } 1077 1078 // This test is for 4792284 1079 private static void nonCaptureRepetitionTest() throws Exception { 1080 String input = "abcdefgh;"; 1081 1082 String[] patterns = new String[] { 1083 "(?:\\w{4})+;", 1084 "(?:\\w{8})*;", 1085 "(?:\\w{2}){2,4};", 1086 "(?:\\w{4}){2,};", // only matches the 1087 ".*?(?:\\w{5})+;", // specified minimum 1088 ".*?(?:\\w{9})*;", // number of reps - OK 1089 "(?:\\w{4})+?;", // lazy repetition - OK 1090 "(?:\\w{4})++;", // possessive repetition - OK 1091 "(?:\\w{2,}?)+;", // non-deterministic - OK 1092 "(\\w{4})+;", // capturing group - OK 1093 }; 1094 1095 for (int i = 0; i < patterns.length; i++) { 1096 // Check find() 1097 check(patterns[i], 0, input, input, true); 1098 // Check matches() 1099 Pattern p = Pattern.compile(patterns[i]); 1100 Matcher m = p.matcher(input); 1101 1102 if (m.matches()) { 1103 if (!m.group(0).equals(input)) 1104 failCount++; 1105 } else { 1106 failCount++; 1107 } 1108 } 1109 1110 report("Non capturing repetition"); 1111 } 1112 1113 // This test is for 6358731 1114 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1115 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1116 Matcher matcher = pattern.matcher("abcd"); 1117 if (!matcher.matches() || 1118 matcher.group(1) != null || 1119 !matcher.group(2).equals("abcd")) { 1120 failCount++; 1121 } 1122 report("Not captured GroupCurly"); 1123 } 1124 1125 // This test is for 4706545 1126 private static void javaCharClassTest() throws Exception { 1127 for (int i=0; i<1000; i++) { 1128 char c = (char)generator.nextInt(); 1129 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1130 check("{javaUpperCase}", c, 
Character.isUpperCase(c)); 1131 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1132 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1133 check("{javaDigit}", c, Character.isDigit(c)); 1134 check("{javaDefined}", c, Character.isDefined(c)); 1135 check("{javaLetter}", c, Character.isLetter(c)); 1136 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1137 check("{javaJavaIdentifierStart}", c, 1138 Character.isJavaIdentifierStart(c)); 1139 check("{javaJavaIdentifierPart}", c, 1140 Character.isJavaIdentifierPart(c)); 1141 check("{javaUnicodeIdentifierStart}", c, 1142 Character.isUnicodeIdentifierStart(c)); 1143 check("{javaUnicodeIdentifierPart}", c, 1144 Character.isUnicodeIdentifierPart(c)); 1145 check("{javaIdentifierIgnorable}", c, 1146 Character.isIdentifierIgnorable(c)); 1147 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1148 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1149 check("{javaISOControl}", c, Character.isISOControl(c)); 1150 check("{javaMirrored}", c, Character.isMirrored(c)); 1151 1152 } 1153 1154 // Supplementary character test 1155 for (int i=0; i<1000; i++) { 1156 int c = generator.nextInt(Character.MAX_CODE_POINT 1157 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1158 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1159 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1160 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1161 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1162 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1163 check("{javaDigit}", c, Character.isDigit(c)); 1164 check("{javaDefined}", c, Character.isDefined(c)); 1165 check("{javaLetter}", c, Character.isLetter(c)); 1166 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1167 check("{javaJavaIdentifierStart}", c, 1168 Character.isJavaIdentifierStart(c)); 1169 check("{javaJavaIdentifierPart}", c, 1170 Character.isJavaIdentifierPart(c)); 1171 check("{javaUnicodeIdentifierStart}", c, 1172 
Character.isUnicodeIdentifierStart(c)); 1173 check("{javaUnicodeIdentifierPart}", c, 1174 Character.isUnicodeIdentifierPart(c)); 1175 check("{javaIdentifierIgnorable}", c, 1176 Character.isIdentifierIgnorable(c)); 1177 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1178 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1179 check("{javaISOControl}", c, Character.isISOControl(c)); 1180 check("{javaMirrored}", c, Character.isMirrored(c)); 1181 } 1182 1183 report("Java character classes"); 1184 } 1185 1186 // This test is for 4523620 1187 /* 1188 private static void numOccurrencesTest() throws Exception { 1189 Pattern pattern = Pattern.compile("aaa"); 1190 1191 if (pattern.numOccurrences("aaaaaa", false) != 2) 1192 failCount++; 1193 if (pattern.numOccurrences("aaaaaa", true) != 4) 1194 failCount++; 1195 1196 pattern = Pattern.compile("^"); 1197 if (pattern.numOccurrences("aaaaaa", false) != 1) 1198 failCount++; 1199 if (pattern.numOccurrences("aaaaaa", true) != 1) 1200 failCount++; 1201 1202 report("Number of Occurrences"); 1203 } 1204 */ 1205 1206 // This test is for 4776374 1207 private static void caretBetweenTerminatorsTest() throws Exception { 1208 int flags1 = Pattern.DOTALL; 1209 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1210 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1211 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1212 1213 check("^....", flags1, "test\ntest", "test", true); 1214 check(".....^", flags1, "test\ntest", "test", false); 1215 check(".....^", flags1, "test\n", "test", false); 1216 check("....^", flags1, "test\r\n", "test", false); 1217 1218 check("^....", flags2, "test\ntest", "test", true); 1219 check("....^", flags2, "test\ntest", "test", false); 1220 check(".....^", flags2, "test\n", "test", false); 1221 check("....^", flags2, "test\r\n", "test", false); 1222 1223 check("^....", flags3, "test\ntest", "test", true); 1224 check(".....^", flags3, "test\ntest", "test\n", true); 1225 
check(".....^", flags3, "test\u0085test", "test\u0085", false); 1226 check(".....^", flags3, "test\n", "test", false); 1227 check(".....^", flags3, "test\r\n", "test", false); 1228 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1229 1230 check("^....", flags4, "test\ntest", "test", true); 1231 check(".....^", flags3, "test\ntest", "test\n", true); 1232 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1233 check(".....^", flags4, "test\n", "test\n", false); 1234 check(".....^", flags4, "test\r\n", "test\r", false); 1235 1236 // Supplementary character test 1237 String t = toSupplementaries("test"); 1238 check("^....", flags1, t+"\n"+t, t, true); 1239 check(".....^", flags1, t+"\n"+t, t, false); 1240 check(".....^", flags1, t+"\n", t, false); 1241 check("....^", flags1, t+"\r\n", t, false); 1242 1243 check("^....", flags2, t+"\n"+t, t, true); 1244 check("....^", flags2, t+"\n"+t, t, false); 1245 check(".....^", flags2, t+"\n", t, false); 1246 check("....^", flags2, t+"\r\n", t, false); 1247 1248 check("^....", flags3, t+"\n"+t, t, true); 1249 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1250 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1251 check(".....^", flags3, t+"\n", t, false); 1252 check(".....^", flags3, t+"\r\n", t, false); 1253 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1254 1255 check("^....", flags4, t+"\n"+t, t, true); 1256 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1257 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1258 check(".....^", flags4, t+"\n", t+"\n", false); 1259 check(".....^", flags4, t+"\r\n", t+"\r", false); 1260 1261 report("Caret between terminators"); 1262 } 1263 1264 // This test is for 4727935 1265 private static void dollarAtEndTest() throws Exception { 1266 int flags1 = Pattern.DOTALL; 1267 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1268 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1269 1270 check("....$", flags1, "test\n", "test", true); 1271 
check("....$", flags1, "test\r\n", "test", true); 1272 check(".....$", flags1, "test\n", "test\n", true); 1273 check(".....$", flags1, "test\u0085", "test\u0085", true); 1274 check("....$", flags1, "test\u0085", "test", true); 1275 1276 check("....$", flags2, "test\n", "test", true); 1277 check(".....$", flags2, "test\n", "test\n", true); 1278 check(".....$", flags2, "test\u0085", "test\u0085", true); 1279 check("....$", flags2, "test\u0085", "est\u0085", true); 1280 1281 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1282 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1283 check("....$blah", flags3, "test\nblah", "!!!!", false); 1284 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1285 1286 // Supplementary character test 1287 String t = toSupplementaries("test"); 1288 String b = toSupplementaries("blah"); 1289 check("....$", flags1, t+"\n", t, true); 1290 check("....$", flags1, t+"\r\n", t, true); 1291 check(".....$", flags1, t+"\n", t+"\n", true); 1292 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1293 check("....$", flags1, t+"\u0085", t, true); 1294 1295 check("....$", flags2, t+"\n", t, true); 1296 check(".....$", flags2, t+"\n", t+"\n", true); 1297 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1298 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1299 1300 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1301 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1302 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1303 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1304 1305 report("Dollar at End"); 1306 } 1307 1308 // This test is for 4711773 1309 private static void multilineDollarTest() throws Exception { 1310 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1311 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1312 matcher.find(); 1313 if (matcher.start(0) != 9) 1314 failCount++; 1315 matcher.find(); 1316 if (matcher.start(0) != 
20) 1317 failCount++; 1318 1319 // Supplementary character test 1320 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1321 matcher.find(); 1322 if (matcher.start(0) != 9*2) 1323 failCount++; 1324 matcher.find(); 1325 if (matcher.start(0) != 20*2) 1326 failCount++; 1327 1328 report("Multiline Dollar"); 1329 } 1330 1331 private static void reluctantRepetitionTest() throws Exception { 1332 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1333 check(p, "1 word word word 2", true); 1334 check(p, "1 wor wo w 2", true); 1335 check(p, "1 word word 2", true); 1336 check(p, "1 word 2", true); 1337 check(p, "1 wo w w 2", true); 1338 check(p, "1 wo w 2", true); 1339 check(p, "1 wor w 2", true); 1340 1341 p = Pattern.compile("([a-z])+?c"); 1342 Matcher m = p.matcher("ababcdefdec"); 1343 check(m, "ababc"); 1344 1345 // Supplementary character test 1346 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1347 m = p.matcher(toSupplementaries("ababcdefdec")); 1348 check(m, toSupplementaries("ababc")); 1349 1350 report("Reluctant Repetition"); 1351 } 1352 1353 private static void serializeTest() throws Exception { 1354 String patternStr = "(b)"; 1355 String matchStr = "b"; 1356 Pattern pattern = Pattern.compile(patternStr); 1357 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1358 ObjectOutputStream oos = new ObjectOutputStream(baos); 1359 oos.writeObject(pattern); 1360 oos.close(); 1361 ObjectInputStream ois = new ObjectInputStream( 1362 new ByteArrayInputStream(baos.toByteArray())); 1363 Pattern serializedPattern = (Pattern)ois.readObject(); 1364 ois.close(); 1365 Matcher matcher = serializedPattern.matcher(matchStr); 1366 if (!matcher.matches()) 1367 failCount++; 1368 if (matcher.groupCount() != 1) 1369 failCount++; 1370 1371 report("Serialization"); 1372 } 1373 1374 private static void gTest() { 1375 Pattern pattern = Pattern.compile("\\G\\w"); 1376 Matcher matcher = pattern.matcher("abc#x#x"); 1377 matcher.find(); 
1378 matcher.find(); 1379 matcher.find(); 1380 if (matcher.find()) 1381 failCount++; 1382 1383 pattern = Pattern.compile("\\GA*"); 1384 matcher = pattern.matcher("1A2AA3"); 1385 matcher.find(); 1386 if (matcher.find()) 1387 failCount++; 1388 1389 pattern = Pattern.compile("\\GA*"); 1390 matcher = pattern.matcher("1A2AA3"); 1391 if (!matcher.find(1)) 1392 failCount++; 1393 matcher.find(); 1394 if (matcher.find()) 1395 failCount++; 1396 1397 report("\\G"); 1398 } 1399 1400 private static void zTest() { 1401 Pattern pattern = Pattern.compile("foo\\Z"); 1402 // Positives 1403 check(pattern, "foo\u0085", true); 1404 check(pattern, "foo\u2028", true); 1405 check(pattern, "foo\u2029", true); 1406 check(pattern, "foo\n", true); 1407 check(pattern, "foo\r", true); 1408 check(pattern, "foo\r\n", true); 1409 // Negatives 1410 check(pattern, "fooo", false); 1411 check(pattern, "foo\n\r", false); 1412 1413 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1414 // Positives 1415 check(pattern, "foo", true); 1416 check(pattern, "foo\n", true); 1417 // Negatives 1418 check(pattern, "foo\r", false); 1419 check(pattern, "foo\u0085", false); 1420 check(pattern, "foo\u2028", false); 1421 check(pattern, "foo\u2029", false); 1422 1423 report("\\Z"); 1424 } 1425 1426 private static void replaceFirstTest() { 1427 Pattern pattern = Pattern.compile("(ab)(c*)"); 1428 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1429 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1430 failCount++; 1431 1432 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1433 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1434 failCount++; 1435 1436 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1437 String result = matcher.replaceFirst("$1"); 1438 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1439 failCount++; 1440 1441 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1442 result = matcher.replaceFirst("$2"); 1443 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1444 
failCount++; 1445 1446 pattern = Pattern.compile("a*"); 1447 matcher = pattern.matcher("aaaaaaaaaa"); 1448 if (!matcher.replaceFirst("test").equals("test")) 1449 failCount++; 1450 1451 pattern = Pattern.compile("a+"); 1452 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1453 if (!matcher.replaceFirst("test").equals("zzztest")) 1454 failCount++; 1455 1456 // Supplementary character test 1457 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1458 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1459 if (!matcher.replaceFirst(toSupplementaries("test")) 1460 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1461 failCount++; 1462 1463 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1464 if (!matcher.replaceFirst(toSupplementaries("test")). 1465 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1466 failCount++; 1467 1468 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1469 result = matcher.replaceFirst("$1"); 1470 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1471 failCount++; 1472 1473 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1474 result = matcher.replaceFirst("$2"); 1475 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1476 failCount++; 1477 1478 pattern = Pattern.compile(toSupplementaries("a*")); 1479 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1480 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1481 failCount++; 1482 1483 pattern = Pattern.compile(toSupplementaries("a+")); 1484 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1485 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1486 failCount++; 1487 1488 report("Replace First"); 1489 } 1490 1491 private static void unixLinesTest() { 1492 Pattern pattern = Pattern.compile(".*"); 1493 Matcher matcher = pattern.matcher("aa\u2028blah"); 1494 matcher.find(); 1495 if 
(!matcher.group(0).equals("aa")) 1496 failCount++; 1497 1498 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1499 matcher = pattern.matcher("aa\u2028blah"); 1500 matcher.find(); 1501 if (!matcher.group(0).equals("aa\u2028blah")) 1502 failCount++; 1503 1504 pattern = Pattern.compile("[az]$", 1505 Pattern.MULTILINE | Pattern.UNIX_LINES); 1506 matcher = pattern.matcher("aa\u2028zz"); 1507 check(matcher, "a\u2028", false); 1508 1509 // Supplementary character test 1510 pattern = Pattern.compile(".*"); 1511 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1512 matcher.find(); 1513 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1514 failCount++; 1515 1516 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1517 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1518 matcher.find(); 1519 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1520 failCount++; 1521 1522 pattern = Pattern.compile(toSupplementaries("[az]$"), 1523 Pattern.MULTILINE | Pattern.UNIX_LINES); 1524 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1525 check(matcher, toSupplementaries("a\u2028"), false); 1526 1527 report("Unix Lines"); 1528 } 1529 1530 private static void commentsTest() { 1531 int flags = Pattern.COMMENTS; 1532 1533 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1534 Matcher matcher = pattern.matcher("aa#aa"); 1535 if (!matcher.matches()) 1536 failCount++; 1537 1538 pattern = Pattern.compile("aa # blah", flags); 1539 matcher = pattern.matcher("aa"); 1540 if (!matcher.matches()) 1541 failCount++; 1542 1543 pattern = Pattern.compile("aa blah", flags); 1544 matcher = pattern.matcher("aablah"); 1545 if (!matcher.matches()) 1546 failCount++; 1547 1548 pattern = Pattern.compile("aa # blah blech ", flags); 1549 matcher = pattern.matcher("aa"); 1550 if (!matcher.matches()) 1551 failCount++; 1552 1553 pattern = Pattern.compile("aa # blah\n ", flags); 1554 matcher = pattern.matcher("aa"); 1555 if (!matcher.matches()) 1556 
failCount++; 1557 1558 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1559 matcher = pattern.matcher("aabc"); 1560 if (!matcher.matches()) 1561 failCount++; 1562 1563 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1564 matcher = pattern.matcher("aabc"); 1565 if (!matcher.matches()) 1566 failCount++; 1567 1568 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1569 matcher = pattern.matcher("aabc#blech"); 1570 if (!matcher.matches()) 1571 failCount++; 1572 1573 // Supplementary character test 1574 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1575 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1576 if (!matcher.matches()) 1577 failCount++; 1578 1579 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1580 matcher = pattern.matcher(toSupplementaries("aa")); 1581 if (!matcher.matches()) 1582 failCount++; 1583 1584 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1585 matcher = pattern.matcher(toSupplementaries("aablah")); 1586 if (!matcher.matches()) 1587 failCount++; 1588 1589 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1590 matcher = pattern.matcher(toSupplementaries("aa")); 1591 if (!matcher.matches()) 1592 failCount++; 1593 1594 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1595 matcher = pattern.matcher(toSupplementaries("aa")); 1596 if (!matcher.matches()) 1597 failCount++; 1598 1599 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1600 matcher = pattern.matcher(toSupplementaries("aabc")); 1601 if (!matcher.matches()) 1602 failCount++; 1603 1604 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1605 matcher = pattern.matcher(toSupplementaries("aabc")); 1606 if (!matcher.matches()) 1607 failCount++; 1608 1609 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1610 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1611 if 
(!matcher.matches()) 1612 failCount++; 1613 1614 report("Comments"); 1615 } 1616 1617 private static void caseFoldingTest() { // bug 4504687 1618 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1619 Pattern pattern = Pattern.compile("aa", flags); 1620 Matcher matcher = pattern.matcher("ab"); 1621 if (matcher.matches()) 1622 failCount++; 1623 1624 pattern = Pattern.compile("aA", flags); 1625 matcher = pattern.matcher("ab"); 1626 if (matcher.matches()) 1627 failCount++; 1628 1629 pattern = Pattern.compile("aa", flags); 1630 matcher = pattern.matcher("aB"); 1631 if (matcher.matches()) 1632 failCount++; 1633 matcher = pattern.matcher("Ab"); 1634 if (matcher.matches()) 1635 failCount++; 1636 1637 // ASCII "a" 1638 // Latin-1 Supplement "a" + grave 1639 // Cyrillic "a" 1640 String[] patterns = new String[] { 1641 //single 1642 "a", "\u00e0", "\u0430", 1643 //slice 1644 "ab", "\u00e0\u00e1", "\u0430\u0431", 1645 //class single 1646 "[a]", "[\u00e0]", "[\u0430]", 1647 //class range 1648 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1649 //back reference 1650 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1651 }; 1652 1653 String[] texts = new String[] { 1654 "A", "\u00c0", "\u0410", 1655 "AB", "\u00c0\u00c1", "\u0410\u0411", 1656 "A", "\u00c0", "\u0410", 1657 "B", "\u00c2", "\u0411", 1658 "aA", "\u00e0\u00c0", "\u0430\u0410" 1659 }; 1660 1661 boolean[] expected = new boolean[] { 1662 true, false, false, 1663 true, false, false, 1664 true, false, false, 1665 true, false, false, 1666 true, false, false 1667 }; 1668 1669 flags = Pattern.CASE_INSENSITIVE; 1670 for (int i = 0; i < patterns.length; i++) { 1671 pattern = Pattern.compile(patterns[i], flags); 1672 matcher = pattern.matcher(texts[i]); 1673 if (matcher.matches() != expected[i]) { 1674 System.out.println("<1> Failed at " + i); 1675 failCount++; 1676 } 1677 } 1678 1679 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1680 for (int i = 0; i < patterns.length; i++) { 1681 pattern = 
Pattern.compile(patterns[i], flags); 1682 matcher = pattern.matcher(texts[i]); 1683 if (!matcher.matches()) { 1684 System.out.println("<2> Failed at " + i); 1685 failCount++; 1686 } 1687 } 1688 // flag unicode_case alone should do nothing 1689 flags = Pattern.UNICODE_CASE; 1690 for (int i = 0; i < patterns.length; i++) { 1691 pattern = Pattern.compile(patterns[i], flags); 1692 matcher = pattern.matcher(texts[i]); 1693 if (matcher.matches()) { 1694 System.out.println("<3> Failed at " + i); 1695 failCount++; 1696 } 1697 } 1698 1699 // Special cases: i, I, u+0131 and u+0130 1700 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1701 pattern = Pattern.compile("[h-j]+", flags); 1702 if (!pattern.matcher("\u0131\u0130").matches()) 1703 failCount++; 1704 report("Case Folding"); 1705 } 1706 1707 private static void appendTest() { 1708 Pattern pattern = Pattern.compile("(ab)(cd)"); 1709 Matcher matcher = pattern.matcher("abcd"); 1710 String result = matcher.replaceAll("$2$1"); 1711 if (!result.equals("cdab")) 1712 failCount++; 1713 1714 String s1 = "Swap all: first = 123, second = 456"; 1715 String s2 = "Swap one: first = 123, second = 456"; 1716 String r = "$3$2$1"; 1717 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1718 matcher = pattern.matcher(s1); 1719 1720 result = matcher.replaceAll(r); 1721 if (!result.equals("Swap all: 123 = first, 456 = second")) 1722 failCount++; 1723 1724 matcher = pattern.matcher(s2); 1725 1726 if (matcher.find()) { 1727 StringBuffer sb = new StringBuffer(); 1728 matcher.appendReplacement(sb, r); 1729 matcher.appendTail(sb); 1730 result = sb.toString(); 1731 if (!result.equals("Swap one: 123 = first, second = 456")) 1732 failCount++; 1733 } 1734 1735 // Supplementary character test 1736 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1737 matcher = pattern.matcher(toSupplementaries("abcd")); 1738 result = matcher.replaceAll("$2$1"); 1739 if (!result.equals(toSupplementaries("cdab"))) 1740 failCount++; 1741 1742 s1 = 
toSupplementaries("Swap all: first = 123, second = 456"); 1743 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1744 r = toSupplementaries("$3$2$1"); 1745 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1746 matcher = pattern.matcher(s1); 1747 1748 result = matcher.replaceAll(r); 1749 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1750 failCount++; 1751 1752 matcher = pattern.matcher(s2); 1753 1754 if (matcher.find()) { 1755 StringBuffer sb = new StringBuffer(); 1756 matcher.appendReplacement(sb, r); 1757 matcher.appendTail(sb); 1758 result = sb.toString(); 1759 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1760 failCount++; 1761 } 1762 report("Append"); 1763 } 1764 1765 private static void splitTest() { 1766 Pattern pattern = Pattern.compile(":"); 1767 String[] result = pattern.split("foo:and:boo", 2); 1768 if (!result[0].equals("foo")) 1769 failCount++; 1770 if (!result[1].equals("and:boo")) 1771 failCount++; 1772 // Supplementary character test 1773 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1774 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1775 if (!result[0].equals(toSupplementaries("foo"))) 1776 failCount++; 1777 if (!result[1].equals(toSupplementaries("andXboo"))) 1778 failCount++; 1779 1780 CharBuffer cb = CharBuffer.allocate(100); 1781 cb.put("foo:and:boo"); 1782 cb.flip(); 1783 result = pattern.split(cb); 1784 if (!result[0].equals("foo")) 1785 failCount++; 1786 if (!result[1].equals("and")) 1787 failCount++; 1788 if (!result[2].equals("boo")) 1789 failCount++; 1790 1791 // Supplementary character test 1792 CharBuffer cbs = CharBuffer.allocate(100); 1793 cbs.put(toSupplementaries("fooXandXboo")); 1794 cbs.flip(); 1795 result = patternX.split(cbs); 1796 if (!result[0].equals(toSupplementaries("foo"))) 1797 failCount++; 1798 if (!result[1].equals(toSupplementaries("and"))) 1799 failCount++; 1800 if 
(!result[2].equals(toSupplementaries("boo"))) 1801 failCount++; 1802 1803 String source = "0123456789"; 1804 for (int limit=-2; limit<3; limit++) { 1805 for (int x=0; x<10; x++) { 1806 result = source.split(Integer.toString(x), limit); 1807 int expectedLength = limit < 1 ? 2 : limit; 1808 1809 if ((limit == 0) && (x == 9)) { 1810 // expected dropping of "" 1811 if (result.length != 1) 1812 failCount++; 1813 if (!result[0].equals("012345678")) { 1814 failCount++; 1815 } 1816 } else { 1817 if (result.length != expectedLength) { 1818 failCount++; 1819 } 1820 if (!result[0].equals(source.substring(0,x))) { 1821 if (limit != 1) { 1822 failCount++; 1823 } else { 1824 if (!result[0].equals(source.substring(0,10))) { 1825 failCount++; 1826 } 1827 } 1828 } 1829 if (expectedLength > 1) { // Check segment 2 1830 if (!result[1].equals(source.substring(x+1,10))) 1831 failCount++; 1832 } 1833 } 1834 } 1835 } 1836 // Check the case for no match found 1837 for (int limit=-2; limit<3; limit++) { 1838 result = source.split("e", limit); 1839 if (result.length != 1) 1840 failCount++; 1841 if (!result[0].equals(source)) 1842 failCount++; 1843 } 1844 // Check the case for limit == 0, source = ""; 1845 // split() now returns 0-length for empty source "" see #6559590 1846 source = ""; 1847 result = source.split("e", 0); 1848 if (result.length != 1) 1849 failCount++; 1850 if (!result[0].equals(source)) 1851 failCount++; 1852 1853 // Check both split() and splitAsStraem(), especially for zero-lenth 1854 // input and zero-lenth match cases 1855 String[][] input = new String[][] { 1856 { " ", "Abc Efg Hij" }, // normal non-zero-match 1857 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1858 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1859 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1860 { "(?=\\p{Lu})", "AbcEfg" }, 1861 { "(?=\\p{Lu})", "Abc" }, 1862 { " ", "" }, // zero-length input 1863 { ".*", "" }, 1864 1865 // some tests from 
PatternStreamTest.java 1866 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1867 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1868 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1869 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1870 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1871 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1872 { "\u56da", "" }, 1873 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1874 { "o", "boo:and:foo" }, 1875 { "o", "booooo:and:fooooo" }, 1876 { "o", "fooooo:" }, 1877 }; 1878 1879 String[][] expected = new String[][] { 1880 { "Abc", "Efg", "Hij" }, 1881 { "", "Abc", "Efg", "Hij" }, 1882 { "Abc", "", "Efg", "Hij" }, 1883 { "Abc", "Efg", "Hij" }, 1884 { "Abc", "Efg" }, 1885 { "Abc" }, 1886 { "" }, 1887 { "" }, 1888 1889 { "awgqwefg1fefw", "vssv1vvv1" }, 1890 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1891 { "awgqwefg", "fefw4vssv", "vvv" }, 1892 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1893 { "1", "23", "456", "7890" }, 1894 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1895 { "" }, 1896 { "This", "is", "testing", "", "with", "different", "separators" }, 1897 { "b", "", ":and:f" }, 1898 { "b", "", "", "", "", ":and:f" }, 1899 { "f", "", "", "", "", ":" }, 1900 }; 1901 for (int i = 0; i < input.length; i++) { 1902 pattern = Pattern.compile(input[i][0]); 1903 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1904 failCount++; 1905 } 1906 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1907 // array for zero-length input for now 1908 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1909 expected[i])) { 1910 failCount++; 1911 } 1912 } 1913 report("Split"); 1914 } 1915 1916 private static void negationTest() { 1917 Pattern pattern = Pattern.compile("[\\[@^]+"); 1918 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1919 if (!matcher.find()) 1920 failCount++; 
1921 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1922 failCount++; 1923 pattern = Pattern.compile("[@\\[^]+"); 1924 matcher = pattern.matcher("@@@@[[[[^^^^"); 1925 if (!matcher.find()) 1926 failCount++; 1927 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1928 failCount++; 1929 pattern = Pattern.compile("[@\\[^@]+"); 1930 matcher = pattern.matcher("@@@@[[[[^^^^"); 1931 if (!matcher.find()) 1932 failCount++; 1933 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1934 failCount++; 1935 1936 pattern = Pattern.compile("\\)"); 1937 matcher = pattern.matcher("xxx)xxx"); 1938 if (!matcher.find()) 1939 failCount++; 1940 1941 report("Negation"); 1942 } 1943 1944 private static void ampersandTest() { 1945 Pattern pattern = Pattern.compile("[&@]+"); 1946 check(pattern, "@@@@&&&&", true); 1947 1948 pattern = Pattern.compile("[@&]+"); 1949 check(pattern, "@@@@&&&&", true); 1950 1951 pattern = Pattern.compile("[@\\&]+"); 1952 check(pattern, "@@@@&&&&", true); 1953 1954 report("Ampersand"); 1955 } 1956 1957 private static void octalTest() throws Exception { 1958 Pattern pattern = Pattern.compile("\\u0007"); 1959 Matcher matcher = pattern.matcher("\u0007"); 1960 if (!matcher.matches()) 1961 failCount++; 1962 pattern = Pattern.compile("\\07"); 1963 matcher = pattern.matcher("\u0007"); 1964 if (!matcher.matches()) 1965 failCount++; 1966 pattern = Pattern.compile("\\007"); 1967 matcher = pattern.matcher("\u0007"); 1968 if (!matcher.matches()) 1969 failCount++; 1970 pattern = Pattern.compile("\\0007"); 1971 matcher = pattern.matcher("\u0007"); 1972 if (!matcher.matches()) 1973 failCount++; 1974 pattern = Pattern.compile("\\040"); 1975 matcher = pattern.matcher("\u0020"); 1976 if (!matcher.matches()) 1977 failCount++; 1978 pattern = Pattern.compile("\\0403"); 1979 matcher = pattern.matcher("\u00203"); 1980 if (!matcher.matches()) 1981 failCount++; 1982 pattern = Pattern.compile("\\0103"); 1983 matcher = pattern.matcher("\u0043"); 1984 if (!matcher.matches()) 1985 failCount++; 1986 1987 
report("Octal"); 1988 } 1989 1990 private static void longPatternTest() throws Exception { 1991 try { 1992 Pattern pattern = Pattern.compile( 1993 "a 32-character-long pattern xxxx"); 1994 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1995 pattern = Pattern.compile("a thirty four character long regex"); 1996 StringBuffer patternToBe = new StringBuffer(101); 1997 for (int i=0; i<100; i++) 1998 patternToBe.append((char)(97 + i%26)); 1999 pattern = Pattern.compile(patternToBe.toString()); 2000 } catch (PatternSyntaxException e) { 2001 failCount++; 2002 } 2003 2004 // Supplementary character test 2005 try { 2006 Pattern pattern = Pattern.compile( 2007 toSupplementaries("a 32-character-long pattern xxxx")); 2008 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2009 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2010 StringBuffer patternToBe = new StringBuffer(101*2); 2011 for (int i=0; i<100; i++) 2012 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2013 + 97 + i%26)); 2014 pattern = Pattern.compile(patternToBe.toString()); 2015 } catch (PatternSyntaxException e) { 2016 failCount++; 2017 } 2018 report("LongPattern"); 2019 } 2020 2021 private static void group0Test() throws Exception { 2022 Pattern pattern = Pattern.compile("(tes)ting"); 2023 Matcher matcher = pattern.matcher("testing"); 2024 check(matcher, "testing"); 2025 2026 matcher.reset("testing"); 2027 if (matcher.lookingAt()) { 2028 if (!matcher.group(0).equals("testing")) 2029 failCount++; 2030 } else { 2031 failCount++; 2032 } 2033 2034 matcher.reset("testing"); 2035 if (matcher.matches()) { 2036 if (!matcher.group(0).equals("testing")) 2037 failCount++; 2038 } else { 2039 failCount++; 2040 } 2041 2042 pattern = Pattern.compile("(tes)ting"); 2043 matcher = pattern.matcher("testing"); 2044 if (matcher.lookingAt()) { 2045 if (!matcher.group(0).equals("testing")) 2046 failCount++; 2047 } else { 2048 
failCount++; 2049 } 2050 2051 pattern = Pattern.compile("^(tes)ting"); 2052 matcher = pattern.matcher("testing"); 2053 if (matcher.matches()) { 2054 if (!matcher.group(0).equals("testing")) 2055 failCount++; 2056 } else { 2057 failCount++; 2058 } 2059 2060 // Supplementary character test 2061 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2062 matcher = pattern.matcher(toSupplementaries("testing")); 2063 check(matcher, toSupplementaries("testing")); 2064 2065 matcher.reset(toSupplementaries("testing")); 2066 if (matcher.lookingAt()) { 2067 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2068 failCount++; 2069 } else { 2070 failCount++; 2071 } 2072 2073 matcher.reset(toSupplementaries("testing")); 2074 if (matcher.matches()) { 2075 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2076 failCount++; 2077 } else { 2078 failCount++; 2079 } 2080 2081 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2082 matcher = pattern.matcher(toSupplementaries("testing")); 2083 if (matcher.lookingAt()) { 2084 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2085 failCount++; 2086 } else { 2087 failCount++; 2088 } 2089 2090 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2091 matcher = pattern.matcher(toSupplementaries("testing")); 2092 if (matcher.matches()) { 2093 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2094 failCount++; 2095 } else { 2096 failCount++; 2097 } 2098 2099 report("Group0"); 2100 } 2101 2102 private static void findIntTest() throws Exception { 2103 Pattern p = Pattern.compile("blah"); 2104 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2105 boolean result = m.find(2); 2106 if (!result) 2107 failCount++; 2108 2109 p = Pattern.compile("$"); 2110 m = p.matcher("1234567890"); 2111 result = m.find(10); 2112 if (!result) 2113 failCount++; 2114 try { 2115 result = m.find(11); 2116 failCount++; 2117 } catch (IndexOutOfBoundsException e) { 2118 // correct result 2119 } 2120 2121 // Supplementary 
character test 2122 p = Pattern.compile(toSupplementaries("blah")); 2123 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2124 result = m.find(2); 2125 if (!result) 2126 failCount++; 2127 2128 report("FindInt"); 2129 } 2130 2131 private static void emptyPatternTest() throws Exception { 2132 Pattern p = Pattern.compile(""); 2133 Matcher m = p.matcher("foo"); 2134 2135 // Should find empty pattern at beginning of input 2136 boolean result = m.find(); 2137 if (result != true) 2138 failCount++; 2139 if (m.start() != 0) 2140 failCount++; 2141 2142 // Should not match entire input if input is not empty 2143 m.reset(); 2144 result = m.matches(); 2145 if (result == true) 2146 failCount++; 2147 2148 try { 2149 m.start(0); 2150 failCount++; 2151 } catch (IllegalStateException e) { 2152 // Correct result 2153 } 2154 2155 // Should match entire input if input is empty 2156 m.reset(""); 2157 result = m.matches(); 2158 if (result != true) 2159 failCount++; 2160 2161 result = Pattern.matches("", ""); 2162 if (result != true) 2163 failCount++; 2164 2165 result = Pattern.matches("", "foo"); 2166 if (result == true) 2167 failCount++; 2168 report("EmptyPattern"); 2169 } 2170 2171 private static void charClassTest() throws Exception { 2172 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2173 check(pattern, "blahb]blech", true); 2174 2175 pattern = Pattern.compile("[abc[def]]"); 2176 check(pattern, "b", true); 2177 2178 // Supplementary character tests 2179 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2180 check(pattern, toSupplementaries("blahb]blech"), true); 2181 2182 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2183 check(pattern, toSupplementaries("b"), true); 2184 2185 try { 2186 // u00ff when UNICODE_CASE 2187 pattern = Pattern.compile("[ab\u00ffcd]", 2188 Pattern.CASE_INSENSITIVE| 2189 Pattern.UNICODE_CASE); 2190 check(pattern, "ab\u00ffcd", true); 2191 check(pattern, "Ab\u0178Cd", true); 2192 2193 // u00b5 when UNICODE_CASE 2194 
pattern = Pattern.compile("[ab\u00b5cd]", 2195 Pattern.CASE_INSENSITIVE| 2196 Pattern.UNICODE_CASE); 2197 check(pattern, "ab\u00b5cd", true); 2198 check(pattern, "Ab\u039cCd", true); 2199 } catch (Exception e) { failCount++; } 2200 2201 /* Special cases 2202 (1)LatinSmallLetterLongS u+017f 2203 (2)LatinSmallLetterDotlessI u+0131 2204 (3)LatineCapitalLetterIWithDotAbove u+0130 2205 (4)KelvinSign u+212a 2206 (5)AngstromSign u+212b 2207 */ 2208 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2209 pattern = Pattern.compile("[sik\u00c5]+", flags); 2210 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2211 failCount++; 2212 2213 report("CharClass"); 2214 } 2215 2216 private static void caretTest() throws Exception { 2217 Pattern pattern = Pattern.compile("\\w*"); 2218 Matcher matcher = pattern.matcher("a#bc#def##g"); 2219 check(matcher, "a"); 2220 check(matcher, ""); 2221 check(matcher, "bc"); 2222 check(matcher, ""); 2223 check(matcher, "def"); 2224 check(matcher, ""); 2225 check(matcher, ""); 2226 check(matcher, "g"); 2227 check(matcher, ""); 2228 if (matcher.find()) 2229 failCount++; 2230 2231 pattern = Pattern.compile("^\\w*"); 2232 matcher = pattern.matcher("a#bc#def##g"); 2233 check(matcher, "a"); 2234 if (matcher.find()) 2235 failCount++; 2236 2237 pattern = Pattern.compile("\\w"); 2238 matcher = pattern.matcher("abc##x"); 2239 check(matcher, "a"); 2240 check(matcher, "b"); 2241 check(matcher, "c"); 2242 check(matcher, "x"); 2243 if (matcher.find()) 2244 failCount++; 2245 2246 pattern = Pattern.compile("^\\w"); 2247 matcher = pattern.matcher("abc##x"); 2248 check(matcher, "a"); 2249 if (matcher.find()) 2250 failCount++; 2251 2252 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2253 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2254 check(matcher, "abc"); 2255 if (matcher.find()) 2256 failCount++; 2257 2258 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2259 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2260 
check(matcher, "abc"); 2261 check(matcher, "jkl"); 2262 if (matcher.find()) 2263 failCount++; 2264 2265 pattern = Pattern.compile("^", Pattern.MULTILINE); 2266 matcher = pattern.matcher("this is some text"); 2267 String result = matcher.replaceAll("X"); 2268 if (!result.equals("Xthis is some text")) 2269 failCount++; 2270 2271 pattern = Pattern.compile("^"); 2272 matcher = pattern.matcher("this is some text"); 2273 result = matcher.replaceAll("X"); 2274 if (!result.equals("Xthis is some text")) 2275 failCount++; 2276 2277 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2278 matcher = pattern.matcher("this is some text\n"); 2279 result = matcher.replaceAll("X"); 2280 if (!result.equals("Xthis is some text\n")) 2281 failCount++; 2282 2283 report("Caret"); 2284 } 2285 2286 private static void groupCaptureTest() throws Exception { 2287 // Independent group 2288 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2289 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2290 matcher.find(); 2291 try { 2292 String blah = matcher.group(1); 2293 failCount++; 2294 } catch (IndexOutOfBoundsException ioobe) { 2295 // Good result 2296 } 2297 // Pure group 2298 pattern = Pattern.compile("x+(?:y+)z+"); 2299 matcher = pattern.matcher("xxxyyyzzz"); 2300 matcher.find(); 2301 try { 2302 String blah = matcher.group(1); 2303 failCount++; 2304 } catch (IndexOutOfBoundsException ioobe) { 2305 // Good result 2306 } 2307 2308 // Supplementary character tests 2309 // Independent group 2310 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2311 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2312 matcher.find(); 2313 try { 2314 String blah = matcher.group(1); 2315 failCount++; 2316 } catch (IndexOutOfBoundsException ioobe) { 2317 // Good result 2318 } 2319 // Pure group 2320 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2321 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2322 matcher.find(); 2323 try { 2324 String blah = 
matcher.group(1); 2325 failCount++; 2326 } catch (IndexOutOfBoundsException ioobe) { 2327 // Good result 2328 } 2329 2330 report("GroupCapture"); 2331 } 2332 2333 private static void backRefTest() throws Exception { 2334 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2335 check(pattern, "zzzaabcazzz", true); 2336 2337 pattern = Pattern.compile("(a*)bc\\1"); 2338 check(pattern, "zzzaabcaazzz", true); 2339 2340 pattern = Pattern.compile("(abc)(def)\\1"); 2341 check(pattern, "abcdefabc", true); 2342 2343 pattern = Pattern.compile("(abc)(def)\\3"); 2344 check(pattern, "abcdefabc", false); 2345 2346 try { 2347 for (int i = 1; i < 10; i++) { 2348 // Make sure backref 1-9 are always accepted 2349 pattern = Pattern.compile("abcdef\\" + i); 2350 // and fail to match if the target group does not exit 2351 check(pattern, "abcdef", false); 2352 } 2353 } catch(PatternSyntaxException e) { 2354 failCount++; 2355 } 2356 2357 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2358 check(pattern, "abcdefghija", false); 2359 check(pattern, "abcdefghija1", true); 2360 2361 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2362 check(pattern, "abcdefghijkk", true); 2363 2364 pattern = Pattern.compile("(a)bcdefghij\\11"); 2365 check(pattern, "abcdefghija1", true); 2366 2367 // Supplementary character tests 2368 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2369 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2370 2371 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2372 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2373 2374 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2375 check(pattern, toSupplementaries("abcdefabc"), true); 2376 2377 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2378 check(pattern, toSupplementaries("abcdefabc"), false); 2379 2380 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2381 check(pattern, 
toSupplementaries("abcdefghija"), false); 2382 check(pattern, toSupplementaries("abcdefghija1"), true); 2383 2384 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2385 check(pattern, toSupplementaries("abcdefghijkk"), true); 2386 2387 report("BackRef"); 2388 } 2389 2390 /** 2391 * Unicode Technical Report #18, section 2.6 End of Line 2392 * There is no empty line to be matched in the sequence \u000D\u000A 2393 * but there is an empty line in the sequence \u000A\u000D. 2394 */ 2395 private static void anchorTest() throws Exception { 2396 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2397 Matcher m = p.matcher("blah1\r\nblah2"); 2398 m.find(); 2399 m.find(); 2400 if (!m.group().equals("blah2")) 2401 failCount++; 2402 2403 m.reset("blah1\n\rblah2"); 2404 m.find(); 2405 m.find(); 2406 m.find(); 2407 if (!m.group().equals("blah2")) 2408 failCount++; 2409 2410 // Test behavior of $ with \r\n at end of input 2411 p = Pattern.compile(".+$"); 2412 m = p.matcher("blah1\r\n"); 2413 if (!m.find()) 2414 failCount++; 2415 if (!m.group().equals("blah1")) 2416 failCount++; 2417 if (m.find()) 2418 failCount++; 2419 2420 // Test behavior of $ with \r\n at end of input in multiline 2421 p = Pattern.compile(".+$", Pattern.MULTILINE); 2422 m = p.matcher("blah1\r\n"); 2423 if (!m.find()) 2424 failCount++; 2425 if (m.find()) 2426 failCount++; 2427 2428 // Test for $ recognition of \u0085 for bug 4527731 2429 p = Pattern.compile(".+$", Pattern.MULTILINE); 2430 m = p.matcher("blah1\u0085"); 2431 if (!m.find()) 2432 failCount++; 2433 2434 // Supplementary character test 2435 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2436 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2437 m.find(); 2438 m.find(); 2439 if (!m.group().equals(toSupplementaries("blah2"))) 2440 failCount++; 2441 2442 m.reset(toSupplementaries("blah1\n\rblah2")); 2443 m.find(); 2444 m.find(); 2445 m.find(); 2446 if (!m.group().equals(toSupplementaries("blah2"))) 2447 
failCount++; 2448 2449 // Test behavior of $ with \r\n at end of input 2450 p = Pattern.compile(".+$"); 2451 m = p.matcher(toSupplementaries("blah1\r\n")); 2452 if (!m.find()) 2453 failCount++; 2454 if (!m.group().equals(toSupplementaries("blah1"))) 2455 failCount++; 2456 if (m.find()) 2457 failCount++; 2458 2459 // Test behavior of $ with \r\n at end of input in multiline 2460 p = Pattern.compile(".+$", Pattern.MULTILINE); 2461 m = p.matcher(toSupplementaries("blah1\r\n")); 2462 if (!m.find()) 2463 failCount++; 2464 if (m.find()) 2465 failCount++; 2466 2467 // Test for $ recognition of \u0085 for bug 4527731 2468 p = Pattern.compile(".+$", Pattern.MULTILINE); 2469 m = p.matcher(toSupplementaries("blah1\u0085")); 2470 if (!m.find()) 2471 failCount++; 2472 2473 report("Anchors"); 2474 } 2475 2476 /** 2477 * A basic sanity test of Matcher.lookingAt(). 2478 */ 2479 private static void lookingAtTest() throws Exception { 2480 Pattern p = Pattern.compile("(ab)(c*)"); 2481 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2482 2483 if (!m.lookingAt()) 2484 failCount++; 2485 2486 if (!m.group().equals(m.group(0))) 2487 failCount++; 2488 2489 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2490 if (m.lookingAt()) 2491 failCount++; 2492 2493 // Supplementary character test 2494 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2495 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2496 2497 if (!m.lookingAt()) 2498 failCount++; 2499 2500 if (!m.group().equals(m.group(0))) 2501 failCount++; 2502 2503 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2504 if (m.lookingAt()) 2505 failCount++; 2506 2507 report("Looking At"); 2508 } 2509 2510 /** 2511 * A basic sanity test of Matcher.matches(). 
2512 */ 2513 private static void matchesTest() throws Exception { 2514 // matches() 2515 Pattern p = Pattern.compile("ulb(c*)"); 2516 Matcher m = p.matcher("ulbcccccc"); 2517 if (!m.matches()) 2518 failCount++; 2519 2520 // find() but not matches() 2521 m.reset("zzzulbcccccc"); 2522 if (m.matches()) 2523 failCount++; 2524 2525 // lookingAt() but not matches() 2526 m.reset("ulbccccccdef"); 2527 if (m.matches()) 2528 failCount++; 2529 2530 // matches() 2531 p = Pattern.compile("a|ad"); 2532 m = p.matcher("ad"); 2533 if (!m.matches()) 2534 failCount++; 2535 2536 // Supplementary character test 2537 // matches() 2538 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2539 m = p.matcher(toSupplementaries("ulbcccccc")); 2540 if (!m.matches()) 2541 failCount++; 2542 2543 // find() but not matches() 2544 m.reset(toSupplementaries("zzzulbcccccc")); 2545 if (m.matches()) 2546 failCount++; 2547 2548 // lookingAt() but not matches() 2549 m.reset(toSupplementaries("ulbccccccdef")); 2550 if (m.matches()) 2551 failCount++; 2552 2553 // matches() 2554 p = Pattern.compile(toSupplementaries("a|ad")); 2555 m = p.matcher(toSupplementaries("ad")); 2556 if (!m.matches()) 2557 failCount++; 2558 2559 report("Matches"); 2560 } 2561 2562 /** 2563 * A basic sanity test of Pattern.matches(). 
2564 */ 2565 private static void patternMatchesTest() throws Exception { 2566 // matches() 2567 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2568 toSupplementaries("ulbcccccc"))) 2569 failCount++; 2570 2571 // find() but not matches() 2572 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2573 toSupplementaries("zzzulbcccccc"))) 2574 failCount++; 2575 2576 // lookingAt() but not matches() 2577 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2578 toSupplementaries("ulbccccccdef"))) 2579 failCount++; 2580 2581 // Supplementary character test 2582 // matches() 2583 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2584 toSupplementaries("ulbcccccc"))) 2585 failCount++; 2586 2587 // find() but not matches() 2588 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2589 toSupplementaries("zzzulbcccccc"))) 2590 failCount++; 2591 2592 // lookingAt() but not matches() 2593 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2594 toSupplementaries("ulbccccccdef"))) 2595 failCount++; 2596 2597 report("Pattern Matches"); 2598 } 2599 2600 /** 2601 * Canonical equivalence testing. Tests the ability of the engine 2602 * to match sequences that are not explicitly specified in the 2603 * pattern when they are considered equivalent by the Unicode Standard. 
2604 */ 2605 private static void ceTest() throws Exception { 2606 // Decomposed char outside char classes 2607 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2608 Matcher m = p.matcher("test\u00e5"); 2609 if (!m.matches()) 2610 failCount++; 2611 2612 m.reset("testa\u030a"); 2613 if (!m.matches()) 2614 failCount++; 2615 2616 // Composed char outside char classes 2617 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2618 m = p.matcher("test\u00e5"); 2619 if (!m.matches()) 2620 failCount++; 2621 2622 m.reset("testa\u030a"); 2623 if (!m.find()) 2624 failCount++; 2625 2626 // Decomposed char inside a char class 2627 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2628 m = p.matcher("test\u00e5"); 2629 if (!m.find()) 2630 failCount++; 2631 2632 m.reset("testa\u030a"); 2633 if (!m.find()) 2634 failCount++; 2635 2636 // Composed char inside a char class 2637 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2638 m = p.matcher("test\u00e5"); 2639 if (!m.find()) 2640 failCount++; 2641 2642 m.reset("testa\u0300"); 2643 if (!m.find()) 2644 failCount++; 2645 2646 m.reset("testa\u030a"); 2647 if (!m.find()) 2648 failCount++; 2649 2650 // Marks that cannot legally change order and be equivalent 2651 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2652 check(p, "testa\u0308\u0300", true); 2653 check(p, "testa\u0300\u0308", false); 2654 2655 // Marks that can legally change order and be equivalent 2656 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2657 check(p, "testa\u0308\u0323", true); 2658 check(p, "testa\u0323\u0308", true); 2659 2660 // Test all equivalences of the sequence a\u0308\u0323\u0300 2661 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2662 check(p, "testa\u0308\u0323\u0300", true); 2663 check(p, "testa\u0323\u0308\u0300", true); 2664 check(p, "testa\u0308\u0300\u0323", true); 2665 check(p, "test\u00e4\u0323\u0300", true); 2666 check(p, "test\u00e4\u0300\u0323", true); 2667 
2668 Object[][] data = new Object[][] { 2669 2670 // JDK-4867170 2671 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2672 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2673 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2674 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2675 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2676 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2677 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2678 2679 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2680 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2681 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2682 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2683 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2684 2685 // backtracking, force to match "\u1f80", instead of \u1f82" 2686 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2687 2688 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2689 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2690 2691 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2692 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2693 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2694 2695 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2696 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2697 { "\u1f80", "ab\u1f80cd", "f", true }, 2698 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2699 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2700 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2701 { "\u1f82", "\u1f80\u0300", "m", true }, 2702 2703 // JDK-7080302 # compile failed 2704 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2705 2706 // JDK-6728861, same cause as above one 2707 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2708 2709 // JDK-6995635 2710 { "(\u00e9)", "e\u0301", "m", true }, 2711 2712 // JDK-6736245 2713 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2714 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2715 { "\u2ADC", "\u2ADD\u0338", "m", 
true}, // NFD 2716 2717 // 4916384. 2718 // Decomposed hangul (jamos) works inside clazz 2719 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2720 { "[\u1100\u1161]", "\uac00", "m", true}, 2721 2722 { "[\uac00]", "\u1100\u1161", "m", true}, 2723 { "[\uac00]", "\uac00", "m", true}, 2724 2725 // Decomposed hangul (jamos) 2726 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2727 { "\u1100\u1161", "\uac00", "m", true}, 2728 2729 // Composed hangul 2730 { "\uac00", "\u1100\u1161", "m", true }, 2731 { "\uac00", "\uac00", "m", true }, 2732 2733 /* Need a NFDSlice to nfd the source to solve this issue 2734 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2735 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2736 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2737 2738 // Decomposed supplementary outside char classes 2739 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2740 // Composed supplementary outside char classes 2741 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2742 */ 2743 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2744 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2745 2746 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2747 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2748 }; 2749 2750 int failCount = 0; 2751 for (Object[] d : data) { 2752 String pn = (String)d[0]; 2753 String tt = (String)d[1]; 2754 boolean isFind = "f".equals(((String)d[2])); 2755 boolean expected = (boolean)d[3]; 2756 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2757 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2758 if (ret != expected) { 2759 failCount++; 2760 continue; 2761 } 2762 } 2763 report("Canonical Equivalence"); 2764 } 2765 2766 /** 2767 * A basic sanity test of Matcher.replaceAll(). 
2768 */ 2769 private static void globalSubstitute() throws Exception { 2770 // Global substitution with a literal 2771 Pattern p = Pattern.compile("(ab)(c*)"); 2772 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2773 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2774 failCount++; 2775 2776 m.reset("zzzabccczzzabcczzzabccczzz"); 2777 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2778 failCount++; 2779 2780 // Global substitution with groups 2781 m.reset("zzzabccczzzabcczzzabccczzz"); 2782 String result = m.replaceAll("$1"); 2783 if (!result.equals("zzzabzzzabzzzabzzz")) 2784 failCount++; 2785 2786 // Supplementary character test 2787 // Global substitution with a literal 2788 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2789 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2790 if (!m.replaceAll(toSupplementaries("test")). 2791 equals(toSupplementaries("testzzztestzzztest"))) 2792 failCount++; 2793 2794 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2795 if (!m.replaceAll(toSupplementaries("test")). 2796 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2797 failCount++; 2798 2799 // Global substitution with groups 2800 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2801 result = m.replaceAll("$1"); 2802 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2803 failCount++; 2804 2805 report("Global Substitution"); 2806 } 2807 2808 /** 2809 * Tests the usage of Matcher.appendReplacement() with literal 2810 * and group substitutions. 
2811 */ 2812 private static void stringbufferSubstitute() throws Exception { 2813 // SB substitution with literal 2814 String blah = "zzzblahzzz"; 2815 Pattern p = Pattern.compile("blah"); 2816 Matcher m = p.matcher(blah); 2817 StringBuffer result = new StringBuffer(); 2818 try { 2819 m.appendReplacement(result, "blech"); 2820 failCount++; 2821 } catch (IllegalStateException e) { 2822 } 2823 m.find(); 2824 m.appendReplacement(result, "blech"); 2825 if (!result.toString().equals("zzzblech")) 2826 failCount++; 2827 2828 m.appendTail(result); 2829 if (!result.toString().equals("zzzblechzzz")) 2830 failCount++; 2831 2832 // SB substitution with groups 2833 blah = "zzzabcdzzz"; 2834 p = Pattern.compile("(ab)(cd)*"); 2835 m = p.matcher(blah); 2836 result = new StringBuffer(); 2837 try { 2838 m.appendReplacement(result, "$1"); 2839 failCount++; 2840 } catch (IllegalStateException e) { 2841 } 2842 m.find(); 2843 m.appendReplacement(result, "$1"); 2844 if (!result.toString().equals("zzzab")) 2845 failCount++; 2846 2847 m.appendTail(result); 2848 if (!result.toString().equals("zzzabzzz")) 2849 failCount++; 2850 2851 // SB substitution with 3 groups 2852 blah = "zzzabcdcdefzzz"; 2853 p = Pattern.compile("(ab)(cd)*(ef)"); 2854 m = p.matcher(blah); 2855 result = new StringBuffer(); 2856 try { 2857 m.appendReplacement(result, "$1w$2w$3"); 2858 failCount++; 2859 } catch (IllegalStateException e) { 2860 } 2861 m.find(); 2862 m.appendReplacement(result, "$1w$2w$3"); 2863 if (!result.toString().equals("zzzabwcdwef")) 2864 failCount++; 2865 2866 m.appendTail(result); 2867 if (!result.toString().equals("zzzabwcdwefzzz")) 2868 failCount++; 2869 2870 // SB substitution with groups and three matches 2871 // skipping middle match 2872 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2873 p = Pattern.compile("(ab)(cd*)"); 2874 m = p.matcher(blah); 2875 result = new StringBuffer(); 2876 try { 2877 m.appendReplacement(result, "$1"); 2878 failCount++; 2879 } catch (IllegalStateException e) { 2880 } 2881 
m.find(); 2882 m.appendReplacement(result, "$1"); 2883 if (!result.toString().equals("zzzab")) 2884 failCount++; 2885 2886 m.find(); 2887 m.find(); 2888 m.appendReplacement(result, "$2"); 2889 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2890 failCount++; 2891 2892 m.appendTail(result); 2893 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2894 failCount++; 2895 2896 // Check to make sure escaped $ is ignored 2897 blah = "zzzabcdcdefzzz"; 2898 p = Pattern.compile("(ab)(cd)*(ef)"); 2899 m = p.matcher(blah); 2900 result = new StringBuffer(); 2901 m.find(); 2902 m.appendReplacement(result, "$1w\\$2w$3"); 2903 if (!result.toString().equals("zzzabw$2wef")) 2904 failCount++; 2905 2906 m.appendTail(result); 2907 if (!result.toString().equals("zzzabw$2wefzzz")) 2908 failCount++; 2909 2910 // Check to make sure a reference to nonexistent group causes error 2911 blah = "zzzabcdcdefzzz"; 2912 p = Pattern.compile("(ab)(cd)*(ef)"); 2913 m = p.matcher(blah); 2914 result = new StringBuffer(); 2915 m.find(); 2916 try { 2917 m.appendReplacement(result, "$1w$5w$3"); 2918 failCount++; 2919 } catch (IndexOutOfBoundsException ioobe) { 2920 // Correct result 2921 } 2922 2923 // Check double digit group references 2924 blah = "zzz123456789101112zzz"; 2925 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2926 m = p.matcher(blah); 2927 result = new StringBuffer(); 2928 m.find(); 2929 m.appendReplacement(result, "$1w$11w$3"); 2930 if (!result.toString().equals("zzz1w11w3")) 2931 failCount++; 2932 2933 // Check to make sure it backs off $15 to $1 if only three groups 2934 blah = "zzzabcdcdefzzz"; 2935 p = Pattern.compile("(ab)(cd)*(ef)"); 2936 m = p.matcher(blah); 2937 result = new StringBuffer(); 2938 m.find(); 2939 m.appendReplacement(result, "$1w$15w$3"); 2940 if (!result.toString().equals("zzzabwab5wef")) 2941 failCount++; 2942 2943 2944 // Supplementary character test 2945 // SB substitution with literal 2946 blah = toSupplementaries("zzzblahzzz"); 2947 p = 
Pattern.compile(toSupplementaries("blah")); 2948 m = p.matcher(blah); 2949 result = new StringBuffer(); 2950 try { 2951 m.appendReplacement(result, toSupplementaries("blech")); 2952 failCount++; 2953 } catch (IllegalStateException e) { 2954 } 2955 m.find(); 2956 m.appendReplacement(result, toSupplementaries("blech")); 2957 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2958 failCount++; 2959 2960 m.appendTail(result); 2961 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2962 failCount++; 2963 2964 // SB substitution with groups 2965 blah = toSupplementaries("zzzabcdzzz"); 2966 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2967 m = p.matcher(blah); 2968 result = new StringBuffer(); 2969 try { 2970 m.appendReplacement(result, "$1"); 2971 failCount++; 2972 } catch (IllegalStateException e) { 2973 } 2974 m.find(); 2975 m.appendReplacement(result, "$1"); 2976 if (!result.toString().equals(toSupplementaries("zzzab"))) 2977 failCount++; 2978 2979 m.appendTail(result); 2980 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 2981 failCount++; 2982 2983 // SB substitution with 3 groups 2984 blah = toSupplementaries("zzzabcdcdefzzz"); 2985 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2986 m = p.matcher(blah); 2987 result = new StringBuffer(); 2988 try { 2989 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2990 failCount++; 2991 } catch (IllegalStateException e) { 2992 } 2993 m.find(); 2994 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2995 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 2996 failCount++; 2997 2998 m.appendTail(result); 2999 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3000 failCount++; 3001 3002 // SB substitution with groups and three matches 3003 // skipping middle match 3004 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3005 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3006 m = p.matcher(blah); 3007 result = new 
StringBuffer(); 3008 try { 3009 m.appendReplacement(result, "$1"); 3010 failCount++; 3011 } catch (IllegalStateException e) { 3012 } 3013 m.find(); 3014 m.appendReplacement(result, "$1"); 3015 if (!result.toString().equals(toSupplementaries("zzzab"))) 3016 failCount++; 3017 3018 m.find(); 3019 m.find(); 3020 m.appendReplacement(result, "$2"); 3021 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3022 failCount++; 3023 3024 m.appendTail(result); 3025 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3026 failCount++; 3027 3028 // Check to make sure escaped $ is ignored 3029 blah = toSupplementaries("zzzabcdcdefzzz"); 3030 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3031 m = p.matcher(blah); 3032 result = new StringBuffer(); 3033 m.find(); 3034 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3035 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3036 failCount++; 3037 3038 m.appendTail(result); 3039 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3040 failCount++; 3041 3042 // Check to make sure a reference to nonexistent group causes error 3043 blah = toSupplementaries("zzzabcdcdefzzz"); 3044 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3045 m = p.matcher(blah); 3046 result = new StringBuffer(); 3047 m.find(); 3048 try { 3049 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3050 failCount++; 3051 } catch (IndexOutOfBoundsException ioobe) { 3052 // Correct result 3053 } 3054 3055 // Check double digit group references 3056 blah = toSupplementaries("zzz123456789101112zzz"); 3057 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3058 m = p.matcher(blah); 3059 result = new StringBuffer(); 3060 m.find(); 3061 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3062 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3063 failCount++; 3064 3065 // Check to make sure it backs off $15 to $1 if only three groups 
3066 blah = toSupplementaries("zzzabcdcdefzzz"); 3067 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3068 m = p.matcher(blah); 3069 result = new StringBuffer(); 3070 m.find(); 3071 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3072 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3073 failCount++; 3074 3075 // Check nothing has been appended into the output buffer if 3076 // the replacement string triggers IllegalArgumentException. 3077 p = Pattern.compile("(abc)"); 3078 m = p.matcher("abcd"); 3079 result = new StringBuffer(); 3080 m.find(); 3081 try { 3082 m.appendReplacement(result, ("xyz$g")); 3083 failCount++; 3084 } catch (IllegalArgumentException iae) { 3085 if (result.length() != 0) 3086 failCount++; 3087 } 3088 3089 report("SB Substitution"); 3090 } 3091 3092 /** 3093 * Tests the usage of Matcher.appendReplacement() with literal 3094 * and group substitutions. 3095 */ 3096 private static void stringbuilderSubstitute() throws Exception { 3097 // SB substitution with literal 3098 String blah = "zzzblahzzz"; 3099 Pattern p = Pattern.compile("blah"); 3100 Matcher m = p.matcher(blah); 3101 StringBuilder result = new StringBuilder(); 3102 try { 3103 m.appendReplacement(result, "blech"); 3104 failCount++; 3105 } catch (IllegalStateException e) { 3106 } 3107 m.find(); 3108 m.appendReplacement(result, "blech"); 3109 if (!result.toString().equals("zzzblech")) 3110 failCount++; 3111 3112 m.appendTail(result); 3113 if (!result.toString().equals("zzzblechzzz")) 3114 failCount++; 3115 3116 // SB substitution with groups 3117 blah = "zzzabcdzzz"; 3118 p = Pattern.compile("(ab)(cd)*"); 3119 m = p.matcher(blah); 3120 result = new StringBuilder(); 3121 try { 3122 m.appendReplacement(result, "$1"); 3123 failCount++; 3124 } catch (IllegalStateException e) { 3125 } 3126 m.find(); 3127 m.appendReplacement(result, "$1"); 3128 if (!result.toString().equals("zzzab")) 3129 failCount++; 3130 3131 m.appendTail(result); 3132 if 
(!result.toString().equals("zzzabzzz")) 3133 failCount++; 3134 3135 // SB substitution with 3 groups 3136 blah = "zzzabcdcdefzzz"; 3137 p = Pattern.compile("(ab)(cd)*(ef)"); 3138 m = p.matcher(blah); 3139 result = new StringBuilder(); 3140 try { 3141 m.appendReplacement(result, "$1w$2w$3"); 3142 failCount++; 3143 } catch (IllegalStateException e) { 3144 } 3145 m.find(); 3146 m.appendReplacement(result, "$1w$2w$3"); 3147 if (!result.toString().equals("zzzabwcdwef")) 3148 failCount++; 3149 3150 m.appendTail(result); 3151 if (!result.toString().equals("zzzabwcdwefzzz")) 3152 failCount++; 3153 3154 // SB substitution with groups and three matches 3155 // skipping middle match 3156 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3157 p = Pattern.compile("(ab)(cd*)"); 3158 m = p.matcher(blah); 3159 result = new StringBuilder(); 3160 try { 3161 m.appendReplacement(result, "$1"); 3162 failCount++; 3163 } catch (IllegalStateException e) { 3164 } 3165 m.find(); 3166 m.appendReplacement(result, "$1"); 3167 if (!result.toString().equals("zzzab")) 3168 failCount++; 3169 3170 m.find(); 3171 m.find(); 3172 m.appendReplacement(result, "$2"); 3173 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3174 failCount++; 3175 3176 m.appendTail(result); 3177 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3178 failCount++; 3179 3180 // Check to make sure escaped $ is ignored 3181 blah = "zzzabcdcdefzzz"; 3182 p = Pattern.compile("(ab)(cd)*(ef)"); 3183 m = p.matcher(blah); 3184 result = new StringBuilder(); 3185 m.find(); 3186 m.appendReplacement(result, "$1w\\$2w$3"); 3187 if (!result.toString().equals("zzzabw$2wef")) 3188 failCount++; 3189 3190 m.appendTail(result); 3191 if (!result.toString().equals("zzzabw$2wefzzz")) 3192 failCount++; 3193 3194 // Check to make sure a reference to nonexistent group causes error 3195 blah = "zzzabcdcdefzzz"; 3196 p = Pattern.compile("(ab)(cd)*(ef)"); 3197 m = p.matcher(blah); 3198 result = new StringBuilder(); 3199 m.find(); 3200 try { 3201 
m.appendReplacement(result, "$1w$5w$3"); 3202 failCount++; 3203 } catch (IndexOutOfBoundsException ioobe) { 3204 // Correct result 3205 } 3206 3207 // Check double digit group references 3208 blah = "zzz123456789101112zzz"; 3209 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3210 m = p.matcher(blah); 3211 result = new StringBuilder(); 3212 m.find(); 3213 m.appendReplacement(result, "$1w$11w$3"); 3214 if (!result.toString().equals("zzz1w11w3")) 3215 failCount++; 3216 3217 // Check to make sure it backs off $15 to $1 if only three groups 3218 blah = "zzzabcdcdefzzz"; 3219 p = Pattern.compile("(ab)(cd)*(ef)"); 3220 m = p.matcher(blah); 3221 result = new StringBuilder(); 3222 m.find(); 3223 m.appendReplacement(result, "$1w$15w$3"); 3224 if (!result.toString().equals("zzzabwab5wef")) 3225 failCount++; 3226 3227 3228 // Supplementary character test 3229 // SB substitution with literal 3230 blah = toSupplementaries("zzzblahzzz"); 3231 p = Pattern.compile(toSupplementaries("blah")); 3232 m = p.matcher(blah); 3233 result = new StringBuilder(); 3234 try { 3235 m.appendReplacement(result, toSupplementaries("blech")); 3236 failCount++; 3237 } catch (IllegalStateException e) { 3238 } 3239 m.find(); 3240 m.appendReplacement(result, toSupplementaries("blech")); 3241 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3242 failCount++; 3243 m.appendTail(result); 3244 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3245 failCount++; 3246 3247 // SB substitution with groups 3248 blah = toSupplementaries("zzzabcdzzz"); 3249 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3250 m = p.matcher(blah); 3251 result = new StringBuilder(); 3252 try { 3253 m.appendReplacement(result, "$1"); 3254 failCount++; 3255 } catch (IllegalStateException e) { 3256 } 3257 m.find(); 3258 m.appendReplacement(result, "$1"); 3259 if (!result.toString().equals(toSupplementaries("zzzab"))) 3260 failCount++; 3261 3262 m.appendTail(result); 3263 if 
(!result.toString().equals(toSupplementaries("zzzabzzz"))) 3264 failCount++; 3265 3266 // SB substitution with 3 groups 3267 blah = toSupplementaries("zzzabcdcdefzzz"); 3268 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3269 m = p.matcher(blah); 3270 result = new StringBuilder(); 3271 try { 3272 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3273 failCount++; 3274 } catch (IllegalStateException e) { 3275 } 3276 m.find(); 3277 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3278 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3279 failCount++; 3280 3281 m.appendTail(result); 3282 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3283 failCount++; 3284 3285 // SB substitution with groups and three matches 3286 // skipping middle match 3287 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3288 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3289 m = p.matcher(blah); 3290 result = new StringBuilder(); 3291 try { 3292 m.appendReplacement(result, "$1"); 3293 failCount++; 3294 } catch (IllegalStateException e) { 3295 } 3296 m.find(); 3297 m.appendReplacement(result, "$1"); 3298 if (!result.toString().equals(toSupplementaries("zzzab"))) 3299 failCount++; 3300 3301 m.find(); 3302 m.find(); 3303 m.appendReplacement(result, "$2"); 3304 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3305 failCount++; 3306 3307 m.appendTail(result); 3308 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3309 failCount++; 3310 3311 // Check to make sure escaped $ is ignored 3312 blah = toSupplementaries("zzzabcdcdefzzz"); 3313 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3314 m = p.matcher(blah); 3315 result = new StringBuilder(); 3316 m.find(); 3317 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3318 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3319 failCount++; 3320 3321 m.appendTail(result); 3322 if 
(!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3323 failCount++; 3324 3325 // Check to make sure a reference to nonexistent group causes error 3326 blah = toSupplementaries("zzzabcdcdefzzz"); 3327 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3328 m = p.matcher(blah); 3329 result = new StringBuilder(); 3330 m.find(); 3331 try { 3332 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3333 failCount++; 3334 } catch (IndexOutOfBoundsException ioobe) { 3335 // Correct result 3336 } 3337 // Check double digit group references 3338 blah = toSupplementaries("zzz123456789101112zzz"); 3339 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3340 m = p.matcher(blah); 3341 result = new StringBuilder(); 3342 m.find(); 3343 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3344 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3345 failCount++; 3346 3347 // Check to make sure it backs off $15 to $1 if only three groups 3348 blah = toSupplementaries("zzzabcdcdefzzz"); 3349 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3350 m = p.matcher(blah); 3351 result = new StringBuilder(); 3352 m.find(); 3353 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3354 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3355 failCount++; 3356 // Check nothing has been appended into the output buffer if 3357 // the replacement string triggers IllegalArgumentException. 3358 p = Pattern.compile("(abc)"); 3359 m = p.matcher("abcd"); 3360 result = new StringBuilder(); 3361 m.find(); 3362 try { 3363 m.appendReplacement(result, ("xyz$g")); 3364 failCount++; 3365 } catch (IllegalArgumentException iae) { 3366 if (result.length() != 0) 3367 failCount++; 3368 } 3369 report("SB Substitution 2"); 3370 } 3371 3372 /* 3373 * 5 groups of characters are created to make a substitution string. 
3374 * A base string will be created including random lead chars, the 3375 * substitution string, and random trailing chars. 3376 * A pattern containing the 5 groups is searched for and replaced with: 3377 * random group + random string + random group. 3378 * The results are checked for correctness. 3379 */ 3380 private static void substitutionBasher() { 3381 for (int runs = 0; runs<1000; runs++) { 3382 // Create a base string to work in 3383 int leadingChars = generator.nextInt(10); 3384 StringBuffer baseBuffer = new StringBuffer(100); 3385 String leadingString = getRandomAlphaString(leadingChars); 3386 baseBuffer.append(leadingString); 3387 3388 // Create 5 groups of random number of random chars 3389 // Create the string to substitute 3390 // Create the pattern string to search for 3391 StringBuffer bufferToSub = new StringBuffer(25); 3392 StringBuffer bufferToPat = new StringBuffer(50); 3393 String[] groups = new String[5]; 3394 for(int i=0; i<5; i++) { 3395 int aGroupSize = generator.nextInt(5)+1; 3396 groups[i] = getRandomAlphaString(aGroupSize); 3397 bufferToSub.append(groups[i]); 3398 bufferToPat.append('('); 3399 bufferToPat.append(groups[i]); 3400 bufferToPat.append(')'); 3401 } 3402 String stringToSub = bufferToSub.toString(); 3403 String pattern = bufferToPat.toString(); 3404 3405 // Place sub string into working string at random index 3406 baseBuffer.append(stringToSub); 3407 3408 // Append random chars to end 3409 int trailingChars = generator.nextInt(10); 3410 String trailingString = getRandomAlphaString(trailingChars); 3411 baseBuffer.append(trailingString); 3412 String baseString = baseBuffer.toString(); 3413 3414 // Create test pattern and matcher 3415 Pattern p = Pattern.compile(pattern); 3416 Matcher m = p.matcher(baseString); 3417 3418 // Reject candidate if pattern happens to start early 3419 m.find(); 3420 if (m.start() < leadingChars) 3421 continue; 3422 3423 // Reject candidate if more than one match 3424 if (m.find()) 3425 continue; 3426 
3427 // Construct a replacement string with : 3428 // random group + random string + random group 3429 StringBuffer bufferToRep = new StringBuffer(); 3430 int groupIndex1 = generator.nextInt(5); 3431 bufferToRep.append("$" + (groupIndex1 + 1)); 3432 String randomMidString = getRandomAlphaString(5); 3433 bufferToRep.append(randomMidString); 3434 int groupIndex2 = generator.nextInt(5); 3435 bufferToRep.append("$" + (groupIndex2 + 1)); 3436 String replacement = bufferToRep.toString(); 3437 3438 // Do the replacement 3439 String result = m.replaceAll(replacement); 3440 3441 // Construct expected result 3442 StringBuffer bufferToRes = new StringBuffer(); 3443 bufferToRes.append(leadingString); 3444 bufferToRes.append(groups[groupIndex1]); 3445 bufferToRes.append(randomMidString); 3446 bufferToRes.append(groups[groupIndex2]); 3447 bufferToRes.append(trailingString); 3448 String expectedResult = bufferToRes.toString(); 3449 3450 // Check results 3451 if (!result.equals(expectedResult)) 3452 failCount++; 3453 } 3454 3455 report("Substitution Basher"); 3456 } 3457 3458 /* 3459 * 5 groups of characters are created to make a substitution string. 3460 * A base string will be created including random lead chars, the 3461 * substitution string, and random trailing chars. 3462 * A pattern containing the 5 groups is searched for and replaced with: 3463 * random group + random string + random group. 3464 * The results are checked for correctness. 
3465 */ 3466 private static void substitutionBasher2() { 3467 for (int runs = 0; runs<1000; runs++) { 3468 // Create a base string to work in 3469 int leadingChars = generator.nextInt(10); 3470 StringBuilder baseBuffer = new StringBuilder(100); 3471 String leadingString = getRandomAlphaString(leadingChars); 3472 baseBuffer.append(leadingString); 3473 3474 // Create 5 groups of random number of random chars 3475 // Create the string to substitute 3476 // Create the pattern string to search for 3477 StringBuilder bufferToSub = new StringBuilder(25); 3478 StringBuilder bufferToPat = new StringBuilder(50); 3479 String[] groups = new String[5]; 3480 for(int i=0; i<5; i++) { 3481 int aGroupSize = generator.nextInt(5)+1; 3482 groups[i] = getRandomAlphaString(aGroupSize); 3483 bufferToSub.append(groups[i]); 3484 bufferToPat.append('('); 3485 bufferToPat.append(groups[i]); 3486 bufferToPat.append(')'); 3487 } 3488 String stringToSub = bufferToSub.toString(); 3489 String pattern = bufferToPat.toString(); 3490 3491 // Place sub string into working string at random index 3492 baseBuffer.append(stringToSub); 3493 3494 // Append random chars to end 3495 int trailingChars = generator.nextInt(10); 3496 String trailingString = getRandomAlphaString(trailingChars); 3497 baseBuffer.append(trailingString); 3498 String baseString = baseBuffer.toString(); 3499 3500 // Create test pattern and matcher 3501 Pattern p = Pattern.compile(pattern); 3502 Matcher m = p.matcher(baseString); 3503 3504 // Reject candidate if pattern happens to start early 3505 m.find(); 3506 if (m.start() < leadingChars) 3507 continue; 3508 3509 // Reject candidate if more than one match 3510 if (m.find()) 3511 continue; 3512 3513 // Construct a replacement string with : 3514 // random group + random string + random group 3515 StringBuilder bufferToRep = new StringBuilder(); 3516 int groupIndex1 = generator.nextInt(5); 3517 bufferToRep.append("$" + (groupIndex1 + 1)); 3518 String randomMidString = 
getRandomAlphaString(5); 3519 bufferToRep.append(randomMidString); 3520 int groupIndex2 = generator.nextInt(5); 3521 bufferToRep.append("$" + (groupIndex2 + 1)); 3522 String replacement = bufferToRep.toString(); 3523 3524 // Do the replacement 3525 String result = m.replaceAll(replacement); 3526 3527 // Construct expected result 3528 StringBuilder bufferToRes = new StringBuilder(); 3529 bufferToRes.append(leadingString); 3530 bufferToRes.append(groups[groupIndex1]); 3531 bufferToRes.append(randomMidString); 3532 bufferToRes.append(groups[groupIndex2]); 3533 bufferToRes.append(trailingString); 3534 String expectedResult = bufferToRes.toString(); 3535 3536 // Check results 3537 if (!result.equals(expectedResult)) { 3538 failCount++; 3539 } 3540 } 3541 3542 report("Substitution Basher 2"); 3543 } 3544 3545 /** 3546 * Checks the handling of some escape sequences that the Pattern 3547 * class should process instead of the java compiler. These are 3548 * not in the file because the escapes should be be processed 3549 * by the Pattern class when the regex is compiled. 3550 */ 3551 private static void escapes() throws Exception { 3552 Pattern p = Pattern.compile("\\043"); 3553 Matcher m = p.matcher("#"); 3554 if (!m.find()) 3555 failCount++; 3556 3557 p = Pattern.compile("\\x23"); 3558 m = p.matcher("#"); 3559 if (!m.find()) 3560 failCount++; 3561 3562 p = Pattern.compile("\\u0023"); 3563 m = p.matcher("#"); 3564 if (!m.find()) 3565 failCount++; 3566 3567 report("Escape sequences"); 3568 } 3569 3570 /** 3571 * Checks the handling of blank input situations. These 3572 * tests are incompatible with my test file format. 
3573 */ 3574 private static void blankInput() throws Exception { 3575 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3576 Matcher m = p.matcher(""); 3577 if (m.find()) 3578 failCount++; 3579 3580 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3581 m = p.matcher(""); 3582 if (!m.find()) 3583 failCount++; 3584 3585 p = Pattern.compile("abc"); 3586 m = p.matcher(""); 3587 if (m.find()) 3588 failCount++; 3589 3590 p = Pattern.compile("a*"); 3591 m = p.matcher(""); 3592 if (!m.find()) 3593 failCount++; 3594 3595 report("Blank input"); 3596 } 3597 3598 /** 3599 * Tests the Boyer-Moore pattern matching of a character sequence 3600 * on randomly generated patterns. 3601 */ 3602 private static void bm() throws Exception { 3603 doBnM('a'); 3604 report("Boyer Moore (ASCII)"); 3605 3606 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3607 report("Boyer Moore (Supplementary)"); 3608 } 3609 3610 private static void doBnM(int baseCharacter) throws Exception { 3611 int achar=0; 3612 3613 for (int i=0; i<100; i++) { 3614 // Create a short pattern to search for 3615 int patternLength = generator.nextInt(7) + 4; 3616 StringBuffer patternBuffer = new StringBuffer(patternLength); 3617 String pattern; 3618 retry: for (;;) { 3619 for (int x=0; x<patternLength; x++) { 3620 int ch = baseCharacter + generator.nextInt(26); 3621 if (Character.isSupplementaryCodePoint(ch)) { 3622 patternBuffer.append(Character.toChars(ch)); 3623 } else { 3624 patternBuffer.append((char)ch); 3625 } 3626 } 3627 pattern = patternBuffer.toString(); 3628 3629 // Avoid patterns that start and end with the same substring 3630 // See JDK-6854417 3631 for (int x=1; x < pattern.length(); x++) { 3632 if (pattern.startsWith(pattern.substring(x))) 3633 continue retry; 3634 } 3635 break; 3636 } 3637 Pattern p = Pattern.compile(pattern); 3638 3639 // Create a buffer with random ASCII chars that does 3640 // not match the sample 3641 String toSearch = null; 3642 StringBuffer s = null; 3643 Matcher m = 
p.matcher(""); 3644 do { 3645 s = new StringBuffer(100); 3646 for (int x=0; x<100; x++) { 3647 int ch = baseCharacter + generator.nextInt(26); 3648 if (Character.isSupplementaryCodePoint(ch)) { 3649 s.append(Character.toChars(ch)); 3650 } else { 3651 s.append((char)ch); 3652 } 3653 } 3654 toSearch = s.toString(); 3655 m.reset(toSearch); 3656 } while (m.find()); 3657 3658 // Insert the pattern at a random spot 3659 int insertIndex = generator.nextInt(99); 3660 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3661 insertIndex++; 3662 s = s.insert(insertIndex, pattern); 3663 toSearch = s.toString(); 3664 3665 // Make sure that the pattern is found 3666 m.reset(toSearch); 3667 if (!m.find()) 3668 failCount++; 3669 3670 // Make sure that the match text is the pattern 3671 if (!m.group().equals(pattern)) 3672 failCount++; 3673 3674 // Make sure match occured at insertion point 3675 if (m.start() != insertIndex) 3676 failCount++; 3677 } 3678 } 3679 3680 /** 3681 * Tests the matching of slices on randomly generated patterns. 3682 * The Boyer-Moore optimization is not done on these patterns 3683 * because it uses unicode case folding. 
3684 */ 3685 private static void slice() throws Exception { 3686 doSlice(Character.MAX_VALUE); 3687 report("Slice"); 3688 3689 doSlice(Character.MAX_CODE_POINT); 3690 report("Slice (Supplementary)"); 3691 } 3692 3693 private static void doSlice(int maxCharacter) throws Exception { 3694 Random generator = new Random(); 3695 int achar=0; 3696 3697 for (int i=0; i<100; i++) { 3698 // Create a short pattern to search for 3699 int patternLength = generator.nextInt(7) + 4; 3700 StringBuffer patternBuffer = new StringBuffer(patternLength); 3701 for (int x=0; x<patternLength; x++) { 3702 int randomChar = 0; 3703 while (!Character.isLetterOrDigit(randomChar)) 3704 randomChar = generator.nextInt(maxCharacter); 3705 if (Character.isSupplementaryCodePoint(randomChar)) { 3706 patternBuffer.append(Character.toChars(randomChar)); 3707 } else { 3708 patternBuffer.append((char) randomChar); 3709 } 3710 } 3711 String pattern = patternBuffer.toString(); 3712 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3713 3714 // Create a buffer with random chars that does not match the sample 3715 String toSearch = null; 3716 StringBuffer s = null; 3717 Matcher m = p.matcher(""); 3718 do { 3719 s = new StringBuffer(100); 3720 for (int x=0; x<100; x++) { 3721 int randomChar = 0; 3722 while (!Character.isLetterOrDigit(randomChar)) 3723 randomChar = generator.nextInt(maxCharacter); 3724 if (Character.isSupplementaryCodePoint(randomChar)) { 3725 s.append(Character.toChars(randomChar)); 3726 } else { 3727 s.append((char) randomChar); 3728 } 3729 } 3730 toSearch = s.toString(); 3731 m.reset(toSearch); 3732 } while (m.find()); 3733 3734 // Insert the pattern at a random spot 3735 int insertIndex = generator.nextInt(99); 3736 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3737 insertIndex++; 3738 s = s.insert(insertIndex, pattern); 3739 toSearch = s.toString(); 3740 3741 // Make sure that the pattern is found 3742 m.reset(toSearch); 3743 if (!m.find()) 3744 failCount++; 3745 3746 // 
Make sure that the match text is the pattern 3747 if (!m.group().equals(pattern)) 3748 failCount++; 3749 3750 // Make sure match occured at insertion point 3751 if (m.start() != insertIndex) 3752 failCount++; 3753 } 3754 } 3755 3756 private static void explainFailure(String pattern, String data, 3757 String expected, String actual) { 3758 System.err.println("----------------------------------------"); 3759 System.err.println("Pattern = "+pattern); 3760 System.err.println("Data = "+data); 3761 System.err.println("Expected = " + expected); 3762 System.err.println("Actual = " + actual); 3763 } 3764 3765 private static void explainFailure(String pattern, String data, 3766 Throwable t) { 3767 System.err.println("----------------------------------------"); 3768 System.err.println("Pattern = "+pattern); 3769 System.err.println("Data = "+data); 3770 t.printStackTrace(System.err); 3771 } 3772 3773 // Testing examples from a file 3774 3775 /** 3776 * Goes through the file "TestCases.txt" and creates many patterns 3777 * described in the file, matching the patterns against input lines in 3778 * the file, and comparing the results against the correct results 3779 * also found in the file. The file format is described in comments 3780 * at the head of the file. 3781 */ 3782 private static void processFile(String fileName) throws Exception { 3783 File testCases = new File(System.getProperty("test.src", "."), 3784 fileName); 3785 FileInputStream in = new FileInputStream(testCases); 3786 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3787 3788 // Process next test case. 
3789 String aLine; 3790 while((aLine = r.readLine()) != null) { 3791 // Read a line for pattern 3792 String patternString = grabLine(r); 3793 Pattern p = null; 3794 try { 3795 p = compileTestPattern(patternString); 3796 } catch (PatternSyntaxException e) { 3797 String dataString = grabLine(r); 3798 String expectedResult = grabLine(r); 3799 if (expectedResult.startsWith("error")) 3800 continue; 3801 explainFailure(patternString, dataString, e); 3802 failCount++; 3803 continue; 3804 } 3805 3806 // Read a line for input string 3807 String dataString = grabLine(r); 3808 Matcher m = p.matcher(dataString); 3809 StringBuffer result = new StringBuffer(); 3810 3811 // Check for IllegalStateExceptions before a match 3812 failCount += preMatchInvariants(m); 3813 3814 boolean found = m.find(); 3815 3816 if (found) 3817 failCount += postTrueMatchInvariants(m); 3818 else 3819 failCount += postFalseMatchInvariants(m); 3820 3821 if (found) { 3822 result.append("true "); 3823 result.append(m.group(0) + " "); 3824 } else { 3825 result.append("false "); 3826 } 3827 3828 result.append(m.groupCount()); 3829 3830 if (found) { 3831 for (int i=1; i<m.groupCount()+1; i++) 3832 if (m.group(i) != null) 3833 result.append(" " +m.group(i)); 3834 } 3835 3836 // Read a line for the expected result 3837 String expectedResult = grabLine(r); 3838 3839 if (!result.toString().equals(expectedResult)) { 3840 explainFailure(patternString, dataString, expectedResult, result.toString()); 3841 failCount++; 3842 } 3843 } 3844 3845 report(fileName); 3846 } 3847 3848 private static int preMatchInvariants(Matcher m) { 3849 int failCount = 0; 3850 try { 3851 m.start(); 3852 failCount++; 3853 } catch (IllegalStateException ise) {} 3854 try { 3855 m.end(); 3856 failCount++; 3857 } catch (IllegalStateException ise) {} 3858 try { 3859 m.group(); 3860 failCount++; 3861 } catch (IllegalStateException ise) {} 3862 return failCount; 3863 } 3864 3865 private static int postFalseMatchInvariants(Matcher m) { 3866 int 
failCount = 0; 3867 try { 3868 m.group(); 3869 failCount++; 3870 } catch (IllegalStateException ise) {} 3871 try { 3872 m.start(); 3873 failCount++; 3874 } catch (IllegalStateException ise) {} 3875 try { 3876 m.end(); 3877 failCount++; 3878 } catch (IllegalStateException ise) {} 3879 return failCount; 3880 } 3881 3882 private static int postTrueMatchInvariants(Matcher m) { 3883 int failCount = 0; 3884 //assert(m.start() = m.start(0); 3885 if (m.start() != m.start(0)) 3886 failCount++; 3887 //assert(m.end() = m.end(0); 3888 if (m.start() != m.start(0)) 3889 failCount++; 3890 //assert(m.group() = m.group(0); 3891 if (!m.group().equals(m.group(0))) 3892 failCount++; 3893 try { 3894 m.group(50); 3895 failCount++; 3896 } catch (IndexOutOfBoundsException ise) {} 3897 3898 return failCount; 3899 } 3900 3901 private static Pattern compileTestPattern(String patternString) { 3902 if (!patternString.startsWith("'")) { 3903 return Pattern.compile(patternString); 3904 } 3905 int break1 = patternString.lastIndexOf("'"); 3906 String flagString = patternString.substring( 3907 break1+1, patternString.length()); 3908 patternString = patternString.substring(1, break1); 3909 3910 if (flagString.equals("i")) 3911 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3912 3913 if (flagString.equals("m")) 3914 return Pattern.compile(patternString, Pattern.MULTILINE); 3915 3916 return Pattern.compile(patternString); 3917 } 3918 3919 /** 3920 * Reads a line from the input file. Keeps reading lines until a non 3921 * empty non comment line is read. If the line contains a \n then 3922 * these two characters are replaced by a newline char. If a \\uxxxx 3923 * sequence is read then the sequence is replaced by the unicode char. 
3924 */ 3925 private static String grabLine(BufferedReader r) throws Exception { 3926 int index = 0; 3927 String line = r.readLine(); 3928 while (line.startsWith("//") || line.length() < 1) 3929 line = r.readLine(); 3930 while ((index = line.indexOf("\\n")) != -1) { 3931 StringBuffer temp = new StringBuffer(line); 3932 temp.replace(index, index+2, "\n"); 3933 line = temp.toString(); 3934 } 3935 while ((index = line.indexOf("\\u")) != -1) { 3936 StringBuffer temp = new StringBuffer(line); 3937 String value = temp.substring(index+2, index+6); 3938 char aChar = (char)Integer.parseInt(value, 16); 3939 String unicodeChar = "" + aChar; 3940 temp.replace(index, index+6, unicodeChar); 3941 line = temp.toString(); 3942 } 3943 3944 return line; 3945 } 3946 3947 private static void check(Pattern p, String s, String g, String expected) { 3948 Matcher m = p.matcher(s); 3949 m.find(); 3950 if (!m.group(g).equals(expected) || 3951 s.charAt(m.start(g)) != expected.charAt(0) || 3952 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3953 failCount++; 3954 } 3955 3956 private static void checkReplaceFirst(String p, String s, String r, String expected) 3957 { 3958 if (!expected.equals(Pattern.compile(p) 3959 .matcher(s) 3960 .replaceFirst(r))) 3961 failCount++; 3962 } 3963 3964 private static void checkReplaceAll(String p, String s, String r, String expected) 3965 { 3966 if (!expected.equals(Pattern.compile(p) 3967 .matcher(s) 3968 .replaceAll(r))) 3969 failCount++; 3970 } 3971 3972 private static void checkExpectedFail(String p) { 3973 try { 3974 Pattern.compile(p); 3975 } catch (PatternSyntaxException pse) { 3976 //pse.printStackTrace(); 3977 return; 3978 } 3979 failCount++; 3980 } 3981 3982 private static void checkExpectedIAE(Matcher m, String g) { 3983 m.find(); 3984 try { 3985 m.group(g); 3986 } catch (IllegalArgumentException x) { 3987 //iae.printStackTrace(); 3988 try { 3989 m.start(g); 3990 } catch (IllegalArgumentException xx) { 3991 try { 3992 m.start(g); 
3993 } catch (IllegalArgumentException xxx) { 3994 return; 3995 } 3996 } 3997 } 3998 failCount++; 3999 } 4000 4001 private static void checkExpectedNPE(Matcher m) { 4002 m.find(); 4003 try { 4004 m.group(null); 4005 } catch (NullPointerException x) { 4006 try { 4007 m.start(null); 4008 } catch (NullPointerException xx) { 4009 try { 4010 m.end(null); 4011 } catch (NullPointerException xxx) { 4012 return; 4013 } 4014 } 4015 } 4016 failCount++; 4017 } 4018 4019 private static void namedGroupCaptureTest() throws Exception { 4020 check(Pattern.compile("x+(?<gname>y+)z+"), 4021 "xxxyyyzzz", 4022 "gname", 4023 "yyy"); 4024 4025 check(Pattern.compile("x+(?<gname8>y+)z+"), 4026 "xxxyyyzzz", 4027 "gname8", 4028 "yyy"); 4029 4030 //backref 4031 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4032 check(pattern, "zzzaabcazzz", true); // found "abca" 4033 4034 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4035 "zzzaabcaazzz", true); 4036 4037 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4038 "abcdefabc", true); 4039 4040 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4041 "abcdefghijkk", true); 4042 4043 // Supplementary character tests 4044 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4045 toSupplementaries("zzzaabcazzz"), true); 4046 4047 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4048 toSupplementaries("zzzaabcaazzz"), true); 4049 4050 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4051 toSupplementaries("abcdefabc"), true); 4052 4053 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4054 "(?<gname>" + 4055 toSupplementaries("k)") + "\\k<gname>"), 4056 toSupplementaries("abcdefghijkk"), true); 4057 4058 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4059 "xxxyyyzzzyyy", 4060 "gname", 4061 "yyy"); 4062 4063 //replaceFirst/All 4064 checkReplaceFirst("(?<gn>ab)(c*)", 4065 
"abccczzzabcczzzabccc", 4066 "${gn}", 4067 "abzzzabcczzzabccc"); 4068 4069 checkReplaceAll("(?<gn>ab)(c*)", 4070 "abccczzzabcczzzabccc", 4071 "${gn}", 4072 "abzzzabzzzab"); 4073 4074 4075 checkReplaceFirst("(?<gn>ab)(c*)", 4076 "zzzabccczzzabcczzzabccczzz", 4077 "${gn}", 4078 "zzzabzzzabcczzzabccczzz"); 4079 4080 checkReplaceAll("(?<gn>ab)(c*)", 4081 "zzzabccczzzabcczzzabccczzz", 4082 "${gn}", 4083 "zzzabzzzabzzzabzzz"); 4084 4085 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4086 "zzzabccczzzabcczzzabccczzz", 4087 "${gn2}", 4088 "zzzccczzzabcczzzabccczzz"); 4089 4090 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4091 "zzzabccczzzabcczzzabccczzz", 4092 "${gn2}", 4093 "zzzccczzzcczzzccczzz"); 4094 4095 //toSupplementaries("(ab)(c*)")); 4096 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4097 ")(?<gn2>" + toSupplementaries("c") + "*)", 4098 toSupplementaries("abccczzzabcczzzabccc"), 4099 "${gn1}", 4100 toSupplementaries("abzzzabcczzzabccc")); 4101 4102 4103 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4104 ")(?<gn2>" + toSupplementaries("c") + "*)", 4105 toSupplementaries("abccczzzabcczzzabccc"), 4106 "${gn1}", 4107 toSupplementaries("abzzzabzzzab")); 4108 4109 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4110 ")(?<gn2>" + toSupplementaries("c") + "*)", 4111 toSupplementaries("abccczzzabcczzzabccc"), 4112 "${gn2}", 4113 toSupplementaries("ccczzzabcczzzabccc")); 4114 4115 4116 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4117 ")(?<gn2>" + toSupplementaries("c") + "*)", 4118 toSupplementaries("abccczzzabcczzzabccc"), 4119 "${gn2}", 4120 toSupplementaries("ccczzzcczzzccc")); 4121 4122 checkReplaceFirst("(?<dog>Dog)AndCat", 4123 "zzzDogAndCatzzzDogAndCatzzz", 4124 "${dog}", 4125 "zzzDogzzzDogAndCatzzz"); 4126 4127 4128 checkReplaceAll("(?<dog>Dog)AndCat", 4129 "zzzDogAndCatzzzDogAndCatzzz", 4130 "${dog}", 4131 "zzzDogzzzDogzzz"); 4132 4133 // backref in Matcher & String 4134 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", 
"${gn}").equals("abefij") || 4135 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4136 failCount++; 4137 4138 // negative 4139 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4140 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4141 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4142 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4143 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4144 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4145 "gnameX"); 4146 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4147 report("NamedGroupCapture"); 4148 } 4149 4150 // This is for bug 6919132 4151 private static void nonBmpClassComplementTest() throws Exception { 4152 Pattern p = Pattern.compile("\\P{Lu}"); 4153 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4154 4155 if (m.find() && m.start() == 1) 4156 failCount++; 4157 4158 // from a unicode category 4159 p = Pattern.compile("\\P{Lu}"); 4160 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4161 if (m.find()) 4162 failCount++; 4163 if (!m.hitEnd()) 4164 failCount++; 4165 4166 // block 4167 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4168 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4169 if (m.find() && m.start() == 1) 4170 failCount++; 4171 4172 p = Pattern.compile("\\P{sc=GRANTHA}"); 4173 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4174 if (m.find() && m.start() == 1) 4175 failCount++; 4176 4177 report("NonBmpClassComplement"); 4178 } 4179 4180 private static void unicodePropertiesTest() throws Exception { 4181 // different forms 4182 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4183 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4184 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4185 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4186 
!Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4187 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4188 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4189 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4190 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4191 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4192 failCount++; 4193 4194 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4195 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4196 Matcher lastSM = common; 4197 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4198 4199 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4200 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4201 Matcher lastBM = latin; 4202 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4203 4204 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4205 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4206 continue; // only pick couple code points, they are the same 4207 } 4208 4209 // Unicode Script 4210 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4211 Matcher m; 4212 String str = new String(Character.toChars(cp)); 4213 if (script == lastScript) { 4214 m = lastSM; 4215 m.reset(str); 4216 } else { 4217 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4218 } 4219 if (!m.matches()) { 4220 failCount++; 4221 } 4222 Matcher other = (script == Character.UnicodeScript.COMMON)? 
unknown : common; 4223 other.reset(str); 4224 if (other.matches()) { 4225 failCount++; 4226 } 4227 lastSM = m; 4228 lastScript = script; 4229 4230 // Unicode Block 4231 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4232 if (block == null) { 4233 //System.out.printf("Not a Block: cp=%x%n", cp); 4234 continue; 4235 } 4236 if (block == lastBlock) { 4237 m = lastBM; 4238 m.reset(str); 4239 } else { 4240 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4241 } 4242 if (!m.matches()) { 4243 failCount++; 4244 } 4245 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4246 other.reset(str); 4247 if (other.matches()) { 4248 failCount++; 4249 } 4250 lastBM = m; 4251 lastBlock = block; 4252 } 4253 report("unicodeProperties"); 4254 } 4255 4256 private static void unicodeHexNotationTest() throws Exception { 4257 4258 // negative 4259 checkExpectedFail("\\x{-23}"); 4260 checkExpectedFail("\\x{110000}"); 4261 checkExpectedFail("\\x{}"); 4262 checkExpectedFail("\\x{AB[ef]"); 4263 4264 // codepoint 4265 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4266 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4267 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4268 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4269 4270 // in class 4271 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4272 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4273 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4274 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4275 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4276 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4277 4278 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4279 String s = "A" + new String(Character.toChars(cp)) + "B"; 4280 String hexUTF16 = (cp <= 0xFFFF)? 
String.format("\\u%04x", cp) 4281 : String.format("\\u%04x\\u%04x", 4282 (int) Character.toChars(cp)[0], 4283 (int) Character.toChars(cp)[1]); 4284 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4285 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4286 failCount++; 4287 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4288 failCount++; 4289 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4290 failCount++; 4291 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4292 failCount++; 4293 } 4294 report("unicodeHexNotation"); 4295 } 4296 4297 private static void unicodeClassesTest() throws Exception { 4298 4299 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4300 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4301 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4302 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4303 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4304 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4305 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4306 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4307 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4308 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4309 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4310 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4311 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4312 Matcher bound = Pattern.compile("\\b").matcher(""); 4313 Matcher word = Pattern.compile("\\w++").matcher(""); 4314 // UNICODE_CHARACTER_CLASS 4315 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4316 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4317 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4318 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 
4319 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4320 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4321 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4322 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4323 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4324 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4325 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4326 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4327 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4328 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4329 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4330 // embedded flag (?U) 4331 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4332 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4333 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4334 4335 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4336 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4337 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4338 // properties 4339 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4340 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4341 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4342 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4343 Matcher alphaP = 
Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4344 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4345 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4346 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4347 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4348 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4349 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4350 // javaMethod 4351 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4352 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4353 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4354 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4355 // GC/C 4356 Matcher gcC = Pattern.compile("\\p{C}").matcher(""); 4357 4358 for (int cp = 1; cp < 0x30000; cp++) { 4359 String str = new String(Character.toChars(cp)); 4360 int type = Character.getType(cp); 4361 if (// lower 4362 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4363 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4364 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4365 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4366 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4367 // upper 4368 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4369 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4370 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4371 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4372 // alpha 4373 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4374 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4375 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4376 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4377 // digit 4378 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4379 
Character.isDigit(cp) != digitU.reset(str).matches() || 4380 // alnum 4381 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4382 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4383 // punct 4384 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4385 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4386 // graph 4387 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4388 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4389 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4390 // blank 4391 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4392 != blank.reset(str).matches() || 4393 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4394 // print 4395 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4396 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4397 // cntrl 4398 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4399 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4400 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4401 // hexdigit 4402 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4403 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4404 // space 4405 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4406 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4407 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4408 // word 4409 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4410 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4411 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4412 // bwordb 4413 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4414 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4415 // properties 4416 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4417 Character.isLetter(cp) != letterP.reset(str).matches()|| 4418 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4419 Character.isIdeographic(cp) != 
ideogJ.reset(str).matches() || 4420 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4421 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4422 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 4423 // gc_C 4424 (Character.CONTROL == type || Character.FORMAT == type || 4425 Character.PRIVATE_USE == type || Character.SURROGATE == type || 4426 Character.UNASSIGNED == type) 4427 != gcC.reset(str).matches()) { 4428 failCount++; 4429 } 4430 } 4431 4432 // bounds/word align 4433 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4434 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4435 failCount++; 4436 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4437 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4438 failCount++; 4439 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4440 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4441 failCount++; 4442 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4443 failCount++; 4444 report("unicodePredefinedClasses"); 4445 } 4446 4447 private static void unicodeCharacterNameTest() throws Exception { 4448 4449 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4450 if (!Character.isValidCodePoint(cp) || 4451 Character.getType(cp) == Character.UNASSIGNED) 4452 continue; 4453 String str = new String(Character.toChars(cp)); 4454 // single 4455 String p = "\\N{" + Character.getName(cp) + "}"; 4456 if (!Pattern.compile(p).matcher(str).matches()) { 4457 failCount++; 4458 } 4459 // class[c] 4460 p = "[\\N{" + Character.getName(cp) + "}]"; 4461 if (!Pattern.compile(p).matcher(str).matches()) { 4462 failCount++; 4463 } 4464 } 4465 4466 // range 4467 for (int i = 0; i < 10; i++) { 4468 int start = generator.nextInt(20); 4469 int end = start + generator.nextInt(200); 4470 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4471 String str; 4472 for (int cp = start; cp < end; cp++) { 4473 str = new 
String(Character.toChars(cp)); 4474 if (!Pattern.compile(p).matcher(str).matches()) { 4475 failCount++; 4476 } 4477 } 4478 str = new String(Character.toChars(end + 10)); 4479 if (Pattern.compile(p).matcher(str).matches()) { 4480 failCount++; 4481 } 4482 } 4483 4484 // slice 4485 for (int i = 0; i < 10; i++) { 4486 int n = generator.nextInt(256); 4487 int[] buf = new int[n]; 4488 StringBuffer sb = new StringBuffer(1024); 4489 for (int j = 0; j < n; j++) { 4490 int cp = generator.nextInt(1000); 4491 if (!Character.isValidCodePoint(cp) || 4492 Character.getType(cp) == Character.UNASSIGNED) 4493 cp = 0x4e00; // just use 4e00 4494 sb.append("\\N{" + Character.getName(cp) + "}"); 4495 buf[j] = cp; 4496 } 4497 String p = sb.toString(); 4498 String str = new String(buf, 0, buf.length); 4499 if (!Pattern.compile(p).matcher(str).matches()) { 4500 failCount++; 4501 } 4502 } 4503 report("unicodeCharacterName"); 4504 } 4505 4506 private static void horizontalAndVerticalWSTest() throws Exception { 4507 String hws = new String (new char[] { 4508 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4509 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4510 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4511 0x202f, 0x205f, 0x3000 }); 4512 String vws = new String (new char[] { 4513 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4514 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4515 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4516 failCount++; 4517 if (Pattern.compile("\\H").matcher(hws).find() || 4518 Pattern.compile("[\\H]").matcher(hws).find()) 4519 failCount++; 4520 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4521 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4522 failCount++; 4523 if (Pattern.compile("\\V").matcher(vws).find() || 4524 Pattern.compile("[\\V]").matcher(vws).find()) 4525 failCount++; 4526 String prefix = "abcd"; 4527 String suffix = "efgh"; 4528 String ng = "A"; 4529 for (int i = 0; i < hws.length(); i++) { 4530 String c = String.valueOf(hws.charAt(i)); 
4531 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4532 if (!m.find() || !c.equals(m.group())) 4533 failCount++; 4534 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4535 if (!m.find() || !c.equals(m.group())) 4536 failCount++; 4537 4538 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4539 if (!m.find() || !ng.equals(m.group())) 4540 failCount++; 4541 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4542 if (!m.find() || !ng.equals(m.group())) 4543 failCount++; 4544 } 4545 for (int i = 0; i < vws.length(); i++) { 4546 String c = String.valueOf(vws.charAt(i)); 4547 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4548 if (!m.find() || !c.equals(m.group())) 4549 failCount++; 4550 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4551 if (!m.find() || !c.equals(m.group())) 4552 failCount++; 4553 4554 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4555 if (!m.find() || !ng.equals(m.group())) 4556 failCount++; 4557 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4558 if (!m.find() || !ng.equals(m.group())) 4559 failCount++; 4560 } 4561 // \v in range is interpreted as 0x0B. 
This is the undocumented behavior 4562 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4563 failCount++; 4564 report("horizontalAndVerticalWSTest"); 4565 } 4566 4567 private static void linebreakTest() throws Exception { 4568 String linebreaks = new String (new char[] { 4569 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4570 String crnl = "\r\n"; 4571 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() && 4572 Pattern.compile("\\R").matcher(crnl).matches() && 4573 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4574 Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4575 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4576 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking 4577 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029 4578 failCount++; 4579 } 4580 report("linebreakTest"); 4581 } 4582 4583 // #7189363 4584 private static void branchTest() throws Exception { 4585 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4586 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4587 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4588 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4589 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4590 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4591 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4592 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4593 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4594 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4595 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4596 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4597 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4598 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4599 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4600 
!Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4601 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4602 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4603 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4604 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4605 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4606 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4607 failCount++; 4608 report("branchTest"); 4609 } 4610 4611 // This test is for 8007395 4612 private static void groupCurlyNotFoundSuppTest() throws Exception { 4613 String input = "test this as \ud83d\ude0d"; 4614 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4615 "test(.)*(@[a-zA-Z.]+)", 4616 "test([^B])+(@[a-zA-Z.]+)", 4617 "test([^B])*(@[a-zA-Z.]+)", 4618 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4619 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4620 }) { 4621 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4622 .matcher(input); 4623 try { 4624 if (m.find()) { 4625 failCount++; 4626 } 4627 } catch (Exception x) { 4628 failCount++; 4629 } 4630 } 4631 report("GroupCurly NotFoundSupp"); 4632 } 4633 4634 // This test is for 8023647 4635 private static void groupCurlyBackoffTest() throws Exception { 4636 if (!"abc1c".matches("(\\w)+1\\1") || 4637 "abc11".matches("(\\w)+1\\1")) { 4638 failCount++; 4639 } 4640 report("GroupCurly backoff"); 4641 } 4642 4643 // This test is for 8012646 4644 private static void patternAsPredicate() throws Exception { 4645 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4646 4647 if (p.test("")) { 4648 failCount++; 4649 } 4650 if (!p.test("word")) { 4651 failCount++; 4652 } 4653 if (p.test("1234")) { 4654 failCount++; 4655 } 4656 report("Pattern.asPredicate"); 4657 } 4658 4659 // This test is for 8035975 4660 private static void invalidFlags() throws Exception { 4661 for (int flag = 1; flag != 0; flag <<= 1) { 4662 switch (flag) { 4663 case Pattern.CASE_INSENSITIVE: 4664 
case Pattern.MULTILINE: 4665 case Pattern.DOTALL: 4666 case Pattern.UNICODE_CASE: 4667 case Pattern.CANON_EQ: 4668 case Pattern.UNIX_LINES: 4669 case Pattern.LITERAL: 4670 case Pattern.UNICODE_CHARACTER_CLASS: 4671 case Pattern.COMMENTS: 4672 // valid flag, continue 4673 break; 4674 default: 4675 try { 4676 Pattern.compile(".", flag); 4677 failCount++; 4678 } catch (IllegalArgumentException expected) { 4679 } 4680 } 4681 } 4682 report("Invalid compile flags"); 4683 } 4684 4685 // This test is for 8158482 4686 private static void embeddedFlags() throws Exception { 4687 try { 4688 Pattern.compile("(?i).(?-i)."); 4689 Pattern.compile("(?m).(?-m)."); 4690 Pattern.compile("(?s).(?-s)."); 4691 Pattern.compile("(?d).(?-d)."); 4692 Pattern.compile("(?u).(?-u)."); 4693 Pattern.compile("(?c).(?-c)."); 4694 Pattern.compile("(?x).(?-x)."); 4695 Pattern.compile("(?U).(?-U)."); 4696 Pattern.compile("(?imsducxU).(?-imsducxU)."); 4697 } catch (PatternSyntaxException x) { 4698 failCount++; 4699 } 4700 report("Embedded flags"); 4701 } 4702 4703 private static void grapheme() throws Exception { 4704 Files.lines(Paths.get(System.getProperty("test.src", "."), 4705 "GraphemeBreakTest.txt")) 4706 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4707 .forEach( ln -> { 4708 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4709 // System.out.println(str); 4710 String[] strs = ln.split("\u00f7|\u00d7"); 4711 StringBuilder src = new StringBuilder(); 4712 ArrayList<String> graphemes = new ArrayList<>(); 4713 StringBuilder buf = new StringBuilder(); 4714 int offBk = 0; 4715 for (String str : strs) { 4716 if (str.length() == 0) // first empty str 4717 continue; 4718 int cp = Integer.parseInt(str, 16); 4719 src.appendCodePoint(cp); 4720 buf.appendCodePoint(cp); 4721 offBk += (str.length() + 1); 4722 if (ln.charAt(offBk) == '\u00f7') { // DIV 4723 graphemes.add(buf.toString()); 4724 buf = new StringBuilder(); 4725 } 4726 } 4727 Pattern p = Pattern.compile("\\X"); 4728 
Matcher m = p.matcher(src.toString()); 4729 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4730 for (String g : graphemes) { 4731 // System.out.printf(" grapheme:=[%s]%n", g); 4732 // (1) test \\X directly 4733 if (!m.find() || !m.group().equals(g)) { 4734 System.out.println("Failed \\X [" + ln + "] : " + g); 4735 failCount++; 4736 } 4737 // (2) test \\b{g} + \\X via Scanner 4738 boolean hasNext = s.hasNext(p); 4739 // if (!s.hasNext() || !s.next().equals(next)) { 4740 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4741 System.out.println("Failed b{g} [" + ln + "] : " + g); 4742 failCount++; 4743 } 4744 } 4745 }); 4746 // some sanity checks 4747 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4748 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4749 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4750 failCount++; 4751 // make sure "\b{n}" still works 4752 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4753 failCount++; 4754 report("Unicode extended grapheme cluster"); 4755 } 4756 4757 // hangup/timeout if go into exponential backtracking 4758 private static void expoBacktracking() throws Exception { 4759 4760 Object[][] patternMatchers = { 4761 // 6328855 4762 { "(.*\n*)*", 4763 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)", 4764 false }, 4765 // 6192895 4766 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4767 "Hello World this is a test this is a test this is a test A", 4768 true }, 4769 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4770 "Hello World this is a test this is a test this is a test \u4e00 ", 4771 false }, 4772 { " *([a-z0-9]+ *)+", 4773 "hello world this is a test this is a test this is a test A", 4774 false }, 4775 // 4771934 [FIXED] #5013651? 
4776 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4777 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4778 true }, 4779 // 4866249 [FIXED] 4780 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4781 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4782 true }, 4783 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4784 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4785 false }, 4786 // 6345469 4787 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4788 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4789 true }, // --> matched 4790 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4791 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4792 false }, 4793 // 5026912 4794 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4795 "156580451111112225588087755221111111566969655555555", 4796 false}, 4797 // 6988218 4798 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4799 "'%)) order by ANGEBOT.ID", 4800 false}, // find 4801 // 6693451 4802 { "^(\\s*foo\\s*)*$", 4803 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4804 true }, 4805 { "^(\\s*foo\\s*)*$", 4806 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo", 4807 false 4808 }, 4809 // 7006761 4810 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4811 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4812 // 8140212 4813 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4814 
"{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4815 false 4816 }, 4817 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4818 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4819 4820 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4821 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4822 4823 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4824 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4825 4826 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4827 4828 /* not fixed 4829 //8132141 ---> second level exponential backtracking 4830 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4831 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4832 */ 4833 }; 4834 4835 for (Object[] pm : patternMatchers) { 4836 String p = (String)pm[0]; 4837 String s = (String)pm[1]; 4838 boolean r = (Boolean)pm[2]; 4839 if (r != Pattern.compile(p).matcher(s).matches()) { 4840 failCount++; 4841 } 4842 } 4843 } 4844 }