1 /* 2 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 37 * 6328855 6192895 6345469 6988218 6693451 7006761 8140212 38 * 39 * @library /lib/testlibrary 40 * @build jdk.testlibrary.* 41 * @run main RegExTest 42 * @key randomness 43 */ 44 45 import java.util.function.Function; 46 import java.util.regex.*; 47 import java.util.Random; 48 import java.util.Scanner; 49 import java.io.*; 50 import java.nio.file.*; 51 import java.util.*; 52 import java.nio.CharBuffer; 53 import java.util.function.Predicate; 54 import jdk.testlibrary.RandomFactory; 55 56 /** 57 * This is a test class created to check the operation of 58 * the Pattern and Matcher classes. 59 */ 60 public class RegExTest { 61 62 private static Random generator = RandomFactory.getRandom(); 63 private static boolean failure = false; 64 private static int failCount = 0; 65 private static String firstFailure = null; 66 67 /** 68 * Main to interpret arguments and run several tests. 
69 * 70 */ 71 public static void main(String[] args) throws Exception { 72 // Most of the tests are in a file 73 processFile("TestCases.txt"); 74 //processFile("PerlCases.txt"); 75 processFile("BMPTestCases.txt"); 76 processFile("SupplementaryTestCases.txt"); 77 78 // These test many randomly generated char patterns 79 bm(); 80 slice(); 81 82 // These are hard to put into the file 83 escapes(); 84 blankInput(); 85 86 // Substitition tests on randomly generated sequences 87 globalSubstitute(); 88 stringbufferSubstitute(); 89 stringbuilderSubstitute(); 90 91 substitutionBasher(); 92 substitutionBasher2(); 93 94 // Canonical Equivalence 95 ceTest(); 96 97 // Anchors 98 anchorTest(); 99 100 // boolean match calls 101 matchesTest(); 102 lookingAtTest(); 103 104 // Pattern API 105 patternMatchesTest(); 106 107 // Misc 108 lookbehindTest(); 109 nullArgumentTest(); 110 backRefTest(); 111 groupCaptureTest(); 112 caretTest(); 113 charClassTest(); 114 emptyPatternTest(); 115 findIntTest(); 116 group0Test(); 117 longPatternTest(); 118 octalTest(); 119 ampersandTest(); 120 negationTest(); 121 splitTest(); 122 appendTest(); 123 caseFoldingTest(); 124 commentsTest(); 125 unixLinesTest(); 126 replaceFirstTest(); 127 gTest(); 128 zTest(); 129 serializeTest(); 130 reluctantRepetitionTest(); 131 multilineDollarTest(); 132 dollarAtEndTest(); 133 caretBetweenTerminatorsTest(); 134 // This RFE rejected in Tiger numOccurrencesTest(); 135 javaCharClassTest(); 136 nonCaptureRepetitionTest(); 137 notCapturedGroupCurlyMatchTest(); 138 escapedSegmentTest(); 139 literalPatternTest(); 140 literalReplacementTest(); 141 regionTest(); 142 toStringTest(); 143 negatedCharClassTest(); 144 findFromTest(); 145 boundsTest(); 146 unicodeWordBoundsTest(); 147 caretAtEndTest(); 148 wordSearchTest(); 149 hitEndTest(); 150 toMatchResultTest(); 151 toMatchResultTest2(); 152 surrogatesInClassTest(); 153 removeQEQuotingTest(); 154 namedGroupCaptureTest(); 155 nonBmpClassComplementTest(); 156 
unicodePropertiesTest(); 157 unicodeHexNotationTest(); 158 unicodeClassesTest(); 159 unicodeCharacterNameTest(); 160 horizontalAndVerticalWSTest(); 161 linebreakTest(); 162 branchTest(); 163 groupCurlyNotFoundSuppTest(); 164 groupCurlyBackoffTest(); 165 patternAsPredicate(); 166 invalidFlags(); 167 grapheme(); 168 expoBacktracking(); 169 170 if (failure) { 171 throw new 172 RuntimeException("RegExTest failed, 1st failure: " + 173 firstFailure); 174 } else { 175 System.err.println("OKAY: All tests passed."); 176 } 177 } 178 179 // Utility functions 180 181 private static String getRandomAlphaString(int length) { 182 StringBuffer buf = new StringBuffer(length); 183 for (int i=0; i<length; i++) { 184 char randChar = (char)(97 + generator.nextInt(26)); 185 buf.append(randChar); 186 } 187 return buf.toString(); 188 } 189 190 private static void check(Matcher m, String expected) { 191 m.find(); 192 if (!m.group().equals(expected)) 193 failCount++; 194 } 195 196 private static void check(Matcher m, String result, boolean expected) { 197 m.find(); 198 if (m.group().equals(result) != expected) 199 failCount++; 200 } 201 202 private static void check(Pattern p, String s, boolean expected) { 203 if (p.matcher(s).find() != expected) 204 failCount++; 205 } 206 207 private static void check(String p, String s, boolean expected) { 208 Matcher matcher = Pattern.compile(p).matcher(s); 209 if (matcher.find() != expected) 210 failCount++; 211 } 212 213 private static void check(String p, char c, boolean expected) { 214 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 215 Pattern pattern = Pattern.compile(propertyPattern); 216 char[] ca = new char[1]; ca[0] = c; 217 Matcher matcher = pattern.matcher(new String(ca)); 218 if (!matcher.find()) 219 failCount++; 220 } 221 222 private static void check(String p, int codePoint, boolean expected) { 223 String propertyPattern = expected ? 
"\\p" + p : "\\P" + p; 224 Pattern pattern = Pattern.compile(propertyPattern); 225 char[] ca = Character.toChars(codePoint); 226 Matcher matcher = pattern.matcher(new String(ca)); 227 if (!matcher.find()) 228 failCount++; 229 } 230 231 private static void check(String p, int flag, String input, String s, 232 boolean expected) 233 { 234 Pattern pattern = Pattern.compile(p, flag); 235 Matcher matcher = pattern.matcher(input); 236 if (expected) 237 check(matcher, s, expected); 238 else 239 check(pattern, input, false); 240 } 241 242 private static void report(String testName) { 243 int spacesToAdd = 30 - testName.length(); 244 StringBuffer paddedNameBuffer = new StringBuffer(testName); 245 for (int i=0; i<spacesToAdd; i++) 246 paddedNameBuffer.append(" "); 247 String paddedName = paddedNameBuffer.toString(); 248 System.err.println(paddedName + ": " + 249 (failCount==0 ? "Passed":"Failed("+failCount+")")); 250 if (failCount > 0) { 251 failure = true; 252 253 if (firstFailure == null) { 254 firstFailure = testName; 255 } 256 } 257 258 failCount = 0; 259 } 260 261 /** 262 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 263 * supplementary characters. This method does NOT fully take care 264 * of the regex syntax. 
265 */ 266 private static String toSupplementaries(String s) { 267 int length = s.length(); 268 StringBuffer sb = new StringBuffer(length * 2); 269 270 for (int i = 0; i < length; ) { 271 char c = s.charAt(i++); 272 if (c == '\\') { 273 sb.append(c); 274 if (i < length) { 275 c = s.charAt(i++); 276 sb.append(c); 277 if (c == 'u') { 278 // assume no syntax error 279 sb.append(s.charAt(i++)); 280 sb.append(s.charAt(i++)); 281 sb.append(s.charAt(i++)); 282 sb.append(s.charAt(i++)); 283 } 284 } 285 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 286 sb.append('\ud800').append((char)('\udc00'+c)); 287 } else { 288 sb.append(c); 289 } 290 } 291 return sb.toString(); 292 } 293 294 // Regular expression tests 295 296 // This is for bug 6178785 297 // Test if an expected NPE gets thrown when passing in a null argument 298 private static boolean check(Runnable test) { 299 try { 300 test.run(); 301 failCount++; 302 return false; 303 } catch (NullPointerException npe) { 304 return true; 305 } 306 } 307 308 private static void nullArgumentTest() { 309 check(() -> Pattern.compile(null)); 310 check(() -> Pattern.matches(null, null)); 311 check(() -> Pattern.matches("xyz", null)); 312 check(() -> Pattern.quote(null)); 313 check(() -> Pattern.compile("xyz").split(null)); 314 check(() -> Pattern.compile("xyz").matcher(null)); 315 316 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 317 m.matches(); 318 check(() -> m.appendTail((StringBuffer) null)); 319 check(() -> m.appendTail((StringBuilder)null)); 320 check(() -> m.replaceAll((String) null)); 321 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 322 check(() -> m.replaceFirst((String)null)); 323 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 324 check(() -> m.appendReplacement((StringBuffer)null, null)); 325 check(() -> m.appendReplacement((StringBuilder)null, null)); 326 check(() -> m.reset(null)); 327 check(() -> Matcher.quoteReplacement(null)); 328 //check(() -> 
m.usePattern(null)); 329 330 report("Null Argument"); 331 } 332 333 // This is for bug6635133 334 // Test if surrogate pair in Unicode escapes can be handled correctly. 335 private static void surrogatesInClassTest() throws Exception { 336 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 337 Matcher matcher = pattern.matcher("\ud834\udd22"); 338 if (!matcher.find()) 339 failCount++; 340 341 report("Surrogate pair in Unicode escape"); 342 } 343 344 // This is for bug6990617 345 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 346 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 347 // char is an octal digit. 348 private static void removeQEQuotingTest() throws Exception { 349 Pattern pattern = 350 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 351 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 352 if (!matcher.find()) 353 failCount++; 354 355 report("Remove Q/E Quoting"); 356 } 357 358 // This is for bug 4988891 359 // Test toMatchResult to see that it is a copy of the Matcher 360 // that is not affected by subsequent operations on the original 361 private static void toMatchResultTest() throws Exception { 362 Pattern pattern = Pattern.compile("squid"); 363 Matcher matcher = pattern.matcher( 364 "agiantsquidofdestinyasmallsquidoffate"); 365 matcher.find(); 366 int matcherStart1 = matcher.start(); 367 MatchResult mr = matcher.toMatchResult(); 368 if (mr == matcher) 369 failCount++; 370 int resultStart1 = mr.start(); 371 if (matcherStart1 != resultStart1) 372 failCount++; 373 matcher.find(); 374 int matcherStart2 = matcher.start(); 375 int resultStart2 = mr.start(); 376 if (matcherStart2 == resultStart2) 377 failCount++; 378 if (resultStart1 != resultStart2) 379 failCount++; 380 MatchResult mr2 = matcher.toMatchResult(); 381 if (mr == mr2) 382 failCount++; 383 if (mr2.start() != matcherStart2) 384 failCount++; 385 report("toMatchResult is a copy"); 386 } 387 388 private 
static void checkExpectedISE(Runnable test) { 389 try { 390 test.run(); 391 failCount++; 392 } catch (IllegalStateException x) { 393 } catch (IndexOutOfBoundsException xx) { 394 failCount++; 395 } 396 } 397 398 private static void checkExpectedIOOE(Runnable test) { 399 try { 400 test.run(); 401 failCount++; 402 } catch (IndexOutOfBoundsException x) {} 403 } 404 405 // This is for bug 8074678 406 // Test the result of toMatchResult throws ISE if no match is availble 407 private static void toMatchResultTest2() throws Exception { 408 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 409 matcher.find(); 410 MatchResult mr = matcher.toMatchResult(); 411 412 checkExpectedISE(() -> mr.start()); 413 checkExpectedISE(() -> mr.start(2)); 414 checkExpectedISE(() -> mr.end()); 415 checkExpectedISE(() -> mr.end(2)); 416 checkExpectedISE(() -> mr.group()); 417 checkExpectedISE(() -> mr.group(2)); 418 419 matcher = Pattern.compile("(match)").matcher("there is a match"); 420 matcher.find(); 421 MatchResult mr2 = matcher.toMatchResult(); 422 checkExpectedIOOE(() -> mr2.start(2)); 423 checkExpectedIOOE(() -> mr2.end(2)); 424 checkExpectedIOOE(() -> mr2.group(2)); 425 426 report("toMatchResult2 appropriate exceptions"); 427 } 428 429 // This is for bug 5013885 430 // Must test a slice to see if it reports hitEnd correctly 431 private static void hitEndTest() throws Exception { 432 // Basic test of Slice node 433 Pattern p = Pattern.compile("^squidattack"); 434 Matcher m = p.matcher("squack"); 435 m.find(); 436 if (m.hitEnd()) 437 failCount++; 438 m.reset("squid"); 439 m.find(); 440 if (!m.hitEnd()) 441 failCount++; 442 443 // Test Slice, SliceA and SliceU nodes 444 for (int i=0; i<3; i++) { 445 int flags = 0; 446 if (i==1) flags = Pattern.CASE_INSENSITIVE; 447 if (i==2) flags = Pattern.UNICODE_CASE; 448 p = Pattern.compile("^abc", flags); 449 m = p.matcher("ad"); 450 m.find(); 451 if (m.hitEnd()) 452 failCount++; 453 m.reset("ab"); 454 m.find(); 455 if 
(!m.hitEnd()) 456 failCount++; 457 } 458 459 // Test Boyer-Moore node 460 p = Pattern.compile("catattack"); 461 m = p.matcher("attack"); 462 m.find(); 463 if (!m.hitEnd()) 464 failCount++; 465 466 p = Pattern.compile("catattack"); 467 m = p.matcher("attackattackattackcatatta"); 468 m.find(); 469 if (!m.hitEnd()) 470 failCount++; 471 report("hitEnd from a Slice"); 472 } 473 474 // This is for bug 4997476 475 // It is weird code submitted by customer demonstrating a regression 476 private static void wordSearchTest() throws Exception { 477 String testString = new String("word1 word2 word3"); 478 Pattern p = Pattern.compile("\\b"); 479 Matcher m = p.matcher(testString); 480 int position = 0; 481 int start = 0; 482 while (m.find(position)) { 483 start = m.start(); 484 if (start == testString.length()) 485 break; 486 if (m.find(start+1)) { 487 position = m.start(); 488 } else { 489 position = testString.length(); 490 } 491 if (testString.substring(start, position).equals(" ")) 492 continue; 493 if (!testString.substring(start, position-1).startsWith("word")) 494 failCount++; 495 } 496 report("Customer word search"); 497 } 498 499 // This is for bug 4994840 500 private static void caretAtEndTest() throws Exception { 501 // Problem only occurs with multiline patterns 502 // containing a beginning-of-line caret "^" followed 503 // by an expression that also matches the empty string. 
504 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 505 Matcher matcher = pattern.matcher("\r"); 506 matcher.find(); 507 matcher.find(); 508 report("Caret at end"); 509 } 510 511 // This test is for 4979006 512 // Check to see if word boundary construct properly handles unicode 513 // non spacing marks 514 private static void unicodeWordBoundsTest() throws Exception { 515 String spaces = " "; 516 String wordChar = "a"; 517 String nsm = "\u030a"; 518 519 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 520 521 Pattern pattern = Pattern.compile("\\b"); 522 Matcher matcher = pattern.matcher(""); 523 // S=other B=word character N=non spacing mark .=word boundary 524 // SS.BB.SS 525 String input = spaces + wordChar + wordChar + spaces; 526 twoFindIndexes(input, matcher, 2, 4); 527 // SS.BBN.SS 528 input = spaces + wordChar +wordChar + nsm + spaces; 529 twoFindIndexes(input, matcher, 2, 5); 530 // SS.BN.SS 531 input = spaces + wordChar + nsm + spaces; 532 twoFindIndexes(input, matcher, 2, 4); 533 // SS.BNN.SS 534 input = spaces + wordChar + nsm + nsm + spaces; 535 twoFindIndexes(input, matcher, 2, 5); 536 // SSN.BB.SS 537 input = spaces + nsm + wordChar + wordChar + spaces; 538 twoFindIndexes(input, matcher, 3, 5); 539 // SS.BNB.SS 540 input = spaces + wordChar + nsm + wordChar + spaces; 541 twoFindIndexes(input, matcher, 2, 5); 542 // SSNNSS 543 input = spaces + nsm + nsm + spaces; 544 matcher.reset(input); 545 if (matcher.find()) 546 failCount++; 547 // SSN.BBN.SS 548 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 549 twoFindIndexes(input, matcher, 3, 6); 550 551 report("Unicode word boundary"); 552 } 553 554 private static void twoFindIndexes(String input, Matcher matcher, int a, 555 int b) throws Exception 556 { 557 matcher.reset(input); 558 matcher.find(); 559 if (matcher.start() != a) 560 failCount++; 561 matcher.find(); 562 if (matcher.start() != b) 563 failCount++; 564 } 565 566 // This test is for 6284152 567 static 
void check(String regex, String input, String[] expected) { 568 List<String> result = new ArrayList<String>(); 569 Pattern p = Pattern.compile(regex); 570 Matcher m = p.matcher(input); 571 while (m.find()) { 572 result.add(m.group()); 573 } 574 if (!Arrays.asList(expected).equals(result)) 575 failCount++; 576 } 577 578 private static void lookbehindTest() throws Exception { 579 //Positive 580 check("(?<=%.{0,5})foo\\d", 581 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 582 new String[]{"foo1", "foo2", "foo3"}); 583 584 //boundary at end of the lookbehind sub-regex should work consistently 585 //with the boundary just after the lookbehind sub-regex 586 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 587 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 588 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 589 check("(?<!abc \\b)foo", "abc foo", new String[0]); 590 591 //Negative 592 check("(?<!%.{0,5})foo\\d", 593 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 594 new String[] {"foo4", "foo5"}); 595 596 //Positive greedy 597 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 598 599 //Positive reluctant 600 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 601 602 //supplementary 603 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 604 new String[] {"fo\ud800\udc00o"}); 605 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 606 new String[] {"fo\ud800\udc00o"}); 607 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 608 new String[] {"fo\ud800\udc00o"}); 609 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 610 new String[] {"fo\ud800\udc00o"}); 611 report("Lookbehind"); 612 } 613 614 // This test is for 4938995 615 // Check to see if weak region boundaries are transparent to 616 // lookahead and lookbehind constructs 617 private static void boundsTest() throws Exception { 618 String fullMessage = "catdogcat"; 619 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 620 
Matcher matcher = pattern.matcher("catdogca"); 621 matcher.useTransparentBounds(true); 622 if (matcher.find()) 623 failCount++; 624 matcher.reset("atdogcat"); 625 if (matcher.find()) 626 failCount++; 627 matcher.reset(fullMessage); 628 if (!matcher.find()) 629 failCount++; 630 matcher.reset(fullMessage); 631 matcher.region(0,9); 632 if (!matcher.find()) 633 failCount++; 634 matcher.reset(fullMessage); 635 matcher.region(0,6); 636 if (!matcher.find()) 637 failCount++; 638 matcher.reset(fullMessage); 639 matcher.region(3,6); 640 if (!matcher.find()) 641 failCount++; 642 matcher.useTransparentBounds(false); 643 if (matcher.find()) 644 failCount++; 645 646 // Negative lookahead/lookbehind 647 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 648 matcher = pattern.matcher("dogcat"); 649 matcher.useTransparentBounds(true); 650 matcher.region(0,3); 651 if (matcher.find()) 652 failCount++; 653 matcher.reset("catdog"); 654 matcher.region(3,6); 655 if (matcher.find()) 656 failCount++; 657 matcher.useTransparentBounds(false); 658 matcher.reset("dogcat"); 659 matcher.region(0,3); 660 if (!matcher.find()) 661 failCount++; 662 matcher.reset("catdog"); 663 matcher.region(3,6); 664 if (!matcher.find()) 665 failCount++; 666 667 report("Region bounds transparency"); 668 } 669 670 // This test is for 4945394 671 private static void findFromTest() throws Exception { 672 String message = "This is 40 $0 message."; 673 Pattern pat = Pattern.compile("\\$0"); 674 Matcher match = pat.matcher(message); 675 if (!match.find()) 676 failCount++; 677 if (match.find()) 678 failCount++; 679 if (match.find()) 680 failCount++; 681 report("Check for alternating find"); 682 } 683 684 // This test is for 4872664 and 4892980 685 private static void negatedCharClassTest() throws Exception { 686 Pattern pattern = Pattern.compile("[^>]"); 687 Matcher matcher = pattern.matcher("\u203A"); 688 if (!matcher.matches()) 689 failCount++; 690 pattern = Pattern.compile("[^fr]"); 691 matcher = pattern.matcher("a"); 
692 if (!matcher.find()) 693 failCount++; 694 matcher.reset("\u203A"); 695 if (!matcher.find()) 696 failCount++; 697 String s = "for"; 698 String result[] = s.split("[^fr]"); 699 if (!result[0].equals("f")) 700 failCount++; 701 if (!result[1].equals("r")) 702 failCount++; 703 s = "f\u203Ar"; 704 result = s.split("[^fr]"); 705 if (!result[0].equals("f")) 706 failCount++; 707 if (!result[1].equals("r")) 708 failCount++; 709 710 // Test adding to bits, subtracting a node, then adding to bits again 711 pattern = Pattern.compile("[^f\u203Ar]"); 712 matcher = pattern.matcher("a"); 713 if (!matcher.find()) 714 failCount++; 715 matcher.reset("f"); 716 if (matcher.find()) 717 failCount++; 718 matcher.reset("\u203A"); 719 if (matcher.find()) 720 failCount++; 721 matcher.reset("r"); 722 if (matcher.find()) 723 failCount++; 724 matcher.reset("\u203B"); 725 if (!matcher.find()) 726 failCount++; 727 728 // Test subtracting a node, adding to bits, subtracting again 729 pattern = Pattern.compile("[^\u203Ar\u203B]"); 730 matcher = pattern.matcher("a"); 731 if (!matcher.find()) 732 failCount++; 733 matcher.reset("\u203A"); 734 if (matcher.find()) 735 failCount++; 736 matcher.reset("r"); 737 if (matcher.find()) 738 failCount++; 739 matcher.reset("\u203B"); 740 if (matcher.find()) 741 failCount++; 742 matcher.reset("\u203C"); 743 if (!matcher.find()) 744 failCount++; 745 746 report("Negated Character Class"); 747 } 748 749 // This test is for 4628291 750 private static void toStringTest() throws Exception { 751 Pattern pattern = Pattern.compile("b+"); 752 if (pattern.toString() != "b+") 753 failCount++; 754 Matcher matcher = pattern.matcher("aaabbbccc"); 755 String matcherString = matcher.toString(); // unspecified 756 matcher.find(); 757 matcherString = matcher.toString(); // unspecified 758 matcher.region(0,3); 759 matcherString = matcher.toString(); // unspecified 760 matcher.reset(); 761 matcherString = matcher.toString(); // unspecified 762 report("toString"); 763 } 764 765 // 
    // This test is for 4808962
    // Exercises Pattern.LITERAL and Pattern.quote(): each pattern is
    // compiled and then checked with check(Pattern, String, boolean),
    // which counts a failure when find() disagrees with the expected
    // flag. 'flags' accumulates further flags as the test proceeds, so
    // the order of the sections matters.
    private static void literalPatternTest() throws Exception {
        int flags = Pattern.LITERAL;

        Pattern pattern = Pattern.compile("abc\\t$^", flags);
        check(pattern, "abc\\t$^", true);

        pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
        check(pattern, "abc\\t$^", true);

        pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
        check(pattern, "\\Qa^$bcabc\\E", true);
        check(pattern, "a^$bcabc", false);

        // The next three patterns are compiled WITHOUT the LITERAL flag.
        pattern = Pattern.compile("\\\\Q\\\\E");
        check(pattern, "\\Q\\E", true);

        pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
        check(pattern, "abcefg\\Q\\Ehij", true);

        pattern = Pattern.compile("\\\\\\Q\\\\E");
        check(pattern, "\\\\\\\\", true);

        pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
        check(pattern, "\\Qa^$bcabc\\E", true);
        check(pattern, "a^$bcabc", false);

        pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
        check(pattern, "\\Qabc\\Edef", true);
        check(pattern, "abcdef", false);

        pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
        check(pattern, "abc\\Edef", true);
        check(pattern, "abcdef", false);

        pattern = Pattern.compile(Pattern.quote("\\E"));
        check(pattern, "\\E", true);

        pattern = Pattern.compile("((((abc.+?:)", flags);
        check(pattern, "((((abc.+?:)", true);

        // From here on 'flags' is LITERAL plus one more flag per section.
        flags |= Pattern.MULTILINE;

        pattern = Pattern.compile("^cat$", flags);
        check(pattern, "abc^cat$def", true);
        check(pattern, "cat", false);

        flags |= Pattern.CASE_INSENSITIVE;

        pattern = Pattern.compile("abcdef", flags);
        check(pattern, "ABCDEF", true);
        check(pattern, "AbCdEf", true);

        flags |= Pattern.DOTALL;

        pattern = Pattern.compile("a...b", flags);
        check(pattern, "A...b", true);
        check(pattern, "Axxxb", false);

        flags |= Pattern.CANON_EQ;

        // NOTE(review): 'p' is compiled here but the two checks below still
        // use 'pattern' (the "a...b" pattern from the previous section), so
        // the CANON_EQ pattern itself is never matched. Looks unintended;
        // confirm the intended expectations before switching to 'p'.
        Pattern p = Pattern.compile("testa\u030a", flags);
        check(pattern, "testa\u030a", false);
        check(pattern, "test\u00e5", false);

        // Supplementary character test
        // Same sequence as above with ASCII letters mapped to supplementary
        // characters by toSupplementaries(); 'flags' starts over at LITERAL.
        flags = Pattern.LITERAL;

        pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
        check(pattern, toSupplementaries("abc\\t$^"), true);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
        check(pattern, toSupplementaries("abc\\t$^"), true);

        pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
        check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
        check(pattern, toSupplementaries("a^$bcabc"), false);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
        check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
        check(pattern, toSupplementaries("a^$bcabc"), false);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
        check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
        check(pattern, toSupplementaries("abcdef"), false);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
        check(pattern, toSupplementaries("abc\\Edef"), true);
        check(pattern, toSupplementaries("abcdef"), false);

        pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
        check(pattern, toSupplementaries("((((abc.+?:)"), true);

        flags |= Pattern.MULTILINE;

        pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
        check(pattern, toSupplementaries("abc^cat$def"), true);
        check(pattern, toSupplementaries("cat"), false);

        flags |= Pattern.DOTALL;

        // note: this is case-sensitive.
        // (CASE_INSENSITIVE is not added in this half of the test.)
        pattern = Pattern.compile(toSupplementaries("a...b"), flags);
        check(pattern, toSupplementaries("a...b"), true);
        check(pattern, toSupplementaries("axxxb"), false);

        flags |= Pattern.CANON_EQ;

        // NOTE(review): as in the first half, 'p' is assigned but 'pattern'
        // is what gets checked - confirm whether that is intended.
        String t = toSupplementaries("test");
        p = Pattern.compile(t + "a\u030a", flags);
        check(pattern, t + "a\u030a", false);
        check(pattern, t + "\u00e5", false);

        report("Literal pattern");
    }

    // This test is for 4803179
    // This test is also for 4808962, replacement parts
    // Checks that a "$0" replacement is expanded as a group reference, and
    // that the same text passed through quoteReplacement() is inserted
    // literally.
    private static void literalReplacementTest() throws Exception {
        int flags = Pattern.LITERAL;

        Pattern pattern = Pattern.compile("abc", flags);
        Matcher matcher = pattern.matcher("zzzabczzz");
        String replaceTest = "$0";
        // Unquoted "$0" re-inserts the match, leaving the input unchanged.
        String result = matcher.replaceAll(replaceTest);
        if (!result.equals("zzzabczzz"))
            failCount++;

        matcher.reset();
        // quoteReplacement is a static method of Matcher, invoked here
        // through the instance reference.
        String literalReplacement = matcher.quoteReplacement(replaceTest);
        result = matcher.replaceAll(literalReplacement);
        if (!result.equals("zzz$0zzz"))
            failCount++;

        matcher.reset();
        replaceTest = "\\t$\\$";
        literalReplacement = matcher.quoteReplacement(replaceTest);
        result = matcher.replaceAll(literalReplacement);
        if (!result.equals("zzz\\t$\\$zzz"))
            failCount++;

        // Supplementary character test
        pattern = Pattern.compile(toSupplementaries("abc"), flags);
        matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
        replaceTest = "$0";
        result = matcher.replaceAll(replaceTest);
        if (!result.equals(toSupplementaries("zzzabczzz")))
            failCount++;

        matcher.reset();
        literalReplacement = matcher.quoteReplacement(replaceTest);
        result = matcher.replaceAll(literalReplacement);
        if (!result.equals(toSupplementaries("zzz$0zzz")))
            failCount++;

        matcher.reset();
        replaceTest = "\\t$\\$";
        literalReplacement = matcher.quoteReplacement(replaceTest);
        result = matcher.replaceAll(literalReplacement);
        if
(!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 925 failCount++; 926 927 // IAE should be thrown if backslash or '$' is the last character 928 // in replacement string 929 try { 930 "\uac00".replaceAll("\uac00", "$"); 931 failCount++; 932 } catch (IllegalArgumentException iie) { 933 } catch (Exception e) { 934 failCount++; 935 } 936 try { 937 "\uac00".replaceAll("\uac00", "\\"); 938 failCount++; 939 } catch (IllegalArgumentException iie) { 940 } catch (Exception e) { 941 failCount++; 942 } 943 report("Literal replacement"); 944 } 945 946 // This test is for 4757029 947 private static void regionTest() throws Exception { 948 Pattern pattern = Pattern.compile("abc"); 949 Matcher matcher = pattern.matcher("abcdefabc"); 950 951 matcher.region(0,9); 952 if (!matcher.find()) 953 failCount++; 954 if (!matcher.find()) 955 failCount++; 956 matcher.region(0,3); 957 if (!matcher.find()) 958 failCount++; 959 matcher.region(3,6); 960 if (matcher.find()) 961 failCount++; 962 matcher.region(0,2); 963 if (matcher.find()) 964 failCount++; 965 966 expectRegionFail(matcher, 1, -1); 967 expectRegionFail(matcher, -1, -1); 968 expectRegionFail(matcher, -1, 1); 969 expectRegionFail(matcher, 5, 3); 970 expectRegionFail(matcher, 5, 12); 971 expectRegionFail(matcher, 12, 12); 972 973 pattern = Pattern.compile("^abc$"); 974 matcher = pattern.matcher("zzzabczzz"); 975 matcher.region(0,9); 976 if (matcher.find()) 977 failCount++; 978 matcher.region(3,6); 979 if (!matcher.find()) 980 failCount++; 981 matcher.region(3,6); 982 matcher.useAnchoringBounds(false); 983 if (matcher.find()) 984 failCount++; 985 986 // Supplementary character test 987 pattern = Pattern.compile(toSupplementaries("abc")); 988 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 989 matcher.region(0,9*2); 990 if (!matcher.find()) 991 failCount++; 992 if (!matcher.find()) 993 failCount++; 994 matcher.region(0,3*2); 995 if (!matcher.find()) 996 failCount++; 997 matcher.region(1,3*2); 998 if (matcher.find()) 999 
failCount++; 1000 matcher.region(3*2,6*2); 1001 if (matcher.find()) 1002 failCount++; 1003 matcher.region(0,2*2); 1004 if (matcher.find()) 1005 failCount++; 1006 matcher.region(0,2*2+1); 1007 if (matcher.find()) 1008 failCount++; 1009 1010 expectRegionFail(matcher, 1*2, -1); 1011 expectRegionFail(matcher, -1, -1); 1012 expectRegionFail(matcher, -1, 1*2); 1013 expectRegionFail(matcher, 5*2, 3*2); 1014 expectRegionFail(matcher, 5*2, 12*2); 1015 expectRegionFail(matcher, 12*2, 12*2); 1016 1017 pattern = Pattern.compile(toSupplementaries("^abc$")); 1018 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1019 matcher.region(0,9*2); 1020 if (matcher.find()) 1021 failCount++; 1022 matcher.region(3*2,6*2); 1023 if (!matcher.find()) 1024 failCount++; 1025 matcher.region(3*2+1,6*2); 1026 if (matcher.find()) 1027 failCount++; 1028 matcher.region(3*2,6*2-1); 1029 if (matcher.find()) 1030 failCount++; 1031 matcher.region(3*2,6*2); 1032 matcher.useAnchoringBounds(false); 1033 if (matcher.find()) 1034 failCount++; 1035 report("Regions"); 1036 } 1037 1038 private static void expectRegionFail(Matcher matcher, int index1, 1039 int index2) 1040 { 1041 try { 1042 matcher.region(index1, index2); 1043 failCount++; 1044 } catch (IndexOutOfBoundsException ioobe) { 1045 // Correct result 1046 } catch (IllegalStateException ise) { 1047 // Correct result 1048 } 1049 } 1050 1051 // This test is for 4803197 1052 private static void escapedSegmentTest() throws Exception { 1053 1054 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1055 check(pattern, "dir1\\dir2", true); 1056 1057 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1058 check(pattern, "dir1\\dir2\\", true); 1059 1060 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1061 check(pattern, "dir1\\dir2\\", true); 1062 1063 // Supplementary character test 1064 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1065 check(pattern, toSupplementaries("dir1\\dir2"), true); 1066 1067 pattern = 
Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1068 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1069 1070 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1071 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1072 1073 report("Escaped segment"); 1074 } 1075 1076 // This test is for 4792284 1077 private static void nonCaptureRepetitionTest() throws Exception { 1078 String input = "abcdefgh;"; 1079 1080 String[] patterns = new String[] { 1081 "(?:\\w{4})+;", 1082 "(?:\\w{8})*;", 1083 "(?:\\w{2}){2,4};", 1084 "(?:\\w{4}){2,};", // only matches the 1085 ".*?(?:\\w{5})+;", // specified minimum 1086 ".*?(?:\\w{9})*;", // number of reps - OK 1087 "(?:\\w{4})+?;", // lazy repetition - OK 1088 "(?:\\w{4})++;", // possessive repetition - OK 1089 "(?:\\w{2,}?)+;", // non-deterministic - OK 1090 "(\\w{4})+;", // capturing group - OK 1091 }; 1092 1093 for (int i = 0; i < patterns.length; i++) { 1094 // Check find() 1095 check(patterns[i], 0, input, input, true); 1096 // Check matches() 1097 Pattern p = Pattern.compile(patterns[i]); 1098 Matcher m = p.matcher(input); 1099 1100 if (m.matches()) { 1101 if (!m.group(0).equals(input)) 1102 failCount++; 1103 } else { 1104 failCount++; 1105 } 1106 } 1107 1108 report("Non capturing repetition"); 1109 } 1110 1111 // This test is for 6358731 1112 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1113 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1114 Matcher matcher = pattern.matcher("abcd"); 1115 if (!matcher.matches() || 1116 matcher.group(1) != null || 1117 !matcher.group(2).equals("abcd")) { 1118 failCount++; 1119 } 1120 report("Not captured GroupCurly"); 1121 } 1122 1123 // This test is for 4706545 1124 private static void javaCharClassTest() throws Exception { 1125 for (int i=0; i<1000; i++) { 1126 char c = (char)generator.nextInt(); 1127 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1128 check("{javaUpperCase}", c, 
Character.isUpperCase(c)); 1129 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1130 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1131 check("{javaDigit}", c, Character.isDigit(c)); 1132 check("{javaDefined}", c, Character.isDefined(c)); 1133 check("{javaLetter}", c, Character.isLetter(c)); 1134 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1135 check("{javaJavaIdentifierStart}", c, 1136 Character.isJavaIdentifierStart(c)); 1137 check("{javaJavaIdentifierPart}", c, 1138 Character.isJavaIdentifierPart(c)); 1139 check("{javaUnicodeIdentifierStart}", c, 1140 Character.isUnicodeIdentifierStart(c)); 1141 check("{javaUnicodeIdentifierPart}", c, 1142 Character.isUnicodeIdentifierPart(c)); 1143 check("{javaIdentifierIgnorable}", c, 1144 Character.isIdentifierIgnorable(c)); 1145 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1146 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1147 check("{javaISOControl}", c, Character.isISOControl(c)); 1148 check("{javaMirrored}", c, Character.isMirrored(c)); 1149 1150 } 1151 1152 // Supplementary character test 1153 for (int i=0; i<1000; i++) { 1154 int c = generator.nextInt(Character.MAX_CODE_POINT 1155 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1156 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1157 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1158 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1159 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1160 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1161 check("{javaDigit}", c, Character.isDigit(c)); 1162 check("{javaDefined}", c, Character.isDefined(c)); 1163 check("{javaLetter}", c, Character.isLetter(c)); 1164 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1165 check("{javaJavaIdentifierStart}", c, 1166 Character.isJavaIdentifierStart(c)); 1167 check("{javaJavaIdentifierPart}", c, 1168 Character.isJavaIdentifierPart(c)); 1169 check("{javaUnicodeIdentifierStart}", c, 1170 
Character.isUnicodeIdentifierStart(c)); 1171 check("{javaUnicodeIdentifierPart}", c, 1172 Character.isUnicodeIdentifierPart(c)); 1173 check("{javaIdentifierIgnorable}", c, 1174 Character.isIdentifierIgnorable(c)); 1175 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1176 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1177 check("{javaISOControl}", c, Character.isISOControl(c)); 1178 check("{javaMirrored}", c, Character.isMirrored(c)); 1179 } 1180 1181 report("Java character classes"); 1182 } 1183 1184 // This test is for 4523620 1185 /* 1186 private static void numOccurrencesTest() throws Exception { 1187 Pattern pattern = Pattern.compile("aaa"); 1188 1189 if (pattern.numOccurrences("aaaaaa", false) != 2) 1190 failCount++; 1191 if (pattern.numOccurrences("aaaaaa", true) != 4) 1192 failCount++; 1193 1194 pattern = Pattern.compile("^"); 1195 if (pattern.numOccurrences("aaaaaa", false) != 1) 1196 failCount++; 1197 if (pattern.numOccurrences("aaaaaa", true) != 1) 1198 failCount++; 1199 1200 report("Number of Occurrences"); 1201 } 1202 */ 1203 1204 // This test is for 4776374 1205 private static void caretBetweenTerminatorsTest() throws Exception { 1206 int flags1 = Pattern.DOTALL; 1207 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1208 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1209 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1210 1211 check("^....", flags1, "test\ntest", "test", true); 1212 check(".....^", flags1, "test\ntest", "test", false); 1213 check(".....^", flags1, "test\n", "test", false); 1214 check("....^", flags1, "test\r\n", "test", false); 1215 1216 check("^....", flags2, "test\ntest", "test", true); 1217 check("....^", flags2, "test\ntest", "test", false); 1218 check(".....^", flags2, "test\n", "test", false); 1219 check("....^", flags2, "test\r\n", "test", false); 1220 1221 check("^....", flags3, "test\ntest", "test", true); 1222 check(".....^", flags3, "test\ntest", "test\n", true); 1223 
check(".....^", flags3, "test\u0085test", "test\u0085", false); 1224 check(".....^", flags3, "test\n", "test", false); 1225 check(".....^", flags3, "test\r\n", "test", false); 1226 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1227 1228 check("^....", flags4, "test\ntest", "test", true); 1229 check(".....^", flags3, "test\ntest", "test\n", true); 1230 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1231 check(".....^", flags4, "test\n", "test\n", false); 1232 check(".....^", flags4, "test\r\n", "test\r", false); 1233 1234 // Supplementary character test 1235 String t = toSupplementaries("test"); 1236 check("^....", flags1, t+"\n"+t, t, true); 1237 check(".....^", flags1, t+"\n"+t, t, false); 1238 check(".....^", flags1, t+"\n", t, false); 1239 check("....^", flags1, t+"\r\n", t, false); 1240 1241 check("^....", flags2, t+"\n"+t, t, true); 1242 check("....^", flags2, t+"\n"+t, t, false); 1243 check(".....^", flags2, t+"\n", t, false); 1244 check("....^", flags2, t+"\r\n", t, false); 1245 1246 check("^....", flags3, t+"\n"+t, t, true); 1247 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1248 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1249 check(".....^", flags3, t+"\n", t, false); 1250 check(".....^", flags3, t+"\r\n", t, false); 1251 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1252 1253 check("^....", flags4, t+"\n"+t, t, true); 1254 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1255 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1256 check(".....^", flags4, t+"\n", t+"\n", false); 1257 check(".....^", flags4, t+"\r\n", t+"\r", false); 1258 1259 report("Caret between terminators"); 1260 } 1261 1262 // This test is for 4727935 1263 private static void dollarAtEndTest() throws Exception { 1264 int flags1 = Pattern.DOTALL; 1265 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1266 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1267 1268 check("....$", flags1, "test\n", "test", true); 1269 
check("....$", flags1, "test\r\n", "test", true); 1270 check(".....$", flags1, "test\n", "test\n", true); 1271 check(".....$", flags1, "test\u0085", "test\u0085", true); 1272 check("....$", flags1, "test\u0085", "test", true); 1273 1274 check("....$", flags2, "test\n", "test", true); 1275 check(".....$", flags2, "test\n", "test\n", true); 1276 check(".....$", flags2, "test\u0085", "test\u0085", true); 1277 check("....$", flags2, "test\u0085", "est\u0085", true); 1278 1279 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1280 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1281 check("....$blah", flags3, "test\nblah", "!!!!", false); 1282 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1283 1284 // Supplementary character test 1285 String t = toSupplementaries("test"); 1286 String b = toSupplementaries("blah"); 1287 check("....$", flags1, t+"\n", t, true); 1288 check("....$", flags1, t+"\r\n", t, true); 1289 check(".....$", flags1, t+"\n", t+"\n", true); 1290 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1291 check("....$", flags1, t+"\u0085", t, true); 1292 1293 check("....$", flags2, t+"\n", t, true); 1294 check(".....$", flags2, t+"\n", t+"\n", true); 1295 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1296 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1297 1298 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1299 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1300 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1301 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1302 1303 report("Dollar at End"); 1304 } 1305 1306 // This test is for 4711773 1307 private static void multilineDollarTest() throws Exception { 1308 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1309 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1310 matcher.find(); 1311 if (matcher.start(0) != 9) 1312 failCount++; 1313 matcher.find(); 1314 if (matcher.start(0) != 
20) 1315 failCount++; 1316 1317 // Supplementary character test 1318 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1319 matcher.find(); 1320 if (matcher.start(0) != 9*2) 1321 failCount++; 1322 matcher.find(); 1323 if (matcher.start(0) != 20*2) 1324 failCount++; 1325 1326 report("Multiline Dollar"); 1327 } 1328 1329 private static void reluctantRepetitionTest() throws Exception { 1330 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1331 check(p, "1 word word word 2", true); 1332 check(p, "1 wor wo w 2", true); 1333 check(p, "1 word word 2", true); 1334 check(p, "1 word 2", true); 1335 check(p, "1 wo w w 2", true); 1336 check(p, "1 wo w 2", true); 1337 check(p, "1 wor w 2", true); 1338 1339 p = Pattern.compile("([a-z])+?c"); 1340 Matcher m = p.matcher("ababcdefdec"); 1341 check(m, "ababc"); 1342 1343 // Supplementary character test 1344 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1345 m = p.matcher(toSupplementaries("ababcdefdec")); 1346 check(m, toSupplementaries("ababc")); 1347 1348 report("Reluctant Repetition"); 1349 } 1350 1351 private static void serializeTest() throws Exception { 1352 String patternStr = "(b)"; 1353 String matchStr = "b"; 1354 Pattern pattern = Pattern.compile(patternStr); 1355 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1356 ObjectOutputStream oos = new ObjectOutputStream(baos); 1357 oos.writeObject(pattern); 1358 oos.close(); 1359 ObjectInputStream ois = new ObjectInputStream( 1360 new ByteArrayInputStream(baos.toByteArray())); 1361 Pattern serializedPattern = (Pattern)ois.readObject(); 1362 ois.close(); 1363 Matcher matcher = serializedPattern.matcher(matchStr); 1364 if (!matcher.matches()) 1365 failCount++; 1366 if (matcher.groupCount() != 1) 1367 failCount++; 1368 1369 report("Serialization"); 1370 } 1371 1372 private static void gTest() { 1373 Pattern pattern = Pattern.compile("\\G\\w"); 1374 Matcher matcher = pattern.matcher("abc#x#x"); 1375 matcher.find(); 
1376 matcher.find(); 1377 matcher.find(); 1378 if (matcher.find()) 1379 failCount++; 1380 1381 pattern = Pattern.compile("\\GA*"); 1382 matcher = pattern.matcher("1A2AA3"); 1383 matcher.find(); 1384 if (matcher.find()) 1385 failCount++; 1386 1387 pattern = Pattern.compile("\\GA*"); 1388 matcher = pattern.matcher("1A2AA3"); 1389 if (!matcher.find(1)) 1390 failCount++; 1391 matcher.find(); 1392 if (matcher.find()) 1393 failCount++; 1394 1395 report("\\G"); 1396 } 1397 1398 private static void zTest() { 1399 Pattern pattern = Pattern.compile("foo\\Z"); 1400 // Positives 1401 check(pattern, "foo\u0085", true); 1402 check(pattern, "foo\u2028", true); 1403 check(pattern, "foo\u2029", true); 1404 check(pattern, "foo\n", true); 1405 check(pattern, "foo\r", true); 1406 check(pattern, "foo\r\n", true); 1407 // Negatives 1408 check(pattern, "fooo", false); 1409 check(pattern, "foo\n\r", false); 1410 1411 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1412 // Positives 1413 check(pattern, "foo", true); 1414 check(pattern, "foo\n", true); 1415 // Negatives 1416 check(pattern, "foo\r", false); 1417 check(pattern, "foo\u0085", false); 1418 check(pattern, "foo\u2028", false); 1419 check(pattern, "foo\u2029", false); 1420 1421 report("\\Z"); 1422 } 1423 1424 private static void replaceFirstTest() { 1425 Pattern pattern = Pattern.compile("(ab)(c*)"); 1426 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1427 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1428 failCount++; 1429 1430 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1431 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1432 failCount++; 1433 1434 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1435 String result = matcher.replaceFirst("$1"); 1436 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1437 failCount++; 1438 1439 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1440 result = matcher.replaceFirst("$2"); 1441 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1442 
failCount++; 1443 1444 pattern = Pattern.compile("a*"); 1445 matcher = pattern.matcher("aaaaaaaaaa"); 1446 if (!matcher.replaceFirst("test").equals("test")) 1447 failCount++; 1448 1449 pattern = Pattern.compile("a+"); 1450 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1451 if (!matcher.replaceFirst("test").equals("zzztest")) 1452 failCount++; 1453 1454 // Supplementary character test 1455 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1456 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1457 if (!matcher.replaceFirst(toSupplementaries("test")) 1458 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1459 failCount++; 1460 1461 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1462 if (!matcher.replaceFirst(toSupplementaries("test")). 1463 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1464 failCount++; 1465 1466 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1467 result = matcher.replaceFirst("$1"); 1468 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1469 failCount++; 1470 1471 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1472 result = matcher.replaceFirst("$2"); 1473 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1474 failCount++; 1475 1476 pattern = Pattern.compile(toSupplementaries("a*")); 1477 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1478 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1479 failCount++; 1480 1481 pattern = Pattern.compile(toSupplementaries("a+")); 1482 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1483 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1484 failCount++; 1485 1486 report("Replace First"); 1487 } 1488 1489 private static void unixLinesTest() { 1490 Pattern pattern = Pattern.compile(".*"); 1491 Matcher matcher = pattern.matcher("aa\u2028blah"); 1492 matcher.find(); 1493 if 
(!matcher.group(0).equals("aa")) 1494 failCount++; 1495 1496 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1497 matcher = pattern.matcher("aa\u2028blah"); 1498 matcher.find(); 1499 if (!matcher.group(0).equals("aa\u2028blah")) 1500 failCount++; 1501 1502 pattern = Pattern.compile("[az]$", 1503 Pattern.MULTILINE | Pattern.UNIX_LINES); 1504 matcher = pattern.matcher("aa\u2028zz"); 1505 check(matcher, "a\u2028", false); 1506 1507 // Supplementary character test 1508 pattern = Pattern.compile(".*"); 1509 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1510 matcher.find(); 1511 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1512 failCount++; 1513 1514 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1515 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1516 matcher.find(); 1517 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1518 failCount++; 1519 1520 pattern = Pattern.compile(toSupplementaries("[az]$"), 1521 Pattern.MULTILINE | Pattern.UNIX_LINES); 1522 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1523 check(matcher, toSupplementaries("a\u2028"), false); 1524 1525 report("Unix Lines"); 1526 } 1527 1528 private static void commentsTest() { 1529 int flags = Pattern.COMMENTS; 1530 1531 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1532 Matcher matcher = pattern.matcher("aa#aa"); 1533 if (!matcher.matches()) 1534 failCount++; 1535 1536 pattern = Pattern.compile("aa # blah", flags); 1537 matcher = pattern.matcher("aa"); 1538 if (!matcher.matches()) 1539 failCount++; 1540 1541 pattern = Pattern.compile("aa blah", flags); 1542 matcher = pattern.matcher("aablah"); 1543 if (!matcher.matches()) 1544 failCount++; 1545 1546 pattern = Pattern.compile("aa # blah blech ", flags); 1547 matcher = pattern.matcher("aa"); 1548 if (!matcher.matches()) 1549 failCount++; 1550 1551 pattern = Pattern.compile("aa # blah\n ", flags); 1552 matcher = pattern.matcher("aa"); 1553 if (!matcher.matches()) 1554 
failCount++; 1555 1556 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1557 matcher = pattern.matcher("aabc"); 1558 if (!matcher.matches()) 1559 failCount++; 1560 1561 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1562 matcher = pattern.matcher("aabc"); 1563 if (!matcher.matches()) 1564 failCount++; 1565 1566 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1567 matcher = pattern.matcher("aabc#blech"); 1568 if (!matcher.matches()) 1569 failCount++; 1570 1571 // Supplementary character test 1572 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1573 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1574 if (!matcher.matches()) 1575 failCount++; 1576 1577 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1578 matcher = pattern.matcher(toSupplementaries("aa")); 1579 if (!matcher.matches()) 1580 failCount++; 1581 1582 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1583 matcher = pattern.matcher(toSupplementaries("aablah")); 1584 if (!matcher.matches()) 1585 failCount++; 1586 1587 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1588 matcher = pattern.matcher(toSupplementaries("aa")); 1589 if (!matcher.matches()) 1590 failCount++; 1591 1592 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1593 matcher = pattern.matcher(toSupplementaries("aa")); 1594 if (!matcher.matches()) 1595 failCount++; 1596 1597 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1598 matcher = pattern.matcher(toSupplementaries("aabc")); 1599 if (!matcher.matches()) 1600 failCount++; 1601 1602 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1603 matcher = pattern.matcher(toSupplementaries("aabc")); 1604 if (!matcher.matches()) 1605 failCount++; 1606 1607 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1608 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1609 if 
(!matcher.matches()) 1610 failCount++; 1611 1612 report("Comments"); 1613 } 1614 1615 private static void caseFoldingTest() { // bug 4504687 1616 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1617 Pattern pattern = Pattern.compile("aa", flags); 1618 Matcher matcher = pattern.matcher("ab"); 1619 if (matcher.matches()) 1620 failCount++; 1621 1622 pattern = Pattern.compile("aA", flags); 1623 matcher = pattern.matcher("ab"); 1624 if (matcher.matches()) 1625 failCount++; 1626 1627 pattern = Pattern.compile("aa", flags); 1628 matcher = pattern.matcher("aB"); 1629 if (matcher.matches()) 1630 failCount++; 1631 matcher = pattern.matcher("Ab"); 1632 if (matcher.matches()) 1633 failCount++; 1634 1635 // ASCII "a" 1636 // Latin-1 Supplement "a" + grave 1637 // Cyrillic "a" 1638 String[] patterns = new String[] { 1639 //single 1640 "a", "\u00e0", "\u0430", 1641 //slice 1642 "ab", "\u00e0\u00e1", "\u0430\u0431", 1643 //class single 1644 "[a]", "[\u00e0]", "[\u0430]", 1645 //class range 1646 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1647 //back reference 1648 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1649 }; 1650 1651 String[] texts = new String[] { 1652 "A", "\u00c0", "\u0410", 1653 "AB", "\u00c0\u00c1", "\u0410\u0411", 1654 "A", "\u00c0", "\u0410", 1655 "B", "\u00c2", "\u0411", 1656 "aA", "\u00e0\u00c0", "\u0430\u0410" 1657 }; 1658 1659 boolean[] expected = new boolean[] { 1660 true, false, false, 1661 true, false, false, 1662 true, false, false, 1663 true, false, false, 1664 true, false, false 1665 }; 1666 1667 flags = Pattern.CASE_INSENSITIVE; 1668 for (int i = 0; i < patterns.length; i++) { 1669 pattern = Pattern.compile(patterns[i], flags); 1670 matcher = pattern.matcher(texts[i]); 1671 if (matcher.matches() != expected[i]) { 1672 System.out.println("<1> Failed at " + i); 1673 failCount++; 1674 } 1675 } 1676 1677 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1678 for (int i = 0; i < patterns.length; i++) { 1679 pattern = 
Pattern.compile(patterns[i], flags); 1680 matcher = pattern.matcher(texts[i]); 1681 if (!matcher.matches()) { 1682 System.out.println("<2> Failed at " + i); 1683 failCount++; 1684 } 1685 } 1686 // flag unicode_case alone should do nothing 1687 flags = Pattern.UNICODE_CASE; 1688 for (int i = 0; i < patterns.length; i++) { 1689 pattern = Pattern.compile(patterns[i], flags); 1690 matcher = pattern.matcher(texts[i]); 1691 if (matcher.matches()) { 1692 System.out.println("<3> Failed at " + i); 1693 failCount++; 1694 } 1695 } 1696 1697 // Special cases: i, I, u+0131 and u+0130 1698 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1699 pattern = Pattern.compile("[h-j]+", flags); 1700 if (!pattern.matcher("\u0131\u0130").matches()) 1701 failCount++; 1702 report("Case Folding"); 1703 } 1704 1705 private static void appendTest() { 1706 Pattern pattern = Pattern.compile("(ab)(cd)"); 1707 Matcher matcher = pattern.matcher("abcd"); 1708 String result = matcher.replaceAll("$2$1"); 1709 if (!result.equals("cdab")) 1710 failCount++; 1711 1712 String s1 = "Swap all: first = 123, second = 456"; 1713 String s2 = "Swap one: first = 123, second = 456"; 1714 String r = "$3$2$1"; 1715 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1716 matcher = pattern.matcher(s1); 1717 1718 result = matcher.replaceAll(r); 1719 if (!result.equals("Swap all: 123 = first, 456 = second")) 1720 failCount++; 1721 1722 matcher = pattern.matcher(s2); 1723 1724 if (matcher.find()) { 1725 StringBuffer sb = new StringBuffer(); 1726 matcher.appendReplacement(sb, r); 1727 matcher.appendTail(sb); 1728 result = sb.toString(); 1729 if (!result.equals("Swap one: 123 = first, second = 456")) 1730 failCount++; 1731 } 1732 1733 // Supplementary character test 1734 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1735 matcher = pattern.matcher(toSupplementaries("abcd")); 1736 result = matcher.replaceAll("$2$1"); 1737 if (!result.equals(toSupplementaries("cdab"))) 1738 failCount++; 1739 1740 s1 = 
toSupplementaries("Swap all: first = 123, second = 456"); 1741 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1742 r = toSupplementaries("$3$2$1"); 1743 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1744 matcher = pattern.matcher(s1); 1745 1746 result = matcher.replaceAll(r); 1747 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1748 failCount++; 1749 1750 matcher = pattern.matcher(s2); 1751 1752 if (matcher.find()) { 1753 StringBuffer sb = new StringBuffer(); 1754 matcher.appendReplacement(sb, r); 1755 matcher.appendTail(sb); 1756 result = sb.toString(); 1757 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1758 failCount++; 1759 } 1760 report("Append"); 1761 } 1762 1763 private static void splitTest() { 1764 Pattern pattern = Pattern.compile(":"); 1765 String[] result = pattern.split("foo:and:boo", 2); 1766 if (!result[0].equals("foo")) 1767 failCount++; 1768 if (!result[1].equals("and:boo")) 1769 failCount++; 1770 // Supplementary character test 1771 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1772 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1773 if (!result[0].equals(toSupplementaries("foo"))) 1774 failCount++; 1775 if (!result[1].equals(toSupplementaries("andXboo"))) 1776 failCount++; 1777 1778 CharBuffer cb = CharBuffer.allocate(100); 1779 cb.put("foo:and:boo"); 1780 cb.flip(); 1781 result = pattern.split(cb); 1782 if (!result[0].equals("foo")) 1783 failCount++; 1784 if (!result[1].equals("and")) 1785 failCount++; 1786 if (!result[2].equals("boo")) 1787 failCount++; 1788 1789 // Supplementary character test 1790 CharBuffer cbs = CharBuffer.allocate(100); 1791 cbs.put(toSupplementaries("fooXandXboo")); 1792 cbs.flip(); 1793 result = patternX.split(cbs); 1794 if (!result[0].equals(toSupplementaries("foo"))) 1795 failCount++; 1796 if (!result[1].equals(toSupplementaries("and"))) 1797 failCount++; 1798 if 
(!result[2].equals(toSupplementaries("boo"))) 1799 failCount++; 1800 1801 String source = "0123456789"; 1802 for (int limit=-2; limit<3; limit++) { 1803 for (int x=0; x<10; x++) { 1804 result = source.split(Integer.toString(x), limit); 1805 int expectedLength = limit < 1 ? 2 : limit; 1806 1807 if ((limit == 0) && (x == 9)) { 1808 // expected dropping of "" 1809 if (result.length != 1) 1810 failCount++; 1811 if (!result[0].equals("012345678")) { 1812 failCount++; 1813 } 1814 } else { 1815 if (result.length != expectedLength) { 1816 failCount++; 1817 } 1818 if (!result[0].equals(source.substring(0,x))) { 1819 if (limit != 1) { 1820 failCount++; 1821 } else { 1822 if (!result[0].equals(source.substring(0,10))) { 1823 failCount++; 1824 } 1825 } 1826 } 1827 if (expectedLength > 1) { // Check segment 2 1828 if (!result[1].equals(source.substring(x+1,10))) 1829 failCount++; 1830 } 1831 } 1832 } 1833 } 1834 // Check the case for no match found 1835 for (int limit=-2; limit<3; limit++) { 1836 result = source.split("e", limit); 1837 if (result.length != 1) 1838 failCount++; 1839 if (!result[0].equals(source)) 1840 failCount++; 1841 } 1842 // Check the case for limit == 0, source = ""; 1843 // split() now returns 0-length for empty source "" see #6559590 1844 source = ""; 1845 result = source.split("e", 0); 1846 if (result.length != 1) 1847 failCount++; 1848 if (!result[0].equals(source)) 1849 failCount++; 1850 1851 // Check both split() and splitAsStraem(), especially for zero-lenth 1852 // input and zero-lenth match cases 1853 String[][] input = new String[][] { 1854 { " ", "Abc Efg Hij" }, // normal non-zero-match 1855 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1856 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1857 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1858 { "(?=\\p{Lu})", "AbcEfg" }, 1859 { "(?=\\p{Lu})", "Abc" }, 1860 { " ", "" }, // zero-length input 1861 { ".*", "" }, 1862 1863 // some tests from 
PatternStreamTest.java 1864 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1865 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1866 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1867 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1868 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1869 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1870 { "\u56da", "" }, 1871 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1872 { "o", "boo:and:foo" }, 1873 { "o", "booooo:and:fooooo" }, 1874 { "o", "fooooo:" }, 1875 }; 1876 1877 String[][] expected = new String[][] { 1878 { "Abc", "Efg", "Hij" }, 1879 { "", "Abc", "Efg", "Hij" }, 1880 { "Abc", "", "Efg", "Hij" }, 1881 { "Abc", "Efg", "Hij" }, 1882 { "Abc", "Efg" }, 1883 { "Abc" }, 1884 { "" }, 1885 { "" }, 1886 1887 { "awgqwefg1fefw", "vssv1vvv1" }, 1888 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1889 { "awgqwefg", "fefw4vssv", "vvv" }, 1890 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1891 { "1", "23", "456", "7890" }, 1892 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1893 { "" }, 1894 { "This", "is", "testing", "", "with", "different", "separators" }, 1895 { "b", "", ":and:f" }, 1896 { "b", "", "", "", "", ":and:f" }, 1897 { "f", "", "", "", "", ":" }, 1898 }; 1899 for (int i = 0; i < input.length; i++) { 1900 pattern = Pattern.compile(input[i][0]); 1901 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1902 failCount++; 1903 } 1904 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1905 // array for zero-length input for now 1906 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1907 expected[i])) { 1908 failCount++; 1909 } 1910 } 1911 report("Split"); 1912 } 1913 1914 private static void negationTest() { 1915 Pattern pattern = Pattern.compile("[\\[@^]+"); 1916 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1917 if (!matcher.find()) 1918 failCount++; 
1919 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1920 failCount++; 1921 pattern = Pattern.compile("[@\\[^]+"); 1922 matcher = pattern.matcher("@@@@[[[[^^^^"); 1923 if (!matcher.find()) 1924 failCount++; 1925 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1926 failCount++; 1927 pattern = Pattern.compile("[@\\[^@]+"); 1928 matcher = pattern.matcher("@@@@[[[[^^^^"); 1929 if (!matcher.find()) 1930 failCount++; 1931 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1932 failCount++; 1933 1934 pattern = Pattern.compile("\\)"); 1935 matcher = pattern.matcher("xxx)xxx"); 1936 if (!matcher.find()) 1937 failCount++; 1938 1939 report("Negation"); 1940 } 1941 1942 private static void ampersandTest() { 1943 Pattern pattern = Pattern.compile("[&@]+"); 1944 check(pattern, "@@@@&&&&", true); 1945 1946 pattern = Pattern.compile("[@&]+"); 1947 check(pattern, "@@@@&&&&", true); 1948 1949 pattern = Pattern.compile("[@\\&]+"); 1950 check(pattern, "@@@@&&&&", true); 1951 1952 report("Ampersand"); 1953 } 1954 1955 private static void octalTest() throws Exception { 1956 Pattern pattern = Pattern.compile("\\u0007"); 1957 Matcher matcher = pattern.matcher("\u0007"); 1958 if (!matcher.matches()) 1959 failCount++; 1960 pattern = Pattern.compile("\\07"); 1961 matcher = pattern.matcher("\u0007"); 1962 if (!matcher.matches()) 1963 failCount++; 1964 pattern = Pattern.compile("\\007"); 1965 matcher = pattern.matcher("\u0007"); 1966 if (!matcher.matches()) 1967 failCount++; 1968 pattern = Pattern.compile("\\0007"); 1969 matcher = pattern.matcher("\u0007"); 1970 if (!matcher.matches()) 1971 failCount++; 1972 pattern = Pattern.compile("\\040"); 1973 matcher = pattern.matcher("\u0020"); 1974 if (!matcher.matches()) 1975 failCount++; 1976 pattern = Pattern.compile("\\0403"); 1977 matcher = pattern.matcher("\u00203"); 1978 if (!matcher.matches()) 1979 failCount++; 1980 pattern = Pattern.compile("\\0103"); 1981 matcher = pattern.matcher("\u0043"); 1982 if (!matcher.matches()) 1983 failCount++; 1984 1985 
report("Octal"); 1986 } 1987 1988 private static void longPatternTest() throws Exception { 1989 try { 1990 Pattern pattern = Pattern.compile( 1991 "a 32-character-long pattern xxxx"); 1992 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1993 pattern = Pattern.compile("a thirty four character long regex"); 1994 StringBuffer patternToBe = new StringBuffer(101); 1995 for (int i=0; i<100; i++) 1996 patternToBe.append((char)(97 + i%26)); 1997 pattern = Pattern.compile(patternToBe.toString()); 1998 } catch (PatternSyntaxException e) { 1999 failCount++; 2000 } 2001 2002 // Supplementary character test 2003 try { 2004 Pattern pattern = Pattern.compile( 2005 toSupplementaries("a 32-character-long pattern xxxx")); 2006 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2007 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2008 StringBuffer patternToBe = new StringBuffer(101*2); 2009 for (int i=0; i<100; i++) 2010 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2011 + 97 + i%26)); 2012 pattern = Pattern.compile(patternToBe.toString()); 2013 } catch (PatternSyntaxException e) { 2014 failCount++; 2015 } 2016 report("LongPattern"); 2017 } 2018 2019 private static void group0Test() throws Exception { 2020 Pattern pattern = Pattern.compile("(tes)ting"); 2021 Matcher matcher = pattern.matcher("testing"); 2022 check(matcher, "testing"); 2023 2024 matcher.reset("testing"); 2025 if (matcher.lookingAt()) { 2026 if (!matcher.group(0).equals("testing")) 2027 failCount++; 2028 } else { 2029 failCount++; 2030 } 2031 2032 matcher.reset("testing"); 2033 if (matcher.matches()) { 2034 if (!matcher.group(0).equals("testing")) 2035 failCount++; 2036 } else { 2037 failCount++; 2038 } 2039 2040 pattern = Pattern.compile("(tes)ting"); 2041 matcher = pattern.matcher("testing"); 2042 if (matcher.lookingAt()) { 2043 if (!matcher.group(0).equals("testing")) 2044 failCount++; 2045 } else { 2046 
failCount++; 2047 } 2048 2049 pattern = Pattern.compile("^(tes)ting"); 2050 matcher = pattern.matcher("testing"); 2051 if (matcher.matches()) { 2052 if (!matcher.group(0).equals("testing")) 2053 failCount++; 2054 } else { 2055 failCount++; 2056 } 2057 2058 // Supplementary character test 2059 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2060 matcher = pattern.matcher(toSupplementaries("testing")); 2061 check(matcher, toSupplementaries("testing")); 2062 2063 matcher.reset(toSupplementaries("testing")); 2064 if (matcher.lookingAt()) { 2065 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2066 failCount++; 2067 } else { 2068 failCount++; 2069 } 2070 2071 matcher.reset(toSupplementaries("testing")); 2072 if (matcher.matches()) { 2073 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2074 failCount++; 2075 } else { 2076 failCount++; 2077 } 2078 2079 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2080 matcher = pattern.matcher(toSupplementaries("testing")); 2081 if (matcher.lookingAt()) { 2082 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2083 failCount++; 2084 } else { 2085 failCount++; 2086 } 2087 2088 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2089 matcher = pattern.matcher(toSupplementaries("testing")); 2090 if (matcher.matches()) { 2091 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2092 failCount++; 2093 } else { 2094 failCount++; 2095 } 2096 2097 report("Group0"); 2098 } 2099 2100 private static void findIntTest() throws Exception { 2101 Pattern p = Pattern.compile("blah"); 2102 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2103 boolean result = m.find(2); 2104 if (!result) 2105 failCount++; 2106 2107 p = Pattern.compile("$"); 2108 m = p.matcher("1234567890"); 2109 result = m.find(10); 2110 if (!result) 2111 failCount++; 2112 try { 2113 result = m.find(11); 2114 failCount++; 2115 } catch (IndexOutOfBoundsException e) { 2116 // correct result 2117 } 2118 2119 // Supplementary 
character test 2120 p = Pattern.compile(toSupplementaries("blah")); 2121 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2122 result = m.find(2); 2123 if (!result) 2124 failCount++; 2125 2126 report("FindInt"); 2127 } 2128 2129 private static void emptyPatternTest() throws Exception { 2130 Pattern p = Pattern.compile(""); 2131 Matcher m = p.matcher("foo"); 2132 2133 // Should find empty pattern at beginning of input 2134 boolean result = m.find(); 2135 if (result != true) 2136 failCount++; 2137 if (m.start() != 0) 2138 failCount++; 2139 2140 // Should not match entire input if input is not empty 2141 m.reset(); 2142 result = m.matches(); 2143 if (result == true) 2144 failCount++; 2145 2146 try { 2147 m.start(0); 2148 failCount++; 2149 } catch (IllegalStateException e) { 2150 // Correct result 2151 } 2152 2153 // Should match entire input if input is empty 2154 m.reset(""); 2155 result = m.matches(); 2156 if (result != true) 2157 failCount++; 2158 2159 result = Pattern.matches("", ""); 2160 if (result != true) 2161 failCount++; 2162 2163 result = Pattern.matches("", "foo"); 2164 if (result == true) 2165 failCount++; 2166 report("EmptyPattern"); 2167 } 2168 2169 private static void charClassTest() throws Exception { 2170 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2171 check(pattern, "blahb]blech", true); 2172 2173 pattern = Pattern.compile("[abc[def]]"); 2174 check(pattern, "b", true); 2175 2176 // Supplementary character tests 2177 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2178 check(pattern, toSupplementaries("blahb]blech"), true); 2179 2180 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2181 check(pattern, toSupplementaries("b"), true); 2182 2183 try { 2184 // u00ff when UNICODE_CASE 2185 pattern = Pattern.compile("[ab\u00ffcd]", 2186 Pattern.CASE_INSENSITIVE| 2187 Pattern.UNICODE_CASE); 2188 check(pattern, "ab\u00ffcd", true); 2189 check(pattern, "Ab\u0178Cd", true); 2190 2191 // u00b5 when UNICODE_CASE 2192 
pattern = Pattern.compile("[ab\u00b5cd]", 2193 Pattern.CASE_INSENSITIVE| 2194 Pattern.UNICODE_CASE); 2195 check(pattern, "ab\u00b5cd", true); 2196 check(pattern, "Ab\u039cCd", true); 2197 } catch (Exception e) { failCount++; } 2198 2199 /* Special cases 2200 (1)LatinSmallLetterLongS u+017f 2201 (2)LatinSmallLetterDotlessI u+0131 2202 (3)LatineCapitalLetterIWithDotAbove u+0130 2203 (4)KelvinSign u+212a 2204 (5)AngstromSign u+212b 2205 */ 2206 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2207 pattern = Pattern.compile("[sik\u00c5]+", flags); 2208 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2209 failCount++; 2210 2211 report("CharClass"); 2212 } 2213 2214 private static void caretTest() throws Exception { 2215 Pattern pattern = Pattern.compile("\\w*"); 2216 Matcher matcher = pattern.matcher("a#bc#def##g"); 2217 check(matcher, "a"); 2218 check(matcher, ""); 2219 check(matcher, "bc"); 2220 check(matcher, ""); 2221 check(matcher, "def"); 2222 check(matcher, ""); 2223 check(matcher, ""); 2224 check(matcher, "g"); 2225 check(matcher, ""); 2226 if (matcher.find()) 2227 failCount++; 2228 2229 pattern = Pattern.compile("^\\w*"); 2230 matcher = pattern.matcher("a#bc#def##g"); 2231 check(matcher, "a"); 2232 if (matcher.find()) 2233 failCount++; 2234 2235 pattern = Pattern.compile("\\w"); 2236 matcher = pattern.matcher("abc##x"); 2237 check(matcher, "a"); 2238 check(matcher, "b"); 2239 check(matcher, "c"); 2240 check(matcher, "x"); 2241 if (matcher.find()) 2242 failCount++; 2243 2244 pattern = Pattern.compile("^\\w"); 2245 matcher = pattern.matcher("abc##x"); 2246 check(matcher, "a"); 2247 if (matcher.find()) 2248 failCount++; 2249 2250 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2251 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2252 check(matcher, "abc"); 2253 if (matcher.find()) 2254 failCount++; 2255 2256 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2257 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2258 
check(matcher, "abc"); 2259 check(matcher, "jkl"); 2260 if (matcher.find()) 2261 failCount++; 2262 2263 pattern = Pattern.compile("^", Pattern.MULTILINE); 2264 matcher = pattern.matcher("this is some text"); 2265 String result = matcher.replaceAll("X"); 2266 if (!result.equals("Xthis is some text")) 2267 failCount++; 2268 2269 pattern = Pattern.compile("^"); 2270 matcher = pattern.matcher("this is some text"); 2271 result = matcher.replaceAll("X"); 2272 if (!result.equals("Xthis is some text")) 2273 failCount++; 2274 2275 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2276 matcher = pattern.matcher("this is some text\n"); 2277 result = matcher.replaceAll("X"); 2278 if (!result.equals("Xthis is some text\n")) 2279 failCount++; 2280 2281 report("Caret"); 2282 } 2283 2284 private static void groupCaptureTest() throws Exception { 2285 // Independent group 2286 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2287 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2288 matcher.find(); 2289 try { 2290 String blah = matcher.group(1); 2291 failCount++; 2292 } catch (IndexOutOfBoundsException ioobe) { 2293 // Good result 2294 } 2295 // Pure group 2296 pattern = Pattern.compile("x+(?:y+)z+"); 2297 matcher = pattern.matcher("xxxyyyzzz"); 2298 matcher.find(); 2299 try { 2300 String blah = matcher.group(1); 2301 failCount++; 2302 } catch (IndexOutOfBoundsException ioobe) { 2303 // Good result 2304 } 2305 2306 // Supplementary character tests 2307 // Independent group 2308 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2309 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2310 matcher.find(); 2311 try { 2312 String blah = matcher.group(1); 2313 failCount++; 2314 } catch (IndexOutOfBoundsException ioobe) { 2315 // Good result 2316 } 2317 // Pure group 2318 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2319 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2320 matcher.find(); 2321 try { 2322 String blah = 
matcher.group(1); 2323 failCount++; 2324 } catch (IndexOutOfBoundsException ioobe) { 2325 // Good result 2326 } 2327 2328 report("GroupCapture"); 2329 } 2330 2331 private static void backRefTest() throws Exception { 2332 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2333 check(pattern, "zzzaabcazzz", true); 2334 2335 pattern = Pattern.compile("(a*)bc\\1"); 2336 check(pattern, "zzzaabcaazzz", true); 2337 2338 pattern = Pattern.compile("(abc)(def)\\1"); 2339 check(pattern, "abcdefabc", true); 2340 2341 pattern = Pattern.compile("(abc)(def)\\3"); 2342 check(pattern, "abcdefabc", false); 2343 2344 try { 2345 for (int i = 1; i < 10; i++) { 2346 // Make sure backref 1-9 are always accepted 2347 pattern = Pattern.compile("abcdef\\" + i); 2348 // and fail to match if the target group does not exit 2349 check(pattern, "abcdef", false); 2350 } 2351 } catch(PatternSyntaxException e) { 2352 failCount++; 2353 } 2354 2355 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2356 check(pattern, "abcdefghija", false); 2357 check(pattern, "abcdefghija1", true); 2358 2359 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2360 check(pattern, "abcdefghijkk", true); 2361 2362 pattern = Pattern.compile("(a)bcdefghij\\11"); 2363 check(pattern, "abcdefghija1", true); 2364 2365 // Supplementary character tests 2366 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2367 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2368 2369 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2370 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2371 2372 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2373 check(pattern, toSupplementaries("abcdefabc"), true); 2374 2375 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2376 check(pattern, toSupplementaries("abcdefabc"), false); 2377 2378 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2379 check(pattern, 
toSupplementaries("abcdefghija"), false); 2380 check(pattern, toSupplementaries("abcdefghija1"), true); 2381 2382 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2383 check(pattern, toSupplementaries("abcdefghijkk"), true); 2384 2385 report("BackRef"); 2386 } 2387 2388 /** 2389 * Unicode Technical Report #18, section 2.6 End of Line 2390 * There is no empty line to be matched in the sequence \u000D\u000A 2391 * but there is an empty line in the sequence \u000A\u000D. 2392 */ 2393 private static void anchorTest() throws Exception { 2394 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2395 Matcher m = p.matcher("blah1\r\nblah2"); 2396 m.find(); 2397 m.find(); 2398 if (!m.group().equals("blah2")) 2399 failCount++; 2400 2401 m.reset("blah1\n\rblah2"); 2402 m.find(); 2403 m.find(); 2404 m.find(); 2405 if (!m.group().equals("blah2")) 2406 failCount++; 2407 2408 // Test behavior of $ with \r\n at end of input 2409 p = Pattern.compile(".+$"); 2410 m = p.matcher("blah1\r\n"); 2411 if (!m.find()) 2412 failCount++; 2413 if (!m.group().equals("blah1")) 2414 failCount++; 2415 if (m.find()) 2416 failCount++; 2417 2418 // Test behavior of $ with \r\n at end of input in multiline 2419 p = Pattern.compile(".+$", Pattern.MULTILINE); 2420 m = p.matcher("blah1\r\n"); 2421 if (!m.find()) 2422 failCount++; 2423 if (m.find()) 2424 failCount++; 2425 2426 // Test for $ recognition of \u0085 for bug 4527731 2427 p = Pattern.compile(".+$", Pattern.MULTILINE); 2428 m = p.matcher("blah1\u0085"); 2429 if (!m.find()) 2430 failCount++; 2431 2432 // Supplementary character test 2433 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2434 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2435 m.find(); 2436 m.find(); 2437 if (!m.group().equals(toSupplementaries("blah2"))) 2438 failCount++; 2439 2440 m.reset(toSupplementaries("blah1\n\rblah2")); 2441 m.find(); 2442 m.find(); 2443 m.find(); 2444 if (!m.group().equals(toSupplementaries("blah2"))) 2445 
failCount++; 2446 2447 // Test behavior of $ with \r\n at end of input 2448 p = Pattern.compile(".+$"); 2449 m = p.matcher(toSupplementaries("blah1\r\n")); 2450 if (!m.find()) 2451 failCount++; 2452 if (!m.group().equals(toSupplementaries("blah1"))) 2453 failCount++; 2454 if (m.find()) 2455 failCount++; 2456 2457 // Test behavior of $ with \r\n at end of input in multiline 2458 p = Pattern.compile(".+$", Pattern.MULTILINE); 2459 m = p.matcher(toSupplementaries("blah1\r\n")); 2460 if (!m.find()) 2461 failCount++; 2462 if (m.find()) 2463 failCount++; 2464 2465 // Test for $ recognition of \u0085 for bug 4527731 2466 p = Pattern.compile(".+$", Pattern.MULTILINE); 2467 m = p.matcher(toSupplementaries("blah1\u0085")); 2468 if (!m.find()) 2469 failCount++; 2470 2471 report("Anchors"); 2472 } 2473 2474 /** 2475 * A basic sanity test of Matcher.lookingAt(). 2476 */ 2477 private static void lookingAtTest() throws Exception { 2478 Pattern p = Pattern.compile("(ab)(c*)"); 2479 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2480 2481 if (!m.lookingAt()) 2482 failCount++; 2483 2484 if (!m.group().equals(m.group(0))) 2485 failCount++; 2486 2487 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2488 if (m.lookingAt()) 2489 failCount++; 2490 2491 // Supplementary character test 2492 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2493 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2494 2495 if (!m.lookingAt()) 2496 failCount++; 2497 2498 if (!m.group().equals(m.group(0))) 2499 failCount++; 2500 2501 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2502 if (m.lookingAt()) 2503 failCount++; 2504 2505 report("Looking At"); 2506 } 2507 2508 /** 2509 * A basic sanity test of Matcher.matches(). 
2510 */ 2511 private static void matchesTest() throws Exception { 2512 // matches() 2513 Pattern p = Pattern.compile("ulb(c*)"); 2514 Matcher m = p.matcher("ulbcccccc"); 2515 if (!m.matches()) 2516 failCount++; 2517 2518 // find() but not matches() 2519 m.reset("zzzulbcccccc"); 2520 if (m.matches()) 2521 failCount++; 2522 2523 // lookingAt() but not matches() 2524 m.reset("ulbccccccdef"); 2525 if (m.matches()) 2526 failCount++; 2527 2528 // matches() 2529 p = Pattern.compile("a|ad"); 2530 m = p.matcher("ad"); 2531 if (!m.matches()) 2532 failCount++; 2533 2534 // Supplementary character test 2535 // matches() 2536 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2537 m = p.matcher(toSupplementaries("ulbcccccc")); 2538 if (!m.matches()) 2539 failCount++; 2540 2541 // find() but not matches() 2542 m.reset(toSupplementaries("zzzulbcccccc")); 2543 if (m.matches()) 2544 failCount++; 2545 2546 // lookingAt() but not matches() 2547 m.reset(toSupplementaries("ulbccccccdef")); 2548 if (m.matches()) 2549 failCount++; 2550 2551 // matches() 2552 p = Pattern.compile(toSupplementaries("a|ad")); 2553 m = p.matcher(toSupplementaries("ad")); 2554 if (!m.matches()) 2555 failCount++; 2556 2557 report("Matches"); 2558 } 2559 2560 /** 2561 * A basic sanity test of Pattern.matches(). 
2562 */ 2563 private static void patternMatchesTest() throws Exception { 2564 // matches() 2565 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2566 toSupplementaries("ulbcccccc"))) 2567 failCount++; 2568 2569 // find() but not matches() 2570 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2571 toSupplementaries("zzzulbcccccc"))) 2572 failCount++; 2573 2574 // lookingAt() but not matches() 2575 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2576 toSupplementaries("ulbccccccdef"))) 2577 failCount++; 2578 2579 // Supplementary character test 2580 // matches() 2581 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2582 toSupplementaries("ulbcccccc"))) 2583 failCount++; 2584 2585 // find() but not matches() 2586 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2587 toSupplementaries("zzzulbcccccc"))) 2588 failCount++; 2589 2590 // lookingAt() but not matches() 2591 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2592 toSupplementaries("ulbccccccdef"))) 2593 failCount++; 2594 2595 report("Pattern Matches"); 2596 } 2597 2598 /** 2599 * Canonical equivalence testing. Tests the ability of the engine 2600 * to match sequences that are not explicitly specified in the 2601 * pattern when they are considered equivalent by the Unicode Standard. 
2602 */ 2603 private static void ceTest() throws Exception { 2604 // Decomposed char outside char classes 2605 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2606 Matcher m = p.matcher("test\u00e5"); 2607 if (!m.matches()) 2608 failCount++; 2609 2610 m.reset("testa\u030a"); 2611 if (!m.matches()) 2612 failCount++; 2613 2614 // Composed char outside char classes 2615 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2616 m = p.matcher("test\u00e5"); 2617 if (!m.matches()) 2618 failCount++; 2619 2620 m.reset("testa\u030a"); 2621 if (!m.find()) 2622 failCount++; 2623 2624 // Decomposed char inside a char class 2625 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2626 m = p.matcher("test\u00e5"); 2627 if (!m.find()) 2628 failCount++; 2629 2630 m.reset("testa\u030a"); 2631 if (!m.find()) 2632 failCount++; 2633 2634 // Composed char inside a char class 2635 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2636 m = p.matcher("test\u00e5"); 2637 if (!m.find()) 2638 failCount++; 2639 2640 m.reset("testa\u0300"); 2641 if (!m.find()) 2642 failCount++; 2643 2644 m.reset("testa\u030a"); 2645 if (!m.find()) 2646 failCount++; 2647 2648 // Marks that cannot legally change order and be equivalent 2649 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2650 check(p, "testa\u0308\u0300", true); 2651 check(p, "testa\u0300\u0308", false); 2652 2653 // Marks that can legally change order and be equivalent 2654 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2655 check(p, "testa\u0308\u0323", true); 2656 check(p, "testa\u0323\u0308", true); 2657 2658 // Test all equivalences of the sequence a\u0308\u0323\u0300 2659 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2660 check(p, "testa\u0308\u0323\u0300", true); 2661 check(p, "testa\u0323\u0308\u0300", true); 2662 check(p, "testa\u0308\u0300\u0323", true); 2663 check(p, "test\u00e4\u0323\u0300", true); 2664 check(p, "test\u00e4\u0300\u0323", true); 2665 
2666 Object[][] data = new Object[][] { 2667 2668 // JDK-4867170 2669 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2670 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2671 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2672 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2673 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2674 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2675 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2676 2677 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2678 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2679 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2680 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2681 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2682 2683 // backtracking, force to match "\u1f80", instead of \u1f82" 2684 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2685 2686 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2687 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2688 2689 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2690 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2691 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2692 2693 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2694 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2695 { "\u1f80", "ab\u1f80cd", "f", true }, 2696 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2697 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2698 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2699 { "\u1f82", "\u1f80\u0300", "m", true }, 2700 2701 // JDK-7080302 # compile failed 2702 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2703 2704 // JDK-6728861, same cause as above one 2705 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2706 2707 // JDK-6995635 2708 { "(\u00e9)", "e\u0301", "m", true }, 2709 2710 // JDK-6736245 2711 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2712 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2713 { "\u2ADC", "\u2ADD\u0338", "m", 
true}, // NFD 2714 2715 // 4916384. 2716 // Decomposed hangul (jamos) works inside clazz 2717 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2718 { "[\u1100\u1161]", "\uac00", "m", true}, 2719 2720 { "[\uac00]", "\u1100\u1161", "m", true}, 2721 { "[\uac00]", "\uac00", "m", true}, 2722 2723 // Decomposed hangul (jamos) 2724 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2725 { "\u1100\u1161", "\uac00", "m", true}, 2726 2727 // Composed hangul 2728 { "\uac00", "\u1100\u1161", "m", true }, 2729 { "\uac00", "\uac00", "m", true }, 2730 2731 /* Need a NFDSlice to nfd the source to solve this issue 2732 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2733 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2734 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2735 2736 // Decomposed supplementary outside char classes 2737 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2738 // Composed supplementary outside char classes 2739 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2740 */ 2741 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2742 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2743 2744 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2745 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2746 }; 2747 2748 int failCount = 0; 2749 for (Object[] d : data) { 2750 String pn = (String)d[0]; 2751 String tt = (String)d[1]; 2752 boolean isFind = "f".equals(((String)d[2])); 2753 boolean expected = (boolean)d[3]; 2754 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2755 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2756 if (ret != expected) { 2757 failCount++; 2758 continue; 2759 } 2760 } 2761 report("Canonical Equivalence"); 2762 } 2763 2764 /** 2765 * A basic sanity test of Matcher.replaceAll(). 
2766 */ 2767 private static void globalSubstitute() throws Exception { 2768 // Global substitution with a literal 2769 Pattern p = Pattern.compile("(ab)(c*)"); 2770 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2771 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2772 failCount++; 2773 2774 m.reset("zzzabccczzzabcczzzabccczzz"); 2775 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2776 failCount++; 2777 2778 // Global substitution with groups 2779 m.reset("zzzabccczzzabcczzzabccczzz"); 2780 String result = m.replaceAll("$1"); 2781 if (!result.equals("zzzabzzzabzzzabzzz")) 2782 failCount++; 2783 2784 // Supplementary character test 2785 // Global substitution with a literal 2786 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2787 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2788 if (!m.replaceAll(toSupplementaries("test")). 2789 equals(toSupplementaries("testzzztestzzztest"))) 2790 failCount++; 2791 2792 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2793 if (!m.replaceAll(toSupplementaries("test")). 2794 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2795 failCount++; 2796 2797 // Global substitution with groups 2798 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2799 result = m.replaceAll("$1"); 2800 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2801 failCount++; 2802 2803 report("Global Substitution"); 2804 } 2805 2806 /** 2807 * Tests the usage of Matcher.appendReplacement() with literal 2808 * and group substitutions. 
2809 */ 2810 private static void stringbufferSubstitute() throws Exception { 2811 // SB substitution with literal 2812 String blah = "zzzblahzzz"; 2813 Pattern p = Pattern.compile("blah"); 2814 Matcher m = p.matcher(blah); 2815 StringBuffer result = new StringBuffer(); 2816 try { 2817 m.appendReplacement(result, "blech"); 2818 failCount++; 2819 } catch (IllegalStateException e) { 2820 } 2821 m.find(); 2822 m.appendReplacement(result, "blech"); 2823 if (!result.toString().equals("zzzblech")) 2824 failCount++; 2825 2826 m.appendTail(result); 2827 if (!result.toString().equals("zzzblechzzz")) 2828 failCount++; 2829 2830 // SB substitution with groups 2831 blah = "zzzabcdzzz"; 2832 p = Pattern.compile("(ab)(cd)*"); 2833 m = p.matcher(blah); 2834 result = new StringBuffer(); 2835 try { 2836 m.appendReplacement(result, "$1"); 2837 failCount++; 2838 } catch (IllegalStateException e) { 2839 } 2840 m.find(); 2841 m.appendReplacement(result, "$1"); 2842 if (!result.toString().equals("zzzab")) 2843 failCount++; 2844 2845 m.appendTail(result); 2846 if (!result.toString().equals("zzzabzzz")) 2847 failCount++; 2848 2849 // SB substitution with 3 groups 2850 blah = "zzzabcdcdefzzz"; 2851 p = Pattern.compile("(ab)(cd)*(ef)"); 2852 m = p.matcher(blah); 2853 result = new StringBuffer(); 2854 try { 2855 m.appendReplacement(result, "$1w$2w$3"); 2856 failCount++; 2857 } catch (IllegalStateException e) { 2858 } 2859 m.find(); 2860 m.appendReplacement(result, "$1w$2w$3"); 2861 if (!result.toString().equals("zzzabwcdwef")) 2862 failCount++; 2863 2864 m.appendTail(result); 2865 if (!result.toString().equals("zzzabwcdwefzzz")) 2866 failCount++; 2867 2868 // SB substitution with groups and three matches 2869 // skipping middle match 2870 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2871 p = Pattern.compile("(ab)(cd*)"); 2872 m = p.matcher(blah); 2873 result = new StringBuffer(); 2874 try { 2875 m.appendReplacement(result, "$1"); 2876 failCount++; 2877 } catch (IllegalStateException e) { 2878 } 2879 
m.find(); 2880 m.appendReplacement(result, "$1"); 2881 if (!result.toString().equals("zzzab")) 2882 failCount++; 2883 2884 m.find(); 2885 m.find(); 2886 m.appendReplacement(result, "$2"); 2887 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2888 failCount++; 2889 2890 m.appendTail(result); 2891 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2892 failCount++; 2893 2894 // Check to make sure escaped $ is ignored 2895 blah = "zzzabcdcdefzzz"; 2896 p = Pattern.compile("(ab)(cd)*(ef)"); 2897 m = p.matcher(blah); 2898 result = new StringBuffer(); 2899 m.find(); 2900 m.appendReplacement(result, "$1w\\$2w$3"); 2901 if (!result.toString().equals("zzzabw$2wef")) 2902 failCount++; 2903 2904 m.appendTail(result); 2905 if (!result.toString().equals("zzzabw$2wefzzz")) 2906 failCount++; 2907 2908 // Check to make sure a reference to nonexistent group causes error 2909 blah = "zzzabcdcdefzzz"; 2910 p = Pattern.compile("(ab)(cd)*(ef)"); 2911 m = p.matcher(blah); 2912 result = new StringBuffer(); 2913 m.find(); 2914 try { 2915 m.appendReplacement(result, "$1w$5w$3"); 2916 failCount++; 2917 } catch (IndexOutOfBoundsException ioobe) { 2918 // Correct result 2919 } 2920 2921 // Check double digit group references 2922 blah = "zzz123456789101112zzz"; 2923 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2924 m = p.matcher(blah); 2925 result = new StringBuffer(); 2926 m.find(); 2927 m.appendReplacement(result, "$1w$11w$3"); 2928 if (!result.toString().equals("zzz1w11w3")) 2929 failCount++; 2930 2931 // Check to make sure it backs off $15 to $1 if only three groups 2932 blah = "zzzabcdcdefzzz"; 2933 p = Pattern.compile("(ab)(cd)*(ef)"); 2934 m = p.matcher(blah); 2935 result = new StringBuffer(); 2936 m.find(); 2937 m.appendReplacement(result, "$1w$15w$3"); 2938 if (!result.toString().equals("zzzabwab5wef")) 2939 failCount++; 2940 2941 2942 // Supplementary character test 2943 // SB substitution with literal 2944 blah = toSupplementaries("zzzblahzzz"); 2945 p = 
Pattern.compile(toSupplementaries("blah")); 2946 m = p.matcher(blah); 2947 result = new StringBuffer(); 2948 try { 2949 m.appendReplacement(result, toSupplementaries("blech")); 2950 failCount++; 2951 } catch (IllegalStateException e) { 2952 } 2953 m.find(); 2954 m.appendReplacement(result, toSupplementaries("blech")); 2955 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2956 failCount++; 2957 2958 m.appendTail(result); 2959 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2960 failCount++; 2961 2962 // SB substitution with groups 2963 blah = toSupplementaries("zzzabcdzzz"); 2964 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2965 m = p.matcher(blah); 2966 result = new StringBuffer(); 2967 try { 2968 m.appendReplacement(result, "$1"); 2969 failCount++; 2970 } catch (IllegalStateException e) { 2971 } 2972 m.find(); 2973 m.appendReplacement(result, "$1"); 2974 if (!result.toString().equals(toSupplementaries("zzzab"))) 2975 failCount++; 2976 2977 m.appendTail(result); 2978 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 2979 failCount++; 2980 2981 // SB substitution with 3 groups 2982 blah = toSupplementaries("zzzabcdcdefzzz"); 2983 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2984 m = p.matcher(blah); 2985 result = new StringBuffer(); 2986 try { 2987 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2988 failCount++; 2989 } catch (IllegalStateException e) { 2990 } 2991 m.find(); 2992 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2993 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 2994 failCount++; 2995 2996 m.appendTail(result); 2997 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 2998 failCount++; 2999 3000 // SB substitution with groups and three matches 3001 // skipping middle match 3002 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3003 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3004 m = p.matcher(blah); 3005 result = new 
StringBuffer(); 3006 try { 3007 m.appendReplacement(result, "$1"); 3008 failCount++; 3009 } catch (IllegalStateException e) { 3010 } 3011 m.find(); 3012 m.appendReplacement(result, "$1"); 3013 if (!result.toString().equals(toSupplementaries("zzzab"))) 3014 failCount++; 3015 3016 m.find(); 3017 m.find(); 3018 m.appendReplacement(result, "$2"); 3019 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3020 failCount++; 3021 3022 m.appendTail(result); 3023 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3024 failCount++; 3025 3026 // Check to make sure escaped $ is ignored 3027 blah = toSupplementaries("zzzabcdcdefzzz"); 3028 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3029 m = p.matcher(blah); 3030 result = new StringBuffer(); 3031 m.find(); 3032 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3033 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3034 failCount++; 3035 3036 m.appendTail(result); 3037 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3038 failCount++; 3039 3040 // Check to make sure a reference to nonexistent group causes error 3041 blah = toSupplementaries("zzzabcdcdefzzz"); 3042 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3043 m = p.matcher(blah); 3044 result = new StringBuffer(); 3045 m.find(); 3046 try { 3047 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3048 failCount++; 3049 } catch (IndexOutOfBoundsException ioobe) { 3050 // Correct result 3051 } 3052 3053 // Check double digit group references 3054 blah = toSupplementaries("zzz123456789101112zzz"); 3055 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3056 m = p.matcher(blah); 3057 result = new StringBuffer(); 3058 m.find(); 3059 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3060 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3061 failCount++; 3062 3063 // Check to make sure it backs off $15 to $1 if only three groups 
3064 blah = toSupplementaries("zzzabcdcdefzzz"); 3065 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3066 m = p.matcher(blah); 3067 result = new StringBuffer(); 3068 m.find(); 3069 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3070 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3071 failCount++; 3072 3073 // Check nothing has been appended into the output buffer if 3074 // the replacement string triggers IllegalArgumentException. 3075 p = Pattern.compile("(abc)"); 3076 m = p.matcher("abcd"); 3077 result = new StringBuffer(); 3078 m.find(); 3079 try { 3080 m.appendReplacement(result, ("xyz$g")); 3081 failCount++; 3082 } catch (IllegalArgumentException iae) { 3083 if (result.length() != 0) 3084 failCount++; 3085 } 3086 3087 report("SB Substitution"); 3088 } 3089 3090 /** 3091 * Tests the usage of Matcher.appendReplacement() with literal 3092 * and group substitutions. 3093 */ 3094 private static void stringbuilderSubstitute() throws Exception { 3095 // SB substitution with literal 3096 String blah = "zzzblahzzz"; 3097 Pattern p = Pattern.compile("blah"); 3098 Matcher m = p.matcher(blah); 3099 StringBuilder result = new StringBuilder(); 3100 try { 3101 m.appendReplacement(result, "blech"); 3102 failCount++; 3103 } catch (IllegalStateException e) { 3104 } 3105 m.find(); 3106 m.appendReplacement(result, "blech"); 3107 if (!result.toString().equals("zzzblech")) 3108 failCount++; 3109 3110 m.appendTail(result); 3111 if (!result.toString().equals("zzzblechzzz")) 3112 failCount++; 3113 3114 // SB substitution with groups 3115 blah = "zzzabcdzzz"; 3116 p = Pattern.compile("(ab)(cd)*"); 3117 m = p.matcher(blah); 3118 result = new StringBuilder(); 3119 try { 3120 m.appendReplacement(result, "$1"); 3121 failCount++; 3122 } catch (IllegalStateException e) { 3123 } 3124 m.find(); 3125 m.appendReplacement(result, "$1"); 3126 if (!result.toString().equals("zzzab")) 3127 failCount++; 3128 3129 m.appendTail(result); 3130 if 
(!result.toString().equals("zzzabzzz")) 3131 failCount++; 3132 3133 // SB substitution with 3 groups 3134 blah = "zzzabcdcdefzzz"; 3135 p = Pattern.compile("(ab)(cd)*(ef)"); 3136 m = p.matcher(blah); 3137 result = new StringBuilder(); 3138 try { 3139 m.appendReplacement(result, "$1w$2w$3"); 3140 failCount++; 3141 } catch (IllegalStateException e) { 3142 } 3143 m.find(); 3144 m.appendReplacement(result, "$1w$2w$3"); 3145 if (!result.toString().equals("zzzabwcdwef")) 3146 failCount++; 3147 3148 m.appendTail(result); 3149 if (!result.toString().equals("zzzabwcdwefzzz")) 3150 failCount++; 3151 3152 // SB substitution with groups and three matches 3153 // skipping middle match 3154 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3155 p = Pattern.compile("(ab)(cd*)"); 3156 m = p.matcher(blah); 3157 result = new StringBuilder(); 3158 try { 3159 m.appendReplacement(result, "$1"); 3160 failCount++; 3161 } catch (IllegalStateException e) { 3162 } 3163 m.find(); 3164 m.appendReplacement(result, "$1"); 3165 if (!result.toString().equals("zzzab")) 3166 failCount++; 3167 3168 m.find(); 3169 m.find(); 3170 m.appendReplacement(result, "$2"); 3171 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3172 failCount++; 3173 3174 m.appendTail(result); 3175 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3176 failCount++; 3177 3178 // Check to make sure escaped $ is ignored 3179 blah = "zzzabcdcdefzzz"; 3180 p = Pattern.compile("(ab)(cd)*(ef)"); 3181 m = p.matcher(blah); 3182 result = new StringBuilder(); 3183 m.find(); 3184 m.appendReplacement(result, "$1w\\$2w$3"); 3185 if (!result.toString().equals("zzzabw$2wef")) 3186 failCount++; 3187 3188 m.appendTail(result); 3189 if (!result.toString().equals("zzzabw$2wefzzz")) 3190 failCount++; 3191 3192 // Check to make sure a reference to nonexistent group causes error 3193 blah = "zzzabcdcdefzzz"; 3194 p = Pattern.compile("(ab)(cd)*(ef)"); 3195 m = p.matcher(blah); 3196 result = new StringBuilder(); 3197 m.find(); 3198 try { 3199 
m.appendReplacement(result, "$1w$5w$3"); 3200 failCount++; 3201 } catch (IndexOutOfBoundsException ioobe) { 3202 // Correct result 3203 } 3204 3205 // Check double digit group references 3206 blah = "zzz123456789101112zzz"; 3207 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3208 m = p.matcher(blah); 3209 result = new StringBuilder(); 3210 m.find(); 3211 m.appendReplacement(result, "$1w$11w$3"); 3212 if (!result.toString().equals("zzz1w11w3")) 3213 failCount++; 3214 3215 // Check to make sure it backs off $15 to $1 if only three groups 3216 blah = "zzzabcdcdefzzz"; 3217 p = Pattern.compile("(ab)(cd)*(ef)"); 3218 m = p.matcher(blah); 3219 result = new StringBuilder(); 3220 m.find(); 3221 m.appendReplacement(result, "$1w$15w$3"); 3222 if (!result.toString().equals("zzzabwab5wef")) 3223 failCount++; 3224 3225 3226 // Supplementary character test 3227 // SB substitution with literal 3228 blah = toSupplementaries("zzzblahzzz"); 3229 p = Pattern.compile(toSupplementaries("blah")); 3230 m = p.matcher(blah); 3231 result = new StringBuilder(); 3232 try { 3233 m.appendReplacement(result, toSupplementaries("blech")); 3234 failCount++; 3235 } catch (IllegalStateException e) { 3236 } 3237 m.find(); 3238 m.appendReplacement(result, toSupplementaries("blech")); 3239 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3240 failCount++; 3241 m.appendTail(result); 3242 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3243 failCount++; 3244 3245 // SB substitution with groups 3246 blah = toSupplementaries("zzzabcdzzz"); 3247 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3248 m = p.matcher(blah); 3249 result = new StringBuilder(); 3250 try { 3251 m.appendReplacement(result, "$1"); 3252 failCount++; 3253 } catch (IllegalStateException e) { 3254 } 3255 m.find(); 3256 m.appendReplacement(result, "$1"); 3257 if (!result.toString().equals(toSupplementaries("zzzab"))) 3258 failCount++; 3259 3260 m.appendTail(result); 3261 if 
(!result.toString().equals(toSupplementaries("zzzabzzz"))) 3262 failCount++; 3263 3264 // SB substitution with 3 groups 3265 blah = toSupplementaries("zzzabcdcdefzzz"); 3266 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3267 m = p.matcher(blah); 3268 result = new StringBuilder(); 3269 try { 3270 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3271 failCount++; 3272 } catch (IllegalStateException e) { 3273 } 3274 m.find(); 3275 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3276 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3277 failCount++; 3278 3279 m.appendTail(result); 3280 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3281 failCount++; 3282 3283 // SB substitution with groups and three matches 3284 // skipping middle match 3285 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3286 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3287 m = p.matcher(blah); 3288 result = new StringBuilder(); 3289 try { 3290 m.appendReplacement(result, "$1"); 3291 failCount++; 3292 } catch (IllegalStateException e) { 3293 } 3294 m.find(); 3295 m.appendReplacement(result, "$1"); 3296 if (!result.toString().equals(toSupplementaries("zzzab"))) 3297 failCount++; 3298 3299 m.find(); 3300 m.find(); 3301 m.appendReplacement(result, "$2"); 3302 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3303 failCount++; 3304 3305 m.appendTail(result); 3306 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3307 failCount++; 3308 3309 // Check to make sure escaped $ is ignored 3310 blah = toSupplementaries("zzzabcdcdefzzz"); 3311 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3312 m = p.matcher(blah); 3313 result = new StringBuilder(); 3314 m.find(); 3315 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3316 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3317 failCount++; 3318 3319 m.appendTail(result); 3320 if 
(!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3321 failCount++; 3322 3323 // Check to make sure a reference to nonexistent group causes error 3324 blah = toSupplementaries("zzzabcdcdefzzz"); 3325 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3326 m = p.matcher(blah); 3327 result = new StringBuilder(); 3328 m.find(); 3329 try { 3330 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3331 failCount++; 3332 } catch (IndexOutOfBoundsException ioobe) { 3333 // Correct result 3334 } 3335 // Check double digit group references 3336 blah = toSupplementaries("zzz123456789101112zzz"); 3337 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3338 m = p.matcher(blah); 3339 result = new StringBuilder(); 3340 m.find(); 3341 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3342 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3343 failCount++; 3344 3345 // Check to make sure it backs off $15 to $1 if only three groups 3346 blah = toSupplementaries("zzzabcdcdefzzz"); 3347 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3348 m = p.matcher(blah); 3349 result = new StringBuilder(); 3350 m.find(); 3351 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3352 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3353 failCount++; 3354 // Check nothing has been appended into the output buffer if 3355 // the replacement string triggers IllegalArgumentException. 3356 p = Pattern.compile("(abc)"); 3357 m = p.matcher("abcd"); 3358 result = new StringBuilder(); 3359 m.find(); 3360 try { 3361 m.appendReplacement(result, ("xyz$g")); 3362 failCount++; 3363 } catch (IllegalArgumentException iae) { 3364 if (result.length() != 0) 3365 failCount++; 3366 } 3367 report("SB Substitution 2"); 3368 } 3369 3370 /* 3371 * 5 groups of characters are created to make a substitution string. 
3372 * A base string will be created including random lead chars, the 3373 * substitution string, and random trailing chars. 3374 * A pattern containing the 5 groups is searched for and replaced with: 3375 * random group + random string + random group. 3376 * The results are checked for correctness. 3377 */ 3378 private static void substitutionBasher() { 3379 for (int runs = 0; runs<1000; runs++) { 3380 // Create a base string to work in 3381 int leadingChars = generator.nextInt(10); 3382 StringBuffer baseBuffer = new StringBuffer(100); 3383 String leadingString = getRandomAlphaString(leadingChars); 3384 baseBuffer.append(leadingString); 3385 3386 // Create 5 groups of random number of random chars 3387 // Create the string to substitute 3388 // Create the pattern string to search for 3389 StringBuffer bufferToSub = new StringBuffer(25); 3390 StringBuffer bufferToPat = new StringBuffer(50); 3391 String[] groups = new String[5]; 3392 for(int i=0; i<5; i++) { 3393 int aGroupSize = generator.nextInt(5)+1; 3394 groups[i] = getRandomAlphaString(aGroupSize); 3395 bufferToSub.append(groups[i]); 3396 bufferToPat.append('('); 3397 bufferToPat.append(groups[i]); 3398 bufferToPat.append(')'); 3399 } 3400 String stringToSub = bufferToSub.toString(); 3401 String pattern = bufferToPat.toString(); 3402 3403 // Place sub string into working string at random index 3404 baseBuffer.append(stringToSub); 3405 3406 // Append random chars to end 3407 int trailingChars = generator.nextInt(10); 3408 String trailingString = getRandomAlphaString(trailingChars); 3409 baseBuffer.append(trailingString); 3410 String baseString = baseBuffer.toString(); 3411 3412 // Create test pattern and matcher 3413 Pattern p = Pattern.compile(pattern); 3414 Matcher m = p.matcher(baseString); 3415 3416 // Reject candidate if pattern happens to start early 3417 m.find(); 3418 if (m.start() < leadingChars) 3419 continue; 3420 3421 // Reject candidate if more than one match 3422 if (m.find()) 3423 continue; 3424 
3425 // Construct a replacement string with : 3426 // random group + random string + random group 3427 StringBuffer bufferToRep = new StringBuffer(); 3428 int groupIndex1 = generator.nextInt(5); 3429 bufferToRep.append("$" + (groupIndex1 + 1)); 3430 String randomMidString = getRandomAlphaString(5); 3431 bufferToRep.append(randomMidString); 3432 int groupIndex2 = generator.nextInt(5); 3433 bufferToRep.append("$" + (groupIndex2 + 1)); 3434 String replacement = bufferToRep.toString(); 3435 3436 // Do the replacement 3437 String result = m.replaceAll(replacement); 3438 3439 // Construct expected result 3440 StringBuffer bufferToRes = new StringBuffer(); 3441 bufferToRes.append(leadingString); 3442 bufferToRes.append(groups[groupIndex1]); 3443 bufferToRes.append(randomMidString); 3444 bufferToRes.append(groups[groupIndex2]); 3445 bufferToRes.append(trailingString); 3446 String expectedResult = bufferToRes.toString(); 3447 3448 // Check results 3449 if (!result.equals(expectedResult)) 3450 failCount++; 3451 } 3452 3453 report("Substitution Basher"); 3454 } 3455 3456 /* 3457 * 5 groups of characters are created to make a substitution string. 3458 * A base string will be created including random lead chars, the 3459 * substitution string, and random trailing chars. 3460 * A pattern containing the 5 groups is searched for and replaced with: 3461 * random group + random string + random group. 3462 * The results are checked for correctness. 
3463 */ 3464 private static void substitutionBasher2() { 3465 for (int runs = 0; runs<1000; runs++) { 3466 // Create a base string to work in 3467 int leadingChars = generator.nextInt(10); 3468 StringBuilder baseBuffer = new StringBuilder(100); 3469 String leadingString = getRandomAlphaString(leadingChars); 3470 baseBuffer.append(leadingString); 3471 3472 // Create 5 groups of random number of random chars 3473 // Create the string to substitute 3474 // Create the pattern string to search for 3475 StringBuilder bufferToSub = new StringBuilder(25); 3476 StringBuilder bufferToPat = new StringBuilder(50); 3477 String[] groups = new String[5]; 3478 for(int i=0; i<5; i++) { 3479 int aGroupSize = generator.nextInt(5)+1; 3480 groups[i] = getRandomAlphaString(aGroupSize); 3481 bufferToSub.append(groups[i]); 3482 bufferToPat.append('('); 3483 bufferToPat.append(groups[i]); 3484 bufferToPat.append(')'); 3485 } 3486 String stringToSub = bufferToSub.toString(); 3487 String pattern = bufferToPat.toString(); 3488 3489 // Place sub string into working string at random index 3490 baseBuffer.append(stringToSub); 3491 3492 // Append random chars to end 3493 int trailingChars = generator.nextInt(10); 3494 String trailingString = getRandomAlphaString(trailingChars); 3495 baseBuffer.append(trailingString); 3496 String baseString = baseBuffer.toString(); 3497 3498 // Create test pattern and matcher 3499 Pattern p = Pattern.compile(pattern); 3500 Matcher m = p.matcher(baseString); 3501 3502 // Reject candidate if pattern happens to start early 3503 m.find(); 3504 if (m.start() < leadingChars) 3505 continue; 3506 3507 // Reject candidate if more than one match 3508 if (m.find()) 3509 continue; 3510 3511 // Construct a replacement string with : 3512 // random group + random string + random group 3513 StringBuilder bufferToRep = new StringBuilder(); 3514 int groupIndex1 = generator.nextInt(5); 3515 bufferToRep.append("$" + (groupIndex1 + 1)); 3516 String randomMidString = 
getRandomAlphaString(5); 3517 bufferToRep.append(randomMidString); 3518 int groupIndex2 = generator.nextInt(5); 3519 bufferToRep.append("$" + (groupIndex2 + 1)); 3520 String replacement = bufferToRep.toString(); 3521 3522 // Do the replacement 3523 String result = m.replaceAll(replacement); 3524 3525 // Construct expected result 3526 StringBuilder bufferToRes = new StringBuilder(); 3527 bufferToRes.append(leadingString); 3528 bufferToRes.append(groups[groupIndex1]); 3529 bufferToRes.append(randomMidString); 3530 bufferToRes.append(groups[groupIndex2]); 3531 bufferToRes.append(trailingString); 3532 String expectedResult = bufferToRes.toString(); 3533 3534 // Check results 3535 if (!result.equals(expectedResult)) { 3536 failCount++; 3537 } 3538 } 3539 3540 report("Substitution Basher 2"); 3541 } 3542 3543 /** 3544 * Checks the handling of some escape sequences that the Pattern 3545 * class should process instead of the java compiler. These are 3546 * not in the file because the escapes should be be processed 3547 * by the Pattern class when the regex is compiled. 3548 */ 3549 private static void escapes() throws Exception { 3550 Pattern p = Pattern.compile("\\043"); 3551 Matcher m = p.matcher("#"); 3552 if (!m.find()) 3553 failCount++; 3554 3555 p = Pattern.compile("\\x23"); 3556 m = p.matcher("#"); 3557 if (!m.find()) 3558 failCount++; 3559 3560 p = Pattern.compile("\\u0023"); 3561 m = p.matcher("#"); 3562 if (!m.find()) 3563 failCount++; 3564 3565 report("Escape sequences"); 3566 } 3567 3568 /** 3569 * Checks the handling of blank input situations. These 3570 * tests are incompatible with my test file format. 
3571 */ 3572 private static void blankInput() throws Exception { 3573 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3574 Matcher m = p.matcher(""); 3575 if (m.find()) 3576 failCount++; 3577 3578 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3579 m = p.matcher(""); 3580 if (!m.find()) 3581 failCount++; 3582 3583 p = Pattern.compile("abc"); 3584 m = p.matcher(""); 3585 if (m.find()) 3586 failCount++; 3587 3588 p = Pattern.compile("a*"); 3589 m = p.matcher(""); 3590 if (!m.find()) 3591 failCount++; 3592 3593 report("Blank input"); 3594 } 3595 3596 /** 3597 * Tests the Boyer-Moore pattern matching of a character sequence 3598 * on randomly generated patterns. 3599 */ 3600 private static void bm() throws Exception { 3601 doBnM('a'); 3602 report("Boyer Moore (ASCII)"); 3603 3604 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3605 report("Boyer Moore (Supplementary)"); 3606 } 3607 3608 private static void doBnM(int baseCharacter) throws Exception { 3609 int achar=0; 3610 3611 for (int i=0; i<100; i++) { 3612 // Create a short pattern to search for 3613 int patternLength = generator.nextInt(7) + 4; 3614 StringBuffer patternBuffer = new StringBuffer(patternLength); 3615 String pattern; 3616 retry: for (;;) { 3617 for (int x=0; x<patternLength; x++) { 3618 int ch = baseCharacter + generator.nextInt(26); 3619 if (Character.isSupplementaryCodePoint(ch)) { 3620 patternBuffer.append(Character.toChars(ch)); 3621 } else { 3622 patternBuffer.append((char)ch); 3623 } 3624 } 3625 pattern = patternBuffer.toString(); 3626 3627 // Avoid patterns that start and end with the same substring 3628 // See JDK-6854417 3629 for (int x=1; x < pattern.length(); x++) { 3630 if (pattern.startsWith(pattern.substring(x))) 3631 continue retry; 3632 } 3633 break; 3634 } 3635 Pattern p = Pattern.compile(pattern); 3636 3637 // Create a buffer with random ASCII chars that does 3638 // not match the sample 3639 String toSearch = null; 3640 StringBuffer s = null; 3641 Matcher m = 
p.matcher(""); 3642 do { 3643 s = new StringBuffer(100); 3644 for (int x=0; x<100; x++) { 3645 int ch = baseCharacter + generator.nextInt(26); 3646 if (Character.isSupplementaryCodePoint(ch)) { 3647 s.append(Character.toChars(ch)); 3648 } else { 3649 s.append((char)ch); 3650 } 3651 } 3652 toSearch = s.toString(); 3653 m.reset(toSearch); 3654 } while (m.find()); 3655 3656 // Insert the pattern at a random spot 3657 int insertIndex = generator.nextInt(99); 3658 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3659 insertIndex++; 3660 s = s.insert(insertIndex, pattern); 3661 toSearch = s.toString(); 3662 3663 // Make sure that the pattern is found 3664 m.reset(toSearch); 3665 if (!m.find()) 3666 failCount++; 3667 3668 // Make sure that the match text is the pattern 3669 if (!m.group().equals(pattern)) 3670 failCount++; 3671 3672 // Make sure match occured at insertion point 3673 if (m.start() != insertIndex) 3674 failCount++; 3675 } 3676 } 3677 3678 /** 3679 * Tests the matching of slices on randomly generated patterns. 3680 * The Boyer-Moore optimization is not done on these patterns 3681 * because it uses unicode case folding. 
3682 */ 3683 private static void slice() throws Exception { 3684 doSlice(Character.MAX_VALUE); 3685 report("Slice"); 3686 3687 doSlice(Character.MAX_CODE_POINT); 3688 report("Slice (Supplementary)"); 3689 } 3690 3691 private static void doSlice(int maxCharacter) throws Exception { 3692 Random generator = new Random(); 3693 int achar=0; 3694 3695 for (int i=0; i<100; i++) { 3696 // Create a short pattern to search for 3697 int patternLength = generator.nextInt(7) + 4; 3698 StringBuffer patternBuffer = new StringBuffer(patternLength); 3699 for (int x=0; x<patternLength; x++) { 3700 int randomChar = 0; 3701 while (!Character.isLetterOrDigit(randomChar)) 3702 randomChar = generator.nextInt(maxCharacter); 3703 if (Character.isSupplementaryCodePoint(randomChar)) { 3704 patternBuffer.append(Character.toChars(randomChar)); 3705 } else { 3706 patternBuffer.append((char) randomChar); 3707 } 3708 } 3709 String pattern = patternBuffer.toString(); 3710 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3711 3712 // Create a buffer with random chars that does not match the sample 3713 String toSearch = null; 3714 StringBuffer s = null; 3715 Matcher m = p.matcher(""); 3716 do { 3717 s = new StringBuffer(100); 3718 for (int x=0; x<100; x++) { 3719 int randomChar = 0; 3720 while (!Character.isLetterOrDigit(randomChar)) 3721 randomChar = generator.nextInt(maxCharacter); 3722 if (Character.isSupplementaryCodePoint(randomChar)) { 3723 s.append(Character.toChars(randomChar)); 3724 } else { 3725 s.append((char) randomChar); 3726 } 3727 } 3728 toSearch = s.toString(); 3729 m.reset(toSearch); 3730 } while (m.find()); 3731 3732 // Insert the pattern at a random spot 3733 int insertIndex = generator.nextInt(99); 3734 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3735 insertIndex++; 3736 s = s.insert(insertIndex, pattern); 3737 toSearch = s.toString(); 3738 3739 // Make sure that the pattern is found 3740 m.reset(toSearch); 3741 if (!m.find()) 3742 failCount++; 3743 3744 // 
Make sure that the match text is the pattern 3745 if (!m.group().equals(pattern)) 3746 failCount++; 3747 3748 // Make sure match occured at insertion point 3749 if (m.start() != insertIndex) 3750 failCount++; 3751 } 3752 } 3753 3754 private static void explainFailure(String pattern, String data, 3755 String expected, String actual) { 3756 System.err.println("----------------------------------------"); 3757 System.err.println("Pattern = "+pattern); 3758 System.err.println("Data = "+data); 3759 System.err.println("Expected = " + expected); 3760 System.err.println("Actual = " + actual); 3761 } 3762 3763 private static void explainFailure(String pattern, String data, 3764 Throwable t) { 3765 System.err.println("----------------------------------------"); 3766 System.err.println("Pattern = "+pattern); 3767 System.err.println("Data = "+data); 3768 t.printStackTrace(System.err); 3769 } 3770 3771 // Testing examples from a file 3772 3773 /** 3774 * Goes through the file "TestCases.txt" and creates many patterns 3775 * described in the file, matching the patterns against input lines in 3776 * the file, and comparing the results against the correct results 3777 * also found in the file. The file format is described in comments 3778 * at the head of the file. 3779 */ 3780 private static void processFile(String fileName) throws Exception { 3781 File testCases = new File(System.getProperty("test.src", "."), 3782 fileName); 3783 FileInputStream in = new FileInputStream(testCases); 3784 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3785 3786 // Process next test case. 
3787 String aLine; 3788 while((aLine = r.readLine()) != null) { 3789 // Read a line for pattern 3790 String patternString = grabLine(r); 3791 Pattern p = null; 3792 try { 3793 p = compileTestPattern(patternString); 3794 } catch (PatternSyntaxException e) { 3795 String dataString = grabLine(r); 3796 String expectedResult = grabLine(r); 3797 if (expectedResult.startsWith("error")) 3798 continue; 3799 explainFailure(patternString, dataString, e); 3800 failCount++; 3801 continue; 3802 } 3803 3804 // Read a line for input string 3805 String dataString = grabLine(r); 3806 Matcher m = p.matcher(dataString); 3807 StringBuffer result = new StringBuffer(); 3808 3809 // Check for IllegalStateExceptions before a match 3810 failCount += preMatchInvariants(m); 3811 3812 boolean found = m.find(); 3813 3814 if (found) 3815 failCount += postTrueMatchInvariants(m); 3816 else 3817 failCount += postFalseMatchInvariants(m); 3818 3819 if (found) { 3820 result.append("true "); 3821 result.append(m.group(0) + " "); 3822 } else { 3823 result.append("false "); 3824 } 3825 3826 result.append(m.groupCount()); 3827 3828 if (found) { 3829 for (int i=1; i<m.groupCount()+1; i++) 3830 if (m.group(i) != null) 3831 result.append(" " +m.group(i)); 3832 } 3833 3834 // Read a line for the expected result 3835 String expectedResult = grabLine(r); 3836 3837 if (!result.toString().equals(expectedResult)) { 3838 explainFailure(patternString, dataString, expectedResult, result.toString()); 3839 failCount++; 3840 } 3841 } 3842 3843 report(fileName); 3844 } 3845 3846 private static int preMatchInvariants(Matcher m) { 3847 int failCount = 0; 3848 try { 3849 m.start(); 3850 failCount++; 3851 } catch (IllegalStateException ise) {} 3852 try { 3853 m.end(); 3854 failCount++; 3855 } catch (IllegalStateException ise) {} 3856 try { 3857 m.group(); 3858 failCount++; 3859 } catch (IllegalStateException ise) {} 3860 return failCount; 3861 } 3862 3863 private static int postFalseMatchInvariants(Matcher m) { 3864 int 
failCount = 0; 3865 try { 3866 m.group(); 3867 failCount++; 3868 } catch (IllegalStateException ise) {} 3869 try { 3870 m.start(); 3871 failCount++; 3872 } catch (IllegalStateException ise) {} 3873 try { 3874 m.end(); 3875 failCount++; 3876 } catch (IllegalStateException ise) {} 3877 return failCount; 3878 } 3879 3880 private static int postTrueMatchInvariants(Matcher m) { 3881 int failCount = 0; 3882 //assert(m.start() = m.start(0); 3883 if (m.start() != m.start(0)) 3884 failCount++; 3885 //assert(m.end() = m.end(0); 3886 if (m.start() != m.start(0)) 3887 failCount++; 3888 //assert(m.group() = m.group(0); 3889 if (!m.group().equals(m.group(0))) 3890 failCount++; 3891 try { 3892 m.group(50); 3893 failCount++; 3894 } catch (IndexOutOfBoundsException ise) {} 3895 3896 return failCount; 3897 } 3898 3899 private static Pattern compileTestPattern(String patternString) { 3900 if (!patternString.startsWith("'")) { 3901 return Pattern.compile(patternString); 3902 } 3903 int break1 = patternString.lastIndexOf("'"); 3904 String flagString = patternString.substring( 3905 break1+1, patternString.length()); 3906 patternString = patternString.substring(1, break1); 3907 3908 if (flagString.equals("i")) 3909 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3910 3911 if (flagString.equals("m")) 3912 return Pattern.compile(patternString, Pattern.MULTILINE); 3913 3914 return Pattern.compile(patternString); 3915 } 3916 3917 /** 3918 * Reads a line from the input file. Keeps reading lines until a non 3919 * empty non comment line is read. If the line contains a \n then 3920 * these two characters are replaced by a newline char. If a \\uxxxx 3921 * sequence is read then the sequence is replaced by the unicode char. 
3922 */ 3923 private static String grabLine(BufferedReader r) throws Exception { 3924 int index = 0; 3925 String line = r.readLine(); 3926 while (line.startsWith("//") || line.length() < 1) 3927 line = r.readLine(); 3928 while ((index = line.indexOf("\\n")) != -1) { 3929 StringBuffer temp = new StringBuffer(line); 3930 temp.replace(index, index+2, "\n"); 3931 line = temp.toString(); 3932 } 3933 while ((index = line.indexOf("\\u")) != -1) { 3934 StringBuffer temp = new StringBuffer(line); 3935 String value = temp.substring(index+2, index+6); 3936 char aChar = (char)Integer.parseInt(value, 16); 3937 String unicodeChar = "" + aChar; 3938 temp.replace(index, index+6, unicodeChar); 3939 line = temp.toString(); 3940 } 3941 3942 return line; 3943 } 3944 3945 private static void check(Pattern p, String s, String g, String expected) { 3946 Matcher m = p.matcher(s); 3947 m.find(); 3948 if (!m.group(g).equals(expected) || 3949 s.charAt(m.start(g)) != expected.charAt(0) || 3950 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3951 failCount++; 3952 } 3953 3954 private static void checkReplaceFirst(String p, String s, String r, String expected) 3955 { 3956 if (!expected.equals(Pattern.compile(p) 3957 .matcher(s) 3958 .replaceFirst(r))) 3959 failCount++; 3960 } 3961 3962 private static void checkReplaceAll(String p, String s, String r, String expected) 3963 { 3964 if (!expected.equals(Pattern.compile(p) 3965 .matcher(s) 3966 .replaceAll(r))) 3967 failCount++; 3968 } 3969 3970 private static void checkExpectedFail(String p) { 3971 try { 3972 Pattern.compile(p); 3973 } catch (PatternSyntaxException pse) { 3974 //pse.printStackTrace(); 3975 return; 3976 } 3977 failCount++; 3978 } 3979 3980 private static void checkExpectedIAE(Matcher m, String g) { 3981 m.find(); 3982 try { 3983 m.group(g); 3984 } catch (IllegalArgumentException x) { 3985 //iae.printStackTrace(); 3986 try { 3987 m.start(g); 3988 } catch (IllegalArgumentException xx) { 3989 try { 3990 m.start(g); 
3991 } catch (IllegalArgumentException xxx) { 3992 return; 3993 } 3994 } 3995 } 3996 failCount++; 3997 } 3998 3999 private static void checkExpectedNPE(Matcher m) { 4000 m.find(); 4001 try { 4002 m.group(null); 4003 } catch (NullPointerException x) { 4004 try { 4005 m.start(null); 4006 } catch (NullPointerException xx) { 4007 try { 4008 m.end(null); 4009 } catch (NullPointerException xxx) { 4010 return; 4011 } 4012 } 4013 } 4014 failCount++; 4015 } 4016 4017 private static void namedGroupCaptureTest() throws Exception { 4018 check(Pattern.compile("x+(?<gname>y+)z+"), 4019 "xxxyyyzzz", 4020 "gname", 4021 "yyy"); 4022 4023 check(Pattern.compile("x+(?<gname8>y+)z+"), 4024 "xxxyyyzzz", 4025 "gname8", 4026 "yyy"); 4027 4028 //backref 4029 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4030 check(pattern, "zzzaabcazzz", true); // found "abca" 4031 4032 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4033 "zzzaabcaazzz", true); 4034 4035 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4036 "abcdefabc", true); 4037 4038 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4039 "abcdefghijkk", true); 4040 4041 // Supplementary character tests 4042 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4043 toSupplementaries("zzzaabcazzz"), true); 4044 4045 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4046 toSupplementaries("zzzaabcaazzz"), true); 4047 4048 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4049 toSupplementaries("abcdefabc"), true); 4050 4051 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4052 "(?<gname>" + 4053 toSupplementaries("k)") + "\\k<gname>"), 4054 toSupplementaries("abcdefghijkk"), true); 4055 4056 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4057 "xxxyyyzzzyyy", 4058 "gname", 4059 "yyy"); 4060 4061 //replaceFirst/All 4062 checkReplaceFirst("(?<gn>ab)(c*)", 4063 
"abccczzzabcczzzabccc", 4064 "${gn}", 4065 "abzzzabcczzzabccc"); 4066 4067 checkReplaceAll("(?<gn>ab)(c*)", 4068 "abccczzzabcczzzabccc", 4069 "${gn}", 4070 "abzzzabzzzab"); 4071 4072 4073 checkReplaceFirst("(?<gn>ab)(c*)", 4074 "zzzabccczzzabcczzzabccczzz", 4075 "${gn}", 4076 "zzzabzzzabcczzzabccczzz"); 4077 4078 checkReplaceAll("(?<gn>ab)(c*)", 4079 "zzzabccczzzabcczzzabccczzz", 4080 "${gn}", 4081 "zzzabzzzabzzzabzzz"); 4082 4083 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4084 "zzzabccczzzabcczzzabccczzz", 4085 "${gn2}", 4086 "zzzccczzzabcczzzabccczzz"); 4087 4088 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4089 "zzzabccczzzabcczzzabccczzz", 4090 "${gn2}", 4091 "zzzccczzzcczzzccczzz"); 4092 4093 //toSupplementaries("(ab)(c*)")); 4094 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4095 ")(?<gn2>" + toSupplementaries("c") + "*)", 4096 toSupplementaries("abccczzzabcczzzabccc"), 4097 "${gn1}", 4098 toSupplementaries("abzzzabcczzzabccc")); 4099 4100 4101 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4102 ")(?<gn2>" + toSupplementaries("c") + "*)", 4103 toSupplementaries("abccczzzabcczzzabccc"), 4104 "${gn1}", 4105 toSupplementaries("abzzzabzzzab")); 4106 4107 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4108 ")(?<gn2>" + toSupplementaries("c") + "*)", 4109 toSupplementaries("abccczzzabcczzzabccc"), 4110 "${gn2}", 4111 toSupplementaries("ccczzzabcczzzabccc")); 4112 4113 4114 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4115 ")(?<gn2>" + toSupplementaries("c") + "*)", 4116 toSupplementaries("abccczzzabcczzzabccc"), 4117 "${gn2}", 4118 toSupplementaries("ccczzzcczzzccc")); 4119 4120 checkReplaceFirst("(?<dog>Dog)AndCat", 4121 "zzzDogAndCatzzzDogAndCatzzz", 4122 "${dog}", 4123 "zzzDogzzzDogAndCatzzz"); 4124 4125 4126 checkReplaceAll("(?<dog>Dog)AndCat", 4127 "zzzDogAndCatzzzDogAndCatzzz", 4128 "${dog}", 4129 "zzzDogzzzDogzzz"); 4130 4131 // backref in Matcher & String 4132 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", 
"${gn}").equals("abefij") || 4133 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4134 failCount++; 4135 4136 // negative 4137 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4138 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4139 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4140 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4141 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4142 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4143 "gnameX"); 4144 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4145 report("NamedGroupCapture"); 4146 } 4147 4148 // This is for bug 6919132 4149 private static void nonBmpClassComplementTest() throws Exception { 4150 Pattern p = Pattern.compile("\\P{Lu}"); 4151 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4152 4153 if (m.find() && m.start() == 1) 4154 failCount++; 4155 4156 // from a unicode category 4157 p = Pattern.compile("\\P{Lu}"); 4158 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4159 if (m.find()) 4160 failCount++; 4161 if (!m.hitEnd()) 4162 failCount++; 4163 4164 // block 4165 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4166 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4167 if (m.find() && m.start() == 1) 4168 failCount++; 4169 4170 p = Pattern.compile("\\P{sc=GRANTHA}"); 4171 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4172 if (m.find() && m.start() == 1) 4173 failCount++; 4174 4175 report("NonBmpClassComplement"); 4176 } 4177 4178 private static void unicodePropertiesTest() throws Exception { 4179 // different forms 4180 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4181 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4182 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4183 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4184 
!Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4185 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4186 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4187 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4188 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4189 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4190 failCount++; 4191 4192 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4193 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4194 Matcher lastSM = common; 4195 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4196 4197 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4198 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4199 Matcher lastBM = latin; 4200 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4201 4202 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4203 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4204 continue; // only pick couple code points, they are the same 4205 } 4206 4207 // Unicode Script 4208 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4209 Matcher m; 4210 String str = new String(Character.toChars(cp)); 4211 if (script == lastScript) { 4212 m = lastSM; 4213 m.reset(str); 4214 } else { 4215 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4216 } 4217 if (!m.matches()) { 4218 failCount++; 4219 } 4220 Matcher other = (script == Character.UnicodeScript.COMMON)? 
unknown : common; 4221 other.reset(str); 4222 if (other.matches()) { 4223 failCount++; 4224 } 4225 lastSM = m; 4226 lastScript = script; 4227 4228 // Unicode Block 4229 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4230 if (block == null) { 4231 //System.out.printf("Not a Block: cp=%x%n", cp); 4232 continue; 4233 } 4234 if (block == lastBlock) { 4235 m = lastBM; 4236 m.reset(str); 4237 } else { 4238 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4239 } 4240 if (!m.matches()) { 4241 failCount++; 4242 } 4243 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4244 other.reset(str); 4245 if (other.matches()) { 4246 failCount++; 4247 } 4248 lastBM = m; 4249 lastBlock = block; 4250 } 4251 report("unicodeProperties"); 4252 } 4253 4254 private static void unicodeHexNotationTest() throws Exception { 4255 4256 // negative 4257 checkExpectedFail("\\x{-23}"); 4258 checkExpectedFail("\\x{110000}"); 4259 checkExpectedFail("\\x{}"); 4260 checkExpectedFail("\\x{AB[ef]"); 4261 4262 // codepoint 4263 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4264 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4265 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4266 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4267 4268 // in class 4269 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4270 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4271 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4272 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4273 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4274 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4275 4276 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4277 String s = "A" + new String(Character.toChars(cp)) + "B"; 4278 String hexUTF16 = (cp <= 0xFFFF)? 
String.format("\\u%04x", cp) 4279 : String.format("\\u%04x\\u%04x", 4280 (int) Character.toChars(cp)[0], 4281 (int) Character.toChars(cp)[1]); 4282 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4283 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4284 failCount++; 4285 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4286 failCount++; 4287 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4288 failCount++; 4289 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4290 failCount++; 4291 } 4292 report("unicodeHexNotation"); 4293 } 4294 4295 private static void unicodeClassesTest() throws Exception { 4296 4297 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4298 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4299 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4300 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4301 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4302 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4303 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4304 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4305 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4306 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4307 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4308 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4309 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4310 Matcher bound = Pattern.compile("\\b").matcher(""); 4311 Matcher word = Pattern.compile("\\w++").matcher(""); 4312 // UNICODE_CHARACTER_CLASS 4313 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4314 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4315 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4316 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 
4317 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4318 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4319 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4320 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4321 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4322 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4323 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4324 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4325 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4326 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4327 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4328 // embedded flag (?U) 4329 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4330 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4331 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4332 4333 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4334 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4335 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4336 // properties 4337 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4338 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4339 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4340 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4341 Matcher alphaP = 
Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4342 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4343 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4344 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4345 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4346 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4347 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4348 4349 // javaMethod 4350 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4351 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4352 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4353 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4354 4355 for (int cp = 1; cp < 0x30000; cp++) { 4356 String str = new String(Character.toChars(cp)); 4357 int type = Character.getType(cp); 4358 if (// lower 4359 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4360 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4361 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4362 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4363 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4364 // upper 4365 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4366 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4367 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4368 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4369 // alpha 4370 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4371 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4372 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4373 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4374 // digit 4375 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4376 Character.isDigit(cp) != digitU.reset(str).matches() || 4377 // alnum 4378 
POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4379 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4380 // punct 4381 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4382 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4383 // graph 4384 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4385 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4386 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4387 // blank 4388 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4389 != blank.reset(str).matches() || 4390 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4391 // print 4392 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4393 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4394 // cntrl 4395 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4396 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4397 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4398 // hexdigit 4399 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4400 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4401 // space 4402 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4403 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4404 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4405 // word 4406 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4407 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4408 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4409 // bwordb 4410 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4411 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4412 // properties 4413 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4414 Character.isLetter(cp) != letterP.reset(str).matches()|| 4415 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4416 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4417 (Character.UNASSIGNED == type) == 
definedP.reset(str).matches() || 4418 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4419 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches()) 4420 failCount++; 4421 } 4422 4423 // bounds/word align 4424 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4425 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4426 failCount++; 4427 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4428 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4429 failCount++; 4430 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4431 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4432 failCount++; 4433 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4434 failCount++; 4435 report("unicodePredefinedClasses"); 4436 } 4437 4438 private static void unicodeCharacterNameTest() throws Exception { 4439 4440 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4441 if (!Character.isValidCodePoint(cp) || 4442 Character.getType(cp) == Character.UNASSIGNED) 4443 continue; 4444 String str = new String(Character.toChars(cp)); 4445 // single 4446 String p = "\\N{" + Character.getName(cp) + "}"; 4447 if (!Pattern.compile(p).matcher(str).matches()) { 4448 failCount++; 4449 } 4450 // class[c] 4451 p = "[\\N{" + Character.getName(cp) + "}]"; 4452 if (!Pattern.compile(p).matcher(str).matches()) { 4453 failCount++; 4454 } 4455 } 4456 4457 // range 4458 for (int i = 0; i < 10; i++) { 4459 int start = generator.nextInt(20); 4460 int end = start + generator.nextInt(200); 4461 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4462 String str; 4463 for (int cp = start; cp < end; cp++) { 4464 str = new String(Character.toChars(cp)); 4465 if (!Pattern.compile(p).matcher(str).matches()) { 4466 failCount++; 4467 } 4468 } 4469 str = new String(Character.toChars(end + 10)); 4470 if (Pattern.compile(p).matcher(str).matches()) { 4471 failCount++; 4472 } 4473 } 4474 4475 // slice 4476 for (int i = 0; i < 10; 
i++) { 4477 int n = generator.nextInt(256); 4478 int[] buf = new int[n]; 4479 StringBuffer sb = new StringBuffer(1024); 4480 for (int j = 0; j < n; j++) { 4481 int cp = generator.nextInt(1000); 4482 if (!Character.isValidCodePoint(cp) || 4483 Character.getType(cp) == Character.UNASSIGNED) 4484 cp = 0x4e00; // just use 4e00 4485 sb.append("\\N{" + Character.getName(cp) + "}"); 4486 buf[j] = cp; 4487 } 4488 String p = sb.toString(); 4489 String str = new String(buf, 0, buf.length); 4490 if (!Pattern.compile(p).matcher(str).matches()) { 4491 failCount++; 4492 } 4493 } 4494 report("unicodeCharacterName"); 4495 } 4496 4497 private static void horizontalAndVerticalWSTest() throws Exception { 4498 String hws = new String (new char[] { 4499 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4500 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4501 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4502 0x202f, 0x205f, 0x3000 }); 4503 String vws = new String (new char[] { 4504 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4505 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4506 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4507 failCount++; 4508 if (Pattern.compile("\\H").matcher(hws).find() || 4509 Pattern.compile("[\\H]").matcher(hws).find()) 4510 failCount++; 4511 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4512 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4513 failCount++; 4514 if (Pattern.compile("\\V").matcher(vws).find() || 4515 Pattern.compile("[\\V]").matcher(vws).find()) 4516 failCount++; 4517 String prefix = "abcd"; 4518 String suffix = "efgh"; 4519 String ng = "A"; 4520 for (int i = 0; i < hws.length(); i++) { 4521 String c = String.valueOf(hws.charAt(i)); 4522 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4523 if (!m.find() || !c.equals(m.group())) 4524 failCount++; 4525 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4526 if (!m.find() || !c.equals(m.group())) 4527 failCount++; 4528 4529 m = 
Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4530 if (!m.find() || !ng.equals(m.group())) 4531 failCount++; 4532 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4533 if (!m.find() || !ng.equals(m.group())) 4534 failCount++; 4535 } 4536 for (int i = 0; i < vws.length(); i++) { 4537 String c = String.valueOf(vws.charAt(i)); 4538 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4539 if (!m.find() || !c.equals(m.group())) 4540 failCount++; 4541 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4542 if (!m.find() || !c.equals(m.group())) 4543 failCount++; 4544 4545 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4546 if (!m.find() || !ng.equals(m.group())) 4547 failCount++; 4548 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4549 if (!m.find() || !ng.equals(m.group())) 4550 failCount++; 4551 } 4552 // \v in range is interpreted as 0x0B. 
This is the undocumented behavior 4553 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4554 failCount++; 4555 report("horizontalAndVerticalWSTest"); 4556 } 4557 4558 private static void linebreakTest() throws Exception { 4559 String linebreaks = new String (new char[] { 4560 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4561 String crnl = "\r\n"; 4562 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() || 4563 !Pattern.compile("\\R").matcher(crnl).matches() || 4564 Pattern.compile("\\R\\R").matcher(crnl).matches()) 4565 failCount++; 4566 report("linebreakTest"); 4567 } 4568 4569 // #7189363 4570 private static void branchTest() throws Exception { 4571 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4572 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4573 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4574 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4575 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4576 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4577 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4578 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4579 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4580 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4581 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4582 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4583 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4584 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4585 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4586 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4587 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4588 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4589 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4590 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4591 
!Pattern.compile("(a)?bc|de").matcher("de").matches() || 4592 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4593 failCount++; 4594 report("branchTest"); 4595 } 4596 4597 // This test is for 8007395 4598 private static void groupCurlyNotFoundSuppTest() throws Exception { 4599 String input = "test this as \ud83d\ude0d"; 4600 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4601 "test(.)*(@[a-zA-Z.]+)", 4602 "test([^B])+(@[a-zA-Z.]+)", 4603 "test([^B])*(@[a-zA-Z.]+)", 4604 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4605 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4606 }) { 4607 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4608 .matcher(input); 4609 try { 4610 if (m.find()) { 4611 failCount++; 4612 } 4613 } catch (Exception x) { 4614 failCount++; 4615 } 4616 } 4617 report("GroupCurly NotFoundSupp"); 4618 } 4619 4620 // This test is for 8023647 4621 private static void groupCurlyBackoffTest() throws Exception { 4622 if (!"abc1c".matches("(\\w)+1\\1") || 4623 "abc11".matches("(\\w)+1\\1")) { 4624 failCount++; 4625 } 4626 report("GroupCurly backoff"); 4627 } 4628 4629 // This test is for 8012646 4630 private static void patternAsPredicate() throws Exception { 4631 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4632 4633 if (p.test("")) { 4634 failCount++; 4635 } 4636 if (!p.test("word")) { 4637 failCount++; 4638 } 4639 if (p.test("1234")) { 4640 failCount++; 4641 } 4642 report("Pattern.asPredicate"); 4643 } 4644 4645 // This test is for 8035975 4646 private static void invalidFlags() throws Exception { 4647 for (int flag = 1; flag != 0; flag <<= 1) { 4648 switch (flag) { 4649 case Pattern.CASE_INSENSITIVE: 4650 case Pattern.MULTILINE: 4651 case Pattern.DOTALL: 4652 case Pattern.UNICODE_CASE: 4653 case Pattern.CANON_EQ: 4654 case Pattern.UNIX_LINES: 4655 case Pattern.LITERAL: 4656 case Pattern.UNICODE_CHARACTER_CLASS: 4657 case Pattern.COMMENTS: 4658 // valid flag, continue 4659 break; 4660 default: 4661 try { 4662 
Pattern.compile(".", flag); 4663 failCount++; 4664 } catch (IllegalArgumentException expected) { 4665 } 4666 } 4667 } 4668 report("Invalid compile flags"); 4669 } 4670 4671 private static void grapheme() throws Exception { 4672 Files.lines(Paths.get(System.getProperty("test.src", "."), 4673 "GraphemeBreakTest.txt")) 4674 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4675 .forEach( ln -> { 4676 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4677 // System.out.println(str); 4678 String[] strs = ln.split("\u00f7|\u00d7"); 4679 StringBuilder src = new StringBuilder(); 4680 ArrayList<String> graphemes = new ArrayList<>(); 4681 StringBuilder buf = new StringBuilder(); 4682 int offBk = 0; 4683 for (String str : strs) { 4684 if (str.length() == 0) // first empty str 4685 continue; 4686 int cp = Integer.parseInt(str, 16); 4687 src.appendCodePoint(cp); 4688 buf.appendCodePoint(cp); 4689 offBk += (str.length() + 1); 4690 if (ln.charAt(offBk) == '\u00f7') { // DIV 4691 graphemes.add(buf.toString()); 4692 buf = new StringBuilder(); 4693 } 4694 } 4695 Pattern p = Pattern.compile("\\X"); 4696 Matcher m = p.matcher(src.toString()); 4697 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4698 for (String g : graphemes) { 4699 // System.out.printf(" grapheme:=[%s]%n", g); 4700 // (1) test \\X directly 4701 if (!m.find() || !m.group().equals(g)) { 4702 System.out.println("Failed \\X [" + ln + "] : " + g); 4703 failCount++; 4704 } 4705 // (2) test \\b{g} + \\X via Scanner 4706 boolean hasNext = s.hasNext(p); 4707 // if (!s.hasNext() || !s.next().equals(next)) { 4708 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4709 System.out.println("Failed b{g} [" + ln + "] : " + g); 4710 failCount++; 4711 } 4712 } 4713 }); 4714 // some sanity checks 4715 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4716 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4717 
!Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4718 failCount++; 4719 // make sure "\b{n}" still works 4720 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4721 failCount++; 4722 report("Unicode extended grapheme cluster"); 4723 } 4724 4725 // hangup/timeout if go into exponential backtracking 4726 private static void expoBacktracking() throws Exception { 4727 4728 Object[][] patternMatchers = { 4729 // 6328855 4730 { "(.*\n*)*", 4731 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)", 4732 false }, 4733 // 6192895 4734 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4735 "Hello World this is a test this is a test this is a test A", 4736 true }, 4737 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4738 "Hello World this is a test this is a test this is a test \u4e00 ", 4739 false }, 4740 { " *([a-z0-9]+ *)+", 4741 "hello world this is a test this is a test this is a test A", 4742 false }, 4743 // 4771934 [FIXED] #5013651? 
4744 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4745 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4746 true }, 4747 // 4866249 [FIXED] 4748 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4749 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4750 true }, 4751 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4752 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4753 false }, 4754 // 6345469 4755 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4756 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4757 true }, // --> matched 4758 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4759 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4760 false }, 4761 // 5026912 4762 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4763 "156580451111112225588087755221111111566969655555555", 4764 false}, 4765 // 6988218 4766 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4767 "'%)) order by ANGEBOT.ID", 4768 false}, // find 4769 // 6693451 4770 { "^(\\s*foo\\s*)*$", 4771 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4772 true }, 4773 { "^(\\s*foo\\s*)*$", 4774 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo", 4775 false 4776 }, 4777 // 7006761 4778 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4779 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4780 // 8140212 4781 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4782 
"{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4783 false 4784 }, 4785 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4786 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4787 4788 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4789 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4790 4791 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4792 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4793 4794 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4795 4796 /* not fixed 4797 //8132141 ---> second level exponential backtracking 4798 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4799 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4800 */ 4801 }; 4802 4803 for (Object[] pm : patternMatchers) { 4804 String p = (String)pm[0]; 4805 String s = (String)pm[1]; 4806 boolean r = (Boolean)pm[2]; 4807 if (r != Pattern.compile(p).matcher(s).matches()) { 4808 failCount++; 4809 } 4810 } 4811 } 4812 } --- EOF ---